search/context/Context.java

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
 *
 * Portions Copyright 2011 Jens Elkner.
 */

/**
 * Extracts the matching lines from a source file, since Lucene does not
 * easily provide the match context itself.
 */
package org.opensolaris.opengrok.search.context;

import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.lucene.search.Query;
import org.opensolaris.opengrok.analysis.Definitions;
import org.opensolaris.opengrok.configuration.RuntimeEnvironment;
import org.opensolaris.opengrok.search.Hit;
import org.opensolaris.opengrok.util.IOUtils;
import org.opensolaris.opengrok.web.Util;

/**
 * Search Context: scans a source file (and/or its definitions) for the terms
 * of a query and emits the matching lines, either as HTML written to a
 * {@link Writer} or as {@link Hit} objects added to a list.
 * <p>
 * An instance keeps mutable per-call state (the read buffer, the tokenizer
 * and the {@code alt} flag) and performs no synchronization.
 * NOTE(review): presumably instances are not meant to be shared between
 * threads - confirm against callers.
 *
 * @author  Trond Norbye
 * @version $Revision$
 */
public class Context {

    private static final Logger logger = Logger.getLogger(Context.class.getName());

    /**
     * Upper bound on the number of characters scanned per file when the
     * scan is limited (see {@code limit} in
     * {@link #getContext(Reader, Writer, String, String, String, Definitions, boolean, List)}).
     */
    static final int MAXFILEREAD = 1024 * 1024;

    /** Maximum number of matching lines reported by a limited scan. */
    private static final int MAX_LIMITED_MATCHES = 10;

    /** Line matchers derived from the query, or {@code null} if there are none. */
    private final LineMatcher[] m;

    /** Read buffer for limited scans; only allocated when matchers exist. */
    private char[] buffer;

    /** Tokenizer re-initialized for every scanned file; only set when matchers exist. */
    PlainLineTokenizer tokens;

    /** The query encoded as URI query parameters; only set when matchers exist. */
    String queryAsURI;

    /**
     * Map whose keys tell which fields to look for in the source file, and
     * whose values tell if the field is case insensitive (true for
     * insensitivity, false for sensitivity).
     */
    private static final Map<String, Boolean> tokenFields =
            new HashMap<String, Boolean>();
    static {
        tokenFields.put("full", Boolean.TRUE);
        tokenFields.put("refs", Boolean.FALSE);
        tokenFields.put("defs", Boolean.FALSE);
    }

    /**
     * Toggled on every {@code getContext} call and handed to the produced
     * hits and to the tokenizer.
     * NOTE(review): presumably used for alternating row styling - confirm.
     */
    private boolean alt = true;

    /**
     * Constructs a context generator. If no line matchers can be obtained
     * for the query, the instance stays "empty" (see {@link #isEmpty()}) and
     * neither the buffer, the tokenizer nor the URI form of the query are
     * set up.
     *
     * @param query the query to generate the result for
     * @param queryStrings map from field names to queries against the fields
     */
    public Context(Query query, Map<String, String> queryStrings) {
        QueryMatchers qm = new QueryMatchers();
        m = qm.getMatchers(query, tokenFields);
        if (m != null) {
            buildQueryAsURI(queryStrings);
            buffer = new char[MAXFILEREAD];
            tokens = new PlainLineTokenizer((Reader) null);
        }
    }

    /**
     * Check whether this context has no line matchers (query terms) at all.
     * An empty context never produces any match in {@code getContext}.
     *
     * @return {@code true} if <em>no</em> line matchers are present,
     *      {@code false} if one or more are present.
     * @see #Context(Query, Map)
     */
    public boolean isEmpty() {
        return m == null;
    }

    /**
     * Build the {@code queryAsURI} string that holds the query in a form
     * that's suitable for sending it as part of a URI. The result has the
     * form {@code field1=value1&field2=value2}; the {@code full} field is
     * sent under the parameter name {@code q}.
     *
     * @param subqueries a map containing the query text for each field
     */
    private void buildQueryAsURI(Map<String, String> subqueries) {
        if (subqueries.isEmpty()) {
            queryAsURI = "";
            return;
        }
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<String, String> entry : subqueries.entrySet()) {
            String field = entry.getKey();
            String queryText = entry.getValue();
            if ("full".equals(field)) {
                field = "q"; // bah - search query params should be consistent!
            }
            sb.append(field).append("=")
                .append(Util.uriEncodeQueryValue(queryText)).append('&');
        }
        // drop the trailing '&' appended after the last parameter
        sb.setLength(sb.length() - 1);
        queryAsURI = sb.toString();
    }

    /**
     * Fill {@link #buffer} from the given reader. A single
     * {@code Reader.read} call may return fewer characters than requested
     * and returns {@code -1} at end of stream, so keep reading until the
     * buffer is full or the stream is exhausted.
     *
     * @param in the reader to read from
     * @return the number of characters stored in the buffer (0 if the
     *      stream was already at its end)
     * @throws IOException if reading fails
     */
    private int fillBuffer(Reader in) throws IOException {
        int total = 0;
        while (total < buffer.length) {
            int n = in.read(buffer, total, buffer.length - total);
            if (n < 0) {
                break;
            }
            total += n;
        }
        return total;
    }

    /**
     * Search given tags in the given input stream and write out htmlized
     * results to the given output stream.
     * Closes the given <var>in</var> reader on return.
     * <p>
     * If <var>in</var> is {@code null}, only the definitions in
     * <var>tags</var> are matched: each match is written to <var>out</var>,
     * or, if <var>out</var> is {@code null} as well, added to
     * <var>hits</var>.
     *
     * @param in File to be matched
     * @param out to write the context
     * @param urlPrefix URL prefix to use for generated links. Ignored if
     *      {@code null}.
     * @param morePrefix to link to more... page
     * @param path path of the file
     * @param tags format to highlight defs.
     * @param limit should the number of matching lines be limited? Only
     *      honoured if the quick context scan is enabled in the
     *      configuration; a limited scan looks at no more than
     *      {@link #MAXFILEREAD} characters and stops after 10 matching
     *      lines.
     * @param hits  where to add obtained hits
     * @return Did it get any matching context?
     */
    @SuppressWarnings("boxing")
    public boolean getContext(Reader in, Writer out, String urlPrefix,
        String morePrefix, String path, Definitions tags, boolean limit,
        List<Hit> hits)
    {
        alt = !alt;
        if (m == null) {
            IOUtils.close(in);
            return false;
        }
        boolean anything = false;
        TreeMap<Integer, String[]> matchingTags = null;
        String urlPrefixE =
                (urlPrefix == null) ? "" : Util.uriEncodePath(urlPrefix);
        String pathE = Util.uriEncodePath(path);
        if (tags != null) {
            matchingTags = new TreeMap<Integer, String[]>();
            try {
                for (Definitions.Tag tag : tags.getTags()) {
                    for (int i = 0; i < m.length; i++) {
                        if (m[i].match(tag.symbol) == LineMatcher.MATCHED) {
                            String[] desc = {
                                tag.symbol,                 // matched symbol
                                Integer.toString(tag.line), // line number
                                tag.type,                   // tag type
                                tag.text                    // matching line
                            };
                            if (in == null) {
                                // no file content to scan: report the
                                // definition itself as the match
                                String highlighted =
                                    Util.htmlize(desc[3]).replace(
                                        desc[0], "<b>" + desc[0] + "</b>");
                                if (out == null) {
                                    // without a hit list there is nowhere
                                    // to record the match
                                    if (hits != null) {
                                        hits.add(new Hit(path, highlighted,
                                            desc[1], false, alt));
                                        anything = true;
                                    }
                                } else {
                                    out.write("<a class=\"rsh\" href=\"");
                                    out.write(urlPrefixE);
                                    out.write(pathE);
                                    out.write('#');
                                    out.write(desc[1]);
                                    out.write("\"><span class=\"l\">");
                                    out.write(desc[1]);
                                    out.write("</span> ");
                                    out.write(highlighted);
                                    out.write("</a> <span class=\"rshd\">");
                                    out.write(desc[2]);
                                    out.write("</span><br/>");
                                    anything = true;
                                }
                            } else {
                                // remembered by line number and handed to
                                // the tokenizer below
                                matchingTags.put(tag.line, desc);
                            }
                            // one matching matcher per tag is enough
                            break;
                        }
                    }
                }
            } catch (Exception e) {
                if (hits != null) {
                    // @todo verify why we ignore all exceptions?
                    logger.warning("Could not get context for '" + path + "': "
                        + e.getMessage());
                    logger.log(Level.FINE, "getContext", e);
                }
            }
        }

        // Just to get the matching tags, send a null in
        if (in == null) {
            return anything;
        }
        int charsRead = 0;
        boolean truncated = false;

        // limiting is only honoured when the quick context scan is enabled
        boolean lim = limit;
        if (!RuntimeEnvironment.getConfig().isQuickContextScan()) {
            lim = false;
        }

        if (lim) {
            try {
                charsRead = fillBuffer(in);
                if (charsRead == MAXFILEREAD) {
                    // we probably only read parts of the file, so set the
                    // truncated flag to enable the [all...] link that
                    // requests all matches
                    truncated = true;
                    // truncate to last line read (don't look more than 100
                    // characters back)
                    for (int i = charsRead - 1; i > charsRead - 100; i--) {
                        if (buffer[i] == '\n') {
                            charsRead = i;
                            break;
                        }
                    }
                }
            } catch (IOException e) {
                logger.warning("An error occured while reading data: " + e.getMessage());
                logger.log(Level.FINE, "getContext", e);
                // honour the contract: the reader is closed on every return
                IOUtils.close(in);
                return anything;
            }
            if (charsRead == 0) {
                // empty input: nothing to scan
                IOUtils.close(in);
                return anything;
            }

            tokens.reInit(buffer, charsRead, out, urlPrefixE + pathE + '#',
                matchingTags);
        } else {
            tokens.reInit(in, out, urlPrefixE + pathE + "#", matchingTags);
        }

        if (hits != null) {
            tokens.setAlt(alt);
            tokens.setHitList(hits);
            tokens.setFilename(path);
        }

        try {
            String token;
            int matchState = LineMatcher.NOT_MATCHED;
            int matchedLines = 0;
            while ((token = tokens.yylex()) != null
                    && (!lim || matchedLines < MAX_LIMITED_MATCHES)) {
                for (int i = 0; i < m.length; i++) {
                    matchState = m[i].match(token);
                    if (matchState == LineMatcher.MATCHED) {
                        tokens.printContext();
                        matchedLines++;
                        break;
                    } else if (matchState == LineMatcher.WAIT) {
                        tokens.holdOn();
                    } else {
                        tokens.neverMind();
                    }
                }
            }
            anything = matchedLines > 0;
            tokens.dumpRest();
            // offer a link to the complete result if the scan was cut short
            if (lim && (truncated || matchedLines == MAX_LIMITED_MATCHES)
                    && out != null) {
                out.write("<a href=\"" + morePrefix + pathE
                    + '?' + queryAsURI + "\">[all...]</a>");
            }
        } catch (IOException e) {
            logger.warning("Could not get context for '" + path + "': "
                + e.getMessage());
            logger.log(Level.FINE, "getContext", e);
        } finally {
            IOUtils.close(in);
            // don't close the writer, it belongs to the caller - just flush
            if (out != null) {
                try {
                    out.flush();
                } catch (IOException e) {
                    logger.warning("Failed to flush stream: " + e.getMessage());
                    logger.log(Level.FINE, "getContext", e);
                }
            }
        }
        return anything;
    }
}