opengrok/search/SearchEngine.java

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
 */

package org.opensolaris.opengrok.search;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.opensolaris.opengrok.analysis.CompatibleAnalyser;
import org.opensolaris.opengrok.analysis.Definitions;
import org.opensolaris.opengrok.analysis.FileAnalyzer.Genre;
import org.opensolaris.opengrok.analysis.TagFilter;
import org.opensolaris.opengrok.analysis.XrefReader;
import org.opensolaris.opengrok.configuration.Configuration;
import org.opensolaris.opengrok.configuration.Project;
import org.opensolaris.opengrok.configuration.RuntimeEnvironment;
import org.opensolaris.opengrok.history.HistoryException;
import org.opensolaris.opengrok.search.Summary.Fragment;
import org.opensolaris.opengrok.search.context.Context;
import org.opensolaris.opengrok.search.context.HistoryContext;
import org.opensolaris.opengrok.util.IOUtils;
import org.opensolaris.opengrok.web.Prefix;

/**
 * This is an encapsulation of the details on how to search in the index
 * database.
 * <p>
 * An instance keeps the state of its most recent search (query, hits, loaded
 * documents, searcher) in unsynchronized fields, so a single instance should
 * not be used by several threads at once. Call {@link #destroy()} when the
 * instance is no longer needed to release the index readers and worker
 * threads held by the last search.
 *
 * @author Trond Norbye 2005
 * @author Lubos Kosco 2010 - upgrade to lucene 3.0.0
 * @author Lubos Kosco 2011 - upgrade to lucene 3.5.0
 * @author Lubos Kosco 2012 - upgrade to lucene 3.6.0
 */
public class SearchEngine {
    private static final Logger logger = Logger.getLogger(SearchEngine.class.getName());
    /** Message text used when logging exceptions thrown when searching. */
    private static final String SEARCH_EXCEPTION_MSG = "Exception searching";

    //NOTE below will need to be changed after new lucene upgrade, if they
    //increase the version - every change of below makes us incompatible with the
    //old index and we need to ask for reindex
    /** version of lucene index common for whole app*/
    public static final Version LUCENE_VERSION=Version.LUCENE_36;

    /**
     * Holds value of property definition.
     */
    private String definition;

    /**
     * Holds value of property file.
     */
    private String file;

    /**
     * Holds value of property freetext.
     */
    private String freetext;

    /**
     * Holds value of property history.
     */
    private String history;

    /**
     * Holds value of property symbol.
     */
    private String symbol;

    /**
     * The query built from the properties above; set by
     * {@link #isValidQuery()} and {@link #search()}.
     */
    private Query query;
    private final CompatibleAnalyser analyzer = new CompatibleAnalyser();
    private Context sourceContext;
    private HistoryContext historyContext;
    private Summarizer summarizer;
    // internal structure to hold the results from lucene; entry i belongs to
    // hits[i], a null entry marks a document that could not be loaded
    private final List<org.apache.lucene.document.Document> docs;
    // scratch buffer for the leading part of an xref file
    private final char[] content = new char[1024*8];
    private String source;
    private String data;
    private static final boolean docsScoredInOrder = false;

    int hitsPerPage = RuntimeEnvironment.getConfig().getHitsPerPage();
    int cachePages= RuntimeEnvironment.getConfig().getCachePages();
    int totalHits=0;

    private ScoreDoc[] hits;
    private TopScoreDocCollector collector;
    private IndexSearcher searcher;
    // thread pool handed to the searcher by searchMultiDatabase(), if any
    private ExecutorService executor;
    boolean allCollected;

    /**
     * Creates a new instance of SearchEngine
     */
    public SearchEngine() {
        docs = new ArrayList<org.apache.lucene.document.Document>();
    }

    /**
     * Create a QueryBuilder using the fields that have been set on this
     * SearchEngine.
     *
     * @return a query builder
     */
    private QueryBuilder createQueryBuilder() {
        return new QueryBuilder()
            .setFreetext(freetext)
            .setDefs(definition)
            .setRefs(symbol)
            .setPath(file)
            .setHist(history);
    }

    /**
     * Check whether a query can be built with the information currently
     * available. As a side effect the built query (possibly {@code null})
     * becomes the current query of this instance.
     *
     * @return {@code true} if a query could be built.
     */
    public boolean isValidQuery() {
        boolean ret;
        try {
            query = createQueryBuilder().build();
            ret = (query != null);
        } catch (Exception e) {
            ret = false;
        }

        return ret;
    }

    /**
     * Log a failure that happened while searching: a short warning plus the
     * full stack trace at FINE level.
     *
     * @param where name of the method the failure is attributed to
     * @param e the failure
     */
    private static void logSearchFailure(String where, Exception e) {
        logger.warning(SEARCH_EXCEPTION_MSG + ": " + e.getMessage());
        logger.log(Level.FINE, where, e);
    }

    /**
     * Close an index reader, logging (instead of propagating) any failure.
     *
     * @param reader the reader to close, may be {@code null}
     */
    private static void closeQuietly(IndexReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException e) {
            logger.log(Level.FINE, "Failed to close index reader", e);
        }
    }

    /**
     * Release the searcher, its index reader and the worker thread pool left
     * over from the previous search, if any.
     */
    private void releaseSearcher() {
        if (searcher != null) {
            IndexReader reader = searcher.getIndexReader();
            try {
                searcher.close();
            } catch (IOException e) {
                logger.log(Level.FINE, "Failed to close index searcher", e);
            }
            // the searcher was built on top of a reader we opened ourselves,
            // so closing that reader is our job as well
            closeQuietly(reader);
            searcher = null;
        }
        if (executor != null) {
            // the pool was created by us and is not shut down by the searcher
            executor.shutdown();
            executor = null;
        }
    }

    /**
     * Release the index readers and worker threads held by this instance.
     * After this call {@link #results(int, int, List)} can only serve hits
     * whose documents have already been loaded; call {@link #search()} again
     * to start over.
     */
    public void destroy() {
        releaseSearcher();
        // without a searcher no further documents can be fetched
        allCollected = true;
    }

    /**
     * Run the current query on the current searcher and load the documents of
     * the collected hits into {@code docs}.
     *
     * @param paging if {@code true} only the first
     *  {@code hitsPerPage * cachePages} hits are collected, otherwise all
     * @throws IOException if searching or loading a document fails
     */
    private void collectHits(boolean paging) throws IOException {
        collector = TopScoreDocCollector
            .create(hitsPerPage * cachePages, docsScoredInOrder);
        searcher.search(query, collector);
        totalHits = collector.getTotalHits();
        if (!paging && totalHits > 0) {
            // second pass sized to hold every hit
            collector = TopScoreDocCollector.create(totalHits, docsScoredInOrder);
            searcher.search(query, collector);
        }
        ScoreDoc[] found = collector.topDocs().scoreDocs;
        for (ScoreDoc hit : found) {
            docs.add(searcher.doc(hit.doc));
        }
        // publish the hits only once their documents are loaded, so that
        // hits and docs always describe the same result set
        hits = found;
        allCollected = found.length >= totalHits;
    }

    /**
     * Search a single index database.
     *
     * @param paging whether to use paging (if yes, first X pages will load faster)
     * @param root which db to search
     * @throws IOException if the index cannot be opened or searched
     */
    @SuppressWarnings("resource")
    private void searchSingleDatabase(File root,boolean paging) throws IOException {
        // the reader is closed by releaseSearcher()
        IndexReader ireader = IndexReader.open(FSDirectory.open(root));
        searcher = new IndexSearcher(ireader);
        collectHits(paging);
    }

    /**
     * Search the index databases of several projects at once.
     *
     * @param paging whether to use paging (if yes, first X pages will load faster)
     * @param root list of projects to search
     * @throws IOException if an index cannot be opened or searched
     */
    private void searchMultiDatabase(List<Project> root,boolean paging)
        throws IOException
    {
        IndexReader[] subreaders = new IndexReader[root.size()];
        File droot = new File(RuntimeEnvironment.getConfig().getDataRootFile(), "index");
        int opened = 0;
        boolean allOpened = false;
        try {
            for (Project project : root) {
                subreaders[opened] = IndexReader.open(FSDirectory
                    .open(new File(droot, project.getPath())));
                opened++;
            }
            allOpened = true;
        } finally {
            if (!allOpened) {
                // do not leak the readers opened before the failure
                for (int i = 0; i < opened; i++) {
                    closeQuietly(subreaders[i]);
                }
            }
        }
        // 'true': closing the MultiReader closes the sub readers as well
        MultiReader searchables = new MultiReader(subreaders, true);
        int processors = Runtime.getRuntime().availableProcessors();
        if (processors > 1) {
            // TODO there might be a better way for counting this - or we should
            // honor the command line option here too!
            int noThreads = 2 + (2 * processors);
            executor = Executors.newFixedThreadPool(noThreads);
            searcher = new IndexSearcher(searchables, executor);
        } else {
            searcher = new IndexSearcher(searchables);
        }
        collectHits(paging);
    }

    /**
     * Get the current query used by this instance. Use {@link #isValidQuery()}
     * to build one.
     * @return current query as string, or {@code null} if no query has been
     *  built yet
     */
    public String getQuery() {
        return query == null ? null : query.toString();
    }

    /**
     * Execute a search. Before calling this function, you must set the
     * appropriate search criteria with the set-functions.
     * <p>
     * When projects are configured all hits are loaded right away. Otherwise
     * only the first {@code hitsPerPage * cachePages} hits are loaded and
     * {@link #results(int, int, List)} fetches the remaining ones on demand.
     * Failures are logged and lead to a result of 0.
     *
     * @return The number of hits loaded by this call
     */
    public int search() {
        Configuration cfg = RuntimeEnvironment.getConfig();
        source = cfg.getSourceRoot();
        data = cfg.getDataRoot();

        // forget everything about the previous search, so that a failure
        // below cannot leave stale hits behind
        docs.clear();
        hits = null;
        collector = null;
        totalHits = 0;
        allCollected = false;
        sourceContext = null;
        summarizer = null;
        historyContext = null;
        releaseSearcher();

        QueryBuilder queryBuilder = createQueryBuilder();

        try {
            query = queryBuilder.build();
            if (query != null) {
                if (cfg.hasProjects()) {
                    // search all projects
                    // TODO support paging per project (in search.java)
                    // TODO optimize if only one project by falling back to
                    // SingleDatabase ?
                    searchMultiDatabase(cfg.getProjects(), false);
                } else {
                    // search the index database
                    File root = new File(cfg.getDataRootFile(), "index");
                    searchSingleDatabase(root, true);
                }
            }
        } catch (Exception e) {
            logSearchFailure("search", e);
        }

        if (!docs.isEmpty()) {
            try {
                sourceContext = new Context(query, queryBuilder.getQueries());
                if (sourceContext.isEmpty()) {
                    sourceContext = null;
                }
                summarizer = new Summarizer(query, analyzer);
            } catch (Exception e) {
                logger.warning("An error occured while creating summary: "
                    + e.getMessage());
                logger.log(Level.FINE, "search", e);
            }

            try {
                historyContext = new HistoryContext(query);
                if (historyContext.isEmpty()) {
                    historyContext = null;
                }
            } catch (Exception e) {
                logger.warning("An error occured while getting history context: "
                    + e.getMessage());
                logger.log(Level.FINE, "search", e);
            }
        }
        return hits == null ? 0 : hits.length;
    }

    /**
     * Re-run the current query collecting all hits and load the documents
     * that have not been loaded yet. If the query fails, the failure is
     * logged and the already loaded hits are kept.
     */
    private void collectRemainingDocs() {
        // only called while hits.length < totalHits, hence totalHits > 0
        TopScoreDocCollector all =
            TopScoreDocCollector.create(totalHits, docsScoredInOrder);
        try {
            searcher.search(query, all);
        } catch (Exception e) {
            // should never be hit, since search() will hit this before
            logSearchFailure("results", e);
            return;
        }
        collector = all;
        hits = all.topDocs().scoreDocs;
        // docs already holds the documents of the leading hits; continue
        // right after them so that docs.get(i) stays the document of hits[i]
        for (int i = docs.size(); i < hits.length; i++) {
            Document d = null;
            try {
                d = searcher.doc(hits[i].doc);
            } catch (Exception e) {
                logSearchFailure("results", e);
            }
            // a null placeholder keeps the positions aligned
            docs.add(d);
        }
        allCollected = true;
    }

    /**
     * Get results; if no search was started before, no results are returned.
     * This method will requery if {@code end} lies beyond the hits loaded by
     * {@link #search()}, hence a performance hit applies if you want results
     * in later pages than the number of cachePages.
     * {@code end} must not be smaller than {@code start}; a range reaching
     * beyond the available hits is cut off at the last hit.
     *
     * @param start start of the hit list (inclusive)
     * @param end end of the hit list (exclusive)
     * @param ret list that is cleared and then filled with the results from
     *  start to end; left empty if no search was started
     */
    public void results(int start, int end, List<Hit> ret) {
        ret.clear();

        // return if no search() was done or the range is inverted
        if (hits == null || end < start) {
            return;
        }

        if (end > hits.length && !allCollected) {
            // do the requery, we want more than the cached pages
            collectRemainingDocs();
        }

        // TODO generation of ret(results) could be cached and consumers of
        // engine would just print them in whatever form they need, this way we
        // could get rid of docs. The only problem is that count of docs is
        // usually smaller than number of results
        int nhits = docs.size();
        int first = Math.max(start, 0);
        int last = Math.min(end, nhits);
        for (int ii = first; ii < last; ++ii) {
            Document doc = docs.get(ii);
            if (doc == null) {
                // loading this document failed earlier and was logged then
                continue;
            }
            boolean alt = (ii % 2 == 0);
            try {
                addHits(doc, alt, nhits, ret);
            } catch (IOException e) {
                logSearchFailure("results", e);
            } catch (ClassNotFoundException e) {
                logSearchFailure("results", e);
            } catch (HistoryException e) {
                logSearchFailure("results", e);
            }
        }
    }

    /**
     * Append the hits of one matching document to {@code ret}. If neither
     * the source context nor the history context yields anything, a single
     * placeholder hit ("...") is added for the file.
     *
     * @param doc the matching document
     * @param alt the alternation flag passed on to the created hits
     * @param nhits number of loaded documents; above 100 the source context
     *  is asked to limit its output
     * @param ret list to append to
     */
    private void addHits(Document doc, boolean alt, int nhits, List<Hit> ret)
        throws IOException, ClassNotFoundException, HistoryException
    {
        String filename = doc.get("path");
        Genre genre = Genre.get(doc.get("t"));
        Definitions tags = null;
        Fieldable tagsField = doc.getFieldable("tags");
        if (tagsField != null) {
            tags = Definitions.deserialize(tagsField.getBinaryValue());
        }

        boolean hasContext = false;
        if (sourceContext != null) {
            try {
                if (Genre.PLAIN == genre && (source != null)) {
                    InputStreamReader in = new InputStreamReader(
                        new FileInputStream(source + filename));
                    try {
                        hasContext = sourceContext.getContext(in, null, null,
                            null, filename, tags, nhits > 100, ret);
                    } finally {
                        // make sure the file is released on every path;
                        // closing an already closed reader has no effect
                        IOUtils.close(in);
                    }
                } else if (Genre.XREFABLE == genre && data != null
                    && summarizer != null)
                {
                    hasContext = addSummaryHits(filename, alt, ret);
                } else {
                    logger.warning("Unknown genre '" + genre + "' for '"
                        + filename + "'");
                    hasContext |= sourceContext.getContext(null, null,
                        null, null, filename, tags, false, ret);
                }
            } catch (FileNotFoundException exp) {
                logger.warning("Couldn't read summary from '"
                    + filename + "': " + exp.getMessage());
                hasContext |= sourceContext.getContext(null, null, null,
                    null, filename, tags, false, ret);
            }
        }
        if (historyContext != null) {
            hasContext |= historyContext
                .getContext(source + filename, filename, ret);
        }
        if (!hasContext) {
            ret.add(new Hit(filename, "...", "", false, alt));
        }
    }

    /**
     * Summarize the beginning of the xref file of {@code filename} and add a
     * hit for every non-empty fragment that is not an ellipsis.
     *
     * @param filename path of the file as stored in the index
     * @param alt the alternation flag passed on to the created hits
     * @param ret list to append to
     * @return {@code true} if the summary had at least one non-empty fragment
     * @throws IOException if the xref file cannot be read
     */
    private boolean addSummaryHits(String filename, boolean alt, List<Hit> ret)
        throws IOException
    {
        int len;
        File xrefFile = new File(data + Prefix.XREF_P + filename);
        @SuppressWarnings("resource")
        Reader r = new TagFilter(new XrefReader(xrefFile));
        try {
            len = r.read(content);
        } finally {
            IOUtils.close(r);
        }
        if (len < 0) {
            // end of stream right away (empty file): nothing was read
            len = 0;
        }
        // TODO FIX below fragmenter according to either
        // summarizer or context (to get line numbers, might
        // be hard, since xref writers will need to be fixed
        // too, they generate just one line of html code now :( )
        Summary sum = summarizer.getSummary(new String(content, 0, len));
        boolean found = false;
        for (Fragment fragment : sum.getFragments()) {
            String match = fragment.toString();
            if (match.length() > 0) {
                if (!fragment.isEllipsis()) {
                    ret.add(new Hit(filename, match, "", true, alt));
                }
                found = true;
            }
        }
        return found;
    }

    /**
     * Getter for property definition.
     *
     * @return Value of property definition.
     */
    public String getDefinition() {
        return definition;
    }

    /**
     * Setter for property definition.
     *
     * @param definition New value of property definition.
     */
    public void setDefinition(String definition) {
        this.definition = definition;
    }

    /**
     * Getter for property file.
     *
     * @return Value of property file.
     */
    public String getFile() {
        return file;
    }

    /**
     * Setter for property file.
     *
     * @param file New value of property file.
     */
    public void setFile(String file) {
        this.file = file;
    }

    /**
     * Getter for property freetext.
     *
     * @return Value of property freetext.
     */
    public String getFreetext() {
        return freetext;
    }

    /**
     * Setter for property freetext.
     *
     * @param freetext New value of property freetext.
     */
    public void setFreetext(String freetext) {
        this.freetext = freetext;
    }

    /**
     * Getter for property history.
     *
     * @return Value of property history.
     */
    public String getHistory() {
        return history;
    }

    /**
     * Setter for property history.
     *
     * @param history New value of property history.
     */
    public void setHistory(String history) {
        this.history = history;
    }

    /**
     * Getter for property symbol.
     *
     * @return Value of property symbol.
     */
    public String getSymbol() {
        return symbol;
    }

    /**
     * Setter for property symbol.
     *
     * @param symbol New value of property symbol.
     */
    public void setSymbol(String symbol) {
        this.symbol = symbol;
    }
}