/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
 */

376N/Apackage org.opensolaris.opengrok.search;
0N/A
1327N/Aimport java.io.File;
1327N/Aimport java.io.FileInputStream;
1327N/Aimport java.io.FileNotFoundException;
1327N/Aimport java.io.IOException;
1327N/Aimport java.io.InputStreamReader;
1327N/Aimport java.io.Reader;
234N/Aimport java.util.ArrayList;
0N/Aimport java.util.List;
1318N/Aimport java.util.concurrent.ExecutorService;
1318N/Aimport java.util.concurrent.Executors;
428N/Aimport java.util.logging.Level;
1327N/Aimport java.util.logging.Logger;
1327N/A
0N/Aimport org.apache.lucene.document.Document;
350N/Aimport org.apache.lucene.document.Fieldable;
0N/Aimport org.apache.lucene.index.IndexReader;
1318N/Aimport org.apache.lucene.index.MultiReader;
0N/Aimport org.apache.lucene.search.IndexSearcher;
0N/Aimport org.apache.lucene.search.Query;
816N/Aimport org.apache.lucene.search.ScoreDoc;
928N/Aimport org.apache.lucene.search.TopScoreDocCollector;
928N/Aimport org.apache.lucene.store.FSDirectory;
928N/Aimport org.apache.lucene.util.Version;
986N/Aimport org.opensolaris.opengrok.analysis.CompatibleAnalyser;
350N/Aimport org.opensolaris.opengrok.analysis.Definitions;
1185N/Aimport org.opensolaris.opengrok.analysis.FileAnalyzer.Genre;
0N/Aimport org.opensolaris.opengrok.analysis.TagFilter;
1385N/Aimport org.opensolaris.opengrok.analysis.XrefReader;
1470N/Aimport org.opensolaris.opengrok.configuration.Configuration;
234N/Aimport org.opensolaris.opengrok.configuration.Project;
125N/Aimport org.opensolaris.opengrok.configuration.RuntimeEnvironment;
615N/Aimport org.opensolaris.opengrok.history.HistoryException;
0N/Aimport org.opensolaris.opengrok.search.Summary.Fragment;
0N/Aimport org.opensolaris.opengrok.search.context.Context;
0N/Aimport org.opensolaris.opengrok.search.context.HistoryContext;
1195N/Aimport org.opensolaris.opengrok.util.IOUtils;
1419N/Aimport org.opensolaris.opengrok.web.Prefix;
0N/A
0N/A/**
0N/A * This is an encapsulation of the details on how to seach in the index
0N/A * database.
0N/A *
816N/A * @author Trond Norbye 2005
928N/A * @author Lubos Kosco 2010 - upgrade to lucene 3.0.0
1318N/A * @author Lubos Kosco 2011 - upgrade to lucene 3.5.0
1425N/A * @author Lubos Kosco 2012 - upgrade to lucene 3.6.0
0N/A */
0N/Apublic class SearchEngine {
1327N/A private static final Logger logger = Logger.getLogger(SearchEngine.class.getName());
615N/A /** Message text used when logging exceptions thrown when searching. */
615N/A private static final String SEARCH_EXCEPTION_MSG = "Exception searching";
615N/A
947N/A //NOTE below will need to be changed after new lucene upgrade, if they
947N/A //increase the version - every change of below makes us incompatible with the
947N/A //old index and we need to ask for reindex
947N/A /** version of lucene index common for whole app*/
1425N/A public static final Version LUCENE_VERSION=Version.LUCENE_36;
947N/A
0N/A /**
0N/A * Holds value of property definition.
0N/A */
0N/A private String definition;
1190N/A
0N/A /**
0N/A * Holds value of property file.
0N/A */
0N/A private String file;
1190N/A
0N/A /**
0N/A * Holds value of property freetext.
0N/A */
0N/A private String freetext;
1190N/A
0N/A /**
0N/A * Holds value of property history.
0N/A */
0N/A private String history;
1190N/A
0N/A /**
0N/A * Holds value of property symbol.
0N/A */
0N/A private String symbol;
234N/A
0N/A /**
0N/A * Holds value of property indexDatabase.
0N/A */
0N/A private Query query;
986N/A private final CompatibleAnalyser analyzer = new CompatibleAnalyser();
0N/A private Context sourceContext;
0N/A private HistoryContext historyContext;
816N/A private Summarizer summarizer;
816N/A // internal structure to hold the results from lucene
816N/A private final List<org.apache.lucene.document.Document> docs;
456N/A private final char[] content = new char[1024*8];
234N/A private String source;
234N/A private String data;
1238N/A private static final boolean docsScoredInOrder = false;
816N/A
1470N/A int hitsPerPage = RuntimeEnvironment.getConfig().getHitsPerPage();
1470N/A int cachePages= RuntimeEnvironment.getConfig().getCachePages();
816N/A int totalHits=0;
1190N/A
1185N/A private ScoreDoc[] hits;
1185N/A private TopScoreDocCollector collector;
1318N/A private IndexSearcher searcher;
1185N/A boolean allCollected;
816N/A
0N/A /**
0N/A * Creates a new instance of SearchEngine
0N/A */
0N/A public SearchEngine() {
816N/A docs = new ArrayList<org.apache.lucene.document.Document>();
234N/A }
236N/A
985N/A /**
986N/A * Create a QueryBuilder using the fields that have been set on this
986N/A * SearchEngine.
986N/A *
986N/A * @return a query builder
985N/A */
986N/A private QueryBuilder createQueryBuilder() {
986N/A return new QueryBuilder()
1470N/A .setFreetext(freetext)
1470N/A .setDefs(definition)
1470N/A .setRefs(symbol)
1470N/A .setPath(file)
1470N/A .setHist(history);
985N/A }
985N/A
1470N/A /**
1470N/A * Check, whether a query can be build with current information available.
1470N/A * @return {@code true} if a query could be build.
1470N/A */
236N/A public boolean isValidQuery() {
538N/A boolean ret;
986N/A try {
986N/A query = createQueryBuilder().build();
986N/A ret = (query != null);
986N/A } catch (Exception e) {
538N/A ret = false;
236N/A }
986N/A
236N/A return ret;
236N/A }
816N/A
816N/A /**
816N/A *
816N/A * @param paging whether to use paging (if yes, first X pages will load faster)
1190N/A * @param root which db to search
816N/A * @throws IOException
816N/A */
1470N/A @SuppressWarnings("resource")
816N/A private void searchSingleDatabase(File root,boolean paging) throws IOException {
1470N/A IndexReader ireader = null;
1470N/A ireader = IndexReader.open(FSDirectory.open(root));
1470N/A searcher = new IndexSearcher(ireader);
1470N/A collector = TopScoreDocCollector
1470N/A .create(hitsPerPage * cachePages, docsScoredInOrder);
1470N/A searcher.search(query, collector);
1470N/A totalHits = collector.getTotalHits();
1318N/A if (!paging && totalHits>0) {
1470N/A collector = TopScoreDocCollector.create(totalHits, docsScoredInOrder);
1470N/A searcher.search(query, collector);
1190N/A }
816N/A hits = collector.topDocs().scoreDocs;
816N/A for (int i = 0; i < hits.length; i++) {
816N/A int docId = hits[i].doc;
816N/A Document d = searcher.doc(docId);
816N/A docs.add(d);
235N/A }
235N/A }
816N/A
816N/A /**
816N/A *
816N/A * @param paging whether to use paging (if yes, first X pages will load faster)
816N/A * @param root list of projects to search
816N/A * @throws IOException
816N/A */
1470N/A private void searchMultiDatabase(List<Project> root,boolean paging)
1470N/A throws IOException
1470N/A {
1470N/A IndexReader[] subreaders = new IndexReader[root.size()];
1470N/A File droot = new File(RuntimeEnvironment.getConfig().getDataRootFile(), "index");
1470N/A int ii = 0;
816N/A for (Project project : root) {
1470N/A IndexReader ireader = (IndexReader.open(FSDirectory
1470N/A .open(new File(droot,project.getPath()))));
1470N/A subreaders[ii++] = ireader;
816N/A }
1470N/A MultiReader searchables = new MultiReader(subreaders, true);
1470N/A if (Runtime.getRuntime().availableProcessors() > 1) {
1470N/A // TODO there might be a better way for counting this - or we should
1470N/A // honor the command line option here too!
1470N/A int noThreads = 2 + (2 * Runtime.getRuntime().availableProcessors());
1470N/A ExecutorService executor = Executors.newFixedThreadPool(noThreads);
1470N/A searcher = new IndexSearcher(searchables, executor);
1470N/A } else {
1470N/A searcher = new IndexSearcher(searchables);
1470N/A }
1470N/A collector = TopScoreDocCollector
1470N/A .create(hitsPerPage * cachePages, docsScoredInOrder);
1470N/A searcher.search(query, collector);
1470N/A totalHits = collector.getTotalHits();
1470N/A if (!paging && totalHits > 0) {
1470N/A collector = TopScoreDocCollector.create(totalHits, docsScoredInOrder);
1470N/A searcher.search(query, collector);
816N/A }
816N/A hits = collector.topDocs().scoreDocs;
816N/A for (int i = 0; i < hits.length; i++) {
816N/A int docId = hits[i].doc;
816N/A Document d = searcher.doc(docId);
816N/A docs.add(d);
816N/A }
816N/A }
816N/A
1470N/A /**
1470N/A * Get the current query used by this instance. Use {@link #isValidQuery()}
1470N/A * to build one.
1470N/A * @return current query as string
1470N/A */
235N/A public String getQuery() {
235N/A return query.toString();
0N/A }
494N/A
0N/A /**
0N/A * Execute a search. Before calling this function, you must set the
0N/A * appropriate seach critera with the set-functions.
816N/A * Note that this search will return the first cachePages of hitsPerPage, for more you need to call more
0N/A *
830N/A * @return The number of hits
0N/A */
0N/A public int search() {
1470N/A Configuration cfg = RuntimeEnvironment.getConfig();
1470N/A source = cfg.getSourceRoot();
1470N/A data = cfg.getDataRoot();
816N/A docs.clear();
986N/A
986N/A QueryBuilder queryBuilder = createQueryBuilder();
986N/A
986N/A try {
986N/A query = queryBuilder.build();
986N/A if (query != null) {
1470N/A File root = new File(cfg.getDataRootFile(), "index");
234N/A
1470N/A if (cfg.hasProjects()) {
234N/A // search all projects
1470N/A // TODO support paging per project (in search.java)
1470N/A // TODO optimize if only one project by falling back to
1470N/A // SingleDatabase ?
1470N/A searchMultiDatabase(cfg.getProjects(), false);
234N/A } else {
234N/A // search the index database
1470N/A searchSingleDatabase(root, true);
0N/A }
0N/A }
986N/A } catch (Exception e) {
1327N/A logger.warning(SEARCH_EXCEPTION_MSG + ": " + e.getMessage());
1327N/A logger.log(Level.FINE, "search", e);
0N/A }
986N/A
816N/A if (!docs.isEmpty()) {
0N/A sourceContext = null;
816N/A summarizer = null;
0N/A try {
986N/A sourceContext = new Context(query, queryBuilder.getQueries());
1470N/A if (sourceContext.isEmpty()) {
0N/A sourceContext = null;
4N/A }
986N/A summarizer = new Summarizer(query, analyzer);
0N/A } catch (Exception e) {
1470N/A logger.warning("An error occured while creating summary: "
1327N/A + e.getMessage());
1327N/A logger.log(Level.FINE, "search", e);
0N/A }
1190N/A
0N/A historyContext = null;
0N/A try {
0N/A historyContext = new HistoryContext(query);
1470N/A if (historyContext.isEmpty()) {
0N/A historyContext = null;
4N/A }
0N/A } catch (Exception e) {
1470N/A logger.warning("An error occured while getting history context: "
1327N/A + e.getMessage());
1327N/A logger.log(Level.FINE, "search", e);
0N/A }
0N/A }
1470N/A int count = hits == null ? 0 : hits.length;
1470N/A return count;
0N/A }
816N/A
816N/A /**
1470N/A * Get results , if no search was started before, no results are returned
816N/A * this method will requery if end end is more than first query from search,
1470N/A * hence performance hit applies, if you want results in later pages than
1470N/A * number of cachePages
816N/A * also end has to be bigger than start !
816N/A * @param start start of the hit list
816N/A * @param end end of the hit list
1470N/A * @param ret list of results from start to end or null/empty if no search
1470N/A * was started
816N/A */
816N/A public void results(int start, int end, List<Hit> ret) {
816N/A
1470N/A // return if no start search() was done
1470N/A if (hits == null || (end < start) ) {
1185N/A ret.clear();
1185N/A return;
1185N/A }
816N/A
816N/A ret.clear();
816N/A
1470N/A // TODO check if below fits for if end=old hits.length, or it should
1470N/A // include it
1185N/A if (end > hits.length & !allCollected) {
1470N/A // do the requery, we want more than 5 pages
1470N/A collector = TopScoreDocCollector.create(totalHits, docsScoredInOrder);
816N/A try {
816N/A searcher.search(query,collector);
1470N/A } catch (Exception e) {
1470N/A // should never be hit, since search() will hit this before
1327N/A logger.warning(SEARCH_EXCEPTION_MSG + ": " + e.getMessage());
1327N/A logger.log(Level.FINE, "results", e);
816N/A }
816N/A hits = collector.topDocs().scoreDocs;
1470N/A Document d = null;
816N/A for (int i = start; i < hits.length; i++) {
816N/A int docId = hits[i].doc;
816N/A try {
1185N/A d = searcher.doc(docId);
816N/A } catch (Exception e) {
1327N/A logger.warning(SEARCH_EXCEPTION_MSG + ": " + e.getMessage());
1327N/A logger.log(Level.FINE, "results", e);
816N/A }
816N/A docs.add(d);
816N/A }
816N/A allCollected=true;
234N/A }
816N/A
1470N/A // TODO generation of ret(results) could be cashed and consumers of
1470N/A // engine would just print them in whatever form they need, this way we
1470N/A // could get rid of docs. The only problem is that count of docs is
1470N/A // usually smaller than number of results
816N/A for (int ii = start; ii < end; ++ii) {
0N/A boolean alt = (ii % 2 == 0);
0N/A boolean hasContext = false;
0N/A try {
816N/A Document doc = docs.get(ii);
0N/A String filename = doc.get("path");
816N/A
1185N/A Genre genre = Genre.get(doc.get("t"));
350N/A Definitions tags = null;
1190N/A Fieldable tagsField = doc.getFieldable("tags");
350N/A if (tagsField != null) {
928N/A tags = Definitions.deserialize(tagsField.getBinaryValue());
350N/A }
816N/A int nhits = docs.size();
1190N/A
1470N/A if (sourceContext != null) {
0N/A try {
1185N/A if (Genre.PLAIN == genre && (source != null)) {
1470N/A hasContext = sourceContext
1470N/A .getContext(new InputStreamReader(new FileInputStream(source
1470N/A + filename)), null, null, null, filename,
0N/A tags, nhits > 100, ret);
1470N/A } else if (Genre.XREFABLE == genre && data != null
1470N/A && summarizer != null)
1470N/A {
439N/A int l = 0;
1419N/A File file = new File(data + Prefix.XREF_P + filename);
1470N/A @SuppressWarnings("resource")
1385N/A Reader r = new TagFilter(new XrefReader(file));
380N/A try {
1190N/A l = r.read(content);
380N/A } finally {
1195N/A IOUtils.close(r);
380N/A }
1470N/A // TODO FIX below fragmenter according to either
1470N/A // summarizer or context (to get line numbers, might
1470N/A // be hard, since xref writers will need to be fixed
1470N/A // too, they generate just one line of html code now :( )
816N/A Summary sum = summarizer.getSummary(new String(content, 0, l));
0N/A Fragment fragments[] = sum.getFragments();
0N/A for (int jj = 0; jj < fragments.length; ++jj) {
0N/A String match = fragments[jj].toString();
0N/A if (match.length() > 0) {
0N/A if (!fragments[jj].isEllipsis()) {
1470N/A Hit hit = new Hit(filename,
1470N/A fragments[jj].toString(), "", true, alt);
0N/A ret.add(hit);
0N/A }
0N/A hasContext = true;
0N/A }
0N/A }
0N/A } else {
1327N/A logger.warning("Unknown genre '" + genre + "' for '"
1327N/A + filename + "'");
1470N/A hasContext |= sourceContext.getContext(null, null,
1470N/A null, null, filename, tags, false, ret);
0N/A }
0N/A } catch (FileNotFoundException exp) {
1327N/A logger.warning("Couldn't read summary from '"
1327N/A + filename + "': " + exp.getMessage());
1470N/A hasContext |= sourceContext.getContext(null, null, null,
1470N/A null, filename, tags, false, ret);
0N/A }
0N/A }
0N/A if (historyContext != null) {
1470N/A hasContext |= historyContext
1470N/A .getContext(source + filename, filename, ret);
0N/A }
1470N/A if (!hasContext) {
0N/A ret.add(new Hit(filename, "...", "", false, alt));
0N/A }
0N/A } catch (IOException e) {
1327N/A logger.warning(SEARCH_EXCEPTION_MSG + ": " + e.getMessage());
1327N/A logger.log(Level.FINE, "results", e);
350N/A } catch (ClassNotFoundException e) {
1327N/A logger.warning(SEARCH_EXCEPTION_MSG + ": " + e.getMessage());
1327N/A logger.log(Level.FINE, "results", e);
615N/A } catch (HistoryException e) {
1327N/A logger.warning(SEARCH_EXCEPTION_MSG + ": " + e.getMessage());
1327N/A logger.log(Level.FINE, "results", e);
0N/A }
0N/A }
1190N/A
0N/A }
1190N/A
0N/A /**
0N/A * Getter for property definition.
0N/A *
0N/A * @return Value of property definition.
0N/A */
0N/A public String getDefinition() {
1470N/A return definition;
0N/A }
1190N/A
0N/A /**
0N/A * Setter for property definition.
0N/A *
0N/A * @param definition New value of property definition.
0N/A */
0N/A public void setDefinition(String definition) {
0N/A this.definition = definition;
0N/A }
1190N/A
0N/A /**
0N/A * Getter for property file.
0N/A *
0N/A * @return Value of property file.
0N/A */
0N/A public String getFile() {
1470N/A return file;
0N/A }
1190N/A
0N/A /**
0N/A * Setter for property file.
0N/A *
0N/A * @param file New value of property file.
0N/A */
0N/A public void setFile(String file) {
0N/A this.file = file;
0N/A }
1190N/A
0N/A /**
0N/A * Getter for property freetext.
0N/A *
0N/A * @return Value of property freetext.
0N/A */
0N/A public String getFreetext() {
1470N/A return freetext;
0N/A }
1190N/A
0N/A /**
0N/A * Setter for property freetext.
0N/A *
0N/A * @param freetext New value of property freetext.
0N/A */
0N/A public void setFreetext(String freetext) {
0N/A this.freetext = freetext;
0N/A }
1190N/A
0N/A /**
0N/A * Getter for property history.
0N/A *
0N/A * @return Value of property history.
0N/A */
0N/A public String getHistory() {
1470N/A return history;
0N/A }
1190N/A
0N/A /**
0N/A * Setter for property history.
0N/A *
0N/A * @param history New value of property history.
0N/A */
0N/A public void setHistory(String history) {
0N/A this.history = history;
0N/A }
1190N/A
0N/A /**
0N/A * Getter for property symbol.
0N/A *
0N/A * @return Value of property symbol.
0N/A */
0N/A public String getSymbol() {
1470N/A return symbol;
0N/A }
1190N/A
0N/A /**
0N/A * Setter for property symbol.
0N/A *
0N/A * @param symbol New value of property symbol.
0N/A */
0N/A public void setSymbol(String symbol) {
0N/A this.symbol = symbol;
0N/A }
0N/A}