opengrok/search/SearchEngine.java

	SearchEngine.java revision 1318
0N/A/*
0N/A * CDDL HEADER START
0N/A *
0N/A * The contents of this file are subject to the terms of the
0N/A * Common Development and Distribution License (the "License").
0N/A * You may not use this file except in compliance with the License.
0N/A *
0N/A * See LICENSE.txt included in this distribution for the specific
0N/A * language governing permissions and limitations under the License.
0N/A *
0N/A * When distributing Covered Code, include this CDDL HEADER in each
0N/A * file and include the License file at LICENSE.txt.
0N/A * If applicable, add the following below this CDDL HEADER, with the
405N/A * fields enclosed by brackets "[]" replaced with your own identifying
0N/A * information: Portions Copyright [yyyy] [name of copyright owner]
0N/A *
0N/A * CDDL HEADER END
0N/A */
0N/A
0N/A/*
928N/A * Copyright 2010 Sun Microsystems.  All rights reserved.
0N/A * Use is subject to license terms.
0N/A */
0N/A
376N/Apackage org.opensolaris.opengrok.search;
0N/A
1318N/Aimport java.io.*;
234N/Aimport java.util.ArrayList;
0N/Aimport java.util.List;
1318N/Aimport java.util.concurrent.ExecutorService;
1318N/Aimport java.util.concurrent.Executors;
428N/Aimport java.util.logging.Level;
816N/Aimport java.util.zip.GZIPInputStream;
0N/Aimport org.apache.lucene.document.Document;
350N/Aimport org.apache.lucene.document.Fieldable;
0N/Aimport org.apache.lucene.index.IndexReader;
1318N/Aimport org.apache.lucene.index.MultiReader;
0N/Aimport org.apache.lucene.search.IndexSearcher;
0N/Aimport org.apache.lucene.search.Query;
816N/Aimport org.apache.lucene.search.ScoreDoc;
928N/Aimport org.apache.lucene.search.TopScoreDocCollector;
928N/Aimport org.apache.lucene.store.FSDirectory;
928N/Aimport org.apache.lucene.util.Version;
428N/Aimport org.opensolaris.opengrok.OpenGrokLogger;
986N/Aimport org.opensolaris.opengrok.analysis.CompatibleAnalyser;
350N/Aimport org.opensolaris.opengrok.analysis.Definitions;
1185N/Aimport org.opensolaris.opengrok.analysis.FileAnalyzer.Genre;
0N/Aimport org.opensolaris.opengrok.analysis.TagFilter;
234N/Aimport org.opensolaris.opengrok.configuration.Project;
125N/Aimport org.opensolaris.opengrok.configuration.RuntimeEnvironment;
615N/Aimport org.opensolaris.opengrok.history.HistoryException;
0N/Aimport org.opensolaris.opengrok.search.Summary.Fragment;
0N/Aimport org.opensolaris.opengrok.search.context.Context;
0N/Aimport org.opensolaris.opengrok.search.context.HistoryContext;
1195N/Aimport org.opensolaris.opengrok.util.IOUtils;
0N/A
0N/A/**
0N/A * This is an encapsulation of the details on how to seach in the index
0N/A * database.
0N/A *
816N/A * @author Trond Norbye 2005
928N/A * @author Lubos Kosco 2010 - upgrade to lucene 3.0.0
1318N/A * @author Lubos Kosco 2011 - upgrade to lucene 3.5.0
0N/A */
0N/Apublic class SearchEngine {
615N/A    /** Message text used when logging exceptions thrown when searching. */
615N/A    private static final String SEARCH_EXCEPTION_MSG = "Exception searching";
615N/A
947N/A    //NOTE below will need to be changed after new lucene upgrade, if they
947N/A    //increase the version - every change of below makes us incompatible with the
947N/A    //old index and we need to ask for reindex
947N/A    /** version of lucene index common for whole app*/
1318N/A    public static final Version LUCENE_VERSION=Version.LUCENE_35;
947N/A
0N/A    /**
0N/A     * Holds value of property definition.
0N/A     */
0N/A    private String definition;
1190N/A
0N/A    /**
0N/A     * Holds value of property file.
0N/A     */
0N/A    private String file;
1190N/A
0N/A    /**
0N/A     * Holds value of property freetext.
0N/A     */
0N/A    private String freetext;
1190N/A
0N/A    /**
0N/A     * Holds value of property history.
0N/A     */
0N/A    private String history;
1190N/A
0N/A    /**
0N/A     * Holds value of property symbol.
0N/A     */
0N/A    private String symbol;
234N/A
0N/A    /**
0N/A     * Holds value of property indexDatabase.
0N/A     */
0N/A    private Query query;
986N/A    private final CompatibleAnalyser analyzer = new CompatibleAnalyser();
0N/A    private Context sourceContext;
0N/A    private HistoryContext historyContext;
816N/A    private Summarizer summarizer;
816N/A    // internal structure to hold the results from lucene
816N/A    private final List<org.apache.lucene.document.Document> docs;
456N/A    private final char[] content = new char[1024*8];
234N/A    private String source;
234N/A    private String data;
1238N/A    private static final boolean docsScoredInOrder = false;
816N/A
816N/A    int hitsPerPage = RuntimeEnvironment.getInstance().getHitsPerPage();
816N/A    int cachePages= RuntimeEnvironment.getInstance().getCachePages();
816N/A    int totalHits=0;
1190N/A
1185N/A    private ScoreDoc[] hits;
1185N/A    private TopScoreDocCollector collector;
1318N/A    private IndexSearcher searcher;
1185N/A    boolean allCollected;
816N/A
0N/A    /**
0N/A     * Creates a new instance of SearchEngine
0N/A     */
0N/A    public SearchEngine() {
816N/A        docs = new ArrayList<org.apache.lucene.document.Document>();
234N/A    }
236N/A
985N/A    /**
986N/A     * Create a QueryBuilder using the fields that have been set on this
986N/A     * SearchEngine.
986N/A     *
986N/A     * @return a query builder
985N/A     */
986N/A    private QueryBuilder createQueryBuilder() {
986N/A        return new QueryBuilder()
986N/A                .setFreetext(freetext)
986N/A                .setDefs(definition)
986N/A                .setRefs(symbol)
986N/A                .setPath(file)
986N/A                .setHist(history);
985N/A    }
985N/A
236N/A    public boolean isValidQuery() {
538N/A        boolean ret;
986N/A        try {
986N/A            query = createQueryBuilder().build();
986N/A            ret = (query != null);
986N/A        } catch (Exception e) {
538N/A            ret = false;
236N/A        }
986N/A
236N/A        return ret;
236N/A    }
816N/A
816N/A    /**
816N/A     *
816N/A     * @param paging whether to use paging (if yes, first X pages will load faster)
1190N/A     * @param root which db to search
816N/A     * @throws IOException
816N/A     */
816N/A    private void searchSingleDatabase(File root,boolean paging) throws IOException {
928N/A        IndexReader ireader = IndexReader.open(FSDirectory.open(root),true);
1318N/A        searcher = new IndexSearcher(ireader);
928N/A        collector = TopScoreDocCollector.create(hitsPerPage*cachePages,docsScoredInOrder);
816N/A        searcher.search(query,collector);
816N/A        totalHits=collector.getTotalHits();
1318N/A        if (!paging && totalHits>0) {
928N/A               collector = TopScoreDocCollector.create(totalHits,docsScoredInOrder);
816N/A               searcher.search(query,collector);
1190N/A        }
816N/A        hits = collector.topDocs().scoreDocs;
816N/A        for (int i = 0; i < hits.length; i++) {
816N/A            int docId = hits[i].doc;
816N/A            Document d = searcher.doc(docId);
816N/A            docs.add(d);
235N/A        }
235N/A    }
816N/A
816N/A    /**
816N/A     *
816N/A     * @param paging whether to use paging (if yes, first X pages will load faster)
816N/A     * @param root list of projects to search
816N/A     * @throws IOException
816N/A     */
1318N/A    private void searchMultiDatabase(List<Project> root,boolean paging) throws IOException {
1318N/A        IndexReader[] subreaders=new IndexReader[root.size()];
816N/A        File droot=new File(RuntimeEnvironment.getInstance().getDataRootFile(), "index");
816N/A        int ii=0;
816N/A        for (Project project : root) {
928N/A        IndexReader ireader = (IndexReader.open(FSDirectory.open(new File(droot,project.getPath()) ),true));
1318N/A        subreaders[ii++]=ireader;
816N/A        }
1318N/A        MultiReader searchables=new MultiReader(subreaders, true);
816N/A        if (Runtime.getRuntime().availableProcessors()>1) {
1318N/A            int noThreads = 2 + (2 * Runtime.getRuntime().availableProcessors()); //TODO there might be a better way for counting this - or we should honor the command line option here too!
1318N/A            ExecutorService executor=Executors.newFixedThreadPool(noThreads);
1318N/A            searcher = new IndexSearcher(searchables,executor); }
1318N/A        else { searcher = new IndexSearcher(searchables); }
928N/A        collector = TopScoreDocCollector.create(hitsPerPage*cachePages,docsScoredInOrder);
816N/A        searcher.search(query,collector);
816N/A        totalHits=collector.getTotalHits();
1318N/A        if (!paging && totalHits>0) {
928N/A               collector = TopScoreDocCollector.create(totalHits,docsScoredInOrder);
816N/A               searcher.search(query,collector);
816N/A        }
816N/A        hits = collector.topDocs().scoreDocs;
816N/A        for (int i = 0; i < hits.length; i++) {
816N/A            int docId = hits[i].doc;
816N/A            Document d = searcher.doc(docId);
816N/A            docs.add(d);
816N/A        }
816N/A    }
816N/A
235N/A    public String getQuery() {
235N/A        return query.toString();
0N/A    }
494N/A
0N/A    /**
0N/A     * Execute a search. Before calling this function, you must set the
0N/A     * appropriate seach critera with the set-functions.
816N/A     * Note that this search will return the first cachePages of hitsPerPage, for more you need to call more
0N/A     *
830N/A     * @return The number of hits
0N/A     */
0N/A    public int search() {
235N/A        source = RuntimeEnvironment.getInstance().getSourceRootPath();
235N/A        data = RuntimeEnvironment.getInstance().getDataRootPath();
816N/A        docs.clear();
986N/A
986N/A        QueryBuilder queryBuilder = createQueryBuilder();
986N/A
986N/A        try {
986N/A            query = queryBuilder.build();
986N/A            if (query != null) {
234N/A                RuntimeEnvironment env = RuntimeEnvironment.getInstance();
1190N/A                File root = new File(env.getDataRootFile(), "index");
234N/A
234N/A                if (env.hasProjects()) {
234N/A                    // search all projects
830N/A                    //TODO support paging per project (in search.java)
830N/A                    //TODO optimize if only one project by falling back to SingleDatabase ?
1185N/A                    searchMultiDatabase(env.getProjects(),false);
234N/A                } else {
234N/A                    // search the index database
816N/A                    searchSingleDatabase(root,true);
0N/A                }
0N/A            }
986N/A        } catch (Exception e) {
986N/A            OpenGrokLogger.getLogger().log(
986N/A                    Level.WARNING, SEARCH_EXCEPTION_MSG, e);
0N/A        }
986N/A
816N/A        if (!docs.isEmpty()) {
0N/A            sourceContext = null;
816N/A            summarizer = null;
0N/A            try {
986N/A                sourceContext = new Context(query, queryBuilder.getQueries());
4N/A                if(sourceContext.isEmpty()) {
0N/A                    sourceContext = null;
4N/A                }
986N/A                summarizer = new Summarizer(query, analyzer);
0N/A            } catch (Exception e) {
508N/A                OpenGrokLogger.getLogger().log(Level.WARNING, "An error occured while creating summary", e);
0N/A            }
1190N/A
0N/A            historyContext = null;
0N/A            try {
0N/A                historyContext = new HistoryContext(query);
4N/A                if(historyContext.isEmpty()) {
0N/A                    historyContext = null;
4N/A                }
0N/A            } catch (Exception e) {
508N/A                OpenGrokLogger.getLogger().log(Level.WARNING, "An error occured while getting history context", e);
0N/A            }
0N/A        }
1318N/A        int count=hits==null?0:hits.length;
1318N/A        return count;
0N/A    }
816N/A
816N/A    /**
816N/A     * get results , if no search was started before, no results are returned
816N/A     * this method will requery if end end is more than first query from search,
816N/A     * hence performance hit applies, if you want results in later pages than number of cachePages
816N/A     * also end has to be bigger than start !
816N/A     * @param start start of the hit list
816N/A     * @param end end of the hit list
816N/A     * @param ret list of results from start to end or null/empty if no search was started
816N/A     */
816N/A    public void results(int start, int end, List<Hit> ret) {
816N/A
816N/A        //return if no start search() was done
1185N/A        if (hits == null || (end<start) ) {
1185N/A            ret.clear();
1185N/A            return;
1185N/A        }
816N/A
816N/A        ret.clear();
816N/A
816N/A        //TODO check if below fits for if end=old hits.length, or it should include it
1185N/A        if (end > hits.length & !allCollected) {
816N/A         //do the requery, we want more than 5 pages
928N/A         collector = TopScoreDocCollector.create(totalHits,docsScoredInOrder);
816N/A         try {
816N/A             searcher.search(query,collector);
816N/A         } catch (Exception e) { // this exception should never be hit, since search() will hit this before
816N/A                 OpenGrokLogger.getLogger().log(
816N/A                         Level.WARNING, SEARCH_EXCEPTION_MSG, e);
816N/A         }
816N/A         hits = collector.topDocs().scoreDocs;
816N/A         Document d=null;
816N/A         for (int i = start; i < hits.length; i++) {
816N/A             int docId = hits[i].doc;
816N/A             try {
1185N/A                 d = searcher.doc(docId);
816N/A             }  catch (Exception e) {
816N/A                 OpenGrokLogger.getLogger().log(
816N/A                         Level.SEVERE, SEARCH_EXCEPTION_MSG, e);
816N/A             }
816N/A             docs.add(d);
816N/A         }
816N/A         allCollected=true;
234N/A        }
816N/A
816N/A        //TODO generation of ret(results) could be cashed and consumers of engine would just print them in whatever form they need, this way we could get rid of docs
816N/A        // the only problem is that count of docs is usually smaller than number of results
816N/A        for (int ii = start; ii < end; ++ii) {
0N/A            boolean alt = (ii % 2 == 0);
0N/A            boolean hasContext = false;
0N/A            try {
816N/A                Document doc = docs.get(ii);
0N/A                String filename = doc.get("path");
816N/A
1185N/A                Genre genre = Genre.get(doc.get("t"));
350N/A                Definitions tags = null;
1190N/A                Fieldable tagsField = doc.getFieldable("tags");
350N/A                if (tagsField != null) {
928N/A                    tags = Definitions.deserialize(tagsField.getBinaryValue());
350N/A                }
816N/A                int nhits = docs.size();
1190N/A
0N/A                if(sourceContext != null) {
0N/A                    try {
1185N/A                        if (Genre.PLAIN == genre && (source != null)) {
234N/A                            hasContext = sourceContext.getContext(new InputStreamReader(new FileInputStream(source +
0N/A                                    filename)), null, null, null, filename,
0N/A                                    tags, nhits > 100, ret);
1185N/A                        } else if (Genre.XREFABLE == genre && data != null && summarizer != null){
439N/A                            int l = 0;
1190N/A                            Reader r=null;
816N/A                            if ( RuntimeEnvironment.getInstance().isCompressXref() ) {
816N/A                                    r = new TagFilter(new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(data + "/xref" + filename+".gz"))))); }
816N/A                            else {
816N/A                                    r = new TagFilter(new BufferedReader(new FileReader(data + "/xref" + filename))); }
380N/A                            try {
1190N/A                                l = r.read(content);
380N/A                            } finally {
1195N/A                                IOUtils.close(r);
380N/A                            }
816N/A                            //TODO FIX below fragmenter according to either summarizer or context (to get line numbers, might be hard, since xref writers will need to be fixed too, they generate just one line of html code now :( )
816N/A                            Summary sum = summarizer.getSummary(new String(content, 0, l));
0N/A                            Fragment fragments[] = sum.getFragments();
0N/A                            for (int jj = 0; jj < fragments.length; ++jj) {
0N/A                                String match = fragments[jj].toString();
0N/A                                if (match.length() > 0) {
0N/A                                    if (!fragments[jj].isEllipsis()) {
0N/A                                        Hit hit = new Hit(filename, fragments[jj].toString(), "", true, alt);
0N/A                                        ret.add(hit);
0N/A                                    }
0N/A                                    hasContext = true;
0N/A                                }
0N/A                            }
0N/A                        } else {
816N/A                            OpenGrokLogger.getLogger().warning("Unknown genre: " + genre + " for "+filename);
0N/A                            hasContext |= sourceContext.getContext(null, null, null, null, filename, tags, false, ret);
0N/A                        }
0N/A                    } catch (FileNotFoundException exp) {
816N/A                        OpenGrokLogger.getLogger().warning("Couldn't read summary from "+filename+" ("+exp.getMessage()+")");
0N/A                        hasContext |= sourceContext.getContext(null, null, null, null, filename, tags, false, ret);
0N/A                    }
0N/A                }
0N/A                if (historyContext != null) {
234N/A                    hasContext |= historyContext.getContext(source + filename, filename, ret);
0N/A                }
0N/A                if(!hasContext) {
0N/A                    ret.add(new Hit(filename, "...", "", false, alt));
0N/A                }
0N/A            } catch (IOException e) {
615N/A                OpenGrokLogger.getLogger().log(
615N/A                        Level.WARNING, SEARCH_EXCEPTION_MSG, e);
350N/A            } catch (ClassNotFoundException e) {
615N/A                OpenGrokLogger.getLogger().log(
615N/A                        Level.WARNING, SEARCH_EXCEPTION_MSG, e);
615N/A            } catch (HistoryException e) {
615N/A                OpenGrokLogger.getLogger().log(
615N/A                        Level.WARNING, SEARCH_EXCEPTION_MSG, e);
0N/A            }
0N/A        }
1190N/A
0N/A    }
1190N/A
0N/A    /**
0N/A     * Getter for property definition.
0N/A     *
0N/A     * @return Value of property definition.
0N/A     */
0N/A    public String getDefinition() {
0N/A        return this.definition;
0N/A    }
1190N/A
0N/A    /**
0N/A     * Setter for property definition.
0N/A     *
0N/A     * @param definition New value of property definition.
0N/A     */
0N/A    public void setDefinition(String definition) {
0N/A        this.definition = definition;
0N/A    }
1190N/A
0N/A    /**
0N/A     * Getter for property file.
0N/A     *
0N/A     * @return Value of property file.
0N/A     */
0N/A    public String getFile() {
0N/A        return this.file;
0N/A    }
1190N/A
0N/A    /**
0N/A     * Setter for property file.
0N/A     *
0N/A     * @param file New value of property file.
0N/A     */
0N/A    public void setFile(String file) {
0N/A        this.file = file;
0N/A    }
1190N/A
0N/A    /**
0N/A     * Getter for property freetext.
0N/A     *
0N/A     * @return Value of property freetext.
0N/A     */
0N/A    public String getFreetext() {
0N/A        return this.freetext;
0N/A    }
1190N/A
0N/A    /**
0N/A     * Setter for property freetext.
0N/A     *
0N/A     * @param freetext New value of property freetext.
0N/A     */
0N/A    public void setFreetext(String freetext) {
0N/A        this.freetext = freetext;
0N/A    }
1190N/A
0N/A    /**
0N/A     * Getter for property history.
0N/A     *
0N/A     * @return Value of property history.
0N/A     */
0N/A    public String getHistory() {
0N/A        return this.history;
0N/A    }
1190N/A
0N/A    /**
0N/A     * Setter for property history.
0N/A     *
0N/A     * @param history New value of property history.
0N/A     */
0N/A    public void setHistory(String history) {
0N/A        this.history = history;
0N/A    }
1190N/A
0N/A    /**
0N/A     * Getter for property symbol.
0N/A     *
0N/A     * @return Value of property symbol.
0N/A     */
0N/A    public String getSymbol() {
0N/A        return this.symbol;
0N/A    }
1190N/A
0N/A    /**
0N/A     * Setter for property symbol.
0N/A     *
0N/A     * @param symbol New value of property symbol.
0N/A     */
0N/A    public void setSymbol(String symbol) {
0N/A        this.symbol = symbol;
0N/A    }
0N/A}