// SearchEngine.java revision 1470
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
 */
2N/A
2N/Apackage org.opensolaris.opengrok.search;
2N/A
2N/Aimport java.io.File;
2N/Aimport java.io.FileInputStream;
2N/Aimport java.io.FileNotFoundException;
2N/Aimport java.io.IOException;
2N/Aimport java.io.InputStreamReader;
2N/Aimport java.io.Reader;
2N/Aimport java.util.ArrayList;
2N/Aimport java.util.List;
2N/Aimport java.util.concurrent.ExecutorService;
2N/Aimport java.util.concurrent.Executors;
2N/Aimport java.util.logging.Level;
2N/Aimport java.util.logging.Logger;
2N/A
2N/Aimport org.apache.lucene.document.Document;
2N/Aimport org.apache.lucene.document.Fieldable;
2N/Aimport org.apache.lucene.index.IndexReader;
2N/Aimport org.apache.lucene.index.MultiReader;
2N/Aimport org.apache.lucene.search.IndexSearcher;
2N/Aimport org.apache.lucene.search.Query;
2N/Aimport org.apache.lucene.search.ScoreDoc;
2N/Aimport org.apache.lucene.search.TopScoreDocCollector;
2N/Aimport org.apache.lucene.store.FSDirectory;
2N/Aimport org.apache.lucene.util.Version;
2N/Aimport org.opensolaris.opengrok.analysis.CompatibleAnalyser;
2N/Aimport org.opensolaris.opengrok.analysis.Definitions;
2N/Aimport org.opensolaris.opengrok.analysis.FileAnalyzer.Genre;
2N/Aimport org.opensolaris.opengrok.analysis.TagFilter;
2N/Aimport org.opensolaris.opengrok.analysis.XrefReader;
2N/Aimport org.opensolaris.opengrok.configuration.Configuration;
2N/Aimport org.opensolaris.opengrok.configuration.Project;
2N/Aimport org.opensolaris.opengrok.configuration.RuntimeEnvironment;
2N/Aimport org.opensolaris.opengrok.history.HistoryException;
2N/Aimport org.opensolaris.opengrok.search.Summary.Fragment;
2N/Aimport org.opensolaris.opengrok.search.context.Context;
2N/Aimport org.opensolaris.opengrok.search.context.HistoryContext;
2N/Aimport org.opensolaris.opengrok.util.IOUtils;
2N/Aimport org.opensolaris.opengrok.web.Prefix;
2N/A
/**
 * This is an encapsulation of the details on how to search in the index
 * database.
 *
 * @author Trond Norbye 2005
 * @author Lubos Kosco 2010 - upgrade to lucene 3.0.0
 * @author Lubos Kosco 2011 - upgrade to lucene 3.5.0
 * @author Lubos Kosco 2012 - upgrade to lucene 3.6.0
 */
public class SearchEngine {
    private static final Logger logger = Logger.getLogger(SearchEngine.class.getName());
    /** Message text used when logging exceptions thrown when searching. */
    private static final String SEARCH_EXCEPTION_MSG = "Exception searching";

    //NOTE below will need to be changed after new lucene upgrade, if they
    //increase the version - every change of below makes us incompatible with the
    //old index and we need to ask for reindex
    /** version of lucene index common for whole app*/
    public static final Version LUCENE_VERSION=Version.LUCENE_36;

    /**
     * Holds value of property definition.
     */
    private String definition;

    /**
     * Holds value of property file.
     */
    private String file;

    /**
     * Holds value of property freetext.
     */
    private String freetext;

    /**
     * Holds value of property history.
     */
    private String history;

    /**
     * Holds value of property symbol.
     */
    private String symbol;

    /**
     * The Lucene query built from the properties above; set by
     * {@link #isValidQuery()} or {@link #search()}.
     */
    private Query query;
    // analyzer handed to the Summarizer; the "compatible" analyzer used app-wide
    private final CompatibleAnalyser analyzer = new CompatibleAnalyser();
    // non-null after search() only when the query yields source-line context
    private Context sourceContext;
    // non-null after search() only when the query yields history context
    private HistoryContext historyContext;
    // builds summary fragments for xref-able hits; created in search()
    private Summarizer summarizer;
    // internal structure to hold the results from lucene
    private final List<org.apache.lucene.document.Document> docs;
    // scratch buffer used when reading xref files for summarization
    private final char[] content = new char[1024*8];
    // source root path, cached from the configuration at search() time
    private String source;
    // data root path, cached from the configuration at search() time
    private String data;
    // passed to TopScoreDocCollector.create(); we never score in doc-id order
    private static final boolean docsScoredInOrder = false;

    int hitsPerPage = RuntimeEnvironment.getConfig().getHitsPerPage();
    int cachePages= RuntimeEnvironment.getConfig().getCachePages();
    int totalHits=0;

    // hits/collector/searcher are retained between search() and results()
    // so results() can requery for pages beyond the first cachePages
    private ScoreDoc[] hits;
    private TopScoreDocCollector collector;
    private IndexSearcher searcher;
    // true once results() has collected every hit (no further requery needed)
    boolean allCollected;

    /**
     * Creates a new instance of SearchEngine
     */
    public SearchEngine() {
        docs = new ArrayList<org.apache.lucene.document.Document>();
    }
2N/A
2N/A /**
2N/A * Create a QueryBuilder using the fields that have been set on this
2N/A * SearchEngine.
2N/A *
2N/A * @return a query builder
2N/A */
2N/A private QueryBuilder createQueryBuilder() {
2N/A return new QueryBuilder()
2N/A .setFreetext(freetext)
2N/A .setDefs(definition)
2N/A .setRefs(symbol)
2N/A .setPath(file)
2N/A .setHist(history);
2N/A }
2N/A
2N/A /**
2N/A * Check, whether a query can be build with current information available.
2N/A * @return {@code true} if a query could be build.
2N/A */
2N/A public boolean isValidQuery() {
2N/A boolean ret;
2N/A try {
2N/A query = createQueryBuilder().build();
2N/A ret = (query != null);
2N/A } catch (Exception e) {
2N/A ret = false;
2N/A }
2N/A
2N/A return ret;
2N/A }
2N/A
2N/A /**
2N/A *
2N/A * @param paging whether to use paging (if yes, first X pages will load faster)
2N/A * @param root which db to search
2N/A * @throws IOException
2N/A */
2N/A @SuppressWarnings("resource")
2N/A private void searchSingleDatabase(File root,boolean paging) throws IOException {
2N/A IndexReader ireader = null;
2N/A ireader = IndexReader.open(FSDirectory.open(root));
2N/A searcher = new IndexSearcher(ireader);
2N/A collector = TopScoreDocCollector
2N/A .create(hitsPerPage * cachePages, docsScoredInOrder);
2N/A searcher.search(query, collector);
2N/A totalHits = collector.getTotalHits();
2N/A if (!paging && totalHits>0) {
2N/A collector = TopScoreDocCollector.create(totalHits, docsScoredInOrder);
2N/A searcher.search(query, collector);
2N/A }
2N/A hits = collector.topDocs().scoreDocs;
2N/A for (int i = 0; i < hits.length; i++) {
2N/A int docId = hits[i].doc;
2N/A Document d = searcher.doc(docId);
2N/A docs.add(d);
2N/A }
2N/A }
2N/A
2N/A /**
2N/A *
2N/A * @param paging whether to use paging (if yes, first X pages will load faster)
2N/A * @param root list of projects to search
2N/A * @throws IOException
2N/A */
2N/A private void searchMultiDatabase(List<Project> root,boolean paging)
2N/A throws IOException
2N/A {
2N/A IndexReader[] subreaders = new IndexReader[root.size()];
2N/A File droot = new File(RuntimeEnvironment.getConfig().getDataRootFile(), "index");
2N/A int ii = 0;
2N/A for (Project project : root) {
2N/A IndexReader ireader = (IndexReader.open(FSDirectory
2N/A .open(new File(droot,project.getPath()))));
2N/A subreaders[ii++] = ireader;
2N/A }
2N/A MultiReader searchables = new MultiReader(subreaders, true);
2N/A if (Runtime.getRuntime().availableProcessors() > 1) {
2N/A // TODO there might be a better way for counting this - or we should
2N/A // honor the command line option here too!
2N/A int noThreads = 2 + (2 * Runtime.getRuntime().availableProcessors());
2N/A ExecutorService executor = Executors.newFixedThreadPool(noThreads);
2N/A searcher = new IndexSearcher(searchables, executor);
2N/A } else {
2N/A searcher = new IndexSearcher(searchables);
2N/A }
2N/A collector = TopScoreDocCollector
2N/A .create(hitsPerPage * cachePages, docsScoredInOrder);
2N/A searcher.search(query, collector);
2N/A totalHits = collector.getTotalHits();
2N/A if (!paging && totalHits > 0) {
2N/A collector = TopScoreDocCollector.create(totalHits, docsScoredInOrder);
2N/A searcher.search(query, collector);
2N/A }
2N/A hits = collector.topDocs().scoreDocs;
2N/A for (int i = 0; i < hits.length; i++) {
2N/A int docId = hits[i].doc;
2N/A Document d = searcher.doc(docId);
2N/A docs.add(d);
2N/A }
2N/A }
2N/A
2N/A /**
2N/A * Get the current query used by this instance. Use {@link #isValidQuery()}
2N/A * to build one.
2N/A * @return current query as string
2N/A */
2N/A public String getQuery() {
2N/A return query.toString();
2N/A }
2N/A
2N/A /**
2N/A * Execute a search. Before calling this function, you must set the
2N/A * appropriate seach critera with the set-functions.
2N/A * Note that this search will return the first cachePages of hitsPerPage, for more you need to call more
2N/A *
2N/A * @return The number of hits
2N/A */
2N/A public int search() {
2N/A Configuration cfg = RuntimeEnvironment.getConfig();
2N/A source = cfg.getSourceRoot();
2N/A data = cfg.getDataRoot();
2N/A docs.clear();
2N/A
2N/A QueryBuilder queryBuilder = createQueryBuilder();
2N/A
2N/A try {
2N/A query = queryBuilder.build();
2N/A if (query != null) {
2N/A File root = new File(cfg.getDataRootFile(), "index");
2N/A
2N/A if (cfg.hasProjects()) {
2N/A // search all projects
2N/A // TODO support paging per project (in search.java)
2N/A // TODO optimize if only one project by falling back to
2N/A // SingleDatabase ?
2N/A searchMultiDatabase(cfg.getProjects(), false);
2N/A } else {
2N/A // search the index database
2N/A searchSingleDatabase(root, true);
2N/A }
2N/A }
2N/A } catch (Exception e) {
2N/A logger.warning(SEARCH_EXCEPTION_MSG + ": " + e.getMessage());
2N/A logger.log(Level.FINE, "search", e);
2N/A }
2N/A
2N/A if (!docs.isEmpty()) {
2N/A sourceContext = null;
2N/A summarizer = null;
2N/A try {
2N/A sourceContext = new Context(query, queryBuilder.getQueries());
2N/A if (sourceContext.isEmpty()) {
2N/A sourceContext = null;
2N/A }
2N/A summarizer = new Summarizer(query, analyzer);
2N/A } catch (Exception e) {
2N/A logger.warning("An error occured while creating summary: "
2N/A + e.getMessage());
2N/A logger.log(Level.FINE, "search", e);
2N/A }
2N/A
2N/A historyContext = null;
2N/A try {
2N/A historyContext = new HistoryContext(query);
2N/A if (historyContext.isEmpty()) {
2N/A historyContext = null;
2N/A }
2N/A } catch (Exception e) {
2N/A logger.warning("An error occured while getting history context: "
2N/A + e.getMessage());
2N/A logger.log(Level.FINE, "search", e);
2N/A }
2N/A }
2N/A int count = hits == null ? 0 : hits.length;
2N/A return count;
2N/A }
2N/A
2N/A /**
2N/A * Get results , if no search was started before, no results are returned
2N/A * this method will requery if end end is more than first query from search,
2N/A * hence performance hit applies, if you want results in later pages than
2N/A * number of cachePages
2N/A * also end has to be bigger than start !
2N/A * @param start start of the hit list
2N/A * @param end end of the hit list
2N/A * @param ret list of results from start to end or null/empty if no search
2N/A * was started
2N/A */
2N/A public void results(int start, int end, List<Hit> ret) {
2N/A
2N/A // return if no start search() was done
2N/A if (hits == null || (end < start) ) {
2N/A ret.clear();
2N/A return;
2N/A }
2N/A
2N/A ret.clear();
2N/A
2N/A // TODO check if below fits for if end=old hits.length, or it should
2N/A // include it
2N/A if (end > hits.length & !allCollected) {
2N/A // do the requery, we want more than 5 pages
2N/A collector = TopScoreDocCollector.create(totalHits, docsScoredInOrder);
2N/A try {
2N/A searcher.search(query,collector);
2N/A } catch (Exception e) {
2N/A // should never be hit, since search() will hit this before
2N/A logger.warning(SEARCH_EXCEPTION_MSG + ": " + e.getMessage());
2N/A logger.log(Level.FINE, "results", e);
2N/A }
2N/A hits = collector.topDocs().scoreDocs;
2N/A Document d = null;
2N/A for (int i = start; i < hits.length; i++) {
2N/A int docId = hits[i].doc;
2N/A try {
2N/A d = searcher.doc(docId);
2N/A } catch (Exception e) {
2N/A logger.warning(SEARCH_EXCEPTION_MSG + ": " + e.getMessage());
2N/A logger.log(Level.FINE, "results", e);
2N/A }
2N/A docs.add(d);
2N/A }
2N/A allCollected=true;
2N/A }
2N/A
2N/A // TODO generation of ret(results) could be cashed and consumers of
2N/A // engine would just print them in whatever form they need, this way we
2N/A // could get rid of docs. The only problem is that count of docs is
2N/A // usually smaller than number of results
2N/A for (int ii = start; ii < end; ++ii) {
2N/A boolean alt = (ii % 2 == 0);
2N/A boolean hasContext = false;
2N/A try {
2N/A Document doc = docs.get(ii);
2N/A String filename = doc.get("path");
2N/A
2N/A Genre genre = Genre.get(doc.get("t"));
2N/A Definitions tags = null;
2N/A Fieldable tagsField = doc.getFieldable("tags");
2N/A if (tagsField != null) {
2N/A tags = Definitions.deserialize(tagsField.getBinaryValue());
2N/A }
2N/A int nhits = docs.size();
2N/A
2N/A if (sourceContext != null) {
2N/A try {
2N/A if (Genre.PLAIN == genre && (source != null)) {
2N/A hasContext = sourceContext
2N/A .getContext(new InputStreamReader(new FileInputStream(source
2N/A + filename)), null, null, null, filename,
2N/A tags, nhits > 100, ret);
2N/A } else if (Genre.XREFABLE == genre && data != null
2N/A && summarizer != null)
2N/A {
2N/A int l = 0;
2N/A File file = new File(data + Prefix.XREF_P + filename);
2N/A @SuppressWarnings("resource")
2N/A Reader r = new TagFilter(new XrefReader(file));
2N/A try {
2N/A l = r.read(content);
2N/A } finally {
2N/A IOUtils.close(r);
2N/A }
2N/A // TODO FIX below fragmenter according to either
2N/A // summarizer or context (to get line numbers, might
2N/A // be hard, since xref writers will need to be fixed
2N/A // too, they generate just one line of html code now :( )
2N/A Summary sum = summarizer.getSummary(new String(content, 0, l));
2N/A Fragment fragments[] = sum.getFragments();
2N/A for (int jj = 0; jj < fragments.length; ++jj) {
2N/A String match = fragments[jj].toString();
2N/A if (match.length() > 0) {
2N/A if (!fragments[jj].isEllipsis()) {
2N/A Hit hit = new Hit(filename,
2N/A fragments[jj].toString(), "", true, alt);
2N/A ret.add(hit);
2N/A }
2N/A hasContext = true;
2N/A }
2N/A }
2N/A } else {
2N/A logger.warning("Unknown genre '" + genre + "' for '"
2N/A + filename + "'");
2N/A hasContext |= sourceContext.getContext(null, null,
2N/A null, null, filename, tags, false, ret);
2N/A }
2N/A } catch (FileNotFoundException exp) {
2N/A logger.warning("Couldn't read summary from '"
2N/A + filename + "': " + exp.getMessage());
2N/A hasContext |= sourceContext.getContext(null, null, null,
2N/A null, filename, tags, false, ret);
2N/A }
2N/A }
2N/A if (historyContext != null) {
2N/A hasContext |= historyContext
2N/A .getContext(source + filename, filename, ret);
2N/A }
2N/A if (!hasContext) {
2N/A ret.add(new Hit(filename, "...", "", false, alt));
2N/A }
2N/A } catch (IOException e) {
2N/A logger.warning(SEARCH_EXCEPTION_MSG + ": " + e.getMessage());
2N/A logger.log(Level.FINE, "results", e);
2N/A } catch (ClassNotFoundException e) {
2N/A logger.warning(SEARCH_EXCEPTION_MSG + ": " + e.getMessage());
2N/A logger.log(Level.FINE, "results", e);
2N/A } catch (HistoryException e) {
2N/A logger.warning(SEARCH_EXCEPTION_MSG + ": " + e.getMessage());
2N/A logger.log(Level.FINE, "results", e);
2N/A }
2N/A }
2N/A
2N/A }
2N/A
2N/A /**
2N/A * Getter for property definition.
2N/A *
2N/A * @return Value of property definition.
2N/A */
2N/A public String getDefinition() {
2N/A return definition;
2N/A }
2N/A
2N/A /**
2N/A * Setter for property definition.
*
* @param definition New value of property definition.
*/
public void setDefinition(String definition) {
this.definition = definition;
}
/**
* Getter for property file.
*
* @return Value of property file.
*/
public String getFile() {
return file;
}
/**
* Setter for property file.
*
* @param file New value of property file.
*/
public void setFile(String file) {
this.file = file;
}
/**
* Getter for property freetext.
*
* @return Value of property freetext.
*/
public String getFreetext() {
return freetext;
}
/**
* Setter for property freetext.
*
* @param freetext New value of property freetext.
*/
public void setFreetext(String freetext) {
this.freetext = freetext;
}
/**
* Getter for property history.
*
* @return Value of property history.
*/
public String getHistory() {
return history;
}
/**
* Setter for property history.
*
* @param history New value of property history.
*/
public void setHistory(String history) {
this.history = history;
}
/**
* Getter for property symbol.
*
* @return Value of property symbol.
*/
public String getSymbol() {
return symbol;
}
/**
* Setter for property symbol.
*
* @param symbol New value of property symbol.
*/
public void setSymbol(String symbol) {
this.symbol = symbol;
}
}