/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* See LICENSE.txt included in this distribution for the specific
* language governing permissions and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at LICENSE.txt.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
*/
package org.opensolaris.opengrok.search;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.opensolaris.opengrok.analysis.CompatibleAnalyser;
import org.opensolaris.opengrok.analysis.Definitions;
import org.opensolaris.opengrok.analysis.FileAnalyzer.Genre;
import org.opensolaris.opengrok.analysis.TagFilter;
import org.opensolaris.opengrok.analysis.XrefReader;
import org.opensolaris.opengrok.configuration.Configuration;
import org.opensolaris.opengrok.configuration.Project;
import org.opensolaris.opengrok.configuration.RuntimeEnvironment;
import org.opensolaris.opengrok.history.HistoryException;
import org.opensolaris.opengrok.search.Summary.Fragment;
import org.opensolaris.opengrok.search.context.Context;
import org.opensolaris.opengrok.search.context.HistoryContext;
import org.opensolaris.opengrok.util.IOUtils;
import org.opensolaris.opengrok.web.Prefix;
/**
* This is an encapsulation of the details on how to seach in the index
* database.
*
* @author Trond Norbye 2005
* @author Lubos Kosco 2010 - upgrade to lucene 3.0.0
* @author Lubos Kosco 2011 - upgrade to lucene 3.5.0
* @author Lubos Kosco 2012 - upgrade to lucene 3.6.0
*/
public class SearchEngine {
private static final Logger logger = Logger.getLogger(SearchEngine.class.getName());
/** Message text used when logging exceptions thrown when searching. */
private static final String SEARCH_EXCEPTION_MSG = "Exception searching";
//NOTE below will need to be changed after new lucene upgrade, if they
//increase the version - every change of below makes us incompatible with the
//old index and we need to ask for reindex
/** version of lucene index common for whole app*/
public static final Version LUCENE_VERSION=Version.LUCENE_36;
/**
* Holds value of property definition.
*/
private String definition;
/**
* Holds value of property file.
*/
private String file;
/**
* Holds value of property freetext.
*/
private String freetext;
/**
* Holds value of property history.
*/
private String history;
/**
* Holds value of property symbol.
*/
private String symbol;
/**
* Holds value of property indexDatabase.
*/
private Query query;
private final CompatibleAnalyser analyzer = new CompatibleAnalyser();
private Context sourceContext;
private HistoryContext historyContext;
private Summarizer summarizer;
// internal structure to hold the results from lucene
private final List<org.apache.lucene.document.Document> docs;
private final char[] content = new char[1024*8];
private String source;
private String data;
private static final boolean docsScoredInOrder = false;
int hitsPerPage = RuntimeEnvironment.getConfig().getHitsPerPage();
int cachePages= RuntimeEnvironment.getConfig().getCachePages();
int totalHits=0;
private ScoreDoc[] hits;
private TopScoreDocCollector collector;
private IndexSearcher searcher;
boolean allCollected;
/**
* Creates a new instance of SearchEngine
*/
public SearchEngine() {
docs = new ArrayList<org.apache.lucene.document.Document>();
}
/**
* Create a QueryBuilder using the fields that have been set on this
* SearchEngine.
*
* @return a query builder
*/
private QueryBuilder createQueryBuilder() {
return new QueryBuilder()
.setFreetext(freetext)
.setDefs(definition)
.setRefs(symbol)
.setPath(file)
.setHist(history);
}
/**
* Check, whether a query can be build with current information available.
* @return {@code true} if a query could be build.
*/
public boolean isValidQuery() {
boolean ret;
try {
query = createQueryBuilder().build();
ret = (query != null);
} catch (Exception e) {
ret = false;
}
return ret;
}
/**
*
* @param paging whether to use paging (if yes, first X pages will load faster)
* @param root which db to search
* @throws IOException
*/
@SuppressWarnings("resource")
private void searchSingleDatabase(File root,boolean paging) throws IOException {
IndexReader ireader = null;
ireader = IndexReader.open(FSDirectory.open(root));
searcher = new IndexSearcher(ireader);
collector = TopScoreDocCollector
.create(hitsPerPage * cachePages, docsScoredInOrder);
searcher.search(query, collector);
totalHits = collector.getTotalHits();
if (!paging && totalHits>0) {
collector = TopScoreDocCollector.create(totalHits, docsScoredInOrder);
searcher.search(query, collector);
}
hits = collector.topDocs().scoreDocs;
for (int i = 0; i < hits.length; i++) {
int docId = hits[i].doc;
Document d = searcher.doc(docId);
docs.add(d);
}
}
/**
*
* @param paging whether to use paging (if yes, first X pages will load faster)
* @param root list of projects to search
* @throws IOException
*/
private void searchMultiDatabase(List<Project> root,boolean paging)
throws IOException
{
IndexReader[] subreaders = new IndexReader[root.size()];
File droot = new File(RuntimeEnvironment.getConfig().getDataRootFile(), "index");
int ii = 0;
for (Project project : root) {
IndexReader ireader = (IndexReader.open(FSDirectory
.open(new File(droot,project.getPath()))));
subreaders[ii++] = ireader;
}
MultiReader searchables = new MultiReader(subreaders, true);
if (Runtime.getRuntime().availableProcessors() > 1) {
// TODO there might be a better way for counting this - or we should
// honor the command line option here too!
int noThreads = 2 + (2 * Runtime.getRuntime().availableProcessors());
ExecutorService executor = Executors.newFixedThreadPool(noThreads);
searcher = new IndexSearcher(searchables, executor);
} else {
searcher = new IndexSearcher(searchables);
}
collector = TopScoreDocCollector
.create(hitsPerPage * cachePages, docsScoredInOrder);
searcher.search(query, collector);
totalHits = collector.getTotalHits();
if (!paging && totalHits > 0) {
collector = TopScoreDocCollector.create(totalHits, docsScoredInOrder);
searcher.search(query, collector);
}
hits = collector.topDocs().scoreDocs;
for (int i = 0; i < hits.length; i++) {
int docId = hits[i].doc;
Document d = searcher.doc(docId);
docs.add(d);
}
}
/**
* Get the current query used by this instance. Use {@link #isValidQuery()}
* to build one.
* @return current query as string
*/
public String getQuery() {
return query.toString();
}
/**
* Execute a search. Before calling this function, you must set the
* appropriate seach critera with the set-functions.
* Note that this search will return the first cachePages of hitsPerPage, for more you need to call more
*
* @return The number of hits
*/
public int search() {
Configuration cfg = RuntimeEnvironment.getConfig();
source = cfg.getSourceRoot();
data = cfg.getDataRoot();
docs.clear();
QueryBuilder queryBuilder = createQueryBuilder();
try {
query = queryBuilder.build();
if (query != null) {
File root = new File(cfg.getDataRootFile(), "index");
if (cfg.hasProjects()) {
// search all projects
// TODO support paging per project (in search.java)
// TODO optimize if only one project by falling back to
// SingleDatabase ?
searchMultiDatabase(cfg.getProjects(), false);
} else {
// search the index database
searchSingleDatabase(root, true);
}
}
} catch (Exception e) {
logger.warning(SEARCH_EXCEPTION_MSG + ": " + e.getMessage());
logger.log(Level.FINE, "search", e);
}
if (!docs.isEmpty()) {
sourceContext = null;
summarizer = null;
try {
sourceContext = new Context(query, queryBuilder.getQueries());
if (sourceContext.isEmpty()) {
sourceContext = null;
}
summarizer = new Summarizer(query, analyzer);
} catch (Exception e) {
logger.warning("An error occured while creating summary: "
+ e.getMessage());
logger.log(Level.FINE, "search", e);
}
historyContext = null;
try {
historyContext = new HistoryContext(query);
if (historyContext.isEmpty()) {
historyContext = null;
}
} catch (Exception e) {
logger.warning("An error occured while getting history context: "
+ e.getMessage());
logger.log(Level.FINE, "search", e);
}
}
int count = hits == null ? 0 : hits.length;
return count;
}
/**
* Get results , if no search was started before, no results are returned
* this method will requery if end end is more than first query from search,
* hence performance hit applies, if you want results in later pages than
* number of cachePages
* also end has to be bigger than start !
* @param start start of the hit list
* @param end end of the hit list
* @param ret list of results from start to end or null/empty if no search
* was started
*/
public void results(int start, int end, List<Hit> ret) {
// return if no start search() was done
if (hits == null || (end < start) ) {
ret.clear();
return;
}
ret.clear();
// TODO check if below fits for if end=old hits.length, or it should
// include it
if (end > hits.length & !allCollected) {
// do the requery, we want more than 5 pages
collector = TopScoreDocCollector.create(totalHits, docsScoredInOrder);
try {
searcher.search(query,collector);
} catch (Exception e) {
// should never be hit, since search() will hit this before
logger.warning(SEARCH_EXCEPTION_MSG + ": " + e.getMessage());
logger.log(Level.FINE, "results", e);
}
hits = collector.topDocs().scoreDocs;
Document d = null;
for (int i = start; i < hits.length; i++) {
int docId = hits[i].doc;
try {
d = searcher.doc(docId);
} catch (Exception e) {
logger.warning(SEARCH_EXCEPTION_MSG + ": " + e.getMessage());
logger.log(Level.FINE, "results", e);
}
docs.add(d);
}
allCollected=true;
}
// TODO generation of ret(results) could be cashed and consumers of
// engine would just print them in whatever form they need, this way we
// could get rid of docs. The only problem is that count of docs is
// usually smaller than number of results
for (int ii = start; ii < end; ++ii) {
boolean alt = (ii % 2 == 0);
boolean hasContext = false;
try {
Document doc = docs.get(ii);
String filename = doc.get("path");
Genre genre = Genre.get(doc.get("t"));
Definitions tags = null;
Fieldable tagsField = doc.getFieldable("tags");
if (tagsField != null) {
tags = Definitions.deserialize(tagsField.getBinaryValue());
}
int nhits = docs.size();
if (sourceContext != null) {
try {
if (Genre.PLAIN == genre && (source != null)) {
hasContext = sourceContext
.getContext(new InputStreamReader(new FileInputStream(source
+ filename)), null, null, null, filename,
tags, nhits > 100, ret);
} else if (Genre.XREFABLE == genre && data != null
&& summarizer != null)
{
int l = 0;
File file = new File(data + Prefix.XREF_P + filename);
@SuppressWarnings("resource")
Reader r = new TagFilter(new XrefReader(file));
try {
l = r.read(content);
} finally {
IOUtils.close(r);
}
// TODO FIX below fragmenter according to either
// summarizer or context (to get line numbers, might
// be hard, since xref writers will need to be fixed
// too, they generate just one line of html code now :( )
Summary sum = summarizer.getSummary(new String(content, 0, l));
Fragment fragments[] = sum.getFragments();
for (int jj = 0; jj < fragments.length; ++jj) {
String match = fragments[jj].toString();
if (match.length() > 0) {
if (!fragments[jj].isEllipsis()) {
Hit hit = new Hit(filename,
fragments[jj].toString(), "", true, alt);
ret.add(hit);
}
hasContext = true;
}
}
} else {
logger.warning("Unknown genre '" + genre + "' for '"
+ filename + "'");
hasContext |= sourceContext.getContext(null, null,
null, null, filename, tags, false, ret);
}
} catch (FileNotFoundException exp) {
logger.warning("Couldn't read summary from '"
+ filename + "': " + exp.getMessage());
hasContext |= sourceContext.getContext(null, null, null,
null, filename, tags, false, ret);
}
}
if (historyContext != null) {
hasContext |= historyContext
.getContext(source + filename, filename, ret);
}
if (!hasContext) {
ret.add(new Hit(filename, "...", "", false, alt));
}
} catch (IOException e) {
logger.warning(SEARCH_EXCEPTION_MSG + ": " + e.getMessage());
logger.log(Level.FINE, "results", e);
} catch (ClassNotFoundException e) {
logger.warning(SEARCH_EXCEPTION_MSG + ": " + e.getMessage());
logger.log(Level.FINE, "results", e);
} catch (HistoryException e) {
logger.warning(SEARCH_EXCEPTION_MSG + ": " + e.getMessage());
logger.log(Level.FINE, "results", e);
}
}
}
/**
* Getter for property definition.
*
* @return Value of property definition.
*/
public String getDefinition() {
return definition;
}
/**
* Setter for property definition.
*
* @param definition New value of property definition.
*/
public void setDefinition(String definition) {
this.definition = definition;
}
/**
* Getter for property file.
*
* @return Value of property file.
*/
public String getFile() {
return file;
}
/**
* Setter for property file.
*
* @param file New value of property file.
*/
public void setFile(String file) {
this.file = file;
}
/**
* Getter for property freetext.
*
* @return Value of property freetext.
*/
public String getFreetext() {
return freetext;
}
/**
* Setter for property freetext.
*
* @param freetext New value of property freetext.
*/
public void setFreetext(String freetext) {
this.freetext = freetext;
}
/**
* Getter for property history.
*
* @return Value of property history.
*/
public String getHistory() {
return history;
}
/**
* Setter for property history.
*
* @param history New value of property history.
*/
public void setHistory(String history) {
this.history = history;
}
/**
* Getter for property symbol.
*
* @return Value of property symbol.
*/
public String getSymbol() {
return symbol;
}
/**
* Setter for property symbol.
*
* @param symbol New value of property symbol.
*/
public void setSymbol(String symbol) {
this.symbol = symbol;
}
}