/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2011 Jens Elkner.
 * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
 */
1185N/Apackage org.opensolaris.opengrok.web;
1185N/A
1185N/Aimport java.io.File;
1185N/Aimport java.io.FileNotFoundException;
1185N/Aimport java.io.IOException;
1185N/Aimport java.util.ArrayList;
1209N/Aimport java.util.Arrays;
1185N/Aimport java.util.List;
1199N/Aimport java.util.SortedSet;
1318N/Aimport java.util.concurrent.ExecutorService;
1318N/Aimport java.util.concurrent.Executors;
1185N/Aimport java.util.logging.Level;
1219N/Aimport java.util.logging.Logger;
1185N/Aimport java.util.regex.Pattern;
1327N/A
1185N/Aimport org.apache.lucene.document.Document;
1318N/Aimport org.apache.lucene.index.IndexReader;
1318N/Aimport org.apache.lucene.index.MultiReader;
1185N/Aimport org.apache.lucene.queryParser.ParseException;
1327N/Aimport org.apache.lucene.search.BooleanQuery;
1327N/Aimport org.apache.lucene.search.IndexSearcher;
1327N/Aimport org.apache.lucene.search.Query;
1327N/Aimport org.apache.lucene.search.ScoreDoc;
1327N/Aimport org.apache.lucene.search.Sort;
1327N/Aimport org.apache.lucene.search.SortField;
1327N/Aimport org.apache.lucene.search.TermQuery;
1327N/Aimport org.apache.lucene.search.TopFieldDocs;
1185N/Aimport org.apache.lucene.search.spell.SpellChecker;
1185N/Aimport org.apache.lucene.store.FSDirectory;
1185N/Aimport org.opensolaris.opengrok.analysis.CompatibleAnalyser;
1185N/Aimport org.opensolaris.opengrok.analysis.Definitions;
1185N/Aimport org.opensolaris.opengrok.search.QueryBuilder;
1185N/Aimport org.opensolaris.opengrok.search.Summarizer;
1185N/Aimport org.opensolaris.opengrok.search.context.Context;
1185N/Aimport org.opensolaris.opengrok.search.context.HistoryContext;
1247N/Aimport org.opensolaris.opengrok.util.IOUtils;
1185N/A
1185N/A/**
1190N/A * Working set for a search basically to factor out/separate search related
1185N/A * complexity from UI design.
1190N/A *
1185N/A * @author Jens Elkner
1185N/A * @version $Revision$
1185N/A */
1185N/Apublic class SearchHelper {
1327N/A private static final Logger log = Logger.getLogger(SearchHelper.class.getName());
1185N/A /** opengrok's data root: used to find the search index file */
1185N/A public File dataRoot;
1190N/A /** context path, i.e. the applications context path (usually /source) to
1185N/A * use when generating a redirect URL */
1185N/A public String contextPath;
1185N/A /** piggyback: the source root directory. */
1185N/A public File sourceRoot;
1185N/A /** piggyback: the eftar filereader to use. */
1185N/A public EftarFileReader desc;
1185N/A /** the result cursor start index, i.e. where to start displaying results */
1185N/A public int start;
1185N/A /** max. number of result items to show */
1185N/A public int maxItems;
1185N/A /** the QueryBuilder used to create the query */
1185N/A public QueryBuilder builder;
1185N/A /** the order to use to ordery query results */
1185N/A public SortOrder order;
1438N/A /** if {@code true} multi-threaded search will be used. */
1185N/A public boolean parallel;
1185N/A /** Indicate, whether this is search from a cross reference. If {@code true}
1185N/A * {@link #executeQuery()} sets {@link #redirect} if certain conditions are
1185N/A * met. */
1185N/A public boolean isCrossRefSearch;
1190N/A /** if not {@code null}, the consumer should redirect the client to a
1185N/A * separate result page denoted by the value of this field. Automatically
1220N/A * set via {@link #prepareExec(SortedSet)} and {@link #executeQuery()}. */
1185N/A public String redirect;
1185N/A /** if not {@code null}, the UI should show this error message and stop
1220N/A * processing the search. Automatically set via {@link #prepareExec(SortedSet)}
1185N/A * and {@link #executeQuery()}.*/
1185N/A public String errorMsg;
1185N/A /** the searcher used to open/search the index. Automatically set via
1220N/A * {@link #prepareExec(SortedSet)}. */
1318N/A public IndexSearcher searcher;
1185N/A /** list of docs which result from the executing the query */
1185N/A public ScoreDoc[] hits;
1185N/A /** total number of hits */
1185N/A public int totalHits;
1190N/A /** the query created by the used {@link QueryBuilder} via
1220N/A * {@link #prepareExec(SortedSet)}. */
1185N/A public Query query;
1190N/A /** the lucene sort instruction based on {@link #order} created via
1220N/A * {@link #prepareExec(SortedSet)}. */
1185N/A protected Sort sort;
1190N/A /** projects to use to setup indexer searchers. Usually setup via
1220N/A * {@link #prepareExec(SortedSet)}. */
1199N/A public SortedSet<String> projects;
1185N/A /** opengrok summary context. Usually created via {@link #prepareSummary()}. */
1185N/A public Context sourceContext = null;
1185N/A /** result summarizer usually created via {@link #prepareSummary()}. */
1185N/A public Summarizer summerizer = null;
1185N/A /** history context usually created via {@link #prepareSummary()}.*/
1185N/A public HistoryContext historyContext;
1185N/A /** Default query parse error message prefix */
1185N/A public static final String PARSE_ERROR_MSG = "Unable to parse your query: ";
1424N/A private ExecutorService executor = null;
1219N/A
1185N/A /**
1185N/A * Create the searcher to use wrt. to currently set parameters and the given
1190N/A * projects. Does not produce any {@link #redirect} link. It also does
1185N/A * nothing if {@link #redirect} or {@link #errorMsg} have a none-{@code null}
1185N/A * value.
1185N/A * <p>
1185N/A * Parameters which should be populated/set at this time:
1185N/A * <ul>
1185N/A * <li>{@link #builder}</li>
1185N/A * <li>{@link #dataRoot}</li>
1185N/A * <li>{@link #order} (falls back to relevance if unset)</li>
1185N/A * <li>{@link #parallel} (default: false)</li>
1185N/A * </ul>
1185N/A * Populates/sets:
1185N/A * <ul>
1185N/A * <li>{@link #query}</li>
1185N/A * <li>{@link #searcher}</li>
1185N/A * <li>{@link #sort}</li>
1185N/A * <li>{@link #projects}</li>
1185N/A * <li>{@link #errorMsg} if an error occurs</li>
1185N/A * </ul>
1190N/A *
1185N/A * @param projects project to use query. If empty, a none-project opengrok
1185N/A * setup is assumed (i.e. DATA_ROOT/index will be used instead of possible
1185N/A * multiple DATA_ROOT/$project/index).
1185N/A * @return this instance
1185N/A */
1461N/A @SuppressWarnings("resource")
1213N/A public SearchHelper prepareExec(SortedSet<String> projects) {
1185N/A if (redirect != null || errorMsg != null) {
1185N/A return this;
1185N/A }
1185N/A // the Query created by the QueryBuilder
1185N/A try {
1185N/A query = builder.build();
1185N/A if (projects == null) {
1185N/A errorMsg = "No project selected!";
1185N/A return this;
1185N/A }
1185N/A this.projects = projects;
1185N/A File indexDir = new File(dataRoot, "index");
1185N/A if (projects.isEmpty()) {
1185N/A //no project setup
1185N/A FSDirectory dir = FSDirectory.open(indexDir);
1318N/A searcher = new IndexSearcher(IndexReader.open(dir));
1185N/A } else if (projects.size() == 1) {
1185N/A // just 1 project selected
1185N/A FSDirectory dir =
1185N/A FSDirectory.open(new File(indexDir, projects.first()));
1318N/A searcher = new IndexSearcher(IndexReader.open(dir));
1185N/A } else {
1318N/A //more projects
1318N/A IndexReader[] subreaders=new IndexReader[projects.size()];
1185N/A int ii = 0;
1190N/A //TODO might need to rewrite to Project instead of
1185N/A // String , need changes in projects.jspf too
1185N/A for (String proj : projects) {
1185N/A FSDirectory dir = FSDirectory.open(new File(indexDir, proj));
1318N/A subreaders[ii++] = IndexReader.open(dir);
1318N/A }
1461N/A MultiReader searchables = new MultiReader(subreaders, true);
1318N/A if (parallel) {
1461N/A //TODO there might be a better way for counting this
1461N/A int noThreads =
1461N/A 2 + (2 * Runtime.getRuntime().availableProcessors());
1318N/A executor= Executors.newFixedThreadPool(noThreads);
1185N/A }
1185N/A searcher = parallel
1318N/A ? new IndexSearcher(searchables,executor)
1318N/A : new IndexSearcher(searchables);
1185N/A }
1190N/A // TODO check if below is somehow reusing sessions so we don't
1190N/A // requery again and again, I guess 2min timeout sessions could be
1190N/A // usefull, since you click on the next page within 2mins, if not,
1185N/A // then wait ;)
1185N/A switch (order) {
1185N/A case LASTMODIFIED:
1185N/A sort = new Sort(new SortField("date", SortField.STRING, true));
1185N/A break;
1185N/A case BY_PATH:
1185N/A sort = new Sort(new SortField("fullpath", SortField.STRING));
1185N/A break;
1185N/A default:
1185N/A sort = Sort.RELEVANCE;
1185N/A break;
1185N/A }
1185N/A } catch (ParseException e) {
1282N/A errorMsg = PARSE_ERROR_MSG + e.getMessage();
1185N/A } catch (FileNotFoundException e) {
1185N/A// errorMsg = "Index database(s) not found: " + e.getMessage();
1185N/A errorMsg = "Index database(s) not found.";
1185N/A } catch (Exception e) {
1185N/A errorMsg = e.getMessage();
1185N/A }
1185N/A return this;
1185N/A }
1185N/A
1185N/A /**
1220N/A * Start the search prepared by {@link #prepareExec(SortedSet)}.
1190N/A * It does nothing if {@link #redirect} or {@link #errorMsg} have a
1185N/A * none-{@code null} value.
1185N/A * <p>
1185N/A * Parameters which should be populated/set at this time:
1185N/A * <ul>
1220N/A * <li>all fields required for and populated by {@link #prepareExec(SortedSet)})</li>
1185N/A * <li>{@link #start} (default: 0)</li>
1185N/A * <li>{@link #maxItems} (default: 0)</li>
1185N/A * <li>{@link #isCrossRefSearch} (default: false)</li>
1185N/A * </ul>
1185N/A * Populates/sets:
1185N/A * <ul>
1185N/A * <li>{@link #hits} (see {@link TopFieldDocs#scoreDocs})</li>
1185N/A * <li>{@link #totalHits} (see {@link TopFieldDocs#totalHits})</li>
1185N/A * <li>{@link #contextPath}</li>
1185N/A * <li>{@link #errorMsg} if an error occurs</li>
1185N/A * <li>{@link #redirect} if certain conditions are met</li>
1185N/A * </ul>
1185N/A * @return this instance
1185N/A */
1185N/A public SearchHelper executeQuery() {
1185N/A if (redirect != null || errorMsg != null) {
1185N/A return this;
1185N/A }
1185N/A try {
1185N/A TopFieldDocs fdocs = searcher.search(query, null, start + maxItems, sort);
1185N/A totalHits = fdocs.totalHits;
1185N/A hits = fdocs.scoreDocs;
1190N/A // Bug #3900: Check if this is a search for a single term, and that
1185N/A // term is a definition. If that's the case, and we only have one match,
1185N/A // we'll generate a direct link instead of a listing.
1185N/A boolean isSingleDefinitionSearch =
1185N/A (query instanceof TermQuery) && (builder.getDefs() != null);
1185N/A
1190N/A // Attempt to create a direct link to the definition if we search for
1190N/A // one single definition term AND we have exactly one match AND there
1185N/A // is only one definition of that symbol in the document that matches.
1185N/A boolean uniqueDefinition = false;
1185N/A if (isSingleDefinitionSearch && hits != null && hits.length == 1) {
1185N/A Document doc = searcher.doc(hits[0].doc);
1185N/A if (doc.getFieldable("tags") != null) {
1185N/A byte[] rawTags = doc.getFieldable("tags").getBinaryValue();
1185N/A Definitions tags = Definitions.deserialize(rawTags);
1185N/A String symbol = ((TermQuery) query).getTerm().text();
1185N/A if (tags.occurrences(symbol) == 1) {
1185N/A uniqueDefinition = true;
1185N/A }
1185N/A }
1185N/A }
1190N/A // @TODO fix me. I should try to figure out where the exact hit is
1185N/A // instead of returning a page with just _one_ entry in....
1209N/A if (uniqueDefinition && hits != null && hits.length > 0 && isCrossRefSearch) {
1185N/A redirect = contextPath + Prefix.XREF_P
1469N/A + Util.uriEncodePath(searcher.doc(hits[0].doc).get("path"))
1469N/A + '#' + Util.uriEncodeQueryValue(((TermQuery) query).getTerm().text());
1185N/A }
1185N/A } catch (BooleanQuery.TooManyClauses e) {
1185N/A errorMsg = "Too many results for wildcard!";
1185N/A } catch (Exception e) {
1185N/A errorMsg = e.getMessage();
1185N/A }
1185N/A return this;
1185N/A }
1185N/A private static final Pattern TABSPACE = Pattern.compile("[\t ]+");
1185N/A
1209N/A private static void getSuggestion(String term, SpellChecker checker,
1185N/A List<String> result) throws IOException {
1185N/A if (term == null) {
1185N/A return;
1185N/A }
1185N/A String[] toks = TABSPACE.split(term, 0);
1185N/A for (int j = 0; j < toks.length; j++) {
1185N/A if (toks[j].length() <= 3) {
1185N/A continue;
1185N/A }
1209N/A result.addAll(Arrays.asList(checker.suggestSimilar(toks[j].toLowerCase(), 5)));
1185N/A }
1185N/A }
1185N/A
1185N/A /**
1185N/A * If a search did not return a hit, one may use this method to obtain
1185N/A * suggestions for a new search.
1190N/A *
1185N/A * <p>
1185N/A * Parameters which should be populated/set at this time:
1185N/A * <ul>
1185N/A * <li>{@link #projects}</li>
1185N/A * <li>{@link #dataRoot}</li>
1185N/A * <li>{@link #builder}</li>
1185N/A * </ul>
1185N/A * @return a possible empty list of sugeestions.
1185N/A */
1461N/A @SuppressWarnings("resource")
1185N/A public List<Suggestion> getSuggestions() {
1185N/A if (projects == null) {
1185N/A return new ArrayList<Suggestion>(0);
1185N/A }
1185N/A File[] spellIndex = null;
1185N/A if (projects.isEmpty()) {
1185N/A spellIndex = new File[]{new File(dataRoot, "spellIndex")};
1185N/A } else if (projects.size() == 1) {
1185N/A spellIndex = new File[]{
1185N/A new File(dataRoot, "spellIndex/" + projects.first())
1185N/A };
1185N/A } else {
1185N/A spellIndex = new File[projects.size()];
1185N/A int ii = 0;
1185N/A File indexDir = new File(dataRoot, "spellIndex");
1185N/A for (String proj : projects) {
1185N/A spellIndex[ii++] = new File(indexDir, proj);
1185N/A }
1185N/A }
1185N/A List<Suggestion> res = new ArrayList<Suggestion>();
1185N/A List<String> dummy = new ArrayList<String>();
1185N/A for (int idx = 0; idx < spellIndex.length; idx++) {
1185N/A if (!spellIndex[idx].exists()) {
1185N/A continue;
1185N/A }
1185N/A FSDirectory spellDirectory = null;
1185N/A SpellChecker checker = null;
1185N/A Suggestion s = new Suggestion(spellIndex[idx].getName());
1185N/A try {
1185N/A spellDirectory = FSDirectory.open(spellIndex[idx]);
1185N/A checker = new SpellChecker(spellDirectory);
1185N/A getSuggestion(builder.getFreetext(), checker, dummy);
1185N/A s.freetext = dummy.toArray(new String[dummy.size()]);
1185N/A dummy.clear();
1185N/A getSuggestion(builder.getRefs(), checker, dummy);
1185N/A s.refs = dummy.toArray(new String[dummy.size()]);
1185N/A dummy.clear();
1190N/A // TODO it seems the only true spellchecker is for
1190N/A // below field, see IndexDatabase
1185N/A // createspellingsuggestions ...
1185N/A getSuggestion(builder.getDefs(), checker, dummy);
1185N/A s.defs = dummy.toArray(new String[dummy.size()]);
1185N/A dummy.clear();
1185N/A if (s.freetext.length > 0 || s.defs.length > 0 || s.refs.length > 0) {
1185N/A res.add(s);
1185N/A }
1185N/A } catch (IOException e) {
1327N/A log.warning("Got exception while getting spelling suggestions: "
1327N/A + e.getMessage());
1327N/A log.log(Level.FINE, "getSuggestions", e);
1185N/A } finally {
1461N/A IOUtils.close(spellDirectory);
1461N/A IOUtils.close(checker);
1185N/A }
1185N/A }
1185N/A return res;
1185N/A }
1185N/A
1185N/A /**
1185N/A * Prepare the fields to support printing a fullblown summary. Does nothing
1185N/A * if {@link #redirect} or {@link #errorMsg} have a none-{@code null} value.
1190N/A *
1185N/A * <p>
1185N/A * Parameters which should be populated/set at this time:
1185N/A * <ul>
1185N/A * <li>{@link #query}</li>
1185N/A * <li>{@link #builder}</li>
1185N/A * </ul>
1185N/A * Populates/sets:
1185N/A * Otherwise the following fields are set (includes {@code null}):
1185N/A * <ul>
1185N/A * <li>{@link #sourceContext}</li>
1185N/A * <li>{@link #summerizer}</li>
1185N/A * <li>{@link #historyContext}</li>
1185N/A * </ul>
1190N/A *
1185N/A * @return this instance.
1185N/A */
1185N/A public SearchHelper prepareSummary() {
1185N/A if (redirect != null || errorMsg != null) {
1185N/A return this;
1185N/A }
1185N/A try {
1185N/A sourceContext = new Context(query, builder.getQueries());
1185N/A summerizer = new Summarizer(query, new CompatibleAnalyser());
1185N/A } catch (Exception e) {
1327N/A log.warning("Summerizer: " + e.getMessage());
1185N/A }
1185N/A try {
1185N/A historyContext = new HistoryContext(query);
1185N/A } catch (Exception e) {
1327N/A log.warning("HistoryContext: " + e.getMessage());
1185N/A }
1185N/A return this;
1185N/A }
1185N/A
1185N/A /**
1190N/A * Free any resources associated with this helper (that includes closing
1185N/A * the used {@link #searcher}).
1185N/A */
1185N/A public void destroy() {
1476N/A if (searcher != null) {
1476N/A IOUtils.close(searcher);
1476N/A IOUtils.close(searcher.getIndexReader());
1476N/A }
1438N/A if (executor != null) {
1438N/A try {
1438N/A executor.shutdown();
1438N/A } catch (SecurityException se) {
1438N/A log.warning(se.getLocalizedMessage());
1438N/A if (log.isLoggable(Level.FINE)) {
1438N/A log.log(Level.FINE, "destroy", se);
1438N/A }
1438N/A }
1438N/A }
1185N/A }
1185N/A}