opengrok/web/SearchHelper.java

	SearchHelper.java revision 1190
1364N/A/*
1364N/A * CDDL HEADER START
1364N/A *
1364N/A * The contents of this file are subject to the terms of the
1364N/A * Common Development and Distribution License (the "License").
1364N/A * You may not use this file except in compliance with the License.
1364N/A *
1364N/A * See LICENSE.txt included in this distribution for the specific
1364N/A * language governing permissions and limitations under the License.
1364N/A *
1364N/A * When distributing Covered Code, include this CDDL HEADER in each
1364N/A * file and include the License file at LICENSE.txt.
1364N/A * If applicable, add the following below this CDDL HEADER, with the
1364N/A * fields enclosed by brackets "[]" replaced with your own identifying
1364N/A * information: Portions Copyright [yyyy] [name of copyright owner]
1364N/A *
1364N/A * CDDL HEADER END
1364N/A */
1364N/A/*
1364N/A * Copyright (c) 2011 Jens Elkner.
1383N/A */
1364N/Apackage org.opensolaris.opengrok.web;
1364N/A
1370N/Aimport java.io.File;
1364N/Aimport java.io.FileNotFoundException;
1364N/Aimport java.io.IOException;
1370N/Aimport java.util.ArrayList;
1364N/Aimport java.util.List;
1364N/Aimport java.util.TreeSet;
1364N/Aimport java.util.logging.Level;
1364N/Aimport java.util.regex.Pattern;
1364N/A
1364N/Aimport org.apache.lucene.document.Document;
1364N/Aimport org.apache.lucene.queryParser.ParseException;
1389N/Aimport org.apache.lucene.search.BooleanQuery;
1364N/Aimport org.apache.lucene.search.IndexSearcher;
1364N/Aimport org.apache.lucene.search.MultiSearcher;
1383N/Aimport org.apache.lucene.search.ParallelMultiSearcher;
1364N/Aimport org.apache.lucene.search.Query;
1364N/Aimport org.apache.lucene.search.ScoreDoc;
1364N/Aimport org.apache.lucene.search.Searcher;
1364N/Aimport org.apache.lucene.search.Sort;
1364N/Aimport org.apache.lucene.search.SortField;
1364N/Aimport org.apache.lucene.search.TermQuery;
1364N/Aimport org.apache.lucene.search.TopFieldDocs;
1364N/Aimport org.apache.lucene.search.spell.SpellChecker;
1364N/Aimport org.apache.lucene.store.FSDirectory;
1364N/Aimport org.opensolaris.opengrok.OpenGrokLogger;
1383N/Aimport org.opensolaris.opengrok.analysis.CompatibleAnalyser;
1364N/Aimport org.opensolaris.opengrok.analysis.Definitions;
1364N/Aimport org.opensolaris.opengrok.search.QueryBuilder;
1364N/Aimport org.opensolaris.opengrok.search.Summarizer;
1364N/Aimport org.opensolaris.opengrok.search.context.Context;
1364N/Aimport org.opensolaris.opengrok.search.context.HistoryContext;
1364N/A
1383N/A/**
1383N/A * Working set for a search basically to factor out/separate search related
1383N/A * complexity from UI design.
1383N/A *
1364N/A * @author  Jens Elkner
1364N/A * @version $Revision$
1364N/A */
1364N/Apublic class SearchHelper {
1364N/A
1364N/A    /** opengrok's data root: used to find the search index file */
1364N/A    public File dataRoot;
1364N/A    /** context path, i.e. the applications context path (usually /source) to
1364N/A     * use when generating a redirect URL */
1364N/A    public String contextPath;
1364N/A    /** piggyback: if {@code true}, files in opengrok's data directory are
1364N/A     * gzip compressed. */
1364N/A    public boolean compressed;
1364N/A    /** piggyback: the source root directory. */
1364N/A    public File sourceRoot;
1364N/A    /** piggyback: the eftar filereader to use. */
1364N/A    public EftarFileReader desc;
1364N/A    /** the result cursor start index, i.e. where to start displaying results */
1383N/A    public int start;
1364N/A    /** max. number of result items to show */
1364N/A    public int maxItems;
1364N/A    /** the QueryBuilder used to create the query */
1364N/A    public QueryBuilder builder;
1364N/A    /** the order to use to order query results */
1364N/A    public SortOrder order;
1364N/A    /** if {@code true} a {@link ParallelMultiSearcher} will be used instead of
1370N/A     * a {@link MultiSearcher}. */
1364N/A    public boolean parallel;
1364N/A    /** Indicate, whether this is search from a cross reference. If {@code true}
1370N/A     * {@link #executeQuery()} sets {@link #redirect} if certain conditions are
1364N/A     * met. */
1364N/A    public boolean isCrossRefSearch;
1370N/A    /** if not {@code null}, the consumer should redirect the client to a
1364N/A     * separate result page denoted by the value of this field. Automatically
1364N/A     * set via {@link #prepareExec(TreeSet)} and {@link #executeQuery()}. */
1364N/A    public String redirect;
1370N/A    /** if not {@code null}, the UI should show this error message and stop
1364N/A     * processing the search. Automatically set via {@link #prepareExec(TreeSet)}
1364N/A     * and {@link #executeQuery()}.*/
1383N/A    public String errorMsg;
1364N/A    /** the searcher used to open/search the index. Automatically set via
1364N/A     * {@link #prepareExec(TreeSet)}. */
1389N/A    public Searcher searcher;
1383N/A    /** list of docs which result from executing the query */
1364N/A    public ScoreDoc[] hits;
1370N/A    /** total number of hits */
1364N/A    public int totalHits;
1364N/A    /** the query created by the used {@link QueryBuilder} via
1364N/A     * {@link #prepareExec(TreeSet)}. */
1364N/A    public Query query;
1383N/A    /** the lucene sort instruction based on {@link #order} created via
1364N/A     * {@link #prepareExec(TreeSet)}. */
1364N/A    protected Sort sort;
1383N/A    /** projects to use to setup indexer searchers. Usually setup via
1364N/A     * {@link #prepareExec(TreeSet)}. */
1383N/A    public TreeSet<String> projects;
1383N/A    /** opengrok summary context. Usually created via {@link #prepareSummary()}. */
1370N/A    public Context sourceContext = null;
1364N/A    /** result summarizer usually created via {@link #prepareSummary()}. */
1364N/A    public Summarizer summerizer = null;
1364N/A    /** history context usually created via {@link #prepareSummary()}.*/
1364N/A    public HistoryContext historyContext;
1370N/A    /** Default query parse error message prefix */
1364N/A    public static final String PARSE_ERROR_MSG = "Unable to parse your query: ";
1364N/A
1364N/A    /**
1383N/A     * Create the searcher to use wrt. to currently set parameters and the given
1383N/A     * projects. Does not produce any {@link #redirect} link. It also does
1364N/A     * nothing if {@link #redirect} or {@link #errorMsg} have a none-{@code null}
1383N/A     * value.
1383N/A     * <p>
1364N/A     * Parameters which should be populated/set at this time:
1370N/A     * <ul>
1364N/A     * <li>{@link #builder}</li>
1364N/A     * <li>{@link #dataRoot}</li>
1370N/A     * <li>{@link #order} (falls back to relevance if unset)</li>
1364N/A     * <li>{@link #parallel} (default: false)</li>
1364N/A     * </ul>
1364N/A     * Populates/sets:
1364N/A     * <ul>
1383N/A     * <li>{@link #query}</li>
1383N/A     * <li>{@link #searcher}</li>
1383N/A     * <li>{@link #sort}</li>
1383N/A     * <li>{@link #projects}</li>
1389N/A     * <li>{@link #errorMsg} if an error occurs</li>
1389N/A     * </ul>
1383N/A     *
1383N/A     * @param projects  project to use query. If empty, a none-project opengrok
1383N/A     *  setup is assumed (i.e. DATA_ROOT/index will be used instead of possible
1383N/A     *  multiple DATA_ROOT/$project/index).
1383N/A     * @return this instance
1389N/A     */
1389N/A    public SearchHelper prepareExec(TreeSet<String> projects) {
1389N/A        if (redirect != null || errorMsg != null) {
1389N/A            return this;
1383N/A        }
1383N/A        // the Query created by the QueryBuilder
1383N/A        try {
1383N/A            query = builder.build();
1383N/A            if (projects == null) {
1389N/A                errorMsg = "No project selected!";
1389N/A                return this;
1383N/A            }
1383N/A            this.projects = projects;
1383N/A            File indexDir = new File(dataRoot, "index");
1383N/A            if (projects.isEmpty()) {
1364N/A                //no project setup
1364N/A                FSDirectory dir = FSDirectory.open(indexDir);
1389N/A                searcher = new IndexSearcher(dir);
1364N/A            } else if (projects.size() == 1) {
1364N/A                // just 1 project selected
1364N/A                FSDirectory dir =
1364N/A                        FSDirectory.open(new File(indexDir, projects.first()));
1364N/A                searcher = new IndexSearcher(dir);
1364N/A            } else {
1364N/A                //more projects
1364N/A                IndexSearcher[] searchables = new IndexSearcher[projects.size()];
1364N/A                int ii = 0;
1364N/A                //TODO might need to rewrite to Project instead of
1364N/A                // String , need changes in projects.jspf too
1364N/A                for (String proj : projects) {
1364N/A                    FSDirectory dir = FSDirectory.open(new File(indexDir, proj));
1364N/A                    searchables[ii++] = new IndexSearcher(dir);
1364N/A                }
1364N/A                searcher = parallel
1364N/A                        ? new ParallelMultiSearcher(searchables)
1364N/A                        : new MultiSearcher(searchables);
1364N/A            }
1364N/A            // TODO check if below is somehow reusing sessions so we don't
1364N/A            // requery again and again, I guess 2min timeout sessions could be
1364N/A            // usefull, since you click on the next page within 2mins, if not,
1364N/A            // then wait ;)
1364N/A            switch (order) {
1364N/A                case LASTMODIFIED:
1364N/A                    sort = new Sort(new SortField("date", SortField.STRING, true));
1364N/A                    break;
1364N/A                case BY_PATH:
1364N/A                    sort = new Sort(new SortField("fullpath", SortField.STRING));
1364N/A                    break;
1364N/A                default:
1364N/A                    sort = Sort.RELEVANCE;
1364N/A                    break;
1364N/A            }
1364N/A        } catch (ParseException e) {
1364N/A            errorMsg = "Unable to parse your query: " + e.getMessage();
1364N/A        } catch (FileNotFoundException e) {
1364N/A//          errorMsg = "Index database(s) not found: " + e.getMessage();
1364N/A            errorMsg = "Index database(s) not found.";
1364N/A        } catch (Exception e) {
1364N/A            errorMsg = e.getMessage();
1364N/A        }
1364N/A        return this;
1364N/A    }
1364N/A
1364N/A    /**
1364N/A     * Start the search prepared by {@link #prepareExec(TreeSet)}.
1364N/A     * It does nothing if {@link #redirect} or {@link #errorMsg} have a
1364N/A     * none-{@code null} value.
1364N/A     * <p>
1364N/A     * Parameters which should be populated/set at this time:
1364N/A     * <ul>
1364N/A     * <li>all fields required for and populated by {@link #prepareExec(TreeSet)})</li>
1364N/A     * <li>{@link #start} (default: 0)</li>
1364N/A     * <li>{@link #maxItems} (default: 0)</li>
1364N/A     * <li>{@link #isCrossRefSearch} (default: false)</li>
1364N/A     * </ul>
1364N/A     * Populates/sets:
1364N/A     * <ul>
1364N/A     * <li>{@link #hits} (see {@link TopFieldDocs#scoreDocs})</li>
1364N/A     * <li>{@link #totalHits} (see {@link TopFieldDocs#totalHits})</li>
1364N/A     * <li>{@link #contextPath}</li>
1364N/A     * <li>{@link #errorMsg} if an error occurs</li>
1364N/A     * <li>{@link #redirect} if certain conditions are met</li>
1364N/A     * </ul>
1364N/A     * @return this instance
1364N/A     */
1364N/A    public SearchHelper executeQuery() {
1364N/A        if (redirect != null || errorMsg != null) {
1364N/A            return this;
1364N/A        }
1364N/A        try {
1364N/A            TopFieldDocs fdocs = searcher.search(query, null, start + maxItems, sort);
1364N/A            totalHits = fdocs.totalHits;
1364N/A            hits = fdocs.scoreDocs;
1364N/A            // Bug #3900: Check if this is a search for a single term, and that
1364N/A            // term is a definition. If that's the case, and we only have one match,
1364N/A            // we'll generate a direct link instead of a listing.
1364N/A            boolean isSingleDefinitionSearch =
1364N/A                    (query instanceof TermQuery) && (builder.getDefs() != null);
1364N/A
1364N/A            // Attempt to create a direct link to the definition if we search for
1364N/A            // one single definition term AND we have exactly one match AND there
1364N/A            // is only one definition of that symbol in the document that matches.
1364N/A            boolean uniqueDefinition = false;
1364N/A            if (isSingleDefinitionSearch && hits != null && hits.length == 1) {
1364N/A                Document doc = searcher.doc(hits[0].doc);
1364N/A                if (doc.getFieldable("tags") != null) {
1364N/A                    byte[] rawTags = doc.getFieldable("tags").getBinaryValue();
1364N/A                    Definitions tags = Definitions.deserialize(rawTags);
1364N/A                    String symbol = ((TermQuery) query).getTerm().text();
1364N/A                    if (tags.occurrences(symbol) == 1) {
1364N/A                        uniqueDefinition = true;
1364N/A                    }
1364N/A                }
1364N/A            }
1364N/A            // @TODO fix me. I should try to figure out where the exact hit is
1364N/A            // instead of returning a page with just _one_ entry in....
1364N/A            if (uniqueDefinition && searcher != null && hits != null
1364N/A                    && hits.length > 0 && isCrossRefSearch) {
1364N/A                redirect = contextPath + Prefix.XREF_P
1364N/A                        + Util.URIEncodePath(searcher.doc(hits[0].doc).get("path"))
1364N/A                        + '#' + Util.URIEncode(((TermQuery) query).getTerm().text());
1364N/A            }
1364N/A        } catch (BooleanQuery.TooManyClauses e) {
1364N/A            errorMsg = "Too many results for wildcard!";
1364N/A        } catch (Exception e) {
1364N/A            errorMsg = e.getMessage();
1364N/A        }
1364N/A        return this;
1364N/A    }
1364N/A    private static final Pattern TABSPACE = Pattern.compile("[\t ]+");
1364N/A
1364N/A    private static final void getSuggestion(String term, SpellChecker checker,
1364N/A            List<String> result) throws IOException {
1364N/A        if (term == null) {
1364N/A            return;
1364N/A        }
1364N/A        String[] toks = TABSPACE.split(term, 0);
1364N/A        for (int j = 0; j < toks.length; j++) {
1364N/A            if (toks[j].length() <= 3) {
1364N/A                continue;
1364N/A            }
1364N/A            for (String s : checker.suggestSimilar(toks[j].toLowerCase(), 5)) {
1364N/A                result.add(s);
1364N/A            }
1364N/A        }
1389N/A    }
1364N/A
1364N/A    /**
1364N/A     * If a search did not return a hit, one may use this method to obtain
1364N/A     * suggestions for a new search.
949N/A     *
1186N/A     * <p>
1186N/A     * Parameters which should be populated/set at this time:
1294N/A     * <ul>
1186N/A     * <li>{@link #projects}</li>
949N/A     * <li>{@link #dataRoot}</li>
954N/A     * <li>{@link #builder}</li>
1186N/A     * </ul>
949N/A     * @return a possible empty list of sugeestions.
1186N/A     */
1186N/A    public List<Suggestion> getSuggestions() {
1186N/A        if (projects == null) {
1186N/A            return new ArrayList<Suggestion>(0);
1186N/A        }
1186N/A        File[] spellIndex = null;
1186N/A        if (projects.isEmpty()) {
1186N/A            spellIndex = new File[]{new File(dataRoot, "spellIndex")};
1186N/A        } else if (projects.size() == 1) {
1186N/A            spellIndex = new File[]{
1186N/A                new File(dataRoot, "spellIndex/" + projects.first())
1186N/A            };
1186N/A        } else {
1186N/A            spellIndex = new File[projects.size()];
1186N/A            int ii = 0;
1186N/A            File indexDir = new File(dataRoot, "spellIndex");
1186N/A            for (String proj : projects) {
1186N/A                spellIndex[ii++] = new File(indexDir, proj);
1186N/A            }
1186N/A        }
1186N/A        List<Suggestion> res = new ArrayList<Suggestion>();
1186N/A        List<String> dummy = new ArrayList<String>();
949N/A        for (int idx = 0; idx < spellIndex.length; idx++) {
949N/A            if (!spellIndex[idx].exists()) {
1186N/A                continue;
1186N/A            }
1186N/A            FSDirectory spellDirectory = null;
1186N/A            SpellChecker checker = null;
1186N/A            Suggestion s = new Suggestion(spellIndex[idx].getName());
1390N/A            try {
1186N/A                spellDirectory = FSDirectory.open(spellIndex[idx]);
1186N/A                checker = new SpellChecker(spellDirectory);
1390N/A                getSuggestion(builder.getFreetext(), checker, dummy);
1390N/A                s.freetext = dummy.toArray(new String[dummy.size()]);
1390N/A                dummy.clear();
1390N/A                getSuggestion(builder.getRefs(), checker, dummy);
1390N/A                s.refs = dummy.toArray(new String[dummy.size()]);
1186N/A                dummy.clear();
1186N/A                // TODO it seems the only true spellchecker is for
1390N/A                // below field, see IndexDatabase
1390N/A                // createspellingsuggestions ...
1390N/A                getSuggestion(builder.getDefs(), checker, dummy);
1254N/A                s.defs = dummy.toArray(new String[dummy.size()]);
1254N/A                dummy.clear();
1186N/A                if (s.freetext.length > 0 || s.defs.length > 0 || s.refs.length > 0) {
1186N/A                    res.add(s);
1186N/A                }
1186N/A            } catch (IOException e) {
1186N/A                /* ignore */
1186N/A            } finally {
1186N/A                if (spellDirectory != null) {
1186N/A                    spellDirectory.close();
1186N/A                }
1186N/A                if (checker != null) {
1186N/A                    try {
1388N/A                        checker.close();
1186N/A                    } catch (Exception x) { /* ignore */ }
1186N/A                }
949N/A            }
949N/A        }
1186N/A        return res;
1186N/A    }
1186N/A
1390N/A    /**
1186N/A     * Prepare the fields to support printing a fullblown summary. Does nothing
1186N/A     * if {@link #redirect} or {@link #errorMsg} have a none-{@code null} value.
1186N/A     *
1186N/A     * <p>
1389N/A     * Parameters which should be populated/set at this time:
1186N/A     * <ul>
1186N/A     * <li>{@link #query}</li>
1186N/A     * <li>{@link #builder}</li>
1186N/A     * </ul>
1186N/A     * Populates/sets:
1186N/A     * Otherwise the following fields are set (includes {@code null}):
1186N/A     * <ul>
1186N/A     * <li>{@link #sourceContext}</li>
1186N/A     * <li>{@link #summerizer}</li>
1186N/A     * <li>{@link #historyContext}</li>
1186N/A     * </ul>
1186N/A     *
1186N/A     * @return this instance.
1390N/A     */
1186N/A    public SearchHelper prepareSummary() {
1186N/A        if (redirect != null || errorMsg != null) {
1186N/A            return this;
1186N/A        }
1186N/A        try {
1186N/A            sourceContext = new Context(query, builder.getQueries());
1186N/A            summerizer = new Summarizer(query, new CompatibleAnalyser());
949N/A        } catch (Exception e) {
949N/A            OpenGrokLogger.getLogger().log(Level.WARNING,
1186N/A                    "Summerizer: " + e.getMessage());
1390N/A        }
1390N/A        try {
1390N/A            historyContext = new HistoryContext(query);
1390N/A        } catch (Exception e) {
1390N/A            OpenGrokLogger.getLogger().log(Level.WARNING,
1390N/A                    "HistoryContext: " + e.getMessage());
1390N/A        }
1390N/A        return this;
1390N/A    }
1186N/A
1186N/A    /**
1186N/A     * Free any resources associated with this helper (that includes closing
1186N/A     * the used {@link #searcher}).
1390N/A     */
1186N/A    public void destroy() {
1186N/A        if (searcher != null) {
1186N/A            try {
1390N/A                searcher.close();
1390N/A            } catch (IOException e) {
1390N/A                /* ignore */
1390N/A            }
1390N/A        }
1390N/A    }
1390N/A}
1390N/A