/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
 */
package org.opensolaris.opengrok.index;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.store.SimpleFSLockFactory;
import org.opensolaris.opengrok.analysis.AnalyzerGuru;
import org.opensolaris.opengrok.analysis.Ctags;
import org.opensolaris.opengrok.analysis.Definitions;
import org.opensolaris.opengrok.analysis.FileAnalyzer;
import org.opensolaris.opengrok.analysis.FileAnalyzer.Genre;
import org.opensolaris.opengrok.configuration.Configuration;
import org.opensolaris.opengrok.configuration.Project;
import org.opensolaris.opengrok.configuration.RuntimeEnvironment;
import org.opensolaris.opengrok.history.HistoryException;
import org.opensolaris.opengrok.history.HistoryGuru;
import org.opensolaris.opengrok.search.QueryBuilder;
import org.opensolaris.opengrok.search.SearchEngine;
import org.opensolaris.opengrok.util.IOUtils;
import org.opensolaris.opengrok.web.Util;

/**
 * This class is used to create / update the index databases. Currently we use
 * one index database per project.
 *
 * @author Trond Norbye
 * @author Lubos Kosco , update for lucene 3.0.0
 */
public class IndexDatabase {

    /** The project this database indexes, or {@code null} when projects are not used. */
    private Project project;
    private FSDirectory indexDirectory;
    private FSDirectory spellDirectory;
    private IndexWriter writer;
    /** Iterator over the "u" (uid) terms of the existing index during update(). */
    private TermEnum uidIter;
    private IgnoredNames ignoredNames;
    private Filter includedNames;
    private AnalyzerGuru analyzerGuru;
    /** Root of the xref output tree, or {@code null} when HTML generation is disabled. */
    private File xrefDir;
    private boolean interrupted;
    private List<IndexChangedListener> listeners;
    /** Marker file whose existence records that the index has uncommitted changes. */
    private File dirtyFile;
    /** Guards {@link #dirty}, {@link #running} and {@link #interrupted}. */
    private final Object lock = new Object();
    private boolean dirty;
    private boolean running;
    /** Sub-directories (relative to source root) to process; empty means "whole project". */
    private List<String> directories;
    static final Logger logger = Logger.getLogger(IndexDatabase.class.getName());
    private Ctags ctags;
    private LockFactory lockfact;

    /**
     * Create a new instance of the Index Database. Use this constructor if
     * you don't use any projects
     *
     * @throws java.io.IOException if an error occurs while creating directories
     */
    public IndexDatabase() throws IOException {
        this(null);
    }

    /**
     * Create a new instance of an Index Database for a given project
     *
     * @param project the project to create the database for
     * @throws IOException if an error occurs while creating directories
     */
    public IndexDatabase(Project project) throws IOException {
        this.project = project;
        lockfact = new SimpleFSLockFactory();
        initialize();
    }

    /**
     * Update the index database for all of the projects. Print progress to
     * standard out.
     *
     * @param executor An executor to run the job
     * @throws IOException if an error occurs
     */
    public static void updateAll(ExecutorService executor) throws IOException {
        updateAll(executor, null);
    }

    /**
     * Update the index database for all of the projects
     *
     * @param executor An executor to run the job
     * @param listener where to signal the changes to the database
     * @throws IOException if an error occurs
     */
    static void updateAll(ExecutorService executor, IndexChangedListener listener)
            throws IOException {
        Configuration config = RuntimeEnvironment.getConfig();
        List<IndexDatabase> dbs = new ArrayList<IndexDatabase>();

        if (config.hasProjects()) {
            for (Project project : config.getProjects()) {
                dbs.add(new IndexDatabase(project));
            }
        } else {
            dbs.add(new IndexDatabase());
        }

        for (IndexDatabase d : dbs) {
            final IndexDatabase db = d;
            if (listener != null) {
                db.addIndexChangedListener(listener);
            }
            executor.submit(new Runnable() {
                @Override
                public void run() {
                    try {
                        db.update();
                    } catch (Throwable e) {
                        logger.warning("Problem updating " + db + ": " + e.getMessage());
                        logger.log(Level.FINE, "updateAll", e);
                    }
                }
            });
        }
    }

    /**
     * Update the index database for a number of sub-directories
     *
     * @param executor An executor to run the job
     * @param listener where to signal the changes to the database
     * @param paths the source-root-relative paths to update
     */
    public static void update(ExecutorService executor, IndexChangedListener listener,
            List<String> paths) {
        Configuration config = RuntimeEnvironment.getConfig();
        List<IndexDatabase> dbs = new ArrayList<IndexDatabase>();

        for (String path : paths) {
            Project project = Project.getProject(path);
            if (project == null && config.hasProjects()) {
                logger.warning("Could not find a project for '" + path + "'");
            } else {
                IndexDatabase db = null;
                try {
                    if (project == null) {
                        db = new IndexDatabase();
                    } else {
                        db = new IndexDatabase(project);
                    }
                    // Several paths may belong to the same database; reuse it.
                    int idx = dbs.indexOf(db);
                    if (idx != -1) {
                        db = dbs.get(idx);
                    }

                    if (db.addDirectory(path)) {
                        if (idx == -1) {
                            dbs.add(db);
                        }
                    } else {
                        logger.warning("Directory does not exist '" + path + "'");
                    }
                } catch (IOException e) {
                    logger.warning("An error occurred while updating " + db + ": "
                            + e.getMessage());
                    logger.log(Level.FINE, "update", e);
                }
            }
        }

        // BUG FIX: this loop used to be nested inside the path loop above,
        // which registered the listener and scheduled an update job once per
        // path instead of once per database.
        for (final IndexDatabase db : dbs) {
            if (listener != null) {
                db.addIndexChangedListener(listener);
            }
            executor.submit(new Runnable() {
                @Override
                public void run() {
                    try {
                        db.update();
                    } catch (Throwable e) {
                        logger.warning("An error occurred while updating " + db + ": "
                                + e.getLocalizedMessage());
                        logger.log(Level.FINE, "run", e);
                    }
                }
            });
        }
    }

    /**
     * Set up the index and spelling directories (creating them if needed) and
     * read the configuration-driven state for this database.
     *
     * @throws IOException if a required directory cannot be created or opened
     */
    @SuppressWarnings("PMD.CollapsibleIfStatements")
    private void initialize() throws IOException {
        synchronized (this) {
            Configuration config = RuntimeEnvironment.getConfig();
            File indexDir = new File(config.getDataRootFile(), "index");
            File spellDir = new File(config.getDataRootFile(), "spellIndex");
            if (project != null) {
                indexDir = new File(indexDir, project.getPath());
                spellDir = new File(spellDir, project.getPath());
            }

            if (!indexDir.exists() && !indexDir.mkdirs()) {
                // to avoid race conditions, just recheck..
                if (!indexDir.exists()) {
                    throw new FileNotFoundException("Failed to create root directory '"
                            + indexDir.getAbsolutePath() + "'");
                }
            }

            if (!spellDir.exists() && !spellDir.mkdirs()) {
                if (!spellDir.exists()) {
                    throw new FileNotFoundException("Failed to create root directory '"
                            + spellDir.getAbsolutePath() + "'");
                }
            }

            if (!config.isUsingLuceneLocking()) {
                lockfact = NoLockFactory.getNoLockFactory();
            }
            indexDirectory = FSDirectory.open(indexDir, lockfact);
            spellDirectory = FSDirectory.open(spellDir, lockfact);
            ignoredNames = config.getIgnoredNames();
            includedNames = config.getIncludedNames();
            analyzerGuru = new AnalyzerGuru();
            if (config.isGenerateHtml()) {
                xrefDir = new File(config.getDataRootFile(), "xref");
            }
            listeners = new ArrayList<IndexChangedListener>();
            dirtyFile = new File(indexDir, "dirty");
            dirty = dirtyFile.exists();
            directories = new ArrayList<String>();
        }
    }

    /**
     * By default the indexer will traverse all directories in the project.
     * If you add directories with this function update will just process
     * the specified directories.
     *
     * @param dir The directory to scan
     * @return true if the directory is added, false otherwise
     */
    @SuppressWarnings("PMD.UseStringBufferForStringAppends")
    public boolean addDirectory(String dir) {
        String directory = dir;
        if (directory.startsWith("\\")) {
            directory = directory.replace('\\', '/');
        } else if (directory.charAt(0) != '/') {
            directory = "/" + directory;
        }
        File file = new File(RuntimeEnvironment.getConfig().getSourceRootFile(), directory);
        if (file.exists()) {
            directories.add(directory);
            return true;
        }
        return false;
    }

    /**
     * Update the content of this index database
     *
     * @throws IOException if an error occurs
     * @throws HistoryException if an error occurs when accessing the history
     */
    @SuppressWarnings({"resource", "boxing"})
    public void update() throws IOException, HistoryException {
        synchronized (lock) {
            if (running) {
                throw new IOException("Indexer already running");
            }
            running = true;
            interrupted = false;
        }

        Configuration config = RuntimeEnvironment.getConfig();
        String ctgs = config.getCtags();
        if (ctgs != null) {
            ctags = new Ctags();
            ctags.setBinary(ctgs);
            ctags.setOptionsFile(config.getCtagsOptionsFile());
        }
        if (ctags == null) {
            logger.warning("Unable to run ctags! Searching definitions will not work!");
        }

        Analyzer analyzer = null;
        try {
            // TODO we might need to add writer.commit after certain phases of
            // index generation, right now it will only happen in the end
            analyzer = AnalyzerGuru.getAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer);
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
            //iwc.setRAMBufferSizeMB(256.0); //TODO check what is the sweet spot
            writer = new IndexWriter(indexDirectory, iwc);
            writer.commit(); // to make sure index exists on the disk
            //writer.setMaxFieldLength(RuntimeEnvironment.getInstance().getIndexWordLimit());

            if (directories.isEmpty()) {
                if (project == null) {
                    directories.add("");
                } else {
                    directories.add(project.getPath());
                }
            }

            for (String dir : directories) {
                File sourceRoot;
                if ("".equals(dir)) {
                    sourceRoot = config.getSourceRootFile();
                } else {
                    sourceRoot = new File(config.getSourceRootFile(), dir);
                }

                HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot);

                String startuid = Util.path2uid(dir, "");
                IndexReader reader = IndexReader.open(indexDirectory); // open existing index
                try {
                    uidIter = reader.terms(new Term("u", startuid)); // init uid iterator

                    // TODO below should be optional, since it traverses the
                    // tree once more to get total count! :(
                    int file_cnt = 0;
                    if (config.isPrintProgress()) {
                        logger.log(Level.INFO, "Counting files in ''{0}'' ...", dir);
                        file_cnt = indexDown(sourceRoot, dir, true, 0, 0);
                        if (logger.isLoggable(Level.INFO)) {
                            logger.log(Level.INFO, "Need to process {0} files for ''{1}''",
                                    new Object[]{file_cnt, dir});
                        }
                    }

                    indexDown(sourceRoot, dir, false, 0, file_cnt);

                    // Any uids remaining in this range belong to files that no
                    // longer exist on disk; purge them from the index.
                    while (uidIter.term() != null && uidIter.term().field().equals("u")
                            && uidIter.term().text().startsWith(startuid)) {
                        removeFile();
                        uidIter.next();
                    }
                } finally {
                    reader.close();
                }
            }
        } finally {
            IOUtils.close(writer);
            if (ctags != null) {
                ctags.close();
            }
            IOUtils.close(analyzer);
            synchronized (lock) {
                running = false;
            }
        }

        if (!isInterrupted() && isDirty()) {
            if (config.isOptimizeDatabase()) {
                optimize();
            }
            createSpellingSuggestions();
            File timestamp = new File(config.getDataRootFile(), "timestamp");
            if (timestamp.exists()) {
                if (!timestamp.setLastModified(System.currentTimeMillis())) {
                    logger.warning("Failed to set last modified time on '"
                            + timestamp.getAbsolutePath()
                            + "', used for timestamping the index database");
                }
            } else if (!timestamp.createNewFile()) {
                logger.warning("Failed to create file '" + timestamp.getAbsolutePath()
                        + "', used for timestamping the index database");
            }
        }
    }

    /**
     * Optimize all index databases
     *
     * @param executor An executor to run the job
     * @throws IOException if an error occurs
     */
    static void optimizeAll(ExecutorService executor) throws IOException {
        List<IndexDatabase> dbs = new ArrayList<IndexDatabase>();
        Configuration config = RuntimeEnvironment.getConfig();
        if (config.hasProjects()) {
            for (Project project : config.getProjects()) {
                dbs.add(new IndexDatabase(project));
            }
        } else {
            dbs.add(new IndexDatabase());
        }

        for (IndexDatabase d : dbs) {
            final IndexDatabase db = d;
            if (db.isDirty()) {
                executor.submit(new Runnable() {
                    @Override
                    public void run() {
                        try {
                            db.update();
                        } catch (Throwable e) {
                            logger.warning("Problem updating " + db + ": " + e.getMessage());
                            logger.log(Level.FINE, "optimizeAll", e);
                        }
                    }
                });
            }
        }
    }

    /**
     * Optimize the index database
     */
    public void optimize() {
        synchronized (lock) {
            if (running) {
                logger.warning("Optimize terminated... Someone else is updating / optimizing it!");
                return;
            }
            running = true;
        }
        @SuppressWarnings("resource")
        IndexWriter wrt = null;
        @SuppressWarnings("resource")
        Analyzer analyzer = null;
        try {
            logger.info("Optimizing " + this + " ...");
            analyzer = new StandardAnalyzer(SearchEngine.LUCENE_VERSION);
            IndexWriterConfig conf = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer);
            conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
            wrt = new IndexWriter(indexDirectory, conf);
            wrt.forceMerge(1); // this is deprecated and not needed anymore
            logger.info("Optimizing " + this + " done");
            synchronized (lock) {
                if (dirtyFile.exists() && !dirtyFile.delete()) {
                    logger.log(Level.FINE, "Failed to remove \"dirty-file\" ''{0}''",
                            dirtyFile.getAbsolutePath());
                }
                dirty = false;
            }
        } catch (IOException e) {
            logger.warning(this + " optimizing problem: " + e.getMessage());
            logger.log(Level.FINE, "optimize", e);
        } finally {
            IOUtils.close(wrt);
            IOUtils.close(analyzer);
            synchronized (lock) {
                running = false;
            }
        }
    }

    /**
     * Generate a spelling suggestion for the definitions stored in defs
     */
    @SuppressWarnings("resource")
    public void createSpellingSuggestions() {
        IndexReader indexReader = null;
        SpellChecker checker = null;
        Analyzer analyzer = null;
        try {
            logger.info("Generating spelling suggestions for " + this + " ...");
            indexReader = IndexReader.open(indexDirectory);
            checker = new SpellChecker(spellDirectory);
            // TODO below seems only to index "defs" , possible bug ?
            analyzer = AnalyzerGuru.getAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer);
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
            checker.indexDictionary(new LuceneDictionary(indexReader, "defs"), iwc, false);
            logger.info("Generating spelling suggestions for " + this + " done");
        } catch (IOException e) {
            logger.warning("Problem when generating spelling suggestions for " + this + ": "
                    + e.getMessage());
            logger.log(Level.FINE, "createSpellingSuggestions", e);
        } finally {
            IOUtils.close(indexReader);
            IOUtils.close(spellDirectory);
            IOUtils.close(analyzer);
        }
    }

    /** @return whether this database has uncommitted changes. */
    private boolean isDirty() {
        synchronized (lock) {
            return dirty;
        }
    }

    /**
     * Mark this database as modified, both in memory and via the on-disk
     * "dirty" marker file.
     */
    private void setDirty() {
        synchronized (lock) {
            try {
                if (!dirty) {
                    // BUG FIX: the flag used to be set only when
                    // createNewFile() returned false, so a successful
                    // creation of the marker left the in-memory flag unset.
                    if (!dirtyFile.createNewFile() && !dirtyFile.exists()
                            && logger.isLoggable(Level.FINE)) {
                        logger.fine("Failed to create \"dirty-file\" '"
                                + dirtyFile.getAbsolutePath() + "'");
                    }
                    dirty = true;
                }
            } catch (IOException e) {
                logger.log(Level.FINE, "unable to create dirty file", e);
            }
        }
    }

    /**
     * Remove a stale file (uidIter.term().text()) from the index database
     * (and the xref file)
     *
     * @throws java.io.IOException if an error occurs
     */
    private void removeFile() throws IOException {
        String path = Util.uid2url(uidIter.term().text());

        for (IndexChangedListener listener : listeners) {
            listener.fileRemove(path);
        }
        writer.deleteDocuments(uidIter.term());

        File xrefFile = new File(xrefDir, path);
        File parent = xrefFile.getParentFile();

        if (!xrefFile.delete() && xrefFile.exists()) {
            logger.log(Level.INFO, "Failed to remove obsolete xref-file ''{0}''",
                    xrefFile.getAbsolutePath());
        }

        // Remove the parent directory if it's empty
        if (parent.delete()) {
            logger.log(Level.FINE, "Removed empty xref dir ''{0}''", parent.getAbsolutePath());
        }

        setDirty();
        for (IndexChangedListener listener : listeners) {
            listener.fileRemoved(path);
        }
    }

    /**
     * Add a file to the Lucene index (and generate a xref file)
     *
     * @param file The file to add
     * @param path The path to the file (from source root)
     * @throws java.io.IOException if an error occurs
     */
    @SuppressWarnings("resource")
    private void addFile(File file, String path) throws IOException {
        final InputStream in = new BufferedInputStream(new FileInputStream(file));
        FileAnalyzer fa = null;
        try {
            fa = AnalyzerGuru.getAnalyzer(in, path);
            for (IndexChangedListener listener : listeners) {
                listener.fileAdd(path, fa.getClass().getSimpleName());
            }
            fa.setCtags(ctags);
            fa.setProject(Project.getProject(path));

            Document d;
            try {
                d = analyzerGuru.getDocument(file, in, path, fa);
            } catch (Exception e) {
                logger.log(Level.INFO,
                        "Skipped file ''{0}'' because the analyzer didn''t understand it.",
                        path);
                logger.log(Level.FINE, "addFile", e);
                return;
            }

            writer.addDocument(d, fa);
            Genre g = fa.getFactory().getGenre();
            if (xrefDir != null && (g == Genre.PLAIN || g == Genre.XREFABLE)) {
                File xrefFile = new File(xrefDir, path);
                // If mkdirs() returns false, the failure is most likely
                // because the file already exists. But to check for the
                // file first and only add it if it doesn't exists would
                // only increase the file IO...
                if (!xrefFile.getParentFile().mkdirs()) {
                    assert xrefFile.getParentFile().exists();
                }
                fa.writeXref(xrefDir, path);
            }
            setDirty();
            for (IndexChangedListener listener : listeners) {
                listener.fileAdded(path, fa.getClass().getSimpleName());
            }
        } finally {
            IOUtils.close(in);
            IOUtils.close(fa);
        }
    }

    /**
     * Check if I should accept this file into the index database
     *
     * @param file the file to check
     * @return true if the file should be included, false otherwise
     */
    private boolean accept(File file) {
        if (!includedNames.isEmpty()
                // the filter should not affect directory names
                && (!(file.isDirectory() || includedNames.match(file)))) {
            return false;
        }

        if (ignoredNames.match(file)) {
            return false;
        }

        String absolutePath = file.getAbsolutePath();

        if (!file.canRead()) {
            logger.warning("Could not read " + absolutePath);
            return false;
        }

        try {
            String canonicalPath = file.getCanonicalPath();
            if (!absolutePath.equals(canonicalPath)
                    && !acceptSymlink(absolutePath, canonicalPath)) {
                logger.log(Level.FINE, "Skipped symlink ''{0}'' -> ''{1}''",
                        new Object[]{absolutePath, canonicalPath});
                return false;
            }
            // below will only let go files and directories, anything else is
            // considered special and is not added
            if (!file.isFile() && !file.isDirectory()) {
                logger.warning("Ignored special file '" + absolutePath + "'");
                return false;
            }
        } catch (IOException exp) {
            logger.warning("Failed to resolve name '" + absolutePath + "'");
            logger.log(Level.FINE, "accept", exp);
        }

        if (file.isDirectory()) {
            // always accept directories so that their files can be examined
            return true;
        }

        if (HistoryGuru.getInstance().hasHistory(file)) {
            // versioned files should always be accepted
            return true;
        }

        // this is an unversioned file, check if it should be indexed
        return !RuntimeEnvironment.getConfig().isIndexVersionedFilesOnly();
    }

    /**
     * Check if the given file (as a child of the given parent) should be
     * accepted, rejecting self-links and links back up the directory chain.
     *
     * @param parent the directory containing {@code file}
     * @param file the file to check
     * @return true if the file should be included, false otherwise
     */
    boolean accept(File parent, File file) {
        try {
            File f1 = parent.getCanonicalFile();
            File f2 = file.getCanonicalFile();
            if (f1.equals(f2)) {
                logger.log(Level.INFO, "Skipping links to itself (''{0}'' ''{1}'')",
                        new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
                return false;
            }

            // Now, let's verify that it's not a link back up the chain...
            File t1 = f1;
            while ((t1 = t1.getParentFile()) != null) {
                if (f2.equals(t1)) {
                    logger.log(Level.INFO, "Skipping links to parent (''{0}'' ''{1}'')",
                            new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
                    return false;
                }
            }

            return accept(file);
        } catch (IOException ex) {
            logger.log(Level.WARNING, "Failed to resolve name (''{0}'' ''{1}'')",
                    new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
        }
        return false;
    }

    /**
     * Check if I should accept the path containing a symlink
     *
     * @param absolutePath the path with a symlink to check
     * @param canonicalPath the canonical path to the file
     * @return true if the file should be accepted, false otherwise
     */
    private boolean acceptSymlink(String absolutePath, String canonicalPath) throws IOException {
        // Always accept local symlinks
        if (isLocal(canonicalPath)) {
            return true;
        }

        Configuration config = RuntimeEnvironment.getConfig();
        for (String symlink : config.getAllowedSymlinks()) {
            if (absolutePath.startsWith(symlink)) {
                String allowedTarget = new File(symlink).getCanonicalPath();
                if (canonicalPath.startsWith(allowedTarget)
                        && absolutePath.substring(symlink.length())
                                .equals(canonicalPath.substring(allowedTarget.length()))) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Check if a file is local to the current project. If we don't have
     * projects, check if the file is in the source root.
     *
     * @param path the path to a file
     * @return true if the file is local to the current repository
     */
    private boolean isLocal(String path) {
        Configuration config = RuntimeEnvironment.getConfig();
        String srcRoot = config.getSourceRoot();

        boolean local = false;

        if (path.startsWith(srcRoot)) {
            if (config.hasProjects()) {
                String relPath = path.substring(srcRoot.length());
                // NOTE(review): assumes this database has a non-null project
                // whenever projects are configured — confirm against callers.
                if (project.equals(Project.getProject(relPath))) {
                    // File is under the current project, so it's local.
                    local = true;
                }
            } else {
                // File is under source root, and we don't have projects, so
                // consider it local.
                local = true;
            }
        }

        return local;
    }

    /**
     * Generate indexes recursively
     *
     * @param dir the root indexDirectory to generate indexes for
     * @param parent the relative path (from source root) of {@code dir}
     * @param count_only if true will just traverse the source root and count files
     * @param cur_count current count during the traversal of the tree
     * @param est_total estimate total files to process
     * @return number of files indexed
     * @throws IOException if an error occurs while indexing
     */
    @SuppressWarnings("boxing")
    private int indexDown(File dir, String parent, boolean count_only, int cur_count,
            int est_total) throws IOException {
        int lcur_count = cur_count;
        if (isInterrupted()) {
            return lcur_count;
        }

        if (!accept(dir)) {
            return lcur_count;
        }

        File[] files = dir.listFiles();
        if (files == null) {
            logger.severe("Failed to get file listing for '" + dir.getAbsolutePath() + "'");
            return lcur_count;
        }
        // Sort by name so the traversal order matches the uid term order in
        // the existing index.
        Arrays.sort(files, new Comparator<File>() {
            @Override
            public int compare(File p1, File p2) {
                return p1.getName().compareTo(p2.getName());
            }
        });

        Configuration config = RuntimeEnvironment.getConfig();
        for (File file : files) {
            if (accept(dir, file)) {
                String path = parent + '/' + file.getName();
                if (file.isDirectory()) {
                    lcur_count = indexDown(file, path, count_only, lcur_count, est_total);
                } else {
                    lcur_count++;
                    if (count_only) {
                        continue;
                    }

                    if (config.isPrintProgress() && est_total > 0
                            && logger.isLoggable(Level.INFO)) {
                        logger.log(Level.INFO, "Progress: {0} ({1}%)",
                                new Object[]{lcur_count, (lcur_count * 100.0f / est_total)});
                    }

                    if (uidIter != null) {
                        // construct uid for doc
                        String uid = Util.path2uid(path, DateTools.timeToString(
                                file.lastModified(), DateTools.Resolution.MILLISECOND));
                        // Remove index entries that sort before this uid: they
                        // refer to files no longer present (or older versions).
                        while (uidIter.term() != null && uidIter.term().field().equals("u")
                                && uidIter.term().text().compareTo(uid) < 0) {
                            removeFile();
                            uidIter.next();
                        }

                        if (uidIter.term() != null && uidIter.term().field().equals("u")
                                && uidIter.term().text().compareTo(uid) == 0) {
                            uidIter.next(); // keep matching docs
                            continue;
                        }
                    }

                    try {
                        addFile(file, path);
                    } catch (Exception e) {
                        logger.warning("Failed to add file '" + file.getAbsolutePath()
                                + "': " + e.getMessage());
                    }
                }
            }
        }

        return lcur_count;
    }

    /**
     * Interrupt the index generation (and the index generation will stop as
     * soon as possible)
     */
    public void interrupt() {
        synchronized (lock) {
            interrupted = true;
        }
    }

    private boolean isInterrupted() {
        synchronized (lock) {
            return interrupted;
        }
    }

    /**
     * Register an object to receive events when modifications is done to the
     * index database.
     *
     * @param listener the object to receive the events
     */
    public void addIndexChangedListener(IndexChangedListener listener) {
        listeners.add(listener);
    }

    /**
     * Remove an object from the lists of objects to receive events when
     * modifications is done to the index database
     *
     * @param listener the object to remove
     */
    public void removeIndexChangedListener(IndexChangedListener listener) {
        listeners.remove(listener);
    }

    /**
     * List all files in all of the index databases
     *
     * @throws IOException if an error occurs
     */
    public static void listAllFiles() throws IOException {
        listAllFiles(null);
    }

    /**
     * List all files in some of the index databases
     *
     * @param subFiles Subdirectories for the various projects to list the files
     * for (or null or an empty list to dump all projects)
     * @throws IOException if an error occurs
     */
    public static void listAllFiles(List<String> subFiles) throws IOException {
        Configuration config = RuntimeEnvironment.getConfig();
        if (config.hasProjects()) {
            if (subFiles == null || subFiles.isEmpty()) {
                for (Project project : config.getProjects()) {
                    IndexDatabase db = new IndexDatabase(project);
                    db.listFiles();
                }
            } else {
                for (String path : subFiles) {
                    Project project = Project.getProject(path);
                    if (project == null) {
                        logger.warning("Could not find a project for '" + path + "'");
                    } else {
                        IndexDatabase db = new IndexDatabase(project);
                        db.listFiles();
                    }
                }
            }
        } else {
            IndexDatabase db = new IndexDatabase();
            db.listFiles();
        }
    }

    /**
     * Print a listing of all of the files in this index database to stdout.
     *
     * @throws IOException If an IO error occurs while reading from the database
     */
    @SuppressWarnings("resource")
    public void listFiles() throws IOException {
        IndexReader ireader = null;
        TermEnum iter = null;
        try {
            ireader = IndexReader.open(indexDirectory); // open existing index
            iter = ireader.terms(new Term("u", "")); // init uid iterator
            while (iter.term() != null) {
                System.out.println(Util.uid2url(iter.term().text()));
                iter.next();
            }
        } finally {
            IOUtils.close(iter);
            IOUtils.close(ireader);
        }
    }

    static void listFrequentTokens() throws IOException {
        listFrequentTokens(null);
    }

    static void listFrequentTokens(List<String> subFiles) throws IOException {
        // Minimum token length passed to listTokens(); previously two of the
        // branches hard-coded the literal 4 instead of using this constant.
        final int limit = 4;

        Configuration config = RuntimeEnvironment.getConfig();
        if (config.hasProjects()) {
            if (subFiles == null || subFiles.isEmpty()) {
                for (Project project : config.getProjects()) {
                    IndexDatabase db = new IndexDatabase(project);
                    db.listTokens(limit);
                }
            } else {
                for (String path : subFiles) {
                    Project project = Project.getProject(path);
                    if (project == null) {
                        logger.warning("Could not find a project for '" + path + "'");
                    } else {
                        IndexDatabase db = new IndexDatabase(project);
                        db.listTokens(limit);
                    }
                }
            }
        } else {
            IndexDatabase db = new IndexDatabase();
            db.listTokens(limit);
        }
    }

    /**
     * Print tokens from the index that occur in more than 16 documents and are
     * longer than {@code freq} characters.
     *
     * @param freq minimum token length to print
     * @throws IOException If an IO error occurs while reading from the database
     */
    @SuppressWarnings("resource")
    void listTokens(int freq) throws IOException {
        IndexReader ireader = null;
        TermEnum iter = null;
        try {
            ireader = IndexReader.open(indexDirectory);
            iter = ireader.terms(new Term("defs", ""));
            while (iter.term() != null) {
                // NOTE(review): seeking to field "defs" but testing
                // startsWith("f") looks suspicious ("defs" does not start
                // with "f") — confirm the intended field before changing.
                if (iter.term().field().startsWith("f")) {
                    if (iter.docFreq() > 16 && iter.term().text().length() > freq) {
                        System.out.println(iter.term().text());
                    }
                    iter.next();
                } else {
                    break;
                }
            }
        } finally {
            IOUtils.close(iter);
            IOUtils.close(ireader);
        }
    }

    /**
     * Get an indexReader for the Index database where a given file
     *
     * @param path the file to get the database for
     * @return The index database where the file should be located or null if
     * it cannot be located.
     */
    public static IndexReader getIndexReader(String path) {
        IndexReader ret = null;

        Configuration config = RuntimeEnvironment.getConfig();
        File indexDir = new File(config.getDataRootFile(), "index");

        if (config.hasProjects()) {
            Project p = Project.getProject(path);
            if (p == null) {
                return null;
            }
            indexDir = new File(indexDir, p.getPath());
        }

        try {
            @SuppressWarnings("resource")
            FSDirectory fdir = FSDirectory.open(indexDir, NoLockFactory.getNoLockFactory());
            if (indexDir.exists() && IndexReader.indexExists(fdir)) {
                ret = IndexReader.open(fdir);
            }
        } catch (Exception ex) {
            logger.warning("Failed to open index '" + indexDir.getAbsolutePath() + "'");
            logger.log(Level.FINE, "getIndexReader", ex);
        }
        return ret;
    }

    /**
     * Get the latest definitions for a file from the index.
     *
     * @param file the file whose definitions to find
     * @return definitions for the file, or {@code null} if they could not
     * be found
     * @throws IOException if an error happens when accessing the index
     * @throws ParseException if an error happens when building the Lucene query
     * @throws ClassNotFoundException if the class for the stored definitions
     * instance cannot be found
     */
    public static Definitions getDefinitions(File file)
            throws IOException, ParseException, ClassNotFoundException {
        String path = RuntimeEnvironment.getConfig().getPathRelativeToSourceRoot(file, 0);

        IndexReader ireader = getIndexReader(path);
        if (ireader == null) {
            // No index, no definitions...
            return null;
        }

        try {
            Query q = new QueryBuilder().setPath(path).build();
            IndexSearcher searcher = new IndexSearcher(ireader);
            try {
                TopDocs top = searcher.search(q, 1);
                if (top.totalHits == 0) {
                    // No hits, no definitions...
                    return null;
                }
                Document doc = searcher.doc(top.scoreDocs[0].doc);
                String foundPath = doc.get("path");
                // Only use the definitions if we found an exact match.
                if (path.equals(foundPath)) {
                    Fieldable tags = doc.getFieldable("tags");
                    if (tags != null) {
                        return Definitions.deserialize(tags.getBinaryValue());
                    }
                }
            } finally {
                searcher.close();
            }
        } finally {
            ireader.close();
        }

        // Didn't find any definitions.
        return null;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        IndexDatabase other = (IndexDatabase) obj;
        return (this.project == other.project)
                || (this.project != null && this.project.equals(other.project));
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public int hashCode() {
        int hash = 7;
        hash = 41 * hash + (this.project == null ? 0 : this.project.hashCode());
        return hash;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public String toString() {
        return (project == null ? "" : project.getDescription()) + " Lucene IndexDB";
    }
}