// IndexDatabase.java revision 425278cfacbc73f1e955ab6016f206fc5ed93ccb
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* See LICENSE.txt included in this distribution for the specific
* language governing permissions and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at LICENSE.txt.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
*/
package org.opensolaris.opengrok.index;
import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.GZIPOutputStream;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.*;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.store.SimpleFSLockFactory;
import org.apache.lucene.util.BytesRef;
import org.opensolaris.opengrok.analysis.AnalyzerGuru;
import org.opensolaris.opengrok.analysis.Ctags;
import org.opensolaris.opengrok.analysis.Definitions;
import org.opensolaris.opengrok.analysis.FileAnalyzer;
import org.opensolaris.opengrok.analysis.FileAnalyzer.Genre;
import org.opensolaris.opengrok.configuration.Project;
import org.opensolaris.opengrok.configuration.RuntimeEnvironment;
import org.opensolaris.opengrok.history.HistoryException;
import org.opensolaris.opengrok.history.HistoryGuru;
import org.opensolaris.opengrok.search.QueryBuilder;
import org.opensolaris.opengrok.search.SearchEngine;
import org.opensolaris.opengrok.util.IOUtils;
import org.opensolaris.opengrok.web.Util;
/**
* This class is used to create / update the index databases. Currently we use
* one index database per project.
*
* @author Trond Norbye
* @author Lubos Kosco , update for lucene 4.2.0
*/
public class IndexDatabase {
    // The project this database indexes, or null when projects are not used.
    private Project project;
    // On-disk location of the Lucene index for this database.
    private FSDirectory indexDirectory;
    // Writer used while updating the index; created in update().
    private IndexWriter writer;
    // Iterator over the "u" (uid) terms of the existing index; walked in
    // parallel with the source tree to detect added/changed/removed files.
    private TermsEnum uidIter;
    // Filters deciding which file names are skipped or included.
    private IgnoredNames ignoredNames;
    private Filter includedNames;
    private AnalyzerGuru analyzerGuru;
    // Root of the xref output tree, or null when HTML generation is off.
    private File xrefDir;
    // Set via interrupt(); checked cooperatively during traversal.
    private boolean interrupted;
    // Listeners notified about file additions/removals.
    private List<IndexChangedListener> listeners;
    // Marker file signalling that the index has unoptimized changes.
    private File dirtyFile;
    // Guards the interrupted/dirty/running flags below.
    private final Object lock = new Object();
    private boolean dirty;
    private boolean running;
    // Sub-directories (relative to source root) to process; empty means all.
    private List<String> directories;
    static final Logger log = Logger.getLogger(IndexDatabase.class.getName());
    // Ctags process used to extract definitions; may stay null if not configured.
    private Ctags ctags;
    private LockFactory lockfact;
    private final BytesRef emptyBR = new BytesRef("");
    //Directory where we store indexes
    private static final String INDEX_DIR="index";
    /**
     * Create a new instance of the Index Database. Use this constructor if you
     * don't use any projects
     *
     * @throws java.io.IOException if an error occurs while creating directories
     */
    public IndexDatabase() throws IOException {
        this(null); // delegate to the project-aware constructor
    }
    /**
     * Create a new instance of an Index Database for a given project
     *
     * @param project the project to create the database for, or null when
     * projects are not used
     * @throws java.io.IOException if an error occurs while creating
     * directories
     */
    public IndexDatabase(Project project) throws IOException {
        this.project = project;
        // Default to real file-system locks; initialize() may replace this
        // with a no-op factory when Lucene locking is disabled.
        lockfact = new SimpleFSLockFactory();
        initialize();
    }
    /**
     * Update the index database for all of the projects. Print progress to
     * standard out.
     *
     * @param executor An executor to run the job
     * @throws IOException if an error occurs
     */
    public static void updateAll(ExecutorService executor) throws IOException {
        updateAll(executor, null); // no change listener
    }
/**
* Update the index database for all of the projects
*
* @param executor An executor to run the job
* @param listener where to signal the changes to the database
* @throws IOException if an error occurs
*/
static void updateAll(ExecutorService executor, IndexChangedListener listener) throws IOException {
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
List<IndexDatabase> dbs = new ArrayList<>();
if (env.hasProjects()) {
for (Project project : env.getProjects()) {
dbs.add(new IndexDatabase(project));
}
} else {
dbs.add(new IndexDatabase());
}
for (IndexDatabase d : dbs) {
final IndexDatabase db = d;
if (listener != null) {
db.addIndexChangedListener(listener);
}
executor.submit(new Runnable() {
@Override
public void run() {
try {
db.update();
} catch (Throwable e) {
log.log(Level.SEVERE, "Problem updating lucene index database: ", e);
}
}
});
}
}
/**
* Update the index database for a number of sub-directories
*
* @param executor An executor to run the job
* @param listener where to signal the changes to the database
* @param paths
* @throws IOException if an error occurs
*/
public static void update(ExecutorService executor, IndexChangedListener listener, List<String> paths) throws IOException {
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
List<IndexDatabase> dbs = new ArrayList<>();
for (String path : paths) {
Project project = Project.getProject(path);
if (project == null && env.hasProjects()) {
log.log(Level.WARNING, "Could not find a project for \"{0}\"", path);
} else {
IndexDatabase db;
try {
if (project == null) {
db = new IndexDatabase();
} else {
db = new IndexDatabase(project);
}
int idx = dbs.indexOf(db);
if (idx != -1) {
db = dbs.get(idx);
}
if (db.addDirectory(path)) {
if (idx == -1) {
dbs.add(db);
}
} else {
log.log(Level.WARNING, "Directory does not exist \"{0}\"", path);
}
} catch (IOException e) {
log.log(Level.WARNING, "An error occured while updating index", e);
}
}
for (final IndexDatabase db : dbs) {
db.addIndexChangedListener(listener);
executor.submit(new Runnable() {
@Override
public void run() {
try {
db.update();
} catch (Throwable e) {
log.log(Level.SEVERE, "An error occured while updating index", e);
}
}
});
}
}
}
    @SuppressWarnings("PMD.CollapsibleIfStatements")
    private void initialize() throws IOException {
        // Called from the constructor; synchronized to make the field
        // assignments safe if the instance is published to other threads.
        synchronized (this) {
            RuntimeEnvironment env = RuntimeEnvironment.getInstance();
            // The index lives under <dataRoot>/index[/<projectPath>].
            File indexDir = new File(env.getDataRootFile(), INDEX_DIR);
            if (project != null) {
                indexDir = new File(indexDir, project.getPath());
            }
            if (!indexDir.exists() && !indexDir.mkdirs()) {
                // to avoid race conditions, just recheck..
                if (!indexDir.exists()) {
                    throw new FileNotFoundException("Failed to create root directory [" + indexDir.getAbsolutePath() + "]");
                }
            }
            if (!env.isUsingLuceneLocking()) {
                // Locking disabled: swap the constructor's default
                // SimpleFSLockFactory for a no-op factory.
                lockfact = NoLockFactory.getNoLockFactory();
            }
            indexDirectory = FSDirectory.open(indexDir, lockfact);
            ignoredNames = env.getIgnoredNames();
            includedNames = env.getIncludedNames();
            analyzerGuru = new AnalyzerGuru();
            if (env.isGenerateHtml()) {
                xrefDir = new File(env.getDataRootFile(), "xref");
            }
            listeners = new ArrayList<>();
            dirtyFile = new File(indexDir, "dirty");
            // Pick up dirty state left behind by a previous run.
            dirty = dirtyFile.exists();
            directories = new ArrayList<>();
        }
    }
/**
* By default the indexer will traverse all directories in the project. If
* you add directories with this function update will just process the
* specified directories.
*
* @param dir The directory to scan
* @return <code>true</code> if the file is added, false otherwise
*/
@SuppressWarnings("PMD.UseStringBufferForStringAppends")
public boolean addDirectory(String dir) {
String directory = dir;
if (directory.startsWith("\\")) {
directory = directory.replace('\\', '/');
} else if (directory.charAt(0) != '/') {
directory = "/" + directory;
}
File file = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), directory);
if (file.exists()) {
directories.add(directory);
return true;
}
return false;
}
    /**
     * Update the content of this index database
     *
     * @throws IOException if an error occurs
     * @throws HistoryException if an error occurs when accessing the history
     */
    public void update() throws IOException, HistoryException {
        // Only one update/optimize may run at a time for this database.
        synchronized (lock) {
            if (running) {
                throw new IOException("Indexer already running!");
            }
            running = true;
            interrupted = false;
        }
        // Set up the ctags process used to extract definitions, if configured.
        String ctgs = RuntimeEnvironment.getInstance().getCtags();
        if (ctgs != null) {
            ctags = new Ctags();
            ctags.setBinary(ctgs);
        }
        if (ctags == null) {
            log.severe("Unable to run ctags! searching definitions will not work!");
        }
        if (ctags != null) {
            String filename = RuntimeEnvironment.getInstance().getCTagsExtraOptionsFile();
            if (filename != null) {
                ctags.setCTagsExtraOptionsFile(filename);
            }
        }
        try {
            Analyzer analyzer = AnalyzerGuru.getAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer);
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
            iwc.setRAMBufferSizeMB(RuntimeEnvironment.getInstance().getRamBufferSize());
            writer = new IndexWriter(indexDirectory, iwc);
            writer.commit(); // to make sure index exists on the disk
            // Default to the whole project (or the whole source root when
            // there are no projects) if no directories were added explicitly.
            if (directories.isEmpty()) {
                if (project == null) {
                    directories.add("");
                } else {
                    directories.add(project.getPath());
                }
            }
            for (String dir : directories) {
                File sourceRoot;
                if ("".equals(dir)) {
                    sourceRoot = RuntimeEnvironment.getInstance().getSourceRootFile();
                } else {
                    sourceRoot = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), dir);
                }
                HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot);
                String startuid = Util.path2uid(dir, "");
                IndexReader reader = DirectoryReader.open(indexDirectory); // open existing index
                Terms terms = null;
                int numDocs = reader.numDocs();
                if (numDocs > 0) {
                    Fields uFields = MultiFields.getFields(reader);//reader.getTermVectors(0);
                    terms = uFields.terms(QueryBuilder.U);
                }
                try {
                    if (numDocs > 0) {
                        // Position the uid iterator at the first document of
                        // this directory; indexDown() walks it in parallel
                        // with the file tree to detect changed files.
                        uidIter = terms.iterator(uidIter);
                        TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid)); //init uid
                        if (stat==TermsEnum.SeekStatus.END) {
                            uidIter=null;
                            log.log(Level.WARNING,
                                "Couldn't find a start term for {0}, empty u field?",
                                startuid);
                        }
                    }
                    // TODO below should be optional, since it traverses the tree once more to get total count! :(
                    int file_cnt = 0;
                    if (RuntimeEnvironment.getInstance().isPrintProgress()) {
                        log.log(Level.INFO, "Counting files in {0} ...", dir);
                        file_cnt = indexDown(sourceRoot, dir, true, 0, 0);
                        if (log.isLoggable(Level.INFO)) {
                            log.log(Level.INFO,
                                "Need to process: {0} files for {1}",
                                new Object[]{file_cnt, dir});
                        }
                    }
                    indexDown(sourceRoot, dir, false, 0, file_cnt);
                    // Anything left in the uid iterator after the tree walk
                    // refers to files that no longer exist -- remove them.
                    while (uidIter != null && uidIter.term() != null
                            && uidIter.term().utf8ToString().startsWith(startuid)) {
                        removeFile();
                        BytesRef next = uidIter.next();
                        if (next==null) {
                            uidIter=null;
                        }
                    }
                } finally {
                    reader.close();
                }
            }
        } finally {
            if (writer != null) {
                try {
                    writer.prepareCommit();
                    writer.commit();
                    writer.close();
                } catch (IOException e) {
                    log.log(Level.WARNING, "An error occured while closing writer", e);
                }
            }
            if (ctags != null) {
                try {
                    ctags.close();
                } catch (IOException e) {
                    log.log(Level.WARNING,
                        "An error occured while closing ctags process", e);
                }
            }
            synchronized (lock) {
                running = false;
            }
        }
        if (!isInterrupted() && isDirty()) {
            if (RuntimeEnvironment.getInstance().isOptimizeDatabase()) {
                optimize();
            }
            // Touch (or create) the timestamp file so consumers can tell
            // when the index database was last updated.
            RuntimeEnvironment env = RuntimeEnvironment.getInstance();
            File timestamp = new File(env.getDataRootFile(), "timestamp");
            if (timestamp.exists()) {
                if (!timestamp.setLastModified(System.currentTimeMillis())) {
                    log.log(Level.WARNING, "Failed to set last modified time on ''{0}'', "
                        + "used for timestamping the index database.",
                        timestamp.getAbsolutePath());
                }
            } else {
                if (!timestamp.createNewFile()) {
                    log.log(Level.WARNING, "Failed to create file ''{0}'', "
                        + "used for timestamping the index database.",
                        timestamp.getAbsolutePath());
                }
            }
        }
    }
/**
* Optimize all index databases
*
* @param executor An executor to run the job
* @throws IOException if an error occurs
*/
static void optimizeAll(ExecutorService executor) throws IOException {
List<IndexDatabase> dbs = new ArrayList<>();
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
if (env.hasProjects()) {
for (Project project : env.getProjects()) {
dbs.add(new IndexDatabase(project));
}
} else {
dbs.add(new IndexDatabase());
}
for (IndexDatabase d : dbs) {
final IndexDatabase db = d;
if (db.isDirty()) {
executor.submit(new Runnable() {
@Override
public void run() {
try {
db.update();
} catch (Throwable e) {
log.log(Level.SEVERE,
"Problem updating lucene index database: ", e);
}
}
});
}
}
}
/**
* Optimize the index database
*/
public void optimize() {
synchronized (lock) {
if (running) {
log.warning("Optimize terminated... Someone else is updating / optimizing it!");
return;
}
running = true;
}
IndexWriter wrt = null;
try {
log.info("Optimizing the index ... ");
Analyzer analyzer = new StandardAnalyzer(SearchEngine.LUCENE_VERSION);
IndexWriterConfig conf = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer);
conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
wrt = new IndexWriter(indexDirectory, conf);
wrt.forceMerge(1); // this is deprecated and not needed anymore
log.info("done");
synchronized (lock) {
if (dirtyFile.exists() && !dirtyFile.delete()) {
log.log(Level.FINE, "Failed to remove \"dirty-file\": {0}",
dirtyFile.getAbsolutePath());
}
dirty = false;
}
} catch (IOException e) {
log.log(Level.SEVERE, "ERROR: optimizing index: {0}", e);
} finally {
if (wrt != null) {
try {
wrt.close();
} catch (IOException e) {
log.log(Level.WARNING,
"An error occured while closing writer", e);
}
}
synchronized (lock) {
running = false;
}
}
}
private boolean isDirty() {
synchronized (lock) {
return dirty;
}
}
private void setDirty() {
synchronized (lock) {
try {
if (!dirty && !dirtyFile.createNewFile()) {
if (!dirtyFile.exists()) {
log.log(Level.FINE,
"Failed to create \"dirty-file\": {0}",
dirtyFile.getAbsolutePath());
}
dirty = true;
}
} catch (IOException e) {
log.log(Level.FINE, "When creating dirty file: ", e);
}
}
}
    /**
     * Remove a stale file (uidIter.term().text()) from the index database (and
     * the xref file)
     *
     * @throws java.io.IOException if an error occurs
     */
    private void removeFile() throws IOException {
        String path = Util.uid2url(uidIter.term().utf8ToString());
        // Notify listeners before the deletion takes place...
        for (IndexChangedListener listener : listeners) {
            listener.fileRemove(path);
        }
        writer.deleteDocuments(new Term(QueryBuilder.U, uidIter.term()));
        // Commit right away so the deletion is persisted even if a later
        // file in the same run fails.
        writer.prepareCommit();
        writer.commit();
        // Remove the matching xref file (gzipped when compression is on).
        File xrefFile;
        if (RuntimeEnvironment.getInstance().isCompressXref()) {
            xrefFile = new File(xrefDir, path + ".gz");
        } else {
            xrefFile = new File(xrefDir, path);
        }
        File parent = xrefFile.getParentFile();
        if (!xrefFile.delete() && xrefFile.exists()) {
            log.log(Level.INFO, "Failed to remove obsolete xref-file: {0}", xrefFile.getAbsolutePath());
        }
        // Remove the parent directory if it's empty
        if (parent.delete()) {
            log.log(Level.FINE, "Removed empty xref dir:{0}", parent.getAbsolutePath());
        }
        setDirty();
        // ... and afterwards, so listeners see both phases.
        for (IndexChangedListener listener : listeners) {
            listener.fileRemoved(path);
        }
    }
    /**
     * Add a file to the Lucene index (and generate a xref file)
     *
     * @param file The file to add
     * @param path The path to the file (from source root)
     * @throws java.io.IOException if an error occurs
     */
    private void addFile(File file, String path) throws IOException {
        FileAnalyzer fa;
        // Pick an analyzer based on the file name and its leading content.
        try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
            fa = AnalyzerGuru.getAnalyzer(in, path);
        }
        for (IndexChangedListener listener : listeners) {
            listener.fileAdd(path, fa.getClass().getSimpleName());
        }
        fa.setCtags(ctags);
        fa.setProject(Project.getProject(path));
        Document doc = new Document();
        // getXrefWriter() may return null (no xref for this genre); the
        // try-with-resources handles that gracefully.
        try (Writer xrefOut = getXrefWriter(fa, path)) {
            analyzerGuru.populateDocument(doc, file, path, fa, xrefOut);
        } catch (Exception e) {
            // Analysis failed; skip this file but keep indexing the rest.
            log.log(Level.INFO,
                "Skipped file ''{0}'' because the analyzer didn''t "
                + "understand it.",
                path);
            log.log(Level.FINE,
                "Exception from analyzer " + fa.getClass().getName(), e);
            cleanupResources(doc);
            return;
        }
        try {
            writer.addDocument(doc, fa);
        } catch (Throwable t) {
            // The writer normally closes the document's readers/streams,
            // but not when addDocument() fails -- clean up ourselves.
            cleanupResources(doc);
            throw t;
        }
        setDirty();
        for (IndexChangedListener listener : listeners) {
            listener.fileAdded(path, fa.getClass().getSimpleName());
        }
    }
/**
* Do a best effort to clean up all resources allocated when populating
* a Lucene document. On normal execution, these resources should be
* closed automatically by the index writer once it's done with them, but
* we may not get that far if something fails.
*
* @param doc the document whose resources to clean up
*/
private void cleanupResources(Document doc) {
for (IndexableField f : doc) {
// If the field takes input from a reader, close the reader.
IOUtils.close(f.readerValue());
// If the field takes input from a token stream, close the
// token stream.
if (f instanceof Field) {
IOUtils.close(((Field) f).tokenStreamValue());
}
}
}
/**
* Check if I should accept this file into the index database
*
* @param file the file to check
* @return true if the file should be included, false otherwise
*/
private boolean accept(File file) {
if (!includedNames.isEmpty()
&& // the filter should not affect directory names
(!(file.isDirectory() || includedNames.match(file)))) {
return false;
}
if (ignoredNames.ignore(file)) {
return false;
}
String absolutePath = file.getAbsolutePath();
if (!file.canRead()) {
log.log(Level.WARNING, "Warning: could not read {0}", absolutePath);
return false;
}
try {
String canonicalPath = file.getCanonicalPath();
if (!absolutePath.equals(canonicalPath)
&& !acceptSymlink(absolutePath, canonicalPath)) {
log.log(Level.FINE, "Skipped symlink ''{0}'' -> ''{1}''",
new Object[]{absolutePath, canonicalPath});
return false;
}
//below will only let go files and directories, anything else is considered special and is not added
if (!file.isFile() && !file.isDirectory()) {
log.log(Level.WARNING, "Warning: ignored special file {0}",
absolutePath);
return false;
}
} catch (IOException exp) {
log.log(Level.WARNING, "Warning: Failed to resolve name: {0}",
absolutePath);
log.log(Level.FINE, "Stack Trace: ", exp);
}
if (file.isDirectory()) {
// always accept directories so that their files can be examined
return true;
}
if (HistoryGuru.getInstance().hasHistory(file)) {
// versioned files should always be accepted
return true;
}
// this is an unversioned file, check if it should be indexed
return !RuntimeEnvironment.getInstance().isIndexVersionedFilesOnly();
}
boolean accept(File parent, File file) {
try {
File f1 = parent.getCanonicalFile();
File f2 = file.getCanonicalFile();
if (f1.equals(f2)) {
log.log(Level.INFO, "Skipping links to itself...: {0} {1}",
new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
return false;
}
// Now, let's verify that it's not a link back up the chain...
File t1 = f1;
while ((t1 = t1.getParentFile()) != null) {
if (f2.equals(t1)) {
log.log(Level.INFO, "Skipping links to parent...: {0} {1}",
new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
return false;
}
}
return accept(file);
} catch (IOException ex) {
log.log(Level.WARNING, "Warning: Failed to resolve name: {0} {1}",
new Object[]{parent.getAbsolutePath(), file.getAbsolutePath()});
}
return false;
}
/**
* Check if I should accept the path containing a symlink
*
* @param absolutePath the path with a symlink to check
* @param canonicalPath the canonical path to the file
* @return true if the file should be accepted, false otherwise
*/
private boolean acceptSymlink(String absolutePath, String canonicalPath) throws IOException {
// Always accept local symlinks
if (isLocal(canonicalPath)) {
return true;
}
for (String allowedSymlink : RuntimeEnvironment.getInstance().getAllowedSymlinks()) {
if (absolutePath.startsWith(allowedSymlink)) {
String allowedTarget = new File(allowedSymlink).getCanonicalPath();
if (canonicalPath.startsWith(allowedTarget)
&& absolutePath.substring(allowedSymlink.length()).equals(canonicalPath.substring(allowedTarget.length()))) {
return true;
}
}
}
return false;
}
/**
* Check if a file is local to the current project. If we don't have
* projects, check if the file is in the source root.
*
* @param path the path to a file
* @return true if the file is local to the current repository
*/
private boolean isLocal(String path) {
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
String srcRoot = env.getSourceRootPath();
boolean local = false;
if (path.startsWith(srcRoot)) {
if (env.hasProjects()) {
String relPath = path.substring(srcRoot.length());
if (project.equals(Project.getProject(relPath))) {
// File is under the current project, so it's local.
local = true;
}
} else {
// File is under source root, and we don't have projects, so
// consider it local.
local = true;
}
}
return local;
}
    /**
     * Generate indexes recursively
     *
     * @param dir the root indexDirectory to generate indexes for
     * @param parent the relative path (from source root) of {@code dir}
     * @param count_only if true will just traverse the source root and count
     * files
     * @param cur_count current count during the traversal of the tree
     * @param est_total estimate total files to process
     * @return number of files seen so far (cur_count plus this subtree)
     */
    private int indexDown(File dir, String parent, boolean count_only,
            int cur_count, int est_total) throws IOException {
        int lcur_count = cur_count;
        if (isInterrupted()) {
            return lcur_count;
        }
        if (!accept(dir)) {
            return lcur_count;
        }
        File[] files = dir.listFiles();
        if (files == null) {
            log.log(Level.SEVERE, "Failed to get file listing for: {0}",
                dir.getAbsolutePath());
            return lcur_count;
        }
        // Sort by name so the traversal order matches the ordering of the
        // uid terms in the index; the merge logic below depends on it.
        Arrays.sort(files, new Comparator<File>() {
            @Override
            public int compare(File p1, File p2) {
                return p1.getName().compareTo(p2.getName());
            }
        });
        for (File file : files) {
            if (accept(dir, file)) {
                String path = parent + '/' + file.getName();
                if (file.isDirectory()) {
                    lcur_count = indexDown(file, path, count_only, lcur_count, est_total);
                } else {
                    lcur_count++;
                    if (count_only) {
                        continue;
                    }
                    if (RuntimeEnvironment.getInstance().isPrintProgress()
                            && est_total > 0 && log.isLoggable(Level.INFO)) {
                        log.log(Level.INFO, "Progress: {0} ({1}%)",
                            new Object[]{lcur_count,
                                (lcur_count * 100.0f / est_total)});
                    }
                    if (uidIter != null) {
                        // The uid encodes path plus last-modified time, so a
                        // changed file produces a different uid than the one
                        // stored in the index.
                        String uid = Util.path2uid(path,
                            DateTools.timeToString(file.lastModified(),
                                DateTools.Resolution.MILLISECOND)); // construct uid for doc
                        BytesRef buid = new BytesRef(uid);
                        // Remove stale documents whose uid sorts before the
                        // current file's uid (deleted or modified files).
                        while (uidIter != null && uidIter.term() != null
                                && uidIter.term().compareTo(emptyBR) !=0
                                && uidIter.term().compareTo(buid) < 0) {
                            removeFile();
                            BytesRef next = uidIter.next();
                            if (next==null) {uidIter=null;}
                        }
                        // Exact uid match: the file is unchanged, keep the
                        // existing document and skip re-indexing.
                        if (uidIter != null && uidIter.term() != null
                                && uidIter.term().bytesEquals(buid)) {
                            BytesRef next = uidIter.next(); // keep matching docs
                            if (next==null) {uidIter=null;}
                            continue;
                        }
                    }
                    try {
                        addFile(file, path);
                    } catch (Exception e) {
                        log.log(Level.WARNING,
                            "Failed to add file " + file.getAbsolutePath(),
                            e);
                    }
                }
            }
        }
        return lcur_count;
    }
    /**
     * Interrupt the index generation (and the index generation will stop as
     * soon as possible)
     */
    public void interrupt() {
        synchronized (lock) {
            // Cooperative flag; indexDown()/update() poll it via isInterrupted().
            interrupted = true;
        }
    }
    // Thread-safe read of the cooperative interrupt flag set by interrupt().
    private boolean isInterrupted() {
        synchronized (lock) {
            return interrupted;
        }
    }
    /**
     * Register an object to receive events when modifications are made to the
     * index database.
     *
     * @param listener the object to receive the events
     */
    public void addIndexChangedListener(IndexChangedListener listener) {
        listeners.add(listener);
    }
    /**
     * Remove an object from the list of objects to receive events when
     * modifications are made to the index database.
     *
     * @param listener the object to remove
     */
    public void removeIndexChangedListener(IndexChangedListener listener) {
        listeners.remove(listener);
    }
    /**
     * List all files in all of the index databases
     *
     * @throws IOException if an error occurs
     */
    public static void listAllFiles() throws IOException {
        listAllFiles(null); // null means: every project
    }
/**
* List all files in some of the index databases
*
* @param subFiles Subdirectories for the various projects to list the files
* for (or null or an empty list to dump all projects)
* @throws IOException if an error occurs
*/
public static void listAllFiles(List<String> subFiles) throws IOException {
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
if (env.hasProjects()) {
if (subFiles == null || subFiles.isEmpty()) {
for (Project project : env.getProjects()) {
IndexDatabase db = new IndexDatabase(project);
db.listFiles();
}
} else {
for (String path : subFiles) {
Project project = Project.getProject(path);
if (project == null) {
log.log(Level.WARNING, "Warning: Could not find a project for \"{0}\"", path);
} else {
IndexDatabase db = new IndexDatabase(project);
db.listFiles();
}
}
}
} else {
IndexDatabase db = new IndexDatabase();
db.listFiles();
}
}
/**
* List all of the files in this index database
*
* @throws IOException If an IO error occurs while reading from the database
*/
public void listFiles() throws IOException {
IndexReader ireader = null;
TermsEnum iter=null;
Terms terms = null;
try {
ireader = DirectoryReader.open(indexDirectory); // open existing index
int numDocs = ireader.numDocs();
if (numDocs > 0) {
Fields uFields = MultiFields.getFields(ireader);//reader.getTermVectors(0);
terms = uFields.terms(QueryBuilder.U);
}
iter = terms.iterator(iter); // init uid iterator
while (iter != null && iter.term() != null) {
log.fine(Util.uid2url(iter.term().utf8ToString()));
BytesRef next=iter.next();
if (next==null) {iter=null;}
}
} finally {
if (ireader != null) {
try {
ireader.close();
} catch (IOException e) {
log.log(Level.WARNING, "An error occured while closing index reader", e);
}
}
}
}
    // Convenience overload: list frequent tokens for all projects.
    static void listFrequentTokens() throws IOException {
        listFrequentTokens(null);
    }
static void listFrequentTokens(List<String> subFiles) throws IOException {
final int limit = 4;
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
if (env.hasProjects()) {
if (subFiles == null || subFiles.isEmpty()) {
for (Project project : env.getProjects()) {
IndexDatabase db = new IndexDatabase(project);
db.listTokens(4);
}
} else {
for (String path : subFiles) {
Project project = Project.getProject(path);
if (project == null) {
log.log(Level.WARNING, "Warning: Could not find a project for \"{0}\"", path);
} else {
IndexDatabase db = new IndexDatabase(project);
db.listTokens(4);
}
}
}
} else {
IndexDatabase db = new IndexDatabase();
db.listTokens(limit);
}
}
public void listTokens(int freq) throws IOException {
IndexReader ireader = null;
TermsEnum iter = null;
Terms terms = null;
try {
ireader = DirectoryReader.open(indexDirectory);
int numDocs = ireader.numDocs();
if (numDocs > 0) {
Fields uFields = MultiFields.getFields(ireader);//reader.getTermVectors(0);
terms = uFields.terms(QueryBuilder.DEFS);
}
iter = terms.iterator(iter); // init uid iterator
while (iter != null && iter.term() != null) {
//if (iter.term().field().startsWith("f")) {
if (iter.docFreq() > 16 && iter.term().utf8ToString().length() > freq) {
log.warning(iter.term().utf8ToString());
}
BytesRef next = iter.next();
if (next==null) {iter=null;}
/*} else {
break;
}*/
}
} finally {
if (ireader != null) {
try {
ireader.close();
} catch (IOException e) {
log.log(Level.WARNING, "An error occured while closing index reader", e);
}
}
}
}
/**
* Get an indexReader for the Index database where a given file
*
* @param path the file to get the database for
* @return The index database where the file should be located or null if it
* cannot be located.
*/
public static IndexReader getIndexReader(String path) {
IndexReader ret = null;
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
File indexDir = new File(env.getDataRootFile(), INDEX_DIR);
if (env.hasProjects()) {
Project p = Project.getProject(path);
if (p == null) {
return null;
}
indexDir = new File(indexDir, p.getPath());
}
try {
FSDirectory fdir = FSDirectory.open(indexDir, NoLockFactory.getNoLockFactory());
if (indexDir.exists() && DirectoryReader.indexExists(fdir)) {
ret = DirectoryReader.open(fdir);
}
} catch (Exception ex) {
log.log(Level.SEVERE, "Failed to open index: {0}", indexDir.getAbsolutePath());
log.log(Level.FINE, "Stack Trace: ", ex);
}
return ret;
}
/**
* Get the latest definitions for a file from the index.
*
* @param file the file whose definitions to find
* @return definitions for the file, or {@code null} if they could not be
* found
* @throws IOException if an error happens when accessing the index
* @throws ParseException if an error happens when building the Lucene query
* @throws ClassNotFoundException if the class for the stored definitions
* instance cannot be found
*/
public static Definitions getDefinitions(File file)
throws IOException, ParseException, ClassNotFoundException {
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
String path = env.getPathRelativeToSourceRoot(file, 0);
//sanitize windows path delimiters
//in order not to conflict with Lucene escape character
path=path.replace("\\", "/");
IndexReader ireader = getIndexReader(path);
if (ireader == null) {
// No index, no definitions...
return null;
}
try {
Query q = new QueryBuilder().setPath(path).build();
IndexSearcher searcher = new IndexSearcher(ireader);
TopDocs top = searcher.search(q, 1);
if (top.totalHits == 0) {
// No hits, no definitions...
return null;
}
Document doc = searcher.doc(top.scoreDocs[0].doc);
String foundPath = doc.get(QueryBuilder.PATH);
// Only use the definitions if we found an exact match.
if (path.equals(foundPath)) {
IndexableField tags = doc.getField(QueryBuilder.TAGS);
if (tags != null) {
return Definitions.deserialize(tags.binaryValue().bytes);
}
}
} finally {
ireader.close();
}
// Didn't find any definitions.
return null;
}
@Override
public boolean equals(Object obj) {
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
return false;
}
final IndexDatabase other = (IndexDatabase) obj;
if (this.project != other.project && (this.project == null || !this.project.equals(other.project))) {
return false;
}
return true;
}
@Override
public int hashCode() {
int hash = 7;
hash = 41 * hash + (this.project == null ? 0 : this.project.hashCode());
return hash;
}
/**
* Get a writer to which the xref can be written, or null if no xref
* should be produced for files of this type.
*/
private Writer getXrefWriter(FileAnalyzer fa, String path) throws IOException {
Genre g = fa.getFactory().getGenre();
if (xrefDir != null && (g == Genre.PLAIN || g == Genre.XREFABLE)) {
File xrefFile = new File(xrefDir, path);
// If mkdirs() returns false, the failure is most likely
// because the file already exists. But to check for the
// file first and only add it if it doesn't exists would
// only increase the file IO...
if (!xrefFile.getParentFile().mkdirs()) {
assert xrefFile.getParentFile().exists();
}
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
boolean compressed = env.isCompressXref();
File file = new File(xrefDir, path + (compressed ? ".gz" : ""));
return new BufferedWriter(new OutputStreamWriter(
compressed ?
new GZIPOutputStream(new FileOutputStream(file)) :
new FileOutputStream(file)));
}
// no Xref for this analyzer
return null;
}
}