IndexDatabase.java revision 212
207N/A/*
207N/A * CDDL HEADER START
207N/A *
207N/A * The contents of this file are subject to the terms of the
207N/A * Common Development and Distribution License (the "License").
207N/A * You may not use this file except in compliance with the License.
207N/A *
207N/A * See LICENSE.txt included in this distribution for the specific
207N/A * language governing permissions and limitations under the License.
207N/A *
207N/A * When distributing Covered Code, include this CDDL HEADER in each
207N/A * file and include the License file at LICENSE.txt.
207N/A * If applicable, add the following below this CDDL HEADER, with the
207N/A * fields enclosed by brackets "[]" replaced with your own identifying
207N/A * information: Portions Copyright [yyyy] [name of copyright owner]
207N/A *
207N/A * CDDL HEADER END
207N/A */
207N/A
207N/A/*
207N/A * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
207N/A * Use is subject to license terms.
207N/A */
207N/Apackage org.opensolaris.opengrok.index;
207N/A
207N/Aimport java.io.BufferedInputStream;
207N/Aimport java.io.File;
207N/Aimport java.io.FileInputStream;
207N/Aimport java.io.FileNotFoundException;
207N/Aimport java.io.IOException;
207N/Aimport java.io.InputStream;
207N/Aimport java.util.ArrayList;
207N/Aimport java.util.Arrays;
207N/Aimport java.util.List;
207N/Aimport org.apache.lucene.document.DateTools;
207N/Aimport org.apache.lucene.document.Document;
207N/Aimport org.apache.lucene.index.IndexReader;
207N/Aimport org.apache.lucene.index.IndexWriter;
207N/Aimport org.apache.lucene.index.Term;
207N/Aimport org.apache.lucene.index.TermEnum;
207N/Aimport org.apache.lucene.search.spell.LuceneDictionary;
207N/Aimport org.apache.lucene.search.spell.SpellChecker;
207N/Aimport org.apache.lucene.store.FSDirectory;
207N/Aimport org.opensolaris.opengrok.analysis.AnalyzerGuru;
207N/Aimport org.opensolaris.opengrok.analysis.FileAnalyzer;
207N/Aimport org.opensolaris.opengrok.analysis.FileAnalyzer.Genre;
207N/Aimport org.opensolaris.opengrok.configuration.Project;
207N/Aimport org.opensolaris.opengrok.configuration.RuntimeEnvironment;
207N/Aimport org.opensolaris.opengrok.web.Util;
207N/A
207N/A/**
207N/A * This class is used to create / update the index databases. Currently we use
207N/A * one index database per project.
207N/A *
207N/A * @author Trond Norbye
207N/A */
207N/Apublic class IndexDatabase {
207N/A
207N/A private Project project;
207N/A private FSDirectory indexDirectory;
207N/A private FSDirectory spellDirectory;
207N/A private IndexWriter writer;
207N/A private IndexReader reader;
207N/A private TermEnum uidIter;
207N/A private IgnoredNames ignoredNames;
207N/A private AnalyzerGuru analyzerGuru;
207N/A private File xrefDir;
207N/A private boolean interrupted;
207N/A private List<IndexChangedListener> listeners;
207N/A private boolean dirty;
207N/A
/**
 * Create an index database that is not bound to any project. Use this
 * constructor when the installation does not use projects.
 *
 * @throws java.io.IOException if the index directories cannot be created
 *                             or opened
 */
public IndexDatabase() throws IOException {
    // Delegates to the project-aware constructor with "no project".
    this((Project) null);
}
207N/A
/**
 * Create an index database for the given project.
 *
 * @param project the project whose index this instance manages
 * @throws java.io.IOException if an error occurs while creating or opening
 *                             the index directories
 */
public IndexDatabase(Project project) throws IOException {
    // The project must be assigned first: initialize() derives the
    // per-project directory names from it.
    this.project = project;
    this.initialize();
}
207N/A
207N/A /**
207N/A * Update the index database for all of the projects. Print progress to
207N/A * standard out.
207N/A * @throws java.lang.Exception if an error occurs
207N/A */
207N/A public static void updateAll() throws Exception {
207N/A updateAll(null);
207N/A }
207N/A
207N/A /**
207N/A * Update the index database for all of the projects
207N/A * @param listener where to signal the changes to the database
207N/A * @throws java.lang.Exception if an error occurs
207N/A */
207N/A static void updateAll(IndexChangedListener listener) throws Exception {
207N/A RuntimeEnvironment env = RuntimeEnvironment.getInstance();
207N/A if (env.hasProjects()) {
207N/A for (Project project : env.getProjects()) {
207N/A IndexDatabase db = new IndexDatabase(project);
207N/A if (listener != null) {
207N/A db.addIndexChangedListener(listener);
207N/A }
207N/A db.update();
207N/A }
207N/A } else {
207N/A IndexDatabase db = new IndexDatabase();
207N/A if (listener != null) {
207N/A db.addIndexChangedListener(listener);
207N/A }
207N/A db.update();
207N/A }
207N/A
207N/A }
207N/A
207N/A private void initialize() throws IOException {
207N/A RuntimeEnvironment env = RuntimeEnvironment.getInstance();
207N/A File indexDir = new File(env.getDataRootFile(), "index");
207N/A File spellDir = new File(env.getDataRootFile(), "spellIndex");
207N/A if (project != null) {
207N/A indexDir = new File(indexDir, project.getPath());
207N/A spellDir = new File(spellDir, project.getPath());
207N/A }
207N/A
207N/A if (!indexDir.exists() || !spellDir.exists()) {
207N/A indexDir.mkdirs();
207N/A spellDir.mkdirs();
207N/A // to avoid race conditions, just recheck..
207N/A if (!indexDir.exists()) {
207N/A throw new FileNotFoundException("Failed to create root directory [" + indexDir.getAbsolutePath() + "]");
207N/A }
207N/A if (!spellDir.exists()) {
207N/A throw new FileNotFoundException("Failed to create root directory [" + spellDir.getAbsolutePath() + "]");
207N/A }
207N/A }
207N/A
207N/A indexDirectory = FSDirectory.getDirectory(indexDir);
207N/A spellDirectory = FSDirectory.getDirectory(spellDir);
207N/A ignoredNames = env.getIgnoredNames();
207N/A analyzerGuru = new AnalyzerGuru();
207N/A if (RuntimeEnvironment.getInstance().isGenerateHtml()) {
207N/A xrefDir = new File(env.getDataRootFile(), "xref");
207N/A }
207N/A listeners = new ArrayList<IndexChangedListener>();
207N/A }
207N/A
207N/A /**
207N/A * Update the content of this index database
207N/A * @throws java.lang.Exception if an error occurs
207N/A */
207N/A public synchronized void update() throws Exception {
207N/A interrupted = false;
207N/A try {
207N/A writer = new IndexWriter(indexDirectory, AnalyzerGuru.getAnalyzer());
207N/A String root;
207N/A File sourceRoot;
207N/A
207N/A if (project != null) {
207N/A root = project.getPath();
207N/A sourceRoot = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), project.getPath());
207N/A } else {
207N/A root = "";
207N/A sourceRoot = RuntimeEnvironment.getInstance().getSourceRootFile();
207N/A }
207N/A
207N/A String startuid = Util.uid(root, "");
207N/A reader = IndexReader.open(indexDirectory); // open existing index
207N/A uidIter = reader.terms(new Term("u", startuid)); // init uid iterator
207N/A
207N/A indexDown(sourceRoot, root);
207N/A
207N/A while (uidIter.term() != null && uidIter.term().field().equals("u") && uidIter.term().text().startsWith(startuid)) {
207N/A removeFile();
207N/A uidIter.next();
207N/A }
207N/A } finally {
207N/A if (reader != null) {
207N/A try {
207N/A reader.close();
207N/A } catch (IOException e) {
207N/A }
207N/A }
207N/A if (writer != null) {
207N/A try {
207N/A writer.close();
207N/A } catch (IOException e) {
207N/A }
207N/A }
207N/A }
207N/A
207N/A if (!interrupted && dirty) {
207N/A optimize();
207N/A createSpellingSuggestions();
207N/A }
207N/A }
207N/A
207N/A /**
207N/A * Optimize the index database
207N/A */
207N/A public void optimize() {
207N/A IndexWriter wrt = null;
207N/A try {
207N/A if (RuntimeEnvironment.getInstance().isVerbose()) {
207N/A System.out.print("Optimizing the index ... ");
207N/A }
207N/A wrt = new IndexWriter(indexDirectory, null, false);
207N/A wrt.optimize();
207N/A if (RuntimeEnvironment.getInstance().isVerbose()) {
207N/A System.out.println("done");
207N/A }
207N/A } catch (IOException e) {
207N/A System.err.println("ERROR: optimizing index: " + e);
207N/A } finally {
207N/A if (wrt != null) {
207N/A try {
207N/A wrt.close();
207N/A } catch (IOException e) {
207N/A }
207N/A }
207N/A
207N/A }
207N/A }
207N/A
207N/A /**
207N/A * Generate a spelling suggestion for the definitions stored in defs
207N/A */
207N/A public void createSpellingSuggestions() {
207N/A IndexReader indexReader = null;
207N/A SpellChecker checker = null;
207N/A
207N/A try {
207N/A if (RuntimeEnvironment.getInstance().isVerbose()) {
207N/A System.out.print("Generating spelling suggestion index ... ");
207N/A }
207N/A indexReader = IndexReader.open(indexDirectory);
207N/A checker = new SpellChecker(spellDirectory);
207N/A checker.indexDictionary(new LuceneDictionary(indexReader, "defs"));
207N/A if (RuntimeEnvironment.getInstance().isVerbose()) {
207N/A System.out.println("done");
207N/A }
207N/A } catch (IOException e) {
207N/A System.err.println("ERROR: Generating spelling: " + e);
207N/A } finally {
207N/A if (indexReader != null) {
207N/A try {
207N/A indexReader.close();
207N/A } catch (IOException e) {
207N/A }
207N/A }
207N/A if (spellDirectory != null) {
207N/A spellDirectory.close();
207N/A }
207N/A }
207N/A }
207N/A
207N/A /**
207N/A * Remove a stale file (uidIter.term().text()) from the index database
207N/A * (and the xref file)
207N/A * @throws java.io.IOException if an error occurs
207N/A */
207N/A private void removeFile() throws IOException {
207N/A String path = Util.uid2url(uidIter.term().text());
207N/A
207N/A for (IndexChangedListener listener : listeners) {
207N/A listener.fileRemoved(path);
207N/A }
207N/A writer.deleteDocuments(uidIter.term());
207N/A
207N/A File xrefFile = new File(xrefDir, path);
207N/A xrefFile.delete();
207N/A xrefFile.getParentFile().delete();
207N/A dirty = true;
207N/A }
207N/A
207N/A /**
207N/A * Add a file to the Lucene index (and generate a xref file)
207N/A * @param file The file to add
207N/A * @param path The path to the file (from source root)
207N/A * @throws java.io.IOException if an error occurs
207N/A */
207N/A private void addFile(File file, String path) throws IOException {
212N/A InputStream in;
212N/A try {
212N/A in = new BufferedInputStream(new FileInputStream(file));
212N/A } catch (IOException ex) {
212N/A System.err.println("Warning: " + ex.getMessage());
212N/A return;
212N/A }
207N/A FileAnalyzer fa = AnalyzerGuru.getAnalyzer(in, path);
207N/A
207N/A for (IndexChangedListener listener : listeners) {
207N/A listener.fileAdded(path, fa.getClass().getSimpleName());
207N/A }
207N/A
207N/A Document d = analyzerGuru.getDocument(file, in, path);
207N/A if (d != null) {
207N/A writer.addDocument(d, fa);
207N/A Genre g = fa.getFactory().getGenre();
207N/A if (xrefDir != null && (g == Genre.PLAIN || g == Genre.XREFABLE)) {
207N/A File xrefFile = new File(xrefDir, path);
207N/A xrefFile.getParentFile().mkdirs();
207N/A fa.writeXref(xrefDir, path);
207N/A }
207N/A dirty = true;
207N/A } else {
207N/A System.err.println("Warning: did not add " + path);
207N/A }
207N/A }
207N/A
207N/A /**
207N/A * Check if I should accept this file into the index database
207N/A * @param file the file to check
207N/A * @return true if the file should be included, false otherwise
207N/A */
207N/A private boolean accept(File file) {
207N/A if (ignoredNames.ignore(file)) {
207N/A return false;
207N/A }
207N/A
207N/A if (!file.canRead()) {
207N/A System.err.println("Warning: could not read " + file.getAbsolutePath());
207N/A return false;
207N/A }
207N/A
207N/A try {
207N/A if (!file.getAbsolutePath().equals(file.getCanonicalPath())) {
207N/A System.err.println("Warning: ignored link " + file.getAbsolutePath() +
207N/A " -> " + file.getCanonicalPath());
207N/A return false;
207N/A }
207N/A } catch (IOException exp) {
207N/A System.err.println("Warning: Failed to resolve name: " + file.getAbsolutePath());
207N/A exp.printStackTrace();
207N/A }
207N/A
207N/A return true;
207N/A }
207N/A
207N/A /**
207N/A * Generate indexes recursively
207N/A * @param dir the root indexDirectory to generate indexes for
207N/A * @param path the path
207N/A */
207N/A private void indexDown(File dir, String parent) throws IOException {
207N/A if (interrupted) {
207N/A return;
207N/A }
207N/A
207N/A if (!accept(dir)) {
207N/A return;
207N/A }
207N/A
207N/A File[] files = dir.listFiles();
207N/A if (files == null) {
207N/A System.err.println("Failed to get file listing for: " + dir.getAbsolutePath());
207N/A return;
207N/A }
207N/A Arrays.sort(files);
207N/A
207N/A for (File file : files) {
207N/A if (accept(file)) {
207N/A String path = parent + '/' + file.getName();
207N/A if (file.isDirectory()) {
207N/A indexDown(file, path);
207N/A } else {
207N/A if (uidIter != null) {
207N/A String uid = Util.uid(path, DateTools.timeToString(file.lastModified(), DateTools.Resolution.MILLISECOND)); // construct uid for doc
207N/A while (uidIter.term() != null && uidIter.term().field().equals("u") &&
207N/A uidIter.term().text().compareTo(uid) < 0) {
207N/A removeFile();
207N/A uidIter.next();
207N/A }
207N/A
207N/A if (uidIter.term() != null && uidIter.term().field().equals("u") &&
207N/A uidIter.term().text().compareTo(uid) == 0) {
207N/A uidIter.next(); // keep matching docs
207N/A } else {
207N/A addFile(file, path);
207N/A }
207N/A } else {
207N/A addFile(file, path);
207N/A }
207N/A }
207N/A }
207N/A }
207N/A }
207N/A
207N/A /**
207N/A * Interrupt the index generation (and the index generation will stop as
207N/A * soon as possible)
207N/A */
207N/A public void interrupt() {
207N/A interrupted = true;
207N/A }
207N/A
207N/A /**
207N/A * Register an object to receive events when modifications is done to the
207N/A * index database.
207N/A *
207N/A * @param listener the object to receive the events
207N/A */
207N/A void addIndexChangedListener(IndexChangedListener listener) {
207N/A listeners.add(listener);
207N/A }
207N/A
207N/A /**
207N/A * Remove an object from the lists of objects to receive events when
207N/A * modifications is done to the index database
207N/A *
207N/A * @param listener the object to remove
207N/A */
207N/A void removeIndexChangedListener(IndexChangedListener listener) {
207N/A listeners.remove(listener);
207N/A }
207N/A
207N/A /**
207N/A * List all files in all of the index databases
207N/A * @throws java.lang.Exception if an error occurs
207N/A */
207N/A public static void listAllFiles() throws Exception {
207N/A listAllFiles(null);
207N/A }
207N/A
207N/A /**
207N/A * List all files in some of the index databases
207N/A * @param subFiles Subdirectories for the various projects to list the files
207N/A * for (or null or an empty list to dump all projects)
207N/A * @throws java.lang.Exception if an error occurs
207N/A */
207N/A public static void listAllFiles(List<String> subFiles) throws Exception {
207N/A RuntimeEnvironment env = RuntimeEnvironment.getInstance();
207N/A if (!env.hasProjects()) {
207N/A IndexDatabase db = new IndexDatabase();
207N/A db.listFiles();
207N/A } else {
207N/A if (subFiles == null || subFiles.isEmpty()) {
207N/A for (Project project : env.getProjects()) {
207N/A IndexDatabase db = new IndexDatabase(project);
207N/A db.listFiles();
207N/A }
207N/A } else {
207N/A for (String path : subFiles) {
207N/A Project project = Project.getProject(path);
207N/A if (project == null) {
207N/A System.err.println("Warning: Could not find a project for \"" + path + "\"");
207N/A } else {
207N/A IndexDatabase db = new IndexDatabase(project);
207N/A db.listFiles();
207N/A }
207N/A }
207N/A }
207N/A }
207N/A }
207N/A
207N/A /**
207N/A * List all of the files in this index database
207N/A *
207N/A * @throws java.lang.Exception if an error occurs
207N/A */
207N/A public void listFiles() throws Exception {
207N/A IndexReader ireader = null;
207N/A TermEnum iter = null;
207N/A
207N/A try {
207N/A ireader = IndexReader.open(indexDirectory); // open existing index
207N/A iter = ireader.terms(new Term("u", "")); // init uid iterator
207N/A while (iter.term() != null) {
207N/A System.out.println(Util.uid2url(iter.term().text()));
207N/A iter.next();
207N/A }
207N/A } finally {
207N/A if (iter != null) {
207N/A try {
207N/A iter.close();
207N/A } catch (Exception e) {
207N/A }
207N/A }
207N/A
207N/A if (ireader != null) {
207N/A try {
207N/A ireader.close();
207N/A } catch (Exception e) {
207N/A }
207N/A }
207N/A }
207N/A }
207N/A
207N/A static void listFrequentTokens() throws Exception {
207N/A listFrequentTokens(null);
207N/A }
207N/A
207N/A static void listFrequentTokens(ArrayList<String> subFiles) throws Exception {
207N/A final int limit = 4;
207N/A
207N/A RuntimeEnvironment env = RuntimeEnvironment.getInstance();
207N/A if (!env.hasProjects()) {
207N/A IndexDatabase db = new IndexDatabase();
207N/A db.listTokens(limit);
207N/A } else {
207N/A if (subFiles == null || subFiles.isEmpty()) {
207N/A for (Project project : env.getProjects()) {
207N/A IndexDatabase db = new IndexDatabase(project);
207N/A db.listTokens(4);
207N/A }
207N/A } else {
207N/A for (String path : subFiles) {
207N/A Project project = Project.getProject(path);
207N/A if (project == null) {
207N/A System.err.println("Warning: Could not find a project for \"" + path + "\"");
207N/A } else {
207N/A IndexDatabase db = new IndexDatabase(project);
207N/A db.listTokens(4);
207N/A }
207N/A }
207N/A }
207N/A }
207N/A }
207N/A
207N/A public void listTokens(int freq) throws Exception {
207N/A IndexReader ireader = null;
207N/A TermEnum iter = null;
207N/A
207N/A try {
207N/A ireader = IndexReader.open(indexDirectory);
207N/A iter = ireader.terms(new Term("defs", ""));
207N/A while (iter.term() != null) {
207N/A if (iter.term().field().startsWith("f")) {
207N/A if (iter.docFreq() > 16 && iter.term().text().length() > freq) {
207N/A System.out.println(iter.term().text());
207N/A }
207N/A iter.next();
207N/A } else {
207N/A break;
207N/A }
207N/A }
207N/A } finally {
207N/A if (iter != null) {
207N/A try {
207N/A iter.close();
207N/A } catch (Exception e) {
207N/A }
207N/A }
207N/A
207N/A if (ireader != null) {
207N/A try {
207N/A ireader.close();
207N/A } catch (Exception e) {
207N/A }
207N/A }
207N/A }
207N/A }
208N/A
208N/A /**
208N/A * Get an indexReader for the Index database where a given file
208N/A * @param path the file to get the database for
208N/A * @return The index database where the file should be located or null if
208N/A * it cannot be located.
208N/A */
208N/A public static IndexReader getIndexReader(String path) {
208N/A IndexReader ret = null;
208N/A
208N/A RuntimeEnvironment env = RuntimeEnvironment.getInstance();
208N/A File indexDir = new File(env.getDataRootFile(), "index");
208N/A
208N/A if (env.hasProjects()) {
208N/A Project p = Project.getProject(path);
208N/A if (p != null) {
208N/A indexDir = new File(indexDir, p.getPath());
208N/A } else {
208N/A return null;
208N/A }
208N/A }
208N/A
208N/A if (indexDir.exists() && IndexReader.indexExists(indexDir)) {
208N/A try {
208N/A ret = IndexReader.open(indexDir);
208N/A } catch (Exception ex) {
208N/A System.err.println("Failed to open index: " + indexDir.getAbsolutePath());
208N/A ex.printStackTrace();
208N/A }
208N/A }
208N/A
208N/A return ret;
208N/A }
207N/A}