opengrok/index/IndexDatabase.java

	IndexDatabase.java revision 207
0N/A/*
0N/A * CDDL HEADER START
0N/A *
0N/A * The contents of this file are subject to the terms of the
0N/A * Common Development and Distribution License (the "License").
0N/A * You may not use this file except in compliance with the License.
0N/A *
0N/A * See LICENSE.txt included in this distribution for the specific
0N/A * language governing permissions and limitations under the License.
0N/A *
0N/A * When distributing Covered Code, include this CDDL HEADER in each
0N/A * file and include the License file at LICENSE.txt.
0N/A * If applicable, add the following below this CDDL HEADER, with the
0N/A * fields enclosed by brackets "[]" replaced with your own identifying
0N/A * information: Portions Copyright [yyyy] [name of copyright owner]
0N/A *
0N/A * CDDL HEADER END
0N/A */
0N/A
0N/A/*
0N/A * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
0N/A * Use is subject to license terms.
0N/A */
0N/Apackage org.opensolaris.opengrok.index;
376N/A
0N/Aimport java.io.BufferedInputStream;
0N/Aimport java.io.File;
234N/Aimport java.io.FileInputStream;
0N/Aimport java.io.FileNotFoundException;
0N/Aimport java.io.IOException;
0N/Aimport java.io.InputStream;
0N/Aimport java.util.ArrayList;
0N/Aimport java.util.Arrays;
0N/Aimport java.util.List;
234N/Aimport org.apache.lucene.document.DateTools;
234N/Aimport org.apache.lucene.document.Document;
0N/Aimport org.apache.lucene.index.IndexReader;
0N/Aimport org.apache.lucene.index.IndexWriter;
350N/Aimport org.apache.lucene.index.Term;
0N/Aimport org.apache.lucene.index.TermEnum;
0N/Aimport org.apache.lucene.search.spell.LuceneDictionary;
0N/Aimport org.apache.lucene.search.spell.SpellChecker;
0N/Aimport org.apache.lucene.store.FSDirectory;
0N/Aimport org.opensolaris.opengrok.analysis.AnalyzerGuru;
0N/Aimport org.opensolaris.opengrok.analysis.FileAnalyzer;
0N/Aimport org.opensolaris.opengrok.analysis.FileAnalyzer.Genre;
350N/Aimport org.opensolaris.opengrok.configuration.Project;
0N/Aimport org.opensolaris.opengrok.configuration.RuntimeEnvironment;
234N/Aimport org.opensolaris.opengrok.web.Util;
125N/A
234N/A/**
234N/A * This class is used to create / update the index databases. Currently we use
234N/A * one index database per project.
0N/A *
0N/A * @author Trond Norbye
0N/A */
226N/Apublic class IndexDatabase {
0N/A
0N/A    private Project project;
0N/A    private FSDirectory indexDirectory;
0N/A    private FSDirectory spellDirectory;
0N/A    private IndexWriter writer;
0N/A    private IndexReader reader;
0N/A    private TermEnum uidIter;
0N/A    private IgnoredNames ignoredNames;
0N/A    private AnalyzerGuru analyzerGuru;
0N/A    private File xrefDir;
0N/A    private boolean interrupted;
0N/A    private List<IndexChangedListener> listeners;
0N/A    private boolean dirty;
0N/A
0N/A    /**
0N/A     * Create a new instance of the Index Database. Use this constructor if
0N/A     * you don't use any projects
0N/A     *
0N/A     * @throws java.io.IOException if an error occurs while creating directories
0N/A     */
0N/A    public IndexDatabase() throws IOException {
0N/A        initialize();
0N/A    }
0N/A
0N/A    /**
0N/A     * Create a new instance of an Index Database for a given project
0N/A     * @param project the project to create the database for
0N/A     * @throws java.io.IOException if an errror occurs while creating directories
0N/A     */
0N/A    public IndexDatabase(Project project) throws IOException {
0N/A        this.project = project;
0N/A        initialize();
234N/A    }
0N/A
0N/A    /**
0N/A     * Update the index database for all of the projects. Print progress to
0N/A     * standard out.
0N/A     * @throws java.lang.Exception if an error occurs
0N/A     */
0N/A    public static void updateAll() throws Exception {
0N/A        updateAll(null);
0N/A    }
234N/A
0N/A    /**
234N/A     * Update the index database for all of the projects
234N/A     * @param listener where to signal the changes to the database
234N/A     * @throws java.lang.Exception if an error occurs
0N/A     */
0N/A    static void updateAll(IndexChangedListener listener) throws Exception {
0N/A        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
0N/A        if (env.hasProjects()) {
0N/A            for (Project project : env.getProjects()) {
0N/A                IndexDatabase db = new IndexDatabase(project);
99N/A                if (listener != null) {
125N/A                    db.addIndexChangedListener(listener);
234N/A                }
234N/A                db.update();
236N/A            }
236N/A        } else {
236N/A            IndexDatabase db = new IndexDatabase();
236N/A            if (listener != null) {
236N/A                db.addIndexChangedListener(listener);
236N/A            }
236N/A            db.update();
236N/A        }
236N/A
236N/A    }
236N/A
236N/A    private void initialize() throws IOException {
236N/A        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
236N/A        File indexDir = new File(env.getDataRootFile(), "index");
234N/A        File spellDir = new File(env.getDataRootFile(), "spellIndex");
234N/A        if (project != null) {
235N/A            indexDir = new File(indexDir, project.getPath());
235N/A            spellDir = new File(spellDir, project.getPath());
235N/A        }
235N/A
235N/A        if (!indexDir.exists() || !spellDir.exists()) {
235N/A            indexDir.mkdirs();
235N/A            spellDir.mkdirs();
235N/A            // to avoid race conditions, just recheck..
235N/A            if (!indexDir.exists()) {
235N/A                throw new FileNotFoundException("Failed to create root directory [" + indexDir.getAbsolutePath() + "]");
235N/A            }
235N/A            if (!spellDir.exists()) {
235N/A                throw new FileNotFoundException("Failed to create root directory [" + spellDir.getAbsolutePath() + "]");
235N/A            }
235N/A        }
0N/A
0N/A        indexDirectory = FSDirectory.getDirectory(indexDir);
0N/A        spellDirectory = FSDirectory.getDirectory(spellDir);
0N/A        ignoredNames = env.getIgnoredNames();
0N/A        analyzerGuru = new AnalyzerGuru();
0N/A        if (RuntimeEnvironment.getInstance().isGenerateHtml()) {
234N/A            xrefDir = new File(env.getDataRootFile(), "xref");
0N/A        }
0N/A        listeners = new ArrayList<IndexChangedListener>();
235N/A    }
235N/A
234N/A    /**
235N/A     * Update the content of this index database
226N/A     * @throws java.lang.Exception if an error occurs
226N/A     */
0N/A    public synchronized void update() throws Exception {
226N/A        interrupted = false;
234N/A        try {
234N/A            writer = new IndexWriter(indexDirectory, AnalyzerGuru.getAnalyzer());
234N/A            String root;
234N/A            File sourceRoot;
234N/A
234N/A            if (project != null) {
234N/A                root = project.getPath();
234N/A                sourceRoot = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), project.getPath());
234N/A            } else {
234N/A                root = "";
234N/A                sourceRoot = RuntimeEnvironment.getInstance().getSourceRootFile();
0N/A            }
0N/A
0N/A            String startuid = Util.uid(root, "");
0N/A            reader = IndexReader.open(indexDirectory);       // open existing index
0N/A            uidIter = reader.terms(new Term("u", startuid)); // init uid iterator
234N/A
0N/A            indexDown(sourceRoot, root);
0N/A
0N/A            while (uidIter.term() != null && uidIter.term().field().equals("u") && uidIter.term().text().startsWith(startuid)) {
0N/A                removeFile();
4N/A                uidIter.next();
0N/A            }
4N/A        } finally {
0N/A            if (reader != null) {
0N/A                try {
0N/A                    reader.close();
0N/A                } catch (IOException e) {
0N/A                }
0N/A            }
0N/A            if (writer != null) {
4N/A                try {
0N/A                    writer.close();
4N/A                } catch (IOException e) {
0N/A                }
0N/A            }
0N/A        }
234N/A
0N/A        if (!interrupted && dirty) {
0N/A            optimize();
0N/A            createSpellingSuggestions();
234N/A        }
234N/A    }
234N/A
0N/A    /**
0N/A     * Optimize the index database
0N/A     */
0N/A    public void optimize() {
234N/A        IndexWriter wrt = null;
0N/A        try {
0N/A            if (RuntimeEnvironment.getInstance().isVerbose()) {
350N/A                System.out.print("Optimizing the index ... ");
350N/A            }
350N/A            wrt = new IndexWriter(indexDirectory, null, false);
350N/A            wrt.optimize();
350N/A            if (RuntimeEnvironment.getInstance().isVerbose()) {
234N/A                System.out.println("done");
0N/A            }
0N/A        } catch (IOException e) {
0N/A            System.err.println("ERROR: optimizing index: " + e);
234N/A        } finally {
234N/A            if (wrt != null) {
0N/A                try {
0N/A                    wrt.close();
234N/A                } catch (IOException e) {
380N/A                }
234N/A            }
380N/A
380N/A        }
380N/A    }
380N/A
380N/A    /**
0N/A     * Generate a spelling suggestion for the definitions stored in defs
0N/A     */
0N/A    public void createSpellingSuggestions() {
0N/A        IndexReader indexReader = null;
0N/A        SpellChecker checker = null;
0N/A
0N/A        try {
0N/A            if (RuntimeEnvironment.getInstance().isVerbose()) {
0N/A                System.out.print("Generating spelling suggestion index ... ");
0N/A            }
0N/A            indexReader = IndexReader.open(indexDirectory);
0N/A            checker = new SpellChecker(spellDirectory);
0N/A            checker.indexDictionary(new LuceneDictionary(indexReader, "defs"));
0N/A            if (RuntimeEnvironment.getInstance().isVerbose()) {
0N/A                System.out.println("done");
0N/A            }
0N/A        } catch (IOException e) {
0N/A            System.err.println("ERROR: Generating spelling: " + e);
0N/A        } finally {
0N/A            if (indexReader != null) {
0N/A                try {
0N/A                    indexReader.close();
0N/A                } catch (IOException e) {
0N/A                }
0N/A            }
234N/A            if (spellDirectory != null) {
0N/A                spellDirectory.close();
0N/A            }
0N/A        }
0N/A    }
0N/A
32N/A    /**
350N/A     * Remove a stale file (uidIter.term().text()) from the index database
350N/A     * (and the xref file)
0N/A     * @throws java.io.IOException if an error occurs
0N/A     */
0N/A    private void removeFile() throws IOException {
0N/A        String path = Util.uid2url(uidIter.term().text());
0N/A
0N/A        for (IndexChangedListener listener : listeners) {
0N/A            listener.fileRemoved(path);
0N/A        }
0N/A        writer.deleteDocuments(uidIter.term());
0N/A
0N/A        File xrefFile = new File(xrefDir, path);
0N/A        xrefFile.delete();
0N/A        xrefFile.getParentFile().delete();
0N/A        dirty = true;
0N/A    }
0N/A
0N/A    /**
0N/A     * Add a file to the Lucene index (and generate a xref file)
0N/A     * @param file The file to add
0N/A     * @param path The path to the file (from source root)
0N/A     * @throws java.io.IOException if an error occurs
0N/A     */
0N/A    private void addFile(File file, String path) throws IOException {
0N/A        InputStream in = new BufferedInputStream(new FileInputStream(file));
0N/A        FileAnalyzer fa = AnalyzerGuru.getAnalyzer(in, path);
0N/A
0N/A        for (IndexChangedListener listener : listeners) {
0N/A            listener.fileAdded(path, fa.getClass().getSimpleName());
0N/A        }
0N/A
0N/A        Document d = analyzerGuru.getDocument(file, in, path);
0N/A        if (d != null) {
0N/A            writer.addDocument(d, fa);
0N/A            Genre g = fa.getFactory().getGenre();
0N/A            if (xrefDir != null && (g == Genre.PLAIN || g == Genre.XREFABLE)) {
0N/A                File xrefFile = new File(xrefDir, path);
0N/A                xrefFile.getParentFile().mkdirs();
0N/A                fa.writeXref(xrefDir, path);
0N/A            }
0N/A            dirty = true;
0N/A        } else {
0N/A            System.err.println("Warning: did not add " + path);
0N/A        }
0N/A    }
0N/A
0N/A    /**
0N/A     * Check if I should accept this file into the index database
0N/A     * @param file the file to check
0N/A     * @return true if the file should be included, false otherwise
0N/A     */
0N/A    private boolean accept(File file) {
0N/A        if (ignoredNames.ignore(file)) {
0N/A            return false;
0N/A        }
0N/A
0N/A        if (!file.canRead()) {
0N/A            System.err.println("Warning: could not read " + file.getAbsolutePath());
0N/A            return false;
0N/A        }
0N/A
0N/A        try {
0N/A            if (!file.getAbsolutePath().equals(file.getCanonicalPath())) {
0N/A                System.err.println("Warning: ignored link " + file.getAbsolutePath() +
0N/A                        " -> " + file.getCanonicalPath());
0N/A                return false;
0N/A            }
0N/A        } catch (IOException exp) {
0N/A            System.err.println("Warning: Failed to resolve name: " + file.getAbsolutePath());
0N/A            exp.printStackTrace();
0N/A        }
0N/A
0N/A        return true;
0N/A    }
0N/A
0N/A    /**
0N/A     * Generate indexes recursively
0N/A     * @param dir the root indexDirectory to generate indexes for
0N/A     * @param path the path
0N/A     */
0N/A    private void indexDown(File dir, String parent) throws IOException {
0N/A        if (interrupted) {
0N/A            return;
0N/A        }
0N/A
0N/A        if (!accept(dir)) {
0N/A            return;
0N/A        }
0N/A
0N/A        File[] files = dir.listFiles();
0N/A        if (files == null) {
0N/A            System.err.println("Failed to get file listing for: " + dir.getAbsolutePath());
0N/A            return;
0N/A        }
0N/A        Arrays.sort(files);
0N/A
0N/A        for (File file : files) {
0N/A            if (accept(file)) {
0N/A                String path = parent + '/' + file.getName();
0N/A                if (file.isDirectory()) {
                    indexDown(file, path);
                } else {
                    if (uidIter != null) {
                        String uid = Util.uid(path, DateTools.timeToString(file.lastModified(), DateTools.Resolution.MILLISECOND));  // construct uid for doc
                        while (uidIter.term() != null && uidIter.term().field().equals("u") &&
                                uidIter.term().text().compareTo(uid) < 0) {
                            removeFile();
                            uidIter.next();
                        }

                        if (uidIter.term() != null && uidIter.term().field().equals("u") &&
                                uidIter.term().text().compareTo(uid) == 0) {
                            uidIter.next();        // keep matching docs
                        } else {
                            addFile(file, path);
                        }
                    } else {
                        addFile(file, path);
                    }
                }
            }
        }
    }

    /**
     * Requests that a running index generation stops. This only raises a
     * flag; the index generation stops as soon as it next checks the flag.
     */
    public void interrupt() {
        this.interrupted = true;
    }

    /**
     * Registers an object that is notified whenever the index database is
     * modified.
     *
     * @param listener the object to receive the events
     */
    void addIndexChangedListener(IndexChangedListener listener) {
        this.listeners.add(listener);
    }

    /**
     * Unregisters an object so that it is no longer notified when the index
     * database is modified.
     *
     * @param listener the object to remove
     */
    void removeIndexChangedListener(IndexChangedListener listener) {
        this.listeners.remove(listener);
    }

    /**
     * Lists all files in all of the index databases.
     *
     * @throws java.lang.Exception if an error occurs
     */
    public static void listAllFiles() throws Exception {
        final List<String> everything = null;
        listAllFiles(everything);
    }

    /**
     * Lists all files in some of the index databases.
     *
     * @param subFiles Subdirectories for the various projects to list the files
     *                 for (or null or an empty list to dump all projects)
     * @throws java.lang.Exception if an error occurs
     */
    public static void listAllFiles(List<String> subFiles) throws Exception {
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();

        // Without projects there is only the single, project-less database.
        if (!env.hasProjects()) {
            new IndexDatabase().listFiles();
            return;
        }

        // No selection given: dump every project.
        if (subFiles == null || subFiles.isEmpty()) {
            for (Project current : env.getProjects()) {
                new IndexDatabase(current).listFiles();
            }
            return;
        }

        // Dump only the projects that the given paths resolve to.
        for (String path : subFiles) {
            Project selected = Project.getProject(path);
            if (selected != null) {
                new IndexDatabase(selected).listFiles();
                continue;
            }
            System.err.println("Warning: Could not find a project for \"" + path + "\"");
        }
    }

    /**
     * Prints every file in this index database to standard out, one per
     * line, by walking the index terms starting at the "u" field.
     *
     * @throws java.lang.Exception if an error occurs
     */
    public void listFiles() throws Exception {
        IndexReader index = null;
        TermEnum uids = null;

        try {
            index = IndexReader.open(indexDirectory);  // open existing index
            uids = index.terms(new Term("u", ""));     // init uid iterator
            for (Term current = uids.term(); current != null; current = uids.term()) {
                System.out.println(Util.uid2url(current.text()));
                uids.next();
            }
        } finally {
            if (uids != null) {
                try {
                    uids.close();
                } catch (Exception e) {
                    // best effort: a failure to close is ignored here
                }
            }

            if (index != null) {
                try {
                    index.close();
                } catch (Exception e) {
                    // best effort: a failure to close is ignored here
                }
            }
        }
    }

    /**
     * Lists the frequent tokens of all of the index databases.
     *
     * @throws java.lang.Exception if an error occurs
     */
    static void listFrequentTokens() throws Exception {
        final ArrayList<String> everything = null;
        listFrequentTokens(everything);
    }

    /**
     * Lists the frequent tokens of some of the index databases.
     *
     * @param subFiles Subdirectories for the various projects to list the
     *                 tokens for (or null or an empty list to use all
     *                 projects)
     * @throws java.lang.Exception if an error occurs
     */
    static void listFrequentTokens(ArrayList<String> subFiles) throws Exception {
        // Threshold handed to listTokens(); kept in one place so that all
        // three call sites below stay in agreement.
        final int limit = 4;

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        if (!env.hasProjects()) {
            IndexDatabase db = new IndexDatabase();
            db.listTokens(limit);
        } else {
            if (subFiles == null || subFiles.isEmpty()) {
                for (Project project : env.getProjects()) {
                    IndexDatabase db = new IndexDatabase(project);
                    db.listTokens(limit);
                }
            } else {
                for (String path : subFiles) {
                    Project project = Project.getProject(path);
                    if (project == null) {
                        System.err.println("Warning: Could not find a project for \"" + path + "\"");
                    } else {
                        IndexDatabase db = new IndexDatabase(project);
                        db.listTokens(limit);
                    }
                }
            }
        }
    }

    /**
     * Prints terms of this index database to standard out. A term is printed
     * when it occurs in more than 16 documents and its text is longer than
     * {@code freq} characters.
     *
     * NOTE(review): the enumeration starts at field "defs", yet only terms
     * whose field name starts with "f" are processed and the loop stops at
     * the first other field. If the index holds any "defs" terms this looks
     * like it would print nothing -- confirm the intended field.
     *
     * @param freq despite its name, the text length a term must exceed in
     *             order to be printed
     * @throws java.lang.Exception if an error occurs
     */
    public void listTokens(int freq) throws Exception {
        IndexReader index = null;
        TermEnum tokens = null;

        try {
            index = IndexReader.open(indexDirectory);
            tokens = index.terms(new Term("defs", ""));
            for (Term current = tokens.term();
                    current != null && current.field().startsWith("f");
                    current = tokens.term()) {
                if (tokens.docFreq() > 16 && current.text().length() > freq) {
                    System.out.println(current.text());
                }
                tokens.next();
            }
        } finally {
            if (tokens != null) {
                try {
                    tokens.close();
                } catch (Exception e) {
                    // best effort: a failure to close is ignored here
                }
            }

            if (index != null) {
                try {
                    index.close();
                } catch (Exception e) {
                    // best effort: a failure to close is ignored here
                }
            }
        }
    }
}