Indexer.java revision 11
0N/A/*
0N/A * CDDL HEADER START
0N/A *
0N/A * The contents of this file are subject to the terms of the
0N/A * Common Development and Distribution License (the "License").
0N/A * You may not use this file except in compliance with the License.
0N/A *
0N/A * See LICENSE.txt included in this distribution for the specific
0N/A * language governing permissions and limitations under the License.
0N/A *
0N/A * When distributing Covered Code, include this CDDL HEADER in each
0N/A * file and include the License file at LICENSE.txt.
0N/A * If applicable, add the following below this CDDL HEADER, with the
0N/A * fields enclosed by brackets "[]" replaced with your own identifying
0N/A * information: Portions Copyright [yyyy] [name of copyright owner]
0N/A *
0N/A * CDDL HEADER END
0N/A */
0N/A
0N/A/*
0N/A * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
0N/A * Use is subject to license terms.
0N/A */
0N/A
0N/A/*
0N/A * ident "%Z%%M% %I% %E% SMI"
0N/A */
0N/A
0N/Apackage org.opensolaris.opengrok.index;
0N/Aimport java.awt.GraphicsEnvironment;
0N/Aimport java.io.*;
0N/Aimport java.util.*;
0N/Aimport org.apache.lucene.analysis.WhitespaceAnalyzer;
0N/Aimport org.apache.lucene.document.*;
0N/Aimport org.apache.lucene.index.*;
0N/Aimport org.apache.lucene.store.FSDirectory;
0N/A//import org.apache.lucene.search.spell.*;
2N/Aimport org.apache.oro.io.GlobFilenameFilter;
0N/Aimport org.apache.lucene.spell.NGramSpeller;
0N/Aimport org.opensolaris.opengrok.analysis.*;
0N/Aimport org.opensolaris.opengrok.analysis.FileAnalyzer.Genre;
8N/Aimport org.opensolaris.opengrok.history.HistoryGuru;
8N/Aimport org.opensolaris.opengrok.history.MercurialRepository;
0N/Aimport org.opensolaris.opengrok.search.scope.MainFrame;
0N/Aimport org.opensolaris.opengrok.web.Util;
0N/A
0N/A/**
0N/A * Creates and updates an inverted source index
0N/A * as well as generates Xref, file stats etc., if specified
0N/A * in the options
0N/A */
0N/A
0N/Apublic class Indexer {
0N/A private static String ctags = null;
0N/A private static boolean verbose = true;
0N/A private static boolean economical = false;
0N/A private static String usage = "Usage: " +
8N/A "opengrok.jar [-qe] [-c ctagsToUse] [-w webapproot] [-i ignore_name [ -i ..]] [-m directory [-m ...]] [-s SRC_ROOT] DATA_ROOT [subtree .. ]\n" +
0N/A " opengrok.jar [-O | -l | -t] DATA_ROOT\n" +
0N/A "\t-q run quietly\n" +
0N/A "\t-e economical - consumes less disk space\n" +
0N/A "\t-c path to ctags\n" +
0N/A "\t-w root URL of the webapp, default is /source\n" +
0N/A "\t-i ignore named files or directories\n" +
11N/A "\t-S Search and add \"External\" repositories (Mercurial...)\n" +
0N/A "\t-s SRC_ROOT is root directory of source tree\n" +
0N/A "\t default: last used SRC_ROOT\n" +
0N/A "\tDATA_ROOT - is where output of indexer is stored\n" +
0N/A "\tsubtree - only specified files or directories under SRC_ROOT are processed\n" +
0N/A "\t if not specified all files under SRC_ROOT are processed\n" +
0N/A "\n\t-O optimize the index \n" +
0N/A "\t-l list all files in the index \n" +
0N/A "\t-t lists tokens occuring more than 5 times. Useful for building a unix dictionary\n" +
0N/A "\n Eg. java -jar opengrok.jar -s /usr/include /var/tmp/opengrok_data rpc";
0N/A
0N/A public static void main(String argv[]) {
0N/A if(argv.length == 0) {
0N/A if (GraphicsEnvironment.isHeadless()) {
0N/A System.err.println("No display available for the Graphical User Interface");
0N/A System.err.println(usage);
0N/A System.exit(1);
0N/A } else {
0N/A MainFrame.main(argv);
0N/A }
0N/A //Run Scope GUI here I am running Indexing GUI for testing
0N/A //new IndexerWizard(null).setVisible(true);
0N/A } else {
0N/A String srcRoot = null;
0N/A File srcRootDir = null;
0N/A String dataRoot = null;
11N/A boolean searchRepositories = false;
0N/A String urlPrefix = "/source/s?";
0N/A ArrayList<String> subFiles = new ArrayList<String>();
0N/A try{
0N/A if (argv == null || argv.length < 2) {
0N/A System.err.println(usage);
0N/A System.exit(1);
0N/A }
0N/A for (int i = 0; i < argv.length ; i++) {
0N/A if (argv[i].equals("-O")) {
0N/A if (argv.length == 2 && i+1 < argv.length) {
0N/A Index.doOptimize(new File(argv[i+1]));
0N/A System.exit(0);
0N/A } else {
0N/A System.err.println("ERROR: Invalid option or No data root specified!");
0N/A System.err.println(usage);
0N/A System.exit(1);
0N/A }
2N/A } else if (argv[i].equals("-g")) {
2N/A if(i+1 < argv.length) {
2N/A IgnoredNames.glob = new GlobFilenameFilter(argv[++i]);
2N/A }
0N/A } else if (argv[i].equals("-q")) {
0N/A verbose = false;
0N/A } else if (argv[i].equals("-e")) {
0N/A economical = true;
0N/A } else if (argv[i].equals("-c")) {
0N/A if(i+1 < argv.length) {
0N/A ctags = argv[++i];
0N/A System.setProperty("ctags", ctags);
0N/A }
0N/A } else if (argv[i].equals("-w")) {
0N/A if(i+1 < argv.length) {
0N/A String webapp = argv[++i];
0N/A if(webapp.startsWith("/") || webapp.startsWith("http")) {
8N/A ;
0N/A } else {
0N/A webapp = "/" + webapp;
0N/A }
0N/A if(webapp.endsWith("/")) {
0N/A urlPrefix = webapp + "s?";
0N/A } else {
0N/A urlPrefix = webapp + "/s?";
0N/A }
0N/A }
0N/A } else if (argv[i].equals("-l")) {
0N/A if (argv.length == 2 && i+1 < argv.length) {
0N/A Index.doList(new File(argv[i+1]));
0N/A System.exit(0);
0N/A } else {
0N/A System.err.println("ERROR: Invalid option or No data root specified!");
0N/A System.err.println(usage);
0N/A System.exit(1);
0N/A }
0N/A } else if (argv[i].equals("-t")) {
0N/A if (argv.length == 2 && i+1 < argv.length) {
0N/A Index.doDict(new File(argv[i+1]));
0N/A System.exit(0);
0N/A } else {
0N/A System.err.println("ERROR: Invalid or option No data root specified!");
0N/A System.err.println(usage);
0N/A System.exit(1);
0N/A }
0N/A } else if (argv[i].equals("-s")) {
0N/A if(i+1 < argv.length) {
0N/A srcRoot = argv[++i];
0N/A srcRootDir = new File(srcRoot);
0N/A srcRoot = srcRootDir.getCanonicalPath();
0N/A srcRootDir = srcRootDir.getCanonicalFile();
0N/A if(!srcRootDir.isDirectory()) {
0N/A System.err.println("ERROR: No such directory:" + srcRoot);
0N/A System.err.println(usage);
0N/A System.exit(1);
0N/A }
0N/A }
0N/A } else if (argv[i].equals("-i")) {
0N/A if(i+1 < argv.length) {
0N/A IgnoredNames.ignore.add(argv[++i]);
0N/A }
11N/A } else if (argv[i].equals("-S")) {
11N/A searchRepositories = true;
0N/A } else if (!argv[i].startsWith("-")) {
0N/A if (dataRoot == null)
0N/A dataRoot = argv[i];
0N/A else
0N/A subFiles.add(argv[i]);
0N/A } else {
0N/A System.err.println(usage);
0N/A System.exit(1);
0N/A }
0N/A }
0N/A System.setProperty("urlPrefix", urlPrefix);
0N/A if (dataRoot == null) {
0N/A System.out.println(usage);
0N/A System.exit(1);
0N/A }
0N/A if (srcRoot == null) {
0N/A File srcConfig = new File(dataRoot, "SRC_ROOT");
0N/A if(srcConfig.exists()) {
0N/A try {
0N/A BufferedReader sr = new BufferedReader(new FileReader(srcConfig));
0N/A srcRoot = sr.readLine();
0N/A sr.close();
0N/A } catch (IOException e) {
0N/A }
0N/A }
0N/A if(srcRoot == null) {
0N/A System.err.println("ERROR: please specify a SRC_ROOT with option -s !");
0N/A System.err.println(usage);
0N/A System.exit(1);
0N/A }
0N/A srcRootDir = new File(srcRoot);
0N/A if(!srcRootDir.isDirectory()) {
0N/A System.err.println("ERROR: No such directory:" + srcRoot);
0N/A System.err.println(usage);
0N/A System.exit(1);
0N/A }
0N/A }
0N/A if (! Index.setExuberantCtags(ctags)) {
0N/A System.exit(1);
0N/A }
11N/A
11N/A if (searchRepositories) {
11N/A System.out.println("Scanning for repositories...");
11N/A long start = System.currentTimeMillis();
11N/A HistoryGuru.getInstance().addExternalRepositories(srcRootDir.listFiles());
11N/A long time = (System.currentTimeMillis() - start) / 1000;
11N/A System.out.println("Done searching for repositories (" + time + "s)");
11N/A }
11N/A
0N/A Index idx = new Index(verbose ? new StandardPrinter(System.out) : new NullPrinter(), new StandardPrinter(System.err));
0N/A idx.runIndexer(new File(dataRoot), srcRootDir, subFiles, economical);
0N/A } catch ( Exception e) {
0N/A System.err.println("Error: [ main ] " + e);
0N/A if (verbose) e.printStackTrace();
0N/A System.exit(1);
0N/A }
0N/A }
0N/A }
0N/A}