Indexer.java revision 207
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
340N/A
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
340N/Apackage org.opensolaris.opengrok.index;
340N/A
340N/Aimport java.awt.GraphicsEnvironment;
340N/Aimport java.io.BufferedReader;
340N/Aimport java.io.File;
340N/Aimport java.io.FileReader;
340N/Aimport java.io.IOException;
340N/Aimport java.net.InetAddress;
340N/Aimport java.text.ParseException;
340N/Aimport java.util.ArrayList;
340N/Aimport java.util.Collections;
340N/Aimport java.util.Comparator;
340N/Aimport java.util.List;
340N/Aimport org.opensolaris.opengrok.analysis.AnalyzerGuru;
340N/Aimport org.opensolaris.opengrok.analysis.FileAnalyzerFactory;
340N/Aimport org.opensolaris.opengrok.configuration.Project;
340N/Aimport org.opensolaris.opengrok.history.HistoryGuru;
340N/Aimport org.opensolaris.opengrok.configuration.RuntimeEnvironment;
340N/Aimport org.opensolaris.opengrok.search.scope.MainFrame;
340N/Aimport org.opensolaris.opengrok.util.Getopt;
340N/A
340N/A/**
340N/A * Creates and updates an inverted source index
340N/A * as well as generates Xref, file stats etc., if specified
340N/A * in the options
340N/A */
340N/Apublic class Indexer {
340N/A private static String usage = "Usage: " +
340N/A "opengrok.jar [-qe] [-c ctagsToUse] [-H] [-R filename] [-W filename] [-U hostname:port] [-P] [-p project-path] [-w webapproot] [-i ignore_name [ -i ..]] [-n] [-s SRC_ROOT] [-d DATA_ROOT] [subtree .. ]\n" +
340N/A " opengrok.jar [-O | -l | -t] [-d DATA_ROOT]\n" +
340N/A "\t-q run quietly\n" +
340N/A "\t-v Print progress information\n" +
340N/A "\t-e economical - consumes less disk space\n" +
340N/A "\t-c path to ctags\n" +
340N/A "\t-R Read configuration from file\n" +
370N/A "\t-W Write the current running configuration\n" +
340N/A "\t-U Send configuration to hostname:port\n" +
340N/A "\t-P Generate a project for each toplevel directory\n" +
340N/A "\t-p Use the project specified by the project path as the default project\n" +
340N/A "\t-Q on/off Turn on / off quick context scan. By default only the first 32k\n" +
340N/A "\t of a file is scanned and a '[..all..]' link is inserted if the\n" +
340N/A "\t is bigger. Activating this option may slow down the server.\n" +
340N/A "\t-n Do not generate indexes\n" +
340N/A "\t-H Generate history cache for external repositories\n" +
368N/A "\t-r on/off Turn on / off support for remote SCM systems\n" +
368N/A "\t-L laf Use \"laf\" as the look'n'feel for the webapp\n" +
340N/A "\t-w root URL of the webapp, default is /source\n" +
456N/A "\t-i ignore named files or directories\n" +
340N/A "\t-A ext:analyzer Files with extension ext should be analyzed with the named class\n" +
340N/A "\t-m Maximum words in a file to index\n" +
340N/A "\t-a on/off Allow or disallow leading wildcards in a search\n" +
340N/A "\t-S Search and add \"External\" repositories (Mercurial etc)\n" +
340N/A "\t-s SRC_ROOT is root directory of source tree\n" +
340N/A "\t default: last used SRC_ROOT\n" +
340N/A "\t-d DATA_ROOT - is where output of indexer is stored\n" +
340N/A "\tsubtree - only specified files or directories under SRC_ROOT are processed\n" +
340N/A "\t if not specified all files under SRC_ROOT are processed\n" +
340N/A "\n" +
340N/A "\t-l list all files in the index\n" +
340N/A "\t-t lists tokens occuring more than 5 times. Useful for building a unix dictionary\n" +
340N/A "\n Eg. java -jar opengrok.jar -s /usr/include /var/tmp/opengrok_data rpc";
340N/A
340N/A private static String options = "d:r:a:qec:Q:R:W:U:Pp:nHw:i:Ss:ltvm:A:L:";
340N/A
340N/A /**
340N/A * Program entry point
340N/A * @param argv argument vector
340N/A */
340N/A public static void main(String argv[]) {
340N/A RuntimeEnvironment env = RuntimeEnvironment.getInstance();
340N/A boolean runIndex = true;
340N/A
340N/A if(argv.length == 0) {
340N/A if (GraphicsEnvironment.isHeadless()) {
340N/A System.err.println("No display available for the Graphical User Interface");
340N/A System.err.println(usage);
456N/A System.exit(1);
340N/A } else {
340N/A MainFrame.main(argv);
340N/A }
456N/A //Run Scope GUI here I am running Indexing GUI for testing
340N/A //new IndexerWizard(null).setVisible(true);
340N/A } else {
340N/A boolean searchRepositories = false;
456N/A ArrayList<String> subFiles = new ArrayList<String>();
340N/A String configFilename = null;
340N/A String configHost = null;
340N/A boolean addProjects = false;
340N/A boolean refreshHistory = false;
340N/A String defaultProject = null;
340N/A boolean listFiles = false;
340N/A boolean createDict = false;
340N/A
340N/A // Parse command line options:
340N/A Getopt getopt = new Getopt(argv, options);
340N/A
340N/A try {
340N/A getopt.parse();
340N/A } catch (ParseException ex) {
340N/A System.err.println("OpenGrok: " + ex.getMessage());
340N/A System.err.println(usage);
340N/A System.exit(1);
340N/A }
340N/A
340N/A try{
340N/A int cmd;
340N/A
340N/A // We need to read the configuration file first, since we
340N/A // will try to overwrite options..
340N/A while ((cmd = getopt.getOpt()) != -1) {
340N/A if (cmd == 'R') {
340N/A env.readConfiguration(new File(getopt.getOptarg()));
340N/A break;
340N/A }
340N/A }
340N/A
340N/A // Now we can handle all the other options..
340N/A getopt.reset();
340N/A while ((cmd = getopt.getOpt()) != -1) {
340N/A switch (cmd) {
340N/A case 'l':
340N/A listFiles = true;
340N/A runIndex = false;
340N/A break;
340N/A case 't':
340N/A createDict = true;
340N/A runIndex = false;
340N/A break;
340N/A
340N/A case 'q': env.setVerbose(false); break;
340N/A case 'e': env.setGenerateHtml(false); break;
340N/A case 'P': addProjects = true; break;
340N/A case 'p': defaultProject = getopt.getOptarg(); break;
340N/A case 'c': env.setCtags(getopt.getOptarg()); break;
340N/A case 'w': {
340N/A String webapp = getopt.getOptarg();
340N/A if (webapp.startsWith("/") || webapp.startsWith("http")) {
340N/A ;
340N/A } else {
340N/A webapp = "/" + webapp;
340N/A }
340N/A if (webapp.endsWith("/")) {
340N/A env.setUrlPrefix(webapp + "s?");
340N/A } else {
340N/A env.setUrlPrefix(webapp + "/s?");
340N/A }
340N/A }
340N/A break;
340N/A case 'W': configFilename = getopt.getOptarg(); break;
340N/A case 'U': configHost = getopt.getOptarg(); break;
340N/A case 'R':
340N/A // already handled
340N/A break;
340N/A case 'n': runIndex = false; break;
340N/A case 'H': refreshHistory = true; break;
340N/A case 'r': {
340N/A if (getopt.getOptarg().equalsIgnoreCase("on")) {
340N/A env.setRemoteScmSupported(true);
340N/A } else if (getopt.getOptarg().equalsIgnoreCase("off")) {
340N/A env.setRemoteScmSupported(true);
340N/A } else {
340N/A System.err.println("ERROR: You should pass either \"on\" or \"off\" as argument to -r");
340N/A System.err.println(" Ex: \"-r on\" will allow retrival for remote SCM systems");
340N/A System.err.println(" \"-Q off\" will ignore SCM for remote systems");
340N/A }
340N/A }
340N/A break;
340N/A case 'v': env.setVerbose(true); break;
340N/A
340N/A case 's': {
340N/A File file = new File(getopt.getOptarg());
340N/A if (!file.isDirectory()) {
340N/A System.err.println("ERROR: No such directory: " + file.toString());
340N/A System.exit(1);
340N/A }
340N/A
340N/A env.setSourceRootFile(file);
340N/A break;
340N/A }
340N/A case 'd':
340N/A env.setDataRoot(getopt.getOptarg());
340N/A break;
340N/A case 'i':
340N/A env.getIgnoredNames().add(getopt.getOptarg());
340N/A break;
340N/A case 'S' : searchRepositories = true; break;
340N/A case 'Q' :
340N/A if (getopt.getOptarg().equalsIgnoreCase("on")) {
340N/A env.setQuickContextScan(true);
340N/A } else if (getopt.getOptarg().equalsIgnoreCase("off")) {
340N/A env.setQuickContextScan(false);
340N/A } else {
340N/A System.err.println("ERROR: You should pass either \"on\" or \"off\" as argument to -Q");
340N/A System.err.println(" Ex: \"-Q on\" will just scan a \"chunk\" of the file and insert \"[..all..]\"");
340N/A System.err.println(" \"-Q off\" will try to build a more accurate list by reading the complete file.");
340N/A }
340N/A
340N/A break;
340N/A case 'm' : {
340N/A try {
340N/A env.setIndexWordLimit(Integer.parseInt(getopt.getOptarg()));
340N/A } catch (NumberFormatException exp) {
340N/A System.err.println("ERROR: Failed to parse argument to \"-m\": " + exp.getMessage());
340N/A System.exit(1);
340N/A }
340N/A break;
340N/A }
340N/A case 'a' :
340N/A if (getopt.getOptarg().equalsIgnoreCase("on")) {
340N/A env.setAllowLeadingWildcard(true);
340N/A } else if (getopt.getOptarg().equalsIgnoreCase("off")) {
340N/A env.setAllowLeadingWildcard(false);
340N/A } else {
340N/A System.err.println("ERROR: You should pass either \"on\" or \"off\" as argument to -a");
340N/A System.err.println(" Ex: \"-a on\" will allow a search to start with a wildcard");
340N/A System.err.println(" \"-a off\" will disallow a search to start with a wildcard");
340N/A System.exit(1);
340N/A }
340N/A
340N/A break;
340N/A
340N/A case 'A': {
340N/A String[] arg = getopt.getOptarg().split(":");
340N/A if (arg.length != 2) {
340N/A System.err.println("ERROR: You must specify: -A extension:class");
340N/A System.err.println(" Ex: -A foo:org.opensolaris.opengrok.analysis.c.CAnalyzer");
340N/A System.err.println(" will use the C analyzer for all files ending with .foo");
340N/A System.err.println(" Ex: -A c:-");
340N/A System.err.println(" will disable the c-analyzer for for all files ending with .c");
340N/A System.exit(1);
340N/A }
340N/A
340N/A arg[0] = arg[0].substring(arg[0].lastIndexOf('.') + 1).toUpperCase();
340N/A if (arg[1].equals("-")) {
340N/A AnalyzerGuru.addExtension(arg[0], null);
340N/A break;
340N/A }
340N/A
340N/A try {
340N/A Class clazz = Class.forName(arg[1]);
340N/A try {
340N/A FileAnalyzerFactory f =
340N/A (FileAnalyzerFactory)
340N/A clazz.newInstance();
340N/A AnalyzerGuru.addExtension(arg[0], f);
340N/A } catch (ClassCastException cce) {
340N/A System.err.println("ERROR: " + arg[1] +
340N/A " does not extend FileAnalyzerFactory!");
340N/A System.exit(1);
340N/A }
340N/A } catch (ClassNotFoundException exp) {
340N/A System.err.println("ERROR: Could not locate class: " + arg[1]);
340N/A System.exit(1);
340N/A }
340N/A }
340N/A break;
340N/A case 'L' :
340N/A env.setWebappLAF(getopt.getOptarg());
340N/A break;
340N/A default:
340N/A System.err.println("Unknown option: " + (char)cmd);
340N/A System.exit(1);
340N/A }
340N/A }
340N/A
340N/A int optind = getopt.getOptind();
340N/A if (optind != -1) {
340N/A while (optind < argv.length) {
340N/A subFiles.add(argv[optind]);
340N/A ++optind;
340N/A }
340N/A }
340N/A
340N/A if (env.getDataRootPath() == null) {
340N/A System.err.println("ERROR: Please specify a DATA ROOT path");
340N/A System.err.println(usage);
340N/A System.exit(1);
340N/A }
340N/A
340N/A if (env.getSourceRootFile() == null) {
340N/A File srcConfig = new File(env.getDataRootPath(), "SRC_ROOT");
340N/A String line = null;
340N/A if(srcConfig.exists()) {
340N/A try {
340N/A BufferedReader sr = new BufferedReader(new FileReader(srcConfig));
340N/A line = sr.readLine();
340N/A sr.close();
340N/A } catch (IOException e) {
340N/A }
340N/A }
340N/A if(line == null) {
340N/A System.err.println("ERROR: please specify a SRC_ROOT with option -s !");
340N/A System.err.println(usage);
340N/A System.exit(1);
340N/A }
340N/A env.setSourceRoot(line);
340N/A
340N/A if (!env.getSourceRootFile().isDirectory()) {
340N/A System.err.println("ERROR: No such directory:" + line);
340N/A System.err.println(usage);
340N/A System.exit(1);
340N/A }
340N/A }
340N/A
340N/A if (!env.validateExuberantCtags()) {
340N/A System.exit(1);
340N/A }
340N/A
340N/A if (searchRepositories) {
340N/A if (env.isVerbose()) {
340N/A System.out.println("Scanning for repositories...");
340N/A }
340N/A env.getRepositories().clear();
340N/A long start = System.currentTimeMillis();
340N/A HistoryGuru.getInstance().addExternalRepositories(env.getSourceRootPath());
340N/A long time = (System.currentTimeMillis() - start) / 1000;
340N/A if (env.isVerbose()) {
340N/A System.out.println("Done searching for repositories (" + time + "s)");
340N/A }
340N/A }
340N/A
340N/A if (addProjects) {
340N/A File files[] = env.getSourceRootFile().listFiles();
340N/A List<Project> projects = env.getProjects();
340N/A projects.clear();
340N/A for (File file : files) {
340N/A if (!file.getName().startsWith(".") && file.isDirectory()) {
340N/A projects.add(new Project(file.getName(), "/" + file.getName()));
340N/A }
340N/A }
340N/A
340N/A // The projects should be sorted...
368N/A Collections.sort(projects, new Comparator<Project>() {
368N/A public int compare(Project p1, Project p2){
368N/A String s1 = p1.getDescription();
368N/A String s2 = p2.getDescription();
368N/A
368N/A int ret;
340N/A if (s1 == null) {
368N/A ret = (s2 == null) ? 0 : 1;
368N/A } else {
340N/A ret = s1.compareTo(s2);
340N/A }
340N/A return ret;
340N/A }
368N/A });
368N/A }
368N/A
340N/A if (defaultProject != null) {
340N/A for (Project p : env.getProjects()) {
340N/A if (p.getPath().equals(defaultProject)) {
env.setDefaultProject(p);
break;
}
}
}
if (configFilename != null) {
if (env.isVerbose()) {
System.out.println("Writing configuration to " + configFilename);
System.out.flush();
}
env.writeConfiguration(new File(configFilename));
if (env.isVerbose()) {
System.out.println("Done...");
System.out.flush();
}
}
if (refreshHistory) {
HistoryGuru.getInstance().createCache();
}
if (listFiles) {
IndexDatabase.listAllFiles(subFiles);
}
if (createDict) {
IndexDatabase.listFrequentTokens(subFiles);
}
if (runIndex) {
IndexChangedListener progress = new DefaultIndexChangedListener();
if (subFiles.isEmpty() || !env.hasProjects()) {
IndexDatabase.updateAll(progress);
} else {
for (String path : subFiles) {
Project project = Project.getProject(path);
if (project == null) {
System.err.println("Warning: Could not find a project for \"" + path + "\"");
} else {
IndexDatabase db = new IndexDatabase(project);
db.addIndexChangedListener(progress);
db.update();
}
}
}
}
if (configHost != null) {
String[] cfg = configHost.split(":");
if (env.isVerbose()) {
System.out.println("Send configuration to: " + configHost);
}
if (cfg.length == 2) {
try {
InetAddress host = InetAddress.getByName(cfg[0]);
RuntimeEnvironment.getInstance().writeConfiguration(host, Integer.parseInt(cfg[1]));
} catch (Exception ex) {
System.err.println("Failed to send configuration to " + configHost);
ex.printStackTrace();
}
} else {
System.err.println("Syntax error: ");
for (String s : cfg) {
System.err.print("[" + s + "]");
}
System.err.println();
}
if (env.isVerbose()) {
System.out.println("Configuration successfully updated");
}
}
} catch (Exception e) {
System.err.println("Error: [ main ] " + e);
if (env.isVerbose()) e.printStackTrace();
System.exit(1);
}
}
}
}