Indexer.java revision 1016
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
0N/A
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
0N/Apackage org.opensolaris.opengrok.index;
65N/A
125N/Aimport java.io.File;
125N/Aimport java.io.IOException;
58N/Aimport java.net.InetAddress;
77N/Aimport java.text.ParseException;
125N/Aimport java.util.ArrayList;
125N/Aimport java.util.Collections;
125N/Aimport java.util.Comparator;
1016N/Aimport java.util.HashSet;
125N/Aimport java.util.List;
261N/Aimport java.util.concurrent.ExecutorService;
261N/Aimport java.util.concurrent.Executors;
583N/Aimport java.util.concurrent.TimeUnit;
312N/Aimport java.util.logging.Level;
312N/Aimport java.util.logging.Logger;
467N/Aimport org.opensolaris.opengrok.Info;
428N/Aimport org.opensolaris.opengrok.OpenGrokLogger;
126N/Aimport org.opensolaris.opengrok.analysis.AnalyzerGuru;
58N/Aimport org.opensolaris.opengrok.configuration.Project;
394N/Aimport org.opensolaris.opengrok.configuration.RuntimeEnvironment;
8N/Aimport org.opensolaris.opengrok.history.HistoryGuru;
77N/Aimport org.opensolaris.opengrok.util.Getopt;
0N/A
/**
 * Creates and updates an inverted source index,
 * and also generates Xref, file stats etc., if specified
 * in the options.
 */
491N/A@SuppressWarnings({"PMD.AvoidPrintStackTrace", "PMD.SystemPrintln"})
439N/Apublic final class Indexer {
491N/A
465N/A private final static String ON = "on";
465N/A private final static String OFF = "off";
491N/A private static Indexer index = new Indexer();
491N/A private static final Logger log = Logger.getLogger(Indexer.class.getName());
491N/A
886N/A private static final String DERBY_EMBEDDED_DRIVER =
886N/A "org.apache.derby.jdbc.EmbeddedDriver";
886N/A
886N/A private static final String DERBY_CLIENT_DRIVER =
886N/A "org.apache.derby.jdbc.ClientDriver";
886N/A
491N/A public static Indexer getInstance() {
491N/A return index;
491N/A }
491N/A
65N/A /**
65N/A * Program entry point
65N/A * @param argv argument vector
65N/A */
464N/A @SuppressWarnings("PMD.UseStringBufferForStringAppends")
0N/A public static void main(String argv[]) {
30N/A RuntimeEnvironment env = RuntimeEnvironment.getInstance();
58N/A boolean runIndex = true;
312N/A boolean update = true;
312N/A boolean optimizedChanged = false;
260N/A CommandLineOptions cmdOptions = new CommandLineOptions();
491N/A
428N/A if (argv.length == 0) {
376N/A System.err.println(cmdOptions.getUsage());
376N/A System.exit(1);
0N/A } else {
11N/A boolean searchRepositories = false;
0N/A ArrayList<String> subFiles = new ArrayList<String>();
240N/A ArrayList<String> repositories = new ArrayList<String>();
1016N/A HashSet<String> allowedSymlinks = new HashSet<String>();
58N/A String configFilename = null;
58N/A String configHost = null;
58N/A boolean addProjects = false;
58N/A boolean refreshHistory = false;
77N/A String defaultProject = null;
207N/A boolean listFiles = false;
207N/A boolean createDict = false;
910N/A int noThreads = 2 + (2 * Runtime.getRuntime().availableProcessors());
491N/A
77N/A // Parse command line options:
260N/A Getopt getopt = new Getopt(argv, cmdOptions.getCommandString());
112N/A
77N/A try {
77N/A getopt.parse();
77N/A } catch (ParseException ex) {
77N/A System.err.println("OpenGrok: " + ex.getMessage());
260N/A System.err.println(cmdOptions.getUsage());
77N/A System.exit(1);
77N/A }
77N/A
491N/A try {
77N/A int cmd;
491N/A
111N/A // We need to read the configuration file first, since we
111N/A // will try to overwrite options..
111N/A while ((cmd = getopt.getOpt()) != -1) {
111N/A if (cmd == 'R') {
111N/A env.readConfiguration(new File(getopt.getOptarg()));
111N/A break;
111N/A }
111N/A }
491N/A
886N/A String databaseDriver = env.getDatabaseDriver();
886N/A String databaseURL = env.getDatabaseUrl();
886N/A
111N/A // Now we can handle all the other options..
491N/A getopt.reset();
77N/A while ((cmd = getopt.getOpt()) != -1) {
77N/A switch (cmd) {
491N/A case 't':
491N/A createDict = true;
491N/A runIndex = false;
491N/A break;
77N/A
491N/A case 'q':
491N/A env.setVerbose(false);
491N/A break;
491N/A case 'e':
491N/A env.setGenerateHtml(false);
491N/A break;
491N/A case 'P':
491N/A addProjects = true;
491N/A break;
491N/A case 'p':
491N/A defaultProject = getopt.getOptarg();
491N/A break;
491N/A case 'c':
491N/A env.setCtags(getopt.getOptarg());
491N/A break;
491N/A case 'w':
491N/A {
491N/A String webapp = getopt.getOptarg();
491N/A if (webapp.charAt(0) != '/' && !webapp.startsWith("http")) {
491N/A webapp = "/" + webapp;
491N/A }
491N/A if (webapp.endsWith("/")) {
491N/A env.setUrlPrefix(webapp + "s?");
491N/A } else {
491N/A env.setUrlPrefix(webapp + "/s?");
491N/A }
491N/A }
491N/A break;
491N/A case 'W':
491N/A configFilename = getopt.getOptarg();
491N/A break;
491N/A case 'U':
491N/A configHost = getopt.getOptarg();
491N/A break;
491N/A case 'R':
491N/A // already handled
491N/A break;
1016N/A case 'N':
1016N/A allowedSymlinks.add(getopt.getOptarg());
1016N/A break;
491N/A case 'n':
491N/A runIndex = false;
491N/A break;
491N/A case 'H':
491N/A refreshHistory = true;
491N/A break;
491N/A case 'h':
491N/A repositories.add(getopt.getOptarg());
491N/A break;
799N/A case 'D':
799N/A env.setStoreHistoryCacheInDB(true);
799N/A break;
886N/A case 'j':
886N/A databaseDriver = getopt.getOptarg();
886N/A // Should be a full class name, but we also accept
886N/A // the shorthands "client" and "embedded". Expand
886N/A // the shorthands here.
891N/A if ("client".equals(databaseDriver)) {
886N/A databaseDriver = DERBY_CLIENT_DRIVER;
891N/A } else if ("embedded".equals(databaseDriver)) {
886N/A databaseDriver = DERBY_EMBEDDED_DRIVER;
886N/A }
886N/A break;
886N/A case 'u':
886N/A databaseURL = getopt.getOptarg();
886N/A break;
491N/A case 'r':
491N/A {
491N/A if (getopt.getOptarg().equalsIgnoreCase(ON)) {
491N/A env.setRemoteScmSupported(true);
491N/A } else if (getopt.getOptarg().equalsIgnoreCase(OFF)) {
491N/A env.setRemoteScmSupported(false);
491N/A } else {
491N/A System.err.println("ERROR: You should pass either \"on\" or \"off\" as argument to -r");
491N/A System.err.println(" Ex: \"-r on\" will allow retrival for remote SCM systems");
491N/A System.err.println(" \"-r off\" will ignore SCM for remote systems");
491N/A }
491N/A }
491N/A break;
491N/A case 'O':
491N/A {
491N/A boolean oldval = env.isOptimizeDatabase();
491N/A if (getopt.getOptarg().equalsIgnoreCase(ON)) {
491N/A env.setOptimizeDatabase(true);
491N/A } else if (getopt.getOptarg().equalsIgnoreCase(OFF)) {
491N/A env.setOptimizeDatabase(false);
491N/A } else {
491N/A System.err.println("ERROR: You should pass either \"on\" or \"off\" as argument to -O");
491N/A System.err.println(" Ex: \"-O on\" will optimize the database as part of the index generation");
491N/A System.err.println(" \"-O off\" disable optimization of the index database");
491N/A }
491N/A if (oldval != env.isOptimizeDatabase()) {
491N/A optimizedChanged = true;
491N/A }
491N/A }
491N/A break;
491N/A case 'v':
491N/A env.setVerbose(true);
491N/A break;
77N/A
491N/A case 's':
491N/A {
490N/A env.setSourceRoot(getopt.getOptarg());
490N/A File file = env.getSourceRootFile();
490N/A if (!file.isDirectory()) {
490N/A System.err.println("ERROR: source root must be a directory: " + file.toString());
490N/A System.exit(1);
490N/A }
489N/A }
490N/A break;
491N/A case 'd':
491N/A {
490N/A env.setDataRoot(getopt.getOptarg());
490N/A File file = env.getDataRootFile();
490N/A if (!file.isDirectory()) {
490N/A System.err.println("ERROR: data root must be a directory: " + file.toString());
490N/A System.exit(1);
490N/A }
490N/A }
490N/A break;
491N/A case 'i':
491N/A env.getIgnoredNames().add(getopt.getOptarg());
491N/A break;
491N/A case 'S':
491N/A searchRepositories = true;
491N/A break;
491N/A case 'Q':
491N/A if (getopt.getOptarg().equalsIgnoreCase(ON)) {
491N/A env.setQuickContextScan(true);
491N/A } else if (getopt.getOptarg().equalsIgnoreCase(OFF)) {
491N/A env.setQuickContextScan(false);
491N/A } else {
491N/A System.err.println("ERROR: You should pass either \"on\" or \"off\" as argument to -Q");
491N/A System.err.println(" Ex: \"-Q on\" will just scan a \"chunk\" of the file and insert \"[..all..]\"");
491N/A System.err.println(" \"-Q off\" will try to build a more accurate list by reading the complete file.");
491N/A }
491N/A
491N/A break;
491N/A case 'm': {
491N/A try {
491N/A env.setIndexWordLimit(Integer.parseInt(getopt.getOptarg()));
491N/A } catch (NumberFormatException exp) {
491N/A System.err.println("ERROR: Failed to parse argument to \"-m\": " + exp.getMessage());
491N/A System.exit(1);
491N/A }
491N/A break;
99N/A }
491N/A case 'a':
491N/A if (getopt.getOptarg().equalsIgnoreCase(ON)) {
491N/A env.setAllowLeadingWildcard(true);
491N/A } else if (getopt.getOptarg().equalsIgnoreCase(OFF)) {
491N/A env.setAllowLeadingWildcard(false);
491N/A } else {
491N/A System.err.println("ERROR: You should pass either \"on\" or \"off\" as argument to -a");
491N/A System.err.println(" Ex: \"-a on\" will allow a search to start with a wildcard");
491N/A System.err.println(" \"-a off\" will disallow a search to start with a wildcard");
126N/A System.exit(1);
126N/A }
126N/A
491N/A break;
491N/A
491N/A case 'A':
491N/A {
491N/A String[] arg = getopt.getOptarg().split(":");
491N/A if (arg.length != 2) {
491N/A System.err.println("ERROR: You must specify: -A extension:class");
491N/A System.err.println(" Ex: -A foo:org.opensolaris.opengrok.analysis.c.CAnalyzer");
491N/A System.err.println(" will use the C analyzer for all files ending with .foo");
491N/A System.err.println(" Ex: -A c:-");
491N/A System.err.println(" will disable the c-analyzer for for all files ending with .c");
491N/A System.exit(1);
491N/A }
126N/A
491N/A arg[0] = arg[0].substring(arg[0].lastIndexOf('.') + 1).toUpperCase();
491N/A if (arg[1].equals("-")) {
491N/A AnalyzerGuru.addExtension(arg[0], null);
491N/A break;
491N/A }
491N/A
491N/A try {
491N/A AnalyzerGuru.addExtension(
491N/A arg[0],
491N/A AnalyzerGuru.findFactory(arg[1]));
491N/A } catch (Exception e) {
491N/A System.err.println("Unable to use " + arg[1] +
491N/A " as a FileAnalyzerFactory");
491N/A e.printStackTrace();
491N/A System.exit(1);
491N/A }
491N/A }
491N/A break;
491N/A case 'L':
491N/A env.setWebappLAF(getopt.getOptarg());
491N/A break;
491N/A case 'T':
126N/A try {
491N/A noThreads = Integer.parseInt(getopt.getOptarg());
491N/A } catch (NumberFormatException exp) {
491N/A System.err.println("ERROR: Failed to parse argument to \"-T\": " + exp.getMessage());
126N/A System.exit(1);
126N/A }
491N/A break;
994N/A case 'z':
994N/A try {
994N/A env.setScanningDepth(Integer.parseInt(getopt.getOptarg()));
994N/A } catch (NumberFormatException exp) {
994N/A System.err.println("ERROR: Failed to parse argument to \"-z\": " + exp.getMessage());
994N/A System.exit(1);
994N/A }
994N/A break;
491N/A case 'l':
491N/A if (getopt.getOptarg().equalsIgnoreCase(ON)) {
491N/A env.setUsingLuceneLocking(true);
491N/A } else if (getopt.getOptarg().equalsIgnoreCase(OFF)) {
491N/A env.setUsingLuceneLocking(false);
491N/A } else {
491N/A System.err.println("ERROR: You should pass either \"on\" or \"off\" as argument to -l");
491N/A System.err.println(" Ex: \"-l on\" will enable locks in Lucene");
491N/A System.err.println(" \"-l off\" will disable locks in Lucene");
491N/A }
491N/A break;
491N/A case 'V':
491N/A System.out.println(Info.getFullVersion());
491N/A System.exit(0);
491N/A break;
491N/A
491N/A case '?':
491N/A System.err.println(cmdOptions.getUsage());
491N/A System.exit(0);
491N/A break;
491N/A
491N/A default:
491N/A System.err.println("Internal Error - Unimplemented cmdline option: " + (char) cmd);
261N/A System.exit(1);
0N/A }
0N/A }
77N/A
77N/A int optind = getopt.getOptind();
491N/A if (optind != -1) {
77N/A while (optind < argv.length) {
77N/A subFiles.add(argv[optind]);
77N/A ++optind;
77N/A }
77N/A }
491N/A
886N/A if (env.storeHistoryCacheInDB()) {
886N/A // The default database driver is Derby's client driver.
886N/A if (databaseDriver == null) {
886N/A databaseDriver = DERBY_CLIENT_DRIVER;
886N/A }
886N/A
886N/A // The default URL depends on the database driver.
886N/A if (databaseURL == null) {
886N/A StringBuilder defaultURL = new StringBuilder();
886N/A defaultURL.append("jdbc:derby:");
886N/A if (databaseDriver.equals(DERBY_EMBEDDED_DRIVER)) {
886N/A defaultURL
886N/A .append(env.getDataRootPath())
886N/A .append(File.separator);
886N/A } else {
886N/A defaultURL.append("//localhost/");
886N/A }
886N/A defaultURL.append("cachedb;create=true");
886N/A databaseURL = defaultURL.toString();
886N/A }
886N/A }
886N/A
886N/A env.setDatabaseDriver(databaseDriver);
886N/A env.setDatabaseUrl(databaseURL);
886N/A
1016N/A allowedSymlinks.addAll(env.getAllowedSymlinks());
1016N/A env.setAllowedSymlinks(allowedSymlinks);
1016N/A
312N/A getInstance().prepareIndexer(env, searchRepositories, addProjects,
491N/A defaultProject, configFilename, refreshHistory,
491N/A listFiles, createDict, subFiles, repositories);
491N/A if (runIndex || (optimizedChanged && env.isOptimizeDatabase())) {
491N/A IndexChangedListener progress = new DefaultIndexChangedListener();
491N/A getInstance().doIndexerExecution(update, noThreads, subFiles,
491N/A progress);
491N/A }
491N/A getInstance().sendToConfigHost(env, configHost);
491N/A } catch (IndexerException ex) {
491N/A OpenGrokLogger.getLogger().log(Level.SEVERE, "Exception running indexer", ex);
491N/A System.err.println(cmdOptions.getUsage());
491N/A System.exit(1);
491N/A } catch (IOException ioe) {
491N/A System.err.println("Got IOException " + ioe);
491N/A OpenGrokLogger.getLogger().log(Level.SEVERE, "Exception running indexer", ioe);
491N/A System.exit(1);
312N/A }
491N/A }
491N/A
491N/A }
491N/A
491N/A public void prepareIndexer(RuntimeEnvironment env,
491N/A boolean searchRepositories,
491N/A boolean addProjects,
491N/A String defaultProject,
491N/A String configFilename,
491N/A boolean refreshHistory,
491N/A boolean listFiles,
491N/A boolean createDict,
491N/A List<String> subFiles,
491N/A List<String> repositories) throws IndexerException, IOException {
491N/A
491N/A if (env.getDataRootPath() == null) {
491N/A throw new IndexerException("ERROR: Please specify a DATA ROOT path");
491N/A }
491N/A
491N/A if (env.getSourceRootFile() == null) {
491N/A throw new IndexerException("ERROR: please specify a SRC_ROOT with option -s !");
491N/A }
491N/A
491N/A if (!env.validateExuberantCtags()) {
491N/A throw new IndexerException("Didn't find Exuberant Ctags");
491N/A }
491N/A
491N/A if (searchRepositories) {
491N/A if (env.isVerbose()) {
491N/A System.out.println("Scanning for repositories...");
491N/A }
491N/A long start = System.currentTimeMillis();
491N/A HistoryGuru.getInstance().addRepositories(env.getSourceRootPath());
491N/A long time = (System.currentTimeMillis() - start) / 1000;
491N/A if (env.isVerbose()) {
491N/A System.out.println("Done searching for repositories (" + time + "s)");
491N/A }
491N/A }
491N/A
491N/A if (addProjects) {
491N/A File files[] = env.getSourceRootFile().listFiles();
491N/A List<Project> projects = env.getProjects();
491N/A projects.clear();
491N/A for (File file : files) {
491N/A if (!file.getName().startsWith(".") && file.isDirectory()) {
491N/A Project p = new Project();
491N/A String name = file.getName();
491N/A p.setDescription(name);
491N/A p.setPath("/" + name);
491N/A projects.add(p);
491N/A }
491N/A }
491N/A
491N/A // The projects should be sorted...
491N/A Collections.sort(projects, new Comparator<Project>() {
491N/A
491N/A public int compare(Project p1, Project p2) {
491N/A String s1 = p1.getDescription();
491N/A String s2 = p2.getDescription();
312N/A
491N/A int ret;
491N/A if (s1 == null) {
491N/A ret = (s2 == null) ? 0 : 1;
491N/A } else {
491N/A ret = s1.compareTo(s2);
491N/A }
491N/A return ret;
0N/A }
491N/A });
491N/A }
77N/A
491N/A if (defaultProject != null) {
491N/A for (Project p : env.getProjects()) {
491N/A if (p.getPath().equals(defaultProject)) {
491N/A env.setDefaultProject(p);
491N/A break;
0N/A }
491N/A }
491N/A }
77N/A
491N/A if (configFilename != null) {
491N/A if (env.isVerbose()) {
491N/A System.out.println("Writing configuration to " + configFilename);
491N/A System.out.flush();
491N/A }
491N/A env.writeConfiguration(new File(configFilename));
491N/A if (env.isVerbose()) {
491N/A System.out.println("Done...");
491N/A System.out.flush();
491N/A }
491N/A }
491N/A
491N/A if (refreshHistory) {
491N/A HistoryGuru.getInstance().createCache();
491N/A } else if (repositories != null && !repositories.isEmpty()) {
491N/A HistoryGuru.getInstance().createCache(repositories);
491N/A }
491N/A
491N/A if (listFiles) {
491N/A IndexDatabase.listAllFiles(subFiles);
491N/A }
491N/A
491N/A if (createDict) {
491N/A IndexDatabase.listFrequentTokens(subFiles);
491N/A }
491N/A }
491N/A
491N/A public void doIndexerExecution(final boolean update, int noThreads, List<String> subFiles,
491N/A IndexChangedListener progress)
491N/A throws IOException {
491N/A RuntimeEnvironment env = RuntimeEnvironment.getInstance();
491N/A env.register();
491N/A log.info("Starting indexExecution");
491N/A
491N/A ExecutorService executor = Executors.newFixedThreadPool(noThreads);
491N/A
491N/A if (subFiles == null || subFiles.isEmpty()) {
491N/A if (update) {
491N/A IndexDatabase.updateAll(executor, progress);
491N/A } else if (env.isOptimizeDatabase()) {
491N/A IndexDatabase.optimizeAll(executor);
491N/A }
491N/A } else {
491N/A List<IndexDatabase> dbs = new ArrayList<IndexDatabase>();
491N/A
491N/A for (String path : subFiles) {
491N/A Project project = Project.getProject(path);
491N/A if (project == null && env.hasProjects()) {
491N/A System.err.println("Warning: Could not find a project for \"" + path + "\"");
491N/A } else {
491N/A IndexDatabase db;
491N/A if (project == null) {
491N/A db = new IndexDatabase();
491N/A } else {
491N/A db = new IndexDatabase(project);
99N/A }
491N/A int idx = dbs.indexOf(db);
491N/A if (idx != -1) {
491N/A db = dbs.get(idx);
58N/A }
77N/A
491N/A if (db.addDirectory(path)) {
491N/A if (idx == -1) {
491N/A dbs.add(db);
77N/A }
491N/A } else {
491N/A System.err.println("Warning: Directory does not exist \"" + path + "\"");
99N/A }
58N/A }
491N/A }
58N/A
491N/A for (final IndexDatabase db : dbs) {
491N/A final boolean optimize = env.isOptimizeDatabase();
491N/A db.addIndexChangedListener(progress);
491N/A executor.submit(new Runnable() {
207N/A
491N/A public void run() {
491N/A try {
491N/A if (update) {
491N/A db.update();
491N/A } else if (optimize) {
491N/A db.optimize();
297N/A }
491N/A } catch (Exception e) {
508N/A if (update) {
910N/A OpenGrokLogger.getLogger().log(Level.WARNING, "An error occured while updating index", e);
508N/A } else {
508N/A OpenGrokLogger.getLogger().log(Level.WARNING, "An error occured while optimizing index", e);
508N/A }
491N/A e.printStackTrace();
274N/A }
274N/A }
491N/A });
491N/A }
491N/A }
491N/A
491N/A executor.shutdown();
491N/A while (!executor.isTerminated()) {
491N/A try {
930N/A // Wait forever
930N/A executor.awaitTermination(999,TimeUnit.DAYS);
583N/A } catch (InterruptedException exp) {
583N/A OpenGrokLogger.getLogger().log(Level.WARNING, "Received interrupt while waiting for executor to finish", exp);
491N/A }
583N/A }
491N/A }
261N/A
491N/A public void sendToConfigHost(RuntimeEnvironment env, String configHost) {
491N/A if (configHost != null) {
491N/A String[] cfg = configHost.split(":");
491N/A if (env.isVerbose()) {
491N/A log.info("Send configuration to: " + configHost);
491N/A }
270N/A
491N/A if (cfg.length == 2) {
491N/A try {
491N/A InetAddress host = InetAddress.getByName(cfg[0]);
491N/A RuntimeEnvironment.getInstance().writeConfiguration(host, Integer.parseInt(cfg[1]));
491N/A } catch (Exception ex) {
979N/A log.log(Level.SEVERE, "Failed to send configuration to " + configHost+" (is web application server running with opengrok deployed?)", ex);
264N/A }
491N/A } else {
491N/A System.err.println("Syntax error: ");
491N/A for (String s : cfg) {
491N/A System.err.print("[" + s + "]");
491N/A }
491N/A System.err.println();
491N/A }
491N/A if (env.isVerbose()) {
915N/A log.info("Configuration update routine done, check previous output for errors.");
491N/A }
491N/A }
0N/A }
260N/A
/**
 * Private so that the only instance is the one created by this class
 * itself and handed out through {@code getInstance()}.
 */
private Indexer() {
    // nothing to initialize
}
0N/A}