Indexer.java revision 1470
0N/A/*
0N/A * CDDL HEADER START
0N/A *
0N/A * The contents of this file are subject to the terms of the
0N/A * Common Development and Distribution License (the "License").
0N/A * You may not use this file except in compliance with the License.
0N/A *
0N/A * See LICENSE.txt included in this distribution for the specific
0N/A * language governing permissions and limitations under the License.
0N/A *
0N/A * When distributing Covered Code, include this CDDL HEADER in each
0N/A * file and include the License file at LICENSE.txt.
0N/A * If applicable, add the following below this CDDL HEADER, with the
0N/A * fields enclosed by brackets "[]" replaced with your own identifying
0N/A * information: Portions Copyright [yyyy] [name of copyright owner]
0N/A *
0N/A * CDDL HEADER END
0N/A */
0N/A
0N/A/*
1054N/A * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
0N/A *
0N/A * Portions Copyright 2011 Jens Elkner.
65N/A */
125N/Apackage org.opensolaris.opengrok.index;
125N/A
58N/Aimport java.io.File;
77N/Aimport java.io.IOException;
125N/Aimport java.lang.reflect.Field;
125N/Aimport java.net.InetAddress;
125N/Aimport java.text.ParseException;
1016N/Aimport java.util.ArrayList;
125N/Aimport java.util.Collections;
261N/Aimport java.util.Comparator;
261N/Aimport java.util.HashMap;
583N/Aimport java.util.HashSet;
312N/Aimport java.util.List;
1062N/Aimport java.util.Map;
312N/Aimport java.util.concurrent.ExecutorService;
467N/Aimport java.util.concurrent.Executors;
428N/Aimport java.util.concurrent.TimeUnit;
126N/Aimport java.util.logging.Level;
1088N/Aimport java.util.logging.LogManager;
58N/Aimport java.util.logging.Logger;
394N/A
8N/Aimport org.opensolaris.opengrok.Info;
77N/Aimport org.opensolaris.opengrok.OpenGrokLogger;
0N/Aimport org.opensolaris.opengrok.analysis.AnalyzerGuru;
0N/Aimport org.opensolaris.opengrok.configuration.Configuration;
0N/Aimport org.opensolaris.opengrok.configuration.Project;
0N/Aimport org.opensolaris.opengrok.configuration.RuntimeEnvironment;
0N/Aimport org.opensolaris.opengrok.history.HistoryException;
0N/Aimport org.opensolaris.opengrok.history.HistoryGuru;
491N/Aimport org.opensolaris.opengrok.history.Repository;
439N/Aimport org.opensolaris.opengrok.history.RepositoryFactory;
491N/Aimport org.opensolaris.opengrok.history.RepositoryInfo;
465N/Aimport org.opensolaris.opengrok.util.Executor;
465N/Aimport org.opensolaris.opengrok.util.Getopt;
491N/Aimport org.opensolaris.opengrok.web.Prefix;
491N/A
491N/A/**
886N/A * Creates and updates an inverted source index
886N/A * as well as generates Xref, file stats etc., if specified
886N/A * in the options
886N/A */
886N/A@SuppressWarnings({"PMD.AvoidPrintStackTrace", "PMD.SystemPrintln"})
886N/Apublic final class Indexer {
491N/A private static final String ON = "on";
491N/A private static final String OFF = "off";
491N/A private static Indexer index = new Indexer();
491N/A static final Logger log = Logger.getLogger(Indexer.class.getName());
65N/A
65N/A private static final String DERBY_EMBEDDED_DRIVER =
65N/A "org.apache.derby.jdbc.EmbeddedDriver";
65N/A
464N/A private static final String DERBY_CLIENT_DRIVER =
0N/A "org.apache.derby.jdbc.ClientDriver";
58N/A
312N/A /**
312N/A * Get the indexer instance.
260N/A * @return a singleton
491N/A */
428N/A public static Indexer getInstance() {
376N/A return index;
376N/A }
0N/A
11N/A /**
0N/A * Program entry point
240N/A * @param argv argument vector
1016N/A */
58N/A @SuppressWarnings("PMD.UseStringBufferForStringAppends")
58N/A public static void main(String argv[]) {
58N/A boolean runIndex = true;
58N/A boolean update = true;
77N/A boolean optimizedChanged = false;
207N/A ArrayList<String> zapCache = new ArrayList<String>();
207N/A CommandLineOptions cmdOptions = new CommandLineOptions();
910N/A
1062N/A if (argv.length == 0) {
77N/A System.err.println(cmdOptions.getUsage());
260N/A System.exit(1);
112N/A } else {
77N/A Executor.registerErrorHandler();
77N/A boolean searchRepositories = false;
77N/A ArrayList<String> subFiles = new ArrayList<String>();
77N/A ArrayList<String> repositories = new ArrayList<String>();
260N/A HashSet<String> allowedSymlinks = new HashSet<String>();
77N/A String configFilename = null;
77N/A String configHost = null;
77N/A boolean addProjects = false;
491N/A boolean refreshHistory = false;
1088N/A String defaultProject = null;
77N/A boolean listFiles = false;
491N/A boolean listRepos = false;
111N/A boolean listTokens = false;
111N/A int noThreads = 2 + (2 * Runtime.getRuntime().availableProcessors());
111N/A
111N/A // Parse command line options:
1088N/A Getopt getopt = new Getopt(argv, cmdOptions.getCommandString());
111N/A
111N/A try {
111N/A getopt.parse();
491N/A } catch (ParseException ex) {
1088N/ASystem.err.println(ex.getMessage());
1088N/ASystem.err.println(cmdOptions.getUsage());
1088N/A System.exit(1);
1088N/A }
1088N/A
1088N/A try {
886N/A Configuration cfg = null;
111N/A int cmd;
491N/A
77N/A // We need to read the configuration file first, since we
77N/A // will try to overwrite options..
491N/A while ((cmd = getopt.getOpt()) != -1) {
491N/A if (cmd == 'R') {
491N/A cfg = Configuration.read(new File(getopt.getOptarg()));
491N/A break;
77N/A }
491N/A }
1088N/A
1062N/A if (cfg == null) {
491N/A cfg = new Configuration();
491N/A }
1088N/A
491N/A String databaseDriver = cfg.getDatabaseDriver();
491N/A String databaseURL = cfg.getDatabaseUrl();
491N/A
491N/A // Now we can handle all the other options..
491N/A getopt.reset();
491N/A while ((cmd = getopt.getOpt()) != -1) {
491N/A switch (cmd) {
491N/A case 'q':
1088N/A cfg.setVerbose(false);
491N/A OpenGrokLogger.setOGConsoleLogLevel(Level.WARNING);
491N/A break;
491N/A case 'e':
491N/A cfg.setGenerateHtml(false);
491N/A break;
491N/A case 'P':
491N/A addProjects = true;
491N/A break;
1088N/A case 'p':
491N/A defaultProject = getopt.getOptarg();
1088N/A break;
491N/A case 'c':
491N/A cfg.setCtags(getopt.getOptarg());
491N/A break;
491N/A case 'w':
491N/A {
491N/A String webapp = getopt.getOptarg();
491N/A if (webapp.charAt(0) != '/' && !webapp.startsWith("http")) {
491N/A webapp = "/" + webapp;
491N/A }
491N/A if (webapp.endsWith("/")) {
491N/A webapp = webapp.substring(0, webapp.length()-1);
491N/A }
1016N/A cfg.setUrlPrefix(webapp + Prefix.SEARCH_R + '?');
1016N/A }
1016N/A break;
491N/A case 'W':
491N/A configFilename = getopt.getOptarg();
491N/A break;
491N/A case 'U':
491N/A configHost = getopt.getOptarg();
491N/A break;
491N/A case 'R':
491N/A // already handled
491N/A break;
799N/A case 'N':
1088N/A allowedSymlinks.add(getopt.getOptarg());
799N/A break;
886N/A case 'n':
886N/A runIndex = false;
886N/A break;
886N/A case 'H':
886N/A refreshHistory = true;
891N/A break;
886N/A case 'h':
891N/A repositories.add(getopt.getOptarg());
886N/A break;
886N/A case 'D':
886N/A cfg.setHistoryCacheInDB(true);
886N/A break;
886N/A case 'j':
886N/A databaseDriver = getopt.getOptarg();
491N/A // Should be a full class name, but we also accept
491N/A // the shorthands "client" and "embedded". Expand
491N/A // the shorthands here.
1088N/A if ("client".equals(databaseDriver)) {
491N/A databaseDriver = DERBY_CLIENT_DRIVER;
1088N/A } else if ("embedded".equals(databaseDriver)) {
491N/A databaseDriver = DERBY_EMBEDDED_DRIVER;
491N/A }
491N/A break;
491N/A case 'u':
491N/A databaseURL = getopt.getOptarg();
491N/A break;
491N/A case 'r':
491N/A {
491N/A if (getopt.getOptarg().equalsIgnoreCase(ON)) {
1088N/A cfg.setRemoteScmSupported(true);
491N/A } else if (getopt.getOptarg().equalsIgnoreCase(OFF)) {
1088N/A cfg.setRemoteScmSupported(false);
491N/A } else {
1088N/ASystem.err.println("ERROR: You should pass either \"on\" or \"off\" as argument to -r");
491N/ASystem.err.println(" Ex: \"-r on\" will allow retrival for remote SCM systems");
491N/ASystem.err.println(" \"-r off\" will ignore SCM for remote systems");
491N/A }
491N/A }
491N/A break;
1088N/A case 'o':
491N/A File ctagsOptions = new File(getopt.getOptarg());
491N/A if (!(ctagsOptions.isFile() && ctagsOptions.canRead()))
491N/A {
491N/ASystem.err.println("ERROR: File '" + ctagsOptions + "' not found for the -o option");
491N/A System.exit(1);
1088N/A }
1062N/A cfg.setCtagsOptionsFile(ctagsOptions.getPath());
491N/A break;
77N/A case 'O':
491N/A {
491N/A boolean oldval = cfg.isOptimizeDatabase();
1088N/A if (getopt.getOptarg().equalsIgnoreCase(ON)) {
1088N/A cfg.setOptimizeDatabase(true);
490N/A } else if (getopt.getOptarg().equalsIgnoreCase(OFF)) {
490N/A cfg.setOptimizeDatabase(false);
490N/A } else {
490N/ASystem.err.println("ERROR: You should pass either \"on\" or \"off\" as argument to -O");
489N/ASystem.err.println(" Ex: \"-O on\" will optimize the database as part of the index generation");
490N/ASystem.err.println(" \"-O off\" disable optimization of the index database");
491N/A }
491N/A if (oldval != cfg.isOptimizeDatabase()) {
1088N/A optimizedChanged = true;
1088N/A }
490N/A }
490N/A break;
490N/A case 'v':
490N/A cfg.setVerbose(true);
490N/A OpenGrokLogger.setOGConsoleLogLevel(Level.INFO);
490N/A break;
491N/A case 'C':
1088N/A cfg.setPrintProgress(true);
491N/A break;
1026N/A
1088N/A case 's':
1026N/A {
491N/A File sourceRoot = new File(getopt.getOptarg());
491N/A if (!sourceRoot.isDirectory()) {
491N/ASystem.err.println("ERROR: Source root must be a directory");
491N/A System.exit(1);
491N/A }
1088N/A cfg.setSourceRoot(sourceRoot.getCanonicalPath());
491N/A break;
1088N/A }
491N/A case 'd':
491N/A {
491N/A File dataRoot = new File(getopt.getOptarg());
491N/A if (!dataRoot.exists() && !dataRoot.mkdirs()) {
491N/ASystem.err.println("ERROR: Cannot create data root");
491N/A System.exit(1);
491N/A }
491N/A if (!dataRoot.isDirectory()) {
491N/ASystem.err.println("ERROR: Data root must be a directory");
1088N/A System.exit(1);
491N/A }
491N/A cfg.setDataRoot(dataRoot.getCanonicalPath());
491N/A break;
491N/A }
491N/A case 'i':
99N/A cfg.getIgnoredNames().add(getopt.getOptarg());
491N/A break;
491N/A case 'I':
1088N/A cfg.getIncludedNames().add(getopt.getOptarg());
491N/A break;
1088N/A case 'S':
491N/A searchRepositories = true;
491N/A break;
491N/A case 'Q':
491N/A if (getopt.getOptarg().equalsIgnoreCase(ON)) {
126N/A cfg.setQuickContextScan(true);
126N/A } else if (getopt.getOptarg().equalsIgnoreCase(OFF)) {
126N/A cfg.setQuickContextScan(false);
491N/A } else {
491N/ASystem.err.println("ERROR: You should pass either \"on\" or \"off\" as argument to -Q");
491N/ASystem.err.println(" Ex: \"-Q on\" will just scan a \"chunk\" of the file and insert \"[..all..]\"");
491N/ASystem.err.println(" \"-Q off\" will try to build a more accurate list by reading the complete file.");
491N/A }
491N/A
491N/A break;
491N/A case 'm': {
491N/A try {
491N/A cfg.setIndexWordLimit(Integer.parseInt(getopt.getOptarg()));
491N/A } catch (NumberFormatException exp) {
491N/ASystem.err.println("ERROR: Failed to parse argument to \"-m\": " + exp.getMessage());
491N/A System.exit(1);
126N/A }
491N/A break;
491N/A }
491N/A case 'a':
491N/A if (getopt.getOptarg().equalsIgnoreCase(ON)) {
491N/A cfg.setAllowLeadingWildcard(true);
491N/A } else if (getopt.getOptarg().equalsIgnoreCase(OFF)) {
491N/A cfg.setAllowLeadingWildcard(false);
491N/A } else {
491N/ASystem.err.println("ERROR: You should pass either \"on\" or \"off\" as argument to -a");
491N/ASystem.err.println(" Ex: \"-a on\" will allow a search to start with a wildcard");
491N/ASystem.err.println(" \"-a off\" will disallow a search to start with a wildcard");
1054N/A System.exit(1);
1054N/A }
491N/A
491N/A break;
491N/A
491N/A case 'A':
491N/A {
1088N/A String[] arg = getopt.getOptarg().split(":");
491N/A if (arg.length != 2) {
491N/ASystem.err.println("ERROR: You must specify: -A extension:class");
126N/ASystem.err.println(" Ex: -A foo:org.opensolaris.opengrok.analysis.c.CAnalyzer");
491N/ASystem.err.println(" will use the C analyzer for all files ending with .foo");
491N/ASystem.err.println(" Ex: -A c:-");
491N/ASystem.err.println(" will disable the c-analyzer for for all files ending with .c");
126N/A System.exit(1);
126N/A }
491N/A
994N/A arg[0] = arg[0].substring(arg[0].lastIndexOf('.') + 1).toUpperCase();
994N/A if (arg[1].equals("-")) {
1088N/A AnalyzerGuru.addExtension(arg[0], null);
994N/A break;
994N/A }
994N/A
994N/A try {
994N/A AnalyzerGuru.addExtension(arg[0],
491N/A AnalyzerGuru.findFactory(arg[1]));
491N/A } catch (Exception e) {
1088N/A log.log(Level.SEVERE, "Unable to use {0} as a FileAnalyzerFactory", arg[1]);
491N/A log.log(Level.FINE, "main", e);
1088N/A System.exit(1);
491N/A }
491N/A }
491N/A break;
491N/A case 'L':
491N/A cfg.setWebappLAF(getopt.getOptarg());
491N/A break;
491N/A case 'T':
491N/A try {
491N/A noThreads = Integer.parseInt(getopt.getOptarg());
491N/A } catch (NumberFormatException exp) {
491N/ASystem.err.println("ERROR: Failed to parse argument to \"-T\": " + exp.getMessage());
491N/A System.exit(1);
491N/A }
491N/A break;
491N/A case 'z':
491N/A try {
491N/A cfg.setScanningDepth(Integer.parseInt(getopt.getOptarg()));
491N/A } catch (NumberFormatException exp) {
261N/ASystem.err.println("ERROR: Failed to parse argument to \"-z\": " + exp.getMessage());
0N/A System.exit(1);
0N/A }
77N/A break;
77N/A case 'l':
491N/A if (getopt.getOptarg().equalsIgnoreCase(ON)) {
77N/A cfg.setUsingLuceneLocking(true);
77N/A } else if (getopt.getOptarg().equalsIgnoreCase(OFF)) {
77N/A cfg.setUsingLuceneLocking(false);
77N/A } else {
77N/ASystem.err.println("ERROR: You should pass either \"on\" or \"off\" as argument to -l");
1062N/ASystem.err.println(" Ex: \"-l on\" will enable locks in Lucene");
1062N/ASystem.err.println(" \"-l off\" will disable locks in Lucene");
1088N/A }
1062N/A break;
1064N/A case 'B':
1062N/A cfg.setUserPage(getopt.getOptarg());
1062N/A break;
1088N/A case 'X':
886N/A cfg.setUserPageSuffix(getopt.getOptarg());
886N/A break;
886N/A case 'V':
886N/A System.out.println(Info.getFullVersion());
886N/A System.exit(0);
886N/A break;
886N/A case 'k':
886N/A zapCache.add(getopt.getOptarg());
886N/A break;
886N/A case 'K':
1088N/A listRepos = true;
886N/A break;
886N/A case '?':
886N/A System.err.println(cmdOptions.getUsage());
886N/A System.exit(0);
886N/A break;
886N/A case 't':
886N/A try {
886N/A int tmp = Integer.parseInt(getopt.getOptarg());
886N/A cfg.setTabSize(tmp);
1088N/A } catch (NumberFormatException exp) {
1088N/ASystem.err.println("ERROR: Failed to parse argument to \"-t\": " + exp.getMessage());
886N/A System.exit(1);
1017N/A }
1088N/A break;
1090N/A case 'Z':
1090N/A cfg.setChattyStatusPage(true);
1017N/A break;
1017N/A case 'E':
1017N/A cfg.setCompressXref(false);
1017N/A break;
1017N/A default:
1017N/ASystem.err.println("Internal Error - Unimplemented cmdline option: " + (char) cmd);
1017N/A System.exit(1);
1017N/A }
1017N/A }
1017N/A String s = System.getProperty(Configuration.DIRLIST_TODAY_PROPERTY_KEY);
1088N/A if (s != null) {
1088N/A cfg.setDirlistUseToday(Boolean.parseBoolean(s));
1088N/A }
1088N/A s = System.getProperty(Configuration.DIRLIST_DATE_PROPERTY_KEY);
1088N/A if (s != null) {
1088N/A cfg.setDirlistDatePattern(s);
1016N/A }
312N/A List<Class<? extends Repository>> repositoryClasses =
491N/A RepositoryFactory.getRepositoryClasses();
491N/A for (Class<? extends Repository> clazz : repositoryClasses) {
491N/A try {
491N/A Field f = clazz.getDeclaredField("CMD_PROPERTY_KEY");
491N/A Object key = f.get(null);
491N/A if (key != null) {
491N/A cfg.setRepoCmd(clazz.getCanonicalName(),
491N/A System.getProperty(key.toString()));
491N/A }
491N/A } catch (Exception e) {
491N/A // don't care
491N/A }
491N/A }
491N/A int optind = getopt.getOptind();
491N/A if (optind != -1) {
491N/A while (optind < argv.length) {
312N/A subFiles.add(argv[optind]);
491N/A ++optind;
491N/A }
491N/A }
491N/A
491N/A //logging starts here
491N/A if (cfg.isVerbose()) {
491N/A String fn = LogManager.getLogManager()
491N/A .getProperty("java.util.logging.FileHandler.pattern");
491N/A if (fn != null) {
491N/A System.out.println("Logging filehandler pattern: " + fn);
491N/A }
491N/A }
491N/A
491N/A if (cfg.isHistoryCacheInDB()) {
491N/A // The default database driver is Derby's client driver.
491N/A if (databaseDriver == null) {
491N/A databaseDriver = DERBY_CLIENT_DRIVER;
491N/A }
491N/A
491N/A // The default URL depends on the database driver.
491N/A if (databaseURL == null) {
491N/A StringBuilder defaultURL = new StringBuilder();
491N/A defaultURL.append("jdbc:derby:");
491N/A if (databaseDriver.equals(DERBY_EMBEDDED_DRIVER)) {
491N/A defaultURL.append(cfg.getDataRoot())
491N/A .append(File.separator);
491N/A } else {
1054N/A defaultURL.append("//localhost/");
1054N/A }
491N/A defaultURL.append("cachedb;create=true");
491N/A databaseURL = defaultURL.toString();
1054N/A }
1062N/A }
491N/A
491N/A cfg.setDatabaseDriver(databaseDriver);
491N/A cfg.setDatabaseUrl(databaseURL);
491N/A
491N/A // automatically allow symlinks that are directly in source root
491N/A String file = cfg.getSourceRoot();
491N/A if (file != null) {
491N/A File sourceRootFile = new File(file);
491N/A File[] projectDirs = sourceRootFile.listFiles();
491N/A if (projectDirs != null) {
491N/A for (File projectDir : projectDirs) {
491N/A if (!projectDir.getCanonicalPath()
491N/A .equals(projectDir.getAbsolutePath()))
491N/A {
491N/A allowedSymlinks.add(projectDir.getAbsolutePath());
491N/A }
491N/A }
491N/A }
491N/A }
1054N/A
491N/A allowedSymlinks.addAll(cfg.getAllowedSymlinks());
491N/A cfg.setAllowedSymlinks(allowedSymlinks);
491N/A
312N/A //Set updated configuration in RuntimeEnvironment
491N/A RuntimeEnvironment.setConfig(cfg);
491N/A Indexer.prepareIndexer(searchRepositories, addProjects,
491N/A defaultProject, configFilename, refreshHistory, listFiles,
491N/A listTokens, subFiles, repositories, zapCache, listRepos);
491N/A if (listRepos || !zapCache.isEmpty()) {
491N/A return;
491N/A }
0N/A if (runIndex || (optimizedChanged && cfg.isOptimizeDatabase())) {
491N/A IndexChangedListener progress = new DefaultIndexChangedListener();
491N/A Indexer.doIndexerExecution(update, noThreads, subFiles,
77N/A progress);
491N/A }
491N/A Indexer.sendToConfigHost(configHost);
491N/A } catch (IndexerException ex) {
491N/A log.log(Level.SEVERE, "Exception running indexer: " + ex.getMessage());
491N/A log.log(Level.FINE, "main", ex);
0N/A System.err.println(cmdOptions.getUsage());
491N/A System.exit(1);
491N/A } catch (Throwable e) {
77N/A log.log(Level.SEVERE, e.getLocalizedMessage());
1054N/A log.log(Level.FINE, "main", e);
1054N/A System.exit(1);
1054N/A }
1054N/A }
491N/A
491N/A }
491N/A
1062N/A // PMD wants us to use length() > 0 && charAt(0) instead of startsWith()
491N/A // for performance. We prefer clarity over performance here, so silence it.
1062N/A /**
491N/A * Prepare the environment to execute indexing. Basically it does some
1062N/A * consistency checks wrt. commadline options sets and executes simple tasks
491N/A * like zapping or refreshing the history cache, list available projects
1062N/A * including their path, create spelling index for related projects,
491N/A * writing the config file.
491N/A *
491N/A * @param searchRepositories If {@code true} scan for new repositories in
491N/A * the source root directory.
491N/A * @param addProjects If {@code true} add automatically projects found
491N/A * @param defaultProject The name of the project to use as default. Might
491N/A * be {@code null}.
491N/A * @param configFilename The pathname of the configuration file to write.
491N/A * If {@code null} writing the config file gets skipped.
491N/A * @param refreshHistory If {@code true}, create/update the history cache
491N/A * for the given or all projects.
491N/A * @param listFiles If {@code true}, print a list of all files of
491N/A * the index database for given projects to stdout.
491N/A * @param listTokens If {@code true}, print a list of frequent
491N/A * tokens for related projects to stdout.
491N/A * @param subFiles Determine target projects by the given repo
1062N/A * files when <var>listFiles</var> or <var>createDict</var> is set to
491N/A * {@code true}. {@code null} implies "all projects".
491N/A * @param repositories Target repositories.{@code null} implies
491N/A * "all projects".
491N/A * @param zapCache List of project names, whose history cache should
491N/A * be zapped. An Asterisk ('*') implies zap all. Ignored if empty.
491N/A * @param listRepoPathes If {@code true}, just list available projects
491N/A * incl. their path, zap projects (if set) and return.
491N/A * @throws IndexerException
491N/A * @throws IOException
491N/A * @throws NullPointerException if <var>env</var> or <var>zapCache</var> is
491N/A * {@code null}
491N/A */
491N/A @SuppressWarnings("PMD.SimplifyStartsWith")
491N/A public static void prepareIndexer(boolean searchRepositories,
491N/A boolean addProjects,
1054N/A String defaultProject,
491N/A String configFilename,
491N/A boolean refreshHistory,
491N/A boolean listFiles,
491N/A boolean listTokens,
491N/A List<String> subFiles,
491N/A List<String> repositories,
99N/A List<String> zapCache,
491N/A boolean listRepoPathes) throws IndexerException, IOException
491N/A {
491N/A Configuration cfg = RuntimeEnvironment.getConfig();
58N/A if (cfg.getDataRoot() == null) {
77N/A throw new IndexerException("Please specify a DATA ROOT path");
491N/A }
491N/A
491N/A if (cfg.getSourceRootFile() == null) {
77N/A throw new IndexerException("Please specify a SRC_ROOT with option -s");
491N/A }
1054N/A
99N/A if (!RuntimeEnvironment.validateExuberantCtags()) {
58N/A throw new IndexerException("Didn't find Exuberant Ctags");
491N/A }
58N/A if (zapCache == null) {
491N/A throw new IndexerException("Internal error, zapCache shouldn't be null");
491N/A }
491N/A
491N/A if (searchRepositories || listRepoPathes || !zapCache.isEmpty()) {
207N/A log.log(Level.INFO,"Scanning for repositories...");
1054N/A long start = System.currentTimeMillis();
491N/A HistoryGuru.getInstance().addRepositories(cfg.getSourceRoot());
491N/A long time = (System.currentTimeMillis() - start) / 1000;
491N/A log.log(Level.INFO, "Done ({0}s)", Long.valueOf(time));
491N/A if (listRepoPathes || !zapCache.isEmpty()) {
491N/A List<RepositoryInfo> repos = cfg.getRepositories();
491N/A String prefix = cfg.getSourceRoot();
297N/A if (listRepoPathes) {
491N/A if (repos.isEmpty()) {
508N/A System.out.println("No repositories found.");
910N/A return;
508N/A }
508N/A System.out.println("Repositories in " + prefix + ":");
508N/A for (RepositoryInfo info : cfg.getRepositories()) {
1054N/A String dir = info.getDirectoryName();
274N/A System.out.println(dir.substring(prefix.length()));
274N/A }
491N/A }
491N/A if (!zapCache.isEmpty()) {
491N/A HashSet<String> toZap = new HashSet<String>(zapCache.size() << 1);
491N/A boolean all = false;
491N/A for (String repo : zapCache) {
491N/A if ("*".equals(repo)) {
491N/A all = true;
930N/A break;
930N/A }
583N/A if (repo.startsWith(prefix)) {
583N/A repo = repo.substring(prefix.length());
491N/A }
583N/A toZap.add(repo);
491N/A }
261N/A if (all) {
491N/A toZap.clear();
491N/A for (RepositoryInfo info : cfg.getRepositories()) {
491N/A toZap.add(info.getDirectoryName()
1054N/A .substring(prefix.length()));
491N/A }
491N/A }
491N/A try {
491N/A HistoryGuru.getInstance().removeCache(toZap);
491N/A } catch (HistoryException e) {
979N/A log.warning("Clearing history cache faild: "
264N/A + e.getLocalizedMessage());
491N/A }
1054N/A }
491N/A return;
1054N/A }
1054N/A }
491N/A
1054N/A if (addProjects) {
491N/A File files[] = cfg.getSourceRootFile().listFiles();
0N/A List<Project> projects = cfg.getProjects();
260N/A
260N/A // Keep a copy of the old project list so that we can preserve
260N/A // the customization of existing projects.
0N/A Map<String, Project> oldProjects = new HashMap<String, Project>();
for (Project p : projects) {
oldProjects.put(p.getPath(), p);
}
projects.clear();
// Add a project for each top-level directory in source root.
for (File file : files) {
String name = file.getName();
String path = "/" + name;
if (oldProjects.containsKey(path)) {
// This is an existing object. Reuse the old project,
// possibly with customizations, instead of creating a
// new with default values.
projects.add(oldProjects.get(path));
} else if (!name.startsWith(".") && file.isDirectory()) {
// Found a new directory with no matching project, so
// create a new project with default properties.
Project p = new Project();
p.setDescription(name);
p.setPath(path);
p.setTabSize(cfg.getTabSize());
projects.add(p);
}
}
// The projects should be sorted...
Collections.sort(projects, new Comparator<Project>() {
@Override
public int compare(Project p1, Project p2) {
String s1 = p1.getDescription();
String s2 = p2.getDescription();
int ret;
if (s1 == null) {
ret = (s2 == null) ? 0 : 1;
} else {
ret = s1.compareTo(s2);
}
return ret;
}
});
}
if (defaultProject != null) {
for (Project p : cfg.getProjects()) {
if (p.getPath().equals(defaultProject)) {
cfg.setDefaultProject(p);
break;
}
}
}
if (configFilename != null) {
log.log(Level.INFO, "Writing configuration to ''{0}''", configFilename);
RuntimeEnvironment.writeConfig(new File(configFilename));
log.info("Done.");
}
if (refreshHistory) {
log.log(Level.INFO, "Generating history cache for all repositories ...");
HistoryGuru.getInstance().createCache();
log.info("Done.");
} else if (repositories != null && !repositories.isEmpty()) {
log.log(Level.INFO, "Generating history cache for specified repositories ...");
HistoryGuru.getInstance().createCache(repositories);
log.info("Done.");
}
if (listFiles) {
IndexDatabase.listAllFiles(subFiles);
}
if (listTokens) {
IndexDatabase.listFrequentTokens(subFiles);
}
}
/**
 * Index and optionally optimize related databases. Calls
 * {@link RuntimeEnvironment#register()} and reads the settings from the
 * current configuration. The work is handed to a fixed size thread pool
 * and this method returns once all submitted work has finished.
 *
 * @param update If {@code true} update the index. Otherwise the index
 *  gets optimized only, provided that the configuration asks for it.
 * @param noThreads Number of threads to use for indexing.
 * @param subFiles Use the given repo files to determine the projects,
 *  which should be indexed or index db optimized. If {@code null} or
 *  empty, everything gets processed.
 * @param progress Listener which gets passed to the index databases.
 * @throws IOException
 * @see IndexDatabase#update(ExecutorService, IndexChangedListener, List)
 */
public static void doIndexerExecution(final boolean update, int noThreads,
    List<String> subFiles, IndexChangedListener progress)
    throws IOException
{
    RuntimeEnvironment.register();
    Configuration cfg = RuntimeEnvironment.getConfig();
    log.info("Start indexing ...");
    ExecutorService executor = Executors.newFixedThreadPool(noThreads);
    try {
        if (subFiles == null || subFiles.isEmpty()) {
            if (update) {
                IndexDatabase.updateAll(executor, progress);
            } else if (cfg.isOptimizeDatabase()) {
                IndexDatabase.optimizeAll(executor);
            }
        } else {
            // Collect one database per project; several paths of the same
            // project are added to the same database.
            List<IndexDatabase> dbs = new ArrayList<IndexDatabase>();
            for (String path : subFiles) {
                Project project = Project.getProject(path);
                if (project == null && cfg.hasProjects()) {
                    log.warning("Could not find a project for '" + path + "'");
                } else {
                    IndexDatabase db;
                    if (project == null) {
                        db = new IndexDatabase();
                    } else {
                        db = new IndexDatabase(project);
                    }
                    int idx = dbs.indexOf(db);
                    if (idx != -1) {
                        db = dbs.get(idx);
                    }
                    if (db.addDirectory(path)) {
                        if (idx == -1) {
                            dbs.add(db);
                        }
                    } else {
                        log.warning("Directory does not exist '" + path + "'");
                    }
                }
            }
            final boolean optimize = cfg.isOptimizeDatabase();
            for (final IndexDatabase db : dbs) {
                db.addIndexChangedListener(progress);
                executor.submit(new Runnable() {
                    @Override
                    public void run() {
                        try {
                            if (update) {
                                db.update();
                            } else if (optimize) {
                                db.optimize();
                            }
                        } catch (Throwable e) {
                            log.warning("An error occured while "
                                + (update ? "updating " : "optimizing ")
                                + db + ": " + e.getMessage());
                            log.log(Level.FINE, "run", e);
                        }
                    }
                });
            }
        }
    } finally {
        // Always release the pool, even if submitting the work failed.
        // Otherwise its worker threads would stay around forever.
        executor.shutdown();
    }
    boolean interrupted = false;
    while (!executor.isTerminated()) {
        try {
            // Wait forever
            executor.awaitTermination(999,TimeUnit.DAYS);
        } catch (InterruptedException exp) {
            // Keep waiting for the running tasks, but remember the request.
            interrupted = true;
            log.warning("Received interrupt while waiting for executor to finish: "
                + exp.getMessage());
            log.log(Level.FINE, "doIndexerExecution", exp);
        }
    }
    if (interrupted) {
        // Restore the interrupt status for the caller.
        Thread.currentThread().interrupt();
    }
    log.info("Done.");
}
/**
 * Send the current configuration to the given host using
 * {@link RuntimeEnvironment#writeConfig}. Failures are logged, never
 * thrown.
 *
 * @param configHost where to send the configuration. Format: host:port .
 *  If {@code null}, nothing happens. A value that does not consist of
 *  exactly two colon separated parts is reported with a warning.
 */
public static void sendToConfigHost(String configHost) {
    if (configHost == null) {
        return;
    }
    String[] cfg = configHost.split(":");
    log.log(Level.INFO, "Sending configuration to {0}", configHost);
    if (cfg.length == 2) {
        try {
            InetAddress host = InetAddress.getByName(cfg[0]);
            RuntimeEnvironment.writeConfig(host, Integer.parseInt(cfg[1]));
        } catch (Exception ex) {
            log.log(Level.WARNING, "Failed to send configuration to "
                + configHost
                + " (is web application server running with opengrok deployed?): "
                + ex.getMessage());
            log.log(Level.FINE, "sendToConfigHost", ex);
        }
    } else {
        StringBuilder sb = new StringBuilder("Syntax errors = ");
        if (cfg.length == 0) {
            // split() drops trailing empty parts, so e.g. ":" leaves no
            // part at all. Report the raw value instead of staying silent.
            sb.append('[').append(configHost).append(']');
        } else {
            for (int i = 0; i < cfg.length; i++) {
                if (i > 0) {
                    sb.append(", ");
                }
                sb.append('[').append(cfg[i]).append(']');
            }
        }
        log.warning(sb.toString());
    }
    log.info("Done.");
}
/**
 * Private, so that no instance can be created from outside this class.
 */
private Indexer() {
    super();
}
}