Indexer.java revision 376
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
1186N/Apackage org.opensolaris.opengrok.index;
1186N/A
1186N/Aimport java.awt.GraphicsEnvironment;
1186N/Aimport java.io.BufferedReader;
1186N/Aimport java.io.File;
1186N/Aimport java.io.FileReader;
1186N/Aimport java.io.IOException;
1186N/Aimport java.net.InetAddress;
1186N/Aimport java.text.ParseException;
1186N/Aimport java.util.ArrayList;
1186N/Aimport java.util.Collections;
1186N/Aimport java.util.Comparator;
1186N/Aimport java.util.List;
1186N/Aimport java.util.concurrent.ExecutorService;
1186N/Aimport java.util.concurrent.Executors;
1186N/Aimport java.util.logging.Level;
1186N/Aimport java.util.logging.Logger;
949N/Aimport org.opensolaris.opengrok.analysis.AnalyzerGuru;
949N/Aimport org.opensolaris.opengrok.configuration.Project;
1186N/Aimport org.opensolaris.opengrok.history.HistoryGuru;
1186N/Aimport org.opensolaris.opengrok.configuration.RuntimeEnvironment;
1186N/Aimport org.opensolaris.opengrok.util.Getopt;
1186N/A
1186N/A/**
1186N/A * Creates and updates an inverted source index
1186N/A * as well as generates Xref, file stats etc., if specified
1186N/A * in the options
1186N/A */
1186N/Apublic class Indexer {
1186N/A
1254N/A private static Indexer index = new Indexer();
1186N/A private static final Logger log = Logger.getLogger(Indexer.class.getName());
1186N/A
1186N/A public static Indexer getInstance() {
1186N/A return index;
1186N/A }
1186N/A /**
1186N/A * Program entry point
1186N/A * @param argv argument vector
1186N/A */
1294N/A public static void main(String argv[]) {
1186N/A RuntimeEnvironment env = RuntimeEnvironment.getInstance();
1186N/A boolean runIndex = true;
1186N/A boolean update = true;
1186N/A boolean optimizedChanged = false;
1186N/A CommandLineOptions cmdOptions = new CommandLineOptions();
1186N/A
1186N/A if(argv.length == 0) {
1186N/A System.err.println(cmdOptions.getUsage());
1186N/A System.exit(1);
1186N/A } else {
1186N/A boolean searchRepositories = false;
1186N/A ArrayList<String> subFiles = new ArrayList<String>();
949N/A ArrayList<String> repositories = new ArrayList<String>();
949N/A String configFilename = null;
1186N/A String configHost = null;
1186N/A boolean addProjects = false;
1186N/A boolean refreshHistory = false;
1186N/A String defaultProject = null;
1186N/A boolean listFiles = false;
1186N/A boolean createDict = false;
1186N/A int noThreads = Runtime.getRuntime().availableProcessors();
1186N/A
1186N/A // Parse command line options:
1186N/A Getopt getopt = new Getopt(argv, cmdOptions.getCommandString());
1186N/A
1186N/A try {
1186N/A getopt.parse();
1186N/A } catch (ParseException ex) {
1186N/A System.err.println("OpenGrok: " + ex.getMessage());
1186N/A System.err.println(cmdOptions.getUsage());
1186N/A System.exit(1);
1186N/A }
1186N/A
1186N/A try{
1186N/A int cmd;
1186N/A
1186N/A // We need to read the configuration file first, since we
1186N/A // will try to overwrite options..
1186N/A while ((cmd = getopt.getOpt()) != -1) {
1186N/A if (cmd == 'R') {
1186N/A env.readConfiguration(new File(getopt.getOptarg()));
1186N/A break;
1186N/A }
1186N/A }
1186N/A
1186N/A // Now we can handle all the other options..
1186N/A getopt.reset();
1186N/A while ((cmd = getopt.getOpt()) != -1) {
1186N/A switch (cmd) {
1186N/A case 't':
1186N/A createDict = true;
1186N/A runIndex = false;
1186N/A break;
949N/A
949N/A case 'q': env.setVerbose(false); break;
1186N/A case 'e': env.setGenerateHtml(false); break;
1186N/A case 'P': addProjects = true; break;
1186N/A case 'p': defaultProject = getopt.getOptarg(); break;
1186N/A case 'c': env.setCtags(getopt.getOptarg()); break;
1186N/A case 'w': {
1186N/A String webapp = getopt.getOptarg();
1186N/A if (webapp.startsWith("/") || webapp.startsWith("http")) {
1186N/A ;
1186N/A } else {
1186N/A webapp = "/" + webapp;
1186N/A }
1186N/A if (webapp.endsWith("/")) {
1186N/A env.setUrlPrefix(webapp + "s?");
1186N/A } else {
1186N/A env.setUrlPrefix(webapp + "/s?");
1186N/A }
1186N/A }
1186N/A break;
1186N/A case 'W': configFilename = getopt.getOptarg(); break;
1186N/A case 'U': configHost = getopt.getOptarg(); break;
1186N/A case 'R':
1186N/A // already handled
1186N/A break;
949N/A case 'n': runIndex = false; break;
949N/A case 'H': refreshHistory = true; break;
1186N/A case 'h' : repositories.add(getopt.getOptarg()); break;
1186N/A case 'r': {
1186N/A if (getopt.getOptarg().equalsIgnoreCase("on")) {
1186N/A env.setRemoteScmSupported(true);
1186N/A } else if (getopt.getOptarg().equalsIgnoreCase("off")) {
1186N/A env.setRemoteScmSupported(false);
949N/A } else {
949N/A System.err.println("ERROR: You should pass either \"on\" or \"off\" as argument to -r");
1186N/A System.err.println(" Ex: \"-r on\" will allow retrival for remote SCM systems");
1186N/A System.err.println(" \"-r off\" will ignore SCM for remote systems");
1186N/A }
1186N/A }
1186N/A break;
1186N/A case 'O': {
1186N/A boolean oldval = env.isOptimizeDatabase();
1186N/A if (getopt.getOptarg().equalsIgnoreCase("on")) {
1186N/A env.setOptimizeDatabase(true);
1186N/A } else if (getopt.getOptarg().equalsIgnoreCase("off")) {
1186N/A env.setOptimizeDatabase(false);
1186N/A } else {
1186N/A System.err.println("ERROR: You should pass either \"on\" or \"off\" as argument to -O");
1186N/A System.err.println(" Ex: \"-O on\" will optimize the database as part of the index generation");
1186N/A System.err.println(" \"-O off\" disable optimization of the index database");
1186N/A }
1186N/A if (oldval != env.isOptimizeDatabase()) {
1186N/A optimizedChanged = true;
1186N/A }
1186N/A }
949N/A break;
949N/A case 'v': env.setVerbose(true); break;
1186N/A
1186N/A case 's': {
1186N/A File file = new File(getopt.getOptarg());
1186N/A if (!file.isDirectory()) {
1186N/A System.err.println("ERROR: No such directory: " + file.toString());
1186N/A System.exit(1);
1186N/A }
1186N/A
1186N/A env.setSourceRootFile(file);
1186N/A break;
1186N/A }
1186N/A case 'd':
1186N/A env.setDataRoot(getopt.getOptarg());
1186N/A break;
1186N/A case 'i':
1186N/A env.getIgnoredNames().add(getopt.getOptarg());
1186N/A break;
1186N/A case 'S' : searchRepositories = true; break;
1186N/A case 'Q' :
1186N/A if (getopt.getOptarg().equalsIgnoreCase("on")) {
1186N/A env.setQuickContextScan(true);
1186N/A } else if (getopt.getOptarg().equalsIgnoreCase("off")) {
1186N/A env.setQuickContextScan(false);
1186N/A } else {
1186N/A System.err.println("ERROR: You should pass either \"on\" or \"off\" as argument to -Q");
1186N/A System.err.println(" Ex: \"-Q on\" will just scan a \"chunk\" of the file and insert \"[..all..]\"");
1186N/A System.err.println(" \"-Q off\" will try to build a more accurate list by reading the complete file.");
1186N/A }
1186N/A
1186N/A break;
1186N/A case 'm' : {
1186N/A try {
949N/A env.setIndexWordLimit(Integer.parseInt(getopt.getOptarg()));
1186N/A } catch (NumberFormatException exp) {
949N/A System.err.println("ERROR: Failed to parse argument to \"-m\": " + exp.getMessage());
949N/A System.exit(1);
1186N/A }
1186N/A break;
1186N/A }
1186N/A case 'a' :
1186N/A if (getopt.getOptarg().equalsIgnoreCase("on")) {
1186N/A env.setAllowLeadingWildcard(true);
1186N/A } else if (getopt.getOptarg().equalsIgnoreCase("off")) {
1186N/A env.setAllowLeadingWildcard(false);
1186N/A } else {
1186N/A System.err.println("ERROR: You should pass either \"on\" or \"off\" as argument to -a");
949N/A System.err.println(" Ex: \"-a on\" will allow a search to start with a wildcard");
949N/A System.err.println(" \"-a off\" will disallow a search to start with a wildcard");
1355N/A System.exit(1);
1186N/A }
1186N/A
1186N/A break;
1355N/A
1186N/A case 'A': {
1186N/A String[] arg = getopt.getOptarg().split(":");
1186N/A if (arg.length != 2) {
1186N/A System.err.println("ERROR: You must specify: -A extension:class");
1186N/A System.err.println(" Ex: -A foo:org.opensolaris.opengrok.analysis.c.CAnalyzer");
1186N/A System.err.println(" will use the C analyzer for all files ending with .foo");
1186N/A System.err.println(" Ex: -A c:-");
1186N/A System.err.println(" will disable the c-analyzer for for all files ending with .c");
1186N/A System.exit(1);
1186N/A }
1186N/A
1186N/A arg[0] = arg[0].substring(arg[0].lastIndexOf('.') + 1).toUpperCase();
1186N/A if (arg[1].equals("-")) {
1186N/A AnalyzerGuru.addExtension(arg[0], null);
1186N/A break;
1186N/A }
1186N/A
1186N/A try {
1186N/A AnalyzerGuru.addExtension(
1186N/A arg[0],
949N/A AnalyzerGuru.findFactory(arg[1]));
949N/A } catch (Exception e) {
949N/A System.err.println("Unable to use " + arg[1] +
1186N/A " as a FileAnalyzerFactory");
1186N/A e.printStackTrace();
1186N/A System.exit(1);
949N/A }
949N/A }
1186N/A break;
1186N/A case 'L' :
1186N/A env.setWebappLAF(getopt.getOptarg());
1186N/A break;
1186N/A case 'T' :
1186N/A try {
1186N/A noThreads = Integer.parseInt(getopt.getOptarg());
1186N/A } catch (NumberFormatException exp) {
1186N/A System.err.println("ERROR: Failed to parse argument to \"-T\": " + exp.getMessage());
1186N/A System.exit(1);
1186N/A }
1186N/A break;
1186N/A case 'l' :
1186N/A if (getopt.getOptarg().equalsIgnoreCase("on")) {
1186N/A env.setUsingLuceneLocking(true);
1186N/A } else if (getopt.getOptarg().equalsIgnoreCase("off")) {
1186N/A env.setUsingLuceneLocking(false);
1186N/A } else {
1186N/A System.err.println("ERROR: You should pass either \"on\" or \"off\" as argument to -l");
1186N/A System.err.println(" Ex: \"-l on\" will enable locks in Lucene");
1186N/A System.err.println(" \"-l off\" will disable locks in Lucene");
1186N/A }
1186N/A break;
1186N/A case '?':
1186N/A System.err.println(cmdOptions.getUsage());
1186N/A System.exit(0);
1186N/A break;
1186N/A
1186N/A default:
1186N/A System.err.println("Internal Error - Unimplemented cmdline option: " + (char)cmd);
1186N/A System.exit(1);
1186N/A }
1186N/A }
949N/A
949N/A int optind = getopt.getOptind();
1186N/A if (optind != -1) {
1186N/A while (optind < argv.length) {
1186N/A subFiles.add(argv[optind]);
1186N/A ++optind;
1186N/A }
1186N/A }
1186N/A
1186N/A getInstance().prepareIndexer(env, searchRepositories, addProjects,
1186N/A defaultProject,configFilename,refreshHistory,
1186N/A listFiles,createDict,subFiles,repositories);
1186N/A if (runIndex || (optimizedChanged && env.isOptimizeDatabase())) {
1186N/A IndexChangedListener progress = new DefaultIndexChangedListener();
1186N/A getInstance().doIndexerExecution(update, noThreads, subFiles,
1186N/A progress);
1186N/A }
1186N/A getInstance().sendToConfigHost(env, configHost);
1186N/A } catch (IndexerException ex) {
1186N/A System.err.println(ex);
1186N/A System.err.println(cmdOptions.getUsage());
949N/A System.exit(1);
949N/A } catch (IOException ioe) {
1186N/A System.err.println("Got IOException " + ioe);
1186N/A System.exit(1);
1186N/A }
1186N/A }
1186N/A
1186N/A }
1186N/A
1294N/A
1186N/A
1186N/A public void prepareIndexer(RuntimeEnvironment env,
1186N/A boolean searchRepositories,
1186N/A boolean addProjects,
1186N/A String defaultProject,
1186N/A String configFilename,
949N/A boolean refreshHistory,
949N/A boolean listFiles,
1186N/A boolean createDict,
1186N/A ArrayList<String> subFiles,
1186N/A ArrayList<String> repositories) throws IndexerException,IOException {
1186N/A
1186N/A if (env.getDataRootPath() == null) {
1186N/A throw new IndexerException("ERROR: Please specify a DATA ROOT path");
1186N/A }
1186N/A
1186N/A if (env.getSourceRootFile() == null) {
1186N/A File srcConfig = new File(env.getDataRootPath(), "SRC_ROOT");
1186N/A String line = null;
1186N/A if(srcConfig.exists()) {
1186N/A try {
1186N/A BufferedReader sr = new BufferedReader(new FileReader(srcConfig));
1186N/A line = sr.readLine();
1186N/A sr.close();
1186N/A } catch (IOException e) {
1186N/A }
1186N/A }
1186N/A if(line == null) {
1186N/A throw new IndexerException("ERROR: please specify a SRC_ROOT with option -s !");
1186N/A }
1186N/A env.setSourceRoot(line);
1186N/A
1186N/A if (!env.getSourceRootFile().isDirectory()) {
1186N/A throw new IndexerException("ERROR: No such directory:" + line);
1186N/A }
1186N/A }
1186N/A
1186N/A if (!env.validateExuberantCtags()) {
1186N/A System.exit(1);
1186N/A }
1186N/A
1186N/A if (searchRepositories) {
1186N/A if (env.isVerbose()) {
1186N/A System.out.println("Scanning for repositories...");
949N/A }
1186N/A env.getRepositories().clear();
1186N/A long start = System.currentTimeMillis();
1186N/A HistoryGuru.getInstance().addRepositories(env.getSourceRootPath());
1186N/A long time = (System.currentTimeMillis() - start) / 1000;
1186N/A if (env.isVerbose()) {
1186N/A System.out.println("Done searching for repositories (" + time + "s)");
1186N/A }
1186N/A }
1186N/A
1186N/A if (addProjects) {
1186N/A File files[] = env.getSourceRootFile().listFiles();
1186N/A List<Project> projects = env.getProjects();
1186N/A projects.clear();
1186N/A for (File file : files) {
1186N/A if (!file.getName().startsWith(".") && file.isDirectory()) {
1186N/A projects.add(new Project(file.getName(), "/" + file.getName()));
1186N/A }
1186N/A }
1186N/A
1186N/A // The projects should be sorted...
1128N/A Collections.sort(projects, new Comparator<Project>() {
1186N/A public int compare(Project p1, Project p2){
1186N/A String s1 = p1.getDescription();
1186N/A String s2 = p2.getDescription();
949N/A
1186N/A int ret;
1186N/A if (s1 == null) {
1186N/A ret = (s2 == null) ? 0 : 1;
1186N/A } else {
1186N/A ret = s1.compareTo(s2);
1186N/A }
1186N/A return ret;
1128N/A }
1186N/A });
1186N/A }
1186N/A
1186N/A if (defaultProject != null) {
1186N/A for (Project p : env.getProjects()) {
1186N/A if (p.getPath().equals(defaultProject)) {
1186N/A env.setDefaultProject(p);
1186N/A break;
1128N/A }
1186N/A }
1186N/A }
1186N/A
1186N/A if (configFilename != null) {
1186N/A if (env.isVerbose()) {
1186N/A System.out.println("Writing configuration to " + configFilename);
1186N/A System.out.flush();
1186N/A }
1186N/A env.writeConfiguration(new File(configFilename));
1186N/A if (env.isVerbose()) {
1186N/A System.out.println("Done...");
1186N/A System.out.flush();
1186N/A }
1186N/A }
1186N/A
1186N/A if (refreshHistory) {
1128N/A HistoryGuru.getInstance().createCache();
1186N/A } else if (repositories != null && repositories.size() > 0) {
1186N/A HistoryGuru.getInstance().createCache(repositories);
1186N/A }
1186N/A
1186N/A if (listFiles) {
1186N/A IndexDatabase.listAllFiles(subFiles);
1186N/A }
1186N/A
1186N/A if (createDict) {
1294N/A IndexDatabase.listFrequentTokens(subFiles);
1186N/A }
1186N/A }
1186N/A
1186N/A
1186N/A public void doIndexerExecution(final boolean update, int noThreads, List<String> subFiles,
1186N/A IndexChangedListener progress)
1186N/A throws IOException {
1186N/A RuntimeEnvironment env = RuntimeEnvironment.getInstance();
1186N/A env.register();
1186N/A log.info("Starting indexExecution");
1186N/A
1186N/A ExecutorService executor = Executors.newFixedThreadPool(noThreads);
1186N/A
1128N/A if (subFiles == null || subFiles.isEmpty()) {
1186N/A if (update) {
1186N/A IndexDatabase.updateAll(executor, progress);
1186N/A } else if (env.isOptimizeDatabase()) {
1186N/A IndexDatabase.optimizeAll(executor);
1186N/A }
1186N/A } else {
1186N/A List<IndexDatabase> dbs = new ArrayList<IndexDatabase>();
1128N/A
1186N/A for (String path : subFiles) {
1186N/A Project project = Project.getProject(path);
1186N/A if (project == null && env.hasProjects()) {
1186N/A System.err.println("Warning: Could not find a project for \"" + path + "\"");
1186N/A } else {
1186N/A IndexDatabase db;
1186N/A if (project != null) {
1186N/A db = new IndexDatabase(project);
1128N/A } else {
1186N/A db = new IndexDatabase();
1186N/A }
1186N/A int idx = dbs.indexOf(db);
1186N/A if (idx != -1) {
1186N/A db = dbs.get(idx);
1186N/A }
1186N/A
1186N/A if (db.addDirectory(path)) {
1186N/A if (idx == -1) {
1186N/A dbs.add(db);
1186N/A }
1186N/A } else {
1186N/A System.err.println("Warning: Directory does not exist \"" + path + "\"");
1186N/A }
1186N/A }
1186N/A }
1186N/A
1186N/A for (final IndexDatabase db : dbs) {
1186N/A final boolean optimize = env.isOptimizeDatabase();
949N/A db.addIndexChangedListener(progress);
949N/A executor.submit(new Runnable() {
1186N/A
1186N/A public void run() {
949N/A try {
1186N/A if (update) {
1186N/A db.update();
1186N/A } else if (optimize) {
1186N/A db.optimize();
1186N/A }
1186N/A } catch (Exception e) {
1186N/A e.printStackTrace();
949N/A }
1186N/A }
1186N/A });
1186N/A }
1186N/A }
1186N/A
1186N/A executor.shutdown();
1294N/A while (!executor.isTerminated()) {
1355N/A try {
1186N/A Thread.sleep(1000);
1186N/A } catch (Exception e) {
1186N/A
1186N/A }
1186N/A }
1186N/A }
1186N/A
1186N/A
1186N/A public void sendToConfigHost(RuntimeEnvironment env, String configHost) {
1186N/A if (configHost != null) {
1186N/A String[] cfg = configHost.split(":");
1186N/A if (env.isVerbose()) {
1186N/A log.info("Send configuration to: " + configHost);
1186N/A }
1186N/A
1186N/A if (cfg.length == 2) {
1186N/A try {
1186N/A InetAddress host = InetAddress.getByName(cfg[0]);
1186N/A RuntimeEnvironment.getInstance().writeConfiguration(host, Integer.parseInt(cfg[1]));
1186N/A } catch (Exception ex) {
1186N/A log.log(Level.SEVERE,"Failed to send configuration to "
1186N/A + configHost,ex);
1186N/A }
1186N/A } else {
1186N/A System.err.println("Syntax error: ");
949N/A for (String s : cfg) {
1186N/A System.err.print("[" + s + "]");
1186N/A }
1186N/A System.err.println();
1186N/A }
949N/A if (env.isVerbose()) {
1186N/A log.info("Configuration successfully updated");
1186N/A }
1186N/A }
1186N/A }
949N/A
1186N/A private Indexer() {
1186N/A }
1186N/A}
1186N/A