/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * See LICENSE.txt included in this distribution for the specific * language governing permissions and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at LICENSE.txt. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. */ package org.opensolaris.opengrok.analysis; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.Reader; import java.util.logging.Logger; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.opensolaris.opengrok.configuration.Project; import org.opensolaris.opengrok.configuration.RuntimeEnvironment; import org.opensolaris.opengrok.util.IOUtils; /** * Base class for all different File Analyzers * * An Analyzer for a filetype provides *
 * <ol>
 * <li>the file extensions and magic numbers it analyzes</li>
 * <li>a Lucene document listing the fields it can support</li>
 * <li>TokenStreams for each of the fields it said require tokenizing in 2</li>
 * <li>cross reference in HTML format</li>
 * <li>the type of file data, plain text etc.</li>
 * </ol>
*
 * Created on September 21, 2005
 *
 * @author Chandan
 */
public class FileAnalyzer extends Analyzer {

    private static final Logger logger =
            Logger.getLogger(FileAnalyzer.class.getName());

    /** The project to which this analyzer is related. */
    protected Project project;

    /** The factory passed to the constructor; never reassigned. */
    private final FileAnalyzerFactory factory;

    /**
     * What kind of file is this?
     */
    public static enum Genre {
        /** xrefed - line numbered context */
        PLAIN("p"),
        /** xrefed - summarizer context */
        XREFABLE("x"),
        /** not xrefed - no context - used by diff/list */
        IMAGE("i"),
        /** not xrefed - no context */
        DATA("d"),
        /** not xrefed - summarizer context from original file */
        HTML("h");

        /**
         * Short tag of this genre. Declared {@code final} because enum
         * constants are shared singletons and the tag is assigned only once,
         * in the constructor.
         */
        private final String typeName;

        private Genre(String typename) {
            this.typeName = typename;
        }

        /**
         * Get the type name value used to tag Lucene documents.
         *
         * @return a non-null string.
         */
        public String typeName() {
            return typeName;
        }

        /**
         * Get the Genre for the given type name.
         *
         * @param typeName name to check
         * @return {@code null} if the name is {@code null} or doesn't match
         *  any genre, the matching genre otherwise.
         * @see #typeName()
         */
        public static Genre get(String typeName) {
            if (typeName == null) {
                return null;
            }
            for (Genre g : values()) {
                if (g.typeName.equals(typeName)) {
                    return g;
                }
            }
            return null;
        }
    }

    /**
     * Ctags instance to use for tagging.
     */
    protected Ctags ctags;

    /**
     * Set the ctags instance to use for tagging this repo's files.
     *
     * @param ctags instance to set.
     */
    public void setCtags(Ctags ctags) {
        this.ctags = ctags;
    }

    /**
     * Set the project to which this instance is related.
     *
     * @param project project to set.
     */
    public void setProject(Project project) {
        this.project = project;
    }

    /**
     * Get the factory which created this analyzer.
     *
     * @return the {@code FileAnalyzerFactory} which created this analyzer
     */
    public final FileAnalyzerFactory getFactory() {
        return factory;
    }

    /**
     * Get the genre which this analyzer usually handles. The value is
     * obtained from the factory on every call.
     *
     * @return the genre this analyzer handles.
     * @see FileAnalyzerFactory#getGenre()
     */
    public Genre getGenre() {
        return factory.getGenre();
    }

    /** Analyzer used to tokenize the {@code hist} field. */
    private final HistoryAnalyzer hista;

    /**
     * Creates a new instance of FileAnalyzer.
     *
     * @param factory the factory to use to obtain default settings
     */
    public FileAnalyzer(FileAnalyzerFactory factory) {
        this.factory = factory;
        hista = new HistoryAnalyzer();
    }

    /**
     * Read the given input, analyze it and store the result into the given
     * document. NOTE: This method does not close the given input stream on
     * return!
     * <p>
     * This base implementation is intentionally empty.
     *
     * @param doc where to store results.
     * @param in source to read.
     * @throws IOException declared for overriding implementations; this base
     *  implementation never throws it.
     */
    @SuppressWarnings("unused") // yes, hybrid: [not] a interface, [not] abstract
    public void analyze(Document doc, InputStream in) throws IOException {
        // not used
    }

    /**
     * Get the TokenStream for the given field name.
     *
     * @param fieldName field name in question {@code path | project | hist}
     * @param reader reader to use.
     * @return {@code null} for unknown field names (a warning is logged in
     *  that case), a new corresponding TokenStream otherwise.
     */
    public TokenStream overridableTokenStream(String fieldName, Reader reader) {
        if ("path".equals(fieldName) || "project".equals(fieldName)) {
            return new PathTokenizer(reader);
        } else if ("hist".equals(fieldName)) {
            return hista.tokenStream(fieldName, reader);
        }
        logger.warning("Have no analyzer for '" + fieldName + "'");
        return null;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public final TokenStream tokenStream(String fieldName, Reader reader) {
        // final entry point; the customizable part is overridableTokenStream()
        return this.overridableTokenStream(fieldName, reader);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public final TokenStream reusableTokenStream(String fieldName, Reader reader) {
        //TODO needs refactoring to get more speed and less ram usage for indexer
        // no reuse yet: simply delegates to tokenStream()
        return this.tokenStream(fieldName, reader);
    }

    /**
     * Write out current results, HTML formatted, to the given writer.
     * <p>
     * This base implementation only writes a fixed error message.
     *
     * @param out writer to write the HTML cross-reference to
     * @throws java.io.IOException if an error occurs
     */
    @SuppressWarnings("static-method")
    public void writeXref(XrefWriter out) throws IOException {
        out.write("Error General File X-Ref writer!");
    }

    /**
     * Write out current results, HTML formatted, to the given destination.
     * The writer created for the destination is handed to
     * {@code IOUtils.close} when done, whether or not writing succeeded.
     *
     * @param xrefDir the parent directory of the file to write.
     * @param path the relative path wrt. xrefDir of the file to write.
     * @throws IOException propagated from creating the {@code XrefWriter} or
     *  from {@link #writeXref(XrefWriter)}.
     */
    public void writeXref(File xrefDir, String path) throws IOException {
        @SuppressWarnings("resource")
        XrefWriter w = null;
        try {
            w = new XrefWriter(new File(xrefDir, path), factory.getGenre(),
                    RuntimeEnvironment.getConfig().isCompressXref());
            writeXref(w);
        } finally {
            // w is still null here if the constructor threw
            IOUtils.close(w);
        }
    }
}