FileAnalyzer.java revision 1185
824N/A/*
824N/A * CDDL HEADER START
824N/A *
824N/A * The contents of this file are subject to the terms of the
963N/A * Common Development and Distribution License (the "License").
824N/A * You may not use this file except in compliance with the License.
824N/A *
919N/A * See LICENSE.txt included in this distribution for the specific
919N/A * language governing permissions and limitations under the License.
919N/A *
919N/A * When distributing Covered Code, include this CDDL HEADER in each
919N/A * file and include the License file at LICENSE.txt.
919N/A * If applicable, add the following below this CDDL HEADER, with the
919N/A * fields enclosed by brackets "[]" replaced with your own identifying
919N/A * information: Portions Copyright [yyyy] [name of copyright owner]
919N/A *
919N/A * CDDL HEADER END
919N/A */
919N/A
919N/A/*
919N/A * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
919N/A * Use is subject to license terms.
919N/A */
919N/Apackage org.opensolaris.opengrok.analysis;
824N/A
824N/Aimport java.io.BufferedWriter;
824N/Aimport java.io.File;
824N/Aimport java.io.FileOutputStream;
824N/Aimport java.io.IOException;
824N/Aimport java.io.InputStream;
965N/Aimport java.io.OutputStream;
965N/Aimport java.io.OutputStreamWriter;
824N/Aimport java.io.Reader;
824N/Aimport java.io.Writer;
824N/Aimport java.util.logging.Level;
824N/Aimport java.util.zip.GZIPOutputStream;
824N/Aimport org.apache.lucene.analysis.Analyzer;
824N/Aimport org.apache.lucene.analysis.TokenStream;
824N/Aimport org.apache.lucene.document.Document;
824N/Aimport org.opensolaris.opengrok.OpenGrokLogger;
824N/Aimport org.opensolaris.opengrok.configuration.Project;
824N/Aimport org.opensolaris.opengrok.configuration.RuntimeEnvironment;
824N/A
824N/A/**
824N/A * Base class for all different File Analyzers
824N/A *
824N/A * An Analyzer for a filetype provides
824N/A *<ol>
824N/A * <li>the file extentions and magic numbers it analyzes</li>
824N/A * <li>a lucene document listing the fields it can support</li>
824N/A * <li>TokenStreams for each of the field it said requires tokenizing in 2</li>
824N/A * <li>cross reference in HTML format</li>
824N/A * <li>The type of file data, plain text etc</li>
824N/A *</ol>
824N/A *
824N/A * Created on September 21, 2005
824N/A *
824N/A * @author Chandan
833N/A */
824N/Apublic class FileAnalyzer extends Analyzer {
824N/A
824N/A protected Project project;
824N/A private final FileAnalyzerFactory factory;
824N/A
824N/A /**
824N/A * What kind of file is this?
824N/A */
824N/A public static enum Genre {
824N/A /** xrefed - line numbered context */
824N/A PLAIN("p"),
824N/A /** xrefed - summarizer context */
824N/A XREFABLE("x"),
824N/A /** not xrefed - no context - used by diff/list */
824N/A IMAGE("i"),
824N/A /** not xrefed - no context */
824N/A DATA("d"),
824N/A /** not xrefed - summarizer context from original file */
824N/A HTML("h")
824N/A ;
824N/A private String typeName;
824N/A private Genre(String typename) {
824N/A this.typeName = typename;
824N/A }
833N/A
824N/A /**
824N/A * Get the type name value used to tag lucence documents.
824N/A * @return a none-null string.
824N/A */
824N/A public String typeName() {
824N/A return typeName;
824N/A }
824N/A
833N/A /**
824N/A * Get the Genre for the given type name.
824N/A * @param typeName name to check
824N/A * @return {@code null} if it doesn't match any genre, the genre otherwise.
824N/A * @see #typeName()
824N/A */
824N/A public static Genre get(String typeName) {
824N/A if (typeName == null) {
824N/A return null;
824N/A }
824N/A for (Genre g : values()) {
824N/A if (g.typeName.equals(typeName)) {
963N/A return g;
963N/A }
963N/A }
963N/A return null;
}
}
protected Ctags ctags;
public void setCtags(Ctags ctags) {
this.ctags = ctags;
}
public void setProject(Project project) {
this.project = project;
}
/**
* Get the factory which created this analyzer.
* @return the {@code FileAnalyzerFactory} which created this analyzer
*/
public final FileAnalyzerFactory getFactory() {
return factory;
}
public Genre getGenre() {
return factory.getGenre();
}
private final HistoryAnalyzer hista;
/** Creates a new instance of FileAnalyzer */
public FileAnalyzer(FileAnalyzerFactory factory) {
this.factory = factory;
hista = new HistoryAnalyzer();
}
public void analyze(Document doc, InputStream in) throws IOException {
// not used
}
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
if ("path".equals(fieldName) || "project".equals(fieldName)) {
return new PathTokenizer(reader);
} else if ("hist".equals(fieldName)) {
return hista.tokenStream(fieldName, reader);
}
OpenGrokLogger.getLogger().log(Level.WARNING, "Have no analyzer for: {0}", fieldName);
return null;
}
/**
* Write a cross referenced HTML file.
* @param out to writer HTML cross-reference
* @throws java.io.IOException if an error occurs
*/
public void writeXref(Writer out) throws IOException {
out.write("Error General File X-Ref writer!");
}
public void writeXref(File xrefDir, String path) throws IOException {
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
final boolean compressed = env.isCompressXref();
final File file = new File(xrefDir, path + (compressed ? ".gz" : ""));
OutputStream out = new FileOutputStream(file);
try {
if (compressed) {
out = new GZIPOutputStream(out);
}
Writer w = new BufferedWriter(new OutputStreamWriter(out));
writeXref(w);
w.close();
} finally {
out.close();
}
}
}