/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* See LICENSE.txt included in this distribution for the specific
* language governing permissions and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at LICENSE.txt.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
/**
* Manages and provides Analyzers as needed. Please see the project
* documentation for a great description of the purpose of the AnalyzerGuru.
*
* Created on September 22, 2005
* @author Chandan
*/
public class AnalyzerGuru {
/** The default {@code FileAnalyzerFactory} instance. */
private static final FileAnalyzerFactory
/** Map from file names to analyzer factories. */
/** Map from file extensions to analyzer factories. */
// @TODO: have a comparator
/** Map from magic strings to analyzer factories. */
/**
* List of matcher objects which can be used to determine which analyzer
* factory to use.
*/
/** List of all registered {@code FileAnalyzerFactory} instances. */
private static final List<FileAnalyzerFactory>
public static final FieldType string_ft_stored_nanalyzed_norms = new FieldType(StringField.TYPE_STORED);
public static final FieldType string_ft_nstored_nanalyzed_norms = new FieldType(StringField.TYPE_NOT_STORED);
/*
* Analyzer factories instantiated when this class is initialized.
* If you write your own analyzer, please register it here by adding
* an instance of its factory to the array below.
*/
static {
// One new instance of each known analyzer factory, in declaration order.
FileAnalyzerFactory[] analyzers = {
new IgnorantAnalyzerFactory(),
new BZip2AnalyzerFactory(),
new XMLAnalyzerFactory(),
new TroffAnalyzerFactory(),
new ELFAnalyzerFactory(),
new JavaClassAnalyzerFactory(),
new ImageAnalyzerFactory(),
new TarAnalyzerFactory(),
new CAnalyzerFactory(),
new CSharpAnalyzerFactory(),
new VBAnalyzerFactory(),
new CxxAnalyzerFactory(),
new ShAnalyzerFactory(),
new GZIPAnalyzerFactory(),
new JavaAnalyzerFactory(),
new JavaScriptAnalyzerFactory(),
new PythonAnalyzerFactory(),
new PerlAnalyzerFactory(),
new PhpAnalyzerFactory(),
new LispAnalyzerFactory(),
new TclAnalyzerFactory(),
new SQLAnalyzerFactory(),
new PLSQLAnalyzerFactory(),
new FortranAnalyzerFactory()
};
// NOTE(review): no statement consuming this array is visible in this
// initializer; presumably each factory is registered with the guru -- confirm.
}
}
return factories;
}
/**
* Register a {@code FileAnalyzerFactory} instance.
*/
}
}
}
}
/**
* Instruct the AnalyzerGuru to use a given analyzer for a given
* file extension.
* @param extension the file-extension to add
* @param factory a factory which creates
* the analyzer to use for the given extension
* (if you pass null as the factory, you will disable
* the analyzer used for that extension)
*/
} else {
}
}
/**
* Get the default Analyzer.
*/
return DEFAULT_ANALYZER_FACTORY.getAnalyzer();
}
/**
* Get an analyzer suited to analyze a file. This function will reuse
* analyzers since they are costly.
*
* @param in Input stream containing data to be analyzed
* @param file Name of the file to be analyzed
* @return An analyzer suited for that file content
* @throws java.io.IOException If an error occurs while accessing the
* data in the input stream.
*/
return getAnalyzer();
}
return factory.getAnalyzer();
}
/**
* Create a Lucene document and fill in the required fields
* @param file The file to index
* @param in The data to generate the index for
* @param path Where the file is located (from source root)
* @return The Lucene document to add to the index database
* @throws java.io.IOException If an exception occurs while collecting the
* data
*/
try {
// date = hr.getLastCommentDate() //RFE
}
} catch (HistoryException e) {
}
}
}
));
}
}
return doc;
}
/**
* Get the content type for a named file.
*
* @param in The input stream we want to get the content type for (if
* we cannot determine the content type by the filename)
* @param file The name of the file
* @return The contentType suitable for printing to response.setContentType() or null
* if the factory was not found
* @throws java.io.IOException If an error occurs while accessing the input
* stream.
*/
}
return type;
}
/**
* Write a browsable version of the file
*
* @param factory The analyzer factory for this filetype
* @param in The input stream containing the data
* @param out Where to write the result
* @param defs definitions for the source file, if available
* @param annotation Annotation information for the file
* @param project Project the file belongs to
* @throws java.io.IOException If an error occurs while creating the
* output
*/
throws IOException
{
// This is some kind of text file, so we need to expand tabs to
// spaces to match the project's tab settings.
}
}
/**
* Get the genre of a file
*
* @param file The file to inspect
* @return The genre suitable to decide how to display the file
*/
}
/**
* Get the genre of a chunk of data
*
* @param in A stream containing the data
* @return The genre suitable to decide how to display the file
* @throws java.io.IOException If an error occurs while getting the content
*/
}
/**
* Get the genre for a named class (this is most likely an analyzer)
* @param factory the analyzer factory to get the genre for
* @return The genre of this class (null if not found)
*/
}
return null;
}
/**
* Find a {@code FileAnalyzerFactory} with the specified class name. If one
* doesn't exist, create one and register it.
*
* @param factoryClassName name of the factory class
* @return a file analyzer factory
*
* @throws ClassNotFoundException if there is no class with that name
* @throws ClassCastException if the class is not a subclass of {@code
* FileAnalyzerFactory}
* @throws IllegalAccessException if the constructor cannot be accessed
* @throws InstantiationException if the class cannot be instantiated
*/
{
}
/**
* Find a {@code FileAnalyzerFactory} which is an instance of the specified
* class. If one doesn't exist, create one and register it.
*
* @param factoryClass the factory class
* @return a file analyzer factory
*
* @throws ClassCastException if the class is not a subclass of {@code
* FileAnalyzerFactory}
* @throws IllegalAccessException if the constructor cannot be accessed
* @throws InstantiationException if the class cannot be instantiated
*/
{
for (FileAnalyzerFactory f : factories) {
if (f.getClass() == factoryClass) {
return f;
}
}
registerAnalyzer(f);
return f;
}
/**
* Finds a suitable analyzer class for a file name. If the analyzer cannot
* be determined by the file extension, try to look at the data in the
* InputStream to find a suitable analyzer.
*
* Use if you just want to find file type.
*
*
* @param in The input stream containing the data
* @param file The file name to get the analyzer for
* @return the analyzer factory to use
* @throws java.io.IOException If a problem occurs while reading the data
*/
throws IOException
{
// TODO above is not that great, since if 2 analyzers share one extension
// then only the first one registered will own it
// it would be cool if above could return more analyzers and below would
// then decide between them ...
return factory;
}
}
/**
* Finds a suitable analyzer class for a file name.
*
* @param file The file name to get the analyzer for
* @return the analyzer factory to use
*/
int i = 0;
}
if (dotpos >= 0) {
return factory;
}
}
// file doesn't have any of the extensions we know, try full match
}
/**
* Finds a suitable analyzer class for the data in this stream
*
* @param in The stream containing the data to analyze
* @return the analyzer factory to use
* @throws java.io.IOException if an error occurs while reading data from
* the stream
*/
byte[] content = new byte[8];
if (len < 8) {
/*
* Need at least 4 bytes to perform magic string matching.
*/
if (len < 4) {
return null;
}
}
return factory;
}
return fac;
}
}
return null;
}
/**
* Finds a suitable analyzer class for a magic signature
*
* @param signature the magic signature to look up
* @return the analyzer factory to use
*/
throws IOException {
// XXX this assumes ISO-8859-1 encoding (and should work in most cases
// for US-ASCII, UTF-8 and other ISO-8859-* encodings, but not always),
// we should try to be smarter than this...
}
if (a == null) {
}
// See if text files have the magic sequence if we remove the
// byte-order marker
if (sigWithoutBOM != null &&
}
}
}
return a;
}
/** Byte-order markers. */
static {
}
/**
* Strip away the byte-order marker from the string, if it has one.
*
* @param sig a sequence of bytes from which to remove the BOM
* @return a string without the byte-order marker, or <code>null</code> if
* the string doesn't start with a BOM
*/
int i = 0;
i++;
}
// BOM matched beginning of signature
return new String(
sig,
encoding);
}
}
}
return null;
}
}