AnalyzerGuru.java revision 58
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
58N/A
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
58N/A
/*
 * ident "@(#)AnalyzerGuru.java 1.3 06/02/22 SMI"
 */
234N/Apackage org.opensolaris.opengrok.analysis;
234N/A
1287N/Aimport org.opensolaris.opengrok.analysis.FileAnalyzer.Genre;
639N/Aimport org.opensolaris.opengrok.analysis.document.TroffAnalyzer;
639N/Aimport org.opensolaris.opengrok.analysis.java.JavaAnalyzer;
234N/Aimport org.opensolaris.opengrok.analysis.lisp.LispAnalyzer;
234N/Aimport org.opensolaris.opengrok.analysis.plain.*;
1470N/Aimport org.opensolaris.opengrok.analysis.c.*;
234N/Aimport org.opensolaris.opengrok.analysis.sh.*;
1289N/Aimport org.opensolaris.opengrok.analysis.data.*;
234N/Aimport java.io.*;
639N/Aimport java.util.*;
639N/Aimport java.lang.reflect.*;
1463N/Aimport org.apache.lucene.document.*;
58N/Aimport org.apache.lucene.analysis.*;
1185N/Aimport org.opensolaris.opengrok.analysis.archive.*;
667N/Aimport org.opensolaris.opengrok.analysis.executables.*;
1185N/Aimport org.opensolaris.opengrok.configuration.Project;
1016N/Aimport org.opensolaris.opengrok.configuration.RuntimeEnvironment;
58N/Aimport org.opensolaris.opengrok.history.*;
1185N/Aimport org.opensolaris.opengrok.web.Util;
1016N/A
1470N/A/**
1436N/A * Manages and porvides Analyzers as needed.
1185N/A * Created on September 22, 2005
664N/A *
1026N/A * @author Chandan
112N/A */
1195N/Apublic class AnalyzerGuru {
1419N/A private static HashMap<String, Class<? extends FileAnalyzer>> ext;
58N/A private static SortedMap<String, Class<? extends FileAnalyzer>> magics;
58N/A private static ArrayList<Method> matchers;
77N/A /*
77N/A * If you write your own analyzer please register it here
77N/A */
77N/A private static ArrayList<Class<? extends FileAnalyzer>> analyzers =
58N/A new ArrayList<Class<? extends FileAnalyzer>>();
418N/A static {
1462N/A analyzers.add(IgnorantAnalyzer.class);
1462N/A analyzers.add(BZip2Analyzer.class);
1327N/A analyzers.add(FileAnalyzer.class);
1327N/A analyzers.add(XMLAnalyzer.class);
1463N/A analyzers.add(TroffAnalyzer.class);
1327N/A analyzers.add(ELFAnalyzer.class);
1327N/A analyzers.add(JavaClassAnalyzer.class);
1327N/A analyzers.add(ImageAnalyzer.class);
1356N/A analyzers.add(JarAnalyzer.class);
1356N/A analyzers.add(ZipAnalyzer.class);
1356N/A analyzers.add(TarAnalyzer.class);
1463N/A analyzers.add(CAnalyzer.class);
1463N/A analyzers.add(ShAnalyzer.class);
1463N/A analyzers.add(PlainAnalyzer.class);
1463N/A analyzers.add(GZIPAnalyzer.class);
1463N/A analyzers.add(JavaAnalyzer.class);
1463N/A analyzers.add(LispAnalyzer.class);
1463N/A }
1463N/A
1463N/A private static HashMap<Class<? extends FileAnalyzer>, FileAnalyzer>
1463N/A analyzerInstances =
1463N/A new HashMap<Class<? extends FileAnalyzer>, FileAnalyzer>();
1463N/A
1463N/A /**
1463N/A * Initializes an AnalyzerGuru
1463N/A */
1463N/A static {
1463N/A if (ext == null) {
1463N/A ext = new HashMap<String, Class<? extends FileAnalyzer>>();
58N/A }
1436N/A if (magics == null) {
1436N/A magics = new TreeMap<String, Class<? extends FileAnalyzer>>();
1436N/A // TODO: have a comparator
58N/A }
773N/A if (matchers == null) {
773N/A matchers = new ArrayList<Method>();
773N/A }
773N/A for (Class<? extends FileAnalyzer> analyzer: analyzers) {
58N/A try{
1436N/A String[] suffixes = (String[]) analyzer.getField("suffixes").get(null);
1436N/A for (String suffix: suffixes) {
1436N/A //System.err.println(analyzer.getSimpleName() + " = " + suffix);
773N/A ext.put(suffix, analyzer);
773N/A }
58N/A } catch (Exception e) {
58N/A // System.err.println("AnalyzerFinder:" + analyzer.getSimpleName() + e);
58N/A }
664N/A try{
58N/A String[] smagics = (String[]) analyzer.getField("magics").get(null);
65N/A for (String magic: smagics) {
1436N/A //System.err.println(analyzer.getSimpleName() + " = " + magic);
1436N/A magics.put(magic, analyzer);
1436N/A }
1436N/A } catch (Exception e) {
1436N/A // System.err.println("AnalyzerFinder: " + analyzer.getSimpleName() + e);
77N/A }
99N/A try{
99N/A Method m = analyzer.getMethod("isMagic", byte[].class);
1115N/A if (m != null) matchers.add(m);
1115N/A } catch (Exception e) {
125N/A }
112N/A }
1026N/A //System.err.println("Exts " + ext);
129N/A //System.err.println("Matchers " + matchers);
1100N/A }
129N/A
129N/A /*
318N/A * Get the default Analyzer.
318N/A */
144N/A public static FileAnalyzer getAnalyzer() {
173N/A
253N/A Class<FileAnalyzer> a = FileAnalyzer.class;
296N/A FileAnalyzer fa = analyzerInstances.get(a);
335N/A if (fa == null) {
480N/A try {
816N/A fa = (FileAnalyzer) a.newInstance();
816N/A analyzerInstances.put(a, fa);
833N/A return fa;
833N/A } catch (Exception e) {
1416N/A System.err.println("ERROR: Initializing " + a);
1185N/A }
1016N/A }
1123N/A return fa;
1125N/A }
1218N/A
1185N/A /*
1463N/A * use this if you want to analyze a file. Analyzers are costly.
1463N/A */
1463N/A public static FileAnalyzer getAnalyzer(InputStream in, String path) throws IOException {
1326N/A Class<? extends FileAnalyzer> a = find(in, path);
993N/A if(a == null) {
1185N/A a = FileAnalyzer.class;
1185N/A }
1190N/A if (a != null) {
1436N/A FileAnalyzer fa = analyzerInstances.get(a);
1185N/A if (fa == null) {
1185N/A try {
1252N/A fa = (FileAnalyzer) a.newInstance();
1185N/A analyzerInstances.put(a, fa);
1185N/A return fa;
1185N/A } catch (Exception e) {
1185N/A System.err.println("ERROR: Initializing " + a);
1185N/A }
1185N/A } else {
1185N/A return fa;
1185N/A }
1436N/A }
1185N/A return null;
1185N/A }
1252N/A
1185N/A public Document getDocument(File f, InputStream in, String path) throws IOException {
1185N/A Document doc = new Document();
1185N/A String date = DateField.timeToString(f.lastModified());
1185N/A doc.add(new org.apache.lucene.document.Field("u", Util.uid(path, date), false, true, false));
1185N/A doc.add(new org.apache.lucene.document.Field("fullpath", f.getAbsolutePath(), true, true, true));
1461N/A try{
1461N/A HistoryReader hr = HistoryGuru.getInstance().getHistoryReader(f);
1461N/A if (hr != null) {
1461N/A doc.add(org.apache.lucene.document.Field.Text("hist", hr));
1461N/A // date = hr.getLastCommentDate() //RFE
1185N/A }
993N/A } catch (IOException e) {
993N/A }
993N/A doc.add(org.apache.lucene.document.Field.Keyword("date", date));
1461N/A if(path != null) {
1461N/A doc.add(new org.apache.lucene.document.Field("path", path, true, true, true));
1461N/A
1461N/A RuntimeEnvironment env = RuntimeEnvironment.getInstance();
1461N/A if (env.hasProjects()) {
1185N/A StringBuilder sb = new StringBuilder();
993N/A
993N/A for (Project proj : env.getProjects()) {
937N/A if (path.indexOf(proj.getPath()) == 0) {
1436N/A doc.add(org.apache.lucene.document.Field.Text("project", proj.getPath()));
1436N/A }
1436N/A }
58N/A }
816N/A }
58N/A FileAnalyzer fa = null;
58N/A try {
773N/A fa = getAnalyzer(in, path);
58N/A } catch (Exception e) {
664N/A
1419N/A }
1419N/A if (fa != null) {
1419N/A try {
1327N/A Genre g = fa.getGenre();
870N/A if (g == Genre.PLAIN) {
870N/A doc.add(new org.apache.lucene.document.Field("t", "p", true, false, false));
99N/A } else if ( g == Genre.XREFABLE) {
1115N/A doc.add(new org.apache.lucene.document.Field("t", "x", true, false, false));
101N/A } else if ( g == Genre.HTML) {
106N/A doc.add(new org.apache.lucene.document.Field("t", "h", true, false, false));
112N/A }
1026N/A fa.analyze(doc, in);
129N/A } catch (Exception e) {
129N/A // Ignoring any errors while analysing
129N/A }
875N/A }
318N/A doc.removeField("fullpath");
1356N/A
173N/A return doc;
253N/A }
296N/A
335N/A /**
480N/A * @return The contentType suitable for printing to response.setContentType()
816N/A */
816N/A public static String getContentType(String path) {
993N/A Class<? extends FileAnalyzer> a = find(path);
1016N/A return getContentType(a);
1315N/A }
1185N/A
1463N/A public static String getContentType(InputStream in, String path) throws IOException {
1463N/A Class<? extends FileAnalyzer> a = find(in, path);
1463N/A return getContentType(a);
1463N/A }
1463N/A
1463N/A public static String getContentType(Class<? extends FileAnalyzer> a) {
1463N/A String contentType = null;
1463N/A if (a != null) {
1463N/A try {
1463N/A contentType = (String) a.getMethod("getContentType").invoke(null);
1463N/A } catch (Exception e ) {
1463N/A
1463N/A }
1463N/A }
1463N/A return contentType;
1463N/A }
1463N/A
1463N/A public static void writeXref(Class<? extends FileAnalyzer> a,
1463N/A InputStream in, Writer out)
1463N/A throws IOException {
1463N/A if (a != null) {
1463N/A try {
1463N/A a.getMethod("writeXref", InputStream.class, Writer.class).invoke(null, in, out);
1463N/A } catch (IllegalArgumentException ex) {
1463N/A } catch (SecurityException ex) {
1463N/A } catch (NoSuchMethodException ex) {
1463N/A } catch (InvocationTargetException ex) {
1463N/A } catch (IllegalAccessException ex) {
1463N/A }
1463N/A }
1463N/A }
1463N/A
1463N/A /**
1463N/A * @return The genre suitable to decide how to display the file
1463N/A */
1463N/A public static Genre getGenre(String path) {
1463N/A Class a = find(path);
1463N/A return getGenre(a);
1463N/A }
1463N/A
1463N/A public static Genre getGenre(InputStream in, String path) throws IOException {
1463N/A Class a = find(in, path);
1463N/A return getGenre(a);
1463N/A }
1463N/A
1463N/A public static Genre getGenre(InputStream in) throws IOException {
1463N/A Class a = find(in);
1463N/A return getGenre(a);
1463N/A }
58N/A
937N/A public static Genre getGenre(Class a) {
1461N/A Genre g = null;
1461N/A if (a != null) {
1461N/A try {
1461N/A g = (Genre) a.getField("g").get(null);
1461N/A } catch (Exception e ) {
1461N/A e.printStackTrace();
1185N/A }
1185N/A }
1185N/A return g;
1190N/A }
1461N/A
1461N/A /**
1461N/A * Finds a suitable analyser class for an InputStream and a file name
1461N/A * Use if you just want to find file type.
1461N/A */
1461N/A public static Class<? extends FileAnalyzer>
1461N/A find(InputStream in, String path) throws IOException {
1461N/A Class<? extends FileAnalyzer> a = find(path);
1461N/A if(a == null) {
1185N/A a = find(in);
1185N/A }
1185N/A return a;
1185N/A }
1185N/A
1185N/A public static Class<? extends FileAnalyzer> find(String path) {
1185N/A int i = 0;
1185N/A if ((i = path.lastIndexOf('/')) > 0 || (i = path.lastIndexOf('\\')) > 0) {
1185N/A if(i+1<path.length())
1185N/A path = path.substring(i+1);
1461N/A }
1461N/A path = path.toUpperCase();
1461N/A int dotpos = path.lastIndexOf('.');
1461N/A if(dotpos >= 0) {
1461N/A Class<? extends FileAnalyzer> analyzer =
1461N/A ext.get(path.substring(dotpos+1).toUpperCase());
1185N/A if (analyzer != null) {
1185N/A //System.err.println(path.substring(dotpos+1).toUpperCase() + " = " + analyzer.getSimpleName());
1185N/A return analyzer;
1190N/A }
1461N/A }
1461N/A return(ext.get(path));
1461N/A }
1461N/A
1461N/A public static Class<? extends FileAnalyzer> find(InputStream in)
1461N/A throws IOException {
1185N/A in.mark(8);
1185N/A byte[] content = new byte[8];
1185N/A int len = in.read(content);
1185N/A in.reset();
1190N/A if (len < 4)
1461N/A return null;
1461N/A Class<? extends FileAnalyzer> a = find(content);
1461N/A if(a == null) {
1461N/A for(Method matcher: matchers) {
58N/A try {
58N/A //System.out.println("USING = " + matcher.getName());
58N/A
937N/A // cannot check conversion because of reflection
1461N/A @SuppressWarnings("unchecked")
1461N/A Class<? extends FileAnalyzer> c =
1461N/A (Class) matcher.invoke(null, content);
1461N/A
58N/A if (c != null) {
58N/A return c;
58N/A }
937N/A } catch (Exception e ) {
1461N/A e.printStackTrace();
1461N/A }
1461N/A }
1461N/A }
1461N/A return a;
1461N/A }
816N/A
816N/A public static Class<? extends FileAnalyzer> find(byte[] content) {
816N/A char[] chars = new char[content.length > 8 ? 8 : content.length];
816N/A for (int i = 0; i< chars.length ; i++) {
1461N/A chars[i] = (char)(0xFF & content[i]);
1461N/A }
1461N/A return(findMagic(new String(chars)));
1461N/A }
1461N/A
1461N/A public static Class<? extends FileAnalyzer> findMagic(String content) {
816N/A Class<? extends FileAnalyzer> a = magics.get(content);
816N/A if (a == null) {
816N/A for(String magic: magics.keySet()) {
816N/A if(content.startsWith(magic)) {
1461N/A return magics.get(magic);
1461N/A }
1461N/A }
1461N/A }
816N/A return a;
816N/A }
816N/A
816N/A public static void main(String [] args) throws Exception {
1461N/A AnalyzerGuru af = new AnalyzerGuru();
1461N/A System.out.println("<pre wrap=true>");
1461N/A for(String arg: args) {
1461N/A try {
816N/A Class<? extends FileAnalyzer> an = af.find(arg);
816N/A File f = new File(arg);
816N/A BufferedInputStream in = new BufferedInputStream(new FileInputStream(f));
773N/A FileAnalyzer fa = af.getAnalyzer(in, arg);
773N/A System.out.println("\nANALYZER = " + fa);
773N/A Document doc = af.getDocument(f, in, arg);
1436N/A System.out.println("\nDOCUMENT = " + doc);
1436N/A Enumeration fields = doc.fields();
1436N/A while (fields.hasMoreElements()) {
773N/A org.apache.lucene.document.Field field = (org.apache.lucene.document.Field) fields.nextElement();
58N/A if(field.isTokenized()){
58N/A Reader r = field.readerValue();
58N/A if(r == null) {
773N/A r = new StringReader(field.stringValue());
773N/A }
773N/A TokenStream ts = fa.tokenStream(field.name(), r);
1436N/A System.out.println("\nFIELD = " + field.name() + " TOKEN STREAM = "+ ts.getClass().getName());
773N/A Token t;
773N/A while((t = ts.next()) != null) {
58N/A System.out.print(t.termText());
58N/A System.out.print(' ');
58N/A }
773N/A System.out.println();
773N/A }
1436N/A if(field.isStored()) {
1436N/A System.out.println("\nFIELD = " + field.name());
773N/A if(field.readerValue() == null) {
773N/A System.out.println(field.stringValue());
773N/A } else {
773N/A System.out.println("STORING THE READER");
773N/A }
58N/A }
58N/A }
58N/A System.out.println("Writing XREF--------------");
773N/A Writer out = new OutputStreamWriter(System.out);
773N/A fa.writeXref(out);
1436N/A out.flush();
1436N/A } catch (Exception e) {
773N/A System.err.println("ERROR: " + e.getMessage());
773N/A e.printStackTrace();
773N/A }
58N/A }
58N/A }
58N/A}
773N/A