opengrok/analysis/FileAnalyzer.java

	FileAnalyzer.java revision 1462
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 */
0N/Apackage org.opensolaris.opengrok.analysis;
0N/A
335N/Aimport java.io.File;
335N/Aimport java.io.IOException;
335N/Aimport java.io.InputStream;
335N/Aimport java.io.Reader;
1327N/Aimport java.util.logging.Logger;
1327N/A
335N/Aimport org.apache.lucene.analysis.Analyzer;
335N/Aimport org.apache.lucene.analysis.TokenStream;
335N/Aimport org.apache.lucene.document.Document;
271N/Aimport org.opensolaris.opengrok.configuration.Project;
99N/Aimport org.opensolaris.opengrok.configuration.RuntimeEnvironment;
1195N/Aimport org.opensolaris.opengrok.util.IOUtils;
0N/A
0N/A/**
0N/A * Base class for all different File Analyzers
0N/A *
0N/A * An Analyzer for a filetype provides
0N/A *<ol>
0N/A * <li>the file extentions and magic numbers it analyzes</li>
0N/A * <li>a lucene document listing the fields it can support</li>
0N/A * <li>TokenStreams for each of the field it said requires tokenizing in 2</li>
0N/A * <li>cross reference in HTML format</li>
0N/A * <li>The type of file data, plain text etc</li>
0N/A *</ol>
0N/A *
0N/A * Created on September 21, 2005
0N/A *
0N/A * @author Chandan
0N/A */
656N/Apublic class FileAnalyzer extends Analyzer {
1327N/A    private static final Logger logger = Logger.getLogger(FileAnalyzer.class.getName());
1462N/A    /** the project to which this analyzer is related to */
271N/A    protected Project project;
202N/A    private final FileAnalyzerFactory factory;
202N/A
0N/A    /**
0N/A     * What kind of file is this?
0N/A     */
0N/A    public static enum Genre {
1185N/A        /** xrefed - line numbered context */
1185N/A        PLAIN("p"),
1185N/A        /** xrefed - summarizer context */
1185N/A        XREFABLE("x"),
1185N/A        /** not xrefed - no context - used by diff/list */
1185N/A        IMAGE("i"),
1185N/A        /** not xrefed - no context */
1185N/A        DATA("d"),
1185N/A        /** not xrefed - summarizer context from original file */
1185N/A        HTML("h")
1185N/A        ;
1185N/A        private String typeName;
1185N/A        private Genre(String typename) {
1185N/A            this.typeName = typename;
1185N/A        }
1190N/A
1185N/A        /**
1185N/A         * Get the type name value used to tag lucence documents.
1185N/A         * @return a none-null string.
1185N/A         */
1185N/A        public String typeName() {
1185N/A            return typeName;
1185N/A        }
1190N/A
1185N/A        /**
1185N/A         * Get the Genre for the given type name.
1185N/A         * @param typeName name to check
1185N/A         * @return {@code null} if it doesn't match any genre, the genre otherwise.
1185N/A         * @see #typeName()
1185N/A         */
1185N/A        public static Genre get(String typeName) {
1185N/A            if (typeName == null) {
1185N/A                return null;
1185N/A            }
1185N/A            for (Genre g : values()) {
1185N/A                if (g.typeName.equals(typeName)) {
1185N/A                    return g;
1185N/A                }
1185N/A            }
1185N/A            return null;
1185N/A        }
656N/A    }
1462N/A
1462N/A    /**
1462N/A     * Ctags instance to use for tagging.
1462N/A     */
656N/A    protected Ctags ctags;
656N/A
1462N/A    /**
1462N/A     * Set the ctag instance to use for tagging this repo's files.
1462N/A     * @param ctags instance to set.
1462N/A     */
656N/A    public void setCtags(Ctags ctags) {
656N/A        this.ctags = ctags;
0N/A    }
202N/A
1462N/A    /**
1462N/A     *Set the project, to which this instance is related to.
1462N/A     * @param project   project to set.
1462N/A     */
922N/A    public void setProject(Project project) {
922N/A        this.project = project;
922N/A    }
922N/A
202N/A    /**
202N/A     * Get the factory which created this analyzer.
202N/A     * @return the {@code FileAnalyzerFactory} which created this analyzer
202N/A     */
202N/A    public final FileAnalyzerFactory getFactory() {
202N/A        return factory;
202N/A    }
202N/A
1462N/A    /**
1462N/A     * Get the genre, which this analyzer usually handles.
1462N/A     * @return the genre this analyzer handles.
1462N/A     * @see FileAnalyzerFactory#getGenre()
1462N/A     */
0N/A    public Genre getGenre() {
202N/A        return factory.getGenre();
0N/A    }
656N/A    private final HistoryAnalyzer hista;
202N/A
1462N/A    /** Creates a new instance of FileAnalyzer
1462N/A     * @param factory the factory to use to obtain default settings */
202N/A    public FileAnalyzer(FileAnalyzerFactory factory) {
202N/A        this.factory = factory;
656N/A        hista = new HistoryAnalyzer();
0N/A    }
656N/A
1462N/A    /**
1462N/A     * Read the given input, analyze it and store the result into the given
1462N/A     * document. NOTE: This method does not close given input stream on return!
1462N/A     *
1462N/A     * @param doc   where to store results.
1462N/A     * @param in    source to read.
1462N/A     * @throws IOException
1462N/A     */
1462N/A    @SuppressWarnings("unused") // yes, hybrid: [not] a interface, [not] abstract
889N/A    public void analyze(Document doc, InputStream in) throws IOException {
456N/A        // not used
0N/A    }
656N/A
1462N/A    /**
1462N/A     * Get the TokenStream for the given fieldname.
1462N/A     * @param fieldName filed name in question {@code path | project | hist}
1462N/A     * @param reader reader to use.
1462N/A     * @return {@code null} for unknown field names, a new corresponding
1462N/A     *  TokenStream otherwise.
1462N/A     */
1380N/A    public TokenStream overridableTokenStream(String fieldName, Reader reader) {
656N/A        if ("path".equals(fieldName) || "project".equals(fieldName)) {
656N/A            return new PathTokenizer(reader);
656N/A        } else if ("hist".equals(fieldName)) {
656N/A            return hista.tokenStream(fieldName, reader);
58N/A        }
1327N/A        logger.warning("Have no analyzer for '" + fieldName + "'");
656N/A        return null;
0N/A    }
656N/A
1462N/A    /**
1462N/A     * {@inheritDoc}
1462N/A     */
1380N/A    @Override
1380N/A    public final TokenStream tokenStream(String fieldName, Reader reader) {
1380N/A        return this.overridableTokenStream(fieldName, reader);
1380N/A    }
1380N/A
1462N/A    /**
1462N/A     * {@inheritDoc}
1462N/A     */
1380N/A    @Override
1380N/A    public final TokenStream reusableTokenStream(String fieldName, Reader reader) {
1380N/A        //TODO needs refactoring to get more speed and less ram usage for indexer
1380N/A        return this.tokenStream(fieldName, reader);
1380N/A    }
1380N/A
0N/A    /**
1462N/A     * Write out current results html formatted to the given writer.
1462N/A     *
0N/A     * @param out to writer HTML cross-reference
271N/A     * @throws java.io.IOException if an error occurs
0N/A     */
1462N/A    @SuppressWarnings("static-method")
1384N/A    public void writeXref(XrefWriter out) throws IOException {
656N/A        out.write("Error General File X-Ref writer!");
0N/A    }
656N/A
1462N/A    /**
1462N/A     * Write out current results html formatted to the given destination.
1462N/A     * @param xrefDir   the parent directory of the file to write.
1462N/A     * @param path      the relative path wrt. <var>xrefDir</var> of the file to write.
1462N/A     * @throws IOException
1462N/A     */
0N/A    public void writeXref(File xrefDir, String path) throws IOException {
1462N/A        @SuppressWarnings("resource")
1384N/A        XrefWriter w = null;
509N/A        try {
1384N/A            w = new XrefWriter(new File(xrefDir, path), factory.getGenre(),
1384N/A                RuntimeEnvironment.getInstance().isCompressXref());
559N/A            writeXref(w);
1384N/A        } finally {
1195N/A            IOUtils.close(w);
335N/A        }
0N/A    }
0N/A}