analysis/plain/PlainAnalyzer.java

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
 */
0N/Apackage org.opensolaris.opengrok.analysis.plain;
0N/A
392N/Aimport java.io.IOException;
392N/Aimport java.io.Reader;
392N/Aimport java.io.StringReader;
1268N/Aimport java.util.Arrays;
1384N/A
392N/Aimport org.apache.lucene.analysis.TokenStream;
392N/Aimport org.apache.lucene.document.Document;
392N/Aimport org.apache.lucene.document.Field;
392N/Aimport org.opensolaris.opengrok.analysis.Definitions;
922N/Aimport org.opensolaris.opengrok.analysis.ExpandTabsReader;
392N/Aimport org.opensolaris.opengrok.analysis.FileAnalyzerFactory;
392N/Aimport org.opensolaris.opengrok.analysis.Hash2TokenStream;
953N/Aimport org.opensolaris.opengrok.analysis.TextAnalyzer;
1384N/Aimport org.opensolaris.opengrok.analysis.XrefWriter;
271N/Aimport org.opensolaris.opengrok.configuration.Project;
89N/Aimport org.opensolaris.opengrok.history.Annotation;
0N/A
0N/A/**
0N/A * Analyzer for plain text files
0N/A * Created on September 21, 2005
0N/A *
0N/A * @author Chandan
0N/A */
953N/Apublic class PlainAnalyzer extends TextAnalyzer {
656N/A
1462N/A    /** buffer to store the content which needs to be tokenized/parsed */
225N/A    protected char[] content;
1462N/A    /** the number of chars in {@link #content} representing the original source
1462N/A     * read, i.e. the last {@code content.length - len} chars of content should
1462N/A     * be ignored (contain garbage).*/
225N/A    protected int len;
456N/A    private final PlainFullTokenizer plainfull;
456N/A    private final PlainSymbolTokenizer plainref;
456N/A    private final PlainXref xref;
225N/A    private static final Reader dummy = new StringReader(" ");
1462N/A    /** container, where found symbols get stored */
349N/A    protected Definitions defs;
656N/A
1462N/A    /** Creates a new instance of PlainAnalyzer
1462N/A     * @param factory   factory to use for fallback settings
1462N/A     */
202N/A    protected PlainAnalyzer(FileAnalyzerFactory factory) {
202N/A        super(factory);
0N/A        content = new char[64 * 1024];
0N/A        len = 0;
0N/A        plainfull = new PlainFullTokenizer(dummy);
0N/A        plainref = new PlainSymbolTokenizer(dummy);
0N/A        xref = new PlainXref((Reader) null);
0N/A    }
0N/A
1462N/A    /**
1462N/A     * {@inheritDoc}
1462N/A     */
508N/A    @Override
957N/A    public void analyze(Document doc, Reader in) throws IOException {
1462N/A        @SuppressWarnings("resource")
1462N/A        Reader inReader = ExpandTabsReader.wrap(in, project);
889N/A        len = 0;
889N/A        do {
889N/A            int rbytes = inReader.read(content, len, content.length - len);
1185N/A            if (rbytes >= 0) {
889N/A                if (rbytes == (content.length - len)) {
1268N/A                    content = Arrays.copyOf(content, content.length * 2);
656N/A                }
889N/A                len += rbytes;
889N/A            } else {
889N/A                break;
889N/A            }
889N/A        } while (true);
889N/A
99N/A        doc.add(new Field("full", dummy));
889N/A        String fullpath;
889N/A        if ((fullpath = doc.get("fullpath")) != null && ctags != null) {
889N/A            defs = ctags.doCtags(fullpath + "\n");
889N/A            if (defs != null && defs.numberOfSymbols() > 0) {
889N/A                doc.add(new Field("defs", dummy));
889N/A                doc.add(new Field("refs", dummy)); //@FIXME adding a refs field only if it has defs?
889N/A                byte[] tags = defs.serialize();
1318N/A                doc.add(new Field("tags", tags));
0N/A            }
0N/A        }
656N/A    }
656N/A
1462N/A    /**
1462N/A     * {@inheritDoc}
1462N/A     */
1185N/A    @Override
1380N/A    public TokenStream overridableTokenStream(String fieldName, Reader reader) {
656N/A        if ("full".equals(fieldName)) {
0N/A            plainfull.reInit(content, len);
0N/A            return plainfull;
0N/A        } else if ("refs".equals(fieldName)) {
0N/A            plainref.reInit(content, len);
0N/A            return plainref;
656N/A        } else if ("defs".equals(fieldName)) {
349N/A            return new Hash2TokenStream(defs.getSymbols());
0N/A        }
1380N/A        return super.overridableTokenStream(fieldName, reader);
0N/A    }
656N/A
1462N/A
0N/A    /**
1462N/A     * {@inheritDoc}
0N/A     */
1185N/A    @Override
1384N/A    public void writeXref(XrefWriter out) throws IOException {
0N/A        xref.reInit(content, len);
271N/A        xref.project = project;
0N/A        xref.write(out);
0N/A    }
0N/A
0N/A    /**
0N/A     * Write a cross referenced HTML file reads the source from in
0N/A     * @param in Input source
0N/A     * @param out Output xref writer
1127N/A     * @param defs definitions for the file (could be null)
89N/A     * @param annotation annotation for the file (could be null)
0N/A     */
1462N/A    static void writeXref(Reader in, XrefWriter out, Definitions defs,
1462N/A        Annotation annotation, Project project) throws IOException
1462N/A    {
0N/A        PlainXref xref = new PlainXref(in);
89N/A        xref.annotation = annotation;
271N/A        xref.project = project;
1127N/A        xref.setDefs(defs);
0N/A        xref.write(out);
0N/A    }
0N/A}