opengrok/analysis/JFlexTokenizer.java

	JFlexTokenizer.java revision 1461
816N/A/*
816N/A * CDDL HEADER START
816N/A *
816N/A * The contents of this file are subject to the terms of the
816N/A * Common Development and Distribution License (the "License").
816N/A * You may not use this file except in compliance with the License.
816N/A *
816N/A * See LICENSE.txt included in this distribution for the specific
816N/A * language governing permissions and limitations under the License.
816N/A *
816N/A * When distributing Covered Code, include this CDDL HEADER in each
816N/A * file and include the License file at LICENSE.txt.
816N/A * If applicable, add the following below this CDDL HEADER, with the
816N/A * fields enclosed by brackets "[]" replaced with your own identifying
816N/A * information: Portions Copyright [yyyy] [name of copyright owner]
816N/A *
816N/A * CDDL HEADER END
816N/A */
816N/A
816N/A/*
1056N/A * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
816N/A */
1056N/A
816N/Apackage org.opensolaris.opengrok.analysis;
816N/A
1056N/Aimport java.io.CharArrayReader;
1057N/Aimport java.io.IOException;
1056N/Aimport java.io.Reader;
1461N/Aimport java.util.ArrayDeque;
1461N/A
816N/Aimport org.apache.lucene.analysis.Tokenizer;
1318N/Aimport org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
928N/Aimport org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
928N/Aimport org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
816N/A
816N/A/**
1461N/A * This class was created because of lucene:
1461N/A * <ul>
1461N/A * <li>2.4.1 update which introduced char[] in Tokens instead of String</li>
1461N/A * <li>3.0.0 uses AttributeSource instead of Tokens to make things even easier :-D</li>
1461N/A * <li>3.5.0 uses CharTermAttribute</li>
1461N/A * </ul>
928N/A *
1461N/A * Generally this is a "template" for all new Tokenizers, so be careful when
1461N/A * changing it, it will impact almost ALL symbol tokenizers in OpenGrok ...
816N/A *
816N/A * Created on August 24, 2009
816N/A * @author Lubos Kosco
816N/A */
816N/A
816N/Apublic abstract class JFlexTokenizer extends Tokenizer {
816N/A
1461N/A    /** Stack to remember the order of relevant states for the current parser. */
1461N/A    protected ArrayDeque<Integer> stateStack = new ArrayDeque<Integer>();
1439N/A
1461N/A    /**
1461N/A     * Run the scanner to get the next token from the input.
1461N/A     * @return {@code true} if a new Token is available/was found.
1461N/A     * @throws IOException */
1057N/A    abstract public boolean yylex() throws IOException;
1461N/A    /**
1461N/A     * Closes the current input stream, and resets the scanner to read from the
1461N/A     * given input stream. All internal variables are reset, the old input
1461N/A     * stream cannot be reused (content of the internal buffer is discarded and
1461N/A     * lost). The lexical state is set to {@code YY_INITIAL}.
1461N/A     * @param reader the new input stream to operate on.*/
1056N/A    abstract public void yyreset(Reader reader);
1461N/A    /**
1461N/A     * Closes the input stream in use. All subsequent calls to the scanning
1461N/A     * method will return the end of file value.
1461N/A     * @throws IOException
1461N/A     */
1057N/A    abstract public void yyclose() throws IOException;
1461N/A    /**
1461N/A     * Enter the given lexical state.
1461N/A     * @param newState state to enter
1461N/A     */
1439N/A    abstract public void yybegin(int newState);
1461N/A    /**
1461N/A     * Get the current lexical state of the scanner.
1461N/A     * @return a lexical state.
1461N/A     */
1439N/A    abstract public int yystate();
1439N/A
1461N/A    /**
1461N/A     * Create a new tokenizer using the given stream.
1461N/A     * @param input  input to process. Might be {@code null}.
1461N/A     */
1425N/A    public JFlexTokenizer(java.io.Reader input) {
1425N/A        super(input);
1425N/A    }
1056N/A
1056N/A    /**
1056N/A     * Reinitialize the tokenizer with new contents.
1056N/A     *
1056N/A     * @param contents a char buffer with text to tokenize
1056N/A     * @param length the number of characters to use from the char buffer
1056N/A     */
1056N/A    public final void reInit(char[] contents, int length) {
1056N/A        yyreset(new CharArrayReader(contents, 0, length));
1056N/A    }
1056N/A
1461N/A    /**
1461N/A     * Close the scanner including the input stream in use.
1461N/A     * @see #yyclose()
1461N/A     */
1057N/A    @Override
1057N/A    public final void close() throws IOException {
1057N/A        yyclose();
1057N/A    }
1057N/A
1461N/A    /** term text of the current Token */
1461N/A    protected CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
1461N/A    /** start and end character offset of the current Token */
1461N/A    protected OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
1461N/A    /** position of the current Token relative to the previous Token within the
1461N/A     * related TokenStream */
1461N/A    protected PositionIncrementAttribute posIncrAtt =
1461N/A        addAttribute(PositionIncrementAttribute.class);
849N/A
816N/A    /**
1461N/A     * Go forward to the next available token.
1461N/A     * @return {@code false} if no more tokens available.
816N/A     * @throws java.io.IOException
1190N/A     */
816N/A    @Override
1380N/A    public final boolean incrementToken() throws java.io.IOException {
1190N/A        return this.yylex();
928N/A    }
928N/A
1461N/A    /**
1461N/A     * Reset the attributes for the current Token.
1461N/A     * NOTE: For now PositionIncrement gets automatically set to {@code 1}.
1461N/A     * @param str  Token text to set.
1461N/A     * @param start  the start psition of the current Token
1461N/A     * @param end  the end position of the current Token
1461N/A     * @see #termAtt
1461N/A     * @see #offsetAtt
1461N/A     * @see #posIncrAtt
1461N/A     */
1004N/A    protected void setAttribs(String str, int start, int end) {
930N/A        //FIXME increasing below by one(default) might be tricky, need more analysis
1318N/A        // after lucene upgrade to 3.5 below is most probably not even needed
928N/A        this.posIncrAtt.setPositionIncrement(1);
1318N/A        this.termAtt.setEmpty();
1318N/A        this.termAtt.append(str);
928N/A        this.offsetAtt.setOffset(start, end);
928N/A    }
1439N/A
1461N/A    /**
1461N/A     * Push the current state to the state order stack and enter the given state.
1461N/A     * @param newState  new state to enter.
1461N/A     * @see #stateStack
1461N/A     * @see #yystate()
1461N/A     * @see #yybegin(int)
1461N/A     */
1461N/A    @SuppressWarnings("boxing")
1439N/A    public void yypush(int newState) {
1461N/A        stateStack.push(yystate());
1439N/A        yybegin(newState);
1439N/A    }
1439N/A
1461N/A    /**
1461N/A     * Pop the last entry from the state order stack and enter it.
1461N/A     * @see #stateStack
1461N/A     * @see #yybegin(int)
1461N/A     */
1461N/A    @SuppressWarnings("boxing")
1439N/A    public void yypop() {
1461N/A        yybegin(stateStack.pop());
1439N/A    }
816N/A}