opengrok/analysis/JFlexTokenizer.java

	JFlexTokenizer.java revision 1057
0N/A/*
0N/A * CDDL HEADER START
0N/A *
0N/A * The contents of this file are subject to the terms of the
137N/A * Common Development and Distribution License (the "License").
0N/A * You may not use this file except in compliance with the License.
0N/A *
0N/A * See LICENSE.txt included in this distribution for the specific
0N/A * language governing permissions and limitations under the License.
0N/A *
0N/A * When distributing Covered Code, include this CDDL HEADER in each
0N/A * file and include the License file at LICENSE.txt.
0N/A * If applicable, add the following below this CDDL HEADER, with the
0N/A * fields enclosed by brackets "[]" replaced with your own identifying
0N/A * information: Portions Copyright [yyyy] [name of copyright owner]
0N/A *
0N/A * CDDL HEADER END
0N/A */
0N/A
0N/A/*
137N/A * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
0N/A */
0N/A
0N/Apackage org.opensolaris.opengrok.analysis;
0N/A
0N/Aimport java.io.CharArrayReader;
137N/Aimport java.io.IOException;
137N/Aimport java.io.Reader;
137N/Aimport org.apache.lucene.analysis.Tokenizer;
137N/Aimport org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
137N/Aimport org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
137N/Aimport org.apache.lucene.analysis.tokenattributes.TermAttribute;
137N/A
/**
 * This class was created because of the Lucene 2.4.1 update, which introduced char[] in Tokens instead of String.
 * Lucene 3.0.0 uses AttributeSource instead of Tokens to make things even easier :-D
 *
 * Generally this is a "template" for all new Tokenizers, so be careful when changing it;
 * it will impact almost ALL symbol tokenizers in OpenGrok ...
 *
 * Created on August 24, 2009
 * @author Lubos Kosco
 */
0N/A
0N/Apublic abstract class JFlexTokenizer extends Tokenizer {
0N/A
0N/A    // default jflex scanner methods and variables
0N/A    abstract public boolean yylex() throws IOException;
0N/A    abstract public void yyreset(Reader reader);
0N/A    abstract public void yyclose() throws IOException;
0N/A
0N/A    /**
0N/A     * Reinitialize the tokenizer with new contents.
0N/A     *
137N/A     * @param contents a char buffer with text to tokenize
0N/A     * @param length the number of characters to use from the char buffer
0N/A     */
0N/A    public final void reInit(char[] contents, int length) {
0N/A        yyreset(new CharArrayReader(contents, 0, length));
137N/A    }
0N/A
137N/A    @Override
0N/A    public final void close() throws IOException {
0N/A        yyclose();
457N/A    }
0N/A
0N/A    protected TermAttribute termAtt= (TermAttribute) addAttribute(TermAttribute.class);
0N/A    protected OffsetAttribute offsetAtt=(OffsetAttribute) addAttribute(OffsetAttribute.class);
137N/A    protected PositionIncrementAttribute posIncrAtt= (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
0N/A
0N/A    /**
0N/A     * This will reinitalize internal AttributeImpls, or it returns false if end of input Reader ...
0N/A     * @return false if no more tokens, otherwise true
0N/A     * @throws java.io.IOException
137N/A     */
0N/A    @Override
137N/A    public boolean incrementToken() throws java.io.IOException {
0N/A        return this.yylex();
0N/A    }
0N/A
0N/A    protected void setAttribs(String str, int start, int end) {
137N/A        //FIXME increasing below by one(default) might be tricky, need more analysis
0N/A        this.posIncrAtt.setPositionIncrement(1);
0N/A        this.termAtt.setTermBuffer(str);
0N/A        this.offsetAtt.setOffset(start, end);
0N/A    }
137N/A}
0N/A