JFlexTokenizer.java revision eb11fe3584b7b243fb0641da4ab2e157610bb767
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
2ronwalf
/*
 * Copyright (c) 2009, 2015, Oracle and/or its affiliates. All rights reserved.
 */
2ronwalfpackage org.opensolaris.opengrok.analysis;
2ronwalf
2ronwalfimport java.io.IOException;
2ronwalfimport java.io.Reader;
2ronwalfimport java.util.Stack;
2ronwalfimport org.apache.lucene.analysis.Tokenizer;
2ronwalfimport org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
2ronwalfimport org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
2ronwalfimport org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
2ronwalf
2ronwalf/**
2ronwalf *
2ronwalf * Generally this is a "template" for all new Tokenizers, so be careful when
2ronwalf * changing it, it will impact almost ALL symbol tokenizers in OpenGrok ...
2ronwalf *
2ronwalf * Created on August 24, 2009
2ronwalf *
2ronwalf * @author Lubos Kosco
2ronwalf */
2ronwalfpublic abstract class JFlexTokenizer extends Tokenizer {
2ronwalf
2ronwalf protected Stack<Integer> stack = new Stack<>();
2ronwalf
2ronwalf // default jflex scanner methods and variables
2ronwalf abstract public boolean yylex() throws IOException;
2ronwalf
2ronwalf abstract public void yyreset(Reader reader);
2ronwalf
2ronwalf abstract public void yyclose() throws IOException;
2ronwalf
2ronwalf abstract public void yybegin(int newState);
2ronwalf
2ronwalf abstract public int yystate();
2ronwalf
2ronwalf //TODO can be removed once we figure out jflex generation of empty constructor
2ronwalf protected JFlexTokenizer(Reader in) {
2ronwalf super();
2ronwalf }
2ronwalf
2ronwalf protected JFlexTokenizer() {
2ronwalf super();
2ronwalf }
2ronwalf
2ronwalf /**
2ronwalf * Reinitialize the tokenizer with new reader.
2ronwalf * @throws java.io.IOException
2ronwalf */
2ronwalf @Override
2ronwalf public void reset() throws IOException {
2ronwalf super.reset();
2ronwalf stack.clear();
2ronwalf this.yyreset(input);
2ronwalf }
2ronwalf
2ronwalf @Override
2ronwalf public final void close() throws IOException {
2ronwalf super.close();
2ronwalf this.yyclose();
2ronwalf }
2ronwalf protected CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
2ronwalf protected OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
2ronwalf protected PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
2ronwalf protected int finalOffset;
2ronwalf
2ronwalf /**
2ronwalf * This will re-initalize internal AttributeImpls, or it returns false if
2ronwalf * end of input Reader ...
2ronwalf *
2ronwalf * @return false if no more tokens, otherwise true
2ronwalf * @throws IOException
2ronwalf */
2ronwalf @Override
public final boolean incrementToken() throws IOException {
return this.yylex();
}
protected void setAttribs(String str, int start, int end) {
clearAttributes();
//FIXME increasing below by one(default) might be tricky, need more analysis
// after lucene upgrade to 3.5 below is most probably not even needed
this.posIncrAtt.setPositionIncrement(1);
this.termAtt.setEmpty();
this.termAtt.append(str);
this.offsetAtt.setOffset(start, end);
}
public void yypush(int newState) {
this.stack.push(yystate());
this.yybegin(newState);
}
public void yypop() {
this.yybegin(this.stack.pop());
}
}