/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
 */
package org.opensolaris.opengrok.analysis;

import java.io.CharArrayReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Stack;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

/**
 * Base class that adapts a JFlex-style scanner to the Lucene
 * {@link Tokenizer} API.
 *
 * Generally this is a "template" for all new Tokenizers, so be careful when
 * changing it, it will impact almost ALL symbol tokenizers in OpenGrok ...
 *
 * Created on August 24, 2009
 *
 * @author Lubos Kosco
 */
public abstract class JFlexTokenizer extends Tokenizer {

    /**
     * Saved lexical states; {@link #yypush(int)} stores the current state
     * here and {@link #yypop()} restores the most recently stored one.
     */
    protected Stack<Integer> stack = new Stack<Integer>();

    // default jflex scanner methods and variables

    /**
     * Scans for the next token. The result is returned unchanged by
     * {@link #incrementToken()}.
     *
     * @return false if no more tokens, otherwise true
     * @throws IOException if the scanner fails to read its input
     */
    public abstract boolean yylex() throws IOException;

    /**
     * Points the scanner at a new input source.
     *
     * @param reader the reader to scan from now on
     */
    public abstract void yyreset(Reader reader);

    /**
     * Closes the scanner; invoked by {@link #close()}.
     *
     * @throws IOException if closing fails
     */
    public abstract void yyclose() throws IOException;

    /**
     * Switches the scanner to the given lexical state.
     *
     * @param newState the state to enter
     */
    public abstract void yybegin(int newState);

    /**
     * Returns the scanner's current lexical state.
     *
     * @return the current state
     */
    public abstract int yystate();

    /**
     * Creates a tokenizer and hands the reader to the Lucene
     * {@link Tokenizer} base class.
     *
     * @param input the reader passed to the superclass constructor
     */
    public JFlexTokenizer(java.io.Reader input) {
        super(input);
    }

    /**
     * Reinitialize the tokenizer with new contents.
     *
     * @param contents a char buffer with text to tokenize
     * @param length the number of characters to use from the char buffer
     */
    public final void reInit(char[] contents, int length) {
        this.yyreset(new CharArrayReader(contents, 0, length));
    }

    /**
     * Reinitialize the tokenizer with the contents of a string.
     *
     * @param s the text to tokenize
     */
    public final void reInit(String s) {
        this.yyreset(new StringReader(s));
    }

    /**
     * Reinitialize the tokenizer with new reader.
     *
     * @param reader new reader for this tokenizer
     */
    public final void reInit(Reader reader) {
        this.yyreset(reader);
    }

    /**
     * Closes the tokenizer by delegating to {@link #yyclose()}.
     *
     * @throws IOException if {@link #yyclose()} fails
     */
    @Override
    public final void close() throws IOException {
        this.yyclose();
    }

    /** Term text of the current token; written by {@link #setAttribs}. */
    protected CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    /** Start/end offsets of the current token; written by {@link #setAttribs}. */
    protected OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
    /** Position increment of the current token; written by {@link #setAttribs}. */
    protected PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
    // Not read or written in this class; presumably maintained by
    // subclasses -- TODO confirm.
    protected int finalOffset;

    /**
     * This will re-initialize internal AttributeImpls, or it returns false if
     * end of input Reader ...
     *
     * @return false if no more tokens, otherwise true
     * @throws java.io.IOException
     */
    @Override
    public final boolean incrementToken() throws java.io.IOException {
        return this.yylex();
    }

    /**
     * Publishes a token through the Lucene attributes: clears all attributes,
     * sets the position increment to 1, replaces the term text with
     * {@code str} and records the given offsets.
     *
     * @param str the token text
     * @param start the start offset of the token
     * @param end the end offset of the token
     */
    protected void setAttribs(String str, int start, int end) {
        clearAttributes();
        //FIXME increasing below by one(default) might be tricky, need more analysis
        // after lucene upgrade to 3.5 below is most probably not even needed
        this.posIncrAtt.setPositionIncrement(1);
        this.termAtt.setEmpty();
        this.termAtt.append(str);
        this.offsetAtt.setOffset(start, end);
    }

    /**
     * Saves the current lexical state on the stack and switches to
     * {@code newState}.
     *
     * @param newState the state to enter
     */
    public void yypush(int newState) {
        this.stack.push(yystate());
        this.yybegin(newState);
    }

    /**
     * Switches back to the most recently saved lexical state and removes it
     * from the stack.
     *
     * @throws java.util.EmptyStackException if no state has been saved
     */
    public void yypop() {
        // The typed stack lets the popped Integer unbox to the int that
        // yybegin expects; a raw Stack would yield Object here.
        this.yybegin(this.stack.pop());
    }
}