/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */
package org.opensolaris.opengrok.analysis;

import java.io.CharArrayReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayDeque;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

/**
 * This class was created because of Lucene: it maps the scanner methods
 * declared below ({@link #yylex()}, {@link #yyreset(Reader)},
 * {@link #yyclose()}) onto the Lucene {@link Tokenizer} methods
 * ({@link #incrementToken()}, {@link #close()}).
 *
 * Generally this is a "template" for all new Tokenizers, so be careful when
 * changing it, it will impact almost ALL symbol tokenizers in OpenGrok ...
 *
 * Created on August 24, 2009
 * @author Lubos Kosco
 */
public abstract class JFlexTokenizer extends Tokenizer {

    /**
     * Stack to remember the order of relevant states for the current parser.
     * Typed as {@code Integer} so that popped entries can be handed straight
     * back to {@link #yybegin(int)}.
     */
    protected ArrayDeque<Integer> stateStack = new ArrayDeque<Integer>();

    /**
     * Run the scanner to get the next token from the input.
     * @return {@code true} if a new Token is available/was found.
     * @throws IOException
     */
    abstract public boolean yylex() throws IOException;

    /**
     * Closes the current input stream, and resets the scanner to read from the
     * given input stream. All internal variables are reset, the old input
     * stream cannot be reused (content of the internal buffer is discarded and
     * lost). The lexical state is set to {@code YY_INITIAL}.
     * @param reader the new input stream to operate on.
     */
    abstract public void yyreset(Reader reader);

    /**
     * Closes the input stream in use. All subsequent calls to the scanning
     * method will return the end of file value.
     * @throws IOException
     */
    abstract public void yyclose() throws IOException;

    /**
     * Enter the given lexical state.
     * @param newState state to enter
     */
    abstract public void yybegin(int newState);

    /**
     * Get the current lexical state of the scanner.
     * @return a lexical state.
     */
    abstract public int yystate();

    /**
     * Create a new tokenizer using the given stream.
     * @param input input to process. Might be {@code null}.
     */
    public JFlexTokenizer(Reader input) {
        super(input);
    }

    /**
     * Reinitialize the tokenizer with new contents. The first {@code length}
     * characters of {@code contents} are wrapped in a {@link CharArrayReader}
     * and passed to {@link #yyreset(Reader)}.
     *
     * @param contents a char buffer with text to tokenize
     * @param length the number of characters to use from the char buffer
     */
    public final void reInit(char[] contents, int length) {
        yyreset(new CharArrayReader(contents, 0, length));
    }

    /**
     * Close the scanner including the input stream in use.
     * @throws IOException if {@link #yyclose()} fails
     * @see #yyclose()
     */
    @Override
    public final void close() throws IOException {
        // NOTE(review): only the scanner is closed here; super.close() is not
        // invoked - confirm this is intended for the Lucene version in use.
        yyclose();
    }

    /** term text of the current Token */
    protected CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

    /** start and end character offset of the current Token */
    protected OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

    /**
     * position of the current Token relative to the previous Token within the
     * related TokenStream
     */
    protected PositionIncrementAttribute posIncrAtt =
            addAttribute(PositionIncrementAttribute.class);

    /**
     * Go forward to the next available token by delegating to
     * {@link #yylex()}.
     * @return {@code false} if no more tokens available.
     * @throws IOException
     */
    @Override
    public final boolean incrementToken() throws IOException {
        return this.yylex();
    }

    /**
     * Reset the attributes for the current Token.
     * NOTE: For now PositionIncrement gets automatically set to {@code 1}.
     * @param str Token text to set.
     * @param start the start position of the current Token
     * @param end the end position of the current Token
     * @see #termAtt
     * @see #offsetAtt
     * @see #posIncrAtt
     */
    protected void setAttribs(String str, int start, int end) {
        //FIXME increasing below by one(default) might be tricky, need more analysis
        // after lucene upgrade to 3.5 below is most probably not even needed
        this.posIncrAtt.setPositionIncrement(1);
        // Replace (not extend) whatever term text the previous token left behind.
        this.termAtt.setEmpty();
        this.termAtt.append(str);
        this.offsetAtt.setOffset(start, end);
    }

    /**
     * Push the current state to the state order stack and enter the given state.
     * @param newState new state to enter.
     * @see #stateStack
     * @see #yystate()
     * @see #yybegin(int)
     */
    @SuppressWarnings("boxing")
    public void yypush(int newState) {
        stateStack.push(yystate());
        yybegin(newState);
    }

    /**
     * Pop the last entry from the state order stack and enter it.
     * @throws java.util.NoSuchElementException if the state stack is empty
     * @see #stateStack
     * @see #yybegin(int)
     */
    @SuppressWarnings("boxing")
    public void yypop() {
        yybegin(stateStack.pop());
    }
}