JFlexTokenizer.java revision 50203181d8c071cfd69cb197b5da0eda5c4d6372
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco/*
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco * CDDL HEADER START
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco *
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco * The contents of this file are subject to the terms of the
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco * Common Development and Distribution License (the "License").
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco * You may not use this file except in compliance with the License.
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco *
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco * See LICENSE.txt included in this distribution for the specific
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco * language governing permissions and limitations under the License.
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco *
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco * When distributing Covered Code, include this CDDL HEADER in each
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco * file and include the License file at LICENSE.txt.
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco * If applicable, add the following below this CDDL HEADER, with the
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco * fields enclosed by brackets "[]" replaced with your own identifying
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco * information: Portions Copyright [yyyy] [name of copyright owner]
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco *
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco * CDDL HEADER END
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco */
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco/*
4750e1be4c1b2ba11705d5b73b86dd1b9dd4e1acKnut Anders Hatlen * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco */
4750e1be4c1b2ba11705d5b73b86dd1b9dd4e1acKnut Anders Hatlen
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Koscopackage org.opensolaris.opengrok.analysis;
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco
4750e1be4c1b2ba11705d5b73b86dd1b9dd4e1acKnut Anders Hatlenimport java.io.CharArrayReader;
bfc104f8240fac5b59d4347db4c27be705f6dfc2Knut Anders Hatlenimport java.io.IOException;
4750e1be4c1b2ba11705d5b73b86dd1b9dd4e1acKnut Anders Hatlenimport java.io.Reader;
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Koscoimport org.apache.lucene.analysis.Tokenizer;
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Koscoimport org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Koscoimport org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Koscoimport org.apache.lucene.analysis.tokenattributes.TermAttribute;
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco/**
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco * this class was created because of lucene 2.4.1 update which introduced char[] in Tokens instead of String
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco * lucene 3.0.0 uses AttributeSource instead of Tokens to make things even easier :-D
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco *
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco * Generally this is a "template" for all new Tokenizers, so be carefull when changing it,
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco * it will impact almost ALL symbol tokenizers in OpenGrok ...
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco *
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco * Created on August 24, 2009
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco * @author Lubos Kosco
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco */
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Koscopublic abstract class JFlexTokenizer extends Tokenizer {
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco // default jflex scanner methods and variables
bfc104f8240fac5b59d4347db4c27be705f6dfc2Knut Anders Hatlen abstract public boolean yylex() throws IOException;
4750e1be4c1b2ba11705d5b73b86dd1b9dd4e1acKnut Anders Hatlen abstract public void yyreset(Reader reader);
bfc104f8240fac5b59d4347db4c27be705f6dfc2Knut Anders Hatlen abstract public void yyclose() throws IOException;
4750e1be4c1b2ba11705d5b73b86dd1b9dd4e1acKnut Anders Hatlen
4750e1be4c1b2ba11705d5b73b86dd1b9dd4e1acKnut Anders Hatlen /**
4750e1be4c1b2ba11705d5b73b86dd1b9dd4e1acKnut Anders Hatlen * Reinitialize the tokenizer with new contents.
4750e1be4c1b2ba11705d5b73b86dd1b9dd4e1acKnut Anders Hatlen *
4750e1be4c1b2ba11705d5b73b86dd1b9dd4e1acKnut Anders Hatlen * @param contents a char buffer with text to tokenize
4750e1be4c1b2ba11705d5b73b86dd1b9dd4e1acKnut Anders Hatlen * @param length the number of characters to use from the char buffer
4750e1be4c1b2ba11705d5b73b86dd1b9dd4e1acKnut Anders Hatlen */
4750e1be4c1b2ba11705d5b73b86dd1b9dd4e1acKnut Anders Hatlen public final void reInit(char[] contents, int length) {
4750e1be4c1b2ba11705d5b73b86dd1b9dd4e1acKnut Anders Hatlen yyreset(new CharArrayReader(contents, 0, length));
4750e1be4c1b2ba11705d5b73b86dd1b9dd4e1acKnut Anders Hatlen }
4750e1be4c1b2ba11705d5b73b86dd1b9dd4e1acKnut Anders Hatlen
bfc104f8240fac5b59d4347db4c27be705f6dfc2Knut Anders Hatlen @Override
bfc104f8240fac5b59d4347db4c27be705f6dfc2Knut Anders Hatlen public final void close() throws IOException {
bfc104f8240fac5b59d4347db4c27be705f6dfc2Knut Anders Hatlen yyclose();
bfc104f8240fac5b59d4347db4c27be705f6dfc2Knut Anders Hatlen }
bfc104f8240fac5b59d4347db4c27be705f6dfc2Knut Anders Hatlen
50203181d8c071cfd69cb197b5da0eda5c4d6372Jens Elkner protected TermAttribute termAtt= addAttribute(TermAttribute.class);
50203181d8c071cfd69cb197b5da0eda5c4d6372Jens Elkner protected OffsetAttribute offsetAtt= addAttribute(OffsetAttribute.class);
50203181d8c071cfd69cb197b5da0eda5c4d6372Jens Elkner protected PositionIncrementAttribute posIncrAtt= addAttribute(PositionIncrementAttribute.class);
c6e0f8b39af7343c04ec7558a085c965159f4ea0Lubos Kosco
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco /**
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco * This will reinitalize internal AttributeImpls, or it returns false if end of input Reader ...
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco * @return false if no more tokens, otherwise true
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco * @throws java.io.IOException
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco */
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco @Override
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco public boolean incrementToken() throws java.io.IOException {
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco return this.yylex();
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco }
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco
817883e9f0d419428e8236a09b77cdeeaa034df7Knut Anders Hatlen protected void setAttribs(String str, int start, int end) {
be5cdf850da5383468637c6937c016f26bd339cfLubos Kosco //FIXME increasing below by one(default) might be tricky, need more analysis
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco this.posIncrAtt.setPositionIncrement(1);
817883e9f0d419428e8236a09b77cdeeaa034df7Knut Anders Hatlen this.termAtt.setTermBuffer(str);
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco this.offsetAtt.setOffset(start, end);
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco }
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco}