/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2009, 2016, Oracle and/or its affiliates. All rights reserved.
 */
package org.opensolaris.opengrok.analysis;

import java.io.IOException;
import java.io.Reader;
import java.util.Stack;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco/**
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco *
80c87d7e9c9b9ce372265f9d7eccee40dced7463Lubos Kosco * Generally this is a "template" for all new Tokenizers, so be careful when
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco * changing it, it will impact almost ALL symbol tokenizers in OpenGrok ...
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco *
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco * Created on August 24, 2009
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco *
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco * @author Lubos Kosco
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco */
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Koscopublic abstract class JFlexTokenizer extends Tokenizer {
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco
80c87d7e9c9b9ce372265f9d7eccee40dced7463Lubos Kosco protected Stack<Integer> stack = new Stack<>();
b6ada49236374a1f4ae462c384d69bab7e6f0a1dGustavo Lopes
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco // default jflex scanner methods and variables
bfc104f8240fac5b59d4347db4c27be705f6dfc2Knut Anders Hatlen abstract public boolean yylex() throws IOException;
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco
4750e1be4c1b2ba11705d5b73b86dd1b9dd4e1acKnut Anders Hatlen abstract public void yyreset(Reader reader);
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco
bfc104f8240fac5b59d4347db4c27be705f6dfc2Knut Anders Hatlen abstract public void yyclose() throws IOException;
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco
b6ada49236374a1f4ae462c384d69bab7e6f0a1dGustavo Lopes abstract public void yybegin(int newState);
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco
6602c01097c66d242046fb0490e0a5dcc1ca36c5Lubos Kosco abstract public int yystate();
6602c01097c66d242046fb0490e0a5dcc1ca36c5Lubos Kosco
6602c01097c66d242046fb0490e0a5dcc1ca36c5Lubos Kosco //TODO can be removed once we figure out jflex generation of empty constructor
6602c01097c66d242046fb0490e0a5dcc1ca36c5Lubos Kosco protected JFlexTokenizer(Reader in) {
6602c01097c66d242046fb0490e0a5dcc1ca36c5Lubos Kosco super();
6602c01097c66d242046fb0490e0a5dcc1ca36c5Lubos Kosco }
6602c01097c66d242046fb0490e0a5dcc1ca36c5Lubos Kosco
6602c01097c66d242046fb0490e0a5dcc1ca36c5Lubos Kosco protected JFlexTokenizer() {
6602c01097c66d242046fb0490e0a5dcc1ca36c5Lubos Kosco super();
eb11fe3584b7b243fb0641da4ab2e157610bb767Lubos Kosco }
4750e1be4c1b2ba11705d5b73b86dd1b9dd4e1acKnut Anders Hatlen
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco /**
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco * Reinitialize the tokenizer with new reader.
4083abf043fbc87feef12bde4afb75b046200dbaLubos Kosco * @throws java.io.IOException in case of I/O error
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco */
20bc00e4946cb455e11b148ae09f8a3c8376318dKnut Anders Hatlen @Override
20bc00e4946cb455e11b148ae09f8a3c8376318dKnut Anders Hatlen public void reset() throws IOException {
20bc00e4946cb455e11b148ae09f8a3c8376318dKnut Anders Hatlen super.reset();
20bc00e4946cb455e11b148ae09f8a3c8376318dKnut Anders Hatlen stack.clear();
20bc00e4946cb455e11b148ae09f8a3c8376318dKnut Anders Hatlen this.yyreset(input);
4750e1be4c1b2ba11705d5b73b86dd1b9dd4e1acKnut Anders Hatlen }
4750e1be4c1b2ba11705d5b73b86dd1b9dd4e1acKnut Anders Hatlen
bfc104f8240fac5b59d4347db4c27be705f6dfc2Knut Anders Hatlen @Override
bfc104f8240fac5b59d4347db4c27be705f6dfc2Knut Anders Hatlen public final void close() throws IOException {
8a16fdd0eb18b354da40c48d80e543b837593891Lubos Kosco super.close();
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco this.yyclose();
bfc104f8240fac5b59d4347db4c27be705f6dfc2Knut Anders Hatlen }
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco protected CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco protected OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco protected PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco protected int finalOffset;
c6e0f8b39af7343c04ec7558a085c965159f4ea0Lubos Kosco
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco /**
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco * This will re-initalize internal AttributeImpls, or it returns false if
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco * end of input Reader ...
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco *
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco * @return false if no more tokens, otherwise true
4083abf043fbc87feef12bde4afb75b046200dbaLubos Kosco * @throws IOException in case of I/O error
ff5eba819da0cf7964d884630fb13262ef12c505Trond Norbye */
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco @Override
d2e3f3641cebe445e5a99af0e33c455b16140b2bKnut Anders Hatlen public final boolean incrementToken() throws IOException {
ff5eba819da0cf7964d884630fb13262ef12c505Trond Norbye return this.yylex();
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco }
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco
817883e9f0d419428e8236a09b77cdeeaa034df7Knut Anders Hatlen protected void setAttribs(String str, int start, int end) {
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco clearAttributes();
be5cdf850da5383468637c6937c016f26bd339cfLubos Kosco //FIXME increasing below by one(default) might be tricky, need more analysis
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco // after lucene upgrade to 3.5 below is most probably not even needed
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco this.posIncrAtt.setPositionIncrement(1);
6d7c6f82e644c205bc679ee5b1fa2929ec949963Lubos Kosco this.termAtt.setEmpty();
6d7c6f82e644c205bc679ee5b1fa2929ec949963Lubos Kosco this.termAtt.append(str);
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco this.offsetAtt.setOffset(start, end);
b645988bdc1cf4f2f82b8c00ed041ddddd822c24Lubos Kosco }
b6ada49236374a1f4ae462c384d69bab7e6f0a1dGustavo Lopes
b6ada49236374a1f4ae462c384d69bab7e6f0a1dGustavo Lopes public void yypush(int newState) {
b6ada49236374a1f4ae462c384d69bab7e6f0a1dGustavo Lopes this.stack.push(yystate());
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco this.yybegin(newState);
b6ada49236374a1f4ae462c384d69bab7e6f0a1dGustavo Lopes }
b6ada49236374a1f4ae462c384d69bab7e6f0a1dGustavo Lopes
b6ada49236374a1f4ae462c384d69bab7e6f0a1dGustavo Lopes public void yypop() {
f9fd2b96d1c5ea62664f74da0e34a04b6511a8ffLubos Kosco this.yybegin(this.stack.pop());
b6ada49236374a1f4ae462c384d69bab7e6f0a1dGustavo Lopes }
eb32a77fdb57f20c042b7b79b28a4fb4060cb949Lubos Kosco}