/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
 */
package org.opensolaris.opengrok.analysis;

import java.io.Reader;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

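/**
 * Tokenizer that breaks a set of symbol strings into individual
 * identifier-like tokens. Each symbol is split on any run of characters
 * that is not a letter, digit or underscore, and the resulting pieces are
 * returned one at a time from {@link #incrementToken()}.
 */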
public final class Hash2Tokenizer extends Tokenizer {
    int i = 0;
    String term;
    String[] terms;
    Iterator<String> keys;
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private int finalOffset;

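    /**
     * Create a tokenizer with an empty symbol set. Call
     * {@link #reInit(java.util.Set)} to supply the symbols to tokenize.
     *
     * @param reader the reader to associate with this tokenizer; it is not
     * read by this tokenizer, since the tokens come from the symbol set
     */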
    public Hash2Tokenizer(Reader reader) {
        super(reader);
        keys = new HashSet<String>().iterator();
    }

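    /**
     * Create a tokenizer for the given set of symbols.
     *
     * @param symbols the symbols to split into tokens
     */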
    public Hash2Tokenizer(Set<String> symbols) {
        super(AnalyzerGuru.dummyR);
        keys = symbols.iterator();
    }

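    /**
     * Re-initialize this tokenizer with a new set of symbols so that the
     * instance can be reused.
     *
     * @param symbols the symbols to split into tokens
     */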
    public void reInit(Set<String> symbols) {
        keys = symbols.iterator();
    }

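    /**
     * Advance to the next token. Tokens are produced by splitting each
     * symbol on non-word characters; all pieces of one symbol are emitted
     * before the next symbol is consumed.
     */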
    @Override
    public final boolean incrementToken() throws java.io.IOException {
        clearAttributes();
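        // All pieces of the previous symbol have been emitted; fetch the
        // next symbol from the set and split it on non-word characters.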
        while (i <= 0) {
            if (keys.hasNext()) {
                term = keys.next();
                terms = term.split("[^a-zA-Z_0-9]+");
                i = terms.length;
                if (i > 0) {
                    termAtt.setEmpty();
                    termAtt.append(terms[--i]);
                    return true;
                }
                // no tokens found in this key, try next
                continue;
            }
            return false;
        }
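        // Emit the next remaining piece of the symbol split on a previous call.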
        finalOffset = 0;
        termAtt.setEmpty();
        termAtt.append(terms[--i]);
        return true;
    }
}