/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
 */
package org.opensolaris.opengrok.analysis;

import java.io.Reader;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

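/**
 * Tokenizer that breaks a set of symbol strings into individual
 * identifier-like tokens. Each symbol is split on any run of characters
 * that is not a letter, digit or underscore, and the resulting pieces are
 * returned one at a time from {@link #incrementToken()}.
 */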
public final class Hash2Tokenizer extends Tokenizer {
    int i = 0;
    String term;
    String[] terms;
    Iterator<String> keys;
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private int finalOffset;

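    /**
     * Create a tokenizer with an empty symbol set. Call
     * {@link #reInit(java.util.Set)} to supply the symbols to tokenize.
     *
     * @param reader the reader to associate with this tokenizer; it is not
     * read by this tokenizer, since the tokens come from the symbol set
     */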
    public Hash2Tokenizer(Reader reader) {
        super(reader);
        keys = new HashSet<String>().iterator();
    }

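    /**
     * Create a tokenizer for the given set of symbols.
     *
     * @param symbols the symbols to split into tokens
     */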
    public Hash2Tokenizer(Set<String> symbols) {
        super(AnalyzerGuru.dummyR);
        keys = symbols.iterator();
    }

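    /**
     * Re-initialize this tokenizer with a new set of symbols so that the
     * instance can be reused.
     *
     * @param symbols the symbols to split into tokens
     */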
    public void reInit(Set<String> symbols) {
        keys = symbols.iterator();
    }

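    /**
     * Advance to the next token. Tokens are produced by splitting each
     * symbol on non-word characters; all pieces of one symbol are emitted
     * before the next symbol is consumed.
     */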
    @Override
    public final boolean incrementToken() throws java.io.IOException {
        clearAttributes();
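        // All pieces of the previous symbol have been emitted; fetch the
        // next symbol from the set and split it on non-word characters.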
        while (i <= 0) {
            if (keys.hasNext()) {
                term = keys.next();
                terms = term.split("[^a-zA-Z_0-9]+");
                i = terms.length;
                if (i > 0) {
                    termAtt.setEmpty();
                    termAtt.append(terms[--i]);
                    return true;
                }
                // no tokens found in this key, try next
                continue;
            }
            return false;
        }
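        // Emit the next remaining piece of the symbol split on a previous call.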
        finalOffset = 0;
        termAtt.setEmpty();
        termAtt.append(terms[--i]);
        return true;
    }
}