opengrok/analysis/Hash2Tokenizer.java

	Hash2Tokenizer.java revision f9fd2b96d1c5ea62664f74da0e34a04b6511a8ff
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
 */
package org.opensolaris.opengrok.analysis;

import java.io.Reader;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

/**
 * Tokenizer that produces its tokens from a set of symbol strings instead of
 * reading them from its input reader.
 * <p>
 * Every key of the set is split on runs of characters other than ASCII
 * letters, digits and underscore; each non-empty fragment is emitted as one
 * token. Fragments of a single key are emitted from the last one to the
 * first one. The order of the keys themselves is whatever the iterator of
 * the supplied set yields.
 */
public final class Hash2Tokenizer extends Tokenizer {
    /** Separator between fragments: one or more non-word characters. */
    private static final Pattern SEPARATOR =
            java.util.regex.Pattern.compile("[^a-zA-Z_0-9]+");

    /** Number of fragments of the current key still waiting to be emitted. */
    int i = 0;
    /** The key most recently taken from {@link #keys}. */
    String term;
    /** Fragments of the current key; indexes {@code [0, i)} are pending. */
    String[] terms;
    /** Remaining keys of the current symbol set. */
    Iterator<String> keys;
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

    /**
     * Creates a tokenizer that has no symbols yet; it yields no tokens until
     * symbols are supplied through {@link #reInit(Set)}.
     *
     * @param reader reader handed to the superclass constructor; this class
     * itself never reads from it
     */
    public Hash2Tokenizer(Reader reader) {
        super(reader);
        keys = iteratorOf(null);
    }

    /**
     * Creates a tokenizer over the given symbols. The superclass is given
     * {@code AnalyzerGuru.dummyR} as its reader.
     *
     * @param symbols the keys to tokenize; {@code null} is treated as an
     * empty set
     */
    public Hash2Tokenizer(Set<String> symbols) {
        super(AnalyzerGuru.dummyR);
        keys = iteratorOf(symbols);
    }

    /**
     * Replaces the symbols to be tokenized. Fragments of a key from the
     * previous set that have not been emitted yet are discarded, so the
     * next token always comes from {@code symbols}.
     *
     * @param symbols the new keys to tokenize; {@code null} is treated as an
     * empty set
     */
    public void reInit(Set<String> symbols) {
        keys = iteratorOf(symbols);
        // forget leftovers of the previous set so they cannot leak into
        // the new token stream
        i = 0;
        term = null;
        terms = null;
    }

    /**
     * Advances to the next token and stores its text in the term attribute.
     *
     * @return {@code true} if a token was produced, {@code false} when all
     * keys have been consumed
     * @throws java.io.IOException declared by the overridden method; the
     * code in this class does not throw it itself
     */
    @Override
    public final boolean incrementToken() throws java.io.IOException {
        clearAttributes();
        while (true) {
            // drain the fragments of the current key, last one first
            while (i > 0) {
                String fragment = terms[--i];
                // split() yields an empty string for an empty key and as
                // the first element when the key starts with a separator;
                // such a fragment carries no text and is not a token
                if (fragment.length() > 0) {
                    termAtt.setEmpty();
                    termAtt.append(fragment);
                    return true;
                }
            }
            if (!keys.hasNext()) {
                return false;
            }
            term = keys.next();
            if (term == null) {
                // nothing to split, try the next key
                continue;
            }
            terms = SEPARATOR.split(term);
            // zero when the key consists of separators only; the outer
            // loop then moves on to the next key
            i = terms.length;
        }
    }

    /** Returns an iterator over the symbols, or an empty one for null. */
    private static Iterator<String> iteratorOf(Set<String> symbols) {
        if (symbols == null) {
            return new HashSet<String>().iterator();
        }
        return symbols.iterator();
    }
}