opengrok/analysis/PathTokenizer.java

	PathTokenizer.java revision 123edc876010aee39f9c9caf2520c418d6b96fca
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
package org.opensolaris.opengrok.analysis;

import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

/**
 * Tokenizer that breaks a file path into lower-cased terms.
 *
 * <p>A term is a run of characters ended by the directory separator
 * ({@code '/'}), a dot, a whitespace character, or end of input. Directory
 * separators in front of a term are skipped. When a term is ended by a dot,
 * the dot itself is returned as a separate one-character term on the next
 * call, so {@code "src/Foo.java"} yields {@code "src"}, {@code "foo"},
 * {@code "."}, {@code "java"}.</p>
 *
 * <p>Only directory separators are skipped before a term; a dot or a
 * whitespace character that directly follows another terminator therefore
 * becomes the first character of the next term.</p>
 */
public class PathTokenizer extends Tokenizer {

    // Must be '/' since we try to convert even Windows file separators to
    // Unix ones.
    private static final char DIR_SEP = '/';
    /** Term emitted for a dot that terminated the previous term. */
    private static final char[] DOT = {'.'};
    /** Initial size of the per-term scratch buffer; it doubles on demand. */
    private static final int INITIAL_CAPACITY = 64;

    private final TermAttribute termAtt = addAttribute(TermAttribute.class);

    /** True when the previous term was ended by a dot that is still to be emitted. */
    private boolean dot = false;

    /**
     * Creates a tokenizer reading the path from the given reader.
     *
     * @param input source of the path characters
     */
    public PathTokenizer(Reader input) {
        super(input);
    }

    /**
     * Advances to the next term of the path.
     *
     * @return {@code true} if a term was stored in the term attribute,
     *         {@code false} once the input is exhausted
     * @throws java.io.IOException if reading from the input fails
     */
    @Override
    public final boolean incrementToken() throws java.io.IOException {
        // Lucene contract: a Tokenizer clears all attributes before it
        // populates them, so state from the previous token cannot leak.
        clearAttributes();

        if (dot) {
            // The dot that ended the previous term is a term of its own.
            dot = false;
            termAtt.setTermBuffer(DOT, 0, 1);
            return true;
        }

        // Skip directory separators in front of the term.
        int c;
        do {
            c = input.read();
            if (c == -1) {
                return false;
            }
        } while (c == DIR_SEP);

        char[] buf = new char[INITIAL_CAPACITY];
        int len = 0;
        do {
            if (len >= buf.length) {
                char[] grown = new char[buf.length * 2];
                System.arraycopy(buf, 0, grown, 0, buf.length);
                buf = grown;
            }
            buf[len++] = Character.toLowerCase((char) c);
            c = input.read();
        } while (c != DIR_SEP && c != '.' && !Character.isWhitespace(c) && c != -1);

        if (c == '.') {
            // The terminating dot has been consumed; remember to emit it.
            dot = true;
        }
        termAtt.setTermBuffer(buf, 0, len);
        return true;
    }

    /**
     * Re-targets this tokenizer at a new reader and drops any dot still
     * pending from the previous input, so that a reused instance does not
     * start the new stream with a stale {@code "."} term.
     *
     * @param input the new source of path characters
     * @throws java.io.IOException if the superclass reset fails
     */
    @Override
    public void reset(Reader input) throws java.io.IOException {
        super.reset(input);
        dot = false;
    }
}