PathTokenizer.java revision 1318
639N/A/*
639N/A * CDDL HEADER START
639N/A *
639N/A * The contents of this file are subject to the terms of the
639N/A * Common Development and Distribution License (the "License").
639N/A * You may not use this file except in compliance with the License.
639N/A *
639N/A * See LICENSE.txt included in this distribution for the specific
639N/A * language governing permissions and limitations under the License.
639N/A *
639N/A * When distributing Covered Code, include this CDDL HEADER in each
639N/A * file and include the License file at LICENSE.txt.
639N/A * If applicable, add the following below this CDDL HEADER, with the
639N/A * fields enclosed by brackets "[]" replaced with your own identifying
639N/A * information: Portions Copyright [yyyy] [name of copyright owner]
639N/A *
639N/A * CDDL HEADER END
639N/A */
639N/A
639N/A/*
639N/A * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
639N/A */
639N/Apackage org.opensolaris.opengrok.analysis;
639N/A
639N/Aimport java.io.Reader;
639N/Aimport java.util.Arrays;
639N/Aimport org.apache.lucene.analysis.Tokenizer;
639N/Aimport org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
639N/A
639N/Apublic class PathTokenizer extends Tokenizer {
639N/A
639N/A // below should be '/' since we try to convert even windows file separators to unix ones
639N/A private static final char dirSep = '/';
639N/A private boolean dot = false;
639N/A private static final char ADOT[]={'.'};
639N/A private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
639N/A
639N/A public PathTokenizer(Reader input) {
639N/A super(input);
639N/A }
639N/A
639N/A @Override
639N/A public final boolean incrementToken() throws java.io.IOException {
639N/A if (dot) {
639N/A dot = false;
termAtt.copyBuffer(ADOT,0,1);
return true;
}
char buf[] = new char[64];
int c;
int i = 0;
do {
c = input.read();
if (c == -1) {
return false;
}
} while (c == dirSep);
do {
if (i >= buf.length) {
buf = Arrays.copyOf(buf, buf.length * 2);
}
buf[i++] = Character.toLowerCase((char) c);
c = input.read();
} while (c != dirSep && c != '.' && !Character.isWhitespace(c) && c != -1);
if (c == '.') {
dot = true;
}
termAtt.copyBuffer(buf, 0, i);
return true;
}
}