opengrok/analysis/PathTokenizer.java

	PathTokenizer.java revision 1269
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
1185N/A
/*
 * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
 */
1185N/Apackage org.opensolaris.opengrok.analysis;
1185N/A
1185N/Aimport java.io.Reader;
1185N/Aimport java.util.Arrays;
1185N/Aimport org.apache.lucene.analysis.Tokenizer;
1185N/Aimport org.apache.lucene.analysis.tokenattributes.TermAttribute;
1185N/A
1185N/Apublic class PathTokenizer extends Tokenizer {
1185N/A
1185N/A    // below should be '/' since we try to convert even windows file separators to unix ones
1185N/A    private static final char dirSep = '/';
1185N/A    private boolean dot = false;
1185N/A    private static final char ADOT[]={'.'};
1185N/A    private final TermAttribute termAtt = addAttribute(TermAttribute.class);
1185N/A
1185N/A    public PathTokenizer(Reader input) {
1185N/A        super(input);
1185N/A    }
1185N/A
1185N/A    @Override
1185N/A    public final boolean incrementToken() throws java.io.IOException {
1185N/A        if (dot) {
1185N/A            dot = false;
1185N/A            termAtt.setTermBuffer(ADOT,0,1);
1185N/A            return true;
1185N/A        }
1185N/A
1185N/A        char buf[] = new char[64];
1185N/A        int c;
1185N/A        int i = 0;
1185N/A        do {
1185N/A            c = input.read();
1185N/A            if (c == -1) {
1185N/A                return false;
1185N/A            }
1185N/A        } while (c == dirSep);
1185N/A
1185N/A        do {
1185N/A            if (i >= buf.length) {
1185N/A                buf = Arrays.copyOf(buf, buf.length * 2);
1185N/A            }
1185N/A            buf[i++] = Character.toLowerCase((char) c);
1185N/A            c = input.read();
1185N/A        } while (c != dirSep && c != '.' && !Character.isWhitespace(c) && c != -1);
1185N/A        if (c == '.') {
1185N/A            dot = true;
1185N/A        }
1185N/A        termAtt.setTermBuffer(buf, 0, i);
1185N/A        return true;
1185N/A    }
1185N/A}
1185N/A