/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
1024N/A
/*
 * Copyright 2010 Sun Microsystems. All rights reserved.
 * Use is subject to license terms.
 */
1024N/A
953N/Apackage org.opensolaris.opengrok.analysis;
953N/A
953N/Aimport java.io.ByteArrayInputStream;
953N/Aimport java.io.IOException;
953N/Aimport java.io.InputStreamReader;
957N/Aimport java.io.Reader;
953N/Aimport java.nio.ByteBuffer;
953N/Aimport java.nio.charset.Charset;
953N/A
953N/Aimport junit.framework.Assert;
953N/A
953N/Aimport org.apache.lucene.document.Document;
953N/Aimport org.junit.Test;
953N/A
953N/Apublic class TextAnalyzerTest {
972N/A
972N/A private String defaultEncoding = new InputStreamReader(new ByteArrayInputStream(new byte[0])).getEncoding();
972N/A private String encoding;
972N/A private String contents;
972N/A
972N/A @Test
972N/A public void defaultEncoding() throws IOException {
972N/A new TestableTextAnalyzer().analyze(new Document(),
972N/A new ByteArrayInputStream("hello".getBytes()));
972N/A
972N/A Assert.assertEquals(defaultEncoding, encoding);
972N/A
972N/A Assert.assertEquals("hello", contents);
972N/A }
972N/A
972N/A @Test
972N/A public void resetsStreamOnShortInput() throws IOException {
972N/A new TestableTextAnalyzer().analyze(new Document(),
972N/A new ByteArrayInputStream("hi".getBytes()));
972N/A
972N/A Assert.assertEquals(defaultEncoding, encoding);
972N/A
972N/A Assert.assertEquals("hi", contents);
972N/A }
972N/A
972N/A @Test
972N/A public void utf8WithBOM() throws IOException {
972N/A byte[] buffer = new byte[]{(byte) 239, (byte) 187, (byte) 191, 'h', 'e', 'l', 'l', 'o'};
972N/A new TestableTextAnalyzer().analyze(new Document(),
972N/A new ByteArrayInputStream(buffer));
972N/A
972N/A Assert.assertEquals("hello", contents);
972N/A Assert.assertEquals("UTF8", encoding);
972N/A }
972N/A
972N/A @Test
972N/A public void utf16WithBOM() throws IOException {
972N/A final ByteBuffer utf16str = Charset.forName("UTF-16").encode("hello");
972N/A byte[] bytes = new byte[utf16str.remaining()];
972N/A utf16str.get(bytes, 0, bytes.length);
953N/A
972N/A new TestableTextAnalyzer().analyze(new Document(),
972N/A new ByteArrayInputStream(bytes));
972N/A
972N/A Assert.assertEquals("UTF-16", encoding);
972N/A
972N/A Assert.assertEquals("hello", contents);
972N/A }
972N/A
972N/A @Test
972N/A public void utf16WithBOMAlternate() throws IOException {
972N/A final ByteBuffer utf16str = Charset.forName("UTF-16").encode("hello");
972N/A byte[] bytes = new byte[utf16str.remaining()];
972N/A utf16str.get(bytes, 0, bytes.length);
972N/A
972N/A for (int i = 0; i < bytes.length; i += 2) {
972N/A byte b = bytes[i];
972N/A bytes[i] = bytes[i + 1];
972N/A bytes[i + 1] = b;
972N/A }
972N/A
972N/A new TestableTextAnalyzer().analyze(new Document(),
972N/A new ByteArrayInputStream(bytes));
972N/A
972N/A Assert.assertEquals("UTF-16", encoding);
972N/A
972N/A Assert.assertEquals("hello", contents);
972N/A }
972N/A
972N/A public class TestableTextAnalyzer extends TextAnalyzer {
972N/A
972N/A public TestableTextAnalyzer() {
972N/A super(null);
972N/A }
972N/A
972N/A @Override
972N/A protected void analyze(Document doc, Reader r) throws IOException {
972N/A encoding = ((InputStreamReader) r).getEncoding();
972N/A
972N/A char[] buf = new char[1024];
972N/A int br = r.read(buf);
972N/A
972N/A contents = new String(buf, 0, br);
972N/A }
972N/A }
953N/A}