/*
 * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/* @test
   @bug 4328178
   @summary Performs baseline and regression test on the ISCII91 charset
 */

import java.io.*;

/**
 * Baseline and regression test for the ISCII91 charset (bug 4328178).
 *
 * <p>The test decodes fixed byte sequences through the {@code String}
 * charset-name API and compares the result with hard-coded expected
 * characters. Single-byte mappings are additionally encoded back and
 * compared with the original byte. Any mismatch, or the charset being
 * unavailable, is reported by throwing an {@link Exception} out of
 * {@link #main(String[])}.
 */
public class ISCIITest {

    /** Name of the charset under test, as passed to the String APIs. */
    private static final String CHARSET_NAME = "ISCII91";

    /** Text that starts every failure message raised by this test. */
    private static final String FAILURE_PREFIX = "Failed ISCII91 Regression Test";

    /**
     * Reports a failed check on {@code System.err} and builds the exception
     * the caller is expected to throw.
     *
     * @param testName name of the check that failed
     * @param detail   description of the mismatch
     * @return an exception carrying the same message that was printed
     */
    private static Exception failure(String testName, String detail) {
        String message = FAILURE_PREFIX + ": " + testName + ": " + detail;
        System.err.println(message);
        return new Exception(message);
    }

    /**
     * Renders every char of {@code text} as a hexadecimal code unit so that
     * non-printable or combining characters are readable in failure output.
     */
    private static String toHex(String text) {
        StringBuilder hex = new StringBuilder();
        for (int i = 0; i < text.length(); i++) {
            if (i > 0) {
                hex.append(' ');
            }
            hex.append("U+").append(Integer.toHexString(text.charAt(i)));
        }
        return hex.toString();
    }

    /**
     * Verifies that a decoded string is exactly the expected string.
     *
     * <p>The whole string is compared, so a mismatch in any position (or a
     * different length) fails the check.
     *
     * @param decoded  the string produced by the decoder
     * @param expected the string the decoder should have produced
     * @param testName name of the check, used in the failure message
     * @throws Exception if {@code decoded} differs from {@code expected}
     */
    static void expectDecoded(String decoded, String expected, String testName)
            throws Exception {
        if (!decoded.equals(expected)) {
            throw failure(testName, "decoded [" + toHex(decoded)
                    + "] but expected [" + toHex(expected) + "]");
        }
    }

    /**
     * Verifies that an encoder result consists of exactly one given byte.
     *
     * @param encoded  the bytes produced by the encoder
     * @param expected the single byte the encoder should have produced
     * @param testName name of the check, used in the failure message
     * @throws Exception if {@code encoded} is not exactly {@code expected}
     */
    private static void expectSingleByte(byte[] encoded, byte expected,
                                         String testName) throws Exception {
        if (encoded.length != 1 || encoded[0] != expected) {
            throw failure(testName, "Encoding error: expected single byte 0x"
                    + Integer.toHexString(expected & 0xff)
                    + " but got " + encoded.length + " byte(s)"
                    + (encoded.length > 0
                       ? " starting with 0x" + Integer.toHexString(encoded[0] & 0xff)
                       : ""));
        }
    }

    /**
     * Checks that every byte in {@code [start, end]} decodes to the char with
     * the same numeric value and encodes back to the same byte.
     *
     * @param start    first byte value to check (inclusive)
     * @param end      last byte value to check (inclusive)
     * @param testName name of the check, used in failure messages
     * @throws Exception if a mapping differs or the charset is unsupported
     */
    private static void mapEquiv(int start,
                                 int end,
                                 String testName)
            throws Exception {
        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            singleByte[0] = (byte) i;
            String label = testName + ", input byte 0x" + Integer.toHexString(i);

            // An unsupported charset is a test failure, so the
            // UnsupportedEncodingException is allowed to propagate.
            String unicodeStr = new String(singleByte, CHARSET_NAME);
            expectDecoded(unicodeStr, String.valueOf((char) i), label);

            byte[] encoded = unicodeStr.getBytes(CHARSET_NAME);
            expectSingleByte(encoded, singleByte[0], label);
        }
    }

    /**
     * Checks that every byte in {@code [start, end]} decodes to the Unicode
     * replacement character U+FFFD.
     *
     * @param start    first byte value to check (inclusive)
     * @param end      last byte value to check (inclusive)
     * @param testName name of the check, used in failure messages
     * @throws Exception if a byte decodes to anything else or the charset
     *                   is unsupported
     */
    private static void checkUnmapped(int start,
                                      int end,
                                      String testName)
            throws Exception {
        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            singleByte[0] = (byte) i;
            String unicodeStr = new String(singleByte, CHARSET_NAME);
            expectDecoded(unicodeStr, "\uFFFD",
                    testName + ", input byte 0x" + Integer.toHexString(i));
        }
    }

    /**
     * Checks that the bytes {@code start..end} decode, one at a time, to the
     * consecutive entries of {@code expectChars}, and that each decoded
     * character encodes back to the byte it came from.
     *
     * @param start       first byte value to check (inclusive)
     * @param end         last byte value to check (inclusive)
     * @param expectChars expected characters; entry {@code k} belongs to byte
     *                    {@code start + k}, so at least
     *                    {@code end - start + 1} entries are required
     * @param testName    name of the check, used in failure messages
     * @throws Exception if a mapping differs or the charset is unsupported
     */
    private static void checkRange(int start, int end,
                                   char[] expectChars,
                                   String testName)
            throws Exception {
        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            singleByte[0] = (byte) i;
            String label = testName + ", input byte 0x" + Integer.toHexString(i);

            String unicodeStr = new String(singleByte, CHARSET_NAME);
            expectDecoded(unicodeStr,
                    String.valueOf(expectChars[i - start]), label);

            // Round trip: the encoder result used to be computed and then
            // discarded, so encoder regressions went unnoticed.
            byte[] encoded = unicodeStr.getBytes(CHARSET_NAME);
            expectSingleByte(encoded, singleByte[0], label);
        }
    }

    /*
     * Tests the ISCII91 Indic character encoding
     * as per IS 13194:1991 Bureau of Indian Standards.
     */
    private static void test() throws Exception {

        try {

            // ISCII91 is an 8-bit encoding which retains the ASCII
            // mappings in the lower half.

            mapEquiv(0, 0x7f, "7 bit ASCII range");

            // Checks a range of characters which are unmappable according
            // to the standards.

            checkUnmapped(0x81, 0x9f, "UNMAPPED");

            // Vowel Modifier chars can be used to modify the vowel
            // sound of the preceding consonant, vowel or matra character.

            char[] vowelModChars = {
                '\u0901', // Vowel modifier Chandrabindu
                '\u0902', // Vowel modifier Anuswar
                '\u0903'  // Vowel modifier Visarg
            };

            checkRange(0xa1, 0xa3, vowelModChars, "INDIC VOWEL MODIFIER CHARS");

            char[] expectChars = {
                '\u0905', // a4 -- Vowel A
                '\u0906', // a5 -- Vowel AA
                '\u0907', // a6 -- Vowel I
                '\u0908', // a7 -- Vowel II
                '\u0909', // a8 -- Vowel U
                '\u090a', // a9 -- Vowel UU
                '\u090b', // aa -- Vowel RI
                '\u090e', // ab -- Vowel E ( Southern Scripts )
                '\u090f', // ac -- Vowel EY
                '\u0910', // ad -- Vowel AI
                '\u090d', // ae -- Vowel AYE ( Devanagari Script )
                '\u0912', // af -- Vowel O ( Southern Scripts )
                '\u0913', // b0 -- Vowel OW
                '\u0914', // b1 -- Vowel AU
                '\u0911', // b2 -- Vowel AWE ( Devanagari Script )
            };

            checkRange(0xa4, 0xb2, expectChars, "INDIC VOWELS");

            char[] expectConsChars = {
                '\u0915', // b3 -- Consonant KA
                '\u0916', // b4 -- Consonant KHA
                '\u0917', // b5 -- Consonant GA
                '\u0918', // b6 -- Consonant GHA
                '\u0919', // b7 -- Consonant NGA
                '\u091a', // b8 -- Consonant CHA
                '\u091b', // b9 -- Consonant CHHA
                '\u091c', // ba -- Consonant JA
                '\u091d', // bb -- Consonant JHA
                '\u091e', // bc -- Consonant JNA
                '\u091f', // bd -- Consonant Hard TA
                '\u0920', // be -- Consonant Hard THA
                '\u0921', // bf -- Consonant Hard DA
                '\u0922', // c0 -- Consonant Hard DHA
                '\u0923', // c1 -- Consonant Hard NA
                '\u0924', // c2 -- Consonant Soft TA
                '\u0925', // c3 -- Consonant Soft THA
                '\u0926', // c4 -- Consonant Soft DA
                '\u0927', // c5 -- Consonant Soft DHA
                '\u0928', // c6 -- Consonant Soft NA
                '\u0929', // c7 -- Consonant NA ( Tamil )
                '\u092a', // c8 -- Consonant PA
                '\u092b', // c9 -- Consonant PHA
                '\u092c', // ca -- Consonant BA
                '\u092d', // cb -- Consonant BHA
                '\u092e', // cc -- Consonant MA
                '\u092f', // cd -- Consonant YA
                '\u095f', // ce -- Consonant JYA ( Bengali, Assamese & Oriya )
                '\u0930', // cf -- Consonant RA
                '\u0931', // d0 -- Consonant Hard RA ( Southern Scripts )
                '\u0932', // d1 -- Consonant LA
                '\u0933', // d2 -- Consonant Hard LA
                '\u0934', // d3 -- Consonant ZHA ( Tamil & Malayalam )
                '\u0935', // d4 -- Consonant VA
                '\u0936', // d5 -- Consonant SHA
                '\u0937', // d6 -- Consonant Hard SHA
                '\u0938', // d7 -- Consonant SA
                '\u0939', // d8 -- Consonant HA
            };

            checkRange(0xb3, 0xd8, expectConsChars, "INDIC CONSONANTS");

            char[] matraChars = {
                '\u093e', // da -- Vowel Sign AA
                '\u093f', // db -- Vowel Sign I
                '\u0940', // dc -- Vowel Sign II
                '\u0941', // dd -- Vowel Sign U
                '\u0942', // de -- Vowel Sign UU
                '\u0943', // df -- Vowel Sign RI
                '\u0946', // e0 -- Vowel Sign E ( Southern Scripts )
                '\u0947', // e1 -- Vowel Sign EY
                '\u0948', // e2 -- Vowel Sign AI
                '\u0945', // e3 -- Vowel Sign AYE ( Devanagari Script )
                '\u094a', // e4 -- Vowel Sign O ( Southern Scripts )
                '\u094b', // e5 -- Vowel Sign OW
                '\u094c', // e6 -- Vowel Sign AU
                '\u0949'  // e7 -- Vowel Sign AWE ( Devanagari Script )
            };

            // Matras or Vowel signs alter the implicit
            // vowel sound associated with an Indic consonant.

            checkRange(0xda, 0xe7, matraChars, "INDIC MATRAS");

            char[] loneContextModifierChars = {
                '\u094d', // e8 -- Vowel Omission Sign ( Halant )
                '\u093c', // e9 -- Diacritic Sign ( Nukta )
                '\u0964'  // ea -- Full Stop ( Viram, Northern Scripts )
            };

            checkRange(0xe8, 0xea,
                       loneContextModifierChars, "LONE INDIC CONTEXT CHARS");

            // Test Indic script numeral chars
            // (as opposed to international numerals)

            char[] expectNumeralChars = {
                '\u0966', // f1 -- Digit 0
                '\u0967', // f2 -- Digit 1
                '\u0968', // f3 -- Digit 2
                '\u0969', // f4 -- Digit 3
                '\u096a', // f5 -- Digit 4
                '\u096b', // f6 -- Digit 5
                '\u096c', // f7 -- Digit 6
                '\u096d', // f8 -- Digit 7
                '\u096e', // f9 -- Digit 8
                '\u096f'  // fa -- Digit 9
            };

            checkRange(0xf1, 0xfa,
                       expectNumeralChars, "NUMERAL/DIGIT CHARACTERS");

            // Entry k is the expected result of decoding the k-th byte pair
            // of codeExtensionBytes below.
            char[] expectNuktaSub = {
                '\u0950',
                '\u090c',
                '\u0961',
                '\u0960',
                '\u0962',
                '\u0963',
                '\u0944',
                '\u093d'
            };

            /*
             * ISCII uses a number of code extension techniques
             * to access a number of lesser used characters.
             * The Nukta character, which ordinarily signifies
             * a diacritic, is used in combination with existing
             * characters to escape them to a different character
             * value.
             */

            byte[] codeExtensionBytes = {
                (byte)0xa1, (byte)0xe9, // Chandrabindu + Nukta
                                        // => DEVANAGARI OM SIGN
                (byte)0xa6, (byte)0xe9, // Vowel I + Nukta
                                        // => DEVANAGARI VOCALIC L
                (byte)0xa7, (byte)0xe9, // Vowel II + Nukta
                                        // => DEVANAGARI VOCALIC LL
                (byte)0xaa, (byte)0xe9, // Vowel RI + Nukta
                                        // => DEVANAGARI VOCALIC RR
                (byte)0xdb, (byte)0xe9, // Vowel sign I + Nukta
                                        // => DEVANAGARI VOWEL SIGN VOCALIC L
                (byte)0xdc, (byte)0xe9, // Vowel sign II + Nukta
                                        // => DEVANAGARI VOWEL SIGN VOCALIC LL
                (byte)0xdf, (byte)0xe9, // Vowel sign Vocalic R + Nukta
                                        // => DEVANAGARI VOWEL SIGN VOCALIC RR
                (byte)0xea, (byte)0xe9  // Full stop/Phrase separator + Nukta
                                        // => DEVANAGARI SIGN AVAGRAHA
            };

            byte[] bytePair = new byte[2];

            for (int pair = 0; pair < codeExtensionBytes.length / 2; pair++) {
                bytePair[0] = codeExtensionBytes[2 * pair];
                bytePair[1] = codeExtensionBytes[2 * pair + 1];

                String unicodeStr = new String(bytePair, CHARSET_NAME);
                expectDecoded(unicodeStr,
                        String.valueOf(expectNuktaSub[pair]),
                        "Failed Nukta Sub, pair " + pair);
            }

            byte[] comboBytes = {
                (byte)0xe8, (byte)0xe8, // HALANT + HALANT
                (byte)0xe8, (byte)0xe9  // HALANT + NUKTA aka. Soft Halant
            };
            // Two expected chars per byte pair of comboBytes.
            char[] expectCombChars = {
                '\u094d',
                '\u200c',
                '\u094d',
                '\u200d'
            };

            for (int pair = 0; pair < comboBytes.length / 2; pair++) {
                int offset = 2 * pair;
                bytePair[0] = comboBytes[offset];
                bytePair[1] = comboBytes[offset + 1];

                String unicodeStr = new String(bytePair, CHARSET_NAME);
                // Both chars must match. The earlier form joined the two
                // mismatch tests with "&&" and therefore only failed when
                // both chars were wrong.
                expectDecoded(unicodeStr,
                        new String(expectCombChars, offset, 2),
                        "HALANT COMBINATIONS, pair " + pair);
            }

        } catch (UnsupportedEncodingException e) {
            System.err.println("ISCII91 encoding not supported");
            // Keep the original exception as the cause for diagnosis.
            throw new Exception("Failed ISCII91 Regression Test", e);
        }
    }

    /**
     * Test entry point.
     *
     * @param args ignored
     * @throws Exception if any ISCII91 check fails
     */
    public static void main(String[] args) throws Exception {
        test();
    }
}