0N/A * DO NOT REMOVE OR ALTER! 0N/A * Copyright 1999-2002,2004,2005 The Apache Software Foundation. 0N/A * Licensed under the Apache License, Version 2.0 (the "License"); 2362N/A * you may not use this file except in compliance with the License. 0N/A * You may obtain a copy of the License at 0N/A * Unless required by applicable law or agreed to in writing, software 0N/A * distributed under the License is distributed on an "AS IS" BASIS, 0N/A * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 0N/A * See the License for the specific language governing permissions and 0N/A * limitations under the License. 0N/A * This class represents a node in parse tree. 4162N/A * How many characters are needed? 4162N/A // When this.child.getMaxLength() < 0, 4162N/A // this returns minus value 4162N/A if (this.type == STRING) mylength = this.getString().length(); 4162N/A else if (this.type == CHAR) mylength = this.getChar() >= 0x10000 ? 2 : 1; 4162N/A else throw new RuntimeException("Internal Error: Illegal type: "+this.type); 4162N/A if (tok.type == STRING) otherlength = tok.getString().length(); 4162N/A else if (tok.type == CHAR) otherlength = tok.getChar() >= 0x10000 ? 2 : 1; 4162N/A else throw new RuntimeException("Internal Error: Illegal type: "+tok.type); 4162N/A // ------------------------------------------------------ 4162N/A "Cn",
"Lu",
"Ll",
"Lt",
"Lm",
"Lo",
"Mn",
"Me",
"Mc",
"Nd",
4162N/A "Nl",
"No",
"Zs",
"Zl",
"Zp",
"Cc",
"Cf",
null,
"Co",
"Cs",
4162N/A "Pd",
"Ps",
"Pe",
"Pc",
"Po",
"Sm",
"Sc",
"Sk",
"So",
// 28 4162N/A "L",
"M",
"N",
"Z",
"C",
"P",
"S",
// 31-37 4162N/A // Schema Rec. {Datatypes} - Punctuation 4162N/A //blockNames in UNICODE 3.1 that are supported by XML Schema REC 4162N/A /*0000..007F;*/ "Basic Latin",
4162N/A /*0080..00FF;*/ "Latin-1 Supplement",
4162N/A /*0100..017F;*/ "Latin Extended-A",
4162N/A /*0180..024F;*/ "Latin Extended-B",
4162N/A /*0250..02AF;*/ "IPA Extensions",
4162N/A /*02B0..02FF;*/ "Spacing Modifier Letters",
4162N/A /*0300..036F;*/ "Combining Diacritical Marks",
4162N/A /*0400..04FF;*/ "Cyrillic",
4162N/A /*0530..058F;*/ "Armenian",
4162N/A /*0900..097F;*/ "Devanagari",
4162N/A /*0A00..0A7F;*/ "Gurmukhi",
4162N/A /*0A80..0AFF;*/ "Gujarati",
4162N/A /*0D00..0D7F;*/ "Malayalam",
4162N/A /*10A0..10FF;*/ "Georgian",
4162N/A /*1100..11FF;*/ "Hangul Jamo",
4162N/A /*1200..137F;*/ "Ethiopic",
4162N/A /*13A0..13FF;*/ "Cherokee",
4162N/A /*1400..167F;*/ "Unified Canadian Aboriginal Syllabics",
4162N/A /*1800..18AF;*/ "Mongolian",
4162N/A /*1E00..1EFF;*/ "Latin Extended Additional",
4162N/A /*1F00..1FFF;*/ "Greek Extended",
4162N/A /*2000..206F;*/ "General Punctuation",
4162N/A /*2070..209F;*/ "Superscripts and Subscripts",
4162N/A /*20A0..20CF;*/ "Currency Symbols",
4162N/A /*20D0..20FF;*/ "Combining Marks for Symbols",
4162N/A /*2100..214F;*/ "Letterlike Symbols",
4162N/A /*2150..218F;*/ "Number Forms",
4162N/A /*2200..22FF;*/ "Mathematical Operators",
4162N/A /*2300..23FF;*/ "Miscellaneous Technical",
4162N/A /*2400..243F;*/ "Control Pictures",
4162N/A /*2440..245F;*/ "Optical Character Recognition",
4162N/A /*2460..24FF;*/ "Enclosed Alphanumerics",
4162N/A /*2500..257F;*/ "Box Drawing",
4162N/A /*2580..259F;*/ "Block Elements",
4162N/A /*25A0..25FF;*/ "Geometric Shapes",
4162N/A /*2600..26FF;*/ "Miscellaneous Symbols",
4162N/A /*2700..27BF;*/ "Dingbats",
4162N/A /*2800..28FF;*/ "Braille Patterns",
4162N/A /*2E80..2EFF;*/ "CJK Radicals Supplement",
4162N/A /*2F00..2FDF;*/ "Kangxi Radicals",
4162N/A /*2FF0..2FFF;*/ "Ideographic Description Characters",
4162N/A /*3000..303F;*/ "CJK Symbols and Punctuation",
4162N/A /*3040..309F;*/ "Hiragana",
4162N/A /*30A0..30FF;*/ "Katakana",
4162N/A /*3100..312F;*/ "Bopomofo",
4162N/A /*3130..318F;*/ "Hangul Compatibility Jamo",
4162N/A /*31A0..31BF;*/ "Bopomofo Extended",
4162N/A /*3200..32FF;*/ "Enclosed CJK Letters and Months",
4162N/A /*3300..33FF;*/ "CJK Compatibility",
4162N/A /*3400..4DB5;*/ "CJK Unified Ideographs Extension A",
4162N/A /*4E00..9FFF;*/ "CJK Unified Ideographs",
4162N/A /*A000..A48F;*/ "Yi Syllables",
4162N/A /*A490..A4CF;*/ "Yi Radicals",
4162N/A /*AC00..D7A3;*/ "Hangul Syllables",
4162N/A /*E000..F8FF;*/ "Private Use",
4162N/A /*F900..FAFF;*/ "CJK Compatibility Ideographs",
4162N/A /*FB00..FB4F;*/ "Alphabetic Presentation Forms",
4162N/A /*FB50..FDFF;*/ "Arabic Presentation Forms-A",
4162N/A /*FE20..FE2F;*/ "Combining Half Marks",
4162N/A /*FE30..FE4F;*/ "CJK Compatibility Forms",
4162N/A /*FE50..FE6F;*/ "Small Form Variants",
4162N/A /*FE70..FEFE;*/ "Arabic Presentation Forms-B",
4162N/A /*FEFF..FEFF;*/ "Specials",
4162N/A /*FF00..FFEF;*/ "Halfwidth and Fullwidth Forms",
4162N/A //missing Specials add manually 4162N/A /*10300..1032F;*/ "Old Italic",
// 84 4162N/A /*10330..1034F;*/ "Gothic",
4162N/A /*10400..1044F;*/ "Deseret",
4162N/A /*1D000..1D0FF;*/ "Byzantine Musical Symbols",
4162N/A /*1D100..1D1FF;*/ "Musical Symbols",
4162N/A /*1D400..1D7FF;*/ "Mathematical Alphanumeric Symbols",
4162N/A /*20000..2A6D6;*/ "CJK Unified Ideographs Extension B",
4162N/A /*2F800..2FA1F;*/ "CJK Compatibility Ideographs Supplement",
4162N/A //missing 2 private use add manually 4162N/A //F0000..FFFFD; "Private Use", 4162N/A //100000..10FFFD; "Private Use" 4162N/A "\u0000\u007F\u0080\u00FF\u0100\u017F\u0180\u024F\u0250\u02AF\u02B0\u02FF\u0300\u036F" 4162N/A +
"\u0370\u03FF\u0400\u04FF\u0530\u058F\u0590\u05FF\u0600\u06FF\u0700\u074F\u0780\u07BF" 4162N/A +
"\u0900\u097F\u0980\u09FF\u0A00\u0A7F\u0A80\u0AFF\u0B00\u0B7F\u0B80\u0BFF\u0C00\u0C7F\u0C80\u0CFF" 4162N/A +
"\u0D00\u0D7F\u0D80\u0DFF\u0E00\u0E7F\u0E80\u0EFF\u0F00\u0FFF\u1000\u109F\u10A0\u10FF\u1100\u11FF" 4162N/A +
"\u1200\u137F\u13A0\u13FF\u1400\u167F\u1680\u169F\u16A0\u16FF\u1780\u17FF\u1800\u18AF\u1E00\u1EFF" 4162N/A +
"\u1F00\u1FFF\u2000\u206F\u2070\u209F\u20A0\u20CF\u20D0\u20FF\u2100\u214F\u2150\u218F\u2190\u21FF\u2200\u22FF" 4162N/A +
"\u2300\u23FF\u2400\u243F\u2440\u245F\u2460\u24FF\u2500\u257F\u2580\u259F\u25A0\u25FF\u2600\u26FF\u2700\u27BF" 4162N/A +
"\u2800\u28FF\u2E80\u2EFF\u2F00\u2FDF\u2FF0\u2FFF\u3000\u303F\u3040\u309F\u30A0\u30FF\u3100\u312F\u3130\u318F" 4162N/A +
"\u3190\u319F\u31A0\u31BF\u3200\u32FF\u3300\u33FF\u3400\u4DB5\u4E00\u9FFF\uA000\uA48F\uA490\uA4CF" 4162N/A +
"\uAC00\uD7A3\uE000\uF8FF\uF900\uFAFF\uFB00\uFB4F\uFB50\uFDFF" 4162N/A +
"\uFE20\uFE2F\uFE30\uFE4F\uFE50\uFE6F\uFE70\uFEFE\uFEFF\uFEFF\uFF00\uFFEF";
4162N/A for (
int i =
0; i <
0x10000; i ++) {
4162N/A if (i ==
0x00AB || i ==
0x2018 || i ==
0x201B || i ==
0x201C ||
4162N/A i ==
0x201F || i ==
0x2039) {
4162N/A if (i ==
0x00BB || i ==
0x2019 || i ==
0x201D || i ==
0x203A ) {
4162N/A //REVISIT: do we really need to support block names as in Unicode 3.1 4162N/A // or can we just create all the names in IsBLOCKNAME format (XML Schema REC)? 4162N/A //System.out.println(n+" " +Integer.toHexString(rstart) 4162N/A // +"-"+ Integer.toHexString(rend)); //if (tok == null) System.out.println(name); * This method is called only by getRange(). * So this method need not be MT-safe. //System.err.println("isRegisterNonXS: "+name); // ------------------------------------------------------ "\u094D"// ;DEVANAGARI SIGN VIRAMA;Mn;9;ON;;;;;N;;;;; +
"\u09CD"//;BENGALI SIGN VIRAMA;Mn;9;ON;;;;;N;;;;; +
"\u0A4D"//;GURMUKHI SIGN VIRAMA;Mn;9;ON;;;;;N;;;;; +
"\u0ACD"//;GUJARATI SIGN VIRAMA;Mn;9;ON;;;;;N;;;;; +
"\u0B4D"//;ORIYA SIGN VIRAMA;Mn;9;ON;;;;;N;;;;; +
"\u0BCD"//;TAMIL SIGN VIRAMA;Mn;9;ON;;;;;N;;;;; +
"\u0C4D"//;TELUGU SIGN VIRAMA;Mn;9;ON;;;;;N;;;;; +
"\u0CCD"//;KANNADA SIGN VIRAMA;Mn;9;ON;;;;;N;;;;; +
"\u0D4D"//;MALAYALAM SIGN VIRAMA;Mn;9;ON;;;;;N;;;;; +
"\u0E3A"//;THAI CHARACTER PHINTHU;Mn;9;ON;;;;;N;THAI VOWEL SIGN PHINTHU;;;; +
"\u0F84";
//;TIBETAN MARK HALANTA;Mn;9;ON;;;;;N;TIBETAN VIRAMA;;;; * Combining Character Sequence in Perl 5.6. // ------------------------------------------------------ // ------------------------------------------------------ * This class represents a node in parse tree. * This class represents a node in parse tree. * This class represents a node in parse tree. case '|':
case '*':
case '+':
case '?':
case '(':
case ')':
case '.':
case '[':
case '\f':
ret =
"\\f";
break;
case '\n':
ret =
"\\n";
break;
case '\r':
ret =
"\\r";
break;
case '\t':
ret =
"\\t";
break;
case 0x1b:
ret =
"\\e";
break;
//case 0x0b: ret = "\\v"; break; * This class represents a node in parse tree. * This class represents a node in parse tree. * (?(condition)yes-pattern|no-pattern) return this.
no ==
null ?
1 :
2;
* This class represents a node in parse tree. // This is CONCAT, and new child is CONCAT. for (
int i =
0; i <
tok.
size(); i ++)
//System.err.println("Merge '"+previous+"' and '"+tok+"'.");