java/lang/ConditionalSpecialCasing.java

/*
 * Copyright (c) 2003, 2005, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
0N/A
0N/Apackage java.lang;
0N/A
0N/Aimport java.text.BreakIterator;
0N/Aimport java.util.HashSet;
0N/Aimport java.util.Hashtable;
0N/Aimport java.util.Iterator;
0N/Aimport java.util.Locale;
0N/Aimport sun.text.Normalizer;
0N/A
0N/A
0N/A/**
0N/A * This is a utility class for <code>String.toLowerCase()</code> and
0N/A * <code>String.toUpperCase()</code>, that handles special casing with
0N/A * conditions.  In other words, it handles the mappings with conditions
0N/A * that are defined in
0N/A * <a href="http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt">Special
0N/A * Casing Properties</a> file.
0N/A * <p>
0N/A * Note that the unconditional case mappings (including 1:M mappings)
0N/A * are handled in <code>Character.toLower/UpperCase()</code>.
0N/A */
0N/Afinal class ConditionalSpecialCasing {
0N/A
0N/A    // context conditions.
0N/A    final static int FINAL_CASED =              1;
0N/A    final static int AFTER_SOFT_DOTTED =        2;
0N/A    final static int MORE_ABOVE =               3;
0N/A    final static int AFTER_I =                  4;
0N/A    final static int NOT_BEFORE_DOT =           5;
0N/A
0N/A    // combining class definitions
0N/A    final static int COMBINING_CLASS_ABOVE = 230;
0N/A
0N/A    // Special case mapping entries
0N/A    static Entry[] entry = {
0N/A        //# ================================================================================
0N/A        //# Conditional mappings
0N/A        //# ================================================================================
0N/A        new Entry(0x03A3, new char[]{0x03C2}, new char[]{0x03A3}, null, FINAL_CASED), // # GREEK CAPITAL LETTER SIGMA
0N/A
0N/A        //# ================================================================================
0N/A        //# Locale-sensitive mappings
0N/A        //# ================================================================================
0N/A        //# Lithuanian
0N/A        new Entry(0x0307, new char[]{0x0307}, new char[]{}, "lt",  AFTER_SOFT_DOTTED), // # COMBINING DOT ABOVE
0N/A        new Entry(0x0049, new char[]{0x0069, 0x0307}, new char[]{0x0049}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER I
0N/A        new Entry(0x004A, new char[]{0x006A, 0x0307}, new char[]{0x004A}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER J
0N/A        new Entry(0x012E, new char[]{0x012F, 0x0307}, new char[]{0x012E}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER I WITH OGONEK
0N/A        new Entry(0x00CC, new char[]{0x0069, 0x0307, 0x0300}, new char[]{0x00CC}, "lt", 0), // # LATIN CAPITAL LETTER I WITH GRAVE
0N/A        new Entry(0x00CD, new char[]{0x0069, 0x0307, 0x0301}, new char[]{0x00CD}, "lt", 0), // # LATIN CAPITAL LETTER I WITH ACUTE
0N/A        new Entry(0x0128, new char[]{0x0069, 0x0307, 0x0303}, new char[]{0x0128}, "lt", 0), // # LATIN CAPITAL LETTER I WITH TILDE
1091N/A        new Entry(0x0130, new char[]{0x0069, 0x0307}, new char[]{0x0130}, "lt", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
0N/A
0N/A        //# ================================================================================
0N/A        //# Turkish and Azeri
0N/A//      new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
0N/A//      new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "az", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
0N/A        new Entry(0x0307, new char[]{}, new char[]{0x0307}, "tr", AFTER_I), // # COMBINING DOT ABOVE
0N/A        new Entry(0x0307, new char[]{}, new char[]{0x0307}, "az", AFTER_I), // # COMBINING DOT ABOVE
0N/A        new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "tr", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
0N/A        new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "az", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
0N/A        new Entry(0x0069, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN SMALL LETTER I
1091N/A        new Entry(0x0069, new char[]{0x0069}, new char[]{0x0130}, "az", 0), // # LATIN SMALL LETTER I
1091N/A        //# ================================================================================
1091N/A        //# Other
1091N/A        new Entry(0x0130, new char[]{0x0069, 0x0307}, new char[]{0x0130}, "en", 0), // # LATIN CAPITALLETTER I WITH DOT ABOVE
0N/A    };
0N/A
0N/A    // A hash table that contains the above entries
0N/A    static Hashtable entryTable = new Hashtable();
0N/A    static {
0N/A        // create hashtable from the entry
0N/A        for (int i = 0; i < entry.length; i ++) {
0N/A            Entry cur = entry[i];
0N/A            Integer cp = new Integer(cur.getCodePoint());
0N/A            HashSet set = (HashSet)entryTable.get(cp);
0N/A            if (set == null) {
0N/A                set = new HashSet();
0N/A            }
0N/A            set.add(cur);
0N/A            entryTable.put(cp, set);
0N/A        }
0N/A    }
0N/A
0N/A    static int toLowerCaseEx(String src, int index, Locale locale) {
0N/A        char[] result = lookUpTable(src, index, locale, true);
0N/A
0N/A        if (result != null) {
0N/A            if (result.length == 1) {
0N/A                return result[0];
0N/A            } else {
0N/A                return Character.ERROR;
0N/A            }
0N/A        } else {
0N/A            // default to Character class' one
0N/A            return Character.toLowerCase(src.codePointAt(index));
0N/A        }
0N/A    }
0N/A
0N/A    static int toUpperCaseEx(String src, int index, Locale locale) {
0N/A        char[] result = lookUpTable(src, index, locale, false);
0N/A
0N/A        if (result != null) {
0N/A            if (result.length == 1) {
0N/A                return result[0];
0N/A            } else {
0N/A                return Character.ERROR;
0N/A            }
0N/A        } else {
0N/A            // default to Character class' one
0N/A            return Character.toUpperCaseEx(src.codePointAt(index));
0N/A        }
0N/A    }
0N/A
0N/A    static char[] toLowerCaseCharArray(String src, int index, Locale locale) {
0N/A        return lookUpTable(src, index, locale, true);
0N/A    }
0N/A
0N/A    static char[] toUpperCaseCharArray(String src, int index, Locale locale) {
0N/A        char[] result = lookUpTable(src, index, locale, false);
0N/A        if (result != null) {
0N/A            return result;
0N/A        } else {
0N/A            return Character.toUpperCaseCharArray(src.codePointAt(index));
0N/A        }
0N/A    }
0N/A
0N/A    private static char[] lookUpTable(String src, int index, Locale locale, boolean bLowerCasing) {
0N/A        HashSet set = (HashSet)entryTable.get(new Integer(src.codePointAt(index)));
0N/A
0N/A        if (set != null) {
0N/A            Iterator iter = set.iterator();
0N/A            String currentLang = locale.getLanguage();
0N/A            while (iter.hasNext()) {
0N/A                Entry entry = (Entry)iter.next();
0N/A                String conditionLang= entry.getLanguage();
0N/A                if (((conditionLang == null) || (conditionLang.equals(currentLang))) &&
0N/A                        isConditionMet(src, index, locale, entry.getCondition())) {
0N/A                    return (bLowerCasing ? entry.getLowerCase() : entry.getUpperCase());
0N/A                }
0N/A            }
0N/A        }
0N/A
0N/A        return null;
0N/A    }
0N/A
0N/A    private static boolean isConditionMet(String src, int index, Locale locale, int condition) {
0N/A        switch (condition) {
0N/A        case FINAL_CASED:
0N/A            return isFinalCased(src, index, locale);
0N/A
0N/A        case AFTER_SOFT_DOTTED:
0N/A            return isAfterSoftDotted(src, index);
0N/A
0N/A        case MORE_ABOVE:
0N/A            return isMoreAbove(src, index);
0N/A
0N/A        case AFTER_I:
0N/A            return isAfterI(src, index);
0N/A
0N/A        case NOT_BEFORE_DOT:
0N/A            return !isBeforeDot(src, index);
0N/A
0N/A        default:
0N/A            return true;
0N/A        }
0N/A    }
0N/A
0N/A    /**
0N/A     * Implements the "Final_Cased" condition
0N/A     *
0N/A     * Specification: Within the closest word boundaries containing C, there is a cased
0N/A     * letter before C, and there is no cased letter after C.
0N/A     *
0N/A     * Regular Expression:
0N/A     *   Before C: [{cased==true}][{wordBoundary!=true}]*
0N/A     *   After C: !([{wordBoundary!=true}]*[{cased}])
0N/A     */
0N/A    private static boolean isFinalCased(String src, int index, Locale locale) {
0N/A        BreakIterator wordBoundary = BreakIterator.getWordInstance(locale);
0N/A        wordBoundary.setText(src);
0N/A        int ch;
0N/A
0N/A        // Look for a preceding 'cased' letter
0N/A        for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i);
0N/A                i -= Character.charCount(ch)) {
0N/A
0N/A            ch = src.codePointBefore(i);
0N/A            if (isCased(ch)) {
0N/A
0N/A                int len = src.length();
0N/A                // Check that there is no 'cased' letter after the index
0N/A                for (i = index + Character.charCount(src.codePointAt(index));
0N/A                        (i < len) && !wordBoundary.isBoundary(i);
0N/A                        i += Character.charCount(ch)) {
0N/A
0N/A                    ch = src.codePointAt(i);
0N/A                    if (isCased(ch)) {
0N/A                        return false;
0N/A                    }
0N/A                }
0N/A
0N/A                return true;
0N/A            }
0N/A        }
0N/A
0N/A        return false;
0N/A    }
0N/A
0N/A    /**
0N/A     * Implements the "After_I" condition
0N/A     *
0N/A     * Specification: The last preceding base character was an uppercase I,
0N/A     * and there is no intervening combining character class 230 (ABOVE).
0N/A     *
0N/A     * Regular Expression:
0N/A     *   Before C: [I]([{cc!=230}&{cc!=0}])*
0N/A     */
0N/A    private static boolean isAfterI(String src, int index) {
0N/A        int ch;
0N/A        int cc;
0N/A
0N/A        // Look for the last preceding base character
0N/A        for (int i = index; i > 0; i -= Character.charCount(ch)) {
0N/A
0N/A            ch = src.codePointBefore(i);
0N/A
0N/A            if (ch == 'I') {
0N/A                return true;
0N/A            } else {
0N/A                cc = Normalizer.getCombiningClass(ch);
0N/A                if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) {
0N/A                    return false;
0N/A                }
0N/A            }
0N/A        }
0N/A
0N/A        return false;
0N/A    }
0N/A
0N/A    /**
0N/A     * Implements the "After_Soft_Dotted" condition
0N/A     *
0N/A     * Specification: The last preceding character with combining class
0N/A     * of zero before C was Soft_Dotted, and there is no intervening
0N/A     * combining character class 230 (ABOVE).
0N/A     *
0N/A     * Regular Expression:
0N/A     *   Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])*
0N/A     */
0N/A    private static boolean isAfterSoftDotted(String src, int index) {
0N/A        int ch;
0N/A        int cc;
0N/A
0N/A        // Look for the last preceding character
0N/A        for (int i = index; i > 0; i -= Character.charCount(ch)) {
0N/A
0N/A            ch = src.codePointBefore(i);
0N/A
0N/A            if (isSoftDotted(ch)) {
0N/A                return true;
0N/A            } else {
0N/A                cc = Normalizer.getCombiningClass(ch);
0N/A                if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) {
0N/A                    return false;
0N/A                }
0N/A            }
0N/A        }
0N/A
0N/A        return false;
0N/A    }
0N/A
0N/A    /**
0N/A     * Implements the "More_Above" condition
0N/A     *
0N/A     * Specification: C is followed by one or more characters of combining
0N/A     * class 230 (ABOVE) in the combining character sequence.
0N/A     *
0N/A     * Regular Expression:
0N/A     *   After C: [{cc!=0}]*[{cc==230}]
0N/A     */
0N/A    private static boolean isMoreAbove(String src, int index) {
0N/A        int ch;
0N/A        int cc;
0N/A        int len = src.length();
0N/A
0N/A        // Look for a following ABOVE combining class character
0N/A        for (int i = index + Character.charCount(src.codePointAt(index));
0N/A                i < len; i += Character.charCount(ch)) {
0N/A
0N/A            ch = src.codePointAt(i);
0N/A            cc = Normalizer.getCombiningClass(ch);
0N/A
0N/A            if (cc == COMBINING_CLASS_ABOVE) {
0N/A                return true;
0N/A            } else if (cc == 0) {
0N/A                return false;
0N/A            }
0N/A        }
0N/A
0N/A        return false;
0N/A    }
0N/A
0N/A    /**
0N/A     * Implements the "Before_Dot" condition
0N/A     *
0N/A     * Specification: C is followed by <code>U+0307 COMBINING DOT ABOVE</code>.
0N/A     * Any sequence of characters with a combining class that is
0N/A     * neither 0 nor 230 may intervene between the current character
0N/A     * and the combining dot above.
0N/A     *
0N/A     * Regular Expression:
0N/A     *   After C: ([{cc!=230}&{cc!=0}])*[\u0307]
0N/A     */
0N/A    private static boolean isBeforeDot(String src, int index) {
0N/A        int ch;
0N/A        int cc;
0N/A        int len = src.length();
0N/A
0N/A        // Look for a following COMBINING DOT ABOVE
0N/A        for (int i = index + Character.charCount(src.codePointAt(index));
0N/A                i < len; i += Character.charCount(ch)) {
0N/A
0N/A            ch = src.codePointAt(i);
0N/A
0N/A            if (ch == '\u0307') {
0N/A                return true;
0N/A            } else {
0N/A                cc = Normalizer.getCombiningClass(ch);
0N/A                if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) {
0N/A                    return false;
0N/A                }
0N/A            }
0N/A        }
0N/A
0N/A        return false;
0N/A    }
0N/A
0N/A    /**
0N/A     * Examines whether a character is 'cased'.
0N/A     *
0N/A     * A character C is defined to be 'cased' if and only if at least one of
0N/A     * following are true for C: uppercase==true, or lowercase==true, or
0N/A     * general_category==titlecase_letter.
0N/A     *
0N/A     * The uppercase and lowercase property values are specified in the data
0N/A     * file DerivedCoreProperties.txt in the Unicode Character Database.
0N/A     */
0N/A    private static boolean isCased(int ch) {
0N/A        int type = Character.getType(ch);
0N/A        if (type == Character.LOWERCASE_LETTER ||
0N/A                type == Character.UPPERCASE_LETTER ||
0N/A                type == Character.TITLECASE_LETTER) {
0N/A            return true;
0N/A        } else {
0N/A            // Check for Other_Lowercase and Other_Uppercase
0N/A            //
0N/A            if ((ch >= 0x02B0) && (ch <= 0x02B8)) {
0N/A                // MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y
0N/A                return true;
0N/A            } else if ((ch >= 0x02C0) && (ch <= 0x02C1)) {
0N/A                // MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP
0N/A                return true;
0N/A            } else if ((ch >= 0x02E0) && (ch <= 0x02E4)) {
0N/A                // MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
0N/A                return true;
0N/A            } else if (ch == 0x0345) {
0N/A                // COMBINING GREEK YPOGEGRAMMENI
0N/A                return true;
0N/A            } else if (ch == 0x037A) {
0N/A                // GREEK YPOGEGRAMMENI
0N/A                return true;
0N/A            } else if ((ch >= 0x1D2C) && (ch <= 0x1D61)) {
0N/A                // MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
0N/A                return true;
0N/A            } else if ((ch >= 0x2160) && (ch <= 0x217F)) {
0N/A                // ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND
0N/A                // SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND
0N/A                return true;
0N/A            } else if ((ch >= 0x24B6) && (ch <= 0x24E9)) {
0N/A                // CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z
0N/A                // CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
0N/A                return true;
0N/A            } else {
0N/A                return false;
0N/A            }
0N/A        }
0N/A    }
0N/A
0N/A    private static boolean isSoftDotted(int ch) {
0N/A        switch (ch) {
0N/A        case 0x0069: // Soft_Dotted # L&       LATIN SMALL LETTER I
0N/A        case 0x006A: // Soft_Dotted # L&       LATIN SMALL LETTER J
0N/A        case 0x012F: // Soft_Dotted # L&       LATIN SMALL LETTER I WITH OGONEK
0N/A        case 0x0268: // Soft_Dotted # L&       LATIN SMALL LETTER I WITH STROKE
0N/A        case 0x0456: // Soft_Dotted # L&       CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
0N/A        case 0x0458: // Soft_Dotted # L&       CYRILLIC SMALL LETTER JE
0N/A        case 0x1D62: // Soft_Dotted # L&       LATIN SUBSCRIPT SMALL LETTER I
0N/A        case 0x1E2D: // Soft_Dotted # L&       LATIN SMALL LETTER I WITH TILDE BELOW
0N/A        case 0x1ECB: // Soft_Dotted # L&       LATIN SMALL LETTER I WITH DOT BELOW
0N/A        case 0x2071: // Soft_Dotted # L&       SUPERSCRIPT LATIN SMALL LETTER I
0N/A            return true;
0N/A        default:
0N/A            return false;
0N/A        }
0N/A    }
0N/A
0N/A    /**
0N/A     * An internal class that represents an entry in the Special Casing Properties.
0N/A     */
0N/A    static class Entry {
0N/A        int ch;
0N/A        char [] lower;
0N/A        char [] upper;
0N/A        String lang;
0N/A        int condition;
0N/A
0N/A        Entry(int ch, char[] lower, char[] upper, String lang, int condition) {
0N/A            this.ch = ch;
0N/A            this.lower = lower;
0N/A            this.upper = upper;
0N/A            this.lang = lang;
0N/A            this.condition = condition;
0N/A        }
0N/A
0N/A        int getCodePoint() {
0N/A            return ch;
0N/A        }
0N/A
0N/A        char[] getLowerCase() {
0N/A            return lower;
0N/A        }
0N/A
0N/A        char[] getUpperCase() {
0N/A            return upper;
0N/A        }
0N/A
0N/A        String getLanguage() {
0N/A            return lang;
0N/A        }
0N/A
0N/A        int getCondition() {
0N/A            return condition;
0N/A        }
0N/A    }
0N/A}