0N/A/*
3081N/A * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
0N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
0N/A *
0N/A * This code is free software; you can redistribute it and/or modify it
0N/A * under the terms of the GNU General Public License version 2 only, as
2362N/A * published by the Free Software Foundation. Oracle designates this
0N/A * particular file as subject to the "Classpath" exception as provided
2362N/A * by Oracle in the LICENSE file that accompanied this code.
0N/A *
0N/A * This code is distributed in the hope that it will be useful, but WITHOUT
0N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0N/A * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
0N/A * version 2 for more details (a copy is included in the LICENSE file that
0N/A * accompanied this code).
0N/A *
0N/A * You should have received a copy of the GNU General Public License version
0N/A * 2 along with this work; if not, write to the Free Software Foundation,
0N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0N/A *
2362N/A * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
2362N/A * or visit www.oracle.com if you need additional information or have any
2362N/A * questions.
0N/A */
0N/A
0N/Apackage java.awt.font;
0N/A
1914N/Aimport java.io.IOException;
1914N/Aimport java.io.ObjectOutputStream;
1914N/Aimport java.util.Arrays;
1914N/Aimport java.util.Comparator;
1914N/Aimport java.util.EnumSet;
1914N/Aimport java.util.Set;
1914N/A
0N/A/**
0N/A * The <code>NumericShaper</code> class is used to convert Latin-1 (European)
0N/A * digits to other Unicode decimal digits. Users of this class will
0N/A * primarily be people who wish to present data using
0N/A * national digit shapes, but find it more convenient to represent the
0N/A * data internally using Latin-1 (European) digits. This does not
0N/A * interpret the deprecated numeric shape selector character (U+206E).
0N/A * <p>
0N/A * Instances of <code>NumericShaper</code> are typically applied
0N/A * as attributes to text with the
0N/A * {@link TextAttribute#NUMERIC_SHAPING NUMERIC_SHAPING} attribute
0N/A * of the <code>TextAttribute</code> class.
0N/A * For example, this code snippet causes a <code>TextLayout</code> to
0N/A * shape European digits to Arabic in an Arabic context:<br>
0N/A * <blockquote><pre>
0N/A * Map map = new HashMap();
0N/A * map.put(TextAttribute.NUMERIC_SHAPING,
0N/A * NumericShaper.getContextualShaper(NumericShaper.ARABIC));
0N/A * FontRenderContext frc = ...;
0N/A * TextLayout layout = new TextLayout(text, map, frc);
0N/A * layout.draw(g2d, x, y);
0N/A * </pre></blockquote>
0N/A * <br>
0N/A * It is also possible to perform numeric shaping explicitly using instances
0N/A * of <code>NumericShaper</code>, as this code snippet demonstrates:<br>
0N/A * <blockquote><pre>
3301N/A * char[] text = ...;
3301N/A * // shape all EUROPEAN digits (except zero) to ARABIC digits
3301N/A * NumericShaper shaper = NumericShaper.getShaper(NumericShaper.ARABIC);
3301N/A * shaper.shape(text, start, count);
0N/A *
3301N/A * // shape European digits to ARABIC digits if preceding text is Arabic, or
3301N/A * // shape European digits to TAMIL digits if preceding text is Tamil, or
3301N/A * // leave European digits alone if there is no preceding text, or
3301N/A * // preceding text is neither Arabic nor Tamil
3301N/A * NumericShaper shaper =
3301N/A * NumericShaper.getContextualShaper(NumericShaper.ARABIC |
3301N/A * NumericShaper.TAMIL,
3301N/A * NumericShaper.EUROPEAN);
3301N/A * shaper.shape(text, start, count);
1914N/A * </pre></blockquote>
1914N/A *
1914N/A * <p><b>Bit mask- and enum-based Unicode ranges</b></p>
1914N/A *
1914N/A * <p>This class supports two different programming interfaces to
1914N/A * represent Unicode ranges for script-specific digits: bit
1914N/A * mask-based ones, such as {@link #ARABIC NumericShaper.ARABIC}, and
1914N/A * enum-based ones, such as {@link NumericShaper.Range#ARABIC}.
1914N/A * Multiple ranges can be specified by ORing bit mask-based constants,
1914N/A * such as:
1914N/A * <blockquote><pre>
1914N/A * NumericShaper.ARABIC | NumericShaper.TAMIL
0N/A * </pre></blockquote>
1914N/A * or creating a {@code Set} with the {@link NumericShaper.Range}
1914N/A * constants, such as:
1914N/A * <blockquote><pre>
1914N/A * EnumSet.of(NumericShaper.Range.ARABIC, NumericShaper.Range.TAMIL)
1914N/A * </pre></blockquote>
1914N/A * The enum-based ranges are a super set of the bit mask-based ones.
1914N/A *
1914N/A * <p>If the two interfaces are mixed (including serialization),
1914N/A * Unicode range values are mapped to their counterparts where such
1914N/A * mapping is possible, such as {@code NumericShaper.Range.ARABIC}
1914N/A * from/to {@code NumericShaper.ARABIC}. If any unmappable range
1914N/A * values are specified, such as {@code NumericShaper.Range.BALINESE},
1914N/A * those ranges are ignored.
0N/A *
3301N/A * <p><b>Decimal Digits Precedence</b></p>
3301N/A *
3301N/A * <p>A Unicode range may have more than one set of decimal digits. If
3301N/A * multiple decimal digits sets are specified for the same Unicode
3301N/A * range, one of the sets will take precedence as follows.
3301N/A *
3301N/A * <table border=1 cellspacing=3 cellpadding=0 summary="NumericShaper constants precedence.">
3301N/A * <tr>
3301N/A * <th class="TableHeadingColor">Unicode Range</th>
3301N/A * <th class="TableHeadingColor"><code>NumericShaper</code> Constants</th>
3301N/A * <th class="TableHeadingColor">Precedence</th>
3301N/A * </tr>
3301N/A * <tr>
3301N/A * <td rowspan="2">Arabic</td>
3301N/A * <td>{@link NumericShaper#ARABIC NumericShaper.ARABIC}<br>
3301N/A * {@link NumericShaper#EASTERN_ARABIC NumericShaper.EASTERN_ARABIC}</td>
3301N/A * <td>{@link NumericShaper#EASTERN_ARABIC NumericShaper.EASTERN_ARABIC}</td>
3301N/A * </tr>
3301N/A * <tr>
3301N/A * <td>{@link NumericShaper.Range#ARABIC}<br>
3301N/A * {@link NumericShaper.Range#EASTERN_ARABIC}</td>
3301N/A * <td>{@link NumericShaper.Range#EASTERN_ARABIC}</td>
3301N/A * </tr>
3301N/A * <tr>
3301N/A * <td>Tai Tham</td>
3301N/A * <td>{@link NumericShaper.Range#TAI_THAM_HORA}<br>
3301N/A * {@link NumericShaper.Range#TAI_THAM_THAM}</td>
3301N/A * <td>{@link NumericShaper.Range#TAI_THAM_THAM}</td>
3301N/A * </tr>
3301N/A * </table>
3301N/A *
0N/A * @since 1.4
0N/A */
0N/A
0N/Apublic final class NumericShaper implements java.io.Serializable {
1914N/A /**
1914N/A * A {@code NumericShaper.Range} represents a Unicode range of a
1914N/A * script having its own decimal digits. For example, the {@link
1914N/A * NumericShaper.Range#THAI} range has the Thai digits, THAI DIGIT
1914N/A * ZERO (U+0E50) to THAI DIGIT NINE (U+0E59).
1914N/A *
1914N/A * <p>The <code>Range</code> enum replaces the traditional bit
1914N/A * mask-based values (e.g., {@link NumericShaper#ARABIC}), and
1914N/A * supports more Unicode ranges than the bit mask-based ones. For
1914N/A * example, the following code using the bit mask:
1914N/A * <blockquote><pre>
1914N/A * NumericShaper.getContextualShaper(NumericShaper.ARABIC |
1914N/A * NumericShaper.TAMIL,
1914N/A * NumericShaper.EUROPEAN);
1914N/A * </pre></blockquote>
1914N/A * can be written using this enum as:
1914N/A * <blockquote><pre>
1914N/A * NumericShaper.getContextualShaper(EnumSet.of(
1914N/A * NumericShaper.Range.ARABIC,
1914N/A * NumericShaper.Range.TAMIL),
1914N/A * NumericShaper.Range.EUROPEAN);
1914N/A * </pre></blockquote>
1914N/A *
1914N/A * @since 1.7
1914N/A */
1914N/A public static enum Range {
2291N/A // The order of EUROPEAN to MOGOLIAN must be consistent
2291N/A // with the bitmask-based constants.
1914N/A /**
1914N/A * The Latin (European) range with the Latin (ASCII) digits.
1914N/A */
1914N/A EUROPEAN ('\u0030', '\u0000', '\u0300'),
1914N/A /**
1914N/A * The Arabic range with the Arabic-Indic digits.
1914N/A */
1914N/A ARABIC ('\u0660', '\u0600', '\u0780'),
1914N/A /**
1914N/A * The Arabic range with the Eastern Arabic-Indic digits.
1914N/A */
1914N/A EASTERN_ARABIC ('\u06f0', '\u0600', '\u0780'),
1914N/A /**
1914N/A * The Devanagari range with the Devanagari digits.
1914N/A */
1914N/A DEVANAGARI ('\u0966', '\u0900', '\u0980'),
1914N/A /**
1914N/A * The Bengali range with the Bengali digits.
1914N/A */
1914N/A BENGALI ('\u09e6', '\u0980', '\u0a00'),
1914N/A /**
1914N/A * The Gurmukhi range with the Gurmukhi digits.
1914N/A */
1914N/A GURMUKHI ('\u0a66', '\u0a00', '\u0a80'),
1914N/A /**
1914N/A * The Gujarati range with the Gujarati digits.
1914N/A */
1914N/A GUJARATI ('\u0ae6', '\u0b00', '\u0b80'),
1914N/A /**
1914N/A * The Oriya range with the Oriya digits.
1914N/A */
1914N/A ORIYA ('\u0b66', '\u0b00', '\u0b80'),
1914N/A /**
1914N/A * The Tamil range with the Tamil digits.
1914N/A */
1914N/A TAMIL ('\u0be6', '\u0b80', '\u0c00'),
1914N/A /**
1914N/A * The Telugu range with the Telugu digits.
1914N/A */
1914N/A TELUGU ('\u0c66', '\u0c00', '\u0c80'),
1914N/A /**
1914N/A * The Kannada range with the Kannada digits.
1914N/A */
1914N/A KANNADA ('\u0ce6', '\u0c80', '\u0d00'),
1914N/A /**
1914N/A * The Malayalam range with the Malayalam digits.
1914N/A */
1914N/A MALAYALAM ('\u0d66', '\u0d00', '\u0d80'),
1914N/A /**
1914N/A * The Thai range with the Thai digits.
1914N/A */
1914N/A THAI ('\u0e50', '\u0e00', '\u0e80'),
1914N/A /**
1914N/A * The Lao range with the Lao digits.
1914N/A */
1914N/A LAO ('\u0ed0', '\u0e80', '\u0f00'),
1914N/A /**
1914N/A * The Tibetan range with the Tibetan digits.
1914N/A */
1914N/A TIBETAN ('\u0f20', '\u0f00', '\u1000'),
1914N/A /**
1914N/A * The Myanmar range with the Myanmar digits.
1914N/A */
1914N/A MYANMAR ('\u1040', '\u1000', '\u1080'),
1914N/A /**
1914N/A * The Ethiopic range with the Ethiopic digits. Ethiopic
1914N/A * does not have a decimal digit 0 so Latin (European) 0 is
1914N/A * used.
1914N/A */
1914N/A ETHIOPIC ('\u1369', '\u1200', '\u1380') {
1914N/A @Override
1914N/A char getNumericBase() { return 1; }
1914N/A },
1914N/A /**
1914N/A * The Khmer range with the Khmer digits.
1914N/A */
1914N/A KHMER ('\u17e0', '\u1780', '\u1800'),
1914N/A /**
1914N/A * The Mongolian range with the Mongolian digits.
1914N/A */
1914N/A MONGOLIAN ('\u1810', '\u1800', '\u1900'),
2291N/A // The order of EUROPEAN to MOGOLIAN must be consistent
2291N/A // with the bitmask-based constants.
2291N/A
1914N/A /**
1914N/A * The N'Ko range with the N'Ko digits.
1914N/A */
1914N/A NKO ('\u07c0', '\u07c0', '\u0800'),
1914N/A /**
1914N/A * The Myanmar range with the Myanmar Shan digits.
1914N/A */
1914N/A MYANMAR_SHAN ('\u1090', '\u1000', '\u10a0'),
1914N/A /**
1914N/A * The Limbu range with the Limbu digits.
1914N/A */
1914N/A LIMBU ('\u1946', '\u1900', '\u1950'),
1914N/A /**
1914N/A * The New Tai Lue range with the New Tai Lue digits.
1914N/A */
1914N/A NEW_TAI_LUE ('\u19d0', '\u1980', '\u19e0'),
1914N/A /**
1914N/A * The Balinese range with the Balinese digits.
1914N/A */
1914N/A BALINESE ('\u1b50', '\u1b00', '\u1b80'),
1914N/A /**
1914N/A * The Sundanese range with the Sundanese digits.
1914N/A */
1914N/A SUNDANESE ('\u1bb0', '\u1b80', '\u1bc0'),
1914N/A /**
1914N/A * The Lepcha range with the Lepcha digits.
1914N/A */
1914N/A LEPCHA ('\u1c40', '\u1c00', '\u1c50'),
1914N/A /**
1914N/A * The Ol Chiki range with the Ol Chiki digits.
1914N/A */
1914N/A OL_CHIKI ('\u1c50', '\u1c50', '\u1c80'),
1914N/A /**
1914N/A * The Vai range with the Vai digits.
1914N/A */
1914N/A VAI ('\ua620', '\ua500', '\ua640'),
1914N/A /**
1914N/A * The Saurashtra range with the Saurashtra digits.
1914N/A */
1914N/A SAURASHTRA ('\ua8d0', '\ua880', '\ua8e0'),
1914N/A /**
1914N/A * The Kayah Li range with the Kayah Li digits.
1914N/A */
1914N/A KAYAH_LI ('\ua900', '\ua900', '\ua930'),
1914N/A /**
1914N/A * The Cham range with the Cham digits.
1914N/A */
3081N/A CHAM ('\uaa50', '\uaa00', '\uaa60'),
3081N/A /**
3081N/A * The Tai Tham Hora range with the Tai Tham Hora digits.
3081N/A */
3081N/A TAI_THAM_HORA ('\u1a80', '\u1a20', '\u1ab0'),
3081N/A /**
3081N/A * The Tai Tham Tham range with the Tai Tham Tham digits.
3081N/A */
3081N/A TAI_THAM_THAM ('\u1a90', '\u1a20', '\u1ab0'),
3081N/A /**
3081N/A * The Javanese range with the Javanese digits.
3081N/A */
3081N/A JAVANESE ('\ua9d0', '\ua980', '\ua9e0'),
3081N/A /**
3081N/A * The Meetei Mayek range with the Meetei Mayek digits.
3081N/A */
3081N/A MEETEI_MAYEK ('\uabf0', '\uabc0', '\uac00');
1914N/A
1914N/A private static int toRangeIndex(Range script) {
1914N/A int index = script.ordinal();
1914N/A return index < NUM_KEYS ? index : -1;
1914N/A }
1914N/A
1914N/A private static Range indexToRange(int index) {
1914N/A return index < NUM_KEYS ? Range.values()[index] : null;
1914N/A }
1914N/A
1914N/A private static int toRangeMask(Set<Range> ranges) {
1914N/A int m = 0;
1914N/A for (Range range : ranges) {
1914N/A int index = range.ordinal();
1914N/A if (index < NUM_KEYS) {
1914N/A m |= 1 << index;
1914N/A }
1914N/A }
1914N/A return m;
1914N/A }
1914N/A
1914N/A private static Set<Range> maskToRangeSet(int mask) {
1914N/A Set<Range> set = EnumSet.noneOf(Range.class);
1914N/A Range[] a = Range.values();
1914N/A for (int i = 0; i < NUM_KEYS; i++) {
1914N/A if ((mask & (1 << i)) != 0) {
1914N/A set.add(a[i]);
1914N/A }
1914N/A }
1914N/A return set;
1914N/A }
1914N/A
1914N/A // base character of range digits
1914N/A private final int base;
1914N/A // Unicode range
1914N/A private final int start, // inclusive
1914N/A end; // exclusive
1914N/A
1914N/A private Range(int base, int start, int end) {
1914N/A this.base = base - ('0' + getNumericBase());
1914N/A this.start = start;
1914N/A this.end = end;
1914N/A }
1914N/A
1914N/A private int getDigitBase() {
1914N/A return base;
1914N/A }
1914N/A
1914N/A char getNumericBase() {
1914N/A return 0;
1914N/A }
1914N/A
1914N/A private boolean inRange(int c) {
1914N/A return start <= c && c < end;
1914N/A }
1914N/A }
1914N/A
0N/A /** index of context for contextual shaping - values range from 0 to 18 */
0N/A private int key;
0N/A
0N/A /** flag indicating whether to shape contextually (high bit) and which
0N/A * digit ranges to shape (bits 0-18)
0N/A */
0N/A private int mask;
0N/A
1914N/A /**
1914N/A * The context {@code Range} for contextual shaping or the {@code
1914N/A * Range} for non-contextual shaping. {@code null} for the bit
1914N/A * mask-based API.
1914N/A *
1914N/A * @since 1.7
1914N/A */
1914N/A private Range shapingRange;
1914N/A
1914N/A /**
1914N/A * {@code Set<Range>} indicating which Unicode ranges to
1914N/A * shape. {@code null} for the bit mask-based API.
1914N/A */
1914N/A private transient Set<Range> rangeSet;
1914N/A
2291N/A /**
2291N/A * rangeSet.toArray() value. Sorted by Range.base when the number
2291N/A * of elements is greater than BSEARCH_THRESHOLD.
2291N/A */
2291N/A private transient Range[] rangeArray;
2291N/A
2291N/A /**
2291N/A * If more than BSEARCH_THRESHOLD ranges are specified, binary search is used.
2291N/A */
2291N/A private static final int BSEARCH_THRESHOLD = 3;
2291N/A
1914N/A private static final long serialVersionUID = -8022764705923730308L;
1914N/A
0N/A /** Identifies the Latin-1 (European) and extended range, and
0N/A * Latin-1 (European) decimal base.
0N/A */
0N/A public static final int EUROPEAN = 1<<0;
0N/A
0N/A /** Identifies the ARABIC range and decimal base. */
0N/A public static final int ARABIC = 1<<1;
0N/A
0N/A /** Identifies the ARABIC range and ARABIC_EXTENDED decimal base. */
0N/A public static final int EASTERN_ARABIC = 1<<2;
0N/A
0N/A /** Identifies the DEVANAGARI range and decimal base. */
0N/A public static final int DEVANAGARI = 1<<3;
0N/A
0N/A /** Identifies the BENGALI range and decimal base. */
0N/A public static final int BENGALI = 1<<4;
0N/A
0N/A /** Identifies the GURMUKHI range and decimal base. */
0N/A public static final int GURMUKHI = 1<<5;
0N/A
0N/A /** Identifies the GUJARATI range and decimal base. */
0N/A public static final int GUJARATI = 1<<6;
0N/A
0N/A /** Identifies the ORIYA range and decimal base. */
0N/A public static final int ORIYA = 1<<7;
0N/A
1914N/A /** Identifies the TAMIL range and decimal base. */
1914N/A // TAMIL DIGIT ZERO was added in Unicode 4.1
0N/A public static final int TAMIL = 1<<8;
0N/A
0N/A /** Identifies the TELUGU range and decimal base. */
0N/A public static final int TELUGU = 1<<9;
0N/A
0N/A /** Identifies the KANNADA range and decimal base. */
0N/A public static final int KANNADA = 1<<10;
0N/A
0N/A /** Identifies the MALAYALAM range and decimal base. */
0N/A public static final int MALAYALAM = 1<<11;
0N/A
0N/A /** Identifies the THAI range and decimal base. */
0N/A public static final int THAI = 1<<12;
0N/A
0N/A /** Identifies the LAO range and decimal base. */
0N/A public static final int LAO = 1<<13;
0N/A
0N/A /** Identifies the TIBETAN range and decimal base. */
0N/A public static final int TIBETAN = 1<<14;
0N/A
0N/A /** Identifies the MYANMAR range and decimal base. */
0N/A public static final int MYANMAR = 1<<15;
0N/A
0N/A /** Identifies the ETHIOPIC range and decimal base. */
0N/A public static final int ETHIOPIC = 1<<16;
0N/A
0N/A /** Identifies the KHMER range and decimal base. */
0N/A public static final int KHMER = 1<<17;
0N/A
0N/A /** Identifies the MONGOLIAN range and decimal base. */
0N/A public static final int MONGOLIAN = 1<<18;
0N/A
1914N/A /** Identifies all ranges, for full contextual shaping.
1914N/A *
1914N/A * <p>This constant specifies all of the bit mask-based
1914N/A * ranges. Use {@code EnumSet.allOf(NumericShaper.Range.class)} to
1914N/A * specify all of the enum-based ranges.
1914N/A */
0N/A public static final int ALL_RANGES = 0x0007ffff;
0N/A
0N/A private static final int EUROPEAN_KEY = 0;
0N/A private static final int ARABIC_KEY = 1;
0N/A private static final int EASTERN_ARABIC_KEY = 2;
0N/A private static final int DEVANAGARI_KEY = 3;
0N/A private static final int BENGALI_KEY = 4;
0N/A private static final int GURMUKHI_KEY = 5;
0N/A private static final int GUJARATI_KEY = 6;
0N/A private static final int ORIYA_KEY = 7;
0N/A private static final int TAMIL_KEY = 8;
0N/A private static final int TELUGU_KEY = 9;
0N/A private static final int KANNADA_KEY = 10;
0N/A private static final int MALAYALAM_KEY = 11;
0N/A private static final int THAI_KEY = 12;
0N/A private static final int LAO_KEY = 13;
0N/A private static final int TIBETAN_KEY = 14;
0N/A private static final int MYANMAR_KEY = 15;
0N/A private static final int ETHIOPIC_KEY = 16;
0N/A private static final int KHMER_KEY = 17;
0N/A private static final int MONGOLIAN_KEY = 18;
0N/A
1914N/A private static final int NUM_KEYS = MONGOLIAN_KEY + 1; // fixed
0N/A
0N/A private static final int CONTEXTUAL_MASK = 1<<31;
0N/A
0N/A private static final char[] bases = {
0N/A '\u0030' - '\u0030', // EUROPEAN
1914N/A '\u0660' - '\u0030', // ARABIC-INDIC
1914N/A '\u06f0' - '\u0030', // EXTENDED ARABIC-INDIC (EASTERN_ARABIC)
0N/A '\u0966' - '\u0030', // DEVANAGARI
0N/A '\u09e6' - '\u0030', // BENGALI
0N/A '\u0a66' - '\u0030', // GURMUKHI
0N/A '\u0ae6' - '\u0030', // GUJARATI
0N/A '\u0b66' - '\u0030', // ORIYA
1914N/A '\u0be6' - '\u0030', // TAMIL - zero was added in Unicode 4.1
0N/A '\u0c66' - '\u0030', // TELUGU
0N/A '\u0ce6' - '\u0030', // KANNADA
0N/A '\u0d66' - '\u0030', // MALAYALAM
0N/A '\u0e50' - '\u0030', // THAI
0N/A '\u0ed0' - '\u0030', // LAO
0N/A '\u0f20' - '\u0030', // TIBETAN
0N/A '\u1040' - '\u0030', // MYANMAR
1914N/A '\u1369' - '\u0031', // ETHIOPIC - no zero
0N/A '\u17e0' - '\u0030', // KHMER
0N/A '\u1810' - '\u0030', // MONGOLIAN
0N/A };
0N/A
0N/A // some ranges adjoin or overlap, rethink if we want to do a binary search on this
0N/A
0N/A private static final char[] contexts = {
0N/A '\u0000', '\u0300', // 'EUROPEAN' (really latin-1 and extended)
1914N/A '\u0600', '\u0780', // ARABIC
1914N/A '\u0600', '\u0780', // EASTERN_ARABIC -- note overlap with arabic
0N/A '\u0900', '\u0980', // DEVANAGARI
0N/A '\u0980', '\u0a00', // BENGALI
0N/A '\u0a00', '\u0a80', // GURMUKHI
0N/A '\u0a80', '\u0b00', // GUJARATI
0N/A '\u0b00', '\u0b80', // ORIYA
1914N/A '\u0b80', '\u0c00', // TAMIL
0N/A '\u0c00', '\u0c80', // TELUGU
0N/A '\u0c80', '\u0d00', // KANNADA
0N/A '\u0d00', '\u0d80', // MALAYALAM
0N/A '\u0e00', '\u0e80', // THAI
0N/A '\u0e80', '\u0f00', // LAO
0N/A '\u0f00', '\u1000', // TIBETAN
0N/A '\u1000', '\u1080', // MYANMAR
1914N/A '\u1200', '\u1380', // ETHIOPIC - note missing zero
0N/A '\u1780', '\u1800', // KHMER
0N/A '\u1800', '\u1900', // MONGOLIAN
0N/A '\uffff',
0N/A };
0N/A
0N/A // assume most characters are near each other so probing the cache is infrequent,
0N/A // and a linear probe is ok.
0N/A
0N/A private static int ctCache = 0;
0N/A private static int ctCacheLimit = contexts.length - 2;
0N/A
0N/A // warning, synchronize access to this as it modifies state
0N/A private static int getContextKey(char c) {
0N/A if (c < contexts[ctCache]) {
0N/A while (ctCache > 0 && c < contexts[ctCache]) --ctCache;
0N/A } else if (c >= contexts[ctCache + 1]) {
0N/A while (ctCache < ctCacheLimit && c >= contexts[ctCache + 1]) ++ctCache;
0N/A }
0N/A
0N/A // if we're not in a known range, then return EUROPEAN as the range key
0N/A return (ctCache & 0x1) == 0 ? (ctCache / 2) : EUROPEAN_KEY;
0N/A }
0N/A
1914N/A // cache for the NumericShaper.Range version
1914N/A private transient volatile Range currentRange = Range.EUROPEAN;
1914N/A
2291N/A private Range rangeForCodePoint(final int codepoint) {
2291N/A if (currentRange.inRange(codepoint)) {
2291N/A return currentRange;
1914N/A }
1914N/A
2291N/A final Range[] ranges = rangeArray;
2291N/A if (ranges.length > BSEARCH_THRESHOLD) {
2291N/A int lo = 0;
2291N/A int hi = ranges.length - 1;
2291N/A while (lo <= hi) {
2291N/A int mid = (lo + hi) / 2;
2291N/A Range range = ranges[mid];
2291N/A if (codepoint < range.start) {
2291N/A hi = mid - 1;
2291N/A } else if (codepoint >= range.end) {
2291N/A lo = mid + 1;
2291N/A } else {
2291N/A currentRange = range;
2291N/A return range;
2291N/A }
2291N/A }
2291N/A } else {
2291N/A for (int i = 0; i < ranges.length; i++) {
2291N/A if (ranges[i].inRange(codepoint)) {
2291N/A return ranges[i];
2291N/A }
1914N/A }
1914N/A }
1914N/A return Range.EUROPEAN;
1914N/A }
1914N/A
0N/A /*
0N/A * A range table of strong directional characters (types L, R, AL).
0N/A * Even (left) indexes are starts of ranges of non-strong-directional (or undefined)
0N/A * characters, odd (right) indexes are starts of ranges of strong directional
0N/A * characters.
0N/A */
1914N/A private static int[] strongTable = {
1914N/A 0x0000, 0x0041,
1914N/A 0x005b, 0x0061,
1914N/A 0x007b, 0x00aa,
1914N/A 0x00ab, 0x00b5,
1914N/A 0x00b6, 0x00ba,
1914N/A 0x00bb, 0x00c0,
1914N/A 0x00d7, 0x00d8,
1914N/A 0x00f7, 0x00f8,
1914N/A 0x02b9, 0x02bb,
1914N/A 0x02c2, 0x02d0,
1914N/A 0x02d2, 0x02e0,
1914N/A 0x02e5, 0x02ee,
1914N/A 0x02ef, 0x0370,
1914N/A 0x0374, 0x0376,
1914N/A 0x037e, 0x0386,
1914N/A 0x0387, 0x0388,
1914N/A 0x03f6, 0x03f7,
1914N/A 0x0483, 0x048a,
1914N/A 0x058a, 0x05be,
1914N/A 0x05bf, 0x05c0,
1914N/A 0x05c1, 0x05c3,
1914N/A 0x05c4, 0x05c6,
1914N/A 0x05c7, 0x05d0,
1914N/A 0x0600, 0x0608,
1914N/A 0x0609, 0x060b,
1914N/A 0x060c, 0x060d,
1914N/A 0x060e, 0x061b,
1914N/A 0x064b, 0x066d,
1914N/A 0x0670, 0x0671,
1914N/A 0x06d6, 0x06e5,
1914N/A 0x06e7, 0x06ee,
1914N/A 0x06f0, 0x06fa,
1914N/A 0x070f, 0x0710,
1914N/A 0x0711, 0x0712,
1914N/A 0x0730, 0x074d,
1914N/A 0x07a6, 0x07b1,
1914N/A 0x07eb, 0x07f4,
1914N/A 0x07f6, 0x07fa,
3081N/A 0x0816, 0x081a,
3081N/A 0x081b, 0x0824,
3081N/A 0x0825, 0x0828,
3081N/A 0x0829, 0x0830,
3081N/A 0x0859, 0x085e,
3081N/A 0x0900, 0x0903,
3081N/A 0x093a, 0x093b,
1914N/A 0x093c, 0x093d,
1914N/A 0x0941, 0x0949,
3081N/A 0x094d, 0x094e,
1914N/A 0x0951, 0x0958,
1914N/A 0x0962, 0x0964,
1914N/A 0x0981, 0x0982,
1914N/A 0x09bc, 0x09bd,
1914N/A 0x09c1, 0x09c7,
1914N/A 0x09cd, 0x09ce,
1914N/A 0x09e2, 0x09e6,
1914N/A 0x09f2, 0x09f4,
3081N/A 0x09fb, 0x0a03,
1914N/A 0x0a3c, 0x0a3e,
1914N/A 0x0a41, 0x0a59,
1914N/A 0x0a70, 0x0a72,
1914N/A 0x0a75, 0x0a83,
1914N/A 0x0abc, 0x0abd,
1914N/A 0x0ac1, 0x0ac9,
1914N/A 0x0acd, 0x0ad0,
1914N/A 0x0ae2, 0x0ae6,
1914N/A 0x0af1, 0x0b02,
1914N/A 0x0b3c, 0x0b3d,
1914N/A 0x0b3f, 0x0b40,
1914N/A 0x0b41, 0x0b47,
1914N/A 0x0b4d, 0x0b57,
1914N/A 0x0b62, 0x0b66,
1914N/A 0x0b82, 0x0b83,
1914N/A 0x0bc0, 0x0bc1,
1914N/A 0x0bcd, 0x0bd0,
1914N/A 0x0bf3, 0x0c01,
1914N/A 0x0c3e, 0x0c41,
1914N/A 0x0c46, 0x0c58,
1914N/A 0x0c62, 0x0c66,
1914N/A 0x0c78, 0x0c7f,
1914N/A 0x0cbc, 0x0cbd,
1914N/A 0x0ccc, 0x0cd5,
1914N/A 0x0ce2, 0x0ce6,
1914N/A 0x0d41, 0x0d46,
3081N/A 0x0d4d, 0x0d4e,
1914N/A 0x0d62, 0x0d66,
1914N/A 0x0dca, 0x0dcf,
1914N/A 0x0dd2, 0x0dd8,
1914N/A 0x0e31, 0x0e32,
1914N/A 0x0e34, 0x0e40,
1914N/A 0x0e47, 0x0e4f,
1914N/A 0x0eb1, 0x0eb2,
1914N/A 0x0eb4, 0x0ebd,
1914N/A 0x0ec8, 0x0ed0,
1914N/A 0x0f18, 0x0f1a,
1914N/A 0x0f35, 0x0f36,
1914N/A 0x0f37, 0x0f38,
1914N/A 0x0f39, 0x0f3e,
1914N/A 0x0f71, 0x0f7f,
1914N/A 0x0f80, 0x0f85,
1914N/A 0x0f86, 0x0f88,
3081N/A 0x0f8d, 0x0fbe,
1914N/A 0x0fc6, 0x0fc7,
1914N/A 0x102d, 0x1031,
1914N/A 0x1032, 0x1038,
1914N/A 0x1039, 0x103b,
1914N/A 0x103d, 0x103f,
1914N/A 0x1058, 0x105a,
1914N/A 0x105e, 0x1061,
1914N/A 0x1071, 0x1075,
1914N/A 0x1082, 0x1083,
1914N/A 0x1085, 0x1087,
1914N/A 0x108d, 0x108e,
3081N/A 0x109d, 0x109e,
3081N/A 0x135d, 0x1360,
1914N/A 0x1390, 0x13a0,
3081N/A 0x1400, 0x1401,
1914N/A 0x1680, 0x1681,
1914N/A 0x169b, 0x16a0,
1914N/A 0x1712, 0x1720,
1914N/A 0x1732, 0x1735,
1914N/A 0x1752, 0x1760,
1914N/A 0x1772, 0x1780,
1914N/A 0x17b7, 0x17be,
1914N/A 0x17c6, 0x17c7,
1914N/A 0x17c9, 0x17d4,
1914N/A 0x17db, 0x17dc,
1914N/A 0x17dd, 0x17e0,
1914N/A 0x17f0, 0x1810,
1914N/A 0x18a9, 0x18aa,
1914N/A 0x1920, 0x1923,
1914N/A 0x1927, 0x1929,
1914N/A 0x1932, 0x1933,
1914N/A 0x1939, 0x1946,
1914N/A 0x19de, 0x1a00,
1914N/A 0x1a17, 0x1a19,
3081N/A 0x1a56, 0x1a57,
3081N/A 0x1a58, 0x1a61,
3081N/A 0x1a62, 0x1a63,
3081N/A 0x1a65, 0x1a6d,
3081N/A 0x1a73, 0x1a80,
1914N/A 0x1b00, 0x1b04,
1914N/A 0x1b34, 0x1b35,
1914N/A 0x1b36, 0x1b3b,
1914N/A 0x1b3c, 0x1b3d,
1914N/A 0x1b42, 0x1b43,
1914N/A 0x1b6b, 0x1b74,
1914N/A 0x1b80, 0x1b82,
1914N/A 0x1ba2, 0x1ba6,
1914N/A 0x1ba8, 0x1baa,
3081N/A 0x1be6, 0x1be7,
3081N/A 0x1be8, 0x1bea,
3081N/A 0x1bed, 0x1bee,
3081N/A 0x1bef, 0x1bf2,
1914N/A 0x1c2c, 0x1c34,
1914N/A 0x1c36, 0x1c3b,
3081N/A 0x1cd0, 0x1cd3,
3081N/A 0x1cd4, 0x1ce1,
3081N/A 0x1ce2, 0x1ce9,
3081N/A 0x1ced, 0x1cee,
1914N/A 0x1dc0, 0x1e00,
1914N/A 0x1fbd, 0x1fbe,
1914N/A 0x1fbf, 0x1fc2,
1914N/A 0x1fcd, 0x1fd0,
1914N/A 0x1fdd, 0x1fe0,
1914N/A 0x1fed, 0x1ff2,
1914N/A 0x1ffd, 0x200e,
1914N/A 0x2010, 0x2071,
1914N/A 0x2074, 0x207f,
1914N/A 0x2080, 0x2090,
1914N/A 0x20a0, 0x2102,
1914N/A 0x2103, 0x2107,
1914N/A 0x2108, 0x210a,
1914N/A 0x2114, 0x2115,
1914N/A 0x2116, 0x2119,
1914N/A 0x211e, 0x2124,
1914N/A 0x2125, 0x2126,
1914N/A 0x2127, 0x2128,
1914N/A 0x2129, 0x212a,
1914N/A 0x212e, 0x212f,
1914N/A 0x213a, 0x213c,
1914N/A 0x2140, 0x2145,
1914N/A 0x214a, 0x214e,
3081N/A 0x2150, 0x2160,
3081N/A 0x2189, 0x2336,
1914N/A 0x237b, 0x2395,
1914N/A 0x2396, 0x249c,
1914N/A 0x24ea, 0x26ac,
1914N/A 0x26ad, 0x2800,
1914N/A 0x2900, 0x2c00,
3081N/A 0x2ce5, 0x2ceb,
3081N/A 0x2cef, 0x2d00,
3081N/A 0x2d7f, 0x2d80,
1914N/A 0x2de0, 0x3005,
1914N/A 0x3008, 0x3021,
1914N/A 0x302a, 0x3031,
1914N/A 0x3036, 0x3038,
1914N/A 0x303d, 0x3041,
1914N/A 0x3099, 0x309d,
1914N/A 0x30a0, 0x30a1,
1914N/A 0x30fb, 0x30fc,
1914N/A 0x31c0, 0x31f0,
1914N/A 0x321d, 0x3220,
1914N/A 0x3250, 0x3260,
1914N/A 0x327c, 0x327f,
1914N/A 0x32b1, 0x32c0,
1914N/A 0x32cc, 0x32d0,
1914N/A 0x3377, 0x337b,
1914N/A 0x33de, 0x33e0,
1914N/A 0x33ff, 0x3400,
1914N/A 0x4dc0, 0x4e00,
3081N/A 0xa490, 0xa4d0,
1914N/A 0xa60d, 0xa610,
1914N/A 0xa66f, 0xa680,
3081N/A 0xa6f0, 0xa6f2,
1914N/A 0xa700, 0xa722,
1914N/A 0xa788, 0xa789,
1914N/A 0xa802, 0xa803,
1914N/A 0xa806, 0xa807,
1914N/A 0xa80b, 0xa80c,
1914N/A 0xa825, 0xa827,
3081N/A 0xa828, 0xa830,
3081N/A 0xa838, 0xa840,
1914N/A 0xa874, 0xa880,
1914N/A 0xa8c4, 0xa8ce,
3081N/A 0xa8e0, 0xa8f2,
1914N/A 0xa926, 0xa92e,
1914N/A 0xa947, 0xa952,
3081N/A 0xa980, 0xa983,
3081N/A 0xa9b3, 0xa9b4,
3081N/A 0xa9b6, 0xa9ba,
3081N/A 0xa9bc, 0xa9bd,
1914N/A 0xaa29, 0xaa2f,
1914N/A 0xaa31, 0xaa33,
1914N/A 0xaa35, 0xaa40,
1914N/A 0xaa43, 0xaa44,
1914N/A 0xaa4c, 0xaa4d,
3081N/A 0xaab0, 0xaab1,
3081N/A 0xaab2, 0xaab5,
3081N/A 0xaab7, 0xaab9,
3081N/A 0xaabe, 0xaac0,
3081N/A 0xaac1, 0xaac2,
3081N/A 0xabe5, 0xabe6,
3081N/A 0xabe8, 0xabe9,
3081N/A 0xabed, 0xabf0,
1914N/A 0xfb1e, 0xfb1f,
1914N/A 0xfb29, 0xfb2a,
1914N/A 0xfd3e, 0xfd50,
1914N/A 0xfdfd, 0xfe70,
1914N/A 0xfeff, 0xff21,
1914N/A 0xff3b, 0xff41,
1914N/A 0xff5b, 0xff66,
1914N/A 0xffe0, 0x10000,
1914N/A 0x10101, 0x10102,
1914N/A 0x10140, 0x101d0,
1914N/A 0x101fd, 0x10280,
1914N/A 0x1091f, 0x10920,
1914N/A 0x10a01, 0x10a10,
1914N/A 0x10a38, 0x10a40,
3081N/A 0x10b39, 0x10b40,
3081N/A 0x10e60, 0x11000,
3081N/A 0x11001, 0x11002,
3081N/A 0x11038, 0x11047,
3081N/A 0x11052, 0x11066,
3081N/A 0x11080, 0x11082,
3081N/A 0x110b3, 0x110b7,
3081N/A 0x110b9, 0x110bb,
1914N/A 0x1d167, 0x1d16a,
1914N/A 0x1d173, 0x1d183,
1914N/A 0x1d185, 0x1d18c,
1914N/A 0x1d1aa, 0x1d1ae,
1914N/A 0x1d200, 0x1d360,
3081N/A 0x1d6db, 0x1d6dc,
3081N/A 0x1d715, 0x1d716,
3081N/A 0x1d74f, 0x1d750,
3081N/A 0x1d789, 0x1d78a,
3081N/A 0x1d7c3, 0x1d7c4,
3081N/A 0x1d7ce, 0x1f110,
3081N/A 0x1f300, 0x1f48c,
3081N/A 0x1f48d, 0x1f524,
3081N/A 0x1f525, 0x20000,
1914N/A 0xe0001, 0xf0000,
1914N/A 0x10fffe, 0x10ffff // sentinel
0N/A };
0N/A
0N/A
0N/A // use a binary search with a cache
0N/A
1914N/A private transient volatile int stCache = 0;
0N/A
1914N/A private boolean isStrongDirectional(char c) {
1914N/A int cachedIndex = stCache;
1914N/A if (c < strongTable[cachedIndex]) {
1914N/A cachedIndex = search(c, strongTable, 0, cachedIndex);
1914N/A } else if (c >= strongTable[cachedIndex + 1]) {
1914N/A cachedIndex = search(c, strongTable, cachedIndex + 1,
1914N/A strongTable.length - cachedIndex - 1);
0N/A }
1914N/A boolean val = (cachedIndex & 0x1) == 1;
1914N/A stCache = cachedIndex;
1914N/A return val;
0N/A }
0N/A
1914N/A private static int getKeyFromMask(int mask) {
0N/A int key = 0;
0N/A while (key < NUM_KEYS && ((mask & (1<<key)) == 0)) {
0N/A ++key;
0N/A }
0N/A if (key == NUM_KEYS || ((mask & ~(1<<key)) != 0)) {
0N/A throw new IllegalArgumentException("invalid shaper: " + Integer.toHexString(mask));
0N/A }
0N/A return key;
0N/A }
0N/A
0N/A /**
0N/A * Returns a shaper for the provided unicode range. All
0N/A * Latin-1 (EUROPEAN) digits are converted
0N/A * to the corresponding decimal unicode digits.
0N/A * @param singleRange the specified Unicode range
0N/A * @return a non-contextual numeric shaper
0N/A * @throws IllegalArgumentException if the range is not a single range
0N/A */
1914N/A public static NumericShaper getShaper(int singleRange) {
0N/A int key = getKeyFromMask(singleRange);
0N/A return new NumericShaper(key, singleRange);
0N/A }
0N/A
0N/A /**
1914N/A * Returns a shaper for the provided Unicode
1914N/A * range. All Latin-1 (EUROPEAN) digits are converted to the
1914N/A * corresponding decimal digits of the specified Unicode range.
1914N/A *
1914N/A * @param singleRange the Unicode range given by a {@link
1914N/A * NumericShaper.Range} constant.
1914N/A * @return a non-contextual {@code NumericShaper}.
1914N/A * @throws NullPointerException if {@code singleRange} is {@code null}
1914N/A * @since 1.7
1914N/A */
1914N/A public static NumericShaper getShaper(Range singleRange) {
1914N/A return new NumericShaper(singleRange, EnumSet.of(singleRange));
1914N/A }
1914N/A
1914N/A /**
0N/A * Returns a contextual shaper for the provided unicode range(s).
0N/A * Latin-1 (EUROPEAN) digits are converted to the decimal digits
0N/A * corresponding to the range of the preceding text, if the
0N/A * range is one of the provided ranges. Multiple ranges are
0N/A * represented by or-ing the values together, such as,
0N/A * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>. The
0N/A * shaper assumes EUROPEAN as the starting context, that is, if
0N/A * EUROPEAN digits are encountered before any strong directional
0N/A * text in the string, the context is presumed to be EUROPEAN, and
0N/A * so the digits will not shape.
0N/A * @param ranges the specified Unicode ranges
0N/A * @return a shaper for the specified ranges
0N/A */
1914N/A public static NumericShaper getContextualShaper(int ranges) {
0N/A ranges |= CONTEXTUAL_MASK;
0N/A return new NumericShaper(EUROPEAN_KEY, ranges);
0N/A }
0N/A
0N/A /**
1914N/A * Returns a contextual shaper for the provided Unicode
1914N/A * range(s). The Latin-1 (EUROPEAN) digits are converted to the
1914N/A * decimal digits corresponding to the range of the preceding
1914N/A * text, if the range is one of the provided ranges.
1914N/A *
1914N/A * <p>The shaper assumes EUROPEAN as the starting context, that
1914N/A * is, if EUROPEAN digits are encountered before any strong
1914N/A * directional text in the string, the context is presumed to be
1914N/A * EUROPEAN, and so the digits will not shape.
1914N/A *
1914N/A * @param ranges the specified Unicode ranges
1914N/A * @return a contextual shaper for the specified ranges
1914N/A * @throws NullPointerException if {@code ranges} is {@code null}.
1914N/A * @since 1.7
1914N/A */
1914N/A public static NumericShaper getContextualShaper(Set<Range> ranges) {
1914N/A NumericShaper shaper = new NumericShaper(Range.EUROPEAN, ranges);
1914N/A shaper.mask = CONTEXTUAL_MASK;
1914N/A return shaper;
1914N/A }
1914N/A
1914N/A /**
0N/A * Returns a contextual shaper for the provided unicode range(s).
0N/A * Latin-1 (EUROPEAN) digits will be converted to the decimal digits
0N/A * corresponding to the range of the preceding text, if the
0N/A * range is one of the provided ranges. Multiple ranges are
0N/A * represented by or-ing the values together, for example,
0N/A * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>. The
0N/A * shaper uses defaultContext as the starting context.
0N/A * @param ranges the specified Unicode ranges
0N/A * @param defaultContext the starting context, such as
0N/A * <code>NumericShaper.EUROPEAN</code>
0N/A * @return a shaper for the specified Unicode ranges.
0N/A * @throws IllegalArgumentException if the specified
0N/A * <code>defaultContext</code> is not a single valid range.
0N/A */
1914N/A public static NumericShaper getContextualShaper(int ranges, int defaultContext) {
0N/A int key = getKeyFromMask(defaultContext);
0N/A ranges |= CONTEXTUAL_MASK;
0N/A return new NumericShaper(key, ranges);
0N/A }
0N/A
0N/A /**
1914N/A * Returns a contextual shaper for the provided Unicode range(s).
1914N/A * The Latin-1 (EUROPEAN) digits will be converted to the decimal
1914N/A * digits corresponding to the range of the preceding text, if the
1914N/A * range is one of the provided ranges. The shaper uses {@code
1914N/A * defaultContext} as the starting context.
1914N/A *
1914N/A * @param ranges the specified Unicode ranges
1914N/A * @param defaultContext the starting context, such as
1914N/A * {@code NumericShaper.Range.EUROPEAN}
1914N/A * @return a contextual shaper for the specified Unicode ranges.
1914N/A * @throws NullPointerException
1914N/A * if {@code ranges} or {@code defaultContext} is {@code null}
1914N/A * @since 1.7
1914N/A */
1914N/A public static NumericShaper getContextualShaper(Set<Range> ranges,
1914N/A Range defaultContext) {
1914N/A if (defaultContext == null) {
1914N/A throw new NullPointerException();
1914N/A }
1914N/A NumericShaper shaper = new NumericShaper(defaultContext, ranges);
1914N/A shaper.mask = CONTEXTUAL_MASK;
1914N/A return shaper;
1914N/A }
1914N/A
/**
 * Private constructor for the bit mask-based representation.
 *
 * @param key the index of the context (or target range) to start with
 * @param mask the range bits, optionally combined with the contextual flag
 */
private NumericShaper(int key, int mask) {
    this.mask = mask;
    this.key = key;
}
0N/A
/**
 * Private constructor for the enum-based representation.
 *
 * @param defaultContext the starting context (or the target range of
 *        a non-contextual shaper)
 * @param ranges the ranges to shape; copied, never retained
 * @throws NullPointerException if {@code ranges} is {@code null}
 */
private NumericShaper(Range defaultContext, Set<Range> ranges) {
    shapingRange = defaultContext;
    // EnumSet.copyOf throws NullPointerException for a null argument.
    // NOTE(review): it also throws IllegalArgumentException for an empty
    // collection that is not itself an EnumSet - confirm callers never
    // pass one.
    rangeSet = EnumSet.copyOf(ranges);

    // EASTERN_ARABIC takes precedence over ARABIC when both are
    // requested, so ARABIC is dropped (removing an absent element is a
    // no-op).
    if (rangeSet.contains(Range.EASTERN_ARABIC)) {
        rangeSet.remove(Range.ARABIC);
    }

    // Likewise, TAI_THAM_THAM takes precedence over TAI_THAM_HORA.
    if (rangeSet.contains(Range.TAI_THAM_THAM)) {
        rangeSet.remove(Range.TAI_THAM_HORA);
    }

    final Range[] ordered = rangeSet.toArray(new Range[rangeSet.size()]);
    if (ordered.length > BSEARCH_THRESHOLD) {
        // Large range arrays are looked up with a binary search, which
        // needs them sorted by ascending base.
        Arrays.sort(ordered,
                    new Comparator<Range>() {
                        public int compare(Range left, Range right) {
                            if (left.base == right.base) {
                                return 0;
                            }
                            return left.base > right.base ? 1 : -1;
                        }
                    });
    }
    rangeArray = ordered;
}
1914N/A
0N/A /**
0N/A * Converts the digits in the text that occur between start and
0N/A * start + count.
0N/A * @param text an array of characters to convert
0N/A * @param start the index into <code>text</code> to start
0N/A * converting
0N/A * @param count the number of characters in <code>text</code>
0N/A * to convert
0N/A * @throws IndexOutOfBoundsException if start or start + count is
0N/A * out of bounds
0N/A * @throws NullPointerException if text is null
0N/A */
0N/A public void shape(char[] text, int start, int count) {
1914N/A checkParams(text, start, count);
0N/A if (isContextual()) {
1914N/A if (rangeSet == null) {
1914N/A shapeContextually(text, start, count, key);
1914N/A } else {
1914N/A shapeContextually(text, start, count, shapingRange);
1914N/A }
0N/A } else {
0N/A shapeNonContextually(text, start, count);
0N/A }
0N/A }
0N/A
0N/A /**
0N/A * Converts the digits in the text that occur between start and
0N/A * start + count, using the provided context.
0N/A * Context is ignored if the shaper is not a contextual shaper.
0N/A * @param text an array of characters
0N/A * @param start the index into <code>text</code> to start
0N/A * converting
0N/A * @param count the number of characters in <code>text</code>
0N/A * to convert
0N/A * @param context the context to which to convert the
0N/A * characters, such as <code>NumericShaper.EUROPEAN</code>
0N/A * @throws IndexOutOfBoundsException if start or start + count is
0N/A * out of bounds
0N/A * @throws NullPointerException if text is null
0N/A * @throws IllegalArgumentException if this is a contextual shaper
0N/A * and the specified <code>context</code> is not a single valid
0N/A * range.
0N/A */
0N/A public void shape(char[] text, int start, int count, int context) {
1914N/A checkParams(text, start, count);
1914N/A if (isContextual()) {
1914N/A int ctxKey = getKeyFromMask(context);
1914N/A if (rangeSet == null) {
1914N/A shapeContextually(text, start, count, ctxKey);
1914N/A } else {
1914N/A shapeContextually(text, start, count, Range.values()[ctxKey]);
1914N/A }
1914N/A } else {
1914N/A shapeNonContextually(text, start, count);
1914N/A }
1914N/A }
1914N/A
1914N/A /**
1914N/A * Converts the digits in the text that occur between {@code
1914N/A * start} and {@code start + count}, using the provided {@code
1914N/A * context}. {@code Context} is ignored if the shaper is not a
1914N/A * contextual shaper.
1914N/A *
1914N/A * @param text a {@code char} array
1914N/A * @param start the index into {@code text} to start converting
1914N/A * @param count the number of {@code char}s in {@code text}
1914N/A * to convert
1914N/A * @param context the context to which to convert the characters,
1914N/A * such as {@code NumericShaper.Range.EUROPEAN}
1914N/A * @throws IndexOutOfBoundsException
1914N/A * if {@code start} or {@code start + count} is out of bounds
1914N/A * @throws NullPointerException
1914N/A * if {@code text} or {@code context} is null
1914N/A * @since 1.7
1914N/A */
1914N/A public void shape(char[] text, int start, int count, Range context) {
1914N/A checkParams(text, start, count);
1914N/A if (context == null) {
1914N/A throw new NullPointerException("context is null");
1914N/A }
1914N/A
1914N/A if (isContextual()) {
1914N/A if (rangeSet != null) {
1914N/A shapeContextually(text, start, count, context);
1914N/A } else {
1914N/A int key = Range.toRangeIndex(context);
1914N/A if (key >= 0) {
1914N/A shapeContextually(text, start, count, key);
1914N/A } else {
1914N/A shapeContextually(text, start, count, shapingRange);
1914N/A }
1914N/A }
1914N/A } else {
1914N/A shapeNonContextually(text, start, count);
1914N/A }
1914N/A }
1914N/A
1914N/A private void checkParams(char[] text, int start, int count) {
0N/A if (text == null) {
0N/A throw new NullPointerException("text is null");
0N/A }
0N/A if ((start < 0)
0N/A || (start > text.length)
0N/A || ((start + count) < 0)
0N/A || ((start + count) > text.length)) {
0N/A throw new IndexOutOfBoundsException(
0N/A "bad start or count for text of length " + text.length);
0N/A }
0N/A }
0N/A
0N/A /**
0N/A * Returns a <code>boolean</code> indicating whether or not
0N/A * this shaper shapes contextually.
0N/A * @return <code>true</code> if this shaper is contextual;
0N/A * <code>false</code> otherwise.
0N/A */
0N/A public boolean isContextual() {
0N/A return (mask & CONTEXTUAL_MASK) != 0;
0N/A }
0N/A
0N/A /**
0N/A * Returns an <code>int</code> that ORs together the values for
0N/A * all the ranges that will be shaped.
0N/A * <p>
0N/A * For example, to check if a shaper shapes to Arabic, you would use the
0N/A * following:
0N/A * <blockquote>
0N/A * <code>if ((shaper.getRanges() & shaper.ARABIC) != 0) { ... </code>
0N/A * </blockquote>
1914N/A *
1914N/A * <p>Note that this method supports only the bit mask-based
1914N/A * ranges. Call {@link #getRangeSet()} for the enum-based ranges.
1914N/A *
0N/A * @return the values for all the ranges to be shaped.
0N/A */
0N/A public int getRanges() {
0N/A return mask & ~CONTEXTUAL_MASK;
0N/A }
0N/A
0N/A /**
1914N/A * Returns a {@code Set} representing all the Unicode ranges in
1914N/A * this {@code NumericShaper} that will be shaped.
1914N/A *
1914N/A * @return all the Unicode ranges to be shaped.
1914N/A * @since 1.7
1914N/A */
1914N/A public Set<Range> getRangeSet() {
1914N/A if (rangeSet != null) {
1914N/A return EnumSet.copyOf(rangeSet);
1914N/A }
1914N/A return Range.maskToRangeSet(mask);
1914N/A }
1914N/A
1914N/A /**
0N/A * Perform non-contextual shaping.
0N/A */
0N/A private void shapeNonContextually(char[] text, int start, int count) {
1914N/A int base;
1914N/A char minDigit = '0';
1914N/A if (shapingRange != null) {
1914N/A base = shapingRange.getDigitBase();
1914N/A minDigit += shapingRange.getNumericBase();
1914N/A } else {
1914N/A base = bases[key];
1914N/A if (key == ETHIOPIC_KEY) {
1914N/A minDigit++; // Ethiopic doesn't use decimal zero
1914N/A }
1914N/A }
0N/A for (int i = start, e = start + count; i < e; ++i) {
0N/A char c = text[i];
0N/A if (c >= minDigit && c <= '\u0039') {
0N/A text[i] = (char)(c + base);
0N/A }
0N/A }
0N/A }
0N/A
0N/A /**
0N/A * Perform contextual shaping.
1914N/A * Synchronized to protect caches used in getContextKey.
0N/A */
0N/A private synchronized void shapeContextually(char[] text, int start, int count, int ctxKey) {
0N/A
0N/A // if we don't support this context, then don't shape
0N/A if ((mask & (1<<ctxKey)) == 0) {
0N/A ctxKey = EUROPEAN_KEY;
0N/A }
0N/A int lastkey = ctxKey;
0N/A
0N/A int base = bases[ctxKey];
1914N/A char minDigit = ctxKey == ETHIOPIC_KEY ? '1' : '0'; // Ethiopic doesn't use decimal zero
1914N/A
1914N/A synchronized (NumericShaper.class) {
1914N/A for (int i = start, e = start + count; i < e; ++i) {
1914N/A char c = text[i];
1914N/A if (c >= minDigit && c <= '\u0039') {
1914N/A text[i] = (char)(c + base);
1914N/A }
1914N/A
1914N/A if (isStrongDirectional(c)) {
1914N/A int newkey = getContextKey(c);
1914N/A if (newkey != lastkey) {
1914N/A lastkey = newkey;
0N/A
1914N/A ctxKey = newkey;
2331N/A if (((mask & EASTERN_ARABIC) != 0) &&
2331N/A (ctxKey == ARABIC_KEY ||
2331N/A ctxKey == EASTERN_ARABIC_KEY)) {
1914N/A ctxKey = EASTERN_ARABIC_KEY;
2331N/A } else if (((mask & ARABIC) != 0) &&
2331N/A (ctxKey == ARABIC_KEY ||
2331N/A ctxKey == EASTERN_ARABIC_KEY)) {
2331N/A ctxKey = ARABIC_KEY;
1914N/A } else if ((mask & (1<<ctxKey)) == 0) {
1914N/A ctxKey = EUROPEAN_KEY;
1914N/A }
1914N/A
1914N/A base = bases[ctxKey];
1914N/A
1914N/A minDigit = ctxKey == ETHIOPIC_KEY ? '1' : '0'; // Ethiopic doesn't use decimal zero
1914N/A }
1914N/A }
1914N/A }
1914N/A }
1914N/A }
1914N/A
1914N/A private void shapeContextually(char[] text, int start, int count, Range ctxKey) {
2291N/A // if we don't support the specified context, then don't shape.
2291N/A if (ctxKey == null || !rangeSet.contains(ctxKey)) {
1914N/A ctxKey = Range.EUROPEAN;
1914N/A }
1914N/A
1914N/A Range lastKey = ctxKey;
1914N/A int base = ctxKey.getDigitBase();
1914N/A char minDigit = (char)('0' + ctxKey.getNumericBase());
2291N/A final int end = start + count;
2291N/A for (int i = start; i < end; ++i) {
0N/A char c = text[i];
1914N/A if (c >= minDigit && c <= '9') {
0N/A text[i] = (char)(c + base);
1914N/A continue;
0N/A }
0N/A if (isStrongDirectional(c)) {
2291N/A ctxKey = rangeForCodePoint(c);
2291N/A if (ctxKey != lastKey) {
2291N/A lastKey = ctxKey;
1914N/A base = ctxKey.getDigitBase();
1914N/A minDigit = (char)('0' + ctxKey.getNumericBase());
0N/A }
0N/A }
0N/A }
0N/A }
0N/A
0N/A /**
0N/A * Returns a hash code for this shaper.
0N/A * @return this shaper's hash code.
0N/A * @see java.lang.Object#hashCode
0N/A */
0N/A public int hashCode() {
1914N/A int hash = mask;
1914N/A if (rangeSet != null) {
1914N/A // Use the CONTEXTUAL_MASK bit only for the enum-based
1914N/A // NumericShaper. A deserialized NumericShaper might have
1914N/A // bit masks.
1914N/A hash &= CONTEXTUAL_MASK;
1914N/A hash ^= rangeSet.hashCode();
1914N/A }
1914N/A return hash;
0N/A }
0N/A
0N/A /**
1914N/A * Returns {@code true} if the specified object is an instance of
1914N/A * <code>NumericShaper</code> and shapes identically to this one,
1914N/A * regardless of the range representations, the bit mask or the
1914N/A * enum. For example, the following code produces {@code "true"}.
1914N/A * <blockquote><pre>
1914N/A * NumericShaper ns1 = NumericShaper.getShaper(NumericShaper.ARABIC);
1914N/A * NumericShaper ns2 = NumericShaper.getShaper(NumericShaper.Range.ARABIC);
1914N/A * System.out.println(ns1.equals(ns2));
1914N/A * </pre></blockquote>
1914N/A *
0N/A * @param o the specified object to compare to this
0N/A * <code>NumericShaper</code>
0N/A * @return <code>true</code> if <code>o</code> is an instance
0N/A * of <code>NumericShaper</code> and shapes in the same way;
0N/A * <code>false</code> otherwise.
0N/A * @see java.lang.Object#equals(java.lang.Object)
0N/A */
0N/A public boolean equals(Object o) {
0N/A if (o != null) {
0N/A try {
0N/A NumericShaper rhs = (NumericShaper)o;
1914N/A if (rangeSet != null) {
1914N/A if (rhs.rangeSet != null) {
1914N/A return isContextual() == rhs.isContextual()
1914N/A && rangeSet.equals(rhs.rangeSet)
1914N/A && shapingRange == rhs.shapingRange;
1914N/A }
1914N/A return isContextual() == rhs.isContextual()
1914N/A && rangeSet.equals(Range.maskToRangeSet(rhs.mask))
1914N/A && shapingRange == Range.indexToRange(rhs.key);
1914N/A } else if (rhs.rangeSet != null) {
1914N/A Set<Range> rset = Range.maskToRangeSet(mask);
1914N/A Range srange = Range.indexToRange(key);
1914N/A return isContextual() == rhs.isContextual()
1914N/A && rset.equals(rhs.rangeSet)
1914N/A && srange == rhs.shapingRange;
1914N/A }
0N/A return rhs.mask == mask && rhs.key == key;
0N/A }
0N/A catch (ClassCastException e) {
0N/A }
0N/A }
0N/A return false;
0N/A }
0N/A
0N/A /**
0N/A * Returns a <code>String</code> that describes this shaper. This method
0N/A * is used for debugging purposes only.
0N/A * @return a <code>String</code> describing this shaper.
0N/A */
0N/A public String toString() {
0N/A StringBuilder buf = new StringBuilder(super.toString());
0N/A
1914N/A buf.append("[contextual:").append(isContextual());
0N/A
1914N/A String[] keyNames = null;
0N/A if (isContextual()) {
1914N/A buf.append(", context:");
1914N/A buf.append(shapingRange == null ? Range.values()[key] : shapingRange);
0N/A }
0N/A
1914N/A if (rangeSet == null) {
1914N/A buf.append(", range(s): ");
1914N/A boolean first = true;
1914N/A for (int i = 0; i < NUM_KEYS; ++i) {
1914N/A if ((mask & (1 << i)) != 0) {
1914N/A if (first) {
1914N/A first = false;
1914N/A } else {
1914N/A buf.append(", ");
1914N/A }
1914N/A buf.append(Range.values()[i]);
0N/A }
0N/A }
1914N/A } else {
1914N/A buf.append(", range set: ").append(rangeSet);
0N/A }
0N/A buf.append(']');
0N/A
0N/A return buf.toString();
0N/A }
0N/A
0N/A /**
0N/A * Returns the index of the high bit in value (assuming le, actually
0N/A * power of 2 >= value). value must be positive.
0N/A */
0N/A private static int getHighBit(int value) {
0N/A if (value <= 0) {
0N/A return -32;
0N/A }
0N/A
0N/A int bit = 0;
0N/A
0N/A if (value >= 1 << 16) {
0N/A value >>= 16;
0N/A bit += 16;
0N/A }
0N/A
0N/A if (value >= 1 << 8) {
0N/A value >>= 8;
0N/A bit += 8;
0N/A }
0N/A
0N/A if (value >= 1 << 4) {
0N/A value >>= 4;
0N/A bit += 4;
0N/A }
0N/A
0N/A if (value >= 1 << 2) {
0N/A value >>= 2;
0N/A bit += 2;
0N/A }
0N/A
0N/A if (value >= 1 << 1) {
0N/A bit += 1;
0N/A }
0N/A
0N/A return bit;
0N/A }
0N/A
0N/A /**
0N/A * fast binary search over subrange of array.
0N/A */
1914N/A private static int search(int value, int[] array, int start, int length)
0N/A {
0N/A int power = 1 << getHighBit(length);
0N/A int extra = length - power;
0N/A int probe = power;
0N/A int index = start;
0N/A
0N/A if (value >= array[index + extra]) {
0N/A index += extra;
0N/A }
0N/A
0N/A while (probe > 1) {
0N/A probe >>= 1;
0N/A
0N/A if (value >= array[index + probe]) {
0N/A index += probe;
0N/A }
0N/A }
0N/A
0N/A return index;
0N/A }
1914N/A
1914N/A /**
1914N/A * Converts the {@code NumericShaper.Range} enum-based parameters,
1914N/A * if any, to the bit mask-based counterparts and writes this
1914N/A * object to the {@code stream}. Any enum constants that have no
1914N/A * bit mask-based counterparts are ignored in the conversion.
1914N/A *
1914N/A * @param stream the output stream to write to
1914N/A * @throws IOException if an I/O error occurs while writing to {@code stream}
1914N/A * @since 1.7
1914N/A */
1914N/A private void writeObject(ObjectOutputStream stream) throws IOException {
1914N/A if (shapingRange != null) {
1914N/A int index = Range.toRangeIndex(shapingRange);
1914N/A if (index >= 0) {
1914N/A key = index;
1914N/A }
1914N/A }
1914N/A if (rangeSet != null) {
1914N/A mask |= Range.toRangeMask(rangeSet);
1914N/A }
1914N/A stream.defaultWriteObject();
1914N/A }
0N/A}