0N/A/*
2362N/A * Copyright (c) 1999, 2003, Oracle and/or its affiliates. All rights reserved.
0N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
0N/A *
0N/A * This code is free software; you can redistribute it and/or modify it
0N/A * under the terms of the GNU General Public License version 2 only, as
2362N/A * published by the Free Software Foundation. Oracle designates this
0N/A * particular file as subject to the "Classpath" exception as provided
2362N/A * by Oracle in the LICENSE file that accompanied this code.
0N/A *
0N/A * This code is distributed in the hope that it will be useful, but WITHOUT
0N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0N/A * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
0N/A * version 2 for more details (a copy is included in the LICENSE file that
0N/A * accompanied this code).
0N/A *
0N/A * You should have received a copy of the GNU General Public License version
0N/A * 2 along with this work; if not, write to the Free Software Foundation,
0N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0N/A *
2362N/A * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
2362N/A * or visit www.oracle.com if you need additional information or have any
2362N/A * questions.
0N/A */
0N/A
0N/A/*
0N/A * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
0N/A * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
0N/A *
0N/A * The original version of this source code and documentation is copyrighted
0N/A * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
0N/A * materials are provided under terms of a License Agreement between Taligent
0N/A * and Sun. This technology is protected by multiple US and International
0N/A * patents. This notice and attribution to Taligent may not be removed.
0N/A * Taligent is a registered trademark of Taligent, Inc.
0N/A *
0N/A */
0N/A
0N/Apackage java.text;
0N/A
0N/Aimport java.util.Vector;
0N/Aimport sun.text.UCompactIntArray;
0N/Aimport sun.text.IntHashtable;
0N/A
0N/A/**
0N/A * This class contains the static state of a RuleBasedCollator: The various
0N/A * tables that are used by the collation routines. Several RuleBasedCollators
0N/A * can share a single RBCollationTables object, easing memory requirements and
0N/A * improving performance.
0N/A */
0N/Afinal class RBCollationTables {
0N/A //===========================================================================================
0N/A // The following diagram shows the data structure of the RBCollationTables object.
0N/A // Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6.
0N/A // "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O; 'o-umlaut'/E, 'O-umlaut'/E ...".
// In other words, the rule sorts the 'ch' ligatures and 'c' with only a tertiary
// difference between them, and sorts 'o-umlaut' as if it were always expanded with 'e'.
0N/A //
//  mapping table                    contracting list        expanding list
//  (contains all unicode char
//   entries)                   ___    ____________     _________________________
//   ________                +>|_*_|->|'c' |v('c') | +>|v('o')|v('umlaut')|v('e')|
//  |_\u0001_|-> v('\u0001') | |_:_|  |------------| | |-------------------------|
//  |_\u0002_|-> v('\u0002') | |_:_|  |'ch'|v('ch')| | |            :            |
//  |____:___|               | |_:_|  |------------| | |-------------------------|
//  |____:___|               |        |'cH'|v('cH')| | |            :            |
//  |__'a'___|-> v('a')      |        |------------| | |-------------------------|
//  |__'b'___|-> v('b')      |        |'Ch'|v('Ch')| | |            :            |
//  |____:___|               |        |------------| | |-------------------------|
//  |____:___|               |        |'CH'|v('CH')| | |            :            |
//  |___'c'__|----------------         ------------  | |-------------------------|
//  |____:___|                                       | |            :            |
//  |o-umlaut|---------------------------------------- |_________________________|
//  |____:___|
0N/A //
0N/A // Noted by Helena Shih on 6/23/97
0N/A //============================================================================================
0N/A
/**
 * Creates the tables for the given collation rules.
 *
 * The rule string is remembered so that {@link #getRules()} can return it,
 * and an {@code RBTableBuilder} is then asked to build the tables. The
 * builder receives a fresh {@code BuildAPI} handle and populates this
 * object through it.
 *
 * @param rules the collation rule string; stored as-is and passed to the builder
 * @param decmp passed unchanged to {@code RBTableBuilder.build}
 *              (presumably the decomposition mode -- confirm against the builder)
 * @throws ParseException propagated from the table-building step
 */
public RBCollationTables(String rules, int decmp) throws ParseException {
    this.rules = rules;

    // The builder writes its results back into this object via the BuildAPI handle.
    new RBTableBuilder(new BuildAPI()).build(rules, decmp);
}
0N/A
0N/A final class BuildAPI {
0N/A /**
0N/A * Private constructor. Prevents anyone else besides RBTableBuilder
0N/A * from gaining direct access to the internals of this class.
0N/A */
0N/A private BuildAPI() {
0N/A }
0N/A
0N/A /**
0N/A * This function is used by RBTableBuilder to fill in all the members of this
0N/A * object. (Effectively, the builder class functions as a "friend" of this
0N/A * class, but to avoid changing too much of the logic, it carries around "shadow"
0N/A * copies of all these variables until the end of the build process and then
0N/A * copies them en masse into the actual tables object once all the construction
0N/A * logic is complete. This function does that "copying en masse".
0N/A * @param f2ary The value for frenchSec (the French-secondary flag)
0N/A * @param swap The value for SE Asian swapping rule
0N/A * @param map The collator's character-mapping table (the value for mapping)
0N/A * @param cTbl The collator's contracting-character table (the value for contractTable)
0N/A * @param eTbl The collator's expanding-character table (the value for expandTable)
0N/A * @param cFlgs The hash table of characters that participate in contracting-
0N/A * character sequences (the value for contractFlags)
0N/A * @param mso The value for maxSecOrder
0N/A * @param mto The value for maxTerOrder
0N/A */
0N/A void fillInTables(boolean f2ary,
0N/A boolean swap,
0N/A UCompactIntArray map,
0N/A Vector cTbl,
0N/A Vector eTbl,
0N/A IntHashtable cFlgs,
0N/A short mso,
0N/A short mto) {
0N/A frenchSec = f2ary;
0N/A seAsianSwapping = swap;
0N/A mapping = map;
0N/A contractTable = cTbl;
0N/A expandTable = eTbl;
0N/A contractFlags = cFlgs;
0N/A maxSecOrder = mso;
0N/A maxTerOrder = mto;
0N/A }
0N/A }
0N/A
0N/A /**
0N/A * Gets the table-based rules for the collation object.
0N/A * @return returns the collation rules that the table collation object
0N/A * was created from.
0N/A */
0N/A public String getRules()
0N/A {
0N/A return rules;
0N/A }
0N/A
0N/A public boolean isFrenchSec() {
0N/A return frenchSec;
0N/A }
0N/A
0N/A public boolean isSEAsianSwapping() {
0N/A return seAsianSwapping;
0N/A }
0N/A
0N/A // ==============================================================
0N/A // internal (for use by CollationElementIterator)
0N/A // ==============================================================
0N/A
0N/A /**
0N/A * Get the entry of hash table of the contracting string in the collation
0N/A * table.
0N/A * @param ch the starting character of the contracting string
0N/A */
0N/A Vector getContractValues(int ch)
0N/A {
0N/A int index = mapping.elementAt(ch);
0N/A return getContractValuesImpl(index - CONTRACTCHARINDEX);
0N/A }
0N/A
0N/A //get contract values from contractTable by index
0N/A private Vector getContractValuesImpl(int index)
0N/A {
0N/A if (index >= 0)
0N/A {
0N/A return (Vector)contractTable.elementAt(index);
0N/A }
0N/A else // not found
0N/A {
0N/A return null;
0N/A }
0N/A }
0N/A
0N/A /**
0N/A * Returns true if this character appears anywhere in a contracting
0N/A * character sequence. (Used by CollationElementIterator.setOffset().)
0N/A */
0N/A boolean usedInContractSeq(int c) {
0N/A return contractFlags.get(c) == 1;
0N/A }
0N/A
0N/A /**
0N/A * Return the maximum length of any expansion sequences that end
0N/A * with the specified comparison order.
0N/A *
0N/A * @param order a collation order returned by previous or next.
0N/A * @return the maximum length of any expansion seuences ending
0N/A * with the specified order.
0N/A *
0N/A * @see CollationElementIterator#getMaxExpansion
0N/A */
0N/A int getMaxExpansion(int order)
0N/A {
0N/A int result = 1;
0N/A
0N/A if (expandTable != null) {
0N/A // Right now this does a linear search through the entire
0N/A // expandsion table. If a collator had a large number of expansions,
0N/A // this could cause a performance problem, but in practise that
0N/A // rarely happens
0N/A for (int i = 0; i < expandTable.size(); i++) {
0N/A int[] valueList = (int [])expandTable.elementAt(i);
0N/A int length = valueList.length;
0N/A
0N/A if (length > result && valueList[length-1] == order) {
0N/A result = length;
0N/A }
0N/A }
0N/A }
0N/A
0N/A return result;
0N/A }
0N/A
0N/A /**
0N/A * Get the entry of hash table of the expanding string in the collation
0N/A * table.
0N/A * @param idx the index of the expanding string value list
0N/A */
0N/A final int[] getExpandValueList(int order) {
0N/A return (int[])expandTable.elementAt(order - EXPANDCHARINDEX);
0N/A }
0N/A
0N/A /**
0N/A * Get the comarison order of a character from the collation table.
0N/A * @return the comparison order of a character.
0N/A */
0N/A int getUnicodeOrder(int ch)
0N/A {
0N/A return mapping.elementAt(ch);
0N/A }
0N/A
0N/A short getMaxSecOrder() {
0N/A return maxSecOrder;
0N/A }
0N/A
0N/A short getMaxTerOrder() {
0N/A return maxTerOrder;
0N/A }
0N/A
0N/A /**
0N/A * Reverse a string.
0N/A */
0N/A //shemran/Note: this is used for secondary order value reverse, no
0N/A // need to consider supplementary pair.
0N/A static void reverse (StringBuffer result, int from, int to)
0N/A {
0N/A int i = from;
0N/A char swap;
0N/A
0N/A int j = to - 1;
0N/A while (i < j) {
0N/A swap = result.charAt(i);
0N/A result.setCharAt(i, result.charAt(j));
0N/A result.setCharAt(j, swap);
0N/A i++;
0N/A j--;
0N/A }
0N/A }
0N/A
0N/A final static int getEntry(Vector list, String name, boolean fwd) {
0N/A for (int i = 0; i < list.size(); i++) {
0N/A EntryPair pair = (EntryPair)list.elementAt(i);
0N/A if (pair.fwd == fwd && pair.entryName.equals(name)) {
0N/A return i;
0N/A }
0N/A }
0N/A return UNMAPPED;
0N/A }
0N/A
0N/A // ==============================================================
0N/A // constants
0N/A // ==============================================================
0N/A //sherman/Todo: is the value big enough?????
0N/A final static int EXPANDCHARINDEX = 0x7E000000; // Expand index follows
0N/A final static int CONTRACTCHARINDEX = 0x7F000000; // contract indexes follow
0N/A final static int UNMAPPED = 0xFFFFFFFF;
0N/A
0N/A final static int PRIMARYORDERMASK = 0xffff0000;
0N/A final static int SECONDARYORDERMASK = 0x0000ff00;
0N/A final static int TERTIARYORDERMASK = 0x000000ff;
0N/A final static int PRIMARYDIFFERENCEONLY = 0xffff0000;
0N/A final static int SECONDARYDIFFERENCEONLY = 0xffffff00;
0N/A final static int PRIMARYORDERSHIFT = 16;
0N/A final static int SECONDARYORDERSHIFT = 8;
0N/A
0N/A // ==============================================================
0N/A // instance variables
0N/A // ==============================================================
0N/A private String rules = null;
0N/A private boolean frenchSec = false;
0N/A private boolean seAsianSwapping = false;
0N/A
0N/A private UCompactIntArray mapping = null;
0N/A private Vector contractTable = null;
0N/A private Vector expandTable = null;
0N/A private IntHashtable contractFlags = null;
0N/A
0N/A private short maxSecOrder = 0;
0N/A private short maxTerOrder = 0;
0N/A}