/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
* (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
*
* The original version of this source code and documentation is copyrighted
* and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
* materials are provided under terms of a License Agreement between Taligent
* and Sun. This technology is protected by multiple US and International
* patents. This notice and attribution to Taligent may not be removed.
* Taligent is a registered trademark of Taligent, Inc.
*
*/
/**
* This class contains all the code to parse a RuleBasedCollator pattern
* and build a RBCollationTables object from it. A particular instance
* of this class exists only during the actual build process -- once an
* RBCollationTables object has been built, the RBTableBuilder object
* goes away. This object carries all of the state which is only needed
* during the build process, plus a "shadow" copy of all of the state
* that will go into the tables object itself. This object communicates
* with RBCollationTables through a separate class, RBCollationTables.BuildAPI,
* which is an inner class of RBCollationTables and provides a separate
* private API for communication with RBTableBuilder.
* This class isn't just an inner class of RBCollationTables itself because
* of its large size. For source-code readability, it seemed better for the
* builder to have its own source file.
*/
final class RBTableBuilder {
}
/**
* Create a table-based collation object with the given rules.
* This is the main function that actually builds the tables and
* stores them back in the RBCollationTables object. It is called
* ONLY by the RBCollationTables constructor.
* @see java.text.RuleBasedCollator#RuleBasedCollator
* @exception ParseException If the rules format is incorrect.
*/
{
boolean isSource = true;
int i = 0;
// This array maps Unicode characters to their collation ordering
// Normalize the build rules. Find occurrences of all decomposed characters
// and normalize the rules before feeding into the builder. By "normalize",
// we mean that all precomposed Unicode characters must be converted into
// a base character and one or more combining characters (such as accents).
// When there are multiple combining characters attached to a base character,
// the combining characters must be in their canonical order
//
//(1)decmp will be NO_DECOMPOSITION only in ko locale to prevent decomposing
//Hangul syllables to jamos, so we can actually just call decompose with
//normalizer's IGNORE_HANGUL option turned on
//
//(2)just call the "special version" in NormalizerImpl directly
//pattern = Normalizer.decompose(pattern, false, Normalizer.IGNORE_HANGUL, true);
//
//Normalizer.Mode mode = CollatorUtilities.toNormalizerMode(decmp);
//pattern = Normalizer.normalize(pattern, mode, 0, true);
// Build the merged collation entries
// Since rules can be specified in any order in the string
// (e.g. "c , C < d , D < e , E .... C < CH")
// this splits all of the rules in the string out into separate
// objects and then sorts them. In the above example, it merges the
// "C < CH" rule in just before the "C < D" rule.
//
int order = 0;
// Now walk through each entry and add it to my own tables
{
case '@':
frenchSec = true;
break;
case '!':
seAsianSwapping = true;
break;
}
}
} else {
}
} else {
}
}
}
commit();
/*
System.out.println("mappingSize=" + mapping.getKSize());
for (int j = 0; j < 0xffff; j++) {
int value = mapping.elementAt(j);
if (value != RBCollationTables.UNMAPPED)
System.out.println("index=" + Integer.toString(j, 16)
+ ", value=" + Integer.toString(value, 16));
}
*/
}
/** Add expanding entries for pre-composed unicode characters so that this
* collator can be used reasonably well with decomposition turned off.
*/
// Iterate through all of the pre-composed characters in Unicode
int c;
//
// We don't already have an ordering for this pre-composed character.
//
// First, see if the decomposed string is already in our
// tables as a single contracting-string ordering.
// If so, just map the precomposed character to that order.
//
// TODO: What we should really be doing here is trying to find the
// longest initial substring of the decomposition that is present
// in the tables as a contracting character sequence, and find its
// ordering. Then do this recursively with the remaining chars
// so that we build a list of orderings, and add that list to
// the expansion table.
// That would be more correct but also significantly slower, so
// I'm not totally sure it's worth doing.
//
//The only thing we need to do is check whether this decomposed
//character has an entry in our order table; that order does not
//have to be a contraction order. If it does have one, add an entry
//for the precomposed character using the same order. The previous
//implementation unnecessarily added a single-character expansion
//entry.
if (s.length() == 1) {
}
continue;
} else if (s.length() == 2) {
}
continue;
}
}
int contractOrder = getContractOrder(s);
addOrder(c, contractOrder);
} else {
//
// We don't have a contracting ordering for the entire string
// that results from the decomposition, but if we have orders
// for each individual character, we can add an expanding
// table entry for the pre-composed character
//
boolean allThere = true;
for (int i = 0; i < s.length(); i++) {
allThere = false;
break;
}
}
if (allThere) {
}
}
}
}
}
/**
* Looks up unmapped values in the expanded character table.
*
* When the expanding character tables are built by addExpandOrder,
* it doesn't know what the final ordering of each character
* in the expansion will be. Instead, it just puts the raw character
* code into the table, adding CHARINDEX as a flag. Now that we've
* finished building the mapping table, we can go back and look up
* that character to see what its real collation order is and
* stick that into the expansion table. That lets us avoid doing
* a two-stage lookup later.
*/
private final void commit()
{
if (expandTable != null) {
// found an expanding character that isn't filled in yet
// Get the real values for the non-filled entry
// The real value is still unmapped, maybe it's ignorable
} else {
// just fill in the value
}
}
}
}
}
}
/**
* Increment of the last order based on the comparison level.
*/
{
switch(aStrength)
{
// increment primary order and mask off secondary and tertiary difference
isOverIgnore = true;
break;
// increment secondary order and mask off tertiary difference
// record max # of ignorable chars with secondary difference
if (!isOverIgnore)
maxSecOrder++;
break;
// increment tertiary order
// record max # of ignorable chars with tertiary difference
if (!isOverIgnore)
maxTerOrder++;
break;
}
return lastValue;
}
/**
* Adds a character and its designated order into the collation table.
*/
{
// See if the char already has an order in the mapping table
// There's already an entry for this character that points to a contracting
// character table. Instead of adding the character directly to the mapping
// table, we must add it to the contract table instead.
int length = 1;
} else {
}
} else {
// add the entry to the mapping table,
// the same later entry replaces the previous one
}
}
}
/**
* Adds the contracting string into the collation table.
*/
boolean fwd)
{
if (contractTable == null) {
}
//initial character
/*
char ch0 = groupChars.charAt(0);
int ch = Character.isHighSurrogate(ch0)?
Character.toCodePoint(ch0, groupChars.charAt(1)):ch0;
*/
// See if the initial character of the string already has a contract table.
if (entryTable == null) {
// We need to create a new table of contract entries for this base char
// Add the initial character's current ordering first. then
// update its mapping to point to this contract table
}
// Now add (or replace) this string in the table
} else {
// NOTE: This little bit of logic is here to speed CollationElementIterator
// .nextContractChar(). This code ensures that the longest sequence in
// this list is always the _last_ one in the list. This keeps
// nextContractChar() from having to search the entire list for the longest
// sequence.
} else {
}
}
// If this was a forward mapping for a contracting string, also add a
// reverse mapping for it, so that CollationElementIterator.previous
// can work right
anOrder, false);
}
}
/**
* If the given string has been specified as a contracting string
* in this collation table, return its ordering.
* Otherwise return UNMAPPED.
*/
{
if (contractTable != null) {
/*
char ch0 = groupChars.charAt(0);
int ch = Character.isHighSurrogate(ch0)?
Character.toCodePoint(ch0, groupChars.charAt(1)):ch0;
*/
if (entryTable != null) {
}
}
}
return result;
}
}
return order;
}
/**
* Get the entry of hash table of the contracting string in the collation
* table.
* @param ch the starting character of the contracting string
*/
{
}
{
if (index >= 0)
{
}
else // not found
{
return null;
}
}
/**
* Adds the expanding string into the collation table.
*/
int anOrder) throws ParseException
{
// Create an expansion table entry
// And add its index into the main mapping table
//only add into table when it is a legal surrogate
}
} else {
}
} else {
}
}
throws ParseException
{
}
/**
* Create a new entry in the expansion table that contains the orderings
* for the given characters. If anOrder is valid, it is added to the
* beginning of the expanded list of orders.
*/
if (expandTable == null) {
}
// If anOrder is valid, we want to add it at the beginning of the list
if (offset == 1) {
}
int j = offset;
char ch1;
int ch;
if (++i == expandChars.length() ||
//either we are missing the low surrogate or the next char
//is not a legal low surrogate, so stop loop
break;
}
} else {
}
} else {
// can't find it in the table, will be filled in by commit().
}
}
//we had at least one supplementary character, the size of valueList
//is bigger than it really needs...
int[] tmpBuf = new int[j];
while (--j >= 0) {
}
}
// Add the expanding char list into the expansion table.
return tableIndex;
}
char c0;
int c;
for (int i = 0; i < len; i++) {
:c0;
}
}
// ==============================================================
// constants
// ==============================================================
// ==============================================================
// instance variables
// ==============================================================
// variables used by the build process
private boolean isOverIgnore = false;
// "shadow" copies of the instance variables in RBCollationTables
// (the values in these variables are copied back into RBCollationTables
// at the end of the build process)
private boolean frenchSec = false;
private boolean seAsianSwapping = false;
}