/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
/*
*
* (C) Copyright IBM Corp. 1998-2010 - All Rights Reserved
*
* Developed at DIT - Government of Bhutan
*
* Contact person: Pema Geyleg - <pema_geyleg@druknet.bt>
*
* This file is a modification of the ICU file KhmerReordering.h
* by Jens Herden and Javier Sola who have given all their possible rights to IBM and the Governement of Bhutan
* A first module for Dzongkha was developed by Karunakar under Panlocalisation funding.
* Assistance for this module has been received from Namgay Thinley, Christopher Fynn and Javier Sola
*
*/
// Include guard. The macro defined here must be exactly the one tested by
// #ifndef; otherwise a second inclusion of this header is not suppressed and
// the declarations below are seen twice.
#ifndef __TIBETANREORDERING_H
#define __TIBETANREORDERING_H
/**
* \file
* \internal
*/
// #include "LETypes.h"
// #include "OpenTypeTables.h"
// Vocabulary
// Base -> A consonant in its full (not subscript) form. It is the
// center of the syllable; it can be surrounded by subjoined consonants, vowels,
// signs... but there is only one base in a stack, and it has to be coded as
// the first character of the syllable. Included here are also groups of base + subjoined
// which are represented by one single code point in Unicode (e.g. 0F43), and also other characters that might take
// subjoined consonants or other combining characters.
// Subjoined -> Subjoined consonants and groups of subjoined consonants which have a single code-point
// to represent the group (even if each subjoined consonant is represented independently
// by another code-point).
// Tsa Phru --> Tsa Phru character. Bhutanese people will always place it right after the base, but sometimes, due to
// "normalization",
// it is placed after all the subjoined consonants, and it is also permitted there.
// A Chung Vowel lengthening mark --> 0F71. It is placed after the base and any subjoined consonants but before any vowels.
// Precomposed Sanskrit vowels --> They are combinations of subjoined consonants + vowels that have been assigned
// a given code-point (in spite of each single part of them also having a code-point).
// They are avoided, and users are encouraged to use the combination of code-points that
// represents the same sound instead of using these combined characters. This is included here
// for compatibility with possible texts that use them (they are not in the Dzongkha keyboard).
// Halanta -> The Halanta or Virama character 0F84 indicates that a consonant should not use its inherent vowel,
// in spite of not having other vowels present. It is usually placed immediately after a base consonant,
// but in some special cases it can also be placed after a subjoined consonant, so this is also
// permitted in this algorithm. (In Tibetan the Halanta is always displayed; it is not used as a connecting char.)
//
// Subjoined vowels -> Dependent vowels (matras) placed below the base and below all subjoined consonants. There
// might be as many as three subjoined vowels in a given stack (only one in general text, but up
// to three for abbreviations, so they have to be permitted).
// Superscript vowels -> There are three superscript vowels, and they can be repeated or combined (up to three
// times). They can combine with subjoined vowels, and are always coded after these.
// Anusvara --> Nasalisation sign. Traditionally placed in absence of vowels, but also after vowels. In some
// special cases it can be placed before a vowel, so this is also permitted.
// Candrabindu -> Forms of the Anusvara with different glyphs (and different in identity) which can be placed
// without vowel or after the vowel, but never before. Cannot combine with Anusvara.
// Stress marks -> Marks placed above or below a syllable, affecting the whole syllable. They are combining
// marks, so they have to be attached to a specific stack. They are used to emphasise a syllable.
//
// Digits -> Digits are not considered as non-combining characters because there are a few characters which
// combine with them, so they have to be considered independently.
// Digit combining marks -> dependent marks that combine with digits.
//
// TODO
// There are a number of characters in the CJK block that are used in Tibetan script; two of these symbols
// are used as bases for combining glyphs, and have not been encoded in Tibetan. As these characters are outside
// of the Tibetan block, they have not been treated in this program.
/**
 * Classification vocabulary used when analysing Tibetan syllables.
 *
 * This list must include all types of components that can be used inside a
 * syllable.
 */
struct TibetanClassTable
{
    /**
     * Character classes.
     *
     * The order is important: it must be the same order that is found in each
     * horizontal line of the state table for Tibetan (file
     * TibetanReordering.cpp). One number is assigned to each type of character
     * that has to be considered when analysing the order in which characters
     * can be placed.
     */
    enum CharClassValues
    {
        // Base consonants, base consonants with a subjoined consonant attached
        // in the code point, Sanskrit base marks.
        CC_BASE = 1,

        // Subjoined consonants, and combinations of several subjoined
        // consonants in one code point.
        CC_SUBJOINED = 2
    };

    /**
     * Flags that a character can carry.
     */
    enum CharClassFlags
    {
        // Add a dotted circle if a character with this flag is the first one
        // in a syllable.
        CF_DOTTED_CIRCLE = 0x04000000

        // position flags
    };

    /**
     * Declaration only; the definition is not part of this header.
     *
     * @return a pointer to a const TibetanClassTable.
     */
    static const TibetanClassTable *getTibetanClassTable();
};
// do not instantiate
// NOTE(review): the class/struct header that this comment and the declaration
// below belong to is not visible in this view, and the closing "};" that follows
// the declaration has no opening counterpart here — confirm against the complete file.
//
// findSyllable: declaration only; the body is not in this header.
// Takes a class table, a pointer to the characters, and two le_int32 values
// (prev and charCount), and returns an le_int32.
// NOTE(review): presumably scans chars starting at index prev, bounded by
// charCount, and returns the index at which the syllable ends — confirm against
// the implementation.
// NOTE(review): le_int32 and LEUnicode are not declared in this header, and the
// #include of "LETypes.h" near the top is commented out — presumably the
// including file must provide them; verify.
static le_int32 findSyllable(const TibetanClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount);
};
#endif