/* src/dom/charclass.h */

/* charclass.h revision 5c094164a11df18b51ec8ae62f8630873fe45436 */
#ifndef __CHARCLASS_H__
#define __CHARCLASS_H__
/**
 *
 * Phoebe DOM Implementation.
 *
 * This is a C++ approximation of the W3C DOM model, which follows
 * fairly closely the specifications in the various .idl files, copies of
 * which are provided for reference.  Most important is this one:
 *
 * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html
 *
 * Authors:
 *   Bob Jamison
 *
 * Copyright (C) 2008 Bob Jamison
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 3 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */


/**
 * Unicode General Category codes.
 *
 * Every enumerator is pinned to an explicit integer value; the trailing
 * comment is the two-letter General Category abbreviation it stands for.
 * Enumerators are listed in ascending numeric order and grouped by major
 * class for readability only.
 */
enum UniCharType
{
    /* Unassigned */
    UNI_UNASSIGNED = 0,                   /* Cn */

    /* Letters */
    UNI_UPPERCASE_LETTER = 1,             /* Lu */
    UNI_LOWERCASE_LETTER = 2,             /* Ll */
    UNI_TITLECASE_LETTER = 3,             /* Lt */
    UNI_MODIFIER_LETTER = 4,              /* Lm */
    UNI_OTHER_LETTER = 5,                 /* Lo */

    /* Marks */
    UNI_NON_SPACING_MARK = 6,             /* Mn */
    UNI_ENCLOSING_MARK = 7,               /* Me */
    UNI_COMBINING_SPACING_MARK = 8,       /* Mc */

    /* Numbers */
    UNI_DECIMAL_DIGIT_NUMBER = 9,         /* Nd */
    UNI_LETTER_NUMBER = 10,               /* Nl */
    UNI_OTHER_NUMBER = 11,                /* No */

    /* Separators */
    UNI_SPACE_SEPARATOR = 12,             /* Zs */
    UNI_LINE_SEPARATOR = 13,              /* Zl */
    UNI_PARAGRAPH_SEPARATOR = 14,         /* Zp */

    /* Control, format and other special code points */
    UNI_CONTROL = 15,                     /* Cc */
    UNI_FORMAT = 16,                      /* Cf */
    /* NOTE(review): "xx" is the original annotation; it does not look
       like a General Category abbreviation -- confirm how this value
       is used. */
    UNI_UNUSED_RESERVE = 17,              /* xx */
    UNI_PRIVATE_USE = 18,                 /* Co */
    UNI_SURROGATE = 19,                   /* Cs */

    /* Punctuation */
    UNI_DASH_PUNCTUATION = 20,            /* Pd */
    UNI_START_PUNCTUATION = 21,           /* Ps */
    UNI_END_PUNCTUATION = 22,             /* Pe */
    UNI_CONNECTOR_PUNCTUATION = 23,       /* Pc */
    UNI_OTHER_PUNCTUATION = 24,           /* Po */

    /* Symbols */
    UNI_MATH_SYMBOL = 25,                 /* Sm */
    UNI_CURRENCY_SYMBOL = 26,             /* Sc */
    UNI_MODIFIER_SYMBOL = 27,             /* Sk */
    UNI_OTHER_SYMBOL = 28,                /* So */

    /* Quote punctuation */
    UNI_INITIAL_QUOTE_PUNCTUATION = 29,   /* Pi */
    UNI_FINAL_QUOTE_PUNCTUATION = 30      /* Pf */
};

typedef enum UniCharType UniCharType;


/**
 * Look up the raw table entry for a Unicode code point.
 *
 * The layout of the returned value is not described in this header.
 * NOTE(review): presumably uni_type() and the uni_is_*() tests are derived
 * from this entry -- confirm against the implementation.
 *
 * @param ch the Unicode code point to look up
 * @return the raw UCD property table entry for ch
 */
unsigned int uni_code(int ch);


/**
 * Get the Unicode General Category of this character.
 *
 * Note that the result is declared as unsigned int rather than
 * UniCharType; it is meant to be compared against the UniCharType
 * enumerators.
 *
 * @param ch the Unicode code point to classify
 * @return the 'UniCharType' General Category enumeration value (above)
 */
unsigned int uni_type(int ch);


/**
 * Test whether a Unicode code point is lower case.
 *
 * NOTE(review): presumably true for General Category Ll
 * (UNI_LOWERCASE_LETTER); the implementation is not in this header --
 * confirm.
 *
 * @param ch the Unicode code point to test
 * @return 1 if ch is lower case, else 0
 */
int uni_is_lower(int ch);


/**
 * Test whether a Unicode code point is upper case.
 *
 * NOTE(review): presumably true for General Category Lu
 * (UNI_UPPERCASE_LETTER); the implementation is not in this header --
 * confirm.
 *
 * @param ch the Unicode code point to test
 * @return 1 if ch is upper case, else 0
 */
int uni_is_upper(int ch);


/**
 * Test whether a Unicode code point is title case.
 *
 * NOTE(review): presumably true for General Category Lt
 * (UNI_TITLECASE_LETTER); the implementation is not in this header --
 * confirm.
 *
 * @param ch the Unicode code point to test
 * @return 1 if ch is title case, else 0
 */
int uni_is_title(int ch);


/**
 * Test whether a Unicode code point is a numeric digit.
 *
 * NOTE(review): presumably true for General Category Nd
 * (UNI_DECIMAL_DIGIT_NUMBER) only, not Nl/No; the implementation is not
 * in this header -- confirm.
 *
 * @param ch the Unicode code point to test
 * @return 1 if ch is a numeric digit, else 0
 */
int uni_is_digit(int ch);


/**
 * Test whether a Unicode code point is defined in the database.
 *
 * NOTE(review): presumably this means the code point's category is not
 * UNI_UNASSIGNED (Cn); the implementation is not in this header --
 * confirm.
 *
 * @param ch the Unicode code point to test
 * @return 1 if ch is defined in the database, else 0
 */
int uni_is_defined(int ch);

/**
 * Test whether a Unicode code point is a letter.
 *
 * NOTE(review): which UniCharType categories count as letters is decided
 * by the implementation, which is not part of this header -- confirm
 * before relying on a specific set (e.g. Lu/Ll/Lt/Lm/Lo).
 *
 * @param ch the Unicode code point to test
 * @return 1 if ch is a letter, else 0
 */
int uni_is_letter(int ch);


/**
 * Test whether a Unicode code point is a letter or a digit.
 *
 * NOTE(review): presumably equivalent to
 * uni_is_letter(ch) || uni_is_digit(ch); the implementation is not in
 * this header -- confirm.
 *
 * @param ch the Unicode code point to test
 * @return 1 if ch is a letter or a digit, else 0
 */
int uni_is_letter_or_digit(int ch);

/**
 * Test whether a Unicode code point is considered to be a space.
 *
 * NOTE(review): this header does not say which code points qualify
 * (e.g. only category Zs, or also Zl/Zp and ASCII control whitespace) --
 * check the implementation before relying on a specific set.
 *
 * @param ch the Unicode code point to test
 * @return 1 if ch is considered a space, else 0
 */
int uni_is_space(int ch);


#endif /* __CHARCLASS_H__ */