#ifndef UNICHAR_H
#define UNICHAR_H
/* Character used to replace invalid input. */
/* Characters >= base require surrogates */
/* Returns TRUE if the given byte is an ASCII character or the beginning of a
   multibyte UTF-8 sequence, i.e. anything except a continuation byte
   (bit pattern 10xxxxxx). Only the top two bits are examined, so lead bytes
   that can never occur in valid UTF-8 are still reported as a start.
   The argument is evaluated exactly once, so expressions with side effects
   such as *p++ are safe to pass. */
#define UTF8_IS_START_SEQ(b) \
	(((b) & 0xC0) != 0x80)
/* Normalize UTF-8 input and append it to the output buffer.
Returns 0 if ok, -1 if input was invalid. Even if input was invalid,
as much as possible should be added to output. */
/* Array of UTF8_REPLACEMENT_CHAR_LEN constant bytes, declared here and
   defined elsewhere. NOTE(review): judging by the name, this holds the UTF-8
   encoding of the replacement character - confirm against the definition. */
extern const unsigned char utf8_replacement_char[UTF8_REPLACEMENT_CHAR_LEN];
/* Constant pointer to read-only bytes, declared here and defined elsewhere.
   NOTE(review): the name suggests a lookup table related to UTF-8 bytes that
   start multibyte ("non-1-byte") sequences - confirm its layout and indexing
   against the definition before use. */
extern const uint8_t *const uni_utf8_non1_bytes;
{
return (!UTF16_VALID_HIGH_SURROGATE(chr) &&
chr <= UNICHAR_T_MAX);
};
/* Returns number of characters in a NUL-terminated unicode string */
/* Translates UTF-8 input to UCS-4 output. Returns 0 if ok, -1 if input was
invalid */
/* Translates UCS-4 input to UTF-8 output. */
/* Returns char_bytes (>0) if *chr_r is set, 0 for incomplete trailing character,
-1 for invalid input. */
/* Returns number of characters in UTF-8 string. */
/* Returns number of characters in UTF-8 input of specified size. */
/* Same as uni_utf8_strlen_n(), but if input ends with a partial UTF-8
character, don't include it in the return value and set partial_pos_r to
where the character begins. Otherwise partial_pos_r is set to the end
of the input. */
/* Returns the number of bytes belonging to this UTF-8 character. The given
parameter is the first byte of the UTF-8 sequence. Invalid input is
returned with length 1. */
static inline unsigned int ATTR_CONST
{
/* 0x00 .. 0x7f are ASCII. 0x80 .. 0xC1 are invalid. */
return 1;
}
/* Return given character in titlecase. */
/* Convert UTF-8 input to titlecase and decompose the titlecase characters
into the output buffer. Returns 0 if ok, -1 if input was invalid. This
generates output that's compatible with the i;unicode-casemap comparator.
Invalid input is replaced with the Unicode replacement character (U+FFFD). */
/* If input contains only valid UTF-8 characters, return TRUE without updating
buf. If input contains invalid UTF-8 characters, replace them with the Unicode
replacement character (U+FFFD), write the output to buf and return FALSE. */
/* Returns TRUE if string is valid UTF-8 input, FALSE otherwise.
   NOTE(review): no length is passed, so str is presumably NUL-terminated -
   confirm against the implementation. */
bool uni_utf8_str_is_valid(const char *str);
/* Returns TRUE if data contains only valid UTF-8 input. */
/* surrogate handling */
{
(low - UTF16_SURROGATE_LOW_FIRST) +
}
{
}
#endif