/* unichar.h revision 8e9666f46faceeef0f3c6f706f10f3a873e4b0eb */
#ifndef UNICHAR_H
#define UNICHAR_H

/* Character used to replace invalid input (U+FFFD). */
#define UNICODE_REPLACEMENT_CHAR 0xfffd

/* A single character value, stored in 32 bits (the "UCS-4" form used by the
   conversion functions in this header). */
typedef uint32_t unichar_t;
/* Declares the array type referred to as ARRAY_TYPE(unichars) below.
   NOTE(review): ARRAY_DEFINE_TYPE is defined outside this header; presumably
   it produces a dynamic array of unichar_t elements - confirm. */
ARRAY_DEFINE_TYPE(unichars, unichar_t);

/* Lookup table read by uni_utf8_char_bytes(): it is indexed with
   (byte - 0xC2) for bytes 0xC2 .. 0xFF and each entry is returned as the
   byte count for that byte. Defined outside this header. */
extern const uint8_t *const uni_utf8_non1_bytes;

267N/A/* Returns number of characters in a NUL-terminated unicode string */
267N/Aunsigned int uni_strlen(const unichar_t *str);
267N/A/* Translates UTF-8 input to UCS-4 output. Returns 0 if ok, -1 if input was
267N/A invalid */
267N/Aint uni_utf8_to_ucs4(const char *input, ARRAY_TYPE(unichars) *output);
267N/A/* Translates UCS-4 input to UTF-8 output. */
267N/Avoid uni_ucs4_to_utf8(const unichar_t *input, size_t len, buffer_t *output);
267N/Avoid uni_ucs4_to_utf8_c(unichar_t chr, buffer_t *output);
267N/A
267N/A/* Returns 1 if *chr_r is set, 0 for incomplete trailing character,
267N/A -1 for invalid input. */
267N/Aint uni_utf8_get_char(const char *input, unichar_t *chr_r);
267N/Aint uni_utf8_get_char_n(const void *input, size_t max_len, unichar_t *chr_r);
/* Returns the length of the UTF-8 string at input, with size as the maximum
   input size.
   NOTE(review): the result is presumably counted in characters rather than
   bytes, and size presumably in bytes - confirm. */
unsigned int uni_utf8_strlen_n(const void *input, size_t size);

267N/A/* Returns the number of bytes belonging to this partial UTF-8 character.
267N/A Invalid input is returned with length 1. */
267N/Astatic inline unsigned int uni_utf8_char_bytes(char chr)
267N/A{
269N/A /* 0x00 .. 0x7f are ASCII. 0x80 .. 0xC1 are invalid. */
428N/A if ((uint8_t)chr < (192 + 2))
267N/A return 1;
430N/A return uni_utf8_non1_bytes[(uint8_t)chr - (192 + 2)];
430N/A}
430N/A
430N/A/* Return given character in titlecase. */
430N/Aunichar_t uni_ucs4_to_titlecase(unichar_t chr);
430N/A
269N/A/* Convert UTF-8 input to titlecase and decompose the titlecase characters to
269N/A output buffer. Returns 0 if ok, -1 if input was invalid. This generates
269N/A output that's compatible with i;unicode-casemap comparator. Invalid input
373N/A is replaced with unicode replacement character (0xfffd). */
269N/Aint uni_utf8_to_decomposed_titlecase(const void *input, size_t max_len,
373N/A buffer_t *output);
373N/A
373N/A/* If input contains only valid UTF-8 characters, return TRUE without updating
269N/A buf. If input contains invalid UTF-8 characters, replace them with unicode
373N/A replacement character (0xfffd), write the output to buf and return FALSE. */
373N/Abool uni_utf8_get_valid_data(const unsigned char *input, size_t size,
373N/A buffer_t *buf);
373N/A
#endif