unichar.h revision 511ba4416aafb9f9ba1a4193703b95a033267068
/* Lookup table read by uni_utf8_char_bytes(): that function indexes it with
   the byte value minus (192 + 2), i.e. minus 0xC2. */
2a7605bb97dc9ed8accf2537fad1073a5fc5ff48Timo Sirainenextern const uint8_t *const uni_utf8_non1_bytes;
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen/* Returns the number of characters in a NUL-terminated Unicode string. */
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen/* Translates UTF-8 input to UCS-4 output. Returns 0 if ok, -1 if input was invalid. */
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainenint uni_utf8_to_ucs4(const char *input, buffer_t *output);
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen/* Translates UCS-4 input to UTF-8 output. len is the length of input
   (presumably counted in unichar_t characters rather than bytes - TODO
   confirm). No status is returned: the function is declared void. */
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainenvoid uni_ucs4_to_utf8(const unichar_t *input, size_t len, buffer_t *output);
/* Takes a single UCS-4 character (chr) rather than an array and a length.
   NOTE(review): presumably this is the one-character counterpart of
   uni_ucs4_to_utf8() and writes the UTF-8 encoding of chr to output -
   confirm against the implementation. */
2a7605bb97dc9ed8accf2537fad1073a5fc5ff48Timo Sirainenvoid uni_ucs4_to_utf8_c(unichar_t chr, buffer_t *output);
2a7605bb97dc9ed8accf2537fad1073a5fc5ff48Timo Sirainen/* Returns 1 if *chr_r is set, 0 for incomplete trailing character,
2a7605bb97dc9ed8accf2537fad1073a5fc5ff48Timo Sirainen -1 for invalid input.
   NOTE(review): this comment presumably covers both declarations below.
   uni_utf8_get_char_n() additionally takes max_len as a limit on the input
   size (presumably in bytes), while uni_utf8_get_char() takes only a
   const char * (presumably NUL-terminated) - TODO confirm. */
2a7605bb97dc9ed8accf2537fad1073a5fc5ff48Timo Sirainenint uni_utf8_get_char(const char *input, unichar_t *chr_r);
2a7605bb97dc9ed8accf2537fad1073a5fc5ff48Timo Sirainenint uni_utf8_get_char_n(const void *input, size_t max_len, unichar_t *chr_r);
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen/* Returns UTF-8 string length with maximum input size. size is that
   maximum. NOTE(review): presumably the result counts characters while size
   counts bytes - confirm against the implementation. */
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainenunsigned int uni_utf8_strlen_n(const void *input, size_t size);
2a7605bb97dc9ed8accf2537fad1073a5fc5ff48Timo Sirainen/* Returns the number of bytes belonging to this partial UTF-8 character.
2a7605bb97dc9ed8accf2537fad1073a5fc5ff48Timo Sirainen Invalid input is returned with length 1. */
2a7605bb97dc9ed8accf2537fad1073a5fc5ff48Timo Sirainenstatic inline unsigned int uni_utf8_char_bytes(char chr)
2a7605bb97dc9ed8accf2537fad1073a5fc5ff48Timo Sirainen /* 0x00 .. 0x7f are ASCII. 0x80 .. 0xC1 are invalid. */
2a7605bb97dc9ed8accf2537fad1073a5fc5ff48Timo Sirainen return uni_utf8_non1_bytes[(uint8_t)chr - (192 + 2)];
0ddb604f911b908085ef787455c015a91dc9c365Timo Sirainen/* Return given character in titlecase. Takes a single unichar_t value and
   returns the result as a unichar_t. */
0ddb604f911b908085ef787455c015a91dc9c365Timo Sirainenunichar_t uni_ucs4_to_titlecase(unichar_t chr);
0ddb604f911b908085ef787455c015a91dc9c365Timo Sirainen/* Convert UTF-8 input to titlecase and decompose the titlecase characters to
0ddb604f911b908085ef787455c015a91dc9c365Timo Sirainen output buffer. Returns 0 if ok, -1 if input was invalid. This generates
0ddb604f911b908085ef787455c015a91dc9c365Timo Sirainen output that's compatible with i;unicode-casemap comparator. */
0ddb604f911b908085ef787455c015a91dc9c365Timo Sirainenint uni_utf8_to_decomposed_titlecase(const void *input, size_t max_len,
511ba4416aafb9f9ba1a4193703b95a033267068Timo Sirainen/* If input contains only valid UTF-8 input, return it directly. If input
511ba4416aafb9f9ba1a4193703b95a033267068Timo Sirainen contains invalid UTF-8 input, write only valid UTF-8 characters to the
511ba4416aafb9f9ba1a4193703b95a033267068Timo Sirainen given buffer and return it. */
511ba4416aafb9f9ba1a4193703b95a033267068Timo Sirainenconst unsigned char *
511ba4416aafb9f9ba1a4193703b95a033267068Timo Sirainenuni_utf8_get_valid_data(const unsigned char *input, size_t size,