unichar.h revision aa883f5fbc68920c48c4f52919e8a5bb9611e678
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen#ifndef __UNICHAR_H
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen#define __UNICHAR_H
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainentypedef uint32_t unichar_t;
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainenextern const char *const uni_utf8_skip;
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen/* Returns number of characters in a NUL-terminated unicode string */
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainenunsigned int uni_strlen(const unichar_t *str);
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen/* Translates UTF-8 input to UCS-4 output. Returns 0 if ok, -1 if input was
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen invalid */
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainenint uni_utf8_to_ucs4(const char *input, buffer_t *output);
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen/* Translates UCS-4 input to UTF-8 output. */
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainenvoid uni_ucs4_to_utf8(const unichar_t *input, size_t len, buffer_t *output);
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen/* Returns the next UTF-8 character, or (unichar_t)-1 for invalid input and
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen (unichar_t)-2 for incomplete trailing character. */
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainenunichar_t uni_utf8_get_char(const char *input);
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainenunichar_t uni_utf8_get_char_len(const unsigned char *input, size_t max_len);
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen/* Returns UTF-8 string length with maximum input size. */
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainenunsigned int uni_utf8_strlen_n(const void *input, size_t size);
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen#define uni_utf8_next_char(p) \
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen ((p) + uni_utf8_skip[*(const uint8_t *)(p)])
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen
aa883f5fbc68920c48c4f52919e8a5bb9611e678Timo Sirainen#endif