unichar.c revision c6ead31ba07401556abe0c69374d7fbed99844e7
/* Copyright (c) 2005-2011 Dovecot authors, see the included COPYING file */
#include "lib.h"
#include "array.h"
#include "bsearch-insert-pos.h"
#include "unichar.h"
#include "unicodemap.c"
#define HANGUL_FIRST 0xac00
#define HANGUL_LAST 0xd7a3
/* UTF-8 encoding of U+FFFD, the Unicode replacement character (0xfffd). */
const unsigned char utf8_replacement_char[UTF8_REPLACEMENT_CHAR_LEN] = {
	0xef, /* 1110 1111 */
	0xbf, /* 10 111111 */
	0xbd  /* 10 111101 */
};
/* NOTE(review): the declaration that opens this initializer is not visible
   in this excerpt. The visible entries are runs of 2s, 3s, 4s, 5s and 6s
   followed by 1s; presumably per-lead-byte UTF-8 sequence lengths -
   confirm against the full table. */
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
};
/* NOTE(review): the function header and whatever advances len are not
   visible in this excerpt. As shown, len starts at 0 and is returned. */
{
unsigned int len = 0;
return len;
}
/* NOTE(review): only the tail of a call that passes chr_r as its last
   argument is visible; the function header and the callee are outside this
   excerpt. */
{
chr_r);
}
/* NOTE(review): the function header and several statements are not visible
   in this excerpt (the setup of input, chr and len, the "if" that pairs
   with the "else" below, and the per-byte work inside the final loop).
   From what is visible: a first byte below 0x80 returns 1 immediately, a
   len outside 2..6 returns -1, and otherwise ret (set to 1 or 0 below) is
   returned. */
{
unsigned int i, len;
int ret;
/* single-byte case: the first byte is below 0x80 */
if (*input < 0x80) {
return 1;
}
/* first byte has len highest bits set, followed by zero bit.
the rest of the bits are used as the highest bits of the value. */
switch (len) {
case 2:
/* keep the low 5 bits of chr */
chr &= 0x1f;
break;
case 3:
/* keep the low 4 bits of chr */
chr &= 0x0f;
break;
case 4:
/* keep the low 3 bits of chr */
chr &= 0x07;
break;
case 5:
/* keep the low 2 bits of chr */
chr &= 0x03;
break;
case 6:
/* keep the lowest bit of chr */
chr &= 0x01;
break;
default:
/* only 7bit chars should have len==1 */
return -1;
}
ret = 1;
else {
/* check first if the input is invalid before returning 0 */
ret = 0;
}
/* the following bytes must all be 10xxxxxx */
for (i = 1; i < len; i++) {
/* shift the value left by 6 bits for each following byte;
   NOTE(review): the check and merge of that byte are not visible here */
chr <<= 6;
}
return ret;
}
/* NOTE(review): the function header and the per-character decode step
   inside the loop are not visible in this excerpt. What is visible: the
   loop runs until the byte at input is NUL, an invalid-input path returns
   -1, and the function returns 0 after the loop. */
{
while (*input != '\0') {
/* invalid input */
return -1;
}
}
return 0;
}
/* NOTE(review): the function header and the per-character decode step
   inside the loop are not visible in this excerpt. What is visible: the
   loop runs while size is above 0, an invalid-input path returns -1, and
   the function returns 0 after the loop. */
{
unsigned int len;
while (size > 0) {
/* invalid input */
return -1;
}
}
return 0;
}
{
}
/* NOTE(review): the function header, most of the branch chain that selects
   the first byte, and the statements that emit bytes are not visible in
   this excerpt. The bit-pattern comments below name the first-byte layouts
   for the longer encodings, from 110xxxxx up to 1111110x. */
{
unsigned char first;
int bitpos;
/* values below 0x80 take the early-return path */
if (chr < 0x80) {
return;
}
/* 110xxxxx */
bitpos = 6;
/* 1110xxxx */
/* 11110xxx */
/* 111110xx */
} else {
/* 1111110x */
}
/* bitpos drops by 6 per iteration until it is no longer positive */
do {
bitpos -= 6;
} while (bitpos > 0);
}
/* NOTE(review): the function header and the loop header are not visible in
   this excerpt. What is visible: i advances by the value of
   uni_utf8_char_bytes(input[i]), the loop is left without counting when i
   has moved past size, and otherwise len is incremented once per step.
   The final len is returned. */
{
unsigned int len = 0;
size_t i;
i += uni_utf8_char_bytes(input[i]);
/* the step went past size: stop without counting it */
if (i > size)
break;
len++;
}
return len;
}
{
}
{
}
/* NOTE(review): the function header, the lookups that set idx, and the "if"
   conditions that pair with the bare "else" lines are not visible in this
   excerpt. What is visible: three ranges of chr are handled separately
   (up to 0xff, up to 0xffff, and above), each returning either chr itself
   or an entry from the matching titlecase table. */
{
unsigned int idx;
/* chr up to 0xff: direct index into titlecase8_map */
if (chr <= 0xff)
return titlecase8_map[chr];
else if (chr <= 0xffff) {
/* chr up to 0xffff: chr unchanged, or titlecase16_values[idx] */
return chr;
else
return titlecase16_values[idx];
} else {
/* chr above 0xffff: chr unchanged, or titlecase32_values[idx] */
return chr;
else
return titlecase32_values[idx];
}
}
/* NOTE(review): the function header, the rest of the uint16_find /
   uint32_find argument lists, and the statements that use idx are not
   visible in this excerpt. What is visible: chr is a pointer; the function
   returns FALSE on several paths and TRUE at the end. */
{
unsigned int idx;
/* values up to 0xff: the visible path returns FALSE */
if (*chr <= 0xff) {
return FALSE;
} else if (*chr <= 0xffff) {
/* values below the first entry of uni16_decomp_keys return FALSE */
if (*chr < uni16_decomp_keys[0])
return FALSE;
/* a failed uint16_find on uni16_decomp_keys returns FALSE */
if (!uint16_find(uni16_decomp_keys,
return FALSE;
} else {
/* a failed uint32_find on uni32_decomp_keys returns FALSE */
if (!uint32_find(uni32_decomp_keys,
return FALSE;
}
return TRUE;
}
/* NOTE(review): the function header and the arithmetic that computes L and
   V are not visible in this excerpt. The macros below look like the base
   code points and counts used by the Unicode Hangul syllable decomposition
   - TODO confirm against the Unicode Standard. */
{
/* SBase reuses HANGUL_FIRST */
#define SBase HANGUL_FIRST
#define LBase 0x1100
#define VBase 0x1161
#define TBase 0x11A7
#define LCount 19
#define VCount 21
#define TCount 28
/* L and then V are passed to uni_ucs4_to_utf8_c together with output */
uni_ucs4_to_utf8_c(L, output);
uni_ucs4_to_utf8_c(V, output);
}
/* NOTE(review): the function header and the conditions guarding the two
   FALSE returns are not visible in this excerpt. What is visible: two
   paths return FALSE and the final path returns TRUE. */
{
unsigned int idx;
return FALSE;
return FALSE;
return TRUE;
}
/* NOTE(review): the function header, the condition guarding the early
   return, and the statement that appends anything are not visible in this
   excerpt. */
{
/* don't add the replacement char multiple times */
return;
}
}
/* NOTE(review): the function header, the loop header, and most of the loop
   body are not visible in this excerpt. What is visible: ret starts at 0,
   is set to -1 on the invalid-input path (which then continues the loop
   rather than stopping), and is returned at the end. */
{
unsigned int bytes;
int ret = 0;
/* invalid input. try the next byte. */
ret = -1;
continue;
}
/* NOTE(review): the rest of this condition is not visible */
else if (uni_ucs4_decompose_uni(&chr) ||
}
return ret;
}
/* NOTE(review): the function name and parameters, the assignment of len,
   and the conditions guarding the two "return 0" statements are not
   visible in this excerpt. What is visible: the function returns 0 on the
   failure paths and len on success. */
static inline unsigned int
{
unsigned int i, len;
return 0;
/* the rest of the chars should be in 0x80..0xbf range.
anything else is start of a sequence or invalid */
for (i = 1; i < len; i++) {
return 0;
}
return len;
}
/* NOTE(review): the function header, the declarations of i and len, and
   the check that leads to the failure path are not visible in this
   excerpt. What is visible: the scan covers input[0..size), bytes below
   0x80 advance i by one, the failure path stores i in *pos_r and returns
   -1, and the function returns 0 once the loop finishes. */
{
/* find the first invalid utf8 sequence */
for (i = 0; i < size;) {
if (input[i] < 0x80)
i++;
else {
/* report the offset of the failure through pos_r */
*pos_r = i;
return -1;
}
i += len;
}
}
return 0;
}
/* NOTE(review): the function header, the condition guarding the early
   "return TRUE", the assignment of len, and the statements that copy data
   are not visible in this excerpt. What is visible: one path returns TRUE
   before the loop; otherwise the loop walks input up to size and the
   function returns FALSE. */
{
return TRUE;
/* broken utf-8 input - skip the broken characters */
while (i < size) {
if (input[i] < 0x80) {
continue;
}
/* len == 0: step over a single byte and retry */
if (len == 0) {
i++;
continue;
}
/* otherwise advance by len bytes */
i += len;
}
return FALSE;
}
/* Passes str, cast to const unsigned char *, to uni_utf8_find_invalid_pos()
   and derives the boolean result from that call.
   NOTE(review): the remaining arguments of the call and the comparison
   applied to its result are not visible in this excerpt. */
bool uni_utf8_str_is_valid(const char *str)
{
size_t i;
return uni_utf8_find_invalid_pos((const unsigned char *)str,
}
/* NOTE(review): the function header and every statement after the
   declaration of i are not visible in this excerpt. */
{
size_t i;
}