unichar.c revision 7aa59f55d8a4e02c7039fbd22660c4055bfc8393
/* Copyright (c) 2005-2007 Dovecot authors, see the included COPYING file */
#include "lib.h"
#include "buffer.h"
#include "bsearch-insert-pos.h"
#include "unichar.h"
#include "unicodemap.c"
#define HANGUL_FIRST 0xac00
#define HANGUL_LAST 0xd7a3
/* NOTE(review): the entries run 2,3,4,5,6 and end with two 1s; they look
   like UTF-8 sequence lengths keyed by lead byte -- confirm against the
   table's declaration and its users. */
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
};
{
/* len starts at zero and is the value handed back to the caller. */
unsigned int len = 0;
return len;
}
{
/* NOTE(review): chr_r is passed on as the final argument of the call that
   ends here; presumably this is a thin forwarding wrapper -- confirm. */
chr_r);
}
{
/* Decodes one UTF-8 sequence that begins at *input.
   Return values shown by this body:
     1  - 7-bit fast path, or ret was set to 1 below
     0  - ret was set to 0 below (see the comment on that branch)
    -1  - the sequence length is not one of 2..6
   NOTE(review): presumably the decoded value in chr is stored through an
   output parameter before returning -- confirm against the prototype. */
unsigned int i, len;
int ret;
/* single-byte (7-bit) character: nothing further to decode */
if (*input < 0x80) {
return 1;
}
/* first byte has len highest bits set, followed by zero bit.
the rest of the bits are used as the highest bits of the value. */
switch (len) {
case 2:
/* keep the low 5 bits of the first byte */
chr &= 0x1f;
break;
case 3:
/* keep the low 4 bits of the first byte */
chr &= 0x0f;
break;
case 4:
/* keep the low 3 bits of the first byte */
chr &= 0x07;
break;
case 5:
/* keep the low 2 bits of the first byte */
chr &= 0x03;
break;
case 6:
/* keep the lowest bit of the first byte */
chr &= 0x01;
break;
default:
/* only 7bit chars should have len==1 */
return -1;
}
/* ret is the value returned at the end, after the following bytes have
   been processed */
ret = 1;
else {
/* check first if the input is invalid before returning 0 */
ret = 0;
}
/* the following bytes must all be 10xxxxxx */
for (i = 1; i < len; i++) {
/* make room for six more bits per following byte.
   NOTE(review): presumably the low six bits of input[i] are merged into
   chr here -- confirm. */
chr <<= 6;
}
return ret;
}
{
/* Walks the NUL-terminated input. Returns -1 on invalid input and 0 once
   the terminator has been reached. */
while (*input != '\0') {
/* invalid input */
return -1;
}
}
return 0;
}
{
}
{
/* NOTE(review): judging by the lead-byte patterns in the comments below
   (110xxxxx .. 1111110x), this selects a UTF-8 sequence length for chr;
   confirm the exact range checks for each branch. */
unsigned char first;
int bitpos;
/* values below 0x80 take their own path and return early */
if (chr < 0x80) {
return;
}
/* 110xxxxx */
bitpos = 6;
/* 1110xxxx */
/* 11110xxx */
/* 111110xx */
} else {
/* 1111110x */
}
/* bitpos drops by 6 on every pass and the loop ends once it is no longer
   positive.
   NOTE(review): presumably one 10xxxxxx byte is produced per pass --
   confirm. */
do {
bitpos -= 6;
} while (bitpos > 0);
}
{
/* Counts sequences rather than bytes: len goes up by one for each step
   that stays within size, and is the value returned. */
unsigned int len = 0;
size_t i;
/* advance by whatever uni_utf8_char_bytes() reports for the byte at
   input[i] (presumably the full sequence length -- confirm) */
i += uni_utf8_char_bytes(input[i]);
/* the step ran past size: stop without counting it */
if (i > size)
break;
len++;
}
return len;
}
{
}
{
}
{
/* Returns either chr itself or an entry from a titlecase table:
   titlecase16_values for chr <= 0xffff, titlecase32_values otherwise.
   NOTE(review): presumably chr is returned unchanged when the lookup
   that sets idx finds nothing -- confirm. */
unsigned int idx;
if (chr <= 0xffff) {
return chr;
else
return titlecase16_values[idx];
} else {
return chr;
else
return titlecase32_values[idx];
}
}
{
/* Searches uni16_decomp_keys (for *chr <= 0xffff) or uni32_decomp_keys
   (for larger values). Returns FALSE when the search call reports
   failure, TRUE otherwise.
   NOTE(review): presumably *chr is overwritten with the mapped value on
   success -- confirm. */
unsigned int idx;
if (*chr <= 0xffff) {
if (!uint16_find(uni16_decomp_keys,
return FALSE;
} else {
if (!uint32_find(uni32_decomp_keys,
return FALSE;
}
return TRUE;
}
{
/* The constant names and values below (SBase, LBase, VBase, TBase and
   the L/V/T counts) match those used by the Hangul syllable decomposition
   algorithm in the Unicode Standard.
   NOTE(review): confirm how L and V (and any trailing T) are derived. */
#define SBase HANGUL_FIRST
#define LBase 0x1100
#define VBase 0x1161
#define TBase 0x11A7
#define LCount 19
#define VCount 21
#define TCount 28
/* pass the L part, then the V part, to uni_ucs4_to_utf8_c() with output */
uni_ucs4_to_utf8_c(L, output);
uni_ucs4_to_utf8_c(V, output);
}
{
/* Values above 0xffff are rejected straight away with FALSE. TRUE is
   returned only when neither FALSE path is taken. */
unsigned int idx;
if (chr > 0xffff)
return FALSE;
return FALSE;
return TRUE;
}
{
/* Returns -1 on invalid input and 0 otherwise. */
unsigned int bytes;
/* invalid input */
return -1;
}
/* uni_ucs4_decompose_uni() is tried first; it may modify chr through the
   pointer it is given */
else if (uni_ucs4_decompose_uni(&chr) ||
}
return 0;
}
static inline unsigned int
{
/* Returns 0 when a check fails and len when the loop over
   i = 0 .. len-1 runs to completion.
   NOTE(review): presumably validates a single UTF-8 sequence -- confirm. */
return 0;
for (i = 0; i < len; i++) {
return 0;
}
return len;
}
{
/* Two phases. The first scans input for an invalid UTF-8 sequence and
   returns TRUE if none is found within size bytes. The second, reached
   via "goto broken", continues from the failing position to the end of
   the input and returns FALSE.
   NOTE(review): presumably the second phase copies the valid parts to an
   output buffer -- confirm. */
/* find the first invalid utf8 sequence */
for (i = 0; i < size;) {
/* 7-bit bytes are always valid on their own */
if (input[i] < 0x80)
i++;
else {
goto broken;
/* valid multi-byte sequence: skip all of its bytes */
i += len;
}
}
return TRUE;
/* broken utf-8 input - skip the broken characters */
while (i < size) {
if (input[i] < 0x80) {
continue;
}
/* len == 0: step forward by a single byte only */
if (len == 0) {
i++;
continue;
}
i += len;
}
return FALSE;
}