// UTF8CodingSystem.cxx revision 7c478bd95313f5f23a4c958a745db2134aa03244
// Copyright (c) 1994 James Clark
// See the file COPYING for copying permission.
#pragma ident "%Z%%M% %I% %E% SMI"
#include "splib.h"
#ifdef SP_MULTI_BYTE
#include "UTF8CodingSystem.h"
#include "constant.h"
#ifdef SP_NAMESPACE
namespace SP_NAMESPACE {
#endif
// Constants describing the first byte of a UTF-8 sequence, grouped by the
// sequence length N (1..6):
//   cmaskN - mask applied to the first byte to test for an N byte sequence
//   cvalN  - value of the masked first byte for an N byte sequence
//   vmaskN - mask that extracts the value bits held in the first byte
//   minN   - minimum legal resulting value for an N byte sequence
enum {
  // 1 byte sequence
  cmask1 = 0x80, cval1 = 0x00,

  // 2 byte sequence
  cmask2 = 0xe0, cval2 = 0xc0, vmask2 = 0x1f, min2 = 0x80,

  // 3 byte sequence
  cmask3 = 0xf0, cval3 = 0xe0, vmask3 = 0xf, min3 = 0x800,

  // 4 byte sequence
  cmask4 = 0xf8, cval4 = 0xf0, vmask4 = 0x7, min4 = 0x10000,

  // 5 byte sequence
  cmask5 = 0xfc, cval5 = 0xf8, vmask5 = 0x3, min5 = 0x200000,

  // 6 byte sequence
  cmask6 = 0xfe, cval6 = 0xfc, vmask6 = 0x1, min6 = 0x4000000,

  // upper bound of the last (largest) value range
  max6 = 0x7fffffff
};
// Decoder subclass for UTF-8 input. The visible declaration exposes only a
// default constructor and a private substitution constant.
// NOTE(review): code further down in this file initializes and tests a
// `recovering_` member that is not declared here; the declaration appears to
// be missing lines -- confirm against the upstream source.
class UTF8Decoder : public Decoder {
public:
UTF8Decoder();
private:
// value for encoding error
// (stored in place of a decoded value that falls below the minimum legal
// value for its sequence length; 0xfffd is the code point of the Unicode
// REPLACEMENT CHARACTER)
enum { invalid = 0xfffd };
};
// Encoder subclass for UTF-8 output. Only the default constructor is visible
// in this declaration.
// NOTE(review): the lone `}` just before the closing `};` has no matching `{`
// in the visible text, so at least one member declaration appears to be
// missing here -- confirm against the upstream source before building.
class UTF8Encoder : public Encoder {
public:
UTF8Encoder();
}
};
{
// Returns a newly heap-allocated UTF8Decoder.
// NOTE(review): the signature line that belongs to this body is not present
// in the visible text (presumably the coding system's decoder factory).
return new UTF8Decoder;
}
{
// Returns a newly heap-allocated UTF8Encoder.
// NOTE(review): the signature line that belongs to this body is not present
// in the visible text (presumably the coding system's encoder factory).
return new UTF8Encoder;
}
// Member initializer list: starts with the recovering_ flag cleared.
// NOTE(review): the constructor signature that this initializer list belongs
// to is not present in the visible text (presumably UTF8Decoder's
// constructor, since the decode loop below reads recovering_).
: recovering_(0)
{
}
{
// Body of the UTF-8 decoding loop: walks the input bytes at `s` (`slen` of
// them) and writes decoded values through `to`.
// NOTE(review): this text appears to be missing lines. The signature, the
// declarations of `c`, `c1` and `c3`, the lead-byte tests that open each
// branch, and the `error:` / `recover:` labels are not visible, and the
// braces do not balance. Comments below describe only what the visible
// statements do -- confirm against the upstream source.

// View the input as unsigned bytes.
const unsigned char *us = (const unsigned char *)s;
// A previous call ended while skipping a bad sequence (recovering_ was set
// below); clear the flag and resume at the `recover` label.
if (recovering_) {
recovering_ = 0;
goto recover;
}
while (slen > 0) {
unsigned c0;
// Consume one byte.
us++;
slen--;
}
// 2 byte form: stop at `done` if fewer than 2 bytes remain (nothing has been
// consumed for this sequence at that point).
if (slen < 2)
goto done;
// Go to `error` if either of the top two bits of c1 is set.
if (c1 & 0xc0)
goto error;
// A result below the minimum legal value for a 2 byte sequence is replaced
// by the `invalid` marker.
if (c < min2)
c = invalid;
*to++ = c;
// Consume the 2 bytes.
slen -= 2;
us += 2;
}
// 3 byte form: same pattern with min3.
if (slen < 3)
goto done;
// NOTE(review): unconditional as shown; the guarding condition is presumably
// among the missing lines.
goto error;
if (c < min3)
c = invalid;
*to++ = c;
slen -= 3;
us += 3;
}
// 4 byte form.
if (slen < 4)
goto done;
// NOTE(review): the `if` that this `goto`/`else` pair belongs to is not
// visible.
goto error;
else {
// Shift in six more bits taken from c3.
c = (c << 6) | c3;
if (c < min4)
c = invalid;
*to++ = c;
}
slen -= 4;
us += 4;
}
// 5 byte form.
if (slen < 5)
goto done;
goto error;
else {
if (c < min5)
c = invalid;
*to++ = c;
}
slen -= 5;
us += 5;
}
// 6 byte form.
if (slen < 6)
goto done;
goto error;
else {
if (c < min6)
c = invalid;
*to++ = c;
}
slen -= 6;
us += 6;
}
else {
// Fallback branch: skip one byte, then scan forward.
us++;
slen--;
for (;;) {
// Input exhausted while skipping: remember that so the next call resumes
// in recovery mode (see the recovering_ test at the top).
if (slen == 0) {
recovering_ = 1;
goto done;
}
// NOTE(review): as shown, this `break` is unconditional and the two
// statements after it are unreachable; the condition guarding it is
// presumably among the missing lines.
break;
us++;
slen--;
}
}
}
// NOTE(review): no statement follows this label in the visible text; the
// function's final statements appear to be missing.
done:
}
{
// Intentionally empty body.
// NOTE(review): the signature line for this body is not present in the
// visible text (presumably UTF8Encoder's constructor).
}
{
// Visits each of the `n` Char values starting at `s` and selects a branch by
// value range, using the minN thresholds: below min2, below min3, below
// min4, below min5, below min6, and finally up to and including max6.
// Values above max6 match no branch.
// NOTE(review): the signature and the statements inside every branch are
// not present in the visible text (the first `if` has no body at all), so
// what is emitted for each range cannot be confirmed from here.
for (; n > 0; s++, n--) {
Char c = *s;
if (c < min2)
else if (c < min3) {
}
else if (c < min4) {
}
else if (c < min5) {
}
else if (c < min6) {
}
else if (c <= max6) {
}
}
}
#ifdef SP_NAMESPACE
}
#endif
#else /* not SP_MULTI_BYTE */
#ifndef __GNUG__
// This path is taken when SP_MULTI_BYTE is not defined, so none of the UTF-8
// code above is compiled. As its name says, this unused variable only gives
// the translation unit something to contain; it is left out when __GNUG__ is
// defined.
static char non_empty_translation_unit; // sigh
#endif
#endif /* not SP_MULTI_BYTE */