/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1985-2012 AT&T Intellectual Property *
* and is licensed under the *
* Eclipse Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* David Korn <dgk@research.att.com> *
* Phong Vo <kpv@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* Glenn Fowler
* AT&T Research
*
* iconv intercept
* minimally provides { utf*<=>bin ascii<=>ebcdic* }
*/
#include <ast.h>
#include <dirent.h>
#define DEBUG_TRACE 0
#define _ICONV_LIST_PRIVATE_
#include <ccode.h>
#include <ctype.h>
#include <iconv.h>
#include "lclib.h"
#if !_lib_iconv_open
#endif
#ifndef E2BIG
#endif
#ifndef EILSEQ
#endif
return n;
/*
 * description of one code set taking part in a conversion
 * NOTE(review): member roles below are read off the types and the
 * uses visible in this file -- confirm against the code that fills
 * them in
 */
struct Map_s
{
	char*			name;	/* code set name (presumably) */
	const unsigned char*	map;	/* read-only byte table (presumably a translation map) */
	int			index;	/* numeric code set index */
};

typedef struct Map_s Map_t;
/*
 * conversion descriptor
 * NOTE(review): only buf is declared in this view, yet code elsewhere
 * in the file dereferences further members through a cc pointer
 * (from, to, cvt) -- confirm the full layout before relying on it
 */
struct Conv_s
{
	char*	buf;	/* character buffer pointer */
};

typedef struct Conv_s Conv_t;
static int freeindex;
{
{
"utf",
"un|unicode|utf",
"multibyte 8-bit unicode",
"UTF-%s",
"8",
},
{
"ume",
"um|ume|utf?(-)7",
"multibyte 7-bit unicode",
"UTF-7",
0,
},
{
"euc",
"(big|euc)*",
"euc family",
0,
0,
},
{
"dos",
"dos?(-)?(855)",
"dos code page",
"DOS855",
0,
},
{
"ucs",
"ucs?(-)?(2)?(be)|utf-16?(be)",
"unicode runes",
"UCS-%s",
"2",
},
{
"ucs-le",
"ucs?(-)?(2)le|utf-16le",
"little endian unicode runes",
"UCS-%sLE",
"2",
},
{ 0 },
};
#if _UWIN
#include <ast_windows.h>
#ifndef CP_UCS2
#endif
/*
* return the codeset index given its name or alias
* the name to index map is kept in the registry
*/
static int
{
register char* s;
char* e;
int n;
#if DEBUG_TRACE
#endif
if (name == name_native)
return CP_ACP;
return CP_UTF8;
return CP_UCS2;
return n;
for (;;)
{
{
s = (char*)name;
if ((s[0] == 'c' || s[0] == 'C') && (s[1] == 'p' || s[1] == 'P'))
s += 2;
if (!isdigit(s[0]))
break;
break;
}
for (;;)
{
{
return -1;
}
{
s += 16;
if (n >= sizeof(aka))
n = sizeof(aka) - 1;
aka[n] = 0;
break;
}
{
s += 9;
n = strtol(s, 0, 0);
return n;
}
}
}
return -1;
}
/*
* get and check the codeset indices
*/
static _ast_iconv_t
{
#if DEBUG_TRACE
#endif
return (_ast_iconv_t)(-1);
return (_ast_iconv_t)(-1);
#if DEBUG_TRACE
error(DEBUG_TRACE, "AHA#%d _win_iconv_open f=0x%04x t=0x%04x\n", __LINE__, cc->from.index, cc->to.index);
#endif
return (_ast_iconv_t)cc;
}
/*
* even though the indices already check out
* they could still be rejected
*/
static size_t
{
#if DEBUG_TRACE
error(DEBUG_TRACE, "AHA#%d _win_iconv from=0x%04x to=0x%04x\n", __LINE__, cc->from.index, cc->to.index);
#endif
{
/*
* easy
*/
}
else
{
ub = 0;
/*
* from => ucs-2
*/
{
if ((tz = MultiByteToWideChar(cc->from.index, 0, (LPCSTR)*fb, (int)*fn, (LPWSTR)*tb, *tn)) && tz <= *tn)
{
}
else
{
/*
* target too small
* binary search on input size to make it fit
*/
oz = 0;
for (;;)
{
goto nope;
break;
if (!(pz /= 2))
{
goto nope;
break;
}
else
{
}
}
}
}
else
{
{
}
goto nope;
goto nope;
goto nope;
/*
* ucs-2 => to
*/
else
{
/*
* target too small
* binary search on input size to make it fit
*/
oz = 0;
for (;;)
{
goto nope;
goto nope;
break;
if (!(pz /= 2))
{
goto nope;
break;
}
else
{
}
}
goto nope;
#if DEBUG_TRACE
error(DEBUG_TRACE, "AHA#%d _win_iconv *fn=%u fz=%u[%u] *tn=%u tz=%u\n", __LINE__, *fn, fz, fz * sizeof(WCHAR), *tn, tz);
#endif
#if 0
#endif
}
}
}
return fz;
nope:
return (size_t)(-1);
}
#endif
/*
* return canonical character code set name for m
* if b!=0 then canonical name placed in b of size n
* <ccode.h> index returned
*/
int
{
register int c;
register char* e;
#if DEBUG_TRACE
char* o;
#endif
if (!b)
{
b = buf;
n = sizeof(buf);
}
#if DEBUG_TRACE
o = b;
#endif
e = b + n - 1;
bp = 0;
n = 0;
#if DEBUG_TRACE
if (error_info.trace < DEBUG_TRACE) sfprintf(sfstderr, "%s: debug-%d: AHA%d _ast_iconv_name m=\"%s\"\n", error_info.id, error_info.trace, __LINE__, m);
#endif
for (;;)
{
#if DEBUG_TRACE
if (error_info.trace < DEBUG_TRACE) sfprintf(sfstderr, "%s: debug-%d: AHA%d _ast_iconv_name n=%d bp=%p cp=%p ccode=%d name=\"%s\"\n", error_info.id, error_info.trace, __LINE__, n, bp, cp, cp->ccode, cp->name);
#endif
{
if (!(c = m[sub[1]]))
{
break;
}
{
n = sub[1];
}
}
{
break;
}
}
{
{
{
if (!isdigit(*m))
}
else
m = "1";
}
{
if ((locales[AST_LC_CTYPE]->flags & LC_default) || !locales[AST_LC_CTYPE]->charset || !(m = locales[AST_LC_CTYPE]->charset->code) || streq(m, "iso8859-1"))
switch (CC_NATIVE)
{
case CC_EBCDIC:
m = (const char*)"EBCDIC";
break;
case CC_EBCDIC_I:
m = (const char*)"EBCDIC-I";
break;
case CC_EBCDIC_O:
m = (const char*)"EBCDIC-O";
break;
default:
m = (const char*)"ISO-8859-1";
break;
}
b += sfsprintf(b, e - b, "%s", m);
}
*b = 0;
#if DEBUG_TRACE
if (error_info.trace < DEBUG_TRACE) sfprintf(sfstderr, "%s: debug-%d: AHA%d _ast_iconv_name ccode=%d canon=\"%s\"\n", error_info.id, error_info.trace, __LINE__, cp->ccode, o);
#endif
}
while (b < e && (c = *m++))
{
if (islower(c))
c = toupper(c);
*b++ = c;
}
*b = 0;
#if DEBUG_TRACE
if (error_info.trace < DEBUG_TRACE) sfprintf(sfstderr, "%s: debug-%d: AHA%d _ast_iconv_name ccode=%d canon=\"%s\"\n", error_info.id, error_info.trace, __LINE__, CC_ICONV, o);
#endif
return CC_ICONV;
}
/*
* convert utf-8 to bin
*/
static size_t
{
/*
 * NOTE(review): the function name, parameter list and the loop
 * header are not visible in this view; fe, te and n are declared
 * here but their assignments are not shown -- confirm before editing
 */
register unsigned char* f;
register unsigned char* fe;
register unsigned char* t;
register unsigned char* te;
register unsigned char* p;
register int c;
register int w;
size_t n;
int e;
e = 0;
f = (unsigned char*)(*fb);
t = (unsigned char*)(*tb);
{
/* p marks the start of the current sequence so f can be rewound on error */
p = f;
c = *f++;
if (c & 0x80)
{
/* high bit set: this byte must be the lead byte of a multibyte sequence */
if (!(c & 0x40))
{
/* 10xxxxxx cannot start a sequence */
f = p;
e = EILSEQ;
break;
}
if (c & 0x20)
{
/* three byte form: the low 4 bits of the lead byte become bits 12..15 */
w = (c & 0x0F) << 12;
if (f >= fe)
{
/* input ends inside the sequence */
f = p;
e = EINVAL;
break;
}
c = *f++;
/* only bit 0x40 of the middle byte is tested here */
if (c & 0x40)
{
f = p;
e = EILSEQ;
break;
}
/* the low 6 bits of the middle byte become bits 6..11 */
w |= (c & 0x3F) << 6;
}
else
/* two byte form: the low 5 bits of the lead byte become bits 6..10 */
w = (c & 0x1F) << 6;
if (f >= fe)
{
/* input ends before the final byte of the sequence */
f = p;
e = EINVAL;
break;
}
c = *f++;
/* the final byte supplies bits 0..5; no validity test on it is visible here */
w |= (c & 0x3F);
}
else
/* high bit clear: the byte is its own value */
w = c;
/* t is an unsigned char pointer, so only the low 8 bits of w are stored */
*t++ = w;
}
/* report how far the input and output pointers advanced */
*fb = (char*)f;
*tb = (char*)t;
}
/*
* convert bin to utf-8
*/
static size_t
{
register unsigned char* f;
register unsigned char* fe;
register unsigned char* t;
register unsigned char* te;
register int c;
wchar_t w;
size_t n;
int e;
e = 0;
f = (unsigned char*)(*fb);
t = (unsigned char*)(*tb);
{
if (!mbwide())
{
c = 1;
w = *f;
}
{
e = EINVAL;
break;
}
else if (!c)
c = 1;
if (!(w & ~0x7F))
*t++ = w;
else
{
if (!(w & ~0x7FF))
{
if (t >= (te - 2))
{
e = E2BIG;
break;
}
*t++ = 0xC0 + (w >> 6);
}
else if (!(w & ~0xffff))
{
if (t >= (te - 3))
{
e = E2BIG;
break;
}
*t++ = 0xE0 + (w >> 12);
*t++ = 0x80 + ((w >> 6 ) & 0x3F);
}
else
{
e = EILSEQ;
break;
}
*t++ = 0x80 + (w & 0x3F);
}
f += c;
}
*fb = (char*)f;
*tb = (char*)t;
}
static const unsigned char ume_D[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?!\"#$%&*;<=>@[]^_`{|} \t\n";
static const unsigned char ume_M[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/*
* initialize the ume tables
*/
static int
umeinit(void)
{
	register const unsigned char*	p;
	register int			ch;
	register int			pos;

	/* flag each byte listed in ume_D */
	for (p = ume_D; (ch = *p) != 0; p++)
		ume_d[ch] = 1;

	/* record the position of each ume_M byte within ume_M */
	pos = 0;
	while ((ch = ume_M[pos]) != 0)
	{
		ume_m[ch] = pos;
		pos++;
	}
	return 0;
}
/*
* convert utf-7 to bin
*/
static size_t
{
register unsigned char* f;
register unsigned char* fe;
register unsigned char* t;
register unsigned char* te;
register unsigned char* p;
register int s;
register int c;
register int w;
size_t n;
int e;
e = 0;
UMEINIT();
f = (unsigned char*)(*fb);
t = (unsigned char*)(*tb);
s = 0;
{
p = f;
c = *f++;
if (s)
{
if (c == '-' && s > 1)
s = 0;
{
s = 0;
*t++ = c;
}
else if (f >= (fe - 2))
{
f = p;
e = EINVAL;
break;
}
else
{
s = 2;
w = (w << 6) | ume_m[*f++];
w = (w << 6) | ume_m[*f++];
if (!(w & ~0xFF))
*t++ = w;
else if (t >= (te - 1))
{
f = p;
e = E2BIG;
break;
}
else
{
*t++ = (w >> 8) & 0xFF;
*t++ = w & 0xFF;
}
}
}
else if (c == '+')
s = 1;
else
*t++ = c;
}
*fb = (char*)f;
*tb = (char*)t;
}
/*
* convert bin to utf-7
*/
static size_t
{
register unsigned char* f;
register unsigned char* fe;
register unsigned char* t;
register unsigned char* te;
register int c;
register int s;
wchar_t w;
size_t n;
int e;
e = 0;
UMEINIT();
f = (unsigned char*)(*fb);
t = (unsigned char*)(*tb);
s = 0;
{
if (!mbwide())
{
c = 1;
w = *f;
}
{
e = EINVAL;
break;
}
else if (!c)
c = 1;
if (!(w & ~0x7F) && ume_d[w])
{
if (s)
{
s = 0;
*t++ = '-';
}
*t++ = w;
}
else if (t >= (te - (4 + s)))
{
e = E2BIG;
break;
}
else
{
if (!s)
{
s = 1;
*t++ = '+';
}
*t++ = ume_M[w & 0x3F];
}
f += c;
}
if (s)
*t++ = '-';
*fb = (char*)f;
*tb = (char*)t;
}
/*
* convert ucs-2 to bin with no byte swap
*/
static size_t
{
/*
 * NOTE(review): the function name, parameter list and the loop
 * header are not visible in this view; fe, te and n are declared
 * here but their assignments are not shown -- confirm before editing
 */
register unsigned char* f;
register unsigned char* fe;
register unsigned char* t;
register unsigned char* te;
register int w;
size_t n;
int e;
e = 0;
f = (unsigned char*)(*fb);
t = (unsigned char*)(*tb);
{
/* combine two input bytes into one value, first byte in the high 8 bits */
w = *f++;
w = (w << 8) | *f++;
/* a value that fits in 8 bits is written as a single byte */
if (!(w & ~0xFF))
*t++ = w;
else if (t >= (te - 1))
{
/* a two byte write is refused this close to te: give back the input pair */
f -= 2;
e = E2BIG;
break;
}
else
{
/* wider values are written as two bytes, high byte first */
*t++ = (w >> 8) & 0xFF;
*t++ = w & 0xFF;
}
}
/* report how far the input and output pointers advanced */
*fb = (char*)f;
*tb = (char*)t;
}
/*
* convert bin to ucs-2 with no byte swap
*/
static size_t
{
register unsigned char* f;
register unsigned char* fe;
register unsigned char* t;
register unsigned char* te;
register int c;
wchar_t w;
size_t n;
int e;
e = 0;
f = (unsigned char*)(*fb);
t = (unsigned char*)(*tb);
{
if (!mbwide())
{
c = 1;
w = *f;
}
{
e = EINVAL;
break;
}
else if (!c)
c = 1;
*t++ = (w >> 8) & 0xFF;
*t++ = w & 0xFF;
f += c;
}
*fb = (char*)f;
*tb = (char*)t;
}
/*
* convert ucs-2 to bin with byte swap
*/
static size_t
{
register unsigned char* f;
register unsigned char* fe;
register unsigned char* t;
register unsigned char* te;
register int w;
size_t n;
int e;
e = 0;
f = (unsigned char*)(*fb);
t = (unsigned char*)(*tb);
{
w = *f++;
w = w | (*f++ << 8);
if (!(w & ~0xFF))
*t++ = w;
else if (t >= (te - 1))
{
f -= 2;
e = E2BIG;
break;
}
else
{
*t++ = (w >> 8) & 0xFF;
*t++ = w & 0xFF;
}
}
*fb = (char*)f;
*tb = (char*)t;
}
/*
* convert bin to ucs-2 with byte swap
*/
static size_t
{
register unsigned char* f;
register unsigned char* fe;
register unsigned char* t;
register unsigned char* te;
register int c;
wchar_t w;
size_t n;
int e;
e = 0;
f = (unsigned char*)(*fb);
t = (unsigned char*)(*tb);
{
if (!mbwide())
{
c = 1;
w = *f;
}
{
e = EINVAL;
break;
}
else if (!c)
c = 1;
*t++ = w & 0xFF;
*t++ = (w >> 8) & 0xFF;
f += c;
}
*fb = (char*)f;
*tb = (char*)t;
}
/*
* open a character code conversion map from f to t
*/
_ast_iconv_open(const char* t, const char* f)
{
int fc;
int tc;
int i;
#if DEBUG_TRACE
#endif
/*
* t names the target code set and f the source code set
* a null, empty or "-" name, or one that matches name_local or
* name_native ignoring case, is replaced by name_native
* each name must be tested on its own: f used to be compared
* against name_local through t, so a source spelled like
* name_local was never normalized and the identity shortcut
* below was missed
*/
if (!t || !*t || (*t == '-' && !*(t + 1)) || !strcasecmp(t, name_local) || !strcasecmp(t, name_native))
t = name_native;
if (!f || !*f || (*f == '-' && !*(f + 1)) || !strcasecmp(f, name_local) || !strcasecmp(f, name_native))
f = name_native;
/*
* the ast identity is always (iconv_t)(0)
*/
if (t == f)
return (iconv_t)(0);
#if DEBUG_TRACE
error(DEBUG_TRACE, "AHA#%d _ast_iconv_open f=%s:%s:%d t=%s:%s:%d\n", __LINE__, f, fr, fc, t, to, tc);
#endif
return (iconv_t)(0);
/*
* first check the free list
*/
for (i = 0; i < elementsof(freelist); i++)
{
freelist[i] = 0;
#if _lib_iconv_open
/*
* reset the shift state if any
*/
#endif
return cc;
}
/*
* allocate a new one
*/
return (iconv_t)(-1);
/*
* 8 bit maps are the easiest
*/
#if _lib_iconv_open
else if ((cc->cvt = iconv_open(t, f)) != (iconv_t)(-1) || (cc->cvt = iconv_open(to, fr)) != (iconv_t)(-1))
#endif
#if _UWIN
else if ((cc->cvt = _win_iconv_open(cc, t, f)) != (_ast_iconv_t)(-1) || (cc->cvt = _win_iconv_open(cc, to, fr)) != (_ast_iconv_t)(-1))
#endif
else
{
switch (fc)
{
case CC_UTF:
break;
case CC_UME:
break;
case CC_UCS:
break;
case CC_SCU:
break;
case CC_ASCII:
break;
default:
if (fc < 0)
goto nope;
break;
}
switch (tc)
{
case CC_UTF:
break;
case CC_UME:
break;
case CC_UCS:
break;
case CC_SCU:
break;
case CC_ASCII:
break;
default:
if (tc < 0)
goto nope;
break;
}
}
nope:
return (iconv_t)(-1);
}
/*
* close a character code conversion map
*/
int
{
int i;
int r = 0;
return -1;
return 0;
/*
* add to the free list
*/
i = freeindex;
for (;;)
{
if (++ i >= elementsof(freelist))
i = 0;
if (!freelist[i])
break;
if (i == freeindex)
{
if (++ i >= elementsof(freelist))
i = 0;
/*
* close the oldest
*/
{
#if _lib_iconv_open
#endif
}
break;
}
}
return r;
}
/*
* copy *fb size *fn to *tb size *tn
* fb,fn tb,tn updated on return
*/
{
register unsigned char* f;
register unsigned char* t;
register unsigned char* e;
register const unsigned char* m;
register size_t n;
char* b;
char* tfb;
size_t i;
{
/* TODO: reset to the initial state */
return 0;
/* TODO: write the initial state shift sequence */
return 0;
}
n = *tn;
if (cc)
{
{
{
{
return -1;
}
return -1;
*fb += i;
*fn -= i;
return n;
}
return -1;
n -= *tn;
{
e = (unsigned char*)(*tb);
for (t = e - n; t < e; t++)
*t = m[*t];
}
return n;
}
{
{
return -1;
}
f = (unsigned char*)(*fb);
e = f + n;
while (f < e)
*t++ = m[*f++];
return n;
}
}
if (n > *fn)
n = *fn;
{
f = (unsigned char*)(*fb);
e = f + n;
t = (unsigned char*)(*tb);
while (f < e)
*t++ = m[*f++];
}
else
*fb += n;
*fn -= n;
*tb += n;
*tn -= n;
return n;
}
/*
* write *fb size *fn to op
* fb,fn updated on return
* total bytes written to op returned
*/
{
char* tb;
char* ts;
size_t* e;
size_t r;
int ok;
/*
* the old api had optional size_t* instead of Iconv_disc_t*
*/
{
iconv_init(disc, 0);
}
else
e = 0;
r = 0;
tn = 0;
ok = 1;
{
{
if (!r)
r = -1;
break;
}
#if DEBUG_TRACE
for (;;)
#else
#endif
{
#if DEBUG_TRACE
break;
#endif
switch (errno)
{
case E2BIG:
break;
case EINVAL:
(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "incomplete multibyte sequence at offset %I*u", sizeof(fo), *fb - fo);
goto bad;
default:
(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "invalid multibyte sequence at offset %I*u", sizeof(fo), *fb - fo);
bad:
{
{
tn--;
}
(*fb)++;
(*fn)--;
continue;
}
ok = 0;
break;
}
break;
}
#if DEBUG_TRACE
#endif
}
if (e)
return r;
}
/*
* move n bytes from ip to op
*/
{
char* fb;
char* fs;
char* tb;
char* ts;
size_t* e;
size_t i;
ssize_t r = 0;
int locked;
/*
* the old api had optional size_t* instead of Iconv_disc_t*
*/
{
iconv_init(disc, 0);
}
else
e = 0;
tb = 0;
ft = 0;
fn = n;
do
{
if (n != SF_UNBOUND)
break;
{
if (!r)
r = -1;
break;
}
{
switch (errno)
{
case E2BIG:
break;
case EINVAL:
{
(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "incomplete multibyte sequence at offset %I*u", sizeof(ft), ft + (fo - fn));
goto bad;
}
break;
default:
(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "invalid multibyte sequence at offset %I*u", sizeof(ft), ft + (fo - fn));
bad:
{
{
tn--;
}
fs++;
fn--;
continue;
}
ok = 0;
break;
}
break;
}
if (locked)
else
if (n != SF_UNBOUND)
{
break;
}
fn++;
} while (ok);
if (tb)
{
{
}
}
if (e)
return r;
}
/*
* iconv_list_t iterator
* call with arg 0 to start
* prev return value is current arg
*/
{
#if _UWIN
if (!cp)
{
{
}
}
{
{
return cp;
}
}
#else
if (!cp)
#endif
}