/* Convert multibyte character to wide character.
Copyright (C) 1999-2002, 2005-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2008.
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
/* Specification. */
#include <wchar.h>
/* Implement mbrtowc() on top of mbtowc(). */
# include <errno.h>
# include <stdlib.h>
# include "localcharset.h"
# include "streq.h"
# include "verify.h"
{
if (s == NULL)
{
s = "";
n = 1;
}
if (n == 0)
return (size_t)(-2);
/* Here n > 0. */
{
const char *p;
size_t m;
switch (nstate)
{
case 0:
p = s;
m = n;
break;
case 3:
/*FALLTHROUGH*/
case 2:
/*FALLTHROUGH*/
case 1:
p = buf;
m = nstate;
buf[m++] = s[0];
if (n >= 2 && m < 4)
{
buf[m++] = s[1];
if (n >= 3 && m < 4)
buf[m++] = s[2];
}
break;
default:
return (size_t)(-1);
}
/* Here m > 0. */
# if __GLIBC__
/* Work around bug <http://sourceware.org/bugzilla/show_bug.cgi?id=9674> */
# endif
{
if (res >= 0)
{
abort ();
abort ();
pstate[0] = 0;
return res;
}
/* mbtowc does not distinguish between invalid and incomplete multibyte
sequences. But mbrtowc needs to make this distinction.
There are two possible approaches:
- Use iconv() and its return value.
- Use built-in knowledge about the possible encodings.
Given the low quality of implementation of iconv() on the systems that
lack mbrtowc(), we use the second approach.
The possible encodings are:
- 8-bit encodings,
- EUC-JP, EUC-KR, GB2312, EUC-TW, BIG5, GB18030, SJIS,
- UTF-8.
Use specialized code for each. */
if (m >= 4 || m >= MB_CUR_MAX)
goto invalid;
/* Here MB_CUR_MAX > 1 and 0 < m < 4. */
{
{
/* Cf. unistr/u8-mblen.c. */
unsigned char c = (unsigned char) p[0];
if (c >= 0xc2)
{
if (c < 0xe0)
{
if (m == 1)
goto incomplete;
}
else if (c < 0xf0)
{
if (m == 1)
goto incomplete;
if (m == 2)
{
goto incomplete;
}
}
else if (c <= 0xf4)
{
if (m == 1)
goto incomplete;
else /* m == 2 || m == 3 */
{
{
if (m == 2)
goto incomplete;
else /* m == 3 */
{
goto incomplete;
}
}
}
}
}
goto invalid;
}
/* As a reference for this code, you can use the GNU libiconv
implementation. Look for uses of the RET_TOOFEW macro. */
{
if (m == 1)
{
unsigned char c = (unsigned char) p[0];
if ((c >= 0xa1 && c < 0xff) || c == 0x8e || c == 0x8f)
goto incomplete;
}
if (m == 2)
{
unsigned char c = (unsigned char) p[0];
if (c == 0x8f)
{
goto incomplete;
}
}
goto invalid;
}
{
if (m == 1)
{
unsigned char c = (unsigned char) p[0];
if (c >= 0xa1 && c < 0xff)
goto incomplete;
}
goto invalid;
}
{
if (m == 1)
{
unsigned char c = (unsigned char) p[0];
if ((c >= 0xa1 && c < 0xff) || c == 0x8e)
goto incomplete;
}
else /* m == 2 || m == 3 */
{
unsigned char c = (unsigned char) p[0];
if (c == 0x8e)
goto incomplete;
}
goto invalid;
}
{
if (m == 1)
{
unsigned char c = (unsigned char) p[0];
if ((c >= 0x90 && c <= 0xe3) || (c >= 0xf8 && c <= 0xfe))
goto incomplete;
}
else /* m == 2 || m == 3 */
{
unsigned char c = (unsigned char) p[0];
if (c >= 0x90 && c <= 0xe3)
{
{
if (m == 2)
goto incomplete;
else /* m == 3 */
{
goto incomplete;
}
}
}
}
goto invalid;
}
{
if (m == 1)
{
unsigned char c = (unsigned char) p[0];
if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)
|| (c >= 0xf0 && c <= 0xf9))
goto incomplete;
}
goto invalid;
}
/* An unknown multibyte encoding. */
goto incomplete;
}
{
/* Here 0 <= k < m < 4. */
pstate[++k] = s[0];
if (k < m)
{
pstate[++k] = s[1];
if (k < m)
pstate[++k] = s[2];
}
if (k != m)
abort ();
}
pstate[0] = m;
return (size_t)(-2);
/* The conversion state is undefined, says POSIX. */
return (size_t)(-1);
}
}
}
#else
/* Override the system's mbrtowc() function. */
{
if (s == NULL)
{
s = "";
n = 1;
}
# endif
# if MBRTOWC_RETVAL_BUG
{
/* Override mbrtowc's internal state. We can not call mbsinit() on the
hidden internal state, but we can call it on our variable. */
ps = &internal_state;
{
/* Parse the rest of the multibyte character byte for byte. */
for (; n > 0; s++, n--)
{
return (size_t)(-1);
count++;
{
/* The multibyte character has been completed. */
}
}
return (size_t)(-2);
}
}
# endif
{
{
if (wc == 0)
ret = 0;
}
return ret;
}
# else
# endif
}
#endif