4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore/*
2d08521bd15501c8370ba2153b9cca4f094979d0Garrett D'Amore * Copyright 2013 Garrett D'Amore <garrett@damore.org>
475b496bc008381e64c802250441cc256622ce91Garrett D'Amore * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * Copyright (c) 2002-2004 Tim J. Robbins
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * All rights reserved.
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore *
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * Redistribution and use in source and binary forms, with or without
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * modification, are permitted provided that the following conditions
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * are met:
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * 1. Redistributions of source code must retain the above copyright
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * notice, this list of conditions and the following disclaimer.
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * 2. Redistributions in binary form must reproduce the above copyright
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * notice, this list of conditions and the following disclaimer in the
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * documentation and/or other materials provided with the distribution.
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore *
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * SUCH DAMAGE.
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore#include "lint.h"
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore#include <errno.h>
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore#include <limits.h>
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore#include <stdlib.h>
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore#include <string.h>
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore#include <wchar.h>
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore#include "mblocal.h"
2d08521bd15501c8370ba2153b9cca4f094979d0Garrett D'Amore#include "lctype.h"
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amorestatic size_t _UTF8_mbrtowc(wchar_t *_RESTRICT_KYWD,
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore const char *_RESTRICT_KYWD,
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore size_t, mbstate_t *_RESTRICT_KYWD);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amorestatic int _UTF8_mbsinit(const mbstate_t *);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amorestatic size_t _UTF8_mbsnrtowcs(wchar_t *_RESTRICT_KYWD,
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore const char **_RESTRICT_KYWD, size_t, size_t,
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore mbstate_t *_RESTRICT_KYWD);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amorestatic size_t _UTF8_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore mbstate_t *_RESTRICT_KYWD);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amorestatic size_t _UTF8_wcsnrtombs(char *_RESTRICT_KYWD,
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore const wchar_t **_RESTRICT_KYWD,
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore size_t, size_t, mbstate_t *_RESTRICT_KYWD);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
/*
 * Decoder state for a UTF-8 character that spans several calls.  The
 * conversion routines in this file reach it by casting the caller's
 * mbstate_t pointer to this type.
 */
typedef struct {
	/* Bits of the character accumulated from the octets seen so far. */
	wchar_t	ch;
	/* Octets still expected for the character; 0 is the initial state. */
	int	want;
	/* Smallest code allowed for the sequence length being decoded. */
	wchar_t	lbound;
} _UTF8State;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
2d08521bd15501c8370ba2153b9cca4f094979d0Garrett D'Amorevoid
2d08521bd15501c8370ba2153b9cca4f094979d0Garrett D'Amore_UTF8_init(struct lc_ctype *lct)
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore{
2d08521bd15501c8370ba2153b9cca4f094979d0Garrett D'Amore lct->lc_mbrtowc = _UTF8_mbrtowc;
2d08521bd15501c8370ba2153b9cca4f094979d0Garrett D'Amore lct->lc_wcrtomb = _UTF8_wcrtomb;
2d08521bd15501c8370ba2153b9cca4f094979d0Garrett D'Amore lct->lc_mbsinit = _UTF8_mbsinit;
2d08521bd15501c8370ba2153b9cca4f094979d0Garrett D'Amore lct->lc_mbsnrtowcs = _UTF8_mbsnrtowcs;
2d08521bd15501c8370ba2153b9cca4f094979d0Garrett D'Amore lct->lc_wcsnrtombs = _UTF8_wcsnrtombs;
2d08521bd15501c8370ba2153b9cca4f094979d0Garrett D'Amore lct->lc_is_ascii = 0;
2d08521bd15501c8370ba2153b9cca4f094979d0Garrett D'Amore lct->lc_max_mblen = 4;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore}
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amorestatic int
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore_UTF8_mbsinit(const mbstate_t *ps)
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore{
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return (ps == NULL || ((const _UTF8State *)ps)->want == 0);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore}
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amorestatic size_t
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore_UTF8_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore size_t n, mbstate_t *_RESTRICT_KYWD ps)
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore{
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore _UTF8State *us;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore int ch, i, mask, want;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore wchar_t lbound, wch;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore us = (_UTF8State *)ps;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (us->want < 0 || us->want > 6) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore errno = EINVAL;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return ((size_t)-1);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (s == NULL) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore s = "";
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore n = 1;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore pwc = NULL;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (n == 0)
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /* Incomplete multibyte sequence */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return ((size_t)-2);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (us->want == 0) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /*
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * Determine the number of octets that make up this character
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * from the first octet, and a mask that extracts the
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * interesting bits of the first octet. We already know
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * the character is at least two bytes long.
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore *
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * We also specify a lower bound for the character code to
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * detect redundant, non-"shortest form" encodings. For
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * example, the sequence C0 80 is _not_ a legal representation
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * of the null character. This enforces a 1-to-1 mapping
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * between character codes and their multibyte representations.
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore ch = (unsigned char)*s;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if ((ch & 0x80) == 0) {
475b496bc008381e64c802250441cc256622ce91Garrett D'Amore /* Fast path for plain ASCII characters. */
475b496bc008381e64c802250441cc256622ce91Garrett D'Amore if (pwc != NULL)
475b496bc008381e64c802250441cc256622ce91Garrett D'Amore *pwc = ch;
475b496bc008381e64c802250441cc256622ce91Garrett D'Amore return (ch != '\0' ? 1 : 0);
475b496bc008381e64c802250441cc256622ce91Garrett D'Amore }
475b496bc008381e64c802250441cc256622ce91Garrett D'Amore if ((ch & 0xe0) == 0xc0) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore mask = 0x1f;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore want = 2;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore lbound = 0x80;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore } else if ((ch & 0xf0) == 0xe0) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore mask = 0x0f;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore want = 3;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore lbound = 0x800;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore } else if ((ch & 0xf8) == 0xf0) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore mask = 0x07;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore want = 4;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore lbound = 0x10000;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore#if 0
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /* These would be illegal in the UTF-8 space */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore } else if ((ch & 0xfc) == 0xf8) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore mask = 0x03;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore want = 5;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore lbound = 0x200000;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore } else if ((ch & 0xfe) == 0xfc) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore mask = 0x01;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore want = 6;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore lbound = 0x4000000;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore#endif
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore } else {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /*
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * Malformed input; input is not UTF-8.
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore errno = EILSEQ;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return ((size_t)-1);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore } else {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore want = us->want;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore lbound = us->lbound;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /*
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * Decode the octet sequence representing the character in chunks
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * of 6 bits, most significant first.
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (us->want == 0)
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore wch = (unsigned char)*s++ & mask;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore else
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore wch = us->ch;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore for (i = (us->want == 0) ? 1 : 0; i < MIN(want, n); i++) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if ((*s & 0xc0) != 0x80) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /*
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * Malformed input; bad characters in the middle
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * of a character.
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore errno = EILSEQ;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return ((size_t)-1);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore wch <<= 6;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore wch |= *s++ & 0x3f;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (i < want) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /* Incomplete multibyte sequence. */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore us->want = want - i;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore us->lbound = lbound;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore us->ch = wch;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return ((size_t)-2);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (wch < lbound) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /*
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * Malformed input; redundant encoding.
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore errno = EILSEQ;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return ((size_t)-1);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (pwc != NULL)
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore *pwc = wch;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore us->want = 0;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return (wch == L'\0' ? 0 : want);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore}
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amorestatic size_t
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore_UTF8_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst, const char **_RESTRICT_KYWD src,
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps)
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore{
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore _UTF8State *us;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore const char *s;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore size_t nchr;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore wchar_t wc;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore size_t nb;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore us = (_UTF8State *)ps;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore s = *src;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore nchr = 0;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (dst == NULL) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /*
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * The fast path in the loop below is not safe if an ASCII
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * character appears as anything but the first byte of a
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * multibyte sequence. Check now to avoid doing it in the loop.
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (nms > 0 && us->want > 0 && (signed char)*s > 0) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore errno = EILSEQ;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return ((size_t)-1);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore for (;;) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (nms > 0 && (signed char)*s > 0)
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /*
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * Fast path for plain ASCII characters
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * excluding NUL.
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore nb = 1;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore else if ((nb = _UTF8_mbrtowc(&wc, s, nms, ps)) ==
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore (size_t)-1)
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /* Invalid sequence - mbrtowc() sets errno. */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return ((size_t)-1);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore else if (nb == 0 || nb == (size_t)-2)
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return (nchr);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore s += nb;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore nms -= nb;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore nchr++;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /*NOTREACHED*/
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /*
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * The fast path in the loop below is not safe if an ASCII
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * character appears as anything but the first byte of a
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * multibyte sequence. Check now to avoid doing it in the loop.
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (nms > 0 && len > 0 && us->want > 0 && (signed char)*s > 0) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore errno = EILSEQ;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return ((size_t)-1);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore while (len-- > 0) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (nms > 0 && (signed char)*s > 0) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /*
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * Fast path for plain ASCII characters
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * excluding NUL.
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore *dst = (wchar_t)*s;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore nb = 1;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore } else if ((nb = _UTF8_mbrtowc(dst, s, nms, ps)) ==
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore (size_t)-1) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore *src = s;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return ((size_t)-1);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore } else if (nb == (size_t)-2) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore *src = s + nms;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return (nchr);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore } else if (nb == 0) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore *src = NULL;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return (nchr);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore s += nb;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore nms -= nb;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore nchr++;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore dst++;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore *src = s;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return (nchr);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore}
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amorestatic size_t
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore_UTF8_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, mbstate_t *_RESTRICT_KYWD ps)
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore{
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore _UTF8State *us;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore unsigned char lead;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore int i, len;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore us = (_UTF8State *)ps;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (us->want != 0) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore errno = EINVAL;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return ((size_t)-1);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (s == NULL)
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /* Reset to initial shift state (no-op) */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return (1);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /*
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * Determine the number of octets needed to represent this character.
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * We always output the shortest sequence possible. Also specify the
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * first few bits of the first octet, which contains the information
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * about the sequence length.
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if ((wc & ~0x7f) == 0) {
475b496bc008381e64c802250441cc256622ce91Garrett D'Amore /* Fast path for plain ASCII characters. */
475b496bc008381e64c802250441cc256622ce91Garrett D'Amore *s = (char)wc;
475b496bc008381e64c802250441cc256622ce91Garrett D'Amore return (1);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore } else if ((wc & ~0x7ff) == 0) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore lead = 0xc0;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore len = 2;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore } else if ((wc & ~0xffff) == 0) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore lead = 0xe0;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore len = 3;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore } else if ((wc & ~0x1fffff) == 0) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore lead = 0xf0;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore len = 4;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore#if 0
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /* Again, 5 and 6 byte encodings are simply not permitted */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore } else if ((wc & ~0x3ffffff) == 0) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore lead = 0xf8;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore len = 5;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore } else if ((wc & ~0x7fffffff) == 0) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore lead = 0xfc;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore len = 6;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore#endif
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore } else {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore errno = EILSEQ;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return ((size_t)-1);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /*
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * Output the octets representing the character in chunks
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * of 6 bits, least significant last. The first octet is
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * a special case because it contains the sequence length
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * information.
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore for (i = len - 1; i > 0; i--) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore s[i] = (wc & 0x3f) | 0x80;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore wc >>= 6;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore *s = (wc & 0xff) | lead;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return (len);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore}
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amorestatic size_t
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore_UTF8_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src,
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps)
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore{
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore _UTF8State *us;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore char buf[MB_LEN_MAX];
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore const wchar_t *s;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore size_t nbytes;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore size_t nb;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore us = (_UTF8State *)ps;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (us->want != 0) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore errno = EINVAL;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return ((size_t)-1);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore s = *src;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore nbytes = 0;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (dst == NULL) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore while (nwc-- > 0) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (0 <= *s && *s < 0x80)
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /* Fast path for plain ASCII characters. */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore nb = 1;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore else if ((nb = _UTF8_wcrtomb(buf, *s, ps)) ==
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore (size_t)-1)
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /* Invalid character - wcrtomb() sets errno. */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return ((size_t)-1);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (*s == L'\0')
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return (nbytes + nb - 1);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore s++;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore nbytes += nb;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return (nbytes);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore while (len > 0 && nwc-- > 0) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (0 <= *s && *s < 0x80) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /* Fast path for plain ASCII characters. */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore nb = 1;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore *dst = *s;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore } else if (len > (size_t)MB_CUR_MAX) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /* Enough space to translate in-place. */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if ((nb = _UTF8_wcrtomb(dst, *s, ps)) == (size_t)-1) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore *src = s;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return ((size_t)-1);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore } else {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /*
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore * May not be enough space; use temp. buffer.
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if ((nb = _UTF8_wcrtomb(buf, *s, ps)) == (size_t)-1) {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore *src = s;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return ((size_t)-1);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (nb > (int)len)
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore /* MB sequence for character won't fit. */
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore break;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore (void) memcpy(dst, buf, nb);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore if (*s == L'\0') {
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore *src = NULL;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return (nbytes + nb - 1);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore s++;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore dst += nb;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore len -= nb;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore nbytes += nb;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore }
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore *src = s;
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore return (nbytes);
4297a3b0d0a35d80f86fff155e288e885a100e6dGarrett D'Amore}