#ifndef lint
/* RCS revision identifier of this file; compiled out when 'lint' is defined. */
static char *rcsid =
	"$Id: unicode.c,v 1.1 2003/06/04 00:26:16 marka Exp $";
#endif /* !lint */
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews/*
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * Copyright (c) 2000,2001,2002 Japan Network Information Center.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * All rights reserved.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews *
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * By using this file, you agree to the terms and conditions set forth bellow.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews *
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * LICENSE TERMS AND CONDITIONS
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews *
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * The following License Terms and Conditions apply, unless a different
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * license is obtained from Japan Network Information Center ("JPNIC"),
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * Chiyoda-ku, Tokyo 101-0047, Japan.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews *
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * 1. Use, Modification and Redistribution (including distribution of any
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * modified or derived work) in source and/or binary forms is permitted
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * under this License Terms and Conditions.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews *
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * 2. Redistribution of source code must retain the copyright notices as they
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * appear in each source code file, this License Terms and Conditions.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews *
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * 3. Redistribution in binary form must reproduce the Copyright Notice,
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * this License Terms and Conditions, in the documentation and/or other
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * materials provided with the distribution. For the purposes of binary
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * distribution the "Copyright Notice" refers to the following language:
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews *
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * 4. The name of JPNIC may not be used to endorse or promote products
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * derived from this Software without specific prior written approval of
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * JPNIC.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews *
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews */
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#include <config.h>
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#include <stddef.h>
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#include <stdlib.h>
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#include <string.h>
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#include <idn/result.h>
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#include <idn/logmacro.h>
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#include <idn/assert.h>
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#include <idn/unicode.h>
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
/* Version string used when the caller passes no explicit version. */
#define UNICODE_CURRENT	"3.2.0"

/* Largest code point accepted by the functions in this file. */
#define UCS_MAX		0x10ffff

/* Bit set on the final element of a decomposition sequence. */
#define END_BIT		0x80000000
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
/*
 * Constants for the arithmetic Hangul decomposition/composition.
 *
 * A precomposed syllable S in [SBase, SLast) is mapped to and from a
 * leading (L), vowel (V) and optional trailing (T) part by index
 * arithmetic on these bases and counts.
 */
#define SBase	0xac00		/* first precomposed syllable */
#define LBase	0x1100		/* base of the leading part */
#define VBase	0x1161		/* base of the vowel part */
#define TBase	0x11a7		/* base of the trailing part; offset 0 means "none" */
#define LCount	19		/* number of leading values */
#define VCount	21		/* number of vowel values */
#define TCount	28		/* number of trailing offsets, including "none" */
#define SLast	(SBase + LCount * VCount * TCount)	/* one past the last syllable */
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
/*
 * Symbol composition macro.
 *
 * compose_sym(a, b) pastes its two arguments into a single token.  It
 * goes through compose_symX() so that arguments which are themselves
 * macros are expanded before being joined.
 */
#define compose_symX(a, b)	a ## b
#define compose_sym(a, b)	compose_symX(a, b)
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
/*
 * One entry of a composition list: the second character of a pair and
 * the character the pair composes to.
 */
struct composition {
	unsigned long c2;	/* 2nd character of the pair */
	unsigned long comp;	/* resulting composed character */
};
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#include "unicodedata_320.c"
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#define VERSION v320
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#include "unicode_template.c"
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#undef VERSION
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
/*
 * Signatures of the per-version lookup procedures stored in
 * 'struct idn__unicode_ops'.
 */

/* Returns the canonical class value for a character. */
typedef int (*unicode_canonclassproc)(unsigned long ch);

/* Looks up the decomposition of a character; '*seqp' receives the sequence. */
typedef int (*unicode_decomposeproc)(unsigned long ch,
				     const unsigned long **seqp);

/* Looks up the compositions starting with a character; '*compp' receives the list. */
typedef int (*unicode_composeproc)(unsigned long ch,
				   const struct composition **compp);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewsstatic struct idn__unicode_ops {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews char *version;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews unicode_canonclassproc canonclass_proc;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews unicode_decomposeproc decompose_proc;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews unicode_composeproc compose_proc;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews} unicode_versions[] = {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#define MAKE_UNICODE_HANDLE(version, suffix) \
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews { version, \
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews compose_sym(canonclass_, suffix), \
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews compose_sym(decompose_, suffix), \
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews compose_sym(compose_, suffix) }
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews MAKE_UNICODE_HANDLE("3.2.0", v320),
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews { NULL },
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews#undef MAKE_UNICODE_HANDLE
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews};
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewsidn_result_t
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewsidn__unicode_create(const char *version,
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews idn__unicode_version_t *versionp) {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews idn__unicode_version_t v;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews assert(versionp != NULL);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews TRACE(("idn__unicode_create(version=%-.50s)\n",
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews version == NULL ? "<NULL>" : version));
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews if (version == NULL)
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews version = UNICODE_CURRENT;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews for (v = unicode_versions; v->version != NULL; v++) {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews if (strcmp(v->version, version) == 0) {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews *versionp = v;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (idn_success);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews }
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews }
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (idn_notfound);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews}
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewsvoid
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewsidn__unicode_destroy(idn__unicode_version_t version) {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews assert(version != NULL);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews TRACE(("idn__unicode_destroy()\n"));
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews /* Nothing to do */
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews}
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewsint
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewsidn__unicode_canonicalclass(idn__unicode_version_t version, unsigned long c) {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews if (c > UCS_MAX)
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (0);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (*version->canonclass_proc)(c);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews}
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewsidn_result_t
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewsidn__unicode_decompose(idn__unicode_version_t version,
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews int compat, unsigned long *v, size_t vlen,
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews unsigned long c, int *decomp_lenp) {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews unsigned long *vorg = v;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews int seqidx;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews const unsigned long *seq;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews assert(v != NULL && vlen >= 0 && decomp_lenp != NULL);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews if (c > UCS_MAX)
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (idn_notfound);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews /*
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * First, check for Hangul.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews */
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews if (SBase <= c && c < SLast) {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews int idx, t_offset, v_offset, l_offset;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews idx = c - SBase;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews t_offset = idx % TCount;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews idx /= TCount;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews v_offset = idx % VCount;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews l_offset = idx / VCount;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews if ((t_offset == 0 && vlen < 2) || (t_offset > 0 && vlen < 3))
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (idn_buffer_overflow);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews *v++ = LBase + l_offset;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews *v++ = VBase + v_offset;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews if (t_offset > 0)
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews *v++ = TBase + t_offset;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews *decomp_lenp = v - vorg;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (idn_success);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews }
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews /*
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * Look up decomposition table. If no decomposition is defined
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * or if it is a compatibility decomosition when canonical
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * decomposition requested, return 'idn_notfound'.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews */
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews seqidx = (*version->decompose_proc)(c, &seq);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews if (seqidx == 0 || (compat == 0 && (seqidx & DECOMP_COMPAT) != 0))
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (idn_notfound);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews /*
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * Copy the decomposed sequence. The end of the sequence are
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * marked with END_BIT.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews */
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews do {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews unsigned long c;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews int dlen;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews idn_result_t r;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews c = *seq & ~END_BIT;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews /* Decompose recursively. */
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews r = idn__unicode_decompose(version, compat, v, vlen, c, &dlen);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews if (r == idn_success) {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews v += dlen;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews vlen -= dlen;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews } else if (r == idn_notfound) {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews if (vlen < 1)
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (idn_buffer_overflow);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews *v++ = c;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews vlen--;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews } else {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (r);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews }
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews } while ((*seq++ & END_BIT) == 0);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews *decomp_lenp = v - vorg;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (idn_success);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews}
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewsint
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewsidn__unicode_iscompositecandidate(idn__unicode_version_t version,
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews unsigned long c) {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews const struct composition *dummy;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews if (c > UCS_MAX)
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (0);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews /* Check for Hangul */
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews if ((LBase <= c && c < LBase + LCount) || (SBase <= c && c < SLast))
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (1);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews /*
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * Look up composition table. If there are no composition
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * that begins with the given character, it is not a
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * composition candidate.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews */
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews if ((*version->compose_proc)(c, &dummy) == 0)
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (0);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews else
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (1);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews}
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewsidn_result_t
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrewsidn__unicode_compose(idn__unicode_version_t version, unsigned long c1,
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews unsigned long c2, unsigned long *compp) {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews int n;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews int lo, hi;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews const struct composition *cseq;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews assert(compp != NULL);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews if (c1 > UCS_MAX || c2 > UCS_MAX)
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (idn_notfound);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews /*
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * Check for Hangul.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews */
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews if (LBase <= c1 && c1 < LBase + LCount &&
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews VBase <= c2 && c2 < VBase + VCount) {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews /*
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * Hangul L and V.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews */
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews *compp = SBase +
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews ((c1 - LBase) * VCount + (c2 - VBase)) * TCount;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (idn_success);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews } else if (SBase <= c1 && c1 < SLast &&
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews TBase <= c2 && c2 < TBase + TCount &&
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews (c1 - SBase) % TCount == 0) {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews /*
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * Hangul LV and T.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews */
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews *compp = c1 + (c2 - TBase);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (idn_success);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews }
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews /*
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * Look up composition table. If the result is 0, no composition
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * is defined. Otherwise, upper 16bits of the result contains
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * the number of composition that begins with 'c1', and the lower
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * 16bits is the offset in 'compose_seq'.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews */
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews if ((n = (*version->compose_proc)(c1, &cseq)) == 0)
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (idn_notfound);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews /*
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * The composite sequences are sorted by the 2nd character 'c2'.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews * So we can use binary search.
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews */
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews lo = 0;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews hi = n - 1;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews while (lo <= hi) {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews int mid = (lo + hi) / 2;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews if (cseq[mid].c2 < c2) {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews lo = mid + 1;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews } else if (cseq[mid].c2 > c2) {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews hi = mid - 1;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews } else {
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews *compp = cseq[mid].comp;
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (idn_success);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews }
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews }
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews return (idn_notfound);
5c526acb82c882e41b655c31f5fa4425c87b671cMark Andrews}