#ifndef lint
static char *rcsid = "$Id: normalizer.c,v 1.1 2003/06/04 00:26:05 marka Exp $";
#endif
/*
* Copyright (c) 2000,2002 Japan Network Information Center.
* All rights reserved.
*
* By using this file, you agree to the terms and conditions set forth bellow.
*
* LICENSE TERMS AND CONDITIONS
*
* The following License Terms and Conditions apply, unless a different
* license is obtained from Japan Network Information Center ("JPNIC"),
* a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
* Chiyoda-ku, Tokyo 101-0047, Japan.
*
* 1. Use, Modification and Redistribution (including distribution of any
* modified or derived work) in source and/or binary forms is permitted
* under this License Terms and Conditions.
*
* 2. Redistribution of source code must retain the copyright notices as they
* appear in each source code file, this License Terms and Conditions.
*
* 3. Redistribution in binary form must reproduce the Copyright Notice,
* this License Terms and Conditions, in the documentation and/or other
* materials provided with the distribution. For the purposes of binary
* distribution the "Copyright Notice" refers to the following language:
* "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
*
* 4. The name of JPNIC may not be used to endorse or promote products
* derived from this Software without specific prior written approval of
* JPNIC.
*
* 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
*/
#include <config.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <idn/assert.h>
#include <idn/logmacro.h>
#include <idn/result.h>
#include <idn/normalizer.h>
#include <idn/strhash.h>
#include <idn/unormalize.h>
#include <idn/unicode.h>
#include <idn/ucs4.h>
#include <idn/debug.h>
#include <idn/util.h>
#define MAX_LOCAL_SCHEME 3
#define INITIALIZED (scheme_hash != NULL)
typedef struct {
char *name;
idn_normalizer_proc_t proc;
} normalize_scheme_t;
struct idn_normalizer {
int nschemes;
int scheme_size;
normalize_scheme_t **schemes;
normalize_scheme_t *local_buf[MAX_LOCAL_SCHEME];
int reference_count;
};
static idn__strhash_t scheme_hash;
static idn__unicode_version_t vcur = NULL;
static idn__unicode_version_t v320 = NULL;
#define INIT_VERSION(version, var) \
if (var == NULL) { \
idn_result_t r = idn__unicode_create(version, &var); \
if (r != idn_success) \
return (r); \
}
static idn_result_t expand_schemes(idn_normalizer_t ctx);
static idn_result_t register_standard_normalizers(void);
static idn_result_t normalizer_formkc(const unsigned long *from,
unsigned long *to, size_t tolen);
static idn_result_t normalizer_formkc_v320(const unsigned long *from,
unsigned long *to,
size_t tolen);
static struct standard_normalizer {
char *name;
idn_normalizer_proc_t proc;
} standard_normalizer[] = {
{ "unicode-form-kc", normalizer_formkc },
{ "unicode-form-kc/3.2.0", normalizer_formkc_v320 },
{ "RFC3491", normalizer_formkc_v320 },
{ NULL, NULL },
};
idn_result_t
idn_normalizer_initialize(void) {
idn__strhash_t hash;
idn_result_t r;
TRACE(("idn_normalizer_initialize()\n"));
if (scheme_hash != NULL) {
r = idn_success; /* already initialized */
goto ret;
}
if ((r = idn__strhash_create(&hash)) != idn_success)
goto ret;
scheme_hash = hash;
/* Register standard normalizers */
r = register_standard_normalizers();
ret:
TRACE(("idn_normalizer_initialize(): %s\n", idn_result_tostring(r)));
return (r);
}
idn_result_t
idn_normalizer_create(idn_normalizer_t *ctxp) {
idn_normalizer_t ctx;
idn_result_t r;
assert(ctxp != NULL);
TRACE(("idn_normalizer_create()\n"));
if ((ctx = malloc(sizeof(struct idn_normalizer))) == NULL) {
r = idn_nomemory;
goto ret;
}
ctx->nschemes = 0;
ctx->scheme_size = MAX_LOCAL_SCHEME;
ctx->schemes = ctx->local_buf;
ctx->reference_count = 1;
*ctxp = ctx;
r = idn_success;
ret:
TRACE(("idn_normalizer_create(): %s\n", idn_result_tostring(r)));
return (r);
}
void
idn_normalizer_destroy(idn_normalizer_t ctx) {
assert(ctx != NULL);
TRACE(("idn_normalizer_destroy()\n"));
ctx->reference_count--;
if (ctx->reference_count <= 0) {
TRACE(("idn_normalizer_destroy(): the object is destroyed\n"));
if (ctx->schemes != ctx->local_buf)
free(ctx->schemes);
free(ctx);
} else {
TRACE(("idn_normalizer_destroy(): "
"update reference count (%d->%d)\n",
ctx->reference_count + 1, ctx->reference_count));
}
}
void
idn_normalizer_incrref(idn_normalizer_t ctx) {
assert(ctx != NULL);
TRACE(("idn_normalizer_incrref()\n"));
TRACE(("idn_normalizer_incrref: update reference count (%d->%d)\n",
ctx->reference_count, ctx->reference_count + 1));
ctx->reference_count++;
}
idn_result_t
idn_normalizer_add(idn_normalizer_t ctx, const char *scheme_name) {
idn_result_t r;
void *v;
normalize_scheme_t *scheme;
assert(ctx != NULL && scheme_name != NULL);
TRACE(("idn_normalizer_add(scheme_name=%s)\n", scheme_name));
assert(INITIALIZED);
if (idn__strhash_get(scheme_hash, scheme_name, &v) != idn_success) {
ERROR(("idn_normalizer_add(): invalid scheme \"%-.30s\"\n",
scheme_name));
r = idn_invalid_name;
goto ret;
}
scheme = v;
assert(ctx->nschemes <= ctx->scheme_size);
if (ctx->nschemes == ctx->scheme_size &&
(r = expand_schemes(ctx)) != idn_success) {
goto ret;
}
ctx->schemes[ctx->nschemes++] = scheme;
r = idn_success;
ret:
TRACE(("idn_normalizer_add(): %s\n", idn_result_tostring(r)));
return (r);
}
idn_result_t
idn_normalizer_addall(idn_normalizer_t ctx, const char **scheme_names,
int nschemes) {
idn_result_t r;
int i;
assert(ctx != NULL && scheme_names != NULL);
TRACE(("idn_normalizer_addall(nschemes=%d)\n", nschemes));
for (i = 0; i < nschemes; i++) {
r = idn_normalizer_add(ctx, (const char *)*scheme_names);
if (r != idn_success)
goto ret;
scheme_names++;
}
r = idn_success;
ret:
TRACE(("idn_normalizer_addall(): %s\n", idn_result_tostring(r)));
return (r);
}
idn_result_t
idn_normalizer_normalize(idn_normalizer_t ctx, const unsigned long *from,
unsigned long *to, size_t tolen) {
idn_result_t r;
unsigned long *src, *dst;
unsigned long *buffers[2] = {NULL, NULL};
size_t buflen[2] = {0, 0};
size_t dstlen;
int idx;
int i;
assert(scheme_hash != NULL);
assert(ctx != NULL && from != NULL && to != NULL);
TRACE(("idn_normalizer_normalize(from=\"%s\", tolen=%d)\n",
idn__debug_ucs4xstring(from, 50), (int)tolen));
if (ctx->nschemes <= 0) {
if (tolen < idn_ucs4_strlen(from) + 1) {
r = idn_buffer_overflow;
goto ret;
}
idn_ucs4_strcpy(to, from);
r = idn_success;
goto ret;
}
/*
* Normalize.
*/
src = (void *)from;
dstlen = idn_ucs4_strlen(from) + 1;
i = 0;
while (i < ctx->nschemes) {
TRACE(("idn_normalizer_normalize(): normalize %s\n",
ctx->schemes[i]->name));
/*
* Choose destination area to restore the result of a mapping.
*/
if (i + 1 == ctx->nschemes) {
dst = to;
dstlen = tolen;
} else {
if (src == buffers[0])
idx = 1;
else
idx = 0;
if (buflen[idx] < dstlen) {
void *newbuf;
newbuf = realloc(buffers[idx],
sizeof(long) * dstlen);
if (newbuf == NULL) {
r = idn_nomemory;
goto ret;
}
buffers[idx] = (unsigned long *)newbuf;
buflen[idx] = dstlen;
}
dst = buffers[idx];
dstlen = buflen[idx];
}
/*
* Perform i-th normalization scheme.
* If buffer size is not enough, we double it and try again.
*/
r = (ctx->schemes[i]->proc)(src, dst, dstlen);
if (r == idn_buffer_overflow && dst != to) {
dstlen *= 2;
continue;
}
if (r != idn_success)
goto ret;
src = dst;
i++;
}
r = idn_success;
ret:
free(buffers[0]);
free(buffers[1]);
if (r == idn_success) {
TRACE(("idn_normalizer_normalize(): success (to=\"%s\")\n",
idn__debug_ucs4xstring(to, 50)));
} else {
TRACE(("idn_normalizer_normalize(): %s\n",
idn_result_tostring(r)));
}
return (r);
}
idn_result_t
idn_normalizer_register(const char *scheme_name, idn_normalizer_proc_t proc) {
idn_result_t r;
normalize_scheme_t *scheme;
assert(scheme_name != NULL && proc != NULL);
TRACE(("idn_normalizer_register(scheme_name=%s)\n", scheme_name));
assert(INITIALIZED);
scheme = malloc(sizeof(*scheme) + strlen(scheme_name) + 1);
if (scheme == NULL) {
r = idn_nomemory;
goto ret;
}
scheme->name = (char *)(scheme + 1);
(void)strcpy(scheme->name, scheme_name);
scheme->proc = proc;
r = idn__strhash_put(scheme_hash, scheme_name, scheme);
if (r != idn_success)
goto ret;
r = idn_success;
ret:
TRACE(("idn_normalizer_register(): %s\n", idn_result_tostring(r)));
return (r);
}
static idn_result_t
expand_schemes(idn_normalizer_t ctx) {
normalize_scheme_t **new_schemes;
int new_size = ctx->scheme_size * 2;
if (ctx->schemes == ctx->local_buf) {
new_schemes = malloc(sizeof(normalize_scheme_t) * new_size);
} else {
new_schemes = realloc(ctx->schemes,
sizeof(normalize_scheme_t) * new_size);
}
if (new_schemes == NULL)
return (idn_nomemory);
if (ctx->schemes == ctx->local_buf)
memcpy(new_schemes, ctx->local_buf, sizeof(ctx->local_buf));
ctx->schemes = new_schemes;
ctx->scheme_size = new_size;
return (idn_success);
}
static idn_result_t
register_standard_normalizers(void) {
int i;
int failed = 0;
for (i = 0; standard_normalizer[i].name != NULL; i++) {
idn_result_t r;
r = idn_normalizer_register(standard_normalizer[i].name,
standard_normalizer[i].proc);
if (r != idn_success) {
WARNING(("idn_normalizer_initialize(): "
"failed to register \"%-.100s\"\n",
standard_normalizer[i].name));
failed++;
}
}
if (failed > 0)
return (idn_failure);
else
return (idn_success);
}
/*
* Unicode Normalization Forms -- latest version
*/
static idn_result_t
normalizer_formkc(const unsigned long *from, unsigned long *to, size_t tolen) {
INIT_VERSION(NULL, vcur);
return (idn__unormalize_formkc(vcur, from, to, tolen));
}
/*
* Unicode Normalization Forms -- version 3.2.0
*/
static idn_result_t
normalizer_formkc_v320(const unsigned long *from, unsigned long *to,
size_t tolen) {
INIT_VERSION("3.2.0", v320);
return (idn__unormalize_formkc(v320, from, to, tolen));
}