/*
 * mdnkit/lib/translator.c
 *
 * translator.c revision 04bdb234571448ed6194e1d4048e6512f2446f1c
 */
/* Revision-control ($Id$) string for this file; compiled out when `lint' is defined. */
#ifndef lint
static char *rcsid = "$Id: translator.c,v 1.17 2000/11/21 02:09:05 ishisone Exp $";
#endif

/*
 * Copyright (c) 2000 Japan Network Information Center.  All rights reserved.
 *
 * By using this file, you agree to the terms and conditions set forth bellow.
 *
 * 			LICENSE TERMS AND CONDITIONS
 *
 * The following License Terms and Conditions apply, unless a different
 * license is obtained from Japan Network Information Center ("JPNIC"),
 * a Japanese association, Fuundo Bldg., 1-2 Kanda Ogawamachi, Chiyoda-ku,
 * Tokyo, Japan.
 *
 * 1. Use, Modification and Redistribution (including distribution of any
 *    modified or derived work) in source and/or binary forms is permitted
 *    under this License Terms and Conditions.
 *
 * 2. Redistribution of source code must retain the copyright notices as they
 *    appear in each source code file, this License Terms and Conditions.
 *
 * 3. Redistribution in binary form must reproduce the Copyright Notice,
 *    this License Terms and Conditions, in the documentation and/or other
 *    materials provided with the distribution.  For the purposes of binary
 *    distribution the "Copyright Notice" refers to the following language:
 *    "Copyright (c) Japan Network Information Center.  All rights reserved."
 *
 * 4. Neither the name of JPNIC may be used to endorse or promote products
 *    derived from this Software without specific prior written approval of
 *    JPNIC.
 *
 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
 *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 *    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
 *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
 *
 * 6. Indemnification by Licensee
 *    Any person or entities using and/or redistributing this Software under
 *    this License Terms and Conditions shall defend indemnify and hold
 *    harmless JPNIC from and against any and all judgements damages,
 *    expenses, settlement liabilities, cost and other liabilities of any
 *    kind as a result of use and redistribution of this Software or any
 *    claim, suite, action, litigation or proceeding by any third party
 *    arising out of or relates to this License Terms and Conditions.
 *
 * 7. Governing Law, Jurisdiction and Venue
 *    This License Terms and Conditions shall be governed by and and
 *    construed in accordance with the law of Japan. Any person or entities
 *    using and/or redistributing this Software under this License Terms and
 *    Conditions hereby agrees and consent to the personal and exclusive
 *    jurisdiction and venue of Tokyo District Court of Japan.
 */

#include <config.h>

#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#include <mdn/result.h>
#include <mdn/assert.h>
#include <mdn/logmacro.h>
#include <mdn/converter.h>
#include <mdn/normalizer.h>
#include <mdn/translator.h>
#include <mdn/debug.h>

/*
 * Forward declarations of the file-local helpers defined later in
 * this file.
 */
static int		numdots(const char *s);
static int		contain_invalid_char(const char *s);
static mdn_result_t	append_zld(char *s, size_t len, const char *zld);

/*
 * Translate the domain name FROM from the local encoding to the target
 * encoding, going through UTF-8, and store the NUL-terminated result
 * in TO, whose capacity is TOLEN bytes (terminator included).
 *
 * Outline:
 *  1. Names that do not fit in the 512-byte work buffer are rejected
 *     with mdn_invalid_name.
 *  2. Decide which local converter(s) to try:
 *      - LOCAL_ZLD given: only names matching the ZLD are processed;
 *        the ZLD is stripped first.  The alternate converter is also
 *        tried when it exists and the stripped name consists solely
 *        of letters, digits, '.' and '-'.
 *      - no LOCAL_ZLD, name has other characters: local converter only.
 *      - no LOCAL_ZLD, plain ASCII name: local converter if
 *        mdn_converter_isasciicompatible() accepts it, plus the
 *        alternate converter if one was supplied.
 *  3. The alternate converter is tried first.  mdn_invalid_encoding
 *     makes us fall through to the next candidate; any other failure
 *     is returned to the caller as is.
 *  4. If nothing converted the name, FROM is copied to TO verbatim
 *     (mdn_buffer_overflow if TO is too small).
 *  5. Otherwise the UTF-8 name is normalized (when NORMALIZER is not
 *     NULL), converted with TARGET_CONVERTER -- retrying with
 *     TARGET_ALTERNATE_CONVERTER on mdn_nomapping -- and TARGET_ZLD,
 *     if not NULL, is appended.
 *
 * LOCAL_ALTERNATE_CONVERTER, LOCAL_ZLD, NORMALIZER,
 * TARGET_ALTERNATE_CONVERTER and TARGET_ZLD may each be NULL.
 */
mdn_result_t
mdn_translator_translate(mdn_converter_t local_converter,
			 mdn_converter_t local_alternate_converter,
			 const char *local_zld,
			 mdn_normalizer_t normalizer,
			 mdn_converter_t target_converter,
			 mdn_converter_t target_alternate_converter,
			 const char *target_zld,
			 const char *from, char *to, size_t tolen)
{
	mdn_result_t r;
	size_t fromlen;
	int process;
	char domainbuf1[512], domainbuf2[512];	/* enough */

	/* tolen is unsigned, so there is nothing to check about its sign. */
	assert(local_converter != NULL && target_converter != NULL &&
	       from != NULL && to != NULL);

	TRACE(("mdn_translator_translate(local_encoding=%s,local_zld=%s,"
	      "target_encoding=%s,target_zld=%s,from=\"%s\")\n",
	      mdn_converter_localencoding(local_converter),
	      local_zld == NULL ? "(none)" : local_zld,
	      mdn_converter_localencoding(target_converter),
	      target_zld == NULL ? "(none)" : target_zld,
	      mdn_debug_xstring(from, 30)));

	fromlen = strlen(from);
	if (fromlen + 1 > sizeof(domainbuf1)) {
		WARNING(("mdn_translator_translate: "
			"given domainname too long\n"));
		return (mdn_invalid_name);
	}

	/* Safe: the length was checked against the buffer size above. */
	(void)strcpy(domainbuf1, from);

	DUMP(("mdn_translator_translate: before translation \"%s\"\n",
	     mdn_debug_xstring(domainbuf1, 200)));

	/* Bit flags collected in `process'. */
#define PROCESS_LOCAL		1	/* try local_converter */
#define PROCESS_LOCALALT	2	/* try local_alternate_converter */
#define PROCESS_DONE		4	/* domainbuf2 holds the UTF-8 name */

	process = 0;

	if (local_zld != NULL) {
		/*
		 * Check if the domain name matches the local ZLD.
		 * If it does, strip ZLD and continue translation.
		 * Otherwise, no further processing is needed.
		 */
		if (mdn_translator_matchzld(domainbuf1, local_zld)) {
			size_t zldlen = strlen(local_zld);

			/*
			 * Remove ZLD.  A name lacking the trailing dot
			 * can match while being one byte shorter than
			 * the ZLD (e.g. "zld" against "ZLD."), so clamp
			 * the index instead of letting the unsigned
			 * subtraction wrap around.
			 */
			domainbuf1[fromlen > zldlen ? fromlen - zldlen : 0]
				= '\0';
			process |= PROCESS_LOCAL;
			if (local_alternate_converter != NULL &&
			    !contain_invalid_char(domainbuf1))
				process |= PROCESS_LOCALALT;
		}
	} else if (contain_invalid_char(domainbuf1)) {
		/*
		 * The name contains invalid characters (as a legal
		 * traditional domain name).  So there's no point in
		 * trying local-alt codeset.
		 */
		process |= PROCESS_LOCAL;
	} else {
		/*
		 * The given name is a valid ASCII domain name.
		 */
		if (mdn_converter_isasciicompatible(local_converter))
			process |= PROCESS_LOCAL;
		if (local_alternate_converter != NULL)
			process |= PROCESS_LOCALALT;
	}

	if ((process & PROCESS_LOCALALT) != 0) {
		/*
		 * First, try converting from the alternate encoding to UTF-8.
		 */
		TRACE(("mdn_translator_translate: %s to UTF-8\n",
		       mdn_converter_localencoding(
			       local_alternate_converter)));
		r = mdn_converter_convert(local_alternate_converter,
					  mdn_converter_l2u,
					  domainbuf1, domainbuf2,
					  sizeof(domainbuf2));
		if (r == mdn_success)
			process |= PROCESS_DONE;
		else if (r != mdn_invalid_encoding)
			return (r);
	}
	if ((process & PROCESS_DONE) == 0 && (process & PROCESS_LOCAL) != 0) {
		/*
		 * Convert from local encoding to UTF-8.
		 */
		TRACE(("mdn_translator_translate: %s to UTF-8\n",
		       mdn_converter_localencoding(local_converter)));
		r = mdn_converter_convert(local_converter,
					  mdn_converter_l2u,
					  domainbuf1, domainbuf2,
					  sizeof(domainbuf2));
		if (r == mdn_success)
			process |= PROCESS_DONE;
		else if (r != mdn_invalid_encoding)
			return (r);
	}
	if ((process & PROCESS_DONE) == 0) {
		/*
		 * Not converted.  Copy verbatim (the original name,
		 * i.e. with the local ZLD still attached).
		 */
		TRACE(("mdn_translator_translate: no translation required\n"));
		if (tolen < fromlen + 1)
			return (mdn_buffer_overflow);
		(void)memcpy(to, from, fromlen + 1);
		return (mdn_success);
	}
#undef PROCESS_LOCAL
#undef PROCESS_LOCALALT
#undef PROCESS_DONE

	DUMP(("mdn_translator_translate: UTF-8 string \"%s\"\n",
	     mdn_debug_xstring(domainbuf2, 200)));

	/*
	 * Normalize, if normalizer is specified.
	 * The normalized name goes to domainbuf1; domainbuf2 keeps the
	 * un-normalized UTF-8 name.
	 */
	if (normalizer != NULL) {
		r = mdn_normalizer_normalize(normalizer,
					     domainbuf2, domainbuf1,
					     sizeof(domainbuf1));
		if (r != mdn_success)
			return (r);

		DUMP(("mdn_translator_translate: after normalization \"%s\"\n",
		     mdn_debug_xstring(domainbuf1, 200)));

		/* Only reported; the translation goes on regardless. */
		if (numdots(domainbuf2) != numdots(domainbuf1)) {
			INFO(("mdn_translator_translate: "
			     "number of labels has been changed by "
			     "normalization\n"));
		}
	}

	/*
	 * Convert from UTF-8 to target encoding.
	 */
	TRACE(("mdn_translator_translate: UTF-8 to %s\n",
	      mdn_converter_localencoding(target_converter)));
	r = mdn_converter_convert(target_converter,
				  mdn_converter_u2l,
				  normalizer == NULL ?
				      domainbuf2 : domainbuf1,
				  to, tolen);
	if (r == mdn_nomapping && target_alternate_converter != NULL) {
		TRACE(("mdn_translator_translate: use alternate encoding\n"));
		r = mdn_converter_convert(target_alternate_converter,
					  mdn_converter_u2l,
					  normalizer == NULL ?
					  domainbuf2 : domainbuf1,
					  to, tolen);
	}
	if (r != mdn_success)
		return (r);

	/*
	 * Append ZLD, if any.
	 */
	if (target_zld != NULL)
		r = append_zld(to, tolen, target_zld);

	DUMP(("mdn_translator_translate: after translation \"%s\"\n",
	     mdn_debug_xstring(to, 200)));

	return (r);
}

/*
 * Check whether DOMAIN ends with the zero-level domain ZLD.
 *
 * ZLD is expected in canonical form: uppercase letters and a trailing
 * dot.  DOMAIN may be in any letter case (ASCII letters are compared
 * case-insensitively) and may or may not end with a dot.  The match
 * must start at a label boundary, i.e. either at the very beginning
 * of DOMAIN or right after a '.'.
 *
 * Returns 1 on match, 0 otherwise.  A NULL or empty ZLD matches
 * every domain.
 */
int
mdn_translator_matchzld(const char *domain, const char *zld) {
	size_t dlen;
	size_t zlen;
	size_t i;
	const char *tail;

	/*
	 * An empty ZLD can match everything.  The empty string is
	 * handled here too, so that `zlen' below is at least 1 and
	 * cannot underflow when it is decremented.
	 */
	if (zld == NULL || zld[0] == '\0')
		return (1);

	dlen = strlen(domain);
	zlen = strlen(zld);

	/*
	 * Since ZLD is canonicalized, it must end with dot.
	 * DOMAIN may or may not end with dot; if it does not, leave
	 * the final dot of ZLD out of the comparison.
	 */
	if (dlen > 0 && domain[dlen - 1] != '.')
		zlen--;

	/* If ZLD is longer than domain, no way. */
	if (zlen > dlen)
		return (0);

	/* Compare the last `zlen' bytes of DOMAIN with ZLD. */
	tail = domain + (dlen - zlen);
	for (i = 0; tail[i] != '\0'; i++) {
		char c = tail[i];

		if (c == zld[i])
			continue;
		/* ZLD is canonicalized (i.e. uppercase letters) */
		if ('a' <= c && c <= 'z' && c - 'a' + 'A' == zld[i])
			continue;
		return (0);
	}

	/* Reject a match that begins in the middle of a label. */
	if (tail > domain && tail[-1] != '.')
		return (0);

	return (1);
}

/*
 * Build the canonical form of ZLD and hand it back via *CANONICALIZEDP.
 *
 *  -- one leading dot, if present, is dropped.
 *  -- a NULL ZLD, or one that is empty after dropping the leading
 *     dot, yields a NULL result.
 *  -- a trailing dot is added unless ZLD already ends with one.
 *  -- ASCII lowercase letters are converted to uppercase.
 *
 * A non-NULL result lives in a buffer obtained from malloc(); this
 * function never frees it.  Returns mdn_success, or mdn_nomemory when
 * the allocation fails (*CANONICALIZEDP is left untouched then).
 */
mdn_result_t
mdn_translator_canonicalzld(const char *zld, char **canonicalizedp) {
	const char *src;
	char *buf, *dst;
	size_t srclen;
	int need_dot;

	/* No ZLD at all. */
	if (zld == NULL) {
		*canonicalizedp = NULL;
		return (mdn_success);
	}

	/* Skip a single leading '.' */
	src = (zld[0] == '.') ? zld + 1 : zld;

	/* Nothing left?  Then there is no ZLD either. */
	if (*src == '\0') {
		*canonicalizedp = NULL;
		return (mdn_success);
	}

	srclen = strlen(src);
	need_dot = (src[srclen - 1] != '.');

	/* Room for the name, an optional dot and the terminator. */
	buf = malloc(srclen + 1 + need_dot);
	if (buf == NULL)
		return (mdn_nomemory);
	*canonicalizedp = buf;

	/* Copy, upcasing ASCII letters on the way. */
	dst = buf;
	while (*src != '\0') {
		int ch = *src++;

		if ('a' <= ch && ch <= 'z')
			ch += 'A' - 'a';
		*dst++ = ch;
	}
	if (need_dot)
		*dst++ = '.';
	*dst = '\0';

	return (mdn_success);
}

/*
 * Return the number of '.' characters in the NUL-terminated string S.
 */
static int
numdots(const char *s) {
	const char *p;
	int count = 0;

	for (p = s; *p != '\0'; p++) {
		if (*p == '.')
			count++;
	}
	return (count);
}

/*
 * Return 1 if S contains any character other than an ASCII letter,
 * an ASCII digit, '.' or '-'; return 0 otherwise (including for the
 * empty string).
 */
static int
contain_invalid_char(const char *s) {
	const char *p;

	for (p = s; *p != '\0'; p++) {
		int ch = *p;
		int is_alpha = ('a' <= ch && ch <= 'z') ||
			       ('A' <= ch && ch <= 'Z');
		int is_digit = ('0' <= ch && ch <= '9');

		if (!is_alpha && !is_digit && ch != '.' && ch != '-')
			return (1);	/* found an invalid character */
	}
	return (0);
}

/*
 * Append ZLD to the NUL-terminated string held in S, a buffer of LEN
 * bytes.  Returns mdn_buffer_overflow, leaving S unchanged, when the
 * combined string plus its terminator does not fit; mdn_success
 * otherwise.
 */
static mdn_result_t
append_zld(char *s, size_t len, const char *zld) {
	size_t used = strlen(s);
	size_t zldsize = strlen(zld) + 1;	/* terminator included */

	if (used + zldsize > len)
		return (mdn_buffer_overflow);
	(void)memcpy(s + used, zld, zldsize);
	return (mdn_success);
}