common/smbsrv/smb_strcase.c

	smb_strcase.c revision da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Case conversion functions for strings. Originally this module only
 * dealt with ASCII strings. It has been updated to support European
 * character set characters. The current implementation is based on
 * code page table lookup rather than simple character range checks.
 */

#ifdef _KERNEL
#include <sys/types.h>
#include <sys/sunddi.h>
#else
#include <stdio.h>
#include <string.h>
#endif
#include <smbsrv/ctype.h>
#include <smbsrv/codepage.h>
#include <smbsrv/cp_cyrillic.h>
#include <smbsrv/cp_latin1.h>
#include <smbsrv/cp_latin2.h>
#include <smbsrv/cp_latin3.h>
#include <smbsrv/cp_latin4.h>
#include <smbsrv/cp_latin5.h>
#include <smbsrv/cp_latin6.h>
#include <smbsrv/cp_usascii.h>

/*
 * Global pointer to the current code page. This is
 * defaulted to a standard ASCII table.
 */
static codepage_t *current_codepage = usascii_codepage;

/*
 * A flag indicating whether the codepage being used is ASCII
 * When this flag is set, string opeartions can go faster.
 */
static int is_unicode = 0;

/*
 * codepage_isupper
 *
 * Determine whether or not a character is an uppercase character.
 * This function operates on the current codepage table. Returns
 * non-zero if the character is uppercase. Otherwise returns zero.
 */
int
codepage_isupper(int c)
{
	unsigned short mask = is_unicode ? 0xffff : 0xff;

	return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER);
}


/*
 * codepage_islower
 *
 * Determine whether or not a character is an lowercase character.
 * This function operates on the current codepage table. Returns
 * non-zero if the character is lowercase. Otherwise returns zero.
 */
int
codepage_islower(int c)
{
	unsigned short mask = is_unicode ? 0xffff : 0xff;

	return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER);
}


/*
 * codepage_toupper
 *
 * Convert individual characters to their uppercase equivalent value.
 * If the specified character is lowercase, the uppercase value will
 * be returned. Otherwise the original value will be returned.
 */
int
codepage_toupper(int c)
{
	unsigned short mask = is_unicode ? 0xffff : 0xff;

	return (current_codepage[c & mask].upper);
}


/*
 * codepage_tolower
 *
 * Convert individual characters to their lowercase equivalent value.
 * If the specified character is uppercase, the lowercase value will
 * be returned. Otherwise the original value will be returned.
 */
int
codepage_tolower(int c)
{
	unsigned short mask = is_unicode ? 0xffff : 0xff;

	return (current_codepage[c & mask].lower);
}


/*
 * strupr
 *
 * Convert a string to uppercase using the appropriate codepage. The
 * string is converted in place. A pointer to the string is returned.
 * There is an assumption here that uppercase and lowercase values
 * always result encode to the same length.
 */
char *
utf8_strupr(char *s)
{
	mts_wchar_t c;
	char *p = s;

	while (*p) {
		if (mts_isascii(*p)) {
			*p = codepage_toupper(*p);
			p++;
		} else {
			if (mts_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
				return (0);

			if (c == 0)
				break;

			c = codepage_toupper(c);
			p += mts_wctomb(p, c);
		}
	}

	return (s);
}


/*
 * strlwr
 *
 * Convert a string to lowercase using the appropriate codepage. The
 * string is converted in place. A pointer to the string is returned.
 * There is an assumption here that uppercase and lowercase values
 * always result encode to the same length.
 */
char *
utf8_strlwr(char *s)
{
	mts_wchar_t c;
	char *p = s;

	while (*p) {
		if (mts_isascii(*p)) {
			*p = codepage_tolower(*p);
			p++;
		} else {
			if (mts_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0)
				return (0);

			if (c == 0)
				break;

			c = codepage_tolower(c);
			p += mts_wctomb(p, c);
		}
	}

	return (s);
}


/*
 * isstrlwr
 *
 * Returns 1 if string contains NO uppercase chars 0 otherwise. However,
 * -1 is returned if "s" is not a valid multi-byte string.
 */
int
utf8_isstrlwr(const char *s)
{
	mts_wchar_t c;
	int n;
	const char *p = s;

	while (*p) {
		if (mts_isascii(*p) && codepage_isupper(*p))
			return (0);
		else {
			if ((n = mts_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
				return (-1);

			if (c == 0)
				break;

			if (codepage_isupper(c))
				return (0);

			p += n;
		}
	}

	return (1);
}


/*
 * isstrupr
 *
 * Returns 1 if string contains NO lowercase chars 0 otherwise. However,
 * -1 is returned if "s" is not a valid multi-byte string.
 */
int
utf8_isstrupr(const char *s)
{
	mts_wchar_t c;
	int n;
	const char *p = s;

	while (*p) {
		if (mts_isascii(*p) && codepage_islower(*p))
			return (0);
		else {
			if ((n = mts_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0)
				return (-1);

			if (c == 0)
				break;

			if (codepage_islower(c))
				return (0);

			p += n;
		}
	}

	return (1);
}


/*
 * strcasecmp
 *
 * Compare the null-terminated strings s1 and s2 and return an integer
 * greater than, equal to, or less than 0, according as s1 is lexico
 * graphically greater than, equal to, or less than s2 after translation
 * of each corresponding character to lowercase. The strings themselves
 * are not modified.
 *
 * Out:    0 if strings are equal
 *       < 0 if first string < second string
 *       > 0 if first string > second string
 */
int
utf8_strcasecmp(const char *s1, const char *s2)
{
	mts_wchar_t c1, c2;
	int n1, n2;
	const char *p1 = s1;
	const char *p2 = s2;

	for (;;) {
		if (mts_isascii(*p1))
			c1 = *p1++;
		else {
			if ((n1 = mts_mbtowc(&c1, p1, MTS_MB_CHAR_MAX)) < 0)
				return (-1);
			p1 += n1;
		}

		if (mts_isascii(*p2))
			c2 = *p2++;
		else {
			if ((n2 = mts_mbtowc(&c2, p2, MTS_MB_CHAR_MAX)) < 0)
				return (1);
			p2 += n2;
		}

		if (c1 == 0 || c2 == 0)
			break;

		if (c1 == c2)
			continue;

		c1 = codepage_tolower(c1);
		c2 = codepage_tolower(c2);

		if (c1 != c2)
			break;
	}

	return ((int)c1 - (int)c2);
}


/*
 * strncasecmp
 *
 * Compare two null-terminated strings, s1 and s2, of at most len
 * characters and return an int greater than, equal to, or less than 0,
 * dependent on whether s1 is lexicographically greater than, equal to,
 * or less than s2 after translation of each corresponding character to
 * lowercase. The original strings are not modified.
 *
 * Out:    0 if strings are equal
 *       < 0 if first string < second string
 *       > 0 if first string > second string
 */
int
utf8_strncasecmp(const char *s1, const char *s2, int len)
{
	mts_wchar_t c1, c2;
	int n1, n2;
	const char *p1 = s1;
	const char *p2 = s2;

	if (len <= 0)
		return (0);

	while (len--) {
		if (mts_isascii(*p1))
			c1 = *p1++;
		else {
			if ((n1 = mts_mbtowc(&c1, p1, MTS_MB_CHAR_MAX)) < 0)
				return (-1);
			p1 += n1;
		}

		if (mts_isascii(*p2))
			c2 = *p2++;
		else {
			if ((n2 = mts_mbtowc(&c2, p2, MTS_MB_CHAR_MAX)) < 0)
				return (1);
			p2 += n2;
		}

		if (c1 == 0 || c2 == 0)
			break;

		if (c1 == c2)
			continue;

		c1 = codepage_tolower(c1);
		c2 = codepage_tolower(c2);

		if (c1 != c2)
			break;
	}

	return ((int)c1 - (int)c2);
}


int
utf8_isstrascii(const char *s)
{
	while (*s) {
		if (mts_isascii(*s) == 0)
			return (0);
		s++;
	}
	return (1);
}