utf-8.cpp revision 8f2f51347c7b5fe7c697debe7c2e46b46dd16489
/* $Id$ */
/** @file
* IPRT - UTF-8 Decoding.
*/
/*
* Copyright (C) 2006-2007 Sun Microsystems, Inc.
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*
* The contents of this file may alternatively be used under the terms
* of the Common Development and Distribution License Version 1.0
* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
* VirtualBox OSE distribution, in which case the provisions of the
* CDDL are applicable instead of those of the GPL.
*
* You may elect to license modified versions of this file under the
* terms and conditions of either the GPL or the CDDL or both.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
* Clara, CA 95054 USA or visit http://www.sun.com if you need
* additional information or have any questions.
*/
/*******************************************************************************
* Header Files *
*******************************************************************************/
/**
* Gets the length in code points of a UTF-8 encoded string.
* The string is validated while doing this.
*
* @returns IPRT status code.
* @param psz Pointer to the UTF-8 string.
* @param cch The max length of the string. (btw cch = cb)
* Use RTSTR_MAX if all of the string is to be examined.
* @param pcuc Where to store the length in unicode code points.
* @param pcchActual Where to store the actual size of the UTF-8 string
* on success (cch = cb again). Optional.
*/
{
size_t cCodePoints = 0;
while (cch > 0)
{
/* a zero byte ends the scan even if cch has not been used up */
if (!uch)
break;
{
/* figure sequence length and validate the first byte */
unsigned cb;
cb = 2;
/* lead byte 1110xxxx: three byte sequence */
else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5)))
cb = 3;
/* lead byte 11110xxx: four byte sequence */
else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4)))
cb = 4;
/* lead byte 111110xx: five byte sequence */
else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3) | RT_BIT(2))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3)))
cb = 5;
/* lead byte 1111110x: six byte sequence */
else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3) | RT_BIT(2) | RT_BIT(1))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3) | RT_BIT(2)))
cb = 6;
else
{
/* none of the lead byte patterns matched */
return VERR_INVALID_UTF8_ENCODING;
}
/* check length */
{
RTStrAssertMsgFailed(("Invalid UTF-8 length: cb=%d cch=%d (%.*Rhxs)\n", cb, cch, RT_MIN(cch, 10), puch));
return VERR_INVALID_UTF8_ENCODING;
}
/* validate the rest */
/* The cases fall through on purpose: starting at the last byte of the
* sequence, each case checks that one trailing byte has the form 10xxxxxx
* and then drops into the check for the byte before it. */
switch (cb)
{
case 6:
RTStrAssertMsgReturn((puch[5] & (RT_BIT(7) | RT_BIT(6))) == RT_BIT(7), ("6/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, cch), puch), VERR_INVALID_UTF8_ENCODING);
case 5:
RTStrAssertMsgReturn((puch[4] & (RT_BIT(7) | RT_BIT(6))) == RT_BIT(7), ("5/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, cch), puch), VERR_INVALID_UTF8_ENCODING);
case 4:
RTStrAssertMsgReturn((puch[3] & (RT_BIT(7) | RT_BIT(6))) == RT_BIT(7), ("4/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, cch), puch), VERR_INVALID_UTF8_ENCODING);
case 3:
RTStrAssertMsgReturn((puch[2] & (RT_BIT(7) | RT_BIT(6))) == RT_BIT(7), ("3/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, cch), puch), VERR_INVALID_UTF8_ENCODING);
case 2:
RTStrAssertMsgReturn((puch[1] & (RT_BIT(7) | RT_BIT(6))) == RT_BIT(7), ("2/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, cch), puch), VERR_INVALID_UTF8_ENCODING);
break;
}
/* validate the code point. */
switch (cb)
{
case 6:
break;
case 5:
break;
case 4:
break;
case 3:
break;
case 2:
break;
}
/* advance */
}
else
{
/* one ASCII byte */
puch++;
cch--;
}
/* every sequence counts as one code point, whatever its byte length */
cCodePoints++;
}
/* done */
*pcuc = cCodePoints;
/* NOTE(review): as written, the success return below is the body of this
* `if`; the statement storing the actual byte length through pcchActual
* appears to be missing - confirm. */
if (pcchActual)
return VINF_SUCCESS;
}
/**
* Decodes a UTF-8 string into an array of unicode code points.
*
* Since we know the input is valid, we do *not* perform encoding or length checks.
*
* @returns iprt status code.
* @param psz The UTF-8 string to recode. This is a valid encoding.
* @param cch The number of chars (the type char, so bytes if you like) to process of the UTF-8 string.
* The recoding will stop when cch or '\\0' is reached. Pass RTSTR_MAX to process up to '\\0'.
* @param paCps Where to store the code points array.
* @param cCps The number of RTUNICP items the paCps buffer can hold, excluding the terminator ('\\0').
*/
{
int rc = VINF_SUCCESS;
while (cch > 0)
{
/* read the next char and check for terminator. */
if (!uch)
break;
/* check for output overflow */
{
break;
}
/* one output slot is used per decoded code point */
cCps--;
/* decode and recode the code point */
{
puch++;
cch--;
}
#ifdef RT_STRICT
AssertMsgFailed(("Internal error!\n"));
#endif
{
puch += 2;
cch -= 2;
}
{
puch += 3;
cch -= 3;
}
{
puch += 4;
cch -= 4;
}
{
/* Five byte sequence: the remaining length must shrink by exactly the
* number of bytes the read pointer advances.  Subtracting 6 here left
* cch one too small after every five byte sequence, and with an
* unsigned cch it could wrap around when only five bytes remained. */
puch += 5;
cch -= 5;
}
else
{
puch += 6;
cch -= 6;
}
}
/* done */
/* terminate the code point array */
*pCp = 0;
return rc;
}
/* NOTE(review): the following bodies have no visible signatures and most of
* their statements are absent from this view; the comments below describe
* only what can be seen. */
{
}
{
/* NOTE(review): as written, `return rc` is the body of this `if`; a store
* through pcCps appears to be missing between the two lines - confirm. */
if (pcCps)
return rc;
}
{
}
{
/*
* Use rtUtf8Length for the job.
*/
if (RT_SUCCESS(rc))
{
}
/* the status is passed back unchanged */
return rc;
}
{
/* turns the IPRT status in rc into a yes/no answer */
return RT_SUCCESS(rc);
}
/* Three visible stages: validate and count, allocate, decode.
* NOTE(review): the signature and the calls doing the work are not visible. */
{
/*
* Validate input.
*/
/*
* Validate the UTF-8 input and count its code points.
*/
if (RT_SUCCESS(rc))
{
/*
* Allocate buffer.
*/
if (paCps)
{
/*
* Decode the string.
*/
if (RT_SUCCESS(rc))
{
/* decoding succeeded */
return rc;
}
}
/* taken when paCps is NULL, i.e. no buffer was obtained */
else
}
return rc;
}
/**
* Converts a UTF-8 string into an array of unicode code points, using either
* a caller supplied buffer or a buffer this function obtains itself.
*
* @returns IPRT status code; VERR_BUFFER_OVERFLOW if the caller supplied
* buffer cannot hold the result (cCps must be greater than the
* number of code points).
* @param pszString The UTF-8 input string.
* @param cchString Length limit for pszString. NOTE(review): presumably in
* bytes with RTSTR_MAX meaning "until the terminator", as for
* the internal helpers - confirm.
* @param ppaCps Buffer pointer; read when the caller's buffer is used,
* and set to the result buffer on success.
* @param cCps Capacity of the caller's buffer in RTUNICP items.
* @param pcCps Where to store the number of code points. Optional.
*/
RTDECL(int) RTStrToUniEx(const char *pszString, size_t cchString, PRTUNICP *ppaCps, size_t cCps, size_t *pcCps)
{
/*
* Validate input.
*/
/*
* Validate the UTF-8 input and count the code points.
*/
if (RT_SUCCESS(rc))
{
/* the count is reported before the buffer size is checked */
if (pcCps)
*pcCps = cCpsResult;
/*
* Check buffer size / Allocate buffer.
*/
bool fShouldFree;
{
/* caller supplied buffer: not ours to free */
fShouldFree = false;
/* "<=" rather than "<": a buffer of exactly cCpsResult items is rejected */
if (cCps <= cCpsResult)
return VERR_BUFFER_OVERFLOW;
paCpsResult = *ppaCps;
}
else
{
/* the buffer is owned here and must be released on failure */
fShouldFree = true;
}
if (paCpsResult)
{
/*
* Decode the UTF-8 string into code points.
*/
if (RT_SUCCESS(rc))
{
*ppaCps = paCpsResult;
return rc;
}
/* decoding failed: only a buffer owned by this function is released */
if (fShouldFree)
}
/* taken when paCpsResult is NULL, i.e. no buffer was obtained */
else
}
return rc;
}
/**
* Calculates the UTF-16 length of a string, validating the encoding while doing so.
*
* @returns IPRT status code.
* @param psz Pointer to the UTF-8 string.
* @param cch The max length of the string. (btw cch = cb)
* Use RTSTR_MAX if all of the string is to be examined.
* @param pcwc Where to store the length of the UTF-16 string as a number of RTUTF16 characters.
*/
{
while (cch > 0)
{
/* a zero byte ends the scan even if cch has not been used up */
if (!uch)
break;
{
/* one ASCII byte */
cwc++;
puch++;
cch--;
}
else
{
/* figure sequence length and validate the first byte */
unsigned cb;
cb = 2;
/* lead byte 1110xxxx: three byte sequence */
else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5)))
cb = 3;
/* lead byte 11110xxx: four byte sequence */
else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4)))
cb = 4;
/* lead byte 111110xx: five byte sequence */
else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3) | RT_BIT(2))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3)))
cb = 5;
/* lead byte 1111110x: six byte sequence */
else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3) | RT_BIT(2) | RT_BIT(1))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3) | RT_BIT(2)))
cb = 6;
else
{
/* none of the lead byte patterns matched */
return VERR_INVALID_UTF8_ENCODING;
}
/* check length */
{
RTStrAssertMsgFailed(("Invalid UTF-8 length: cb=%d cch=%d (%.*Rhxs)\n", cb, cch, RT_MIN(cch, 10), puch));
return VERR_INVALID_UTF8_ENCODING;
}
/* validate the rest */
/* The cases fall through on purpose: each one checks that a single
* trailing byte has the form 10xxxxxx, then continues with the byte
* before it. */
switch (cb)
{
case 6:
RTStrAssertMsgReturn((puch[5] & (RT_BIT(7) | RT_BIT(6))) == RT_BIT(7), ("6/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, cch), puch), VERR_INVALID_UTF8_ENCODING);
case 5:
RTStrAssertMsgReturn((puch[4] & (RT_BIT(7) | RT_BIT(6))) == RT_BIT(7), ("5/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, cch), puch), VERR_INVALID_UTF8_ENCODING);
case 4:
RTStrAssertMsgReturn((puch[3] & (RT_BIT(7) | RT_BIT(6))) == RT_BIT(7), ("4/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, cch), puch), VERR_INVALID_UTF8_ENCODING);
case 3:
RTStrAssertMsgReturn((puch[2] & (RT_BIT(7) | RT_BIT(6))) == RT_BIT(7), ("3/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, cch), puch), VERR_INVALID_UTF8_ENCODING);
case 2:
RTStrAssertMsgReturn((puch[1] & (RT_BIT(7) | RT_BIT(6))) == RT_BIT(7), ("2/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, cch), puch), VERR_INVALID_UTF8_ENCODING);
break;
}
/* validate the code point. */
switch (cb)
{
/* five and six byte sequences are rejected as not recodable */
case 6:
return VERR_CANT_RECODE_AS_UTF16;
case 5:
return VERR_CANT_RECODE_AS_UTF16;
case 4:
/* extra unit: together with the increment after the switch a four
* byte sequence is counted as two RTUTF16 items */
cwc++;
break;
case 3:
break;
case 2:
break;
}
/* advance */
cwc++;
}
}
/* done */
return VINF_SUCCESS;
}
/**
* Recodes a valid UTF-8 string as UTF-16.
*
* Since we know the input is valid, we do *not* perform encoding or length checks.
*
* @returns iprt status code.
* @param psz The UTF-8 string to recode. This is a valid encoding.
* @param cch The number of chars (the type char, so bytes if you like) to process of the UTF-8 string.
* The recoding will stop when cch or '\\0' is reached. Pass RTSTR_MAX to process up to '\\0'.
* @param pwsz Where to store the UTF-16 string.
* @param cwc The number of RTUTF16 items the pwsz buffer can hold, excluding the terminator ('\\0').
*/
{
int rc = VINF_SUCCESS;
while (cch > 0)
{
/* read the next char and check for terminator. */
if (!uch)
break;
/* check for output overflow */
{
break;
}
/* one output item is used for this code point */
cwc--;
/* decode and recode the code point */
{
puch++;
cch--;
}
{
puch += 2;
cch -= 2;
}
else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5)))
{
puch += 3;
cch -= 3;
}
else
{
/* generate surrogate pair */
/* only a four byte lead (11110xxx) is expected to reach this branch */
Assert((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4)));
{
break;
}
/* the pair needs a second output item */
cwc--;
/* the pair encodes the code point's offset from 0x10000 */
uc -= 0x10000;
puch += 4;
cch -= 4;
}
}
/* done */
/* terminate the UTF-16 output */
*pwc = '\0';
return rc;
}
/* Three visible stages: measure, allocate, encode; the result is returned
* through ppwszString.
* NOTE(review): the signature and the calls doing the work are not visible. */
{
/*
* Validate input.
*/
/* the output pointer is NULL on every path except the success return */
*ppwszString = NULL;
/*
* Validate the UTF-8 input and calculate the length of the UTF-16 string.
*/
if (RT_SUCCESS(rc))
{
/*
* Allocate buffer.
*/
if (pwsz)
{
/*
* Encode the UTF-16 string.
*/
if (RT_SUCCESS(rc))
{
/* success: hand the buffer to the caller */
*ppwszString = pwsz;
return rc;
}
}
/* taken when pwsz is NULL, i.e. no buffer was obtained */
else
}
return rc;
}
/**
* Converts a UTF-8 string to UTF-16, using either a caller supplied buffer
* or a buffer this function obtains itself.
*
* @returns IPRT status code; VERR_BUFFER_OVERFLOW is returned from the
* caller supplied buffer path.
* @param pszString The UTF-8 input string.
* @param cchString Length limit for pszString. NOTE(review): presumably in
* bytes with RTSTR_MAX meaning "until the terminator", as for
* the internal helpers - confirm.
* @param ppwsz Buffer pointer; read when the caller's buffer is used,
* and set to the result buffer on success.
* @param cwc Capacity of the caller's buffer in RTUTF16 items.
* @param pcwc Optional output. NOTE(review): presumably receives the
* UTF-16 length; the store is not visible here - confirm.
*/
RTDECL(int) RTStrToUtf16Ex(const char *pszString, size_t cchString, PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc)
{
/*
* Validate input.
*/
/*
* Validate the UTF-8 input and calculate the length of the UTF-16 string.
*/
if (RT_SUCCESS(rc))
{
/* NOTE(review): the statement guarded by this `if` is not visible;
* RTStrToUniEx stores its result count at the same point. */
if (pcwc)
/*
* Check buffer size / Allocate buffer.
*/
bool fShouldFree;
{
/* caller supplied buffer: not ours to free */
fShouldFree = false;
/* NOTE(review): as written this return is unconditional and makes the
* assignment below unreachable; RTStrToUniEx guards the same return
* with a capacity comparison - confirm the guard is missing here. */
return VERR_BUFFER_OVERFLOW;
pwszResult = *ppwsz;
}
else
{
/* the buffer is owned here and must be released on failure */
fShouldFree = true;
}
if (pwszResult)
{
/*
* Encode the UTF-16 string.
*/
if (RT_SUCCESS(rc))
{
*ppwsz = pwszResult;
return rc;
}
/* encoding failed: only a buffer owned by this function is released */
if (fShouldFree)
}
/* taken when pwszResult is NULL, i.e. no buffer was obtained */
else
}
return rc;
}
/* NOTE(review): the two bodies below have no visible signatures and most of
* their statements are absent from this view. */
{
}
{
/* NOTE(review): as written, `return rc` is the body of this `if`; a store
* through pcwc appears to be missing between the two lines - confirm. */
if (pcwc)
return rc;
}
/**
* Handle invalid encodings passed to RTStrGetCp() and RTStrGetCpEx().
*
* Moves the string position forward by a single byte, stores
* RTUNICP_INVALID as the code point and passes the given status back.
*
* @returns rc
* @param ppsz The pointer to the string position point.
* @param pCp Where to store RTUNICP_INVALID.
* @param rc The iprt error code.
*/
{
/*
* Try find a valid encoding.
*/
/* for now only one byte is skipped; no resynchronisation is attempted */
(*ppsz)++; /** @todo code this! */
*pCp = RTUNICP_INVALID;
return rc;
}
/* NOTE(review): only the final return is visible; the code producing Cp is
* not part of this view. */
{
return Cp;
}
/* Reads one code point: ASCII fast path first, then multi-byte sequences.
* NOTE(review): the signature, the lead byte conditions and the code point
* assembly are not visible in this view. */
{
/* ASCII ? */
{
puch++;
}
{
/* figure the length and validate the first octet. */
unsigned cb;
cb = 2;
cb = 3;
cb = 4;
cb = 5;
cb = 6;
else
{
RTStrAssertMsgFailed(("Invalid UTF-8 first byte: %.*Rhxs\n", RT_MIN(strlen((char *)puch), 10), puch));
}
/* validate the rest */
/* The cases fall through on purpose: each one checks that a single
* trailing byte has the form 10xxxxxx ((b & 0xc0) == 0x80). */
switch (cb)
{
case 6:
RTStrAssertMsgReturn((puch[5] & 0xc0) == 0x80, ("6/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch),
case 5:
RTStrAssertMsgReturn((puch[4] & 0xc0) == 0x80, ("5/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch),
case 4:
RTStrAssertMsgReturn((puch[3] & 0xc0) == 0x80, ("4/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch),
case 3:
RTStrAssertMsgReturn((puch[2] & 0xc0) == 0x80, ("3/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch),
case 2:
RTStrAssertMsgReturn((puch[1] & 0xc0) == 0x80, ("2/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch),
break;
}
/* get and validate the code point. */
switch (cb)
{
case 6:
break;
case 5:
break;
case 4:
break;
case 3:
/* 0xffff and 0xfffe get their own status code (endian indicator);
* any other rejected value is reported as an invalid encoding */
rtStrGetCpExFailure(ppsz, pCp, uc == 0xffff || uc == 0xfffe ? VERR_CODE_POINT_ENDIAN_INDICATOR : VERR_INVALID_UTF8_ENCODING));
break;
case 2:
break;
default: /* impossible, but GCC is bitching. */
break;
}
}
else
{
/* 6th bit is always set. */
RTStrAssertMsgFailed(("Invalid UTF-8 first byte: %.*Rhxs\n", RT_MIN(strlen((char *)puch), 10), puch));
}
return VINF_SUCCESS;
}
/**
* Handle invalid encodings passed to RTStrGetCpNEx().
*
* Moves the string position forward by a single byte, reduces the remaining
* length by one, stores RTUNICP_INVALID as the code point and passes the
* given status back.
*
* @returns rc
* @param ppsz The pointer to the string position point.
* @param pcch Pointer to the string length.
* @param pCp Where to store RTUNICP_INVALID.
* @param rc The iprt error code.
*/
{
/*
* Try find a valid encoding.
*/
/* for now only one byte is skipped; position and length move together */
(*ppsz)++; /** @todo code this! */
(*pcch)--;
*pCp = RTUNICP_INVALID;
return rc;
}
/* Length limited variant of the code point reader: fails with
* VERR_END_OF_STRING when no bytes are left, otherwise ASCII fast path first,
* then multi-byte sequences.
* NOTE(review): the signature, the lead byte conditions and the code point
* assembly are not visible in this view. */
{
/* nothing left to read: report an invalid code point and end of string */
if (cch == 0)
{
*pCp = RTUNICP_INVALID;
return VERR_END_OF_STRING;
}
/* ASCII ? */
{
puch++;
cch--;
}
{
/* figure the length and validate the first octet. */
unsigned cb;
cb = 2;
cb = 3;
cb = 4;
cb = 5;
cb = 6;
else
{
RTStrAssertMsgFailed(("Invalid UTF-8 first byte: %.*Rhxs\n", RT_MIN(strlen((char *)puch), 10), puch));
}
/* validate the rest */
/* The cases fall through on purpose: each one checks that a single
* trailing byte has the form 10xxxxxx ((b & 0xc0) == 0x80). */
switch (cb)
{
case 6:
RTStrAssertMsgReturn((puch[5] & 0xc0) == 0x80, ("6/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch),
case 5:
RTStrAssertMsgReturn((puch[4] & 0xc0) == 0x80, ("5/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch),
case 4:
RTStrAssertMsgReturn((puch[3] & 0xc0) == 0x80, ("4/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch),
case 3:
RTStrAssertMsgReturn((puch[2] & 0xc0) == 0x80, ("3/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch),
case 2:
RTStrAssertMsgReturn((puch[1] & 0xc0) == 0x80, ("2/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch),
break;
}
/* get and validate the code point. */
switch (cb)
{
case 6:
break;
case 5:
break;
case 4:
break;
case 3:
/* 0xffff and 0xfffe get their own status code (endian indicator);
* any other rejected value is reported as an invalid encoding */
rtStrGetCpNExFailure(ppsz, pcch, pCp, uc == 0xffff || uc == 0xfffe ? VERR_CODE_POINT_ENDIAN_INDICATOR : VERR_INVALID_UTF8_ENCODING));
break;
case 2:
break;
default: /* impossible, but GCC is bitching. */
break;
}
}
else
{
/* 6th bit is always set. */
RTStrAssertMsgFailed(("Invalid UTF-8 first byte: %.*Rhxs\n", RT_MIN(strlen((char *)puch), 10), puch));
}
return VINF_SUCCESS;
}
/* Encodes the code point uc at puch, choosing the sequence length from the
* value range, and returns the position following the written bytes.  Code
* points that must not be encoded are replaced by the single byte 0x7f.
* NOTE(review): the signature and the statements writing the sequence bytes
* are not visible in this view. */
{
/* below 0x80: plain ASCII */
if (uc < 0x80)
/* below 0x800: two byte range */
else if (uc < 0x00000800)
{
}
/* below 0x10000: three byte range */
else if (uc < 0x00010000)
{
/* Accept everything in this range except the surrogate block
* 0xd800..0xdfff and the values 0xfffe/0xffff.  The lower bound used to
* read 0x0000d8000 (one zero too many), which is above the whole three
* byte range and therefore let every surrogate through. */
if ( uc < 0x0000d800
|| ( uc > 0x0000dfff
&& uc < 0x0000fffe))
{
}
else
{
/* not encodable: emit the 0x7f placeholder instead */
*puch++ = 0x7f;
}
}
/* below 0x200000: four byte range */
else if (uc < 0x00200000)
{
}
/* below 0x4000000: five byte range */
else if (uc < 0x04000000)
{
}
/* up to 0x7fffffff: six byte range */
else if (uc <= 0x7fffffff)
{
}
else
{
/* out of range for any sequence length: emit the 0x7f placeholder */
*puch++ = 0x7f;
}
return (char *)puch;
}
/* Returns a position in the string as a char pointer; every path that finds
* nothing better ends with the start of the string.
* NOTE(review): the signature, the loop header and several conditions are not
* visible in this view, so the search itself is not described here. */
{
{
/* simple char? */
return (char *)puch;
/* two or more. */
&& !(uMask & 1))
{
{
(char *)pszStart);
return (char *)puch;
}
/* the mask is shifted right by one bit on each iteration */
uMask >>= 1;
}
RTStrAssertMsgFailed(("Invalid UTF-8 encoding: %.*Rhxs puch=%p psz=%p\n", psz - (char *)puch, puch, psz));
}
/* fallback: the start of the string */
return (char *)pszStart;
}
/**
* Performs a case sensitive string compare between two UTF-8 strings.
*
* Encoding errors are ignored by the current implementation. So, the only
* difference between this and the CRT strcmp function is the handling of
* NULL arguments.
*
* @returns < 0 if the first string is less than the second string.
* @returns 0 if the first string is identical to the second string.
* @returns > 0 if the first string is greater than the second string.
* @param psz1 First UTF-8 string. Null is allowed.
* @param psz2 Second UTF-8 string. Null is allowed.
*/
{
/* NOTE(review): as written this return is unconditional and makes the NULL
* checks below unreachable; a guard in front of it appears to be missing
* (presumably a pointer equality test) - confirm. */
return 0;
/* a NULL string sorts before a non-NULL one */
if (!psz1)
return -1;
if (!psz2)
return 1;
}
/**
* Performs a case sensitive string compare between two UTF-8 strings, given
* a maximum string length.
*
* Encoding errors are ignored by the current implementation. So, the only
* difference between this and the CRT strncmp function is the handling of
* NULL arguments.
*
* @returns < 0 if the first string is less than the second string.
* @returns 0 if the first string is identical to the second string.
* @returns > 0 if the first string is greater than the second string.
* @param psz1 First UTF-8 string. Null is allowed.
* @param psz2 Second UTF-8 string. Null is allowed.
* @param cchMax The maximum string length
*/
{
/* NOTE(review): as written this return is unconditional and makes the NULL
* checks below unreachable; a guard in front of it appears to be missing
* (presumably a pointer equality test) - confirm. */
return 0;
/* a NULL string sorts before a non-NULL one */
if (!psz1)
return -1;
if (!psz2)
return 1;
}
/**
* Performs a case insensitive string compare between two UTF-8 strings.
*
* This is a simplified compare: only the simple case folding rules
* specified by the unicode specs are used. It does not consider character pairs
* as they are used in some languages, just simple upper & lower case compares.
*
* The result is the difference between the mismatching codepoints after they
* both have been lower cased.
*
* If the string encoding is invalid the function will assert (strict builds)
* and use RTStrCmp for the remainder of the string.
*
* @returns < 0 if the first string is less than the second string.
* @returns 0 if the first string is identical to the second string.
* @returns > 0 if the first string is greater than the second string.
* @param psz1 First UTF-8 string. Null is allowed.
* @param psz2 Second UTF-8 string. Null is allowed.
*/
{
/* NOTE(review): as written this return is unconditional and makes
* everything below unreachable; a guard in front of it appears to be
* missing (presumably a pointer equality test) - confirm. */
return 0;
/* a NULL string sorts before a non-NULL one */
if (!psz1)
return -1;
if (!psz2)
return 1;
for (;;)
{
/* Get the codepoints */
if (RT_FAILURE(rc))
{
/* step back one byte before leaving the loop; presumably this undoes
* the one byte advance made by the failure handler - confirm */
psz1--;
break;
}
if (RT_FAILURE(rc))
{
psz2--;
break;
}
/* compare */
/* The difference is re-tested at three levels and reported only if it is
* still non-zero at the innermost one.  NOTE(review): the statements
* that recompute iDiff between the tests are not visible here. */
if (iDiff)
{
if (iDiff)
{
if (iDiff)
return iDiff;
}
}
/* hit the terminator? */
if (!cp1)
return 0;
}
/* Hit some bad encoding, continue in case sensitive mode (the function
* documentation says RTStrCmp is used for the remainder). */
}
/**
* Performs a case insensitive string compare between two UTF-8 strings, given a
* maximum string length.
*
* This is a simplified compare: only the simple case folding rules
* specified by the unicode specs are used. It does not consider character pairs
* as they are used in some languages, just simple upper & lower case compares.
*
* The result is the difference between the mismatching codepoints after they
* both have been lower cased.
*
* If the string encoding is invalid the function will assert (strict builds)
* and use RTStrCmp for the remainder of the string.
*
* @returns < 0 if the first string is less than the second string.
* @returns 0 if the first string is identical to the second string.
* @returns > 0 if the first string is greater than the second string.
* @param psz1 First UTF-8 string. Null is allowed.
* @param psz2 Second UTF-8 string. Null is allowed.
* @param cchMax Maximum string length
*/
{
/* comparing zero characters always yields equality */
if (cchMax == 0)
return 0;
/* NOTE(review): as written this return is unconditional and makes
* everything below unreachable; a guard in front of it appears to be
* missing (presumably a pointer equality test) - confirm. */
return 0;
/* a NULL string sorts before a non-NULL one */
if (!psz1)
return -1;
if (!psz2)
return 1;
for (;;)
{
/* Get the codepoints */
if (RT_FAILURE(rc))
{
/* step back one byte and give the length back before leaving the loop;
* presumably this undoes the failure handler's one byte advance and
* length decrement - confirm */
psz1--;
cchMax++;
break;
}
if (RT_FAILURE(rc))
{
psz2--;
break;
}
/* compare */
/* The difference is re-tested at three levels and reported only if it is
* still non-zero at the innermost one.  NOTE(review): the statements
* that recompute iDiff between the tests are not visible here. */
if (iDiff)
{
if (iDiff)
{
if (iDiff)
return iDiff;
}
}
/* hit the terminator? */
/* NOTE(review): the condition guarding this return is not visible; as
* written the loop body always returns here. */
return 0;
}
/* Hit some bad encoding, continue in case sensitive mode (the function
* documentation says RTStrCmp is used for the remainder). */
}
/* Substring search front end: a NULL haystack or needle gives a NULL result.
* NOTE(review): the signature and the final CRT call are not visible. */
{
/* Any NULL strings means NULL return. (In the RTStrCmp tradition.) */
if (!pszHaystack)
return NULL;
if (!pszNeedle)
return NULL;
/* The rest is CRT. */
}
/* Case insensitive substring search.  NULL arguments give a NULL result, an
* empty needle matches at the start of the haystack, and NULL is returned
* when the haystack is exhausted without a match.
* NOTE(review): the signature, the folding of the first needle code point and
* the match conditions inside the loops are not visible in this view. */
{
/* Any NULL strings means NULL return. (In the RTStrCmp tradition.) */
if (!pszHaystack)
return NULL;
if (!pszNeedle)
return NULL;
/* The empty string matches everything. */
if (!*pszNeedle)
return (char *)pszHaystack;
/*
* The search strategy is to pick out the first char of the needle, fold it,
* and match it against the haystack code point by code point. When encountering
* a matching code point we use RTStrNICmp for the remainder (if any) of the needle.
*/
const char * const pszNeedleStart = pszNeedle;
{
/* Cp0 is not a case sensitive char. */
for (;;)
{
/* end of haystack: no match */
if (!Cp)
break;
/* the result is cchNeedleCp0 bytes before the current haystack position */
return (char *)pszHaystack - cchNeedleCp0;
}
}
{
/* Cp0 is case sensitive */
for (;;)
{
/* end of haystack: no match */
if (!Cp)
break;
return (char *)pszHaystack - cchNeedleCp0;
}
}
else
{
/* Cp0 is case sensitive and folds to two different chars. (paranoia) */
for (;;)
{
/* end of haystack: no match */
if (!Cp)
break;
return (char *)pszHaystack - cchNeedleCp0;
}
}
return NULL;
}
/* Walks the string up to its terminator and returns the psz pointer.
* NOTE(review): the signature and the loop body doing the conversion are not
* visible in this view. */
{
/*
* Loop the code points in the string, converting them one by one.
* ASSUMES that the code points for upper and lower case are encoded
* with the exact same length.
*/
/** @todo Handle bad encodings correctly+quietly, remove assumption,
* optimize. */
while (*pszCur)
{
}
return psz;
}
/* Walks the string up to its terminator and returns the psz pointer.
* NOTE(review): the signature and the loop body doing the conversion are not
* visible in this view. */
{
/*
* Loop the code points in the string, converting them one by one.
* ASSUMES that the code points for upper and lower case are encoded
* with the exact same length.
*/
/** @todo Handle bad encodings correctly+quietly, remove assumption,
* optimize. */
while(*pszCur)
{
}
return psz;
}