utf-8.cpp revision f39bba5daba2465f228571265a47ed4b16b683e3
/* $Id$ */
/** @file
* IPRT - UTF-8 Decoding.
*/
/*
* Copyright (C) 2006-2010 Oracle Corporation
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*
* The contents of this file may alternatively be used under the terms
* of the Common Development and Distribution License Version 1.0
* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
* VirtualBox OSE distribution, in which case the provisions of the
* CDDL are applicable instead of those of the GPL.
*
* You may elect to license modified versions of this file under the
* terms and conditions of either the GPL or the CDDL or both.
*/
/*******************************************************************************
* Header Files *
*******************************************************************************/
/**
* Get the length in code points of a UTF-8 encoded string.
* The string is validated while doing this.
*
* @returns IPRT status code.
* @param psz Pointer to the UTF-8 string.
* @param cch The max length of the string. (btw cch = cb)
* Use RTSTR_MAX if all of the string is to be examined.
* @param pcuc Where to store the length in unicode code points.
* @param pcchActual Where to store the actual size of the UTF-8 string
* on success (cch = cb again). Optional.
*/
{
/* NOTE(review): the signature, the statements that set up puch/uch, the head of
 * the lead-byte if-chain and the length-check condition are not visible in this
 * view; the comments below describe only the lines that are shown. */
size_t cCodePoints = 0; /* code points counted so far */
while (cch > 0)
{
/* a zero byte ends the scan */
if (!uch)
break;
{
/* figure sequence length and validate the first byte */
/** @todo RT_USE_RTC_3629 */
unsigned cb; /* total bytes in the current sequence (2..6) */
cb = 2;
/* lead byte 1110xxxx */
else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5)))
cb = 3;
/* lead byte 11110xxx */
else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4)))
cb = 4;
/* lead byte 111110xx */
else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3) | RT_BIT(2))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3)))
cb = 5;
/* lead byte 1111110x */
else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3) | RT_BIT(2) | RT_BIT(1))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3) | RT_BIT(2)))
cb = 6;
else
{
/* no lead-byte pattern matched */
return VERR_INVALID_UTF8_ENCODING;
}
/* check length */
{
RTStrAssertMsgFailed(("Invalid UTF-8 length: cb=%d cch=%d (%.*Rhxs)\n", cb, cch, RT_MIN(cch, 10), puch));
return VERR_INVALID_UTF8_ENCODING;
}
/* validate the rest */
/* Each case checks one trailing byte for the 10xxxxxx pattern and then
 * deliberately falls through to the checks for the shorter lengths. */
switch (cb)
{
case 6:
RTStrAssertMsgReturn((puch[5] & (RT_BIT(7) | RT_BIT(6))) == RT_BIT(7), ("6/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, cch), puch), VERR_INVALID_UTF8_ENCODING);
/* fall through */
case 5:
RTStrAssertMsgReturn((puch[4] & (RT_BIT(7) | RT_BIT(6))) == RT_BIT(7), ("5/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, cch), puch), VERR_INVALID_UTF8_ENCODING);
/* fall through */
case 4:
RTStrAssertMsgReturn((puch[3] & (RT_BIT(7) | RT_BIT(6))) == RT_BIT(7), ("4/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, cch), puch), VERR_INVALID_UTF8_ENCODING);
/* fall through */
case 3:
RTStrAssertMsgReturn((puch[2] & (RT_BIT(7) | RT_BIT(6))) == RT_BIT(7), ("3/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, cch), puch), VERR_INVALID_UTF8_ENCODING);
/* fall through */
case 2:
RTStrAssertMsgReturn((puch[1] & (RT_BIT(7) | RT_BIT(6))) == RT_BIT(7), ("2/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, cch), puch), VERR_INVALID_UTF8_ENCODING);
break;
}
/* validate the code point. */
/* NOTE(review): the per-length range checks are not visible here; every case
 * shown only breaks. */
switch (cb)
{
case 6:
break;
case 5:
break;
case 4:
break;
case 3:
break;
case 2:
break;
}
/* advance */
/* NOTE(review): the statements that step puch/cch by cb are not visible here. */
}
else
{
/* one ASCII byte */
puch++;
cch--;
}
cCodePoints++;
}
/* done */
*pcuc = cCodePoints;
/* NOTE(review): the statement this if is meant to guard appears to be missing
 * in this view; as shown, the return below is the guarded statement. */
if (pcchActual)
return VINF_SUCCESS;
}
/**
* Decodes a UTF-8 string into an array of Unicode code points.
*
* Since we know the input is valid, we do *not* perform encoding or length checks.
*
* @returns iprt status code.
* @param psz The UTF-8 string to recode. This is a valid encoding.
* @param cch The number of chars (the type char, so bytes if you like) to process of the UTF-8 string.
* The recoding will stop when cch or '\\0' is reached. Pass RTSTR_MAX to process up to '\\0'.
* @param paCps Where to store the code points array.
* @param cCps The number of RTUNICP items the paCps buffer can hold, excluding the terminator ('\\0').
*/
{
/* NOTE(review): the signature, the declarations of puch/uch/pCp and the
 * conditions heading each branch are not visible in this view; the comments
 * below describe only the lines that are shown. */
int rc = VINF_SUCCESS;
while (cch > 0)
{
/* read the next char and check for terminator. */
if (!uch)
break;
/* check for output overflow */
{
break;
}
cCps--; /* one output slot is consumed per decoded code point */
/* decode and recode the code point */
/* Every branch below advances the input pointer and shrinks the remaining
 * input length by the same number of bytes. */
{
puch++;
cch--;
}
#ifdef RT_STRICT
AssertMsgFailed(("Internal error!\n"));
#endif
{
puch += 2;
cch -= 2;
}
{
puch += 3;
cch -= 3;
}
{
puch += 4;
cch -= 4;
}
{
/* Five bytes consumed: the length must drop by five as well.  It used to be
 * decremented by six, which left cch one too small after the sequence (and
 * let the unsigned counter wrap when exactly five bytes remained). */
puch += 5;
cch -= 5;
}
else
{
puch += 6;
cch -= 6;
}
}
/* done */
*pCp = 0; /* terminate the code point array */
return rc;
}
{
}
{
/* NOTE(review): the signature, the computation of rc and the statement this if
 * is meant to guard are not visible in this view; as shown, the return below is
 * the guarded statement. */
if (pcCps)
return rc;
}
{
}
{
/*
* Use rtUtf8Length for the job.
*/
/* NOTE(review): the call that produces rc and the body of the success branch
 * are not visible in this view. */
if (RT_SUCCESS(rc))
{
}
return rc;
}
{
/* Collapses the IPRT status in rc to a success/failure truth value.
 * NOTE(review): the call that produces rc is not visible in this view. */
return RT_SUCCESS(rc);
}
{
/* NOTE(review): the signature, the declarations of cErrors/Cp/rc and the call
 * that fetches each code point are not visible in this view. */
for (;;)
{
if (RT_SUCCESS(rc))
{
/* a zero code point ends the loop */
if (!Cp)
break;
}
else
{
/* one more failed fetch; NOTE(review): any repair of the offending byte is
 * not visible here */
cErrors++;
}
}
return cErrors; /* number of failures counted by the loop */
}
{
/* NOTE(review): the signature and the conditions guarding the return and the
 * break statements are not visible in this view, so the control flow as shown
 * is not the real control flow. */
size_t cReplacements = 0; /* value returned on the normal exit path */
for (;;)
{
return -1;
/* a zero code point ends the loop */
if (!Cp)
break;
break;
/* *pCp == 0: write the replacement character over the byte at pszOld */
if (!*pCp)
{
*pszOld = chReplacement;
}
}
return cReplacements;
}
{
/* NOTE(review): the signature and the statements behind each step comment
 * (validation, length computation, allocation, decoding, cleanup) are not
 * visible in this view; only the control skeleton is shown. */
/*
* Validate input.
*/
/*
* Validate the UTF-8 input and count its code points.
*/
if (RT_SUCCESS(rc))
{
/*
* Allocate buffer.
*/
if (paCps)
{
/*
* Decode the string.
*/
if (RT_SUCCESS(rc))
{
return rc;
}
}
/* NOTE(review): the statement belonging to this else (the paCps == NULL path)
 * is not visible here. */
else
}
return rc;
}
/**
 * Converts a UTF-8 string into an array of code points, using a caller
 * supplied buffer or an allocated one.
 *
 * NOTE(review): several statements (input validation, the length calculation,
 * the buffer-selection condition, the allocation, the decode call and the
 * cleanup) are not visible in this view; the description covers only what the
 * shown lines establish.
 *
 * @returns IPRT status code; VERR_BUFFER_OVERFLOW when the supplied buffer
 *          size cCps is not larger than the number of code points needed.
 * @param   pszString   The input string.
 * @param   cchString   Length limit for pszString (presumably in bytes, as for
 *                      the other cch parameters in this file - confirm).
 * @param   ppaCps      In: pointer to the caller's buffer pointer.
 *                      Out: set to the result buffer on success.
 * @param   cCps        Size of the caller's buffer in code points.
 * @param   pcCps       Where to store the resulting code point count. Optional.
 */
RTDECL(int) RTStrToUniEx(const char *pszString, size_t cchString, PRTUNICP *ppaCps, size_t cCps, size_t *pcCps)
{
/*
* Validate input.
*/
/*
* Validate the UTF-8 input and count the code points.
*/
if (RT_SUCCESS(rc))
{
/* report the count even if the conversion fails later on */
if (pcCps)
*pcCps = cCpsResult;
/*
* Check buffer size / Allocate buffer.
*/
bool fShouldFree; /* true when the result buffer is not the caller's */
{
fShouldFree = false;
/* "<=" rather than "<": presumably one extra slot is needed for the
 * terminator - confirm */
if (cCps <= cCpsResult)
return VERR_BUFFER_OVERFLOW;
paCpsResult = *ppaCps;
}
else
{
fShouldFree = true;
}
if (paCpsResult)
{
/*
* Decode the UTF-8 string into code points.
*/
if (RT_SUCCESS(rc))
{
*ppaCps = paCpsResult;
return rc;
}
/* NOTE(review): the statement guarded by this if is not visible here. */
if (fShouldFree)
}
/* NOTE(review): the statement belonging to this else is not visible here. */
else
}
return rc;
}
/**
* Calculates the UTF-16 length of a string, validating the encoding while doing so.
*
* @returns IPRT status code.
* @param psz Pointer to the UTF-8 string.
* @param cch The max length of the string. (btw cch = cb)
* Use RTSTR_MAX if all of the string is to be examined.
* @param pcwc Where to store the length of the UTF-16 string as a number of RTUTF16 characters.
*/
{
/* NOTE(review): the signature, the statements that set up puch/uch/cwc, the
 * ASCII test, the head of the lead-byte if-chain, the length-check condition
 * and the final store of cwc are not visible in this view. */
while (cch > 0)
{
/* a zero byte ends the scan */
if (!uch)
break;
{
/* one ASCII byte */
cwc++;
puch++;
cch--;
}
else
{
/* figure sequence length and validate the first byte */
unsigned cb; /* total bytes in the current sequence (2..6) */
cb = 2;
/* lead byte 1110xxxx */
else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5)))
cb = 3;
/* lead byte 11110xxx */
else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4)))
cb = 4;
/* lead byte 111110xx */
else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3) | RT_BIT(2))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3)))
cb = 5;
/* lead byte 1111110x */
else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3) | RT_BIT(2) | RT_BIT(1))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3) | RT_BIT(2)))
cb = 6;
else
{
/* no lead-byte pattern matched */
return VERR_INVALID_UTF8_ENCODING;
}
/* check length */
{
RTStrAssertMsgFailed(("Invalid UTF-8 length: cb=%d cch=%d (%.*Rhxs)\n", cb, cch, RT_MIN(cch, 10), puch));
return VERR_INVALID_UTF8_ENCODING;
}
/* validate the rest */
/* Each case checks one trailing byte for the 10xxxxxx pattern and then
 * deliberately falls through to the checks for the shorter lengths. */
switch (cb)
{
case 6:
RTStrAssertMsgReturn((puch[5] & (RT_BIT(7) | RT_BIT(6))) == RT_BIT(7), ("6/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, cch), puch), VERR_INVALID_UTF8_ENCODING);
/* fall through */
case 5:
RTStrAssertMsgReturn((puch[4] & (RT_BIT(7) | RT_BIT(6))) == RT_BIT(7), ("5/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, cch), puch), VERR_INVALID_UTF8_ENCODING);
/* fall through */
case 4:
RTStrAssertMsgReturn((puch[3] & (RT_BIT(7) | RT_BIT(6))) == RT_BIT(7), ("4/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, cch), puch), VERR_INVALID_UTF8_ENCODING);
/* fall through */
case 3:
RTStrAssertMsgReturn((puch[2] & (RT_BIT(7) | RT_BIT(6))) == RT_BIT(7), ("3/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, cch), puch), VERR_INVALID_UTF8_ENCODING);
/* fall through */
case 2:
RTStrAssertMsgReturn((puch[1] & (RT_BIT(7) | RT_BIT(6))) == RT_BIT(7), ("2/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, cch), puch), VERR_INVALID_UTF8_ENCODING);
break;
}
/* validate the code point. */
/* NOTE(review): the per-length range checks are not visible here. */
switch (cb)
{
case 6:
/* as shown, 6-byte sequences are always rejected for UTF-16 */
return VERR_CANT_RECODE_AS_UTF16;
case 5:
/* as shown, 5-byte sequences are always rejected for UTF-16 */
return VERR_CANT_RECODE_AS_UTF16;
case 4:
/* extra unit: together with the increment under "advance" a 4-byte
 * sequence counts as two RTUTF16 units */
cwc++;
break;
case 3:
break;
case 2:
break;
}
/* advance */
/* NOTE(review): the statements that step puch/cch by cb are not visible here. */
cwc++;
}
}
/* done */
return VINF_SUCCESS;
}
/**
* Recodes a valid UTF-8 string as UTF-16.
*
* Since we know the input is valid, we do *not* perform encoding or length checks.
*
* @returns iprt status code.
* @param psz The UTF-8 string to recode. This is a valid encoding.
* @param cch The number of chars (the type char, so bytes if you like) to process of the UTF-8 string.
* The recoding will stop when cch or '\\0' is reached. Pass RTSTR_MAX to process up to '\\0'.
* @param pwsz Where to store the UTF-16 string.
* @param cwc The number of RTUTF16 items the pwsz buffer can hold, excluding the terminator ('\\0').
*/
{
/* NOTE(review): the signature, the declarations of puch/uch/pwc/uc, the
 * conditions heading the first branches and the statements that store the
 * UTF-16 units are not visible in this view. */
int rc = VINF_SUCCESS;
while (cch > 0)
{
/* read the next char and check for terminator. */
if (!uch)
break;
/* check for output overflow */
{
break;
}
cwc--; /* one output unit is consumed per iteration */
/* decode and recode the code point */
{
puch++;
cch--;
}
{
puch += 2;
cch -= 2;
}
/* lead byte 1110xxxx: three input bytes */
else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5)))
{
puch += 3;
cch -= 3;
}
else
{
/* generate surrogate pair */
/* only a 11110xxx lead byte (four input bytes) is expected here */
Assert((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4)));
{
break;
}
cwc--; /* the pair needs a second output unit */
uc -= 0x10000; /* rebase the code point before it is split into the pair */
puch += 4;
cch -= 4;
}
}
/* done */
*pwc = '\0'; /* terminate the output */
return rc;
}
{
/* NOTE(review): the signature and the statements behind each step comment are
 * not visible in this view; only the control skeleton is shown. */
/*
* Validate input.
*/
*ppwszString = NULL; /* the output pointer starts out NULL */
/*
* Validate the UTF-8 input and calculate the length of the UTF-16 string.
*/
if (RT_SUCCESS(rc))
{
/*
* Allocate buffer.
*/
if (pwsz)
{
/*
* Encode the UTF-16 string.
*/
if (RT_SUCCESS(rc))
{
/* success: hand the buffer to the caller */
*ppwszString = pwsz;
return rc;
}
}
/* NOTE(review): the statement belonging to this else is not visible here. */
else
}
return rc;
}
{
/* NOTE(review): the signature and many statements (validation, length
 * calculation, the buffer-selection and overflow conditions, allocation,
 * encoding, cleanup) are not visible in this view, so the control flow as
 * shown is not the real control flow. */
/*
* Validate input.
*/
/*
* Validate the UTF-8 input and calculate the length of the UTF-16 string.
*/
if (RT_SUCCESS(rc))
{
/* NOTE(review): the statement guarded by this if is not visible here. */
if (pcwc)
/*
* Check buffer size / Allocate buffer.
*/
bool fShouldFree; /* true when the result buffer is not the caller's */
{
fShouldFree = false;
/* NOTE(review): the size check guarding this return is not visible here. */
return VERR_BUFFER_OVERFLOW;
pwszResult = *ppwsz;
}
else
{
fShouldFree = true;
}
if (pwszResult)
{
/*
* Encode the UTF-16 string.
*/
if (RT_SUCCESS(rc))
{
/* success: hand the buffer to the caller */
*ppwsz = pwszResult;
return rc;
}
/* NOTE(review): the statement guarded by this if is not visible here. */
if (fShouldFree)
}
/* NOTE(review): the statement belonging to this else is not visible here. */
else
}
return rc;
}
{
}
{
/* NOTE(review): the signature, the computation of rc and the statement this if
 * is meant to guard are not visible in this view; as shown, the return below is
 * the guarded statement. */
if (pcwc)
return rc;
}
/**
* Calculates the length of the UTF-8 encoding of a Latin-1 string.
*
* @returns iprt status code.
* @param psz The Latin-1 string.
* @param cchIn The max length of the Latin-1 string to consider.
* @param pcch Where to store the length (excluding '\\0') of the UTF-8 string. (cch == cb, btw)
*/
{
/* NOTE(review): the signature, the per-character statements and the condition
 * guarding the break are not visible in this view. */
for (;;)
{
break;
/* propagate a failure status out of the loop */
if (RT_FAILURE(rc))
return rc;
}
/* done */
return VINF_SUCCESS;
}
/**
* Recodes a Latin-1 string as UTF-8.
*
* @returns iprt status code.
* @param pszIn The Latin-1 string.
* @param cchIn The number of characters to process from pszIn. The recoding
* will stop when cchIn or '\\0' is reached.
* @param psz Where to store the UTF-8 string.
* @param cch The size of the UTF-8 buffer, excluding the terminator.
*/
{
/* NOTE(review): the signature, the statements that fetch and store each
 * character and the conditions guarding the break and the overflow block are
 * not visible in this view. */
int rc;
for (;;)
{
break;
{
RTStrAssertMsgFailed(("Buffer overflow! 1\n"));
break;
}
}
/* done */
/* reaching the end of the input is the normal way out, not an error */
if (rc == VERR_END_OF_STRING)
rc = VINF_SUCCESS;
*psz = '\0'; /* terminate the output */
return rc;
}
{
/* NOTE(review): the signature and the statements behind each step comment are
 * not visible in this view; only the control skeleton is shown. */
/*
* Validate input.
*/
*ppszString = NULL; /* the output pointer starts out NULL */
/*
* Calculate the length of the UTF-8 encoding of the Latin-1 string.
*/
if (RT_SUCCESS(rc))
{
/*
* Allocate buffer and recode it.
*/
if (pszResult)
{
if (RT_SUCCESS(rc))
{
/* success: hand the buffer to the caller */
*ppszString = pszResult;
return rc;
}
}
/* NOTE(review): the statement belonging to this else is not visible here. */
else
}
return rc;
}
/**
 * Converts a Latin-1 string to UTF-8, using a caller supplied buffer or an
 * allocated one.
 *
 * NOTE(review): many statements (validation, length calculation, the
 * buffer-selection and overflow conditions, allocation, recoding, cleanup) are
 * not visible in this view; parameter roles below are inferred from the names
 * and the step comments and should be confirmed against the full source.
 *
 * @returns IPRT status code; VERR_BUFFER_OVERFLOW is one of the failure codes.
 * @param   pszString   The input string.
 * @param   cchString   Length limit for pszString.
 * @param   ppsz        Pointer to the output buffer pointer.
 * @param   cch         Size of the output buffer.
 * @param   pcch        Presumably receives the UTF-8 length. Optional.
 * @param   pszTag      Presumably an allocation tag - confirm.
 */
RTDECL(int) RTLatin1ToUtf8ExTag(const char *pszString, size_t cchString, char **ppsz, size_t cch, size_t *pcch, const char *pszTag)
{
/*
* Validate input.
*/
/*
* Calculate the length of the UTF-8 encoding of the Latin-1 string.
*/
if (RT_SUCCESS(rc))
{
/* NOTE(review): the statement guarded by this if is not visible here. */
if (pcch)
/*
* Check buffer size / Allocate buffer and recode it.
*/
bool fShouldFree; /* true when the result buffer is not the caller's */
char *pszResult;
{
fShouldFree = false;
/* NOTE(review): the size check guarding this return is not visible here. */
return VERR_BUFFER_OVERFLOW;
}
else
{
fShouldFree = true;
}
if (pszResult)
{
if (RT_SUCCESS(rc))
{
return rc;
}
/* NOTE(review): the statement guarded by this if is not visible here. */
if (fShouldFree)
}
/* NOTE(review): the statement belonging to this else is not visible here. */
else
}
return rc;
}
{
}
{
/* NOTE(review): the signature, the computation of rc and the statement this if
 * is meant to guard are not visible in this view; as shown, the return below is
 * the guarded statement. */
if (pcch)
return rc;
}
/**
* Calculates the Latin-1 length of a string, validating the encoding while
* doing so.
*
* @returns IPRT status code.
* @param psz Pointer to the UTF-8 string.
* @param cchIn The max length of the string. (btw cch = cb)
* Use RTSTR_MAX if all of the string is to be examined.
* @param pcch Where to store the length of the Latin-1 string in bytes.
*/
{
/* NOTE(review): the signature, the per-code-point statements and the condition
 * guarding the break are not visible in this view. */
for (;;)
{
break;
/* propagate a failure status out of the loop */
if (RT_FAILURE(rc))
return rc;
/* a zero cchCp is reported as "no translation"; presumably it means the code
 * point has no Latin-1 representation - confirm */
if (cchCp == 0)
return VERR_NO_TRANSLATION;
}
/* done */
return VINF_SUCCESS;
}
/**
* Recodes a valid UTF-8 string as Latin-1.
*
* Since we know the input is valid, we do *not* perform encoding or length checks.
*
* @returns iprt status code.
* @param pszIn The UTF-8 string to recode. This is a valid encoding.
* @param cchIn The number of chars (the type char, so bytes if you like) to process of the UTF-8 string.
* The recoding will stop when cchIn or '\\0' is reached. Pass RTSTR_MAX to process up to '\\0'.
* @param psz Where to store the Latin-1 string.
* @param cch The number of characters the psz buffer can hold, excluding the terminator ('\\0').
*/
{
/* NOTE(review): the signature, the statements that fetch and store each
 * character and the conditions guarding the break and the overflow block are
 * not visible in this view. */
int rc;
for (;;)
{
break;
{
RTStrAssertMsgFailed(("Buffer overflow! 1\n"));
break;
}
}
/* done */
/* reaching the end of the input is the normal way out, not an error */
if (rc == VERR_END_OF_STRING)
rc = VINF_SUCCESS;
*psz = '\0'; /* terminate the output */
return rc;
}
{
/* NOTE(review): the signature and the statements behind each step comment are
 * not visible in this view; only the control skeleton is shown. */
/*
* Validate input.
*/
*ppszString = NULL; /* the output pointer starts out NULL */
/*
* Validate the UTF-8 input and calculate the length of the Latin-1 string.
*/
if (RT_SUCCESS(rc))
{
/*
* Allocate buffer.
*/
if (psz)
{
/*
* Encode the Latin-1 string.
*/
if (RT_SUCCESS(rc))
{
/* success: hand the buffer to the caller */
*ppszString = psz;
return rc;
}
}
/* NOTE(review): the statement belonging to this else is not visible here. */
else
}
return rc;
}
{
/* NOTE(review): the signature and many statements (validation, length
 * calculation, the buffer-selection and overflow conditions, allocation,
 * encoding, cleanup) are not visible in this view, so the control flow as
 * shown is not the real control flow. */
/*
* Validate input.
*/
/*
* Validate the UTF-8 input and calculate the length of the Latin-1 string.
*/
if (RT_SUCCESS(rc))
{
/* NOTE(review): the statement guarded by this if is not visible here. */
if (pcch)
/*
* Check buffer size / Allocate buffer.
*/
bool fShouldFree; /* true when the result buffer is not the caller's */
char *pszResult;
{
fShouldFree = false;
/* NOTE(review): the size check guarding this return is not visible here. */
return VERR_BUFFER_OVERFLOW;
}
else
{
fShouldFree = true;
}
if (pszResult)
{
/*
* Encode the Latin-1 string.
*/
if (RT_SUCCESS(rc))
{
return rc;
}
/* NOTE(review): the statement guarded by this if is not visible here. */
if (fShouldFree)
}
/* NOTE(review): the statement belonging to this else is not visible here. */
else
}
return rc;
}
{
}
{
/* NOTE(review): the signature, the computation of rc and the statement this if
 * is meant to guard are not visible in this view; as shown, the return below is
 * the guarded statement. */
if (pcch)
return rc;
}
/**
* Handle invalid encodings passed to RTStrGetCp() and RTStrGetCpEx().
* @returns rc
* @param ppsz The pointer to the string position point.
* @param pCp Where to store RTUNICP_INVALID.
* @param rc The iprt error code.
*/
{
/*
* Try find a valid encoding.
*/
/* For now this only steps the caller's position past one byte; no search for
 * the next valid sequence is done (see the todo). */
(*ppsz)++; /** @todo code this! */
*pCp = RTUNICP_INVALID; /* report an invalid code point */
return rc; /* hand back the status that was passed in */
}
{
/* NOTE(review): the signature and the statements that produce Cp are not
 * visible in this view. */
return Cp;
}
{
/* NOTE(review): the signature, the declarations of puch/uc, the branch
 * conditions, the tails of the RTStrAssertMsgReturn calls and the code point
 * assembly are not visible in this view; comments describe only what is shown. */
/* ASCII ? */
{
puch++; /* a single byte is consumed */
}
{
/* figure the length and validate the first octet. */
/** @todo RT_USE_RTC_3629 */
unsigned cb; /* total bytes in the sequence (2..6) */
/* NOTE(review): the conditions selecting between these assignments are not
 * visible here. */
cb = 2;
cb = 3;
cb = 4;
cb = 5;
cb = 6;
else
{
RTStrAssertMsgFailed(("Invalid UTF-8 first byte: %.*Rhxs\n", RT_MIN(strlen((char *)puch), 10), puch));
}
/* validate the rest */
/* Each case checks one trailing byte for the 10xxxxxx pattern (0xc0 mask,
 * 0x80 value); no break separates the cases. */
switch (cb)
{
case 6:
RTStrAssertMsgReturn((puch[5] & 0xc0) == 0x80, ("6/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch),
case 5:
RTStrAssertMsgReturn((puch[4] & 0xc0) == 0x80, ("5/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch),
case 4:
RTStrAssertMsgReturn((puch[3] & 0xc0) == 0x80, ("4/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch),
case 3:
RTStrAssertMsgReturn((puch[2] & 0xc0) == 0x80, ("3/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch),
case 2:
RTStrAssertMsgReturn((puch[1] & 0xc0) == 0x80, ("2/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch),
break;
}
/* get and validate the code point. */
switch (cb)
{
case 6:
break;
case 5:
break;
case 4:
break;
case 3:
/* 0xffff and 0xfffe get their own status code; any other rejected value is
 * reported as an invalid encoding */
rtStrGetCpExFailure(ppsz, pCp, uc == 0xffff || uc == 0xfffe ? VERR_CODE_POINT_ENDIAN_INDICATOR : VERR_INVALID_UTF8_ENCODING));
break;
case 2:
break;
default: /* not expected to be reached; present to keep GCC quiet. */
break;
}
}
else
{
/* 6th bit is always set. */
RTStrAssertMsgFailed(("Invalid UTF-8 first byte: %.*Rhxs\n", RT_MIN(strlen((char *)puch), 10), puch));
}
return VINF_SUCCESS;
}
/**
* Handle invalid encodings passed to RTStrGetCpNEx().
* @returns rc
* @param ppsz The pointer to the string position point.
* @param pcch Pointer to the string length.
* @param pCp Where to store RTUNICP_INVALID.
* @param rc The iprt error code.
*/
{
/*
* Try find a valid encoding.
*/
/* For now this only steps the caller's position past one byte and shortens the
 * remaining length to match; no search for the next valid sequence is done
 * (see the todo). */
(*ppsz)++; /** @todo code this! */
(*pcch)--;
*pCp = RTUNICP_INVALID; /* report an invalid code point */
return rc; /* hand back the status that was passed in */
}
{
/* NOTE(review): the signature, the declarations of puch/cch/uc, the branch
 * conditions, the tails of the RTStrAssertMsgReturn calls and the code point
 * assembly are not visible in this view; comments describe only what is shown. */
/* nothing left to read: report end of string with an invalid code point */
if (cch == 0)
{
*pCp = RTUNICP_INVALID;
return VERR_END_OF_STRING;
}
/* ASCII ? */
{
/* a single byte is consumed */
puch++;
cch--;
}
{
/* figure the length and validate the first octet. */
/** @todo RT_USE_RTC_3629 */
unsigned cb; /* total bytes in the sequence (2..6) */
/* NOTE(review): the conditions selecting between these assignments are not
 * visible here. */
cb = 2;
cb = 3;
cb = 4;
cb = 5;
cb = 6;
else
{
RTStrAssertMsgFailed(("Invalid UTF-8 first byte: %.*Rhxs\n", RT_MIN(strlen((char *)puch), 10), puch));
}
/* validate the rest */
/* Each case checks one trailing byte for the 10xxxxxx pattern (0xc0 mask,
 * 0x80 value); no break separates the cases. */
switch (cb)
{
case 6:
RTStrAssertMsgReturn((puch[5] & 0xc0) == 0x80, ("6/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch),
case 5:
RTStrAssertMsgReturn((puch[4] & 0xc0) == 0x80, ("5/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch),
case 4:
RTStrAssertMsgReturn((puch[3] & 0xc0) == 0x80, ("4/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch),
case 3:
RTStrAssertMsgReturn((puch[2] & 0xc0) == 0x80, ("3/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch),
case 2:
RTStrAssertMsgReturn((puch[1] & 0xc0) == 0x80, ("2/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch),
break;
}
/* get and validate the code point. */
switch (cb)
{
case 6:
break;
case 5:
break;
case 4:
break;
case 3:
/* 0xffff and 0xfffe get their own status code; any other rejected value is
 * reported as an invalid encoding */
rtStrGetCpNExFailure(ppsz, pcch, pCp, uc == 0xffff || uc == 0xfffe ? VERR_CODE_POINT_ENDIAN_INDICATOR : VERR_INVALID_UTF8_ENCODING));
break;
case 2:
break;
default: /* not expected to be reached; present to keep GCC quiet. */
break;
}
}
else
{
/* 6th bit is always set. */
RTStrAssertMsgFailed(("Invalid UTF-8 first byte: %.*Rhxs\n", RT_MIN(strlen((char *)puch), 10), puch));
}
return VINF_SUCCESS;
}
{
/* Writes the encoding of code point uc at puch, picking the sequence length
 * from the magnitude of uc, and returns the position after the last byte
 * written.
 * NOTE(review): the signature, the declaration of puch and the statements that
 * emit the bytes of each sequence are not visible in this view. */
if (uc < 0x80)
else if (uc < 0x00000800)
{
}
else if (uc < 0x00010000)
{
/** @todo RT_USE_RTC_3629 */
/* Accept only values below 0xd800 or in 0xe000..0xfffd.  The lower bound
 * used to read 0x0000d8000 (one zero too many), which every value in this
 * branch satisfies, so 0xd800..0xdfff, 0xfffe and 0xffff were never routed
 * to the rejection path below. */
if ( uc < 0x0000d800
|| ( uc > 0x0000dfff
&& uc < 0x0000fffe))
{
}
else
{
/* rejected value: emit 0x7f in its place */
*puch++ = 0x7f;
}
}
/** @todo RT_USE_RTC_3629 */
else if (uc < 0x00200000)
{
}
else if (uc < 0x04000000)
{
}
else if (uc <= 0x7fffffff)
{
}
else
{
/* beyond the largest encodable value: emit 0x7f in its place */
*puch++ = 0x7f;
}
return (char *)puch;
}
{
/* NOTE(review): the signature, the declarations of puch/uMask, the branch
 * and loop conditions and the head of the assertion call are not visible in
 * this view; comments describe only what is shown. */
{
/* simple char? */
return (char *)puch;
/* two or more. */
&& !(uMask & 1))
{
{
(char *)pszStart);
return (char *)puch;
}
uMask >>= 1; /* move on to the next mask bit */
}
RTStrAssertMsgFailed(("Invalid UTF-8 encoding: %.*Rhxs puch=%p psz=%p\n", psz - (char *)puch, puch, psz));
}
/* fallback result: the start of the string */
return (char *)pszStart;
}