/* Normalize.xs revision 7c478bd95313f5f23a4c958a745db2134aa03244 */
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
/* These 5 files are prepared by mkheader */
#include "unfcmb.h"
#include "unfcan.h"
#include "unfcpt.h"
#include "unfcmp.h"
#include "unfexc.h"
/*
 * Compatibility aliases for older perls (the original note reads
 * "Perl 5.6.1 ?", i.e. the exact version is unconfirmed): if the perl
 * headers did not define uvuni_to_utf8 as a macro, map it to uv_to_utf8.
 */
#ifndef uvuni_to_utf8
#define uvuni_to_utf8 uv_to_utf8
#endif /* uvuni_to_utf8 */
/* Same fallback for utf8n_to_uvuni, which is mapped to utf8_to_uv (Perl 5.6.1 ?). */
#ifndef utf8n_to_uvuni
#define utf8n_to_uvuni utf8_to_uv
#endif /* utf8n_to_uvuni */
/*
 * UTF8_ALLOW_BOM is used before Perl 5.8.0.
 * NOTE(review): both branches of this conditional are empty here, so it
 * has no effect as written -- confirm whether definitions are missing.
 */
#ifdef UTF8_ALLOW_BOM
#else
#endif
/*
 * Panic message for the case where utf8n_to_uvuni() sets retlen to 0
 * (the original author was unsure whether this can actually happen).
 */
#define ErrRetlenIsZero "panic (Unicode::Normalize): zero-length character"
/*
 * Panic message for the case where utf8_hop() steps back to a position
 * before the start of the string; possibly caused by broken UTF-8.
 */
#define ErrHopBeforeStart "panic (Unicode::Normalize): hopping before start"
/*
 * Largest code point treated as valid. At present, characters above
 * 0x10ffff are meant to be left unaffected without complaint (marked as
 * unconfirmed by the original author).
 * NOTE(review): presumably the bound behind OVER_UTF_MAX(), whose
 * definition is not visible here -- confirm.
 */
#define VALID_UTF_MAX (0x10ffff)
/* HANGUL_H -- constants for Hangul syllable / jamo arithmetic */

/* L jamo range: first code point, number of code points, last code point */
#define Hangul_LBase 0x1100
#define Hangul_LCount 19
#define Hangul_LFinal (Hangul_LBase + Hangul_LCount - 1)

/* V jamo range */
#define Hangul_VBase 0x1161
#define Hangul_VCount 21
#define Hangul_VFinal (Hangul_VBase + Hangul_VCount - 1)

/* T jamo range (TBase itself is one below the first real T jamo) */
#define Hangul_TBase 0x11A7
#define Hangul_TCount 28
#define Hangul_TFinal (Hangul_TBase + Hangul_TCount - 1)

/* Number of V x T combinations (21 * 28 = 588) */
#define Hangul_NCount (Hangul_VCount * Hangul_TCount)

/* Precomposed syllable range: 19 * 21 * 28 = 11172 code points from SBase */
#define Hangul_SBase 0xAC00
#define Hangul_SCount (Hangul_LCount * Hangul_NCount)
#define Hangul_SFinal (Hangul_SBase + Hangul_SCount - 1)

/* HANGUL_H */
/*
 * UNF_cc: record type used for canonical ordering of combining
 * characters (c.c.).
 *
 * NOTE(review): no members are declared between the braces here. An empty
 * struct is not valid ISO C (it is accepted only as a compiler extension),
 * so the member declarations appear to be missing -- confirm and restore.
 */
typedef struct {
} UNF_cc;
/*
 * compare_cc - comparison callback taking two untyped const pointers and
 * returning an int.
 *
 * Presumably the comparator used to sort UNF_cc entries (e.g. via
 * qsort) -- TODO confirm against the call site, which is not visible here.
 *
 * NOTE(review): as written, ret_cc is tested without ever being assigned
 * (reading an uninitialized automatic variable is undefined behavior), and
 * control reaches the closing brace of this non-void function without a
 * return statement when ret_cc is zero. Neither a nor b is dereferenced.
 * The statements that compute the ordering appear to be missing -- restore
 * them before relying on this function.
 */
static int compare_cc (const void *a, const void *b)
{
int ret_cc;
if (ret_cc)
return ret_cc;
}
{
if (OVER_UTF_MAX(uv))
return NULL;
if (! plane)
return NULL;
}
{
if (OVER_UTF_MAX(uv))
return NULL;
if (! plane)
return NULL;
}
{
return 0;
}
}
if (! plane)
return 0;
if (! row)
return 0;
if (! cell)
return 0;
return i->composite;
}
return 0;
}
{
if (OVER_UTF_MAX(uv))
return 0;
if (! plane)
return 0;
}
{
if (! Hangul_IsS(uv))
return;
t = tmp;
if (tindex)
*t = '\0';
}
{
t = tmp;
t = uvuni_to_utf8(t, uv);
*t = '\0';
}
SV*
PROTOTYPE: $;$
U8 *s, *e, *p, *r;
bool iscompat;
CODE:
} else {
}
(void)SvPOK_only(dst);
e = s + srclen;
for (p = s; p < e; p += retlen) {
if (!retlen)
if (Hangul_IsS(uv))
else {
if (r)
else
}
}
SV*
PROTOTYPE: $
bool valid_uvlast;
CODE:
} else {
}
e = s + srclen;
(void)SvPOK_only(dst);
for (p = s; p < e;) {
if (!retlen)
p += retlen;
if (curCC == 0) {
d = uvuni_to_utf8(d, uv);
continue;
}
cc_pos = 0;
while (p < e) {
if (!retlen)
p += retlen;
if (curCC == 0) {
valid_uvlast = TRUE;
break;
}
cc_pos++;
}
}
/* reordered if there are two c.c.'s */
if (cc_pos) {
}
for (i = 0; i <= cc_pos; i++) {
}
if (valid_uvlast)
{
d = uvuni_to_utf8(d, uvlast);
}
}
*d = '\0';
SV*
PROTOTYPE: $
CODE:
} else {
}
e = s + srclen;
(void)SvPOK_only(dst);
/* for uncomposed combining char */
(void)SvPOK_only(tmp);
for (p = s; p < e;) {
if (beginning) {
if (!retlen)
p += retlen;
d = uvuni_to_utf8(d, uvS);
continue;
}
}
/* Starter */
preCC = 0;
/* to the next Starter */
while (p < e) {
if (!retlen)
p += retlen;
t = uvuni_to_utf8(t, uv);
} else {
}
/* preCC not changed to curCC */
} else if (! curCC && p < e) { /* blocked */
break;
} else {
t = uvuni_to_utf8(t, uv);
}
}
}
if (tmplen) { /* uncomposed combining char */
while (tmplen--)
*d++ = *t++;
}
} /* for */
*d = '\0';
void
PROTOTYPE: $
checkNFKD = 1
CODE:
} else {
}
e = s + srclen;
preCC = 0;
for (p = s; p < e; p += retlen) {
if (!retlen)
}
void
PROTOTYPE: $
checkNFKC = 1
bool isMAYBE;
CODE:
} else {
}
e = s + srclen;
preCC = 0;
for (p = s; p < e; p += retlen) {
if (!retlen)
; /* YES */
else if (ix) {
/* NFKC_NO when having compatibility mapping. */
}
if (isMAYBE)
else
void
PROTOTYPE: $
checkFCC = 1
bool isMAYBE;
CODE:
} else {
}
e = s + srclen;
preCC = 0;
for (p = s; p < e; p += retlen) {
if (!retlen)
if (sCan) {
}
else {
}
if (ix) {
}
if (sCan) {
}
else {
}
}
if (isMAYBE)
else
PROTOTYPE: $
bool
PROTOTYPE: $
bool
PROTOTYPE: $
bool
PROTOTYPE: $
bool
PROTOTYPE: $
isNFC_MAYBE = 1
isNFKC_MAYBE = 2
void
PROTOTYPE: $
isNFKD_NO = 1
CODE:
XSRETURN_YES; /* NFD_NO or NFKD_NO */
else
void
PROTOTYPE: $
isNFC_NO = 0
isNFKC_NO = 1
CODE:
XSRETURN_YES; /* NFC_NO or NFKC_NO */
else if (ix) {
XSRETURN_YES; /* NFC_NO or NFKC_NO */
else
}
else
SV*
PROTOTYPE: $$
CODE:
SV*
PROTOTYPE: $
getCompat = 1
CODE:
if (Hangul_IsS(uv)) {
(void)SvPOK_only(dst);
} else {
if (!rstr)
}
void
U8 *s, *e, *p;
} else {
}
e = s + srclen;
for (p = e; s < p; ) {
p = utf8_hop(p, -1);
if (p < s)
break;
}