1N/A
1N/A#include "EXTERN.h"
1N/A#include "perl.h"
1N/A#include "XSUB.h"
1N/A
1N/A/* These 5 files are prepared by mkheader */
1N/A#include "unfcmb.h"
1N/A#include "unfcan.h"
1N/A#include "unfcpt.h"
1N/A#include "unfcmp.h"
1N/A#include "unfexc.h"
1N/A
/*
 * Compatibility shims: when the Perl headers do not provide the "uvuni"
 * spellings, fall back to the older names.  (The original note suggests
 * this is needed for Perl 5.6.1 -- unconfirmed.)
 */
#if !defined(uvuni_to_utf8)
#  define uvuni_to_utf8 uv_to_utf8
#endif

#if !defined(utf8n_to_uvuni)
#  define utf8n_to_uvuni utf8_to_uv
#endif

/*
 * Flag set handed to utf8n_to_uvuni() by every decode in this file.
 * UTF8_ALLOW_BOM is only OR-ed in when the headers define it
 * (per the original note, it is used before Perl 5.8.0).
 */
#if defined(UTF8_ALLOW_BOM)
#  define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FFFF)
#else
#  define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_FFFF)
#endif

/* croak() message: utf8n_to_uvuni() reported a character length of zero */
#define ErrRetlenIsZero "panic (Unicode::Normalize): zero-length character"

/* croak() message: utf8_hop() stepped back past the start (maybe broken UTF-8) */
#define ErrHopBeforeStart "panic (Unicode::Normalize): hopping before start"

/*
 * Largest code point the lookup helpers in this file will consult the
 * tables for; anything above it is passed over without complaint.
 */
#define VALID_UTF_MAX (0x10ffff)
#define OVER_UTF_MAX(uv) ((uv) > VALID_UTF_MAX)
1N/A
/* HANGUL_H */
/*
 * Constants and range tests for the arithmetic Hangul syllable
 * (de)composition used in this file.
 *
 * Only the Base values and the L/V/T counts are written as literals; the
 * combined counts and the *Final bounds are derived from them so that the
 * numbers cannot drift out of sync.  The resulting values are noted in
 * the trailing comments.
 */
#define Hangul_LCount 19
#define Hangul_VCount 21
#define Hangul_TCount 28

#define Hangul_NCount (Hangul_VCount * Hangul_TCount)        /* 588 */
#define Hangul_SCount (Hangul_LCount * Hangul_NCount)        /* 11172 */

#define Hangul_SBase  0xAC00
#define Hangul_SFinal (Hangul_SBase + Hangul_SCount - 1)     /* 0xD7A3 */

#define Hangul_LBase  0x1100
#define Hangul_LFinal (Hangul_LBase + Hangul_LCount - 1)     /* 0x1112 */

#define Hangul_VBase  0x1161
#define Hangul_VFinal (Hangul_VBase + Hangul_VCount - 1)     /* 0x1175 */

#define Hangul_TBase  0x11A7
#define Hangul_TFinal (Hangul_TBase + Hangul_TCount - 1)     /* 0x11C2 */

/* precomposed syllable block */
#define Hangul_IsS(u)  ((Hangul_SBase <= (u)) && ((u) <= Hangul_SFinal))
/* offset from SBase is a multiple of TCount (tindex == 0); only
   meaningful together with Hangul_IsS(), see Hangul_IsLV() */
#define Hangul_IsN(u)  (((u) - Hangul_SBase) % Hangul_TCount == 0)
/* syllable without a trailing consonant */
#define Hangul_IsLV(u) (Hangul_IsS(u) && Hangul_IsN(u))
/* leading consonant jamo */
#define Hangul_IsL(u)  ((Hangul_LBase <= (u)) && ((u) <= Hangul_LFinal))
/* vowel jamo */
#define Hangul_IsV(u)  ((Hangul_VBase <= (u)) && ((u) <= Hangul_VFinal))
/* trailing consonant jamo; TBase itself is excluded on purpose because
   it stands for tindex 0, i.e. "no trailing consonant" */
#define Hangul_IsT(u)  ((Hangul_TBase < (u)) && ((u) <= Hangul_TFinal))
/* HANGUL_H */
1N/A
1N/A/* this is used for canonical ordering of combining characters (c.c.). */
1N/Atypedef struct {
1N/A U8 cc; /* combining class */
1N/A UV uv; /* codepoint */
1N/A STRLEN pos; /* position */
1N/A} UNF_cc;
1N/A
1N/Astatic int compare_cc (const void *a, const void *b)
1N/A{
1N/A int ret_cc;
1N/A ret_cc = ((UNF_cc*) a)->cc - ((UNF_cc*) b)->cc;
1N/A if (ret_cc)
1N/A return ret_cc;
1N/A
1N/A return ( ((UNF_cc*) a)->pos > ((UNF_cc*) b)->pos )
1N/A - ( ((UNF_cc*) a)->pos < ((UNF_cc*) b)->pos );
1N/A}
1N/A
1N/Astatic U8* dec_canonical (UV uv)
1N/A{
1N/A U8 ***plane, **row;
1N/A if (OVER_UTF_MAX(uv))
1N/A return NULL;
1N/A plane = (U8***)UNF_canon[uv >> 16];
1N/A if (! plane)
1N/A return NULL;
1N/A row = plane[(uv >> 8) & 0xff];
1N/A return row ? row[uv & 0xff] : NULL;
1N/A}
1N/A
1N/Astatic U8* dec_compat (UV uv)
1N/A{
1N/A U8 ***plane, **row;
1N/A if (OVER_UTF_MAX(uv))
1N/A return NULL;
1N/A plane = (U8***)UNF_compat[uv >> 16];
1N/A if (! plane)
1N/A return NULL;
1N/A row = plane[(uv >> 8) & 0xff];
1N/A return row ? row[uv & 0xff] : NULL;
1N/A}
1N/A
1N/Astatic UV composite_uv (UV uv, UV uv2)
1N/A{
1N/A UNF_complist ***plane, **row, *cell, *i;
1N/A
1N/A if (! uv2 || OVER_UTF_MAX(uv) || OVER_UTF_MAX(uv2))
1N/A return 0;
1N/A
1N/A if (Hangul_IsL(uv) && Hangul_IsV(uv2)) {
1N/A uv -= Hangul_LBase; /* lindex */
1N/A uv2 -= Hangul_VBase; /* vindex */
1N/A return(Hangul_SBase + (uv * Hangul_VCount + uv2) * Hangul_TCount);
1N/A }
1N/A if (Hangul_IsLV(uv) && Hangul_IsT(uv2)) {
1N/A uv2 -= Hangul_TBase; /* tindex */
1N/A return(uv + uv2);
1N/A }
1N/A plane = UNF_compos[uv >> 16];
1N/A if (! plane)
1N/A return 0;
1N/A row = plane[(uv >> 8) & 0xff];
1N/A if (! row)
1N/A return 0;
1N/A cell = row[uv & 0xff];
1N/A if (! cell)
1N/A return 0;
1N/A for (i = cell; i->nextchar; i++) {
1N/A if (uv2 == i->nextchar)
1N/A return i->composite;
1N/A }
1N/A return 0;
1N/A}
1N/A
1N/Astatic U8 getCombinClass (UV uv)
1N/A{
1N/A U8 **plane, *row;
1N/A if (OVER_UTF_MAX(uv))
1N/A return 0;
1N/A plane = (U8**)UNF_combin[uv >> 16];
1N/A if (! plane)
1N/A return 0;
1N/A row = plane[(uv >> 8) & 0xff];
1N/A return row ? row[uv & 0xff] : 0;
1N/A}
1N/A
1N/Astatic void sv_cat_decompHangul (SV* sv, UV uv)
1N/A{
1N/A UV sindex, lindex, vindex, tindex;
1N/A U8 *t, tmp[3 * UTF8_MAXLEN + 1];
1N/A
1N/A if (! Hangul_IsS(uv))
1N/A return;
1N/A
1N/A sindex = uv - Hangul_SBase;
1N/A lindex = sindex / Hangul_NCount;
1N/A vindex = (sindex % Hangul_NCount) / Hangul_TCount;
1N/A tindex = sindex % Hangul_TCount;
1N/A
1N/A t = tmp;
1N/A t = uvuni_to_utf8(t, (lindex + Hangul_LBase));
1N/A t = uvuni_to_utf8(t, (vindex + Hangul_VBase));
1N/A if (tindex)
1N/A t = uvuni_to_utf8(t, (tindex + Hangul_TBase));
1N/A *t = '\0';
1N/A sv_catpvn(sv, (char *)tmp, t - tmp);
1N/A}
1N/A
1N/Astatic void sv_cat_uvuni (SV* sv, UV uv)
1N/A{
1N/A U8 *t, tmp[UTF8_MAXLEN + 1];
1N/A
1N/A t = tmp;
1N/A t = uvuni_to_utf8(t, uv);
1N/A *t = '\0';
1N/A sv_catpvn(sv, (char *)tmp, t - tmp);
1N/A}
1N/A
1N/AMODULE = Unicode::Normalize PACKAGE = Unicode::Normalize
1N/A
# XSUB decompose(arg, compat = false)
# Returns a new UTF-8 flagged string in which every character of arg is
# replaced by its table decomposition (compatibility mapping when compat
# is true, canonical mapping otherwise); Hangul syllables are decomposed
# arithmetically and characters without a mapping are copied unchanged.
# Croaks with ErrRetlenIsZero when the decoder reports a zero length.
SV*
decompose(arg, compat = &PL_sv_no)
    SV * arg
    SV * compat
  PROTOTYPE: $;$
  PREINIT:
    UV uv;
    SV *src, *dst;
    STRLEN srclen, retlen;
    U8 *s, *e, *p, *r;
    bool iscompat;
  CODE:
    /* operate on UTF-8; a mortal copy is upgraded so arg stays untouched */
    if (SvUTF8(arg)) {
	src = arg;
    } else {
	src = sv_mortalcopy(arg);
	sv_utf8_upgrade(src);
    }
    iscompat = SvTRUE(compat);

    /* Start from a genuine empty string: the result is then a valid,
       NUL-terminated PV even when the loop below appends nothing
       (empty input). */
    dst = newSVpvn("", 0);
    (void)SvPOK_only(dst);
    SvUTF8_on(dst);

    s = (U8*)SvPV(src,srclen);
    e = s + srclen;
    for (p = s; p < e; p += retlen) {
	uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
	if (!retlen) {
	    /* dst is not mortal yet; release it before unwinding */
	    SvREFCNT_dec(dst);
	    croak(ErrRetlenIsZero);
	}

	if (Hangul_IsS(uv))
	    sv_cat_decompHangul(dst, uv);
	else {
	    r = iscompat ? dec_compat(uv) : dec_canonical(uv);
	    if (r)
		sv_catpv(dst, (char *)r);
	    else
		sv_cat_uvuni(dst, uv);
	}
    }
    RETVAL = dst;
  OUTPUT:
    RETVAL
1N/A
1N/A
1N/A
# XSUB reorder(arg)
# Returns a new UTF-8 flagged string in which every run of characters
# with a non-zero combining class has been sorted by combining class,
# keeping the input order among equal classes.  Characters of class 0
# are copied through unchanged.
# Croaks with ErrRetlenIsZero when the decoder reports a zero length.
SV*
reorder(arg)
    SV * arg
  PROTOTYPE: $
  PREINIT:
    SV *src, *dst;
    STRLEN srclen, dstlen, retlen, stk_cc_max;
    U8 *s, *e, *p, *d, curCC;
    UV uv, uvlast = 0;
    UNF_cc * stk_cc;
    STRLEN i, cc_pos;
    bool valid_uvlast;
  CODE:
    /* operate on UTF-8; a mortal copy is upgraded so arg stays untouched */
    if (SvUTF8(arg)) {
	src = arg;
    } else {
	src = sv_mortalcopy(arg);
	sv_utf8_upgrade(src);
    }

    s = (U8*)SvPV(src, srclen);
    e = s + srclen;
    dstlen = srclen + 1;
    dst = newSV(dstlen);
    (void)SvPOK_only(dst);
    SvUTF8_on(dst);
    d = (U8*)SvPVX(dst);

    stk_cc_max = 10; /* initial capacity; grown on demand below */
    New(0, stk_cc, stk_cc_max, UNF_cc);

    for (p = s; p < e;) {
	uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
	if (!retlen) {
	    /* neither buffer is owned by anything else yet */
	    Safefree(stk_cc);
	    SvREFCNT_dec(dst);
	    croak(ErrRetlenIsZero);
	}
	p += retlen;

	curCC = getCombinClass(uv);
	if (curCC == 0) {
	    d = uvuni_to_utf8(d, uv);
	    continue;
	}

	/* first combining character of a run */
	cc_pos = 0;
	stk_cc[cc_pos].cc  = curCC;
	stk_cc[cc_pos].uv  = uv;
	stk_cc[cc_pos].pos = cc_pos;

	/* collect the rest of the run; a class-0 character ends it and
	   is remembered in uvlast so it can be emitted after the run */
	valid_uvlast = FALSE;
	while (p < e) {
	    uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
	    if (!retlen) {
		Safefree(stk_cc);
		SvREFCNT_dec(dst);
		croak(ErrRetlenIsZero);
	    }
	    p += retlen;

	    curCC = getCombinClass(uv);
	    if (curCC == 0) {
		uvlast = uv;
		valid_uvlast = TRUE;
		break;
	    }

	    cc_pos++;
	    if (stk_cc_max <= cc_pos) { /* extend if need */
		/* cc_pos == stk_cc_max here; doubling keeps the total
		   copying linear for long runs */
		stk_cc_max *= 2;
		Renew(stk_cc, stk_cc_max, UNF_cc);
	    }
	    stk_cc[cc_pos].cc  = curCC;
	    stk_cc[cc_pos].uv  = uv;
	    stk_cc[cc_pos].pos = cc_pos;
	}

	/* reordered if there are two c.c.'s */
	if (cc_pos) {
	    qsort((void*)stk_cc, cc_pos + 1, sizeof(UNF_cc), compare_cc);
	}

	for (i = 0; i <= cc_pos; i++) {
	    d = uvuni_to_utf8(d, stk_cc[i].uv);
	}
	if (valid_uvlast)
	{
	    d = uvuni_to_utf8(d, uvlast);
	}
    }
    *d = '\0';
    SvCUR_set(dst, d - (U8*)SvPVX(dst));
    Safefree(stk_cc);
    RETVAL = dst;
  OUTPUT:
    RETVAL
1N/A
1N/A
1N/A
# XSUB compose(arg), alias composeContiguous (ix == 1)
# Returns a new UTF-8 flagged string in which each starter is combined
# with the following characters wherever composite_uv() yields a
# composite that is not reported by isExclusion().  Under the
# composeContiguous alias a character is only combined while no
# uncomposed character has been set aside since the current starter.
# Croaks with ErrRetlenIsZero when the decoder reports a zero length.
SV*
compose(arg)
    SV * arg
  PROTOTYPE: $
  ALIAS:
    composeContiguous = 1
  PREINIT:
    SV *src, *dst, *tmp;
    U8 *s, *p, *e, *d, *t, *tmp_start, curCC, preCC;
    UV uv = 0, uvS, uvComp;
    STRLEN srclen, dstlen, tmplen, retlen;
    bool beginning = TRUE;
  CODE:
    /* operate on UTF-8; a mortal copy is upgraded so arg stays untouched */
    if (SvUTF8(arg)) {
	src = arg;
    } else {
	src = sv_mortalcopy(arg);
	sv_utf8_upgrade(src);
    }

    s = (U8*)SvPV(src, srclen);
    e = s + srclen;
    dstlen = srclen + 1;
    dst = newSV(dstlen);
    (void)SvPOK_only(dst);
    SvUTF8_on(dst);
    d = (U8*)SvPVX(dst);

  /* for uncomposed combining char */
    tmp = sv_2mortal(newSV(dstlen));
    (void)SvPOK_only(tmp);
    SvUTF8_on(tmp);

    for (p = s; p < e;) {
	if (beginning) {
	    uvS = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
	    if (!retlen) {
		/* tmp is mortal, dst is not: release it before unwinding */
		SvREFCNT_dec(dst);
		croak(ErrRetlenIsZero);
	    }
	    p += retlen;

	    if (getCombinClass(uvS)) { /* no Starter found yet */
		d = uvuni_to_utf8(d, uvS);
		continue;
	    }
	    beginning = FALSE;
	}

    /* Starter */
	t = tmp_start = (U8*)SvPVX(tmp);
	preCC = 0;

    /* to the next Starter */
	while (p < e) {
	    uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
	    if (!retlen) {
		SvREFCNT_dec(dst);
		croak(ErrRetlenIsZero);
	    }
	    p += retlen;

	    curCC = getCombinClass(uv);

	    if (preCC && preCC == curCC) {
		/* same class as the previously set-aside character:
		   set this one aside as well */
		preCC = curCC;
		t = uvuni_to_utf8(t, uv);
	    } else {
		uvComp = composite_uv(uvS, uv);

		if (uvComp && ! isExclusion(uvComp) &&
			(ix ? (t == tmp_start) : (preCC <= curCC))) {
		    STRLEN leftcur, rightcur, dstcur;
		    leftcur  = UNISKIP(uvComp);
		    rightcur = UNISKIP(uvS) + UNISKIP(uv);

		    /* the composite may need more bytes than its two
		       parts together; make room in dst if so */
		    if (leftcur > rightcur) {
			dstcur = d - (U8*)SvPVX(dst);
			dstlen += leftcur - rightcur;
			d = (U8*)SvGROW(dst,dstlen) + dstcur;
		    }
		    /* preCC not changed to curCC */
		    uvS = uvComp;
		} else if (! curCC && p < e) { /* blocked */
		    break;
		} else {
		    preCC = curCC;
		    t = uvuni_to_utf8(t, uv);
		}
	    }
	}
	d = uvuni_to_utf8(d, uvS); /* starter (composed or not) */
	tmplen = t - tmp_start;
	if (tmplen) { /* uncomposed combining char */
	    t = (U8*)SvPVX(tmp);
	    while (tmplen--)
		*d++ = *t++;
	}
	/* After a "blocked" break uv is the next starter.  When the inner
	   loop never ran (input ended right after the first starter) uv
	   still holds its initial 0 and the outer loop ends anyway. */
	uvS = uv;
    } /* for */
    *d = '\0';
    SvCUR_set(dst, d - (U8*)SvPVX(dst));
    RETVAL = dst;
  OUTPUT:
    RETVAL
1N/A
1N/A
# XSUB checkNFD(arg), alias checkNFKD (ix == 1)
# Returns YES when no character of arg breaks the order of combining
# classes, is a Hangul syllable, or has a table decomposition
# (compatibility table under the checkNFKD alias, canonical otherwise);
# returns NO at the first character that does.
# Croaks with ErrRetlenIsZero when the decoder reports a zero length.
void
checkNFD(arg)
    SV * arg
  PROTOTYPE: $
  ALIAS:
    checkNFKD = 1
  PREINIT:
    UV uv;
    SV *src;
    STRLEN srclen, retlen;
    U8 *s, *e, *p, curCC, preCC;
  CODE:
    /* operate on UTF-8; a mortal copy is upgraded so arg stays untouched */
    if (SvUTF8(arg)) {
	src = arg;
    } else {
	src = sv_mortalcopy(arg);
	sv_utf8_upgrade(src);
    }

    s = (U8*)SvPV(src,srclen);
    e = s + srclen;

    preCC = 0;
    p = s;
    while (p < e) {
	uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
	if (!retlen)
	    croak(ErrRetlenIsZero);
	p += retlen;

	curCC = getCombinClass(uv);

	/* canonical ordering violated */
	if (curCC != 0 && curCC < preCC)
	    XSRETURN_NO;

	/* anything that would still decompose */
	if (Hangul_IsS(uv))
	    XSRETURN_NO;
	if ((ix ? dec_compat(uv) : dec_canonical(uv)) != NULL)
	    XSRETURN_NO;

	preCC = curCC;
    }
    XSRETURN_YES;
1N/A
1N/A
1N/A
# XSUB checkNFC(arg), alias checkNFKC (ix == 1)
# Quick check with three outcomes: NO as soon as a character breaks the
# order of combining classes or carries a NO property, undef (MAYBE)
# when the scan finishes and some character satisfied isComp2nd(), and
# YES otherwise.  Under the checkNFKC alias a character whose
# compatibility mapping differs from its canonical one is also a NO.
# Croaks with ErrRetlenIsZero when the decoder reports a zero length.
void
checkNFC(arg)
    SV * arg
  PROTOTYPE: $
  ALIAS:
    checkNFKC = 1
  PREINIT:
    UV uv;
    SV *src;
    STRLEN srclen, retlen;
    U8 *s, *e, *p, curCC, preCC;
    bool isMAYBE;
  CODE:
    /* operate on UTF-8; a mortal copy is upgraded so arg stays untouched */
    if (SvUTF8(arg)) {
	src = arg;
    } else {
	src = sv_mortalcopy(arg);
	sv_utf8_upgrade(src);
    }

    s = (U8*)SvPV(src,srclen);
    e = s + srclen;

    preCC = 0;
    isMAYBE = FALSE;
    p = s;
    while (p < e) {
	uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
	if (!retlen)
	    croak(ErrRetlenIsZero);
	p += retlen;

	curCC = getCombinClass(uv);

	/* canonical ordering violated */
	if (curCC != 0 && curCC < preCC)
	    XSRETURN_NO;

	/* get NFC/NFKC property; Hangul syllables are canonical
	   composites and count as YES without further checks */
	if (! Hangul_IsS(uv)) {
	    if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv))
		XSRETURN_NO;

	    if (isComp2nd(uv)) {
		isMAYBE = TRUE;
	    }
	    else if (ix) {
		/* NFKC_NO when having compatibility mapping. */
		char *canon  = (char *) dec_canonical(uv);
		char *compat = (char *) dec_compat(uv);
		if (compat && (!canon || strNE(canon, compat)))
		    XSRETURN_NO;
	    }
	} /* end of get NFC/NFKC property */

	preCC = curCC;
    }

    if (! isMAYBE)
	XSRETURN_YES;
    XSRETURN_UNDEF;
1N/A
1N/A
1N/A
# XSUB checkFCD(arg), alias checkFCC (ix == 1)
# Walks arg and compares, for each character, the combining class of
# the first code point of its canonical decomposition with the class of
# the last code point of the previous character's decomposition (the
# character itself stands in when it has no decomposition).  Returns NO
# on an ordering violation.  Under the checkFCC alias it additionally
# returns NO for exclusion/singleton/non-starter-decomposition
# characters and undef (MAYBE) when a character satisfied isComp2nd().
# Returns YES otherwise.  Croaks with ErrRetlenIsZero or
# ErrHopBeforeStart on decoder trouble.
void
checkFCD(arg)
    SV * arg
  PROTOTYPE: $
  ALIAS:
    checkFCC = 1
  PREINIT:
    UV uv, uvLead, uvTrail;
    SV *src;
    STRLEN srclen, retlen, canlen, canret;
    U8 *s, *e, *p, curCC, preCC;
    U8 *sCan, *pCan, *eCan;
    bool isMAYBE;
  CODE:
    /* operate on UTF-8; a mortal copy is upgraded so arg stays untouched */
    if (SvUTF8(arg)) {
	src = arg;
    } else {
	src = sv_mortalcopy(arg);
	sv_utf8_upgrade(src);
    }

    s = (U8*)SvPV(src,srclen);
    e = s + srclen;

    preCC = 0;
    isMAYBE = FALSE;
    p = s;
    while (p < e) {
	uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
	if (!retlen)
	    croak(ErrRetlenIsZero);
	p += retlen;

	/* leading code point: first of the decomposition, else uv itself */
	sCan = (U8*) dec_canonical(uv);
	if (sCan) {
	    canlen = (STRLEN)strlen((char *) sCan);
	    uvLead = utf8n_to_uvuni(sCan, canlen, &canret, AllowAnyUTF);
	} else {
	    uvLead = uv;
	}

	curCC = getCombinClass(uvLead);

	/* canonical ordering violated */
	if (curCC != 0 && curCC < preCC)
	    XSRETURN_NO;

	if (ix) {
	    if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv))
		XSRETURN_NO;
	    else if (isComp2nd(uv))
		isMAYBE = TRUE;
	}

	if (! sCan) {
	    preCC = curCC;
	    continue;
	}

	/* trailing code point: step back one character from the end of
	   the decomposition string */
	eCan = sCan + canlen;
	pCan = utf8_hop(eCan, -1);
	if (pCan < sCan)
	    croak(ErrHopBeforeStart);
	uvTrail = utf8n_to_uvuni(pCan, eCan - pCan, &canret, AllowAnyUTF);
	preCC = getCombinClass(uvTrail);
    }

    if (! isMAYBE)
	XSRETURN_YES;
    XSRETURN_UNDEF;
1N/A
1N/A
1N/A
# XSUB getCombinClass(uv)
# Declaration-only XSUB: exposes the C function getCombinClass() of this
# file to Perl, taking a code point and returning its U8 result.
U8
getCombinClass(uv)
    UV uv
  PROTOTYPE: $
1N/A
# XSUB isExclusion(uv)
# Declaration-only XSUB: exposes the C function isExclusion() to Perl.
# The C function is not defined in this file; presumably it comes from
# one of the generated headers included above.
bool
isExclusion(uv)
    UV uv
  PROTOTYPE: $
1N/A
# XSUB isSingleton(uv)
# Declaration-only XSUB: exposes the C function isSingleton() to Perl.
# The C function is not defined in this file; presumably it comes from
# one of the generated headers included above.
bool
isSingleton(uv)
    UV uv
  PROTOTYPE: $
1N/A
# XSUB isNonStDecomp(uv)
# Declaration-only XSUB: exposes the C function isNonStDecomp() to Perl.
# The C function is not defined in this file; presumably it comes from
# one of the generated headers included above.
bool
isNonStDecomp(uv)
    UV uv
  PROTOTYPE: $
1N/A
# XSUB isComp2nd(uv), aliases isNFC_MAYBE (1) and isNFKC_MAYBE (2)
# Declaration-only XSUB: exposes the C function isComp2nd() to Perl.
# There is no CODE section, so the alias index is not consulted here.
# The C function is not defined in this file; presumably it comes from
# one of the generated headers included above.
bool
isComp2nd(uv)
    UV uv
  PROTOTYPE: $
  ALIAS:
    isNFC_MAYBE = 1
    isNFKC_MAYBE = 2
1N/A
1N/A
1N/A
# XSUB isNFD_NO(uv), alias isNFKD_NO (ix == 1)
# Returns YES when uv is a Hangul syllable or has an entry in the
# decomposition table (compatibility table under the isNFKD_NO alias,
# canonical table otherwise); returns NO for every other code point.
void
isNFD_NO(uv)
    UV uv
  PROTOTYPE: $
  ALIAS:
    isNFKD_NO = 1
  CODE:
    /* Hangul syllables decompose arithmetically, no table entry needed */
    if (Hangul_IsS(uv))
	XSRETURN_YES; /* NFD_NO or NFKD_NO */

    if ((ix ? dec_compat(uv) : dec_canonical(uv)) != NULL)
	XSRETURN_YES; /* NFD_NO or NFKD_NO */

    XSRETURN_NO;
1N/A
1N/A
1N/A
# XSUB isComp_Ex(uv), aliases isNFC_NO (0) and isNFKC_NO (1)
# Returns YES when uv is reported by isExclusion(), isSingleton() or
# isNonStDecomp().  With alias index 1 it also returns YES when uv has a
# compatibility mapping that is absent from, or different to, its
# canonical mapping.  Returns NO in all remaining cases.
void
isComp_Ex(uv)
    UV uv
  PROTOTYPE: $
  ALIAS:
    isNFC_NO = 0
    isNFKC_NO = 1
  CODE:
    if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv))
	XSRETURN_YES; /* NFC_NO or NFKC_NO */

    if (ix) {
	char *canon  = (char *) dec_canonical(uv);
	char *compat = (char *) dec_compat(uv);

	/* a compatibility mapping of its own makes it a NO */
	if (compat && !(canon && strEQ(canon, compat)))
	    XSRETURN_YES; /* NFC_NO or NFKC_NO */
    }

    XSRETURN_NO;
1N/A
1N/A
1N/A
# XSUB getComposite(uv, uv2)
# Returns the code point that composite_uv() reports for the pair
# (uv, uv2) as an unsigned integer scalar, or undef when it reports 0.
SV*
getComposite(uv, uv2)
    UV uv
    UV uv2
  PROTOTYPE: $$
  PREINIT:
    UV composite;
  CODE:
    composite = composite_uv(uv, uv2);
    if (composite)
	RETVAL = newSVuv(composite);
    else
	RETVAL = &PL_sv_undef;
  OUTPUT:
    RETVAL
1N/A
1N/A
1N/A
# XSUB getCanon(uv), alias getCompat (ix == 1)
# Returns the decomposition of the single code point uv as a UTF-8
# flagged string: the arithmetic jamo sequence for a Hangul syllable,
# otherwise the table entry (compatibility table under the getCompat
# alias, canonical table otherwise).  Returns undef when the table has
# no entry for uv.
SV*
getCanon(uv)
    UV uv
  PROTOTYPE: $
  ALIAS:
    getCompat = 1
  PREINIT:
    U8 * rstr;
  CODE:
    if (Hangul_IsS(uv)) {
	/* build the jamo sequence in a fresh string scalar */
	RETVAL = newSV(1);
	(void)SvPOK_only(RETVAL);
	sv_cat_decompHangul(RETVAL, uv);
    } else {
	rstr = ix ? dec_compat(uv) : dec_canonical(uv);
	if (rstr == NULL)
	    XSRETURN_UNDEF;
	RETVAL = newSVpvn((char *)rstr, strlen((char *)rstr));
    }
    SvUTF8_on(RETVAL);
  OUTPUT:
    RETVAL
1N/A
1N/A
# XSUB splitOnLastStarter(arg)
# Scans arg backwards for the last character whose combining class is 0
# and pushes two UTF-8 flagged mortal strings onto the stack: the part
# before that character, and the part from that character to the end.
# When no such character exists the first string is empty.
# Croaks with ErrHopBeforeStart when stepping back leaves the buffer.
void
splitOnLastStarter(arg)
    SV * arg
  PREINIT:
    UV uv;
    SV *src, *svp;
    STRLEN srclen, retlen;
    U8 *s, *e, *p;
  PPCODE:
    /* operate on UTF-8; a mortal copy is upgraded so arg stays untouched */
    if (SvUTF8(arg)) {
	src = arg;
    } else {
	src = sv_mortalcopy(arg);
	sv_utf8_upgrade(src);
    }

    s = (U8*)SvPV(src,srclen);
    e = s + srclen;

    /* walk back one character at a time from the end */
    p = e;
    while (s < p) {
	p = utf8_hop(p, -1);
	if (p < s)
	    croak(ErrHopBeforeStart);
	uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
	if (getCombinClass(uv) == 0) /* Last Starter found */
	    break;
    }

    /* head: everything before the split point */
    svp = sv_2mortal(newSVpvn((char*)s, p - s));
    SvUTF8_on(svp);
    XPUSHs(svp);

    /* tail: from the split point to the end */
    svp = sv_2mortal(newSVpvn((char*)p, e - p));
    SvUTF8_on(svp);
    XPUSHs(svp);
1N/A