regcoll.c revision 3e14f97f673e8a630f076077de35afdd43dc1587
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin/***********************************************************************
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* This software is part of the ast package *
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner* Copyright (c) 1985-2010 AT&T Intellectual Property *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* and is licensed under the *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Common Public License, Version 1.0 *
7c2fbfb345896881c631598ee3852ce9ce33fb07April Chin* by AT&T Intellectual Property *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* A copy of the License is available at *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Information and Software Systems Research *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* AT&T Research *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Florham Park NJ *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Glenn Fowler <gsf@research.att.com> *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* David Korn <dgk@research.att.com> *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Phong Vo <kpv@research.att.com> *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin***********************************************************************/
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * regex collation symbol support
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chintypedef struct Ucs_map_s
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin const char* name;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define setattr(a,i) ((a)[(i)>>5]|=(1<<((i)&((1<<5)-1))))
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define tstattr(a,i) ((a)[(i)>>5]&(1<<((i)&((1<<5)-1))))
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define clrattr(a,i) ((a)[(i)>>5]&=~(1<<((i)&((1<<5)-1))))
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinstatic struct Local_s
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin unsigned char* a2n;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * initialize the writeable tables from the readonly data
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * the tables are big enough to be concerned about text vs. data vs. bss
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * UCS_BYTE==0 100K
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * UCS_BYTE==1 20K
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register int i;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register Ucs_map_t* a;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register Ucs_map_t* w;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return -1;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (!(w = (Ucs_map_t*)malloc(sizeof(Ucs_map_t) * (elementsof(ucs_attrs) + elementsof(ucs_names)))))
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return -1;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return -1;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return -1;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin w->name = ucs_strings[ucs_attrs[i].table] + ucs_attrs[i].index;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin w->name = ucs_strings[ucs_names[i].table] + ucs_names[i].index;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin while (a->next)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * return the collating symbol delimited by [c c], where c is either '=' or '.'
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * s points to the first char after the initial [
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * if e!=0 it is set to point to the next char in s on return
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * the collating symbol is converted to multibyte in <buf,size>
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * the return value is:
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * -1 syntax error or buf not large enough
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * >=0 size with 0-terminated mb collation element
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * or ligature value in buf
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinregcollate(register const char* s, char** e, char* buf, int size)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register int c;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register char* u;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register char* b;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register char* x;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register Ucs_map_t* a;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin const char* t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin const char* v;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if ((n = (s - t)) == 1)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return -1;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (!(c = *s++))
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return -1;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (!(c = *s++))
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return -1;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (c != ']')
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return -1;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (c == ' ' || c == '-' && u > b && *s != ' ' && *s != '-')
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (u > x)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (u < x)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin *u++ = ' ';
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin else if (islower(*v))
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin } while (r > 0);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin for (a = z; a; a = a->next)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if ((attr[0] & a->attr[0]) == attr[0] && (attr[1] & a->attr[1]) == attr[1] && (attr[2] & a->attr[2]) == attr[2])
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin w[0] = a->code;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (r < 0)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return -1;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin buf[r = n] = 0;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin *e = (char*)s;