/* encode.h revision 7c478bd95313f5f23a4c958a745db2134aa03244 */
#ifndef ENCODE_H
#define ENCODE_H
#ifndef U8
/*
 * Deliberately sneaky: perl normally supplies U8 as a #define.  When no
 * such macro is visible we provide a typedef instead.  A typedef is
 * invisible to the preprocessor, so U8 still tests as "#ifndef" further
 * down, which lets the data parts be used without picking up extern
 * references to the code parts.
 */
typedef unsigned char U8;
#endif
/*
 * One slot of a translation page.  A page is an array of these slots and
 * each slot covers a range of source octets.
 *
 * NOTE(review): the member declarations had been reduced to the tails of
 * their comments, which left the struct body unparseable.  They are
 * restored here under the member names this header's description of the
 * lookup uses (seq, next, min, max, dlen, slen).  The exact types and
 * qualifiers are assumed -- confirm against the upstream Encode header.
 */
struct encpage_s
{
    /* fields ordered to pack nicely on 32-bit machines */
    const U8 *seq;          /* output octets: dlen octets per source octet,
                               indexed by (octet - min) */
    struct encpage_s *next; /* page to go to if we match */
    U8 min;                 /* lower bound of the source octet range */
    U8 max;                 /* upper bound of the source octet range */
    U8 dlen;                /* destination length - size of entries in seq */
    U8 slen;                /* source length - number of source octets
                               needed; 0 means no representation, MSB set
                               marks an approximate "FALLBACK" entry */
};
/*
At any point in a translation there is a page pointer which points
at an array of the above structures.
Basic operation :
get octet from source stream.
if (octet >= min && octet < max) {
if slen is 0 then we cannot represent this character.
if we have less than slen octets (including this one) then
we have a partial character.
otherwise
copy dlen octets from seq + dlen*(octet-min) to output
(dlen may be zero if we don't know yet.)
load page pointer with next to continue.
(if slen is one this is the end of a character)
get next octet.
}
else {
increment the page pointer to look at next slot in the array
}
arrays SHALL be constructed so there is an entry which matches
..0xFF at the end, and either maps it or indicates no
representation.
if MSB of slen is set then mapping is an approximate "FALLBACK" entry.
*/
/*
 * Per-encoding descriptor: where translation starts in each direction and
 * what to emit as the replacement sequence.
 *
 * NOTE(review): the first three member declarations had been reduced to
 * the tails of their comments, which left the struct body unparseable.
 * Their names and types below are assumed (only "rep" is named by the
 * surviving text) -- confirm against the upstream Encode header, which may
 * also carry further trailing members not visible here.
 */
struct encode_s
{
    struct encpage_s *t_utf8; /* presumably the starting page for going from
                                 the encoding to UTF-8 form */
    struct encpage_s *f_utf8; /* presumably the starting page for going
                                 from UTF-8 to the encoding */
    const U8 *rep;            /* replacement octets in this encoding,
                                 e.g. "?" */
    int replen;               /* Number of octets in rep */
};
#ifdef U8
/* See comment at top of file for deviousness */
/*
 * This region is compiled only when U8 is a preprocessor macro; the
 * typedef fallback near the top of the file does not make this test true.
 * NOTE(review): nothing is declared inside the guard in the text visible
 * here.  Presumably the declarations for the code parts belong in this
 * region -- confirm.
 */
#endif /* U8 */
/*
 * Status codes, small distinct integers.
 * NOTE(review): their meanings are not spelled out in this header.
 * Presumably they report the outcomes of the translation step described
 * above (partial character, no representation, approximate "FALLBACK"
 * mapping, ...) -- confirm against the code that returns them.
 */
#define ENCODE_NOSPACE     1
#define ENCODE_PARTIAL     2
#define ENCODE_NOREP       3
#define ENCODE_FALLBACK    4
#define ENCODE_FOUND_TERM  5

/* The three octets EF BF BD, i.e. U+FFFD written out in UTF-8. */
#define FBCHAR_UTF8        "\xEF\xBF\xBD"
/*
 * Fallback mode constants.
 *
 * Four of the ENCODE_FB_* names below are aliases for base flag macros
 * that are not defined anywhere in the visible part of this header, so
 * using them would leave an undeclared identifier behind.  The base flags
 * are supplied here under #ifndef guards, with the values listed in the
 * Encode module's documented fallback bitmask table.  If the flags are
 * already defined, the guards turn these into no-ops.
 */
#ifndef ENCODE_RETURN_ON_ERR
#define ENCODE_RETURN_ON_ERR 0x0004
#endif
#ifndef ENCODE_PERLQQ
#define ENCODE_PERLQQ 0x0100
#endif
#ifndef ENCODE_HTMLCREF
#define ENCODE_HTMLCREF 0x0200
#endif
#ifndef ENCODE_XMLCREF
#define ENCODE_XMLCREF 0x0400
#endif

#define ENCODE_FB_DEFAULT 0x0000
#define ENCODE_FB_CROAK 0x0001
#define ENCODE_FB_QUIET ENCODE_RETURN_ON_ERR
#define ENCODE_FB_PERLQQ ENCODE_PERLQQ
#define ENCODE_FB_HTMLCREF ENCODE_HTMLCREF
#define ENCODE_FB_XMLCREF ENCODE_XMLCREF
#endif /* ENCODE_H */