/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
*/
/* This file is included! */
case BT_LEAD ## n: \
return XML_TOK_PARTIAL_CHAR; \
*(nextTokPtr) = (ptr); \
return XML_TOK_INVALID; \
} \
ptr += n; \
break;
case BT_NONXML: \
case BT_MALFORM: \
case BT_TRAIL: \
*(nextTokPtr) = (ptr); \
return XML_TOK_INVALID;
case BT_LEAD ## n: \
return XML_TOK_PARTIAL_CHAR; \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
ptr += n; \
break;
case BT_NONASCII: \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
case BT_NMSTRT: \
case BT_HEX: \
case BT_DIGIT: \
case BT_NAME: \
case BT_MINUS: \
break; \
case BT_LEAD ## n: \
return XML_TOK_PARTIAL_CHAR; \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
ptr += n; \
break;
case BT_NONASCII: \
*nextTokPtr = ptr; \
return XML_TOK_INVALID; \
} \
case BT_NMSTRT: \
case BT_HEX: \
break; \
/* ptr points to character following "<!-".
   Result codes produced by the visible code: XML_TOK_COMMENT,
   XML_TOK_PARTIAL and XML_TOK_INVALID.  Every XML_TOK_INVALID return
   first stores the current ptr in *nextTokPtr.
   NOTE(review): the tests guarding the XML_TOK_PARTIAL returns are not
   visible here; presumably they fire when the input runs out - confirm. */
static int PTRCALL
const char *end, const char **nextTokPtr)
{
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
case BT_MINUS:
return XML_TOK_PARTIAL;
return XML_TOK_PARTIAL;
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
return XML_TOK_COMMENT;
}
break;
default:
break;
}
}
}
/* Reached without returning a complete token. */
return XML_TOK_PARTIAL;
}
/* ptr points to character following "<!".
   Result codes produced by the visible code: XML_TOK_COND_SECT_OPEN,
   XML_TOK_DECL_OPEN (after storing ptr in *nextTokPtr), XML_TOK_PARTIAL
   and XML_TOK_INVALID (always after storing ptr in *nextTokPtr).
   BT_NMSTRT and BT_HEX are the byte types that let scanning continue. */
static int PTRCALL
const char *end, const char **nextTokPtr)
{
return XML_TOK_PARTIAL;
case BT_MINUS:
case BT_LSQB:
return XML_TOK_COND_SECT_OPEN;
case BT_NMSTRT:
case BT_HEX:
break;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
case BT_PERCNT:
return XML_TOK_PARTIAL;
/* don't allow <!ENTITY% foo "whatever"> */
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
/* fall through */
*nextTokPtr = ptr;
return XML_TOK_DECL_OPEN;
case BT_NMSTRT:
case BT_HEX:
break;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
return XML_TOK_PARTIAL;
}
/* Classifies a processing-instruction target.
   *tokPtr is set to XML_TOK_PI up front.  The visible switch bodies
   accept x/X, m/M and l/L in turn; any other character returns 1
   immediately.  When all three matched, the result is 0 if at least one
   of them was upper-case and 1 otherwise.
   NOTE(review): the switch headers and any later update of *tokPtr are
   not visible here - confirm against the full function. */
static int PTRCALL
{
/* Set as soon as an upper-case X, M or L is seen. */
int upper = 0;
*tokPtr = XML_TOK_PI;
return 1;
case ASCII_x:
break;
case ASCII_X:
upper = 1;
break;
default:
return 1;
}
case ASCII_m:
break;
case ASCII_M:
upper = 1;
break;
default:
return 1;
}
case ASCII_l:
break;
case ASCII_L:
upper = 1;
break;
default:
return 1;
}
/* All three letters matched; an upper-case spelling yields 0. */
if (upper)
return 0;
return 1;
}
/* ptr points to character following "<?".
   Result codes produced by the visible code: the value held in tok,
   XML_TOK_PARTIAL and XML_TOK_INVALID (always after storing ptr in
   *nextTokPtr).
   NOTE(review): the assignment of tok is not visible here; presumably
   it is filled in by the PI-target check - confirm. */
static int PTRCALL
const char *end, const char **nextTokPtr)
{
int tok;
return XML_TOK_PARTIAL;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
case BT_QUEST:
return XML_TOK_PARTIAL;
return tok;
}
break;
default:
break;
}
}
return XML_TOK_PARTIAL;
case BT_QUEST:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
return XML_TOK_PARTIAL;
return tok;
}
/* fall through */
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
return XML_TOK_PARTIAL;
}
/* Scans the opening of a CDATA section.
   Result codes produced by the visible code: XML_TOK_CDATA_SECT_OPEN
   and XML_TOK_INVALID (both after storing ptr in *nextTokPtr), and
   XML_TOK_PARTIAL.
   NOTE(review): the loop that uses i is not visible here; presumably it
   compares the input against the "CDATA[" keyword - confirm. */
static int PTRCALL
const char *end, const char **nextTokPtr)
{
int i;
/* CDATA[ */
return XML_TOK_PARTIAL;
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
*nextTokPtr = ptr;
return XML_TOK_CDATA_SECT_OPEN;
}
/* Tokenizer for the inside of a CDATA section.
   Result codes produced by the visible code: XML_TOK_NONE,
   XML_TOK_PARTIAL, XML_TOK_CDATA_SECT_CLOSE, XML_TOK_DATA_NEWLINE and
   XML_TOK_DATA_CHARS.
   NOTE(review): the conditions in front of the early returns are not
   visible here - confirm against the full function. */
static int PTRCALL
const char *end, const char **nextTokPtr)
{
return XML_TOK_NONE;
if (n == 0)
return XML_TOK_PARTIAL;
}
}
case BT_RSQB:
return XML_TOK_PARTIAL;
break;
return XML_TOK_PARTIAL;
break;
}
return XML_TOK_CDATA_SECT_CLOSE;
case BT_CR:
return XML_TOK_PARTIAL;
*nextTokPtr = ptr;
return XML_TOK_DATA_NEWLINE;
case BT_LF:
return XML_TOK_DATA_NEWLINE;
default:
break;
}
case BT_LEAD ## n: \
*nextTokPtr = ptr; \
return XML_TOK_DATA_CHARS; \
} \
ptr += n; \
break;
/* These byte types end the current run of character data. */
case BT_NONXML:
case BT_MALFORM:
case BT_TRAIL:
case BT_CR:
case BT_LF:
case BT_RSQB:
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
default:
break;
}
}
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
}
/* ptr points to character following "</".
   Result codes produced by the visible code: XML_TOK_END_TAG (in the
   BT_GT branches), XML_TOK_PARTIAL and XML_TOK_INVALID (always after
   storing ptr in *nextTokPtr). */
static int PTRCALL
const char *end, const char **nextTokPtr)
{
return XML_TOK_PARTIAL;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
break;
case BT_GT:
return XML_TOK_END_TAG;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
return XML_TOK_PARTIAL;
case BT_COLON:
/* No need to check qname syntax here,
since the end-tag must match the start-tag exactly. */
break;
case BT_GT:
return XML_TOK_END_TAG;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
return XML_TOK_PARTIAL;
}
/* ptr points to character following "&#X".
   BT_DIGIT and BT_HEX are the accepted byte types; BT_SEMI yields
   XML_TOK_CHAR_REF.  Any other byte type stores ptr in *nextTokPtr and
   returns XML_TOK_INVALID.  The trailing return is XML_TOK_PARTIAL. */
static int PTRCALL
const char *end, const char **nextTokPtr)
{
case BT_DIGIT:
case BT_HEX:
break;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
case BT_DIGIT:
case BT_HEX:
break;
case BT_SEMI:
return XML_TOK_CHAR_REF;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
}
return XML_TOK_PARTIAL;
}
/* ptr points to character following "&#".
   BT_DIGIT is the only accepted byte type; BT_SEMI yields
   XML_TOK_CHAR_REF.  Any other byte type stores ptr in *nextTokPtr and
   returns XML_TOK_INVALID.  The trailing return is XML_TOK_PARTIAL. */
static int PTRCALL
const char *end, const char **nextTokPtr)
{
case BT_DIGIT:
break;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
case BT_DIGIT:
break;
case BT_SEMI:
return XML_TOK_CHAR_REF;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
}
return XML_TOK_PARTIAL;
}
/* ptr points to character following "&".
   Result codes produced by the visible code: XML_TOK_ENTITY_REF (in the
   BT_SEMI branch), XML_TOK_PARTIAL and XML_TOK_INVALID (always after
   storing ptr in *nextTokPtr).
   NOTE(review): the statement belonging to the BT_NUM label is not
   visible here - confirm against the full function. */
static int PTRCALL
const char **nextTokPtr)
{
return XML_TOK_PARTIAL;
case BT_NUM:
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
case BT_SEMI:
return XML_TOK_ENTITY_REF;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
return XML_TOK_PARTIAL;
}
/* ptr points to character following first character of attribute name.
   Result codes produced by the visible code:
   XML_TOK_START_TAG_WITH_ATTS, XML_TOK_EMPTY_ELEMENT_WITH_ATTS,
   XML_TOK_PARTIAL, XML_TOK_INVALID (always after storing ptr in
   *nextTokPtr) and a non-positive tok passed through from the BT_AMP
   branch.
   NOTE(review): the assignments of open and tok are not visible here -
   confirm against the full function. */
static int PTRCALL
const char **nextTokPtr)
{
/* Non-zero once the current name has contained a colon. */
int hadColon = 0;
case BT_COLON:
/* A second colon within one name is rejected. */
if (hadColon) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
hadColon = 1;
return XML_TOK_PARTIAL;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
break;
/* Only BT_S, BT_LF and BT_CR may precede the BT_EQUALS. */
for (;;) {
int t;
return XML_TOK_PARTIAL;
if (t == BT_EQUALS)
break;
switch (t) {
case BT_S:
case BT_LF:
case BT_CR:
break;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
/* fall through */
case BT_EQUALS:
{
int open;
/* Reset the colon tracking. */
hadColon = 0;
for (;;) {
return XML_TOK_PARTIAL;
break;
switch (open) {
case BT_S:
case BT_LF:
case BT_CR:
break;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
/* in attribute value */
for (;;) {
int t;
return XML_TOK_PARTIAL;
/* A byte type equal to open ends the value. */
if (t == open)
break;
switch (t) {
case BT_AMP:
{
/* A non-positive tok is returned as is; *nextTokPtr is only
   rewritten when it is XML_TOK_INVALID. */
if (tok <= 0) {
if (tok == XML_TOK_INVALID)
*nextTokPtr = ptr;
return tok;
}
break;
}
case BT_LT:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
default:
break;
}
}
return XML_TOK_PARTIAL;
case BT_S:
case BT_CR:
case BT_LF:
break;
case BT_SOL:
goto sol;
case BT_GT:
goto gt;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
/* ptr points to closing quote */
for (;;) {
return XML_TOK_PARTIAL;
continue;
case BT_GT:
gt:
return XML_TOK_START_TAG_WITH_ATTS;
case BT_SOL:
sol:
return XML_TOK_PARTIAL;
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
break;
}
break;
}
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
return XML_TOK_PARTIAL;
}
/* ptr points to character following "<".
   Result codes produced by the visible code:
   XML_TOK_START_TAG_NO_ATTS, XML_TOK_EMPTY_ELEMENT_NO_ATTS,
   XML_TOK_PARTIAL and XML_TOK_INVALID (always after storing ptr in
   *nextTokPtr).
   NOTE(review): the statements for the BT_EXCL, BT_QUEST and BT_SOL
   labels are not visible here; presumably they hand off to the
   matching scanners - confirm. */
static int PTRCALL
const char **nextTokPtr)
{
int hadColon;
return XML_TOK_PARTIAL;
case BT_EXCL:
return XML_TOK_PARTIAL;
case BT_MINUS:
case BT_LSQB:
end, nextTokPtr);
}
*nextTokPtr = ptr;
return XML_TOK_INVALID;
case BT_QUEST:
case BT_SOL:
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
hadColon = 0;
/* we have a start-tag */
case BT_COLON:
/* A second colon within the name is rejected. */
if (hadColon) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
hadColon = 1;
return XML_TOK_PARTIAL;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
break;
{
case BT_GT:
goto gt;
case BT_SOL:
goto sol;
continue;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
return XML_TOK_PARTIAL;
}
case BT_GT:
gt:
return XML_TOK_START_TAG_NO_ATTS;
case BT_SOL:
sol:
return XML_TOK_PARTIAL;
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
return XML_TOK_PARTIAL;
}
/* Tokenizer for element content.
   Result codes produced by the visible code: XML_TOK_NONE,
   XML_TOK_PARTIAL, XML_TOK_TRAILING_CR, XML_TOK_DATA_NEWLINE,
   XML_TOK_TRAILING_RSQB, XML_TOK_INVALID and XML_TOK_DATA_CHARS.
   NOTE(review): the statements for the first BT_LT and BT_AMP labels
   are not visible here; presumably they hand off to the markup and
   reference scanners - confirm. */
static int PTRCALL
const char **nextTokPtr)
{
return XML_TOK_NONE;
if (n == 0)
return XML_TOK_PARTIAL;
}
}
case BT_LT:
case BT_AMP:
case BT_CR:
return XML_TOK_TRAILING_CR;
*nextTokPtr = ptr;
return XML_TOK_DATA_NEWLINE;
case BT_LF:
return XML_TOK_DATA_NEWLINE;
case BT_RSQB:
return XML_TOK_TRAILING_RSQB;
break;
return XML_TOK_TRAILING_RSQB;
break;
}
*nextTokPtr = ptr;
return XML_TOK_INVALID;
default:
break;
}
case BT_LEAD ## n: \
*nextTokPtr = ptr; \
return XML_TOK_DATA_CHARS; \
} \
ptr += n; \
break;
case BT_RSQB:
break;
}
break;
}
return XML_TOK_INVALID;
}
}
/* fall through */
/* These byte types end the current run of character data. */
case BT_AMP:
case BT_LT:
case BT_NONXML:
case BT_MALFORM:
case BT_TRAIL:
case BT_CR:
case BT_LF:
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
default:
break;
}
}
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
}
/* ptr points to character following "%".
   Result codes produced by the visible code: XML_TOK_PERCENT (after
   storing ptr in *nextTokPtr), XML_TOK_PARAM_ENTITY_REF (in the BT_SEMI
   branch), XML_TOK_PARTIAL and XML_TOK_INVALID (always after storing
   ptr in *nextTokPtr). */
static int PTRCALL
const char **nextTokPtr)
{
return XML_TOK_PARTIAL;
*nextTokPtr = ptr;
return XML_TOK_PERCENT;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
case BT_SEMI:
return XML_TOK_PARAM_ENTITY_REF;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
return XML_TOK_PARTIAL;
}
/* Scans a name token that yields XML_TOK_POUND_NAME.
   Result codes produced by the visible code: XML_TOK_POUND_NAME (after
   storing ptr in *nextTokPtr), XML_TOK_PARTIAL, XML_TOK_INVALID (always
   after storing ptr in *nextTokPtr) and, as the trailing return, the
   negated code -XML_TOK_POUND_NAME.
   NOTE(review): presumably the negated code marks a name that ran up
   to the end of the input - confirm. */
static int PTRCALL
const char **nextTokPtr)
{
return XML_TOK_PARTIAL;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
*nextTokPtr = ptr;
return XML_TOK_POUND_NAME;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
return -XML_TOK_POUND_NAME;
}
/* Scans a quoted literal.
   Result codes produced by the visible code: XML_TOK_LITERAL (after
   storing ptr in *nextTokPtr), the negated code -XML_TOK_LITERAL,
   XML_TOK_INVALID and XML_TOK_PARTIAL.
   NOTE(review): where t and open come from is not visible here;
   presumably open is the byte type of the opening quote - confirm. */
static int PTRCALL
const char **nextTokPtr)
{
switch (t) {
case BT_QUOT:
case BT_APOS:
/* Only a quote whose byte type equals open is handled further. */
if (t != open)
break;
return -XML_TOK_LITERAL;
*nextTokPtr = ptr;
return XML_TOK_LITERAL;
default:
return XML_TOK_INVALID;
}
default:
break;
}
}
return XML_TOK_PARTIAL;
}
/* Tokenizer for the prolog (declarations and content models).
   The visible code returns, among others: XML_TOK_NONE,
   XML_TOK_PARTIAL, XML_TOK_INSTANCE_START, XML_TOK_PROLOG_S,
   XML_TOK_COMMA, XML_TOK_OPEN_BRACKET, XML_TOK_CLOSE_BRACKET,
   XML_TOK_COND_SECT_CLOSE, XML_TOK_OPEN_PAREN, the XML_TOK_CLOSE_PAREN
   family, XML_TOK_OR, XML_TOK_DECL_CLOSE, the XML_TOK_NAME_PLUS /
   _ASTERISK / _QUESTION codes, the value of tok, and XML_TOK_INVALID.
   Several returns use a negated token code (for example
   -XML_TOK_PROLOG_S and the trailing -tok).
   NOTE(review): presumably a negated code marks a token that ran up to
   the end of the input - confirm. */
static int PTRCALL
const char **nextTokPtr)
{
/* XML_TOK_NAME or XML_TOK_NMTOKEN in the visible assignments. */
int tok;
return XML_TOK_NONE;
if (n == 0)
return XML_TOK_PARTIAL;
}
}
case BT_QUOT:
case BT_APOS:
case BT_LT:
{
return XML_TOK_PARTIAL;
case BT_EXCL:
case BT_QUEST:
case BT_NMSTRT:
case BT_HEX:
case BT_NONASCII:
case BT_LEAD2:
case BT_LEAD3:
case BT_LEAD4:
return XML_TOK_INSTANCE_START;
}
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
case BT_CR:
*nextTokPtr = end;
return -XML_TOK_PROLOG_S;
}
/* fall through */
for (;;) {
break;
break;
case BT_CR:
break;
/* fall through */
default:
*nextTokPtr = ptr;
return XML_TOK_PROLOG_S;
}
}
*nextTokPtr = ptr;
return XML_TOK_PROLOG_S;
case BT_PERCNT:
case BT_COMMA:
return XML_TOK_COMMA;
case BT_LSQB:
return XML_TOK_OPEN_BRACKET;
case BT_RSQB:
return -XML_TOK_CLOSE_BRACKET;
return XML_TOK_PARTIAL;
return XML_TOK_COND_SECT_CLOSE;
}
}
*nextTokPtr = ptr;
return XML_TOK_CLOSE_BRACKET;
case BT_LPAR:
return XML_TOK_OPEN_PAREN;
case BT_RPAR:
return -XML_TOK_CLOSE_PAREN;
case BT_AST:
return XML_TOK_CLOSE_PAREN_ASTERISK;
case BT_QUEST:
return XML_TOK_CLOSE_PAREN_QUESTION;
case BT_PLUS:
return XML_TOK_CLOSE_PAREN_PLUS;
case BT_RPAR:
*nextTokPtr = ptr;
return XML_TOK_CLOSE_PAREN;
}
*nextTokPtr = ptr;
return XML_TOK_INVALID;
case BT_VERBAR:
return XML_TOK_OR;
case BT_GT:
return XML_TOK_DECL_CLOSE;
case BT_NUM:
case BT_LEAD ## n: \
return XML_TOK_PARTIAL_CHAR; \
ptr += n; \
tok = XML_TOK_NAME; \
break; \
} \
ptr += n; \
tok = XML_TOK_NMTOKEN; \
break; \
} \
*nextTokPtr = ptr; \
return XML_TOK_INVALID;
case BT_NMSTRT:
case BT_HEX:
tok = XML_TOK_NAME;
break;
case BT_DIGIT:
case BT_NAME:
case BT_MINUS:
case BT_COLON:
break;
case BT_NONASCII:
tok = XML_TOK_NAME;
break;
}
break;
}
/* fall through */
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
*nextTokPtr = ptr;
return tok;
case BT_COLON:
switch (tok) {
case XML_TOK_NAME:
return XML_TOK_PARTIAL;
default:
break;
}
break;
case XML_TOK_PREFIXED_NAME:
break;
}
break;
/* An occurrence indicator (+, *, ?) is invalid after an
   XML_TOK_NMTOKEN. */
case BT_PLUS:
if (tok == XML_TOK_NMTOKEN) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
return XML_TOK_NAME_PLUS;
case BT_AST:
if (tok == XML_TOK_NMTOKEN) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
return XML_TOK_NAME_ASTERISK;
case BT_QUEST:
if (tok == XML_TOK_NMTOKEN) {
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
return XML_TOK_NAME_QUESTION;
default:
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
}
return -tok;
}
/* Tokenizer for the text of an attribute value.
   Result codes produced by the visible code: XML_TOK_NONE,
   XML_TOK_DATA_CHARS, XML_TOK_DATA_NEWLINE, XML_TOK_TRAILING_CR,
   XML_TOK_ATTRIBUTE_VALUE_S and, for BT_LT, XML_TOK_INVALID.
   NOTE(review): the use of start is not visible here; presumably it
   remembers where scanning began so pending data can be flushed before
   a special character is handled - confirm. */
static int PTRCALL
const char *end, const char **nextTokPtr)
{
const char *start;
return XML_TOK_NONE;
case BT_AMP:
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
case BT_LT:
/* this is for inside entity references */
*nextTokPtr = ptr;
return XML_TOK_INVALID;
case BT_LF:
return XML_TOK_DATA_NEWLINE;
}
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
case BT_CR:
return XML_TOK_TRAILING_CR;
*nextTokPtr = ptr;
return XML_TOK_DATA_NEWLINE;
}
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
case BT_S:
return XML_TOK_ATTRIBUTE_VALUE_S;
}
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
default:
break;
}
}
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
}
/* Tokenizer for the text of an entity value.
   Result codes produced by the visible code: XML_TOK_NONE,
   XML_TOK_DATA_CHARS, XML_TOK_DATA_NEWLINE and XML_TOK_TRAILING_CR.
   NOTE(review): the BT_PERCNT branch ends in a call whose name is not
   visible here; presumably it scans a parameter-entity reference -
   confirm. */
static int PTRCALL
const char *end, const char **nextTokPtr)
{
const char *start;
return XML_TOK_NONE;
case BT_AMP:
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
case BT_PERCNT:
end, nextTokPtr);
}
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
case BT_LF:
return XML_TOK_DATA_NEWLINE;
}
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
case BT_CR:
return XML_TOK_TRAILING_CR;
*nextTokPtr = ptr;
return XML_TOK_DATA_NEWLINE;
}
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
default:
break;
}
}
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
}
/* Tokenizer for an ignored conditional section.
   level counts nesting: it is incremented in the BT_LT branch and
   decremented in the BT_RSQB branch.  When the BT_RSQB branch is reached
   at level 0, ptr is stored in *nextTokPtr and XML_TOK_IGNORE_SECT is
   returned.  Every other visible return is XML_TOK_PARTIAL.
   NOTE(review): the character tests guarding ++level and --level are
   not visible here - confirm against the full function. */
static int PTRCALL
const char *end, const char **nextTokPtr)
{
int level = 0;
}
}
case BT_LT:
return XML_TOK_PARTIAL;
return XML_TOK_PARTIAL;
++level;
}
}
break;
case BT_RSQB:
return XML_TOK_PARTIAL;
return XML_TOK_PARTIAL;
/* Closing the outermost section ends the token. */
if (level == 0) {
*nextTokPtr = ptr;
return XML_TOK_IGNORE_SECT;
}
--level;
}
}
break;
default:
break;
}
}
return XML_TOK_PARTIAL;
}
#endif /* XML_DTD */
/* Checks the characters of a public identifier.
   Returns 1 when the end is reached without a rejection and 0 from the
   visible rejecting branches.  The byte types listed in the first case
   group are accepted, as are the code points 0x24 ($) and 0x40 (@).
   NOTE(review): badPtr is presumably set to the offending position
   before a 0 return; those assignments are not visible here - confirm. */
static int PTRCALL
const char **badPtr)
{
case BT_DIGIT:
case BT_HEX:
case BT_MINUS:
case BT_APOS:
case BT_LPAR:
case BT_RPAR:
case BT_PLUS:
case BT_COMMA:
case BT_SOL:
case BT_EQUALS:
case BT_QUEST:
case BT_CR:
case BT_LF:
case BT_SEMI:
case BT_EXCL:
case BT_AST:
case BT_PERCNT:
case BT_NUM:
case BT_COLON:
break;
case BT_S:
return 0;
}
break;
case BT_NAME:
case BT_NMSTRT:
break;
default:
case 0x24: /* $ */
case 0x40: /* @ */
break;
default:
return 0;
}
break;
}
}
return 1;
}
/* This must only be called for a well-formed start-tag or empty
element tag. Returns the number of attributes. Pointers to the
first attsMax attributes are stored in atts.
nAtts is the running count; it is incremented in the BT_QUOT and
BT_APOS branches and returned from the BT_GT / BT_SOL branch.
*/
static int PTRCALL
{
int nAtts = 0;
initialization just to shut up compilers */
#define START_NAME \
} \
}
case BT_NONASCII:
case BT_NMSTRT:
case BT_HEX:
break;
case BT_QUOT:
}
nAtts++;
}
break;
case BT_APOS:
}
nAtts++;
}
break;
case BT_AMP:
break;
case BT_S:
break;
/* This case ensures that the first attribute name is counted.
Apart from that we could just change state on the quote. */
break;
case BT_GT:
case BT_SOL:
return nAtts;
break;
default:
break;
}
}
/* not reached */
}
/* Computes the number denoted by a character reference.
   The visible code accumulates into result either by shifting left four
   bits (one hexadecimal digit per step) or by multiplying by ten (one
   decimal digit per step).  As soon as result reaches 0x110000 or more
   the function returns -1; otherwise the final value is passed to
   checkCharRefNumber() and that result is returned.
   NOTE(review): the digit additions themselves are not visible here -
   confirm against the full function. */
static int PTRFASTCALL
{
int result = 0;
/* skip &# */
switch (c) {
result <<= 4;
break;
result <<= 4;
break;
result <<= 4;
break;
}
/* Out-of-range guard for the hexadecimal path. */
if (result >= 0x110000)
return -1;
}
}
else {
result *= 10;
/* Out-of-range guard for the decimal path. */
if (result >= 0x110000)
return -1;
}
}
return checkCharRefNumber(result);
}
/* Maps the name of a predefined entity to its character.
   The visible branches return ASCII_LT, ASCII_GT, ASCII_AMP, ASCII_QUOT
   or ASCII_APOS; 0 is returned when nothing matched.
   NOTE(review): the outer case labels 2, 3 and 4 presumably select on
   the length of the name; the switch header is not visible here -
   confirm. */
static int PTRCALL
const char *end)
{
case 2:
case ASCII_l:
return ASCII_LT;
case ASCII_g:
return ASCII_GT;
}
}
break;
case 3:
return ASCII_AMP;
}
}
break;
case 4:
case ASCII_q:
return ASCII_QUOT;
}
}
break;
case ASCII_a:
return ASCII_APOS;
}
}
break;
}
}
/* No predefined entity matched. */
return 0;
}
/* Compares two names; the visible returns are 0 and 1.
   In both default branches the result is 1.  In the second switch a
   byte of any name-character type (BT_LEAD2..4, BT_NONASCII, BT_NMSTRT,
   BT_COLON, BT_HEX, BT_DIGIT, BT_NAME, BT_MINUS) yields 0.
   NOTE(review): the comparisons in front of the other 0 returns are not
   visible here; presumably they detect differing bytes - confirm. */
static int PTRCALL
{
for (;;) {
case BT_LEAD ## n: \
return 0;
/* fall through */
return 0;
break;
case BT_NONASCII:
case BT_NMSTRT:
case BT_COLON:
case BT_HEX:
case BT_DIGIT:
case BT_NAME:
case BT_MINUS:
return 0;
return 0;
return 0;
return 0;
}
}
}
break;
default:
return 1;
case BT_LEAD2:
case BT_LEAD3:
case BT_LEAD4:
case BT_NONASCII:
case BT_NMSTRT:
case BT_COLON:
case BT_HEX:
case BT_DIGIT:
case BT_NAME:
case BT_MINUS:
return 0;
default:
return 1;
}
}
}
/* not reached */
}
/* Only two "return 0" statements of this function are visible.
   NOTE(review): presumably it compares an encoded name with an ASCII
   string and returns 0 on the first difference; the comparisons and the
   final return are not visible here - confirm. */
static int PTRCALL
{
return 0;
return 0;
}
}
/* Loops over bytes of the name-character types (BT_NONASCII,
   BT_NMSTRT, BT_COLON, BT_HEX, BT_DIGIT, BT_NAME, BT_MINUS).
   NOTE(review): the statement under the default label is not visible
   here; presumably it returns the length covered so far - confirm. */
static int PTRFASTCALL
{
for (;;) {
case BT_NONASCII:
case BT_NMSTRT:
case BT_COLON:
case BT_HEX:
case BT_DIGIT:
case BT_NAME:
case BT_MINUS:
break;
default:
}
}
}
/* Loops while the byte type is BT_LF, BT_CR or BT_S and returns ptr at
   the first byte of any other type. */
static const char * PTRFASTCALL
{
for (;;) {
case BT_LF:
case BT_CR:
case BT_S:
break;
default:
/* First byte that is not whitespace. */
return ptr;
}
}
}
/* Updates the position record pos.
   In the visible code the BT_LF and BT_CR branches each increment
   pos->lineNumber, and pos->columnNumber is incremented after the
   switch.
   NOTE(review): any column reset on a line break and any special
   handling of a CR LF pair are not visible here - confirm. */
static void PTRCALL
const char *ptr,
const char *end,
{
case BT_LEAD ## n: \
ptr += n; \
break;
case BT_LF:
pos->lineNumber++;
break;
case BT_CR:
pos->lineNumber++;
break;
default:
break;
}
pos->columnNumber++;
}
}
#endif /* XML_TOK_IMPL_C */