38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Summary: internal routines exported by the parser.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Description: this module exports a number of internal parsing routines;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * they are not all really intended for applications, but
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * can prove useful when doing low-level processing.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Copy: See Copyright for the status of this software.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Author: Daniel Veillard
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncextern "C" {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * xmlParserMaxDepth:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Arbitrary depth limit for the XML documents that we allow to be
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * processed. This is not a limitation of the parser but a safety
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * boundary feature.
4fe8ee94ff7f4b4dca778979a9658d0a12b4c3e4vboxsync * XML_MAX_TEXT_LENGTH:
4fe8ee94ff7f4b4dca778979a9658d0a12b4c3e4vboxsync * Maximum size allowed for a single text node when building a tree.
4fe8ee94ff7f4b4dca778979a9658d0a12b4c3e4vboxsync * This is not a limitation of the parser but a safety boundary feature,
4fe8ee94ff7f4b4dca778979a9658d0a12b4c3e4vboxsync * use XML_PARSE_HUGE option to override it.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * XML_MAX_NAMELEN:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Identifiers can be longer, but this will be more costly
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * at runtime.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * INPUT_CHUNK:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The parser tries to always have that amount of input ready.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * One of the points is to provide context when reporting errors.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * UNICODE version of the macros. *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * IS_BYTE_CHAR:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @c: a byte value (int)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Macro to check the following production in the XML spec:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [2] Char ::= #x9 | #xA | #xD | [#x20...]
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * any byte character in the accepted range
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @c: a Unicode value (int)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Macro to check the following production in the XML spec:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * | [#x10000-#x10FFFF]
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * IS_CHAR_CH:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @c: an xmlChar (usually an unsigned char)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Behaves like IS_CHAR on single-byte value
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * IS_BLANK:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @c: a Unicode value (int)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Macro to check the following production in the XML spec:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [3] S ::= (#x20 | #x9 | #xD | #xA)+
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * IS_BLANK_CH:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @c: an xmlChar value (normally unsigned char)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Behaviour same as IS_BLANK
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * IS_BASECHAR:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @c: a Unicode value (int)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Macro to check the following production in the XML spec:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [85] BaseChar ::= ... long list see REC ...
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * IS_DIGIT:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @c: a Unicode value (int)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Macro to check the following production in the XML spec:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [88] Digit ::= ... long list see REC ...
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * IS_DIGIT_CH:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @c: an xmlChar value (usually an unsigned char)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Behaves like IS_DIGIT but with a single byte argument
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * IS_COMBINING:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @c: a Unicode value (int)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Macro to check the following production in the XML spec:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [87] CombiningChar ::= ... long list see REC ...
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * IS_COMBINING_CH:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @c: an xmlChar (usually an unsigned char)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Always false (all combining chars > 0xff)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * IS_EXTENDER:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @c: a Unicode value (int)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Macro to check the following production in the XML spec:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [#x309D-#x309E] | [#x30FC-#x30FE]
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * IS_EXTENDER_CH:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @c: an xmlChar value (usually an unsigned char)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Behaves like IS_EXTENDER but with a single-byte argument
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * IS_IDEOGRAPHIC:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @c: a Unicode value (int)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Macro to check the following production in the XML spec:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * IS_LETTER:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @c: a Unicode value (int)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Macro to check the following production in the XML spec:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [84] Letter ::= BaseChar | Ideographic
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)) /* c is passed to both sub-macros (|| short-circuits); avoid arguments with side effects */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * IS_LETTER_CH:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @c: an xmlChar value (normally unsigned char)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Macro behaves like IS_LETTER, but only checks base chars
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * IS_ASCII_LETTER:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @c: an xmlChar value
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Macro to check [a-zA-Z]
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#define IS_ASCII_LETTER(c) (((0x41 <= (c)) && ((c) <= 0x5a)) || \
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * IS_ASCII_DIGIT:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @c: an xmlChar value
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Macro to check [0-9]
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#define IS_ASCII_DIGIT(c) ((0x30 <= (c)) && ((c) <= 0x39)) /* 0x30..0x39 are ASCII '0'..'9'; c is expanded twice, so avoid arguments with side effects */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * IS_PUBIDCHAR:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @c: a Unicode value (int)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Macro to check the following production in the XML spec:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * IS_PUBIDCHAR_CH:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @c: an xmlChar value (normally unsigned char)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Same as IS_PUBIDCHAR but for single-byte value
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * SKIP_EOL:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @p: a UTF-8 string pointer
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Skips the end of line chars.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (*(p) == 0x13) { p++ ; if (*(p) == 0x10) p++; } \
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * MOVETO_ENDTAG:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @p: a UTF-8 string pointer
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Skips to the next '>' char.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((*p) && (*(p) != '>')) (p)++
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * MOVETO_STARTTAG:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @p: a UTF-8 string pointer
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Skips to the next '<' char.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((*p) && (*(p) != '<')) (p)++
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Global variables used for predefined strings.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Function to finish the work of the macros where needed.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parser context.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/* internal error reporting */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *msg,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Input Streams.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *filename);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Namespaces.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Generic production rules.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif /* LIBXML_SAX1_ENABLED */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * XML_SUBSTITUTE_NONE:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * If no entities need to be substituted.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * XML_SUBSTITUTE_REF:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Whether general entities need to be substituted.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * XML_SUBSTITUTE_PEREF:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Whether parameter entities need to be substituted.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * XML_SUBSTITUTE_BOTH:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Both general and parameter entities need to be substituted.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Generated by MACROS on top of parser.c, cf. PUSH_AND_POP.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN int XMLCALL nodePush (xmlParserCtxtPtr ctxt,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN xmlNodePtr XMLCALL nodePop (xmlParserCtxtPtr ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN int XMLCALL inputPush (xmlParserCtxtPtr ctxt,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN xmlParserInputPtr XMLCALL inputPop (xmlParserCtxtPtr ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN const xmlChar * XMLCALL namePop (xmlParserCtxtPtr ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN int XMLCALL namePush (xmlParserCtxtPtr ctxt,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * other commodities shared between parser.c and parserInternals.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN int XMLCALL xmlSkipBlankChars (xmlParserCtxtPtr ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN int XMLCALL xmlStringCurrentChar (xmlParserCtxtPtr ctxt,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN void XMLCALL xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN int XMLCALL xmlCheckLanguageID (const xmlChar *lang);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Really core function shared with HTML parser.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN int XMLCALL xmlCurrentChar (xmlParserCtxtPtr ctxt,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN int XMLCALL xmlCopyCharMultiByte (xmlChar *out,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN void XMLCALL xmlNextChar (xmlParserCtxtPtr ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN void XMLCALL xmlParserInputShrink (xmlParserInputPtr in);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Actually comes from the HTML parser but launched from the init stuff.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN htmlParserCtxtPtr XMLCALL htmlCreateFileParserCtxt(const char *filename,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Specific function to keep track of entities references
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * and used by the XSLT debugger.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * xmlEntityReferenceFunc:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ent: the entity
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @firstNode: the first node in the chunk
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @lastNode: the last node in the chunk
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Callback function used when one needs to be able to track back the
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * provenance of a chunk of nodes inherited from an entity replacement.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynctypedef void (*xmlEntityReferenceFunc) (xmlEntityPtr ent,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN void XMLCALL xmlSetEntityReferenceFunc (xmlEntityReferenceFunc func);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN void XMLCALL xmlParserHandleReference(xmlParserCtxtPtr ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif /* LIBXML_LEGACY_ENABLED */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * internal only
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *extra);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif /* __XML_PARSER_INTERNALS_H__ */