38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Summary: interface for an HTML 4.0 non-verifying parser
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Description: this module implements an HTML 4.0 non-verifying parser
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * with an API compatible with that of the XML parser. It should
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * be able to parse "real world" HTML, even if severely
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * broken from a specification point of view.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Copy: See Copyright for the status of this software.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Author: Daniel Veillard
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncextern "C" {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Most of the back-end structures from XML and HTML are shared.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Internal description of an HTML element, representing HTML 4.01
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * and XHTML 1.0 (which share the same structure).
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync char startTag; /* Whether the start tag can be implied */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync char endTag; /* Whether the end tag can be implied */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync char saveEndTag; /* Whether the end tag should be saved */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync char dtd; /* 1: only in Loose DTD, 2: only Frameset one */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync char isinline; /* is this a block 0 or inline 1 element */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/* NRK Jan.2003
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * New fields encapsulating HTML structure
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * This is a very limited representation. It fails to tell us when
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * an element *requires* subelements (we only have whether they're
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * allowed or not), and it doesn't tell us where CDATA and PCDATA
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * are allowed. Some element relationships are not fully represented:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * these are flagged with the word MODIFIER
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char** subelts; /* allowed sub-elements of this element */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char* defaultsubelt; /* subelement for suggested auto-repair
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if necessary or NULL */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char** attrs_depr; /* Additional deprecated attributes */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Internal description of an HTML entity.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int value; /* the UNICODE value for the character */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * There are only a few public functions.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const unsigned char *in,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const unsigned char *in,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Interfaces for the Push mode.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *chunk,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *filename,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *chunk,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif /* LIBXML_PUSH_ENABLED */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * New set of simpler/more flexible APIs
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParserOption:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * This is the set of HTML parser options that can be passed down
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * to the htmlReadDoc() and similar calls.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynctypedef enum {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync HTML_PARSE_NOERROR = 1<<5, /* suppress error reports */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync HTML_PARSE_NONET = 1<<11,/* Forbid network access */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync HTML_PARSE_COMPACT = 1<<16 /* compact small text nodes */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *URL,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *URL,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *URL,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *URL,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *URL,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *filename,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *buffer,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *URL,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *URL,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *URL,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/* NRK/Jan2003: further knowledge of HTML structure
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynctypedef enum {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync HTML_NA = 0 , /* something we don't check at all */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync HTML_REQUIRED = 0xc /* VALID bit set so ( & HTML_VALID ) is TRUE */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/* Using htmlElemDesc rather than name here, to emphasise the fact
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync that otherwise there's a lookup overhead
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN htmlStatus XMLCALL htmlAttrAllowed(const htmlElemDesc*, const xmlChar*, int) ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN int XMLCALL htmlElementAllowedHere(const htmlElemDesc*, const xmlChar*) ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN htmlStatus XMLCALL htmlElementStatusHere(const htmlElemDesc*, const htmlElemDesc*) ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncXMLPUBFUN htmlStatus XMLCALL htmlNodeStatus(const htmlNodePtr, int) ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlDefaultSubelement:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @elt: HTML element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the default subelement for this element
/*
 * htmlDefaultSubelement:
 * @elt: pointer to an HTML element description
 *
 * Expands to the defaultsubelt member of the description @elt points to.
 * The argument is parenthesized so that any pointer-valued expression
 * (e.g. "table + 1" or "&table[i]") can be passed, not only a plain
 * identifier; without the parentheses "->" would bind to the last
 * operand of the expression instead of to the whole argument.
 */
#define htmlDefaultSubelement(elt) ((elt)->defaultsubelt)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlElementAllowedHereDesc:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @parent: HTML parent element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @elt: HTML element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Checks whether an HTML element description may be a
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * direct child of the specified element.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 1 if allowed; 0 otherwise.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlRequiredAttrs:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @elt: HTML element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the attributes required for the specified element.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif /* LIBXML_HTML_ENABLED */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif /* __HTML_PARSER_H__ */