HTMLparser.c revision 38ae7e4efe803ea78b6499cd05a394db32623e41
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * HTMLparser.c : an HTML 4.0 non-verifying parser
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * See Copyright for the status of this software.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * daniel@veillard.com
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/* #define DEBUG */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/* #define DEBUG_PUSH */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncxmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void htmlParseComment(htmlParserCtxtPtr ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Some factorized error routines *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlErrMemory:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
 * @extra: extra information
 * Handle a memory allocation error
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseErr:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @error: the error number
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @msg: the error message
 * @str1: string info
 * @str2: string info
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Handle a fatal parser error, i.e. violating Well-Formedness constraints
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *msg, const xmlChar *str1, const xmlChar *str2)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseErrInt:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @error: the error number
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @msg: the error message
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @val: integer info
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Handle a fatal parser error, i.e. violating Well-Formedness constraints
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parser stacks related functions and macros *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlnamePush:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @value: the element name
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Pushes a new element name on top of the name stack
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 0 in case of error, the index in the stack otherwise
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlnamePop:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Pops the top element name from the name stack
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the name just removed
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const xmlChar *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Macros for accessing the content. Those should be used only by the parser,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * and not exported.
 * Dirty macros, i.e. one needs to make assumptions about the context to use them
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * CUR_PTR return the current pointer to the xmlChar to be parsed.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * in ISO-Latin or UTF-8, and the current 16 bit value if compiled
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * in UNICODE mode. This should be used internally by the parser
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * only to compare to ASCII values otherwise it would break when
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * running with UTF-8 encoding.
 * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * to compare on ASCII based substring.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * UPP(n) returns the n'th next xmlChar converted to uppercase. Same as CUR
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * it should be used only to compare on ASCII based substring.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * strings without newlines within the parser.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * CURRENT Returns the current char value, with the full decoding of
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * UTF-8 if we are using this mode. It returns an int.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * NEXT Skip to the next character, this does the proper decoding
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * in UTF-8 mode. It also pop-up unfinished entities on the fly.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * NEXTL(l) Skip the current unicode character of l xmlChars long.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
/*
 * SKIP(val): move the input cursor forward by val xmlChars and add val to
 * both the context character counter and the current input column.
 * Expands to an unparenthesized comma expression; use it as a statement.
 */
#define SKIP(val) \
    ctxt->nbChars += (val), \
    ctxt->input->cur += (val), \
    ctxt->input->col += (val)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#define SHRINK if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
/* Imported from XML */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
/*
 * RAW: evaluates to -1 while ctxt->token is non-zero, otherwise to the
 * xmlChar currently under the input cursor.
 */
#define RAW ((ctxt->token != 0) ? -1 : *(ctxt->input->cur))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#define NEXTL(l) do { \
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++; \
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } while (0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************/
/*
 * CUR_SCHAR(s, l): call xmlStringCurrentChar() on string s for the current
 * context, passing the address of l as its third argument.
 */
#define CUR_SCHAR(s, l) \
    xmlStringCurrentChar(ctxt, s, &l)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#define COPY_BUF(l,b,i,v) \
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else i += xmlCopyChar(l,&b[i],v)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCurrentChar:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: the HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @len: pointer to the length of the char read
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The current char value, if using UTF-8 this may actually span multiple
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * bytes in the input buffer. Implement the end of line normalization:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * 2.11 End-of-Line Handling
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * If the encoding is unspecified, in the case we find an ISO-Latin-1
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * char, then the encoding converter is plugged in automatically.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the current char value and its length
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * We are supposed to handle UTF8, check it's valid
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * From rfc2044: encoding of the Unicode values on UTF-8:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * UCS-4 range (hex.) UTF-8 octet sequence (binary)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * 0000 0000-0000 007F 0xxxxxxx
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check for the 0x110000 limit too
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned char c;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int val;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (c & 0x80) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* 4-byte code */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* 3-byte code */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* 2-byte code */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* 1-byte code */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Assume it's a fixed length encoding (1) with
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * a compatible encoding for the ASCII set, since
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * XML constructs only use < 128 chars
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Humm this is bad, do an automatic flow conversion
 * If we detect a UTF-8 error, that probably means that the
 * input encoding was not properly advertised in the
 * declaration header. Report the error and switch the encoding
 * to ISO-Latin-1 (if you don't like this policy, just declare the
 * encoding!)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync snprintf(buffer, 149, "Bytes: 0x%02X\n", ctxt->input->cur[0]);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Input is not proper UTF-8, indicate encoding !\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlSkipBlankChars:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: the HTML parser context
 * Skip all blank characters found at that point in the input stream.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the number of space chars skipped
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The list of HTML elements and their properties *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
 * Start Tag: 1 means the start tag can be omitted
 * End Tag: 1 means the end tag can be omitted
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * 2 means it's forbidden (empty elements)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * 3 means the tag is stylistic and should be closed easily
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Depr: this element is deprecated
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * DTD: 1 means that this element is valid only in the Loose DTD
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * 2 means that this element is valid only in the Frameset DTD
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Name,Start Tag,End Tag,Save End,Empty,Deprecated,DTD,inline,Description
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync , subElements , impliedsubelt , Attributes, userdata
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/* Definitions and a couple of vars for HTML Elements */
/*
 * Element-name groups used to build the content tables of this file.
 * Each group expands to a comma-separated list of string literals WITHOUT a
 * trailing comma, so groups must be joined with explicit commas. Without the
 * comma the compiler concatenates the adjacent literals: "small" "em" would
 * become the bogus element name "smallem", and "menu" "pre" would become
 * "menupre", silently dropping the real names from INLINE and BLOCK.
 */
#define FONTSTYLE "tt", "i", "b", "u", "s", "strike", "big", "small"
#define PHRASE "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr", "acronym"
#define SPECIAL "a", "img", "applet", "embed", "object", "font", "basefont", "br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe"
/* NOTE(review): PCDATA is defined outside this view and is assumed to expand
 * to nothing, which is why no comma follows it - confirm. */
#define INLINE PCDATA FONTSTYLE, PHRASE, SPECIAL, FORMCTRL
#define NB_INLINE NB_PCDATA + NB_FONTSTYLE + NB_PHRASE + NB_SPECIAL + NB_FORMCTRL
#define BLOCK HEADING, LIST, "pre", "p", "dl", "div", "center", "noscript", "noframes", "blockquote", "form", "isindex", "hr", "table", "fieldset", "address"
#define FORMCTRL "input", "select", "textarea", "label", "button"
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const html_flow[] = { FLOW, NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const html_inline[] = { INLINE, NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/* placeholders: elts with content but no subelements */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/* ... and for HTML Attributes */
/* Intrinsic event attribute names shared by most elements. */
#define EVENTS "onclick", "ondblclick", "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress", "onkeydown", "onkeyup"
/*
 * Number of names in the combined core + i18n + events attribute set.
 * The first term used to be spelled NB_NB_COREATTRS, an identifier that
 * follows none of the NB_<group> names used here, so any use of NB_ATTRS
 * would not compile.
 * NOTE(review): NB_COREATTRS, NB_I18N and NB_EVENTS are defined outside this
 * view - confirm their names.
 */
#define NB_ATTRS NB_COREATTRS + NB_I18N + NB_EVENTS
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const html_attrs[] = { ATTRS, NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const core_i18n_attrs[] = { COREATTRS, I18N, NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const core_attrs[] = { COREATTRS, NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const i18n_attrs[] = { I18N, NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/* Other declarations that should go inline ... */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const a_attrs[] = { ATTRS, "charset", "type", "name",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "href", "hreflang", "rel", "rev", "accesskey", "shape", "coords",
/* Small NULL-terminated attribute-name lists referenced by the element table. */
static const char *const target_attr[] = {
    "target", NULL
};
static const char *const rows_cols_attr[] = {
    "rows", "cols", NULL
};
static const char *const alt_attr[] = {
    "alt", NULL
};
static const char *const src_alt_attrs[] = {
    "src", "alt", NULL
};
static const char *const href_attrs[] = {
    "href", NULL
};
static const char *const clear_attrs[] = {
    "clear", NULL
};
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const inline_p[] = { INLINE, "p", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const flow_param[] = { FLOW, "param", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const applet_attrs[] = { COREATTRS , "codebase",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "archive", "alt", "name", "height", "width", "align",
/* NULL-terminated attribute-name list for the "area" element. */
static const char *const area_attrs[] = {
    "shape", "coords", "href", "nohref", "tabindex", "accesskey",
    "onfocus", "onblur", NULL
};
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const basefont_attrs[] =
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const quote_attrs[] = { ATTRS, "cite", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const body_contents[] = { FLOW, "ins", "del", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const body_attrs[] = { ATTRS, "onload", "onunload", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const body_depr[] = { "background", "bgcolor", "text",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const button_attrs[] = { ATTRS, "name", "value", "type",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "disabled", "tabindex", "accesskey", "onfocus", "onblur", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const col_attrs[] = { ATTRS, "span", "width", CELLHALIGN, CELLVALIGN, NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const col_elt[] = { "col", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const edit_attrs[] = { ATTRS, "datetime", "cite", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const compact_attrs[] = { ATTRS, "compact", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const dl_contents[] = { "dt", "dd", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const compact_attr[] = { "compact", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const label_attr[] = { "label", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const fieldset_contents[] = { FLOW, "legend" } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const font_attrs[] = { COREATTRS, I18N, "size", "color", "face" , NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const form_contents[] = { HEADING, LIST, INLINE, "pre", "p", "div", "center", "noscript", "noframes", "blockquote", "isindex", "hr", "table", "fieldset", "address", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const form_attrs[] = { ATTRS, "method", "enctype", "accept", "name", "onsubmit", "onreset", "accept-charset", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const frame_attrs[] = { COREATTRS, "longdesc", "name", "src", "frameborder", "marginwidth", "marginheight", "noresize", "scrolling" , NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const frameset_attrs[] = { COREATTRS, "rows", "cols", "onload", "onunload", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const frameset_contents[] = { "frameset", "frame", "noframes", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const head_attrs[] = { I18N, "profile", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const head_contents[] = { "title", "isindex", "base", "script", "style", "meta", "link", "object", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const hr_depr[] = { "align", "noshade", "size", "width", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const version_attr[] = { "version", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const html_content[] = { "head", "body", "frameset", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const iframe_attrs[] = { COREATTRS, "longdesc", "name", "src", "frameborder", "marginwidth", "marginheight", "scrolling", "align", "height", "width", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const img_attrs[] = { ATTRS, "longdesc", "name", "height", "width", "usemap", "ismap", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const embed_attrs[] = { COREATTRS, "align", "alt", "border", "code", "codebase", "frameborder", "height", "hidden", "hspace", "name", "palette", "pluginspace", "pluginurl", "src", "type", "units", "vspace", "width", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const input_attrs[] = { ATTRS, "type", "name", "value", "checked", "disabled", "readonly", "size", "maxlength", "src", "alt", "usemap", "ismap", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", "accept", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const prompt_attrs[] = { COREATTRS, I18N, "prompt", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const label_attrs[] = { ATTRS, "for", "accesskey", "onfocus", "onblur", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const legend_attrs[] = { ATTRS, "accesskey", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const align_attr[] = { "align", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const link_attrs[] = { ATTRS, "charset", "href", "hreflang", "type", "rel", "rev", "media", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const map_contents[] = { BLOCK, "area", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const name_attr[] = { "name", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const action_attr[] = { "action", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const blockli_elt[] = { BLOCK, "li", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const content_attr[] = { "content", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const type_attr[] = { "type", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const object_contents[] = { FLOW, "param", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const object_attrs[] = { ATTRS, "declare", "classid", "codebase", "data", "type", "codetype", "archive", "standby", "height", "width", "usemap", "name", "tabindex", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const object_depr[] = { "align", "border", "hspace", "vspace", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const ol_attrs[] = { "type", "compact", "start", NULL} ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const option_elt[] = { "option", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const optgroup_attrs[] = { ATTRS, "disabled", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const option_attrs[] = { ATTRS, "disabled", "label", "selected", "value", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const param_attrs[] = { "id", "value", "valuetype", "type", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const width_attr[] = { "width", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const pre_content[] = { PHRASE, "tt", "i", "b", "u", "s", "strike", "a", "br", "script", "map", "q", "span", "bdo", "iframe", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const script_attrs[] = { "charset", "src", "defer", "event", "for", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const language_attr[] = { "language", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const select_content[] = { "optgroup", "option", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const select_attrs[] = { ATTRS, "name", "size", "multiple", "disabled", "tabindex", "onfocus", "onblur", "onchange", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const style_attrs[] = { I18N, "media", "title", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const table_attrs[] = { ATTRS "summary", "width", "border", "frame", "rules", "cellspacing", "cellpadding", "datapagesize", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const table_depr[] = { "align", "bgcolor", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const table_contents[] = { "caption", "col", "colgroup", "thead", "tfoot", "tbody", "tr", NULL} ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const tr_elt[] = { "tr", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const talign_attrs[] = { ATTRS, CELLHALIGN, CELLVALIGN, NULL} ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const th_td_depr[] = { "nowrap", "bgcolor", "width", "height", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const th_td_attr[] = { ATTRS, "abbr", "axis", "headers", "scope", "rowspan", "colspan", CELLHALIGN, CELLVALIGN, NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const textarea_attrs[] = { ATTRS, "name", "disabled", "readonly", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const tr_contents[] = { "th", "td", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const bgcolor_attr[] = { "bgcolor", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const li_elt[] = { "li", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const ul_depr[] = { "type", "compact", NULL} ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const dir_attr[] = { "dir", NULL} ;
/*
 * Cast prefix written in front of the string tables where the element
 * descriptors reference them; it converts the `const char *const []`
 * tables to plain `const char **`.
 */
#define DECL (const char **)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline , NULL , DECL a_attrs , DECL target_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline , NULL , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline , NULL , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "address", 0, 0, 0, 0, 0, 0, 0, "information on author ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL flow_param , NULL , NULL , DECL applet_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "area", 0, 2, 2, 1, 0, 0, 0, "client-side image map area ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY , NULL , DECL area_attrs , DECL target_attr, DECL alt_attr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline , NULL , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "base", 0, 2, 2, 1, 0, 0, 0, "document base uri ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY , NULL , NULL , DECL target_attr, DECL href_attrs
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "basefont", 0, 2, 2, 1, 1, 1, 1, "base font size " ,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "bdo", 0, 0, 0, 0, 0, 0, 1, "i18n bidi over-ride ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline , NULL , DECL core_i18n_attrs, NULL, DECL dir_attr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline , NULL , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "blockquote", 0, 0, 0, 0, 0, 0, 0, "long quotation ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow , NULL , DECL quote_attrs , NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL body_contents , "div" , DECL body_attrs, DECL body_depr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY , NULL , DECL core_attrs, DECL clear_attrs , NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow MODIFIER , NULL , DECL button_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline , NULL , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "center", 0, 3, 0, 0, 1, 1, 0, "shorthand for div align=center ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow , NULL , NULL, DECL html_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline , NULL , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "code", 0, 0, 0, 0, 0, 0, 1, "computer code fragment",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline , NULL , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "colgroup", 0, 1, 0, 0, 0, 0, 0, "table column group ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "dd", 0, 1, 0, 0, 0, 0, 0, "definition description ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow , NULL , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow , NULL , DECL edit_attrs , NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "dfn", 0, 0, 0, 0, 0, 0, 1, "instance definition",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline , NULL , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL blockli_elt, "li" , NULL, DECL compact_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "div", 0, 0, 0, 0, 0, 0, 0, "generic language/style container",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow, NULL, DECL html_attrs, DECL align_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL dl_contents , "dd" , DECL html_attrs, DECL compact_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "embed", 0, 1, 0, 0, 1, 1, 1, "generic embedded object ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "fieldset", 0, 0, 0, 0, 0, 0, 0, "form control group ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL fieldset_contents , NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "font", 0, 3, 0, 0, 1, 1, 1, "local change to font ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, NULL, DECL font_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL form_contents, "fieldset", DECL form_attrs , DECL target_attr, DECL action_attr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "frameset", 0, 0, 0, 0, 0, 2, 0, "window subdivision" ,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL frameset_contents, "noframes" , NULL , DECL frameset_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL head_contents, NULL, DECL head_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "html", 1, 1, 0, 0, 0, 0, 0, "document root element ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_content , NULL , DECL i18n_attrs, DECL version_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "iframe", 0, 0, 0, 0, 0, 1, 2, "inline subwindow ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow, NULL, NULL, DECL iframe_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY, NULL, DECL img_attrs, DECL align_attr, DECL src_alt_attrs
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY, NULL, DECL input_attrs , DECL align_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "isindex", 0, 2, 2, 1, 1, 1, 0, "single line prompt ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "kbd", 0, 0, 0, 0, 0, 0, 1, "text to be entered by the user",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "label", 0, 0, 0, 0, 0, 0, 1, "form field label text ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline MODIFIER, NULL, DECL label_attrs , NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "legend", 0, 0, 0, 0, 0, 0, 0, "fieldset legend ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL legend_attrs , DECL align_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "link", 0, 2, 2, 1, 0, 0, 0, "a media-independent link ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY, NULL, DECL link_attrs, DECL target_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "map", 0, 0, 0, 0, 0, 0, 2, "client-side image map ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL map_contents , NULL, DECL html_attrs , NULL, DECL name_attr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL blockli_elt , NULL, NULL, DECL compact_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "meta", 0, 2, 2, 1, 0, 0, 0, "generic metainformation ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY, NULL, DECL meta_attrs , NULL , DECL content_attr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "noframes", 0, 0, 0, 0, 0, 2, 0, "alternate content container for non frame-based rendering ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL noframes_content, "body" , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "noscript", 0, 0, 0, 0, 0, 0, 0, "alternate content container for non script-based rendering ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "object", 0, 0, 0, 0, 0, 0, 2, "generic embedded object ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL object_contents , "div" , DECL object_attrs, DECL object_depr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL li_elt , "li" , DECL html_attrs, DECL ol_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL option_elt , "option", DECL optgroup_attrs, NULL, DECL label_attr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "option", 0, 1, 0, 0, 0, 0, 0, "selectable choice " ,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_pcdata, NULL, DECL option_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "param", 0, 2, 2, 1, 0, 0, 0, "named property value ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY, NULL, DECL param_attrs, NULL, DECL name_attr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL pre_content, NULL, DECL html_attrs, DECL width_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "q", 0, 0, 0, 0, 0, 0, 1, "short inline quotation ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL quote_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "s", 0, 3, 0, 0, 1, 1, 1, "strike-through text style",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, NULL, DECL html_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "samp", 0, 0, 0, 0, 0, 0, 1, "sample program output, scripts, etc.",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "script", 0, 0, 0, 0, 0, 0, 2, "script statements ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_cdata, NULL, DECL script_attrs, DECL language_attr, DECL type_attr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "select", 0, 0, 0, 0, 0, 0, 1, "option selector ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL select_content, NULL, DECL select_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "span", 0, 0, 0, 0, 0, 0, 1, "generic language/style container ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "strike", 0, 3, 0, 0, 1, 1, 1, "strike-through text",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, NULL, DECL html_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_cdata, NULL, DECL style_attrs, NULL, DECL type_attr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL table_contents , "tr" , DECL table_attrs , DECL table_depr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow, NULL, DECL th_td_attr, DECL th_td_depr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "textarea", 0, 0, 0, 0, 0, 0, 1, "multi-line text field ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_pcdata, NULL, DECL textarea_attrs, NULL, DECL rows_cols_attr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow, NULL, DECL th_td_attr, DECL th_td_depr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_pcdata, NULL, DECL i18n_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL tr_contents , "td" , DECL talign_attrs, DECL bgcolor_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "tt", 0, 3, 0, 0, 0, 0, 1, "teletype or monospaced text style",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "u", 0, 3, 0, 0, 1, 1, 1, "underlined text style",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, NULL, DECL html_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL li_elt , "li" , DECL html_attrs, DECL ul_depr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "var", 0, 0, 0, 0, 0, 0, 1, "instance of a variable or program argument",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * start tags that imply the end of current element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char * const htmlStartClose[] = {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync"form", "form", "p", "hr", "h1", "h2", "h3", "h4", "h5", "h6",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync"body", "head", "style", "link", "title", "p", NULL,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync"frameset", "head", "style", "link", "title", "p", NULL,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync"li", "p", "h1", "h2", "h3", "h4", "h5", "h6", "dl", "address",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync"dl", "p", "dt", "menu", "dir", "address", "pre", "listing",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync"dt", "p", "menu", "dir", "address", "pre", "listing", "xmp",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync"dd", "p", "menu", "dir", "address", "pre", "listing", "xmp",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync"ul", "p", "head", "ol", "menu", "dir", "address", "pre",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync"p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", NULL,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync"colgroup", "caption", "colgroup", "col", "p", NULL,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync"table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync"th", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync"td", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync"tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync"tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync"tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync"fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The list of HTML elements which are supposed not to have
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * CDATA content and where a p element will be implied
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * TODO: extend that list by reading the HTML SGML DTD on
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * implied paragraph
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char *const htmlNoContentElements[] = {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The list of HTML attributes which are of content %Script;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * NOTE: when adding ones, check htmlIsScriptAttribute() since
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * it assumes the name starts with 'on'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char *const htmlScriptAttributes[] = {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "ondblclick",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "onmousedown",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "onmouseup",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "onmouseover",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "onmousemove",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "onmouseout",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "onkeypress",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "onkeydown",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "onunload",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "onsubmit",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "onchange",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * This table is used by the htmlparser to know what to do with
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * broken html pages. By assigning different priorities to different
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * elements the parser can decide how to handle extra endtags.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Endtags are only allowed to close elements with lower or equal
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * priority.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynctypedef struct {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *name;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * functions to handle HTML specific data *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlInitAutoClose:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Initialize the htmlStartCloseIndex for fast lookup of closing tags names.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * This is not reentrant. Call xmlInitParser() once before processing in
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * case of use in multithreaded programs.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int indx, i = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (indx = 0;indx < 100;indx ++) htmlStartCloseIndex[indx] = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((htmlStartClose[i] != NULL) && (indx < 100 - 1)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlStartCloseIndex[indx++] = (const char**) &htmlStartClose[i];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlTagLookup:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @tag: The tag name in lowercase
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Lookup the HTML tag in the ElementTable
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the related htmlElemDescPtr or NULL if not found.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int i;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (i = 0; i < (sizeof(html40ElementTable) /
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync sizeof(html40ElementTable[0]));i++) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!xmlStrcasecmp(tag, BAD_CAST html40ElementTable[i].name))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlGetEndPriority:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @name: The name of the element to look up the priority for.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Return value: The "endtag" priority.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (!xmlStrEqual((const xmlChar *)htmlEndPriority[i].name, name)))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCheckAutoClose:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @newtag: The new tag name
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @oldtag: The old tag name
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Checks whether the new tag is one of the registered valid tags for
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * closing old.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Initialize the htmlStartCloseIndex for fast lookup of closing tags names.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 0 if no, 1 if yes.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCheckAutoClose(const xmlChar * newtag, const xmlChar * oldtag)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* inefficient, but not a big deal */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(BAD_CAST htmlStartClose[i], oldtag)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlAutoCloseOnClose:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @newtag: The new tag name
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @force: force the tag closure
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The HTML DTD allows an ending tag to implicitly close other tags.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * A misplaced endtag can only close elements with lower
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * or equal priority, so if we find an element with higher
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * priority before we find an element with
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * matching name, we just ignore this endtag
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (htmlGetEndPriority(ctxt->nameTab[i]) > priority)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Opening and ending tag mismatch: %s and %s\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlAutoCloseOnEnd:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Close all remaining tags at the end of the stream
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlAutoClose:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @newtag: The new tag name or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The HTML DTD allows a tag to implicitly close other tags.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The list is kept in htmlStartClose array. This function is
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * called when a new tag has been detected and generates the
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * appropriate closes if possible/needed.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * If newtag is NULL this means we are at the end of the resource
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * and we should check
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlAutoCloseTag:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @doc: the HTML document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @name: The tag name
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @elem: the HTML element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The HTML DTD allows a tag to implicitly close other tags.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The list is kept in htmlStartClose array. This function checks
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * if the element or one of its children would autoclose the
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * given tag.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 1 if autoclose, 0 otherwise
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlAutoCloseTag(htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (htmlCheckAutoClose(elem->name, name)) return(1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlIsAutoClosed:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @doc: the HTML document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @elem: the HTML element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The HTML DTD allows a tag to implicitly close other tags.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The list is kept in htmlStartClose array. This function checks
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * if a tag is autoclosed by one of its children
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 1 if autoclosed, 0 otherwise
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlIsAutoClosed(htmlDocPtr doc, htmlNodePtr elem) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (htmlAutoCloseTag(doc, elem->name, child)) return(1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCheckImplied:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @newtag: The new tag name
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The HTML DTD allows a tag to exist only implicitly. This function is
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * called when a new tag has been detected and generates the
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * appropriate implicit tags if missing
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((xmlStrEqual(newtag, BAD_CAST"body")) || (xmlStrEqual(newtag, BAD_CAST"head")))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * dropped OBJECT ... if you put it first BODY will be
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * assumed !
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if ((!xmlStrEqual(newtag, BAD_CAST"noframes")) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCheckParagraph
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check whether a p element needs to be implied before inserting
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * characters in the current element.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 1 if a paragraph has been inserted, 0 if not and -1
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * in case of error.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (i = 0; htmlNoContentElements[i] != NULL; i++) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(tag, BAD_CAST htmlNoContentElements[i])) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlIsScriptAttribute:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @name: an attribute name
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check if an attribute is of content type Script
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 1 if the attribute is a script, 0 otherwise
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int i;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * all script attributes start with 'on'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (i = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync i < sizeof(htmlScriptAttributes)/sizeof(htmlScriptAttributes[0]);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(name, (const xmlChar *) htmlScriptAttributes[i]))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The list of HTML predefined entities *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const htmlEntityDesc html40EntitiesTable[] = {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * the 4 absolute ones, plus apostrophe.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 34, "quot", "quotation mark = APL quote, U+0022 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * A bunch still in the 128-255 range
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Replacing them depend really on the charset used.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 160, "nbsp", "no-break space = non-breaking space, U+00A0 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 161, "iexcl","inverted exclamation mark, U+00A1 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 165, "yen", "yen sign = yuan sign, U+00A5 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 166, "brvbar","broken bar = broken vertical bar, U+00A6 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 168, "uml", "diaeresis = spacing diaeresis, U+00A8 ISOdia" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 170, "ordf", "feminine ordinal indicator, U+00AA ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 171, "laquo","left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 173, "shy", "soft hyphen = discretionary hyphen, U+00AD ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 174, "reg", "registered sign = registered trade mark sign, U+00AE ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 175, "macr", "macron = spacing macron = overline = APL overbar, U+00AF ISOdia" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 177, "plusmn","plus-minus sign = plus-or-minus sign, U+00B1 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 178, "sup2", "superscript two = superscript digit two = squared, U+00B2 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 179, "sup3", "superscript three = superscript digit three = cubed, U+00B3 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 180, "acute","acute accent = spacing acute, U+00B4 ISOdia" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 182, "para", "pilcrow sign = paragraph sign, U+00B6 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 183, "middot","middle dot = Georgian comma Greek middle dot, U+00B7 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 184, "cedil","cedilla = spacing cedilla, U+00B8 ISOdia" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 185, "sup1", "superscript one = superscript digit one, U+00B9 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 186, "ordm", "masculine ordinal indicator, U+00BA ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 187, "raquo","right-pointing double angle quotation mark right pointing guillemet, U+00BB ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 188, "frac14","vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 189, "frac12","vulgar fraction one half = fraction one half, U+00BD ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 190, "frac34","vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 191, "iquest","inverted question mark = turned question mark, U+00BF ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 192, "Agrave","latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 193, "Aacute","latin capital letter A with acute, U+00C1 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 194, "Acirc","latin capital letter A with circumflex, U+00C2 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 195, "Atilde","latin capital letter A with tilde, U+00C3 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 196, "Auml", "latin capital letter A with diaeresis, U+00C4 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 197, "Aring","latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 198, "AElig","latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 199, "Ccedil","latin capital letter C with cedilla, U+00C7 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 200, "Egrave","latin capital letter E with grave, U+00C8 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 201, "Eacute","latin capital letter E with acute, U+00C9 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 202, "Ecirc","latin capital letter E with circumflex, U+00CA ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 203, "Euml", "latin capital letter E with diaeresis, U+00CB ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 204, "Igrave","latin capital letter I with grave, U+00CC ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 205, "Iacute","latin capital letter I with acute, U+00CD ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 206, "Icirc","latin capital letter I with circumflex, U+00CE ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 207, "Iuml", "latin capital letter I with diaeresis, U+00CF ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 208, "ETH", "latin capital letter ETH, U+00D0 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 209, "Ntilde","latin capital letter N with tilde, U+00D1 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 210, "Ograve","latin capital letter O with grave, U+00D2 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 211, "Oacute","latin capital letter O with acute, U+00D3 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 212, "Ocirc","latin capital letter O with circumflex, U+00D4 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 213, "Otilde","latin capital letter O with tilde, U+00D5 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 214, "Ouml", "latin capital letter O with diaeresis, U+00D6 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 215, "times","multiplication sign, U+00D7 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 216, "Oslash","latin capital letter O with stroke latin capital letter O slash, U+00D8 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 217, "Ugrave","latin capital letter U with grave, U+00D9 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 218, "Uacute","latin capital letter U with acute, U+00DA ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 219, "Ucirc","latin capital letter U with circumflex, U+00DB ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 220, "Uuml", "latin capital letter U with diaeresis, U+00DC ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 221, "Yacute","latin capital letter Y with acute, U+00DD ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 222, "THORN","latin capital letter THORN, U+00DE ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 223, "szlig","latin small letter sharp s = ess-zed, U+00DF ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 224, "agrave","latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 225, "aacute","latin small letter a with acute, U+00E1 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 226, "acirc","latin small letter a with circumflex, U+00E2 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 227, "atilde","latin small letter a with tilde, U+00E3 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 228, "auml", "latin small letter a with diaeresis, U+00E4 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 229, "aring","latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 230, "aelig","latin small letter ae = latin small ligature ae, U+00E6 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 231, "ccedil","latin small letter c with cedilla, U+00E7 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 232, "egrave","latin small letter e with grave, U+00E8 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 233, "eacute","latin small letter e with acute, U+00E9 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 234, "ecirc","latin small letter e with circumflex, U+00EA ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 235, "euml", "latin small letter e with diaeresis, U+00EB ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 236, "igrave","latin small letter i with grave, U+00EC ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 237, "iacute","latin small letter i with acute, U+00ED ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 238, "icirc","latin small letter i with circumflex, U+00EE ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 239, "iuml", "latin small letter i with diaeresis, U+00EF ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 240, "eth", "latin small letter eth, U+00F0 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 241, "ntilde","latin small letter n with tilde, U+00F1 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 242, "ograve","latin small letter o with grave, U+00F2 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 243, "oacute","latin small letter o with acute, U+00F3 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 244, "ocirc","latin small letter o with circumflex, U+00F4 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 245, "otilde","latin small letter o with tilde, U+00F5 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 246, "ouml", "latin small letter o with diaeresis, U+00F6 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 248, "oslash","latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 249, "ugrave","latin small letter u with grave, U+00F9 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 250, "uacute","latin small letter u with acute, U+00FA ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 251, "ucirc","latin small letter u with circumflex, U+00FB ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 252, "uuml", "latin small letter u with diaeresis, U+00FC ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 253, "yacute","latin small letter y with acute, U+00FD ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 254, "thorn","latin small letter thorn with, U+00FE ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 255, "yuml", "latin small letter y with diaeresis, U+00FF ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 338, "OElig","latin capital ligature OE, U+0152 ISOlat2" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 339, "oelig","latin small ligature oe, U+0153 ISOlat2" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 352, "Scaron","latin capital letter S with caron, U+0160 ISOlat2" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 353, "scaron","latin small letter s with caron, U+0161 ISOlat2" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 376, "Yuml", "latin capital letter Y with diaeresis, U+0178 ISOlat2" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Anything below should really be kept as entity references
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 402, "fnof", "latin small f with hook = function = florin, U+0192 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 710, "circ", "modifier letter circumflex accent, U+02C6 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 913, "Alpha","greek capital letter alpha, U+0391" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 914, "Beta", "greek capital letter beta, U+0392" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 915, "Gamma","greek capital letter gamma, U+0393 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 916, "Delta","greek capital letter delta, U+0394 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 917, "Epsilon","greek capital letter epsilon, U+0395" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 918, "Zeta", "greek capital letter zeta, U+0396" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 920, "Theta","greek capital letter theta, U+0398 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 921, "Iota", "greek capital letter iota, U+0399" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 922, "Kappa","greek capital letter kappa, U+039A" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 923, "Lambda", "greek capital letter lambda, U+039B ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 926, "Xi", "greek capital letter xi, U+039E ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 927, "Omicron","greek capital letter omicron, U+039F" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 928, "Pi", "greek capital letter pi, U+03A0 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 931, "Sigma","greek capital letter sigma, U+03A3 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 933, "Upsilon","greek capital letter upsilon, U+03A5 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 934, "Phi", "greek capital letter phi, U+03A6 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 936, "Psi", "greek capital letter psi, U+03A8 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 937, "Omega","greek capital letter omega, U+03A9 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 945, "alpha","greek small letter alpha, U+03B1 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 946, "beta", "greek small letter beta, U+03B2 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 947, "gamma","greek small letter gamma, U+03B3 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 948, "delta","greek small letter delta, U+03B4 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 949, "epsilon","greek small letter epsilon, U+03B5 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 950, "zeta", "greek small letter zeta, U+03B6 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 951, "eta", "greek small letter eta, U+03B7 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 952, "theta","greek small letter theta, U+03B8 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 953, "iota", "greek small letter iota, U+03B9 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 954, "kappa","greek small letter kappa, U+03BA ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 955, "lambda","greek small letter lambda, U+03BB ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 956, "mu", "greek small letter mu, U+03BC ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 957, "nu", "greek small letter nu, U+03BD ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 958, "xi", "greek small letter xi, U+03BE ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 959, "omicron","greek small letter omicron, U+03BF NEW" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 960, "pi", "greek small letter pi, U+03C0 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 961, "rho", "greek small letter rho, U+03C1 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 962, "sigmaf","greek small letter final sigma, U+03C2 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 963, "sigma","greek small letter sigma, U+03C3 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 964, "tau", "greek small letter tau, U+03C4 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 965, "upsilon","greek small letter upsilon, U+03C5 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 966, "phi", "greek small letter phi, U+03C6 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 967, "chi", "greek small letter chi, U+03C7 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 968, "psi", "greek small letter psi, U+03C8 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 969, "omega","greek small letter omega, U+03C9 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 977, "thetasym","greek small letter theta symbol, U+03D1 NEW" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 978, "upsih","greek upsilon with hook symbol, U+03D2 NEW" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8204, "zwnj", "zero width non-joiner, U+200C NEW RFC 2070" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8205, "zwj", "zero width joiner, U+200D NEW RFC 2070" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8206, "lrm", "left-to-right mark, U+200E NEW RFC 2070" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8207, "rlm", "right-to-left mark, U+200F NEW RFC 2070" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8216, "lsquo","left single quotation mark, U+2018 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8217, "rsquo","right single quotation mark, U+2019 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8218, "sbquo","single low-9 quotation mark, U+201A NEW" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8220, "ldquo","left double quotation mark, U+201C ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8221, "rdquo","right double quotation mark, U+201D ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8222, "bdquo","double low-9 quotation mark, U+201E NEW" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8226, "bull", "bullet = black small circle, U+2022 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8230, "hellip","horizontal ellipsis = three dot leader, U+2026 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8240, "permil","per mille sign, U+2030 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8242, "prime","prime = minutes = feet, U+2032 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8243, "Prime","double prime = seconds = inches, U+2033 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8249, "lsaquo","single left-pointing angle quotation mark, U+2039 ISO proposed" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8250, "rsaquo","single right-pointing angle quotation mark, U+203A ISO proposed" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8254, "oline","overline = spacing overscore, U+203E NEW" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8465, "image","blackletter capital I = imaginary part, U+2111 ISOamso" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8472, "weierp","script capital P = power set = Weierstrass p, U+2118 ISOamso" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8476, "real", "blackletter capital R = real part symbol, U+211C ISOamso" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8501, "alefsym","alef symbol = first transfinite cardinal, U+2135 NEW" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8594, "rarr", "rightwards arrow, U+2192 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8596, "harr", "left right arrow, U+2194 ISOamsa" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8629, "crarr","downwards arrow with corner leftwards = carriage return, U+21B5 NEW" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8656, "lArr", "leftwards double arrow, U+21D0 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8657, "uArr", "upwards double arrow, U+21D1 ISOamsa" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8658, "rArr", "rightwards double arrow, U+21D2 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8659, "dArr", "downwards double arrow, U+21D3 ISOamsa" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8660, "hArr", "left right double arrow, U+21D4 ISOamsa" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8706, "part", "partial differential, U+2202 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8709, "empty","empty set = null set = diameter, U+2205 ISOamso" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8711, "nabla","nabla = backward difference, U+2207 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8713, "notin","not an element of, U+2209 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8715, "ni", "contains as member, U+220B ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8719, "prod", "n-ary product = product sign, U+220F ISOamsb" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8727, "lowast","asterisk operator, U+2217 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8730, "radic","square root = radical sign, U+221A ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8733, "prop", "proportional to, U+221D ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8743, "and", "logical and = wedge, U+2227 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8745, "cap", "intersection = cap, U+2229 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8764, "sim", "tilde operator = varies with = similar to, U+223C ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8773, "cong", "approximately equal to, U+2245 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8776, "asymp","almost equal to = asymptotic to, U+2248 ISOamsr" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8804, "le", "less-than or equal to, U+2264 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8805, "ge", "greater-than or equal to, U+2265 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8836, "nsub", "not a subset of, U+2284 ISOamsn" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8838, "sube", "subset of or equal to, U+2286 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8839, "supe", "superset of or equal to, U+2287 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8853, "oplus","circled plus = direct sum, U+2295 ISOamsb" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8855, "otimes","circled times = vector product, U+2297 ISOamsb" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8869, "perp", "up tack = orthogonal to = perpendicular, U+22A5 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8968, "lceil","left ceiling = apl upstile, U+2308 ISOamsc" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8970, "lfloor","left floor = apl downstile, U+230A ISOamsc" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 9001, "lang", "left-pointing angle bracket = bra, U+2329 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 9002, "rang", "right-pointing angle bracket = ket, U+232A ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 9824, "spades","black spade suit, U+2660 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 9827, "clubs","black club suit = shamrock, U+2663 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 9829, "hearts","black heart suit = valentine, U+2665 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 9830, "diams","black diamond suit, U+2666 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Commodity functions to handle entities *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Macro used to grow the current buffer.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync tmp = (xmlChar *) xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlEntityLookup:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @name: the entity name
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Look up the given entity in EntitiesTable
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * TODO: the linear scan is really ugly, a hash table is really needed.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the associated htmlEntityDescPtr if found, NULL otherwise.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int i;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (i = 0;i < (sizeof(html40EntitiesTable)/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync sizeof(html40EntitiesTable[0]));i++) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(name, BAD_CAST html40EntitiesTable[i].name)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return((htmlEntityDescPtr) &html40EntitiesTable[i]);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlEntityValueLookup:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @value: the entity's unicode value
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Look up the given entity in EntitiesTable
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * TODO: the linear scan is really ugly, a hash table is really needed.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the associated htmlEntityDescPtr if found, NULL otherwise.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int i;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (i = 0;i < (sizeof(html40EntitiesTable)/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync sizeof(html40EntitiesTable[0]));i++) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return((htmlEntityDescPtr) &html40EntitiesTable[i]);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * UTF8ToHtml:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @out: a pointer to an array of bytes to store the result
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @outlen: the length of @out
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @in: a pointer to an array of UTF-8 chars
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @inlen: the length of @in
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Take a block of UTF-8 chars in and try to convert it to an ASCII
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * plus HTML entities block of chars out.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The value of @inlen after return is the number of octets consumed
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * as the return value is positive, else unpredictable.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The value of @outlen after return is the number of octets produced.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const unsigned char* outend;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const unsigned char* inend;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int c, d;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * initialization nothing to do
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (d < 0xC0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* trailing byte in leading position */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-2);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* no chance for this in Ascii */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-2);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync c |= d & 0x3F;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* assertion: c is a single UCS-4 value */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (c < 0x80) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *cp;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Try to lookup a predefined HTML entity for it
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlEncodeEntities:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @out: a pointer to an array of bytes to store the result
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @outlen: the length of @out
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @in: a pointer to an array of UTF-8 chars
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @inlen: the length of @in
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @quoteChar: the quote character to escape (' or ") or zero.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Take a block of UTF-8 chars in and try to convert it to an ASCII
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * plus HTML entities block of chars out.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The value of @inlen after return is the number of octets consumed
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * as the return value is positive, else unpredictable.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The value of @outlen after return is the number of octets produced.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const unsigned char* in, int *inlen, int quoteChar) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const unsigned char* outend;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const unsigned char* inend;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int c, d;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (d < 0xC0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* trailing byte in leading position */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-2);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* no chance for this in Ascii */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-2);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-2);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync c |= d & 0x3F;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* assertion: c is a single UCS-4 value */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((c < 0x80) && (c != (unsigned int) quoteChar) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *cp;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Try to lookup a predefined HTML entity for it
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Commodity functions to handle streams *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlNewInputStream:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Create a new input stream structure
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the new input stream or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input = (xmlParserInputPtr) xmlMalloc(sizeof(htmlParserInput));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Commodity functions, cleanup needed ? *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * all tags allowing pc data from the html 4.01 loose dtd
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * NOTE: it might be more appropriate to integrate this information
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * into the html40ElementTable array but I don't want to risk any
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * binary incompatibility
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char *allowPCData[] = {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "a", "abbr", "acronym", "address", "applet", "b", "bdo", "big",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "blockquote", "body", "button", "caption", "center", "cite", "code",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "dd", "del", "dfn", "div", "dt", "em", "font", "form", "h1", "h2",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "h3", "h4", "h5", "h6", "i", "iframe", "ins", "kbd", "label", "legend",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "li", "noframes", "noscript", "object", "p", "pre", "q", "s", "samp",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "small", "span", "strike", "strong", "td", "th", "tt", "u", "var"
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * areBlanks:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @str: a xmlChar *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @len: the size of @str
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Is this a sequence of blank chars that one can ignore ?
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 1 if ignorable 0 otherwise.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic int areBlanks(htmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int i;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (j = 0;j < len;j++)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* Only strip CDATA children of the body tag for strict HTML DTDs */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(ctxt->name, BAD_CAST "body") && ctxt->myDoc != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!xmlStrcasecmp(dtd->ExternalID, BAD_CAST "-//W3C//DTD HTML 4.01//EN") ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync !xmlStrcasecmp(dtd->ExternalID, BAD_CAST "-//W3C//DTD HTML 4//EN"))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((lastChild) && (lastChild->type == XML_COMMENT_NODE))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* keep ws in constructs like ...<b> </b>...
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for all tags "b" allowing PCDATA */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ( xmlStrEqual(ctxt->name, BAD_CAST allowPCData[i]) ) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* keep ws in constructs like <p><b>xy</b> <i>z</i><p>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for all tags "p" allowing PCDATA */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ( xmlStrEqual(lastChild->name, BAD_CAST allowPCData[i]) ) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlNewDocNoDtD:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URI: URI for the dtd, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ExternalID: the external ID of the DTD, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Creates a new HTML document without a DTD node if @URI and @ExternalID are both NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns a new document, do not initialize the DTD if not provided
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Allocate a new document and fill the fields.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(NULL, "HTML document creation failed\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlNewDoc:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URI: URI for the dtd, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ExternalID: the external ID of the DTD, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Creates a new HTML document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns a new document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN"));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The parser itself *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The parser itself *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseHTMLName:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML tag or attribute name, note that we convert it to lowercase
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * since HTML names are not case-sensitive.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the Tag Name parsed or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const xmlChar *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((IS_ASCII_LETTER(CUR)) || (IS_ASCII_DIGIT(CUR)) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseHTMLName_nonInvasive:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML tag or attribute name, note that we convert it to lowercase
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * since HTML names are not case-sensitive, this doesn't consume the data
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * from the stream, it's a look-ahead
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the Tag Name parsed or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const xmlChar *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((IS_ASCII_LETTER(NXT(1+i))) || (IS_ASCII_DIGIT(NXT(1+i))) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (NXT(1+i) == ':') || (NXT(1+i) == '-') || (NXT(1+i) == '_'))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((NXT(1+i) >= 'A') && (NXT(1+i) <= 'Z')) loc[i] = NXT(1+i) + 0x20;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseName:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML name, this routine is case sensitive.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the Name parsed or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const xmlChar *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Accelerator for simple ASCII names
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const xmlChar *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int len = 0, l;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Handler for more complex cases
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (c != ':'))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseHTMLAttribute:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @stop: a char stop value
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML attribute value till the stop (quote), if
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * stop is 0 then it stops at the first space
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the attribute parsed or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * allocate a translation buffer.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Ok loop until we reach one of the ending chars
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int c;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (c < 0x80)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (c < 0x800)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (c < 0x10000)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (*cur != 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int c;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (c < 0x80)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (c < 0x800)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (c < 0x10000)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int c;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (c < 0x80)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (c < 0x800)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (c < 0x10000)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseEntityRef:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @str: location to store the entity name
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML entity reference
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [68] EntityRef ::= '&' Name ';'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the associated htmlEntityDescPtr if found, or NULL otherwise,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * if non-NULL *str will have to be freed by the caller.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseEntityRef(htmlParserCtxtPtr ctxt, const xmlChar **str) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Lookup the entity in the table.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseEntityRef: expecting ';'\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseAttValue:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse a value for an attribute
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Note: the parser won't do substitution of entities here, this
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * will be handled later in xmlStringGetNodeList, unless it was
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * asked for ctxt->replaceEntities != 0
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the AttValue parsed or NULL.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * That's an HTMLism, the attribute value may not be quoted
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseSystemLiteral:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML Literal
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the SystemLiteral parsed or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParsePubidLiteral:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML public literal
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the PubidLiteral parsed or NULL.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Name ::= (Letter | '_') (NameChar)*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseScript:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse the content of an HTML SCRIPT or STYLE element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * http://www.w3.org/TR/html4/sgml/dtd.html#StyleSheet
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * http://www.w3.org/TR/html4/types.html#type-script
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.2.1
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Script data ( %Script; in the DTD) can be the content of the SCRIPT
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * element and the value of intrinsic event attributes. User agents must
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * not evaluate script data as HTML markup but instead must pass it on as
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * data to a script engine.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * - The content is passed like CDATA
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * - the attributes for style and scripting "onXXX" are also described
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * as CDATA but SGML allows entities references in attributes so their
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * processing is identical as other attributes
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * One should break here, the specification is clear:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Authors should therefore escape "</" within the content.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Escape mechanisms are specific to each scripting or
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * style sheet language.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * In recovery mode, only break if the end tag matches the
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * current tag, effectively ignoring all tags inside the
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * script/style block and treating the entire block as
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break; /* while */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Element %s embeds close tag\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break; /* while */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Insert as CDATA, which is the same as HTML_PRESERVE_NODE
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->characters(ctxt->userData, buf, nbchar);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Insert as CDATA, which is the same as HTML_PRESERVE_NODE
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->characters(ctxt->userData, buf, nbchar);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseCharData:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse a CharData section.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * if we are within a CDATA section ']]>' marks an end of section.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Ok the segment is to be consumed as chars.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->characters(ctxt->userData, buf, nbchar);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Ok the segment is to be consumed as chars.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->characters(ctxt->userData, buf, nbchar);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Loop detection
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseExternalID:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @publicID: a xmlChar** receiving PubidLiteral
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse an External ID or a Public ID
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * | 'PUBLIC' S PubidLiteral S SystemLiteral
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [83] PublicID ::= 'PUBLIC' S PubidLiteral
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the SystemLiteral; in the second case publicID
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * receives the PubidLiteral. If strict is off,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * it is possible to return NULL and have publicID set.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseExternalID: SYSTEM, no URI\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseExternalID: PUBLIC, no Public Identifier\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * xmlParsePI:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an XML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an XML Processing Instruction.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * this is a Processing Instruction.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse the target name and check for special support like
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * namespace.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * SAX: PI detected.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * SAX: PI detected.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseComment:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse an XML (SGML) comment <!-- .... -->
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check that there is a comment right here.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Comment not terminated \n<!--%.50s\n", buf, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseCharRef:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse Reference declarations
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [66] CharRef ::= '&#' [0-9]+ ';' |
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * '&#x' [0-9a-fA-F]+ ';'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the value parsed (as an int)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseCharRef: context error\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseCharRef: invalid hexadecimal value\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseCharRef: invalid decimal value\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check the value IS_CHAR ...
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseCharRef: invalid xmlChar value %d\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseDocTypeDecl:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse a DOCTYPE declaration
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * We know that '<!DOCTYPE' has been detected.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse the DOCTYPE name.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseDocTypeDecl : no DOCTYPE name !\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check that upper(name) == "HTML" !!!!!!!!!!!!!
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check for SystemID and ExternalID
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * We should be at the end of the DOCTYPE declaration.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* We shouldn't try to resynchronize ... */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Create or update the document according to the DOCTYPE
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Cleanup, since we don't use all those identifiers
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseAttribute:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @value: a xmlChar ** used to store the value of the attribute
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an attribute
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [41] Attribute ::= Name Eq AttValue
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [25] Eq ::= S? '=' S?
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * With namespace:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [NS 11] Attribute ::= QName Eq AttValue
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Also the case QName == xmlns:??? is handled independently as a namespace
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * definition.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the attribute name, and the value in *value.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const xmlChar *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * read the value
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * assume a minimized attribute
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCheckEncoding:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @attvalue: the attribute value
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Checks an http-equiv attribute from a Meta tag to detect
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * the encoding
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * If a new encoding is detected the parser is switched to decode
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * it and pass UTF8
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* do not change encoding */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync encoding = xmlStrcasestr(attvalue, BAD_CAST"charset=");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync encoding = xmlStrcasestr(attvalue, BAD_CAST"charset =");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync enc = xmlParseCharEncoding((const char *) encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * registered set of known encodings
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlCheckEncoding: wrong encoding meta\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * fallback for unknown encodings
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler((const char *) encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * convert as much as possible to the parser reading buffer.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlBufferShrink(ctxt->input->buf->buffer, processed);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlCheckEncoding: encoder error\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->cur = ctxt->input->buf->buffer->content;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCheckMeta:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @atts: the attributes values
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Checks the attributes from a Meta tag
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv"))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content")))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseStartTag:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse a start of tag either for rule element or
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * EmptyElement. In both cases we don't parse the tag closing chars.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [40] STag ::= '<' Name (S Attribute)* S? '>'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * With namespace:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 0 in case of success and -1 in case of error.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseStartTag: invalid element name\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* Dump the bogus tag like browsers do */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check for auto-closure of HTML elements.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check for implied HTML elements.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Avoid html at any level > 0, head at any level != 1
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * or any attempt to recurse body
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseStartTag: misplaced <html> tag\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseStartTag: misplaced <head> tag\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseStartTag: misplaced <body> tag\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Now parse the attributes, it ends up with the ending
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * (S Attribute)* S?
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Well formedness requires at most one declaration of an attribute
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Add the pair to atts
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (n == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* Dump the bogus attribute string up to the next blank or
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * the end of the tag. */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseStartTag: problem parsing attributes\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Handle specific association to the META tag
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * SAX: Start of Element !
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->startElement(ctxt->userData, name, atts);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->startElement(ctxt->userData, name, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseEndTag:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an end of tag
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [42] ETag ::= '</' Name S? '>'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * With namespace
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [NS 9] ETag ::= '</' QName S? '>'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 1 if the current level should be closed.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * We should definitely be at the ending "S? '>'" part
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * We're not at the ending > !!
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Error, unless in recover mode where we search forwards
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * until we find a >
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * If the name read is not one of the element in the parsing stack
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * then return, it's just an error.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (i < 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check for auto-closure of HTML elements.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Well formedness constraints, opening and closing must match.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * With the exception that the autoclose may have popped stuff out
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * of the stack.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->name != NULL) && (!xmlStrEqual(ctxt->name, name))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Opening and ending tag mismatch: %s and %s\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * SAX: End of Tag
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((oldname != NULL) && (xmlStrEqual(oldname, name))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseReference:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse and handle entity references in content,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * this will end-up in a call to character() since this is either a
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * CharRef, or a predefined entity.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int c;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int bits, i = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (c == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (c < 0x800) { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int c;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int bits, i = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (c < 0x80)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (c < 0x800)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (c < 0x10000)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseContent:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse a content: comment, sub-element, reference or text.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (1) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Our tag or one of its parents or children is ending.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync continue; /* while */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseStartTag: invalid element name\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* Dump the bogus tag like browsers do */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Has this node been popped out during parsing of
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * the next element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((CUR != 0) && ((xmlStrEqual(currentNode, BAD_CAST"script")) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Handle SCRIPT/STYLE separately
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Sometimes DOCTYPE arrives in the middle of the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Misplaced DOCTYPE declaration\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * First case : a comment
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Second case : a Processing Instruction.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Third case : a sub-element.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Fourth case : a reference. If it has not been resolved,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parsing returns its Name, create the node
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Fifth case : end of the resource
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (CUR == 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Last case, text. Note that References are handled directly.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "detected an error in element content\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseContent:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse a content: comment, sub-element, reference or text.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseElement:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML element, this is highly recursive
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [39] element ::= EmptyElemTag | STag content ETag
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [41] Attribute ::= Name Eq AttValue
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* Capture start position */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Lookup the info for that element.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check for an Empty Element labeled the XML/SGML way
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Couldn't find end of Start Tag %s\n", name, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * end of parsing of this node.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Capture end position and add node
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check for an Empty Element from DTD definition
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse the content of the element:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Capture end position and add node
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseDocument:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML document (and build a tree if using the standard SAX
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * interface).
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 0, or -1 in case of error. The parser context is augmented
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * as a result of the parsing.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * SAX: beginning of the document processing.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Wipe out everything which is before the first '<'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR == 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse possible comments and PIs before any content
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Then possibly doc type declaration(s) and more Misc
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * (doctypedecl Misc*)?
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse possible comments and PIs before any content
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Time to start parsing the tree itself
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * autoclose
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * SAX: end of the document processing.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parser contexts handling *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlInitParserCtxt:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Initialize a parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 0 in case of success and -1 in case of error
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* Allocate the Input stack */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* Allocate the Node stack */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* Allocate the Name stack */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlFreeParserCtxt:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Free all the memory used by a parser context. However the parsed
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * document in ctxt->myDoc is not freed.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlNewParserCtxt:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Allocate and initialize a new parser context.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the htmlParserCtxtPtr or NULL in case of allocation error
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(NULL, "NewParserCtxt: out of memory\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCreateMemoryParserCtxt:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @buffer: a pointer to a char array
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @size: the size of the array
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Create a parser context for an HTML in-memory document.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the new parser context or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCreateMemoryParserCtxt(const char *buffer, int size) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input->end = &input->buf->buffer->content[input->buf->buffer->use];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCreateDocParserCtxt:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: a pointer to an array of xmlChar
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: a free form C string describing the HTML document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Create a parser context for an HTML document.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * TODO: check the need to add encoding handling there
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the new parser context or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCreateDocParserCtxt(const xmlChar *cur, const char *encoding) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt = htmlCreateMemoryParserCtxt((char *)cur, len);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->encoding = xmlStrdup((const xmlChar *) encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * registered set of known encodings
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Unsupported encoding %s\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * fallback for unknown encodings
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler((const char *) encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Unsupported encoding %s\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Progressive parsing interfaces *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseLookupSequence:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @first: the first char to lookup
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @next: the next char to lookup or zero
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @third: the next char to lookup or zero
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @comment: flag to force checking inside comments
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Try to find if a sequence (first, next, third) or just (first next) or
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * (first) is available in the input stream.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * This function has a side effect of (possibly) incrementing ctxt->checkIndex
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * to avoid rescanning sequences of bytes; it DOES change the state of the
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parser, so do not use it liberally.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * This is basically similar to xmlParseLookupSequence()
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the index to the current parsing point if the full sequence
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * is available, -1 otherwise.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* take into account the sequence length */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!incomment && (base + 4 < len) && !iscomment) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((buf[base] == '<') && (buf[base + 1] == '!') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (buf[base + 2] == '-') && (buf[base + 3] == '-')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* do not increment past <! - some people use <!--> */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((buf[base] == '-') && (buf[base + 1] == '-') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if (next != 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: lookup '%c' found at %d\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (third == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: lookup '%c%c' found at %d\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: lookup '%c%c%c' found at %d\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (third == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: lookup '%c%c%c' failed\n", first, next, third);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseTryOrFinish:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @terminate: last chunk indicator
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Try to progress on parsing
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns zero if no parsing was possible
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try EOF\n"); break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try START\n"); break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try MISC\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try COMMENT\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try PROLOG\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try START_TAG\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try CONTENT\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try CDATA_SECTION\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try END_TAG\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try ENTITY_DECL\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try ENTITY_VALUE\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try ATTRIBUTE_VALUE\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try DTD\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try EPILOG\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try PI\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try SYSTEM_LITERAL\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (1) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync avail = in->buf->buffer->use - (in->cur - in->base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * SAX: end of the document processing.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Document parsing is done !
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Very first chars read from the document flow.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync avail = in->buf->buffer->use - (in->cur - in->base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing internal subset\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering PROLOG\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering MISC\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync avail = in->buf->buffer->use - (in->cur - in->base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing Comment\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing PI\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing internal subset\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering PROLOG\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering START_TAG\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync avail = in->buf->buffer->use - (in->cur - in->base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing Comment\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing PI\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering START_TAG\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync avail = in->buf->buffer->use - (in->cur - in->base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing Comment\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing PI\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering EOF\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering END_TAG\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Lookup the info for that element.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check for an Empty Element labeled the XML/SGML way
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Couldn't find end of Start Tag %s\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * end of parsing of this node.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check for an Empty Element from DTD definition
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Handle preparsed entities and charRef
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Handle SCRIPT/STYLE separately
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering END_TAG\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Sometimes DOCTYPE arrives in the middle of the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Misplaced DOCTYPE declaration\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing Comment\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing PI\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if ((cur == '<') && (next == '!') && (avail < 4)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering END_TAG\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering START_TAG\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing Reference\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* TODO: check generation of subtrees if noent !!! */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * check that the text sequence is complete
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * before handing out the data to the parser
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * to avoid problems with erroneous end of
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * data detection.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing char data\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "detected an error in element content\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: internal error, state == CDATA\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: internal error, state == DTD\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: internal error, state == COMMENT\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: internal error, state == PI\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: internal error, state == ENTITY_DECL\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: internal error, state == ENTITY_VALUE\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering DTD\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: internal error, state == ATTRIBUTE_VALUE\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering START_TAG\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: internal error, state == XML_PARSER_IGNORE\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: internal error, state == XML_PARSER_LITERAL\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * SAX: end of the document processing.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseChunk:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @chunk: a char array
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @size: the size in bytes of the chunk
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @terminate: last chunk indicator
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse a Chunk of memory
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns zero if no error, the xmlParserErrors otherwise.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int base = ctxt->input->base - ctxt->input->buf->buffer->content;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->base = ctxt->input->buf->buffer->content + base;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((terminate) || (ctxt->input->buf->buffer->use > 80))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((in->encoder != NULL) && (in->buffer != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * User entry points *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCreatePushParserCtxt:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @sax: a SAX handler
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @user_data: The user data returned on SAX callbacks
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @chunk: a pointer to an array of chars
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @size: number of chars in the array
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @filename: an optional file name or URI
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @enc: an optional encoding
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Create a parser context for using the HTML parser in push mode
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The value of @filename is used for fetching external entities
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * and error/warning reports.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the new parser context or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->sax != (xmlSAXHandlerPtr) &htmlDefaultSAXHandler)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputStream->base = inputStream->buf->buffer->content;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputStream->cur = inputStream->buf->buffer->content;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int base = ctxt->input->base - ctxt->input->buf->buffer->content;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->base = ctxt->input->buf->buffer->content + base;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif /* LIBXML_PUSH_ENABLED */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlSAXParseDoc:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: a pointer to an array of xmlChar
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: a free form C string describing the HTML document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @sax: the SAX handler block
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @userData: if using SAX, this pointer will be provided on callbacks.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * to handle parse events. If sax is NULL, fallback to the default DOM
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * behavior and return a tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree unless SAX is NULL or the document is
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * not well formed.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlSAXParseDoc(xmlChar *cur, const char *encoding, htmlSAXHandlerPtr sax, void *userData) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseDoc:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: a pointer to an array of xmlChar
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: a free form C string describing the HTML document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML in-memory document and build a tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(htmlSAXParseDoc(cur, encoding, NULL, NULL));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCreateFileParserCtxt:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @filename: the filename
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: a free form C string describing the HTML document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Create a parser context for a file content.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Automatic support for ZLIB/Compress compressed document is provided
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * by default if found at compile-time.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the new parser context or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCreateFileParserCtxt(const char *filename, const char *encoding)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* htmlCharEncoding enc; */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *content, *content_line = (xmlChar *) "charset=";
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync canonicFilename = (char *) xmlCanonicPath((const xmlChar *) filename);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlDefaultSAXHandler.error(NULL, "out of memory\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* set encoding */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync content = xmlMallocAtomic (xmlStrlen(content_line) + strlen(encoding) + 1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlSAXParseFile:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @filename: the filename
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: a free form C string describing the HTML document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @sax: the SAX handler block
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @userData: if using SAX, this pointer will be provided on callbacks.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML file and build a tree. Automatic support for ZLIB/Compress
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * compressed document is provided by default if found at compile-time.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * It uses the given SAX function block to handle the parsing callbacks.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * If sax is NULL, fallback to the default DOM tree building routines.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree unless SAX is NULL or the document is
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * not well formed.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlSAXParseFile(const char *filename, const char *encoding, htmlSAXHandlerPtr sax,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt = htmlCreateFileParserCtxt(filename, encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseFile:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @filename: the filename
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: a free form C string describing the HTML document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML file and build a tree. Automatic support for ZLIB/Compress
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * compressed document is provided by default if found at compile-time.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseFile(const char *filename, const char *encoding) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(htmlSAXParseFile(filename, encoding, NULL, NULL));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlHandleOmittedElem:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @val: int 0 or 1
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Set and return the previous value for handling HTML omitted tags.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the last value: 0 for no handling, 1 for auto insertion.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlElementAllowedHere:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @parent: HTML parent element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @elt: HTML element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Checks whether an HTML element may be a direct child of a parent element.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Note - doesn't check for deprecated elements
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 1 if allowed; 0 otherwise.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlElementAllowedHere(const htmlElemDesc* parent, const xmlChar* elt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char** p ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlElementStatusHere:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @parent: HTML parent element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @elt: HTML element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Checks whether an HTML element may be a direct child of a parent element,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * and if so whether it is valid or deprecated.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns one of HTML_VALID, HTML_DEPRECATED, HTML_INVALID
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlElementStatusHere(const htmlElemDesc* parent, const htmlElemDesc* elt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ( ! htmlElementAllowedHere(parent, (const xmlChar*) elt->name ) )
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return ( elt->dtd == 0 ) ? HTML_VALID : HTML_DEPRECATED ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlAttrAllowed:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @elt: HTML element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @attr: HTML attribute
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @legacy: whether to allow deprecated attributes
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Checks whether an attribute is valid for an element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Has full knowledge of Required and Deprecated attributes
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns one of HTML_REQUIRED, HTML_VALID, HTML_DEPRECATED, HTML_INVALID
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlAttrAllowed(const htmlElemDesc* elt, const xmlChar* attr, int legacy) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char** p ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlNodeStatus:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @node: an htmlNodePtr in a tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @legacy: whether to allow deprecated elements (YES is faster here
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * for Element nodes)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Checks whether the tree node is valid. Experimental (the author
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * only uses the HTML enhancements in a SAX parser)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Return: for Element nodes, a return from htmlElementAllowedHere (if
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * legacy allowed) or htmlElementStatusHere (otherwise).
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * for Attribute nodes, a return from htmlAttrAllowed
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * for other nodes, HTML_NA (no checks performed)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlNodeStatus(const htmlNodePtr node, int legacy) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlTagLookup(node->parent->name) , node->name, legacy) ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync default: return HTML_NA ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * New set (2.6.0) of simpler and more flexible APIs *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * DICT_FREE:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @str: a string
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Free a string if it is not owned by the "dict" dictionary in the
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * current scope
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCtxtReset:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Reset a parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCtxtUseOptions:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Applies the options to the parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 0 in case of success, the set of unknown or unimplemented options
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * in case of error.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlDoRead:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URL: the base URL to use for the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @reuse: keep the context for reuse
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Common front-end for the htmlRead functions
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlReadDoc:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: a pointer to a zero terminated string
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URL: the base URL to use for the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML in-memory document and build a tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (htmlDoRead(ctxt, URL, encoding, options, 0));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlReadFile:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @filename: a file or URL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML file from the filesystem or the network.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlReadFile(const char *filename, const char *encoding, int options)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt = htmlCreateFileParserCtxt(filename, encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlReadMemory:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @buffer: a pointer to a char array
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @size: the size of the array
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URL: the base URL to use for the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML in-memory document and build a tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (htmlDoRead(ctxt, URL, encoding, options, 0));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlReadFd:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @fd: an open file descriptor
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URL: the base URL to use for the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML document from a file descriptor and build a tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlReadFd(int fd, const char *URL, const char *encoding, int options)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (htmlDoRead(ctxt, URL, encoding, options, 0));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlReadIO:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ioread: an I/O read function
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ioclose: an I/O close function
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ioctx: an I/O handler
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URL: the base URL to use for the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML document from I/O functions and source and build a tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync void *ioctx, const char *URL, const char *encoding, int options)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (htmlDoRead(ctxt, URL, encoding, options, 0));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCtxtReadDoc:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: a pointer to a zero terminated string
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URL: the base URL to use for the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML in-memory document and build a tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * This reuses the existing @ctxt parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *URL, const char *encoding, int options)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (htmlDoRead(ctxt, URL, encoding, options, 1));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCtxtReadFile:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @filename: a file or URL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML file from the filesystem or the network.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * This reuses the existing @ctxt parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync stream = xmlLoadExternalEntity(filename, NULL, ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (htmlDoRead(ctxt, NULL, encoding, options, 1));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCtxtReadMemory:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @buffer: a pointer to a char array
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @size: the size of the array
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URL: the base URL to use for the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML in-memory document and build a tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * This reuses the existing @ctxt parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *URL, const char *encoding, int options)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (htmlDoRead(ctxt, URL, encoding, options, 1));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCtxtReadFd:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @fd: an open file descriptor
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URL: the base URL to use for the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML document from a file descriptor and build a tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * This reuses the existing @ctxt parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *URL, const char *encoding, int options)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (htmlDoRead(ctxt, URL, encoding, options, 1));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCtxtReadIO:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ioread: an I/O read function
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ioclose: an I/O close function
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ioctx: an I/O handler
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URL: the base URL to use for the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML document from I/O functions and source and build a tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * This reuses the existing @ctxt parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *URL,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (htmlDoRead(ctxt, URL, encoding, options, 1));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif /* LIBXML_HTML_ENABLED */