38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * HTMLparser.c : an HTML 4.0 non-verifying parser
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * See Copyright for the status of this software.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * daniel@veillard.com
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#define IN_LIBXML
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include "libxml.h"
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef LIBXML_HTML_ENABLED
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <string.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef HAVE_CTYPE_H
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <ctype.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef HAVE_STDLIB_H
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <stdlib.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef HAVE_SYS_STAT_H
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <sys/stat.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef HAVE_FCNTL_H
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <fcntl.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef HAVE_UNISTD_H
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <unistd.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef HAVE_ZLIB_H
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <zlib.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/xmlmemory.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/tree.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/parser.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/parserInternals.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/xmlerror.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/HTMLparser.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/HTMLtree.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/entities.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/encoding.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/valid.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/xmlIO.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/globals.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/uri.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#define HTML_MAX_NAMELEN 1000
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#define HTML_PARSER_BIG_BUFFER_SIZE 1000
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#define HTML_PARSER_BUFFER_SIZE 100
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/* #define DEBUG */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/* #define DEBUG_PUSH */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic int htmlOmittedDefaultValue = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncxmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar end, xmlChar end2, xmlChar end3);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void htmlParseComment(htmlParserCtxtPtr ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Some factorized error routines *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlErrMemory:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @extra: extra informations
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Handle a redefinition of attribute error
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ctxt->instate == XML_PARSER_EOF))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->errNo = XML_ERR_NO_MEMORY;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_EOF;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->disableSAX = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (extra)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL, 0, 0,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Memory allocation failed : %s\n", extra);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL, 0, 0, "Memory allocation failed\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseErr:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @error: the error number
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @msg: the error message
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @str1: string infor
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @str2: string infor
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Handle a fatal parser error, i.e. violating Well-Formedness constraints
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *msg, const xmlChar *str1, const xmlChar *str2)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ctxt->instate == XML_PARSER_EOF))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->errNo = error;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync XML_ERR_ERROR, NULL, 0,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (const char *) str1, (const char *) str2,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, 0, 0,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync msg, str1, str2);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->wellFormed = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseErrInt:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @error: the error number
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @msg: the error message
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @val: integer info
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Handle a fatal parser error, i.e. violating Well-Formedness constraints
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *msg, int val)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ctxt->instate == XML_PARSER_EOF))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->errNo = error;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync XML_ERR_ERROR, NULL, 0, NULL, NULL,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, val, 0, msg, val);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->wellFormed = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parser stacks related functions and macros *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlnamePush:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @value: the element name
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Pushes a new element name on top of the name stack
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 0 in case of error, the index in the stack otherwise
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic int
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->nameNr >= ctxt->nameMax) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nameMax *= 2;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nameTab = (const xmlChar * *)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlRealloc((xmlChar * *)ctxt->nameTab,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nameMax *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync sizeof(ctxt->nameTab[0]));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->nameTab == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(ctxt, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nameTab[ctxt->nameNr] = value;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->name = value;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (ctxt->nameNr++);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlnamePop:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Pops the top element name from the name stack
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the name just removed
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const xmlChar *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlnamePop(htmlParserCtxtPtr ctxt)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *ret;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->nameNr <= 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nameNr--;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->nameNr < 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->nameNr > 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->name = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ret = ctxt->nameTab[ctxt->nameNr];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nameTab[ctxt->nameNr] = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (ret);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.  All of them expect a variable named `ctxt` (the parser
 * context) to be in scope at the point of use.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR the current pointer to the xmlChar to be parsed.
 *   CUR     the current xmlChar value, read as a single unit without any
 *           multi-byte decoding.  It should be used only to compare to
 *           ASCII values, otherwise it would break when running with
 *           UTF-8 encoding.
 *   RAW     same as CUR, except that it yields -1 while ctxt->token is set.
 *   NXT(n)  the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substrings.
 *   UPPER   the current xmlChar converted to uppercase.
 *   UPP(n)  the n'th next xmlChar converted to uppercase. Same as CUR,
 *           it should be used only to compare on ASCII based substrings.
 *   SKIP(n) skip n xmlChar, advancing the column counter by n as well.  It
 *           must be used only to skip ASCII defined strings without
 *           newlines.  The argument is evaluated three times.
 *   SHRINK / GROW
 *           conditionally call xmlParserInputShrink()/xmlParserInputGrow().
 *           Both expand to a bare `if` statement: never use them as the
 *           unbraced body of an `if` that has an `else`.
 *
 * Cleaner macros
 *
 *   CURRENT   the current xmlChar value as an int.
 *   NEXT      skip to the next character by calling xmlNextChar().
 *   NEXTL(l)  skip the current character, which is l xmlChars long, and
 *             keep the line/column counters up to date.
 *   CUR_CHAR(l)
 *             the current character as decoded by htmlCurrentChar(); its
 *             length is stored in l.
 *   CUR_SCHAR(s, l)
 *             same for the string s, using xmlStringCurrentChar().
 *   COPY_BUF(l,b,i,v)
 *             append character v, which is l xmlChars long, to buffer b at
 *             index i and advance i accordingly.
 */

#define UPPER (toupper(*ctxt->input->cur))

#define SKIP(val) \
    (ctxt->nbChars += (val), ctxt->input->cur += (val), \
     ctxt->input->col += (val))

#define NXT(val) ctxt->input->cur[(val)]

#define UPP(val) (toupper(ctxt->input->cur[(val)]))

#define CUR_PTR ctxt->input->cur

#define SHRINK if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
                   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
        xmlParserInputShrink(ctxt->input)

#define GROW if ((ctxt->progressive == 0) && \
                 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK)

#define CURRENT ((int) (*ctxt->input->cur))

#define SKIP_BLANKS htmlSkipBlankChars(ctxt)

/* Imported from XML */

/* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
#define CUR ((int) (*ctxt->input->cur))
#define NEXT xmlNextChar(ctxt)

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))

#define NEXTL(l) do { \
        if (*(ctxt->input->cur) == '\n') { \
            ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->token = 0; ctxt->input->cur += (l); ctxt->nbChars++; \
    } while (0)

/************
 * Disabled legacy tail of NEXTL, kept for reference:
 *
 *   if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
 *   if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
 ************/

#define CUR_CHAR(l) htmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Wrapped in do { } while (0) so that it behaves as a single statement,
 * including when it is the unbraced body of an if/else.
 */
#define COPY_BUF(l,b,i,v) do { \
        if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
        else (i) += xmlCopyChar((l), &(b)[(i)], (v)); \
    } while (0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCurrentChar:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: the HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @len: pointer to the length of the char read
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The current char value, if using UTF-8 this may actually span multiple
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * bytes in the input buffer. Implement the end of line normalization:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * 2.11 End-of-Line Handling
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * If the encoding is unspecified, in the case we find an ISO-Latin-1
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * char, then the encoding converter is plugged in automatically.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the current char value and its length
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic int
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->instate == XML_PARSER_EOF)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->token != 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *len = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(ctxt->token);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * We are supposed to handle UTF8, check it's valid
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * From rfc2044: encoding of the Unicode values on UTF-8:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * UCS-4 range (hex.) UTF-8 octet sequence (binary)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * 0000 0000-0000 007F 0xxxxxxx
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check for the 0x110000 limit too
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const unsigned char *cur = ctxt->input->cur;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned char c;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int val;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync c = *cur;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (c & 0x80) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur[1] == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur[1] & 0xc0) != 0x80)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto encoding_error;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((c & 0xe0) == 0xe0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur[2] == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur[2] & 0xc0) != 0x80)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto encoding_error;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((c & 0xf0) == 0xf0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur[3] == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (((c & 0xf8) != 0xf0) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((cur[3] & 0xc0) != 0x80))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto encoding_error;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* 4-byte code */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *len = 4;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync val = (cur[0] & 0x7) << 18;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync val |= (cur[1] & 0x3f) << 12;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync val |= (cur[2] & 0x3f) << 6;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync val |= cur[3] & 0x3f;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* 3-byte code */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *len = 3;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync val = (cur[0] & 0xf) << 12;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync val |= (cur[1] & 0x3f) << 6;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync val |= cur[2] & 0x3f;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* 2-byte code */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *len = 2;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync val = (cur[0] & 0x1f) << 6;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync val |= cur[1] & 0x3f;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!IS_CHAR(val)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Char 0x%X out of allowed range\n", val);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(val);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* 1-byte code */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *len = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return((int) *ctxt->input->cur);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Assume it's a fixed length encoding (1) with
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * a compatible encoding for the ASCII set, since
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * XML constructs only use < 128 chars
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *len = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((int) *ctxt->input->cur < 0x80)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return((int) *ctxt->input->cur);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Humm this is bad, do an automatic flow conversion
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->charset = XML_CHAR_ENCODING_UTF8;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(xmlCurrentChar(ctxt, len));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncencoding_error:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * If we detect an UTF8 error that probably mean that the
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * input encoding didn't get properly advertized in the
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * declaration header. Report the error and switch the encoding
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * to ISO-Latin-1 (if you don't like this policy, just declare the
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * encoding !)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync char buffer[150];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->input->end - ctxt->input->cur >= 4) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->cur[0], ctxt->input->cur[1],
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->cur[2], ctxt->input->cur[3]);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync snprintf(buffer, 149, "Bytes: 0x%02X\n", ctxt->input->cur[0]);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Input is not proper UTF-8, indicate encoding !\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync BAD_CAST buffer, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->charset = XML_CHAR_ENCODING_8859_1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *len = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return((int) *ctxt->input->cur);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlSkipBlankChars:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: the HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * skip all blanks character found at that point in the input streams.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the number of space chars skipped
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic int
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int res = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (IS_BLANK_CH(*(ctxt->input->cur))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((*ctxt->input->cur == 0) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlPopInput(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (*(ctxt->input->cur) == '\n') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->line++; ctxt->input->col = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else ctxt->input->col++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->cur++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nbChars++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (*ctxt->input->cur == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync res++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(res);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The list of HTML elements and their properties *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
/*
 *  Start Tag: 1 means the start tag can be omitted
 *  End Tag:   1 means the end tag can be omitted
 *             2 means it's forbidden (empty elements)
 *             3 means the tag is stylistic and should be closed easily
 *  Depr:      this element is deprecated
 *  DTD:       1 means that this element is valid only in the Loose DTD
 *             2 means that this element is valid only in the Frameset DTD
 *
 * Name,Start Tag,End Tag,Save End,Empty,Deprecated,DTD,inline,Description
	, subElements , impliedsubelt , Attributes, userdata
 */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
/* Definitions and a couple of vars for HTML Elements */

/*
 * Each list macro expands to a comma-separated run of string literals and
 * is paired with an NB_xxx macro giving its number of entries.
 *
 * Lists must always be chained with an explicit comma: two adjacent string
 * literals are concatenated by the compiler, which silently fuses the last
 * name of one list with the first name of the next (e.g. "small" "em"
 * becomes the single bogus entry "smallem").  PCDATA and MODIFIER expand
 * to nothing, so they are the only ones that take no separating comma.
 */
#define PCDATA
#define NB_PCDATA 0
#define MODIFIER
#define NB_MODIFIER 0
#define FONTSTYLE "tt", "i", "b", "u", "s", "strike", "big", "small"
#define NB_FONTSTYLE 8
#define PHRASE "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr", "acronym"
#define NB_PHRASE 10
#define SPECIAL "a", "img", "applet", "embed", "object", "font", "basefont", "br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe"
#define NB_SPECIAL 16
#define FORMCTRL "input", "select", "textarea", "label", "button"
#define NB_FORMCTRL 5
#define HEADING "h1", "h2", "h3", "h4", "h5", "h6"
#define NB_HEADING 6
#define LIST "ul", "ol", "dir", "menu"
#define NB_LIST 4
#define INLINE PCDATA FONTSTYLE, PHRASE, SPECIAL, FORMCTRL
#define NB_INLINE (NB_PCDATA + NB_FONTSTYLE + NB_PHRASE + NB_SPECIAL + NB_FORMCTRL)
#define BLOCK HEADING, LIST, "pre", "p", "dl", "div", "center", "noscript", "noframes", "blockquote", "form", "isindex", "hr", "table", "fieldset", "address"
#define NB_BLOCK (NB_HEADING + NB_LIST + 14)
#define FLOW BLOCK,INLINE
#define NB_FLOW (NB_BLOCK + NB_INLINE)
#define EMPTY NULL


/* NULL-terminated lists of the flow (block + inline) and inline elements */
static const char* const html_flow[] = { FLOW, NULL } ;
static const char* const html_inline[] = { INLINE, NULL } ;

/* placeholders: elts with content but no subelements */
static const char* const html_pcdata[] = { NULL } ;
#define html_cdata html_pcdata
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
/* ... and for HTML Attributes */

/*
 * Same convention as for the element lists: every macro expands to a
 * comma-separated run of attribute names and NB_xxx is its entry count.
 */
#define COREATTRS "id", "class", "style", "title"
#define NB_COREATTRS 4
#define I18N "lang", "dir"
#define NB_I18N 2
#define EVENTS "onclick", "ondblclick", "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress", "onkeydown", "onkeyup"
#define NB_EVENTS 9
#define ATTRS COREATTRS,I18N,EVENTS
#define NB_ATTRS (NB_COREATTRS + NB_I18N + NB_EVENTS)
#define CELLHALIGN "align", "char", "charoff"
#define NB_CELLHALIGN 3
#define CELLVALIGN "valign"
#define NB_CELLVALIGN 1

/* NULL-terminated attribute name lists built from the macros above */
static const char* const html_attrs[] = { ATTRS, NULL } ;
static const char* const core_i18n_attrs[] = { COREATTRS, I18N, NULL } ;
static const char* const core_attrs[] = { COREATTRS, NULL } ;
static const char* const i18n_attrs[] = { I18N, NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/* Other declarations that should go inline ... */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const a_attrs[] = { ATTRS, "charset", "type", "name",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "href", "hreflang", "rel", "rev", "accesskey", "shape", "coords",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "tabindex", "onfocus", "onblur", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const target_attr[] = { "target", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const rows_cols_attr[] = { "rows", "cols", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const alt_attr[] = { "alt", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const src_alt_attrs[] = { "src", "alt", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const href_attrs[] = { "href", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const clear_attrs[] = { "clear", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const inline_p[] = { INLINE, "p", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const flow_param[] = { FLOW, "param", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const applet_attrs[] = { COREATTRS , "codebase",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "archive", "alt", "name", "height", "width", "align",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "hspace", "vspace", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const area_attrs[] = { "shape", "coords", "href", "nohref",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "tabindex", "accesskey", "onfocus", "onblur", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const basefont_attrs[] =
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync { "id", "size", "color", "face", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const quote_attrs[] = { ATTRS, "cite", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const body_contents[] = { FLOW, "ins", "del", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const body_attrs[] = { ATTRS, "onload", "onunload", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const body_depr[] = { "background", "bgcolor", "text",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "link", "vlink", "alink", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const button_attrs[] = { ATTRS, "name", "value", "type",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "disabled", "tabindex", "accesskey", "onfocus", "onblur", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const col_attrs[] = { ATTRS, "span", "width", CELLHALIGN, CELLVALIGN, NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const col_elt[] = { "col", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const edit_attrs[] = { ATTRS, "datetime", "cite", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const compact_attrs[] = { ATTRS, "compact", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const dl_contents[] = { "dt", "dd", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const compact_attr[] = { "compact", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const label_attr[] = { "label", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const fieldset_contents[] = { FLOW, "legend" } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const font_attrs[] = { COREATTRS, I18N, "size", "color", "face" , NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const form_contents[] = { HEADING, LIST, INLINE, "pre", "p", "div", "center", "noscript", "noframes", "blockquote", "isindex", "hr", "table", "fieldset", "address", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const form_attrs[] = { ATTRS, "method", "enctype", "accept", "name", "onsubmit", "onreset", "accept-charset", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const frame_attrs[] = { COREATTRS, "longdesc", "name", "src", "frameborder", "marginwidth", "marginheight", "noresize", "scrolling" , NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const frameset_attrs[] = { COREATTRS, "rows", "cols", "onload", "onunload", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const frameset_contents[] = { "frameset", "frame", "noframes", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const head_attrs[] = { I18N, "profile", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const head_contents[] = { "title", "isindex", "base", "script", "style", "meta", "link", "object", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const hr_depr[] = { "align", "noshade", "size", "width", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const version_attr[] = { "version", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const html_content[] = { "head", "body", "frameset", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const iframe_attrs[] = { COREATTRS, "longdesc", "name", "src", "frameborder", "marginwidth", "marginheight", "scrolling", "align", "height", "width", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const img_attrs[] = { ATTRS, "longdesc", "name", "height", "width", "usemap", "ismap", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const embed_attrs[] = { COREATTRS, "align", "alt", "border", "code", "codebase", "frameborder", "height", "hidden", "hspace", "name", "palette", "pluginspace", "pluginurl", "src", "type", "units", "vspace", "width", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const input_attrs[] = { ATTRS, "type", "name", "value", "checked", "disabled", "readonly", "size", "maxlength", "src", "alt", "usemap", "ismap", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", "accept", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const prompt_attrs[] = { COREATTRS, I18N, "prompt", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const label_attrs[] = { ATTRS, "for", "accesskey", "onfocus", "onblur", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const legend_attrs[] = { ATTRS, "accesskey", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const align_attr[] = { "align", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const link_attrs[] = { ATTRS, "charset", "href", "hreflang", "type", "rel", "rev", "media", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const map_contents[] = { BLOCK, "area", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const name_attr[] = { "name", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const action_attr[] = { "action", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const blockli_elt[] = { BLOCK, "li", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const content_attr[] = { "content", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const type_attr[] = { "type", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const object_contents[] = { FLOW, "param", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const object_attrs[] = { ATTRS, "declare", "classid", "codebase", "data", "type", "codetype", "archive", "standby", "height", "width", "usemap", "name", "tabindex", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const object_depr[] = { "align", "border", "hspace", "vspace", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const ol_attrs[] = { "type", "compact", "start", NULL} ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const option_elt[] = { "option", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const optgroup_attrs[] = { ATTRS, "disabled", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const option_attrs[] = { ATTRS, "disabled", "label", "selected", "value", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const param_attrs[] = { "id", "value", "valuetype", "type", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const width_attr[] = { "width", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const pre_content[] = { PHRASE, "tt", "i", "b", "u", "s", "strike", "a", "br", "script", "map", "q", "span", "bdo", "iframe", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const script_attrs[] = { "charset", "src", "defer", "event", "for", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const language_attr[] = { "language", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const select_content[] = { "optgroup", "option", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const select_attrs[] = { ATTRS, "name", "size", "multiple", "disabled", "tabindex", "onfocus", "onblur", "onchange", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const style_attrs[] = { I18N, "media", "title", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const table_attrs[] = { ATTRS "summary", "width", "border", "frame", "rules", "cellspacing", "cellpadding", "datapagesize", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const table_depr[] = { "align", "bgcolor", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const table_contents[] = { "caption", "col", "colgroup", "thead", "tfoot", "tbody", "tr", NULL} ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const tr_elt[] = { "tr", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const talign_attrs[] = { ATTRS, CELLHALIGN, CELLVALIGN, NULL} ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const th_td_depr[] = { "nowrap", "bgcolor", "width", "height", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const th_td_attr[] = { ATTRS, "abbr", "axis", "headers", "scope", "rowspan", "colspan", CELLHALIGN, CELLVALIGN, NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const textarea_attrs[] = { ATTRS, "name", "disabled", "readonly", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const tr_contents[] = { "th", "td", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const bgcolor_attr[] = { "bgcolor", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const li_elt[] = { "li", NULL } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const ul_depr[] = { "type", "compact", NULL} ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const char* const dir_attr[] = { "dir", NULL} ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
/*
 * Cast applied to the `const char* const []` name lists when they are
 * stored in the element table initializers: it yields a `const char **`.
 */
#define DECL (const char **)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const htmlElemDesc
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtml40ElementTable[] = {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "a", 0, 0, 0, 0, 0, 0, 1, "anchor ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline , NULL , DECL a_attrs , DECL target_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "abbr", 0, 0, 0, 0, 0, 0, 1, "abbreviated form",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline , NULL , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "acronym", 0, 0, 0, 0, 0, 0, 1, "",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline , NULL , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "address", 0, 0, 0, 0, 0, 0, 0, "information on author ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL inline_p , NULL , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "applet", 0, 0, 0, 0, 1, 1, 2, "java applet ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL flow_param , NULL , NULL , DECL applet_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "area", 0, 2, 2, 1, 0, 0, 0, "client-side image map area ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY , NULL , DECL area_attrs , DECL target_attr, DECL alt_attr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "b", 0, 3, 0, 0, 0, 0, 1, "bold text style",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline , NULL , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "base", 0, 2, 2, 1, 0, 0, 0, "document base uri ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY , NULL , NULL , DECL target_attr, DECL href_attrs
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "basefont", 0, 2, 2, 1, 1, 1, 1, "base font size " ,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY , NULL , NULL, DECL basefont_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "bdo", 0, 0, 0, 0, 0, 0, 1, "i18n bidi over-ride ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline , NULL , DECL core_i18n_attrs, NULL, DECL dir_attr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "big", 0, 3, 0, 0, 0, 0, 1, "large text style",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline , NULL , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "blockquote", 0, 0, 0, 0, 0, 0, 0, "long quotation ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow , NULL , DECL quote_attrs , NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "body", 1, 1, 0, 0, 0, 0, 0, "document body ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL body_contents , "div" , DECL body_attrs, DECL body_depr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "br", 0, 2, 2, 1, 0, 0, 1, "forced line break ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY , NULL , DECL core_attrs, DECL clear_attrs , NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "button", 0, 0, 0, 0, 0, 0, 2, "push button ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow MODIFIER , NULL , DECL button_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "caption", 0, 0, 0, 0, 0, 0, 0, "table caption ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline , NULL , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "center", 0, 3, 0, 0, 1, 1, 0, "shorthand for div align=center ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow , NULL , NULL, DECL html_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "cite", 0, 0, 0, 0, 0, 0, 1, "citation",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline , NULL , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "code", 0, 0, 0, 0, 0, 0, 1, "computer code fragment",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline , NULL , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "col", 0, 2, 2, 1, 0, 0, 0, "table column ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY , NULL , DECL col_attrs , NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "colgroup", 0, 1, 0, 0, 0, 0, 0, "table column group ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL col_elt , "col" , DECL col_attrs , NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "dd", 0, 1, 0, 0, 0, 0, 0, "definition description ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow , NULL , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "del", 0, 0, 0, 0, 0, 0, 2, "deleted text ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow , NULL , DECL edit_attrs , NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "dfn", 0, 0, 0, 0, 0, 0, 1, "instance definition",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline , NULL , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "dir", 0, 0, 0, 0, 1, 1, 0, "directory list",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL blockli_elt, "li" , NULL, DECL compact_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "div", 0, 0, 0, 0, 0, 0, 0, "generic language/style container",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow, NULL, DECL html_attrs, DECL align_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "dl", 0, 0, 0, 0, 0, 0, 0, "definition list ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL dl_contents , "dd" , DECL html_attrs, DECL compact_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "dt", 0, 1, 0, 0, 0, 0, 0, "definition term ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "em", 0, 3, 0, 0, 0, 0, 1, "emphasis",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "embed", 0, 1, 0, 0, 1, 1, 1, "generic embedded object ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY, NULL, DECL embed_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "fieldset", 0, 0, 0, 0, 0, 0, 0, "form control group ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL fieldset_contents , NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "font", 0, 3, 0, 0, 1, 1, 1, "local change to font ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, NULL, DECL font_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "form", 0, 0, 0, 0, 0, 0, 0, "interactive form ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL form_contents, "fieldset", DECL form_attrs , DECL target_attr, DECL action_attr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "frame", 0, 2, 2, 1, 0, 2, 0, "subwindow " ,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY, NULL, NULL, DECL frame_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "frameset", 0, 0, 0, 0, 0, 2, 0, "window subdivision" ,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL frameset_contents, "noframes" , NULL , DECL frameset_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "h1", 0, 0, 0, 0, 0, 0, 0, "heading ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "h2", 0, 0, 0, 0, 0, 0, 0, "heading ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "h3", 0, 0, 0, 0, 0, 0, 0, "heading ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "h4", 0, 0, 0, 0, 0, 0, 0, "heading ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "h5", 0, 0, 0, 0, 0, 0, 0, "heading ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "h6", 0, 0, 0, 0, 0, 0, 0, "heading ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "head", 1, 1, 0, 0, 0, 0, 0, "document head ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL head_contents, NULL, DECL head_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "hr", 0, 2, 2, 1, 0, 0, 0, "horizontal rule " ,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY, NULL, DECL html_attrs, DECL hr_depr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "html", 1, 1, 0, 0, 0, 0, 0, "document root element ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_content , NULL , DECL i18n_attrs, DECL version_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "i", 0, 3, 0, 0, 0, 0, 1, "italic text style",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "iframe", 0, 0, 0, 0, 0, 1, 2, "inline subwindow ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow, NULL, NULL, DECL iframe_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "img", 0, 2, 2, 1, 0, 0, 1, "embedded image ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY, NULL, DECL img_attrs, DECL align_attr, DECL src_alt_attrs
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "input", 0, 2, 2, 1, 0, 0, 1, "form control ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY, NULL, DECL input_attrs , DECL align_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "ins", 0, 0, 0, 0, 0, 0, 2, "inserted text",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow, NULL, DECL edit_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "isindex", 0, 2, 2, 1, 1, 1, 0, "single line prompt ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY, NULL, NULL, DECL prompt_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "kbd", 0, 0, 0, 0, 0, 0, 1, "text to be entered by the user",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "label", 0, 0, 0, 0, 0, 0, 1, "form field label text ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline MODIFIER, NULL, DECL label_attrs , NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "legend", 0, 0, 0, 0, 0, 0, 0, "fieldset legend ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL legend_attrs , DECL align_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "li", 0, 1, 1, 0, 0, 0, 0, "list item ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "link", 0, 2, 2, 1, 0, 0, 0, "a media-independent link ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY, NULL, DECL link_attrs, DECL target_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "map", 0, 0, 0, 0, 0, 0, 2, "client-side image map ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL map_contents , NULL, DECL html_attrs , NULL, DECL name_attr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "menu", 0, 0, 0, 0, 1, 1, 0, "menu list ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL blockli_elt , NULL, NULL, DECL compact_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "meta", 0, 2, 2, 1, 0, 0, 0, "generic metainformation ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY, NULL, DECL meta_attrs , NULL , DECL content_attr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "noframes", 0, 0, 0, 0, 0, 2, 0, "alternate content container for non frame-based rendering ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL noframes_content, "body" , DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "noscript", 0, 0, 0, 0, 0, 0, 0, "alternate content container for non script-based rendering ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow, "div", DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "object", 0, 0, 0, 0, 0, 0, 2, "generic embedded object ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL object_contents , "div" , DECL object_attrs, DECL object_depr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "ol", 0, 0, 0, 0, 0, 0, 0, "ordered list ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL li_elt , "li" , DECL html_attrs, DECL ol_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "optgroup", 0, 0, 0, 0, 0, 0, 0, "option group ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL option_elt , "option", DECL optgroup_attrs, NULL, DECL label_attr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "option", 0, 1, 0, 0, 0, 0, 0, "selectable choice " ,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_pcdata, NULL, DECL option_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "p", 0, 1, 0, 0, 0, 0, 0, "paragraph ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "param", 0, 2, 2, 1, 0, 0, 0, "named property value ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync EMPTY, NULL, DECL param_attrs, NULL, DECL name_attr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "pre", 0, 0, 0, 0, 0, 0, 0, "preformatted text ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL pre_content, NULL, DECL html_attrs, DECL width_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "q", 0, 0, 0, 0, 0, 0, 1, "short inline quotation ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL quote_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "s", 0, 3, 0, 0, 1, 1, 1, "strike-through text style",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, NULL, DECL html_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "samp", 0, 0, 0, 0, 0, 0, 1, "sample program output, scripts, etc.",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "script", 0, 0, 0, 0, 0, 0, 2, "script statements ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_cdata, NULL, DECL script_attrs, DECL language_attr, DECL type_attr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "select", 0, 0, 0, 0, 0, 0, 1, "option selector ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL select_content, NULL, DECL select_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "small", 0, 3, 0, 0, 0, 0, 1, "small text style",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "span", 0, 0, 0, 0, 0, 0, 1, "generic language/style container ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "strike", 0, 3, 0, 0, 1, 1, 1, "strike-through text",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, NULL, DECL html_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "strong", 0, 3, 0, 0, 0, 0, 1, "strong emphasis",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "style", 0, 0, 0, 0, 0, 0, 0, "style info ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_cdata, NULL, DECL style_attrs, NULL, DECL type_attr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "sub", 0, 3, 0, 0, 0, 0, 1, "subscript",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "sup", 0, 3, 0, 0, 0, 0, 1, "superscript ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "table", 0, 0, 0, 0, 0, 0, 0, "",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL table_contents , "tr" , DECL table_attrs , DECL table_depr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "tbody", 1, 0, 0, 0, 0, 0, 0, "table body ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "td", 0, 0, 0, 0, 0, 0, 0, "table data cell",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow, NULL, DECL th_td_attr, DECL th_td_depr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "textarea", 0, 0, 0, 0, 0, 0, 1, "multi-line text field ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_pcdata, NULL, DECL textarea_attrs, NULL, DECL rows_cols_attr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "tfoot", 0, 1, 0, 0, 0, 0, 0, "table footer ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "th", 0, 1, 0, 0, 0, 0, 0, "table header cell",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_flow, NULL, DECL th_td_attr, DECL th_td_depr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "thead", 0, 1, 0, 0, 0, 0, 0, "table header ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "title", 0, 0, 0, 0, 0, 0, 0, "document title ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_pcdata, NULL, DECL i18n_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "tr", 0, 0, 0, 0, 0, 0, 0, "table row ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL tr_contents , "td" , DECL talign_attrs, DECL bgcolor_attr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "tt", 0, 3, 0, 0, 0, 0, 1, "teletype or monospaced text style",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "u", 0, 3, 0, 0, 1, 1, 1, "underlined text style",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, NULL, DECL html_attrs, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "ul", 0, 0, 0, 0, 0, 0, 0, "unordered list ",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL li_elt , "li" , DECL html_attrs, DECL ul_depr, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync},
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ "var", 0, 0, 0, 0, 0, 0, 1, "instance of a variable or program argument",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DECL html_inline, NULL, DECL html_attrs, NULL, NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync};
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
/*
 * Auto-close table: start tags that imply the end of the current element.
 *
 * The array is a sequence of NULL-terminated groups.  The first string of
 * a group is the name of the tag being opened; the remaining strings are
 * the names of the open elements which that start tag implicitly closes.
 * A lone NULL after the last group terminates the whole table.
 */
static const char * const htmlStartClose[] = {
    "form",       "form", "p", "hr", "h1", "h2", "h3", "h4", "h5", "h6",
                  "dl", "ul", "ol", "menu", "dir", "address", "pre",
                  "listing", "xmp", "head", NULL,
    "head",       "p", NULL,
    "title",      "p", NULL,
    "body",       "head", "style", "link", "title", "p", NULL,
    "frameset",   "head", "style", "link", "title", "p", NULL,
    "li",         "p", "h1", "h2", "h3", "h4", "h5", "h6", "dl", "address",
                  "pre", "listing", "xmp", "head", "li", NULL,
    "hr",         "p", "head", NULL,
    "h1",         "p", "head", NULL,
    "h2",         "p", "head", NULL,
    "h3",         "p", "head", NULL,
    "h4",         "p", "head", NULL,
    "h5",         "p", "head", NULL,
    "h6",         "p", "head", NULL,
    "dir",        "p", "head", NULL,
    "address",    "p", "head", "ul", NULL,
    "pre",        "p", "head", "ul", NULL,
    "listing",    "p", "head", NULL,
    "xmp",        "p", "head", NULL,
    "blockquote", "p", "head", NULL,
    "dl",         "p", "dt", "menu", "dir", "address", "pre", "listing",
                  "xmp", "head", NULL,
    "dt",         "p", "menu", "dir", "address", "pre", "listing", "xmp",
                  "head", "dd", NULL,
    "dd",         "p", "menu", "dir", "address", "pre", "listing", "xmp",
                  "head", "dt", NULL,
    "ul",         "p", "head", "ol", "menu", "dir", "address", "pre",
                  "listing", "xmp", NULL,
    "ol",         "p", "head", "ul", NULL,
    "menu",       "p", "head", "ul", NULL,
    "p",          "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", NULL,
    "div",        "p", "head", NULL,
    "noscript",   "p", "head", NULL,
    "center",     "font", "b", "i", "p", "head", NULL,
    "a",          "a", NULL,
    "caption",    "p", NULL,
    "colgroup",   "caption", "colgroup", "col", "p", NULL,
    "col",        "caption", "col", "p", NULL,
    "table",      "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre",
                  "listing", "xmp", "a", NULL,
    "th",         "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,
    "td",         "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,
    "tr",         "th", "td", "tr", "caption", "col", "colgroup", "p", NULL,
    "thead",      "caption", "col", "colgroup", NULL,
    "tfoot",      "th", "td", "tr", "caption", "col", "colgroup", "thead",
                  "tbody", "p", NULL,
    "tbody",      "th", "td", "tr", "caption", "col", "colgroup", "thead",
                  "tfoot", "tbody", "p", NULL,
    "optgroup",   "option", NULL,
    "option",     "option", NULL,
    "fieldset",   "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6",
                  "pre", "listing", "xmp", "a", NULL,
    NULL
};
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
/*
 * NULL-terminated list of the HTML elements which are not supposed to
 * hold CDATA content directly and inside which a p element will be
 * implied.
 *
 * TODO: extend that list by reading the HTML SGML DTD on
 *       implied paragraph
 */
static const char *const htmlNoContentElements[] = {
    "html", "head",
    NULL
};
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
/*
 * The list of HTML attributes which are of content %Script;
 * (the HTML 4.01 intrinsic event attributes).
 *
 * The array has no NULL terminator; its length must be derived with
 * sizeof(htmlScriptAttributes) / sizeof(htmlScriptAttributes[0]).
 *
 * NOTE: when adding ones, check htmlIsScriptAttribute() since
 *       it assumes the name starts with 'on'
 */
static const char *const htmlScriptAttributes[] = {
    "onclick",
    "ondblclick",
    "onmousedown",
    "onmouseup",
    "onmouseover",
    "onmousemove",
    "onmouseout",
    "onkeypress",
    "onkeydown",
    "onkeyup",
    "onload",
    "onunload",
    "onfocus",
    "onblur",
    "onsubmit",
    "onreset",	/* was misspelled "onrest", so onreset was never matched */
    "onchange",
    "onselect"
};
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
/*
 * Recovery table used by the HTML parser when it meets broken pages.
 * Each element is given a priority; when an unexpected end tag shows up
 * the parser compares priorities to decide what to do with it.
 * An end tag is only allowed to close elements whose priority is lower
 * than or equal to its own.
 */

typedef struct {
    const char *name;	/* element name, NULL on the terminating entry */
    int priority;	/* end tag priority associated with that name */
} elementPriority;

static const elementPriority htmlEndPriority[] = {
    { "div",   150 },
    { "td",    160 },
    { "th",    160 },
    { "tr",    170 },
    { "thead", 180 },
    { "tbody", 180 },
    { "tfoot", 180 },
    { "table", 190 },
    { "head",  200 },
    { "body",  200 },
    { "html",  220 },
    { NULL,    100 }	/* Default priority */
};
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
/* Set to 1 by htmlInitAutoClose() once htmlStartCloseIndex is filled in */
static int htmlStartCloseIndexinitialized = 0;
/* One pointer per group of htmlStartClose[], unused slots hold NULL */
static const char **htmlStartCloseIndex[100];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * functions to handle HTML specific data *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlInitAutoClose:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Initialize the htmlStartCloseIndex for fast lookup of closing tags names.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * This is not reentrant. Call xmlInitParser() once before processing in
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * case of use in multithreaded programs.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncvoid
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlInitAutoClose(void) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int indx, i = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (htmlStartCloseIndexinitialized) return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (indx = 0;indx < 100;indx ++) htmlStartCloseIndex[indx] = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync indx = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((htmlStartClose[i] != NULL) && (indx < 100 - 1)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlStartCloseIndex[indx++] = (const char**) &htmlStartClose[i];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (htmlStartClose[i] != NULL) i++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync i++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlStartCloseIndexinitialized = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlTagLookup:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @tag: The tag name in lowercase
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Lookup the HTML tag in the ElementTable
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the related htmlElemDescPtr or NULL if not found.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncconst htmlElemDesc *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlTagLookup(const xmlChar *tag) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int i;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (i = 0; i < (sizeof(html40ElementTable) /
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync sizeof(html40ElementTable[0]));i++) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!xmlStrcasecmp(tag, BAD_CAST html40ElementTable[i].name))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return((htmlElemDescPtr) &html40ElementTable[i]);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlGetEndPriority:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @name: The name of the element to look up the priority for.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Return value: The "endtag" priority.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync **/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic int
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlGetEndPriority (const xmlChar *name) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int i = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((htmlEndPriority[i].name != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (!xmlStrEqual((const xmlChar *)htmlEndPriority[i].name, name)))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync i++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(htmlEndPriority[i].priority);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCheckAutoClose:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @newtag: The new tag name
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @oldtag: The old tag name
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Checks whether the new tag is one of the registered valid tags for
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * closing old.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Initialize the htmlStartCloseIndex for fast lookup of closing tags names.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 0 if no, 1 if yes.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic int
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCheckAutoClose(const xmlChar * newtag, const xmlChar * oldtag)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int i, indx;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char **closed = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (htmlStartCloseIndexinitialized == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlInitAutoClose();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* inefficient, but not a big deal */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (indx = 0; indx < 100; indx++) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync closed = htmlStartCloseIndex[indx];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (closed == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(BAD_CAST * closed, newtag))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync i = closed - htmlStartClose;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync i++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (htmlStartClose[i] != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(BAD_CAST htmlStartClose[i], oldtag)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync i++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlAutoCloseOnClose:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @newtag: The new tag name
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @force: force the tag closure
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The HTML DTD allows an ending tag to implicitly close other tags.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const htmlElemDesc *info;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int i, priority;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync priority = htmlGetEndPriority(newtag);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (i = (ctxt->nameNr - 1); i >= 0; i--) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(newtag, ctxt->nameTab[i]))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * A missplaced endtag can only close elements with lower
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * or equal priority, so if we find an element with higher
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * priority before we find an element with
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * matching name, we just ignore this endtag
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (htmlGetEndPriority(ctxt->nameTab[i]) > priority)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (i < 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (!xmlStrEqual(newtag, ctxt->name)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync info = htmlTagLookup(ctxt->name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((info != NULL) && (info->endTag == 3)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Opening and ending tag mismatch: %s and %s\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync newtag, ctxt->name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->endElement(ctxt->userData, ctxt->name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlnamePop(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlAutoCloseOnEnd:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Close all remaining tags at the end of the stream
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int i;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->nameNr == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (i = (ctxt->nameNr - 1); i >= 0; i--) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->endElement(ctxt->userData, ctxt->name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlnamePop(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlAutoClose:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @newtag: The new tag name or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The HTML DTD allows a tag to implicitly close other tags.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The list is kept in htmlStartClose array. This function is
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * called when a new tag has been detected and generates the
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * appropriates closes if possible/needed.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * If newtag is NULL this mean we are at the end of the resource
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * and we should check
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((newtag != NULL) && (ctxt->name != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (htmlCheckAutoClose(newtag, ctxt->name))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->endElement(ctxt->userData, ctxt->name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlnamePop(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (newtag == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlAutoCloseOnEnd(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((newtag == NULL) && (ctxt->name != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((xmlStrEqual(ctxt->name, BAD_CAST "head")) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (xmlStrEqual(ctxt->name, BAD_CAST "body")) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (xmlStrEqual(ctxt->name, BAD_CAST "html")))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->endElement(ctxt->userData, ctxt->name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlnamePop(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlAutoCloseTag:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @doc: the HTML document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @name: The tag name
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @elem: the HTML element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The HTML DTD allows a tag to implicitly close other tags.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The list is kept in htmlStartClose array. This function checks
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * if the element or one of it's children would autoclose the
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * given tag.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 1 if autoclose, 0 otherwise
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncint
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlAutoCloseTag(htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlNodePtr child;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (elem == NULL) return(1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(name, elem->name)) return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (htmlCheckAutoClose(elem->name, name)) return(1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync child = elem->children;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (child != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (htmlAutoCloseTag(doc, name, child)) return(1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync child = child->next;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlIsAutoClosed:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @doc: the HTML document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @elem: the HTML element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The HTML DTD allows a tag to implicitly close other tags.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The list is kept in htmlStartClose array. This function checks
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * if a tag is autoclosed by one of it's child
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 1 if autoclosed, 0 otherwise
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncint
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlIsAutoClosed(htmlDocPtr doc, htmlNodePtr elem) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlNodePtr child;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (elem == NULL) return(1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync child = elem->children;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (child != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (htmlAutoCloseTag(doc, elem->name, child)) return(1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync child = child->next;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCheckImplied:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @newtag: The new tag name
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The HTML DTD allows a tag to exists only implicitly
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * called when a new tag has been detected and generates the
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * appropriates implicit tags if missing
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!htmlOmittedDefaultValue)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(newtag, BAD_CAST"html"))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->nameNr <= 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlnamePush(ctxt, BAD_CAST"html");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((xmlStrEqual(newtag, BAD_CAST"body")) || (xmlStrEqual(newtag, BAD_CAST"head")))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->nameNr <= 1) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((xmlStrEqual(newtag, BAD_CAST"script")) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (xmlStrEqual(newtag, BAD_CAST"style")) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (xmlStrEqual(newtag, BAD_CAST"meta")) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (xmlStrEqual(newtag, BAD_CAST"link")) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (xmlStrEqual(newtag, BAD_CAST"title")) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (xmlStrEqual(newtag, BAD_CAST"base")))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * dropped OBJECT ... i you put it first BODY will be
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * assumed !
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlnamePush(ctxt, BAD_CAST"head");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if ((!xmlStrEqual(newtag, BAD_CAST"noframes")) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (!xmlStrEqual(newtag, BAD_CAST"frame")) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (!xmlStrEqual(newtag, BAD_CAST"frameset"))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int i;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (i = 0;i < ctxt->nameNr;i++) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlnamePush(ctxt, BAD_CAST"body");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCheckParagraph
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check whether a p element need to be implied before inserting
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * characters in the current element.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 1 if a paragraph has been inserted, 0 if not and -1
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * in case of error.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic int
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCheckParagraph(htmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *tag;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int i;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync tag = ctxt->name;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (tag == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlAutoClose(ctxt, BAD_CAST"p");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlCheckImplied(ctxt, BAD_CAST"p");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlnamePush(ctxt, BAD_CAST"p");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!htmlOmittedDefaultValue)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (i = 0; htmlNoContentElements[i] != NULL; i++) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(tag, BAD_CAST htmlNoContentElements[i])) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlAutoClose(ctxt, BAD_CAST"p");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlCheckImplied(ctxt, BAD_CAST"p");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlnamePush(ctxt, BAD_CAST"p");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlIsScriptAttribute:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @name: an attribute name
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check if an attribute is of content type Script
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 1 is the attribute is a script 0 otherwise
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncint
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlIsScriptAttribute(const xmlChar *name) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int i;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (name == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * all script attributes start with 'on'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((name[0] != 'o') || (name[1] != 'n'))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (i = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync i < sizeof(htmlScriptAttributes)/sizeof(htmlScriptAttributes[0]);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync i++) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(name, (const xmlChar *) htmlScriptAttributes[i]))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The list of HTML predefined entities *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const htmlEntityDesc html40EntitiesTable[] = {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * the 4 absolute ones, plus apostrophe.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 34, "quot", "quotation mark = APL quote, U+0022 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 38, "amp", "ampersand, U+0026 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 39, "apos", "single quote" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 60, "lt", "less-than sign, U+003C ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 62, "gt", "greater-than sign, U+003E ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * A bunch still in the 128-255 range
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Replacing them depend really on the charset used.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 160, "nbsp", "no-break space = non-breaking space, U+00A0 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 161, "iexcl","inverted exclamation mark, U+00A1 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 162, "cent", "cent sign, U+00A2 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 163, "pound","pound sign, U+00A3 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 164, "curren","currency sign, U+00A4 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 165, "yen", "yen sign = yuan sign, U+00A5 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 166, "brvbar","broken bar = broken vertical bar, U+00A6 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 167, "sect", "section sign, U+00A7 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 168, "uml", "diaeresis = spacing diaeresis, U+00A8 ISOdia" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 169, "copy", "copyright sign, U+00A9 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 170, "ordf", "feminine ordinal indicator, U+00AA ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 171, "laquo","left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 172, "not", "not sign, U+00AC ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 173, "shy", "soft hyphen = discretionary hyphen, U+00AD ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 174, "reg", "registered sign = registered trade mark sign, U+00AE ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 175, "macr", "macron = spacing macron = overline = APL overbar, U+00AF ISOdia" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 176, "deg", "degree sign, U+00B0 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 177, "plusmn","plus-minus sign = plus-or-minus sign, U+00B1 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 178, "sup2", "superscript two = superscript digit two = squared, U+00B2 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 179, "sup3", "superscript three = superscript digit three = cubed, U+00B3 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 180, "acute","acute accent = spacing acute, U+00B4 ISOdia" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 181, "micro","micro sign, U+00B5 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 182, "para", "pilcrow sign = paragraph sign, U+00B6 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 183, "middot","middle dot = Georgian comma Greek middle dot, U+00B7 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 184, "cedil","cedilla = spacing cedilla, U+00B8 ISOdia" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 185, "sup1", "superscript one = superscript digit one, U+00B9 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 186, "ordm", "masculine ordinal indicator, U+00BA ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 187, "raquo","right-pointing double angle quotation mark right pointing guillemet, U+00BB ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 188, "frac14","vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 189, "frac12","vulgar fraction one half = fraction one half, U+00BD ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 190, "frac34","vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 191, "iquest","inverted question mark = turned question mark, U+00BF ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 192, "Agrave","latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 193, "Aacute","latin capital letter A with acute, U+00C1 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 194, "Acirc","latin capital letter A with circumflex, U+00C2 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 195, "Atilde","latin capital letter A with tilde, U+00C3 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 196, "Auml", "latin capital letter A with diaeresis, U+00C4 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 197, "Aring","latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 198, "AElig","latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 199, "Ccedil","latin capital letter C with cedilla, U+00C7 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 200, "Egrave","latin capital letter E with grave, U+00C8 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 201, "Eacute","latin capital letter E with acute, U+00C9 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 202, "Ecirc","latin capital letter E with circumflex, U+00CA ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 203, "Euml", "latin capital letter E with diaeresis, U+00CB ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 204, "Igrave","latin capital letter I with grave, U+00CC ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 205, "Iacute","latin capital letter I with acute, U+00CD ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 206, "Icirc","latin capital letter I with circumflex, U+00CE ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 207, "Iuml", "latin capital letter I with diaeresis, U+00CF ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 208, "ETH", "latin capital letter ETH, U+00D0 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 209, "Ntilde","latin capital letter N with tilde, U+00D1 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 210, "Ograve","latin capital letter O with grave, U+00D2 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 211, "Oacute","latin capital letter O with acute, U+00D3 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 212, "Ocirc","latin capital letter O with circumflex, U+00D4 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 213, "Otilde","latin capital letter O with tilde, U+00D5 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 214, "Ouml", "latin capital letter O with diaeresis, U+00D6 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 215, "times","multiplication sign, U+00D7 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 216, "Oslash","latin capital letter O with stroke latin capital letter O slash, U+00D8 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 217, "Ugrave","latin capital letter U with grave, U+00D9 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 218, "Uacute","latin capital letter U with acute, U+00DA ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 219, "Ucirc","latin capital letter U with circumflex, U+00DB ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 220, "Uuml", "latin capital letter U with diaeresis, U+00DC ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 221, "Yacute","latin capital letter Y with acute, U+00DD ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 222, "THORN","latin capital letter THORN, U+00DE ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 223, "szlig","latin small letter sharp s = ess-zed, U+00DF ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 224, "agrave","latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 225, "aacute","latin small letter a with acute, U+00E1 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 226, "acirc","latin small letter a with circumflex, U+00E2 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 227, "atilde","latin small letter a with tilde, U+00E3 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 228, "auml", "latin small letter a with diaeresis, U+00E4 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 229, "aring","latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 230, "aelig","latin small letter ae = latin small ligature ae, U+00E6 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 231, "ccedil","latin small letter c with cedilla, U+00E7 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 232, "egrave","latin small letter e with grave, U+00E8 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 233, "eacute","latin small letter e with acute, U+00E9 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 234, "ecirc","latin small letter e with circumflex, U+00EA ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 235, "euml", "latin small letter e with diaeresis, U+00EB ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 236, "igrave","latin small letter i with grave, U+00EC ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 237, "iacute","latin small letter i with acute, U+00ED ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 238, "icirc","latin small letter i with circumflex, U+00EE ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 239, "iuml", "latin small letter i with diaeresis, U+00EF ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 240, "eth", "latin small letter eth, U+00F0 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 241, "ntilde","latin small letter n with tilde, U+00F1 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 242, "ograve","latin small letter o with grave, U+00F2 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 243, "oacute","latin small letter o with acute, U+00F3 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 244, "ocirc","latin small letter o with circumflex, U+00F4 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 245, "otilde","latin small letter o with tilde, U+00F5 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 246, "ouml", "latin small letter o with diaeresis, U+00F6 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 247, "divide","division sign, U+00F7 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 248, "oslash","latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 249, "ugrave","latin small letter u with grave, U+00F9 ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 250, "uacute","latin small letter u with acute, U+00FA ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 251, "ucirc","latin small letter u with circumflex, U+00FB ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 252, "uuml", "latin small letter u with diaeresis, U+00FC ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 253, "yacute","latin small letter y with acute, U+00FD ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 254, "thorn","latin small letter thorn with, U+00FE ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 255, "yuml", "latin small letter y with diaeresis, U+00FF ISOlat1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 338, "OElig","latin capital ligature OE, U+0152 ISOlat2" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 339, "oelig","latin small ligature oe, U+0153 ISOlat2" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 352, "Scaron","latin capital letter S with caron, U+0160 ISOlat2" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 353, "scaron","latin small letter s with caron, U+0161 ISOlat2" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 376, "Yuml", "latin capital letter Y with diaeresis, U+0178 ISOlat2" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Anything below should really be kept as entities references
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 402, "fnof", "latin small f with hook = function = florin, U+0192 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 710, "circ", "modifier letter circumflex accent, U+02C6 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 732, "tilde","small tilde, U+02DC ISOdia" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 913, "Alpha","greek capital letter alpha, U+0391" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 914, "Beta", "greek capital letter beta, U+0392" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 915, "Gamma","greek capital letter gamma, U+0393 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 916, "Delta","greek capital letter delta, U+0394 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 917, "Epsilon","greek capital letter epsilon, U+0395" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 918, "Zeta", "greek capital letter zeta, U+0396" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 919, "Eta", "greek capital letter eta, U+0397" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 920, "Theta","greek capital letter theta, U+0398 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 921, "Iota", "greek capital letter iota, U+0399" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 922, "Kappa","greek capital letter kappa, U+039A" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 923, "Lambda", "greek capital letter lambda, U+039B ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 924, "Mu", "greek capital letter mu, U+039C" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 925, "Nu", "greek capital letter nu, U+039D" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 926, "Xi", "greek capital letter xi, U+039E ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 927, "Omicron","greek capital letter omicron, U+039F" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 928, "Pi", "greek capital letter pi, U+03A0 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 929, "Rho", "greek capital letter rho, U+03A1" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 931, "Sigma","greek capital letter sigma, U+03A3 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 932, "Tau", "greek capital letter tau, U+03A4" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 933, "Upsilon","greek capital letter upsilon, U+03A5 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 934, "Phi", "greek capital letter phi, U+03A6 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 935, "Chi", "greek capital letter chi, U+03A7" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 936, "Psi", "greek capital letter psi, U+03A8 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 937, "Omega","greek capital letter omega, U+03A9 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 945, "alpha","greek small letter alpha, U+03B1 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 946, "beta", "greek small letter beta, U+03B2 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 947, "gamma","greek small letter gamma, U+03B3 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 948, "delta","greek small letter delta, U+03B4 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 949, "epsilon","greek small letter epsilon, U+03B5 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 950, "zeta", "greek small letter zeta, U+03B6 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 951, "eta", "greek small letter eta, U+03B7 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 952, "theta","greek small letter theta, U+03B8 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 953, "iota", "greek small letter iota, U+03B9 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 954, "kappa","greek small letter kappa, U+03BA ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 955, "lambda","greek small letter lambda, U+03BB ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 956, "mu", "greek small letter mu, U+03BC ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 957, "nu", "greek small letter nu, U+03BD ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 958, "xi", "greek small letter xi, U+03BE ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 959, "omicron","greek small letter omicron, U+03BF NEW" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 960, "pi", "greek small letter pi, U+03C0 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 961, "rho", "greek small letter rho, U+03C1 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 962, "sigmaf","greek small letter final sigma, U+03C2 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 963, "sigma","greek small letter sigma, U+03C3 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 964, "tau", "greek small letter tau, U+03C4 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 965, "upsilon","greek small letter upsilon, U+03C5 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 966, "phi", "greek small letter phi, U+03C6 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 967, "chi", "greek small letter chi, U+03C7 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 968, "psi", "greek small letter psi, U+03C8 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 969, "omega","greek small letter omega, U+03C9 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 977, "thetasym","greek small letter theta symbol, U+03D1 NEW" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 978, "upsih","greek upsilon with hook symbol, U+03D2 NEW" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 982, "piv", "greek pi symbol, U+03D6 ISOgrk3" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8194, "ensp", "en space, U+2002 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8195, "emsp", "em space, U+2003 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8201, "thinsp","thin space, U+2009 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8204, "zwnj", "zero width non-joiner, U+200C NEW RFC 2070" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8205, "zwj", "zero width joiner, U+200D NEW RFC 2070" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8206, "lrm", "left-to-right mark, U+200E NEW RFC 2070" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8207, "rlm", "right-to-left mark, U+200F NEW RFC 2070" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8211, "ndash","en dash, U+2013 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8212, "mdash","em dash, U+2014 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8216, "lsquo","left single quotation mark, U+2018 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8217, "rsquo","right single quotation mark, U+2019 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8218, "sbquo","single low-9 quotation mark, U+201A NEW" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8220, "ldquo","left double quotation mark, U+201C ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8221, "rdquo","right double quotation mark, U+201D ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8222, "bdquo","double low-9 quotation mark, U+201E NEW" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8224, "dagger","dagger, U+2020 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8225, "Dagger","double dagger, U+2021 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8226, "bull", "bullet = black small circle, U+2022 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8230, "hellip","horizontal ellipsis = three dot leader, U+2026 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8240, "permil","per mille sign, U+2030 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8242, "prime","prime = minutes = feet, U+2032 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8243, "Prime","double prime = seconds = inches, U+2033 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8249, "lsaquo","single left-pointing angle quotation mark, U+2039 ISO proposed" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8250, "rsaquo","single right-pointing angle quotation mark, U+203A ISO proposed" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8254, "oline","overline = spacing overscore, U+203E NEW" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8260, "frasl","fraction slash, U+2044 NEW" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8364, "euro", "euro sign, U+20AC NEW" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8465, "image","blackletter capital I = imaginary part, U+2111 ISOamso" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8472, "weierp","script capital P = power set = Weierstrass p, U+2118 ISOamso" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8476, "real", "blackletter capital R = real part symbol, U+211C ISOamso" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8482, "trade","trade mark sign, U+2122 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8501, "alefsym","alef symbol = first transfinite cardinal, U+2135 NEW" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8592, "larr", "leftwards arrow, U+2190 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8593, "uarr", "upwards arrow, U+2191 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8594, "rarr", "rightwards arrow, U+2192 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8595, "darr", "downwards arrow, U+2193 ISOnum" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8596, "harr", "left right arrow, U+2194 ISOamsa" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8629, "crarr","downwards arrow with corner leftwards = carriage return, U+21B5 NEW" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8656, "lArr", "leftwards double arrow, U+21D0 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8657, "uArr", "upwards double arrow, U+21D1 ISOamsa" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8658, "rArr", "rightwards double arrow, U+21D2 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8659, "dArr", "downwards double arrow, U+21D3 ISOamsa" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8660, "hArr", "left right double arrow, U+21D4 ISOamsa" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8704, "forall","for all, U+2200 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8706, "part", "partial differential, U+2202 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8707, "exist","there exists, U+2203 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8709, "empty","empty set = null set = diameter, U+2205 ISOamso" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8711, "nabla","nabla = backward difference, U+2207 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8712, "isin", "element of, U+2208 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8713, "notin","not an element of, U+2209 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8715, "ni", "contains as member, U+220B ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8719, "prod", "n-ary product = product sign, U+220F ISOamsb" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8721, "sum", "n-ary summation, U+2211 ISOamsb" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8722, "minus","minus sign, U+2212 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8727, "lowast","asterisk operator, U+2217 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8730, "radic","square root = radical sign, U+221A ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8733, "prop", "proportional to, U+221D ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8734, "infin","infinity, U+221E ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8736, "ang", "angle, U+2220 ISOamso" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8743, "and", "logical and = wedge, U+2227 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8744, "or", "logical or = vee, U+2228 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8745, "cap", "intersection = cap, U+2229 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8746, "cup", "union = cup, U+222A ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8747, "int", "integral, U+222B ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8756, "there4","therefore, U+2234 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8764, "sim", "tilde operator = varies with = similar to, U+223C ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8773, "cong", "approximately equal to, U+2245 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8776, "asymp","almost equal to = asymptotic to, U+2248 ISOamsr" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8800, "ne", "not equal to, U+2260 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8801, "equiv","identical to, U+2261 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8804, "le", "less-than or equal to, U+2264 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8805, "ge", "greater-than or equal to, U+2265 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8834, "sub", "subset of, U+2282 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8835, "sup", "superset of, U+2283 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8836, "nsub", "not a subset of, U+2284 ISOamsn" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8838, "sube", "subset of or equal to, U+2286 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8839, "supe", "superset of or equal to, U+2287 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8853, "oplus","circled plus = direct sum, U+2295 ISOamsb" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8855, "otimes","circled times = vector product, U+2297 ISOamsb" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8869, "perp", "up tack = orthogonal to = perpendicular, U+22A5 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8901, "sdot", "dot operator, U+22C5 ISOamsb" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8968, "lceil","left ceiling = apl upstile, U+2308 ISOamsc" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8969, "rceil","right ceiling, U+2309 ISOamsc" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8970, "lfloor","left floor = apl downstile, U+230A ISOamsc" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 8971, "rfloor","right floor, U+230B ISOamsc" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 9001, "lang", "left-pointing angle bracket = bra, U+2329 ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 9002, "rang", "right-pointing angle bracket = ket, U+232A ISOtech" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 9674, "loz", "lozenge, U+25CA ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 9824, "spades","black spade suit, U+2660 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 9827, "clubs","black club suit = shamrock, U+2663 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 9829, "hearts","black heart suit = valentine, U+2665 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{ 9830, "diams","black diamond suit, U+2666 ISOpub" },
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync};
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Commodity functions to handle entities *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
/*
 * growBuffer:
 *
 * Double the capacity of the heap buffer named by the macro argument.
 * The capacity is kept in a companion variable whose name is the argument
 * with "_size" appended (built by token pasting), and a variable named
 * "ctxt" must be in scope at the expansion site for error reporting.
 *
 * If the reallocation fails, the error is reported through htmlErrMemory,
 * the old buffer is released, and the ENCLOSING function returns NULL.
 * The macro is therefore only usable inside functions returning a pointer.
 */
#define growBuffer(buffer) {						\
    xmlChar *grown;							\
    buffer##_size *= 2;							\
    grown = (xmlChar *) xmlRealloc(buffer,				\
                                   buffer##_size * sizeof(xmlChar));	\
    if (grown != NULL) {						\
        buffer = grown;							\
    } else {								\
        htmlErrMemory(ctxt, "growing buffer\n");			\
        xmlFree(buffer);						\
        return(NULL);							\
    }									\
}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlEntityLookup:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @name: the entity name
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Lookup the given entity in EntitiesTable
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * TODO: the linear scan is really ugly, an hash table is really needed.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the associated htmlEntityDescPtr if found, NULL otherwise.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncconst htmlEntityDesc *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlEntityLookup(const xmlChar *name) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int i;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (i = 0;i < (sizeof(html40EntitiesTable)/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync sizeof(html40EntitiesTable[0]));i++) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(name, BAD_CAST html40EntitiesTable[i].name)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return((htmlEntityDescPtr) &html40EntitiesTable[i]);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlEntityValueLookup:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @value: the entity's unicode value
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Lookup the given entity in EntitiesTable
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * TODO: the linear scan is really ugly, an hash table is really needed.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the associated htmlEntityDescPtr if found, NULL otherwise.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncconst htmlEntityDesc *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlEntityValueLookup(unsigned int value) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int i;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (i = 0;i < (sizeof(html40EntitiesTable)/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync sizeof(html40EntitiesTable[0]));i++) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (html40EntitiesTable[i].value >= value) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (html40EntitiesTable[i].value > value)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return((htmlEntityDescPtr) &html40EntitiesTable[i]);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * UTF8ToHtml:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @out: a pointer to an array of bytes to store the result
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @outlen: the length of @out
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @in: a pointer to an array of UTF-8 chars
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @inlen: the length of @in
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Take a block of UTF-8 chars in and try to convert it to an ASCII
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * plus HTML entities block of chars out.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The value of @inlen after return is the number of octets consumed
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * as the return value is positive, else unpredictable.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The value of @outlen after return is the number of octets consumed.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncint
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncUTF8ToHtml(unsigned char* out, int *outlen,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const unsigned char* in, int *inlen) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const unsigned char* processed = in;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const unsigned char* outend;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const unsigned char* outstart = out;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const unsigned char* instart = in;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const unsigned char* inend;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int c, d;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int trailing;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (in == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * initialization nothing to do
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *outlen = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *inlen = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inend = in + (*inlen);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync outend = out + (*outlen);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (in < inend) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync d = *in++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (d < 0x80) { c= d; trailing= 0; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (d < 0xC0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* trailing byte in leading position */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *outlen = out - outstart;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *inlen = processed - instart;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-2);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* no chance for this in Ascii */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *outlen = out - outstart;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *inlen = processed - instart;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-2);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (inend - in < trailing) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for ( ; trailing; trailing--) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync c <<= 6;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync c |= d & 0x3F;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* assertion: c is a single UTF-4 value */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (c < 0x80) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (out + 1 >= outend)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *out++ = c;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int len;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const htmlEntityDesc * ent;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *cp;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync char nbuf[16];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Try to lookup a predefined HTML entity for it
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ent = htmlEntityValueLookup(c);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ent == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync snprintf(nbuf, sizeof(nbuf), "#%u", c);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cp = nbuf;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cp = ent->name;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync len = strlen(cp);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (out + 2 + len >= outend)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *out++ = '&';
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync memcpy(out, cp, len);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync out += len;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *out++ = ';';
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync processed = in;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *outlen = out - outstart;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *inlen = processed - instart;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlEncodeEntities:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @out: a pointer to an array of bytes to store the result
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @outlen: the length of @out
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @in: a pointer to an array of UTF-8 chars
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @inlen: the length of @in
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @quoteChar: the quote character to escape (' or ") or zero.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Take a block of UTF-8 chars in and try to convert it to an ASCII
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * plus HTML entities block of chars out.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The value of @inlen after return is the number of octets consumed
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * as the return value is positive, else unpredictable.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The value of @outlen after return is the number of octets consumed.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncint
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlEncodeEntities(unsigned char* out, int *outlen,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const unsigned char* in, int *inlen, int quoteChar) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const unsigned char* processed = in;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const unsigned char* outend;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const unsigned char* outstart = out;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const unsigned char* instart = in;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const unsigned char* inend;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int c, d;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int trailing;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync outend = out + (*outlen);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inend = in + (*inlen);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (in < inend) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync d = *in++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (d < 0x80) { c= d; trailing= 0; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (d < 0xC0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* trailing byte in leading position */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *outlen = out - outstart;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *inlen = processed - instart;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-2);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* no chance for this in Ascii */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *outlen = out - outstart;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *inlen = processed - instart;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-2);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (inend - in < trailing)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (trailing--) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (((d= *in++) & 0xC0) != 0x80) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *outlen = out - outstart;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *inlen = processed - instart;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-2);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync c <<= 6;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync c |= d & 0x3F;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* assertion: c is a single UTF-4 value */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((c < 0x80) && (c != (unsigned int) quoteChar) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (c != '&') && (c != '<') && (c != '>')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (out >= outend)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *out++ = c;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const htmlEntityDesc * ent;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *cp;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync char nbuf[16];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int len;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Try to lookup a predefined HTML entity for it
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ent = htmlEntityValueLookup(c);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ent == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync snprintf(nbuf, sizeof(nbuf), "#%u", c);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cp = nbuf;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cp = ent->name;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync len = strlen(cp);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (out + 2 + len > outend)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *out++ = '&';
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync memcpy(out, cp, len);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync out += len;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *out++ = ';';
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync processed = in;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *outlen = out - outstart;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *inlen = processed - instart;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Commodity functions to handle streams *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlNewInputStream:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Create a new input stream structure
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the new input stream or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic htmlParserInputPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlNewInputStream(htmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParserInputPtr input;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input = (xmlParserInputPtr) xmlMalloc(sizeof(htmlParserInput));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (input == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync memset(input, 0, sizeof(htmlParserInput));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input->filename = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input->directory = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input->base = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input->cur = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input->buf = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input->line = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input->col = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input->buf = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input->free = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input->version = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input->consumed = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input->length = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(input);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Commodity functions, cleanup needed ? *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
/*
 * All tags allowing PCDATA, from the HTML 4.01 loose DTD.
 * NOTE: it might be more appropriate to integrate this information
 *       into the html40ElementTable array, but I don't want to risk any
 *       binary incompatibility.
 */
static const char *allowPCData[] = {
    "a", "abbr", "acronym", "address", "applet",
    "b", "bdo", "big", "blockquote", "body", "button",
    "caption", "center", "cite", "code",
    "dd", "del", "dfn", "div", "dt",
    "em",
    "font", "form",
    "h1", "h2", "h3", "h4", "h5", "h6",
    "i", "iframe", "ins",
    "kbd",
    "label", "legend", "li",
    "noframes", "noscript",
    "object",
    "p", "pre",
    "q",
    "s", "samp", "small", "span", "strike", "strong",
    "td", "th", "tt",
    "u",
    "var"
};
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * areBlanks:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @str: a xmlChar *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @len: the size of @str
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Is this a sequence of blank chars that one can ignore ?
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 1 if ignorable 0 otherwise.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic int areBlanks(htmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int i;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int j;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlNodePtr lastChild;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlDtdPtr dtd;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (j = 0;j < len;j++)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!(IS_BLANK_CH(str[j]))) return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR == 0) return(1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR != '<') return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->name == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(ctxt->name, BAD_CAST"html"))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(ctxt->name, BAD_CAST"head"))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* Only strip CDATA children of the body tag for strict HTML DTDs */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(ctxt->name, BAD_CAST "body") && ctxt->myDoc != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync dtd = xmlGetIntSubset(ctxt->myDoc);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (dtd != NULL && dtd->ExternalID != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!xmlStrcasecmp(dtd->ExternalID, BAD_CAST "-//W3C//DTD HTML 4.01//EN") ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync !xmlStrcasecmp(dtd->ExternalID, BAD_CAST "-//W3C//DTD HTML 4//EN"))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->node == NULL) return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync lastChild = xmlGetLastChild(ctxt->node);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((lastChild) && (lastChild->type == XML_COMMENT_NODE))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync lastChild = lastChild->prev;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (lastChild == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->node->type != XML_ELEMENT_NODE) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ctxt->node->content != NULL)) return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* keep ws in constructs like ...<b> </b>...
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for all tags "b" allowing PCDATA */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ( xmlStrEqual(ctxt->name, BAD_CAST allowPCData[i]) ) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if (xmlNodeIsText(lastChild)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* keep ws in constructs like <p><b>xy</b> <i>z</i><p>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for all tags "p" allowing PCDATA */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ( xmlStrEqual(lastChild->name, BAD_CAST allowPCData[i]) ) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlNewDocNoDtD:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URI: URI for the dtd, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ExternalID: the external ID of the DTD, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Creates a new HTML document without a DTD node if @URI and @ExternalID
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * are NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns a new document, do not initialize the DTD if not provided
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlDocPtr cur;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Allocate a new document and fill the fields.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = (xmlDocPtr) xmlMalloc(sizeof(xmlDoc));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(NULL, "HTML document creation failed\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync memset(cur, 0, sizeof(xmlDoc));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur->type = XML_HTML_DOCUMENT_NODE;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur->version = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur->intSubset = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur->doc = cur;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur->name = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur->children = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur->extSubset = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur->oldNs = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur->encoding = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur->standalone = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur->compression = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur->ids = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur->refs = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur->_private = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur->charset = XML_CHAR_ENCODING_UTF8;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ExternalID != NULL) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (URI != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(cur);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlNewDoc:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URI: URI for the dtd, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ExternalID: the external ID of the DTD, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Creates a new HTML document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns a new document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((URI == NULL) && (ExternalID == NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(htmlNewDocNoDtD(
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN"));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(htmlNewDocNoDtD(URI, ExternalID));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The parser itself *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Relates to http://www.w3.org/TR/html40 *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The parser itself *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseHTMLName:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML tag or attribute name, note that we convert it to lowercase
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * since HTML names are not case-sensitive.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the Tag Name parsed or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const xmlChar *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseHTMLName(htmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int i = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar loc[HTML_PARSER_BUFFER_SIZE];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!IS_ASCII_LETTER(CUR) && (CUR != '_') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (CUR != ':')) return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((i < HTML_PARSER_BUFFER_SIZE) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((IS_ASCII_LETTER(CUR)) || (IS_ASCII_DIGIT(CUR)) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (CUR == ':') || (CUR == '-') || (CUR == '_'))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else loc[i] = CUR;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync i++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(xmlDictLookup(ctxt->dict, loc, i));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseHTMLName_nonInvasive:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML tag or attribute name, note that we convert it to lowercase
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * since HTML names are not case-sensitive, this doesn't consume the data
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * from the stream, it's a look-ahead
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the Tag Name parsed or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const xmlChar *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int i = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar loc[HTML_PARSER_BUFFER_SIZE];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!IS_ASCII_LETTER(NXT(1)) && (NXT(1) != '_') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (NXT(1) != ':')) return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((i < HTML_PARSER_BUFFER_SIZE) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((IS_ASCII_LETTER(NXT(1+i))) || (IS_ASCII_DIGIT(NXT(1+i))) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (NXT(1+i) == ':') || (NXT(1+i) == '-') || (NXT(1+i) == '_'))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((NXT(1+i) >= 'A') && (NXT(1+i) <= 'Z')) loc[i] = NXT(1+i) + 0x20;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else loc[i] = NXT(1+i);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync i++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(xmlDictLookup(ctxt->dict, loc, i));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseName:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML name, this routine is case sensitive.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the Name parsed or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const xmlChar *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseName(htmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *in;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *ret;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int count = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync GROW;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Accelerator for simple ASCII names
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync in = ctxt->input->cur;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (((*in >= 0x61) && (*in <= 0x7A)) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((*in >= 0x41) && (*in <= 0x5A)) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (*in == '_') || (*in == ':')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync in++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (((*in >= 0x61) && (*in <= 0x7A)) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((*in >= 0x41) && (*in <= 0x5A)) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((*in >= 0x30) && (*in <= 0x39)) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (*in == '_') || (*in == '-') ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (*in == ':') || (*in == '.'))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync in++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((*in > 0) && (*in < 0x80)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync count = in - ctxt->input->cur;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->cur = in;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nbChars += count;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->col += count;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(ret);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(htmlParseNameComplex(ctxt));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const xmlChar *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseNameComplex(xmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int len = 0, l;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int c;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int count = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Handler for more complex cases
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync GROW;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync c = CUR_CHAR(l);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (!IS_LETTER(c) && (c != '_') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (c != ':'))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (c == '.') || (c == '-') ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (c == '_') || (c == ':') ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (IS_COMBINING(c)) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (IS_EXTENDER(c)))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (count++ > 100) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync count = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync GROW;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync len += l;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXTL(l);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync c = CUR_CHAR(l);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseHTMLAttribute:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @stop: a char stop value
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML attribute value till the stop (quote), if
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * stop is 0 then it stops at the first space
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the attribute parsed or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic xmlChar *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *buffer = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int buffer_size = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *out = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *name = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *cur = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const htmlEntityDesc * ent;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * allocate a translation buffer.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buffer_size = HTML_PARSER_BUFFER_SIZE;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (buffer == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(ctxt, "buffer allocation failed\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync out = buffer;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Ok loop until we reach one of the ending chars
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((CUR != 0) && (CUR != stop)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((stop == 0) && (CUR == '>')) break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((stop == 0) && (IS_BLANK_CH(CUR))) break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR == '&') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (NXT(1) == '#') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int c;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int bits;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync c = htmlParseCharRef(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (c < 0x80)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync { *out++ = c; bits= -6; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (c < 0x800)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (c < 0x10000)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for ( ; bits >= 0; bits-= 6) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *out++ = ((c >> bits) & 0x3F) | 0x80;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (out - buffer > buffer_size - 100) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int indx = out - buffer;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync growBuffer(buffer);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync out = &buffer[indx];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ent = htmlParseEntityRef(ctxt, &name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (name == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *out++ = '&';
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (out - buffer > buffer_size - 100) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int indx = out - buffer;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync growBuffer(buffer);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync out = &buffer[indx];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if (ent == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *out++ = '&';
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = name;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (*cur != 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (out - buffer > buffer_size - 100) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int indx = out - buffer;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync growBuffer(buffer);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync out = &buffer[indx];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *out++ = *cur++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int c;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int bits;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (out - buffer > buffer_size - 100) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int indx = out - buffer;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync growBuffer(buffer);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync out = &buffer[indx];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync c = ent->value;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (c < 0x80)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync { *out++ = c; bits= -6; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (c < 0x800)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (c < 0x10000)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for ( ; bits >= 0; bits-= 6) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *out++ = ((c >> bits) & 0x3F) | 0x80;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int c;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int bits, l;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (out - buffer > buffer_size - 100) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int indx = out - buffer;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync growBuffer(buffer);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync out = &buffer[indx];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync c = CUR_CHAR(l);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (c < 0x80)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync { *out++ = c; bits= -6; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (c < 0x800)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (c < 0x10000)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for ( ; bits >= 0; bits-= 6) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *out++ = ((c >> bits) & 0x3F) | 0x80;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *out++ = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(buffer);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseEntityRef:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @str: location to store the entity name
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML ENTITY references
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [68] EntityRef ::= '&' Name ';'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the associated htmlEntityDescPtr if found, or NULL otherwise,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * if non-NULL *str will have to be freed by the caller.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncconst htmlEntityDesc *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseEntityRef(htmlParserCtxtPtr ctxt, const xmlChar **str) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *name;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const htmlEntityDesc * ent = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (str != NULL) *str = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR == '&') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync name = htmlParseName(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (name == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseEntityRef: no name\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync GROW;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR == ';') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (str != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *str = name;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Lookup the entity in the table.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ent = htmlEntityLookup(name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ent != NULL) /* OK that's ugly !!! */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseEntityRef: expecting ';'\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (str != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *str = name;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(ent);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseAttValue:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse a value for an attribute
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Note: the parser won't do substitution of entities here, this
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * will be handled later in xmlStringGetNodeList, unless it was
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * asked for ctxt->replaceEntities != 0
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the AttValue parsed or NULL.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic xmlChar *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseAttValue(htmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *ret = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR == '"') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ret = htmlParseHTMLAttribute(ctxt, '"');
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR != '"') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "AttValue: \" expected\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if (CUR == '\'') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ret = htmlParseHTMLAttribute(ctxt, '\'');
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR != '\'') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "AttValue: ' expected\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * That's an HTMLism, the attribute value may not be quoted
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ret = htmlParseHTMLAttribute(ctxt, 0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ret == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "AttValue: no value found\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(ret);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseSystemLiteral:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML Literal
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the SystemLiteral parsed or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic xmlChar *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *q;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *ret = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR == '"') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync q = CUR_PTR;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((IS_CHAR_CH(CUR)) && (CUR != '"'))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!IS_CHAR_CH(CUR)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Unfinished SystemLiteral\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ret = xmlStrndup(q, CUR_PTR - q);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if (CUR == '\'') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync q = CUR_PTR;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((IS_CHAR_CH(CUR)) && (CUR != '\''))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!IS_CHAR_CH(CUR)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Unfinished SystemLiteral\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ret = xmlStrndup(q, CUR_PTR - q);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync " or ' expected\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(ret);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParsePubidLiteral:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML public literal
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the PubidLiteral parsed or NULL.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic xmlChar *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *q;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *ret = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Name ::= (Letter | '_') (NameChar)*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR == '"') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync q = CUR_PTR;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (IS_PUBIDCHAR_CH(CUR)) NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR != '"') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Unfinished PubidLiteral\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ret = xmlStrndup(q, CUR_PTR - q);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if (CUR == '\'') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync q = CUR_PTR;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\''))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR != '\'') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Unfinished PubidLiteral\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ret = xmlStrndup(q, CUR_PTR - q);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "PubidLiteral \" or ' expected\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(ret);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseScript:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse the content of an HTML SCRIPT or STYLE element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * http://www.w3.org/TR/html4/sgml/dtd.html#Script
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * http://www.w3.org/TR/html4/sgml/dtd.html#StyleSheet
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * http://www.w3.org/TR/html4/types.html#type-script
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * http://www.w3.org/TR/html4/types.html#h-6.15
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.2.1
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Script data ( %Script; in the DTD) can be the content of the SCRIPT
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * element and the value of intrinsic event attributes. User agents must
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * not evaluate script data as HTML markup but instead must pass it on as
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * data to a script engine.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * NOTES:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * - The content is passed like CDATA
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * - the attributes for style and scripting "onXXX" are also described
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * as CDATA but SGML allows entities references in attributes so their
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * processing is identical as other attributes
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseScript(htmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int nbchar = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int cur,l;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SHRINK;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = CUR_CHAR(l);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (IS_CHAR_CH(cur)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur == '<') && (NXT(1) == '/')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * One should break here, the specification is clear:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Authors should therefore escape "</" within the content.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Escape mechanisms are specific to each scripting or
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * style sheet language.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * In recovery mode, only break if end tag match the
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * current tag, effectively ignoring all tags inside the
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * script/style block and treating the entire block as
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * CDATA.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->recovery) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrncasecmp(ctxt->name, ctxt->input->cur+2,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlStrlen(ctxt->name)) == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break; /* while */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Element %s embeds close tag\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->name, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (((NXT(2) >= 'A') && (NXT(2) <= 'Z')) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((NXT(2) >= 'a') && (NXT(2) <= 'z')))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break; /* while */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync COPY_BUF(l,buf,nbchar,cur);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->sax->cdataBlock!= NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Insert as CDATA, which is the same as HTML_PRESERVE_NODE
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if (ctxt->sax->characters != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->characters(ctxt->userData, buf, nbchar);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync nbchar = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync GROW;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXTL(l);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = CUR_CHAR(l);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Invalid char in CDATA 0x%X\n", cur);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->sax->cdataBlock!= NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Insert as CDATA, which is the same as HTML_PRESERVE_NODE
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if (ctxt->sax->characters != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->characters(ctxt->userData, buf, nbchar);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseCharData:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse a CharData section.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * if we are within a CDATA section ']]>' marks an end of section.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseCharData(htmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int nbchar = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int cur, l;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SHRINK;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = CUR_CHAR(l);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (((cur != '<') || (ctxt->token == '<')) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((cur != '&') || (ctxt->token == '&')) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur != 0)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!(IS_CHAR(cur))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Invalid char in CDATA 0x%X\n", cur);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync COPY_BUF(l,buf,nbchar,cur);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Ok the segment is to be consumed as chars.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (areBlanks(ctxt, buf, nbchar)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->sax->ignorableWhitespace != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->ignorableWhitespace(ctxt->userData,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buf, nbchar);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlCheckParagraph(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->sax->characters != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->characters(ctxt->userData, buf, nbchar);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync nbchar = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXTL(l);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = CUR_CHAR(l);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SHRINK;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync GROW;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = CUR_CHAR(l);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (nbchar != 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buf[nbchar] = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Ok the segment is to be consumed as chars.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (areBlanks(ctxt, buf, nbchar)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->sax->ignorableWhitespace != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlCheckParagraph(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->sax->characters != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->characters(ctxt->userData, buf, nbchar);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Loop detection
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_EOF;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseExternalID:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @publicID: a xmlChar** receiving PubidLiteral
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse an External ID or a Public ID
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * | 'PUBLIC' S PubidLiteral S SystemLiteral
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [83] PublicID ::= 'PUBLIC' S PubidLiteral
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the function returns SystemLiteral and in the second
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * case publicID receives PubidLiteral, is strict is off
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * it is possible to return NULL and have publicID set.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic xmlChar *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *URI = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((UPPER == 'S') && (UPP(1) == 'Y') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(2) == 'S') && (UPP(3) == 'T') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(4) == 'E') && (UPP(5) == 'M')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP(6);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!IS_BLANK_CH(CUR)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Space required after 'SYSTEM'\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP_BLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync URI = htmlParseSystemLiteral(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (URI == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_URI_REQUIRED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseExternalID: SYSTEM, no URI\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if ((UPPER == 'P') && (UPP(1) == 'U') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(2) == 'B') && (UPP(3) == 'L') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(4) == 'I') && (UPP(5) == 'C')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP(6);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!IS_BLANK_CH(CUR)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Space required after 'PUBLIC'\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP_BLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *publicID = htmlParsePubidLiteral(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (*publicID == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_PUBID_REQUIRED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseExternalID: PUBLIC, no Public Identifier\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP_BLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((CUR == '"') || (CUR == '\'')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync URI = htmlParseSystemLiteral(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(URI);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * xmlParsePI:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an XML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an XML Processing Instruction.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParsePI(htmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *buf = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int len = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int size = HTML_PARSER_BUFFER_SIZE;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int cur, l;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *target;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputState state;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int count = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((RAW == '<') && (NXT(1) == '?')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync state = ctxt->instate;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_PI;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * this is a Processing Instruction.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP(2);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SHRINK;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse the target name and check for special support like
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * namespace.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync target = htmlParseName(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (target != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (RAW == '>') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP(1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * SAX: PI detected.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax) && (!ctxt->disableSAX) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ctxt->sax->processingInstruction != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->processingInstruction(ctxt->userData,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync target, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = state;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (buf == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(ctxt, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = state;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = CUR;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!IS_BLANK(cur)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "ParsePI: PI %s space expected\n", target, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP_BLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = CUR_CHAR(l);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (IS_CHAR(cur) && (cur != '>')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (len + 5 >= size) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *tmp;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync size *= 2;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (tmp == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(ctxt, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(buf);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = state;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buf = tmp;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync count++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (count > 50) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync GROW;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync count = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync COPY_BUF(l,buf,len,cur);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXTL(l);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = CUR_CHAR(l);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SHRINK;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync GROW;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = CUR_CHAR(l);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buf[len] = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur != '>') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "ParsePI: PI %s never end ...\n", target, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP(1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * SAX: PI detected.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax) && (!ctxt->disableSAX) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ctxt->sax->processingInstruction != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->processingInstruction(ctxt->userData,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync target, buf);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(buf);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "PI is not started correctly", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = state;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseComment:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse an XML (SGML) comment <!-- .... -->
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseComment(htmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *buf = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int len;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int size = HTML_PARSER_BUFFER_SIZE;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int q, ql;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int r, rl;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int cur, l;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputState state;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check that there is a comment right here.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((RAW != '<') || (NXT(1) != '!') ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (NXT(2) != '-') || (NXT(3) != '-')) return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync state = ctxt->instate;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_COMMENT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SHRINK;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP(4);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (buf == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(ctxt, "buffer allocation failed\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = state;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync q = CUR_CHAR(ql);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXTL(ql);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync r = CUR_CHAR(rl);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXTL(rl);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = CUR_CHAR(l);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync len = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (IS_CHAR(cur) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((cur != '>') ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (r != '-') || (q != '-'))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (len + 5 >= size) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *tmp;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync size *= 2;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (tmp == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(buf);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(ctxt, "growing buffer failed\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = state;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buf = tmp;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync COPY_BUF(ql,buf,len,q);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync q = r;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ql = rl;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync r = cur;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync rl = l;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXTL(l);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = CUR_CHAR(l);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SHRINK;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync GROW;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = CUR_CHAR(l);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buf[len] = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!IS_CHAR(cur)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Comment not terminated \n<!--%.50s\n", buf, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(buf);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (!ctxt->disableSAX))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->comment(ctxt->userData, buf);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(buf);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = state;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseCharRef:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse Reference declarations
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [66] CharRef ::= '&#' [0-9]+ ';' |
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * '&#x' [0-9a-fA-F]+ ';'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the value parsed (as an int)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncint
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseCharRef(htmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int val = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt == NULL) || (ctxt->input == NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseCharRef: context error\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((CUR == '&') && (NXT(1) == '#') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((NXT(2) == 'x') || NXT(2) == 'X')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP(3);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (CUR != ';') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((CUR >= '0') && (CUR <= '9'))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync val = val * 16 + (CUR - '0');
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if ((CUR >= 'a') && (CUR <= 'f'))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync val = val * 16 + (CUR - 'a') + 10;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if ((CUR >= 'A') && (CUR <= 'F'))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync val = val * 16 + (CUR - 'A') + 10;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseCharRef: invalid hexadecimal value\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR == ';')
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if ((CUR == '&') && (NXT(1) == '#')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP(2);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (CUR != ';') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((CUR >= '0') && (CUR <= '9'))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync val = val * 10 + (CUR - '0');
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseCharRef: invalid decimal value\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR == ';')
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INVALID_CHARREF,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseCharRef: invalid value\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check the value IS_CHAR ...
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (IS_CHAR(val)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(val);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseCharRef: invalid xmlChar value %d\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync val);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseDocTypeDecl:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse a DOCTYPE declaration
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *name;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *ExternalID = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *URI = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * We know that '<!DOCTYPE' has been detected.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP(9);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP_BLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse the DOCTYPE name.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync name = htmlParseName(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (name == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseDocTypeDecl : no DOCTYPE name !\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check that upper(name) == "HTML" !!!!!!!!!!!!!
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP_BLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check for SystemID and ExternalID
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync URI = htmlParseExternalID(ctxt, &ExternalID);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP_BLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * We should be at the end of the DOCTYPE declaration.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR != '>') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "DOCTYPE improperly terminated\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* We shouldn't try to resynchronize ... */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Create or update the document accordingly to the DOCTYPE
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (!ctxt->disableSAX))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Cleanup, since we don't use all those identifiers
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (URI != NULL) xmlFree(URI);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ExternalID != NULL) xmlFree(ExternalID);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseAttribute:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @value: a xmlChar ** used to store the value of the attribute
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an attribute
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [41] Attribute ::= Name Eq AttValue
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [25] Eq ::= S? '=' S?
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * With namespace:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [NS 11] Attribute ::= QName Eq AttValue
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Also the case QName == xmlns:??? is handled independently as a namespace
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * definition.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the attribute name, and the value in *value.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic const xmlChar *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *name;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *val = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *value = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync name = htmlParseHTMLName(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (name == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "error parsing attribute name\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * read the value
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP_BLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR == '=') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP_BLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync val = htmlParseAttValue(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if (htmlIsBooleanAttr(name)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * assume a minimized attribute
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync val = xmlStrdup(name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *value = val;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCheckEncoding:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @attvalue: the attribute value
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Checks an http-equiv attribute from a Meta tag to detect
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * the encoding
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * If a new encoding is detected the parser is switched to decode
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * it and pass UTF8
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *encoding;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt == NULL) || (attvalue == NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* do not change encoding */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->input->encoding != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync encoding = xmlStrcasestr(attvalue, BAD_CAST"charset=");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync encoding += 8;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync encoding = xmlStrcasestr(attvalue, BAD_CAST"charset =");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync encoding += 9;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCharEncoding enc;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCharEncodingHandlerPtr handler;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->input->encoding != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree((xmlChar *) ctxt->input->encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->encoding = xmlStrdup(encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync enc = xmlParseCharEncoding((const char *) encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * registered set of known encodings
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (enc != XML_CHAR_ENCODING_ERROR) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (((enc == XML_CHAR_ENCODING_UTF16LE) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (enc == XML_CHAR_ENCODING_UTF16BE) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (enc == XML_CHAR_ENCODING_UCS4LE) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (enc == XML_CHAR_ENCODING_UCS4BE)) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ctxt->input->buf != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ctxt->input->buf->encoder == NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlCheckEncoding: wrong encoding meta\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlSwitchEncoding(ctxt, enc);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->charset = XML_CHAR_ENCODING_UTF8;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * fallback for unknown encodings
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler((const char *) encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (handler != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlSwitchToEncoding(ctxt, handler);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->charset = XML_CHAR_ENCODING_UTF8;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->input->buf != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ctxt->input->buf->encoder != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ctxt->input->buf->raw != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ctxt->input->buf->buffer != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int nbchars;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int processed;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * convert as much as possible to the parser reading buffer.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync processed = ctxt->input->cur - ctxt->input->base;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlBufferShrink(ctxt->input->buf->buffer, processed);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->buf->buffer,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->buf->raw);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (nbchars < 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlCheckEncoding: encoder error\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->base =
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->cur = ctxt->input->buf->buffer->content;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCheckMeta:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @atts: the attributes values
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Checks an attributes from a Meta tag
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int i;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *att, *value;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int http = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *content = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt == NULL) || (atts == NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync i = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync att = atts[i++];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (att != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync value = atts[i++];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv"))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync http = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content")))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync content = value;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync att = atts[i++];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((http) && (content != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlCheckEncoding(ctxt, content);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseStartTag:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse a start of tag either for rule element or
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * EmptyElement. In both case we don't parse the tag closing chars.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [40] STag ::= '<' Name (S Attribute)* S? '>'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * With namespace:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 0 in case of success and -1 in case of error.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic int
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseStartTag(htmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *name;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *attname;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *attvalue;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar **atts;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int nbatts = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int maxatts;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int meta = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int i;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt == NULL) || (ctxt->input == NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseStartTag: context error\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return -1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR != '<') return -1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync atts = ctxt->atts;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync maxatts = ctxt->maxatts;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync GROW;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync name = htmlParseHTMLName(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (name == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseStartTag: invalid element name\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* Dump the bogus tag like browsers do */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((IS_CHAR_CH(CUR)) && (CUR != '>'))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return -1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(name, BAD_CAST"meta"))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync meta = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check for auto-closure of HTML elements.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlAutoClose(ctxt, name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check for implied HTML elements.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlCheckImplied(ctxt, name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Avoid html at any level > 0, head at any level != 1
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * or any attempt to recurse body
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseStartTag: misplaced <html> tag\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync name, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->nameNr != 1) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (xmlStrEqual(name, BAD_CAST"head"))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseStartTag: misplaced <head> tag\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync name, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(name, BAD_CAST"body")) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int indx;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (indx = 0;indx < ctxt->nameNr;indx++) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseStartTag: misplaced <body> tag\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync name, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((IS_CHAR_CH(CUR)) && (CUR != '>'))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Now parse the attributes, it ends up with the ending
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * (S Attribute)* S?
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP_BLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((IS_CHAR_CH(CUR)) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (CUR != '>') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((CUR != '/') || (NXT(1) != '>'))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync long cons = ctxt->nbChars;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync GROW;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync attname = htmlParseAttribute(ctxt, &attvalue);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (attname != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Well formedness requires at most one declaration of an attribute
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (i = 0; i < nbatts;i += 2) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(atts[i], attname)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_REDEFINED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Attribute %s redefined\n", attname, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (attvalue != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(attvalue);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto failed;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Add the pair to atts
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (atts == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync maxatts = 22; /* allow for 10 attrs by default */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync atts = (const xmlChar **)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlMalloc(maxatts * sizeof(xmlChar *));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (atts == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(ctxt, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (attvalue != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(attvalue);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto failed;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->atts = atts;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->maxatts = maxatts;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if (nbatts + 4 > maxatts) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar **n;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync maxatts *= 2;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync n = (const xmlChar **) xmlRealloc((void *) atts,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync maxatts * sizeof(const xmlChar *));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (n == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(ctxt, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (attvalue != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(attvalue);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto failed;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync atts = n;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->atts = atts;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->maxatts = maxatts;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync atts[nbatts++] = attname;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync atts[nbatts++] = attvalue;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync atts[nbatts] = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync atts[nbatts + 1] = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (attvalue != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(attvalue);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* Dump the bogus attribute string up to the next blank or
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * the end of the tag. */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((IS_CHAR_CH(CUR)) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync !(IS_BLANK_CH(CUR)) && (CUR != '>') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((CUR != '/') || (NXT(1) != '>')))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncfailed:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP_BLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cons == ctxt->nbChars) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseStartTag: problem parsing attributes\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Handle specific association to the META tag
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (meta && (nbatts != 0))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlCheckMeta(ctxt, atts);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * SAX: Start of Element !
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlnamePush(ctxt, name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (nbatts != 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->startElement(ctxt->userData, name, atts);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->startElement(ctxt->userData, name, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (atts != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (i = 1;i < nbatts;i += 2) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (atts[i] != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree((xmlChar *) atts[i]);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseEndTag:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an end of tag
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [42] ETag ::= '</' Name S? '>'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * With namespace
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [NS 9] ETag ::= '</' QName S? '>'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 1 if the current level should be closed.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic int
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseEndTag(htmlParserCtxtPtr ctxt)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *name;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *oldname;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int i, ret;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((CUR != '<') || (NXT(1) != '/')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_LTSLASH_REQUIRED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseEndTag: '</' not found\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP(2);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync name = htmlParseHTMLName(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (name == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * We should definitely be at the ending "S? '>'" part
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP_BLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((!IS_CHAR_CH(CUR)) || (CUR != '>')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "End tag : expected '>'\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->recovery) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * We're not at the ending > !!
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Error, unless in recover mode where we search forwards
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * until we find a >
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (CUR != '\0' && CUR != '>') NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * If the name read is not one of the element in the parsing stack
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * then return, it's just an error.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (i = (ctxt->nameNr - 1); i >= 0; i--) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(name, ctxt->nameTab[i]))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (i < 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Unexpected end tag : %s\n", name, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check for auto-closure of HTML elements.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlAutoCloseOnClose(ctxt, name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Well formedness constraints, opening and closing must match.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * With the exception that the autoclose may have popped stuff out
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * of the stack.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!xmlStrEqual(name, ctxt->name)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->name != NULL) && (!xmlStrEqual(ctxt->name, name))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Opening and ending tag mismatch: %s and %s\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync name, ctxt->name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * SAX: End of Tag
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync oldname = ctxt->name;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((oldname != NULL) && (xmlStrEqual(oldname, name))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->endElement(ctxt->userData, name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlnamePop(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ret = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ret = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (ret);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseReference:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse and handle entity references in content,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * this will end-up in a call to character() since this is either a
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * CharRef, or a predefined entity.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseReference(htmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const htmlEntityDesc * ent;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar out[6];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *name;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR != '&') return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (NXT(1) == '#') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int c;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int bits, i = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync c = htmlParseCharRef(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (c == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (c < 0x80) { out[i++]= c; bits= -6; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (c < 0x800) { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for ( ; bits >= 0; bits-= 6) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync out[i++]= ((c >> bits) & 0x3F) | 0x80;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync out[i] = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlCheckParagraph(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->characters(ctxt->userData, out, i);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ent = htmlParseEntityRef(ctxt, &name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (name == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlCheckParagraph(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ent == NULL) || !(ent->value > 0)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlCheckParagraph(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int c;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int bits, i = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync c = ent->value;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (c < 0x80)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync { out[i++]= c; bits= -6; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (c < 0x800)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (c < 0x10000)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for ( ; bits >= 0; bits-= 6) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync out[i++]= ((c >> bits) & 0x3F) | 0x80;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync out[i] = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlCheckParagraph(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->characters(ctxt->userData, out, i);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseContent:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse a content: comment, sub-element, reference or text.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseContent(htmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *currentNode;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int depth;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *name;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync currentNode = xmlStrdup(ctxt->name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync depth = ctxt->nameNr;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (1) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync long cons = ctxt->nbChars;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync GROW;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Our tag or one of it's parent or children is ending.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((CUR == '<') && (NXT(1) == '/')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (htmlParseEndTag(ctxt) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((currentNode != NULL) || (ctxt->nameNr == 0))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (currentNode != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(currentNode);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync continue; /* while */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if ((CUR == '<') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((IS_ASCII_LETTER(NXT(1))) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (NXT(1) == '_') || (NXT(1) == ':'))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync name = htmlParseHTMLName_nonInvasive(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (name == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseStartTag: invalid element name\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* Dump the bogus tag like browsers do */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((IS_CHAR_CH(CUR)) && (CUR != '>'))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (currentNode != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(currentNode);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->name != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (htmlCheckAutoClose(name, ctxt->name) == 1) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlAutoClose(ctxt, name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync continue;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Has this node been popped out during parsing of
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * the next element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (!xmlStrEqual(currentNode, ctxt->name)))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (currentNode != NULL) xmlFree(currentNode);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((CUR != 0) && ((xmlStrEqual(currentNode, BAD_CAST"script")) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (xmlStrEqual(currentNode, BAD_CAST"style")))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Handle SCRIPT/STYLE separately
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseScript(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Sometimes DOCTYPE arrives in the middle of the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((CUR == '<') && (NXT(1) == '!') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(2) == 'D') && (UPP(3) == 'O') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(4) == 'C') && (UPP(5) == 'T') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(6) == 'Y') && (UPP(7) == 'P') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(8) == 'E')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Misplaced DOCTYPE declaration\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync BAD_CAST "DOCTYPE" , NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseDocTypeDecl(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * First case : a comment
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((CUR == '<') && (NXT(1) == '!') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (NXT(2) == '-') && (NXT(3) == '-')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseComment(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Second case : a Processing Instruction.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if ((CUR == '<') && (NXT(1) == '?')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParsePI(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Third case : a sub-element.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (CUR == '<') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseElement(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Fourth case : a reference. If if has not been resolved,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parsing returns it's Name, create the node
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (CUR == '&') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseReference(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Fifth case : end of the resource
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (CUR == 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlAutoCloseOnEnd(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Last case, text. Note that References are handled directly.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseCharData(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cons == ctxt->nbChars) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->node != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "detected an error in element content\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync GROW;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (currentNode != NULL) xmlFree(currentNode);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseContent:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse a content: comment, sub-element, reference or text.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncvoid
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync__htmlParseContent(void *ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseContent((htmlParserCtxtPtr) ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseElement:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML element, this is highly recursive
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [39] element ::= EmptyElemTag | STag content ETag
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * [41] Attribute ::= Name Eq AttValue
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncvoid
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseElement(htmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *name;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *currentNode = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const htmlElemDesc * info;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParserNodeInfo node_info;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int failed;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int depth;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *oldptr;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt == NULL) || (ctxt->input == NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseElement: context error\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* Capture start position */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->record_info) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync node_info.begin_pos = ctxt->input->consumed +
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (CUR_PTR - ctxt->input->base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync node_info.begin_line = ctxt->input->line;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync failed = htmlParseStartTag(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync name = ctxt->name;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (failed || (name == NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR == '>')
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Lookup the info for that element.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync info = htmlTagLookup(name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (info == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Tag %s invalid\n", name, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check for an Empty Element labeled the XML/SGML way
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((CUR == '/') && (NXT(1) == '>')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP(2);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->endElement(ctxt->userData, name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlnamePop(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR == '>') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Couldn't find end of Start Tag %s\n", name, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * end of parsing of this node.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(name, ctxt->name)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync nodePop(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlnamePop(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Capture end position and add node
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->record_info) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync node_info.end_pos = ctxt->input->consumed +
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (CUR_PTR - ctxt->input->base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync node_info.end_line = ctxt->input->line;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync node_info.node = ctxt->node;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserAddNodeInfo(ctxt, &node_info);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check for an Empty Element from DTD definition
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((info != NULL) && (info->empty)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->endElement(ctxt->userData, name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlnamePop(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse the content of the element:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync currentNode = xmlStrdup(ctxt->name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync depth = ctxt->nameNr;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (IS_CHAR_CH(CUR)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync oldptr = ctxt->input->cur;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseContent(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (oldptr==ctxt->input->cur) break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->nameNr < depth) break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Capture end position and add node
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ( currentNode != NULL && ctxt->record_info ) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync node_info.end_pos = ctxt->input->consumed +
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (CUR_PTR - ctxt->input->base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync node_info.end_line = ctxt->input->line;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync node_info.node = ctxt->node;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserAddNodeInfo(ctxt, &node_info);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!IS_CHAR_CH(CUR)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlAutoCloseOnEnd(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (currentNode != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(currentNode);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseDocument:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML document (and build a tree if using the standard SAX
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * interface).
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 0, -1 in case of error. the parser context is augmented
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * as a result of the parsing.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncint
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseDocument(htmlParserCtxtPtr ctxt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlDtdPtr dtd;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInitParser();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlDefaultSAXHandlerInit();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt == NULL) || (ctxt->input == NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseDocument: context error\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(XML_ERR_INTERNAL_ERROR);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->html = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync GROW;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * SAX: beginning of the document processing.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Wipe out everything which is before the first '<'
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP_BLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR == 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_DOCUMENT_EMPTY,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Document is empty\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->startDocument(ctxt->userData);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse possible comments and PIs before any content
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (((CUR == '<') && (NXT(1) == '!') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (NXT(2) == '-') && (NXT(3) == '-')) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((CUR == '<') && (NXT(1) == '?'))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseComment(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParsePI(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP_BLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Then possibly doc type declaration(s) and more Misc
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * (doctypedecl Misc*)?
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((CUR == '<') && (NXT(1) == '!') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(2) == 'D') && (UPP(3) == 'O') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(4) == 'C') && (UPP(5) == 'T') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(6) == 'Y') && (UPP(7) == 'P') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(8) == 'E')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseDocTypeDecl(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP_BLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse possible comments and PIs before any content
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (((CUR == '<') && (NXT(1) == '!') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (NXT(2) == '-') && (NXT(3) == '-')) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((CUR == '<') && (NXT(1) == '?'))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseComment(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParsePI(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP_BLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Time to start parsing the tree itself
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseContent(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * autoclose
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlAutoCloseOnEnd(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * SAX: end of the document processing.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->endDocument(ctxt->userData);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->myDoc != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync dtd = xmlGetIntSubset(ctxt->myDoc);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (dtd == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->myDoc->intSubset =
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (! ctxt->wellFormed) return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parser contexts handling *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlInitParserCtxt:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Initialize a parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 0 in case of success and -1 in case of error
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic int
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlInitParserCtxt(htmlParserCtxtPtr ctxt)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlSAXHandler *sax;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL) return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync memset(ctxt, 0, sizeof(htmlParserCtxt));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->dict = xmlDictCreate();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->dict == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (sax == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync memset(sax, 0, sizeof(htmlSAXHandler));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* Allocate the Input stack */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->inputTab = (htmlParserInputPtr *)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlMalloc(5 * sizeof(htmlParserInputPtr));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->inputTab == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->inputNr = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->inputMax = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->inputNr = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->inputMax = 5;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->version = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->encoding = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->standalone = -1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_START;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* Allocate the Node stack */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->nodeTab == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nodeNr = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nodeMax = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->node = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->inputNr = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->inputMax = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nodeNr = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nodeMax = 10;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->node = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* Allocate the Name stack */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->nameTab == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nameNr = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nameMax = 10;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->name = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nodeNr = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nodeMax = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->node = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->inputNr = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->inputMax = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nameNr = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nameMax = 10;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->name = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax = sax;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->userData = ctxt;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->myDoc = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->wellFormed = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->replaceEntities = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->linenumbers = xmlLineNumbersDefaultValue;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->html = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->vctxt.userData = ctxt;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->vctxt.error = xmlParserValidityError;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->vctxt.warning = xmlParserValidityWarning;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->record_info = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->validate = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nbChars = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->catalogs = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInitNodeInfoSeq(&ctxt->node_seq);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlFreeParserCtxt:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Free all the memory used by a parser context. However the parsed
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * document in ctxt->myDoc is not freed.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncvoid
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlFreeParserCtxt(htmlParserCtxtPtr ctxt)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFreeParserCtxt(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlNewParserCtxt:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Allocate and initialize a new parser context.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the htmlParserCtxtPtr or NULL in case of allocation error
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParserCtxtPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlNewParserCtxt(void)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserCtxtPtr ctxt;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlErrMemory(NULL, "NewParserCtxt: out of memory\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync memset(ctxt, 0, sizeof(xmlParserCtxt));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (htmlInitParserCtxt(ctxt) < 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlFreeParserCtxt(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCreateMemoryParserCtxt:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @buffer: a pointer to a char array
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @size: the size of the array
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Create a parser context for an HTML in-memory document.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the new parser context or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParserCtxtPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCreateMemoryParserCtxt(const char *buffer, int size) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserCtxtPtr ctxt;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputPtr input;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputBufferPtr buf;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (buffer == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (size <= 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt = htmlNewParserCtxt();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (buf == NULL) return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input = xmlNewInputStream(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (input == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFreeParserCtxt(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input->filename = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input->buf = buf;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input->base = input->buf->buffer->content;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input->cur = input->buf->buffer->content;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input->end = &input->buf->buffer->content[input->buf->buffer->use];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputPush(ctxt, input);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCreateDocParserCtxt:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: a pointer to an array of xmlChar
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: a free form C string describing the HTML document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Create a parser context for an HTML document.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * TODO: check the need to add encoding handling there
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the new parser context or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic htmlParserCtxtPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCreateDocParserCtxt(const xmlChar *cur, const char *encoding) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int len;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParserCtxtPtr ctxt;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync len = xmlStrlen(cur);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt = htmlCreateMemoryParserCtxt((char *)cur, len);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCharEncoding enc;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCharEncodingHandlerPtr handler;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->input->encoding != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree((xmlChar *) ctxt->input->encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->encoding = xmlStrdup((const xmlChar *) encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync enc = xmlParseCharEncoding(encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * registered set of known encodings
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (enc != XML_CHAR_ENCODING_ERROR) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlSwitchEncoding(ctxt, enc);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Unsupported encoding %s\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (const xmlChar *) encoding, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * fallback for unknown encodings
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler((const char *) encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (handler != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlSwitchToEncoding(ctxt, handler);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Unsupported encoding %s\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (const xmlChar *) encoding, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef LIBXML_PUSH_ENABLED
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Progressive parsing interfaces *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseLookupSequence:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @first: the first char to lookup
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @next: the next char to lookup or zero
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @third: the next char to lookup or zero
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @comment: flag to force checking inside comments
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Try to find if a sequence (first, next, third) or just (first next) or
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * (first) is available in the input stream.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * This function has a side effect of (possibly) incrementing ctxt->checkIndex
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * to avoid rescanning sequences of bytes, it DOES change the state of the
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parser, do not use liberally.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * This is basically similar to xmlParseLookupSequence()
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the index to the current parsing point if the full sequence
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * is available, -1 otherwise.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic int
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar next, xmlChar third, int iscomment) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int base, len;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParserInputPtr in;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *buf;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int incomment = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync in = ctxt->input;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (in == NULL) return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync base = in->cur - in->base;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (base < 0) return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->checkIndex > base)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync base = ctxt->checkIndex;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (in->buf == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buf = in->base;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync len = in->length;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buf = in->buf->buffer->content;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync len = in->buf->buffer->use;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* take into account the sequence length */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (third) len -= 2;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (next) len --;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for (;base < len;base++) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!incomment && (base + 4 < len) && !iscomment) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((buf[base] == '<') && (buf[base + 1] == '!') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (buf[base + 2] == '-') && (buf[base + 3] == '-')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync incomment = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* do not increment past <! - some people use <!--> */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync base += 2;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (incomment) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (base + 3 > len)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((buf[base] == '-') && (buf[base + 1] == '-') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (buf[base + 2] == '>')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync incomment = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync base += 2;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync continue;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (buf[base] == first) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (third != 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((buf[base + 1] != next) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (buf[base + 2] != third)) continue;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if (next != 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (buf[base + 1] != next) continue;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (next == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: lookup '%c' found at %d\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync first, base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (third == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: lookup '%c%c' found at %d\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync first, next, base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: lookup '%c%c%c' found at %d\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync first, next, third, base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(base - (in->cur - in->base));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = base;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (next == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: lookup '%c' failed\n", first);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if (third == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: lookup '%c%c' failed\n", first, next);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: lookup '%c%c%c' failed\n", first, next, third);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseTryOrFinish:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @terminate: last chunk indicator
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Try to progress on parsing
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns zero if no parsing was possible
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic int
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int ret = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParserInputPtr in;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int avail = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar cur, next;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync switch (ctxt->instate) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_EOF:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try EOF\n"); break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_START:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try START\n"); break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_MISC:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try MISC\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_COMMENT:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try COMMENT\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_PROLOG:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try PROLOG\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_START_TAG:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try START_TAG\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_CONTENT:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try CONTENT\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_CDATA_SECTION:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try CDATA_SECTION\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_END_TAG:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try END_TAG\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_ENTITY_DECL:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try ENTITY_DECL\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_ENTITY_VALUE:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try ENTITY_VALUE\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_ATTRIBUTE_VALUE:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try ATTRIBUTE_VALUE\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_DTD:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try DTD\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_EPILOG:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try EPILOG\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_PI:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try PI\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_SYSTEM_LITERAL:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: try SYSTEM_LITERAL\n");break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (1) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync in = ctxt->input;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (in == NULL) break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (in->buf == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync avail = in->length - (in->cur - in->base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync avail = in->buf->buffer->use - (in->cur - in->base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((avail == 0) && (terminate)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlAutoCloseOnEnd(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * SAX: end of the document processing.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_EOF;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->endDocument(ctxt->userData);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (avail < 1)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = in->cur[0];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP(1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync continue;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync switch (ctxt->instate) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_EOF:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Document parsing is done !
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_START:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Very first chars read from the document flow.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = in->cur[0];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (IS_BLANK_CH(cur)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP_BLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (in->buf == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync avail = in->length - (in->cur - in->base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync avail = in->buf->buffer->use - (in->cur - in->base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->setDocumentLocator(ctxt->userData,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync &xmlDefaultSAXLocator);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax) && (ctxt->sax->startDocument) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (!ctxt->disableSAX))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->startDocument(ctxt->userData);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = in->cur[0];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync next = in->cur[1];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur == '<') && (next == '!') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(2) == 'D') && (UPP(3) == 'O') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(4) == 'C') && (UPP(5) == 'T') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(6) == 'Y') && (UPP(7) == 'P') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(8) == 'E')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((!terminate) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing internal subset\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseDocTypeDecl(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_PROLOG;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering PROLOG\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_MISC;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering MISC\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_MISC:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP_BLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (in->buf == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync avail = in->length - (in->cur - in->base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync avail = in->buf->buffer->use - (in->cur - in->base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (avail < 2)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = in->cur[0];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync next = in->cur[1];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur == '<') && (next == '!') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (in->cur[2] == '-') && (in->cur[3] == '-')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((!terminate) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing Comment\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseComment(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_MISC;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if ((cur == '<') && (next == '?')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((!terminate) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing PI\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParsePI(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_MISC;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if ((cur == '<') && (next == '!') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(2) == 'D') && (UPP(3) == 'O') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(4) == 'C') && (UPP(5) == 'T') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(6) == 'Y') && (UPP(7) == 'P') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(8) == 'E')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((!terminate) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing internal subset\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseDocTypeDecl(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_PROLOG;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering PROLOG\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if ((cur == '<') && (next == '!') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (avail < 9)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_START_TAG;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering START_TAG\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_PROLOG:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP_BLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (in->buf == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync avail = in->length - (in->cur - in->base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync avail = in->buf->buffer->use - (in->cur - in->base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (avail < 2)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = in->cur[0];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync next = in->cur[1];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur == '<') && (next == '!') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (in->cur[2] == '-') && (in->cur[3] == '-')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((!terminate) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing Comment\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseComment(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_PROLOG;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if ((cur == '<') && (next == '?')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((!terminate) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing PI\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParsePI(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_PROLOG;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if ((cur == '<') && (next == '!') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (avail < 4)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_START_TAG;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering START_TAG\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_EPILOG:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (in->buf == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync avail = in->length - (in->cur - in->base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync avail = in->buf->buffer->use - (in->cur - in->base);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (avail < 1)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = in->cur[0];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (IS_BLANK_CH(cur)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseCharData(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (avail < 2)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync next = in->cur[1];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur == '<') && (next == '!') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (in->cur[2] == '-') && (in->cur[3] == '-')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((!terminate) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing Comment\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseComment(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_EPILOG;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if ((cur == '<') && (next == '?')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((!terminate) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing PI\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParsePI(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_EPILOG;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if ((cur == '<') && (next == '!') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (avail < 4)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->errNo = XML_ERR_DOCUMENT_END;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->wellFormed = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_EOF;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering EOF\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->endDocument(ctxt->userData);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_START_TAG: {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *name;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int failed;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const htmlElemDesc * info;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (avail < 2)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = in->cur[0];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur != '<') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_CONTENT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (in->cur[1] == '/') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_END_TAG;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering END_TAG\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((!terminate) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync failed = htmlParseStartTag(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync name = ctxt->name;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (failed ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (name == NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR == '>')
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Lookup the info for that element.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync info = htmlTagLookup(name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (info == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Tag %s invalid\n", name, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check for an Empty Element labeled the XML/SGML way
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((CUR == '/') && (NXT(1) == '>')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync SKIP(2);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->endElement(ctxt->userData, name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlnamePop(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_CONTENT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (CUR == '>') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Couldn't find end of Start Tag %s\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync name, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * end of parsing of this node.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(name, ctxt->name)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync nodePop(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlnamePop(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_CONTENT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Check for an Empty Element from DTD definition
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((info != NULL) && (info->empty)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->endElement(ctxt->userData, name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlnamePop(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_CONTENT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_CONTENT: {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync long cons;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Handle preparsed entities and charRef
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->token != 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar chr[2] = { 0 , 0 } ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync chr[0] = (xmlChar) ctxt->token;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlCheckParagraph(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->characters(ctxt->userData, chr, 1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->token = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((avail == 1) && (terminate)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = in->cur[0];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur != '<') && (cur != '&')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->sax != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (IS_BLANK_CH(cur)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->sax->ignorableWhitespace != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->ignorableWhitespace(
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->userData, &cur, 1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlCheckParagraph(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->sax->characters != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->characters(
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->userData, &cur, 1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->token = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync in->cur++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (avail < 2)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = in->cur[0];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync next = in->cur[1];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cons = ctxt->nbChars;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (xmlStrEqual(ctxt->name, BAD_CAST"style"))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Handle SCRIPT/STYLE separately
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!terminate) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int idx;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar val;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (idx < 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync val = in->cur[idx + 2];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (val == 0) /* bad cut of input */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseScript(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur == '<') && (next == '/')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_END_TAG;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering END_TAG\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Sometimes DOCTYPE arrives in the middle of the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur == '<') && (next == '!') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(2) == 'D') && (UPP(3) == 'O') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(4) == 'C') && (UPP(5) == 'T') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(6) == 'Y') && (UPP(7) == 'P') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (UPP(8) == 'E')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((!terminate) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "Misplaced DOCTYPE declaration\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync BAD_CAST "DOCTYPE" , NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseDocTypeDecl(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if ((cur == '<') && (next == '!') &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (in->cur[2] == '-') && (in->cur[3] == '-')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((!terminate) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (htmlParseLookupSequence(
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt, '-', '-', '>', 1) < 0))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing Comment\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseComment(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_CONTENT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if ((cur == '<') && (next == '?')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((!terminate) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing PI\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParsePI(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_CONTENT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if ((cur == '<') && (next == '!') && (avail < 4)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if ((cur == '<') && (next == '/')) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_END_TAG;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering END_TAG\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if (cur == '<') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_START_TAG;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering START_TAG\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if (cur == '&') {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((!terminate) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (htmlParseLookupSequence(ctxt, ';', 0, 0, 0) < 0))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing Reference\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* TODO: check generation of subtrees if noent !!! */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseReference(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * check that the text sequence is complete
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * before handing out the data to the parser
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * to avoid problems with erroneous end of
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * data detection.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((!terminate) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (htmlParseLookupSequence(ctxt, '<', 0, 0, 0) < 0))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: Parsing char data\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseCharData(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cons == ctxt->nbChars) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->node != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "detected an error in element content\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NEXT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_END_TAG:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (avail < 2)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((!terminate) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto done;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseEndTag(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->nameNr == 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_EPILOG;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_CONTENT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_CDATA_SECTION:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: internal error, state == CDATA\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_CONTENT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_DTD:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: internal error, state == DTD\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_CONTENT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_COMMENT:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: internal error, state == COMMENT\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_CONTENT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_PI:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: internal error, state == PI\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_CONTENT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_ENTITY_DECL:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: internal error, state == ENTITY_DECL\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_CONTENT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_ENTITY_VALUE:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: internal error, state == ENTITY_VALUE\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_CONTENT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering DTD\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_ATTRIBUTE_VALUE:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: internal error, state == ATTRIBUTE_VALUE\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_START_TAG;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering START_TAG\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_SYSTEM_LITERAL:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_CONTENT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_IGNORE:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: internal error, state == XML_PARSER_IGNORE\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_CONTENT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_PARSER_PUBLIC_LITERAL:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: internal error, state == XML_PARSER_LITERAL\n",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_CONTENT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "HPP: entering CONTENT\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncdone:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((avail == 0) && (terminate)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlAutoCloseOnEnd(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * SAX: end of the document processing.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_EOF;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->endDocument(ctxt->userData);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->myDoc != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ctxt->instate == XML_PARSER_EPILOG))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlDtdPtr dtd;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync dtd = xmlGetIntSubset(ctxt->myDoc);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (dtd == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->myDoc->intSubset =
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(ret);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseChunk:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @chunk: an char array
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @size: the size in byte of the chunk
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @terminate: last chunk indicator
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse a Chunk of memory
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns zero if no error, the xmlParserErrors otherwise.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncint
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int terminate) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt == NULL) || (ctxt->input == NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "htmlParseChunk: context error\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(XML_ERR_INTERNAL_ERROR);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int base = ctxt->input->base - ctxt->input->buf->buffer->content;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int cur = ctxt->input->cur - ctxt->input->base;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int res;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (res < 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->errNo = XML_PARSER_EOF;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->disableSAX = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (XML_PARSER_EOF);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->base = ctxt->input->buf->buffer->content + base;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->cur = ctxt->input->base + cur;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->end =
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#if 0
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((terminate) || (ctxt->input->buf->buffer->use > 80))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseTryOrFinish(ctxt, terminate);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if (ctxt->instate != XML_PARSER_EOF) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputBufferPtr in = ctxt->input->buf;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((in->encoder != NULL) && (in->buffer != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (in->raw != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int nbchars;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (nbchars < 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync "encoder error\n", NULL, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(XML_ERR_INVALID_ENCODING);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseTryOrFinish(ctxt, terminate);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (terminate) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->instate != XML_PARSER_EOF) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ctxt->instate != XML_PARSER_EPILOG) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ctxt->instate != XML_PARSER_MISC)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->errNo = XML_ERR_DOCUMENT_END;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->wellFormed = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->instate != XML_PARSER_EOF) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->endDocument(ctxt->userData);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_EOF;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return((xmlParserErrors) ctxt->errNo);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * User entry points *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCreatePushParserCtxt:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @sax: a SAX handler
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @user_data: The user data returned on SAX callbacks
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @chunk: a pointer to an array of chars
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @size: number of chars in the array
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @filename: an optional file name or URI
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @enc: an optional encoding
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Create a parser context for using the HTML parser in push mode
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * The value of @filename is used for fetching external entities
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * and error/warning reports.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the new parser context or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParserCtxtPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *chunk, int size, const char *filename,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCharEncoding enc) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParserCtxtPtr ctxt;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParserInputPtr inputStream;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputBufferPtr buf;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInitParser();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buf = xmlAllocParserInputBuffer(enc);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (buf == NULL) return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt = htmlNewParserCtxt();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFreeParserInputBuffer(buf);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if(enc==XML_CHAR_ENCODING_UTF8 || buf->encoder)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->charset=XML_CHAR_ENCODING_UTF8;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (sax != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->sax != (xmlSAXHandlerPtr) &htmlDefaultSAXHandler)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(ctxt->sax);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->sax == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(buf);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (user_data != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->userData = user_data;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (filename == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->directory = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->directory = xmlParserGetDirectory(filename);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputStream = htmlNewInputStream(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (inputStream == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFreeParserCtxt(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(buf);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (filename == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputStream->filename = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputStream->filename = (char *)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCanonicPath((const xmlChar *) filename);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputStream->buf = buf;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputStream->base = inputStream->buf->buffer->content;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputStream->cur = inputStream->buf->buffer->content;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputStream->end =
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputPush(ctxt, inputStream);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ctxt->input->buf != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int base = ctxt->input->base - ctxt->input->buf->buffer->content;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int cur = ctxt->input->cur - ctxt->input->base;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->base = ctxt->input->buf->buffer->content + base;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->cur = ctxt->input->base + cur;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->end =
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef DEBUG_PUSH
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->progressive = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif /* LIBXML_PUSH_ENABLED */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlSAXParseDoc:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: a pointer to an array of xmlChar
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: a free form C string describing the HTML document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @sax: the SAX handler block
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @userData: if using SAX, this pointer will be provided on callbacks.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * to handle parse events. If sax is NULL, fallback to the default DOM
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * behavior and return a tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree unless SAX is NULL or the document is
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * not well formed.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlSAXParseDoc(xmlChar *cur, const char *encoding, htmlSAXHandlerPtr sax, void *userData) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlDocPtr ret;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParserCtxtPtr ctxt;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInitParser();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == NULL) return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt = htmlCreateDocParserCtxt(cur, encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL) return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (sax != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->sax != NULL) xmlFree (ctxt->sax);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax = sax;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->userData = userData;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseDocument(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ret = ctxt->myDoc;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (sax != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->userData = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlFreeParserCtxt(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(ret);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseDoc:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: a pointer to an array of xmlChar
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: a free form C string describing the HTML document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML in-memory document and build a tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseDoc(xmlChar *cur, const char *encoding) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(htmlSAXParseDoc(cur, encoding, NULL, NULL));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCreateFileParserCtxt:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @filename: the filename
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: a free form C string describing the HTML document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Create a parser context for a file content.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Automatic support for ZLIB/Compress compressed document is provided
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * by default if found at compile-time.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the new parser context or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParserCtxtPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCreateFileParserCtxt(const char *filename, const char *encoding)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParserCtxtPtr ctxt;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParserInputPtr inputStream;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync char *canonicFilename;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* htmlCharEncoding enc; */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *content, *content_line = (xmlChar *) "charset=";
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (filename == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt = htmlNewParserCtxt();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync canonicFilename = (char *) xmlCanonicPath((const xmlChar *) filename);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (canonicFilename == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef LIBXML_SAX1_ENABLED
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlDefaultSAXHandler.error != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlDefaultSAXHandler.error(NULL, "out of memory\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFreeParserCtxt(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(canonicFilename);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (inputStream == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFreeParserCtxt(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputPush(ctxt, inputStream);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /* set encoding */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync content = xmlMallocAtomic (xmlStrlen(content_line) + strlen(encoding) + 1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (content) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync strcpy ((char *)content, (char *)content_line);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync strcat ((char *)content, (char *)encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlCheckEncoding (ctxt, content);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree (content);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlSAXParseFile:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @filename: the filename
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: a free form C string describing the HTML document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @sax: the SAX handler block
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @userData: if using SAX, this pointer will be provided on callbacks.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML file and build a tree. Automatic support for ZLIB/Compress
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * compressed document is provided by default if found at compile-time.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * It use the given SAX function block to handle the parsing callback.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * If sax is NULL, fallback to the default DOM tree building routines.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree unless SAX is NULL or the document is
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * not well formed.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlSAXParseFile(const char *filename, const char *encoding, htmlSAXHandlerPtr sax,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync void *userData) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlDocPtr ret;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParserCtxtPtr ctxt;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlSAXHandlerPtr oldsax = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInitParser();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt = htmlCreateFileParserCtxt(filename, encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL) return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (sax != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync oldsax = ctxt->sax;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax = sax;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->userData = userData;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseDocument(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ret = ctxt->myDoc;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (sax != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax = oldsax;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->userData = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlFreeParserCtxt(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(ret);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlParseFile:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @filename: the filename
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: a free form C string describing the HTML document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML file and build a tree. Automatic support for ZLIB/Compress
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * compressed document is provided by default if found at compile-time.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlParseFile(const char *filename, const char *encoding) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(htmlSAXParseFile(filename, encoding, NULL, NULL));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlHandleOmittedElem:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @val: int 0 or 1
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Set and return the previous value for handling HTML omitted tags.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the last value for 0 for no handling, 1 for auto insertion.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncint
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlHandleOmittedElem(int val) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int old = htmlOmittedDefaultValue;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlOmittedDefaultValue = val;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(old);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlElementAllowedHere:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @parent: HTML parent element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @elt: HTML element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Checks whether an HTML element may be a direct child of a parent element.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Note - doesn't check for deprecated elements
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 1 if allowed; 0 otherwise.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncint
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlElementAllowedHere(const htmlElemDesc* parent, const xmlChar* elt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char** p ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ( ! elt || ! parent || ! parent->subelts )
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return 0 ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for ( p = parent->subelts; *p; ++p )
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ( !xmlStrcmp((const xmlChar *)*p, elt) )
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return 1 ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return 0 ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlElementStatusHere:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @parent: HTML parent element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @elt: HTML element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Checks whether an HTML element may be a direct child of a parent element.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * and if so whether it is valid or deprecated.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns one of HTML_VALID, HTML_DEPRECATED, HTML_INVALID
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlStatus
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlElementStatusHere(const htmlElemDesc* parent, const htmlElemDesc* elt) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ( ! parent || ! elt )
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return HTML_INVALID ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ( ! htmlElementAllowedHere(parent, (const xmlChar*) elt->name ) )
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return HTML_INVALID ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return ( elt->dtd == 0 ) ? HTML_VALID : HTML_DEPRECATED ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlAttrAllowed:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @elt: HTML element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @attr: HTML attribute
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @legacy: whether to allow deprecated attributes
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Checks whether an attribute is valid for an element
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Has full knowledge of Required and Deprecated attributes
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns one of HTML_REQUIRED, HTML_VALID, HTML_DEPRECATED, HTML_INVALID
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlStatus
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlAttrAllowed(const htmlElemDesc* elt, const xmlChar* attr, int legacy) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char** p ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ( !elt || ! attr )
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return HTML_INVALID ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ( elt->attrs_req )
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for ( p = elt->attrs_req; *p; ++p)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ( !xmlStrcmp((const xmlChar*)*p, attr) )
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return HTML_REQUIRED ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ( elt->attrs_opt )
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for ( p = elt->attrs_opt; *p; ++p)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ( !xmlStrcmp((const xmlChar*)*p, attr) )
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return HTML_VALID ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ( legacy && elt->attrs_depr )
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync for ( p = elt->attrs_depr; *p; ++p)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ( !xmlStrcmp((const xmlChar*)*p, attr) )
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return HTML_DEPRECATED ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return HTML_INVALID ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlNodeStatus:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @node: an htmlNodePtr in a tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @legacy: whether to allow deprecated elements (YES is faster here
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * for Element nodes)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Checks whether the tree node is valid. Experimental (the author
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * only uses the HTML enhancements in a SAX parser)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Return: for Element nodes, a return from htmlElementAllowedHere (if
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * legacy allowed) or htmlElementStatusHere (otherwise).
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * for Attribute nodes, a return from htmlAttrAllowed
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * for other nodes, HTML_NA (no checks performed)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlStatus
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlNodeStatus(const htmlNodePtr node, int legacy) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ( ! node )
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return HTML_INVALID ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync switch ( node->type ) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_ELEMENT_NODE:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return legacy
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ? ( htmlElementAllowedHere (
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlTagLookup(node->parent->name) , node->name
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ) ? HTML_VALID : HTML_INVALID )
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync : htmlElementStatusHere(
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlTagLookup(node->parent->name) ,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlTagLookup(node->name) )
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_ATTRIBUTE_NODE:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return htmlAttrAllowed(
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlTagLookup(node->parent->name) , node->name, legacy) ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync default: return HTML_NA ;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * New set (2.6.0) of simpler and more flexible APIs *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used, and @str is evaluated more than once.
 *
 * The expansion is a single statement that has to be terminated with a
 * semicolon by the caller, so the macro can be used as the body of an
 * unbraced if/else without changing how the else binds.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCtxtReset:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Reset a parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncvoid
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCtxtReset(htmlParserCtxtPtr ctxt)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputPtr input;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlDictPtr dict;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInitParser();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync dict = ctxt->dict;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFreeInputStream(input);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->inputNr = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->spaceNr = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->spaceTab != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->spaceTab[0] = -1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->space = &ctxt->spaceTab[0];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->space = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nodeNr = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->node = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nameNr = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->name = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DICT_FREE(ctxt->version);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->version = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DICT_FREE(ctxt->encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->encoding = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DICT_FREE(ctxt->directory);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->directory = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DICT_FREE(ctxt->extSubURI);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->extSubURI = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync DICT_FREE(ctxt->extSubSystem);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->extSubSystem = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->myDoc != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFreeDoc(ctxt->myDoc);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->myDoc = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->standalone = -1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->hasExternalSubset = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->hasPErefs = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->html = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->external = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->instate = XML_PARSER_START;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->token = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->wellFormed = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nsWellFormed = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->valid = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->vctxt.userData = ctxt;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->vctxt.error = xmlParserValidityError;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->vctxt.warning = xmlParserValidityWarning;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->record_info = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->nbChars = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->checkIndex = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->inSubset = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->errNo = XML_ERR_OK;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->depth = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->charset = XML_CHAR_ENCODING_NONE;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->catalogs = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInitNodeInfoSeq(&ctxt->node_seq);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->attsDefault != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->attsDefault = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->attsSpecial != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlHashFree(ctxt->attsSpecial, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->attsSpecial = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCtxtUseOptions:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Applies the options to the parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 0 in case of success, the set of unknown or unimplemented options
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * in case of error.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncint
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (options & HTML_PARSE_NOWARNING) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->warning = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->vctxt.warning = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync options -= XML_PARSE_NOWARNING;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->options |= XML_PARSE_NOWARNING;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (options & HTML_PARSE_NOERROR) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->error = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->vctxt.error = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->fatalError = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync options -= XML_PARSE_NOERROR;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->options |= XML_PARSE_NOERROR;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (options & HTML_PARSE_PEDANTIC) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->pedantic = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync options -= XML_PARSE_PEDANTIC;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->options |= XML_PARSE_PEDANTIC;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->pedantic = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (options & XML_PARSE_NOBLANKS) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->keepBlanks = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync options -= XML_PARSE_NOBLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->options |= XML_PARSE_NOBLANKS;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->keepBlanks = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (options & HTML_PARSE_RECOVER) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->recovery = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync options -= HTML_PARSE_RECOVER;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->recovery = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (options & HTML_PARSE_COMPACT) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->options |= HTML_PARSE_COMPACT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync options -= HTML_PARSE_COMPACT;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->dictNames = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (options);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlDoRead:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URL: the base URL to use for the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @reuse: keep the context for reuse
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Common front-end for the htmlRead functions
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic htmlDocPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int options, int reuse)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlDocPtr ret;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlCtxtUseOptions(ctxt, options);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->html = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCharEncodingHandlerPtr hdlr;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync hdlr = xmlFindCharEncodingHandler(encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (hdlr != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlSwitchToEncoding(ctxt, hdlr);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((URL != NULL) && (ctxt->input != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ctxt->input->filename == NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParseDocument(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ret = ctxt->myDoc;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->myDoc = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (!reuse) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((ctxt->dictNames) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ret != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (ret->dict == ctxt->dict))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt->dict = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFreeParserCtxt(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (ret);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlReadDoc:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: a pointer to a zero terminated string
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URL: the base URL to use for the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an XML in-memory document and build a tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParserCtxtPtr ctxt;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInitParser();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt = htmlCreateDocParserCtxt(cur, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (htmlDoRead(ctxt, URL, encoding, options, 0));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlReadFile:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @filename: a file or URL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an XML file from the filesystem or the network.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlReadFile(const char *filename, const char *encoding, int options)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParserCtxtPtr ctxt;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInitParser();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt = htmlCreateFileParserCtxt(filename, encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (htmlDoRead(ctxt, NULL, NULL, options, 0));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlReadMemory:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @buffer: a pointer to a char array
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @size: the size of the array
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URL: the base URL to use for the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an XML in-memory document and build a tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParserCtxtPtr ctxt;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInitParser();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt = xmlCreateMemoryParserCtxt(buffer, size);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlDefaultSAXHandlerInit();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt->sax != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (htmlDoRead(ctxt, URL, encoding, options, 0));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlReadFd:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @fd: an open file descriptor
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URL: the base URL to use for the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an XML from a file descriptor and build a tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlReadFd(int fd, const char *URL, const char *encoding, int options)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParserCtxtPtr ctxt;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputBufferPtr input;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputPtr stream;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (fd < 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInitParser();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (input == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt = xmlNewParserCtxt();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFreeParserInputBuffer(input);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (stream == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFreeParserInputBuffer(input);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFreeParserCtxt(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputPush(ctxt, stream);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (htmlDoRead(ctxt, URL, encoding, options, 0));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlReadIO:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ioread: an I/O read function
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ioclose: an I/O close function
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ioctx: an I/O handler
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URL: the base URL to use for the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML document from I/O functions and source and build a tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync void *ioctx, const char *URL, const char *encoding, int options)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlParserCtxtPtr ctxt;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputBufferPtr input;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputPtr stream;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ioread == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInitParser();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync XML_CHAR_ENCODING_NONE);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (input == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ctxt = htmlNewParserCtxt();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFreeParserInputBuffer(input);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (stream == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFreeParserInputBuffer(input);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFreeParserCtxt(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputPush(ctxt, stream);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (htmlDoRead(ctxt, URL, encoding, options, 0));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCtxtReadDoc:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: a pointer to a zero terminated string
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URL: the base URL to use for the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an XML in-memory document and build a tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * This reuses the existing @ctxt parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *URL, const char *encoding, int options)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputPtr stream;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlCtxtReset(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync stream = xmlNewStringInputStream(ctxt, cur);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (stream == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputPush(ctxt, stream);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (htmlDoRead(ctxt, URL, encoding, options, 1));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCtxtReadFile:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @filename: a file or URL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an XML file from the filesystem or the network.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * This reuses the existing @ctxt parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding, int options)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputPtr stream;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (filename == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlCtxtReset(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync stream = xmlLoadExternalEntity(filename, NULL, ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (stream == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputPush(ctxt, stream);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (htmlDoRead(ctxt, NULL, encoding, options, 1));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCtxtReadMemory:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @buffer: a pointer to a char array
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @size: the size of the array
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URL: the base URL to use for the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an XML in-memory document and build a tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * This reuses the existing @ctxt parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *URL, const char *encoding, int options)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputBufferPtr input;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputPtr stream;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (buffer == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlCtxtReset(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (input == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (stream == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFreeParserInputBuffer(input);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputPush(ctxt, stream);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (htmlDoRead(ctxt, URL, encoding, options, 1));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCtxtReadFd:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @fd: an open file descriptor
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URL: the base URL to use for the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an XML from a file descriptor and build a tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * This reuses the existing @ctxt parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *URL, const char *encoding, int options)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputBufferPtr input;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputPtr stream;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (fd < 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlCtxtReset(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (input == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (stream == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFreeParserInputBuffer(input);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputPush(ctxt, stream);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (htmlDoRead(ctxt, URL, encoding, options, 1));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlCtxtReadIO:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ctxt: an HTML parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ioread: an I/O read function
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ioclose: an I/O close function
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @ioctx: an I/O handler
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @URL: the base URL to use for the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding, or NULL
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @options: a combination of htmlParserOption(s)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * parse an HTML document from I/O functions and source and build a tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * This reuses the existing @ctxt parser context
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the resulting document tree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocPtr
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInputCloseCallback ioclose, void *ioctx,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *URL,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding, int options)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputBufferPtr input;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlParserInputPtr stream;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ioread == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (ctxt == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlCtxtReset(ctxt);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync XML_CHAR_ENCODING_NONE);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (input == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (stream == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFreeParserInputBuffer(input);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync inputPush(ctxt, stream);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (htmlDoRead(ctxt, URL, encoding, options, 1));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#define bottom_HTMLparser
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include "elfgcchack.h"
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif /* LIBXML_HTML_ENABLED */