Lines Matching refs:ctxt

56 xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len,
58 static void htmlParseComment(htmlParserCtxtPtr ctxt);
68 * @ctxt: an HTML parser context
74 htmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
76 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
77 (ctxt->instate == XML_PARSER_EOF))
79 if (ctxt != NULL) {
80 ctxt->errNo = XML_ERR_NO_MEMORY;
81 ctxt->instate = XML_PARSER_EOF;
82 ctxt->disableSAX = 1;
85 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
90 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
97 * @ctxt: an HTML parser context
106 htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
109 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
110 (ctxt->instate == XML_PARSER_EOF))
112 if (ctxt != NULL)
113 ctxt->errNo = error;
114 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
119 if (ctxt != NULL)
120 ctxt->wellFormed = 0;
125 * @ctxt: an HTML parser context
133 htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
136 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
137 (ctxt->instate == XML_PARSER_EOF))
139 if (ctxt != NULL)
140 ctxt->errNo = error;
141 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
144 if (ctxt != NULL)
145 ctxt->wellFormed = 0;
156 * @ctxt: an HTML parser context
164 htmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value)
166 if (ctxt->nameNr >= ctxt->nameMax) {
167 ctxt->nameMax *= 2;
168 ctxt->nameTab = (const xmlChar * *)
169 xmlRealloc((xmlChar * *)ctxt->nameTab,
170 ctxt->nameMax *
171 sizeof(ctxt->nameTab[0]));
172 if (ctxt->nameTab == NULL) {
173 htmlErrMemory(ctxt, NULL);
177 ctxt->nameTab[ctxt->nameNr] = value;
178 ctxt->name = value;
179 return (ctxt->nameNr++);
183 * @ctxt: an HTML parser context
190 htmlnamePop(htmlParserCtxtPtr ctxt)
194 if (ctxt->nameNr <= 0)
196 ctxt->nameNr--;
197 if (ctxt->nameNr < 0)
199 if (ctxt->nameNr > 0)
200 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
202 ctxt->name = NULL;
203 ret = ctxt->nameTab[ctxt->nameNr];
204 ctxt->nameTab[ctxt->nameNr] = NULL;
237 #define UPPER (toupper(*ctxt->input->cur))
239 #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val)
241 #define NXT(val) ctxt->input->cur[(val)]
243 #define UPP(val) (toupper(ctxt->input->cur[(val)]))
245 #define CUR_PTR ctxt->input->cur
247 #define SHRINK if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
248 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
249 xmlParserInputShrink(ctxt->input)
251 #define GROW if ((ctxt->progressive == 0) && \
252 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
253 xmlParserInputGrow(ctxt->input, INPUT_CHUNK)
255 #define CURRENT ((int) (*ctxt->input->cur))
257 #define SKIP_BLANKS htmlSkipBlankChars(ctxt)
261 /* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
262 #define CUR ((int) (*ctxt->input->cur))
263 #define NEXT xmlNextChar(ctxt)
265 #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
266 #define NXT(val) ctxt->input->cur[(val)]
267 #define CUR_PTR ctxt->input->cur
271 if (*(ctxt->input->cur) == '\n') { \
272 ctxt->input->line++; ctxt->input->col = 1; \
273 } else ctxt->input->col++; \
274 ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++; \
279 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
280 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
283 #define CUR_CHAR(l) htmlCurrentChar(ctxt, &l)
284 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
292 * @ctxt: the HTML parser context
305 htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
306 if (ctxt->instate == XML_PARSER_EOF)
309 if (ctxt->token != 0) {
311 return(ctxt->token);
313 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
325 const unsigned char *cur = ctxt->input->cur;
332 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
338 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
343 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
367 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
374 return((int) *ctxt->input->cur);
383 if ((int) *ctxt->input->cur < 0x80)
384 return((int) *ctxt->input->cur);
389 xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
390 ctxt->charset = XML_CHAR_ENCODING_UTF8;
391 return(xmlCurrentChar(ctxt, len));
404 if (ctxt->input->end - ctxt->input->cur >= 4) {
406 ctxt->input->cur[0], ctxt->input->cur[1],
407 ctxt->input->cur[2], ctxt->input->cur[3]);
409 snprintf(buffer, 149, "Bytes: 0x%02X\n", ctxt->input->cur[0]);
411 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
416 ctxt->charset = XML_CHAR_ENCODING_8859_1;
418 return((int) *ctxt->input->cur);
423 * @ctxt: the HTML parser context
431 htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
434 while (IS_BLANK_CH(*(ctxt->input->cur))) {
435 if ((*ctxt->input->cur == 0) &&
436 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
437 xmlPopInput(ctxt);
439 if (*(ctxt->input->cur) == '\n') {
440 ctxt->input->line++; ctxt->input->col = 1;
441 } else ctxt->input->col++;
442 ctxt->input->cur++;
443 ctxt->nbChars++;
444 if (*ctxt->input->cur == 0)
445 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1145 * @ctxt: an HTML parser context
1152 htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
1159 for (i = (ctxt->nameNr - 1); i >= 0; i--) {
1161 if (xmlStrEqual(newtag, ctxt->nameTab[i]))
1169 if (htmlGetEndPriority(ctxt->nameTab[i]) > priority)
1175 while (!xmlStrEqual(newtag, ctxt->name)) {
1176 info = htmlTagLookup(ctxt->name);
1178 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
1180 newtag, ctxt->name);
1182 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1183 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1184 htmlnamePop(ctxt);
1190 * @ctxt: an HTML parser context
1195 htmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt)
1199 if (ctxt->nameNr == 0)
1201 for (i = (ctxt->nameNr - 1); i >= 0; i--) {
1202 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1203 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1204 htmlnamePop(ctxt);
1210 * @ctxt: an HTML parser context
1221 htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
1223 while ((newtag != NULL) && (ctxt->name != NULL) &&
1224 (htmlCheckAutoClose(newtag, ctxt->name))) {
1225 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1226 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1227 htmlnamePop(ctxt);
1230 htmlAutoCloseOnEnd(ctxt);
1233 while ((newtag == NULL) && (ctxt->name != NULL) &&
1234 ((xmlStrEqual(ctxt->name, BAD_CAST "head")) ||
1235 (xmlStrEqual(ctxt->name, BAD_CAST "body")) ||
1236 (xmlStrEqual(ctxt->name, BAD_CAST "html")))) {
1237 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1238 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1239 htmlnamePop(ctxt);
1297 * @ctxt: an HTML parser context
1305 htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
1310 if (ctxt->nameNr <= 0) {
1311 htmlnamePush(ctxt, BAD_CAST"html");
1312 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1313 ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);
1317 if ((ctxt->nameNr <= 1) &&
1328 htmlnamePush(ctxt, BAD_CAST"head");
1329 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1330 ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
1335 for (i = 0;i < ctxt->nameNr;i++) {
1336 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) {
1339 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) {
1344 htmlnamePush(ctxt, BAD_CAST"body");
1345 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1346 ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);
1352 * @ctxt: an HTML parser context
1362 htmlCheckParagraph(htmlParserCtxtPtr ctxt) {
1366 if (ctxt == NULL)
1368 tag = ctxt->name;
1370 htmlAutoClose(ctxt, BAD_CAST"p");
1371 htmlCheckImplied(ctxt, BAD_CAST"p");
1372 htmlnamePush(ctxt, BAD_CAST"p");
1373 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1374 ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
1381 htmlAutoClose(ctxt, BAD_CAST"p");
1382 htmlCheckImplied(ctxt, BAD_CAST"p");
1383 htmlnamePush(ctxt, BAD_CAST"p");
1384 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1385 ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
1724 htmlErrMemory(ctxt, "growing buffer\n"); \
1989 * @ctxt: an HTML parser context
1995 htmlNewInputStream(htmlParserCtxtPtr ctxt) {
2000 htmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
2042 * @ctxt: an HTML parser context
2051 static int areBlanks(htmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2062 if (ctxt->name == NULL)
2064 if (xmlStrEqual(ctxt->name, BAD_CAST"html"))
2066 if (xmlStrEqual(ctxt->name, BAD_CAST"head"))
2070 if (xmlStrEqual(ctxt->name, BAD_CAST "body") && ctxt->myDoc != NULL) {
2071 dtd = xmlGetIntSubset(ctxt->myDoc);
2079 if (ctxt->node == NULL) return(0);
2080 lastChild = xmlGetLastChild(ctxt->node);
2084 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2085 (ctxt->node->content != NULL)) return(0);
2089 if ( xmlStrEqual(ctxt->name, BAD_CAST allowPCData[i]) ) {
2185 static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt);
2189 * @ctxt: an HTML parser context
2198 htmlParseHTMLName(htmlParserCtxtPtr ctxt) {
2215 return(xmlDictLookup(ctxt->dict, loc, i));
2221 * @ctxt: an HTML parser context
2231 htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) {
2246 return(xmlDictLookup(ctxt->dict, loc, i));
2252 * @ctxt: an HTML parser context
2260 htmlParseName(htmlParserCtxtPtr ctxt) {
2270 in = ctxt->input->cur;
2282 count = in - ctxt->input->cur;
2283 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
2284 ctxt->input->cur = in;
2285 ctxt->nbChars += count;
2286 ctxt->input->col += count;
2290 return(htmlParseNameComplex(ctxt));
2294 htmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2324 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2330 * @ctxt: an HTML parser context
2340 htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
2354 htmlErrMemory(ctxt, "buffer allocation failed\n");
2370 c = htmlParseCharRef(ctxt);
2391 ent = htmlParseEntityRef(ctxt, &name);
2469 * @ctxt: an HTML parser context
2480 htmlParseEntityRef(htmlParserCtxtPtr ctxt, const xmlChar **str) {
2485 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
2489 name = htmlParseName(ctxt);
2491 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
2506 htmlParseErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING,
2519 * @ctxt: an HTML parser context
2524 * asked for ctxt->replaceEntities != 0
2530 htmlParseAttValue(htmlParserCtxtPtr ctxt) {
2535 ret = htmlParseHTMLAttribute(ctxt, '"');
2537 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2543 ret = htmlParseHTMLAttribute(ctxt, '\'');
2545 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2553 ret = htmlParseHTMLAttribute(ctxt, 0);
2555 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
2564 * @ctxt: an HTML parser context
2574 htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
2584 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2596 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2603 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
2612 * @ctxt: an HTML parser context
2622 htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
2633 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2645 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2652 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
2661 * @ctxt: an HTML parser context
2681 htmlParseScript(htmlParserCtxtPtr ctxt) {
2701 if (ctxt->recovery) {
2702 if (xmlStrncasecmp(ctxt->name, ctxt->input->cur+2,
2703 xmlStrlen(ctxt->name)) == 0)
2707 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
2709 ctxt->name, NULL);
2721 if (ctxt->sax->cdataBlock!= NULL) {
2725 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
2726 } else if (ctxt->sax->characters != NULL) {
2727 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2736 if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) {
2737 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
2742 if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2743 if (ctxt->sax->cdataBlock!= NULL) {
2747 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
2748 } else if (ctxt->sax->characters != NULL) {
2749 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2757 * @ctxt: an HTML parser context
2766 htmlParseCharData(htmlParserCtxtPtr ctxt) {
2773 while (((cur != '<') || (ctxt->token == '<')) &&
2774 ((cur != '&') || (ctxt->token == '&')) &&
2777 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
2786 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2787 if (areBlanks(ctxt, buf, nbchar)) {
2788 if (ctxt->sax->ignorableWhitespace != NULL)
2789 ctxt->sax->ignorableWhitespace(ctxt->userData,
2792 htmlCheckParagraph(ctxt);
2793 if (ctxt->sax->characters != NULL)
2794 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2813 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2814 if (areBlanks(ctxt, buf, nbchar)) {
2815 if (ctxt->sax->ignorableWhitespace != NULL)
2816 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2818 htmlCheckParagraph(ctxt);
2819 if (ctxt->sax->characters != NULL)
2820 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2828 ctxt->instate = XML_PARSER_EOF;
2834 * @ctxt: an HTML parser context
2850 htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) {
2858 htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
2862 URI = htmlParseSystemLiteral(ctxt);
2864 htmlParseErr(ctxt, XML_ERR_URI_REQUIRED,
2872 htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
2876 *publicID = htmlParsePubidLiteral(ctxt);
2878 htmlParseErr(ctxt, XML_ERR_PUBID_REQUIRED,
2884 URI = htmlParseSystemLiteral(ctxt);
2892 * @ctxt: an XML parser context
2899 htmlParsePI(htmlParserCtxtPtr ctxt) {
2909 state = ctxt->instate;
2910 ctxt->instate = XML_PARSER_PI;
2921 target = htmlParseName(ctxt);
2929 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2930 (ctxt->sax->processingInstruction != NULL))
2931 ctxt->sax->processingInstruction(ctxt->userData,
2933 ctxt->instate = state;
2938 htmlErrMemory(ctxt, NULL);
2939 ctxt->instate = state;
2944 htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
2956 htmlErrMemory(ctxt, NULL);
2958 ctxt->instate = state;
2979 htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED,
2987 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2988 (ctxt->sax->processingInstruction != NULL))
2989 ctxt->sax->processingInstruction(ctxt->userData,
2994 htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED,
2997 ctxt->instate = state;
3003 * @ctxt: an HTML parser context
3010 htmlParseComment(htmlParserCtxtPtr ctxt) {
3025 state = ctxt->instate;
3026 ctxt->instate = XML_PARSER_COMMENT;
3031 htmlErrMemory(ctxt, "buffer allocation failed\n");
3032 ctxt->instate = state;
3051 htmlErrMemory(ctxt, "growing buffer failed\n");
3052 ctxt->instate = state;
3072 htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3077 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3078 (!ctxt->disableSAX))
3079 ctxt->sax->comment(ctxt->userData, buf);
3082 ctxt->instate = state;
3087 * @ctxt: an HTML parser context
3097 htmlParseCharRef(htmlParserCtxtPtr ctxt) {
3100 if ((ctxt == NULL) || (ctxt->input == NULL)) {
3101 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3117 htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF,
3132 htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF,
3142 htmlParseErr(ctxt, XML_ERR_INVALID_CHARREF,
3151 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
3161 * @ctxt: an HTML parser context
3170 htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {
3185 name = htmlParseName(ctxt);
3187 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3200 URI = htmlParseExternalID(ctxt, &ExternalID);
3207 htmlParseErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED,
3216 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
3217 (!ctxt->disableSAX))
3218 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
3229 * @ctxt: an HTML parser context
3249 htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
3254 name = htmlParseHTMLName(ctxt);
3256 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3268 val = htmlParseAttValue(ctxt);
3282 * @ctxt: an HTML parser context
3291 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
3294 if ((ctxt == NULL) || (attvalue == NULL))
3298 if (ctxt->input->encoding != NULL)
3315 if (ctxt->input->encoding != NULL)
3316 xmlFree((xmlChar *) ctxt->input->encoding);
3317 ctxt->input->encoding = xmlStrdup(encoding);
3328 (ctxt->input->buf != NULL) &&
3329 (ctxt->input->buf->encoder == NULL)) {
3330 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
3334 xmlSwitchEncoding(ctxt, enc);
3336 ctxt->charset = XML_CHAR_ENCODING_UTF8;
3343 xmlSwitchToEncoding(ctxt, handler);
3344 ctxt->charset = XML_CHAR_ENCODING_UTF8;
3346 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
3350 if ((ctxt->input->buf != NULL) &&
3351 (ctxt->input->buf->encoder != NULL) &&
3352 (ctxt->input->buf->raw != NULL) &&
3353 (ctxt->input->buf->buffer != NULL)) {
3360 processed = ctxt->input->cur - ctxt->input->base;
3361 xmlBufferShrink(ctxt->input->buf->buffer, processed);
3362 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
3363 ctxt->input->buf->buffer,
3364 ctxt->input->buf->raw);
3366 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
3370 ctxt->input->base =
3371 ctxt->input->cur = ctxt->input->buf->buffer->content;
3378 * @ctxt: an HTML parser context
3384 htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
3390 if ((ctxt == NULL) || (atts == NULL))
3405 htmlCheckEncoding(ctxt, content);
3411 * @ctxt: an HTML parser context
3430 htmlParseStartTag(htmlParserCtxtPtr ctxt) {
3440 if ((ctxt == NULL) || (ctxt->input == NULL)) {
3441 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3448 atts = ctxt->atts;
3449 maxatts = ctxt->maxatts;
3452 name = htmlParseHTMLName(ctxt);
3454 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3468 htmlAutoClose(ctxt, name);
3473 htmlCheckImplied(ctxt, name);
3479 if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) {
3480 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3485 if ((ctxt->nameNr != 1) &&
3487 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3494 for (indx = 0;indx < ctxt->nameNr;indx++) {
3495 if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) {
3496 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3515 long cons = ctxt->nbChars;
3518 attname = htmlParseAttribute(ctxt, &attvalue);
3526 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_REDEFINED,
3542 htmlErrMemory(ctxt, NULL);
3547 ctxt->atts = atts;
3548 ctxt->maxatts = maxatts;
3556 htmlErrMemory(ctxt, NULL);
3562 ctxt->atts = atts;
3563 ctxt->maxatts = maxatts;
3583 if (cons == ctxt->nbChars) {
3584 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3595 htmlCheckMeta(ctxt, atts);
3600 htmlnamePush(ctxt, name);
3601 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) {
3603 ctxt->sax->startElement(ctxt->userData, name, atts);
3605 ctxt->sax->startElement(ctxt->userData, name, NULL);
3620 * @ctxt: an HTML parser context
3634 htmlParseEndTag(htmlParserCtxtPtr ctxt)
3641 htmlParseErr(ctxt, XML_ERR_LTSLASH_REQUIRED,
3647 name = htmlParseHTMLName(ctxt);
3656 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
3658 if (ctxt->recovery) {
3674 for (i = (ctxt->nameNr - 1); i >= 0; i--) {
3675 if (xmlStrEqual(name, ctxt->nameTab[i]))
3679 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
3689 htmlAutoCloseOnClose(ctxt, name);
3696 if (!xmlStrEqual(name, ctxt->name)) {
3697 if ((ctxt->name != NULL) && (!xmlStrEqual(ctxt->name, name))) {
3698 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
3700 name, ctxt->name);
3707 oldname = ctxt->name;
3709 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
3710 ctxt->sax->endElement(ctxt->userData, name);
3711 htmlnamePop(ctxt);
3723 * @ctxt: an HTML parser context
3730 htmlParseReference(htmlParserCtxtPtr ctxt) {
3740 c = htmlParseCharRef(ctxt);
3754 htmlCheckParagraph(ctxt);
3755 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3756 ctxt->sax->characters(ctxt->userData, out, i);
3758 ent = htmlParseEntityRef(ctxt, &name);
3760 htmlCheckParagraph(ctxt);
3761 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3762 ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
3766 htmlCheckParagraph(ctxt);
3767 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
3768 ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
3769 ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
3770 /* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
3791 htmlCheckParagraph(ctxt);
3792 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3793 ctxt->sax->characters(ctxt->userData, out, i);
3800 * @ctxt: an HTML parser context
3806 htmlParseContent(htmlParserCtxtPtr ctxt) {
3811 currentNode = xmlStrdup(ctxt->name);
3812 depth = ctxt->nameNr;
3814 long cons = ctxt->nbChars;
3821 if (htmlParseEndTag(ctxt) &&
3822 ((currentNode != NULL) || (ctxt->nameNr == 0))) {
3833 name = htmlParseHTMLName_nonInvasive(ctxt);
3835 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3847 if (ctxt->name != NULL) {
3848 if (htmlCheckAutoClose(name, ctxt->name) == 1) {
3849 htmlAutoClose(ctxt, name);
3859 if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) &&
3860 (!xmlStrEqual(currentNode, ctxt->name)))
3871 htmlParseScript(ctxt);
3881 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3884 htmlParseDocTypeDecl(ctxt);
3892 htmlParseComment(ctxt);
3899 htmlParsePI(ctxt);
3906 htmlParseElement(ctxt);
3914 htmlParseReference(ctxt);
3921 htmlAutoCloseOnEnd(ctxt);
3929 htmlParseCharData(ctxt);
3932 if (cons == ctxt->nbChars) {
3933 if (ctxt->node != NULL) {
3934 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3948 * @ctxt: an HTML parser context
3954 __htmlParseContent(void *ctxt) {
3955 if (ctxt != NULL)
3956 htmlParseContent((htmlParserCtxtPtr) ctxt);
3961 * @ctxt: an HTML parser context
3971 htmlParseElement(htmlParserCtxtPtr ctxt) {
3980 if ((ctxt == NULL) || (ctxt->input == NULL)) {
3981 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3986 if (ctxt->record_info) {
3987 node_info.begin_pos = ctxt->input->consumed +
3988 (CUR_PTR - ctxt->input->base);
3989 node_info.begin_line = ctxt->input->line;
3992 failed = htmlParseStartTag(ctxt);
3993 name = ctxt->name;
4005 htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
4014 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4015 ctxt->sax->endElement(ctxt->userData, name);
4016 htmlnamePop(ctxt);
4023 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
4029 if (xmlStrEqual(name, ctxt->name)) {
4030 nodePop(ctxt);
4031 htmlnamePop(ctxt);
4037 if (ctxt->record_info) {
4038 node_info.end_pos = ctxt->input->consumed +
4039 (CUR_PTR - ctxt->input->base);
4040 node_info.end_line = ctxt->input->line;
4041 node_info.node = ctxt->node;
4042 xmlParserAddNodeInfo(ctxt, &node_info);
4051 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4052 ctxt->sax->endElement(ctxt->userData, name);
4053 htmlnamePop(ctxt);
4060 currentNode = xmlStrdup(ctxt->name);
4061 depth = ctxt->nameNr;
4063 oldptr = ctxt->input->cur;
4064 htmlParseContent(ctxt);
4065 if (oldptr==ctxt->input->cur) break;
4066 if (ctxt->nameNr < depth) break;
4072 if ( currentNode != NULL && ctxt->record_info ) {
4073 node_info.end_pos = ctxt->input->consumed +
4074 (CUR_PTR - ctxt->input->base);
4075 node_info.end_line = ctxt->input->line;
4076 node_info.node = ctxt->node;
4077 xmlParserAddNodeInfo(ctxt, &node_info);
4080 htmlAutoCloseOnEnd(ctxt);
4089 * @ctxt: an HTML parser context
4099 htmlParseDocument(htmlParserCtxtPtr ctxt) {
4106 if ((ctxt == NULL) || (ctxt->input == NULL)) {
4107 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4111 ctxt->html = 1;
4116 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
4117 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
4124 htmlParseErr(ctxt, XML_ERR_DOCUMENT_EMPTY,
4128 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
4129 ctxt->sax->startDocument(ctxt->userData);
4138 htmlParseComment(ctxt);
4139 htmlParsePI(ctxt);
4153 htmlParseDocTypeDecl(ctxt);
4163 htmlParseComment(ctxt);
4164 htmlParsePI(ctxt);
4171 htmlParseContent(ctxt);
4177 htmlAutoCloseOnEnd(ctxt);
4183 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
4184 ctxt->sax->endDocument(ctxt->userData);
4186 if (ctxt->myDoc != NULL) {
4187 dtd = xmlGetIntSubset(ctxt->myDoc);
4189 ctxt->myDoc->intSubset =
4190 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
4194 if (! ctxt->wellFormed) return(-1);
4207 * @ctxt: an HTML parser context
4215 htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
4219 if (ctxt == NULL) return(-1);
4220 memset(ctxt, 0, sizeof(htmlParserCtxt));
4222 ctxt->dict = xmlDictCreate();
4223 if (ctxt->dict == NULL) {
4236 ctxt->inputTab = (htmlParserInputPtr *)
4238 if (ctxt->inputTab == NULL) {
4240 ctxt->inputNr = 0;
4241 ctxt->inputMax = 0;
4242 ctxt->input = NULL;
4245 ctxt->inputNr = 0;
4246 ctxt->inputMax = 5;
4247 ctxt->input = NULL;
4248 ctxt->version = NULL;
4249 ctxt->encoding = NULL;
4250 ctxt->standalone = -1;
4251 ctxt->instate = XML_PARSER_START;
4254 ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr));
4255 if (ctxt->nodeTab == NULL) {
4257 ctxt->nodeNr = 0;
4258 ctxt->nodeMax = 0;
4259 ctxt->node = NULL;
4260 ctxt->inputNr = 0;
4261 ctxt->inputMax = 0;
4262 ctxt->input = NULL;
4265 ctxt->nodeNr = 0;
4266 ctxt->nodeMax = 10;
4267 ctxt->node = NULL;
4270 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
4271 if (ctxt->nameTab == NULL) {
4273 ctxt->nameNr = 0;
4274 ctxt->nameMax = 10;
4275 ctxt->name = NULL;
4276 ctxt->nodeNr = 0;
4277 ctxt->nodeMax = 0;
4278 ctxt->node = NULL;
4279 ctxt->inputNr = 0;
4280 ctxt->inputMax = 0;
4281 ctxt->input = NULL;
4284 ctxt->nameNr = 0;
4285 ctxt->nameMax = 10;
4286 ctxt->name = NULL;
4288 if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
4290 ctxt->sax = sax;
4293 ctxt->userData = ctxt;
4294 ctxt->myDoc = NULL;
4295 ctxt->wellFormed = 1;
4296 ctxt->replaceEntities = 0;
4297 ctxt->linenumbers = xmlLineNumbersDefaultValue;
4298 ctxt->html = 1;
4299 ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
4300 ctxt->vctxt.userData = ctxt;
4301 ctxt->vctxt.error = xmlParserValidityError;
4302 ctxt->vctxt.warning = xmlParserValidityWarning;
4303 ctxt->record_info = 0;
4304 ctxt->validate = 0;
4305 ctxt->nbChars = 0;
4306 ctxt->checkIndex = 0;
4307 ctxt->catalogs = NULL;
4308 xmlInitNodeInfoSeq(&ctxt->node_seq);
4314 * @ctxt: an HTML parser context
4317 * document in ctxt->myDoc is not freed.
4321 htmlFreeParserCtxt(htmlParserCtxtPtr ctxt)
4323 xmlFreeParserCtxt(ctxt);
4337 xmlParserCtxtPtr ctxt;
4339 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
4340 if (ctxt == NULL) {
4344 memset(ctxt, 0, sizeof(xmlParserCtxt));
4345 if (htmlInitParserCtxt(ctxt) < 0) {
4346 htmlFreeParserCtxt(ctxt);
4349 return(ctxt);
4363 xmlParserCtxtPtr ctxt;
4372 ctxt = htmlNewParserCtxt();
4373 if (ctxt == NULL)
4379 input = xmlNewInputStream(ctxt);
4381 xmlFreeParserCtxt(ctxt);
4391 inputPush(ctxt, input);
4392 return(ctxt);
4409 htmlParserCtxtPtr ctxt;
4414 ctxt = htmlCreateMemoryParserCtxt((char *)cur, len);
4415 if (ctxt == NULL)
4422 if (ctxt->input->encoding != NULL)
4423 xmlFree((xmlChar *) ctxt->input->encoding);
4424 ctxt->input->encoding = xmlStrdup((const xmlChar *) encoding);
4431 xmlSwitchEncoding(ctxt, enc);
4432 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4433 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
4443 xmlSwitchToEncoding(ctxt, handler);
4445 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
4451 return(ctxt);
4463 * @ctxt: an HTML parser context
4471 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
4480 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
4487 in = ctxt->input;
4491 if (ctxt->checkIndex > base)
4492 base = ctxt->checkIndex;
4529 ctxt->checkIndex = 0;
4547 ctxt->checkIndex = base;
4564 * @ctxt: an HTML parser context
4572 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
4579 switch (ctxt->instate) {
4633 in = ctxt->input;
4640 htmlAutoCloseOnEnd(ctxt);
4641 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
4645 ctxt->instate = XML_PARSER_EOF;
4646 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
4647 ctxt->sax->endDocument(ctxt->userData);
4658 switch (ctxt->instate) {
4676 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
4677 ctxt->sax->setDocumentLocator(ctxt->userData,
4679 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
4680 (!ctxt->disableSAX))
4681 ctxt->sax->startDocument(ctxt->userData);
4691 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
4697 htmlParseDocTypeDecl(ctxt);
4698 ctxt->instate = XML_PARSER_PROLOG;
4704 ctxt->instate = XML_PARSER_MISC;
4724 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))
4730 htmlParseComment(ctxt);
4731 ctxt->instate = XML_PARSER_MISC;
4734 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
4740 htmlParsePI(ctxt);
4741 ctxt->instate = XML_PARSER_MISC;
4748 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
4754 htmlParseDocTypeDecl(ctxt);
4755 ctxt->instate = XML_PARSER_PROLOG;
4764 ctxt->instate = XML_PARSER_START_TAG;
4784 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))
4790 htmlParseComment(ctxt);
4791 ctxt->instate = XML_PARSER_PROLOG;
4794 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
4800 htmlParsePI(ctxt);
4801 ctxt->instate = XML_PARSER_PROLOG;
4806 ctxt->instate = XML_PARSER_START_TAG;
4822 htmlParseCharData(ctxt);
4831 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))
4837 htmlParseComment(ctxt);
4838 ctxt->instate = XML_PARSER_EPILOG;
4841 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
4847 htmlParsePI(ctxt);
4848 ctxt->instate = XML_PARSER_EPILOG;
4853 ctxt->errNo = XML_ERR_DOCUMENT_END;
4854 ctxt->wellFormed = 0;
4855 ctxt->instate = XML_PARSER_EOF;
4860 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
4861 ctxt->sax->endDocument(ctxt->userData);
4874 ctxt->instate = XML_PARSER_CONTENT;
4882 ctxt->instate = XML_PARSER_END_TAG;
4883 ctxt->checkIndex = 0;
4891 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
4894 failed = htmlParseStartTag(ctxt);
4895 name = ctxt->name;
4908 htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
4917 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4918 ctxt->sax->endElement(ctxt->userData, name);
4919 htmlnamePop(ctxt);
4920 ctxt->instate = XML_PARSER_CONTENT;
4931 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
4938 if (xmlStrEqual(name, ctxt->name)) {
4939 nodePop(ctxt);
4940 htmlnamePop(ctxt);
4943 ctxt->instate = XML_PARSER_CONTENT;
4955 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4956 ctxt->sax->endElement(ctxt->userData, name);
4957 htmlnamePop(ctxt);
4959 ctxt->instate = XML_PARSER_CONTENT;
4971 if (ctxt->token != 0) {
4974 chr[0] = (xmlChar) ctxt->token;
4975 htmlCheckParagraph(ctxt);
4976 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4977 ctxt->sax->characters(ctxt->userData, chr, 1);
4978 ctxt->token = 0;
4979 ctxt->checkIndex = 0;
4984 if (ctxt->sax != NULL) {
4986 if (ctxt->sax->ignorableWhitespace != NULL)
4987 ctxt->sax->ignorableWhitespace(
4988 ctxt->userData, &cur, 1);
4990 htmlCheckParagraph(ctxt);
4991 if (ctxt->sax->characters != NULL)
4992 ctxt->sax->characters(
4993 ctxt->userData, &cur, 1);
4996 ctxt->token = 0;
4997 ctxt->checkIndex = 0;
5006 cons = ctxt->nbChars;
5007 if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) ||
5008 (xmlStrEqual(ctxt->name, BAD_CAST"style"))) {
5016 idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0);
5023 htmlParseScript(ctxt);
5025 ctxt->instate = XML_PARSER_END_TAG;
5026 ctxt->checkIndex = 0;
5043 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
5045 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
5048 htmlParseDocTypeDecl(ctxt);
5053 ctxt, '-', '-', '>', 1) < 0))
5059 htmlParseComment(ctxt);
5060 ctxt->instate = XML_PARSER_CONTENT;
5063 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
5069 htmlParsePI(ctxt);
5070 ctxt->instate = XML_PARSER_CONTENT;
5074 ctxt->instate = XML_PARSER_END_TAG;
5075 ctxt->checkIndex = 0;
5082 ctxt->instate = XML_PARSER_START_TAG;
5083 ctxt->checkIndex = 0;
5091 (htmlParseLookupSequence(ctxt, ';', 0, 0, 0) < 0))
5098 htmlParseReference(ctxt);
5107 (htmlParseLookupSequence(ctxt, '<', 0, 0, 0) < 0))
5109 ctxt->checkIndex = 0;
5114 htmlParseCharData(ctxt);
5117 if (cons == ctxt->nbChars) {
5118 if (ctxt->node != NULL) {
5119 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5133 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
5135 htmlParseEndTag(ctxt);
5136 if (ctxt->nameNr == 0) {
5137 ctxt->instate = XML_PARSER_EPILOG;
5139 ctxt->instate = XML_PARSER_CONTENT;
5141 ctxt->checkIndex = 0;
5148 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5151 ctxt->instate = XML_PARSER_CONTENT;
5152 ctxt->checkIndex = 0;
5159 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5162 ctxt->instate = XML_PARSER_CONTENT;
5163 ctxt->checkIndex = 0;
5170 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5173 ctxt->instate = XML_PARSER_CONTENT;
5174 ctxt->checkIndex = 0;
5181 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5184 ctxt->instate = XML_PARSER_CONTENT;
5185 ctxt->checkIndex = 0;
5192 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5195 ctxt->instate = XML_PARSER_CONTENT;
5196 ctxt->checkIndex = 0;
5203 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5206 ctxt->instate = XML_PARSER_CONTENT;
5207 ctxt->checkIndex = 0;
5214 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5217 ctxt->instate = XML_PARSER_START_TAG;
5218 ctxt->checkIndex = 0;
5225 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5228 ctxt->instate = XML_PARSER_CONTENT;
5229 ctxt->checkIndex = 0;
5236 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5239 ctxt->instate = XML_PARSER_CONTENT;
5240 ctxt->checkIndex = 0;
5247 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5250 ctxt->instate = XML_PARSER_CONTENT;
5251 ctxt->checkIndex = 0;
5262 htmlAutoCloseOnEnd(ctxt);
5263 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
5267 ctxt->instate = XML_PARSER_EOF;
5268 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5269 ctxt->sax->endDocument(ctxt->userData);
5272 if ((ctxt->myDoc != NULL) &&
5273 ((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
5274 (ctxt->instate == XML_PARSER_EPILOG))) {
5276 dtd = xmlGetIntSubset(ctxt->myDoc);
5278 ctxt->myDoc->intSubset =
5279 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
5291 * @ctxt: an HTML parser context
5301 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
5303 if ((ctxt == NULL) || (ctxt->input == NULL)) {
5304 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5308 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
5309 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
5310 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
5311 int cur = ctxt->input->cur - ctxt->input->base;
5314 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
5316 ctxt->errNo = XML_PARSER_EOF;
5317 ctxt->disableSAX = 1;
5320 ctxt->input->base = ctxt->input->buf->buffer->content + base;
5321 ctxt->input->cur = ctxt->input->base + cur;
5322 ctxt->input->end =
5323 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
5329 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
5330 htmlParseTryOrFinish(ctxt, terminate);
5332 } else if (ctxt->instate != XML_PARSER_EOF) {
5333 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
5334 xmlParserInputBufferPtr in = ctxt->input->buf;
5341 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
5348 htmlParseTryOrFinish(ctxt, terminate);
5350 if ((ctxt->instate != XML_PARSER_EOF) &&
5351 (ctxt->instate != XML_PARSER_EPILOG) &&
5352 (ctxt->instate != XML_PARSER_MISC)) {
5353 ctxt->errNo = XML_ERR_DOCUMENT_END;
5354 ctxt->wellFormed = 0;
5356 if (ctxt->instate != XML_PARSER_EOF) {
5357 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5358 ctxt->sax->endDocument(ctxt->userData);
5360 ctxt->instate = XML_PARSER_EOF;
5362 return((xmlParserErrors) ctxt->errNo);
5390 htmlParserCtxtPtr ctxt;
5399 ctxt = htmlNewParserCtxt();
5400 if (ctxt == NULL) {
5405 ctxt->charset=XML_CHAR_ENCODING_UTF8;
5407 if (ctxt->sax != (xmlSAXHandlerPtr) &htmlDefaultSAXHandler)
5408 xmlFree(ctxt->sax);
5409 ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler));
5410 if (ctxt->sax == NULL) {
5412 xmlFree(ctxt);
5415 memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler));
5417 ctxt->userData = user_data;
5420 ctxt->directory = NULL;
5422 ctxt->directory = xmlParserGetDirectory(filename);
5425 inputStream = htmlNewInputStream(ctxt);
5427 xmlFreeParserCtxt(ctxt);
5443 inputPush(ctxt, inputStream);
5445 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
5446 (ctxt->input->buf != NULL)) {
5447 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
5448 int cur = ctxt->input->cur - ctxt->input->base;
5450 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
5452 ctxt->input->base = ctxt->input->buf->buffer->content + base;
5453 ctxt->input->cur = ctxt->input->base + cur;
5454 ctxt->input->end =
5455 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
5460 ctxt->progressive = 1;
5462 return(ctxt);
5484 htmlParserCtxtPtr ctxt;
5491 ctxt = htmlCreateDocParserCtxt(cur, encoding);
5492 if (ctxt == NULL) return(NULL);
5494 if (ctxt->sax != NULL) xmlFree (ctxt->sax);
5495 ctxt->sax = sax;
5496 ctxt->userData = userData;
5499 htmlParseDocument(ctxt);
5500 ret = ctxt->myDoc;
5502 ctxt->sax = NULL;
5503 ctxt->userData = NULL;
5505 htmlFreeParserCtxt(ctxt);
5540 htmlParserCtxtPtr ctxt;
5549 ctxt = htmlNewParserCtxt();
5550 if (ctxt == NULL) {
5560 xmlFreeParserCtxt(ctxt);
5564 inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);
5567 xmlFreeParserCtxt(ctxt);
5571 inputPush(ctxt, inputStream);
5579 htmlCheckEncoding (ctxt, content);
5584 return(ctxt);
5607 htmlParserCtxtPtr ctxt;
5612 ctxt = htmlCreateFileParserCtxt(filename, encoding);
5613 if (ctxt == NULL) return(NULL);
5615 oldsax = ctxt->sax;
5616 ctxt->sax = sax;
5617 ctxt->userData = userData;
5620 htmlParseDocument(ctxt);
5622 ret = ctxt->myDoc;
5624 ctxt->sax = oldsax;
5625 ctxt->userData = NULL;
5627 htmlFreeParserCtxt(ctxt);
5796 * @ctxt: an HTML parser context
5801 htmlCtxtReset(htmlParserCtxtPtr ctxt)
5806 if (ctxt == NULL)
5810 dict = ctxt->dict;
5812 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
5815 ctxt->inputNr = 0;
5816 ctxt->input = NULL;
5818 ctxt->spaceNr = 0;
5819 if (ctxt->spaceTab != NULL) {
5820 ctxt->spaceTab[0] = -1;
5821 ctxt->space = &ctxt->spaceTab[0];
5823 ctxt->space = NULL;
5827 ctxt->nodeNr = 0;
5828 ctxt->node = NULL;
5830 ctxt->nameNr = 0;
5831 ctxt->name = NULL;
5833 DICT_FREE(ctxt->version);
5834 ctxt->version = NULL;
5835 DICT_FREE(ctxt->encoding);
5836 ctxt->encoding = NULL;
5837 DICT_FREE(ctxt->directory);
5838 ctxt->directory = NULL;
5839 DICT_FREE(ctxt->extSubURI);
5840 ctxt->extSubURI = NULL;
5841 DICT_FREE(ctxt->extSubSystem);
5842 ctxt->extSubSystem = NULL;
5843 if (ctxt->myDoc != NULL)
5844 xmlFreeDoc(ctxt->myDoc);
5845 ctxt->myDoc = NULL;
5847 ctxt->standalone = -1;
5848 ctxt->hasExternalSubset = 0;
5849 ctxt->hasPErefs = 0;
5850 ctxt->html = 1;
5851 ctxt->external = 0;
5852 ctxt->instate = XML_PARSER_START;
5853 ctxt->token = 0;
5855 ctxt->wellFormed = 1;
5856 ctxt->nsWellFormed = 1;
5857 ctxt->valid = 1;
5858 ctxt->vctxt.userData = ctxt;
5859 ctxt->vctxt.error = xmlParserValidityError;
5860 ctxt->vctxt.warning = xmlParserValidityWarning;
5861 ctxt->record_info = 0;
5862 ctxt->nbChars = 0;
5863 ctxt->checkIndex = 0;
5864 ctxt->inSubset = 0;
5865 ctxt->errNo = XML_ERR_OK;
5866 ctxt->depth = 0;
5867 ctxt->charset = XML_CHAR_ENCODING_NONE;
5868 ctxt->catalogs = NULL;
5869 xmlInitNodeInfoSeq(&ctxt->node_seq);
5871 if (ctxt->attsDefault != NULL) {
5872 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
5873 ctxt->attsDefault = NULL;
5875 if (ctxt->attsSpecial != NULL) {
5876 xmlHashFree(ctxt->attsSpecial, NULL);
5877 ctxt->attsSpecial = NULL;
5883 * @ctxt: an HTML parser context
5892 htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
5894 if (ctxt == NULL)
5898 ctxt->sax->warning = NULL;
5899 ctxt->vctxt.warning = NULL;
5901 ctxt->options |= XML_PARSE_NOWARNING;
5904 ctxt->sax->error = NULL;
5905 ctxt->vctxt.error = NULL;
5906 ctxt->sax->fatalError = NULL;
5908 ctxt->options |= XML_PARSE_NOERROR;
5911 ctxt->pedantic = 1;
5913 ctxt->options |= XML_PARSE_PEDANTIC;
5915 ctxt->pedantic = 0;
5917 ctxt->keepBlanks = 0;
5918 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
5920 ctxt->options |= XML_PARSE_NOBLANKS;
5922 ctxt->keepBlanks = 1;
5924 ctxt->recovery = 1;
5927 ctxt->recovery = 0;
5929 ctxt->options |= HTML_PARSE_COMPACT;
5932 ctxt->dictNames = 0;
5938 * @ctxt: an HTML parser context
5949 htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
5954 htmlCtxtUseOptions(ctxt, options);
5955 ctxt->html = 1;
5961 xmlSwitchToEncoding(ctxt, hdlr);
5963 if ((URL != NULL) && (ctxt->input != NULL) &&
5964 (ctxt->input->filename == NULL))
5965 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
5966 htmlParseDocument(ctxt);
5967 ret = ctxt->myDoc;
5968 ctxt->myDoc = NULL;
5970 if ((ctxt->dictNames) &&
5972 (ret->dict == ctxt->dict))
5973 ctxt->dict = NULL;
5974 xmlFreeParserCtxt(ctxt);
5993 htmlParserCtxtPtr ctxt;
5999 ctxt = htmlCreateDocParserCtxt(cur, NULL);
6000 if (ctxt == NULL)
6002 return (htmlDoRead(ctxt, URL, encoding, options, 0));
6018 htmlParserCtxtPtr ctxt;
6021 ctxt = htmlCreateFileParserCtxt(filename, encoding);
6022 if (ctxt == NULL)
6024 return (htmlDoRead(ctxt, NULL, NULL, options, 0));
6042 htmlParserCtxtPtr ctxt;
6045 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
6046 if (ctxt == NULL)
6049 if (ctxt->sax != NULL)
6050 memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
6051 return (htmlDoRead(ctxt, URL, encoding, options, 0));
6068 htmlParserCtxtPtr ctxt;
6079 ctxt = xmlNewParserCtxt();
6080 if (ctxt == NULL) {
6084 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6087 xmlFreeParserCtxt(ctxt);
6090 inputPush(ctxt, stream);
6091 return (htmlDoRead(ctxt, URL, encoding, options, 0));
6111 htmlParserCtxtPtr ctxt;
6123 ctxt = htmlNewParserCtxt();
6124 if (ctxt == NULL) {
6128 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6131 xmlFreeParserCtxt(ctxt);
6134 inputPush(ctxt, stream);
6135 return (htmlDoRead(ctxt, URL, encoding, options, 0));
6140 * @ctxt: an HTML parser context
6147 * This reuses the existing @ctxt parser context
6152 htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,
6159 if (ctxt == NULL)
6162 htmlCtxtReset(ctxt);
6164 stream = xmlNewStringInputStream(ctxt, cur);
6168 inputPush(ctxt, stream);
6169 return (htmlDoRead(ctxt, URL, encoding, options, 1));
6174 * @ctxt: an HTML parser context
6180 * This reuses the existing @ctxt parser context
6185 htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,
6192 if (ctxt == NULL)
6195 htmlCtxtReset(ctxt);
6197 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
6201 inputPush(ctxt, stream);
6202 return (htmlDoRead(ctxt, NULL, encoding, options, 1));
6207 * @ctxt: an HTML parser context
6215 * This reuses the existing @ctxt parser context
6220 htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,
6226 if (ctxt == NULL)
6231 htmlCtxtReset(ctxt);
6238 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6244 inputPush(ctxt, stream);
6245 return (htmlDoRead(ctxt, URL, encoding, options, 1));
6250 * @ctxt: an HTML parser context
6257 * This reuses the existing @ctxt parser context
6262 htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,
6270 if (ctxt == NULL)
6273 htmlCtxtReset(ctxt);
6279 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6284 inputPush(ctxt, stream);
6285 return (htmlDoRead(ctxt, URL, encoding, options, 1));
6290 * @ctxt: an HTML parser context
6299 * This reuses the existing @ctxt parser context
6304 htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
6314 if (ctxt == NULL)
6317 htmlCtxtReset(ctxt);
6323 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6328 inputPush(ctxt, stream);
6329 return (htmlDoRead(ctxt, URL, encoding, options, 1));