38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * HTMLtree.c : implementation of access function for an HTML tree.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * See Copyright for the status of this software.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * daniel@veillard.com
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#define IN_LIBXML
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include "libxml.h"
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef LIBXML_HTML_ENABLED
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <string.h> /* for memset() only ! */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef HAVE_CTYPE_H
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <ctype.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef HAVE_STDLIB_H
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <stdlib.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/xmlmemory.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/HTMLparser.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/HTMLtree.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/entities.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/valid.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/xmlerror.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/parserInternals.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/globals.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include <libxml/uri.h>
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Getting/Setting encoding meta tags *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlGetMetaEncoding:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @doc: the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Encoding definition lookup in the Meta tags
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the current encoding as flagged in the HTML source
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncconst xmlChar *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlGetMetaEncoding(htmlDocPtr doc) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlNodePtr cur;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *content;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *encoding;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (doc == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = doc->children;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Search the html
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (cur != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(cur->name, BAD_CAST"html"))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(cur->name, BAD_CAST"head"))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto found_head;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(cur->name, BAD_CAST"meta"))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto found_meta;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = cur->next;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = cur->children;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Search the head
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (cur != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(cur->name, BAD_CAST"head"))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(cur->name, BAD_CAST"meta"))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto found_meta;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = cur->next;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncfound_head:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = cur->children;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Search the meta elements
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncfound_meta:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (cur != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlAttrPtr attr = cur->properties;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int http;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *value;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync content = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync http = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (attr != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((attr->children != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (attr->children->type == XML_TEXT_NODE) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (attr->children->next == NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync value = attr->children->content;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync http = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else if ((value != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync content = value;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((http != 0) && (content != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto found_content;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync attr = attr->next;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = cur->next;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncfound_content:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync encoding = xmlStrstr(content, BAD_CAST"charset=");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync encoding = xmlStrstr(content, BAD_CAST"Charset=");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync encoding += 8;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync encoding = xmlStrstr(content, BAD_CAST"charset =");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync encoding = xmlStrstr(content, BAD_CAST"Charset =");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync encoding += 9;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlSetMetaEncoding:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @doc: the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the encoding string
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Sets the current encoding in the Meta tags
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * NOTE: this will not change the document content encoding, just
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * the META flag associated.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns 0 in case of success and -1 in case of error
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncint
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlNodePtr cur, meta;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *content;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync char newcontent[100];
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (doc == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (char *)encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync newcontent[sizeof(newcontent) - 1] = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = doc->children;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Search the html
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (cur != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto found_head;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto found_meta;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = cur->next;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = cur->children;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Search the head
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (cur != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync goto found_meta;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = cur->next;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncfound_head:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->children == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlAddChild(cur, meta);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = cur->children;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncfound_meta:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Create a new Meta element with the right attributes
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlAddPrevSibling(cur, meta);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Search and destroy all the remaining the meta elements carrying
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * encoding informations
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (cur != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlAttrPtr attr = cur->properties;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int http;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const xmlChar *value;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync content = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync http = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (attr != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((attr->children != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (attr->children->type == XML_TEXT_NODE) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (attr->children->next == NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync value = attr->children->content;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync http = 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((value != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync content = value;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((http != 0) && (content != NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync attr = attr->next;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((http != 0) && (content != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync meta = cur;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = cur->next;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlUnlinkNode(meta);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFreeNode(meta);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync continue;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = cur->next;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
/**
 * htmlBooleanAttrs:
 *
 * NULL-terminated table of the HTML attributes which will be output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 *
 */
static const char* htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL
};
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlIsBooleanAttr:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @name: the name of the attribute to check
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Determine if a given attribute is a boolean attribute.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * returns: false if the attribute is not boolean, true otherwise.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncint
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlIsBooleanAttr(const xmlChar *name)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int i = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (htmlBooleanAttrs[i] != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return 1;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync i++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#ifdef LIBXML_OUTPUT_ENABLED
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Output error handlers *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlSaveErrMemory:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @extra: extra informations
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Handle an out of memory condition
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlSaveErrMemory(const char *extra)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlSaveErr:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @code: the error number
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @node: the location of the error.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @extra: extra informations
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Handle an out of memory condition
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlSaveErr(int code, xmlNodePtr node, const char *extra)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync{
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *msg = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync switch(code) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_SAVE_NOT_UTF8:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync msg = "string is not in UTF-8\n";
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_SAVE_CHAR_INVALID:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync msg = "invalid character value\n";
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_SAVE_UNKNOWN_ENCODING:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync msg = "unknown encoding %s\n";
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync case XML_SAVE_NO_DOCTYPE:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync msg = "HTML has no DOCTYPE\n";
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync break;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync default:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync msg = "unexpected error number\n";
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Dumping HTML tree content to a simple buffer *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic int
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int format);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlNodeDumpFormat:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @buf: the HTML buffer output
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @doc: the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: the current node
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @format: should formatting spaces been added
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Dump an HTML node, recursive behaviour,children are printed too.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the number of byte written or -1 in case of error
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic int
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int format) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync unsigned int use;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int ret;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferPtr outbuf;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (buf == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (outbuf == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlSaveErrMemory("allocating HTML output buffer");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync outbuf->buffer = buf;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync outbuf->encoder = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync outbuf->writecallback = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync outbuf->closecallback = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync outbuf->context = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync outbuf->written = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync use = buf->use;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(outbuf);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ret = buf->use - use;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return (ret);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlNodeDump:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @buf: the HTML buffer output
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @doc: the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: the current node
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Dump an HTML node, recursive behaviour,children are printed too,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * and formatting returns are added.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Returns the number of byte written or -1 in case of error
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncint
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInitParser();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(htmlNodeDumpFormat(buf, doc, cur, 1));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlNodeDumpFileFormat:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @out: the FILE pointer
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @doc: the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: the current node
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @format: should formatting spaces been added
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Dump an HTML node, recursive behaviour,children are printed too.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * TODO: if encoding == NULL try to save in the doc encoding
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * returns: the number of byte written or -1 in case of failure.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncint
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlNodePtr cur, const char *encoding, int format) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferPtr buf;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCharEncodingHandlerPtr handler = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int ret;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInitParser();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCharEncoding enc;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync enc = xmlParseCharEncoding(encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (enc != XML_CHAR_ENCODING_UTF8) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler(encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (handler == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Fallback to HTML or ASCII when the encoding is unspecified
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (handler == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler("HTML");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (handler == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler("ascii");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * save the content to a temp buffer.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buf = xmlOutputBufferCreateFile(out, handler);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (buf == NULL) return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ret = xmlOutputBufferClose(buf);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(ret);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlNodeDumpFile:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @out: the FILE pointer
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @doc: the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: the current node
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Dump an HTML node, recursive behaviour,children are printed too,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * and formatting returns are added.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncvoid
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlDocDumpMemoryFormat:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @mem: OUT: the memory pointer
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @size: OUT: the memory length
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @format: should formatting spaces been added
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Dump an HTML document in memory and return the xmlChar * and it's size.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * It's up to the caller to free the memory.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncvoid
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferPtr buf;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCharEncodingHandlerPtr handler = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInitParser();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((mem == NULL) || (size == NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *mem = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *size = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync encoding = (const char *) htmlGetMetaEncoding(cur);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCharEncoding enc;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync enc = xmlParseCharEncoding(encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (enc != cur->charset) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->charset != XML_CHAR_ENCODING_UTF8) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Not supported yet
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *mem = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *size = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler(encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (handler == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *mem = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *size = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler(encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Fallback to HTML or ASCII when the encoding is unspecified
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (handler == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler("HTML");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (handler == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler("ascii");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buf = xmlAllocOutputBuffer(handler);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (buf == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *mem = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *size = 0;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferFlush(buf);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (buf->conv != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *size = buf->conv->use;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *mem = xmlStrndup(buf->conv->content, *size);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *size = buf->buffer->use;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *mem = xmlStrndup(buf->buffer->content, *size);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (void)xmlOutputBufferClose(buf);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlDocDumpMemory:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @mem: OUT: the memory pointer
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @size: OUT: the memory length
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Dump an HTML document in memory and return the xmlChar * and it's size.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * It's up to the caller to free the memory.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncvoid
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlDocDumpMemoryFormat(cur, mem, size, 1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Dumping HTML tree content to an I/O output buffer *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncvoid xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlDtdDumpOutput:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @buf: the HTML buffer output
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @doc: the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the encoding string
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * TODO: check whether encoding is needed
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Dump the HTML document DTD, if any.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding ATTRIBUTE_UNUSED) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlDtdPtr cur = doc->intSubset;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, (const char *)cur->name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->ExternalID != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, " PUBLIC ");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->SystemID != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, " ");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else if (cur->SystemID != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, " SYSTEM ");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, ">\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlAttrDumpOutput:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @buf: the HTML buffer output
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @doc: the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: the attribute pointer
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the encoding string
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Dump an HTML attribute
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding ATTRIBUTE_UNUSED) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *value;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * TODO: The html output method should not escape a & character
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * occurring in an attribute value immediately followed by
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, " ");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, ":");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, (const char *)cur->name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync value = xmlNodeListGetString(doc, cur->children, 0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (value) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, "=");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur->ns == NULL) && (cur->parent != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->parent->ns == NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *escaped;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *tmp = value;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (IS_BLANK_CH(*tmp)) tmp++;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (escaped != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlBufferWriteQuotedString(buf->buffer, escaped);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(escaped);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlBufferWriteQuotedString(buf->buffer, value);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlBufferWriteQuotedString(buf->buffer, value);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(value);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, "=\"\"");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlAttrListDumpOutput:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @buf: the HTML buffer output
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @doc: the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: the first attribute pointer
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the encoding string
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Dump a list of HTML attributes
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (cur != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlAttrDumpOutput(buf, doc, cur, encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = cur->next;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlNodeListDumpOutput:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @buf: the HTML buffer output
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @doc: the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: the first node
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the encoding string
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @format: should formatting spaces been added
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Dump an HTML node list, recursive behaviour,children are printed too.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncstatic void
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlNodePtr cur, const char *encoding, int format) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur == NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync while (cur != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur = cur->next;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlNodeDumpFormatOutput:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @buf: the HTML buffer output
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @doc: the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: the current node
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the encoding string
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @format: should formatting spaces been added
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Dump an HTML node, recursive behaviour,children are printed too.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncvoid
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlNodePtr cur, const char *encoding, int format) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const htmlElemDesc * info;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInitParser();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur == NULL) || (buf == NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Special cases.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->type == XML_DTD_NODE)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->type == XML_DOCUMENT_NODE)){
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->type == XML_ATTRIBUTE_NODE) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->type == HTML_TEXT_NODE) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->content != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (((cur->name == (const xmlChar *)xmlStringText) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((cur->parent == NULL) ||
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlChar *buffer;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (buffer != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, (const char *)buffer);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlFree(buffer);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, (const char *)cur->content);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->type == HTML_COMMENT_NODE) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->content != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, "<!--");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, (const char *)cur->content);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, "-->");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->type == HTML_PI_NODE) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->name == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, "<?");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, (const char *)cur->name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->content != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, " ");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, (const char *)cur->content);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, ">");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->type == HTML_ENTITY_REF_NODE) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, "&");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, (const char *)cur->name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, ";");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->type == HTML_PRESERVE_NODE) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->content != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, (const char *)cur->content);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Get specific HTML info for that node.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->ns == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync info = htmlTagLookup(cur->name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync else
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync info = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, "<");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, ":");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, (const char *)cur->name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->nsDef)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlNsListDumpOutput(buf, cur->nsDef);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->properties != NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((info != NULL) && (info->empty)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, ">");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((format) && (!info->isinline) && (cur->next != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur->next->type != HTML_TEXT_NODE) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->next->type != HTML_ENTITY_REF_NODE) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->parent != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->parent->name != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->parent->name[0] != 'p')) /* p, pre, param */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, "\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->children == NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((info != NULL) && (info->saveEndTag != 0) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, ">");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, "></");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, ":");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, (const char *)cur->name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, ">");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((format) && (cur->next != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (info != NULL) && (!info->isinline)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur->next->type != HTML_TEXT_NODE) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->next->type != HTML_ENTITY_REF_NODE) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->parent != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->parent->name != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->parent->name[0] != 'p')) /* p, pre, param */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, "\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, ">");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur->type != XML_ELEMENT_NODE) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->content != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Uses the OutputBuffer property to automatically convert
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * invalids to charrefs
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, (const char *) cur->content);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->children != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((format) && (info != NULL) && (!info->isinline) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->children->type != HTML_TEXT_NODE) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->children->type != HTML_ENTITY_REF_NODE) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->children != cur->last) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->name != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->name[0] != 'p')) /* p, pre, param */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, "\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((format) && (info != NULL) && (!info->isinline) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->last->type != HTML_TEXT_NODE) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->last->type != HTML_ENTITY_REF_NODE) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->children != cur->last) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->name != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->name[0] != 'p')) /* p, pre, param */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, "\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, "</");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, ":");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, (const char *)cur->name);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, ">");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((format) && (info != NULL) && (!info->isinline) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->next != NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur->next->type != HTML_TEXT_NODE) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->next->type != HTML_ENTITY_REF_NODE) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->parent != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->parent->name != NULL) &&
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync (cur->parent->name[0] != 'p')) /* p, pre, param */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, "\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlNodeDumpOutput:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @buf: the HTML buffer output
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @doc: the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: the current node
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the encoding string
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Dump an HTML node, recursive behaviour,children are printed too,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * and formatting returns/spaces are added.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncvoid
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlNodePtr cur, const char *encoding) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlDocContentDumpFormatOutput:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @buf: the HTML buffer output
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the encoding string
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @format: should formatting spaces been added
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Dump an HTML document.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncvoid
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding, int format) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int type;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInitParser();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((buf == NULL) || (cur == NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * force to output the stuff as HTML, especially for entities
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync type = cur->type;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur->type = XML_HTML_DOCUMENT_NODE;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->intSubset != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlDtdDumpOutput(buf, cur, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->children != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferWriteString(buf, "\n");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync cur->type = (xmlElementType) type;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlDocContentDumpOutput:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @buf: the HTML buffer output
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the encoding string
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Dump an HTML document. Formating return/spaces are added.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncvoid
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/************************************************************************
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Saving functions front-ends *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ************************************************************************/
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlDocDump:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @f: the FILE*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Dump an HTML document to an open FILE.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * returns: the number of byte written or -1 in case of failure.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncint
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlDocDump(FILE *f, xmlDocPtr cur) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferPtr buf;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCharEncodingHandlerPtr handler = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int ret;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInitParser();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur == NULL) || (f == NULL)) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync encoding = (const char *) htmlGetMetaEncoding(cur);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCharEncoding enc;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync enc = xmlParseCharEncoding(encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (enc != cur->charset) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->charset != XML_CHAR_ENCODING_UTF8) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Not supported yet
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler(encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (handler == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler(encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Fallback to HTML or ASCII when the encoding is unspecified
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (handler == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler("HTML");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (handler == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler("ascii");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buf = xmlOutputBufferCreateFile(f, handler);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (buf == NULL) return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlDocContentDumpOutput(buf, cur, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ret = xmlOutputBufferClose(buf);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(ret);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlSaveFile:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @filename: the filename (or URL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Dump an HTML document to a file. If @filename is "-" the stdout file is
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * used.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * returns: the number of byte written or -1 in case of failure.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncint
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlSaveFile(const char *filename, xmlDocPtr cur) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferPtr buf;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCharEncodingHandlerPtr handler = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int ret;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur == NULL) || (filename == NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInitParser();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync encoding = (const char *) htmlGetMetaEncoding(cur);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCharEncoding enc;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync enc = xmlParseCharEncoding(encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (enc != cur->charset) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->charset != XML_CHAR_ENCODING_UTF8) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Not supported yet
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler(encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (handler == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Fallback to HTML or ASCII when the encoding is unspecified
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (handler == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler("HTML");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (handler == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler("ascii");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * save the content to a temp buffer.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (buf == NULL) return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlDocContentDumpOutput(buf, cur, NULL);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ret = xmlOutputBufferClose(buf);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(ret);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlSaveFileFormat:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @filename: the filename
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @format: should formatting spaces been added
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Dump an HTML document to a file using a given encoding.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * returns: the number of byte written or -1 in case of failure.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncint
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlSaveFileFormat(const char *filename, xmlDocPtr cur,
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync const char *encoding, int format) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlOutputBufferPtr buf;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCharEncodingHandlerPtr handler = NULL;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync int ret;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if ((cur == NULL) || (filename == NULL))
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlInitParser();
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (encoding != NULL) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync xmlCharEncoding enc;
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync enc = xmlParseCharEncoding(encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (enc != cur->charset) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (cur->charset != XML_CHAR_ENCODING_UTF8) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Not supported yet
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler(encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (handler == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(-1);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync } else {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync }
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Fallback to HTML or ASCII when the encoding is unspecified
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (handler == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler("HTML");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (handler == NULL)
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync handler = xmlFindCharEncodingHandler("ascii");
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync /*
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * save the content to a temp buffer.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync buf = xmlOutputBufferCreateFilename(filename, handler, 0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync if (buf == NULL) return(0);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync ret = xmlOutputBufferClose(buf);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(ret);
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync/**
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * htmlSaveFileEnc:
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @filename: the filename
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @cur: the document
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * @encoding: the document encoding
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * Dump an HTML document to a file using a given encoding
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * and formatting returns/spaces are added.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync *
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync * returns: the number of byte written or -1 in case of failure.
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsyncint
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsynchtmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync return(htmlSaveFileFormat(filename, cur, encoding, 1));
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync}
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif /* LIBXML_OUTPUT_ENABLED */
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#define bottom_HTMLtree
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#include "elfgcchack.h"
38ae7e4efe803ea78b6499cd05a394db32623e41vboxsync#endif /* LIBXML_HTML_ENABLED */