HTMLtree.c revision d46ee884c41b808b239563b1978468aae12e33a2
/*
* HTMLtree.c : implementation of access functions for an HTML tree.
*
* See Copyright for the status of this software.
*
* daniel@veillard.com
*/
#define IN_LIBXML
#include "libxml.h"
#ifdef LIBXML_HTML_ENABLED
#include <string.h> /* for memset() only ! */
#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#include <libxml/xmlmemory.h>
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>
#include <libxml/entities.h>
#include <libxml/xmlerror.h>
#include <libxml/parserInternals.h>
/************************************************************************
* *
* *
************************************************************************/
/**
* htmlGetMetaEncoding:
* @doc: the document
*
* Encoding definition lookup in the Meta tags
*
* Returns the current encoding as flagged in the HTML source
*/
const xmlChar *
return(NULL);
/*
* Search the html
*/
break;
goto found_head;
goto found_meta;
}
}
return(NULL);
/*
* Search the head
*/
break;
goto found_meta;
}
}
return(NULL);
/*
* Search the meta elements
*/
int http;
http = 0;
http = 1;
goto found_content;
}
}
}
}
}
return(NULL);
encoding += 8;
} else {
encoding += 9;
}
}
return(encoding);
}
/**
* htmlSetMetaEncoding:
* @doc: the document
* @encoding: the encoding string
*
* Sets the current encoding in the Meta tags
* NOTE: this will not change the document content encoding, just
* the META flag associated.
*
* Returns 0 in case of success and -1 in case of error
*/
int
char newcontent[100];
return(-1);
(char *)encoding);
}
/*
* Search the html
*/
break;
goto found_head;
goto found_meta;
}
}
return(-1);
/*
* Search the head
*/
break;
goto found_meta;
}
}
return(-1);
return(0);
return(0);
}
/*
* Create a new Meta element with the right attributes
*/
}
/*
* Search and destroy all the remaining meta elements carrying
* encoding information
*/
int http;
http = 0;
http = 1;
else
{
}
break;
}
}
continue;
}
}
}
}
return(0);
}
/**
 * htmlBooleanAttrs:
 *
 * NULL-terminated table of the HTML attribute names that are written
 * in minimized form on output: <option selected="selected"> is
 * serialized as <option selected>, following XSLT 1.0 section 16.2
 * "HTML Output Method".
 */
static const char *htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL /* end-of-table sentinel */
};
/**
* htmlIsBooleanAttr:
* @name: the name of the attribute to check
*
* Determine if a given attribute is a boolean attribute.
*
* returns: false if the attribute is not boolean, true otherwise.
*/
int
{
int i = 0;
while (htmlBooleanAttrs[i] != NULL) {
return 1;
i++;
}
return 0;
}
#ifdef LIBXML_OUTPUT_ENABLED
/************************************************************************
* *
* Output error handlers *
* *
************************************************************************/
/**
* htmlSaveErrMemory:
* @extra: extra information
*
* Handle an out of memory condition
*
* NOTE(review): the function body is empty in this view, so @extra is
* never used here -- confirm that the error-reporting call was not lost.
*/
static void
htmlSaveErrMemory(const char *extra)
{
}
/**
* htmlSaveErr:
* @code: the error number
* @node: the location of the error.
* @extra: extra information
*
* Handle an error raised while saving: the message text is selected
* from @code, with a generic message for any unrecognized code.
*/
static void
{
/* Map the error code to its message text. */
switch(code) {
case XML_SAVE_NOT_UTF8:
msg = "string is not in UTF-8\n";
break;
case XML_SAVE_CHAR_INVALID:
msg = "invalid character value\n";
break;
/*
* NOTE(review): no case label precedes the next assignment in this
* view, so it is unreachable as written -- presumably the label for
* the unknown-encoding error code is missing; confirm.
*/
msg = "unknown encoding %s\n";
break;
case XML_SAVE_NO_DOCTYPE:
msg = "HTML has no DOCTYPE\n";
break;
/* Any other code falls back to a generic message. */
default:
msg = "unexpected error number\n";
}
}
/************************************************************************
* *
* Dumping HTML tree content to a simple buffer *
* *
************************************************************************/
static int
int format);
/**
* htmlNodeDumpFormat:
* @buf: the HTML buffer output
* @doc: the document
* @cur: the current node
* @format: should formatting spaces be added
*
* Dump an HTML node, recursive behaviour, children are printed too.
*
* Returns the number of bytes written or -1 in case of error
*/
static int
int format) {
unsigned int use;
int ret;
return (-1);
}
return (-1);
}
htmlSaveErrMemory("allocating HTML output buffer");
return (-1);
}
return (ret);
}
/**
* htmlNodeDump:
* @buf: the HTML buffer output
* @doc: the document
* @cur: the current node
*
* Dump an HTML node, recursive behaviour, children are printed too,
* and formatting returns are added.
*
* Returns the number of bytes written or -1 in case of error
*/
int
}
/**
* htmlNodeDumpFileFormat:
* @out: the FILE pointer
* @doc: the document
* @cur: the current node
* @encoding: the document encoding
* @format: should formatting spaces be added
*
* Dump an HTML node, recursive behaviour, children are printed too.
*
* TODO: if encoding == NULL try to save in the doc encoding
*
* returns: the number of bytes written or -1 in case of failure.
*/
int
int ret;
if (enc != XML_CHAR_ENCODING_UTF8) {
return(-1);
}
}
/*
* Fallback to HTML or ASCII when the encoding is unspecified
*/
/*
* save the content to a temp buffer.
*/
return(ret);
}
/**
* htmlNodeDumpFile:
* @out: the FILE pointer
* @doc: the document
* @cur: the current node
*
* Dump an HTML node, recursive behaviour, children are printed too,
* and formatting returns are added.
*/
void
}
/**
* htmlDocDumpMemoryFormat:
* @cur: the document
* @mem: OUT: the memory pointer
* @size: OUT: the memory length
* @format: should formatting spaces be added
*
* Dump an HTML document in memory and return the xmlChar * and its size.
* It's up to the caller to free the memory.
*/
void
const char *encoding;
return;
*size = 0;
return;
}
/*
* Not supported yet
*/
*size = 0;
return;
}
*size = 0;
return;
}
} else {
}
}
/*
* Fallback to HTML or ASCII when the encoding is unspecified
*/
*size = 0;
return;
}
} else {
}
(void)xmlOutputBufferClose(buf);
}
/**
* htmlDocDumpMemory:
* @cur: the document
* @mem: OUT: the memory pointer
* @size: OUT: the memory length
*
* Dump an HTML document in memory and return the xmlChar * and its size.
* It's up to the caller to free the memory.
*/
void
}
/************************************************************************
* *
* Dumping HTML tree content to an I/O output buffer *
* *
************************************************************************/
/**
* htmlDtdDumpOutput:
* @buf: the HTML buffer output
* @doc: the document
* @encoding: the encoding string
*
* TODO: check whether encoding is needed
*
* Dump the HTML document DTD, if any.
*/
static void
const char *encoding ATTRIBUTE_UNUSED) {
return;
}
}
}
}
/**
* htmlAttrDumpOutput:
* @buf: the HTML buffer output
* @doc: the document
* @cur: the attribute pointer
* @encoding: the encoding string
*
* Dump an HTML attribute
*/
static void
const char *encoding ATTRIBUTE_UNUSED) {
/*
* TODO: The html output method should not escape a & character
* occurring in an attribute value immediately followed by
* a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
*/
return;
}
}
if (value) {
} else {
}
} else {
}
} else {
}
}
}
/**
* htmlAttrListDumpOutput:
* @buf: the HTML buffer output
* @doc: the document
* @cur: the first attribute pointer
* @encoding: the encoding string
*
* Dump a list of HTML attributes
*/
static void
htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
return;
}
}
}
/**
* htmlNodeListDumpOutput:
* @buf: the HTML buffer output
* @doc: the document
* @cur: the first node
* @encoding: the encoding string
* @format: should formatting spaces be added
*
* Dump an HTML node list, recursive behaviour, children are printed too.
*/
static void
return;
}
}
}
/**
* htmlNodeDumpFormatOutput:
* @buf: the HTML buffer output
* @doc: the document
* @cur: the current node
* @encoding: the encoding string
* @format: should formatting spaces be added
*
* Dump an HTML node, recursive behaviour, children are printed too.
*/
void
const htmlElemDesc * info;
return;
}
/*
* Special cases.
*/
return;
return;
}
return;
}
}
} else {
}
}
return;
}
}
return;
}
return;
}
return;
}
return;
}
}
return;
}
/*
* Get specific HTML info for that node.
*/
else
}
}
return;
}
} else {
}
}
}
return;
}
/*
* Uses the OutputBuffer property to automatically convert
* invalids to charrefs
*/
}
}
}
}
}
/**
* htmlNodeDumpOutput:
* @buf: the HTML buffer output
* @doc: the document
* @cur: the current node
* @encoding: the encoding string
*
* Dump an HTML node, recursive behaviour, children are printed too.
*/
void
}
/**
* htmlDocContentDumpFormatOutput:
* @buf: the HTML buffer output
* @cur: the document
* @encoding: the encoding string
* @format: should formatting spaces be added
*
* Dump an HTML document.
*/
void
int type;
return;
/*
* force to output the stuff as HTML, especially for entities
*/
}
}
}
/**
* htmlDocContentDumpOutput:
* @buf: the HTML buffer output
* @cur: the document
* @encoding: the encoding string
*
* Dump an HTML document to the output buffer.
*/
void
const char *encoding) {
}
/************************************************************************
* *
* Saving functions front-ends *
* *
************************************************************************/
/**
* htmlDocDump:
* @f: the FILE*
* @cur: the document
*
* Dump an HTML document to an open FILE.
*
* returns: the number of bytes written or -1 in case of failure.
*/
int
const char *encoding;
int ret;
return(-1);
}
/*
* Not supported yet
*/
return(-1);
}
return(-1);
} else {
}
}
/*
* Fallback to HTML or ASCII when the encoding is unspecified
*/
return(ret);
}
/**
* htmlSaveFile:
* @filename: the filename (or URL)
* @cur: the document
*
* Dump an HTML document to a file. If @filename is "-" the stdout file is
* used.
* returns: the number of bytes written or -1 in case of failure.
*/
int
const char *encoding;
int ret;
return(-1);
/*
* Not supported yet
*/
return(-1);
}
return(-1);
}
}
/*
* Fallback to HTML or ASCII when the encoding is unspecified
*/
/*
* save the content to a temp buffer.
*/
return(ret);
}
/**
* htmlSaveFileFormat:
* @filename: the filename
* @cur: the document
* @format: should formatting spaces be added
* @encoding: the document encoding
*
* Dump an HTML document to a file using a given encoding.
*
* returns: the number of bytes written or -1 in case of failure.
*/
int
int ret;
return(-1);
/*
* Not supported yet
*/
return(-1);
}
return(-1);
}
} else {
}
/*
* Fallback to HTML or ASCII when the encoding is unspecified
*/
/*
* save the content to a temp buffer.
*/
return(ret);
}
/**
* htmlSaveFileEnc:
* @filename: the filename
* @cur: the document
* @encoding: the document encoding
*
* Dump an HTML document to a file using a given encoding
*
* returns: the number of bytes written or -1 in case of failure.
*/
int
}
#endif /* LIBXML_OUTPUT_ENABLED */
#define bottom_HTMLtree
#include "elfgcchack.h"
#endif /* LIBXML_HTML_ENABLED */