/*
* reserved comment block
* DO NOT REMOVE OR ALTER!
*/
/*
* Copyright 1999-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Sep 14, 2000:
// Fixed serializer to report IO exception directly, instead of at
// the end of document processing.
// Reported by Patrick Higgins <phiggins@transzap.com>
// Aug 21, 2000:
// Fixed bug in startDocument not calling prepare.
// Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
// Aug 21, 2000:
// Added ability to omit DOCTYPE declaration.
// Sep 1, 2000:
// If no output format is provided the serializer now defaults
// to ISO-8859-1 encoding. Reported by Mikael Staldal
// <d96-mst@d.kth.se>
/**
* Serializes documents as HTML or XHTML; the mode is chosen in the
* constructor. For usage instructions see {@link Serializer}.
* <p>
* If an output stream is used, the encoding is taken from the
* output format (defaults to <tt>UTF-8</tt>); when no output format
* is supplied, a default HTML format using <tt>ISO-8859-1</tt> is
* used instead. If a writer is used, make sure the writer uses the
* same encoding (if applicable) as specified in the output format.
* <p>
* The serializer supports both DOM and SAX. DOM serializing is done
* by calling {@link #serialize} and SAX serializing is done by firing
* SAX events and using the serializer as a document handler.
* <p>
* If an I/O exception occurs while serializing, the serializer
* will not throw an exception directly, but only throw it
* at the end of serializing (either DOM or SAX's {@link
* org.xml.sax.DocumentHandler#endDocument}).
* <p>
* For elements that are not specified as whitespace preserving,
* the serializer will potentially break long text lines at space
* boundaries, indent lines, and serialize elements on separate
* lines. Line terminators will be regarded as spaces, and
* spaces at beginning of line will be stripped.
* <p>
* XHTML is slightly different than HTML:
* <ul>
* <li>Attributes must specify value, even if empty string
* <li>Empty elements must have '/' in empty tag
* <li>Contents of SCRIPT and STYLE elements serialized as CDATA
* </ul>
*
* @deprecated This class was deprecated in Xerces 2.6.2. It is
* recommended that new applications use JAXP's Transformation API
* for XML (TrAX) for serializing HTML. See the Xerces documentation
* for more information.
* @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
* @see Serializer
*/
public class HTMLSerializer
extends BaseMarkupSerializer
{
/**
* True if serializing in XHTML format.
*/
private boolean _xhtml;
// for users to override XHTMLNamespace if need be.
/**
* Constructs a new serializer in HTML or XHTML mode, depending on
* <tt>xhtml</tt>. The serializer cannot be used without calling
* {@link #setOutputCharStream} or {@link #setOutputByteStream} first.
*
* @param xhtml True if XHTML serializing
*/
{
super( format );
}
/**
* Constructs a new serializer. The serializer cannot be used without
* calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
* first.
*/
public HTMLSerializer()
{
// No explicit set-up is visible in this body; as the Javadoc above
// states, an output target must still be supplied through
// setOutputCharStream or setOutputByteStream before the serializer
// is used.
// NOTE(review): the other constructors forward an output format to the
// superclass -- confirm that an empty body here is intentional.
}
/**
* Constructs a new serializer. The serializer cannot be used without
* calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
* first.
*/
{
}
/**
* Constructs a new serializer that writes to the specified writer
* using the specified output format. If <tt>format</tt> is null,
* will use a default output format.
*
* @param writer The writer to use
* @param format The output format to use, null for the default
*/
{
}
/**
* Constructs a new serializer that writes to the specified output
* stream using the specified output format. If <tt>format</tt>
* is null, will use a default output format.
*
* @param output The output stream to use
* @param format The output format to use, null for the default
*/
{
}
{
super.setOutputFormat( format != null ? format : new OutputFormat( Method.HTML, "ISO-8859-1", false ) );
}
// Set value for alternate XHTML namespace.
} // setXHTMLNamespace(String)
//-----------------------------------------//
// SAX content handler serializing methods //
//-----------------------------------------//
throws SAXException
{
int i;
boolean preserveSpace;
boolean addNSAttr = false;
try {
throw new IllegalStateException(
"NoWriterSupplied", null));
state = getElementState();
if ( isDocumentState() ) {
// If this is the root element handle it differently.
// If the first root element in the document, serialize
// the document's DOCTYPE. Space preserving defaults
// to that of the output format.
if ( ! _started )
} else {
// For any other element, if first in parent, then
// close parent's opening tag and use the parent's
// space preserving.
// Indent this element on a new line if the first
// content of the parent element or immediately
// following an element.
}
// Do not change the current element state yet.
// This only happens in endElement().
// As per SAX2, the namespace URI is an empty string if the element has no
// namespace URI, or namespaces is turned off. The check against null protects
// against broken SAX implementations, so I've left it there. - mrglavas
// SAX2: rawName (QName) could be empty string if
// namespace-prefixes property is false.
if ( hasNamespaceURI ) {
}
addNSAttr = true;
}
if ( !hasNamespaceURI )
else {
else
}
// XHTML: element names are lower case, DOM will be different
if ( _xhtml )
else
// For each attribute serialize its name and value as one part,
// separated with a space so the element can be broken on
// multiple lines.
if ( _xhtml || hasNamespaceURI ) {
// XHTML: print empty string for null values.
} else {
printEscaped( value );
}
} else {
// HTML: Empty values print as attribute name, no value.
// HTML: URI attributes will print unescaped
value = "";
}
else {
printEscaped( value );
}
}
}
}
preserveSpace = true;
if ( addNSAttr ) {
while ( keys.hasMoreElements() ) {
printEscaped( value );
} else {
printEscaped( value );
}
}
}
// Now it's time to enter a new element state
// with the tag name and space preserving.
// We still do not change the current element state.
}
// Handle SCRIPT and STYLE specifically by changing the
// state of the current element to CDATA (XHTML) or
// unescaped (HTML).
if ( _xhtml ) {
// XHTML: Print contents as CDATA section
} else {
// HTML: Print contents unescaped
}
}
} catch ( IOException except ) {
throw new SAXException( except );
}
}
throws SAXException
{
try {
} catch ( IOException except ) {
throw new SAXException( except );
}
}
throws IOException
{
// Works much like content() with additions for closing
// an element. Note the different checks for the closed
// element's state and the parent element's state.
state = getElementState();
else {
else
}
if ( _xhtml) {
} else {
// Must leave CData section first
// XHTML: element names are lower case, DOM will be different
}
} else {
// This element is not empty and that last content was
// another element, so print a line break before that
// last element and this element's closing tag.
// [keith] Provided this is not an anchor.
// HTML: some elements do not print closing tag (e.g. LI)
// Must leave CData section first (Illegal in HTML, but still)
}
}
// Leave the element state and update that of the parent
// (if we're not root) to not empty and after element.
state = leaveElementState();
state.afterElement = true;
if ( isDocumentState() )
}
//------------------------------------------//
// SAX document handler serializing methods //
//------------------------------------------//
throws SAXException
{
try {
// HTML: no CDATA section
} catch ( IOException except ) {
throw new SAXException( except );
}
}
throws SAXException
{
int i;
boolean preserveSpace;
try {
throw new IllegalStateException(
"NoWriterSupplied", null));
state = getElementState();
if ( isDocumentState() ) {
// If this is the root element handle it differently.
// If the first root element in the document, serialize
// the document's DOCTYPE. Space preserving defaults
// to that of the output format.
if ( ! _started )
startDocument( tagName );
} else {
// For any other element, if first in parent, then
// close parent's opening tag and use the parent's
// space preserving.
// Indent this element on a new line if the first
// content of the parent element or immediately
// following an element.
}
// Do not change the current element state yet.
// This only happens in endElement().
// XHTML: element names are lower case, DOM will be different
if ( _xhtml )
else
// For each attribute serialize its name and value as one part,
// separated with a space so the element can be broken on
// multiple lines.
if ( _xhtml ) {
// XHTML: print empty string for null values.
} else {
printEscaped( value );
}
} else {
// HTML: Empty values print as attribute name, no value.
// HTML: URI attributes will print unescaped
value = "";
}
else {
printEscaped( value );
}
}
}
}
preserveSpace = true;
// Now it's time to enter a new element state
// with the tag name and space preserving.
// We still do not change the current element state.
}
// Handle SCRIPT and STYLE specifically by changing the
// state of the current element to CDATA (XHTML) or
// unescaped (HTML).
if ( _xhtml ) {
// XHTML: Print contents as CDATA section
} else {
// HTML: Print contents unescaped
}
}
} catch ( IOException except ) {
throw new SAXException( except );
}
}
throws SAXException
{
}
//------------------------------------------//
// Generic node serializing methods         //
//------------------------------------------//
/**
* Called to serialize the document's DOCTYPE by the root element.
* The document type declaration must name the root element,
* but the root element is only known when that element is serialized,
* and not at the start of the document.
* <p>
* This method will check if it has not been called before ({@link #_started}),
* will serialize the document type declaration, and will serialize all
* pre-root comments and PIs that were accumulated in the document
* (see {@link #serializePreRoot}). Pre-root will be serialized even if
* this is not the first root element of the document.
*/
throws IOException
{
// out of DTD mode.
if ( ! _started ) {
// If the public and system identifiers were not specified
// in the output format, use the appropriate ones for HTML
// or XHTML.
if ( _xhtml ) {
} else {
}
}
if ( ! _format.getOmitDocumentType() ) {
// XHTML: If public identifier and system identifier
// specified, print them, else print just system identifier
// HTML: If public identifier specified, print it with
// system identifier, if specified.
// XHTML requires that all element names are lower case, so the
// root on the DOCTYPE must be 'html'. - mrglavas
if (_xhtml) {
}
else {
}
if ( _docTypeSystemId != null ) {
if ( _indenting ) {
} else
}
} else if ( _docTypeSystemId != null ) {
if (_xhtml) {
}
else {
}
}
}
}
_started = true;
// Always serialize these, even if not the first root element.
}
/**
* Called to serialize a DOM element. Equivalent to calling {@link
* #startElement}, {@link #endElement} and serializing everything
* in between, but better optimized.
*/
throws IOException
{
int i;
boolean preserveSpace;
state = getElementState();
if ( isDocumentState() ) {
// If this is the root element handle it differently.
// If the first root element in the document, serialize
// the document's DOCTYPE. Space preserving defaults
// to that of the output format.
if ( ! _started )
startDocument( tagName );
} else {
// For any other element, if first in parent, then
// close parent's opening tag and use the parent's
// space preserving.
// Indent this element on a new line if the first
// content of the parent element or immediately
// following an element.
}
// Do not change the current element state yet.
// This only happens in endElement().
// XHTML: element names are lower case, DOM will be different
if ( _xhtml )
else
// Look up the element's attributes, but only print specified
// attributes. (Unspecified attributes are derived from the DTD.)
// For each attribute print its name and value as one part,
// separated with a space so the element can be broken on
// multiple lines.
if ( attr.getSpecified() ) {
if ( _xhtml ) {
// XHTML: print empty string for null values.
} else {
printEscaped( value );
}
} else {
// HTML: Empty values print as attribute name, no value.
// HTML: URI attributes will print unescaped
value = "";
}
else {
printEscaped( value );
}
}
}
}
}
preserveSpace = true;
// If element has children, or if element is not an empty tag,
// serialize an opening tag.
// Enter an element state, and serialize the children
// one by one. Finally, end the element.
}
// Handle SCRIPT and STYLE specifically by changing the
// state of the current element to CDATA (XHTML) or
// unescaped (HTML).
if ( _xhtml ) {
// XHTML: Print contents as CDATA section
} else {
// HTML: Print contents unescaped
}
}
serializeNode( child );
}
} else {
// XHTML: Close empty tag with ' />' so it's XML and HTML compatible.
// HTML: Empty tags are defined as such in the DTD, not in the document.
if ( _xhtml )
else
// After element but parent element is no longer empty.
state.afterElement = true;
if ( isDocumentState() )
}
}
throws IOException
{
// HTML: no CDATA section
super.characters( text );
}
{
}
{
int index;
// XXX Apparently Netscape doesn't like it if we escape the URI
// using %nn, so we leave it as is and just remove any quotes.
if ( index >= 0 )
else
return uri;
}
}