/*
* reserved comment block
* DO NOT REMOVE OR ALTER!
*/
/*
* Copyright 1999-2002,2004,2005 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Sep 14, 2000:
// Fixed problem with namespace handling. Contributed by
// David Blondeau <blondeau@intalio.com>
// Sep 14, 2000:
// Fixed serializer to report IO exception directly, instead of at
// the end of document processing.
// Reported by Patrick Higgins <phiggins@transzap.com>
// Aug 21, 2000:
// Fixed bug in startDocument not calling prepare.
// Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
// Aug 21, 2000:
// Added ability to omit DOCTYPE declaration.
/**
* Implements an XML serializer supporting both DOM and SAX pretty
* serializing. For usage instructions see {@link Serializer}.
* <p>
* If an output stream is used, the encoding is taken from the
* output format (defaults to <tt>UTF-8</tt>). If a writer is
* used, make sure the writer uses the same encoding (if applicable)
* as specified in the output format.
* <p>
* The serializer supports both DOM and SAX. SAX serializing is done by firing
* SAX events and using the serializer as a document handler. DOM serializing is done
* by calling {@link #serialize(Document)} or by using DOM Level 3
* {@link org.w3c.dom.ls.DOMSerializer} and
* serializing with {@link org.w3c.dom.ls.DOMSerializer#write},
* {@link org.w3c.dom.ls.DOMSerializer#writeToString}.
* <p>
* If an I/O exception occurs while serializing, the serializer
* will not throw an exception directly, but only throw it
* at the end of serializing (either DOM or SAX's {@link
* org.xml.sax.DocumentHandler#endDocument}).
* <p>
* For elements that are not specified as whitespace preserving,
* the serializer will potentially break long text lines at space
* boundaries, indent lines, and serialize elements on separate
* lines. Line terminators will be regarded as spaces, and
* spaces at beginning of line will be stripped.
* @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
* @author <a href="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a>
* @author Elena Litani IBM
* @see Serializer
*/
public class XMLSerializer
extends BaseMarkupSerializer {
//
// constants
//
protected static final boolean DEBUG = false;
//
// data
//
//
// DOM Level 3 implementation: variables initialized in DOMSerializerImpl
//
/** stores namespaces in scope */
/** stores all namespace bindings on the current element */
/** symbol table for serialization */
/**
* Controls whether namespace fixup should be performed during
* the serialization.
* NOTE: if this field is set to true the following
* fields need to be initialized: fNSBinder, fLocalNSBinder, fSymbolTable,
* XMLSymbols.EMPTY_STRING, fXmlSymbol, fXmlnsSymbol
*/
protected boolean fNamespaces = false;
/**
* Controls whether namespace prefixes will be printed out during serialization
*/
protected boolean fNamespacePrefixes = true;
private boolean fPreserveSpace;
/**
* Creates a serializer that has no output target attached yet.
* Before the serializer can be used, either
* {@link #setOutputCharStream} or {@link #setOutputByteStream}
* must be called.
*/
public XMLSerializer() {
super();
}
/**
* Constructs a new serializer. The serializer cannot be used without
* calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
* first.
*/
}
/**
* Constructs a new serializer that writes to the specified writer
* using the specified output format. If <tt>format</tt> is null,
* will use a default output format.
*
* @param writer The writer to use
* @param format The output format to use, null for the default
*/
}
/**
* Constructs a new serializer that writes to the specified output
* stream using the specified output format. If <tt>format</tt>
* is null, will use a default output format.
*
* @param output The output stream to use
* @param format The output format to use, null for the default
*/
}
}
/**
* This method turns on the namespace fixup algorithm during
* DOM serialization.
* @see org.w3c.dom.ls.DOMSerializer
*
* @param namespaces
*/
fNSBinder = new NamespaceSupport();
fLocalNSBinder = new NamespaceSupport();
fSymbolTable = new SymbolTable();
}
}
//-----------------------------------------//
// SAX content handler serializing methods //
//-----------------------------------------//
throws SAXException
{
int i;
boolean preserveSpace;
boolean addNSAttr = false;
if (DEBUG) {
}
try {
String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "NoWriterSupplied", null);
throw new IllegalStateException(msg);
}
state = getElementState();
if (isDocumentState()) {
// If this is the root element handle it differently.
// If the first root element in the document, serialize
// the document's DOCTYPE. Space preserving defaults
// to that of the output format.
if (! _started)
} else {
// For any other element, if first in parent, then
// close parent's opening tag and use the parent's
// space preserving.
// Must leave CData section first
}
// Indent this element on a new line if the first
// content of the parent element or immediately
// following an element or a comment
}
//We remove the namespaces from the attributes list so that they will
//be in _prefixes
// Do not change the current element state yet.
// This only happens in endElement().
String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "NoName", null);
throw new SAXException(msg);
}
else
} else
addNSAttr = true;
}
// For each attribute print its name and value as one part,
// separated with a space so the element can be broken on
// multiple lines.
}
}
value = "";
printEscaped( value );
// If the attribute xml:space exists, determine whether
// to preserve spaces in this and child nodes based on
// its value.
preserveSpace = true;
else
}
}
}
while (keys.hasMoreElements()) {
printEscaped( value );
} else {
printEscaped( value );
}
}
}
// Now it's time to enter a new element state
// with the tag name and space preserving.
// We still do not change the current element state.
} catch (IOException except) {
throw new SAXException( except );
}
}
throws SAXException
{
try {
} catch (IOException except) {
throw new SAXException( except );
}
}
throws IOException
{
if (DEBUG) {
}
// Works much like content() with additions for closing
// an element. Note the different checks for the closed
// element's state and the parent element's state.
state = getElementState();
} else {
// Must leave CData section first
// This element is not empty and that last content was
// another element, so print a line break before that
// last element and this element's closing tag.
}
// Leave the element state and update that of the parent
// (if we're not root) to not empty and after element.
state = leaveElementState();
state.afterElement = true;
state.afterComment = false;
if (isDocumentState())
}
//------------------------------------------//
// SAX document handler serializing methods //
//------------------------------------------//
throws SAXException
{
int i;
boolean preserveSpace;
if (DEBUG) {
}
try {
String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "NoWriterSupplied", null);
throw new IllegalStateException(msg);
}
state = getElementState();
if (isDocumentState()) {
// If this is the root element handle it differently.
// If the first root element in the document, serialize
// the document's DOCTYPE. Space preserving defaults
// to that of the output format.
if (! _started)
startDocument( tagName );
} else {
// For any other element, if first in parent, then
// close parent's opening tag and use the parent's
// space preserving.
// Must leave CData section first
}
// Indent this element on a new line if the first
// content of the parent element or immediately
// following an element.
}
// Do not change the current element state yet.
// This only happens in endElement().
// For each attribute print its name and value as one part,
// separated with a space so the element can be broken on
// multiple lines.
printEscaped( value );
}
// If the attribute xml:space exists, determine whether
// to preserve spaces in this and child nodes based on
// its value.
preserveSpace = true;
else
}
}
}
// Now it's time to enter a new element state
// with the tag name and space preserving.
// We still do not change the current element state.
} catch (IOException except) {
throw new SAXException( except );
}
}
throws SAXException
{
}
//----------------------------------//
// Generic node serializing methods //
//----------------------------------//
/**
* Called to serialize the document's DOCTYPE by the root element.
* The document type declaration must name the root element,
* but the root element is only known when that element is serialized,
* and not at the start of the document.
* <p>
* This method will check if it has not been called before ({@link #_started}),
* will serialize the document type declaration, and will serialize all
* pre-root comments and PIs that were accumulated in the document
* (see {@link #serializePreRoot}). Pre-root will be serialized even if
* this is not the first root element of the document.
*/
throws IOException
{
int i;
if (! _started) {
if (! _format.getOmitXMLDeclaration()) {
// Serialize the document declaration appearing at the head
// of every XML document (unless asked not to).
else
if (format_encoding != null) {
}
_docTypePublicId == null)
}
if (! _format.getOmitDocumentType()) {
if (_docTypeSystemId != null) {
// System identifier must be specified to print DOCTYPE.
// If public identifier is specified print 'PUBLIC
// <public> <system>', if not, print 'SYSTEM <system>'.
if (_docTypePublicId != null) {
if (_indenting) {
} else
} else {
}
// If we accumulated any DTD contents while printing.
// this would be the place to print it.
}
}
}
}
_started = true;
// Always serialize these, even if not the first root element.
}
/**
* Called to serialize a DOM element. Equivalent to calling {@link
* #startElement}, {@link #endElement} and serializing everything
* in between, but better optimized.
*/
throws IOException
{
int i;
if (fNamespaces) {
// local binder stores namespace declaration
// that has been printed out during namespace fixup of
// the current element
// add new namespace context
}
if (DEBUG) {
}
state = getElementState();
if (isDocumentState()) {
// If this is the root element handle it differently.
// If the first root element in the document, serialize
// the document's DOCTYPE. Space preserving defaults
// to that of the output format.
if (! _started) {
}
} else {
// For any other element, if first in parent, then
// close parent's opening tag and use the parent's
// space preserving.
// Must leave CData section first
}
// Indent this element on a new line if the first
// content of the parent element or immediately
// following an element.
}
// Do not change the current element state yet.
// This only happens in endElement().
int length = 0;
// retrieve attributes
if (elem.hasAttributes()) {
}
if (!fNamespaces) { // no namespace fixup should be performed
// serialize element name
// For each attribute print its name and value as one part,
// separated with a space so the element can be broken on
// multiple lines.
for ( i = 0 ; i < length ; ++i ) {
value = "";
}
} else { // do namespace fixup
// REVISIT: some optimization could probably be done to avoid traversing
// attributes twice.
//
// ---------------------------------------
// record all valid namespace declarations
// before attempting to fix element's namespace
// ---------------------------------------
for (i = 0;i < length;i++) {
// check if attribute is a namespace decl
}
if (fDOMErrorHandler != null) {
if (!continueProcess) {
// stop the namespace fixup and validation
throw new RuntimeException(
"SerializationStopped", null));
}
}
} else {
// record valid decl
} else {
// REVISIT: issue error on invalid declarations
// xmlns:foo = ""
}
continue;
} else { // xmlns
// empty prefix is always bound ("" or some string)
continue;
}
} // end-else: valid declaration
} // end-if: namespace declaration
} // end-for
//-----------------------
//-----------------------
//----------------------
// output element name
//----------------------
// REVISIT: this could be removed if we always convert empty string to null
// for the namespaces.
// uri is an empty string and element has some prefix
// the namespace alg later will fix up the namespace attributes
// remove element prefix
} else {
}
// ---------------------------------------------------------
// Fix up namespaces for element: per DOM L3
// Need to consider the following cases:
//
// case 1: <foo:elem xmlns:ns1="myURI" xmlns="default"/>
// Assume "foo", "ns1" are declared on the parent. We should not miss
// redeclaration for both "ns1" and default namespace. To solve this
// we add a local binder that stores declaration only for current element.
// This way we avoid outputting duplicate declarations for the same element
// as well as we are not omitting redeclarations.
//
// case 2: <elem xmlns="" xmlns="default"/>
// We need to bind default namespace to empty string, to be able to
// omit duplicate declarations for the same element
//
// case 3: <xsl:stylesheet xmlns:xsl="http://xsl">
// We create another element body bound to the "http://xsl" namespace
// as well as namespace attribute rebounding xsl to another namespace.
// <xsl:body xmlns:xsl="http://another">
// Need to make sure that the new namespace decl value is changed to
// "http://xsl"
//
// ---------------------------------------------------------
// ---------------------------------------------------------
// The xmlns:prefix=namespace or xmlns="default" was declared at parent.
// The binder always stores mapping of empty prefix to "".
// (NOTE: local binder does not store this kind of binding!)
// Thus the case where element was declared with uri="" (with or without a prefix)
// will be covered here.
} else {
// the prefix is either undeclared
// or
// conflict: the prefix is bound to another URI
if (fNamespacePrefixes) {
}
}
} else { // Element has no namespace
// DOM Level 1 node!
if (fDOMErrorHandler != null) {
// REVISIT: should we terminate upon request?
if (!continueProcess) {
throw new RuntimeException(
"SerializationStopped", null));
}
}
} else { // uri=null and no colon (DOM L2 node)
// there is a default namespace decl that is bound to
// non-zero length uri, output xmlns=""
if (fNamespacePrefixes) {
}
}
}
}
// -----------------------------------------
// Fix up namespaces for attributes: per DOM L3
// -----------------------------------------
for (i = 0; i < length; i++) {
// Fix attribute that was declared with a prefix and namespace=""
// we must remove prefix for this attribute
}
if (DEBUG) {
}
// make sure that value is never null.
}
// ---------------------------------------------------
// print namespace declarations
// ---------------------------------------------------
// check if we need to output this declaration
// declaration was not printed while fixing element namespace binding
// If the DOM Level 3 namespace-prefixes feature is set to false
// do not print xmlns attributes
if (fNamespacePrefixes) {
}
// case 4: <elem xmlns:xx="foo" xx:attr=""/>
// where attribute is bound to "bar".
// If the xmlns:xx is output here first, later we should not
// redeclare "xx" prefix. Instead we would pick up different prefix
// for the attribute.
// final: <elem xmlns:xx="foo" NS1:attr="" xmlns:NS1="bar"/>
}
} else {
// REVISIT: issue error on invalid declarations
// xmlns:foo = ""
}
continue;
} else { // xmlns
// empty prefix is always bound ("" or some string)
// declaration was not printed while fixing element namespace binding
if (fNamespacePrefixes) {
}
// case 4 does not apply here since attributes can't use
// default namespace
}
continue;
}
}
// find if for this prefix a URI was already declared
// attribute has no prefix (default namespace decl does not apply to attributes)
// OR
// attribute prefix is not declared
// OR
// conflict: attr URI does not match the prefix in scope
// Find if any prefix for attributes namespace URI is available
// in the scope
// use the prefix that was found
} else {
if (DEBUG) {
}
// the current prefix is not null and it has no in scope declaration
// use this prefix
} else {
// find a prefix following the pattern "NS" +index (starting at 1)
// make sure this prefix is not declared in the current scope.
int counter = 1;
}
}
// add declaration for the new prefix
if (fNamespacePrefixes) {
}
}
// change prefix for this attribute
}
} else { // attribute uri == null
if (fDOMErrorHandler != null) {
if (!continueProcess) {
// stop the namespace fixup and validation
throw new RuntimeException(
"SerializationStopped", null));
}
}
} else { // uri=null and no colon
// no fix up is needed: default namespace decl does not
// apply to attributes
}
}
} // end loop for attributes
}// end namespace fixup algorithm
// If element has children, then serialize them, otherwise
// serialize an empty tag.
if (elem.hasChildNodes()) {
// Enter an element state, and serialize the children
// one by one. Finally, end the element.
serializeNode( child );
}
if (fNamespaces) {
}
} else {
if (DEBUG) {
}
if (fNamespaces) {
}
// After element but parent element is no longer empty.
state.afterElement = true;
state.afterComment = false;
if (isDocumentState())
}
}
/**
* Serializes a namespace attribute with the given prefix and value for URI.
* In case prefix is empty will serialize default namespace declaration.
*
* @param prefix
* @param uri
* @exception IOException
*/
if (DEBUG) {
}
} else {
if (DEBUG) {
}
}
printEscaped( uri );
}
/**
* Prints a single attribute.
* <p>
* NOTE: the xml:space attribute modifies the output format; this method
* updates {@code fPreserveSpace} accordingly.
* <p>
* When a DOM filter ({@code fDOMFilter}) is installed and the filter code
* is {@code NodeFilter.FILTER_REJECT} or {@code NodeFilter.FILTER_SKIP},
* the method returns without printing the attribute.
*
* @param name the attribute name
* @param value the attribute value; it is written with {@code printEscaped}
* @param isSpecified whether the attribute was explicitly specified
*        (NOTE(review): how this flag is used is not visible here -- confirm)
* @param attr the DOM attribute node
*        (NOTE(review): presumably the node offered to the filter -- confirm)
* @exception IOException propagated from the output routines this method calls
*/
private void printAttribute (String name, String value, boolean isSpecified, Attr attr) throws IOException{
// Consult the DOM filter, if one is installed.
if (fDOMFilter !=null &&
switch (code) {
// Both "reject" and "skip" suppress the attribute entirely.
case NodeFilter.FILTER_REJECT:
case NodeFilter.FILTER_SKIP: {
return;
}
default: {
// fall through
}
}
}
// The value is escaped on output.
printEscaped( value );
}
// If the attribute xml:space exists, determine whether
// to preserve spaces in this and child nodes based on
// its value.
fPreserveSpace = true;
else
}
}
// Encode special XML characters into the equivalent character references.
// These five are defined by default for all XML documents.
switch (ch) {
case '<':
return "lt";
case '>':
return "gt";
case '"':
return "quot";
case '\'':
return "apos";
case '&':
return "amp";
}
return null;
}
/** Retrieve and remove the namespaces declarations from the list of attributes.
*
*/
throws SAXException
{
int i;
int indexColon;
int length;
return null;
}
//We have to exclude the namespaces declarations from the attributes
//Append only when the feature "http://xml.org/sax/features/namespace-prefixes"
//is TRUE
attrsOnly.removeAttribute( i );
attrsOnly.removeAttribute( i );
}
}
}
return attrsOnly;
}
//
// Printing attribute value
//
for (int i = 0; i < length; ++i) {
if (++i < length) {
} else {
}
continue;
}
// escape NL, CR, TAB
} else if (ch == '<') {
} else if (ch == '&') {
} else if (ch == '"') {
} else {
}
}
}
/** print text data */
if (ch == '\r') {
} else if ( ch == '<') {
} else if (ch == '&') {
} else if (ch == '>'){
// character sequence "]]>" can't appear in content, therefore
// we should escape '>'
} else {
}
}
throws IOException {
int index;
char ch;
if ( preserveSpace ) {
// Preserving spaces: the text must print exactly as it is,
// without breaking when spaces appear in the text and without
// consolidating spaces. If a line terminator is used, a line
// break will occur.
// check if it is surrogate
} else {
}
continue;
}
if ( unescaped ) {
} else
printXMLChar( ch );
}
} else {
// Not preserving spaces: print one part at a time, and
// use spaces between parts to break them into different
// lines. Spaces at beginning of line will be stripped
// by printing mechanism. Line terminator is treated
// no different than other text part.
// check if it is surrogate
} else {
}
continue;
}
if ( unescaped )
else
printXMLChar( ch);
}
}
}
int index;
char ch;
if ( preserveSpace ) {
// Preserving spaces: the text must print exactly as it is,
// without breaking when spaces appear in the text and without
// consolidating spaces. If a line terminator is used, a line
// break will occur.
while ( length-- > 0 ) {
// check if it is surrogate
if ( length-- > 0 ) {
} else {
}
continue;
}
if ( unescaped )
else
printXMLChar( ch );
}
} else {
// Not preserving spaces: print one part at a time, and
// use spaces between parts to break them into different
// lines. Spaces at beginning of line will be stripped
// by printing mechanism. Line terminator is treated
// no different than other text part.
while ( length-- > 0 ) {
// check if it is surrogate
if ( length-- > 0 ) {
} else {
}
continue;
}
if ( unescaped )
else
printXMLChar( ch );
}
}
}
/**
* DOM Level 3:
* Check a node to determine if it contains unbound namespace prefixes.
*
* @param node The node to check for unbound namespace prefixes
*/
if (fNamespaces) {
if (DEBUG) {
}
if (DEBUG) {
}
//If a NamespaceURI is not declared for the current
//node's prefix, raise a fatal error.
fatalError("The replacement text of the entity node '"
+ node.getNodeName()
+ "' contains an element node '"
+ child.getNodeName()
+ "' with an undeclared prefix '"
+ prefix + "'.");
}
fatalError("The replacement text of the entity node '"
+ node.getNodeName()
+ "' contains an element node '"
+ child.getNodeName()
+ "' with an attribute '"
+ "' an undeclared prefix '"
+ attrPrefix + "'.");
}
}
}
if (child.hasChildNodes()) {
}
}
}
}
/**
* Resets this serializer, delegating to {@code super.reset()} first.
*
* @return <code>true</code> (the only value returned by the code visible here)
*/
public boolean reset() {
super.reset();
// during serialization always have a mapping to empty string
// so we assume there is a declaration.
}
return true;
}
}