286N/A/*
286N/A * reserved comment block
286N/A * DO NOT REMOVE OR ALTER!
286N/A */
286N/A/*
286N/A * Copyright 1999-2002,2004,2005 The Apache Software Foundation.
286N/A *
286N/A * Licensed under the Apache License, Version 2.0 (the "License");
286N/A * you may not use this file except in compliance with the License.
286N/A * You may obtain a copy of the License at
286N/A *
286N/A * http://www.apache.org/licenses/LICENSE-2.0
286N/A *
286N/A * Unless required by applicable law or agreed to in writing, software
286N/A * distributed under the License is distributed on an "AS IS" BASIS,
286N/A * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
286N/A * See the License for the specific language governing permissions and
286N/A * limitations under the License.
286N/A */
286N/A
286N/A
286N/A
// Sep 14, 2000:
//  Fixed problem with namespace handling. Contributed by
//  David Blondeau <blondeau@intalio.com>
// Sep 14, 2000:
//  Fixed serializer to report IO exception directly, instead of at
//  the end of document processing.
//  Reported by Patrick Higgins <phiggins@transzap.com>
// Aug 21, 2000:
//  Fixed bug in startDocument not calling prepare.
//  Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
// Aug 21, 2000:
//  Added ability to omit DOCTYPE declaration.
286N/A
286N/A
286N/Apackage com.sun.org.apache.xml.internal.serialize;
286N/A
286N/A
286N/Aimport java.io.IOException;
286N/Aimport java.io.OutputStream;
286N/Aimport java.io.Writer;
286N/A
286N/Aimport com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;
286N/Aimport com.sun.org.apache.xerces.internal.impl.Constants;
286N/Aimport com.sun.org.apache.xerces.internal.util.NamespaceSupport;
286N/Aimport com.sun.org.apache.xerces.internal.util.SymbolTable;
286N/Aimport com.sun.org.apache.xerces.internal.util.XML11Char;
286N/Aimport com.sun.org.apache.xerces.internal.util.XMLChar;
286N/Aimport org.xml.sax.SAXException;
286N/Aimport org.w3c.dom.DOMError;
286N/A
286N/A/**
286N/A * Implements an XML serializer supporting both DOM and SAX pretty
286N/A * serializing. For usage instructions see {@link Serializer}.
286N/A * <p>
286N/A * If an output stream is used, the encoding is taken from the
286N/A * output format (defaults to <tt>UTF-8</tt>). If a writer is
286N/A * used, make sure the writer uses the same encoding (if applies)
286N/A * as specified in the output format.
286N/A * <p>
286N/A * The serializer supports both DOM and SAX. SAX serializing is done by firing
286N/A * SAX events and using the serializer as a document handler. DOM serializing is done
286N/A * by calling {@link #serialize(Document)} or by using DOM Level 3
286N/A * {@link org.w3c.dom.ls.DOMSerializer} and
286N/A * serializing with {@link org.w3c.dom.ls.DOMSerializer#write},
286N/A * {@link org.w3c.dom.ls.DOMSerializer#writeToString}.
286N/A * <p>
286N/A * If an I/O exception occurs while serializing, the serializer
286N/A * will not throw an exception directly, but only throw it
286N/A * at the end of serializing (either DOM or SAX's {@link
286N/A * org.xml.sax.DocumentHandler#endDocument}.
286N/A * <p>
286N/A * For elements that are not specified as whitespace preserving,
286N/A * the serializer will potentially break long text lines at space
286N/A * boundaries, indent lines, and serialize elements on separate
286N/A * lines. Line terminators will be regarded as spaces, and
286N/A * spaces at beginning of line will be stripped.
286N/A * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
286N/A * @author <a href="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a>
286N/A * @author Elena Litani IBM
286N/A * @see Serializer
286N/A */
286N/Apublic class XML11Serializer
286N/Aextends XMLSerializer {
286N/A
286N/A //
286N/A // constants
286N/A //
286N/A
286N/A protected static final boolean DEBUG = false;
286N/A
286N/A //
286N/A // data
286N/A //
286N/A
286N/A //
286N/A // DOM Level 3 implementation: variables intialized in DOMSerializerImpl
286N/A //
286N/A
286N/A /** stores namespaces in scope */
286N/A protected NamespaceSupport fNSBinder;
286N/A
286N/A /** stores all namespace bindings on the current element */
286N/A protected NamespaceSupport fLocalNSBinder;
286N/A
286N/A /** symbol table for serialization */
286N/A protected SymbolTable fSymbolTable;
286N/A
286N/A // is node dom level 1 node?
286N/A protected boolean fDOML1 = false;
286N/A // counter for new prefix names
286N/A protected int fNamespaceCounter = 1;
286N/A protected final static String PREFIX = "NS";
286N/A
286N/A /**
286N/A * Controls whether namespace fixup should be performed during
286N/A * the serialization.
286N/A * NOTE: if this field is set to true the following
286N/A * fields need to be initialized: fNSBinder, fLocalNSBinder, fSymbolTable,
286N/A * XMLSymbols.EMPTY_STRING, fXmlSymbol, fXmlnsSymbol, fNamespaceCounter.
286N/A */
286N/A protected boolean fNamespaces = false;
286N/A
286N/A
286N/A private boolean fPreserveSpace;
286N/A
286N/A
286N/A /**
286N/A * Constructs a new serializer. The serializer cannot be used without
286N/A * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
286N/A * first.
286N/A */
286N/A public XML11Serializer() {
286N/A super( );
286N/A _format.setVersion("1.1");
286N/A }
286N/A
286N/A
286N/A /**
286N/A * Constructs a new serializer. The serializer cannot be used without
286N/A * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
286N/A * first.
286N/A */
286N/A public XML11Serializer( OutputFormat format ) {
286N/A super( format );
286N/A _format.setVersion("1.1");
286N/A }
286N/A
286N/A
286N/A /**
286N/A * Constructs a new serializer that writes to the specified writer
286N/A * using the specified output format. If <tt>format</tt> is null,
286N/A * will use a default output format.
286N/A *
286N/A * @param writer The writer to use
286N/A * @param format The output format to use, null for the default
286N/A */
286N/A public XML11Serializer( Writer writer, OutputFormat format ) {
286N/A super( writer, format );
286N/A _format.setVersion("1.1");
286N/A }
286N/A
286N/A
286N/A /**
286N/A * Constructs a new serializer that writes to the specified output
286N/A * stream using the specified output format. If <tt>format</tt>
286N/A * is null, will use a default output format.
286N/A *
286N/A * @param output The output stream to use
286N/A * @param format The output format to use, null for the default
286N/A */
286N/A public XML11Serializer( OutputStream output, OutputFormat format ) {
286N/A super( output, format != null ? format : new OutputFormat( Method.XML, null, false ) );
286N/A _format.setVersion("1.1");
286N/A }
286N/A
286N/A //-----------------------------------------//
286N/A // SAX content handler serializing methods //
286N/A //-----------------------------------------//
286N/A
286N/A
286N/A public void characters( char[] chars, int start, int length )
286N/A throws SAXException
286N/A {
286N/A ElementState state;
286N/A
286N/A try {
286N/A state = content();
286N/A
286N/A // Check if text should be print as CDATA section or unescaped
286N/A // based on elements listed in the output format (the element
286N/A // state) or whether we are inside a CDATA section or entity.
286N/A
286N/A if ( state.inCData || state.doCData ) {
286N/A int saveIndent;
286N/A
286N/A // Print a CDATA section. The text is not escaped, but ']]>'
286N/A // appearing in the code must be identified and dealt with.
286N/A // The contents of a text node is considered space preserving.
286N/A if ( ! state.inCData ) {
286N/A _printer.printText( "<![CDATA[" );
286N/A state.inCData = true;
286N/A }
286N/A saveIndent = _printer.getNextIndent();
286N/A _printer.setNextIndent( 0 );
286N/A char ch;
286N/A final int end = start + length;
286N/A for ( int index = start; index < end; ++index ) {
286N/A ch = chars[index];
286N/A if ( ch == ']' && index + 2 < end &&
286N/A chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) {
286N/A _printer.printText("]]]]><![CDATA[>");
286N/A index +=2;
286N/A continue;
286N/A }
286N/A if (!XML11Char.isXML11Valid(ch)) {
286N/A // check if it is surrogate
286N/A if (++index < end) {
286N/A surrogates(ch, chars[index]);
286N/A }
286N/A else {
286N/A fatalError("The character '"+(char)ch+"' is an invalid XML character");
286N/A }
286N/A continue;
286N/A } else {
286N/A if ( _encodingInfo.isPrintable((char)ch) && XML11Char.isXML11ValidLiteral(ch)) {
286N/A _printer.printText((char)ch);
286N/A } else {
286N/A // The character is not printable -- split CDATA section
286N/A _printer.printText("]]>&#x");
286N/A _printer.printText(Integer.toHexString(ch));
286N/A _printer.printText(";<![CDATA[");
286N/A }
286N/A }
286N/A }
286N/A _printer.setNextIndent( saveIndent );
286N/A
286N/A } else {
286N/A
286N/A int saveIndent;
286N/A
286N/A if ( state.preserveSpace ) {
286N/A // If preserving space then hold of indentation so no
286N/A // excessive spaces are printed at line breaks, escape
286N/A // the text content without replacing spaces and print
286N/A // the text breaking only at line breaks.
286N/A saveIndent = _printer.getNextIndent();
286N/A _printer.setNextIndent( 0 );
286N/A printText( chars, start, length, true, state.unescaped );
286N/A _printer.setNextIndent( saveIndent );
286N/A } else {
286N/A printText( chars, start, length, false, state.unescaped );
286N/A }
286N/A }
286N/A } catch ( IOException except ) {
286N/A throw new SAXException( except );
286N/A }
286N/A }
286N/A
286N/A
286N/A //
286N/A // overwrite printing functions to make sure serializer prints out valid XML
286N/A //
286N/A protected void printEscaped( String source ) throws IOException {
286N/A int length = source.length();
286N/A for ( int i = 0 ; i < length ; ++i ) {
286N/A int ch = source.charAt(i);
286N/A if (!XML11Char.isXML11Valid(ch)) {
286N/A if (++i <length) {
286N/A surrogates(ch, source.charAt(i));
286N/A } else {
286N/A fatalError("The character '"+(char)ch+"' is an invalid XML character");
286N/A }
286N/A continue;
286N/A }
286N/A if (ch == '\n' || ch == '\r' || ch == '\t' || ch == 0x0085 || ch == 0x2028){
286N/A printHex(ch);
286N/A } else if (ch == '<') {
286N/A _printer.printText("&lt;");
286N/A } else if (ch == '&') {
286N/A _printer.printText("&amp;");
286N/A } else if (ch == '"') {
286N/A _printer.printText("&quot;");
286N/A } else if ((ch >= ' ' && _encodingInfo.isPrintable((char) ch))) {
286N/A _printer.printText((char) ch);
286N/A } else {
286N/A printHex(ch);
286N/A }
286N/A }
286N/A }
286N/A
286N/A protected final void printCDATAText(String text) throws IOException {
286N/A int length = text.length();
286N/A char ch;
286N/A
286N/A for (int index = 0; index < length; ++index) {
286N/A ch = text.charAt(index);
286N/A
286N/A if (ch == ']'
286N/A && index + 2 < length
286N/A && text.charAt(index + 1) == ']'
286N/A && text.charAt(index + 2) == '>') { // check for ']]>'
286N/A if (fDOMErrorHandler != null){
286N/A // REVISIT: this means that if DOM Error handler is not registered we don't report any
286N/A // fatal errors and might serialize not wellformed document
286N/A if ((features & DOMSerializerImpl.SPLITCDATA) == 0
286N/A && (features & DOMSerializerImpl.WELLFORMED) == 0) {
286N/A // issue fatal error
286N/A String msg =
286N/A DOMMessageFormatter.formatMessage(
286N/A DOMMessageFormatter.SERIALIZER_DOMAIN,
286N/A "EndingCDATA",
286N/A null);
286N/A modifyDOMError(
286N/A msg,
286N/A DOMError.SEVERITY_FATAL_ERROR,
286N/A null, fCurrentNode);
286N/A boolean continueProcess =
286N/A fDOMErrorHandler.handleError(fDOMError);
286N/A if (!continueProcess) {
286N/A throw new IOException();
286N/A }
286N/A } else {
286N/A // issue warning
286N/A String msg =
286N/A DOMMessageFormatter.formatMessage(
286N/A DOMMessageFormatter.SERIALIZER_DOMAIN,
286N/A "SplittingCDATA",
286N/A null);
286N/A modifyDOMError(
286N/A msg,
286N/A DOMError.SEVERITY_WARNING,
286N/A null, fCurrentNode);
286N/A fDOMErrorHandler.handleError(fDOMError);
286N/A }
286N/A }
286N/A // split CDATA section
286N/A _printer.printText("]]]]><![CDATA[>");
286N/A index += 2;
286N/A continue;
286N/A }
286N/A
286N/A if (!XML11Char.isXML11Valid(ch)) {
286N/A // check if it is surrogate
286N/A if (++index < length) {
286N/A surrogates(ch, text.charAt(index));
286N/A } else {
286N/A fatalError(
286N/A "The character '"
286N/A + (char) ch
286N/A + "' is an invalid XML character");
286N/A }
286N/A continue;
286N/A } else {
286N/A if (_encodingInfo.isPrintable((char) ch)
286N/A && XML11Char.isXML11ValidLiteral(ch)) {
286N/A _printer.printText((char) ch);
286N/A } else {
286N/A
286N/A // The character is not printable -- split CDATA section
286N/A _printer.printText("]]>&#x");
286N/A _printer.printText(Integer.toHexString(ch));
286N/A _printer.printText(";<![CDATA[");
286N/A }
286N/A }
286N/A }
286N/A }
286N/A
286N/A
286N/A // note that this "int" should, in all cases, be a char.
286N/A // REVISIT: make it a char...
286N/A protected final void printXMLChar( int ch ) throws IOException {
286N/A
286N/A if (ch == '\r' || ch == 0x0085 || ch == 0x2028) {
286N/A printHex(ch);
286N/A } else if ( ch == '<') {
286N/A _printer.printText("&lt;");
286N/A } else if (ch == '&') {
286N/A _printer.printText("&amp;");
286N/A } else if (ch == '>'){
286N/A // character sequence "]]>" can't appear in content, therefore
286N/A // we should escape '>'
286N/A _printer.printText("&gt;");
286N/A } else if ( _encodingInfo.isPrintable((char)ch) && XML11Char.isXML11ValidLiteral(ch)) {
286N/A _printer.printText((char)ch);
286N/A } else {
286N/A printHex(ch);
286N/A }
286N/A }
286N/A
286N/A
286N/A
286N/A protected final void surrogates(int high, int low) throws IOException{
286N/A if (XMLChar.isHighSurrogate(high)) {
286N/A if (!XMLChar.isLowSurrogate(low)) {
286N/A //Invalid XML
286N/A fatalError("The character '"+(char)low+"' is an invalid XML character");
286N/A }
286N/A else {
286N/A int supplemental = XMLChar.supplemental((char)high, (char)low);
286N/A if (!XML11Char.isXML11Valid(supplemental)) {
286N/A //Invalid XML
286N/A fatalError("The character '"+(char)supplemental+"' is an invalid XML character");
286N/A }
286N/A else {
286N/A if (content().inCData ) {
286N/A _printer.printText("]]>&#x");
286N/A _printer.printText(Integer.toHexString(supplemental));
286N/A _printer.printText(";<![CDATA[");
286N/A }
286N/A else {
286N/A printHex(supplemental);
286N/A }
286N/A }
286N/A }
286N/A } else {
286N/A fatalError("The character '"+(char)high+"' is an invalid XML character");
286N/A }
286N/A
286N/A }
286N/A
286N/A
286N/A protected void printText( String text, boolean preserveSpace, boolean unescaped )
286N/A throws IOException {
286N/A int index;
286N/A char ch;
286N/A int length = text.length();
286N/A if ( preserveSpace ) {
286N/A // Preserving spaces: the text must print exactly as it is,
286N/A // without breaking when spaces appear in the text and without
286N/A // consolidating spaces. If a line terminator is used, a line
286N/A // break will occur.
286N/A for ( index = 0 ; index < length ; ++index ) {
286N/A ch = text.charAt( index );
286N/A if (!XML11Char.isXML11Valid(ch)) {
286N/A // check if it is surrogate
286N/A if (++index <length) {
286N/A surrogates(ch, text.charAt(index));
286N/A } else {
286N/A fatalError("The character '"+(char)ch+"' is an invalid XML character");
286N/A }
286N/A continue;
286N/A }
286N/A if ( unescaped && XML11Char.isXML11ValidLiteral(ch)) {
286N/A _printer.printText( ch );
286N/A } else
286N/A printXMLChar( ch );
286N/A }
286N/A } else {
286N/A // Not preserving spaces: print one part at a time, and
286N/A // use spaces between parts to break them into different
286N/A // lines. Spaces at beginning of line will be stripped
286N/A // by printing mechanism. Line terminator is treated
286N/A // no different than other text part.
286N/A for ( index = 0 ; index < length ; ++index ) {
286N/A ch = text.charAt( index );
286N/A if (!XML11Char.isXML11Valid(ch)) {
286N/A // check if it is surrogate
286N/A if (++index <length) {
286N/A surrogates(ch, text.charAt(index));
286N/A } else {
286N/A fatalError("The character '"+(char)ch+"' is an invalid XML character");
286N/A }
286N/A continue;
286N/A }
286N/A
286N/A if ( unescaped && XML11Char.isXML11ValidLiteral(ch) )
286N/A _printer.printText( ch );
286N/A else
286N/A printXMLChar( ch);
286N/A }
286N/A }
286N/A }
286N/A
286N/A
286N/A
286N/A protected void printText( char[] chars, int start, int length,
286N/A boolean preserveSpace, boolean unescaped ) throws IOException {
286N/A int index;
286N/A char ch;
286N/A
286N/A if ( preserveSpace ) {
286N/A // Preserving spaces: the text must print exactly as it is,
286N/A // without breaking when spaces appear in the text and without
286N/A // consolidating spaces. If a line terminator is used, a line
286N/A // break will occur.
286N/A while ( length-- > 0 ) {
286N/A ch = chars[start++];
286N/A if (!XML11Char.isXML11Valid(ch)) {
286N/A // check if it is surrogate
286N/A if ( length-- > 0) {
286N/A surrogates(ch, chars[start++]);
286N/A } else {
286N/A fatalError("The character '"+(char)ch+"' is an invalid XML character");
286N/A }
286N/A continue;
286N/A }
286N/A if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
286N/A _printer.printText( ch );
286N/A else
286N/A printXMLChar( ch );
286N/A }
286N/A } else {
286N/A // Not preserving spaces: print one part at a time, and
286N/A // use spaces between parts to break them into different
286N/A // lines. Spaces at beginning of line will be stripped
286N/A // by printing mechanism. Line terminator is treated
286N/A // no different than other text part.
286N/A while ( length-- > 0 ) {
286N/A ch = chars[start++];
286N/A if (!XML11Char.isXML11Valid(ch)) {
286N/A // check if it is surrogate
286N/A if ( length-- > 0) {
286N/A surrogates(ch, chars[start++]);
286N/A } else {
286N/A fatalError("The character '"+(char)ch+"' is an invalid XML character");
286N/A }
286N/A continue;
286N/A }
286N/A
286N/A if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
286N/A _printer.printText( ch );
286N/A else
286N/A printXMLChar( ch );
286N/A }
286N/A }
286N/A }
286N/A
286N/A
286N/A public boolean reset() {
286N/A super.reset();
286N/A return true;
286N/A
286N/A }
286N/A
286N/A}