/* * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. */ /* * Copyright 2005 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.sun.org.apache.xerces.internal.impl; import java.io.EOFException; import java.io.IOException; import java.util.Locale; import java.util.Vector; import com.sun.xml.internal.stream.Entity; import com.sun.xml.internal.stream.XMLBufferListener; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader; import com.sun.org.apache.xerces.internal.impl.io.UCSReader; import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader; import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; import com.sun.org.apache.xerces.internal.util.EncodingMap; import com.sun.org.apache.xerces.internal.util.SymbolTable; import com.sun.org.apache.xerces.internal.util.XMLChar; import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; import com.sun.org.apache.xerces.internal.xni.QName; import com.sun.org.apache.xerces.internal.xni.XMLString; import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; import com.sun.org.apache.xerces.internal.xni.*; /** * Implements the entity scanner methods. 
*
 * @author Neeraj Bajaj, Sun Microsystems
 * @author Andy Clark, IBM
 * @author Arnaud Le Hors, IBM
 * @author K.Venugopal Sun Microsystems
 *
 */
public class XMLEntityScanner implements XMLLocator {

    /** The entity currently being scanned; {@code null} when none is set. */
    protected Entity.ScannedEntity fCurrentEntity = null ;

    /**
     * Buffer size recorded for this scanner. Starts at the entity manager's
     * default and is overwritten by {@code setBufferSize(int)}.
     */
    protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE;

    /** Entity manager handed in through the constructor or a reset call. */
    protected XMLEntityManager fEntityManager ;

    /** Debug switching readers for encodings. */
    private static final boolean DEBUG_ENCODINGS = false;

    /** Listeners which should know when load is being called */
    private Vector listeners = new Vector();

    /**
     * Lookup table indexed by character code (0..126). The name-scanning
     * methods consult it for characters below 127 instead of calling
     * {@code XMLChar.isName}. It is filled in by the static initializer.
     */
    private static final boolean [] VALID_NAMES = new boolean[127];

    /**
     * Debug printing of buffer. This debugging flag works best when you
     * resize the DEFAULT_BUFFER_SIZE down to something reasonable like
     * 64 characters.
     */
    private static final boolean DEBUG_BUFFER = false;

    /** Debug flag; its use site is not visible here (presumably skipString tracing). */
    private static final boolean DEBUG_SKIP_STRING = false;

    /**
     * To signal the end of the document entity, this exception will be thrown.
     * It is a shared instance whose {@code fillInStackTrace()} is overridden to
     * return {@code this}, so no stack trace is captured for it.
     */
    private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() {
        private static final long serialVersionUID = 980337771224675268L;
        public Throwable fillInStackTrace() {
            return this;
        }
    };

    /** Symbol table used to intern scanned names. */
    protected SymbolTable fSymbolTable = null;

    /** Error reporter obtained from the property or component manager. */
    protected XMLErrorReporter fErrorReporter = null;

    /** Buffer positions of whitespace characters recorded by scanLiteral. */
    int [] whiteSpaceLookup = new int[100];

    /** Number of entries of {@code whiteSpaceLookup} currently in use. */
    int whiteSpaceLen = 0;

    /**
     * When true, scanLiteral clears {@code whiteSpaceLen} on entry and records
     * the positions of space and tab characters it scans.
     */
    boolean whiteSpaceInfoNeeded = true;

    /**
     * Allow Java encoding names. This feature identifier is:
     * http://apache.org/xml/features/allow-java-encodings
     */
    protected boolean fAllowJavaEncodings;

    //Will be used only during internal subsets.
    //for appending data.

    /** Property identifier: symbol table. */
    protected static final String SYMBOL_TABLE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;

    /** Property identifier: error reporter. */
    protected static final String ERROR_REPORTER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;

    /** Feature identifier: allow Java encodings.
*/ protected static final String ALLOW_JAVA_ENCODINGS = Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE; protected PropertyManager fPropertyManager = null ; boolean isExternal = false; static { for(int i=0x0041;i<=0x005A ; i++){ VALID_NAMES[i]=true; } for(int i=0x0061;i<=0x007A; i++){ VALID_NAMES[i]=true; } for(int i=0x0030;i<=0x0039; i++){ VALID_NAMES[i]=true; } VALID_NAMES[45]=true; VALID_NAMES[46]=true; VALID_NAMES[58]=true; VALID_NAMES[95]=true; } // SAPJVM: Remember, that the XML version has explicitly been set, // so that XMLStreamReader.getVersion() can find that out. boolean xmlVersionSetExplicitly = false; // // Constructors // /** Default constructor. */ public XMLEntityScanner() { } // () /** private constructor, this class can only be instantiated within this class. Instance of this class should * be obtained using getEntityScanner() or getEntityScanner(ScannedEntity scannedEntity) * @see getEntityScanner() * @see getEntityScanner(ScannedEntity) */ public XMLEntityScanner(PropertyManager propertyManager, XMLEntityManager entityManager) { fEntityManager = entityManager ; reset(propertyManager); } // () // set buffer size: public final void setBufferSize(int size) { // REVISIT: Buffer size passed to entity scanner // was not being kept in synch with the actual size // of the buffers in each scanned entity. If any // of the buffers were actually resized, it was possible // that the parser would throw an ArrayIndexOutOfBoundsException // for documents which contained names which are longer than // the current buffer size. Conceivably the buffer size passed // to entity scanner could be used to determine a minimum size // for resizing, if doubling its size is smaller than this // minimum. -- mrglavas fBufferSize = size; } /** * Resets the components. 
*/ public void reset(PropertyManager propertyManager){ fSymbolTable = (SymbolTable)propertyManager.getProperty(SYMBOL_TABLE) ; fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(ERROR_REPORTER) ; fCurrentEntity = null; whiteSpaceLen = 0; whiteSpaceInfoNeeded = true; listeners.clear(); } /** * Resets the component. The component can query the component manager * about any features and properties that affect the operation of the * component. * * @param componentManager The component manager. * * @throws SAXException Thrown by component on initialization error. * For example, if a feature or property is * required for the operation of the component, the * component manager may throw a * SAXNotRecognizedException or a * SAXNotSupportedException. */ public void reset(XMLComponentManager componentManager) throws XMLConfigurationException { //System.out.println(" this is being called"); // xerces features fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false); //xerces properties fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE); fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER); fCurrentEntity = null; whiteSpaceLen = 0; whiteSpaceInfoNeeded = true; listeners.clear(); } // reset(XMLComponentManager) public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager, XMLErrorReporter reporter) { fCurrentEntity = null; fSymbolTable = symbolTable; fEntityManager = entityManager; fErrorReporter = reporter; } /** * Returns the XML version of the current entity. This will normally be the * value from the XML or text declaration or defaulted by the parser. Note that * that this value may be different than the version of the processing rules * applied to the current entity. For instance, an XML 1.1 document may refer to * XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire * document. 
Also note that, for a given entity, this value can only be considered
     * final once the XML or text declaration has been read or once it has been
     * determined that there is no such declaration.
     */
    public final String getXMLVersion() {
        if (fCurrentEntity == null) {
            return null;
        }
        return fCurrentEntity.xmlVersion;
    } // getXMLVersion():String

    /**
     * Sets the XML version. This method is used by the
     * scanners to report the value of the version pseudo-attribute
     * in an XML or text declaration. It also flags that the version
     * was set explicitly.
     *
     * @param xmlVersion the XML version of the current entity
     */
    public final void setXMLVersion(String xmlVersion) {
        xmlVersionSetExplicitly = true; // SAPJVM
        fCurrentEntity.xmlVersion = xmlVersion;
    } // setXMLVersion(String)

    /**
     * Makes the given entity the one being scanned and, when it is not null,
     * caches whether it is external.
     *
     * @param scannedEntity the entity to scan from now on; may be null
     */
    public final void setCurrentEntity(Entity.ScannedEntity scannedEntity) {
        fCurrentEntity = scannedEntity;
        if (fCurrentEntity == null) {
            return;
        }
        isExternal = fCurrentEntity.isExternal();
        if (DEBUG_BUFFER) {
            System.out.println("Current Entity is "+scannedEntity.name);
        }
    }

    /** Returns the entity currently being scanned, or null if there is none. */
    public Entity.ScannedEntity getCurrentEntity() {
        return fCurrentEntity;
    }

    //
    // XMLEntityReader methods
    //

    /**
     * Returns the base system identifier of the currently scanned
     * entity, or null if none is available.
     */
    public final String getBaseSystemId() {
        if (fCurrentEntity == null || fCurrentEntity.entityLocation == null) {
            return null;
        }
        return fCurrentEntity.entityLocation.getExpandedSystemId();
    } // getBaseSystemId():String

    /**
     * Ignored by this implementation.
     *
     * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String)
     */
    public void setBaseSystemId(String systemId) {
        // no-op
    }

    ///////////// Locator methods start.

    /** Returns the current entity's line number, or -1 when no entity is open. */
    public final int getLineNumber() {
        if (fCurrentEntity == null) {
            return -1;
        }
        return fCurrentEntity.lineNumber;
    }

    /**
     * Ignored by this implementation.
     *
     * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int)
     */
    public void setLineNumber(int line) {
        // no-op
    }

    /** Returns the current entity's column number, or -1 when no entity is open. */
    public final int getColumnNumber() {
        if (fCurrentEntity == null) {
            return -1;
        }
        return fCurrentEntity.columnNumber;
    }

    /**
     * Ignored by this implementation.
     *
     * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int)
     */
    public void setColumnNumber(int col) {
        // no-op
    }

    /**
     * Returns the character offset within the current entity, computed as the
     * count accumulated up to the last load plus the current buffer position,
     * or -1 when no entity is open.
     */
    public final int getCharacterOffset() {
        if (fCurrentEntity == null) {
            return -1;
        }
        return fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position;
    }

    /** Returns the expanded system identifier. */
    public final String getExpandedSystemId() {
        if (fCurrentEntity == null || fCurrentEntity.entityLocation == null) {
            return null;
        }
        return fCurrentEntity.entityLocation.getExpandedSystemId();
    }

    /**
     * Ignored by this implementation.
     *
     * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String)
     */
    public void setExpandedSystemId(String systemId) {
        // no-op
    }

    /** Returns the literal system identifier. */
    public final String getLiteralSystemId() {
        if (fCurrentEntity == null || fCurrentEntity.entityLocation == null) {
            return null;
        }
        return fCurrentEntity.entityLocation.getLiteralSystemId();
    }

    /**
     * Ignored by this implementation.
     *
     * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String)
     */
    public void setLiteralSystemId(String systemId) {
        // no-op
    }

    /** Returns the public identifier. */
    public final String getPublicId() {
        if (fCurrentEntity == null || fCurrentEntity.entityLocation == null) {
            return null;
        }
        return fCurrentEntity.entityLocation.getPublicId();
    }

    /**
     * Ignored by this implementation.
     *
     * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String)
     */
    public void setPublicId(String publicId) {
        // no-op
    }

    ///////////////// Locator methods finished.
/** the version of the current entity being scanned */ public void setVersion(String version){ fCurrentEntity.version = version; } public String getVersion(){ if (fCurrentEntity != null) return fCurrentEntity.version ; return null; } /** * Returns the encoding of the current entity. * Note that, for a given entity, this value can only be * considered final once the encoding declaration has been read (or once it * has been determined that there is no such declaration) since, no encoding * having been specified on the XMLInputSource, the parser * will make an initial "guess" which could be in error. */ public final String getEncoding() { if (fCurrentEntity != null) { return fCurrentEntity.encoding; } return null; } // getEncoding():String /** * Sets the encoding of the scanner. This method is used by the * scanners if the XMLDecl or TextDecl line contains an encoding * pseudo-attribute. *

* Note: The underlying character reader on the * current entity will be changed to accomodate the new encoding. * However, the new encoding is ignored if the current reader was * not constructed from an input stream (e.g. an external entity * that is resolved directly to the appropriate java.io.Reader * object). * * @param encoding The IANA encoding name of the new encoding. * * @throws IOException Thrown if the new encoding is not supported. * * @see com.sun.org.apache.xerces.internal.util.EncodingMap */ public final void setEncoding(String encoding) throws IOException { if (DEBUG_ENCODINGS) { System.out.println("$$$ setEncoding: "+encoding); } if (fCurrentEntity.stream != null) { // if the encoding is the same, don't change the reader and // re-use the original reader used by the OneCharReader // NOTE: Besides saving an object, this overcomes deficiencies // in the UTF-16 reader supplied with the standard Java // distribution (up to and including 1.3). The UTF-16 // decoder buffers 8K blocks even when only asked to read // a single char! -Ac if (fCurrentEntity.encoding == null || !fCurrentEntity.encoding.equals(encoding)) { // UTF-16 is a bit of a special case. If the encoding is UTF-16, // and we know the endian-ness, we shouldn't change readers. // If it's ISO-10646-UCS-(2|4), then we'll have to deduce // the endian-ness from the encoding we presently have. 
if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) { String ENCODING = encoding.toUpperCase(Locale.ENGLISH); if(ENCODING.equals("UTF-16")) return; if(ENCODING.equals("ISO-10646-UCS-4")) { if(fCurrentEntity.encoding.equals("UTF-16BE")) { fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE); } else { fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE); } return; } if(ENCODING.equals("ISO-10646-UCS-2")) { if(fCurrentEntity.encoding.equals("UTF-16BE")) { fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE); } else { fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE); } return; } } // wrap a new reader around the input stream, changing // the encoding if (DEBUG_ENCODINGS) { System.out.println("$$$ creating new reader from stream: "+ fCurrentEntity.stream); } //fCurrentEntity.stream.reset(); fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null); fCurrentEntity.encoding = encoding; } else { if (DEBUG_ENCODINGS) System.out.println("$$$ reusing old reader on stream"); } } } // setEncoding(String) /** Returns true if the current entity being scanned is external. */ public final boolean isExternal() { return fCurrentEntity.isExternal(); } // isExternal():boolean public int getChar(int relative) throws IOException{ if(arrangeCapacity(relative + 1, false)){ return fCurrentEntity.ch[fCurrentEntity.position + relative]; }else{ return -1; } }//getChar() /** * Returns the next character on the input. *

* Note: The character is not consumed. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ public int peekChar() throws IOException { if (DEBUG_BUFFER) { System.out.print("(peekChar: "); print(); System.out.println(); } // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { invokeListeners(0); load(0, true); } // peek at character int c = fCurrentEntity.ch[fCurrentEntity.position]; // return peeked character if (DEBUG_BUFFER) { System.out.print(")peekChar: "); print(); if (isExternal) { System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'"); } else { System.out.println(" -> '"+(char)c+"'"); } } if (isExternal) { return c != '\r' ? c : '\n'; } else { return c; } } // peekChar():int /** * Returns the next character on the input. *

* Note: The character is consumed.
     *
     * A line feed, and for external entities a carriage return optionally
     * followed by a line feed, advances the line number, restarts the column
     * count and is returned as a single line feed.
     *
     * @return the character that was consumed
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public int scanChar() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanChar: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            invokeListeners(0);
            load(0, true);
        }

        // scan character
        int c = fCurrentEntity.ch[fCurrentEntity.position++];
        if (c == '\n' ||
                (c == '\r' && isExternal)) {
            fCurrentEntity.lineNumber++;
            fCurrentEntity.columnNumber = 1;
            // The newline was the last buffered character. Keep it in slot 0
            // and load behind it, so that a '\n' following a '\r' can still be
            // inspected below.
            // NOTE(review): assumes load(1, false) fills the buffer starting
            // at index 1 and leaves position there - confirm against load().
            if (fCurrentEntity.position == fCurrentEntity.count) {
                invokeListeners(1);
                fCurrentEntity.ch[0] = (char)c;
                load(1, false);
            }
            if (c == '\r' && isExternal) {
                // swallow the '\n' of a "\r\n" pair; otherwise step back
                if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
                    fCurrentEntity.position--;
                }
                c = '\n';
            }
        }

        // return character that was scanned
        if (DEBUG_BUFFER) {
            System.out.print(")scanChar: ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        // counted for every character, including a newline that has just
        // reset the column to 1
        fCurrentEntity.columnNumber++;
        return c;

    } // scanChar():int

    /**
     * Returns a string matching the NMTOKEN production appearing immediately
     * on the input as a symbol, or null if no NMTOKEN string is present.
     *

* Note: The NMTOKEN characters are consumed.
     *
     * Note: The string returned must be a symbol. The
     * SymbolTable can be used for this purpose.
     *
     * @return the interned NMTOKEN, or null if no name character was found
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.SymbolTable
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
     */
    public String scanNmtoken() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanNmtoken: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            invokeListeners(0);
            load(0, true);
        }

        // scan nmtoken
        int offset = fCurrentEntity.position;
        boolean vc = false;
        char c;
        while (true){
            //while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            // characters below 127 use the precomputed table, others XMLChar
            if(c < 127){
                vc = VALID_NAMES[c];
            }else{
                vc = XMLChar.isName(c);
            }
            if(!vc)break;

            // Buffer exhausted in the middle of the token: move the part
            // scanned so far to the front of the buffer (growing it when the
            // token already fills it) and load more characters behind it.
            if (++fCurrentEntity.position == fCurrentEntity.count) {
                int length = fCurrentEntity.position - offset;
                invokeListeners(length);
                if (length == fCurrentEntity.fBufferSize) {
                    // bad luck we have to resize our buffer
                    char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
                    System.arraycopy(fCurrentEntity.ch, offset,
                            tmp, 0, length);
                    fCurrentEntity.ch = tmp;
                    fCurrentEntity.fBufferSize *= 2;
                } else {
                    System.arraycopy(fCurrentEntity.ch, offset,
                            fCurrentEntity.ch, 0, length);
                }
                offset = 0;
                // NOTE(review): a true result from load() ends the scan -
                // presumably it means the entity changed or input ran out;
                // confirm against load().
                if (load(length, false)) {
                    break;
                }
            }
        }
        int length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length;

        // return nmtoken
        String symbol = null;
        if (length > 0) {
            symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
        }
        if (DEBUG_BUFFER) {
            System.out.print(")scanNmtoken: ");
            print();
            System.out.println(" -> "+String.valueOf(symbol));
        }
        return symbol;

    } // scanNmtoken():String

    /**
     * Returns a string matching the Name production appearing immediately
     * on the input as a symbol, or null if no Name string is present.
     *

* Note: The Name characters are consumed.
     *
     * Note: The string returned must be a symbol. The
     * SymbolTable can be used for this purpose.
     *
     * @return the interned name, or null if the next character cannot start a name
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.SymbolTable
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
     */
    public String scanName() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanName: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            invokeListeners(0);
            load(0, true);
        }

        // scan name
        int offset = fCurrentEntity.position;
        if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
            // The start character was the last one buffered: keep it in
            // slot 0 and load behind it.
            if (++fCurrentEntity.position == fCurrentEntity.count) {
                invokeListeners(1);
                fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
                offset = 0;
                // load() returning true ends the scan with a one-character
                // name. NOTE(review): presumably true means the entity
                // changed or input ended - confirm against load().
                if (load(1, false)) {
                    fCurrentEntity.columnNumber++;
                    String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);

                    if (DEBUG_BUFFER) {
                        System.out.print(")scanName: ");
                        print();
                        System.out.println(" -> "+String.valueOf(symbol));
                    }
                    return symbol;
                }
            }
            boolean vc =false;
            while (true ){
                //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
                char c = fCurrentEntity.ch[fCurrentEntity.position];
                // characters below 127 use the precomputed table, others XMLChar
                if(c < 127){
                    vc = VALID_NAMES[c];
                }else{
                    vc = XMLChar.isName(c);
                }
                if(!vc)break;
                // Buffer exhausted in the middle of the name: move the part
                // scanned so far to the front (growing the buffer when the
                // name already fills it) and load more behind it.
                if (++fCurrentEntity.position == fCurrentEntity.count) {
                    int length = fCurrentEntity.position - offset;
                    invokeListeners(length);
                    if (length == fCurrentEntity.fBufferSize) {
                        // bad luck we have to resize our buffer
                        char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
                        System.arraycopy(fCurrentEntity.ch, offset,
                                tmp, 0, length);
                        fCurrentEntity.ch = tmp;
                        fCurrentEntity.fBufferSize *= 2;
                    } else {
                        System.arraycopy(fCurrentEntity.ch, offset,
                                fCurrentEntity.ch, 0, length);
                    }
                    offset = 0;
                    if (load(length, false)) {
                        break;
                    }
                }
            }
        }
        int length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length;

        // return name
        String symbol;
        if (length > 0) {
            symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
        } else
            symbol = null;
        if (DEBUG_BUFFER) {
            System.out.print(")scanName: ");
            print();
            System.out.println(" -> "+String.valueOf(symbol));
        }
        return symbol;

    } // scanName():String

    /**
     * Scans a qualified name from the input, setting the fields of the
     * QName structure appropriately.
     *

* Note: The qualified name characters are consumed.
     *
     * Note: The strings used to set the values of the
     * QName structure must be symbols. The SymbolTable can be used for
     * this purpose.
     *
     * @param qname The qualified name structure to fill.
     *
     * @return Returns true if a qualified name appeared immediately on
     *         the input and was scanned, false otherwise.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.SymbolTable
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
     */
    public boolean scanQName(QName qname) throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanQName, "+qname+": ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            invokeListeners(0);
            load(0, true);
        }

        // scan qualified name
        int offset = fCurrentEntity.position;

        // Check that the character is a valid name start character as
        // defined by production [5] in the XML 1.0 specification:
        // Name ::= (Letter | '_' | ':') (NameChar)*

        if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
            // The start character was the last one buffered: keep it in
            // slot 0 and load behind it.
            if (++fCurrentEntity.position == fCurrentEntity.count) {
                invokeListeners(1);
                fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
                offset = 0;

                // load() returning true ends the scan with a one-character
                // name. NOTE(review): presumably true means the entity
                // changed or input ended - confirm against load().
                if (load(1, false)) {
                    fCurrentEntity.columnNumber++;
                    //adding into symbol table.
                    //XXX We are trying to add single character in SymbolTable??????
                    String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
                    // no prefix: local part and raw name are the same symbol
                    qname.setValues(null, name, name, null);
                    if (DEBUG_BUFFER) {
                        System.out.print(")scanQName, "+qname+": ");
                        print();
                        System.out.println(" -> true");
                    }
                    return true;
                }
            }
            // buffer index of the first ':' seen, or -1 while none was seen
            int index = -1;
            boolean vc = false;
            while ( true){

                //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
                char c = fCurrentEntity.ch[fCurrentEntity.position];
                // characters below 127 use the precomputed table, others XMLChar
                if(c < 127){
                    vc = VALID_NAMES[c];
                }else{
                    vc = XMLChar.isName(c);
                }
                if(!vc)break;
                if (c == ':') {
                    // a second colon ends the qualified name
                    if (index != -1) {
                        break;
                    }
                    index = fCurrentEntity.position;
                }
                // Buffer exhausted in the middle of the name: move the part
                // scanned so far to the front (growing the buffer when the
                // name already fills it) and load more behind it.
                if (++fCurrentEntity.position == fCurrentEntity.count) {
                    int length = fCurrentEntity.position - offset;
                    invokeListeners(length);
                    if (length == fCurrentEntity.fBufferSize) {
                        // bad luck we have to resize our buffer
                        char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
                        System.arraycopy(fCurrentEntity.ch, offset,
                                tmp, 0, length);
                        fCurrentEntity.ch = tmp;
                        fCurrentEntity.fBufferSize *= 2;
                    } else {
                        System.arraycopy(fCurrentEntity.ch, offset,
                                fCurrentEntity.ch, 0, length);
                    }
                    // the colon moved together with the shifted characters
                    if (index != -1) {
                        index = index - offset;
                    }
                    offset = 0;
                    if (load(length, false)) {
                        break;
                    }
                }
            }
            int length = fCurrentEntity.position - offset;
            fCurrentEntity.columnNumber += length;
            if (length > 0) {
                String prefix = null;
                String localpart = null;
                String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch,
                        offset, length);

                if (index != -1) {
                    // split "prefix:localpart" around the recorded colon
                    int prefixLength = index - offset;
                    prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
                            offset, prefixLength);
                    int len = length - prefixLength - 1;
                    localpart = fSymbolTable.addSymbol(fCurrentEntity.ch,
                            index + 1, len);

                } else {
                    localpart = rawname;
                }
                qname.setValues(prefix, localpart, rawname, null);
                if (DEBUG_BUFFER) {
                    System.out.print(")scanQName, "+qname+": ");
                    print();
                    System.out.println(" -> true");
                }
                return true;
            }
        }

        // no qualified name found
        if (DEBUG_BUFFER) {
            System.out.print(")scanQName, "+qname+": ");
            print();
            System.out.println(" -> false");
        }
        return false;

    } // scanQName(QName):boolean

    /**
     * CHANGED:
     * 
Scans a range of parsed character data, setting the fields of the
     * supplied XMLString structure to the scanned range. The data is not
     * appended: the structure is pointed at the range with setValues.
     *
     * Note: The characters are consumed.
     *
     * Note: This method does not guarantee to return
     * the longest run of parsed character data. This method may return
     * before markup due to reaching the end of the input buffer or any
     * other reason.
     *
     * @param content The content structure to fill.
     *
     * @return Returns the next character on the input, if known. This
     *         value may be -1 but this does not designate
     *         end of file.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public int scanContent(XMLString content) throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanContent: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            invokeListeners(0);
            load(0, true);
        } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            // only one character is left: move it to slot 0 and load behind it
            invokeListeners(0);
            fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
            load(1, false);
            fCurrentEntity.position = 0;
        }

        // normalize newlines
        int offset = fCurrentEntity.position;
        int c = fCurrentEntity.ch[offset];
        // number of normalized line breaks produced by the loop below
        int newlines = 0;
        if (c == '\n' || (c == '\r' && isExternal)) {
            if (DEBUG_BUFFER) {
                System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
            do {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == '\r' && isExternal) {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    // Buffer exhausted: restart at the front, keeping room
                    // for the newlines seen so far, and load behind them.
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        offset = 0;
                        invokeListeners(newlines);
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false)) {
                            break;
                        }
                    }
                    // "\r\n" counts as one line break: skip the '\n' and
                    // drop one character from the reported range
                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                        fCurrentEntity.position++;
                        offset++;
                    }
                    /*** NEWLINE NORMALIZATION ***/
                    else {
                        newlines++;
                    }
                } else if (c == '\n') {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        offset = 0;
                        invokeListeners(newlines);
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false)) {
                            break;
                        }
                    }
                } else {
                    // not a newline: un-read it and leave the loop
                    fCurrentEntity.position--;
                    break;
                }
            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
            // overwrite the scanned newline run with plain line feeds
            for (int i = offset; i < fCurrentEntity.position; i++) {
                fCurrentEntity.ch[i] = '\n';
            }
            int length = fCurrentEntity.position - offset;
            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                // Only the newline run is reported when a single character
                // remains in the buffer. The value is replaced via setValues;
                // the earlier idea of appending (content.append) is disabled.
                content.setValues(fCurrentEntity.ch, offset, length);
                //content.append(fCurrentEntity.ch, offset, length);
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                return -1;
            }
            if (DEBUG_BUFFER) {
                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
        }

        // consume characters until one that XMLChar.isContent rejects
        while (fCurrentEntity.position < fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position++];
            if (!XMLChar.isContent(c)) {
                fCurrentEntity.position--;
                break;
            }
        }
        int length = fCurrentEntity.position - offset;
        // newline characters do not advance the column
        fCurrentEntity.columnNumber += length - newlines;

        // The value is replaced via setValues; the earlier idea of appending
        // (content.append) is disabled.
        content.setValues(fCurrentEntity.ch, offset, length);
        //content.append(fCurrentEntity.ch, offset, length);
        // return next character
        if (fCurrentEntity.position != fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            // REVISIT: Does this need to be updated to fix the
            //          #x0D ^#x0A newline normalization problem? -Ac
            if (c == '\r' && isExternal) {
                c = '\n';
            }
        } else {
            c = -1;
        }
        if (DEBUG_BUFFER) {
            System.out.print(")scanContent: ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        return c;

    } // scanContent(XMLString):int

    /**
     * Scans a range of attribute value data, setting the fields of the
     * XMLString structure, appropriately.
     *

* Note: The characters are consumed. *

* Note: This method does not guarantee to return * the longest run of attribute value data. This method may return * before the quote character due to reaching the end of the input * buffer or any other reason. *

* Note: The fields contained in the XMLString * structure are not guaranteed to remain valid upon subsequent calls * to the entity scanner. Therefore, the caller is responsible for * immediately using the returned character data or making a copy of * the character data. * * @param quote The quote character that signifies the end of the * attribute value data. * @param content The content structure to fill. * * @return Returns the next character on the input, if known. This * value may be -1 but this does note designate * end of file. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ public int scanLiteral(int quote, XMLString content) throws IOException { if (DEBUG_BUFFER) { System.out.print("(scanLiteral, '"+(char)quote+"': "); print(); System.out.println(); } // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { invokeListeners(0); load(0, true); } else if (fCurrentEntity.position == fCurrentEntity.count - 1) { invokeListeners(0); fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1]; load(1, false); fCurrentEntity.position = 0; } // normalize newlines int offset = fCurrentEntity.position; int c = fCurrentEntity.ch[offset]; int newlines = 0; if(whiteSpaceInfoNeeded) whiteSpaceLen=0; if (c == '\n' || (c == '\r' && isExternal)) { if (DEBUG_BUFFER) { System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": "); print(); System.out.println(); } do { c = fCurrentEntity.ch[fCurrentEntity.position++]; if (c == '\r' && isExternal) { newlines++; fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count) { invokeListeners(newlines); offset = 0; fCurrentEntity.position = newlines; if (load(newlines, false)) { break; } } if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { fCurrentEntity.position++; offset++; } /*** NEWLINE NORMALIZATION ***/ else { newlines++; } /***/ } else if (c == '\n') { newlines++; 
fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count) { offset = 0; invokeListeners(newlines); fCurrentEntity.position = newlines; if (load(newlines, false)) { break; } } /*** NEWLINE NORMALIZATION *** * if (fCurrentEntity.ch[fCurrentEntity.position] == '\r' * && external) { * fCurrentEntity.position++; * offset++; * } * /***/ } else { fCurrentEntity.position--; break; } } while (fCurrentEntity.position < fCurrentEntity.count - 1); int i=0; for ( i = offset; i < fCurrentEntity.position; i++) { fCurrentEntity.ch[i] = '\n'; whiteSpaceLookup[whiteSpaceLen++]=i; } int length = fCurrentEntity.position - offset; if (fCurrentEntity.position == fCurrentEntity.count - 1) { content.setValues(fCurrentEntity.ch, offset, length); if (DEBUG_BUFFER) { System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); print(); System.out.println(); } return -1; } if (DEBUG_BUFFER) { System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); print(); System.out.println(); } } // scan literal value while (fCurrentEntity.position < fCurrentEntity.count) { c = fCurrentEntity.ch[fCurrentEntity.position++]; if ((c == quote && (!fCurrentEntity.literal || isExternal)) || c == '%' || !XMLChar.isContent(c)) { fCurrentEntity.position--; break; } if(whiteSpaceInfoNeeded){ if(c == 0x20 || c == 0x9){ if(whiteSpaceLen < whiteSpaceLookup.length){ whiteSpaceLookup[whiteSpaceLen++]= fCurrentEntity.position-1; }else{ int [] tmp = new int[whiteSpaceLookup.length*2]; System.arraycopy(whiteSpaceLookup,0,tmp,0,whiteSpaceLookup.length); whiteSpaceLookup = tmp; whiteSpaceLookup[whiteSpaceLen++]= fCurrentEntity.position - 1; } } } } int length = fCurrentEntity.position - offset; fCurrentEntity.columnNumber += length - newlines; content.setValues(fCurrentEntity.ch, offset, length); // return next character if (fCurrentEntity.position != fCurrentEntity.count) { c = fCurrentEntity.ch[fCurrentEntity.position]; // NOTE: We don't 
want to accidentally signal the // end of the literal if we're expanding an // entity appearing in the literal. -Ac if (c == quote && fCurrentEntity.literal) { c = -1; } } else { c = -1; } if (DEBUG_BUFFER) { System.out.print(")scanLiteral, '"+(char)quote+"': "); print(); System.out.println(" -> '"+(char)c+"'"); } return c; } // scanLiteral(int,XMLString):int //CHANGED: /** * Scans a range of character data up to the specified delimiter, * setting the fields of the XMLString structure, appropriately. *

* Note: The characters are consumed. *

* Note: This assumes that the internal buffer is at least as large as the * length of the delimiter and that the delimiter contains at least one character. *

* Note: This method does not guarantee to return * the longest run of character data. This method may return before * the delimiter due to reaching the end of the input buffer or any * other reason. *

* @param delimiter The string that signifies the end of the character
     *                  data to be scanned.
     * @param buffer    The XMLStringBuffer to fill. Scanned characters are
     *                  appended to it; the delimiter itself is not appended.
     *
     * @return Returns true if there is more data to scan, false otherwise.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    public boolean scanData(String delimiter, XMLStringBuffer buffer)
    throws IOException {

        boolean done = false;
        int delimLen = delimiter.length();
        char charAt0 = delimiter.charAt(0);
        do {
            if (DEBUG_BUFFER) {
                System.out.print("(scanData: ");
                print();
                System.out.println();
            }

            // load more characters, if needed
            if (fCurrentEntity.position == fCurrentEntity.count) {
                load(0, true);
            }

            // Fewer than delimLen characters remain: shift the unread tail
            // to the start of the buffer and load more behind it, until
            // enough characters are available or load() reports that the
            // entity ended.
            boolean bNextEntity = false;

            while ((fCurrentEntity.position > fCurrentEntity.count - delimLen)
                && (!bNextEntity))
            {
                System.arraycopy(fCurrentEntity.ch,
                                 fCurrentEntity.position,
                                 fCurrentEntity.ch,
                                 0,
                                 fCurrentEntity.count - fCurrentEntity.position);

                bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false);
                fCurrentEntity.position = 0;
                fCurrentEntity.startPosition = 0;
            }

            if (fCurrentEntity.position > fCurrentEntity.count - delimLen) {
                // something must be wrong with the input:  e.g., file ends in an unterminated comment
                // Hand back whatever is left and report "no more data".
                int length = fCurrentEntity.count - fCurrentEntity.position;
                buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length);
                fCurrentEntity.columnNumber += fCurrentEntity.count;
                fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
                fCurrentEntity.position = fCurrentEntity.count;
                fCurrentEntity.startPosition = fCurrentEntity.count;
                load(0, true);
                return false;
            }

            // normalize newlines
            int offset = fCurrentEntity.position;
            int c = fCurrentEntity.ch[offset];
            int newlines = 0;
            if (c == '\n' || (c == '\r' && isExternal)) {
                if (DEBUG_BUFFER) {
                    System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                do {
                    c = fCurrentEntity.ch[fCurrentEntity.position++];
                    if (c == '\r' && isExternal) {
                        newlines++;
                        fCurrentEntity.lineNumber++;
                        fCurrentEntity.columnNumber = 1;
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // Buffer exhausted inside the newline run: reserve
                            // the first `newlines` slots and load after them.
                            offset = 0;
                            invokeListeners(newlines);
                            fCurrentEntity.position = newlines;
                            if (load(newlines, false)) {
                                break;
                            }
                        }
                        if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                            // "\r\n" pair: consume the '\n' and advance
                            // offset so the pair yields one newline.
                            fCurrentEntity.position++;
                            offset++;
                        }
                        /*** NEWLINE NORMALIZATION ***/
                        else {
                            newlines++;
                        }
                    } else if (c == '\n') {
                        newlines++;
                        fCurrentEntity.lineNumber++;
                        fCurrentEntity.columnNumber = 1;
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            offset = 0;
                            invokeListeners(newlines);
                            fCurrentEntity.position = newlines;
                            // NOTE(review): count is reset here but not in the
                            // '\r' branch above -- confirm this asymmetry is
                            // intentional.
                            fCurrentEntity.count = newlines;
                            if (load(newlines, false)) {
                                break;
                            }
                        }
                    } else {
                        // Not a newline character: un-read it and stop.
                        fCurrentEntity.position--;
                        break;
                    }
                } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                // Overwrite the consumed run with '\n' characters.
                for (int i = offset; i < fCurrentEntity.position; i++) {
                    fCurrentEntity.ch[i] = '\n';
                }
                int length = fCurrentEntity.position - offset;
                if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                    // Only one unread character is left after the newlines:
                    // return the newlines now and let the caller call again.
                    buffer.append(fCurrentEntity.ch, offset, length);
                    if (DEBUG_BUFFER) {
                        System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                        print();
                        System.out.println();
                    }
                    return true;
                }
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
            }

            // iterate over buffer looking for delimiter
            OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == charAt0) {
                    // looks like we just hit the delimiter
                    int delimOffset = fCurrentEntity.position - 1;
                    for (int i = 1; i < delimLen; i++) {
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // Ran out of buffer mid-delimiter: rewind to the
                            // delimiter's first character and leave the scan.
                            fCurrentEntity.position -= i;
                            break OUTER;
                        }
                        c = fCurrentEntity.ch[fCurrentEntity.position++];
                        if (delimiter.charAt(i) != c) {
                            // Mismatch: rewind to just after the first
                            // delimiter character and keep scanning.
                            fCurrentEntity.position -= i;
                            break;
                        }
                    }
                    if (fCurrentEntity.position == delimOffset + delimLen) {
                        done = true;
                        break;
                    }
                } else if (c == '\n' || (isExternal && c == '\r')) {
                    // Newlines are handled by the normalization code at the
                    // top of the next pass of the outer do-loop.
                    fCurrentEntity.position--;
                    break;
                } else if (XMLChar.isInvalid(c)) {
                    // Stop in front of the invalid character and return what
                    // was scanned so far.
                    fCurrentEntity.position--;
                    int length = fCurrentEntity.position - offset;
                    fCurrentEntity.columnNumber += length - newlines;
                    buffer.append(fCurrentEntity.ch, offset, length);
                    return true;
                }
            }
            int length = fCurrentEntity.position - offset;
            fCurrentEntity.columnNumber += length - newlines;
            if (done) {
                // The delimiter was consumed but is not part of the data.
                length -= delimLen;
            }
            buffer.append(fCurrentEntity.ch, offset, length);

            // return true if string was skipped
            if (DEBUG_BUFFER) {
                System.out.print(")scanData: ");
                print();
                System.out.println(" -> " + done);
            }
        } while (!done);
        // The loop only exits normally once done is true, so this is false.
        return !done;

    } // scanData(String,XMLString)

    /**
     * Skips a character appearing immediately on the input.
     *

* Note: The character is consumed only if it matches * the specified character. * * @param c The character to skip. * * @return Returns true if the character was skipped. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ public boolean skipChar(int c) throws IOException { if (DEBUG_BUFFER) { System.out.print("(skipChar, '"+(char)c+"': "); print(); System.out.println(); } // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { invokeListeners(0); load(0, true); } // skip character int cc = fCurrentEntity.ch[fCurrentEntity.position]; if (cc == c) { fCurrentEntity.position++; if (c == '\n') { fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; } else { fCurrentEntity.columnNumber++; } if (DEBUG_BUFFER) { System.out.print(")skipChar, '"+(char)c+"': "); print(); System.out.println(" -> true"); } return true; } else if (c == '\n' && cc == '\r' && isExternal) { // handle newlines if (fCurrentEntity.position == fCurrentEntity.count) { invokeListeners(1); fCurrentEntity.ch[0] = (char)cc; load(1, false); } fCurrentEntity.position++; if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { fCurrentEntity.position++; } fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (DEBUG_BUFFER) { System.out.print(")skipChar, '"+(char)c+"': "); print(); System.out.println(" -> true"); } return true; } // character was not skipped if (DEBUG_BUFFER) { System.out.print(")skipChar, '"+(char)c+"': "); print(); System.out.println(" -> false"); } return false; } // skipChar(int):boolean public boolean isSpace(char ch){ return (ch == ' ') || (ch == '\n') || (ch == '\t') || (ch == '\r'); } /** * Skips space characters appearing immediately on the input. *

* Note: The characters are consumed only if they are
     * space characters.
     *
     * @return Returns true if at least one space character was skipped.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
     */
    public boolean skipSpaces() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(skipSpaces: ");
            print();
            System.out.println();
        }
        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            invokeListeners(0);
            load(0, true);
        }

        // This check is done only in skipSpaces() because it is called by
        // fMiscDispatcher and the parser should exit gracefully when the
        // document is well-formed. It is possible that the end of the
        // document was reached and fCurrentEntity became null; nothing was
        // read, so false is returned.
        if(fCurrentEntity == null){
            return false ;
        }

        // skip spaces
        int c = fCurrentEntity.ch[fCurrentEntity.position];
        if (XMLChar.isSpace(c)) {
            do {
                boolean entityChanged = false;
                // handle newlines
                if (c == '\n' || (isExternal && c == '\r')) {
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                        // The newline is the last buffered character: keep
                        // it in slot 0 and load the following data after it.
                        invokeListeners(0);
                        fCurrentEntity.ch[0] = (char)c;
                        entityChanged = load(1, true);
                        if (!entityChanged){
                            // the load changed the position to be 1,
                            // need to restore it when entity not changed
                            fCurrentEntity.position = 0;
                        }else if(fCurrentEntity == null){
                            return true ;
                        }
                    }
                    if (c == '\r' && isExternal) {
                        // REVISIT: Does this need to be updated to fix the
                        //          #x0D ^#x0A newline normalization problem? -Ac
                        // Peek past the '\r'; stay on it unless a '\n'
                        // follows (the position++ below then steps over
                        // the last character of the newline).
                        if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
                            fCurrentEntity.position--;
                        }
                    }
                } else {
                    fCurrentEntity.columnNumber++;
                }
                // load more characters, if needed
                if (!entityChanged){
                    fCurrentEntity.position++;
                }

                if (fCurrentEntity.position == fCurrentEntity.count) {
                    invokeListeners(0);
                    load(0, true);

                    // Same graceful-exit check as at the top of the method:
                    // fCurrentEntity may have become null at the end of the
                    // document. At least one space was skipped by now, so
                    // true is returned.
                    if(fCurrentEntity == null){
                        return true ;
                    }

                }
            } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
            if (DEBUG_BUFFER) {
                System.out.print(")skipSpaces: ");
                print();
                System.out.println(" -> true");
            }
            return true;
        }

        // no spaces were found
        if (DEBUG_BUFFER) {
            System.out.print(")skipSpaces: ");
            print();
            System.out.println(" -> false");
        }
        return false;

    } // skipSpaces():boolean


    /**
     * Checks that the given number of characters is available in the
     * underlying buffer, loading more data if necessary. Delegates to
     * {@link #arrangeCapacity(int, boolean)} with <code>changeEntity</code>
     * set to false.
     *
     * @param length The number of characters that must be available.
     * @return Returns true if the requested capacity is available.
     *
     * @throws IOException Thrown if i/o error occurs.
     */
    public boolean arrangeCapacity(int length) throws IOException{
        return arrangeCapacity(length, false);
    }

    /**
     * Checks that the given number of characters is available in the
     * underlying buffer, starting at the current position, loading more
     * data if necessary. The current position is preserved across loads.
     *
     * @param length       The number of characters that must be available.
     * @param changeEntity Passed through to <code>load</code>: whether the
     *                     load may change the current entity.
     * @return Returns true if the requested capacity is available.
     *
     * @throws IOException Thrown if i/o error occurs.
     */
    public boolean arrangeCapacity(int length, boolean changeEntity) throws IOException{
        //check if the capacity is available in the current buffer
        //count is no. of characters in the buffer   [x][m][l]
        //position is '0' based
        //System.out.println("fCurrent Entity " + fCurrentEntity);
        if((fCurrentEntity.count - fCurrentEntity.position) >= length) {
            return true;
        }
        if(DEBUG_SKIP_STRING){
            System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
            System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
            System.out.println("length = " + length);
        }
        boolean entityChanged = false;
        //load more characters -- this function shouldn't change the entity
        while((fCurrentEntity.count - fCurrentEntity.position) < length){
            if( (fCurrentEntity.ch.length - fCurrentEntity.position) < length){
                // Not enough room behind the current position: shift the
                // unread characters to the start of the buffer.
                invokeListeners(0);
                System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch,0,fCurrentEntity.count - fCurrentEntity.position);
                fCurrentEntity.count = fCurrentEntity.count - fCurrentEntity.position;
                fCurrentEntity.position = 0;
            }

            if((fCurrentEntity.count - fCurrentEntity.position) < length){
                // Append new data after the buffered characters; load()
                // moves the position, so it is restored afterwards.
                int pos = fCurrentEntity.position;
                invokeListeners(pos);
                entityChanged = load(fCurrentEntity.count, changeEntity);
                fCurrentEntity.position = pos;
                if(entityChanged)break;
            }
            if(DEBUG_SKIP_STRING){
                System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
                System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
                System.out.println("length = " + length);
            }
        }
        //load changes the position.. set it back to the point where we started.

        //after loading check again.
        if((fCurrentEntity.count - fCurrentEntity.position) >= length) {
            return true;
        } else {
            return false;
        }
    }

    /**
     * Skips the specified string appearing immediately on the input.
     *

* Note: The characters are consumed only if all * the characters are skipped. * * @param s The string to skip. * * @return Returns true if the string was skipped. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ public boolean skipString(String s) throws IOException { final int length = s.length(); //first make sure that required capacity is avaible if(arrangeCapacity(length, false)){ final int beforeSkip = fCurrentEntity.position ; int afterSkip = fCurrentEntity.position + length - 1 ; if(DEBUG_SKIP_STRING){ System.out.println("skipString,length = " + s + "," + length); System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip, length)); } //s.charAt() indexes are 0 to 'Length -1' based. int i = length - 1 ; //check from reverse while(s.charAt(i--) == fCurrentEntity.ch[afterSkip]){ if(afterSkip-- == beforeSkip){ fCurrentEntity.position = fCurrentEntity.position + length ; fCurrentEntity.columnNumber += length; return true; } } } return false; } // skipString(String):boolean public boolean skipString(char [] s) throws IOException { final int length = s.length; //first make sure that required capacity is avaible if(arrangeCapacity(length, false)){ int beforeSkip = fCurrentEntity.position ; int afterSkip = fCurrentEntity.position + length ; if(DEBUG_SKIP_STRING){ System.out.println("skipString,length = " + new String(s) + "," + length); System.out.println("skipString,length = " + new String(s) + "," + length); } for(int i=0;i XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE) { length = XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE; } if (DEBUG_BUFFER) System.out.println(" length to try to read: "+length); int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length); if (DEBUG_BUFFER) System.out.println(" length actually read: "+count); // reset count and position boolean entityChanged = false; if (count != -1) { if (count != 0) { // record the last count 
fCurrentEntity.fLastCount = count; fCurrentEntity.count = count + offset; fCurrentEntity.position = offset; } } // end of this entity else { fCurrentEntity.count = offset; fCurrentEntity.position = offset; entityChanged = true; if (changeEntity) { //notify the entity manager about the end of entity fEntityManager.endEntity(); //return if the current entity becomes null if(fCurrentEntity == null){ throw END_OF_DOCUMENT_ENTITY; } // handle the trailing edges if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true); } } } if (DEBUG_BUFFER) { System.out.print(")load, "+offset+": "); print(); System.out.println(); } return entityChanged; } // load(int, boolean):boolean /** * Creates a reader capable of reading the given input stream in * the specified encoding. * * @param inputStream The input stream. * @param encoding The encoding name that the input stream is * encoded using. If the user has specified that * Java encoding names are allowed, then the * encoding name may be a Java encoding name; * otherwise, it is an ianaEncoding name. * @param isBigEndian For encodings (like uCS-4), whose names cannot * specify a byte order, this tells whether the order is bigEndian. null menas * unknown or not relevant. * * @return Returns a reader. 
*/ protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian) throws IOException { // normalize encoding name if (encoding == null) { encoding = "UTF-8"; } // try to use an optimized reader String ENCODING = encoding.toUpperCase(Locale.ENGLISH); if (ENCODING.equals("UTF-8")) { if (DEBUG_ENCODINGS) { System.out.println("$$$ creating UTF8Reader"); } return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() ); } if (ENCODING.equals("US-ASCII")) { if (DEBUG_ENCODINGS) { System.out.println("$$$ creating ASCIIReader"); } return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); } if(ENCODING.equals("ISO-10646-UCS-4")) { if(isBigEndian != null) { boolean isBE = isBigEndian.booleanValue(); if(isBE) { return new UCSReader(inputStream, UCSReader.UCS4BE); } else { return new UCSReader(inputStream, UCSReader.UCS4LE); } } else { fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, "EncodingByteOrderUnsupported", new Object[] { encoding }, XMLErrorReporter.SEVERITY_FATAL_ERROR); } } if(ENCODING.equals("ISO-10646-UCS-2")) { if(isBigEndian != null) { // sould never happen with this encoding... 
boolean isBE = isBigEndian.booleanValue(); if(isBE) { return new UCSReader(inputStream, UCSReader.UCS2BE); } else { return new UCSReader(inputStream, UCSReader.UCS2LE); } } else { fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, "EncodingByteOrderUnsupported", new Object[] { encoding }, XMLErrorReporter.SEVERITY_FATAL_ERROR); } } // check for valid name boolean validIANA = XMLChar.isValidIANAEncoding(encoding); boolean validJava = XMLChar.isValidJavaEncoding(encoding); if (!validIANA || (fAllowJavaEncodings && !validJava)) { fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, "EncodingDeclInvalid", new Object[] { encoding }, XMLErrorReporter.SEVERITY_FATAL_ERROR); // NOTE: AndyH suggested that, on failure, we use ISO Latin 1 // because every byte is a valid ISO Latin 1 character. // It may not translate correctly but if we failed on // the encoding anyway, then we're expecting the content // of the document to be bad. This will just prevent an // invalid UTF-8 sequence to be detected. This is only // important when continue-after-fatal-error is turned // on. -Ac encoding = "ISO-8859-1"; } // try to use a Java reader String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING); if (javaEncoding == null) { if(fAllowJavaEncodings) { javaEncoding = encoding; } else { fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, "EncodingDeclInvalid", new Object[] { encoding }, XMLErrorReporter.SEVERITY_FATAL_ERROR); // see comment above. 
javaEncoding = "ISO8859_1"; } } else if (javaEncoding.equals("ASCII")) { if (DEBUG_ENCODINGS) { System.out.println("$$$ creating ASCIIReader"); } return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); } if (DEBUG_ENCODINGS) { System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding); if (javaEncoding == encoding) { System.out.print(" (IANA encoding)"); } System.out.println(); } return new InputStreamReader(inputStream, javaEncoding); } // createReader(InputStream,String, Boolean): Reader /** * Returns the IANA encoding name that is auto-detected from * the bytes specified, with the endian-ness of that encoding where appropriate. * * @param b4 The first four bytes of the input. * @param count The number of bytes actually read. * @return a 2-element array: the first element, an IANA-encoding string, * the second element a Boolean which is true iff the document is big endian, false * if it's little-endian, and null if the distinction isn't relevant. 
*/ protected Object[] getEncodingName(byte[] b4, int count) { if (count < 2) { return new Object[]{"UTF-8", null}; } // UTF-16, with BOM int b0 = b4[0] & 0xFF; int b1 = b4[1] & 0xFF; if (b0 == 0xFE && b1 == 0xFF) { // UTF-16, big-endian return new Object [] {"UTF-16BE", new Boolean(true)}; } if (b0 == 0xFF && b1 == 0xFE) { // UTF-16, little-endian return new Object [] {"UTF-16LE", new Boolean(false)}; } // default to UTF-8 if we don't have enough bytes to make a // good determination of the encoding if (count < 3) { return new Object [] {"UTF-8", null}; } // UTF-8 with a BOM int b2 = b4[2] & 0xFF; if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { return new Object [] {"UTF-8", null}; } // default to UTF-8 if we don't have enough bytes to make a // good determination of the encoding if (count < 4) { return new Object [] {"UTF-8", null}; } // other encodings int b3 = b4[3] & 0xFF; if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { // UCS-4, big endian (1234) return new Object [] {"ISO-10646-UCS-4", new Boolean(true)}; } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { // UCS-4, little endian (4321) return new Object [] {"ISO-10646-UCS-4", new Boolean(false)}; } if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { // UCS-4, unusual octet order (2143) // REVISIT: What should this be? return new Object [] {"ISO-10646-UCS-4", null}; } if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { // UCS-4, unusual octect order (3412) // REVISIT: What should this be? return new Object [] {"ISO-10646-UCS-4", null}; } if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { // UTF-16, big-endian, no BOM // (or could turn out to be UCS-2... // REVISIT: What should this be? return new Object [] {"UTF-16BE", new Boolean(true)}; } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { // UTF-16, little-endian, no BOM // (or could turn out to be UCS-2... 
return new Object [] {"UTF-16LE", new Boolean(false)}; } if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { // EBCDIC // a la xerces1, return CP037 instead of EBCDIC here return new Object [] {"CP037", null}; } // default encoding return new Object [] {"UTF-8", null}; } // getEncodingName(byte[],int):Object[] /** * xxx not removing endEntity() so that i remember that we need to implement it. * Ends an entity. * * @throws XNIException Thrown by entity handler to signal an error. */ // /** Prints the contents of the buffer. */ final void print() { if (DEBUG_BUFFER) { if (fCurrentEntity != null) { System.out.print('['); System.out.print(fCurrentEntity.count); System.out.print(' '); System.out.print(fCurrentEntity.position); if (fCurrentEntity.count > 0) { System.out.print(" \""); for (int i = 0; i < fCurrentEntity.count; i++) { if (i == fCurrentEntity.position) { System.out.print('^'); } char c = fCurrentEntity.ch[i]; switch (c) { case '\n': { System.out.print("\\n"); break; } case '\r': { System.out.print("\\r"); break; } case '\t': { System.out.print("\\t"); break; } case '\\': { System.out.print("\\\\"); break; } default: { System.out.print(c); } } } if (fCurrentEntity.position == fCurrentEntity.count) { System.out.print('^'); } System.out.print('"'); } System.out.print(']'); System.out.print(" @ "); System.out.print(fCurrentEntity.lineNumber); System.out.print(','); System.out.print(fCurrentEntity.columnNumber); } else { System.out.print("*NO CURRENT ENTITY*"); } } } /** * Registers the listener object and provides callback. * @param listener listener to which call back should be provided when scanner buffer * is being changed. */ public void registerListener(XMLBufferListener listener) { if(!listeners.contains(listener)) listeners.add(listener); } /** * * @param loadPos Starting position from which new data is being loaded into scanner buffer. 
*/ private void invokeListeners(int loadPos){ for(int i=0;i * Note: The characters are consumed only if they would * match non-terminal S before end of line normalization is performed. * * @return Returns true if at least one space character was skipped. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. * * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace */ public final boolean skipDeclSpaces() throws IOException { if (DEBUG_BUFFER) { System.out.print("(skipDeclSpaces: "); //XMLEntityManager.print(fCurrentEntity); System.out.println(); } // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true); } // skip spaces int c = fCurrentEntity.ch[fCurrentEntity.position]; if (XMLChar.isSpace(c)) { boolean external = fCurrentEntity.isExternal(); do { boolean entityChanged = false; // handle newlines if (c == '\n' || (external && c == '\r')) { fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count - 1) { fCurrentEntity.ch[0] = (char)c; entityChanged = load(1, true); if (!entityChanged) // the load change the position to be 1, // need to restore it when entity not changed fCurrentEntity.position = 0; } if (c == '\r' && external) { // REVISIT: Does this need to be updated to fix the // #x0D ^#x0A newline normalization problem? 
-Ac if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') { fCurrentEntity.position--; } } /*** NEWLINE NORMALIZATION *** * else { * if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r' * && external) { * fCurrentEntity.position++; * } * } * /***/ } else { fCurrentEntity.columnNumber++; } // load more characters, if needed if (!entityChanged) fCurrentEntity.position++; if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true); } } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position])); if (DEBUG_BUFFER) { System.out.print(")skipDeclSpaces: "); // XMLEntityManager.print(fCurrentEntity); System.out.println(" -> true"); } return true; } // no spaces were found if (DEBUG_BUFFER) { System.out.print(")skipDeclSpaces: "); //XMLEntityManager.print(fCurrentEntity); System.out.println(" -> false"); } return false; } // skipDeclSpaces():boolean } // class XMLEntityScanner