/*
*/
/*
* Copyright 2005 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Implements the entity scanner methods.
*
* @author Neeraj Bajaj, Sun Microsystems
* @author Andy Clark, IBM
* @author Arnaud Le Hors, IBM
* @author K.Venugopal Sun Microsystems
*
*/
/** Debug switching readers for encodings. */
private static final boolean DEBUG_ENCODINGS = false;
/** Listeners which should know when load is being called */
/**
* Debug printing of buffer. This debugging flag works best when you
* resize the DEFAULT_BUFFER_SIZE down to something reasonable like
* 64 characters.
*/
private static final boolean DEBUG_BUFFER = false;
/**
 * Debug switch guarding the System.out traces emitted while arranging
 * buffer capacity and comparing the buffer against a string to skip.
 */
private static final boolean DEBUG_SKIP_STRING = false;
/**
* To signal the end of the document entity, this exception will be thrown.
*/
private static final long serialVersionUID = 980337771224675268L;
public Throwable fillInStackTrace() {
return this;
}
};
boolean whiteSpaceInfoNeeded = true;
/**
* Allow Java encoding names. This feature identifier is:
*/
protected boolean fAllowJavaEncodings;
//Will be used only during internal subsets.
//for appending data.
/** Property identifier: symbol table. */
/** Property identifier: error reporter. */
/** Feature identifier: allow Java encodings. */
boolean isExternal = false;
// Marks the ASCII characters for which the name-scanning loops can answer
// "is this a name character?" straight from the VALID_NAMES table:
// 'A'-'Z', 'a'-'z', '0'-'9' and the punctuation '-', '.', ':' and '_'.
static {
    for (char upper = 'A'; upper <= 'Z'; upper++) {
        VALID_NAMES[upper] = true;
    }
    for (char lower = 'a'; lower <= 'z'; lower++) {
        VALID_NAMES[lower] = true;
    }
    for (char digit = '0'; digit <= '9'; digit++) {
        VALID_NAMES[digit] = true;
    }
    // '-' (45), '.' (46), ':' (58), '_' (95)
    for (char punctuation : new char[] {'-', '.', ':', '_'}) {
        VALID_NAMES[punctuation] = true;
    }
}
// SAPJVM: Remember, that the XML version has explicitly been set,
// so that XMLStreamReader.getVersion() can find that out.
boolean xmlVersionSetExplicitly = false;
//
// Constructors
//
/**
 * Default constructor. The body is empty, so the instance starts out with
 * whatever values the field initializers assign.
 */
public XMLEntityScanner() {
} // <init>()
/** private constructor, this class can only be instantiated within this class. Instance of this class should
* be obtained using getEntityScanner() or getEntityScanner(ScannedEntity scannedEntity)
* @see getEntityScanner()
* @see getEntityScanner(ScannedEntity)
*/
} // <init>()
// set buffer size:
// REVISIT: Buffer size passed to entity scanner
// was not being kept in synch with the actual size
// of the buffers in each scanned entity. If any
// of the buffers were actually resized, it was possible
// that the parser would throw an ArrayIndexOutOfBoundsException
// for documents which contained names which are longer than
// the current buffer size. Conceivably the buffer size passed
// to entity scanner could be used to determine a minimum size
// for resizing, if doubling its size is smaller than this
// minimum. -- mrglavas
fBufferSize = size;
}
/**
* Resets the components.
*/
whiteSpaceLen = 0;
whiteSpaceInfoNeeded = true;
}
/**
* Resets the component. The component can query the component manager
* about any features and properties that affect the operation of the
* component.
*
* @param componentManager The component manager.
*
* @throws SAXException Thrown by component on initialization error.
* For example, if a feature or property is
* required for the operation of the component, the
* component manager may throw a
* SAXNotRecognizedException or a
* SAXNotSupportedException.
*/
throws XMLConfigurationException {
//System.out.println(" this is being called");
// xerces features
//xerces properties
whiteSpaceLen = 0;
whiteSpaceInfoNeeded = true;
} // reset(XMLComponentManager)
}
/**
* Returns the XML version of the current entity. This will normally be the
* value from the XML or text declaration or defaulted by the parser. Note
* that this value may be different than the version of the processing rules
* applied to the current entity. For instance, an XML 1.1 document may refer to
* XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire
* document. Also note that, for a given entity, this value can only be considered
* final once the XML or text declaration has been read or once it has been
* determined that there is no such declaration.
*/
if (fCurrentEntity != null) {
return fCurrentEntity.xmlVersion;
}
return null;
} // getXMLVersion():String
/**
* Sets the XML version. This method is used by the
* scanners to report the value of the version pseudo-attribute
* in an XML or text declaration.
*
* @param xmlVersion the XML version of the current entity
*/
xmlVersionSetExplicitly = true; // SAPJVM
} // setXMLVersion(String)
/** set the instance of current scanned entity.
* @param ScannedEntity
*/
if(fCurrentEntity != null){
if(DEBUG_BUFFER)
}
}
return fCurrentEntity ;
}
//
// XMLEntityReader methods
//
/**
* Returns the base system identifier of the currently scanned
* entity, or null if none is available.
*/
return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
} // getBaseSystemId():String
/**
* @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String)
*/
//no-op
}
///////////// Locator methods start.
public final int getLineNumber(){
// Stated intent: when the entity is closed, the result should be -1.
// Open question left by the original author: why would a caller ask for
// the line number at that point in the first place?
// NOTE(review): no return statement is visible in this view of the body --
// confirm against the complete source.
}
/**
* @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int)
*/
//no-op
}
public final int getColumnNumber(){
// Stated intent: when the entity is closed, the result should be -1.
// Open question left by the original author: why would a caller ask for
// the column number at that point in the first place?
// NOTE(review): no return statement is visible in this view of the body --
// confirm against the complete source.
}
/**
* @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int)
*/
// no-op
}
/**
 * Returns the character offset within the current entity.
 *
 * @return the sum of the entity's count up to its last load and its
 *         current buffer position, or -1 when there is no current entity
 */
public final int getCharacterOffset(){
    if (fCurrentEntity == null) {
        return -1;
    }
    return fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position;
}
/** Returns the expanded system identifier. */
return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
}
/**
* @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String)
*/
//no-op
}
/** Returns the literal system identifier. */
return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getLiteralSystemId() : null;
}
/**
* @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String)
*/
//no-op
}
/** Returns the public identifier. */
return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null;
}
/**
* @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String)
*/
//no-op
}
///////////////// Locator methods finished.
/** the version of the current entity being scanned */
}
if (fCurrentEntity != null)
return fCurrentEntity.version ;
return null;
}
/**
* Returns the encoding of the current entity.
* Note that, for a given entity, this value can only be
* considered final once the encoding declaration has been read (or once it
* has been determined that there is no such declaration) since, no encoding
* having been specified on the XMLInputSource, the parser
* will make an initial "guess" which could be in error.
*/
if (fCurrentEntity != null) {
return fCurrentEntity.encoding;
}
return null;
} // getEncoding():String
/**
* Sets the encoding of the scanner. This method is used by the
* scanners if the XMLDecl or TextDecl line contains an encoding
* pseudo-attribute.
* <p>
* <strong>Note:</strong> The underlying character reader on the
* current entity will be changed to accommodate the new encoding.
* However, the new encoding is ignored if the current reader was
* not constructed from an input stream (e.g. an external entity
* that is resolved directly to the appropriate java.io.Reader
* object).
*
* @param encoding The IANA encoding name of the new encoding.
*
* @throws IOException Thrown if the new encoding is not supported.
*
* @see com.sun.org.apache.xerces.internal.util.EncodingMap
*/
if (DEBUG_ENCODINGS) {
}
// if the encoding is the same, don't change the reader and
// re-use the original reader used by the OneCharReader
// NOTE: Besides saving an object, this overcomes deficiencies
// in the UTF-16 reader supplied with the standard Java
// distribution (up to and including 1.3). The UTF-16
// decoder buffers 8K blocks even when only asked to read
// a single char! -Ac
// UTF-16 is a bit of a special case. If the encoding is UTF-16,
// and we know the endian-ness, we shouldn't change readers.
// If it's ISO-10646-UCS-(2|4), then we'll have to deduce
// the endian-ness from the encoding we presently have.
} else {
}
return;
}
} else {
}
return;
}
}
// wrap a new reader around the input stream, changing
// the encoding
if (DEBUG_ENCODINGS) {
}
//fCurrentEntity.stream.reset();
} else {
if (DEBUG_ENCODINGS)
}
}
} // setEncoding(String)
/**
 * Reports whether the entity currently being scanned is external.
 * The answer is delegated to the current entity; no null check is
 * performed on it here.
 *
 * @return the current entity's own {@code isExternal()} result
 */
public final boolean isExternal() {
    final boolean external = fCurrentEntity.isExternal();
    return external;
} // isExternal():boolean
}else{
return -1;
}
}//getChar()
/**
* Returns the next character on the input.
* <p>
* <strong>Note:</strong> The character is <em>not</em> consumed.
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*/
if (DEBUG_BUFFER) {
print();
}
// load more characters, if needed
invokeListeners(0);
load(0, true);
}
// peek at character
// return peeked character
if (DEBUG_BUFFER) {
print();
if (isExternal) {
} else {
}
}
if (isExternal) {
return c != '\r' ? c : '\n';
} else {
return c;
}
} // peekChar():int
/**
* Returns the next character on the input.
* <p>
* <strong>Note:</strong> The character is consumed.
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*/
if (DEBUG_BUFFER) {
print();
}
// load more characters, if needed
invokeListeners(0);
load(0, true);
}
// scan character
if (c == '\n' ||
(c == '\r' && isExternal)) {
invokeListeners(1);
load(1, false);
}
if (c == '\r' && isExternal) {
}
c = '\n';
}
}
// return character that was scanned
if (DEBUG_BUFFER) {
print();
}
return c;
} // scanChar():int
/**
* Returns a string matching the NMTOKEN production appearing immediately
* on the input as a symbol, or null if no NMTOKEN string is present.
* <p>
* <strong>Note:</strong> The NMTOKEN characters are consumed.
* <p>
* <strong>Note:</strong> The string returned must be a symbol. The
* SymbolTable can be used for this purpose.
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*
* @see com.sun.org.apache.xerces.internal.util.SymbolTable
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
*/
if (DEBUG_BUFFER) {
print();
}
// load more characters, if needed
invokeListeners(0);
load(0, true);
}
// scan nmtoken
boolean vc = false;
char c;
while (true){
//while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
if(c < 127){
vc = VALID_NAMES[c];
}else{
}
if(!vc)break;
// bad luck we have to resize our buffer
} else {
}
offset = 0;
break;
}
}
}
// return nmtoken
if (length > 0) {
}
if (DEBUG_BUFFER) {
print();
}
return symbol;
} // scanNmtoken():String
/**
* Returns a string matching the Name production appearing immediately
* on the input as a symbol, or null if no Name string is present.
* <p>
* <strong>Note:</strong> The Name characters are consumed.
* <p>
* <strong>Note:</strong> The string returned must be a symbol. The
* SymbolTable can be used for this purpose.
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*
* @see com.sun.org.apache.xerces.internal.util.SymbolTable
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
*/
if (DEBUG_BUFFER) {
print();
}
// load more characters, if needed
invokeListeners(0);
load(0, true);
}
// scan name
invokeListeners(1);
offset = 0;
if (load(1, false)) {
if (DEBUG_BUFFER) {
print();
}
return symbol;
}
}
boolean vc =false;
while (true ){
//XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
if(c < 127){
vc = VALID_NAMES[c];
}else{
}
if(!vc)break;
// bad luck we have to resize our buffer
} else {
}
offset = 0;
break;
}
}
}
}
// return name
if (length > 0) {
} else
if (DEBUG_BUFFER) {
print();
}
return symbol;
} // scanName():String
/**
* Scans a qualified name from the input, setting the fields of the
* QName structure appropriately.
* <p>
* <strong>Note:</strong> The qualified name characters are consumed.
* <p>
* <strong>Note:</strong> The strings used to set the values of the
* QName structure must be symbols. The SymbolTable can be used for
* this purpose.
*
* @param qname The qualified name structure to fill.
*
* @return Returns true if a qualified name appeared immediately on
* the input and was scanned, false otherwise.
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*
* @see com.sun.org.apache.xerces.internal.util.SymbolTable
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
*/
if (DEBUG_BUFFER) {
print();
}
// load more characters, if needed
invokeListeners(0);
load(0, true);
}
// scan qualified name
//checking whether the specified character is a valid name start character
//as defined by production [5] in the XML 1.0 specification.
// Name ::= (Letter | '_' | ':') (NameChar)*
invokeListeners(1);
offset = 0;
if (load(1, false)) {
//adding into symbol table.
//XXX We are trying to add single character in SymbolTable??????
if (DEBUG_BUFFER) {
print();
}
return true;
}
}
int index = -1;
boolean vc = false;
while ( true){
//XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
if(c < 127){
vc = VALID_NAMES[c];
}else{
}
if(!vc)break;
if (c == ':') {
if (index != -1) {
break;
}
}
// bad luck we have to resize our buffer
} else {
}
if (index != -1) {
}
offset = 0;
break;
}
}
}
if (length > 0) {
if (index != -1) {
} else {
}
if (DEBUG_BUFFER) {
print();
}
return true;
}
}
// no qualified name found
if (DEBUG_BUFFER) {
print();
}
return false;
} // scanQName(QName):boolean
/**
* CHANGED:
* Scans a range of parsed character data, This function appends the character data to
* the supplied buffer.
* <p>
* <strong>Note:</strong> The characters are consumed.
* <p>
* <strong>Note:</strong> This method does not guarantee to return
* the longest run of parsed character data. This method may return
* before markup due to reaching the end of the input buffer or any
* other reason.
* <p>
*
* @param content The content structure to fill.
*
* @return Returns the next character on the input, if known. This
* value may be -1 but this does <em>not</em> designate
* end of file.
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*/
if (DEBUG_BUFFER) {
print();
}
// load more characters, if needed
invokeListeners(0);
load(0, true);
invokeListeners(0);
load(1, false);
}
// normalize newlines
int newlines = 0;
if (DEBUG_BUFFER) {
print();
}
do {
if (c == '\r' && isExternal) {
newlines++;
offset = 0;
break;
}
}
offset++;
}
/*** NEWLINE NORMALIZATION ***/
else {
newlines++;
}
} else if (c == '\n') {
newlines++;
offset = 0;
break;
}
}
} else {
break;
}
}
//CHANGED: dont replace the value.. append to the buffer. This gives control to the callee
//on buffering the data..
//content.append(fCurrentEntity.ch, offset, length);
if (DEBUG_BUFFER) {
print();
}
return -1;
}
if (DEBUG_BUFFER) {
print();
}
}
break;
}
}
//CHANGED: dont replace the value.. append to the buffer. This gives control to the callee
//on buffering the data..
//content.append(fCurrentEntity.ch, offset, length);
// return next character
// REVISIT: Does this need to be updated to fix the
// #x0D ^#x0A newline normalization problem? -Ac
if (c == '\r' && isExternal) {
c = '\n';
}
} else {
c = -1;
}
if (DEBUG_BUFFER) {
print();
}
return c;
} // scanContent(XMLString):int
/**
* Scans a range of attribute value data, setting the fields of the
* XMLString structure, appropriately.
* <p>
* <strong>Note:</strong> The characters are consumed.
* <p>
* <strong>Note:</strong> This method does not guarantee to return
* the longest run of attribute value data. This method may return
* before the quote character due to reaching the end of the input
* buffer or any other reason.
* <p>
* <strong>Note:</strong> The fields contained in the XMLString
* structure are not guaranteed to remain valid upon subsequent calls
* to the entity scanner. Therefore, the caller is responsible for
* immediately using the returned character data or making a copy of
* the character data.
*
* @param quote The quote character that signifies the end of the
* attribute value data.
* @param content The content structure to fill.
*
* @return Returns the next character on the input, if known. This
* value may be -1 but this does <em>not</em> designate
* end of file.
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*/
throws IOException {
if (DEBUG_BUFFER) {
print();
}
// load more characters, if needed
invokeListeners(0);
load(0, true);
invokeListeners(0);
load(1, false);
}
// normalize newlines
int newlines = 0;
if (DEBUG_BUFFER) {
print();
}
do {
if (c == '\r' && isExternal) {
newlines++;
offset = 0;
break;
}
}
offset++;
}
/*** NEWLINE NORMALIZATION ***/
else {
newlines++;
}
/***/
} else if (c == '\n') {
newlines++;
offset = 0;
break;
}
}
/*** NEWLINE NORMALIZATION ***
* if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
* && external) {
* fCurrentEntity.position++;
* offset++;
* }
* /***/
} else {
break;
}
int i=0;
}
if (DEBUG_BUFFER) {
print();
}
return -1;
}
if (DEBUG_BUFFER) {
print();
}
}
// scan literal value
if ((c == quote &&
break;
}
if(whiteSpaceInfoNeeded){
if(c == 0x20 || c == 0x9){
}else{
}
}
}
}
// return next character
// NOTE: We don't want to accidentally signal the
// end of the literal if we're expanding an
// entity appearing in the literal. -Ac
c = -1;
}
} else {
c = -1;
}
if (DEBUG_BUFFER) {
print();
}
return c;
} // scanLiteral(int,XMLString):int
//CHANGED:
/**
* Scans a range of character data up to the specified delimiter,
* setting the fields of the XMLString structure, appropriately.
* <p>
* <strong>Note:</strong> The characters are consumed.
* <p>
* <strong>Note:</strong> This assumes that the internal buffer is at least
* as large as the delimiter and that the delimiter contains at least one
* character.
* <p>
* <strong>Note:</strong> This method does not guarantee to return
* the longest run of character data. This method may return before
* the delimiter due to reaching the end of the input buffer or any
* other reason.
* <p>
* @param delimiter The string that signifies the end of the character
* data to be scanned.
* @param buffer The XMLStringBuffer to fill.
*
* @return Returns true if there is more data to scan, false otherwise.
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*/
throws IOException {
boolean done = false;
do {
if (DEBUG_BUFFER) {
print();
}
// load more characters, if needed
load(0, true);
}
boolean bNextEntity = false;
&& (!bNextEntity))
{
0,
}
// something must be wrong with the input: e.g., file ends in an unterminated comment
load(0, true);
return false;
}
// normalize newlines
int newlines = 0;
if (DEBUG_BUFFER) {
print();
}
do {
if (c == '\r' && isExternal) {
newlines++;
offset = 0;
break;
}
}
offset++;
}
/*** NEWLINE NORMALIZATION ***/
else {
newlines++;
}
} else if (c == '\n') {
newlines++;
offset = 0;
break;
}
}
} else {
break;
}
}
if (DEBUG_BUFFER) {
print();
}
return true;
}
if (DEBUG_BUFFER) {
print();
}
}
// iterate over buffer looking for delimiter
if (c == charAt0) {
// looks like we just hit the delimiter
for (int i = 1; i < delimLen; i++) {
fCurrentEntity.position -= i;
break OUTER;
}
fCurrentEntity.position -= i;
break;
}
}
done = true;
break;
}
break;
return true;
}
}
if (done) {
}
// return true if string was skipped
if (DEBUG_BUFFER) {
print();
}
} while (!done);
return !done;
} // scanData(String,XMLString)
/**
* Skips a character appearing immediately on the input.
* <p>
* <strong>Note:</strong> The character is consumed only if it matches
* the specified character.
*
* @param c The character to skip.
*
* @return Returns true if the character was skipped.
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*/
if (DEBUG_BUFFER) {
print();
}
// load more characters, if needed
invokeListeners(0);
load(0, true);
}
// skip character
if (cc == c) {
if (c == '\n') {
} else {
}
if (DEBUG_BUFFER) {
print();
}
return true;
// handle newlines
invokeListeners(1);
load(1, false);
}
}
if (DEBUG_BUFFER) {
print();
}
return true;
}
// character was not skipped
if (DEBUG_BUFFER) {
print();
}
return false;
} // skipChar(int):boolean
}
/**
* Skips space characters appearing immediately on the input.
* <p>
* <strong>Note:</strong> The characters are consumed only if they are
* space characters.
*
* @return Returns true if at least one space character was skipped.
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
*/
if (DEBUG_BUFFER) {
print();
}
//boolean entityChanged = false;
// load more characters, if needed
invokeListeners(0);
load(0, true);
}
//we are doing this check only in skipSpace() because it is called by
//fMiscDispatcher and we want the parser to exit gracefully when document
//is well-formed.
//it is possible that end of document is reached and
//fCurrentEntity becomes null
//nothing was read so entity changed 'false' should be returned.
if(fCurrentEntity == null){
return false ;
}
// skip spaces
do {
boolean entityChanged = false;
// handle newlines
invokeListeners(0);
if (!entityChanged){
// the load change the position to be 1,
// need to restore it when entity not changed
}else if(fCurrentEntity == null){
return true ;
}
}
if (c == '\r' && isExternal) {
// REVISIT: Does this need to be updated to fix the
// #x0D ^#x0A newline normalization problem? -Ac
}
}
} else {
}
// load more characters, if needed
if (!entityChanged){
}
invokeListeners(0);
load(0, true);
//we are doing this check only in skipSpace() because it is called by
//fMiscDispatcher and we want the parser to exit gracefully when document
//is well-formed.
//it is possible that end of document is reached and
//fCurrentEntity becomes null
//nothing was read so entity changed 'false' should be returned.
if(fCurrentEntity == null){
return true ;
}
}
if (DEBUG_BUFFER) {
print();
}
return true;
}
// no spaces were found
if (DEBUG_BUFFER) {
print();
}
return false;
} // skipSpaces():boolean
/**
* @param length The number of characters that must be available
* in the underlying buffer.
* @return This function returns true if capacity asked is available.
*/
return arrangeCapacity(length, false);
}
/**
* @param length The number of characters that must be available
* in the underlying buffer.
* @param if the underlying function should change the entity
* @return This function returns true if capacity asked is available.
*
*/
//check if the capacity is available in the current buffer
//count is no. of characters in the buffer [x][m][l]
//position is '0' based
//System.out.println("fCurrent Entity " + fCurrentEntity);
return true;
}
if(DEBUG_SKIP_STRING){
}
boolean entityChanged = false;
//load more characters -- this function shouldn't change the entity
invokeListeners(0);
System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch,0,fCurrentEntity.count - fCurrentEntity.position);
}
if(entityChanged)break;
}
if(DEBUG_SKIP_STRING){
}
}
//load changes the position.. set it back to the point where we started.
//after loading check again.
return true;
} else {
return false;
}
}
/**
* Skips the specified string appearing immediately on the input.
* <p>
* <strong>Note:</strong> The characters are consumed only if all
* the characters are skipped.
*
* @param s The string to skip.
*
* @return Returns true if the string was skipped.
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*/
//first make sure that the required capacity is available
if(arrangeCapacity(length, false)){
if(DEBUG_SKIP_STRING){
System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip, length));
}
//s.charAt() indexes are 0 to 'Length -1' based.
int i = length - 1 ;
//check from reverse
if(afterSkip-- == beforeSkip){
return true;
}
}
}
return false;
} // skipString(String):boolean
//first make sure that the required capacity is available
if(arrangeCapacity(length, false)){
if(DEBUG_SKIP_STRING){
}
for(int i=0;i<length;i++){
return false;
}
}
return true;
}
return false;
}
//
// Locator methods
//
//
// Private methods
//
/**
* Loads a chunk of text.
*
* @param offset The offset into the character buffer to
* read the next batch of characters.
* @param changeEntity True if the load should change entities
* at the end of the entity, otherwise leave
* the current entity in place and the entity
* boundary will be signaled by the return
* value.
*
* @return Returns true if the entity changed as a result of this
* load operation.
*/
throws IOException {
if (DEBUG_BUFFER) {
print();
}
//maintaining the count till last load
fCurrentEntity.fTotalCountTillLastLoad = fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.fLastCount ;
// read characters
}
// reset count and position
boolean entityChanged = false;
if (count != -1) {
if (count != 0) {
// record the last count
}
}
// end of this entity
else {
entityChanged = true;
if (changeEntity) {
//notify the entity manager about the end of entity
//return if the current entity becomes null
if(fCurrentEntity == null){
throw END_OF_DOCUMENT_ENTITY;
}
// handle the trailing edges
load(0, true);
}
}
}
if (DEBUG_BUFFER) {
print();
}
return entityChanged;
} // load(int, boolean):boolean
/**
* Creates a reader capable of reading the given input stream in
* the specified encoding.
*
* @param inputStream The input stream.
* @param encoding The encoding name that the input stream is
* encoded using. If the user has specified that
* Java encoding names are allowed, then the
* encoding name may be a Java encoding name;
* otherwise, it is an ianaEncoding name.
* @param isBigEndian For encodings (like UCS-4), whose names cannot
* specify a byte order, this tells whether the order is big-endian. null means
* unknown or not relevant.
*
* @return Returns a reader.
*/
throws IOException {
// normalize encoding name
encoding = "UTF-8";
}
// try to use an optimized reader
if (DEBUG_ENCODINGS) {
}
return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() );
}
if (DEBUG_ENCODINGS) {
}
return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
}
if(isBigEndian != null) {
if(isBE) {
} else {
}
} else {
"EncodingByteOrderUnsupported",
}
}
if(isBE) {
} else {
}
} else {
"EncodingByteOrderUnsupported",
}
}
// check for valid name
"EncodingDeclInvalid",
// NOTE: AndyH suggested that, on failure, we use ISO Latin 1
// because every byte is a valid ISO Latin 1 character.
// It may not translate correctly but if we failed on
// the encoding anyway, then we're expecting the content
// of the document to be bad. This will just prevent an
// invalid UTF-8 sequence to be detected. This is only
// important when continue-after-fatal-error is turned
// on. -Ac
encoding = "ISO-8859-1";
}
// try to use a Java reader
if (javaEncoding == null) {
if(fAllowJavaEncodings) {
} else {
"EncodingDeclInvalid",
// see comment above.
javaEncoding = "ISO8859_1";
}
}
if (DEBUG_ENCODINGS) {
}
return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
}
if (DEBUG_ENCODINGS) {
if (javaEncoding == encoding) {
}
}
} // createReader(InputStream,String, Boolean): Reader
/**
* Returns the IANA encoding name that is auto-detected from
* the bytes specified, with the endian-ness of that encoding where appropriate.
*
* @param b4 The first four bytes of the input.
* @param count The number of bytes actually read.
* @return a 2-element array: the first element, an IANA-encoding string,
* the second element a Boolean which is true iff the document is big endian, false
* if it's little-endian, and null if the distinction isn't relevant.
*/
if (count < 2) {
}
// UTF-16, with BOM
// UTF-16, big-endian
}
// UTF-16, little-endian
}
// default to UTF-8 if we don't have enough bytes to make a
// good determination of the encoding
if (count < 3) {
}
// UTF-8 with a BOM
}
// default to UTF-8 if we don't have enough bytes to make a
// good determination of the encoding
if (count < 4) {
}
// other encodings
// UCS-4, big endian (1234)
}
// UCS-4, little endian (4321)
}
// UCS-4, unusual octet order (2143)
// REVISIT: What should this be?
}
// UCS-4, unusual octet order (3412)
// REVISIT: What should this be?
}
// UTF-16, big-endian, no BOM
// (or could turn out to be UCS-2...
// REVISIT: What should this be?
}
// UTF-16, little-endian, no BOM
// (or could turn out to be UCS-2...
}
// EBCDIC
// a la xerces1, return CP037 instead of EBCDIC here
}
// default encoding
} // getEncodingName(byte[],int):Object[]
/**
* xxx not removing endEntity() so that i remember that we need to implement it.
* Ends an entity.
*
* @throws XNIException Thrown by entity handler to signal an error.
*/
//
/** Prints the contents of the buffer. */
final void print() {
if (DEBUG_BUFFER) {
if (fCurrentEntity != null) {
if (i == fCurrentEntity.position) {
}
char c = fCurrentEntity.ch[i];
switch (c) {
case '\n': {
break;
}
case '\r': {
break;
}
case '\t': {
break;
}
case '\\': {
break;
}
default: {
}
}
}
}
}
} else {
}
}
}
/**
* Registers the listener object and provides callback.
* @param listener listener to which call back should be provided when scanner buffer
* is being changed.
*/
}
/**
*
* @param loadPos Starting position from which new data is being loaded into scanner buffer.
*/
}
}
/**
* Skips space characters appearing immediately on the input that would
* match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line
* normalization is performed. This is useful when scanning structures
* such as the XMLDecl and TextDecl that can only contain US-ASCII
* characters.
* <p>
* <strong>Note:</strong> The characters are consumed only if they would
* match non-terminal S before end of line normalization is performed.
*
* @return Returns true if at least one space character was skipped.
*
* @throws IOException Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
*
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
*/
if (DEBUG_BUFFER) {
//XMLEntityManager.print(fCurrentEntity);
}
// load more characters, if needed
load(0, true);
}
// skip spaces
do {
boolean entityChanged = false;
// handle newlines
if (!entityChanged)
// the load change the position to be 1,
// need to restore it when entity not changed
}
if (c == '\r' && external) {
// REVISIT: Does this need to be updated to fix the
// #x0D ^#x0A newline normalization problem? -Ac
}
}
/*** NEWLINE NORMALIZATION ***
* else {
* if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
* && external) {
* fCurrentEntity.position++;
* }
* }
* /***/
} else {
}
// load more characters, if needed
if (!entityChanged)
load(0, true);
}
if (DEBUG_BUFFER) {
// XMLEntityManager.print(fCurrentEntity);
}
return true;
}
// no spaces were found
if (DEBUG_BUFFER) {
//XMLEntityManager.print(fCurrentEntity);
}
return false;
} // skipDeclSpaces():boolean
} // class XMLEntityScanner