286N/A * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. 286N/A * Copyright 2005 The Apache Software Foundation. 286N/A * Licensed under the Apache License, Version 2.0 (the "License"); 286N/A * you may not use this file except in compliance with the License. 286N/A * You may obtain a copy of the License at 286N/A * Unless required by applicable law or agreed to in writing, software 286N/A * distributed under the License is distributed on an "AS IS" BASIS, 286N/A * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 286N/A * See the License for the specific language governing permissions and 286N/A * limitations under the License. 286N/A * This class is responsible for scanning the structure and content 286N/A * of document fragments. 286N/A * This class has been modified as per the new design which is more suited to 286N/A * efficiently build pull parser. Lot of improvements have been done and 286N/A * @author Neeraj Bajaj SUN Microsystems 286N/A * @author K.Venugopal SUN Microsystems 286N/A * @author Glenn Marcy, IBM 286N/A * @author Andy Clark, IBM 286N/A * @author Arnaud Le Hors, IBM 286N/A * @author Sunitha Reddy, SUN Microsystems 286N/A /** External subset resolver. **/ 286N/A //XXX this should be divided into more states. 286N/A /** Scanner state: start of markup. */ 286N/A /** Scanner state: content. */ 286N/A /** Scanner state: processing instruction. */ 286N/A /** Scanner state: DOCTYPE. */ 286N/A /** Scanner state: XML Declaration */ 286N/A /** Scanner state: root element. */ 286N/A /** Scanner state: comment. */ 286N/A /** Scanner state: reference. */ 286N/A // <book type="hard"> reading attribute name 'type' 286N/A // <book type="hard"> //reading attribute value. 286N/A /** Scanner state: trailing misc. USED BY DOCUMENT_SCANNER_IMPL*/ 286N/A //protected static final int SCANNER_STATE_TRAILING_MISC = 32; 286N/A /** Scanner state: end of input. */ 286N/A /** Scanner state: terminated. 
*/ 286N/A /** Scanner state: CDATA section. */ 286N/A /** Scanner state: Text declaration. */ 286N/A /** Scanner state: Text declaration. */ 286N/A //<book type="hard">foo</book> 286N/A //<book type="hard">foo</book> reading </book> 286N/A /** Feature identifier: notify built-in refereces. */ 286N/A /** Property identifier: entity resolver. */ 559N/A /** Feature identifier: standard uri conformant */ 559N/A /** property identifier: access external dtd. */ 559N/A /** access external dtd: file protocol 559N/A * For DOM/SAX, the secure feature is set to true by default 286N/A // recognized features and properties 286N/A /** Recognized features. */ 286N/A /** Feature defaults. */ 286N/A /** Recognized properties. */ 286N/A /** Property defaults. */ 448N/A private static final char []
cdata = {
'[',
'C',
'D',
'A',
'T',
'A',
'['};
448N/A //this variable is also used by XMLDocumentScannerImpl in the same package 286N/A /** Debug scanner state. */ 286N/A /** Debug content driver scanning. */ 286N/A /** Debug driver next */ 286N/A /** Debug driver next */ 286N/A /** Document handler. */ 286N/A //track if we are reading attributes, this is usefule while 286N/A /** SubScanner state: inside scanContent method. */ 286N/A /** has external dtd */ 286N/A /** Current element. */ 286N/A /** Document system identifier. 286N/A * REVISIT: So what's this used for? - NG 286N/A * protected String fDocumentSystemId; 286N/A //xxx do we need to create an extra XMLString object... look for using fTempString for collecting all the data values 286N/A /** Notify built-in references. */ 286N/A //STAX related properties 362N/A /** Xerces Feature: Disallow doctype declaration. */ 559N/A * comma-delimited list of protocols that are allowed for the purpose 559N/A * of accessing external dtd or entity references 559N/A * standard uri conformant (strict uri). 286N/A /** Attribute QName. */ 286N/A * CHANGED: Using XMLAttributesIteratorImpl instead of XMLAttributesImpl. This class 286N/A * implements Iterator interface so we can directly give Attributes in the form of 286N/A /** Array of 3 strings. */ 286N/A /** Making the buffer accesible to derived class -- String buffer. */ 286N/A /** Making the buffer accesible to derived class -- String buffer. */ 286N/A /** stores character data. */ 286N/A /** Making the buffer accesible to derived class -- stores PI data */ 286N/A /** Single character array. */ 286N/A //skip element algorithm 286N/A //create a elemnet array of length equal to ELEMENT_ARRAY_LENGTH 286N/A //pointer location where last element was skipped 286N/A //2D array to store pointer info 286N/A /** Reusable Augmentations. */ 286N/A /** Default constructor. */ 286N/A // XMLDocumentScanner methods 286N/A * Sets the input source. 286N/A * @param inputSource The input source. 
286N/A * @throws IOException Thrown on i/o error. 286N/A // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); 286N/A }
// setInputSource(XMLInputSource) 286N/A * @param complete True if the scanner should scan the document 286N/A * completely, pushing all events to the registered 286N/A * document handler. A value of false indicates that 286N/A * that the scanner should only scan the next portion 286N/A * of the document and return. A scanner instance is 286N/A * permitted to completely scan a document if it does 286N/A * not support this "pull" scanning model. 286N/A * @return True if there is more to scan, false otherwise. 286N/A // keep dispatching "events" 286N/A //System.out.println(" get Document Handler in NSDocumentHandler " + fDocumentHandler ); 286N/A //fDocumentHandler.startDocument(fEntityManager.getEntityScanner(),fEntityManager.getEntityScanner().getVersion(),fNamespaceContext,null);// not able to get 286N/A //System.out.println(" in scann element"); 286N/A //fDocumentHandler.startElement(getElementQName(),fAttributes,null); 286N/A //check if getCharacterData() is the right function to retrieve ignorableWhitespace information. 286N/A //System.out.println("in the space"); 286N/A //fDocumentHandler.ignorableWhitespace(getCharacterData(), null); 286N/A //entity reference callback are given in startEntity 286N/A //System.out.println(" in COMMENT of the XMLNSDocumentScannerImpl"); 286N/A //all DTD related callbacks are handled in DTDScanner. 286N/A //1. Stax doesn't define DTD states as it does for XML Document. 286N/A //therefore we don't need to take care of anything here. So Just break; 286N/A //xxx: check if CDATA values comes from getCharacterData() function 286N/A //System.out.println(" in CDATA of the XMLNSDocumentScannerImpl"); 286N/A //do not give callback here. 286N/A //this callback is given in scanEndElement function. 286N/A //fDocumentHandler.endElement(getElementQName(),null); 286N/A //System.out.println("here in before calling next"); 286N/A //System.out.println("here in after calling next"); 286N/A }
// scanDocument(boolean):boolean 286N/A /** return the next state on the input 286N/A // XMLComponent methods 286N/A * Resets the component. The component can query the component manager 286N/A * about any features and properties that affect the operation of the 286N/A * @param componentManager The component manager. 286N/A * @throws SAXException Thrown by component on initialization error. 286N/A * For example, if a feature or property is 286N/A * required for the operation of the component, the 286N/A * component manager may throw a 286N/A * SAXNotRecognizedException or a 286N/A * SAXNotSupportedException. 286N/A // fDocumentSystemId = null; 286N/A //fAttributes.setNamespaces(fNamespaces); 286N/A //xxx: external entities are supported in Xerces 286N/A // it would be good to define feature for this case 559N/A // JAXP 1.5 features and properties 286N/A //fEntityManager.test(); 286N/A }
// reset(XMLComponentManager) 286N/A // fDocumentSystemId = null; 286N/A //fElementStack2.clear(); 286N/A //fReplaceEntityReferences = true; 286N/A //fSupportExternalEntities = true; 286N/A //if fIsCoalesce is set to true, set the value of fReplaceEntityReferences to true, 286N/A //if fIsCoalesce is set to false, take the value of fReplaceEntityReferences as set by the application 286N/A //we don't need to do this -- nb. 286N/A //setScannerState(SCANNER_STATE_CONTENT); 286N/A //setDriver(fContentDriver); 286N/A //fEntityManager.test(); 286N/A }
// reset(XMLComponentManager) 286N/A * Returns a list of feature identifiers that are recognized by 286N/A * this component. This method may return null if no features 286N/A * are recognized by this component. 286N/A }
// getRecognizedFeatures():String[] 286N/A * Sets the state of a feature. This method is called by the component 286N/A * manager any time after reset when a feature changes state. 286N/A * <strong>Note:</strong> Components should silently ignore features 286N/A * that do not affect the operation of the component. 286N/A * @param featureId The feature identifier. 286N/A * @param state The state of the feature. 286N/A * @throws SAXNotRecognizedException The component should not throw 286N/A * @throws SAXNotSupportedException The component should not throw 286N/A }
// setFeature(String,boolean) 286N/A * Returns a list of property identifiers that are recognized by 286N/A * this component. This method may return null if no properties 286N/A * are recognized by this component. 286N/A }
// getRecognizedProperties():String[] 286N/A * Sets the value of a property. This method is called by the component 286N/A * manager any time after reset when a property changes value. 286N/A * <strong>Note:</strong> Components should silently ignore properties 286N/A * that do not affect the operation of the component. 286N/A * @param propertyId The property identifier. 286N/A * @param value The value of the property. 286N/A * @throws SAXNotRecognizedException The component should not throw 286N/A * @throws SAXNotSupportedException The component should not throw 286N/A }
// setProperty(String,Object) 286N/A * Returns the default state for a feature, or null if this 286N/A * component does not want to report a default value for this 286N/A * @param featureId The feature identifier. 286N/A }
// getFeatureDefault(String):Boolean 286N/A * Returns the default state for a property, or null if this 286N/A * component does not want to report a default value for this 286N/A * @param propertyId The property identifier. 286N/A }
// getPropertyDefault(String):Object 286N/A // XMLDocumentSource methods 286N/A * @param documentHandler 286N/A //System.out.println(" In Set DOCUMENT HANDLER" + fDocumentHandler + " scanner =" + this); 286N/A }
// setDocumentHandler(XMLDocumentHandler) 286N/A /** Returns the document handler */ 286N/A // XMLEntityHandler methods 286N/A * This method notifies of the start of an entity. The DTD has the 286N/A * pseudo-name of "[dtd]" parameter entity names start with '%'; and 286N/A * general entities are just specified by their name. 286N/A * @param name The name of the entity. 286N/A * @param identifier The resource identifier. 286N/A * @param encoding The auto-detected IANA encoding name of the entity 286N/A * stream. This value will be null in those situations 286N/A * where the entity encoding is not auto-detected (e.g. 286N/A * internal entities or a document entity that is 286N/A * parsed from a java.io.Reader). 413N/A * @param augs Additional information that may include infoset augmentations 286N/A * @throws XNIException Thrown by handler to signal an error. 286N/A // keep track of this entity before fEntityDepth is increased 286N/A // WFC: entity declared in external subset in standalone doc 286N/A /** we are not calling the handlers yet.. */ 286N/A }
// startEntity(String,XMLResourceIdentifier,String) 286N/A * This method notifies the end of an entity. The DTD has the pseudo-name 286N/A * of "[dtd]" parameter entity names start with '%'; and general entities 286N/A * are just specified by their name. 286N/A * @param name The name of the entity. 413N/A * @param augs Additional information that may include infoset augmentations 286N/A * @throws XNIException Thrown by handler to signal an error. 286N/A * // flush possible pending output buffer - see scanContent 286N/A * if (fInScanContent && fStringBuffer.length != 0 286N/A * && fDocumentHandler != null) { 286N/A * fDocumentHandler.characters(fStringBuffer, null); 286N/A * fStringBuffer.length = 0; // make sure we know it's been flushed 286N/A // make sure markup is properly balanced 286N/A // Driver factory methods 286N/A /** Creates a content Driver. */ 286N/A }
// createContentDriver():Driver 286N/A * Scans an XML or text declaration. 286N/A * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 286N/A * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 286N/A * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) 286N/A * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 286N/A * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") 286N/A * | ('"' ('yes' | 'no') '"')) 286N/A * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 286N/A * @param scanningTextDecl True if a text declaration is to 286N/A * be scanned instead of an XML 286N/A // pseudo-attribute values 286N/A ///xxx see where its used.. this is not used anywhere. it may be useful for entity to store this information 286N/A //but this information is only related with Document Entity. 286N/A // set encoding on reader, only if encoding was not specified by the application explicitly 286N/A }
// scanXMLDeclOrTextDecl(boolean) 286N/A //XXX: why not this function behave as per the state of the parser? 286N/A * Scans a processing data. This is needed to handle the situation 286N/A * where a document starts with a processing instruction whose 286N/A * target name <em>starts with</em> "xml". (e.g. xmlfoo) 286N/A * @param target The PI target 286N/A * @param data The XMLStringBuffer to fill in with the data 286N/A //set the PI target and values 286N/A }
// scanPIData(String) 286N/A * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 286N/A * <strong>Note:</strong> Called after scanning past '<!--' 286N/A //getTextCharacters can also be called for reading comments 286N/A //xxx value returned by this function may not remain valid if another event is scanned. 286N/A //storing element raw name in a linear list of array 286N/A //storing elemnetPointer for particular element depth 286N/A //store pointer information only when element depth is less MAX_DEPTH_LIMIT 286N/A //identity comparison shouldn't take much time and we can rely on this 286N/A //since its guaranteed to have same object id for same string. 286N/A //reset the things and return. 286N/A //returns column information at which pointer was stored. 286N/A //Stores element pointer locations at particular depth , only 4 pointer locations 286N/A //are stored at particular depth for now. 286N/A //pointer was not stored because we reached the limit 286N/A //colum = 0 , means first element at particular depth 286N/A //column = 1, means second element at particular depth 286N/A // calle should make sure that it doesn't call for value outside allowed co-ordinates 286N/A //colum = 0 , means first element at particular depth 286N/A //column = 1, means second element at particular depth 286N/A // calle should make sure that it doesn't call for value outside allowed co-ordinates 286N/A //this function assumes that string passed is not null and skips 286N/A //the following string from the buffer this makes sure 286N/A //If the start element was completely skipped we should encounter either ' '(space), 286N/A //or '/' (in case of empty element) or '>' 286N/A if( c ==
' ' || c ==
'/' || c ==
'>'){
286N/A //if this character is still valid element name -- this means string can't match 286N/A //Look at the next element stored in the array list.. we might just get a match. 286N/A //reset it back to zero... we haven't got the correct subset yet. 286N/A //xxx: we can put some logic here as from what column it should start looking 286N/A //for now we always start at 0 286N/A //fallback to tolerant algorithm, it would look for differnt element stored at different 286N/A //depth and get us the pointer location. 286N/A //start of the column at which it should try searching 286N/A * Scans a start element. This method will handle the binding of 286N/A * namespace information and notifying the handler of the start 286N/A * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 286N/A * [40] STag ::= '<' Name (S Attribute)* S? '>' 286N/A * <strong>Note:</strong> This method assumes that the leading 286N/A * '<' character has been consumed. 286N/A * <strong>Note:</strong> This method uses the fElementQName and 286N/A * fAttributes variables. The contents of these variables will be 286N/A * destroyed. The caller should copy important information out of 286N/A * these variables before calling this method. 286N/A * NB: Content in fAttributes is valid only till the state of the parser is XMLEvent.START_ELEMENT 286N/A * @return True if element is empty. (i.e. It matches 286N/A // fElementQName will have the details of element just read.. 286N/A // fAttributes will have the details of all the attributes. 286N/A //when skipping is true and no more elements should be added 286N/A //get the stored element -- if everything goes right this should match the 286N/A //Be conservative -- if skipping fails -- stop. 
286N/A //if skipping fails reposition the stack or fallback to normal way of processing 286N/A //we are still at the stage of adding elements 286N/A //the elements were not matched or 286N/A //fSkip is not set to true 286N/A //get the next element from the stack 286N/A //when the elements are being added , we need to check if we are set for skipping the elements 286N/A //this sets the value of fAdd variable 286N/A //xxx: We dont need another pointer, fCurrentElement, we can use fElementQName 286N/A "ElementAttributeLimit",
286N/A //decrease the markup depth.. 286N/A // check that this element was opened in the same entity 286N/A //We should not be popping out the context here in endELement becaause the namespace context is still 286N/A //valid when parser is at the endElement state. 286N/A // fNamespaceContext.popContext(); 286N/A //pop the element off the stack.. 286N/A //complete element and attributes are traversed in this function so we can send a callback 286N/A //<strong>we shouldn't be sending callback in scanDocument()</strong> 286N/A }
// scanStartElement():boolean 286N/A * Looks for the close of start tag, i.e. if it finds '>' or '/>' 286N/A * Characters are consumed. 286N/A * [41] Attribute ::= Name Eq AttValue 286N/A * <strong>Note:</strong> This method assumes that the next 286N/A * character on the stream is the first character of the attribute 286N/A * <strong>Note:</strong> This method uses the fAttributeQName and 286N/A * fQName variables. The contents of these variables will be 286N/A * @param attributes The attributes list for the scanned attribute. 286N/A * protected void scanAttribute(AttributeIteratorImpl attributes) 286N/A * throws IOException, XNIException { 286N/A * if (DEBUG_START_END_ELEMENT) System.out.println(">>> scanAttribute()"); 286N/A * fEntityScanner.scanQName(fAttributeQName); 286N/A * String name = fEntityScanner.scanName(); 286N/A * fAttributeQName.setValues(null, name, name, null); 286N/A * fEntityScanner.skipSpaces(); 286N/A * if (!fEntityScanner.skipChar('=')) { 286N/A * reportFatalError("EqRequiredInAttribute", 286N/A * new Object[]{fAttributeQName.rawname}); 286N/A * fEntityScanner.skipSpaces(); 286N/A * int oldLen = attributes.getLength(); 286N/A /**xxx there is one check of duplicate attribute that has been removed. 
286N/A * attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 286N/A * // WFC: Unique Att Spec 286N/A * if (oldLen == attributes.getLength()) { 286N/A * reportFatalError("AttributeNotUnique", 286N/A * new Object[]{fCurrentElement.rawname, 286N/A * fAttributeQName.rawname}); 286N/A //REVISIT: one more case needs to be included: external PE and standalone is no 286N/A boolean isVC = fHasExternalDTD && !fStandalone; 286N/A scanAttributeValue(fTempString, fTempString2, 286N/A fAttributeQName.rawname, attributes, 286N/A //attributes.setValue(oldLen, fTempString.toString()); 286N/A //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 286N/A //attributes.setSpecified(oldLen, true); 286N/A AttributeImpl attribute = new AttributeImpl(fAttributeQName.prefix,fAttributeQName.localpart,fAttributeQName.uri,fTempString.toString(),fTempString2.toString(),XMLSymbols.fCDATASymbol,true); 286N/A fAttributes.addAttribute(attribute); 286N/A if (DEBUG_START_END_ELEMENT) System.out.println("<<< scanAttribute()"); 286N/A } // scanAttribute(XMLAttributes) 286N/A /** return the attribute iterator implementation */ 286N/A /** return if standalone is set */ 286N/A /** return if the doucment is standalone */ 286N/A * Scans an attribute name value pair. 286N/A * [41] Attribute ::= Name Eq AttValue 286N/A * <strong>Note:</strong> This method assumes that the next 286N/A * character on the stream is the first character of the attribute 286N/A * <strong>Note:</strong> This method uses the fAttributeQName and 286N/A * fQName variables. The contents of these variables will be 286N/A * @param attributes The attributes list for the scanned attribute. 286N/A //REVISIT: one more case needs to be included: external PE and standalone is no 286N/A //fTempString would store attribute value 286N/A ///fTempString2 would store attribute non-normalized value 286N/A //this function doesn't use 'attIndex'. 
We are adding the attribute later 286N/A //after we have figured out that current attribute is not namespace declaration 286N/A //since scanAttributeValue doesn't use attIndex parameter therefore we 286N/A //can safely add the attribute later.. 286N/A //if the attribute name already exists.. new value is replaced with old value 286N/A // WFC: Unique Att Spec 286N/A //attributes count will be same if the current attribute name already exists for this element name. 286N/A //this means there are two duplicate attributes. 286N/A //tmpString contains attribute value 286N/A //we are passing null as the attribute value 286N/A ///xxx: nonNormalizedValue is not being set as it is not required by SAX & DOM 286N/A //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 286N/A }
// scanAttribute(XMLAttributes) 286N/A * Scans element content. 286N/A * @return Returns the next character on the stream. 286N/A //EARLIER: scanContent() 286N/A //NOW: scanContent(XMLStringBuffer) 286N/A //It makes things easy if this functions takes XMLStringBuffer as parameter.. 286N/A //this function appends the data to the buffer. 286N/A //set the fTempString length to 0 before passing it on to scanContent 286N/A //scanContent sets the correct co-ordinates as per the content read 286N/A // happens when there is the character reference 286N/A //xxx: We know the next chracter.. we should just skip it and add ']' directlry 286N/A //fStringBuffer.clear(); 286N/A //xxx: We know the next chracter.. we should just skip it and add ']' directlry 286N/A // remember where we are in case we get an endEntity before we 286N/A // could flush the buffer out - this happens when we're parsing an 286N/A // entity which ends with a ] 286N/A // We work on a single character basis to handle cases such as: 286N/A // ']]]>' which we might otherwise miss. 286N/A //fDocumentHandler.characters(content, null); 286N/A * Scans a CDATA section. 286N/A * <strong>Note:</strong> This method uses the fTempString and 286N/A * fStringBuffer variables. 286N/A * @param complete True if the CDATA section is to be scanned 286N/A * @return True if CDATA is completely scanned. 286N/A //fDocumentHandler.startCDATA(null); 286N/A //scanData will fill the contentBuffer 286N/A /** We dont need all this code if we pass ']]>' as delimeter.. 286N/A * while (fEntityScanner.skipChar(']')) { 286N/A * //When we find more than 2 square brackets 286N/A * if (fDocumentHandler != null && brackets > 2) { 286N/A * //we dont need to clear the buffer.. 
286N/A * //contentBuffer.clear(); 286N/A * for (int i = 2; i < brackets; i++) { 286N/A * contentBuffer.append(']'); 286N/A * fDocumentHandler.characters(contentBuffer, null); 286N/A * if (fEntityScanner.skipChar('>')) { 286N/A * if (fDocumentHandler != null) { 286N/A * //we dont need to clear the buffer now.. 286N/A * //contentBuffer.clear(); 286N/A * contentBuffer.append("]]"); 286N/A * fDocumentHandler.characters(contentBuffer, null); 286N/A //contentBuffer.clear(); 286N/A //scan surrogates if any.... 286N/A //by this time we have also read surrogate contents if any... 286N/A //fDocumentHandler.characters(contentBuffer, null); 286N/A //fDocumentHandler.characters(contentBuffer, null); 286N/A //fDocumentHandler.endCDATA(null); 286N/A }
// scanCDATASection(XMLStringBuffer, boolean):boolean 286N/A * Scans an end element. 286N/A * [42] ETag ::= '</' Name S? '>' 286N/A * <strong>Note:</strong> This method uses the fElementQName variable. 286N/A * The contents of this variable will be destroyed. The caller should 286N/A * copy the needed information out of this variable before calling 286N/A * @return The element depth. 286N/A // Take advantage of the fact that next string _should_ be "fElementQName.rawName", 286N/A //In scanners most of the time is consumed on checks done for XML characters, we can 286N/A // optimize on it and avoid the checks done for endElement, 286N/A //we will also avoid symbol table lookup - neeraj.bajaj@sun.com 286N/A // this should work both for namespace processing true or false... 286N/A //REVISIT: if the string is not the same as expected.. we need to do better error handling.. 286N/A //We can skip this for now... In any case if the string doesn't match -- document is not well formed. 286N/A //we have increased the depth for two markup "<" characters 286N/A // check that this element was opened in the same entity 286N/A //We should not be popping out the context here in endELement becaause the namespace context is still 286N/A //valid when parser is at the endElement state. 286N/A // fNamespaceContext.popContext(); 286N/A //end element is scanned in this function so we can send a callback 286N/A //<strong>we shouldn't be sending callback in scanDocument()</strong> 286N/A }
// scanEndElement():int 286N/A * Scans a character reference. 286N/A * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 286N/A //xxx: How do we deal with this - how to return charReferenceValues 286N/A //now this is being commented because this is taken care in scanDocument() 286N/A //fDocumentHandler.characters(fStringBuffer2, null); 286N/A }
// scanCharReference() 286N/A * Scans an entity reference. 286N/A * @return returns true if the new entity is started. If it was built-in entity 286N/A * @throws IOException Thrown if i/o error occurs. 286N/A * @throws XNIException Thrown if handler throws exception upon 286N/A // handle built-in entities 286N/A //1. if the entity is external and support to external entities is not required 286N/A // 2. or entities should not be replaced 286N/A //3. or if it is a built-in entity reference. 286N/A // start general entity 286N/A //SUPPORT_DTD=false && ReplaceEntityReferences should throw exception 286N/A //REVISIT: one more case needs to be included: external PE and standalone is no 286N/A //we are starting the entity even if the entity was not declared 286N/A //if that was the case it is taken care of in XMLEntityManager.startEntity() 286N/A //we immediately call the endEntity. Application gets to know if there was 286N/A //any entity that was not declared. 286N/A //set the scanner state to content.. parser will automatically revive itself at any point of time. 286N/A //setScannerState(SCANNER_STATE_CONTENT); 286N/A }
// scanEntityReference() 286N/A * Calls document handler with a single character resulting from 286N/A * built-in entity resolution. 286N/A * @param entity built-in name 286N/A * @param XMLStringBuffer append the character to buffer 286N/A * we really don't need to call this function -- this function is only required when 286N/A * we integrate with the rest of Xerces2. So maintaining the current behavior and still 286N/A * calling this function to handle built-in entity reference. 286N/A //fDocumentHandler.characters(fTempString, null); 286N/A }
// handleCharacter(char) 286N/A * Sets the scanner state. 286N/A * @param state The new scanner state. 286N/A //System.out.print(fScannerState); 286N/A }
// setScannerState(int) 286N/A * @param Driver The new Driver. 286N/A /** Returns the scanner state name. */ 286N/A }
// getScannerStateName(int):String 286N/A //return the cached name 286N/A /** Returns the driver name. */ 286N/A }
// getDriverName():String 286N/A * @author Neeraj Bajaj, Sun Microsystems. 286N/A //raw name stored as characters 286N/A /** The next Element entry. */ 286N/A * Constructs a new Element from the given QName and next Element 286N/A * @author Neeraj Bajaj, Sun Microsystems. 286N/A //total number of elements 286N/A //Mark refers to the position 286N/A /** Default constructor. */ 286N/A * int length = fElements.length; 286N/A * Element [] temp = new Element[length * 2]; 286N/A * System.arraycopy(fElements, 0, temp, 0, length); 286N/A /** Check if the element scanned during the start element 286N/A *matches the stored element. 286N/A *@return true if the match suceeds. 286N/A //last depth is the depth when last elemnt was pushed 286N/A //if last depth is greater than current depth 286N/A //decrease the depth by 1 as arrays are 0 based 286N/A //we found the match and from next element skipping will start, add 1 286N/A //Once we get match decrease the count -- this was increased by nextElement() 286N/A }
// pushElement(QName):QName 286N/A * This function doesn't increase depth. The functionality of this function is 286N/A *broken down into two functions for efficiency. See {@code matchElement}. 286N/A * This function just returns the pointer to the object and its values are set. 286N/A *@return QName reference to the next element in the list 286N/A //if number of elements becomes equal to the length of array -- stop the skipping 286N/A //xxx: this is not correct, we are returning the last element 286N/A //this won't make any difference since flag has been set to 'false' 286N/A /** Note that this function is considerably different than nextElement() 286N/A * This function just returns the previously stored elements 286N/A //when position reaches number of elements in the list.. 286N/A //set the position back to mark, making it a circular linked list. 286N/A /** returns the current depth 286N/A /** Clears the stack without throwing away existing QName objects. */ 286N/A }
// class ElementStack 286N/A * Element stack. This stack operates without synchronization, error 286N/A * checking, and it re-uses objects instead of throwing popped items 286N/A * @author Andy Clark, IBM 286N/A //total number of elements 286N/A //Mark refers to the position 286N/A /** Default constructor. */ 286N/A * Pushes an element on the stack. 286N/A * <strong>Note:</strong> The QName values are copied into the 286N/A * stack. In other words, the caller does <em>not</em> orphan 286N/A * the element to the stack. Also, the QName object returned 286N/A * is <em>not</em> orphaned to the caller. It should be 286N/A * considered read-only. 286N/A * @param element The element to push onto the stack. 286N/A * @return Returns the actual QName object that stores the 286N/A //XXX: THIS FUNCTION IS NOT USED 286N/A }
// pushElement(QName):QName 286N/A /** Note that this function is considerably different than nextElement() 286N/A * This function just returns the previously stored elements 286N/A //when position reaches number of elements in the list.. 286N/A //set the position back to mark, making it a circular linked list. 286N/A //store the position of last opened tag at particular depth 286N/A //fInt[++fDepth] = fPosition; 286N/A //return fElements[fPosition++]; 286N/A /** This function should be called only when element was skipped sucessfully. 286N/A * 1. Increase the depth - because element was sucessfully skipped. 286N/A *2. Store the position of the element token in array "last opened tag" at depth. 286N/A *3. increase the position counter so as to point to the next element in the array 286N/A /** Check if the element scanned during the start element 286N/A *matches the stored element. 286N/A *@return true if the match suceeds. 286N/A //last depth is the depth when last elemnt was pushed 286N/A //if last depth is greater than current depth 286N/A //if(DEBUG_SKIP_ALGORITHM){ 286N/A // System.out.println("Check if the element " + element.rawname + " matches"); 286N/A // System.out.println("fLastDepth = " + fLastDepth); 286N/A // System.out.println("fDepth = " + fDepth); 286N/A //decrease the depth by 1 as arrays are 0 based 286N/A //Once we get match decrease the count -- this was increased by nextElement() 286N/A //store the position for the current depth 286N/A //when we are adding the elements, when skipping 286N/A //starts even then this should be tracked ie. when 286N/A //from next element skipping will start, add 1 286N/A //sicne fInt[fDepth] contains pointer to the element array which are 0 based. 286N/A //if number of elements becomes equal to the length of array -- stop the skipping 286N/A //xxx: should we do "fCount == fInt.length" 286N/A //reposition the stack -- it seems to be too complex document and there is no symmerty in structure 286N/A }
// matchElement(QName):QName 286N/A * Returns the next element on the stack. 286N/A * @return Returns the actual QName object. Callee should 286N/A * use this object to store the details of next element encountered. 286N/A //boundary checks are done in matchElement() 286N/A }
// pushElement(QName):QName 286N/A * Pops an element off of the stack by setting the values of 286N/A * <strong>Note:</strong> The object returned is <em>not</em> 286N/A * orphaned to the caller. Therefore, the caller should consider 286N/A * the object to be read-only. 286N/A //return the same object that was pushed -- this would avoid 286N/A //setting the values for every end element. 286N/A //STRONG: this object is read only -- this object reference shouldn't be stored. 286N/A //element.setValues(fElements[--fDepth]); 286N/A /** Reposition the stack. fInt [] contains all the opened tags at particular depth. 286N/A * Transfer all the opened tags starting from depth '2' to the current depth and reposition them 286N/A /** Clears the stack without throwing away existing QName objects. */ 286N/A * This function is a result of optimization done for endElement -- 286N/A * we don't need to set the value for every end element encountered. 286N/A * For Well formedness checks we can have the same QName object that was pushed. 286N/A * the values will be set only if application needs to know about the endElement 286N/A * -- neeraj.bajaj@sun.com 286N/A }
// class ElementStack 286N/A * Drives the parser to the next state/event on the input. Parser is guaranteed 286N/A * Internally XML document is divided into several states. Each state represents 286N/A * a sections of XML document. When this functions returns normally, it has read 286N/A * the section of XML document and returns the state corresponding to section of 286N/A * document which has been read. For optimizations, a particular driver 286N/A * can read ahead of the section of document (state returned) just read and 286N/A * can maintain a different internal state. 286N/A * @author Neeraj Bajaj, Sun Microsystems 286N/A * Drives the parser to the next state/event on the input. Parser is guaranteed 286N/A * Internally XML document is divided into several states. Each state represents 286N/A * a sections of XML document. When this functions returns normally, it has read 286N/A * the section of XML document and returns the state corresponding to section of 286N/A * document which has been read. For optimizations, a particular driver 286N/A * can read ahead of the section of document (state returned) just read and 286N/A * can maintain a different internal state. 286N/A * @return state representing the section of document just read. 286N/A * @throws IOException Thrown on i/o error. 286N/A * @throws XNIException Thrown on parse error. 286N/A * Driver to handle content scanning. This driver is capable of reading 286N/A * the fragment of XML document. When it has finished reading fragment 286N/A * of XML documents, it can pass the job of reading to another driver. 286N/A * This class has been modified as per the new design which is more suited to 286N/A * efficiently build pull parser. Lot of performance improvements have been done and 286N/A * @author Neeraj Bajaj, Sun Microsystems 286N/A * @author Andy Clark, IBM 286N/A * decides the appropriate state of the parser 286N/A //element content is there.. 
286N/A * SCANNER_STATE_CONTENT and SCANNER_STATE_START_OF_MARKUP are two super states of the parser. 286N/A * At any point of time when in doubt over the current state of the parser, the state should be 286N/A * set to SCANNER_STATE_CONTENT. Parser will automatically revive itself and will set state of 286N/A * the parser to one of its sub states. 286N/A * sub states are defined in the parser on the basis of different XML components like 286N/A * SCANNER_STATE_ENTITY_REFERENCE , SCANNER_STATE_START_ELEMENT, SCANNER_STATE_CDATA etc.. 286N/A * These sub states help the parser to have fine control over the parsing. These are the 286N/A * different mileposts; the parser stops at each sub state (milepost). Based on this state it is 286N/A * decided if parser needs to stop at next milepost ?? 286N/A * Drives the parser to the next state/event on the input. Parser is guaranteed 286N/A * is divided into several states. Each state represents a section of XML 286N/A * document. When this function returns normally, it has read the section 286N/A * of XML document and returns the state corresponding to section of 286N/A * document which has been read. For optimizations, a particular driver 286N/A * can read ahead of the section of document (state returned) just read and 286N/A * can maintain a different internal state. 286N/A * State returned corresponds to StAX states. 286N/A * @return state representing the section of document just read. 286N/A * @throws IOException Thrown on i/o error. 286N/A * @throws XNIException Thrown on parse error. 286N/A //decide the actual sub state of the scanner. For more information refer to the javadoc of 286N/A //element content is there.. 286N/A }
//case: SCANNER_STATE_START_OF_MARKUP 286N/A //do some special handling if isCoalesce is set to true. 286N/A //if the last section was character data 286N/A //if we don't encounter any CDATA or ENTITY REFERENCE and current state is also not SCANNER_STATE_CHARACTER_DATA 286N/A //return the last scanned character data. 286N/A }
//if last section was CDATA or ENTITY REFERENCE 286N/A //xxx: there might be another entity reference or CDATA after this 286N/A //<foo>blah blah &<<![CDATA[[aa]]>blah blah</foo> 286N/A //and current state is not SCANNER_STATE_CHARACTER_DATA 286N/A //or SCANNER_STATE_CDATA or SCANNER_STATE_REFERENCE 286N/A //this means there is nothing more to be coalesced. 286N/A //return the CHARACTERS event. 286N/A //xxx this function returns true when element is empty.. can be linked to end element event. 286N/A //returns true if the element is empty 286N/A //if the element is empty the next event is "end element" 286N/A //set the next possible state 286N/A //if last section was either entity reference or cdata or character data we should be using buffer 286N/A //When coalesce is set to true and last state was REFERENCE or CDATA or CHARACTER_DATA, buffer should not be cleared. 286N/A //set the fTempString length to 0 before passing it on to scanContent 286N/A //scanContent sets the correct co-ordinates as per the content read 286N/A //check if we have reached end of element 286N/A //increase the mark up depth 286N/A //check if its start of new element 286N/A //there can be cdata ahead if coalesce is true we should call again 286N/A //in case last section was either entity reference or cdata or character data -- we should be using buffer 286N/A // happens when there is the character reference 286N/A //xxx: We know the next chracter.. we should just skip it and add ']' directlry 286N/A //fStringBuffer.clear(); 286N/A //xxx: We know the next chracter.. we should just skip it and add ']' directlry 286N/A // remember where we are in case we get an endEntity before we 286N/A // could flush the buffer out - this happens when we're parsing an 286N/A // entity which ends with a ] 286N/A // We work on a single character basis to handle cases such as: 286N/A // ']]]>' which we might otherwise miss. 286N/A //xxx: we should be using only one buffer.. 
286N/A // we need not to grow the buffer only when isCoalesce() is not true; 286N/A }
//xxx what should be the behavior if entity reference is present in the content ? 286N/A }
///xxx since this part is also characters, it should be merged... 286N/A // special case: surrogates 286N/A //xxx: scanContent also gives character callback. 286N/A //we should not be iterating again if fIsCoalesce is not set to true 286N/A //if (fDocumentHandler != null) { 286N/A // fDocumentHandler.characters(fContentBuffer, null); 286N/A //if fIsCoalesce is true there might be more data so call fDriver.next() 286N/A //set it back to false. 286N/A //check the case when there is comment after single element document 286N/A //<foo/> and some comment after this 286N/A //It is last element of the document 286N/A //if element depth is zero , it indicates the end of the document 286N/A //the state shouldn't be set, because it is set by elementDepthIsZeroHook() function 286N/A //xxx understand this point once again.. 286N/A //clear the buffer first 286N/A //xxx: which buffer should be passed. Ideally we shouldn't have 286N/A //more than two buffers -- 286N/A //xxx: where should we add the switch for buffering. 286N/A //xxx: What if CDATA is the first event 286N/A //<foo><![CDATA[hello<><>]]>append</foo> 286N/A //we should not clear the buffer only when the last state was either SCANNER_STATE_REFERENCE or 286N/A //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 286N/A }
//if we dont need to coalesce clear the buffer 286N/A //CDATA section is completely read in all the case. 286N/A //1. if fIsCoalesce is set to true we set the variable fLastSectionWasCData to true 286N/A //and just call fDispatche.next(). Since we have set the scanner state to 286N/A //SCANNER_STATE_CONTENT (super state) parser will automatically recover and 286N/A //behave appropriately. When isCoalesce is set to true we dont need to reportCDATA event 286N/A //2. Check if application has set for reporting CDATA event 286N/A //3. if the application has neither set the fIsCoalesce to true nor fReportCdataEvent 286N/A //return the cdata event as characters. 286N/A //there might be more data to coalesce. 286N/A //we should not clear the buffer only when the last state was either CDATA or 286N/A //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 286N/A //fLastSectionWasEntityReference or fLastSectionWasCData are only 286N/A //used when fIsCoalesce is set to true. 286N/A }
//if we don't need to coalesce clear the buffer 286N/A //take care of character reference 286N/A // this function also starts new entity 286N/A //if there was built-in entity reference & coalesce is not true 286N/A //if there was a text declaration, call next() it will be taken care of. 286N/A // Skip the entity reference, we don't care 286N/A //Whether it was character reference, entity reference or built-in entity 286N/A //set the next possible state to SCANNER_STATE_CONTENT 286N/A // NOTE: special case where entity starts with a PI 286N/A // whose name starts with "xml" (e.g. "xmlfoo") 286N/A // standard text declaration 286N/A //xxx: this function gives callback 286N/A // now that we've straightened out the readers, we can read in chunks: 286N/A //xxx: we don't return any state, so how do we get to know about TEXT declarations. 286N/A //it seems we have to be careful when to allow function issue a callback 286N/A //and when to allow adapter issue a callback. 286N/A //rest would be taken care of by fTrailingMiscDriver set by scanRootElementHook 286N/A // premature end of file 286N/A // NOTE: These hook methods are added so that the full document 286N/A // scanner can share the majority of code with this class. 286N/A * Scan for DOCTYPE hook. This method is a hook for subclasses 286N/A * to add code to handle scanning for the "DOCTYPE" string 286N/A * after the string "<!" has been scanned. 286N/A * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE" 286N/A }
// scanForDoctypeHook():boolean 286N/A * Element depth is zero. This method is a hook for subclasses 286N/A * to add code to handle when the element depth hits zero. When 286N/A * scanning a document fragment, an element depth of zero is 286N/A * normal. However, when scanning a full XML document, the 286N/A * scanner must handle the trailing miscellaneous section of 286N/A * the document after the end of the document's root element. 286N/A * @return True if the caller should stop and return true which 286N/A * allows the scanner to switch to a new scanning 286N/A * driver. A return value of false indicates that 286N/A * the content driver should continue as normal. 286N/A }
// elementDepthIsZeroHook():boolean 286N/A * Scan for root element hook. This method is a hook for 286N/A * subclasses to add code that handles scanning for the root 286N/A * element. When scanning a document fragment, there is no 286N/A * "root" element. However, when scanning a full XML document, 286N/A * the scanner must handle the root element specially. 286N/A * @return True if the caller should stop and return true which 286N/A * allows the scanner to switch to a new scanning 286N/A * driver. A return value of false indicates that 286N/A * the content driver should continue as normal. 286N/A }
// scanRootElementHook():boolean 286N/A * End of file hook. This method is a hook for subclasses to 286N/A * add code that handles the end of file. The end of file in 286N/A * a document fragment is OK if the markup depth is zero. 286N/A * However, when scanning a full XML document, an end of file 286N/A // NOTE: An end of file is only an error if we were 286N/A // in the middle of scanning some markup. -Ac 286N/A }
// class FragmentContentDriver 286N/A /** this function gets an XMLString (which is used to store the attribute value) from the special pool 286N/A * maintained for attributes. 286N/A * fAttributeCacheUsedCount tracks the number of attributes that have been consumed from the pool. 286N/A * if all the attributes have been consumed, it adds a new XMLString to the pool and returns the same 286N/A * @return XMLString XMLString used to store an attribute value. 286N/A * Implements XMLBufferListener interface. 286N/A * receives callbacks from {@link XMLEntityReader } when buffer 286N/A * @param refreshPosition 286N/A //If you are reading attributes and you got a callback 286N/A //cache available attributes. 286N/A //since fTempString directly matches to the underlying main buffer 286N/A //store the data into buffer 286N/A //clear the XMLString so that data can't be added again. 286N/A}
// class XMLDocumentFragmentScannerImpl