/* * reserved comment block * DO NOT REMOVE OR ALTER! */ /* * The Apache Software License, Version 1.1 * * * Copyright (c) 1999-2002 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Xerces" and "Apache Software Foundation" must * not be used to endorse or promote products derived from this * software without prior written permission. For written * permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without prior written * permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation and was * originally based on software copyright (c) 1999, International * Business Machines, Inc., http://www.apache.org. For more * information on the Apache Software Foundation, please see * . */ package com.sun.org.apache.xerces.internal.impl.dtd.models; import java.util.HashMap; import com.sun.org.apache.xerces.internal.impl.dtd.XMLContentSpec; import com.sun.org.apache.xerces.internal.xni.QName; /** * @version $Id: DFAContentModel.java,v 1.4 2010/08/06 23:49:43 joehw Exp $ * DFAContentModel is the derivative of ContentModel that does * all of the non-trivial element content validation. This class does * the conversion from the regular expression to the DFA that * it then uses in its validation algorithm. *

* Note: Upstream work insures that this class will never see * a content model with PCDATA in it. Any model with PCDATA is 'mixed' * and is handled via the MixedContentModel class since mixed models * are very constrained in form and easily handled via a special case. * This also makes implementation of this class much easier. * * @xerces.internal * * @version $Id: DFAContentModel.java,v 1.4 2010/08/06 23:49:43 joehw Exp $ */ public class DFAContentModel implements ContentModelValidator { // // Constants // // special strings /** Epsilon string. */ private static String fEpsilonString = "<>"; /** End-of-content string. */ private static String fEOCString = "<>"; /** initializing static members **/ static { fEpsilonString = fEpsilonString.intern(); fEOCString = fEOCString.intern(); } // debugging /** Set to true to debug content model validation. */ private static final boolean DEBUG_VALIDATE_CONTENT = false; // // Data // /* this is the EquivClassComparator object */ //private EquivClassComparator comparator = null; /** * This is the map of unique input symbol elements to indices into * each state's per-input symbol transition table entry. This is part * of the built DFA information that must be kept around to do the * actual validation. */ private QName fElemMap[] = null; /** * This is a map of whether the element map contains information * related to ANY models. */ private int fElemMapType[] = null; /** The element map size. */ private int fElemMapSize = 0; /** Boolean to distinguish Schema Mixed Content */ private boolean fMixed; /** * The NFA position of the special EOC (end of content) node. This * is saved away since it's used during the DFA build. */ private int fEOCPos = 0; /** * This is an array of booleans, one per state (there are * fTransTableSize states in the DFA) that indicates whether that * state is a final state. */ private boolean fFinalStateFlags[] = null; /** * The list of follow positions for each NFA position (i.e. for each * non-epsilon leaf node.) This is only used during the building of * the DFA, and is let go afterwards. */ private CMStateSet fFollowList[] = null; /** * This is the head node of our intermediate representation. It is * only non-null during the building of the DFA (just so that it * does not have to be passed all around.) Once the DFA is built, * this is no longer required so its nulled out. */ private CMNode fHeadNode = null; /** * The count of leaf nodes. This is an important number that set some * limits on the sizes of data structures in the DFA process. */ private int fLeafCount = 0; /** * An array of non-epsilon leaf nodes, which is used during the DFA * build operation, then dropped. */ private CMLeaf fLeafList[] = null; /** Array mapping ANY types to the leaf list. */ private int fLeafListType[] = null; //private ContentLeafNameTypeVector fLeafNameTypeVector = null; /** * The string pool of our parser session. This is set during construction * and kept around. */ //private StringPool fStringPool = null; /** * This is the transition table that is the main by product of all * of the effort here. It is an array of arrays of ints. The first * dimension is the number of states we end up with in the DFA. The * second dimensions is the number of unique elements in the content * model (fElemMapSize). Each entry in the second dimension indicates * the new state given that input for the first dimension's start * state. *

* The fElemMap array handles mapping from element indexes to * positions in the second dimension of the transition table. */ private int fTransTable[][] = null; /** * The number of valid entries in the transition table, and in the other * related tables such as fFinalStateFlags. */ private int fTransTableSize = 0; /** * Flag that indicates that even though we have a "complicated" * content model, it is valid to have no content. In other words, * all parts of the content model are optional. For example: *

     *      <!ELEMENT AllOptional (Optional*,NotRequired?)>
     *

*/ private boolean fEmptyContentIsValid = false; // temp variables /** Temporary qualified name. */ private final QName fQName = new QName(); // // Constructors // // // Constructors // /** * Constructs a DFA content model. * * @param syntaxTree The syntax tree of the content model. * @param leafCount The number of leaves. * @param mixed * */ public DFAContentModel(CMNode syntaxTree, int leafCount, boolean mixed) { // Store away our index and pools in members //fStringPool = stringPool; fLeafCount = leafCount; // this is for Schema Mixed Content fMixed = mixed; // // Ok, so lets grind through the building of the DFA. This method // handles the high level logic of the algorithm, but it uses a // number of helper classes to do its thing. // // In order to avoid having hundreds of references to the error and // string handlers around, this guy and all of his helper classes // just throw a simple exception and we then pass it along. // buildDFA(syntaxTree); } // // ContentModelValidator methods // /** * Check that the specified content is valid according to this * content model. This method can also be called to do 'what if' * testing of content models just to see if they would be valid. *

* A value of -1 in the children array indicates a PCDATA node. All other * indexes will be positive and represent child elements. The count can be * zero, since some elements have the EMPTY content model and that must be * confirmed. * * @param children The children of this element. Each integer is an index within * the StringPool of the child element name. An index * of -1 is used to indicate an occurrence of non-whitespace character * data. * @param offset Offset into the array where the children starts. * @param length The number of entries in the children array. * * @return The value -1 if fully valid, else the 0 based index of the child * that first failed. If the value returned is equal to the number * of children, then the specified children are valid but additional * content is required to reach a valid ending state. * */ public int validate(QName[] children, int offset, int length) { if (DEBUG_VALIDATE_CONTENT) System.out.println("DFAContentModel#validateContent"); // // A DFA content model must *always* have at least 1 child // so a failure is given if no children present. // // Defect 782: This is an incorrect statement because a DFA // content model is also used for constructions such as: // // (Optional*,NotRequired?) // // where a perfectly valid content would be NO CHILDREN. // Therefore, if there are no children, we must check to // see if the CMNODE_EOC marker is a valid start state! -Ac // if (length == 0) { if (DEBUG_VALIDATE_CONTENT) { System.out.println("!!! no children"); System.out.println("elemMap="+fElemMap); for (int i = 0; i < fElemMap.length; i++) { String uri = fElemMap[i].uri; String localpart = fElemMap[i].localpart; System.out.println("fElemMap["+i+"]="+uri+","+ localpart+" ("+ uri+", "+ localpart+ ')'); } System.out.println("EOCIndex="+fEOCString); } return fEmptyContentIsValid ? -1 : 0; } // if child count == 0 // // Lets loop through the children in the array and move our way // through the states. Note that we use the fElemMap array to map // an element index to a state index. // int curState = 0; for (int childIndex = 0; childIndex < length; childIndex++) { // Get the current element index out final QName curElem = children[offset + childIndex]; // ignore mixed text if (fMixed && curElem.localpart == null) { continue; } // Look up this child in our element map int elemIndex = 0; for (; elemIndex < fElemMapSize; elemIndex++) { int type = fElemMapType[elemIndex] & 0x0f ; if (type == XMLContentSpec.CONTENTSPECNODE_LEAF) { //System.out.println("fElemMap["+elemIndex+"]: "+fElemMap[elemIndex]); if (fElemMap[elemIndex].rawname == curElem.rawname) { break; } } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY) { String uri = fElemMap[elemIndex].uri; if (uri == null || uri == curElem.uri) { break; } } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL) { if (curElem.uri == null) { break; } } else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) { if (fElemMap[elemIndex].uri != curElem.uri) { break; } } } // If we didn't find it, then obviously not valid if (elemIndex == fElemMapSize) { if (DEBUG_VALIDATE_CONTENT) { System.out.println("!!! didn't find it"); System.out.println("curElem : " +curElem ); for (int i=0; i