// src/dom/xpathparser.h
//
// xpathparser.h revision 6cd2e86330e1049942b9ce57d4f10bbe2542067d
#ifndef SEEN_XPATHPARSER_H
#define SEEN_XPATHPARSER_H

/**
 * @file
 * Phoebe DOM Implementation.
 *
 * This is a C++ approximation of the W3C DOM model, which follows
 * fairly closely the specifications in the various .idl files, copies of
 * which are provided for reference.  Most important is this one:
 *
 * http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html
 *
 * This header declares the lexical lookup tables, the LexTok token class
 * and the XPathParser class of the XPath implementation.
 */
/*
 * Authors:
 *   Bob Jamison
 *
 * Copyright (C) 2005-2007 Bob Jamison
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */


#include <stdio.h>
#include <stdarg.h>

#include <string>
#include <vector>

#include "dom.h"
#include "xpathtoken.h"

namespace org
{
namespace w3c
{
namespace dom
{
namespace xpath
{

// Aliases that make the DOM types of the enclosing dom namespace
// available under the same names inside the xpath namespace.
typedef dom::DOMString DOMString;
typedef dom::Node Node;
typedef dom::NodeList  NodeList;


//########################################################################
//# L E X I C A L    D E F I N I T I O N S
//########################################################################


/**
 * One row of a lexical lookup table: a numeric code paired with the text
 * that belongs to it.  The tables in this file end with a row whose sval
 * is NULL, which is how loops over them detect the end.
 */
typedef struct
{
    int         ival;  // numeric code (an enum value in the tables of this file)
    const char *sval;  // text for the code; NULL in the terminating row
} LookupEntry;


//Note:  in the following definitions, where the starts of
//strings are similar, put the longer definitions first
//(for example "ancestor-or-self" before "ancestor").

/**
 * Codes for the four XPath node-type keywords.  The trailing comment on
 * each enumerator is the keyword that nodeTypeTable pairs with it.
 */
typedef enum
{
    COMMENT                = 0,  // "comment"
    TEXT                   = 1,  // "text"
    PROCESSING_INSTRUCTION = 2,  // "processing-instruction"
    NODE                   = 3   // "node"
} NodeType;


/**
 * Keyword text for every NodeType value.  The final row (ival -1,
 * sval NULL) terminates the table; loops stop when they see a NULL sval.
 */
static LookupEntry nodeTypeTable [] =
{
    {  COMMENT,                "comment"                },
    {  TEXT,                   "text"                   },
    {  PROCESSING_INSTRUCTION, "processing-instruction" },
    {  NODE,                   "node"                   },
    { -1,                      NULL                     }
};


/**
 * Codes for the thirteen XPath axis names.  The trailing comment on each
 * enumerator is the axis name that axisNameTable pairs with it.
 */
typedef enum
{
    ANCESTOR_OR_SELF   = 0,   // "ancestor-or-self"
    ANCESTOR           = 1,   // "ancestor"
    ATTRIBUTE          = 2,   // "attribute"
    CHILD              = 3,   // "child"
    DESCENDANT_OR_SELF = 4,   // "descendant-or-self"
    DESCENDANT         = 5,   // "descendant"
    FOLLOWING_SIBLING  = 6,   // "following-sibling"
    FOLLOWING          = 7,   // "following"
    NAMESPACE          = 8,   // "namespace"
    PARENT             = 9,   // "parent"
    PRECEDING_SIBLING  = 10,  // "preceding-sibling"
    PRECEDING          = 11,  // "preceding"
    SELF               = 12   // "self"
} AxisNameType;


/**
 * Axis-name text for every AxisNameType value.  Where two names share a
 * prefix the longer one comes first ("ancestor-or-self" before
 * "ancestor", "descendant-or-self" before "descendant", and so on); keep
 * that order when editing.  The final row (ival -1, sval NULL) terminates
 * the table.
 */
static LookupEntry axisNameTable [] =
{
    {  ANCESTOR_OR_SELF,    "ancestor-or-self"  },
    {  ANCESTOR,            "ancestor"          },
    {  ATTRIBUTE,           "attribute"         },
    {  CHILD,               "child"             },
    {  DESCENDANT_OR_SELF,  "descendant-or-self"},
    {  DESCENDANT,          "descendant"        },
    {  FOLLOWING_SIBLING,   "following-sibling" },
    {  FOLLOWING,           "following"         },
    {  NAMESPACE,           "namespace"         },
    {  PARENT,              "parent"            },
    {  PRECEDING_SIBLING,   "preceding-sibling" },
    {  PRECEDING,           "preceding"         },
    {  SELF,                "self"              },
    { -1,                   NULL                }
};


/**
 * Type codes for lexical tokens.
 *
 * The values fall into two groups.  NONE through VARIABLE_REFERENCE are
 * the expression tokens whose display strings are listed in
 * exprTokenTable; AND through GREATER_THAN are the operator tokens whose
 * strings are listed in operatorTable.
 *
 * NOTE(review): the comment on exprTokenTable asks that the table be kept
 * in step with this enum, so treat the enumerator order as significant.
 */
typedef enum
{
    NONE = 0,
    CHAR, // a single character; the default if none of the below applies
    // Expression tokens
    LPAREN,
    RPAREN,
    LBRACKET,
    RBRACKET,
    DOUBLE_DOT,
    DOT,
    AMPR,   // the '@' character (exprTokenTable maps AMPR to "@")
    COMMA,
    DOUBLE_COLON,
    NAME_TEST,
    NODE_TYPE,
    OPERATOR,
    FUNCTION_NAME,
    AXIS_NAME,
    LITERAL,
    NUMBER,
    VARIABLE_REFERENCE,
    // Operator tokens
    AND,
    OR,
    MOD,
    DIV,
    MULTIPLY,
    DOUBLE_SLASH,
    SLASH,
    PIPE,
    PLUS,
    MINUS,
    EQUALS,
    NOT_EQUALS,
    LESS_THAN_EQUALS,
    LESS_THAN,
    GREATER_THAN_EQUALS,
    GREATER_THAN
} LexTokType;


/*
 * Display strings for the expression-token part of LexTokType
 * (NONE through VARIABLE_REFERENCE).  The operator tokens are not in this
 * table; they are listed in operatorTable.
 *
 * Be VERY careful that this table matches the LexTokType enum
 * declaration above.  Where two strings share a prefix the longer one
 * comes first (".." before ".").  The final row (ival -1, sval NULL)
 * terminates the table.
 */
static LookupEntry exprTokenTable [] =
{
    {  NONE,                "xxNONExx"          },
    {  CHAR,                "CHAR"              },
    // Expression tokens
    {  LPAREN,              "("                 },
    {  RPAREN,              ")"                 },
    {  LBRACKET,            "["                 },
    {  RBRACKET,            "]"                 },
    {  DOUBLE_DOT,          ".."                },
    {  DOT,                 "."                 },
    {  AMPR,                "@"                 },
    {  COMMA,               ","                 },
    {  DOUBLE_COLON,        "::"                },
    {  NAME_TEST,           "NameTest"          },
    {  NODE_TYPE,           "NodeType"          },
    {  OPERATOR,            "Operator"          },
    {  FUNCTION_NAME,       "FunctionName"      },
    {  AXIS_NAME,           "AxisName"          },
    {  LITERAL,             "Literal"           },
    {  NUMBER,              "Number"            },
    {  VARIABLE_REFERENCE,  "VariableReference" },
    { -1,                   NULL                }
};

/*
 * Text of the operator part of LexTokType (AND through GREATER_THAN),
 * preceded by a placeholder row for NONE.  Where two operators share a
 * prefix the longer one comes first ("//" before "/", "<=" before "<",
 * ">=" before ">"); keep that order when editing.  The final row
 * (ival -1, sval NULL) terminates the table.
 */
static LookupEntry operatorTable [] =
{
    {  NONE,                "xxNONExx"          },
    // Operator tokens
    {  AND,                 "and"               },
    {  OR,                  "or"                },
    {  MOD,                 "mod"               },
    {  DIV,                 "div"               },
    {  MULTIPLY,            "*"                 },
    {  DOUBLE_SLASH,        "//"                },
    {  SLASH,               "/"                 },
    {  PIPE,                "|"                 },
    {  PLUS,                "+"                 },
    {  MINUS,               "-"                 },
    {  EQUALS,              "="                 },
    {  NOT_EQUALS,          "!="                },
    {  LESS_THAN_EQUALS,    "<="                },
    {  LESS_THAN,           "<"                 },
    {  GREATER_THAN_EQUALS, ">="                },
    {  GREATER_THAN,        ">"                 },
    { -1,                   NULL                }
};


/**
 *
 */
class LexTok
{
public:
    LexTok(const LexTok &tok)
        {
        type     = tok.type;
        location = tok.location;
        sval     = tok.sval;
        dval     = tok.dval;
        ival     = tok.ival;
        }
    LexTok()
        { init(); }
    LexTok(int theType, int loc)
        { init(); type = theType; location = loc;}
    LexTok(int theType, int loc, const DOMString &val)
        { init(); type = theType; location = loc; sval = val; }
    LexTok(int theType, int loc, double val)
        { init(); type = theType; location = loc; dval = val; }
    LexTok(int theType, int loc, long val)
        { init(); type = theType; location = loc; ival = val; }

    void print()
        {
        if (type == OPERATOR)
            {
            char const *tokenStr = "unknown";
            for (LookupEntry const *entry = operatorTable; entry->sval ; entry++)
                {
                if (entry->ival == ival)
                    {
                    tokenStr = entry->sval;
                    break;
                    }
                }
            printf("(%s)\n", tokenStr);
            }
        else if (type == NODE_TYPE)
            {
            char const *tokenStr = "unknown";
            for (LookupEntry *entry = nodeTypeTable; entry->sval ; entry++)
                {
                if (entry->ival == ival)
                    {
                    tokenStr = entry->sval;
                    break;
                    }
                }
            printf("{{%s}}\n", tokenStr);
            }
        else if (type == AXIS_NAME)
            {
            char const *tokenStr = "unknown";
            for (LookupEntry *entry = axisNameTable; entry->sval ; entry++)
                {
                if (entry->ival == ival)
                    {
                    tokenStr = entry->sval;
                    break;
                    }
                }
            printf("{%s}\n", tokenStr);
            }
        else if (type == CHAR)
            printf("'%c'\n", (char)ival);
        else if (type == NAME_TEST)
            printf("\"%s\"\n", sval.c_str());
        else if (type == LITERAL)
            printf("L'%s'\n", sval.c_str());
        else if (type == FUNCTION_NAME)
            printf("%s()\n", sval.c_str());
        else if (type == NUMBER)
            printf("#%f\n", dval);
        else
            {
            char const *tokenStr = "unknown";
            for (LookupEntry *entry = exprTokenTable; entry->sval ; entry++)
                {
                if (entry->ival == type)
                    {
                    tokenStr = entry->sval;
                    break;
                    }
                }
            printf("%s\n", tokenStr);
            //printf("%s [%s/%f/%ld]\n", tokenStr, sval.c_str(), dval, ival);
            }
        }

    int getType()
	{ return type; }
    int getLocation()
        { return location; }
    DOMString &getStringValue()
        { return sval; }
    double getDoubleValue()
        { return dval; }
    long getIntValue()
        { return ival; }

private:
    void  init()
        {
        type     = NONE;
        location = 0;
        dval     = 0.0;
        ival     = 0;
        }

    int       type;
    int       location;
    DOMString sval;
    double    dval;
    long      ival;
};


//########################################################################
//# P A R S E R
//########################################################################

/**
 * Parser for XPath expressions.
 *
 * The public interface is parse(), which processes an expression string,
 * and evaluate(), which is documented below as the entry point used by
 * the xpath-dom API.  The private part declares message helpers, a
 * lexical scanner whose result is kept in lexicalTokens, and one grammar
 * function per production of the W3C XPath grammar, whose result is kept
 * in tokens.
 *
 * Only the constructor, the destructor and the debug accessors are
 * defined in this header.  Notes below that are marked NOTE(review) are
 * assumptions drawn from the declarations and must be confirmed against
 * the implementation file.
 */
class XPathParser
{
public:

    //#################################
    //# CONSTRUCTOR
    //#################################

    /**
     * Creates a parser with debugging off, a null parse buffer, zero
     * length, position and number, and empty token lists.
     */
    XPathParser() :
            debug(false),
            parsebuf(0),
            parselen(0),
            position(0),
            numberString(),
            number(0),
            lexicalTokens(),
            tokens()
        {
        }

    /**
     * Virtual destructor with an empty body.
     * NOTE(review): nothing is released here; ownership of parsebuf is
     * not visible in this header -- confirm it is freed elsewhere.
     */
    virtual ~XPathParser() {}

    /**
     * Returns the current value of the debug flag.
     */
    bool getDebug()
        { return debug; }

    /**
     * Sets the debug flag.
     * NOTE(review): presumably enables the trace() output -- confirm.
     */
    void setDebug(bool val)
        { debug = val; }


    /**
     *  Parses the XPath expression in str.
     *  Normally not called directly unless for string parsing testing.
     *  NOTE(review): the bool result presumably reports success -- confirm.
     */
    bool parse(const DOMString &str);

    /**
     * This is the big one. Called by the xpath-dom api to fetch
     * nodes from a DOM tree.
     *
     * @param root node handed in by the caller
     *             (NOTE(review): presumably the context node the
     *             expression is evaluated against -- confirm)
     * @param str  the XPath expression
     * @return the list of nodes fetched
     */
    NodeList evaluate(const NodePtr root, const DOMString &str);


private:

    //#################################
    //# MESSAGES
    //#################################

    /**
     * printf-style message function.  When G_GNUC_PRINTF is defined the
     * compiler checks the arguments against fmt; the positions are 2 and
     * 3 because the implicit this argument counts as the first.
     * NOTE(review): presumably only prints when debug is set -- confirm.
     */
    void trace(const char *fmt, ...)
    #ifdef G_GNUC_PRINTF
    G_GNUC_PRINTF(2, 3)
    #endif
    ;

    /**
     * NOTE(review): presumably traces entry into the grammar function
     * called name at position pos and recursion depth depth -- confirm.
     */
    void traceStack(const char *name, int pos, int depth);

    /**
     * printf-style error message function, with the same format checking
     * as trace().
     */
    void error(const char *fmt, ...)
    #ifdef G_GNUC_PRINTF
    G_GNUC_PRINTF(2, 3)
    #endif
    ;

    //#################################
    //# LEXICAL  SCANNING
    //#################################

    /**
     *  Add a lexical token of a given type to the list.  The overloads
     *  differ in the value passed along with the type and location:
     *  none, a string, a double or a long.
     */
    void lexTokAdd(int type, int loc);
    void lexTokAdd(int type, int loc, const DOMString &val);
    void lexTokAdd(int type, int loc, double val);
    void lexTokAdd(int type, int loc, long   val);

    /**
     * NOTE(review): presumably prints the contents of lexicalTokens for
     * debugging -- confirm.
     */
    void lexicalTokenDump();

    /**
     * Returns a lexical token by value.
     * NOTE(review): presumably the element of lexicalTokens at index p;
     * the result for an out-of-range p is not visible here -- confirm.
     */
    LexTok lexTok(int p);

    /**
     * NOTE(review): presumably the type code of the lexical token at
     * index p -- confirm.
     */
    int lexTokType(int p);

    /**
     * NOTE(review): presumably returns the character of the parse buffer
     * at position p without advancing -- confirm.
     */
    int peek(int p);

    /**
     * NOTE(review): presumably returns the character of the parse buffer
     * at position p -- confirm how it differs from peek().
     */
    int get(int p);

    /**
     * NOTE(review): presumably reads a word starting at position p into
     * str and returns the position after it -- confirm.
     */
    int getword(int p, DOMString &str);

    /**
     * NOTE(review): presumably tests whether the parse buffer contains
     * str at position p -- confirm the meaning of the result.
     */
    int match(int p, const char *str);

    /**
     * NOTE(review): presumably skips whitespace from position p and
     * returns the new position -- confirm.
     */
    int skipwhite(int p);

    /**
     * Lexical-level number reader; the value is delivered through dresult.
     * NOTE(review): presumably returns the position after the number --
     * confirm, including the result when no number is present.
     */
    int getNumber(int p, double &dresult);

    /**
     * Lexical-level literal reader; the text is delivered through result.
     * NOTE(review): return convention presumably as for getNumber()
     * above -- confirm.
     */
    int getLiteral(int p, DOMString &result);

    /**
     * Lexical-level NameTest reader; the text is delivered through result.
     * NOTE(review): return convention presumably as for getNumber()
     * above -- confirm.
     */
    int getNameTest(int p0, DOMString &result);

    /**
     * Lexical-level NCName reader; the text is delivered through result.
     * NOTE(review): return convention presumably as for getNumber()
     * above -- confirm.
     */
    int getNCName(int p0, DOMString &result);


    /**
     * NOTE(review): presumably runs the lexical scan over the parse
     * buffer and fills lexicalTokens -- confirm the meaning of the result.
     */
    int lexicalScan();


    //#################################
    //# GRAMMAR  PARSING
    //#################################

    /**
     * Add a newly derived token to the token list.  The overloads taking
     * a type accept no value, a long, a double or a string along with it.
     */
    void tokAdd(const Token &token);

    void tokAdd(int type);

    void tokAdd(int type, long val);

    void tokAdd(int type, double val);

    void tokAdd(int type, const DOMString &val);


    /**
     * The grammar definitions marked [1]-[39] are directly
     * from the W3C XPath grammar specification.  Each comment below gives
     * the production number and its name in that grammar.
     *
     * NOTE(review): every function takes a start position p0 and a depth
     * and returns an int; presumably depth is the recursion depth passed
     * on to traceStack() and the result is the position after the match
     * -- confirm in the implementation file.
     */

    /**
     * [1] LocationPath
     */
    int getLocationPath(int p0, int depth);

    /**
     * [2] AbsoluteLocationPath
     */
    int getAbsoluteLocationPath(int p0, int depth);

    /**
     * [3] RelativeLocationPath
     */
    int getRelativeLocationPath(int p0, int depth);

    /**
     * [4] Step
     */
    int getStep(int p0, int depth);

    /**
     * [5] AxisSpecifier
     */
    int getAxisSpecifier(int p0, int depth);

    /**
     * [6] AxisName
     */
    int getAxisName(int p0, int depth);

    /**
     * [7] NodeTest
     */
    int getNodeTest(int p0, int depth);

    /**
     * [8] Predicate
     */
    int getPredicate(int p0, int depth);

    /**
     * [9] PredicateExpr
     */
    int getPredicateExpr(int p0, int depth);

    /**
     * [10] AbbreviatedAbsoluteLocationPath
     */
    int getAbbreviatedAbsoluteLocationPath(int p0, int depth);
    /**
     * [11] AbbreviatedRelativeLocationPath
     */
    int getAbbreviatedRelativeLocationPath(int p0, int depth);
    /**
     * [12] AbbreviatedStep
     */
    int getAbbreviatedStep(int p0, int depth);

    /**
     * [13] AbbreviatedAxisSpecifier
     */
    int getAbbreviatedAxisSpecifier(int p0, int depth);

    /**
     * [14] Expr
     */
    int getExpr(int p0, int depth);

    /**
     * [15] PrimaryExpr
     */
    int getPrimaryExpr(int p0, int depth);

    /**
     * [16] FunctionCall
     */
    int getFunctionCall(int p0, int depth);

    /**
     * [17] Argument
     */
    int getArgument(int p0, int depth);

    /**
     * [18] UnionExpr
     */
    int getUnionExpr(int p0, int depth);

    /**
     * [19] PathExpr
     */
    int getPathExpr(int p0, int depth);

    /**
     * [20] FilterExpr
     */
    int getFilterExpr(int p0, int depth);

    /**
     * [21] OrExpr
     */
    int getOrExpr(int p0, int depth);

    /**
     * [22] AndExpr
     */
    int getAndExpr(int p0, int depth);

    /**
     * [23] EqualityExpr
     */
    int getEqualityExpr(int p0, int depth);

    /**
     * [24] RelationalExpr
     */
    int getRelationalExpr(int p0, int depth);

    /**
     * [25] AdditiveExpr
     */
    int getAdditiveExpr(int p0, int depth);

    /**
     * [26] MultiplicativeExpr
     */
    int getMultiplicativeExpr(int p0, int depth);

    /**
     * [27] UnaryExpr
     */
    int getUnaryExpr(int p0, int depth);

    /**
     * [28] ExprToken
     */
    int getExprToken(int p0, int depth);

    /**
     * [29] Literal
     */
    int getLiteral(int p0, int depth);

    /**
     * [30] Number
     */
    int getNumber(int p0, int depth);

    /**
     * [31] Digits
     */
    int getDigits(int p0, int depth);

    /**
     * [32] Operator
     */
    int getOperator(int p0, int depth);

    /**
     * [33] OperatorName
     */
    int getOperatorName(int p0, int depth);

    /**
     * [34] MultiplyOperator
     */
    int getMultiplyOperator(int p0, int depth);

    /**
     * [35] FunctionName
     */
    int getFunctionName(int p0, int depth);

    /**
     * [36] VariableReference
     */
    int getVariableReference(int p0, int depth);

    /**
     * [37] NameTest
     */
    int getNameTest(int p0, int depth);

    /**
     * [38] NodeType
     */
    int getNodeType(int p0, int depth);

    /**
     * [39] ExprWhitespace
     */
    int getExprWhitespace(int p0, int depth);


    //#################################
    //# DATA ITEMS
    //#################################

    /**
     * Debug flag, read by getDebug() and written by setDebug().
     */
    bool debug;

    /**
     * Raw character buffer, null after construction.
     * NOTE(review): presumably holds the expression text being scanned;
     * allocation and release are not visible in this header -- confirm.
     */
    char *parsebuf;

    /**
     * NOTE(review): presumably the number of characters in parsebuf --
     * confirm.  Zero after construction.
     */
    int parselen;

    /**
     * NOTE(review): presumably the current scan position -- confirm.
     * Zero after construction.
     */
    int position;

    /**
     * NOTE(review): presumably the text of the number most recently
     * scanned -- confirm.  Empty after construction.
     */
    DOMString numberString;

    /**
     * NOTE(review): presumably the value of the number most recently
     * scanned -- confirm.  Zero after construction.
     */
    double number;


    /**
     *  The result of the first lexical scan
     */
    std::vector<LexTok> lexicalTokens;

    /**
     *  The result of parsing.  If parsing was successful, then
     *  this is executable via execute()
     *  NOTE(review): execute() is not declared in this class; presumably
     *  it belongs to TokenList -- confirm.
     */
    TokenList tokens;
};


} // namespace xpath
} // namespace dom
} // namespace w3c
} // namespace org
#endif // SEEN_XPATHPARSER_H
//#########################################################################
//# E N D    O F    F I L E
//#########################################################################