xpathparser.cpp revision 6cd2e86330e1049942b9ce57d4f10bbe2542067d
/*
* Phoebe DOM Implementation.
*
* This is a C++ approximation of the W3C DOM model, which follows
* fairly closely the specifications in the various .idl files, copies of
* which are provided for reference. Most important is this one:
*
* http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/idl-definitions.html
*
* Authors:
* Bob Jamison
*
* Copyright (C) 2006-2007 Bob Jamison
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "ucd.h"
#include "xpathparser.h"
namespace org
{
namespace w3c
{
namespace dom
{
namespace xpath
{
//#########################################################################
//# M E S S A G E S
//#########################################################################
/**
 * Print a printf-style diagnostic line to stdout, prefixed with
 * "XPathParser: ".  Nothing is written unless the 'debug' flag is set.
 *
 * @param fmt printf-style format string; the variadic arguments follow it
 */
void XPathParser::trace(const char *fmt, ...)
{
    if (debug)
    {
        FILE *out = stdout;
        va_list ap;
        va_start(ap, fmt);
        fputs("XPathParser: ", out);
        vfprintf(out, fmt, ap);
        fputs("\n", out);
        va_end(ap);
    }
}
/**
 * Print a printf-style error line to stdout, prefixed with
 * "XPathParser ERROR: ", then echo the parse buffer and draw a caret
 * underneath it, indented by 'position' columns ('position' is the index
 * most recently recorded by peek()/get()).
 *
 * @param fmt printf-style format string; the variadic arguments follow it
 */
void XPathParser::error(const char *fmt, ...)
{
    FILE *out = stdout;
    fputs("XPathParser ERROR: ", out);
    va_list ap;
    va_start(ap, fmt);
    vfprintf(out, fmt, ap);
    va_end(ap);
    fputs("\n", out);
    //Show where in the input we were
    fprintf(out, "%s\n", parsebuf);
    for (int col = position ; col > 0 ; col--)
        fputs(" ", out);
    fputs("^\n", out);
}
/**
 * Debug aid for the recursive-descent parser: prints one line per grammar
 * function entered, indented by one space per level of recursion.
 * Silent unless the 'debug' flag is set.
 *
 * A stray unconditional 'return' used to sit right after the debug check,
 * which made the body unreachable; it has been removed so that enabling
 * 'debug' really shows the call stack.
 *
 * @param name  name of the grammar function being entered
 * @param pos   token index at which the function starts
 * @param depth recursion depth, used as the indent width
 */
void XPathParser::traceStack(const char *name, int pos, int depth)
{
    if (!debug)
        return;
    for (int i = 0 ; i < depth ; i++)
        fprintf(stdout, " ");
    fprintf(stdout, "%d %d %s\n", pos, depth, name);
}
//#########################################################################
//# L E X I C A L S C A N N I N G
//#########################################################################
/**
 * Append a value-less lexical token.
 *
 * @param type lexical token type
 * @param loc  location passed on to the LexTok constructor
 */
void XPathParser::lexTokAdd(int type, int loc)
{
    lexicalTokens.push_back(LexTok(type, loc));
}
/**
 * Append a lexical token carrying a string value.
 *
 * @param type lexical token type
 * @param loc  location passed on to the LexTok constructor
 * @param val  string payload
 */
void XPathParser::lexTokAdd(int type, int loc, const DOMString &val)
{
    lexicalTokens.push_back(LexTok(type, loc, val));
}
/**
 * Append a lexical token carrying a floating-point value.
 *
 * @param type lexical token type
 * @param loc  location passed on to the LexTok constructor
 * @param val  numeric payload
 */
void XPathParser::lexTokAdd(int type, int loc, double val)
{
    lexicalTokens.push_back(LexTok(type, loc, val));
}
/**
 * Append a lexical token carrying an integer value.
 *
 * @param type lexical token type
 * @param loc  location passed on to the LexTok constructor
 * @param val  integer payload
 */
void XPathParser::lexTokAdd(int type, int loc, long val)
{
    lexicalTokens.push_back(LexTok(type, loc, val));
}
/**
 * Print every lexical token to stdout, one per line and prefixed with its
 * index, between a header and a footer banner.
 */
void XPathParser::lexicalTokenDump()
{
    printf("####### LEXICAL TOKENS #######\n");
    for (unsigned int i=0 ; i<lexicalTokens.size() ; i++)
    {
        //the index is unsigned, so it needs %u rather than %d
        printf("%u : ", i);
        lexicalTokens[i].print();
    }
    printf("##### END LEXICAL TOKENS #####\n\n");
}
/**
 * Fetch a copy of the lexical token at index p.
 *
 * @param p token index
 * @return the token at p, or a default-constructed LexTok when p lies
 *         outside the token list
 */
LexTok XPathParser::lexTok(int p)
{
    const bool inRange = (p >= 0) && (p < (int)lexicalTokens.size());
    if (inRange)
        return lexicalTokens[p];
    LexTok none;
    return none;
}
/**
 * Fetch the type of the lexical token at index p.
 *
 * @param p token index
 * @return the token's type, or -1 when p lies outside the token list
 */
int XPathParser::lexTokType(int p)
{
    const bool inRange = (p >= 0) && (p < (int)lexicalTokens.size());
    return inRange ? lexicalTokens[p].getType() : -1;
}
/**
 * Read the character at index p of the parse buffer and remember p in
 * 'position' (error() uses it to place its caret).
 *
 * @param p character index into the parse buffer
 * @return the character at p, or -1 when p is outside the buffer.
 *         Negative indices are rejected too, matching the bounds checks
 *         in lexTok() and lexTokType().
 */
int XPathParser::peek(int p)
{
    if (p < 0 || p >= parselen)
        return -1;
    position = p;
    return parsebuf[p];
}
/**
 * Read the character at index p of the parse buffer and remember p in
 * 'position'.  As written this does exactly what peek() does; advancing
 * the index is left to the caller.
 *
 * @param p character index into the parse buffer
 * @return the character at p, or -1 when p is outside the buffer.
 *         Negative indices are rejected too, matching the bounds checks
 *         in lexTok() and lexTokType().
 */
int XPathParser::get(int p)
{
    if (p < 0 || p >= parselen)
        return -1;
    position = p;
    return parsebuf[p];
}
/**
 * Step over a run of whitespace characters in the parse buffer.
 *
 * @param p0 character index at which to start
 * @return index of the first character for which uni_is_space() is
 *         false, or the buffer length if none is found
 */
int XPathParser::skipwhite(int p0)
{
    int cur = p0;
    for ( ; cur < parselen ; cur++)
    {
        if (!uni_is_space(peek(cur)))
            break;
        get(cur);
    }
    return cur;
}
/**
 * Collect a run of letters and digits from the parse buffer.
 *
 * @param p0  character index at which to start
 * @param str receives the collected characters (appended)
 * @return index of the first character that is not a letter or digit,
 *         or the buffer length if none is found
 */
int XPathParser::getword(int p0, DOMString &str)
{
    int cur = p0;
    for ( ; cur < parselen ; cur++)
    {
        if (!uni_is_letter_or_digit(peek(cur)))
            break;
        str.push_back((XMLCh)get(cur));
    }
    return cur;
}
/**
 * Test whether the parse buffer contains the literal 'str' at index p.
 * Only a prefix comparison is made; nothing is checked about the
 * character that follows the literal.
 *
 * @param p   character index at which the literal should start
 * @param str NUL-terminated literal to look for
 * @return index just past the literal on success (equal to p for an
 *         empty literal), or -1 on mismatch or if the buffer ends first
 */
int XPathParser::match(int p, const char *str)
{
    int cur = p;
    for (const char *want = str ; *want ; ++want, ++cur)
    {
        if (cur >= parselen || parsebuf[cur] != *want)
            return -1;
    }
    return cur;
}
/**
 * Scan a numeric literal from the parse buffer.
 *
 * Accepted shape: an optional leading '-', then digits with at most one
 * '.', optionally followed (only once a '.' has been seen) by a single
 * 'e'/'E' and a single '-'.  A second '.', a second exponent marker, or
 * an exponent without a preceding '.' makes the scan give up.
 *
 * Fixes relative to the earlier version:
 *  - the leading '-' was consumed but never applied to the result;
 *  - conversion failure was tested with '!end', which strtod()/strtol()
 *    never produce; "no conversion" is reported as end == begin;
 *  - integers went through strtol(), which clamps on overflow; strtod()
 *    is now used for every literal.
 *
 * @param p0      character index at which to start
 * @param dresult receives the parsed value on success
 * @return index just past the number, or p0 if no number was recognised
 */
int XPathParser::getNumber(int p0, double &dresult)
{
    int p = p0;
    if (p >= parselen)
        return p0;/*need at least x*/
    bool negative = false;
    if (parsebuf[p] == '-')
    {
        negative = true;
        p++;
        if (p >= parselen)
            return p0;
    }
    bool seen_dot    = false;
    bool seen_e      = false;
    bool seen_eminus = false;
    DOMString num;
    int i = p;
    while (i < parselen)
    {
        int ch = parsebuf[i];
        if (ch=='.')
        {
            if (seen_dot)
                return p0;
            seen_dot = true;
        }
        else if (ch=='e' || ch=='E')
        {
            if (seen_e || !seen_dot)
                return p0;
            seen_e = true;
        }
        else if (ch=='-' && seen_e)
        {
            if (seen_eminus || !seen_dot)
                return p0;
            seen_eminus = true;
        }
        else if (!uni_is_digit(ch))
            break;
        num.push_back((XMLCh)ch);
        i++;
    }
    if (i == p)/*nothing collected*/
        return p0;
    const char *begin = num.c_str();
    char *end = NULL;
    double value = strtod(begin, &end);
    if (end == begin)/*nothing convertible, e.g. a lone "."*/
    {
        error("Error formatting double: %s\n", num.c_str());
        return p0;
    }
    dresult = negative ? -value : value;
    return i;
}
/**
 * Scan a quoted string literal.  The literal may be delimited by either
 * '"' or '\''; it ends at the next occurrence of the same quote
 * character.  No escape sequences are interpreted.
 *
 * @param p0     character index of the expected opening quote
 * @param result receives the characters between the quotes (appended)
 * @return index just past the closing quote; p0 if there is no opening
 *         quote at p0; -1 (after reporting an error) if the buffer ends
 *         before the closing quote
 */
int XPathParser::getLiteral(int p0, DOMString &result)
{
    const int quote = peek(p0);
    if (quote != '"' && quote != '\'')
        return p0;
    for (int cur = p0 + 1 ; cur < parselen ; cur++)
    {
        const int ch = peek(cur);
        if (ch == quote)
            return cur + 1; //step over the closing quote
        result.push_back((XMLCh)ch);
    }
    error("Unterminated literal string");
    return -1;
}
/**
 * Scan an NCName (a 'non-colonized' name).  The first character must be
 * '_' or a letter; following characters may be letters, digits, '.',
 * '-' or '_'.  Combining characters and extenders are not recognised.
 *
 * @param p0     character index at which to start
 * @param result receives the name (appended)
 * @return index just past the name, or p0 if no name starts at p0
 */
int XPathParser::getNCName(int p0, DOMString &result)
{
    const int first = peek(p0);
    const bool validStart = (first == '_') || uni_is_letter(first);
    if (!validStart)
        return p0;
    result.push_back((XMLCh)first);
    int cur = p0 + 1;
    for ( ; cur < parselen ; cur++)
    {
        const int ch = peek(cur);
        const bool nameChar = uni_is_letter_or_digit(ch) ||
                              ch == '.' || ch == '-' || ch == '_';
        if (!nameChar)
            break;
        result.push_back((XMLCh)ch);
    }
    return cur;
}
/**
* Name parsing with post-parsing
*/
int XPathParser::getNameTest(int p0, DOMString &result)
{
int p = p0;
int ch = peek(p);
if (ch == '*')
{
result.push_back((XMLCh)ch);
p++;
return p;
}
DOMString ncName;
int p2 = getNCName(p, ncName);
if (p2 <= p)
return p0;
result = ncName;
p = p2;
ch = peek(p);
if (ch != ':' )//short name. we are done
{
return p;
}
if (peek(p+1) == ':') //was name:: which is ok
return p;
result.push_back(':');
p++;
ch = peek(p);
if (ch == '*')
{
result.push_back((XMLCh)ch);
p++;
return p;
}
DOMString ncName2;
p2 = getNCName(p, ncName2);
if (p2 <= p)
{
if (peek(p) == ':') //was name:: which is ok
return p0;
error("Nothing after ':' in QName");
return -1;
}
result.append(ncName2);
p = p2;
return p;
}
int XPathParser::lexicalScan()
{
lexicalTokens.clear();
int p = 0;
int p2 = p;
while (p < parselen)
{
p2 = skipwhite(p);
p = p2;
//trace("nextChar:%c", peek(p));
bool selected = false;
//### LITERAL EXPR TOKENS
for (int i=2 ; i<=10 ; i++)
{
p2 = match(p, exprTokenTable[i].sval);
if (p2 > p)
{
lexTokAdd(exprTokenTable[i].ival, p);
p = p2;
selected = true;
break;
}
}
if (selected)
continue;
//### OPERATORS
for (LookupEntry *entry = operatorTable; entry->sval ; entry++)
{
p2 = match(p, entry->sval);
if (p2 > p)
{
long op = (long)entry->ival;
//according to the disambiguating rule for * in the spec
if (op == MULTIPLY && !lexicalTokens.empty())
{
int ltyp = lexTokType(lexicalTokens.size()-1);
if (ltyp != AMPR && ltyp != DOUBLE_COLON &&
ltyp != LPAREN && ltyp != RBRACKET &&
ltyp != COMMA && ltyp != OPERATOR )
{
lexTokAdd(OPERATOR, p, (long)entry->ival);
p = p2;
selected = true;
break;
}
}
else
{
lexTokAdd(OPERATOR, p, (long)entry->ival);
p = p2;
selected = true;
break;
}
}
}
if (selected)
continue;
//### NODE TYPES
for (LookupEntry *entry = nodeTypeTable; entry->sval ; entry++)
{
p2 = match(p, entry->sval);
if (p2 > p)
{
lexTokAdd(NODE_TYPE, p, (long)entry->ival);
p = p2;
selected = true;
break;
}
}
if (selected)
continue;
//### AXIS NAMES
for (LookupEntry *entry = axisNameTable; entry->sval ; entry++)
{
p2 = match(p, entry->sval);
if (p2 > p)
{
lexTokAdd(AXIS_NAME, p, (long)entry->ival);
p = p2;
selected = true;
break;
}
}
if (selected)
continue;
//### NAME TEST
DOMString ntResult;
p2 = getNameTest(p, ntResult);
if (p2 > p)
{
int p3 = skipwhite(p2);
if (peek(p3) == '(')
lexTokAdd(FUNCTION_NAME, p, ntResult);
else
lexTokAdd(NAME_TEST, p, ntResult);
p = p2;
selected = true;
}
if (selected)
continue;
//### VARIABLE REFERENCE
if (peek(p) == '$')
{
p++;
DOMString qnResult;
p2 = getNCName(p, qnResult);
if (p2 > p)
{
lexTokAdd(VARIABLE_REFERENCE, p, qnResult);
p = p2;
selected = true;
}
else
{
error("Variable referenced with '$' requires a qualified name\n");
return -1;
}
}
if (selected)
continue;
//### NUMBER
double numval;
p2 = getNumber(p, numval);
if (p2 > p)
{
lexTokAdd(NUMBER, p, numval);
p = p2;
selected = true;
}
if (selected)
continue;
//### LITERAL
DOMString strval;
p2 = getLiteral(p, strval);
if (p2 > p)
{
lexTokAdd(LITERAL, p, strval);
p = p2;
selected = true;
}
if (selected)
continue;
//### CHAR (default, none of the above)
lexTokAdd(CHAR, p, (long) peek(p));
p++;
}//while p
return p;
}
//#########################################################################
//# X P A T H G R A M M A R P A R S I N G
//#########################################################################
//## Various shorthand methods to add a token to the list
void XPathParser::tokAdd(const Token &tok)
{
tokens.add(tok);
}
void XPathParser::tokAdd(int type)
{
tokens.add(Token::create(type));
}
void XPathParser::tokAdd(int type, long val)
{
tokens.add(Token::create(type, val));
}
void XPathParser::tokAdd(int type, double val)
{
tokens.add(Token::create(type, val));
}
void XPathParser::tokAdd(int type, const DOMString &val)
{
tokens.add(Token::create(type, val));
}
//########################################
//# Grammar - specific parsing
//########################################
/**
 * [1] LocationPath ::=
 *         RelativeLocationPath
 *         | AbsoluteLocationPath
 *
 * Tries the absolute form first, then the relative one, and appends
 * TOK_ABSOLUTE or TOK_RELATIVE after the tokens of the path itself.
 *
 * Fixes relative to the earlier version:
 *  - p0 is a lexical-token index, but it used to be passed through
 *    skipwhite(), which inspects characters of the parse buffer and could
 *    therefore skip real tokens; whitespace is already dropped by the
 *    lexical scan, so the call has been removed;
 *  - a -1 from either sub-parser is now passed on instead of being
 *    turned into "no match".
 *
 * @param p0    token index at which to start
 * @param depth recursion depth (for tracing)
 * @return token index after the path, p0 if there is none, -1 on error
 */
int XPathParser::getLocationPath(int p0, int depth)
{
    traceStack("getLocationPath", p0, depth);
    int p2 = getAbsoluteLocationPath(p0, depth+1);
    if (p2 < 0)
        return -1;
    if (p2 > p0)
    {
        tokAdd(Token::TOK_ABSOLUTE);
        return p2;
    }
    p2 = getRelativeLocationPath(p0, depth+1);
    if (p2 < 0)
        return -1;
    if (p2 > p0)
    {
        tokAdd(Token::TOK_RELATIVE);
        return p2;
    }
    return p0;
}
/**
* [2] AbsoluteLocationPath ::=
* '/' RelativeLocationPath?
* | AbbreviatedAbsoluteLocationPath
*/
int XPathParser::getAbsoluteLocationPath(int p0, int depth)
{
traceStack("getAbsoluteLocationPath", p0, depth);
int p = p0;
LexTok t = lexTok(p);
if (t.getType() == OPERATOR && t.getIntValue()==SLASH)
{
p++;
int p2 = getRelativeLocationPath(p, depth+1);
if (p2 <= p)
{
error("Relative path after '/'");
return -1;
}
p = p2;
return p;
}
//AbbreviatedAbsoluteLocationPath
if (t.getType() == OPERATOR && t.getIntValue()==DOUBLE_SLASH)
{
p++;
int p2 = getRelativeLocationPath(p, depth+1);
if (p2 <= p)
{
error("Relative path after '//'");
return -1;
}
p = p2;
return p;
}
return p0;
}
/**
 * [3] RelativeLocationPath ::=
 *         Step
 *         | RelativeLocationPath '/' Step
 *         | AbbreviatedRelativeLocationPath
 *
 * Parses one step and, if it is followed by '/' or '//', recurses for
 * the rest of the path.  For '//' a TOK_AXIS_DESCENDANT_OR_SELF token is
 * emitted between the two parts.
 *
 * Fix relative to the earlier version: a '/' or '//' that is not
 * followed by a step (recursion made no progress) is now reported as an
 * error; only a negative result used to be caught, so "a/" was accepted.
 *
 * @param p0    token index at which to start
 * @param depth recursion depth (for tracing)
 * @return token index after the path, p0 if no step starts at p0,
 *         -1 on error
 */
int XPathParser::getRelativeLocationPath(int p0, int depth)
{
    traceStack("getRelativeLocationPath", p0, depth);
    int p = getStep(p0, depth+1);
    if (p < 0)
        return -1;
    if (p <= p0)
        return p0;
    LexTok t = lexTok(p);
    if (t.getType() != OPERATOR)
        return p;
    if (t.getIntValue() == SLASH)
    {
        int rest = p + 1;
        int p2 = getRelativeLocationPath(rest, depth+1);
        if (p2 <= rest)
        {
            error("Relative path after '/'");
            return -1;
        }
        return p2;
    }
    //AbbreviatedRelativeLocationPath
    if (t.getIntValue() == DOUBLE_SLASH)
    {
        int rest = p + 1;
        // a '//' is an abbreviation for /descendant-or-self::node()/
        tokAdd(Token::TOK_AXIS_DESCENDANT_OR_SELF);
        int p2 = getRelativeLocationPath(rest, depth+1);
        if (p2 <= rest)
        {
            error("Relative path after '//'");
            return -1;
        }
        return p2;
    }
    return p;
}
/**
* [4] Step ::=
* AxisSpecifier NodeTest Predicate*
* | AbbreviatedStep
*/
int XPathParser::getStep(int p0, int depth)
{
traceStack("getStep", p0, depth);
int p = p0;
lexTok(p).print();
//This can be (and usually is) 0-length
int p2 = getAxisSpecifier(p, depth+1);
if (p2 < 0)
{
error("Axis specifier in step section");
return -1;
}
p = p2;
p2 = getNodeTest(p, depth+1);
if (p2 < 0)
{
error("Node test in step section");
return -1;
}
if (p2 > p)
{
p = p2;
p2 = getPredicate(p, depth+1);
if (p2 < 0)
{
error("Predicate in step section");
return -1;
}
p = p2;
return p;
}
//AbbreviatedStep
if (lexTokType(p) == DOT)
{
p++;
return p;
}
//AbbreviatedStep
if (lexTokType(p) == DOUBLE_DOT)
{
p++;
return p;
}
return p0;
}
/**
 * [5] AxisSpecifier ::=
 *         AxisName '::'
 *         | AbbreviatedAxisSpecifier
 *
 * For the long form the matching TOK_AXIS_* token is emitted.
 *
 * Fix relative to the earlier version: the abbreviated form '@' was
 * consumed without emitting anything, so "@x" produced the same tokens
 * as "x".  It now emits TOK_AXIS_ATTRIBUTE, exactly as "attribute::"
 * does.
 *
 * @param p0    token index at which to start
 * @param depth recursion depth (for tracing)
 * @return token index after the specifier, p0 if there is none (the
 *         usual case), -1 on error
 */
int XPathParser::getAxisSpecifier(int p0, int depth)
{
    traceStack("getAxisSpecifier", p0, depth);
    int p = p0;
    if (lexTokType(p) == AXIS_NAME)
    {
        LexTok t = lexTok(p);
        int axisType = t.getIntValue();
        p++;
        if (lexTokType(p) != DOUBLE_COLON)
        {
            error("'::' required after axis name literal");
            return -1;
        }
        p++;
        switch (axisType)
        {
            case ANCESTOR_OR_SELF:
                tokAdd(Token::TOK_AXIS_ANCESTOR_OR_SELF);
                break;
            case ANCESTOR:
                tokAdd(Token::TOK_AXIS_ANCESTOR);
                break;
            case ATTRIBUTE:
                tokAdd(Token::TOK_AXIS_ATTRIBUTE);
                break;
            case CHILD:
                tokAdd(Token::TOK_AXIS_CHILD);
                break;
            case DESCENDANT_OR_SELF:
                tokAdd(Token::TOK_AXIS_DESCENDANT_OR_SELF);
                break;
            case DESCENDANT:
                tokAdd(Token::TOK_AXIS_DESCENDANT);
                break;
            case FOLLOWING_SIBLING:
                tokAdd(Token::TOK_AXIS_FOLLOWING_SIBLING);
                break;
            case FOLLOWING:
                tokAdd(Token::TOK_AXIS_FOLLOWING);
                break;
            case NAMESPACE:
                tokAdd(Token::TOK_AXIS_NAMESPACE);
                break;
            case PARENT:
                tokAdd(Token::TOK_AXIS_PARENT);
                break;
            case PRECEDING_SIBLING:
                tokAdd(Token::TOK_AXIS_PRECEDING_SIBLING);
                break;
            case PRECEDING:
                tokAdd(Token::TOK_AXIS_PRECEDING);
                break;
            case SELF:
                tokAdd(Token::TOK_AXIS_SELF);
                break;
            default:
                {
                error("unknown axis type %d", axisType);
                return -1;
                }
        }
        return p;
    }
    //AbbreviatedAxisSpecifier: '@' is short for attribute::
    if (lexTokType(p) == AMPR)
    {
        tokAdd(Token::TOK_AXIS_ATTRIBUTE);
        return p + 1;
    }
    return p0;
}
/**
 * [6] AxisName ::=
 *         'ancestor'
 *         | 'ancestor-or-self'
 *         | 'attribute'
 *         | 'child'
 *         | 'descendant'
 *         | 'descendant-or-self'
 *         | 'following'
 *         | 'following-sibling'
 *         | 'namespace'
 *         | 'parent'
 *         | 'preceding'
 *         | 'preceding-sibling'
 *         | 'self'
 *
 * NOTE: kept for reference only.  Like the rules at the bottom of this
 * file, axis names are recognised during lexical scanning, so this
 * function just traces the call and consumes nothing.
 *
 * @return p0, always
 */
int XPathParser::getAxisName(int p0, int depth)
{
    traceStack("getAxisName", p0, depth);
    const int unchanged = p0;
    return unchanged;
}
/**
* [7] NodeTest ::=
* NameTest
* | NodeType '(' ')'
* | 'processing-instruction' '(' Literal ')'
*/
int XPathParser::getNodeTest(int p0, int depth)
{
traceStack("getNodeTest", p0, depth);
int p = p0;
LexTok t = lexTok(p);
if (t.getType() == NAME_TEST)
{
p++;
tokAdd(Token::TOK_NAME_TEST, t.getStringValue());
return p;
}
if (t.getType() == NODE_TYPE)
{
if (t.getIntValue() == PROCESSING_INSTRUCTION)
{
if (lexTokType(p) != LPAREN ||
lexTokType(p+1) != LITERAL ||
lexTokType(p+2) != RPAREN )
{
error("processing instruction requires (\"literal string\")");
return -1;
}
p += 3;
}
else
{
if (lexTokType(p+1) != LPAREN ||
lexTokType(p+2) != RPAREN )
{
error("processing instruction requires ()");
return -1;
}
p += 2;
}
return p;
}
return p0;
}
/**
* [8] Predicate ::=
* '[' PredicateExpr ']'
*/
int XPathParser::getPredicate(int p0, int depth)
{
traceStack("getPredicate", p0, depth);
int p = p0;
if (lexTokType(p) != LBRACKET)
return p0;
p++;
int p2 = getPredicateExpr(p, depth+1);
if (p2 <= p)
{
error("Predicate expression in predicate");
return -1;
}
p = p2;
lexTok(p).print();
if (lexTokType(p) != RBRACKET)
{
error("Predicate expression requires closing ']'");
return -1;
}
p++;
return p;
}
/**
 * [9] PredicateExpr ::=
 *         Expr
 *
 * Thin wrapper around getExpr() that adds its own error line on failure.
 *
 * @return whatever getExpr() returns when that is non-negative, else -1
 */
int XPathParser::getPredicateExpr(int p0, int depth)
{
    traceStack("getPredicateExpr", p0, depth);
    const int next = getExpr(p0, depth+1);
    if (next >= 0)
        return next;
    error("Expression in predicate expression");
    return -1;
}
/**
 * [10] AbbreviatedAbsoluteLocationPath ::=
 *         '//' RelativeLocationPath
 *
 * NOTE: unused; the rule is handled inside getAbsoluteLocationPath().
 * Traces the call and consumes nothing.
 *
 * @return p0, always
 */
int XPathParser::getAbbreviatedAbsoluteLocationPath(int p0, int depth)
{
    traceStack("getAbbreviatedAbsoluteLocationPath", p0, depth);
    const int unchanged = p0;
    return unchanged;
}
/**
 * [11] AbbreviatedRelativeLocationPath ::=
 *         RelativeLocationPath '//' Step
 *
 * NOTE: unused; the rule is handled inside getRelativeLocationPath().
 * Traces the call and consumes nothing.
 *
 * @return p0, always
 */
int XPathParser::getAbbreviatedRelativeLocationPath(int p0, int depth)
{
    traceStack("getAbbreviatedRelativeLocationPath", p0, depth);
    const int unchanged = p0;
    return unchanged;
}
/**
 * [12] AbbreviatedStep ::=
 *         '.'
 *         | '..'
 *
 * NOTE: unused; the rule is handled inside getStep().
 * Traces the call and consumes nothing.
 *
 * @return p0, always
 */
int XPathParser::getAbbreviatedStep(int p0, int depth)
{
    traceStack("getAbbreviatedStep", p0, depth);
    const int unchanged = p0;
    return unchanged;
}
/**
 * [13] AbbreviatedAxisSpecifier ::=
 *         '@'?
 *
 * NOTE: unused; the rule is handled inside getAxisSpecifier().
 * Traces the call and consumes nothing.
 *
 * @return p0, always
 */
int XPathParser::getAbbreviatedAxisSpecifier(int p0, int depth)
{
    traceStack("getAbbreviatedAxisSpecifier", p0, depth);
    const int unchanged = p0;
    return unchanged;
}
/**
 * [14] Expr ::=
 *         OrExpr
 *
 * Thin wrapper around getOrExpr() that adds its own error line on
 * failure.
 *
 * @return whatever getOrExpr() returns when that is non-negative, else -1
 */
int XPathParser::getExpr(int p0, int depth)
{
    traceStack("getExpr", p0, depth);
    const int next = getOrExpr(p0, depth+1);
    if (next >= 0)
        return next;
    error("OR expression in expression");
    return -1;
}
/**
 * [15] PrimaryExpr ::=
 *         VariableReference
 *         | '(' Expr ')'
 *         | Literal
 *         | Number
 *         | FunctionCall
 *
 * Literals emit TOK_STR and numbers emit TOK_FLOAT.
 * NOTE(review): a variable reference is consumed without emitting a
 * token; confirm whether that is intended.
 *
 * Fixes in the parenthesised branch relative to the earlier version:
 *  - the position was advanced with 'p += p2' although p2 is already an
 *    absolute token index;
 *  - the closing ')' was checked but never consumed, and control then
 *    fell through into the literal/number/function tests.
 *
 * @param p0    token index at which to start
 * @param depth recursion depth (for tracing)
 * @return token index after the expression, p0 if there is none,
 *         -1 on error
 */
int XPathParser::getPrimaryExpr(int p0, int depth)
{
    traceStack("getPrimaryExpr", p0, depth);
    int p = p0;
    int p2 = p;
    if (lexTokType(p) == VARIABLE_REFERENCE)
    {
        p++;
        return p;
    }
    if (lexTokType(p) == LPAREN)
    {
        p++;
        p2 = getExpr(p, depth+1);
        if (p2 <= p)
        {
            error("Expression in primary expression");
            return -1;
        }
        p = p2;
        if (lexTokType(p) != RPAREN)
        {
            error("Primary expression requires closing ')'");
            return -1;
        }
        p++; //consume the ')'
        return p;
    }
    if (lexTokType(p) == LITERAL)
    {
        tokAdd(Token::TOK_STR, lexTok(p).getStringValue());
        p++;
        return p;
    }
    if (lexTokType(p) == NUMBER)
    {
        tokAdd(Token::TOK_FLOAT, lexTok(p).getDoubleValue());
        p++;
        return p;
    }
    p2 = getFunctionCall(p, depth+1);
    if (p2 < 0)
    {
        error("Function call in primary expression");
        return -1;
    }
    if (p2 > p)
        return p2;
    return p0;
}
/**
* [16] FunctionCall ::=
* FunctionName '(' ( Argument ( ',' Argument )* )? ')'
*/
int XPathParser::getFunctionCall(int p0, int depth)
{
traceStack("getFunctionCall", p0, depth);
int p = p0;
if (lexTokType(p) != FUNCTION_NAME)
return p0;
DOMString name = lexTok(p).getStringValue();
p++;
if (lexTokType(p) != LPAREN) //this makes a function
return p0;
p++;
int argCount = 0;
int p2 = getArgument(p, depth+1);
if (p2 < 0)
{
error("Error in function argument");
return -1;
}
if (p2 > p)
{
argCount++;
p = p2;
while (lexTokType(p) == COMMA)
{
p++;
p2 = getArgument(p, depth+1);
if (p2 <= p)
{
error("Error in function argument");
return -1;
}
if (p2 > p)
argCount++;
//do we add a token here? i dont think so
p = p2;
}
}
if (lexTokType(p) != RPAREN) //mandatory
{
error("Function requires closing ')'");
return -1;
}
p++;
// Function names from http://www.w3.org/TR/xpath#NT-FunctionName
if (name == "last")
tokAdd(Token::TOK_FUNC_LAST);
else if (name == "position")
tokAdd(Token::TOK_FUNC_POSITION);
else if (name == "count")
tokAdd(Token::TOK_FUNC_COUNT);
else if (name == "id")
tokAdd(Token::TOK_FUNC_ID);
else if (name == "local-name")
tokAdd(Token::TOK_FUNC_LOCAL_NAME);
else if (name == "namespace-uri")
tokAdd(Token::TOK_FUNC_NAMESPACE_URI);
else if (name == "name")
tokAdd(Token::TOK_FUNC_NAME);
else if (name == "string")
tokAdd(Token::TOK_FUNC_STRING);
else if (name == "concat")
tokAdd(Token::TOK_FUNC_CONCAT);
else if (name == "starts-with")
tokAdd(Token::TOK_FUNC_STARTS_WITH);
else if (name == "contains")
tokAdd(Token::TOK_FUNC_CONTAINS);
else if (name == "substring-before")
tokAdd(Token::TOK_FUNC_SUBSTRING_BEFORE);
else if (name == "substring-after")
tokAdd(Token::TOK_FUNC_SUBSTRING_AFTER);
else if (name == "substring")
tokAdd(Token::TOK_FUNC_SUBSTRING);
else if (name == "string-length")
tokAdd(Token::TOK_FUNC_STRING_LENGTH);
else if (name == "normalize-space")
tokAdd(Token::TOK_FUNC_NORMALIZE_SPACE);
else if (name == "translate")
tokAdd(Token::TOK_FUNC_TRANSLATE);
else if (name == "boolean")
tokAdd(Token::TOK_FUNC_BOOLEAN);
else if (name == "not")
tokAdd(Token::TOK_FUNC_NOT);
else if (name == "true")
tokAdd(Token::TOK_FUNC_TRUE);
else if (name == "false")
tokAdd(Token::TOK_FUNC_FALSE);
else if (name == "lang")
tokAdd(Token::TOK_FUNC_LANG);
else if (name == "number")
tokAdd(Token::TOK_FUNC_NUMBER);
else if (name == "sum")
tokAdd(Token::TOK_FUNC_SUM);
else if (name == "floor")
tokAdd(Token::TOK_FUNC_FLOOR);
else if (name == "ceiling")
tokAdd(Token::TOK_FUNC_CEILING);
else if (name == "round")
tokAdd(Token::TOK_FUNC_ROUND);
else
{
error("unknown function name:'%s'", name.c_str());
return -1;
}
return p;
}
/**
 * [17] Argument ::=
 *         Expr
 *
 * Thin wrapper around getExpr() that adds its own error line on failure.
 *
 * @return whatever getExpr() returns when that is non-negative, else -1
 */
int XPathParser::getArgument(int p0, int depth)
{
    traceStack("getArgument", p0, depth);
    const int next = getExpr(p0, depth+1);
    if (next >= 0)
        return next;
    error("Argument expression");
    return -1;
}
/**
* [18] UnionExpr ::=
* PathExpr
* | UnionExpr '|' PathExpr
*/
int XPathParser::getUnionExpr(int p0, int depth)
{
traceStack("getUnionExpr", p0, depth);
int p = p0;
int p2 = getPathExpr(p, depth+1);
if (p2 < 0)
{
error("Path expression for union");
return -1;
}
p = p2;
LexTok t = lexTok(p);
if (t.getType() == OPERATOR && t.getIntValue() == PIPE)
{
p++;
p2 = getUnionExpr(p, depth+1);
if (p2 < 0)
{
error("OR (|) requires union expression on the left");
return -1;
}
tokAdd(Token::TOK_UNION);
p = p2;
}
return p;
}
/**
* [19] PathExpr ::=
* LocationPath
* | FilterExpr
* | FilterExpr '/' RelativeLocationPath
* | FilterExpr '//' RelativeLocationPath
*/
int XPathParser::getPathExpr(int p0, int depth)
{
traceStack("getPathExpr", p0, depth);
int p = p0;
int p2;
p2 = getLocationPath(p, depth+1);
if (p2 < 0)
{
error("Location path in path expression");
return -1;
}
if (p2 > p)
{
p = p2;
return p;
}
p2 = getFilterExpr(p, depth+1);
if (p2 < 0)
{
error("Filter expression in path expression");
return -1;
}
if (p2 <= p)
return p0;
p = p2;
LexTok t = lexTok(p);
if (t.getType() == OPERATOR && t.getIntValue() == SLASH)
{
p++;
p2 = getRelativeLocationPath(p, depth+1);
if (p2 < 0)
{
error("Relative location after / in path expression");
return -1;
}
p = p2;
return p;
}
if (t.getType() == OPERATOR && t.getIntValue() == DOUBLE_SLASH)
{
p++;
p2 = getRelativeLocationPath(p, depth+1);
if (p2 < 0)
{
error("Relative location after // in path expression");
return -1;
}
p = p2;
return p;
}
return p;
}
/**
 * [20] FilterExpr ::=
 *         PrimaryExpr
 *         | FilterExpr Predicate
 *
 * Parses one primary expression followed by as many predicates as are
 * present.
 *
 * @param p0    token index at which to start
 * @param depth recursion depth (for tracing)
 * @return token index after the expression, p0 if no primary expression
 *         starts at p0, -1 on error
 */
int XPathParser::getFilterExpr(int p0, int depth)
{
    traceStack("getFilterExpr", p0, depth);
    int cur = getPrimaryExpr(p0, depth+1);
    if (cur < 0)
    {
        error("Primary expression in path expression");
        return -1;
    }
    if (cur <= p0)
        return p0;
    for (;;)
    {
        const int next = getPredicate(cur, depth+1);
        if (next < 0)
        {
            error("Predicate in primary expression");
            return -1;
        }
        if (next <= cur)
            return cur; //no further predicate
        cur = next;
    }
}
/**
 * [21] OrExpr ::=
 *         AndExpr
 *         | OrExpr 'or' AndExpr
 *
 * Emits one TOK_OR after the tokens of each pair of operands.
 *
 * Fixes relative to the earlier version:
 *  - TOK_OR was emitted when NO 'or' operator was present and omitted
 *    when one was; it is now emitted exactly once per 'or';
 *  - only a single 'or' was handled; chains such as "a or b or c" are
 *    now parsed in a loop.
 *
 * @param p0    token index at which to start
 * @param depth recursion depth (for tracing)
 * @return token index after the expression, p0 if there is none,
 *         -1 on error
 */
int XPathParser::getOrExpr(int p0, int depth)
{
    traceStack("getOrExpr", p0, depth);
    int p = getAndExpr(p0, depth+1);
    if (p < 0)
    {
        error("AND expression in OR expression");
        return -1;
    }
    if (p <= p0)
        return p0;
    while (true)
    {
        LexTok t = lexTok(p);
        if (t.getType() != OPERATOR || t.getIntValue() != OR)
            break;
        int rhs = p + 1;
        int p2 = getAndExpr(rhs, depth+1);
        if (p2 <= rhs)
        {
            error("AND expression in OR expression");
            return -1;
        }
        tokAdd(Token::TOK_OR);
        p = p2;
    }
    return p;
}
/**
 * [22] AndExpr ::=
 *         EqualityExpr
 *         | AndExpr 'and' EqualityExpr
 *
 * Emits one TOK_AND after the tokens of each pair of operands.
 *
 * Fix relative to the earlier version: TOK_AND was emitted when NO 'and'
 * operator was present and omitted when one was; it is now emitted
 * exactly once per 'and'.  Chains are handled in a loop.
 *
 * @param p0    token index at which to start
 * @param depth recursion depth (for tracing)
 * @return token index after the expression, p0 if there is none,
 *         -1 on error
 */
int XPathParser::getAndExpr(int p0, int depth)
{
    traceStack("getAndExpr", p0, depth);
    int p = getEqualityExpr(p0, depth+1);
    if (p < 0)
    {
        error("Equality expression in AND expression");
        return -1;
    }
    if (p <= p0)
        return p0;
    while (true)
    {
        LexTok t = lexTok(p);
        if (t.getType() != OPERATOR || t.getIntValue() != AND)
            break;
        int rhs = p + 1;
        int p2 = getEqualityExpr(rhs, depth+1);
        if (p2 <= rhs)
        {
            error("AND expression after 'and'");
            return -1;
        }
        tokAdd(Token::TOK_AND);
        p = p2;
    }
    return p;
}
/**
* [23] EqualityExpr ::=
* RelationalExpr
* | EqualityExpr '=' RelationalExpr
* | EqualityExpr '!=' RelationalExpr
*/
int XPathParser::getEqualityExpr(int p0, int depth)
{
traceStack("getEqualityExpr", p0, depth);
int p = p0;
int p2 = getRelationalExpr(p, depth+1);
if (p2 < 0)
{
error("Relation expression in equality expression");
return -1;
}
if (p2 > p)
{
p = p2;
LexTok t = lexTok(p);
if (t.getType() == OPERATOR && t.getIntValue() == EQUALS)
{
p++;
p2 = getEqualityExpr(p, depth+1);
if (p2 <= p)
{
error("Equality expression expected after ==");
return -1;
}
tokAdd(Token::TOK_EQUALS);
p = p2;
return p;
}
if (t.getType() == OPERATOR && t.getIntValue() == NOT_EQUALS)
{
p++;
p2 = getEqualityExpr(p, depth+1);
if (p2 <= p)
{
error("Equality expression expected after !=");
return -1;
}
tokAdd(Token::TOK_NOT_EQUALS);
p = p2;
return p;
}
return p;
}
return p0;
}
/**
* [24] RelationalExpr ::=
* AdditiveExpr
* | RelationalExpr '<' AdditiveExpr
* | RelationalExpr '>' AdditiveExpr
* | RelationalExpr '<=' AdditiveExpr
* | RelationalExpr '>=' AdditiveExpr
*/
int XPathParser::getRelationalExpr(int p0, int depth)
{
traceStack("getRelationalExpr", p0, depth);
int p = p0;
int p2 = getAdditiveExpr(p, depth+1);
if (p2 < 0)
{
error("Additive expression in relational expression");
return -1;
}
if (p2 > p)
{
p = p2;
LexTok t = lexTok(p);
if (t.getType() == OPERATOR && t.getIntValue() == GREATER_THAN)
{
p++;
p2 = getRelationalExpr(p, depth+1);
if (p2 <= p)
{
error("Relational expression after '>'");
return -1;
}
tokAdd(Token::TOK_GREATER_THAN);
p = p2;
return p;
}
if (t.getType() == OPERATOR && t.getIntValue() == LESS_THAN)
{
p++;
p2 = getRelationalExpr(p, depth+1);
if (p2 <= p)
{
error("Relational expression after '<'");
return -1;
}
tokAdd(Token::TOK_LESS_THAN);
p = p2;
return p;
}
if (t.getType() == OPERATOR && t.getIntValue() == GREATER_THAN_EQUALS)
{
p++;
p2 = getRelationalExpr(p, depth+1);
if (p2 <= p)
{
error("Relational expression after '>='");
return -1;
}
tokAdd(Token::TOK_GREATER_THAN_EQUALS);
p = p2;
return p;
}
if (t.getType() == OPERATOR && t.getIntValue() == LESS_THAN_EQUALS)
{
p++;
p2 = getRelationalExpr(p, depth+1);
if (p2 <= p)
{
error("Relational expression after '<='");
return -1;
}
tokAdd(Token::TOK_LESS_THAN_EQUALS);
p = p2;
return p;
}
return p;
}
return p0;
}
/**
* [25] AdditiveExp ::=
* MultiplicativeExpr
* | AdditiveExpr '+' MultiplicativeExpr
* | AdditiveExpr '-' MultiplicativeExpr
*/
int XPathParser::getAdditiveExpr(int p0, int depth)
{
traceStack("getAdditiveExpr", p0, depth);
int p = p0;
int p2 = getMultiplicativeExpr(p, depth+1);
if (p2 < 0)
{
error("Multiplicative expression in additive expression");
return -1;
}
if (p2 > p)
{
p = p2;
LexTok t = lexTok(p);
if (t.getType() == OPERATOR && t.getIntValue() == PLUS)
{
p++;
p2 = getAdditiveExpr(p, depth+1);
if (p2 <= p)
{
error("Additive expression after '+'");
return -1;
}
tokAdd(Token::TOK_MINUS);
p = p2;
return p;
}
if (t.getType() == OPERATOR && t.getIntValue() == MINUS)
{
p++;
p2 = getAdditiveExpr(p, depth+1);
if (p2 <= p)
{
error("Additive expression after '-'");
return -1;
}
tokAdd(Token::TOK_MINUS);
p = p2;
return p;
}
return p;
}
return p0;
}
/**
* [26] MultiplicativeExpr ::=
* UnaryExpr
* | MultiplicativeExpr MultiplyOperator UnaryExpr
* | MultiplicativeExpr 'div' UnaryExpr
* | MultiplicativeExpr 'mod' UnaryExpr
*/
int XPathParser::getMultiplicativeExpr(int p0, int depth)
{
traceStack("getMultiplicativeExpr", p0, depth);
int p = p0;
int p2 = getUnaryExpr(p, depth+1);
if (p2 < 0)
{
error("Unary expression in multiplicative expression");
return -1;
}
if (p2 > p)
{
p = p2;
LexTok t = lexTok(p);
if (t.getType() == OPERATOR && t.getIntValue() == MULTIPLY)
{
p++;
p2 = getMultiplicativeExpr(p, depth+1);
if (p2 <= p)
{
error("Multiplicative expression after '*'");
return -1;
}
tokAdd(Token::TOK_MUL);
p = p2;
return p;
}
if (t.getType() == OPERATOR && t.getIntValue() == DIV)
{
p++;
p2 = getMultiplicativeExpr(p, depth+1);
if (p2 <= p)
{
error("Multiplicative expression after 'div'");
return -1;
}
tokAdd(Token::TOK_DIV);
p = p2;
return p;
}
if (t.getType() == OPERATOR && t.getIntValue() == MOD)
{
p++;
p2 = getMultiplicativeExpr(p, depth+1);
if (p2 <= p)
{
error("Multiplicative expression after 'mod'");
return -1;
}
tokAdd(Token::TOK_MOD);
p = p2;
return p;
}
return p;
}
return p0;
}
/**
* [27] UnaryExpr ::=
* UnionExpr
* | '-' UnaryExpr
*/
int XPathParser::getUnaryExpr(int p0, int depth)
{
traceStack("getUnaryExpr", p0, depth);
int p = p0;
int p2 = getUnionExpr(p, depth+1);
if (p2 < 0)
{
error("Union expression in unary expression");
return -1;
}
if (p2 > p)
{
p = p2;
return p;
}
if (lexTokType(p) == '-')
{
p++;
p2 = getUnaryExpr(p, depth+1);
if (p2 < 0)
{
error("Unary expression after '-'");
return -1;
}
tokAdd(Token::TOK_NEG);
p = p2;
return p;
}
return p0;
}
//######################################################
//# NOT USED!!!
//## The grammar definitions below are
//## handled by lexical parsing, and will not be used
//######################################################
/**
 * [28] ExprToken ::=
 *         '(' | ')' | '[' | ']' | '.' | '..' | '@' | ',' | '::'
 *         | NameTest
 *         | NodeType
 *         | Operator
 *         | FunctionName
 *         | AxisName
 *         | Literal
 *         | Number
 *         | VariableReference
 *
 * Unused placeholder (rule handled by lexical scanning): traces the call
 * and consumes nothing.
 *
 * @return p0, always
 */
int XPathParser::getExprToken(int p0, int depth)
{
    traceStack("getExprToken", p0, depth);
    const int unchanged = p0;
    return unchanged;
}
/**
 * [29] Literal ::=
 *         '"' [^"]* '"'
 *         | "'" [^']* "'"
 *
 * Unused placeholder (rule handled by lexical scanning): traces the call
 * and consumes nothing.
 *
 * @return p0, always
 */
int XPathParser::getLiteral(int p0, int depth)
{
    traceStack("getLiteral", p0, depth);
    const int unchanged = p0;
    return unchanged;
}
/**
 * [30] Number ::=
 *         Digits ('.' Digits?)?
 *         | '.' Digits
 *
 * Unused placeholder (rule handled by lexical scanning): traces the call
 * and consumes nothing.
 *
 * @return p0, always
 */
int XPathParser::getNumber(int p0, int depth)
{
    traceStack("getNumber", p0, depth);
    const int unchanged = p0;
    return unchanged;
}
/**
 * [31] Digits ::=
 *         [0-9]+
 *
 * Unused placeholder (rule handled by lexical scanning): traces the call
 * and consumes nothing.
 *
 * @return p0, always
 */
int XPathParser::getDigits(int p0, int depth)
{
    traceStack("getDigits", p0, depth);
    const int unchanged = p0;
    return unchanged;
}
/**
 * [32] Operator ::=
 *         OperatorName
 *         | MultiplyOperator
 *         | '/' | '//' | '|' | '+' | '-' | '='
 *         | '!=' | '<' | '<=' | '>' | '>='
 *
 * Unused placeholder (rule handled by lexical scanning): traces the call
 * and consumes nothing.
 *
 * @return p0, always
 */
int XPathParser::getOperator(int p0, int depth)
{
    traceStack("getOperator", p0, depth);
    const int unchanged = p0;
    return unchanged;
}
/**
 * [33] OperatorName ::=
 *         'and' | 'or' | 'mod' | 'div'
 *
 * Unused placeholder (rule handled by lexical scanning): traces the call
 * and consumes nothing.
 *
 * @return p0, always
 */
int XPathParser::getOperatorName(int p0, int depth)
{
    traceStack("getOperatorName", p0, depth);
    const int unchanged = p0;
    return unchanged;
}
/**
 * [34] MultiplyOperator ::=
 *         '*'
 *
 * Unused placeholder (rule handled by lexical scanning): traces the call
 * and consumes nothing.
 *
 * @return p0, always
 */
int XPathParser::getMultiplyOperator(int p0, int depth)
{
    traceStack("getMultiplyOperator", p0, depth);
    const int unchanged = p0;
    return unchanged;
}
/**
 * [35] FunctionName ::=
 *         QName - NodeType
 *
 * Unused placeholder (rule handled by lexical scanning): traces the call
 * and consumes nothing.
 *
 * @return p0, always
 */
int XPathParser::getFunctionName(int p0, int depth)
{
    traceStack("getFunctionName", p0, depth);
    const int unchanged = p0;
    return unchanged;
}
/**
 * [36] VariableReference ::=
 *         '$' QName
 *
 * Unused placeholder (rule handled by lexical scanning): traces the call
 * and consumes nothing.
 *
 * @return p0, always
 */
int XPathParser::getVariableReference(int p0, int depth)
{
    traceStack("getVariableReference", p0, depth);
    const int unchanged = p0;
    return unchanged;
}
/**
 * [37] NameTest ::=
 *         '*'
 *         | NCName ':' '*'
 *         | QName
 *
 * Unused placeholder (rule handled by lexical scanning): traces the call
 * and consumes nothing.
 *
 * @return p0, always
 */
int XPathParser::getNameTest(int p0, int depth)
{
    traceStack("getNameTest", p0, depth);
    const int unchanged = p0;
    return unchanged;
}
/**
 * [38] NodeType ::=
 *         'comment'
 *         | 'text'
 *         | 'processing-instruction'
 *         | 'node'
 *
 * Unused placeholder (rule handled by lexical scanning): traces the call
 * and consumes nothing.
 *
 * @return p0, always
 */
int XPathParser::getNodeType(int p0, int depth)
{
    traceStack("getNodeType", p0, depth);
    const int unchanged = p0;
    return unchanged;
}
/**
 * [39] ExprWhitespace ::=
 *         S
 *
 * Unused placeholder (rule handled by lexical scanning): traces the call
 * and consumes nothing.
 *
 * @return p0, always
 */
int XPathParser::getExprWhitespace(int p0, int depth)
{
    traceStack("getExprWhitespace", p0, depth);
    const int unchanged = p0;
    return unchanged;
}
//#########################################################################
//# H I G H L E V E L P A R S I N G
//#########################################################################
/**
* Parse a candidate XPath string. Leave a copy in 'tokens.'
*/
bool XPathParser::parse(const DOMString &xpathString)
{
int p0 = 0;
DOMString str = xpathString;
parsebuf = (char *)str.c_str();
parselen = (int) str.size();
position = 0;
trace("## parsing string: '%s'", parsebuf);
lexicalScan();
lexicalTokenDump();
tokens.clear();//Get ready to store new tokens
int p = getLocationPath(p0, 0);
parsebuf = NULL;
parselen = 0;
if (p <= p0)
{
//return false;
}
return true;
}
//#########################################################################
//# E V A L U A T E
//#########################################################################
/**
 * Convenience wrapper: parse() the expression, then run the resulting
 * token list through a TokenExecutor to collect the matching DOM nodes.
 * Error messages may be printed as a side effect of parsing.
 *
 * @param root        node handed to the executor as the starting point
 * @param xpathString the XPath expression to evaluate
 * @return an empty list if parse() fails; otherwise the list filled in
 *         by the executor (its success flag is not acted upon)
 */
NodeList XPathParser::evaluate(const NodePtr root,
                               const DOMString &xpathString)
{
    NodeList nothing;
    //### Maybe do caching for speed here
    const bool parsed = parse(xpathString);
    if (!parsed)
        return nothing;
    if (debug)
        tokens.dump();
    //### Run the token list
    TokenExecutor runner;
    NodeList matches;
    const bool executed = runner.execute(tokens, root, matches);
    if (!executed)
    {
        //failure is not reported; whatever was collected is returned
    }
    return matches;
}
} // namespace xpath
} // namespace dom
} // namespace w3c
} // namespace org
//#########################################################################
//# E N D O F F I L E
//#########################################################################