4752N/ADescription : XPath utilities
4752N/ACopyright : (c) Christian Maeder, DFKI GmbH 2010
4752N/AMaintainer : Christian.Maeder@dfki.de
4752N/AXPath utilities independent of xml package.
4752N/A(modules XPathParser, XPathDataTypes)
4752N/Afully supported. A qualified name is an ncName or two ncNames
4752N/Aseparated by a colon (different from OWL uris).
4752N/A-- * data types and pretty printing (via show)
4752N/A = Ancestor Bool -- ^ or self?
4752N/A | Descendant Bool -- ^ or self?
4752N/A | Following Bool -- ^ sibling?
4752N/A | Preceding Bool -- ^ sibling?
4752N/AallAxis = let bl = [True, False] in
4752N/A ++ [ Ancestor b | b <- bl ]
4752N/A ++ [ Descendant b | b <- bl ]
4752N/A ++ [ Following b | b <- bl ]
4752N/A ++ [ Preceding b | b <- bl ]
4752N/A-- | utility to show (constant) constructors as lower case strings
4752N/AlowerShow :: Show a => a -> String
4752N/AlowerShow = map toLower . show
4752N/A-- | proper string representation (do not use show)
4752N/A let s = takeWhile isAlpha $ lowerShow a
4752N/A orSelf b = if b then s ++ "-or-self" else s
4752N/A sibl b = if b then s ++ "-sibling" else s
4752N/A = NameTest String -- ^ optional prefix and local part (possibly *)
4752N/A | PI String -- ^ processing-instruction node type with optional literal
4752N/A-- | all node types without processing-instruction
4752N/AnodeTypes = [Node, Comment, Text]
4752N/A-- | the processing-instruction string
4752N/ApIS = "processing-instruction"
4752N/A-- | put parens arount a string
4752N/A-- | proper string representation (do not use show)
4752N/AshowNodeTest :: NodeTest -> String
4752N/A _ -> lowerShow t ++ paren ""
4752N/A-- | the stuff of a path between the slashes
4752N/Adata Step = Step Axis NodeTest [Expr] -- ^ with predicate list
4752N/A-- | string representation considering abbreviations
4752N/A _ -> showAxis a ++ "::" ++ t
4752N/A-- | test for @descendant-or-self::node()@ step
4752N/AisDescOrSelfNode :: Step -> Bool
4752N/AisDescOrSelfNode (Step a n _) = case (a, n) of
4752N/A (Descendant True, Node) -> True
4752N/A-- | only absolute paths may be empty
4752N/Adata Path = Path Bool [Step] -- ^ absolute?
4752N/A-- | show a path abbreviating @\/descendant-or-self::node()\/@
4752N/AshowSteps :: Bool -> [Step] -> String
showSteps abso sts = let h = if abso then "/" else "" in case sts of
s : r -> let f = h ++ showStep s in case r of
_ -> if abso && isDescOrSelfNode s then "//" ++ showSteps False r
else f ++ showSteps True r
show (Path abso sts) = showSteps abso sts
-- | indicator for primary expressions
= Var -- ^ leading dollar
| Literal -- ^ single or double quotes
| Number -- ^ digits possibly with decimal point
-- | expressions where function calls, unary and infix expressions are generic
= GenExpr Bool String [Expr] -- ^ infix?, op or fct, and arguments
| PathExpr (Maybe Expr) Path -- ^ optional filter and path
| FilterExpr Expr [Expr] -- ^ primary expression with predicates
| PrimExpr PrimKind String
-- | put square brackets around an expression
showPred :: Expr -> String
showPred e = '[' : showExpr e ++ "]"
-- | show expression with minimal parens
showExpr :: Expr -> String
else op ++ paren (intercalate "," $ map showExpr args)
PathExpr m p -> case m of
(if isPrimExpr pe then id else paren) (showExpr pe)
{- | show arguments with minimal parens interspersed with the infix operator.
Also treat the unary minus where the argument list is a singleton.
Alphanumeric operators are shown with spaces, which looks bad for @mod@ and
@div@ in conjunction with additive, relational, or equality operators. The
unary minus gets a leading blank if the preceding character is a
showInfixExpr :: String -> [Expr] -> String
showInfixExpr op args = case args of
[] -> op -- cannot happen
GenExpr True aOp _ -> case aOp of
let mi = findIndex (elem op) inOps
f = parenExpr False mi arg
padOp = if any isAlpha op then ' ' : op ++ " " else
if elem op addOps && not (null f) && ncNameChar (last f)
in f ++ concatMap ((padOp ++) . parenExpr True mi) rargs
{- | put parens around arguments that have a lower precedence or
equal precendence if they are a right argument. -}
parenExpr :: Bool -> Maybe Int -> Expr -> String
GenExpr True op (_ : _ : _) ->
let mj = findIndex (elem op) inOps
putPar = case (mi, mj) of
(Just i, Just j) -> i > j || rst && i == j
in if putPar then paren s else s
-- | test if expression is primary
isPrimExpr :: Expr -> Bool
GenExpr False _ _ -> True
-- | unequal (@!=@) and equal (@=@)
-- | the four other comparisons
relOps = ["<=", ">=", "<", ">"]
-- | @+@ and @-@, where @-@ is allowed within names and as unary operator
-- | @*@, div and mod, where @*@ is also used as wildcard for node names
multOps = ["*", "div", "mod"]
{- | all infix operators. Lowest precedence for @or@ followed by @and@,
highest is union(@|@). Only these three operators may get more than two
-- | skip trailing spaces
skips :: Parser a -> Parser a
-- | parse keyword and skip spaces
symbol :: String -> Parser String
symbol = skips . tryString
lpar = forget (symbol "(")
rpar = forget (symbol ")")
-- | non-abbreviated axis parser
axis = choice (map (\ a -> symbol (showAxis a) >> return a) allAxis)
-- | the axis specifier parser
(symbol "@" >> return Attribute)
<|> try (axis << symbol "::")
-- | starting name character (no unicode)
ncNameStart :: Char -> Bool
ncNameStart c = isAlpha c || c == '_'
-- | name character (without @+@) including centered dot (and no other unicode)
ncNameChar :: Char -> Bool
ncNameChar c = isAlphaNum c || elem c ".-_\183"
-- | non-colon xml names (non-skipping)
ncName = satisfy ncNameStart <:> many (satisfy ncNameChar) <?> "ncName"
-- | literal string within single or double quotes (skipping)
char '"' <:> many (satisfy (/= '"')) <++> string "\""
<|> char '\'' <:> many (satisfy (/= '\'')) <++> string "'"
-- | ncName or wild-card (@*@) (skipping)
localName :: Parser String
localName = symbol "*" <|> skips ncName <?> "localName"
-- | the node test parser
nodeTest :: Parser NodeTest
nodeTest = fmap PI (symbol pIS >> lpar >> literal << rpar)
<|> choice (map (\ t -> symbol (lowerShow t)
>> lpar >> rpar >> return t) nodeTypes)
p <- try (ncName <++> string ":")
return $ NameTest $ p ++ l
-- | parent or self abbreviated steps
(symbol ".." >> return (Step Parent Node []))
<|> (symbol "." >> return (Step Self Node []))
-- | the predicate (expression in square brackets) parser
predicate = symbol "[" >> expr << symbol "]" <?> "predicate"
-- | the step (stuff between slashes) parser
-- | the implicit @descendant-or-self::node()@ step constant
descOrSelfStep = Step (Descendant True) Node []
-- | a double or single slash
doubleSlash :: Parser Bool
doubleSlash = (symbol "//" >> return True) <|> (symbol "/" >> return False)
{- | a step starting with a single or double slash,
the latter yielding two steps. -}
slashStep :: Parser [Step]
return (if b then [descOrSelfStep, s] else [s])
-- | parse the steps of a relative path
-- | a (possibly empty) absolute or (non-empty) relative path
m <- optionMaybe doubleSlash
Just b -> Path True $ if b then descOrSelfStep : s else s)
-- | at least one digit and at most one decimal point (skipping)
number = skips $ many1 digit <++> optionL (char '.' <:> many digit)
<|> try (char '.' <:> many1 digit)
-- | a qualified name (prefixed or unprefixed)
qualName :: Parser String
qualName = skips $ ncName <++> optionL (char ':' <:> ncName)
-- | parse a primary expression (including 'fct' or 'expr' in parens)
primExpr = fmap (PrimExpr Var) (char '$' <:> qualName)
<|> (lpar >> expr << rpar)
<|> fmap (PrimExpr Literal) literal
<|> fmap (PrimExpr Number) number
-- | parse a function call by checking the qname and the left paren
if elem n $ pIS : map lowerShow nodeTypes
then fail $ n ++ " not allowed as function name"
args <- sepBy expr (symbol ",")
return $ GenExpr False q args
-- | parse a filter expresssion as primary expression followed by predicates
filterExpr :: Parser Expr
return $ if null ps then e else FilterExpr e ps
{- | a path expression is either a filter expression followed by a (non-empty)
absoulte path or an ordinary 'path'. -}
return $ if b then descOrSelfStep : r else r
return $ if null s then f else PathExpr (Just f) $ Path True s
<|> fmap (PathExpr Nothing) path
-- | parse multiple argument expressions separated by an infix symbol
singleInfixExpr :: Parser Expr -> String -> Parser Expr
-- | 'pathExpr' are arguments of union expression
unionExpr = singleInfixExpr pathExpr "|"
-- | 'unionExpr' can be prefixed by the unary minus
return $ GenExpr True "-" [e]
{- | parse as many arguments separated by any infix symbol as possible
but construct left-associative binary application trees. -}
leftAssocExpr :: Parser Expr -> [String] -> Parser Expr
op <- choice $ map symbol ops
return $ \ a b -> GenExpr True op [a, b]
-- * all final infix parsers using 'leftAssocExpr' or 'singleInfixExpr'
multExpr = leftAssocExpr unaryExpr multOps
addExpr = leftAssocExpr multExpr addOps
relExpr = leftAssocExpr addExpr relOps
eqExpr = leftAssocExpr relExpr eqOps
andExpr = singleInfixExpr eqExpr "and"
-- | the top-level expressions interspersed by @or@.
expr = singleInfixExpr andExpr "or"
-- * checking sanity of paths
principalNodeType :: Axis -> PrincipalNodeType
principalNodeType a = case a of
-- | may this step have further steps
isElementNode :: Step -> Bool
isElementNode (Step a t _) =
principalNodeType a == TElement && case t of
isLegalPath :: [Step] -> Bool
isLegalPath l = case l of
s : r -> isElementNode s && isLegalPath r
finalStep :: Path -> Maybe Step
finalStep (Path _ l) = case l of
finalPrincipalNodeType :: Path -> PrincipalNodeType
finalPrincipalNodeType p = case finalStep p of
Just (Step a _ _) -> principalNodeType a
type FctEnv = [(String, (BasicType, [BasicType]))]
type VarEnv = [(String, BasicType)]
[ ("last", (Numeral, []))
, ("position", (Numeral, []))
, ("count", (Numeral, [NodeSet]))
, ("id", (NodeSet, [Object]))
, ("local-name", (String, [NodeSet]))
, ("namespace-uri", (String, [NodeSet]))
, ("name", (String, [NodeSet]))
, ("string", (String, [Object]))
, ("concat", (String, [String, String]))
, ("substring-before", (String, [String, String]))
, ("substring-after", (String, [String, String]))
, ("substring", (String, [String, Numeral, Numeral]))
, ("starts-with", (Boolean, [String, String]))
, ("contains", (Boolean, [String, String]))
, ("string-length", (Numeral, [String]))
, ("normalize-space", (String, [String]))
, ("translate", (String, [String, String, String]))
, ("boolean", (Boolean, [Object]))
, ("not", (Boolean, [Boolean]))
, ("true", (Boolean, []))
, ("false", (Boolean, []))
, ("lang", (Boolean, [String]))
, ("number", (Numeral, [Object]))
, ("sum", (Numeral, [NodeSet]))
, ("floor", (Numeral, [Numeral]))
, ("ceiling", (Numeral, [Numeral]))
, ("round", (Numeral, [Numeral]))
basicType :: Expr -> BasicType
_ | elem op $ ["or", "and"] ++ eqOps ++ relOps -> Boolean
| elem op $ addOps ++ multOps -> Numeral
else case lookup op coreFcts of
PrimExpr k _ -> case k of
isPathExpr :: Expr -> Bool
GenExpr True "|" args -> all isPathExpr args
GenExpr False "id" [_] -> True
PathExpr m (Path _ s) -> isLegalPath s && maybe True isPathExpr m
FilterExpr p _ -> isPathExpr p
parseExpr :: String -> Either String Expr
parseExpr s = case parse (expr << eof) "" s of
Right e | isPathExpr e -> Right e
_ -> Left "not a legal path expression"
getPaths :: Expr -> [Path]
GenExpr True "|" args -> concatMap getPaths args
PathExpr m p@(Path _ s) -> case m of
Just fe -> map (\ (Path r f) -> Path r $ f ++ s) $ getPaths fe
FilterExpr p _ -> getPaths p