XPath.hs revision 133498fca44f178d10e2eb6d965ce3442d2e2e32
97a9a944b5887e91042b019776c41d5dd74557aferikabele{- |
97a9a944b5887e91042b019776c41d5dd74557aferikabeleModule : $Header$
fd9abdda70912b99b24e3bf1a38f26fde908a74cndDescription : XPath utilities
fd9abdda70912b99b24e3bf1a38f26fde908a74cndCopyright : (c) Christian Maeder, DFKI GmbH 2010
fd9abdda70912b99b24e3bf1a38f26fde908a74cndLicense : similar to LGPL, see HetCATS/LICENSE.txt or LIZENZ.txt
8c81ebbdeed67059e7a1106d7c617543e901c3abyoshikiMaintainer : Christian.Maeder@dfki.de
8c81ebbdeed67059e7a1106d7c617543e901c3abyoshikiStability : provisional
8c81ebbdeed67059e7a1106d7c617543e901c3abyoshikiPortability : portable
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd
96ad5d81ee4a2cc66a4ae19893efc8aa6d06fae7jailletcXPath utilities independent of xml package.
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4ndreferences:
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd<http://www.w3.org/TR/xpath/>
d29d9ab4614ff992b0e8de6e2b88d52b6f1f153erbowen<http://www.galiel.net/el/study/XPath_Overview.html>
2e545ce2450a9953665f701bb05350f0d3f26275nd<http://www.fh-wedel.de/~si/HXmlToolbox/hxpath/diplomarbeit.pdf>
d29d9ab4614ff992b0e8de6e2b88d52b6f1f153erbowen<http://hackage.haskell.org/package/hxt-xpath>
d29d9ab4614ff992b0e8de6e2b88d52b6f1f153erbowen(modules XPathParser, XPathDataTypes)
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd<http://hackage.haskell.org/package/hxt-8.5.0>
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd(modules Text.XML.HXT.DOM.Unicode, Text.XML.HXT.Parser.XmlCharParser)
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd<http://www.w3.org/TR/REC-xml/#NT-Name>
af33a4994ae2ff15bc67d19ff1a7feb906745bf8rbowen
3f08db06526d6901aa08c110b5bc7dde6bc39905nd'ncName' from 'OWL.Parse' allows `+' in names. Unicode is not
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4ndfully supported. A qualified name is an ncName or two ncNames
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4ndseparated by a colon (different from OWL uris).
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd-}
3f08db06526d6901aa08c110b5bc7dde6bc39905nd
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4ndmodule Common.XPath where
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd
3b3b7fc78d1f5bfc2769903375050048ff41ff26ndimport Text.ParserCombinators.Parsec
ad74a0524a06bfe11b7de9e3b4ce7233ab3bd3f7ndimport Common.Parsec
7f5b59ccc63c0c0e3e678a168f09ee6a2f51f9d0ndimport Data.Char
e1e8390280254f7f0580d701e583f670643d4f3fnilgunimport Data.List
f086b4b402fa9a2fefc7dda85de2a3cc1cd0a654rjung
3b3b7fc78d1f5bfc2769903375050048ff41ff26nd-- * data types and pretty printing (via show)
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd-- | axis specifier
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nddata Axis
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd = Ancestor Bool -- ^ or self?
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd | Attribute
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd | Child
8c81ebbdeed67059e7a1106d7c617543e901c3abyoshiki | Descendant Bool -- ^ or self?
8c81ebbdeed67059e7a1106d7c617543e901c3abyoshiki | Following Bool -- ^ sibling?
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd | Namespace
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd | Parent
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd | Preceding Bool -- ^ sibling?
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd | Self deriving Show
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd-- | all possible values
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4ndallAxis :: [Axis]
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4ndallAxis = let bl = [True, False] in
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd [ Attribute
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd , Child
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd , Namespace
30471a4650391f57975f60bbb6e4a90be7b284bfhumbedooh , Parent
1f1b6bf13313fdd14a45e52e553d3ff28689b717coar , Self ]
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd ++ [ Ancestor b | b <- bl ]
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd ++ [ Descendant b | b <- bl ]
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd ++ [ Following b | b <- bl ]
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd ++ [ Preceding b | b <- bl ]
f61ec47ec494705bd2d02b5b6fe021e5c193f2c8noodl
f61ec47ec494705bd2d02b5b6fe021e5c193f2c8noodl-- | utility to show (constant) constructors as lower case strings
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4ndlowerShow :: Show a => a -> String
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4ndlowerShow = map toLower . show
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd-- | proper string representation (do not use show)
06ba4a61654b3763ad65f52283832ebf058fdf1csliveshowAxis :: Axis -> String
06ba4a61654b3763ad65f52283832ebf058fdf1csliveshowAxis a =
06ba4a61654b3763ad65f52283832ebf058fdf1cslive let s = takeWhile isAlpha $ lowerShow a
06ba4a61654b3763ad65f52283832ebf058fdf1cslive orSelf b = if b then s ++ "-or-self" else s
06ba4a61654b3763ad65f52283832ebf058fdf1cslive sibl b = if b then s ++ "-sibling" else s
06ba4a61654b3763ad65f52283832ebf058fdf1cslive in case a of
06ba4a61654b3763ad65f52283832ebf058fdf1cslive Ancestor c -> orSelf c
06ba4a61654b3763ad65f52283832ebf058fdf1cslive Descendant c -> orSelf c
06ba4a61654b3763ad65f52283832ebf058fdf1cslive Following c -> sibl c
06ba4a61654b3763ad65f52283832ebf058fdf1cslive Preceding c -> sibl c
06ba4a61654b3763ad65f52283832ebf058fdf1cslive _ -> s
06ba4a61654b3763ad65f52283832ebf058fdf1cslive
06ba4a61654b3763ad65f52283832ebf058fdf1cslivedata NodeTest
06ba4a61654b3763ad65f52283832ebf058fdf1cslive = NameTest String -- ^ optional prefix and local part (possibly *)
06ba4a61654b3763ad65f52283832ebf058fdf1cslive | PI String -- ^ processing-instruction node type with optional literal
06ba4a61654b3763ad65f52283832ebf058fdf1cslive | Node
06ba4a61654b3763ad65f52283832ebf058fdf1cslive | Comment
06ba4a61654b3763ad65f52283832ebf058fdf1cslive | Text
06ba4a61654b3763ad65f52283832ebf058fdf1cslive deriving Show
06ba4a61654b3763ad65f52283832ebf058fdf1cslive
06ba4a61654b3763ad65f52283832ebf058fdf1cslive-- | all node types without processing-instruction
06ba4a61654b3763ad65f52283832ebf058fdf1cslivenodeTypes :: [NodeTest]
06ba4a61654b3763ad65f52283832ebf058fdf1cslivenodeTypes = [Node, Comment, Text]
06ba4a61654b3763ad65f52283832ebf058fdf1cslive
06ba4a61654b3763ad65f52283832ebf058fdf1cslive-- | the processing-instruction string
06ba4a61654b3763ad65f52283832ebf058fdf1cslivepIS :: String
06ba4a61654b3763ad65f52283832ebf058fdf1cslivepIS = "processing-instruction"
06ba4a61654b3763ad65f52283832ebf058fdf1cslive
06ba4a61654b3763ad65f52283832ebf058fdf1cslive-- | put parens arount a string
06ba4a61654b3763ad65f52283832ebf058fdf1csliveparen :: String -> String
06ba4a61654b3763ad65f52283832ebf058fdf1csliveparen = ('(' :) . (++ ")")
06ba4a61654b3763ad65f52283832ebf058fdf1cslive
06ba4a61654b3763ad65f52283832ebf058fdf1cslive-- | proper string representation (do not use show)
82178a3043043b8813c0d7288a06ca1b7d110d4atakashishowNodeTest :: NodeTest -> String
06ba4a61654b3763ad65f52283832ebf058fdf1csliveshowNodeTest t = case t of
06ba4a61654b3763ad65f52283832ebf058fdf1cslive NameTest q -> q
e1e8390280254f7f0580d701e583f670643d4f3fnilgun PI s -> pIS ++ paren s
e1e8390280254f7f0580d701e583f670643d4f3fnilgun _ -> lowerShow t ++ paren ""
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
e1e8390280254f7f0580d701e583f670643d4f3fnilgun-- | the stuff of a path between the slashes
e1e8390280254f7f0580d701e583f670643d4f3fnilgundata Step = Step Axis NodeTest [Expr] -- ^ with predicate list
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
e1e8390280254f7f0580d701e583f670643d4f3fnilgun-- | string representation considering abbreviations
e1e8390280254f7f0580d701e583f670643d4f3fnilgunshowStep :: Step -> String
e1e8390280254f7f0580d701e583f670643d4f3fnilgunshowStep (Step a n ps) =
e1e8390280254f7f0580d701e583f670643d4f3fnilgun let t = showNodeTest n in
e1e8390280254f7f0580d701e583f670643d4f3fnilgun case (a, n) of
e1e8390280254f7f0580d701e583f670643d4f3fnilgun (Attribute, _) -> '@' : t
e1e8390280254f7f0580d701e583f670643d4f3fnilgun (Child, _) -> t
e1e8390280254f7f0580d701e583f670643d4f3fnilgun (Self, Node) -> "."
e1e8390280254f7f0580d701e583f670643d4f3fnilgun (Parent, Node) -> ".."
e1e8390280254f7f0580d701e583f670643d4f3fnilgun _ -> showAxis a ++ "::" ++ t
e1e8390280254f7f0580d701e583f670643d4f3fnilgun ++ concatMap showPred ps
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
e1e8390280254f7f0580d701e583f670643d4f3fnilguninstance Show Step where
e1e8390280254f7f0580d701e583f670643d4f3fnilgun show = showStep
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
e1e8390280254f7f0580d701e583f670643d4f3fnilgun-- | test for @descendant-or-self::node()@ step
e1e8390280254f7f0580d701e583f670643d4f3fnilgunisDescOrSelfNode :: Step -> Bool
e1e8390280254f7f0580d701e583f670643d4f3fnilgunisDescOrSelfNode (Step a n _) = case (a, n) of
e1e8390280254f7f0580d701e583f670643d4f3fnilgun (Descendant True, Node) -> True
06ba4a61654b3763ad65f52283832ebf058fdf1cslive _ -> False
3a6531f1fd50efd78381e5800802a1449096781eslive
3a6531f1fd50efd78381e5800802a1449096781eslive-- | only absolute paths may be empty
3a6531f1fd50efd78381e5800802a1449096781eslivedata Path = Path Bool [Step] -- ^ absolute?
97a9a944b5887e91042b019776c41d5dd74557aferikabele
3a6531f1fd50efd78381e5800802a1449096781eslive-- | show a path abbreviating @\/descendant-or-self::node()\/@
e1e8390280254f7f0580d701e583f670643d4f3fnilgunshowSteps :: Bool -> [Step] -> String
06ba4a61654b3763ad65f52283832ebf058fdf1csliveshowSteps abso sts = let h = if abso then "/" else "" in case sts of
e1e8390280254f7f0580d701e583f670643d4f3fnilgun [] -> h
06ba4a61654b3763ad65f52283832ebf058fdf1cslive s : r -> let f = h ++ showStep s in case r of
e1e8390280254f7f0580d701e583f670643d4f3fnilgun [] -> f
e1e8390280254f7f0580d701e583f670643d4f3fnilgun _ -> if abso && isDescOrSelfNode s then "//" ++ showSteps False r
06ba4a61654b3763ad65f52283832ebf058fdf1cslive else f ++ showSteps True r
4aa603e6448b99f9371397d439795c91a93637eand
4aa603e6448b99f9371397d439795c91a93637eandinstance Show Path where
17ade6df5ec233536985eb1c130a906c725dd614humbedooh show (Path abso sts) = showSteps abso sts
06ba4a61654b3763ad65f52283832ebf058fdf1cslive
e1e8390280254f7f0580d701e583f670643d4f3fnilgun-- | indicator for primary expressions
e1e8390280254f7f0580d701e583f670643d4f3fnilgundata PrimKind
06ba4a61654b3763ad65f52283832ebf058fdf1cslive = Var -- ^ leading dollar
20f499565e77defe9dab24dd85c02f38a1175855nd | Literal -- ^ single or double quotes
17ade6df5ec233536985eb1c130a906c725dd614humbedooh | Number -- ^ digits possibly with decimal point
06ba4a61654b3763ad65f52283832ebf058fdf1cslive
e1e8390280254f7f0580d701e583f670643d4f3fnilgun-- | expressions where function calls, unary and infix expressions are generic
e1e8390280254f7f0580d701e583f670643d4f3fnilgundata Expr
e06675c51d084791089d79c3ac18aeae8dd465fcrbb = GenExpr Bool String [Expr] -- ^ infix?, op or fct, and arguments
4a56677aad9b66a36f3dc9fddbca8dc1230ad471rbowen | PathExpr (Maybe Expr) Path -- ^ optional filter and path
17ade6df5ec233536985eb1c130a906c725dd614humbedooh | FilterExpr Expr [Expr] -- ^ primary expression with predicates
e1e8390280254f7f0580d701e583f670643d4f3fnilgun | PrimExpr PrimKind String
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
e1e8390280254f7f0580d701e583f670643d4f3fnilguninstance Show Expr where
e1e8390280254f7f0580d701e583f670643d4f3fnilgun show = showExpr
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
e1e8390280254f7f0580d701e583f670643d4f3fnilgun-- | put square brackets around an expression
e1e8390280254f7f0580d701e583f670643d4f3fnilgunshowPred :: Expr -> String
e1e8390280254f7f0580d701e583f670643d4f3fnilgunshowPred e = '[' : showExpr e ++ "]"
2f46ce2a814d7f2b126dfb9f1b25fd64e2fbdc11rbowen
e1e8390280254f7f0580d701e583f670643d4f3fnilgun-- | show expression with minimal parens
e1e8390280254f7f0580d701e583f670643d4f3fnilgunshowExpr :: Expr -> String
e1e8390280254f7f0580d701e583f670643d4f3fnilgunshowExpr e = case e of
e1e8390280254f7f0580d701e583f670643d4f3fnilgun GenExpr infx op args ->
e1e8390280254f7f0580d701e583f670643d4f3fnilgun if infx then
e1e8390280254f7f0580d701e583f670643d4f3fnilgun showInfixExpr op args
e1e8390280254f7f0580d701e583f670643d4f3fnilgun else op ++ paren (intercalate "," $ map showExpr args)
25d005c58102474e06fb67d748273b161d7198d0pquerna PathExpr m p -> case m of
a5a2e98043365d3f5a8bb3c3cdfc252a4f11bf02nd Nothing -> ""
9a58dc6a2b26ec128b1270cf48810e705f1a90dbsf Just f -> showExpr f
a5a2e98043365d3f5a8bb3c3cdfc252a4f11bf02nd ++ show p
a5a2e98043365d3f5a8bb3c3cdfc252a4f11bf02nd FilterExpr pe ps ->
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd (if isPrimExpr pe then id else paren) (showExpr pe)
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd ++ concatMap showPred ps
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd PrimExpr _ s -> s
e1e8390280254f7f0580d701e583f670643d4f3fnilgun
2f46ce2a814d7f2b126dfb9f1b25fd64e2fbdc11rbowen{- | show arguments with minimal parens interspersed with the infix operator.
e1e8390280254f7f0580d701e583f670643d4f3fnilgunAlso treat the unary minus where the argument list is a singleton.
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4ndAlphanumeric operators are shown with spaces, which looks bad for @mod@ and
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd@div@ in conjunction with additive, relational, or equality operators. The
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4ndunary minus gets a leading blank if the preceding character is a
3b3b7fc78d1f5bfc2769903375050048ff41ff26nd'ncNameChar'. -}
ad74a0524a06bfe11b7de9e3b4ce7233ab3bd3f7ndshowInfixExpr :: String -> [Expr] -> String
7f5b59ccc63c0c0e3e678a168f09ee6a2f51f9d0ndshowInfixExpr op args = case args of
e1e8390280254f7f0580d701e583f670643d4f3fnilgun [] -> op -- cannot happen
f086b4b402fa9a2fefc7dda85de2a3cc1cd0a654rjung [arg] -> -- unary minus
727872d18412fc021f03969b8641810d8896820bhumbedooh let s = showExpr arg
0d0ba3a410038e179b695446bb149cce6264e0abnd in op ++ case arg of
727872d18412fc021f03969b8641810d8896820bhumbedooh GenExpr True aOp _ -> case aOp of
cc7e1025de9ac63bd4db6fe7f71c158b2cf09fe4humbedooh "|" -> s
0d0ba3a410038e179b695446bb149cce6264e0abnd _ -> paren s
cc7e1025de9ac63bd4db6fe7f71c158b2cf09fe4humbedooh _ -> s
727872d18412fc021f03969b8641810d8896820bhumbedooh arg : rargs ->
0d0ba3a410038e179b695446bb149cce6264e0abnd let mi = findIndex (elem op) inOps
0d0ba3a410038e179b695446bb149cce6264e0abnd f = parenExpr False mi arg
0d0ba3a410038e179b695446bb149cce6264e0abnd padOp = if any isAlpha op then ' ' : op ++ " " else
ac082aefa89416cbdc9a1836eaf3bed9698201c8humbedooh if elem op addOps && not (null f) && ncNameChar (last f)
0d0ba3a410038e179b695446bb149cce6264e0abnd then ' ' : op else op
0d0ba3a410038e179b695446bb149cce6264e0abnd in f ++ concatMap ((padOp ++) . parenExpr True mi) rargs
0d0ba3a410038e179b695446bb149cce6264e0abnd
727872d18412fc021f03969b8641810d8896820bhumbedooh{- | put parens around arguments that have a lower precedence or
0d0ba3a410038e179b695446bb149cce6264e0abnd equal precendence if they are a right argument. -}
0d0ba3a410038e179b695446bb149cce6264e0abndparenExpr :: Bool -> Maybe Int -> Expr -> String
30471a4650391f57975f60bbb6e4a90be7b284bfhumbedoohparenExpr rst mi e =
205f749042ed530040a4f0080dbcb47ceae8a374rjung let s = showExpr e
af33a4994ae2ff15bc67d19ff1a7feb906745bf8rbowen in case e of
0d0ba3a410038e179b695446bb149cce6264e0abnd GenExpr True op (_ : _ : _) ->
7fec19672a491661b2fe4b29f685bc7f4efa64d4nd let mj = findIndex (elem op) inOps
7fec19672a491661b2fe4b29f685bc7f4efa64d4nd putPar = case (mi, mj) of
7fec19672a491661b2fe4b29f685bc7f4efa64d4nd (Just i, Just j) -> i > j || rst && i == j
0c4abc32c00611fe1d52c9661f5cc79a3f74c6d4nd _ -> True
in if putPar then paren s else s
_ -> s
-- | test if expression is primary
isPrimExpr :: Expr -> Bool
isPrimExpr e = case e of
PrimExpr _ _ -> True
GenExpr False _ _ -> True
_ -> False
-- * infix operators
-- | unequal (@!=@) and equal (@=@)
eqOps :: [String]
eqOps = ["!=", "="]
-- | the four other comparisons
relOps :: [String]
relOps = ["<=", ">=", "<", ">"]
-- | @+@ and @-@, where @-@ is allowed within names and as unary operator
addOps :: [String]
addOps = ["+", "-"]
-- | @*@, div and mod, where @*@ is also used as wildcard for node names
multOps :: [String]
multOps = ["*", "div", "mod"]
{- | all infix operators. Lowest precedence for @or@ followed by @and@,
highest is union(@|@). Only these three operators may get more than two
arguments. -}
inOps :: [[String]]
inOps =
[ ["or"]
, ["and"]
, eqOps
, relOps
, addOps
, multOps
, ["|"]]
-- * parsers
-- | skip trailing spaces
skips :: Parser a -> Parser a
skips = (<< spaces)
-- | parse keyword and skip spaces
symbol :: String -> Parser String
symbol = skips . tryString
-- | skip left paren
lpar :: Parser ()
lpar = forget (symbol "(")
-- | skip right paren
rpar :: Parser ()
rpar = forget (symbol ")")
-- | non-abbreviated axis parser
axis :: Parser Axis
axis = choice (map (\ a -> symbol (showAxis a) >> return a) allAxis)
<?> "axis"
-- | the axis specifier parser
abbrAxis :: Parser Axis
abbrAxis =
(symbol "@" >> return Attribute)
<|> try (axis << symbol "::")
<|> return Child
<?> "abbrAxis"
-- | starting name character (no unicode)
ncNameStart :: Char -> Bool
ncNameStart c = isAlpha c || c == '_'
-- | name character (without @+@) including centered dot (and no other unicode)
ncNameChar :: Char -> Bool
ncNameChar c = isAlphaNum c || elem c ".-_\183"
-- | non-colon xml names (non-skipping)
ncName :: Parser String
ncName = satisfy ncNameStart <:> many (satisfy ncNameChar) <?> "ncName"
-- | literal string within single or double quotes (skipping)
literal :: Parser String
literal = skips $
char '"' <:> many (satisfy (/= '"')) <++> string "\""
<|> char '\'' <:> many (satisfy (/= '\'')) <++> string "'"
-- | ncName or wild-card (@*@) (skipping)
localName :: Parser String
localName = symbol "*" <|> skips ncName <?> "localName"
-- | the node test parser
nodeTest :: Parser NodeTest
nodeTest = fmap PI (symbol pIS >> lpar >> literal << rpar)
<|> choice (map (\ t -> symbol (lowerShow t)
>> lpar >> rpar >> return t) nodeTypes)
<|> do
p <- try (ncName <++> string ":")
l <- localName
return $ NameTest $ p ++ l
<|> do
l <- localName
return $ NameTest l
<?> "nodeTest"
-- | parent or self abbreviated steps
abbrStep :: Parser Step
abbrStep =
(symbol ".." >> return (Step Parent Node []))
<|> (symbol "." >> return (Step Self Node []))
<?> "abbrStep"
-- | the predicate (expression in square brackets) parser
predicate :: Parser Expr
predicate = symbol "[" >> expr << symbol "]" <?> "predicate"
-- | the step (stuff between slashes) parser
step :: Parser Step
step = abbrStep <|> do
a <- abbrAxis
t <- nodeTest
ps <- many predicate
return (Step a t ps)
<?> "step"
-- | the implicit @descendant-or-self::node()@ step constant
descOrSelfStep :: Step
descOrSelfStep = Step (Descendant True) Node []
-- | a double or single slash
doubleSlash :: Parser Bool
doubleSlash = (symbol "//" >> return True) <|> (symbol "/" >> return False)
{- | a step starting with a single or double slash,
the latter yielding two steps. -}
slashStep :: Parser [Step]
slashStep = do
b <- doubleSlash
s <- step
return (if b then [descOrSelfStep, s] else [s])
<?> "slashStep"
-- | parse the steps of a relative path
relPath :: Parser [Step]
relPath = do
s <- step
sl <- many slashStep
return (s : concat sl)
<?> "relPath"
-- | a (possibly empty) absolute or (non-empty) relative path
path :: Parser Path
path = do
m <- optionMaybe doubleSlash
s <- (case m of
Just False -> optionL
_ -> id) relPath
return (case m of
Nothing -> Path False s
Just b -> Path True $ if b then descOrSelfStep : s else s)
<?> "path"
-- | at least one digit and at most one decimal point (skipping)
number :: Parser String
number = skips $ many1 digit <++> optionL (char '.' <:> many digit)
<|> try (char '.' <:> many1 digit)
-- | a qualified name (prefixed or unprefixed)
qualName :: Parser String
qualName = skips $ ncName <++> optionL (char ':' <:> ncName)
-- | parse a primary expression (including 'fct' or 'expr' in parens)
primExpr :: Parser Expr
primExpr = fmap (PrimExpr Var) (char '$' <:> qualName)
<|> (lpar >> expr << rpar)
<|> fmap (PrimExpr Literal) literal
<|> fmap (PrimExpr Number) number
<|> fct
-- | parse a function call by checking the qname and the left paren
fct :: Parser Expr
fct = do
q <- try $ do
n <- qualName
if elem n $ pIS : map lowerShow nodeTypes
then fail $ n ++ " not allowed as function name"
else lpar >> return n
args <- sepBy expr (symbol ",")
rpar
return $ GenExpr False q args
-- | parse a filter expresssion as primary expression followed by predicates
filterExpr :: Parser Expr
filterExpr = do
e <- primExpr
ps <- many predicate
return $ if null ps then e else FilterExpr e ps
{- | a path expression is either a filter expression followed by a (non-empty)
absoulte path or an ordinary 'path'. -}
pathExpr :: Parser Expr
pathExpr = do
f <- filterExpr
s <- optionL $ do
b <- doubleSlash
r <- relPath
return $ if b then descOrSelfStep : r else r
return $ if null s then f else PathExpr (Just f) $ Path True s
<|> fmap (PathExpr Nothing) path
-- | parse multiple argument expressions separated by an infix symbol
singleInfixExpr :: Parser Expr -> String -> Parser Expr
singleInfixExpr p s = do
l <- sepBy1 p $ symbol s
return $ case l of
[e] -> e
_ -> GenExpr True s l
-- | 'pathExpr' are arguments of union expression
unionExpr :: Parser Expr
unionExpr = singleInfixExpr pathExpr "|"
-- | 'unionExpr' can be prefixed by the unary minus
unaryExpr :: Parser Expr
unaryExpr = do
symbol "-"
e <- unaryExpr
return $ GenExpr True "-" [e]
<|> unionExpr
{- | parse as many arguments separated by any infix symbol as possible
but construct left-associative binary application trees. -}
leftAssocExpr :: Parser Expr -> [String] -> Parser Expr
leftAssocExpr p ops =
chainl1 p $ do
op <- choice $ map symbol ops
return $ \ a b -> GenExpr True op [a, b]
-- * all final infix parsers using 'leftAssocExpr' or 'singleInfixExpr'
multExpr :: Parser Expr
multExpr = leftAssocExpr unaryExpr multOps
addExpr :: Parser Expr
addExpr = leftAssocExpr multExpr addOps
relExpr :: Parser Expr
relExpr = leftAssocExpr addExpr relOps
eqExpr :: Parser Expr
eqExpr = leftAssocExpr relExpr eqOps
andExpr :: Parser Expr
andExpr = singleInfixExpr eqExpr "and"
-- | the top-level expressions interspersed by @or@.
expr :: Parser Expr
expr = singleInfixExpr andExpr "or"
-- * checking sanity of paths
data PrincipalNodeType
= TAttribute
| TNamespace
| TElement deriving Eq
principalNodeType :: Axis -> PrincipalNodeType
principalNodeType a = case a of
Attribute -> TAttribute
Namespace -> TNamespace
_ -> TElement
-- | may this step have further steps
isElementNode :: Step -> Bool
isElementNode (Step a t _) =
principalNodeType a == TElement && case t of
Node -> True
NameTest _ -> True
_ -> False
isLegalPath :: [Step] -> Bool
isLegalPath l = case l of
[] -> True
[_] -> True
s : r -> isElementNode s && isLegalPath r
finalStep :: Path -> Maybe Step
finalStep (Path _ l) = case l of
[] -> Nothing
_ -> Just $ last l
finalPrincipalNodeType :: Path -> PrincipalNodeType
finalPrincipalNodeType p = case finalStep p of
Nothing -> TElement
Just (Step a _ _) -> principalNodeType a
data BasicType
= NodeSet
| Boolean
| Numeral
| String
| Object
coreFcts :: [(String, (BasicType, [BasicType]))]
coreFcts =
[ ("last", (Numeral, []))
, ("position", (Numeral, []))
, ("count", (Numeral, [NodeSet]))
, ("id", (NodeSet, [Object]))
, ("local-name", (String, [NodeSet]))
, ("namespace-uri", (String, [NodeSet]))
, ("name", (String, [NodeSet]))
, ("string", (String, [Object]))
, ("concat", (String, [String, String]))
, ("substring-before", (String, [String, String]))
, ("substring-after", (String, [String, String]))
, ("substring", (String, [String, Numeral, Numeral]))
, ("starts-with", (Boolean, [String, String]))
, ("contains", (Boolean, [String, String]))
, ("string-length", (Numeral, [String]))
, ("normalize-space", (String, [String]))
, ("translate", (String, [String, String, String]))
, ("boolean", (Boolean, [Object]))
, ("not", (Boolean, [Boolean]))
, ("true", (Boolean, []))
, ("false", (Boolean, []))
, ("lang", (Boolean, [String]))
, ("number", (Numeral, [Object]))
, ("sum", (Numeral, [NodeSet]))
, ("floor", (Numeral, [Numeral]))
, ("ceiling", (Numeral, [Numeral]))
, ("round", (Numeral, [Numeral]))
]
basicType :: Expr -> BasicType
basicType e = case e of
GenExpr infx op _ ->
if infx then case op of
"|" -> NodeSet
_ | elem op $ ["or", "and"] ++ eqOps ++ relOps -> Boolean
| elem op $ addOps ++ multOps -> Numeral
_ -> Object
else case lookup op coreFcts of
Just (t, _) -> t
Nothing -> Object
PrimExpr k _ -> case k of
Number -> Numeral
Literal -> String
Var -> Object
_ -> NodeSet
isPathExpr :: Expr -> Bool
isPathExpr e = case e of
GenExpr True "|" args -> all isPathExpr args
GenExpr False "id" [_] -> True
PrimExpr Var _ -> True
PathExpr m (Path _ s) -> isLegalPath s && maybe True isPathExpr m
FilterExpr p _ -> isPathExpr p
_ -> False
-- | parse string and perform sanity check
maybePath :: String -> Maybe Expr
maybePath s = case parse (expr << eof) "" s of
Right e | isPathExpr e -> Just e
_ -> Nothing