XPath.hs revision 6596e6462e9356ac01f15a6dcada971e1f346b63
08cb74ca432a8c24e39f17dedce527e6a47b8001jerenkrantzModule : $Header$
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesDescription : XPath utilities
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesCopyright : (c) Christian Maeder, DFKI GmbH 2010
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesLicense : similar to LGPL, see HetCATS/LICENSE.txt or LIZENZ.txt
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesMaintainer : Christian.Maeder@dfki.de
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesStability : provisional
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesPortability : portable
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesXPath utilities independent of xml package.
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes<http://www.galiel.net/el/study/XPath_Overview.html>
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes<http://www.fh-wedel.de/~si/HXmlToolbox/hxpath/diplomarbeit.pdf>
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes(modules XPathParser, XPathDataTypes)
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes(modules Text.XML.HXT.DOM.Unicode, Text.XML.HXT.Parser.XmlCharParser)
e8f95a682820a599fe41b22977010636be5c2717jim'ncName' from 'OWL.Parse' allows `+' in names. Unicode is not
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesfully supported. A qualified name is an ncName or two ncNames
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesseparated by a colon (different from OWL uris).
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- * data types and pretty printing (via show)
5c0419d51818eb02045cf923a9fe456127a44c60wrowe-- | axis specifier
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes = Ancestor Bool -- ^ or self?
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes | Descendant Bool -- ^ or self?
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes | Following Bool -- ^ sibling?
d266c3777146d36a4c23c17aad6f153aebea1bf4jorton | Namespace
d266c3777146d36a4c23c17aad6f153aebea1bf4jorton | Preceding Bool -- ^ sibling?
d266c3777146d36a4c23c17aad6f153aebea1bf4jorton | Self deriving Show
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | all possible values
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesallAxis :: [Axis]
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesallAxis = let bl = [True, False] in
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes ++ [ Ancestor b | b <- bl ]
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes ++ [ Descendant b | b <- bl ]
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes ++ [ Following b | b <- bl ]
0568280364eb026393be492ebc732795c4934643jorton ++ [ Preceding b | b <- bl ]
0568280364eb026393be492ebc732795c4934643jorton-- | utility to show (constant) constructors as lower case strings
0568280364eb026393be492ebc732795c4934643jortonlowerShow :: Show a => a -> String
0568280364eb026393be492ebc732795c4934643jortonlowerShow = map toLower . show
0568280364eb026393be492ebc732795c4934643jorton-- | proper string representation (do not use show)
0568280364eb026393be492ebc732795c4934643jortonshowAxis :: Axis -> String
0568280364eb026393be492ebc732795c4934643jortonshowAxis a =
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes let s = takeWhile isAlpha $ lowerShow a
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes orSelf b = if b then s ++ "-or-self" else s
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes sibl b = if b then s ++ "-sibling" else s
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes in case a of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes Ancestor c -> orSelf c
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes Descendant c -> orSelf c
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes Following c -> sibl c
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes Preceding c -> sibl c
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesdata NodeTest
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes = NameTest String -- ^ optional prefix and local part (possibly *)
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes | PI String -- ^ processing-instruction node type with optional literal
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes deriving Show
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | all node types without processing-instruction
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesnodeTypes :: [NodeTest]
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesnodeTypes = [Node, Comment, Text]
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | the processing-instruction string
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholespIS :: String
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholespIS = "processing-instruction"
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | put parens arount a string
482f676c6c19b1c5bb5cca04dad11509c1da3a4cwroweparen :: String -> String
e8f95a682820a599fe41b22977010636be5c2717jimparen = ('(' :) . (++ ")")
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | proper string representation (do not use show)
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesshowNodeTest :: NodeTest -> String
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesshowNodeTest t = case t of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes NameTest q -> q
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes PI s -> pIS ++ paren s
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes _ -> lowerShow t ++ paren ""
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | the stuff of a path between the slashes
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesdata Step = Step Axis NodeTest [Expr] -- ^ with predicate list
713a2b68bac4aeb1e9c48785006c0732451039depquerna-- | string representation considering abbreviations
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesshowStep :: Step -> String
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesshowStep (Step a n ps) =
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes let t = showNodeTest n in
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes case (a, n) of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes (Attribute, _) -> '@' : t
482f676c6c19b1c5bb5cca04dad11509c1da3a4cwrowe (Child, _) -> t
482f676c6c19b1c5bb5cca04dad11509c1da3a4cwrowe (Self, Node) -> "."
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes (Parent, Node) -> ".."
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes _ -> showAxis a ++ "::" ++ t
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes ++ concatMap showPred ps
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesinstance Show Step where
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes show = showStep
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | test for @descendant-or-self::node()@ step
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesisDescOrSelfNode :: Step -> Bool
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesisDescOrSelfNode (Step a n _) = case (a, n) of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes (Descendant True, Node) -> True
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | only absolute paths may be empty
8113dac419143273351446c3ad653f3fe5ba5cfdwrowedata Path = Path Bool [Step] -- ^ absolute?
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | show a path abbreviating @\/descendant-or-self::node()\/@
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesshowSteps :: Bool -> [Step] -> String
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesshowSteps abso sts = let h = if abso then "/" else "" in case sts of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes s : r -> let f = h ++ showStep s in case r of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes _ -> if abso && isDescOrSelfNode s then "//" ++ showSteps False r
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes else f ++ showSteps True r
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesinstance Show Path where
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes show (Path abso sts) = showSteps abso sts
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | indicator for primary expressions
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesdata PrimKind
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes = Var -- ^ leading dollar
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes | Literal -- ^ single or double quotes
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes | Number -- ^ digits possibly with decimal point
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | expressions where function calls, unary and infix expressions are generic
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes = GenExpr Bool String [Expr] -- ^ infix?, op or fct, and arguments
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes | PathExpr (Maybe Expr) Path -- ^ optional filter and path
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes | FilterExpr Expr [Expr] -- ^ primary expression with predicates
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes | PrimExpr PrimKind String
8113dac419143273351446c3ad653f3fe5ba5cfdwroweinstance Show Expr where
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes show = showExpr
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | put square brackets around an expression
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesshowPred :: Expr -> String
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesshowPred e = '[' : showExpr e ++ "]"
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | show expression with minimal parens
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesshowExpr :: Expr -> String
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesshowExpr e = case e of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes GenExpr infx op args ->
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes if infx then
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes showInfixExpr op args
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes else op ++ paren (intercalate "," $ map showExpr args)
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes PathExpr m p -> case m of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes Nothing -> ""
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes Just f -> showExpr f
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes FilterExpr pe ps ->
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes (if isPrimExpr pe then id else paren) (showExpr pe)
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes ++ concatMap showPred ps
8113dac419143273351446c3ad653f3fe5ba5cfdwrowe PrimExpr _ s -> s
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes{- | show arguments with minimal parens interspersed with the infix operator.
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesAlso treat the unary minus where the argument list is a singleton.
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesAlphanumeric operators are shown with spaces, which looks bad for @mod@ and
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes@div@ in conjunction with additive, relational, or equality operators. The
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesunary minus gets a leading blank if the preceding character is a
f43b67c5a9d29b572eac916f8335cedc80c908bebnicholes'ncNameChar'. -}
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesshowInfixExpr :: String -> [Expr] -> String
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesshowInfixExpr op args = case args of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes [] -> op -- cannot happen
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes [arg] -> -- unary minus
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes let s = showExpr arg
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes in op ++ case arg of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes GenExpr True aOp _ -> case aOp of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes _ -> paren s
f43b67c5a9d29b572eac916f8335cedc80c908bebnicholes arg : rargs ->
e8f95a682820a599fe41b22977010636be5c2717jim let mi = findIndex (elem op) inOps
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes f = parenExpr False mi arg
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes padOp = if any isAlpha op then ' ' : op ++ " " else
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes if elem op addOps && not (null f) && ncNameChar (last f)
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes then ' ' : op else op
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes in f ++ concatMap ((padOp ++) . parenExpr True mi) rargs
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes{- | put parens around arguments that have a lower precedence or
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes equal precendence if they are a right argument. -}
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesparenExpr :: Bool -> Maybe Int -> Expr -> String
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesparenExpr rst mi e =
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes let s = showExpr e
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes in case e of
e8f95a682820a599fe41b22977010636be5c2717jim GenExpr True op (_ : _ : _) ->
8113dac419143273351446c3ad653f3fe5ba5cfdwrowe let mj = findIndex (elem op) inOps
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes putPar = case (mi, mj) of
54d22ed1c429b903b029bbd62621f11a9e286137minfrin (Just i, Just j) -> i > j || rst && i == j
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes in if putPar then paren s else s
e8f95a682820a599fe41b22977010636be5c2717jim-- | test if expression is primary
3dfeb02cfb853d8717ca0cc259b59fea610173f5bnicholesisPrimExpr :: Expr -> Bool
3dfeb02cfb853d8717ca0cc259b59fea610173f5bnicholesisPrimExpr e = case e of
54d22ed1c429b903b029bbd62621f11a9e286137minfrin PrimExpr _ _ -> True
54d22ed1c429b903b029bbd62621f11a9e286137minfrin GenExpr False _ _ -> True
54d22ed1c429b903b029bbd62621f11a9e286137minfrin-- * infix operators
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | unequal (@!=@) and equal (@=@)
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholeseqOps :: [String]
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholeseqOps = ["!=", "="]
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | the four other comparisons
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesrelOps :: [String]
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesrelOps = ["<=", ">=", "<", ">"]
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | @+@ and @-@, where @-@ is allowed within names and as unary operator
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesaddOps :: [String]
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesaddOps = ["+", "-"]
54d22ed1c429b903b029bbd62621f11a9e286137minfrin-- | @*@, div and mod, where @*@ is also used as wildcard for node names
81965264d92dd8c9ca21d058420f6f6da34b3032minfrinmultOps :: [String]
81965264d92dd8c9ca21d058420f6f6da34b3032minfrinmultOps = ["*", "div", "mod"]
81965264d92dd8c9ca21d058420f6f6da34b3032minfrin{- | all infix operators. Lowest precedence for @or@ followed by @and@,
54d22ed1c429b903b029bbd62621f11a9e286137minfrinhighest is union(@|@). Only these three operators may get more than two
54d22ed1c429b903b029bbd62621f11a9e286137minfrinarguments. -}
54d22ed1c429b903b029bbd62621f11a9e286137minfrininOps :: [[String]]
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | skip trailing spaces
54d22ed1c429b903b029bbd62621f11a9e286137minfrinskips :: Parser a -> Parser a
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesskips = (<< spaces)
54d22ed1c429b903b029bbd62621f11a9e286137minfrin-- | parse keyword and skip spaces
54d22ed1c429b903b029bbd62621f11a9e286137minfrinsymbol :: String -> Parser String
54d22ed1c429b903b029bbd62621f11a9e286137minfrinsymbol = skips . tryString
54d22ed1c429b903b029bbd62621f11a9e286137minfrin-- | skip left paren
54d22ed1c429b903b029bbd62621f11a9e286137minfrinlpar :: Parser ()
5ecb216eeb5125a953c2e3b08a127e35e7d1c1c5bnicholeslpar = forget (symbol "(")
5ecb216eeb5125a953c2e3b08a127e35e7d1c1c5bnicholes-- | skip right paren
54d22ed1c429b903b029bbd62621f11a9e286137minfrinrpar :: Parser ()
54d22ed1c429b903b029bbd62621f11a9e286137minfrinrpar = forget (symbol ")")
54d22ed1c429b903b029bbd62621f11a9e286137minfrin-- | non-abbreviated axis parser
54d22ed1c429b903b029bbd62621f11a9e286137minfrinaxis :: Parser Axis
54d22ed1c429b903b029bbd62621f11a9e286137minfrinaxis = choice (map (\ a -> symbol (showAxis a) >> return a) allAxis)
54d22ed1c429b903b029bbd62621f11a9e286137minfrin-- | the axis specifier parser
54d22ed1c429b903b029bbd62621f11a9e286137minfrinabbrAxis :: Parser Axis
54d22ed1c429b903b029bbd62621f11a9e286137minfrin (symbol "@" >> return Attribute)
54d22ed1c429b903b029bbd62621f11a9e286137minfrin <|> try (axis << symbol "::")
54d22ed1c429b903b029bbd62621f11a9e286137minfrin <|> return Child
81965264d92dd8c9ca21d058420f6f6da34b3032minfrin <?> "abbrAxis"
e1cc1ac970d0aa2910027f1f20445a16207a6deeminfrin-- | starting name character (no unicode)
e1cc1ac970d0aa2910027f1f20445a16207a6deeminfrinncNameStart :: Char -> Bool
e1cc1ac970d0aa2910027f1f20445a16207a6deeminfrinncNameStart c = isAlpha c || c == '_'
e1cc1ac970d0aa2910027f1f20445a16207a6deeminfrin-- | name character (without @+@) including centered dot (and no other unicode)
e1cc1ac970d0aa2910027f1f20445a16207a6deeminfrinncNameChar :: Char -> Bool
e1cc1ac970d0aa2910027f1f20445a16207a6deeminfrinncNameChar c = isAlphaNum c || elem c ".-_\183"
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | non-colon xml names (non-skipping)
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesncName :: Parser String
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesncName = satisfy ncNameStart <:> many (satisfy ncNameChar) <?> "ncName"
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | literal string within single or double quotes (skipping)
7add8f7fb048534390571801b7794f71cd9e127abnicholesliteral :: Parser String
7add8f7fb048534390571801b7794f71cd9e127abnicholesliteral = skips $
e8f95a682820a599fe41b22977010636be5c2717jim char '"' <:> many (satisfy (/= '"')) <++> string "\""
141e1368614dc7564e1627671361b01b4869b491bnicholes <|> char '\'' <:> many (satisfy (/= '\'')) <++> string "'"
7add8f7fb048534390571801b7794f71cd9e127abnicholes-- | ncName or wild-card (@*@) (skipping)
7add8f7fb048534390571801b7794f71cd9e127abnicholeslocalName :: Parser String
7add8f7fb048534390571801b7794f71cd9e127abnicholeslocalName = symbol "*" <|> skips ncName <?> "localName"
7add8f7fb048534390571801b7794f71cd9e127abnicholes-- | the node test parser
7add8f7fb048534390571801b7794f71cd9e127abnicholesnodeTest :: Parser NodeTest
7add8f7fb048534390571801b7794f71cd9e127abnicholesnodeTest = fmap PI (symbol pIS >> lpar >> literal << rpar)
7add8f7fb048534390571801b7794f71cd9e127abnicholes <|> choice (map (\ t -> symbol (lowerShow t)
43c3e6a4b559b76b750c245ee95e2782c15b4296jim >> lpar >> rpar >> return t) nodeTypes)
7add8f7fb048534390571801b7794f71cd9e127abnicholes p <- try (ncName <++> string ":")
43c3e6a4b559b76b750c245ee95e2782c15b4296jim l <- localName
43c3e6a4b559b76b750c245ee95e2782c15b4296jim return $ NameTest $ p ++ l
7add8f7fb048534390571801b7794f71cd9e127abnicholes l <- localName
43c3e6a4b559b76b750c245ee95e2782c15b4296jim return $ NameTest l
43c3e6a4b559b76b750c245ee95e2782c15b4296jim <?> "nodeTest"
7add8f7fb048534390571801b7794f71cd9e127abnicholes-- | parent or self abbreviated steps
7add8f7fb048534390571801b7794f71cd9e127abnicholesabbrStep :: Parser Step
3dfeb02cfb853d8717ca0cc259b59fea610173f5bnicholes (symbol ".." >> return (Step Parent Node []))
3dfeb02cfb853d8717ca0cc259b59fea610173f5bnicholes <|> (symbol "." >> return (Step Self Node []))
3dfeb02cfb853d8717ca0cc259b59fea610173f5bnicholes <?> "abbrStep"
e8f95a682820a599fe41b22977010636be5c2717jim-- | the predicate (expression in square brackets) parser
3dfeb02cfb853d8717ca0cc259b59fea610173f5bnicholespredicate :: Parser Expr
3dfeb02cfb853d8717ca0cc259b59fea610173f5bnicholespredicate = symbol "[" >> expr << symbol "]" <?> "predicate"
3dfeb02cfb853d8717ca0cc259b59fea610173f5bnicholes-- | the step (stuff between slashes) parser
3dfeb02cfb853d8717ca0cc259b59fea610173f5bnicholesstep :: Parser Step
3dfeb02cfb853d8717ca0cc259b59fea610173f5bnicholesstep = abbrStep <|> do
3dfeb02cfb853d8717ca0cc259b59fea610173f5bnicholes a <- abbrAxis
3dfeb02cfb853d8717ca0cc259b59fea610173f5bnicholes t <- nodeTest
3dfeb02cfb853d8717ca0cc259b59fea610173f5bnicholes ps <- many predicate
3dfeb02cfb853d8717ca0cc259b59fea610173f5bnicholes return (Step a t ps)
e8f95a682820a599fe41b22977010636be5c2717jim <?> "step"
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | the implicit @descendant-or-self::node()@ step constant
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesdescOrSelfStep :: Step
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesdescOrSelfStep = Step (Descendant True) Node []
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | a double or single slash
e8f95a682820a599fe41b22977010636be5c2717jimdoubleSlash :: Parser Bool
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesdoubleSlash = (symbol "//" >> return True) <|> (symbol "/" >> return False)
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes{- | a step starting with a single or double slash,
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes the latter yielding two steps. -}
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesslashStep :: Parser [Step]
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesslashStep = do
54d22ed1c429b903b029bbd62621f11a9e286137minfrin b <- doubleSlash
54d22ed1c429b903b029bbd62621f11a9e286137minfrin return (if b then [descOrSelfStep, s] else [s])
54d22ed1c429b903b029bbd62621f11a9e286137minfrin <?> "slashStep"
54d22ed1c429b903b029bbd62621f11a9e286137minfrin-- | parse the steps of a relative path
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesrelPath :: Parser [Step]
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes sl <- many slashStep
54d22ed1c429b903b029bbd62621f11a9e286137minfrin return (s : concat sl)
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes <?> "relPath"
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | a (possibly empty) absolute or (non-empty) relative path
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholespath :: Parser Path
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes m <- optionMaybe doubleSlash
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes s <- (case m of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes Just False -> optionL
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes _ -> id) relPath
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes return (case m of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes Nothing -> Path False s
54d22ed1c429b903b029bbd62621f11a9e286137minfrin Just b -> Path True $ if b then descOrSelfStep : s else s)
54d22ed1c429b903b029bbd62621f11a9e286137minfrin-- | at least one digit and at most one decimal point (skipping)
54d22ed1c429b903b029bbd62621f11a9e286137minfrinnumber :: Parser String
54d22ed1c429b903b029bbd62621f11a9e286137minfrinnumber = skips $ many1 digit <++> optionL (char '.' <:> many digit)
54d22ed1c429b903b029bbd62621f11a9e286137minfrin <|> try (char '.' <:> many1 digit)
54d22ed1c429b903b029bbd62621f11a9e286137minfrin-- | a qualified name (prefixed or unprefixed)
e8f95a682820a599fe41b22977010636be5c2717jimqualName :: Parser String
482f676c6c19b1c5bb5cca04dad11509c1da3a4cwrowequalName = skips $ ncName <++> optionL (char ':' <:> ncName)
54d22ed1c429b903b029bbd62621f11a9e286137minfrin-- | parse a primary expression (including 'fct' or 'expr' in parens)
54d22ed1c429b903b029bbd62621f11a9e286137minfrinprimExpr :: Parser Expr
54d22ed1c429b903b029bbd62621f11a9e286137minfrinprimExpr = fmap (PrimExpr Var) (char '$' <:> qualName)
54d22ed1c429b903b029bbd62621f11a9e286137minfrin <|> (lpar >> expr << rpar)
54d22ed1c429b903b029bbd62621f11a9e286137minfrin <|> fmap (PrimExpr Literal) literal
54d22ed1c429b903b029bbd62621f11a9e286137minfrin <|> fmap (PrimExpr Number) number
54d22ed1c429b903b029bbd62621f11a9e286137minfrin-- | parse a function call by checking the qname and the left paren
54d22ed1c429b903b029bbd62621f11a9e286137minfrinfct :: Parser Expr
54d22ed1c429b903b029bbd62621f11a9e286137minfrin q <- try $ do
54d22ed1c429b903b029bbd62621f11a9e286137minfrin n <- qualName
54d22ed1c429b903b029bbd62621f11a9e286137minfrin if elem n $ pIS : map lowerShow nodeTypes
54d22ed1c429b903b029bbd62621f11a9e286137minfrin then fail $ n ++ " not allowed as function name"
54d22ed1c429b903b029bbd62621f11a9e286137minfrin else lpar >> return n
54d22ed1c429b903b029bbd62621f11a9e286137minfrin args <- sepBy expr (symbol ",")
edc346c3223efd41e6a2057c37cea69744b73dccwrowe return $ GenExpr False q args
54d22ed1c429b903b029bbd62621f11a9e286137minfrin-- | parse a filter expresssion as primary expression followed by predicates
54d22ed1c429b903b029bbd62621f11a9e286137minfrinfilterExpr :: Parser Expr
54d22ed1c429b903b029bbd62621f11a9e286137minfrinfilterExpr = do
54d22ed1c429b903b029bbd62621f11a9e286137minfrin e <- primExpr
54d22ed1c429b903b029bbd62621f11a9e286137minfrin ps <- many predicate
54d22ed1c429b903b029bbd62621f11a9e286137minfrin return $ if null ps then e else FilterExpr e ps
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes{- | a path expression is either a filter expression followed by a (non-empty)
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes absoulte path or an ordinary 'path'. -}
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholespathExpr :: Parser Expr
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholespathExpr = do
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes f <- filterExpr
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes s <- optionL $ do
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes b <- doubleSlash
e8f95a682820a599fe41b22977010636be5c2717jim r <- relPath
e8f95a682820a599fe41b22977010636be5c2717jim return $ if b then descOrSelfStep : r else r
482f676c6c19b1c5bb5cca04dad11509c1da3a4cwrowe return $ if null s then f else PathExpr (Just f) $ Path True s
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes <|> fmap (PathExpr Nothing) path
8113dac419143273351446c3ad653f3fe5ba5cfdwrowe-- | parse multiple argument expressions separated by an infix symbol
8113dac419143273351446c3ad653f3fe5ba5cfdwrowesingleInfixExpr :: Parser Expr -> String -> Parser Expr
8113dac419143273351446c3ad653f3fe5ba5cfdwrowesingleInfixExpr p s = do
e8f95a682820a599fe41b22977010636be5c2717jim l <- sepBy1 p $ symbol s
8113dac419143273351446c3ad653f3fe5ba5cfdwrowe return $ case l of
560fd0658902ab57754616c172d8953e69fc4722bnicholes _ -> GenExpr True s l
e8f95a682820a599fe41b22977010636be5c2717jim-- | 'pathExpr' are arguments of union expression
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesunionExpr :: Parser Expr
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesunionExpr = singleInfixExpr pathExpr "|"
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | 'unionExpr' can be prefixed by the unary minus
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesunaryExpr :: Parser Expr
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesunaryExpr = do
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes e <- unaryExpr
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes return $ GenExpr True "-" [e]
560fd0658902ab57754616c172d8953e69fc4722bnicholes <|> unionExpr
560fd0658902ab57754616c172d8953e69fc4722bnicholes{- | parse as many arguments separated by any infix symbol as possible
560fd0658902ab57754616c172d8953e69fc4722bnicholes but construct left-associative binary application trees. -}
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesleftAssocExpr :: Parser Expr -> [String] -> Parser Expr
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesleftAssocExpr p ops =
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes chainl1 p $ do
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes op <- choice $ map symbol ops
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes return $ \ a b -> GenExpr True op [a, b]
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- * all final infix parsers using 'leftAssocExpr' or 'singleInfixExpr'
e8f95a682820a599fe41b22977010636be5c2717jimmultExpr :: Parser Expr
e8f95a682820a599fe41b22977010636be5c2717jimmultExpr = leftAssocExpr unaryExpr multOps
e8f95a682820a599fe41b22977010636be5c2717jimaddExpr :: Parser Expr
e8f95a682820a599fe41b22977010636be5c2717jimaddExpr = leftAssocExpr multExpr addOps
482f676c6c19b1c5bb5cca04dad11509c1da3a4cwrowerelExpr :: Parser Expr
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesrelExpr = leftAssocExpr addExpr relOps
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholeseqExpr :: Parser Expr
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholeseqExpr = leftAssocExpr relExpr eqOps
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesandExpr :: Parser Expr
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesandExpr = singleInfixExpr eqExpr "and"
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | the top-level expressions interspersed by @or@.
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesexpr :: Parser Expr
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesexpr = singleInfixExpr andExpr "or"
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- * checking sanity of paths
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesdata PrincipalNodeType
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes = TAttribute
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes | TNamespace
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes | TElement deriving Eq
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesprincipalNodeType :: Axis -> PrincipalNodeType
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesprincipalNodeType a = case a of
e8f95a682820a599fe41b22977010636be5c2717jim Attribute -> TAttribute
560fd0658902ab57754616c172d8953e69fc4722bnicholes Namespace -> TNamespace
e8f95a682820a599fe41b22977010636be5c2717jim _ -> TElement
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | may this step have further steps
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesisElementNode :: Step -> Bool
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesisElementNode (Step a t _) =
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes principalNodeType a == TElement && case t of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes Node -> True
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes NameTest _ -> True
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesisLegalPath :: [Step] -> Bool
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesisLegalPath l = case l of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes s : r -> isElementNode s && isLegalPath r
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesfinalStep :: Path -> Maybe Step
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesfinalStep (Path _ l) = case l of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes [] -> Nothing
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes _ -> Just $ last l
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesfinalPrincipalNodeType :: Path -> PrincipalNodeType
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesfinalPrincipalNodeType p = case finalStep p of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes Nothing -> TElement
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes Just (Step a _ _) -> principalNodeType a
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesdata BasicType
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholestype FctEnv = [(String, (BasicType, [BasicType]))]
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholestype VarEnv = [(String, BasicType)]
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholescoreFcts :: FctEnv
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes [ ("last", (Numeral, []))
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes , ("position", (Numeral, []))
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes , ("count", (Numeral, [NodeSet]))
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes , ("id", (NodeSet, [Object]))
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes , ("local-name", (String, [NodeSet]))
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes , ("namespace-uri", (String, [NodeSet]))
54d22ed1c429b903b029bbd62621f11a9e286137minfrin , ("name", (String, [NodeSet]))
54d22ed1c429b903b029bbd62621f11a9e286137minfrin , ("string", (String, [Object]))
54d22ed1c429b903b029bbd62621f11a9e286137minfrin , ("concat", (String, [String, String]))
54d22ed1c429b903b029bbd62621f11a9e286137minfrin , ("substring-before", (String, [String, String]))
54d22ed1c429b903b029bbd62621f11a9e286137minfrin , ("substring-after", (String, [String, String]))
54d22ed1c429b903b029bbd62621f11a9e286137minfrin , ("substring", (String, [String, Numeral, Numeral]))
560fd0658902ab57754616c172d8953e69fc4722bnicholes , ("starts-with", (Boolean, [String, String]))
54d22ed1c429b903b029bbd62621f11a9e286137minfrin , ("contains", (Boolean, [String, String]))
54d22ed1c429b903b029bbd62621f11a9e286137minfrin , ("string-length", (Numeral, [String]))
54d22ed1c429b903b029bbd62621f11a9e286137minfrin , ("normalize-space", (String, [String]))
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes , ("translate", (String, [String, String, String]))
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes , ("boolean", (Boolean, [Object]))
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes , ("not", (Boolean, [Boolean]))
f43b67c5a9d29b572eac916f8335cedc80c908bebnicholes , ("true", (Boolean, []))
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes , ("false", (Boolean, []))
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes , ("lang", (Boolean, [String]))
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes , ("number", (Numeral, [Object]))
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes , ("sum", (Numeral, [NodeSet]))
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes , ("floor", (Numeral, [Numeral]))
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes , ("ceiling", (Numeral, [Numeral]))
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes , ("round", (Numeral, [Numeral]))
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesbasicType :: Expr -> BasicType
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesbasicType e = case e of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes GenExpr infx op _ ->
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes if infx then case op of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes "|" -> NodeSet
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes _ | elem op $ ["or", "and"] ++ eqOps ++ relOps -> Boolean
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes | elem op $ addOps ++ multOps -> Numeral
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes else case lookup op coreFcts of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes Just (t, _) -> t
482f676c6c19b1c5bb5cca04dad11509c1da3a4cwrowe Nothing -> Object
482f676c6c19b1c5bb5cca04dad11509c1da3a4cwrowe PrimExpr k _ -> case k of
e8f95a682820a599fe41b22977010636be5c2717jim Number -> Numeral
e8f95a682820a599fe41b22977010636be5c2717jim Literal -> String
482f676c6c19b1c5bb5cca04dad11509c1da3a4cwrowe Var -> Object
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes _ -> NodeSet
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesisPathExpr :: Expr -> Bool
e8f95a682820a599fe41b22977010636be5c2717jimisPathExpr e = case e of
e8f95a682820a599fe41b22977010636be5c2717jim GenExpr True "|" args -> all isPathExpr args
8113dac419143273351446c3ad653f3fe5ba5cfdwrowe GenExpr False "id" [_] -> True
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes PrimExpr Var _ -> True
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes PathExpr m (Path _ s) -> isLegalPath s && maybe True isPathExpr m
e8f95a682820a599fe41b22977010636be5c2717jim FilterExpr p _ -> isPathExpr p
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes-- | parse string
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesparseExpr :: String -> Either String Expr
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesparseExpr s = case parse (expr << eof) "" s of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes Right e | isPathExpr e -> Right e
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes Left e -> Left $ show e
482f676c6c19b1c5bb5cca04dad11509c1da3a4cwrowe _ -> Left "not a legal path expression"
482f676c6c19b1c5bb5cca04dad11509c1da3a4cwrowegetPaths :: Expr -> [Path]
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholesgetPaths e = case e of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes GenExpr True "|" args -> concatMap getPaths args
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes PathExpr m p@(Path _ s) -> case m of
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes Nothing -> [p]
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes Just fe -> map (\ (Path r f) -> Path r $ f ++ s) $ getPaths fe
d5b12fe8ae917e654a33247fd4e59dc9e75170aebnicholes FilterExpr p _ -> getPaths p