Parse.hs revision 29454fc45be6d7e3caec75e08a933cdf77db3453
{- |
Module      :  $Header$
Description :  Manchester syntax parser for OWL 2
Copyright   :  (c) DFKI GmbH, Uni Bremen 2007-2010
License     :  GPLv2 or higher, see LICENSE.txt
Maintainer  :  Christian.Maeder@dfki.de
Stability   :  provisional
Portability :  portable

Manchester syntax parser for OWL 2
<http://www.w3.org/TR/2009/NOTE-owl2-manchester-syntax-20091027/>
Manchester syntax parser for OWL 1.1
<http://www.webont.org/owled/2008dc/papers/owled2008dc_paper_11.pdf>
-}
31bc219bae758272d0f064281b8ce7740a4553e9Till Mossakowskiimport Common.AnnoParser (commentLine)
b10d6cef708b7a659f2d3b367e8e0db0d03ae3f5Till Mossakowskiimport Common.Token (criticalKeywords)
-- | In this module a URI is simply an 'IRI'.
type URI = IRI

-- | Every value of 'Character', enumerated from 'minBound' to 'maxBound'.
characters :: [Character]
characters = enumFromTo minBound maxBound
{- | Reserved words of the parser: 'notS' and 'stringS', the shown names of
all 'entityTypes' and 'characters', followed by 'keywords' and
'criticalKeywords' (the original note describes these as the OWL and CASL
structured keywords, including 'andS' and 'notS'). -}
owlKeywords :: [String]
owlKeywords = concat
  [ [notS, stringS]
  , map show entityTypes
  , map show characters
  , keywords
  , criticalKeywords
  ]
-- | A character that may start a prefix name: a letter or an underscore.
ncNameStart :: Char -> Bool
ncNameStart c
  | c == '_' = True
  | otherwise = isAlpha c
{- | A character allowed after the first one of a prefix name: any
alphanumeric character or one of @.@, @+@, @-@, @_@ and the middle dot
(code point 183). Follows rfc3987 plus @+@ from scheme (scheme itself does
not allow the dots). -}
ncNameChar :: Char -> Bool
ncNameChar c = any ($ c) [isAlphaNum, (`elem` ".+-_\183")]
{- | A prefix name: one 'ncNameStart' character followed by any number of
'ncNameChar' characters. -}
prefix :: CharParser st String
prefix = startChar <:> many restChar
  where
    startChar = satisfy ncNameStart
    restChar = satisfy ncNameChar
{- | Characters treated as unreserved in IRIs: alphanumerics, one of
@-@, @.@, @_@, @~@, or any character whose code point lies between
0xA0 (160) and 0xD7FF (55295) inclusive. -}
iunreserved :: Char -> Bool
iunreserved c = isAlphaNum c || c `elem` "-._~" || inExtendedRange
  where
    code = ord c
    inExtendedRange = 0xA0 <= code && code <= 0xD7FF
{- | A percent-encoded octet: a @%@ sign followed by exactly two hex digits.
Note: maybe lower case hex-digits should be illegal. -}
pctEncoded :: CharParser st String
pctEncoded = char '%' <:> twoHexDigits
  where
    twoHexDigits = hexDigit <:> single hexDigit
{- | Sub-delimiter characters accepted inside IRIs.
Comma and parens are removed here, although they would cause no problems
for full IRIs within angle brackets. -}
subDelims :: Char -> Bool
subDelims = (`elem` "!$&'*+;=")
{- | Parse a single character that is 'iunreserved', one of the 'subDelims',
or listed in the extra character set given as argument. -}
iunreservedSubDelims :: String -> CharParser st Char
iunreservedSubDelims cs = satisfy accepted
  where
    accepted c = any ($ c) [iunreserved, subDelims, (`elem` cs)]
{- | Parse either one character accepted by 'iunreservedSubDelims' (with the
given extra characters), returned as a one-element string, or a
'pctEncoded' triple. -}
iunreservedPctEncodedSubDelims :: String -> CharParser st String
iunreservedPctEncodedSubDelims cs = plainChar <|> pctEncoded
  where
    plainChar = single $ iunreservedSubDelims cs
{- | One path character: as 'iunreservedPctEncodedSubDelims', additionally
allowing @:@ and @\@@. -}
ipChar :: CharParser st String
ipChar = iunreservedPctEncodedSubDelims extraChars
  where
    extraChars = ":@"
{- | A (possibly empty) fragment: any number of 'ipChar' pieces, slashes or
question marks, joined into one string by 'flat'. -}
ifragment :: CharParser st String
ifragment = flat (many fragmentPiece)
  where
    slashOrQuestion = char '/' <|> char '?'
    fragmentPiece = ipChar <|> single slashOrQuestion
{- | A query is parsed exactly like an 'ifragment'; the additional
@iprivate@ characters are ignored. -}
iquery :: CharParser st String
iquery = ifragment
{- | A (possibly empty) registered name: any number of pieces accepted by
'iunreservedPctEncodedSubDelims' with no extra characters. -}
iregName :: CharParser st String
iregName = flat (many namePiece)
  where
    namePiece = iunreservedPctEncodedSubDelims ""
{- | A (possibly empty) user-info part: any number of pieces accepted by
'iunreservedPctEncodedSubDelims', additionally allowing @:@. -}
iuserinfo :: CharParser st String
iuserinfo = flat (many infoPiece)
  where
    infoPiece = iunreservedPctEncodedSubDelims ":"
{- | Parse zero or at most @n@ consecutive occurrences of the given parser.
For @n <= 0@ nothing is parsed and the empty list is returned. -}
atMost :: Int -> GenParser tok st a -> GenParser tok st [a]
atMost n p
  | n <= 0 = return []
  | otherwise = (p <:> atMost (n - 1) p) <|> return []
{- | Parse at least one but at most @n@ consecutive occurrences of the given
parser: one mandatory occurrence followed by up to @n - 1@ optional ones. -}
atMost1 :: Int -> GenParser tok st a -> GenParser tok st [a]
atMost1 n p = p <:> optionalRest
  where
    optionalRest = atMost (n - 1) p
{- | A decimal octet: up to three digits whose numeric value (computed with
@value 10@, presumably a base-10 conversion) is at most 255. Leading zeros
are rejected: a value of zero must be spelled exactly @0@ and any other
value must not start with @0@. -}
decOctet :: CharParser st String
decOctet = atMost 3 digit `checkWith` isOctet
  where
    isOctet s
      | v == 0 = s == "0"
      | otherwise = v <= 255 && take 1 s /= "0"
      where
        v = value 10 s
{- | A dotted-quad IPv4 address: four 'decOctet's separated by single dots
(RFC 3986, section 3.2.2, rule @IPv4address@). The result is the matched
text including the dots. -}
iPv4Adress :: CharParser st String
iPv4Adress = decOctet <++> string "."
  <++> decOctet <++> string "."
  <++> decOctet <++> string "."
  <++> decOctet -- the fourth octet; without it the parser stops after the third dot
{- | A host: a registered name or, failing that, an IPv4 address
(IP literals are not supported).

NOTE(review): 'iregName' is built on @many@ and therefore looks like it
succeeds even without consuming input, which would make the 'iPv4Adress'
alternative unreachable; dotted digits are matched by 'iregName' anyway.
Confirm before relying on the second alternative. -}
ihost :: CharParser st String
ihost = iregName <|> iPv4Adress
-- | A port number: a possibly empty sequence of decimal digits.
port :: CharParser st String
port = many digit
{- | An authority: an optional user-info part terminated by @\@@, the host,
and an optional @:@ followed by a port. The user-info attempt is wrapped in
'try' so that input without an @\@@ is re-read as the host. -}
iauthority :: CharParser st String
iauthority = userPart <++> ihost <++> portPart
  where
    userPart = optionL . try $ iuserinfo <++> string "@"
    portPart = optionL $ char ':' <:> port
-- | A possibly empty path segment: any number of 'ipChar' pieces.
isegment :: CharParser st String
isegment = flat (many ipChar)
-- | A non-empty path segment: one or more 'ipChar' pieces.
isegmentNz :: CharParser st String
isegmentNz = flat (many1 ipChar)
{- | A path that is empty or starts with a slash: any number of
slash-prefixed (possibly empty) segments. -}
ipathAbempty :: CharParser st String
ipathAbempty = flat (many slashSegment)
  where
    slashSegment = char '/' <:> isegment
{- | An absolute path: a slash, optionally followed by a rootless path
(a non-empty first segment and further slash-prefixed segments). -}
ipathAbsolute :: CharParser st String
ipathAbsolute = char '/' <:> optionL ipathRootless
{- | A rootless path: a non-empty first segment followed by any number of
slash-prefixed segments.

Within abbreviated IRIs only ipath-noscheme should be used, which excludes
colons via isegment-nz-nc. -}
ipathRootless :: CharParser st String
ipathRootless = firstSegment <++> furtherSegments
  where
    firstSegment = isegmentNz
    furtherSegments = ipathAbempty