-- Parse.hs revision f07079faf4e99014e900c7c99adb5ff7fa106b61
{- |
Module      :  $Header$
Description :  Manchester syntax parser for OWL 2
Copyright   :  (c) DFKI GmbH, Uni Bremen 2007-2010
License     :  GPLv2 or higher, see LICENSE.txt

Maintainer  :  Christian.Maeder@dfki.de
Stability   :  provisional
Portability :  portable

Contains    :  Parser for the Manchester Syntax into Abstract Syntax of OWL 2

References  :  <http://www.w3.org/TR/2009/NOTE-owl2-manchester-syntax-20091027/>
-}
1bc5dccbf0083a620ae1181c717fea75e4af5e5cChristian Maederimport Common.AnnoParser (commentLine)
1bc5dccbf0083a620ae1181c717fea75e4af5e5cChristian Maederimport Common.Token (criticalKeywords)
-- | Within this module a URI is just another name for the 'IRI' type.
type URI = IRI
-- | All values of the 'Character' type, enumerated from 'minBound' up to
-- 'maxBound'.
characters :: [Character]
characters = enumFromTo minBound maxBound
-- | Reserved words: 'notS' and 'stringS', followed by the shown
-- 'entityTypes' and 'characters', then 'keywords' and 'criticalKeywords'.
-- (Described upstream as the OWL and CASL structured keywords.)
owlKeywords :: [String]
owlKeywords = concat
  [ [notS, stringS]
  , map show entityTypes
  , map show characters
  , keywords
  , criticalKeywords
  ]
-- | Can this character start a prefix name?  True for an underscore or
-- any character accepted by 'isAlpha'.
ncNameStart :: Char -> Bool
ncNameStart c = c == '_' || isAlpha c
-- | Can this character continue a prefix name?  Follows rfc3987 and
-- additionally admits '+' as in a scheme (a scheme itself does not allow
-- the dots).  Accepts alphanumerics and the characters listed below.
ncNameChar :: Char -> Bool
ncNameChar c
  | isAlphaNum c = True
  | otherwise = c `elem` ".+-_\183"
-- | Parse a prefix name: one 'ncNameStart' character followed by any
-- number of 'ncNameChar' characters.
prefix :: CharParser st String
prefix = firstChar <:> restChars
  where
    firstChar = satisfy ncNameStart
    restChars = many $ satisfy ncNameChar
-- | Character class used for the unreserved part of IRIs: alphanumerics,
-- the punctuation @-._~@, and every code point from 160 up to 55295
-- (inclusive).
iunreserved :: Char -> Bool
iunreserved c
  | isAlphaNum c = True
  | c `elem` "-._~" = True
  | otherwise = code >= 160 && code <= 55295
  where
    code = ord c
-- | Parse a percent-encoded triple: a @%@ sign followed by two hex digits.
-- Lower case hex digits are accepted here, although they should perhaps be
-- illegal.
pctEncoded :: CharParser st String
pctEncoded = char '%' <:> twoHexDigits
  where
    twoHexDigits = hexDigit <:> single hexDigit
{- | Sub-delimiter characters.  Comma and parentheses are deliberately left
out of this set, although they would cause no problems for full IRIs
within angle brackets. -}
subDelims :: Char -> Bool
subDelims = (`elem` "!$&'*+;=")
-- | Parse one character that is 'iunreserved', one of the 'subDelims', or
-- a member of the extra character set @cs@.
iunreservedSubDelims :: String -> CharParser st Char
iunreservedSubDelims cs = satisfy accepted
  where
    accepted c = iunreserved c || subDelims c || c `elem` cs
-- | Parse either a single character accepted by 'iunreservedSubDelims'
-- (with the extra characters @cs@) or, failing that, a 'pctEncoded'
-- sequence.  The result is returned as a string in both cases.
iunreservedPctEncodedSubDelims :: String -> CharParser st String
iunreservedPctEncodedSubDelims cs = plainChar <|> pctEncoded
  where
    plainChar = single $ iunreservedSubDelims cs
-- | Parse one path character: as 'iunreservedPctEncodedSubDelims', with
-- colon and at-sign additionally allowed.
ipChar :: CharParser st String
ipChar = iunreservedPctEncodedSubDelims extraChars
  where
    extraChars = ":@"
-- | Parse a (possibly empty) fragment: any number of 'ipChar' pieces,
-- slashes or question marks, combined with 'flat'.
ifragment :: CharParser st String
ifragment = flat (many piece)
  where
    piece = ipChar <|> single slashOrQuestion
    slashOrQuestion = char '/' <|> char '?'
-- | Parse a query.  This is the same parser as 'ifragment'; the
-- @iprivate@ characters are ignored.
iquery :: CharParser st String
iquery = ifragment
-- | Parse a (possibly empty) registered name: any number of
-- 'iunreservedPctEncodedSubDelims' pieces with no extra characters.
iregName :: CharParser st String
iregName = flat (many namePiece)
  where
    namePiece = iunreservedPctEncodedSubDelims ""
-- | Parse a (possibly empty) user-info part: any number of
-- 'iunreservedPctEncodedSubDelims' pieces, with colon allowed in addition.
iuserinfo :: CharParser st String
iuserinfo = flat (many infoPiece)
  where
    infoPiece = iunreservedPctEncodedSubDelims ":"
-- | Parse zero or at most @n@ consecutive occurrences of @p@.  For a
-- non-positive @n@ nothing is parsed and the empty list is returned.
atMost :: Int -> GenParser tok st a -> GenParser tok st [a]
atMost n p
  | n <= 0 = return []
  | otherwise = (p <:> atMost (n - 1) p) <|> return []
-- | Parse at least one but at most @n@ consecutive occurrences of @p@.
-- The first occurrence is mandatory; the rest are handled by 'atMost'.
atMost1 :: Int -> GenParser tok st a -> GenParser tok st [a]
atMost1 n p = p <:> atMost remaining p
  where
    remaining = n - 1
-- | Parse a decimal octet: up to three digits whose base-10 value (via
-- 'value') is at most 255 and which carry no superfluous leading zero.
-- Zero itself must be written as exactly @0@; this also rules out the
-- empty digit string.
decOctet :: CharParser st String
decOctet = atMost 3 digit `checkWith` validOctet
  where
    validOctet s
      | v > 255 = False
      | v == 0 = s == "0"
      | otherwise = take 1 s /= "0"
      where
        v = value 10 s
-- | Parse a dotted-quad IPv4 address: four 'decOctet's separated by dots.
-- The matched text, dots included, is returned.
iPv4Adress :: CharParser st String
iPv4Adress = octetDot <++> octetDot <++> octetDot <++> decOctet
  where
    -- one octet together with its trailing dot
    octetDot = decOctet <++> string "."
-- | Parse a host: an 'iregName', with 'iPv4Adress' as the alternative.
-- An @ipLiteral@ form is not handled.
--
-- NOTE(review): 'iregName' is built from 'many' and so appears to succeed
-- even on empty input, which would leave the 'iPv4Adress' alternative
-- unreachable -- confirm before relying on it.
ihost :: CharParser st String
ihost = iregName <|> iPv4Adress
-- | Parse a port number: any number of digits, possibly none.
port :: CharParser st String
port = many digit
-- | Parse an authority: an optional user-info part ending in @\@@, a
-- host, and an optional colon-prefixed port.  The user-info attempt is
-- wrapped in 'try' so that a missing @\@@ backtracks to the host.
iauthority :: CharParser st String
iauthority = userInfoPart <++> ihost <++> portPart
  where
    userInfoPart = optionL . try $ iuserinfo <++> string "@"
    portPart = optionL $ char ':' <:> port
-- | Parse a possibly empty path segment made of 'ipChar' pieces.
isegment :: CharParser st String
isegment = flat (many ipChar)
-- | Parse a non-empty path segment: one or more 'ipChar' pieces.
isegmentNz :: CharParser st String
isegmentNz = flat (many1 ipChar)
-- | Parse a path that is empty or consists of slash-prefixed segments,
-- i.e. any number of @\/@ 'isegment' pairs.
ipathAbempty :: CharParser st String
ipathAbempty = flat (many slashSegment)
  where
    slashSegment = char '/' <:> isegment
-- | Parse an absolute path: a leading slash, optionally followed by a
-- non-empty first segment and further slash-prefixed segments.
ipathAbsolute :: CharParser st String
ipathAbsolute = char '/' <:> optionL segments
  where
    segments = isegmentNz <++> ipathAbempty
{- | Parse a rootless path: a non-empty first segment followed by any
number of slash-prefixed segments.

Within abbreviated IRIs only @ipath-noscheme@ should be used, which
excludes colons via @isegment-nz-nc@; this parser does not enforce that. -}
ipathRootless :: CharParser st String
ipathRootless = firstSegment <++> furtherSegments
  where
    firstSegment = isegmentNz
    furtherSegments = ipathAbempty
-- | Parse a double slash, then an authority, then an 'ipathAbempty' path,
-- returning the concatenated text.
iauthorityWithPath :: CharParser st String
iauthorityWithPath = tryString "//" <++> authorityAndPath
  where
    authorityAndPath = iauthority <++> ipathAbempty
-- | Parse an optional @?@-prefixed query followed by an optional
-- @#@-prefixed fragment.  Each part may be absent independently.
optQueryOrFrag :: CharParser st String
optQueryOrFrag = queryPart <++> fragmentPart
  where
    queryPart = optionL $ char '?' <:> iquery
    fragmentPart = optionL $ char '#' <:> ifragment
-- | Parse the hierarchical part of an IRI.  This also covers
-- @irelative-part@ (therefore curies are omitted).  The alternatives are
-- tried in order: an authority with path, an absolute path, and finally a
-- rootless path.
ihierPart :: CharParser st String
ihierPart =
  iauthorityWithPath <|> ipathAbsolute <|> ipathRootless
-- | Parse a hierarchical part followed by the optional query and
-- fragment, returning the concatenated text.
hierPartWithOpts :: CharParser st String
hierPartWithOpts = hierarchical <++> optQueryOrFrag
  where
    hierarchical = ihierPart
-- | Run the given parser and afterwards skip any run of spaces, line
-- comments and nested comments, keeping the parser's own result.  The
-- skipped items carry an empty label.
skips :: CharParser st a -> CharParser st a
skips p = p << skipMany ignorable
  where
    ignorable =
      forget space <|> forget commentLine <|> nestCommentOut <?> ""
-- | Parse an abbreviated IRI.  First a @prefix:@ followed by a
-- hierarchical part is attempted, yielding a 'QN' with that prefix;
-- otherwise the hierarchical part alone is turned into a name with
-- 'mkQName'.  The whole parser is wrapped in 'try'.
abbrIri :: CharParser st QName
abbrIri = try (prefixed <|> unprefixed)
  where
    prefixed = do
      -- the inner 'try' lets a missing colon fall through to 'unprefixed'
      pre <- try (prefix << char ':')
      rest <- hierPartWithOpts
      return (QN pre rest False "")
    unprefixed = fmap mkQName hierPartWithOpts
-- | Parse a full IRI written within angle brackets.  The contents are
-- parsed like an abbreviated IRI and returned as a 'QN' whose third field
-- is set to 'True'.
--
-- NOTE(review): the defining line of this function was missing from the
-- listing, leaving only the two do-statements.  The angle-bracket
-- delimiters are a reconstruction -- confirm against the upstream file.
fullIri :: CharParser st QName
fullIri = do
  _ <- char '<'
  QN pre r _ _ <- abbrIri
  _ <- char '>'
  return $ QN pre r True ""
-- | Parse an IRI, trying the full form ('fullIri') first and the
-- abbreviated form ('abbrIri') second.
uriQ :: CharParser st QName
uriQ = fullIri <|> abbrIri