Parse.hs revision 72a6eac33dbde67108edc10d70ef1cf266f43068
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maeder{- |
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian MaederModule : $Header$
3a6c7a7ff823616f56cd3d205fc44664a683effdChristian MaederCopyright : (c) Felix Gabriel Mance
73dfcef93ee2ba07fedf4f3c74bace31853d1b9fChristian MaederLicense : GPLv2 or higher, see LICENSE.txt
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maeder
2eeec5240b424984e3ee26296da1eeab6c6d739eChristian MaederMaintainer : f.mance@jacobs-university.de
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian MaederStability : provisional
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian MaederPortability : portable
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maeder
e6d40133bc9f858308654afb1262b8b483ec5922Till MossakowskiRDF syntax parser
e6d40133bc9f858308654afb1262b8b483ec5922Till Mossakowski
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maeder-}
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maeder
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maedermodule RDF.Parse where
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maeder
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maederimport Common.Parsec
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maederimport Common.Lexer
85ebda7270c6883b503d3bde4757033c09c25644Christian Maeder
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maederimport OWL2.AS
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maederimport OWL2.Parse
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maederimport RDF.AS
ad270004874ce1d0697fb30d7309f180553bb315Christian Maederimport RDF.Symbols
ad270004874ce1d0697fb30d7309f180553bb315Christian Maeder
1bc5dccbf0083a620ae1181c717fea75e4af5e5cChristian Maederimport Data.List
1bc5dccbf0083a620ae1181c717fea75e4af5e5cChristian Maederimport qualified Data.Map as Map
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maederimport Text.ParserCombinators.Parsec
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maeder
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maeder-- * hets symbols parser
60bf7f52638962c93ec43da9aad8cafc9f09c318Christian Maeder
-- | Parses the keyword of one of the 'rdfEntityTypes' (the keyword text is
-- the 'show' rendering of the type) and yields the matching entity type.
rdfEntityType :: CharParser st RDFEntityType
rdfEntityType =
    choice [keyword (show ty) >> return ty | ty <- rdfEntityTypes]
60bf7f52638962c93ec43da9aad8cafc9f09c318Christian Maeder
{- | Parses an optional entity type (subject, predicate or object) followed
by a comma separated list of IRIs. -}
rdfSymbItems :: GenParser Char st SymbItems
rdfSymbItems = do
    kind <- optionMaybe rdfEntityType
    fmap (SymbItems kind) rdfSymbs
8d97ef4f234681b11bb5924bd4d03adef858d2d2Christian Maeder
-- | Parses a non-empty, comma separated list of URIs. A comma is only
-- accepted as a separator when another URI follows it.
rdfSymbs :: GenParser Char st [IRI]
rdfSymbs = do
    firstUri <- uriP
    others <- option [] $ (commaP `followedWith` uriP) >> rdfSymbs
    return $ firstUri : others
6abfd7000f15635fd29746bd841b4c36819e552bTill Mossakowski
-- | Parses an optional entity type followed by a comma separated list of
-- symbol pairs.
rdfSymbMapItems :: GenParser Char st SymbMapItems
rdfSymbMapItems = do
    kind <- optionMaybe rdfEntityType
    fmap (SymbMapItems kind) rdfSymbPairs
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maeder
-- | Parses a non-empty, comma separated list of URI pairs. A comma is only
-- accepted as a separator when a URI follows it.
rdfSymbPairs :: GenParser Char st [(IRI, Maybe IRI)]
rdfSymbPairs = do
    firstPair <- uriPair
    others <- option [] $ (commaP `followedWith` uriP) >> rdfSymbPairs
    return $ firstPair : others
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maeder
-- | Parses a single @\@base@ declaration: the keyword, an IRI read with
-- 'uriQ' and the terminating dot.
parse1Base :: CharParser st BaseIRI
parse1Base =
    pkeyword "@base" >> fmap BaseIRI (skips uriQ << skips (string "."))
1bc5dccbf0083a620ae1181c717fea75e4af5e5cChristian Maeder
-- | Parses a single @\@prefix@ declaration: the keyword, a possibly empty
-- prefix name ending in a colon, a full IRI and the terminating dot.
parse1Prefix :: CharParser st Prefix
parse1Prefix = do
    pkeyword "@prefix"
    name <- skips $ option "" prefix << char ':'
    target <- skips fullIri << skips (string ".")
    return $ Prefix name target
8d97ef4f234681b11bb5924bd4d03adef858d2d2Christian Maeder
-- | Parses either a base declaration ('Left') or, if that does not apply,
-- a prefix declaration ('Right').
parse1BaseOrPrefix :: CharParser st (Either BaseIRI Prefix)
parse1BaseOrPrefix = fmap Left parse1Base <|> fmap Right parse1Prefix
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maeder
-- | Tests whether the local part of the IRI begins with a double slash.
startsWithScheme :: IRI -> Bool
startsWithScheme = isPrefixOf "//" . localPart
0fe1b901cec27c06b8aad7548f56a7cab4dee6a4Till Mossakowski
-- | Applies 'startsWithScheme' to the IRI wrapped in a base.
baseStartsWithScheme :: BaseIRI -> Bool
baseStartsWithScheme base = case base of
    BaseIRI iri -> startsWithScheme iri
0fe1b901cec27c06b8aad7548f56a7cab4dee6a4Till Mossakowski
{- | Appends the local part of the second base to the first base.

Both the local part and the expanded IRI of the first base are extended by
the local part of the second one. A single @\/@ is inserted in between
unless the local part of the first base already ends in one. All other
fields are taken from the first base.

The check is total: an empty local part in the first base is treated as
not ending in a slash (the previous use of 'last' crashed on it). -}
appendTwoBases :: BaseIRI -> BaseIRI -> BaseIRI
appendTwoBases (BaseIRI b1) (BaseIRI b2) =
    let suffix = localPart b2
        -- the separator is decided by the local part only and then reused
        -- for the expanded IRI
        sep = if "/" `isSuffixOf` localPart b1 then "" else "/"
    in BaseIRI b1
        { localPart = localPart b1 ++ sep ++ suffix
        , expandedIRI = expandedIRI b1 ++ sep ++ suffix }
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maeder
-- | Resolves an IRI against a base by appending it to the base with
-- 'appendTwoBases' and unwrapping the result.
resolveIRI :: BaseIRI -> IRI -> IRI
resolveIRI b = extractIRI . appendTwoBases b . BaseIRI
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maeder
-- | Unwraps the IRI stored in a base.
extractIRI :: BaseIRI -> IRI
extractIRI base = case base of
    BaseIRI iri -> iri
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maeder
{- | Parses a possibly empty sequence of base and prefix declarations,
threading the current base and prefix map through them.

A new base replaces the current one when it starts with a scheme or is not
a 'Full' IRI; otherwise it is appended to the current base. A prefix IRI
is stored as is when it starts with a scheme and is resolved against the
current base otherwise. When no further declaration can be parsed, the
accumulated base and prefix map are returned. -}
parseBases :: BaseIRI -> TurtlePrefixMap -> CharParser st (BaseIRI, TurtlePrefixMap)
parseBases base pm =
    (parse1BaseOrPrefix >>= either withBase withPrefix)
    <|> return (base, pm)
  where
    withBase b
        | baseStartsWithScheme b || iriType (extractIRI b) /= Full =
            parseBases b pm
        | otherwise = parseBases (appendTwoBases base b) pm
    withPrefix (Prefix s iri) = parseBases base $ Map.insert s target pm
      where
        target
            | startsWithScheme iri = iri
            | otherwise = resolveIRI base iri
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maeder
-- | Parses an IRI with 'uriQ'. A 'Full' IRI that does not start with a
-- scheme is resolved against the given base; any other IRI is returned
-- unchanged.
parseIRI :: BaseIRI -> CharParser st IRI
parseIRI b = fmap resolve uriQ
  where
    resolve iri
        | iriType iri == Full && not (startsWithScheme iri) = resolveIRI b iri
        | otherwise = iri
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maeder
-- | Parses a term: a literal, an IRI (handled by 'parseIRI' with the given
-- base) or a parenthesised collection of terms, tried in that order.
parseTerm :: BaseIRI -> CharParser st Term
parseTerm b = literalTerm <|> iriTerm <|> collection
  where
    literalTerm = fmap LiteralTerm literal
    iriTerm = fmap IRITerm $ parseIRI b
    collection = fmap Collection . parensP . many . skips $ parseTerm b
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maeder
07baaf27fc0029203075ed916999006dcc619ef0Christian Maeder
{- | Parses a sequence of triples, each one optionally preceded by base and
prefix declarations.

The third argument is the separator that ended the previous triple and
decides how much of the next triple is expected:

* @"."@: subject, predicate and object (an 'NTriple')

* @","@: only an object (an 'AbbreviatedTriple' without predicate)

* @";"@: predicate and object (an 'AbbreviatedTriple' with predicate)

Any other separator makes the parser fail instead of raising a pattern
match exception. Like every other failure that happens before input was
consumed, this is absorbed by the final alternative, which yields the
empty list. The base and prefix map updated by the declarations are passed
on to the parser for the remaining triples. -}
parseTriples :: BaseIRI -> TurtlePrefixMap -> String -> CharParser st [Triple]
parseTriples base tpm end = do
    (b, pm) <- parseBases base tpm
    -- every component of a triple is a term followed by skipped input
    let term = skips $ parseTerm b
    t <- case end of
        "." -> do
            s <- term
            p <- term
            o <- term
            return $ NTriple s p o
        "," -> fmap (AbbreviatedTriple Nothing) term
        ";" -> do
            p <- term
            o <- term
            return $ AbbreviatedTriple (Just p) o
        _ -> fail $ "parseTriples: unexpected separator " ++ show end
    -- the separator found here selects the shape of the next triple
    sep <- choice $ map (\ s -> skips $ string s >> return s) [".", ",", ";"]
    tl <- parseTriples b pm sep
    return $ t : tl
  <|> return []
acabd9ab36e1870f6f02c513bcfbfd10ffd118e0Christian Maeder