Parse.hs revision 1a38107941725211e7c3f051f7a8f5e12199f03a
{- |
Module      :  $Header$
Copyright   :  (c) Felix Gabriel Mance
License     :  GPLv2 or higher, see LICENSE.txt

Maintainer  :  f.mance@jacobs-university.de
Stability   :  provisional
Portability :  portable

RDF syntax parser
-}
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Hunt
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Huntmodule RDF.Parse where
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Hunt
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Huntimport Common.Parsec
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Huntimport Common.Lexer
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Huntimport Common.AnnoParser (newlineOrEof)
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Huntimport Common.Token (criticalKeywords)
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Huntimport Common.Id
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Huntimport qualified Common.GlobalAnnotations as GA (PrefixMap)
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Hunt
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Huntimport OWL2.AS
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Huntimport OWL2.Parse hiding (stringLiteral, literal, skips, uriP)
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Huntimport RDF.AS
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Huntimport RDF.Symbols
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Hunt
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Huntimport Data.Either
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Huntimport qualified Data.Map as Map
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Huntimport Text.ParserCombinators.Parsec
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Hunt
{- | parse an IRI with 'uriQ' and skip the blanks and comments after it.
The result is checked via 'checkWithUsing': it must either carry a
non-empty name prefix or have a local part that is not one of the
'criticalKeywords'. The whole attempt is wrapped in 'try'. -}
uriP :: CharParser st QName
uriP = skips . try $ checkWithUsing showQN uriQ acceptable
  where
    acceptable qn =
      not (null (namePrefix qn)) || localPart qn `notElem` criticalKeywords
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Hunt
-- * Hets symbols parser
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Hunt
{- | parse one of the 'rdfEntityTypes'; each entity type is recognised by
the keyword that equals its 'show' representation -}
rdfEntityType :: CharParser st RDFEntityType
rdfEntityType =
  choice [keyword (show ety) >> return ety | ety <- rdfEntityTypes]
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Hunt
{- | parses an optional entity type (subject, predicate or object)
followed by a comma separated list of IRIs -}
rdfSymbItems :: GenParser Char st SymbItems
rdfSymbItems =
  optionMaybe rdfEntityType >>= \ kind -> fmap (SymbItems kind) rdfSymbs
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Hunt
{- | parse a non-empty, comma separated list of uris. A further element
is only attempted via @commaP `followedWith` uriP@ (presumably: a comma
that is directly followed by another uri - confirm in "Common.Parsec");
otherwise the list ends after the current uri. -}
rdfSymbs :: GenParser Char st [IRI]
rdfSymbs = do
  first <- uriP
  rest <- option [] $ (commaP `followedWith` uriP) >> rdfSymbs
  return $ first : rest
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Hunt
{- | parse an optional entity type followed by a comma separated list of
symbol pairs -}
rdfSymbMapItems :: GenParser Char st SymbMapItems
rdfSymbMapItems =
  optionMaybe rdfEntityType >>= \ kind ->
    fmap (SymbMapItems kind) rdfSymbPairs
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Hunt
{- | parse a non-empty, comma separated list of uri pairs (each read by
'uriPair'). A further pair is only attempted via
@commaP `followedWith` uriP@; otherwise the list ends after the current
pair. -}
rdfSymbPairs :: GenParser Char st [(IRI, Maybe IRI)]
rdfSymbPairs = do
  first <- uriPair
  rest <- option [] $ (commaP `followedWith` uriP) >> rdfSymbPairs
  return $ first : rest
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Hunt
-- * Turtle syntax parser
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Hunt
{- | run the given parser and afterwards skip any sequence of single
spaces, comments recognised by 'parseComment' and whatever
'nestCommentOut' accepts. The result of the given parser is returned. -}
skips :: CharParser st a -> CharParser st a
skips p = p << skipMany blank
  where
    -- the empty label keeps the skipped material out of error messages
    blank = (forget space <|> parseComment <|> nestCommentOut) <?> ""
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Hunt
{- | parse one unit of string literal content. A backslash followed by a
backslash and a backslash followed by a double quote are each returned
as a two character string (the escape is kept verbatim, not decoded);
any other character is returned as a singleton string. -}
charOrQuoteEscape :: CharParser st String
charOrQuoteEscape =
  {- the escaped backslash has to be consumed as a unit first: otherwise,
  in a literal ending with an escaped backslash, the second backslash and
  the closing quote would be read as an escaped quote and the end of the
  literal would be missed -}
  try (string "\\\\")
  <|> try (string "\\\"")
  <|> fmap return anyChar
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Hunt
{- | parse a string enclosed in triple double quotes. The content is the
concatenation of the units read by 'charOrQuoteEscape' up to the closing
triple quote; the flag in the result is always 'True'. -}
longLiteral :: CharParser st (String, Bool)
longLiteral = string tripleQuote >> fmap (\ body -> (body, True)) content
  where
    tripleQuote = "\"\"\""
    content = flat $ manyTill charOrQuoteEscape $ try $ string tripleQuote
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Hunt
{- | parse a string enclosed in single double quotes. The content is the
concatenation of the units read by 'charOrQuoteEscape' up to the closing
quote; the flag in the result is always 'False'. -}
shortLiteral :: CharParser st (String, Bool)
shortLiteral = char '"' >> fmap (\ body -> (body, False)) content
  where
    content = flat $ manyTill charOrQuoteEscape $ try $ string "\""
98922b2b2b024dcca25be7c220cf3b16b1e6c4b5Evan Hunt
{- | parse a long or short quoted string followed by an optional
annotation: 'cTypeS' and a datatype uri give a typed literal, an at sign
and an optional language tag give an untyped literal, and a string
without annotation is typed with @mkQName "string"@. Blanks and comments
are skipped after the language tag and after an unannotated string. -}
stringLiteral :: CharParser st RDFLiteral
stringLiteral = do
  -- the long form must be tried first, a short literal also starts with '"'
  (txt, isLong) <- try longLiteral <|> shortLiteral
  let mk = RDFLiteral isLong txt
      typed = string cTypeS >> fmap (mk . Typed) datatypeUri
      tagged = string "@"
        >> fmap (mk . Untyped) (skips $ optionMaybe languageTag)
      plain = skips . return . mk . Typed $ mkQName "string"
  typed <|> tagged <|> plain
{- | parse a literal: either a number (a floating point literal, or a
decimal literal converted with 'decToFloat'), followed by skipped blanks
and comments, or a string literal -}
literal :: CharParser st RDFLiteral
literal = fmap RDFNumberLit (skips number) <|> stringLiteral
  where
    number = try floatingPointLit <|> fmap decToFloat decimalLit
{- | parse a base declaration: the keyword @\@base@, a uri and a
terminating dot -}
parseBase :: CharParser st Base
parseBase =
  pkeyword "@base" >> fmap Base (skips uriP << skips (char '.'))
{- | parse a prefix declaration: the keyword @\@prefix@, a possibly
empty prefix name followed by a colon, a uri and a terminating dot -}
parsePrefix :: CharParser st Prefix
parsePrefix = do
  pkeyword "@prefix"
  -- a missing prefix name is recorded as the empty string
  name <- skips (option "" prefix << char ':')
  target <- skips uriP << skips (char '.')
  return (PrefixR name target)
-- | parse a uri and wrap it as a predicate
parsePredicate :: CharParser st Predicate
parsePredicate = do
  iri <- skips uriP
  return (Predicate iri)
{- | parse a subject: a uri, a predicate-object list enclosed in square
brackets, or a sequence of objects enclosed in parentheses -}
parseSubject :: CharParser st Subject
parseSubject = iriSubject <|> bracketed <|> collection
  where
    -- a single character followed by skipped blanks and comments
    sym c = skips (char c)
    iriSubject = fmap Subject $ skips uriP
    bracketed = fmap SubjectList
      $ between (sym '[') (sym ']') (skips parsePredObjList)
    collection = fmap SubjectCollection
      $ between (sym '(') (sym ')') (many parseObject)
-- | parse an object: a literal is tried first, then anything 'parseSubject' accepts
parseObject :: CharParser st Object
parseObject = literalObject <|> subjectObject
  where
    literalObject = literal >>= return . ObjectLiteral
    subjectObject = parseSubject >>= return . Object
{- | parse a predicate followed by a (possibly empty) comma separated
list of objects -}
parsePredObjects :: CharParser st PredicateObjectList
parsePredObjects = parsePredicate >>= \ prd ->
  fmap (PredicateObjectList prd) (parseObject `sepBy` skips (char ','))
{- | parse a (possibly empty) list of predicate-object groups that are
separated, and optionally ended, by semicolons -}
parsePredObjList :: CharParser st [PredicateObjectList]
parsePredObjList = parsePredObjects `sepEndBy` skips (char ';')
{- | parse a triples statement: a subject, its predicate-object list and
a terminating dot -}
parseTriples :: CharParser st Triples
parseTriples = do
  subj <- parseSubject
  fmap (Triples subj) (parsePredObjList << skips (char '.'))
{- | parse a comment: a hash sign and every character up to the point
where 'newlineOrEof' succeeds; blanks and further comments after it are
skipped as well. The comment text is discarded. -}
parseComment :: CharParser st ()
parseComment =
  tryString "#" >> forget (skips (anyChar `manyTill` newlineOrEof))
{- | parse a single statement; a base declaration is tried first, then a
prefix declaration and finally a triples statement -}
parseStatement :: CharParser st Statement
parseStatement = baseDecl <|> prefixDecl <|> triples
  where
    baseDecl = fmap BaseStatement parseBase
    prefixDecl = fmap PrefixStatement parsePrefix
    triples = fmap Statement parseTriples
{- | parse a turtle document: leading comments followed by any number of
statements. The document gets 'dummyQName' as its name and a prefix map
built from the given one: the argument is passed through
'convertPrefixMap' and every resulting string is turned into a 'QN' with
an empty prefix, the 'Full' tag, 'nullRange' and the string itself in
both remaining fields. -}
basicSpec :: GA.PrefixMap -> CharParser st TurtleDocument
basicSpec pm = many parseComment >> fmap mkDoc (many parseStatement)
  where
    mkDoc = TurtleDocument dummyQName $ Map.map expand $ convertPrefixMap pm
    expand iri = QN "" iri Full iri nullRange
{- | the predefined prefixes together with the IRIs they abbreviate.
Every IRI is parsed with 'uriQ'; an entry whose IRI does not parse is
left out of the map. -}
predefinedPrefixes :: RDFPrefixMap
predefinedPrefixes = Map.fromList $ rights $ map parseEntry
    [ ("rdf", "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
    , ("rdfs", "<http://www.w3.org/2000/01/rdf-schema#>")
    , ("dc", "<http://purl.org/dc/elements/1.1/>")
    , ("owl", "<http://www.w3.org/2002/07/owl#>")
    , ("ex", "<http://www.example.org/>")
    , ("xsd", "<http://www.w3.org/2001/XMLSchema#>") ]
  where
    {- the prefix is attached before failed parses are filtered out, so
    that a failure can only drop its own entry and never shifts the
    remaining prefixes onto the wrong IRIs -}
    parseEntry (pre, iri) =
      either Left (Right . (,) pre) $ parse uriQ "" iri