Parse.hs revision 1f9274bb2aa44ea236327814dce99946be52e348
{- |
Module : $Header$
Copyright : (c) Felix Gabriel Mance
License : GPLv2 or higher, see LICENSE.txt
Maintainer : f.mance@jacobs-university.de
Stability : provisional
Portability : portable
RDF syntax parser
-}
module RDF.Parse where
import Common.Parsec
import Common.Lexer
import Common.AnnoParser (newlineOrEof)
import OWL2.AS
import OWL2.Parse hiding (stringLiteral, literal, skips)
import RDF.AS
import RDF.Symbols
import qualified Data.Map as Map
import Text.ParserCombinators.Parsec
-- * hets symbols parser
rdfEntityType :: CharParser st RDFEntityType
rdfEntityType = choice $ map (\ f -> keyword (show f) >> return f)
rdfEntityTypes
{- | parses an entity type (subject, predicate or object) followed by a
comma separated list of IRIs -}
rdfSymbItems :: GenParser Char st SymbItems
rdfSymbItems = do
ext <- optionMaybe rdfEntityType
iris <- rdfSymbs
return $ SymbItems ext iris
-- | parse a comma separated list of uris
rdfSymbs :: GenParser Char st [IRI]
rdfSymbs = uriQ >>= \ u -> do
commaP `followedWith` uriQ
us <- rdfSymbs
return $ u : us
<|> return [u]
-- | parse a possibly kinded list of comma separated symbol pairs
rdfSymbMapItems :: GenParser Char st SymbMapItems
rdfSymbMapItems = do
ext <- optionMaybe rdfEntityType
iris <- rdfSymbPairs
return $ SymbMapItems ext iris
-- | parse a comma separated list of uri pairs
rdfSymbPairs :: GenParser Char st [(IRI, Maybe IRI)]
rdfSymbPairs = uriPair >>= \ u -> do
commaP `followedWith` uriQ
us <- rdfSymbPairs
return $ u : us
<|> return [u]
-- * turtle syntax parser
skips :: CharParser st a -> CharParser st a
skips = (<< skipMany
(forget space <|> parseComment <|> nestCommentOut <?> ""))
charOrQuoteEscape :: CharParser st String
charOrQuoteEscape = try (string "\\\"") <|> fmap return anyChar
longLiteral :: CharParser st (String, Bool)
longLiteral = do
string "\"\"\""
ls <- flat $ manyTill charOrQuoteEscape $ try $ string "\"\"\""
return (ls, True)
shortLiteral :: CharParser st (String, Bool)
shortLiteral = do
char '"'
ls <- flat $ manyTill charOrQuoteEscape $ try $ string "\""
return (ls, False)
stringLiteral :: CharParser st RDFLiteral
stringLiteral = do
(s, b) <- try longLiteral <|> shortLiteral
do
string cTypeS
d <- datatypeUri
return $ RDFLiteral b s $ Typed d
<|> do
string "@"
t <- skips $ optionMaybe languageTag
return $ RDFLiteral b s $ Untyped t
<|> skips (return $ RDFLiteral b s $ Typed $ mkQName "string")
literal :: CharParser st RDFLiteral
literal = do
f <- skips $ try floatingPointLit
<|> fmap decToFloat decimalLit
return $ RDFNumberLit f
<|> stringLiteral
parseBase :: CharParser st Base
parseBase = do
pkeyword "@base"
base <- skips uriQ
skips $ char '.'
return $ Base base
parsePrefix :: CharParser st Prefix
parsePrefix = do
pkeyword "@prefix"
p <- skips (option "" prefix << char ':')
i <- skips uriQ
skips $ char '.'
return $ Prefix p i
parsePredicate :: CharParser st Predicate
parsePredicate = fmap Predicate $ skips uriQ
parseSubject :: CharParser st Subject
parseSubject =
fmap Subject (skips uriQ)
<|> fmap SubjectList
(between (skips $ char '[') (skips $ char ']') $ skips parsePredObjList)
<|> fmap SubjectCollection
(between (skips $ char '(') (skips $ char ')') $ many parseObject)
parseObject :: CharParser st Object
parseObject = fmap ObjectLiteral literal <|> fmap Object parseSubject
parsePredObjects :: CharParser st PredicateObjectList
parsePredObjects = do
pr <- parsePredicate
objs <- sepBy parseObject $ skips $ char ','
return $ PredicateObjectList pr objs
parsePredObjList :: CharParser st [PredicateObjectList]
parsePredObjList = sepEndBy parsePredObjects $ skips $ char ';'
parseTriples :: CharParser st Triples
parseTriples = do
s <- parseSubject
ls <- parsePredObjList
skips $ char '.'
return $ Triples s ls
parseComment :: CharParser st ()
parseComment = do
tryString "#"
forget $ skips $ manyTill anyChar newlineOrEof
parseStatement :: CharParser st Statement
parseStatement = fmap BaseStatement parseBase
<|> fmap PrefixStatement parsePrefix <|> fmap Statement parseTriples
basicSpec :: CharParser st TurtleDocument
basicSpec = do
many parseComment
ls <- many parseStatement
return $ TurtleDocument nullQName Map.empty ls