XML.hs revision 1ea1794fe8bebbd2d807240f9ea9a7217f5fb75f
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinaModule : $Header$
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinaCopyright : (c) Felix Gabriel Mance
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinaLicense : GPLv2 or higher, see LICENSE.txt
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinaMaintainer : f.mance@jacobs-university.de
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinaStability : provisional
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinaPortability : portable
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinaOWL/XML Syntax Parsing
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březinaimport qualified Data.Map as Map
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březinatype XMLBase = String
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina-- | error messages for the parser
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březinaerr :: String -> t
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březinaerr s = error $ "XML parser: " ++ s
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina{- two functions from Text.XML.Light.Proc version 1.3.7 for compatibility
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina with previous versions -}
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel BřezinavLookupAttrBy :: (Text.XML.Light.QName -> Bool) -> [Attr] -> Maybe String
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel BřezinavLookupAttrBy p as = attrVal `fmap` find (p . attrKey) as
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinavFindAttrBy :: (Text.XML.Light.QName -> Bool) -> Element -> Maybe String
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinavFindAttrBy p e = vLookupAttrBy p (elAttribs e)
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinaisSmth :: String -> Text.XML.Light.QName -> Bool
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinaisSmth s = (s ==) . qName
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinaisSmthList :: [String] -> Text.XML.Light.QName -> Bool
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinaisSmthList l qn = qName qn `elem` l
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel BřezinaisNotSmth :: Text.XML.Light.QName -> Bool
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinaisNotSmth q = let qn = qName q in qn `notElem` [declarationK,
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina prefixK, importK, annotationK]
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina-- | parses all children with the given name
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinafilterCh :: String -> Element -> [Element]
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinafilterCh s = filterChildrenName (isSmth s)
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina-- | parses all children with names in the list
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinafilterChL :: [String] -> Element -> [Element]
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinafilterChL l = filterChildrenName (isSmthList l)
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina-- | parses one child with the given name
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinafilterC :: String -> Element -> Element
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinafilterC s e = fromMaybe (err "child not found")
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina $ filterChildName (isSmth s) e
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina-- | parses one child with the name in the list
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinafilterCL :: [String] -> Element -> Element
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinafilterCL l e = fromMaybe (err "child not found")
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina $ filterChildName (isSmthList l) e
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina-- | parses an IRI
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinagetIRI :: XMLBase -> Element -> IRI
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina let [a] = elAttribs e
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina iri = attrVal a
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina ty = case qName $ attrKey a of
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina "abbreviatedIRI" -> Abbreviated
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina "IRI" -> Full
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina "nodeID" -> NodeID
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina _ -> cssIRI iri
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina in appendBase b $ nullQName {localPart = iri, iriType = ty}
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina{- | if the IRI contains colon, it is split there;
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březinaelse, the xml:base needs to be prepended to the local part
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březinaand then the IRI must be splitted -}
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinaappendBase :: XMLBase -> IRI -> IRI
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinaappendBase b qn =
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina let r = localPart qn
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina in splitIRI $ if ':' `elem` r
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina else qn {localPart = b ++ r, iriType = Full}
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina-- | splits an IRI at the colon
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel BřezinasplitIRI :: IRI -> IRI
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinasplitIRI qn = case iriType qn of
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina NodeID -> mkNodeID qn
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina _ -> let lp = localPart qn
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina np = takeWhile (/= ':') lp
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina ':' : nlp = dropWhile (/= ':') lp
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina in qn {namePrefix = np, localPart = nlp}
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina-- | prepends "_:" to the nodeID if is not there already
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinamkNodeID :: IRI -> IRI
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina let lp = localPart qn
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina in case lp of
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina '_' : ':' : _ -> qn
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina _ -> qn {localPart = "_:" ++ lp}
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina-- | gets the content of an element with name IRI, AbbreviatedIRI or Import
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinacontentIRI :: XMLBase -> Element -> IRI
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinacontentIRI b e =
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina let cont = strContent e
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina iri = nullQName {localPart = cont}
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina in case getName e of
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina "AbbreviatedIRI" -> splitIRI iri
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina "IRI" -> if ':' `elem` cont
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina then splitIRI $ iri {iriType = Full}
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina else appendBase b iri
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina "Import" -> appendBase b $ iri {iriType = cssIRI cont}
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina _ -> err "invalid type of iri"
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina-- | gets the name of an axiom in XML Syntax
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinagetName :: Element -> String
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina let n = (qName . elName) e
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina q = (qURI . elName) e
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina-- | gets the cardinality
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel BřezinagetInt :: Element -> Int
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel BřezinagetInt e = let [int] = elAttribs e in value 10 $ attrVal int
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinagetEntityType :: String -> EntityType
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel BřezinagetEntityType ty = fromMaybe (err $ "no entity type " ++ ty)
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina . lookup ty $ map (\ e -> (show e, e)) entityTypes
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel BřezinagetEntity :: XMLBase -> Element -> Entity
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinagetEntity b e = Entity (getEntityType $ (qName . elName) e) $ getIRI b e
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinagetDeclaration :: XMLBase -> Element -> Axiom
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel BřezinagetDeclaration b e = case getName e of
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina "Declaration" ->
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina PlainAxiom (mkExtendedEntity $ getEntity b $ filterCL entityList e)
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina $ AnnFrameBit (getAllAnnos b e) $ AnnotationFrameBit Declaration
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina _ -> err "not declaration"
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel BřezinaisPlainLiteral :: String -> Bool
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinaisPlainLiteral s =
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina "http://www.w3.org/1999/02/22-rdf-syntax-ns#PlainLiteral" == s
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina-- | put an "f" for float if not there already (eg. 123.45 --> 123.45f)
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinacorrectLit :: Literal -> Literal
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel BřezinacorrectLit l = case l of
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina Literal lf (Typed dt) ->
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina let nlf = if isSuffixOf "float" (localPart dt) && last lf /= 'f'
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina then lf ++ "f"
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina in Literal nlf (Typed dt)
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinagetLiteral :: XMLBase -> Element -> Literal
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel BřezinagetLiteral b e = case getName e of
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina let lf = strContent e
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina mdt = findAttr (unqual "datatypeIRI") e
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina mattr = vFindAttrBy (isSmth "lang") e
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina in case mdt of
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina Nothing -> case mattr of
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina Just lang -> Literal lf $ Untyped $ Just lang
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina Nothing -> Literal lf $ Untyped Nothing
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina Just dt -> case mattr of
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina Just lang -> Literal lf $ Untyped $ Just lang
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina Nothing -> if isPlainLiteral dt then
e07d700ed9daf0cf96607fa2d72978cb2431b794Pavel Březina Literal lf $ Untyped Nothing
dea636af4d1902a081ee891f1b19ee2f8729d759Pavel Březina else correctLit $ Literal lf $ Typed $ appendBase b $
let p = showQU cf \\ "http://www.w3.org/2001/XMLSchema#"
in OntologyDocument (Map.fromList $ getPrefixMap e)