XmlExpat.hs revision e9458b1a7a19a63aa4c179f9ab20f4d50681c168
{- |
Module : ./Common/XmlExpat.hs
Description : Interface to the Hexpat Library
Copyright : (c) Ewaryst Schulz, DFKI 2009
License : GPLv2 or higher, see LICENSE.txt
Maintainer : ewaryst.schulz@dfki.de
Stability : provisional
Portability : portable
Provides the Hexpat parsing and transformation facility to XML.Light types.
-}
module Common.XmlExpat where
import qualified Data.ByteString.Lazy as BS
import qualified Text.XML.Expat.Tree as Expat
import qualified Data.ByteString as B
import Data.ByteString.UTF8 (toString)
import Text.XML.Light
-- * Interface to the Expat xml parser
-- | Transforms an Expat xml tree to an XML.Light tree
nodesToContent :: [Expat.UNode B.ByteString] -> [Content]
nodesToContent = nodesToContent' ""
{- | Version of 'nodesToContent' with accumulator to minimize the occurrences
of CData -}
nodesToContent' :: String -- ^ accumulates text nodes
-> [Expat.UNode B.ByteString] -- ^ list of content items
-> [Content]
nodesToContent' s (Expat.Text t : xs) = nodesToContent' (s ++ toString t) xs
nodesToContent' s l
| not $ null s = strToCData s : nodesToContent l
| otherwise =
case l of
(Expat.Element { Expat.eName = n
, Expat.eAttributes = al, Expat.eChildren = cl } : xs)
-> elemToElem n al cl : nodesToContent xs
_ -> []
strToCData :: String -> Content
strToCData s = Text $ blank_cdata { cdData = s }
-> [Expat.UNode B.ByteString] -> Content
elemToElem n al cl = Elem $ blank_element { elName = strToQName n
, elAttribs = map attrToAttr al
, elContent = nodesToContent cl }
attrToAttr :: (B.ByteString, B.ByteString) -> Attr
attrToAttr (n, v) = Attr { attrKey = strToQName n
, attrVal = toString v }
strToQName :: B.ByteString -> QName
strToQName s = case break (':' ==) $ toString s of
(n, []) -> unqual n
(pr, _ : n) -> blank_name { qName = n, qPrefix = Just pr }
parseXml :: BS.ByteString -> Either String Element
parseXml bs =
let (nd, pe) = Expat.parse Expat.defaultParseOptions bs
contl = nodesToContent [nd]
in case pe of
Just e -> Left $ "Expat.parse: " ++ show e
_ -> case contl of
[Elem e] -> Right e
_ -> Left "Expat.parse: No unique root element."