57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder{- |
e9458b1a7a19a63aa4c179f9ab20f4d50681c168Jens ElknerModule : ./Common/XmlDiff.hs
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaederDescription : compute xml diffs
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaederCopyright : (c) Christian Maeder, DFKI GmbH 2011
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaederLicense : GPLv2 or higher, see LICENSE.txt
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaederMaintainer : Christian.Maeder@dfki.de
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaederStability : provisional
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaederPortability : portable
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder-}
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maedermodule Common.XmlDiff where
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maederimport Common.ToXml
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maederimport Common.Utils as Utils
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maederimport Common.XPath
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maederimport Common.XUpdate
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maederimport Common.Lib.MapSet (setToMap)
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maederimport Data.List
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maederimport qualified Data.Set as Set
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maederimport qualified Data.Map as Map
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maederimport Text.XML.Light as XML
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder
770abb9d4554f378acbae393b25f8687eeeb3ad0Christian MaederhetsTags :: UnordTags
770abb9d4554f378acbae393b25f8687eeeb3ad0Christian MaederhetsTags = Map.fromList
770abb9d4554f378acbae393b25f8687eeeb3ad0Christian Maeder $ map (\ (e, as) -> (unqual e, Set.fromList $ map unqual as))
770abb9d4554f378acbae393b25f8687eeeb3ad0Christian Maeder [ ("DGNode", ["name"])
770abb9d4554f378acbae393b25f8687eeeb3ad0Christian Maeder , ("DGLink", ["linkid", "source", "target"])
770abb9d4554f378acbae393b25f8687eeeb3ad0Christian Maeder , ("Axiom", [])
770abb9d4554f378acbae393b25f8687eeeb3ad0Christian Maeder , ("Theorem", []) ]
770abb9d4554f378acbae393b25f8687eeeb3ad0Christian Maeder{- for symbols the order matters. For axioms and theorems the names should be
770abb9d4554f378acbae393b25f8687eeeb3ad0Christian Maederstored separately -}
770abb9d4554f378acbae393b25f8687eeeb3ad0Christian Maeder
770abb9d4554f378acbae393b25f8687eeeb3ad0Christian MaederhetsXmlChanges :: Element -> Element -> [Change]
770abb9d4554f378acbae393b25f8687eeeb3ad0Christian MaederhetsXmlChanges e1 e2 = xmlDiff hetsTags [] Map.empty
770abb9d4554f378acbae393b25f8687eeeb3ad0Christian Maeder [Elem $ cleanUpElem e1]
770abb9d4554f378acbae393b25f8687eeeb3ad0Christian Maeder [Elem $ cleanUpElem e2]
770abb9d4554f378acbae393b25f8687eeeb3ad0Christian Maeder
770abb9d4554f378acbae393b25f8687eeeb3ad0Christian MaederhetsXmlDiff :: Element -> Element -> Element
770abb9d4554f378acbae393b25f8687eeeb3ad0Christian MaederhetsXmlDiff e = mkMods . hetsXmlChanges e
770abb9d4554f378acbae393b25f8687eeeb3ad0Christian Maeder
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder{- for elements, whose order does not matter, use the given attribute keys to
79e28be65fd0bc65adf266d5ae4f6deb92546bf7Christian Maederdetermine their equality. An empty set indicates an element that only contains
79e28be65fd0bc65adf266d5ae4f6deb92546bf7Christian Maedertext to be compared. -}
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maedertype UnordTags = Map.Map QName (Set.Set QName)
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder-- keep track of the nth element with a given tag
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maedertype Count = Map.Map QName Int
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder{- we assume an element contains other elements and no text entries or just a
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maedersingle text content -}
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian MaederxmlDiff :: UnordTags -> [Step] -> Count -> [Content] -> [Content] -> [Change]
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian MaederxmlDiff m stps em old new = case (old, filter validContent new) of
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder ([], []) -> []
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder ([], ns) ->
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder [Change (Add Append $ map contentToAddChange ns) $ pathToExpr stps]
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder (os, []) -> removeIns stps em os
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder (o : os, ns@(n : rt)) ->
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder if validContent o then
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder case o of
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder Elem e ->
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder let en = elName e
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder atts = elAttribs e
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder cs = elContent e
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder (nm, nstps) = extendPath en em stps
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder downDiffs = xmlElemDiff m nstps atts cs
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder restDiffs = xmlDiff m stps nm os
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder rmO = Change Remove (pathToExpr nstps) : restDiffs ns
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder in case Map.lookup en m of
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder Nothing -> case n of
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder Elem e2 | elName e2 == en ->
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder downDiffs e2
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder ++ restDiffs rt
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder _ -> rmO
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder Just ats -> let
79e28be65fd0bc65adf266d5ae4f6deb92546bf7Christian Maeder (mns, rns) = partition (matchElems en (strContent e) $
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder Map.intersection (attrMap atts) $ setToMap ats) ns
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder in case mns of
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder Elem mn : rm -> downDiffs mn
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder ++ restDiffs (rm ++ rns)
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder _ -> rmO
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder XML.Text cd -> let inText = cdData cd in case n of
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder XML.Text cd2 | trim inText == trim nText
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder -> xmlDiff m stps em os rt
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder | otherwise -> Change (Update nText) (pathToExpr stps)
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder : xmlDiff m stps em os rt
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder where nText = cdData cd2
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder _ -> error "xmldiff2"
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder _ -> error "xmldiff"
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder else xmlDiff m stps em os ns
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian MaederremoveIns :: [Step] -> Count -> [Content] -> [Change]
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian MaederremoveIns stps em cs = case cs of
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder [] -> []
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder c : rs -> case c of
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder Elem e -> let
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder (nm, nstps) = extendPath (elName e) em stps
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder in Change Remove (pathToExpr nstps) : removeIns stps nm rs
1ba51e89f63278f541a547315926a05f92c676ffChristian Maeder _ -> Change (Update "") (pathToExpr stps) : removeIns stps em rs
1ba51e89f63278f541a547315926a05f92c676ffChristian Maeder -- does not work for multiple text entries
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder
89026e63176e6e40c7be1bbc542326f0d29d8108Christian MaederattrMap :: [Attr] -> Map.Map QName String
89026e63176e6e40c7be1bbc542326f0d29d8108Christian MaederattrMap = Map.fromList . map (\ a -> (attrKey a, attrVal a))
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder
79e28be65fd0bc65adf266d5ae4f6deb92546bf7Christian MaedermatchElems :: QName -> String -> Map.Map QName String -> Content -> Bool
79e28be65fd0bc65adf266d5ae4f6deb92546bf7Christian MaedermatchElems en t atts c = case c of
79e28be65fd0bc65adf266d5ae4f6deb92546bf7Christian Maeder Elem e -> elName e == en
79e28be65fd0bc65adf266d5ae4f6deb92546bf7Christian Maeder && if Map.null atts then null (elChildren e) && strContent e == t else
79e28be65fd0bc65adf266d5ae4f6deb92546bf7Christian Maeder Map.isSubmapOfBy (==) atts (attrMap $ elAttribs e)
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder _ -> False
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaederxmlElemDiff :: UnordTags -> [Step] -> [Attr] -> [Content] -> Element -> [Change]
89026e63176e6e40c7be1bbc542326f0d29d8108Christian MaederxmlElemDiff m nPath atts cs e2 = xmlAttrDiff nPath atts (elAttribs e2)
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder ++ xmlDiff m nPath Map.empty cs (elContent e2)
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaederxmlAttrDiff :: [Step] -> [Attr] -> [Attr] -> [Change]
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaederxmlAttrDiff p a1 a2 = let
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder m1 = attrMap a1
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder m2 = attrMap a2
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder rms = Map.toList $ Map.difference m1 m2
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder ins = Map.toList $ Map.difference m2 m1
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder inter = Map.toList $ Map.filter (uncurry (/=))
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder $ Map.intersectionWith (,) m1 m2
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder addAttrStep a = pathToExpr $ Step Attribute (NameTest $ qName a) [] : p
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder in map (Change Remove . addAttrStep . fst) rms
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder ++ map (\ (a, (_, v)) -> Change (Update v) $ addAttrStep a) inter
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder ++ if null ins then [] else
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder [Change (Add Append $ map (AddAttr . uncurry Attr) ins) $ pathToExpr p]
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaederpathToExpr :: [Step] -> Expr
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaederpathToExpr = PathExpr Nothing . Path True . reverse
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian MaederextendPath :: QName -> Count -> [Step] -> (Count, [Step])
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian MaederextendPath en em stps = let
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder nm = Map.insertWith (+) en 1 em
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder i = Map.findWithDefault 1 en nm
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder nstps = Step Child (NameTest $ qName en) [PrimExpr Number $ show i] : stps
5b55761e0df088c5b41183fb83106bfd02a61fa2Christian Maeder in (nm, nstps)
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder-- steps and predicates are reversed!
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaederaddPathNumber :: Int -> [Step] -> [Step]
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaederaddPathNumber i stps =
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder let e = PrimExpr Number $ show i
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder in case stps of
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder [] -> []
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder Step a n es : rs -> Step a n (e : es) : rs
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder
89026e63176e6e40c7be1bbc542326f0d29d8108Christian MaedercontentToAddChange :: Content -> AddChange
89026e63176e6e40c7be1bbc542326f0d29d8108Christian MaedercontentToAddChange c = case c of
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder Elem e -> AddElem e
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder XML.Text t -> AddText $ cdData t
89026e63176e6e40c7be1bbc542326f0d29d8108Christian Maeder CRef s -> AddText s
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaedermkXQName :: String -> QName
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaedermkXQName s = (unqual s) { qPrefix = Just xupdateS }
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaederchangeToXml :: Change -> Element
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaederchangeToXml (Change csel pth) = let
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder sel = add_attr (mkAttr selectS $ show pth)
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder in case csel of
8ab0f7627987d45eb3fe36845a91694977ca5cdbChristian Maeder Add i as -> sel
8ab0f7627987d45eb3fe36845a91694977ca5cdbChristian Maeder . node (mkXQName $ showInsert i) $ map addsToXml as
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder Remove -> sel $ node (mkXQName removeS) ()
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder Update s -> sel $ node (mkXQName updateS) s
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder _ -> error "changeToXml"
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaederaddsToXml :: AddChange -> Content
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaederaddsToXml a = case a of
4620e43bc9fa96506bd78ed6b3e5318b08de5996Christian Maeder AddElem e -> Elem $ cleanUpElem e
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder AddAttr (Attr k v) -> Elem
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder . add_attr (mkNameAttr $ qName k) $ node (mkXQName attributeS) v
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder AddText s -> mkText s
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder _ -> error "addsToXml"
57dc8a87418e235e3d0621fb90728054044a9ef9Christian Maeder
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaedermkMods :: [Change] -> Element
57dc8a87418e235e3d0621fb90728054044a9ef9Christian MaedermkMods = node (mkXQName "modifications") . map changeToXml