ReadWrite.hs revision 3f69b6948966979163bdfe8331c38833d5d90ecd
{- |
Module : $Header$
Copyright : (c) Klaus L�ttich, C.Maeder, Uni Bremen 2002-2005
License : similar to LGPL, see HetCATS/LICENSE.txt or LIZENZ.txt
Maintainer : Christian.Maeder@dfki.de
Stability : provisional
Portability : portable
-}
module Common.ATerm.ReadWrite (
readATerm, -- :: String -> ATermTable
writeATerm, -- :: ATermTable -> String
writeATermSDoc, -- :: ATermTable -> SDoc
writeSharedATerm,-- :: ATermTable -> String
writeSharedATermSDoc -- :: ATermTable -> SDoc
) where
{-
Author: Joost Visser, CWI, 2002
Changed by: Klaus Luettich & Felix Reckers, 2002-2003, C. Maeder 2005
This module contains functions for reading and writing ATerms
from and to Strings. Two ATerm formats are supported:
AT: plain (non-shared) textual ATerms
TAF: shared textual ATerms
The binary ATerm format (BAF) is not supported.
Current limitations:
BLOBS and place-holders are not supported.
accidental support for the empty aterm: ShAAppl "" [] []
-}
import Common.ATerm.AbstractSyntax
import Data.Char
import qualified Data.Map as Map -- used with Int keys only
import Common.SimpPretty
--- From String to ATerm ------------------------------------------------------
data ReadTAFStruct = RTS ATermTable
-- ATermTable
String
-- remaing part of the ATerm as String
(AbbrevTable Int)
Int -- length of ATerm as String
readATerm :: String -> ATermTable
readATerm = readATerm' . dropWhile ( \ c -> case c of
'!' -> True
_ -> isSpace c)
readATerm' :: String -> ATermTable
readATerm' str =
case readTAF emptyATermTable str emptyATab of
(RTS at rt _ _, _) -> case dropSpaces rt of
[] -> toReadonlyATT at
s -> error $ "garbage following aterm: " ++ take 10 s
-- non-shared is a special case of shared (but could be made more fool proof)
readTAF :: ATermTable -> String -> AbbrevTable Int -> (ReadTAFStruct, Int)
readTAF at str tbl = case readTAF' at str tbl of
(rs@(RTS at' str' tbl' l'), eith) -> case eith of
Left i -> (rs, i)
Right a -> case addATerm a at' of
(at'', i) ->
(RTS at'' str' (insertATab l' (sizeATab tbl') i tbl') l', i)
readTAF' :: ATermTable -> String -> AbbrevTable Int
-> (ReadTAFStruct, Either Int ShATerm)
readTAF' at str tbl = case str of
'#' : xs -> case spanAbbrevChar xs of
(i, str') -> case lookupATab (deAbbrev i) tbl of
Nothing -> error $ "unknown aterm index " ++ '#' : i
Just a -> (RTS at str' tbl $ length i + 1, Left a)
'[' : _ -> case readParTAFs '[' ']' at str tbl 0 of
(RTS at' str' tbl' l', kids) ->
case readAnnTAF at' (dropSpaces str') tbl' l' of
(rs, ann) -> (rs, Right $ ShAList kids ann)
x : xs | isIntHead x -> case span isDigit xs of
(i, str') -> let (intl, l') = readInteger $ x : i in
case readAnnTAF at (dropSpaces str') tbl l' of
(rs, ann) -> (rs, Right $ ShAInt intl ann)
_ -> case readAFun str of
(c, str') -> case readParenTAFs at (dropSpaces str') tbl $ length c of
(RTS at' str'' tbl' l', kids) ->
case readAnnTAF at' (dropSpaces str'') tbl' l' of
(rs, ann) -> (rs, Right $ ShAAppl c kids ann)
readParenTAFs :: ATermTable -> String -> AbbrevTable Int -> Int
-> (ReadTAFStruct, [Int])
readParenTAFs = readParTAFs '(' ')'
readParTAFs :: Char -> Char -> ATermTable -> String -> AbbrevTable Int -> Int
-> (ReadTAFStruct, [Int])
readParTAFs op cp at str tbl l = case str of
p : r | op == p -> readTAFs0 at cp (dropSpaces r) tbl $ l + 1
_ -> (RTS at str tbl l, [])
readTAFs0 :: ATermTable -> Char -> String -> AbbrevTable Int -> Int
-> (ReadTAFStruct, [Int])
readTAFs0 at par str tbl l = case str of
p : r | par == p -> (RTS at r tbl $ l + 1, [])
_ -> readTAFs1 at par str tbl l
readTAFs1 :: ATermTable -> Char -> String -> AbbrevTable Int -> Int
-> (ReadTAFStruct, [Int])
readTAFs1 at par str tbl l =
case readTAF at (dropSpaces str) tbl of
(RTS at' str' tbl' l', t) ->
case readTAFs2 at' par (dropSpaces str') tbl' $ l + l' of
(RTS at'' str'' tbl'' l'', ts) ->
(RTS at'' str'' tbl'' l'', t : ts)
readTAFs2 :: ATermTable -> Char -> String -> AbbrevTable Int -> Int
-> (ReadTAFStruct, [Int])
readTAFs2 at par str tbl l = case str of
',' : r -> readTAFs1 at par (dropSpaces r) tbl $ l + 1
p : r | par == p -> (RTS at r tbl $ l + 1, [])
_ -> error $ "expecting ',' or '" ++ [par] ++ "' in aterm but found: "
++ take 10 str
readAnnTAF :: ATermTable -> String -> AbbrevTable Int -> Int
-> (ReadTAFStruct, [Int])
readAnnTAF = readParTAFs '{' '}'
-- helpers --
dropSpaces :: String -> String
dropSpaces = dropWhile isSpace
readAFun :: String -> (String, String)
readAFun str = case str of
q : r | q == '"' -> let (c, s) = spanNotQuote' r in (q : c, s)
_ -> span isAFunChar str
spanNotQuote' :: String -> (String, String)
spanNotQuote' str = case str of
q : r | q == '"' -> ([q], r)
c : r@(d : s) -> let
(e, l) = if c == '\\' then ([c, d], s) else ([c], r)
(f, t) = spanNotQuote' l
in (e ++ f, t)
_ -> error $ "wrongly terminated aterm string: " ++ take 10 str
isAFunChar :: Char -> Bool
isAFunChar c = isAlphaNum c || c `elem` "_*+-."
spanAbbrevChar :: String -> (String, String)
spanAbbrevChar = span isBase64Char
isIntHead :: Char -> Bool
isIntHead c = isDigit c || c == '-'
--- From (shared) ATerms to strings via simple documents with associated length
data Doc_len = Doc_len SDoc Int
data Write_struct = WS (AbbrevTable Doc_len) Doc_len
writeATerm :: ATermTable -> String
writeATerm = render . writeATermSDoc
writeATermSDoc :: ATermTable -> SDoc
writeATermSDoc = writeSharedATermSDoc' False
writeSharedATermSDoc :: ATermTable -> SDoc
writeSharedATermSDoc = writeSharedATermSDoc' True
writeSharedATerm :: ATermTable -> String
writeSharedATerm = render . writeSharedATermSDoc
writeSharedATermSDoc' :: Bool -> ATermTable -> SDoc
writeSharedATermSDoc' b at =
case writeTAF b (toReadonlyATT at) emptyATab of
WS _ (Doc_len doc _) -> (if b then text "!" else empty) <> doc
--shared (if input is True)
writeTAF :: Bool -> ATermTable -> AbbrevTable Doc_len -> Write_struct
writeTAF b at tbl = let i = getTopIndex at in
case lookupATab i tbl of
Just s -> if b then WS tbl s else writeTAF' b at tbl
Nothing -> case writeTAF' b at tbl of
WS tbl' d_len@(Doc_len _ len) ->
WS (insertATab len i (abbrevD $ sizeATab tbl') tbl') d_len
writeTAF' :: Bool -> ATermTable -> AbbrevTable Doc_len -> Write_struct
writeTAF' b at tbl = case getATerm at of
ShAAppl c ts anns -> case writeTAFs b at ts tbl of
WS tbl' kids -> case writeTAFs b at anns tbl' of
WS tbl'' kidsAnn -> WS tbl'' $ dlConcat (writeATermAuxS c kids)
$ parenthesiseAnnS kidsAnn
ShAList ts anns -> case writeTAFs b at ts tbl of
WS tbl' kids -> case writeTAFs b at anns tbl' of
WS tbl'' kidsAnn -> WS tbl'' $ dlConcat (bracketS kids)
$ parenthesiseAnnS kidsAnn
ShAInt i anns -> case writeTAFs b at anns tbl of
WS tbl' kidsAnn -> WS tbl' $ dlConcat (integerDoc i)
$ parenthesiseAnnS kidsAnn
dlConcat :: Doc_len -> Doc_len -> Doc_len
dlConcat s1@(Doc_len sf1 sl1) s2@(Doc_len sf2 sl2)
| sl1 == 0 = s2
| sl2 == 0 = s1
| otherwise = Doc_len (sf1 <> sf2) $ sl1 + sl2
dlConcat_comma :: Doc_len -> Doc_len -> Doc_len
dlConcat_comma (Doc_len sf1 sl1) (Doc_len sf2 sl2) =
Doc_len (sf1 <> comma <> sf2) $ sl1 + sl2 + 1
-- produce comma seperated output from aterm indices
writeTAFs :: Bool -> ATermTable -> [Int] -> AbbrevTable Doc_len -> Write_struct
writeTAFs b at inds tbl = case inds of
[] -> WS tbl $ Doc_len empty 0
i : is -> case writeTAF b (getATermByIndex1 i at) tbl of
ws@(WS t1 s1) -> if null is then ws
else case writeTAFs b at is t1 of
WS t2 s2 -> WS t2 $ dlConcat_comma s1 s2
doc_len :: String -> Doc_len
doc_len s = Doc_len (text s) $ length s
integerDoc :: Integer -> Doc_len
integerDoc = doc_len . show
writeATermAuxS :: String -> Doc_len -> Doc_len
writeATermAuxS s = dlConcat (doc_len s) . parenthesiseS
-- list brackets must not be omitted
bracketS :: Doc_len -> Doc_len
bracketS (Doc_len d dl)
| dl == 0 = doc_len "[]"
| otherwise = Doc_len (brackets d) $ dl + 2
parenthesiseS :: Doc_len -> Doc_len
parenthesiseS s@(Doc_len d dl)
| dl == 0 = s
| otherwise = Doc_len (parens d) $ dl + 2
parenthesiseAnnS :: Doc_len -> Doc_len
parenthesiseAnnS s@(Doc_len d dl)
| dl == 0 = s
| otherwise = Doc_len (braces d) $ dl + 2
{- | This abbreviation table maps abbreviation ints to aterm indices for
reading and aterm indices to abbreviation docs during writing. -}
data AbbrevTable a = ATab !(Map.Map Int a) !(Int, Int) !Int
{- The last component is the current maximal abbreviation int that
serves as a key when reading and as a value when writing. The pair
stores the size of this component and the maximal power of 64
that is less than it, in order to simplify the computation of the size
of the next abbreviation int. -}
-- | initial table
emptyATab :: AbbrevTable a
emptyATab = ATab Map.empty (2, 64) 0
-- | only insert if the first argument is greater than the abbreviation size
insertATab :: Int -> Int -> a -> AbbrevTable a -> AbbrevTable a
insertATab dl i a t@(ATab m p@(sl, b) s) =
if dl > sl then let n = s + 1 in
ATab (Map.insert i a m)
(if rem n b == 0 then (sl + 1, 64 * b) else p) n
else t
-- | get current abbreviation size
sizeATab :: AbbrevTable a -> Int
sizeATab (ATab _ _ s) = s
-- | lookup map value
lookupATab :: Int -> AbbrevTable a -> Maybe a
lookupATab i (ATab m _ _) = Map.lookup i m
--- Intger Read ---------------------------------------------------------------
readInteger :: String -> (Integer, Int)
readInteger s = case s of
(hd:str) -> if hd == '-' then case conv str of
(m, l) -> (negate m, l + 1)
else conv s
_ -> error "readInteger"
where f (m, l) x = (toInteger (ord x - ord0) + 10 * m, l + 1)
conv = foldl f (0, 0)
--- Base 64 encoding ----------------------------------------------------------
abbrevD :: Int -> Doc_len
abbrevD i = doc_len $ abbrev i
abbrev :: Int -> String
abbrev i = '#' : mkAbbrev i
mkAbbrev :: Int -> String
mkAbbrev x = if x > 0 then mkAbbrevAux x "" else "A"
mkAbbrevAux :: Int -> String -> String
mkAbbrevAux x str =
if x > 0 then case quotRem x 64 of
(d, m) -> mkAbbrevAux d $ toBase64Char m : str
else str
deAbbrev :: [Char] -> Int
deAbbrev = let f m c = 64 * m + toBase64Int c in foldl f 0
{-
toBase64 :: [Char]
toBase64 =
[ 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
]
-}
toBase64Int :: Char -> Int
toBase64Int c
| isUpper c = ord c - ordA
| isLower c = ord c - orda_26
| isDigit c = ord c + mord0_52
| c == '+' = 62
| otherwise = 63 -- '/'
toBase64Char :: Int -> Char
toBase64Char i
| i < 26 = chr (ordA + i)
| i < 52 = chr (orda_26 + i)
| i < 62 = chr (i - mord0_52)
| i == 62 = '+'
| otherwise = '/' -- 63
isBase64Char :: Char -> Bool
isBase64Char c = isAscii c && (isAlphaNum c || c == '+' || c == '/')
-- a couple of constants
ordA :: Int
ordA = ord 'A'
orda_26 :: Int
orda_26 = ord 'a' - 26
ord0 :: Int
ord0 = ord '0'
mord0_52 :: Int
mord0_52 = 52 - ord0