0N/A{-# LANGUAGE FlexibleContexts #-}
0N/ADescription : scanner for Casl tokens using Parsec
0N/ACopyright : (c) Christian Maeder and Uni Bremen 2002-2005
0N/AMaintainer : Christian.Maeder@dfki.de
0N/AStability : provisional
0N/APortability : portable
0N/Aaccording to chapter II.4 (Lexical Symbols) of the CASL reference manual
0N/A-- | no-bracket-signs (excluding mu!)
0N/AisSignChar :: Char -> Bool
0N/AisSignChar c = if isAscii c then elem c "!#$&*+-./:<=>?@\\^|~" else
0N/A-- \172 neg \183 middle dot \215 times
0N/A-- at least two semicolons
0N/Asemis :: CharParser st String
0N/Asemis = tryString ";;" <++> many (char ';')
0N/AscanAnySigns :: CharParser st String
644N/A many1 (satisfy isSignChar <?> "casl sign") <|> semis <?> "signs"
1104N/A-- | casl letters (all isAlpha including feminine and masculine ordinal and mu)
0N/AcaslLetters :: Char -> Bool
0N/AcaslLetters = isAlpha
0N/A-- ['A'..'Z'] ++ ['a'..'z'] ++
1104N/A\208 ETH \215 times \222 THORN \240 eth \247 divide \254 thorn
0N/A\170 feminine ordinal \181 micro sign (mu) \186 masculine ordinal
0N/AcaslLetter :: CharParser st Char
0N/AcaslLetter = satisfy caslLetters <?> "casl letter"
0N/AscanLPD :: CharParser st Char
scanLPD = caslLetter <|> digit <|> char '\'' <?> "casl char"
-- * ParsecCombinator extension
lookaheadPosition :: String
lookaheadPosition = "lookahead position "
myLookAhead :: GenParser tok st a -> GenParser tok st a
x <- fmap Just parser <|> return Nothing
_ <- setParserState state
Nothing -> fail $ lookaheadPosition ++ showPos
followedWith :: GenParser tok st a -> GenParser tok st b -> GenParser tok st a
followedWith p q = try $ p << myLookAhead q
checkWithUsing :: (a -> String) -> GenParser tok st a -> (a -> Bool)
checkWithUsing display p f = do
if f x then return x else unexpected (display x)
checkWith :: Show a => GenParser tok st a -> (a -> Bool) -> GenParser tok st a
checkWith = checkWithUsing show
separatedBy :: GenParser tok st a -> GenParser tok st b
-> GenParser tok st ([a], [b])
(es, ts) <- separatedBy p s
scanLetterWord :: CharParser st String
scanLetterWord = caslLetter <:> many scanLPD <?> "letter word"
singleUnderline :: CharParser st Char
singleUnderline = char '_' `followedWith` scanLPD
scanUnderlineWord :: CharParser st String
scanUnderlineWord = singleUnderline <:> many1 scanLPD
scanAnyWords :: CharParser st String
scanAnyWords = flat (scanLetterWord <:> many scanUnderlineWord) <?> "words"
scanDot :: CharParser st Char
scanDot = char '.' `followedWith` caslLetter
scanDotWords :: CharParser st String
scanDotWords = scanDot <:> scanAnyWords
-- * casl escape chars for quoted chars and literal strings
value :: Int -> String -> Int
value base = foldl (\ x d -> base * x + digitToInt d) 0
digits :: Int -> Int -> Int
digits b n = if n == 0 then 0 else 1 + digits b (div n b)
valueCheck :: Int -> String -> Bool
in n >= digits b 255 && n <= digits b m && value b s <= m
simpleEscape :: CharParser st String
simpleEscape = single (oneOf "'\"\\ntrvbfa?")
decEscape :: CharParser st String
decEscape = many1 digit `checkWith` valueCheck 10
hexEscape :: CharParser st String
hexEscape = char 'x' <:> many1 hexDigit `checkWith` valueCheck 16
octEscape :: CharParser st String
octEscape = char 'o' <:> many1 octDigit `checkWith` valueCheck 8
escapeChar :: CharParser st String
escapeChar = char '\\' <:>
(simpleEscape <|> decEscape <|> hexEscape <|> octEscape)
-- * chars for quoted chars and literal strings
printable :: CharParser st String
printable = single $ satisfy $ \ c -> notElem c "'\"\\" && c > '\026'
caslChar :: CharParser st String
caslChar = escapeChar <|> printable
scanQuotedChar :: CharParser st String
scanQuotedChar = enclosedBy (caslChar <|> (char '"' >> return "\\\""))
(char '\'') <?> "quoted char"
-- convert '"' to '\"' and "'" to "\'" (no support for ''')
scanString :: CharParser st String
scanString = flat (many (caslChar <|> (char '\'' >> return "\\\'")))
`enclosedBy` char '"' <?> "literal string"
isString :: Token -> Bool
isString = isPrefixOf "\"" . tokStr
parseString :: CharParser () a -> String -> a
parseString p s = case parse p "" s of
Left _ -> error $ "parseString: " ++ s
splitString :: CharParser () a -> String -> (a, String)
splitString p = parseString $ do
-- * digit, number, fraction, float
getNumber :: CharParser st String
getSignedNumber :: CharParser st String
getSignedNumber = optionL (string "-") <++> getNumber
scanFloat :: CharParser st String
<++> (optionL (try $ char '.' <:> getNumber)
<++> optionL (char 'E' <:> optionL (single $ oneOf "+-")
{- | In addition to scanFloat, also '1.', '.1' and '
2.e-13' are recognized
as well as preceding signs '+-'. -}
scanFloatExt :: CharParser st String
compE = oneOf "eE" <:> getSNum
getNum' = option "0" getNumber
checkSp' = (++ "0.") . checkSign' . head
getSNum = optionL (oneOf "+-" >-> checkSign') <++> getNumber
in -- '1.' or '
2.e-13' or '1.213'
try (getSNum <++> (optionL (try $ compD getNum') <++> optionL compE))
-- everything starting with a dot
<|> (choice (map string ["+.", "-.", "."]) >-> checkSp') <++> getNumber
scanDigit :: CharParser st String
isNumber :: Token -> Bool
isNumber t = case tokStr t of
isFloating :: Token -> Bool
-- precondition: isNumber
isFloating = any (`elem` ".eE") . tokStr
-- * skip whitespaces and nested comment out
nestCommentOut :: CharParser st ()
nestCommentOut = forget $ nestedComment "%[" "]%"
skip = skipMany (forget (satisfy isSpace) <|> nestCommentOut <?> "")
getPos :: GenParser tok st Pos
getPos = fmap fromSourcePos getPosition
-- only skip to an annotation if it's on the same or next line
skipSmart :: CharParser st ()
$ notFollowedBy (char '%') >> return ())
-- * keywords WORDS or NO-BRACKET-SIGNS
keyWord :: CharParser st a -> CharParser st a
keyWord = try . (<< notFollowedBy (scanLPD <|> singleUnderline))
keySign :: CharParser st a -> CharParser st a
keySign = try . (<< notFollowedBy (satisfy isSignChar))
-- * lexical tokens with position
parseToken :: CharParser st String -> CharParser st Token
parseToken = liftM2 (\ p s -> Token s $ Range [p]) getPos
pToken :: CharParser st String -> CharParser st Token
pToken = parseToken . (<< skipSmart)
pluralKeyword :: String -> CharParser st Token
pluralKeyword s = pToken (keyWord (string s <++> optionL (string "s")))
-- | check for keywords (depending on lexem class)
toKey :: String -> CharParser st String
toKey s = let p = string s in
if last s `elem` "[]{}(),;" then p
else if isSignChar $ last s then keySign p
-- * some separator parsers
asSeparator :: String -> CharParser st Token
asSeparator = pToken . string
commaT :: CharParser st Token
semiT :: CharParser st Token
semiT = pToken $ string ";" << notFollowedBy (char ';')
oBraceT :: CharParser st Token
oBraceT = asSeparator "{"
cBraceT :: CharParser st Token
cBraceT = asSeparator "}"
oBracketT :: CharParser st Token
oBracketT = asSeparator "["
cBracketT :: CharParser st Token
cBracketT = (tryString "]%" >> unexpected "block-comment-end ]%" <?> "")
oParenT :: CharParser st Token
oParenT = asSeparator "("
cParenT :: CharParser st Token
cParenT = asSeparator ")"
braces :: CharParser st a -> CharParser st a
braces p = oBraceT >> p << cBraceT
commaSep1 :: CharParser st a -> CharParser st [a]
commaSep1 p = fmap fst $ separatedBy p commaT
placeS :: CharParser st String
placeT :: CharParser st Token
notFollowedWith :: GenParser tok st a -> GenParser tok st b
try $ join $ (try (p1 >> p2) >> return pzero) <|> return p1