Token.hs revision 03802967de9e47e40051919e56673a41dc2434d8
2b873214c9ab511bbca437c036371ab664aedaceChristian Maeder
f66fcd981f556c238df7dd6dfa42123745e3b1d2Christian Maeder{- |
c63ebf815c8a874525cf18670ad74847f7fc7b26Christian MaederModule : $Header$
c092fcac4b8f5c524c22ca579189c4487c13edf7Christian MaederDescription : parser for CASL 'Id's based on "Common.Lexer"
54ed6a6b1a6c7d27fadb39ec5b59d0806c81f7c8Christian MaederCopyright : (c) Christian Maeder and Uni Bremen 2002-2004
75a6279dbae159d018ef812185416cf6df386c10Till MossakowskiLicense : similar to LGPL, see HetCATS/LICENSE.txt or LIZENZ.txt
3f69b6948966979163bdfe8331c38833d5d90ecdChristian Maeder
75a6279dbae159d018ef812185416cf6df386c10Till MossakowskiMaintainer : Christian.Maeder@dfki.de
75a6279dbae159d018ef812185416cf6df386c10Till MossakowskiStability : provisional
f66fcd981f556c238df7dd6dfa42123745e3b1d2Christian MaederPortability : portable
c092fcac4b8f5c524c22ca579189c4487c13edf7Christian Maeder
dda5ab793f1615c1ba1dcaa97a4346b0878da6b1Christian MaederParser for CASL 'Id's based on "Common.Lexer"
f66fcd981f556c238df7dd6dfa42123745e3b1d2Christian Maeder
f66fcd981f556c238df7dd6dfa42123745e3b1d2Christian Maeder-}
da955132262baab309a50fdffe228c9efe68251dCui Jian{- http://www.cofi.info/Documents/CASL/Summary/
dda5ab793f1615c1ba1dcaa97a4346b0878da6b1Christian Maeder from 25 March 2001
dda5ab793f1615c1ba1dcaa97a4346b0878da6b1Christian Maeder
f66fcd981f556c238df7dd6dfa42123745e3b1d2Christian Maeder C.2.1 Basic Specifications with Subsorts
f66fcd981f556c238df7dd6dfa42123745e3b1d2Christian Maeder
0799b5dc3f06d2640e66e9ab54b8b217348fd719Christian MaederSIMPLE-ID ::= WORDS
0799b5dc3f06d2640e66e9ab54b8b217348fd719Christian MaederID ::= TOKEN-ID | MIXFIX-ID
43b4c41fbb07705c9df321221ab9cb9832460407Christian MaederTOKEN-ID ::= TOKEN
4c8d3c5a9e938633f6147b5a595b9b93bfca99e6Christian MaederMIXFIX-ID ::= TOKEN-ID PLACE-TOKEN-ID ... PLACE-TOKEN-ID
fbc4f8708092d571a45cb483f37cc6b674da45a7Christian Maeder | PLACE-TOKEN-ID ... PLACE-TOKEN-ID
fbc4f8708092d571a45cb483f37cc6b674da45a7Christian MaederPLACE-TOKEN-ID ::= PLACE TOKEN-ID
fbc4f8708092d571a45cb483f37cc6b674da45a7Christian Maeder | PLACE
14c89b2d830777bf4db2850f038c9f60acaca486Christian MaederPLACE ::= __
b10d6cef708b7a659f2d3b367e8e0db0d03ae3f5Till Mossakowski
f77f29e84b3f6e791c82e61b13fbf76582bedd2fChristian MaederTOKEN ::= WORDS | DOT-WORDS | DIGIT | QUOTED-CHAR
dda5ab793f1615c1ba1dcaa97a4346b0878da6b1Christian Maeder | SIGNS
d56ece59c372cb887355825901222b9f3377f7e6Thiemo Wiedemeyer
d56ece59c372cb887355825901222b9f3377f7e6Thiemo Wiedemeyer SIGNS are adapted here and more permissive as in the summary
f77f29e84b3f6e791c82e61b13fbf76582bedd2fChristian Maeder WORDS and NO-BRACKET-SIGNS are treated equally
f66fcd981f556c238df7dd6dfa42123745e3b1d2Christian Maeder legal are, ie. "{a}", "{+}", "a{}="
c0c2380bced8159ff0297ece14eba948bd236471Christian Maeder illegal is "a=" (no two SIMPLE-TOKEN stay beside each other)
da333ffa6336cf59a4071fcddad358c5eafd3e61Sonja Gröning
c0c2380bced8159ff0297ece14eba948bd236471Christian Maeder SIMPLE-TOKEN ::= WORDS | DOT-WORDS | DIGIT | QUOTED-CHAR
6352f3c31da3043783a13be6594aacb2147378baRazvan Pascanu | NO-BRACKET-SIGNS
8ef91a173e69219fc2ebd45c76a35891c7785abdMarkus Gross
fc1a590cd3ee36797c0a032ff41e07f8e2469341Christian Maeder STB ::= SIMPLE-TOKEN BRACKETS
a2b04db3e156312a8596d8084f7f0f51acf8a96bChristian Maeder BST ::= BRACKETS SIMPLE-TOKEN
fc1a590cd3ee36797c0a032ff41e07f8e2469341Christian Maeder
66a774f13272fde036481edd2298081ab3d04678Razvan Pascanu SIGNS ::= BRACKETS
834c2e71b8e390e5b05c8d02bb6eb22621125133Markus Gross | BRACKETS STB ... STB
d27b1887e61f1dc53d77c37f59dbf5019242a686Christian Maeder | BRACKETS STB ... STB SIMPLE-TOKEN
d27b1887e61f1dc53d77c37f59dbf5019242a686Christian Maeder | SIMPLE-TOKEN
d27b1887e61f1dc53d77c37f59dbf5019242a686Christian Maeder | SIMPLE-TOKEN BST ... BST
6e52f1dfc0da4bc4a7701cf856641c9dce08fc7dChristian Maeder | SIMPLE-TOKEN BST ... BST BRACKETS
923e25bb8c7cf9f2978c7844ad173704482cc3b0Martin Kühl
9f85afecbd79b3df5a0bb17bd28cd0b288dc3213Kristina Sojakova A SIMPLE-TOKEN followed by "[" outside nested brackets
63da71bfb4226f504944b293fb77177ebcaea7d4Ewaryst Schulz will be taken as the beginning of a compound list.
e1ea9a046e9640148ca876dfe47e391559a9fdf3Christian Maeder Within SIGNS brackets need not be balanced,
14c89b2d830777bf4db2850f038c9f60acaca486Christian Maeder only after their composition to a MIXFIX-ID.
57026bc09337d158b89775048a9bcc9c17d825caChristian Maeder
57026bc09337d158b89775048a9bcc9c17d825caChristian Maeder BRACKETS = BRACKET ... BRACKET
57026bc09337d158b89775048a9bcc9c17d825caChristian Maeder BRACKET ::= [ | ] | { | }
22b772f8753f0cdb4508ba460356c238de2ee375Jonathan von Schroeder
e1ea9a046e9640148ca876dfe47e391559a9fdf3Christian Maeder 2.4 Identifiers
923e25bb8c7cf9f2978c7844ad173704482cc3b0Martin Kühl brackets/braces within MIXFIX-ID must be balanced
f66fcd981f556c238df7dd6dfa42123745e3b1d2Christian Maeder
2b873214c9ab511bbca437c036371ab664aedaceChristian Maeder C.2.2 Structured Specifications
fe4e6766a6e51cca3f8cc9632c25936af147d8b9Christian Maeder
d27b1887e61f1dc53d77c37f59dbf5019242a686Christian Maeder TOKEN-ID ::= ... | TOKEN [ ID ,..., ID ]
d27b1887e61f1dc53d77c37f59dbf5019242a686Christian Maeder
d27b1887e61f1dc53d77c37f59dbf5019242a686Christian Maeder A compound list must follow the last TOKEN within MIXFIX-ID,
834c2e71b8e390e5b05c8d02bb6eb22621125133Markus Gross so a compound list is never nested within (balanced) mixfix BRACKETS.
8ef91a173e69219fc2ebd45c76a35891c7785abdMarkus Gross Only PLACEs may follow a compound list.
8ef91a173e69219fc2ebd45c76a35891c7785abdMarkus Gross The IDs within the compound list may surely be compound IDs again.
8ef91a173e69219fc2ebd45c76a35891c7785abdMarkus Gross-}
8ef91a173e69219fc2ebd45c76a35891c7785abdMarkus Gross
ee1c7c5796832536932d7b06cbfb1ca13f9a0d7bMartin Kühlmodule Common.Token where
ee1c7c5796832536932d7b06cbfb1ca13f9a0d7bMartin Kühl
a3a7d8b3cdf05c8040c62dbcf9a15dc5042cd721Christian Maederimport Common.Id
a3a7d8b3cdf05c8040c62dbcf9a15dc5042cd721Christian Maederimport Common.Keywords
a3a7d8b3cdf05c8040c62dbcf9a15dc5042cd721Christian Maederimport Common.Lexer
c0c2380bced8159ff0297ece14eba948bd236471Christian Maederimport Common.Parsec
a3a7d8b3cdf05c8040c62dbcf9a15dc5042cd721Christian Maeder
c0c2380bced8159ff0297ece14eba948bd236471Christian Maederimport Text.ParserCombinators.Parsec
e1ea9a046e9640148ca876dfe47e391559a9fdf3Christian Maederimport Data.List (delete)
e1ea9a046e9640148ca876dfe47e391559a9fdf3Christian Maeder
57026bc09337d158b89775048a9bcc9c17d825caChristian Maeder-- * Casl keyword lists
57026bc09337d158b89775048a9bcc9c17d825caChristian Maeder
22b772f8753f0cdb4508ba460356c238de2ee375Jonathan von Schroeder-- | reserved signs
fa388aea9cef5f9734fec346159899a74432ce26Christian Maedercasl_reserved_ops :: [String]
63719301448519453f66383f4e583d9fd5b89ecbChristian Maedercasl_reserved_ops = [colonS, colonQuMark, defnS, dotS, cDot, mapsTo]
a3a7d8b3cdf05c8040c62dbcf9a15dc5042cd721Christian Maeder
fc1a590cd3ee36797c0a032ff41e07f8e2469341Christian Maeder-- | these formula signs are legal in terms, but illegal in declarations
fc1a590cd3ee36797c0a032ff41e07f8e2469341Christian Maederformula_ops :: [String]
923e25bb8c7cf9f2978c7844ad173704482cc3b0Martin Kühlformula_ops = [equalS, implS, equivS, lOr, lAnd, negS]
9f85afecbd79b3df5a0bb17bd28cd0b288dc3213Kristina Sojakova
72079df98b3cb7cc1fd82a0a24984893dcd05ecaEwaryst Schulz-- | all reseverd signs
a461314c811f4187dff85c8be079a41b2f13f176Christian Maedercasl_reserved_fops :: [String]
a3a7d8b3cdf05c8040c62dbcf9a15dc5042cd721Christian Maedercasl_reserved_fops = formula_ops ++ casl_reserved_ops
fbc4f8708092d571a45cb483f37cc6b674da45a7Christian Maeder
8a77240a809197c92c0736c431b4b88947a7bac1Christian Maeder-- | reserved keywords
8a77240a809197c92c0736c431b4b88947a7bac1Christian Maedercasl_basic_reserved_words :: [String]
fbc4f8708092d571a45cb483f37cc6b674da45a7Christian Maedercasl_basic_reserved_words =
8ef91a173e69219fc2ebd45c76a35891c7785abdMarkus Gross [axiomS, axiomS ++ sS, cogeneratedS, cotypeS, cotypeS ++ sS,
8ef91a173e69219fc2ebd45c76a35891c7785abdMarkus Gross esortS, esortS ++ sS, etypeS, etypeS ++ sS,
8ef91a173e69219fc2ebd45c76a35891c7785abdMarkus Gross existsS, forallS, freeS, generatedS,
f77f29e84b3f6e791c82e61b13fbf76582bedd2fChristian Maeder opS, opS ++ sS, predS, predS ++ sS,
d56ece59c372cb887355825901222b9f3377f7e6Thiemo Wiedemeyer sortS, sortS ++ sS, typeS, typeS ++ sS, varS, varS ++ sS]
d56ece59c372cb887355825901222b9f3377f7e6Thiemo Wiedemeyer
d56ece59c372cb887355825901222b9f3377f7e6Thiemo Wiedemeyer-- | reserved keywords
f1dec6898638ba1131a9fadbc4d1544c93dfabb0Klaus Luettichcasl_structured_reserved_words :: [String]
d56ece59c372cb887355825901222b9f3377f7e6Thiemo Wiedemeyercasl_structured_reserved_words =
d56ece59c372cb887355825901222b9f3377f7e6Thiemo Wiedemeyer [andS, archS, behaviourallyS, closedS, cofreeS, endS,
d56ece59c372cb887355825901222b9f3377f7e6Thiemo Wiedemeyer fitS, freeS, fromS, getS, givenS,
d56ece59c372cb887355825901222b9f3377f7e6Thiemo Wiedemeyer hideS, lambdaS, libraryS, localS, logicS, newlogicS,
f77f29e84b3f6e791c82e61b13fbf76582bedd2fChristian Maeder refinedS, refinementS,
resultS, revealS, specS, thenS, toS,
unitS, unitS ++ sS, versionS, viewS, withS, withinS]
-- | reserved keywords
casl_reserved_words :: [String]
casl_reserved_words =
casl_basic_reserved_words ++ casl_structured_reserved_words
-- | these formula words are legal in terms, but illegal in declarations
formula_words :: [String]
formula_words = [asS, defS, elseS, ifS, inS, whenS, falseS, notS, trueS]
-- | all reserved words
casl_reserved_fwords :: [String]
casl_reserved_fwords = formula_words ++ casl_reserved_words
-- * a single 'Token' parser taking lists of key symbols and words as parameter
-- | a simple 'Token' parser depending on reserved signs and words
-- (including a quoted char, dot-words or a single digit)
sid :: ([String], [String]) -> GenParser Char st Token
sid (kOps, kWords) = pToken (scanQuotedChar <|> scanDotWords
<|> scanDigit <|> reserved kOps scanAnySigns
<|> reserved kWords scanAnyWords <?> "simple-id")
-- * 'Token' lists parsers
-- | balanced mixfix components within braces
braceP :: GenParser Char st [Token] -> GenParser Char st [Token]
braceP p = oBraceT <:> p <++> single cBraceT
<|> try (oBracketT <:> single cBracketT)
-- | balanced mixfix components within square brackets
bracketP :: GenParser Char st [Token] -> GenParser Char st [Token]
bracketP p = oBracketT <:> p <++> single cBracketT
-- | an 'sid' optionally followed by other mixfix components
-- (without no two consecutive 'sid's)
innerMix1 :: ([String], [String]) -> GenParser Char st [Token]
innerMix1 l = sid l <:> optionL (innerMix2 l)
-- | mixfix components not starting with a 'sid' (possibly places)
innerMix2 :: ([String], [String]) -> GenParser Char st [Token]
innerMix2 l = let p = innerList l in
flat (many1 (braceP p <|> bracketP p <|> many1 placeT))
<++> optionL (innerMix1 l)
-- | any mixfix components within braces or brackets
innerList :: ([String], [String]) -> GenParser Char st [Token]
innerList l = optionL (innerMix1 l <|> innerMix2 l <?> "token")
-- | mixfix components starting with a 'sid' (outside 'innerList')
topMix1 :: ([String], [String]) -> GenParser Char st [Token]
topMix1 l = sid l <:> optionL (topMix2 l)
-- | mixfix components starting with braces ('braceP')
-- that may follow 'sid' outside 'innerList'.
-- (Square brackets after a 'sid' will be taken as a compound list.)
topMix2 :: ([String], [String]) -> GenParser Char st [Token]
topMix2 l = flat (many1 (braceP $ innerList l)) <++> optionL (topMix1 l)
-- | mixfix components starting with square brackets ('bracketP')
-- that may follow a place ('placeT') (outside 'innerList')
topMix3 :: ([String], [String]) -> GenParser Char st [Token]
topMix3 l = let p = innerList l in
bracketP p <++> flat (many (braceP p))
<++> optionL (topMix1 l)
-- | any ('topMix1', 'topMix2', 'topMix3') mixfix components
-- that may follow a place ('placeT') at the top level
afterPlace :: ([String], [String]) -> GenParser Char st [Token]
afterPlace l = topMix1 l <|> topMix2 l <|> topMix3 l
-- | places possibly followed by other ('afterPlace') mixfix components
middle :: ([String], [String]) -> GenParser Char st [Token]
middle l = many1 placeT <++> optionL (afterPlace l)
-- | many (balanced, top-level) mixfix components ('afterPlace')
-- possibly interspersed with multiple places ('placeT')
tokStart :: ([String], [String]) -> GenParser Char st [Token]
tokStart l = afterPlace l <++> flat (many (middle l))
-- | any (balanced, top-level) mixfix components
-- possibly starting with places but no single 'placeT' only.
start :: ([String], [String]) -> GenParser Char st [Token]
start l = tokStart l <|> placeT <:> (tokStart l <|>
many1 placeT <++> optionL (tokStart l))
<?> "id"
-- * parser for mixfix and compound 'Id's
-- | parsing a compound list
comps :: ([String], [String]) -> GenParser Char st ([Id], Range)
comps keys = do
o <- oBracketT
(ts, ps) <- mixId keys keys `separatedBy` commaT
c <- cBracketT
return (ts, toRange o ps c)
{- | parse mixfix components ('start') and an optional compound list ('comps')
if the last token was no place. Accept possibly further places.
Key strings (second argument) within compound list may differ from
top-level key strings (first argument)!
-}
mixId :: ([String], [String]) -> ([String], [String]) -> GenParser Char st Id
mixId keys idKeys = do
l <- start keys
if isPlace (last l) then return (Id l [] nullRange) else do
(c, p) <- option ([], nullRange) (comps idKeys)
u <- many placeT
return (Id (l ++ u) c p)
-- | the Casl key strings (signs first) with additional keywords
casl_keys :: [String] -> ([String], [String])
casl_keys ks = (ks ++ casl_reserved_fops, ks ++ casl_reserved_fwords)
-- | Casl ids for operations and predicates
parseId :: [String] -> GenParser Char st Id
parseId ks = mixId (casl_keys ks) (casl_keys ks)
-- | disallow 'barS' within the top-level of constructor names
consId :: [String] -> GenParser Char st Id
consId ks = mixId (barS : ks ++ casl_reserved_fops,
ks ++ casl_reserved_fwords) $ casl_keys ks
-- | Casl sorts are simple words ('varId'),
-- but may have a compound list ('comps')
sortId :: [String] -> GenParser Char st Id
sortId ks =
do s <- varId ks
(c, p) <- option ([], nullRange) (comps $ casl_keys ks)
return (Id [s] c p)
-- * parser for simple 'Id's
-- | parse a simple word not in 'casl_reserved_fwords'
varId :: [String] -> GenParser Char st Token
varId ks = pToken (reserved (ks ++ casl_reserved_fwords) scanAnyWords)
-- | like 'varId'. 'Common.Id.SIMPLE_ID' for spec- and view names
simpleId :: GenParser Char st Token
simpleId = pToken (reserved casl_structured_reserved_words scanAnyWords)
-- * parser for key 'Token's
-- | parse a question mark key sign ('quMark')
quMarkT :: GenParser Char st Token
quMarkT = pToken $ toKey quMark
-- | parse a 'colonS' possibly immediately followed by a 'quMark'
colonST :: GenParser Char st Token
colonST = pToken $ try $ string colonS << notFollowedBy
(oneOf $ delete '?' signChars)
-- | parse the product key sign ('prodS' or 'timesS')
crossT :: GenParser Char st Token
crossT = pToken (toKey prodS <|> toKey timesS) <?> "cross"