Token.hs revision e6d40133bc9f858308654afb1262b8b483ec5922
a79fe3aad8743ea57e473ea5f66a723244cb9c0eMarkus Roggenbach
8267b99c0d7a187abe6f87ad50530dc08f5d1cdcAndy Gimblett{- |
e071fb22ea9923a2a4ff41184d80ca46b55ee932Till MossakowskiModule : $Header$
a79fe3aad8743ea57e473ea5f66a723244cb9c0eMarkus RoggenbachDescription : parser for CASL 'Id's based on "Common.Lexer"
97018cf5fa25b494adffd7e9b4e87320dae6bf47Christian MaederCopyright : (c) Christian Maeder and Uni Bremen 2002-2004
a79fe3aad8743ea57e473ea5f66a723244cb9c0eMarkus RoggenbachLicense : similar to LGPL, see HetCATS/LICENSE.txt or LIZENZ.txt
b4fbc96e05117839ca409f5f20f97b3ac872d1edTill Mossakowski
a79fe3aad8743ea57e473ea5f66a723244cb9c0eMarkus RoggenbachMaintainer : maeder@tzi.de
a79fe3aad8743ea57e473ea5f66a723244cb9c0eMarkus RoggenbachStability : provisional
a79fe3aad8743ea57e473ea5f66a723244cb9c0eMarkus RoggenbachPortability : portable
f3a94a197960e548ecd6520bb768cb0d547457bbChristian Maeder
8267b99c0d7a187abe6f87ad50530dc08f5d1cdcAndy GimblettParser for CASL 'Id's based on "Common.Lexer"
a79fe3aad8743ea57e473ea5f66a723244cb9c0eMarkus Roggenbach
a79fe3aad8743ea57e473ea5f66a723244cb9c0eMarkus Roggenbach-}
fbb2d28086a1860850f661fbf4af531322bac405Christian Maeder{- http://www.cofi.info/Documents/CASL/Summary/
a79fe3aad8743ea57e473ea5f66a723244cb9c0eMarkus Roggenbach from 25 March 2001
a79fe3aad8743ea57e473ea5f66a723244cb9c0eMarkus Roggenbach
a79fe3aad8743ea57e473ea5f66a723244cb9c0eMarkus Roggenbach C.2.1 Basic Specifications with Subsorts
ad270004874ce1d0697fb30d7309f180553bb315Christian Maeder
a79fe3aad8743ea57e473ea5f66a723244cb9c0eMarkus RoggenbachSIMPLE-ID ::= WORDS
e771539425f4a0abef9f94cf4b63690f3603f682Andy GimblettID ::= TOKEN-ID | MIXFIX-ID
e771539425f4a0abef9f94cf4b63690f3603f682Andy GimblettTOKEN-ID ::= TOKEN
e771539425f4a0abef9f94cf4b63690f3603f682Andy GimblettMIXFIX-ID ::= TOKEN-ID PLACE-TOKEN-ID ... PLACE-TOKEN-ID
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett | PLACE-TOKEN-ID ... PLACE-TOKEN-ID
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy GimblettPLACE-TOKEN-ID ::= PLACE TOKEN-ID
e771539425f4a0abef9f94cf4b63690f3603f682Andy Gimblett | PLACE
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy GimblettPLACE ::= __
90047eafd2de482c67bcd13103c6064e9b0cb254Andy Gimblett
90047eafd2de482c67bcd13103c6064e9b0cb254Andy GimblettTOKEN ::= WORDS | DOT-WORDS | DIGIT | QUOTED-CHAR
e771539425f4a0abef9f94cf4b63690f3603f682Andy Gimblett | SIGNS
e771539425f4a0abef9f94cf4b63690f3603f682Andy Gimblett
b22c258cca179a5ffe777b64b32e10687c5f6b2cAndy Gimblett SIGNS are adapted here and more permissive as in the summary
e771539425f4a0abef9f94cf4b63690f3603f682Andy Gimblett WORDS and NO-BRACKET-SIGNS are treated equally
e771539425f4a0abef9f94cf4b63690f3603f682Andy Gimblett legal are, ie. "{a}", "{+}", "a{}="
e771539425f4a0abef9f94cf4b63690f3603f682Andy Gimblett illegal is "a=" (no two SIMPLE-TOKEN stay beside each other)
e771539425f4a0abef9f94cf4b63690f3603f682Andy Gimblett
90047eafd2de482c67bcd13103c6064e9b0cb254Andy Gimblett SIMPLE-TOKEN ::= WORDS | DOT-WORDS | DIGIT | QUOTED-CHAR
90047eafd2de482c67bcd13103c6064e9b0cb254Andy Gimblett | NO-BRACKET-SIGNS
e771539425f4a0abef9f94cf4b63690f3603f682Andy Gimblett
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett STB ::= SIMPLE-TOKEN BRACKETS
90047eafd2de482c67bcd13103c6064e9b0cb254Andy Gimblett BST ::= BRACKETS SIMPLE-TOKEN
90047eafd2de482c67bcd13103c6064e9b0cb254Andy Gimblett
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett SIGNS ::= BRACKETS
a79fe3aad8743ea57e473ea5f66a723244cb9c0eMarkus Roggenbach | BRACKETS STB ... STB
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett | BRACKETS STB ... STB SIMPLE-TOKEN
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett | SIMPLE-TOKEN
41486a487c9b065d4d9d1a8adf63c00925cd455bAndy Gimblett | SIMPLE-TOKEN BST ... BST
929190acb9f2b2f5857dce841c5a389710895515Andy Gimblett | SIMPLE-TOKEN BST ... BST BRACKETS
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett
41486a487c9b065d4d9d1a8adf63c00925cd455bAndy Gimblett A SIMPLE-TOKEN followed by "[" outside nested brackets
41486a487c9b065d4d9d1a8adf63c00925cd455bAndy Gimblett will be taken as the beginning of a compound list.
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett Within SIGNS brackets need not be balanced,
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett only after their composition to a MIXFIX-ID.
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett BRACKETS = BRACKET ... BRACKET
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett BRACKET ::= [ | ] | { | }
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett 2.4 Identifiers
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett brackets/braces within MIXFIX-ID must be balanced
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett C.2.2 Structured Specifications
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett TOKEN-ID ::= ... | TOKEN [ ID ,..., ID ]
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett
eca4db63ed0bdbd93b62678feea6e3eb80aa47bbChristian Maeder A compound list must follow the last TOKEN within MIXFIX-ID,
1df33829303cbf924aa018ac5ce9a28e69c17d22Till Mossakowski so a compound list is never nested within (balanced) mixfix BRACKETS.
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett Only PLACEs may follow a compound list.
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett The IDs within the compound list may surely be compound IDs again.
1df33829303cbf924aa018ac5ce9a28e69c17d22Till Mossakowski-}
1df33829303cbf924aa018ac5ce9a28e69c17d22Till Mossakowski
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblettmodule Common.Token where
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblettimport Common.Keywords
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblettimport Common.Lexer
41486a487c9b065d4d9d1a8adf63c00925cd455bAndy Gimblettimport Common.Id
e771539425f4a0abef9f94cf4b63690f3603f682Andy Gimblettimport Text.ParserCombinators.Parsec
e771539425f4a0abef9f94cf4b63690f3603f682Andy Gimblett
03136b84a0c70d877e227444f0875e209506b9e4Christian Maeder
b3dca469a9e267d6d71acfdeca7bf284d0581dc7Till Mossakowski-- ----------------------------------------------
41486a487c9b065d4d9d1a8adf63c00925cd455bAndy Gimblett-- * Casl keyword lists
b3dca469a9e267d6d71acfdeca7bf284d0581dc7Till Mossakowski-- ----------------------------------------------
41486a487c9b065d4d9d1a8adf63c00925cd455bAndy Gimblett
41486a487c9b065d4d9d1a8adf63c00925cd455bAndy Gimblett-- | reserved signs
03136b84a0c70d877e227444f0875e209506b9e4Christian Maedercasl_reserved_ops :: [String]
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblettcasl_reserved_ops = [colonS, colonQuMark, defnS, dotS, cDot, mapsTo]
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett
1df33829303cbf924aa018ac5ce9a28e69c17d22Till Mossakowski-- | these formula signs are legal in terms, but illegal in declarations
b3dca469a9e267d6d71acfdeca7bf284d0581dc7Till Mossakowskiformula_ops :: [String]
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblettformula_ops = [equalS, implS, equivS, lOr, lAnd, negS]
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblett
41486a487c9b065d4d9d1a8adf63c00925cd455bAndy Gimblett-- | all reseverd signs
afd6ed16928bbd774b6c6c5b3f440a917dd638a1Andy Gimblettcasl_reserved_fops :: [String]
casl_reserved_fops = formula_ops ++ casl_reserved_ops
-- | reserved keywords
casl_basic_reserved_words :: [String]
casl_basic_reserved_words =
[axiomS, axiomS ++ sS, cogeneratedS, cotypeS, cotypeS ++ sS,
existsS, forallS, freeS, generatedS,
opS, opS ++ sS, predS, predS ++ sS,
sortS, sortS ++ sS, typeS, typeS ++ sS, varS, varS ++ sS]
-- | reserved keywords
casl_structured_reserved_words :: [String]
casl_structured_reserved_words =
[andS, archS, behaviourallyS, closedS, cofreeS, endS,
fitS, freeS, fromS, getS, givenS,
hideS, lambdaS, libraryS, localS, logicS,
refinedS, refinementS,
resultS, revealS, specS, thenS, toS,
unitS, unitS ++ sS, versionS, viewS, withS, withinS]
-- | reserved keywords
casl_reserved_words :: [String]
casl_reserved_words =
casl_basic_reserved_words++ casl_structured_reserved_words
-- | these formula words are legal in terms, but illegal in declarations
formula_words :: [String]
formula_words = [asS, defS, elseS, ifS, inS, whenS, falseS, notS, trueS]
-- | all reserved words
casl_reserved_fwords :: [String]
casl_reserved_fwords = formula_words ++ casl_reserved_words
-- ----------------------------------------------
-- * a single 'Token' parser
-- pass list of key symbols and keywords as parameter
-- ----------------------------------------------
-- | a simple 'Token' parser depending on reserved signs and words
-- (including a quoted char, dot-words or a single digit)
sid :: ([String], [String]) -> GenParser Char st Token
sid (kOps, kWords) = pToken (scanQuotedChar <|> scanDotWords
<|> scanDigit <|> reserved kOps scanAnySigns
<|> reserved kWords scanAnyWords <?> "simple-id")
-- * 'Token' lists parsers
-- | balanced mixfix components within braces
braceP :: GenParser Char st [Token] -> GenParser Char st [Token]
braceP p = begDoEnd oBraceT p cBraceT <|> try (oBracketT <:> single cBracketT)
-- | balanced mixfix components within square brackets
bracketP :: GenParser Char st [Token] -> GenParser Char st [Token]
bracketP p = begDoEnd oBracketT p cBracketT
-- | an 'sid' optionally followed by other mixfix components
-- (without no two consecutive 'sid's)
innerMix1 :: ([String], [String]) -> GenParser Char st [Token]
innerMix1 l = sid l <:> option [] (innerMix2 l)
-- | mixfix components not starting with a 'sid' (possibly places)
innerMix2 :: ([String], [String]) -> GenParser Char st [Token]
innerMix2 l = let p = innerList l in
flat (many1 (braceP p <|> bracketP p <|> many1 placeT))
<++> option [] (innerMix1 l)
-- | any mixfix components within braces or brackets
innerList :: ([String], [String]) -> GenParser Char st [Token]
innerList l = option [] (innerMix1 l <|> innerMix2 l <?> "token")
-- | mixfix components starting with a 'sid' (outside 'innerList')
topMix1 :: ([String], [String]) -> GenParser Char st [Token]
topMix1 l = sid l <:> option [] (topMix2 l)
-- | mixfix components starting with braces ('braceP')
-- that may follow 'sid' outside 'innerList'.
-- (Square brackets after a 'sid' will be taken as a compound list.)
topMix2 :: ([String], [String]) -> GenParser Char st [Token]
topMix2 l = flat (many1 (braceP $ innerList l)) <++> option [] (topMix1 l)
-- | mixfix components starting with square brackets ('bracketP')
-- that may follow a place ('placeT') (outside 'innerList')
topMix3 :: ([String], [String]) -> GenParser Char st [Token]
topMix3 l = let p = innerList l in
bracketP p <++> flat (many (braceP p))
<++> option [] (topMix1 l)
-- | any ('topMix1', 'topMix2', 'topMix3') mixfix components
-- that may follow a place ('placeT') at the top level
afterPlace :: ([String], [String]) -> GenParser Char st [Token]
afterPlace l = topMix1 l <|> topMix2 l<|> topMix3 l
-- | places possibly followed by other ('afterPlace') mixfix components
middle :: ([String], [String]) -> GenParser Char st [Token]
middle l = many1 placeT <++> option [] (afterPlace l)
-- | many (balanced, top-level) mixfix components ('afterPlace')
-- possibly interspersed with multiple places ('placeT')
tokStart :: ([String], [String]) -> GenParser Char st [Token]
tokStart l = afterPlace l <++> flat (many (middle l))
-- | any (balanced, top-level) mixfix components
-- possibly starting with places but no single 'placeT' only.
start :: ([String], [String]) -> GenParser Char st [Token]
start l = tokStart l <|> placeT <:> (tokStart l <|>
many1 placeT <++> option [] (tokStart l))
<?> "id"
-- ----------------------------------------------
-- * parser for mixfix and compound 'Id's
-- ----------------------------------------------
-- | parsing a compound list
comps :: ([String], [String]) -> GenParser Char st ([Id], Range)
comps keys = do o <- oBracketT
(ts, ps) <- mixId keys keys `separatedBy` commaT
c <- cBracketT
return (ts, toPos o ps c)
{- | parse mixfix components ('start') and an optional compound list ('comps')
if the last token was no place. Accept possibly further places.
Key strings (second argument) within compound list may differ from
top-level key strings (frist argument)!
-}
mixId :: ([String], [String]) -> ([String], [String]) -> GenParser Char st Id
mixId keys idKeys =
do l <- start keys
if isPlace (last l) then return (Id l [] nullRange)
else do (c, p) <- option ([], nullRange) (comps idKeys)
u <- many placeT
return (Id (l++u) c p)
-- | the Casl key strings (signs first) with additional keywords
casl_keys :: [String] -> ([String], [String])
casl_keys ks = (ks ++ casl_reserved_fops, ks ++ casl_reserved_fwords)
-- | Casl ids for operations and predicates
parseId :: [String] -> GenParser Char st Id
parseId ks = mixId (casl_keys ks) (casl_keys ks)
-- | disallow 'barS' with in the top-level of constructor names
consId :: [String] -> GenParser Char st Id
consId ks = mixId (barS:ks++casl_reserved_fops,
ks++casl_reserved_fwords) $ casl_keys ks
-- | Casl sorts are simple words ('varId'),
-- but may have a compound list ('comps')
sortId :: [String] -> GenParser Char st Id
sortId ks =
do s <- varId ks
(c, p) <- option ([], nullRange) (comps $ casl_keys ks)
return (Id [s] c p)
-- ----------------------------------------------
-- * parser for simple 'Id's
-- ----------------------------------------------
-- | parse a simple word not in 'casl_reserved_fwords'
varId :: [String] -> GenParser Char st Token
varId ks = pToken (reserved (ks++casl_reserved_fwords) scanAnyWords)
-- | like 'varId'. 'Common.Id.SIMPLE_ID' for spec- and view names
simpleId :: GenParser Char st Token
simpleId = pToken (reserved casl_structured_reserved_words scanAnyWords)
-- ----------------------------------------------
-- * parser for key 'Token's
-- ----------------------------------------------
-- | parse a question mark key sign ('quMark')
quMarkT :: GenParser Char st Token
quMarkT = pToken $ toKey quMark
-- | parse a colon ('colonS') even if other signs (like 'quMark')
-- immediately follow.
colonST :: GenParser Char st Token
colonST = pToken (string colonS)
-- | parse the product key sign ('prodS' or 'timesS')
crossT :: GenParser Char st Token
crossT = pToken (toKey prodS <|> toKey timesS) <?> "cross"