Token.hs revision 72b8c0349a58cf0eb361cb5bb410d95a0372900a
1a38107941725211e7c3f051f7a8f5e12199f03acmaeder{- |
9d0567bda351efa4286f38e85fb3e41ecd3683eaChristian MaederModule : $Header$
e9458b1a7a19a63aa4c179f9ab20f4d50681c168Jens ElknerDescription : parser for CASL 'Id's based on "Common.Lexer"
9d0567bda351efa4286f38e85fb3e41ecd3683eaChristian MaederCopyright : (c) Christian Maeder and Uni Bremen 2002-2004
9d0567bda351efa4286f38e85fb3e41ecd3683eaChristian MaederLicense : GPLv2 or higher, see LICENSE.txt
9d0567bda351efa4286f38e85fb3e41ecd3683eaChristian Maeder
9d0567bda351efa4286f38e85fb3e41ecd3683eaChristian MaederMaintainer : Christian.Maeder@dfki.de
9d0567bda351efa4286f38e85fb3e41ecd3683eaChristian MaederStability : provisional
9d0567bda351efa4286f38e85fb3e41ecd3683eaChristian MaederPortability : portable
9d0567bda351efa4286f38e85fb3e41ecd3683eaChristian Maeder
9d0567bda351efa4286f38e85fb3e41ecd3683eaChristian MaederParser for CASL 'Id's based on "Common.Lexer"
9d0567bda351efa4286f38e85fb3e41ecd3683eaChristian Maeder-}
20bd79f8844604c145510c616fecdaf47eba2fdbChristian Maeder
20bd79f8844604c145510c616fecdaf47eba2fdbChristian Maeder{- http://www.cofi.info/Documents/CASL/Summary/
20bd79f8844604c145510c616fecdaf47eba2fdbChristian Maeder from 25 March 2001
9f93b2a8b552789cd939d599504d39732672dc84Christian Maeder
9f93b2a8b552789cd939d599504d39732672dc84Christian Maeder C.2.1 Basic Specifications with Subsorts
38504dc968167ba2e82dd568edeae8d6af4dc145Christian Maeder
20bd79f8844604c145510c616fecdaf47eba2fdbChristian MaederSIMPLE-ID ::= WORDS
20bd79f8844604c145510c616fecdaf47eba2fdbChristian MaederID ::= TOKEN-ID | MIXFIX-ID
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian MaederTOKEN-ID ::= TOKEN
ab0274ab68a174d3e92235b4c4ca865c03901583Christian MaederMIXFIX-ID ::= TOKEN-ID PLACE-TOKEN-ID ... PLACE-TOKEN-ID
20bd79f8844604c145510c616fecdaf47eba2fdbChristian Maeder | PLACE-TOKEN-ID ... PLACE-TOKEN-ID
d3f192025f2836285d9705a959542350e057f281Christian MaederPLACE-TOKEN-ID ::= PLACE TOKEN-ID
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder | PLACE
3b48e17c1da54ee669e70b626d9fbc32ce495b2cChristian MaederPLACE ::= __
20bd79f8844604c145510c616fecdaf47eba2fdbChristian Maeder
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian MaederTOKEN ::= WORDS | DOT-WORDS | DIGIT | QUOTED-CHAR
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder | SIGNS
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder SIGNS are adapted here and more permissive as in the summary
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder WORDS and NO-BRACKET-SIGNS are treated equally
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder legal are, ie. "{a}", "{+}", "a{}="
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder illegal is "a=" (no two SIMPLE-TOKEN stay beside each other)
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder
1a38107941725211e7c3f051f7a8f5e12199f03acmaeder SIMPLE-TOKEN ::= WORDS | DOT-WORDS | DIGIT | QUOTED-CHAR
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder | NO-BRACKET-SIGNS
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder
20bd79f8844604c145510c616fecdaf47eba2fdbChristian Maeder STB ::= SIMPLE-TOKEN BRACKETS
23a0d43ca980983c7d7aebaa9f03bfe120be7de8Christian Maeder BST ::= BRACKETS SIMPLE-TOKEN
1a38107941725211e7c3f051f7a8f5e12199f03acmaeder
20bd79f8844604c145510c616fecdaf47eba2fdbChristian Maeder SIGNS ::= BRACKETS
23a0d43ca980983c7d7aebaa9f03bfe120be7de8Christian Maeder | BRACKETS STB ... STB
1a38107941725211e7c3f051f7a8f5e12199f03acmaeder | BRACKETS STB ... STB SIMPLE-TOKEN
20bd79f8844604c145510c616fecdaf47eba2fdbChristian Maeder | SIMPLE-TOKEN
23a0d43ca980983c7d7aebaa9f03bfe120be7de8Christian Maeder | SIMPLE-TOKEN BST ... BST
1a38107941725211e7c3f051f7a8f5e12199f03acmaeder | SIMPLE-TOKEN BST ... BST BRACKETS
20bd79f8844604c145510c616fecdaf47eba2fdbChristian Maeder
23a0d43ca980983c7d7aebaa9f03bfe120be7de8Christian Maeder A SIMPLE-TOKEN followed by "[" outside nested brackets
1a38107941725211e7c3f051f7a8f5e12199f03acmaeder will be taken as the beginning of a compound list.
20bd79f8844604c145510c616fecdaf47eba2fdbChristian Maeder Within SIGNS brackets need not be balanced,
20bd79f8844604c145510c616fecdaf47eba2fdbChristian Maeder only after their composition to a MIXFIX-ID.
23a0d43ca980983c7d7aebaa9f03bfe120be7de8Christian Maeder
1a38107941725211e7c3f051f7a8f5e12199f03acmaeder BRACKETS = BRACKET ... BRACKET
20bd79f8844604c145510c616fecdaf47eba2fdbChristian Maeder BRACKET ::= [ | ] | { | }
23a0d43ca980983c7d7aebaa9f03bfe120be7de8Christian Maeder
1a38107941725211e7c3f051f7a8f5e12199f03acmaeder 2.4 Identifiers
3b48e17c1da54ee669e70b626d9fbc32ce495b2cChristian Maeder brackets/braces within MIXFIX-ID must be balanced
ab0274ab68a174d3e92235b4c4ca865c03901583Christian Maeder
ab0274ab68a174d3e92235b4c4ca865c03901583Christian Maeder C.2.2 Structured Specifications
ab0274ab68a174d3e92235b4c4ca865c03901583Christian Maeder
ab0274ab68a174d3e92235b4c4ca865c03901583Christian Maeder TOKEN-ID ::= ... | TOKEN [ ID ,..., ID ]
ab0274ab68a174d3e92235b4c4ca865c03901583Christian Maeder
9f93b2a8b552789cd939d599504d39732672dc84Christian Maeder A compound list must follow the last TOKEN within MIXFIX-ID,
ab0274ab68a174d3e92235b4c4ca865c03901583Christian Maeder so a compound list is never nested within (balanced) mixfix BRACKETS.
ab0274ab68a174d3e92235b4c4ca865c03901583Christian Maeder Only PLACEs may follow a compound list.
ab0274ab68a174d3e92235b4c4ca865c03901583Christian Maeder The IDs within the compound list may surely be compound IDs again.
ab0274ab68a174d3e92235b4c4ca865c03901583Christian Maeder-}
3b48e17c1da54ee669e70b626d9fbc32ce495b2cChristian Maeder
9f93b2a8b552789cd939d599504d39732672dc84Christian Maedermodule Common.Token where
9f93b2a8b552789cd939d599504d39732672dc84Christian Maeder
9f93b2a8b552789cd939d599504d39732672dc84Christian Maederimport Common.Id
9f93b2a8b552789cd939d599504d39732672dc84Christian Maederimport Common.Keywords
9f93b2a8b552789cd939d599504d39732672dc84Christian Maederimport Common.Lexer
23a0d43ca980983c7d7aebaa9f03bfe120be7de8Christian Maederimport Common.Parsec
9f93b2a8b552789cd939d599504d39732672dc84Christian Maeder
9f93b2a8b552789cd939d599504d39732672dc84Christian Maederimport Text.ParserCombinators.Parsec
23a0d43ca980983c7d7aebaa9f03bfe120be7de8Christian Maeder
23a0d43ca980983c7d7aebaa9f03bfe120be7de8Christian Maeder-- * Casl keyword lists
9f93b2a8b552789cd939d599504d39732672dc84Christian Maeder
9f93b2a8b552789cd939d599504d39732672dc84Christian Maeder-- | reserved signs
9f93b2a8b552789cd939d599504d39732672dc84Christian Maedercasl_reserved_ops :: [String]
23a0d43ca980983c7d7aebaa9f03bfe120be7de8Christian Maedercasl_reserved_ops = [colonS, colonQuMark, defnS, dotS, cDot, mapsTo]
ab0274ab68a174d3e92235b4c4ca865c03901583Christian Maeder
9f93b2a8b552789cd939d599504d39732672dc84Christian Maeder-- | these formula signs are legal in terms, but illegal in declarations
23a0d43ca980983c7d7aebaa9f03bfe120be7de8Christian Maederformula_ops :: [String]
ab0274ab68a174d3e92235b4c4ca865c03901583Christian Maederformula_ops = [equalS, implS, equivS, lOr, lAnd, negS]
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder
d3f192025f2836285d9705a959542350e057f281Christian Maeder-- | all reseverd signs
df15a183ca8bf1c25db69775979905198d7cc8bbChristian Maedercasl_reserved_fops :: [String]
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maedercasl_reserved_fops = formula_ops ++ casl_reserved_ops
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder
d3f192025f2836285d9705a959542350e057f281Christian Maeder-- | reserved keywords
df15a183ca8bf1c25db69775979905198d7cc8bbChristian Maedercasl_basic_reserved_words :: [String]
df15a183ca8bf1c25db69775979905198d7cc8bbChristian Maedercasl_basic_reserved_words =
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder [axiomS, axiomS ++ sS, cogeneratedS, cotypeS, cotypeS ++ sS,
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder esortS, esortS ++ sS, etypeS, etypeS ++ sS,
38504dc968167ba2e82dd568edeae8d6af4dc145Christian Maeder existsS, forallS, freeS, generatedS,
df15a183ca8bf1c25db69775979905198d7cc8bbChristian Maeder opS, opS ++ sS, predS, predS ++ sS,
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder sortS, sortS ++ sS, typeS, typeS ++ sS, varS, varS ++ sS]
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder
66ea26416f702f5e00759ffc767f0f785cc86058Christian Maeder-- | reserved keywords
d3f192025f2836285d9705a959542350e057f281Christian Maedercasl_structured_reserved_words :: [String]
df15a183ca8bf1c25db69775979905198d7cc8bbChristian Maedercasl_structured_reserved_words = libraryS :
df15a183ca8bf1c25db69775979905198d7cc8bbChristian Maeder continuationKeywords ++ otherStartKeywords
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder ++ criticalKeywords
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder-- | keywords terminating a basic spec or starting a new library item
d3f192025f2836285d9705a959542350e057f281Christian MaedercriticalKeywords :: [String]
df15a183ca8bf1c25db69775979905198d7cc8bbChristian MaedercriticalKeywords = terminatingKeywords ++ startingKeywords
499b6cb3ab95a336a324545123e18f387ee4d2a3Christian Maeder
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder-- | keywords terminating a basic spec
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian MaederterminatingKeywords :: [String]
d3f192025f2836285d9705a959542350e057f281Christian MaederterminatingKeywords =
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder [andS, endS, fitS, hideS, revealS, thenS, withS, withinS]
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder
d3f192025f2836285d9705a959542350e057f281Christian Maeder-- | keywords starting a library item
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian MaederstartingKeywords :: [String]
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian MaederstartingKeywords =
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder [ archS, fromS, logicS, newlogicS, refinementS, specS, unitS, viewS
d3f192025f2836285d9705a959542350e057f281Christian Maeder , ontologyS, alignmentS, networkS, equivalenceS, newcomorphismS
df15a183ca8bf1c25db69775979905198d7cc8bbChristian Maeder , interpretationS ]
df15a183ca8bf1c25db69775979905198d7cc8bbChristian Maeder
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder-- | keywords that may follow a defining equal sign
df15a183ca8bf1c25db69775979905198d7cc8bbChristian MaederotherStartKeywords :: [String]
df15a183ca8bf1c25db69775979905198d7cc8bbChristian MaederotherStartKeywords =
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder [closedS, cofreeS, freeS, localS, unitS ++ sS, combineS]
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder-- | other intermediate keywords
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian MaedercontinuationKeywords :: [String]
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian MaedercontinuationKeywords =
d3f192025f2836285d9705a959542350e057f281Christian Maeder [ behaviourallyS, getS, givenS, lambdaS, refinedS, resultS, toS, versionS
df15a183ca8bf1c25db69775979905198d7cc8bbChristian Maeder , excludingS ]
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder
df15a183ca8bf1c25db69775979905198d7cc8bbChristian Maeder-- | reserved keywords
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maedercasl_reserved_words :: [String]
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maedercasl_reserved_words =
d3f192025f2836285d9705a959542350e057f281Christian Maeder casl_basic_reserved_words ++ casl_structured_reserved_words
df15a183ca8bf1c25db69775979905198d7cc8bbChristian Maeder
df15a183ca8bf1c25db69775979905198d7cc8bbChristian Maeder-- | these formula words are legal in terms, but illegal in declarations
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maederformula_words :: [String]
df15a183ca8bf1c25db69775979905198d7cc8bbChristian Maederformula_words = [asS, defS, elseS, ifS, inS, whenS, falseS, notS, trueS]
df15a183ca8bf1c25db69775979905198d7cc8bbChristian Maeder
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder-- | all reserved words
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maedercasl_reserved_fwords :: [String]
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maedercasl_reserved_fwords = formula_words ++ casl_reserved_words
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder
d3f192025f2836285d9705a959542350e057f281Christian Maeder-- * a single 'Token' parser taking lists of key symbols and words as parameter
df15a183ca8bf1c25db69775979905198d7cc8bbChristian Maeder
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder{- | a simple 'Token' parser depending on reserved signs and words
2119c0874c93fc1cdfea381bcfea69e8fdb8b6e2Christian Maeder (including a quoted char, dot-words or a single digit) -}
df15a183ca8bf1c25db69775979905198d7cc8bbChristian Maedersid :: ([String], [String]) -> GenParser Char st Token
sid (kOps, kWords) = pToken (scanQuotedChar <|> scanDotWords
<|> scanDigit <|> reserved kOps scanAnySigns
<|> reserved kWords scanAnyWords <?> "simple-id")
-- * 'Token' lists parsers
-- | balanced mixfix components within braces
braceP :: GenParser Char st [Token] -> GenParser Char st [Token]
braceP p = oBraceT <:> p <++> single cBraceT
<|> try (oBracketT <:> single cBracketT)
-- | balanced mixfix components within square brackets
bracketP :: GenParser Char st [Token] -> GenParser Char st [Token]
bracketP p = oBracketT <:> p <++> single cBracketT
{- | an 'sid' optionally followed by other mixfix components
(without no two consecutive 'sid's) -}
innerMix1 :: ([String], [String]) -> GenParser Char st [Token]
innerMix1 l = sid l <:> optionL (innerMix2 l)
-- | mixfix components not starting with a 'sid' (possibly places)
innerMix2 :: ([String], [String]) -> GenParser Char st [Token]
innerMix2 l = let p = innerList l in
flat (many1 (braceP p <|> bracketP p <|> many1 placeT))
<++> optionL (innerMix1 l)
-- | any mixfix components within braces or brackets
innerList :: ([String], [String]) -> GenParser Char st [Token]
innerList l = optionL (innerMix1 l <|> innerMix2 l <?> "token")
-- | mixfix components starting with a 'sid' (outside 'innerList')
topMix1 :: ([String], [String]) -> GenParser Char st [Token]
topMix1 l = sid l <:> optionL (topMix2 l)
{- | mixfix components starting with braces ('braceP')
that may follow 'sid' outside 'innerList'.
(Square brackets after a 'sid' will be taken as a compound list.) -}
topMix2 :: ([String], [String]) -> GenParser Char st [Token]
topMix2 l = flat (many1 (braceP $ innerList l)) <++> optionL (topMix1 l)
{- | mixfix components starting with square brackets ('bracketP')
that may follow a place ('placeT') (outside 'innerList') -}
topMix3 :: ([String], [String]) -> GenParser Char st [Token]
topMix3 l = let p = innerList l in
bracketP p <++> flat (many (braceP p))
<++> optionL (topMix1 l)
{- | any ('topMix1', 'topMix2', 'topMix3') mixfix components
that may follow a place ('placeT') at the top level -}
afterPlace :: ([String], [String]) -> GenParser Char st [Token]
afterPlace l = topMix1 l <|> topMix2 l <|> topMix3 l
-- | places possibly followed by other ('afterPlace') mixfix components
middle :: ([String], [String]) -> GenParser Char st [Token]
middle l = many1 placeT <++> optionL (afterPlace l)
{- | many (balanced, top-level) mixfix components ('afterPlace')
possibly interspersed with multiple places ('placeT') -}
tokStart :: ([String], [String]) -> GenParser Char st [Token]
tokStart l = afterPlace l <++> flat (many (middle l))
{- | any (balanced, top-level) mixfix components
possibly starting with places but no single 'placeT' only. -}
start :: ([String], [String]) -> GenParser Char st [Token]
start l = tokStart l <|> placeT <:> (tokStart l <|>
many1 placeT <++> optionL (tokStart l))
<?> "id"
-- * parser for mixfix and compound 'Id's
-- | parsing a compound list
comps :: ([String], [String]) -> GenParser Char st ([Id], Range)
comps keys = do
o <- oBracketT
(ts, ps) <- mixId keys keys `separatedBy` commaT
c <- cBracketT
return (ts, toRange o ps c)
{- | parse mixfix components ('start') and an optional compound list ('comps')
if the last token was no place. Accept possibly further places.
Key strings (second argument) within compound list may differ from
top-level key strings (first argument)!
-}
mixId :: ([String], [String]) -> ([String], [String]) -> GenParser Char st Id
mixId keys idKeys = do
l <- start keys
if isPlace (last l) then return (Id l [] nullRange) else do
(c, p) <- option ([], nullRange) (comps idKeys)
u <- many placeT
return (Id (l ++ u) c p)
-- | the Casl key strings (signs first) with additional keywords
casl_keys :: [String] -> ([String], [String])
casl_keys ks = (ks ++ casl_reserved_fops, ks ++ casl_reserved_fwords)
-- | Casl ids for operations and predicates
parseId :: [String] -> GenParser Char st Id
parseId ks = mixId (casl_keys ks) (casl_keys ks)
-- | disallow 'barS' within the top-level of constructor names
consId :: [String] -> GenParser Char st Id
consId ks = mixId (barS : ks ++ casl_reserved_fops,
ks ++ casl_reserved_fwords) $ casl_keys ks
{- | Casl sorts are simple words ('varId'),
but may have a compound list ('comps') -}
sortId :: [String] -> GenParser Char st Id
sortId ks =
do s <- varId ks
(c, p) <- option ([], nullRange) (comps $ casl_keys ks)
return (Id [s] c p)
-- * parser for simple 'Id's
-- | parse a simple word not in 'casl_reserved_fwords'
varId :: [String] -> GenParser Char st Token
varId ks = pToken (reserved (ks ++ casl_reserved_fwords) scanAnyWords)
-- | non-skipping version for simple ids
nonSkippingSimpleId :: GenParser Char st Token
nonSkippingSimpleId =
parseToken $ reserved casl_reserved_words scanAnyWords
-- | like 'varId'. 'Common.Id.SIMPLE_ID' for spec- and view names
simpleId :: GenParser Char st Token
simpleId = nonSkippingSimpleId << skipSmart
-- * parser for key 'Token's
-- | parse a question mark key sign ('quMark')
quMarkT :: GenParser Char st Token
quMarkT = pToken $ toKey quMark
-- | parse a 'colonS' possibly immediately followed by a 'quMark'
colonST :: GenParser Char st Token
colonST = pToken $ try $ string colonS << notFollowedBy
(satisfy $ \ c -> c /= '?' && isSignChar c)
-- | parse the product key sign ('prodS' or 'timesS')
crossT :: GenParser Char st Token
crossT = pToken (toKey prodS <|> toKey timesS) <?> show prodS