hets/Common/Token.hs

	Token.hs revision 743178d5294deadc2ed15e56b5e58ca0e7101fe4
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett{- |
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian MaederModule      :  $Header$
e071fb22ea9923a2a4ff41184d80ca46b55ee932Till MossakowskiDescription :  parser for CASL 'Id's based on "Common.Lexer"
020cdb5dad6b871aba61136a0e1567c00426de87Andy GimblettCopyright   :  (c) Christian Maeder and Uni Bremen 2002-2004
98890889ffb2e8f6f722b00e265a211f13b5a861Corneliu-Claudiu ProdescuLicense     :  GPLv2 or higher, see LICENSE.txt
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett
020cdb5dad6b871aba61136a0e1567c00426de87Andy GimblettMaintainer  :  Christian.Maeder@dfki.de
020cdb5dad6b871aba61136a0e1567c00426de87Andy GimblettStability   :  provisional
020cdb5dad6b871aba61136a0e1567c00426de87Andy GimblettPortability :  portable
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett
020cdb5dad6b871aba61136a0e1567c00426de87Andy GimblettParser for CASL 'Id's based on "Common.Lexer"
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett-}
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett{- http://www.cofi.info/Documents/CASL/Summary/
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett   from 25 March 2001
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett   C.2.1 Basic Specifications with Subsorts
20ed727452613e36c0a95ddabf7ecc81cf941ed2Andy Gimblett
020cdb5dad6b871aba61136a0e1567c00426de87Andy GimblettSIMPLE-ID       ::= WORDS
f909337bf7012aca169c0b56b89efbd4a310f8daAndy GimblettID              ::= TOKEN-ID  |  MIXFIX-ID
9f93b2a8b552789cd939d599504d39732672dc84Christian MaederTOKEN-ID        ::= TOKEN
9f93b2a8b552789cd939d599504d39732672dc84Christian MaederMIXFIX-ID       ::= TOKEN-ID PLACE-TOKEN-ID ... PLACE-TOKEN-ID
f909337bf7012aca169c0b56b89efbd4a310f8daAndy Gimblett                  |          PLACE-TOKEN-ID ... PLACE-TOKEN-ID
04ceed96d1528b939f2e592d0656290d81d1c045Andy GimblettPLACE-TOKEN-ID  ::= PLACE TOKEN-ID
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett                  | PLACE
020cdb5dad6b871aba61136a0e1567c00426de87Andy GimblettPLACE           ::= __
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett
020cdb5dad6b871aba61136a0e1567c00426de87Andy GimblettTOKEN           ::= WORDS  |  DOT-WORDS  |  DIGIT  |  QUOTED-CHAR
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett                  | SIGNS
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett   SIGNS are adapted here and more permissive as in the summary
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett   WORDS and NO-BRACKET-SIGNS are treated equally
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett   legal are, ie. "{a}", "{+}", "a{}="
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett   illegal is "a=" (no two SIMPLE-TOKEN stay beside each other)
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett   SIMPLE-TOKEN ::= WORDS  |  DOT-WORDS  |  DIGIT  |  QUOTED-CHAR
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett                  | NO-BRACKET-SIGNS
20ed727452613e36c0a95ddabf7ecc81cf941ed2Andy Gimblett
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett   STB ::= SIMPLE-TOKEN BRACKETS
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett   BST ::= BRACKETS SIMPLE-TOKEN
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett   SIGNS ::= BRACKETS
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett           | BRACKETS STB ... STB
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett           | BRACKETS STB ... STB SIMPLE-TOKEN
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett           | SIMPLE-TOKEN
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett           | SIMPLE-TOKEN BST ... BST
020cdb5dad6b871aba61136a0e1567c00426de87Andy Gimblett           | SIMPLE-TOKEN BST ... BST BRACKETS
2f06b54890375b6cac90394b80b07bd451d728fcAndy Gimblett
2f06b54890375b6cac90394b80b07bd451d728fcAndy Gimblett   A SIMPLE-TOKEN followed by "[" outside nested brackets
2f06b54890375b6cac90394b80b07bd451d728fcAndy Gimblett   will be taken as the beginning of a compound list.
2f06b54890375b6cac90394b80b07bd451d728fcAndy Gimblett   Within SIGNS brackets need not be balanced,
2f06b54890375b6cac90394b80b07bd451d728fcAndy Gimblett   only after their composition to a MIXFIX-ID.
2f06b54890375b6cac90394b80b07bd451d728fcAndy Gimblett
a09bfcbcb0fba5663fca1968aa82daebf2e092c4Andy Gimblett   BRACKETS = BRACKET ... BRACKET
ac5ec613b786cd05f495b568ab5214c31a333e67Andy Gimblett   BRACKET         ::= [  |  ]  |  {  |  }
a09bfcbcb0fba5663fca1968aa82daebf2e092c4Andy Gimblett
a88d32442096d4fd88fce34842ca6f8cf34d8160Christian Maeder   2.4 Identifiers
a88d32442096d4fd88fce34842ca6f8cf34d8160Christian Maeder   brackets/braces within MIXFIX-ID must be balanced
a88d32442096d4fd88fce34842ca6f8cf34d8160Christian Maeder
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maeder   C.2.2 Structured Specifications
a88d32442096d4fd88fce34842ca6f8cf34d8160Christian Maeder
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maeder   TOKEN-ID        ::= ...  |  TOKEN [ ID ,..., ID ]
a88d32442096d4fd88fce34842ca6f8cf34d8160Christian Maeder
a88d32442096d4fd88fce34842ca6f8cf34d8160Christian Maeder   A compound list must follow the last TOKEN within MIXFIX-ID,
a88d32442096d4fd88fce34842ca6f8cf34d8160Christian Maeder   so a compound list is never nested within (balanced) mixfix BRACKETS.
a88d32442096d4fd88fce34842ca6f8cf34d8160Christian Maeder   Only PLACEs may follow a compound list.
a88d32442096d4fd88fce34842ca6f8cf34d8160Christian Maeder   The IDs within the compound list may surely be compound IDs again.
a88d32442096d4fd88fce34842ca6f8cf34d8160Christian Maeder-}
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maeder
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maedermodule Common.Token where
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maeder
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maederimport Common.Id
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maederimport Common.Keywords
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maederimport Common.Lexer
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maederimport Common.Parsec
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maeder
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maederimport Text.ParserCombinators.Parsec
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maeder
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maeder-- * Casl keyword lists
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maeder
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maeder-- | reserved signs
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maedercasl_reserved_ops :: [String]
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maedercasl_reserved_ops = [colonS, colonQuMark, defnS, dotS, cDot, mapsTo]
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maeder
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maeder-- | these formula signs are legal in terms, but illegal in declarations
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maederformula_ops :: [String]
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maederformula_ops = [equalS, implS, equivS, lOr, lAnd, negS]
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maeder
12b2ae689353ecbaad720a9af9f9be01c1a3fe2dChristian Maeder-- | all reseverd signs
casl_reserved_fops :: [String]
casl_reserved_fops = formula_ops ++ casl_reserved_ops

-- | reserved keywords
casl_basic_reserved_words :: [String]
casl_basic_reserved_words =
    [axiomS, axiomS ++ sS, cogeneratedS, cotypeS, cotypeS ++ sS,
     esortS, esortS ++ sS, etypeS, etypeS ++ sS,
     existsS, forallS, freeS, generatedS,
     opS, opS ++ sS, predS, predS ++ sS,
     sortS, sortS ++ sS, typeS, typeS ++ sS, varS, varS ++ sS]

-- | reserved keywords
casl_structured_reserved_words :: [String]
casl_structured_reserved_words = libraryS :
    continuationKeywords ++ otherStartKeywords
    ++ criticalKeywords

-- | keywords terminating a basic spec or starting a new library item
criticalKeywords :: [String]
criticalKeywords = terminatingKeywords ++ startingKeywords

-- | keywords terminating a basic spec
terminatingKeywords :: [String]
terminatingKeywords =
    [andS, endS, fitS, hideS, revealS, thenS, withS, withinS]

-- | keywords starting a library item
startingKeywords :: [String]
startingKeywords =
    [ archS, fromS, logicS, newlogicS, refinementS, specS, unitS, viewS
    , ontologyS, alignmentS, diagramS, equivalenceS, newcomorphismS
    , interpretationS ]

-- | keywords that may follow a defining equal sign
otherStartKeywords :: [String]
otherStartKeywords =
    [closedS, cofreeS, freeS, localS, unitS ++ sS, combineS]

-- | other intermediate keywords
continuationKeywords :: [String]
continuationKeywords =
    [ behaviourallyS, getS, givenS, lambdaS, refinedS, resultS, toS, versionS
    , excludingS ]

-- | reserved keywords
casl_reserved_words :: [String]
casl_reserved_words =
   casl_basic_reserved_words ++ casl_structured_reserved_words

-- | these formula words are legal in terms, but illegal in declarations
formula_words :: [String]
formula_words = [asS, defS, elseS, ifS, inS, whenS, falseS, notS, trueS]

-- | all reserved words
casl_reserved_fwords :: [String]
casl_reserved_fwords = formula_words ++ casl_reserved_words

-- * a single 'Token' parser taking lists of key symbols and words as parameter

{- | a simple 'Token' parser depending on reserved signs and words
   (including a quoted char, dot-words or a single digit) -}
sid :: ([String], [String]) -> GenParser Char st Token
sid (kOps, kWords) = pToken (scanQuotedChar <|> scanDotWords
                <|> scanDigit <|> reserved kOps scanAnySigns
                <|> reserved kWords scanAnyWords <?> "simple-id")

-- * 'Token' lists parsers

-- | balanced mixfix components within braces
braceP :: GenParser Char st [Token] -> GenParser Char st [Token]
braceP p = oBraceT <:> p <++> single cBraceT
           <|> try (oBracketT <:> single cBracketT)
-- | balanced mixfix components within square brackets
bracketP :: GenParser Char st [Token] -> GenParser Char st [Token]
bracketP p = oBracketT <:> p <++> single cBracketT

{- | an 'sid' optionally followed by other mixfix components
   (without no two consecutive 'sid's) -}
innerMix1 :: ([String], [String]) -> GenParser Char st [Token]
innerMix1 l = sid l <:> optionL (innerMix2 l)

-- | mixfix components not starting with a 'sid' (possibly places)
innerMix2 :: ([String], [String]) -> GenParser Char st [Token]
innerMix2 l = let p = innerList l in
                  flat (many1 (braceP p <|> bracketP p <|> many1 placeT))
                           <++> optionL (innerMix1 l)

-- | any mixfix components within braces or brackets
innerList :: ([String], [String]) -> GenParser Char st [Token]
innerList l = optionL (innerMix1 l <|> innerMix2 l <?> "token")

-- | mixfix components starting with a 'sid' (outside 'innerList')
topMix1 :: ([String], [String]) -> GenParser Char st [Token]
topMix1 l = sid l <:> optionL (topMix2 l)

{- | mixfix components starting with braces ('braceP')
   that may follow 'sid' outside 'innerList'.
   (Square brackets after a 'sid' will be taken as a compound list.) -}
topMix2 :: ([String], [String]) -> GenParser Char st [Token]
topMix2 l = flat (many1 (braceP $ innerList l)) <++> optionL (topMix1 l)

{- | mixfix components starting with square brackets ('bracketP')
   that may follow a place ('placeT') (outside 'innerList') -}
topMix3 :: ([String], [String]) -> GenParser Char st [Token]
topMix3 l = let p = innerList l in
                bracketP p <++> flat (many (braceP p))
                             <++> optionL (topMix1 l)

{- | any ('topMix1', 'topMix2', 'topMix3') mixfix components
   that may follow a place ('placeT') at the top level -}
afterPlace :: ([String], [String]) -> GenParser Char st [Token]
afterPlace l = topMix1 l <|> topMix2 l <|> topMix3 l

-- | places possibly followed by other ('afterPlace') mixfix components
middle :: ([String], [String]) -> GenParser Char st [Token]
middle l = many1 placeT <++> optionL (afterPlace l)

{- | many (balanced, top-level) mixfix components ('afterPlace')
   possibly interspersed with multiple places ('placeT') -}
tokStart :: ([String], [String]) -> GenParser Char st [Token]
tokStart l = afterPlace l <++> flat (many (middle l))

{- | any (balanced, top-level) mixfix components
   possibly starting with places but no single 'placeT' only. -}
start :: ([String], [String]) -> GenParser Char st [Token]
start l = tokStart l <|> placeT <:> (tokStart l <|>
                                 many1 placeT <++> optionL (tokStart l))
                                     <?> "id"

-- * parser for mixfix and compound 'Id's

-- | parsing a compound list
comps :: ([String], [String]) -> GenParser Char st ([Id], Range)
comps keys = do
  o <- oBracketT
  (ts, ps) <- mixId keys keys `separatedBy` commaT
  c <- cBracketT
  return (ts, toRange o ps c)

{- | parse mixfix components ('start') and an optional compound list ('comps')
   if the last token was no place. Accept possibly further places.
   Key strings (second argument) within compound list may differ from
   top-level key strings (first argument)!
-}
mixId :: ([String], [String]) -> ([String], [String]) -> GenParser Char st Id
mixId keys idKeys = do
  l <- start keys
  if isPlace (last l) then return (Id l [] nullRange) else do
    (c, p) <- option ([], nullRange) (comps idKeys)
    u <- many placeT
    return (Id (l ++ u) c p)

-- | the Casl key strings (signs first) with additional keywords
casl_keys :: [String] -> ([String], [String])
casl_keys ks = (ks ++ casl_reserved_fops, ks ++ casl_reserved_fwords)

-- | Casl ids for operations and predicates
parseId :: [String] -> GenParser Char st Id
parseId ks = mixId (casl_keys ks) (casl_keys ks)

-- | disallow 'barS' within the top-level of constructor names
consId :: [String] -> GenParser Char st Id
consId ks = mixId (barS : ks ++ casl_reserved_fops,
                   ks ++ casl_reserved_fwords) $ casl_keys ks

{- | Casl sorts are simple words ('varId'),
   but may have a compound list ('comps') -}
sortId :: [String] -> GenParser Char st Id
sortId ks =
    do s <- varId ks
       (c, p) <- option ([], nullRange) (comps $ casl_keys ks)
       return (Id [s] c p)

-- * parser for simple 'Id's

-- | parse a simple word not in 'casl_reserved_fwords'
varId :: [String] -> GenParser Char st Token
varId ks = pToken (reserved (ks ++ casl_reserved_fwords) scanAnyWords)

-- | non-skipping version for simple ids
nonSkippingSimpleId :: GenParser Char st Token
nonSkippingSimpleId =
  parseToken $ reserved casl_reserved_words scanAnyWords

-- | like 'varId'.  'Common.Id.SIMPLE_ID' for spec- and view names
simpleId :: GenParser Char st Token
simpleId = nonSkippingSimpleId << skipSmart

-- * parser for key 'Token's

-- | parse a question mark key sign ('quMark')
quMarkT :: GenParser Char st Token
quMarkT = pToken $ toKey quMark

-- | parse a 'colonS' possibly immediately followed by a 'quMark'
colonST :: GenParser Char st Token
colonST = pToken $ try $ string colonS << notFollowedBy
    (satisfy $ \ c -> c /= '?' && isSignChar c)

-- | parse the product key sign ('prodS' or 'timesS')
crossT :: GenParser Char st Token
crossT = pToken (toKey prodS <|> toKey timesS) <?> show prodS