d0e518695adc90b82233b99af7dffbb3d3f92c00amw * CDDL HEADER START
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * The contents of this file are subject to the terms of the
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Common Development and Distribution License (the "License").
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * You may not use this file except in compliance with the License.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * See the License for the specific language governing permissions
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * and limitations under the License.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * When distributing Covered Code, include this CDDL HEADER in each
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * If applicable, add the following below this CDDL HEADER, with the
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * fields enclosed by brackets "[]" replaced with your own identifying
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * information: Portions Copyright [yyyy] [name of copyright owner]
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * CDDL HEADER END
a0b6e447978c306e15941d158bf6939a42ed2726Alan Wright * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Use is subject to license terms.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * C-like lexical analysis.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * 1. Define a "struct node"
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * 2. Define a "struct symbol" that encapsulates a struct node.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * 3. Define a "struct integer" that encapsulates a struct node.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * 4. Set the YACC stack type in the grammar:
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * #define YYSTYPE struct node *
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * 5. Define %token's in the grammar for IDENTIFIER, STRING and INTEGER.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Using "_KW" as a suffix for keyword tokens, i.e. "struct" is
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * "%token STRUCT_KW":
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * // atomic values
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * %token INTEGER STRING IDENTIFIER
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * // keywords
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * %token STRUCT_KW CASE_KW
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * // operators
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * %token PLUS MINUS ASSIGN ARROW
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * // overloaded tokens (++ --, < > <= >=, == !=, += -= *= ...)
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * %token INCOP RELOP EQUOP ASSOP
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * 6. It's easiest to use the yacc(1) generated token numbers for node
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * labels. For node labels that are not actually part of the grammar,
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * use a %token with an L_ prefix:
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * // node labels (can't be generated by lex)
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * %token L_LT L_LTE L_GT L_GTE L_EQU L_NEQ
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * 7. Call set_lex_input() before parsing.
/*
 * Whitespace classifiers.
 *
 * iswhite(c):  true for space, tab, newline or form feed.
 * is_white(c): true for space, carriage return, tab or form feed;
 *              newline is NOT matched by this one.
 *
 * Every use of the parameter is parenthesized so that an expression
 * argument such as (ch & 0xff) is compared as a whole; without the
 * parentheses "ch & 0xff == '\r'" would bind as "ch & (0xff == '\r')".
 * Both macros may evaluate their argument up to four times, so do not
 * pass an expression with side effects.
 */
#define iswhite(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\f')
#define is_white(c) ((c) == ' ' || (c) == '\r' || (c) == '\t' || (c) == '\f')
d0e518695adc90b82233b99af7dffbb3d3f92c00amw (is_digit(c) || is_between((c), 'A', 'F') || is_between((c), 'a', 'f'))
d0e518695adc90b82233b99af7dffbb3d3f92c00amw/* In yacc(1) generated parser */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * The keywtab[] and optable[] could be external to this lex
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * and it would all still work.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Enter the symbols for keyword.
d0e518695adc90b82233b99af7dffbb3d3f92c00amwstatic void
d0e518695adc90b82233b99af7dffbb3d3f92c00amw if (c == '\n') {
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Handle preprocessor lines. This just notes
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * which file we're processing.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw /* note: no ungetc() of newline, we don't want to count it */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw /* not a line we know */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Skip white space
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Symbol? Might be a keyword or just an identifier
d0e518695adc90b82233b99af7dffbb3d3f92c00amw /* we got a symbol */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw } while (is_sfollow(c));
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Integer constant?
d0e518695adc90b82233b99af7dffbb3d3f92c00amw /* we got a number */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw if (c == '0') {
d0e518695adc90b82233b99af7dffbb3d3f92c00amw /* handle hex specially */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw } while (is_xdigit(c));
d0e518695adc90b82233b99af7dffbb3d3f92c00amw } while (is_digit(c));
d0e518695adc90b82233b99af7dffbb3d3f92c00amw /* could be anything */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw while (is_digit(c)) {
d0e518695adc90b82233b99af7dffbb3d3f92c00amw /* Could handle strings. We don't seem to need them yet */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw xc = getch(fp); /* get look-ahead for two-char lexemes */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Look for to-end-of-line comment
d0e518695adc90b82233b99af7dffbb3d3f92c00amw /* eat the comment */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Look for multi-line comment
d0e518695adc90b82233b99af7dffbb3d3f92c00amw /* eat the comment */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw /* that's it */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw if (c == '\n')
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Use symbol table lookup for two-character and
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * one-character operator tokens.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw /* there better be a keyword attached */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw /* Try a one-character form */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw /* there better be a keyword attached */
a0b6e447978c306e15941d158bf6939a42ed2726Alan Wright compile_error("unrecognized character: 0x%02x (%c)", c, c);
a0b6e447978c306e15941d158bf6939a42ed2726Alan Wright compile_error("unrecognized character: 0x%02x", c);
d0e518695adc90b82233b99af7dffbb3d3f92c00amw for (pp = &symbol_list; (p = *pp) != 0; pp = &p->next) {
d0e518695adc90b82233b99af7dffbb3d3f92c00amw return (p);
d0e518695adc90b82233b99af7dffbb3d3f92c00amw return (0);
d0e518695adc90b82233b99af7dffbb3d3f92c00amw for (pp = &symbol_list; (p = *pp) != 0; pp = &p->next) {
d0e518695adc90b82233b99af7dffbb3d3f92c00amw return (p);
d0e518695adc90b82233b99af7dffbb3d3f92c00amw return (p);
d0e518695adc90b82233b99af7dffbb3d3f92c00amw for (pp = &integer_list; (p = *pp) != 0; pp = &p->next) {
d0e518695adc90b82233b99af7dffbb3d3f92c00amw return (p);
d0e518695adc90b82233b99af7dffbb3d3f92c00amw return (p);
d0e518695adc90b82233b99af7dffbb3d3f92c00amw /* NOTREACHED */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw return (p);
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * The input context (filename, line number) is maintained by the
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * lexical analysis, and we generally want such info reported for
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * errors in a consistent manner.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw (void) fprintf(stderr, "ndrgen: compile error: %s:%d: %s\n",
d0e518695adc90b82233b99af7dffbb3d3f92c00amw (void) fprintf(stderr, "ndrgen: fatal error: %s\n", buf);
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Setup nodes for the lexical analyzer.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * list: item
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * | list item ={ n_splice($1, $2); }
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Convert a string of words to a vector of strings.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw * Returns the number of words.
d0e518695adc90b82233b99af7dffbb3d3f92c00amw char *p = buf;
d0e518695adc90b82233b99af7dffbb3d3f92c00amw char *q = buf;
d0e518695adc90b82233b99af7dffbb3d3f92c00amw for (;;) {
d0e518695adc90b82233b99af7dffbb3d3f92c00amw if (c == 0)
d0e518695adc90b82233b99af7dffbb3d3f92c00amw while (((c = *p++) != 0) && (c != qc))
d0e518695adc90b82233b99af7dffbb3d3f92c00amw if (c == 0)
d0e518695adc90b82233b99af7dffbb3d3f92c00amw } else if (iswhite(c)) {
d0e518695adc90b82233b99af7dffbb3d3f92c00amw /* end of word */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw /* still inside word */
d0e518695adc90b82233b99af7dffbb3d3f92c00amw *pp = (char *)0;