ndr_lex.c revision d0e518695adc90b82233b99af7dffbb3d3f92c00
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <errno.h>
#include <stdarg.h>
#include "ndrgen.h"
#include "y.tab.h"
/*
* C-like lexical analysis.
*
* 1. Define a "struct node"
* 2. Define a "struct symbol" that encapsulates a struct node.
* 3. Define a "struct integer" that encapsulates a struct node.
* 4. Set the YACC stack type in the grammar:
* %{
* #define YYSTYPE struct node *
* %}
* 5. Define %token's in the grammar for IDENTIFIER, STRING and INTEGER.
* Using "_KW" as a suffix for keyword tokens, i.e. "struct" is
* "%token STRUCT_KW":
* // atomic values
* %token INTEGER STRING IDENTIFIER
* // keywords
* %token STRUCT_KW CASE_KW
* // operators
* %token PLUS MINUS ASSIGN ARROW
* // overloaded tokens (++ --, < > <= >=, == !=, += -= *= ...)
* %token INCOP RELOP EQUOP ASSOP
* 6. It's easiest to use the yacc(1) generated token numbers for node
* labels. For node labels that are not actually part of the grammar,
* use a %token with an L_ prefix:
* // node labels (can't be generated by lex)
* %token L_LT L_LTE L_GT L_GTE L_EQU L_NEQ
* 7. Call set_lex_input() before parsing.
*/
/* Quote characters, named so the literals are easier to read. */
#define SQ '\''
#define DQ '"'
/*
 * Character classification helpers.  These are macros and evaluate
 * their argument more than once, so only pass expressions without
 * side effects.
 */
/* True when l <= c <= u (inclusive range test). */
#define is_between(c, l, u) ((l) <= (c) && (c) <= (u))
/*
 * True when c is a hexadecimal digit: 0-9, A-F or a-f.  Relies on the
 * letters A-F and a-f having contiguous character codes (as in ASCII).
 *
 * The definition must not end in a dangling line continuation: a
 * trailing backslash would splice the following declaration into the
 * macro body.
 */
#define is_xdigit(c) \
	(is_between((c), '0', '9') || \
	is_between((c), 'A', 'F') || \
	is_between((c), 'a', 'f'))
static ndr_integer_t *integer_list;
static ndr_symbol_t *file_name;
int line_number;
int n_compile_error;
static int lex_at_bol;
/* In yacc(1) generated parser */
/*
* The keywtable[] and optable[] could be external to this lexer
* and it would all still work.
*/
/*
 * Reserved words of the input language.  Each entry pairs the keyword
 * spelling with its *_KW token code.  The third initializer is 0 for
 * every entry; its meaning is defined by ndr_keyword_t, which is not
 * declared in this file (presumably in ndrgen.h -- confirm).
 *
 * The table is terminated by an all-zero sentinel entry.
 */
static ndr_keyword_t keywtable[] = {
{ "struct", STRUCT_KW, 0 },
{ "union", UNION_KW, 0 },
{ "typedef", TYPEDEF_KW, 0 },
{ "interface", INTERFACE_KW, 0 },
{ "uuid", UUID_KW, 0 },
{ "_no_reorder", _NO_REORDER_KW, 0 },
{ "extern", EXTERN_KW, 0 },
{ "reference", REFERENCE_KW, 0 },
{ "align", ALIGN_KW, 0 },
{ "operation", OPERATION_KW, 0 },
{ "in", IN_KW, 0 },
{ "out", OUT_KW, 0 },
{ "string", STRING_KW, 0 },
{ "size_is", SIZE_IS_KW, 0 },
{ "length_is", LENGTH_IS_KW, 0 },
{ "switch_is", SWITCH_IS_KW, 0 },
{ "case", CASE_KW, 0 },
{ "default", DEFAULT_KW, 0 },
{ "transmit_as", TRANSMIT_AS_KW, 0 },
{ "arg_is", ARG_IS_KW, 0 },
/* end-of-table sentinel */
{0}
};
/*
 * Punctuation / operator tokens.  Every spelling listed here is a
 * single character, paired with its token code; the third initializer
 * is 0 throughout (meaning defined by ndr_keyword_t, not visible in
 * this file).
 *
 * The table is terminated by an all-zero sentinel entry.
 */
static ndr_keyword_t optable[] = {
{ "{", LC, 0 },
{ "}", RC, 0 },
{ "(", LP, 0 },
{ ")", RP, 0 },
{ "[", LB, 0 },
{ "]", RB, 0 },
{ "*", STAR, 0 },
{ ";", SEMI, 0 },
/* end-of-table sentinel */
{0}
};
static ndr_integer_t *int_enter(long);
static ndr_symbol_t *sym_find(char *);
/*
* Enter the symbols for keyword.
*/
static void
{
int i;
}
}
void
{
line_number = 1;
lex_at_bol = 1;
}
static int
{
}
/*
 * yylex -- scan the next token from the input.
 *
 * Return values visible in this function: EOF at end of input,
 * IDENTIFIER for a symbol and INTEGER for a numeric constant.
 * Newlines, white space, preprocessor lines and comments are consumed
 * and scanning restarts at "top".  A character that matches nothing is
 * reported through compile_error() and skipped.
 *
 * NOTE(review): this revision looks incomplete.  Among other things,
 * "c" is tested before any visible assignment, "sc" is tested without
 * a visible assignment, "sym" is used without a visible declaration,
 * the "convert_icon" label targeted by two gotos is not defined here,
 * and the braces do not balance.  Confirm against the upstream file
 * before relying on the control flow below.
 */
int
yylex(void)
{
/*
 * lexeme collects the text of the token being scanned; p is the write
 * cursor into it.
 * NOTE(review): no bounds check against sizeof (lexeme) is visible on
 * any of the "*p++ = c" stores below.
 */
char lexeme[512];
char *p = lexeme;
int c, xc;
/* Restart point: rewind the cursor so the next token starts clean. */
top:
p = lexeme;
if (c == EOF)
return (EOF);
/*
 * Newline: count it, remember that the next character begins a line
 * (needed to recognize '#' lines), and rescan.
 */
if (c == '\n') {
line_number++;
lex_at_bol = 1;
goto top;
}
/*
* Handle preprocessor lines. This just notes
* which file we're processing.
*/
if (c == '#' && lex_at_bol) {
/* sv/sc: word vector and word count for the '#' line */
char *sv[10];
int sc;
*p++ = c;
*p = 0;
/* note: no ungetc() of newline, we don't want to count it */
if (*lexeme != ' ') {
/* not a line we know */
goto top;
}
/* fewer than two words: nothing usable on this line */
if (sc < 2)
goto top;
/* the newline was consumed above, so we are at beginning-of-line */
lex_at_bol = 1;
goto top;
}
/* Any other character means we are past the beginning of the line. */
lex_at_bol = 0;
/*
* Skip white space
*/
if (is_white(c))
goto top;
/*
* Symbol? Might be a keyword or just an identifier
*/
if (is_sstart(c)) {
/* we got a symbol */
/* accumulate symbol characters into lexeme, then NUL-terminate */
do {
*p++ = c;
} while (is_sfollow(c));
*p = 0;
} else {
return (IDENTIFIER);
}
}
/*
* Integer constant?
*/
if (is_digit(c)) {
/* we got a number */
*p++ = c;
/*
 * A leading '0' may introduce a radix prefix: x/X is followed by
 * hex digits, while b/B, d/D and o/O are followed by decimal-digit
 * characters.  Both paths jump to convert_icon.
 */
if (c == '0') {
if (c == 'x' || c == 'X') {
/* handle hex specially */
do {
*p++ = c;
} while (is_xdigit(c));
goto convert_icon;
} else if (c == 'b' || c == 'B' ||
c == 'd' || c == 'D' ||
c == 'o' || c == 'O') {
do {
*p++ = c;
} while (is_digit(c));
goto convert_icon;
}
}
/* could be anything */
while (is_digit(c)) {
*p++ = c;
}
*p = 0;
return (INTEGER);
}
/* Could handle strings. We don't seem to need them yet */
yylval = 0; /* operator tokens have no value */
/*
 * Build a short NUL-terminated lexeme for operator lookup.
 * NOTE(review): lexeme[1] is not stored in the visible lines even
 * though the terminator goes at index 2; presumably a look-ahead
 * character belongs there -- confirm.
 */
lexeme[0] = c;
lexeme[2] = 0;
/*
* Look for to-end-of-line comment
*/
/* eat the comment */
;
goto top;
}
/*
* Look for multi-line comment
*/
/* eat the comment */
/*
 * xc holds the previously seen character (see "xc = c" below); -1
 * means none yet.  Presumably used to spot the closing delimiter.
 */
xc = -1;
/* that's it */
break;
}
xc = c;
/* keep the line count accurate across multi-line comments */
if (c == '\n')
line_number++;
}
goto top;
}
/*
* Use symbol table lookup for two-character and
* one character operator tokens.
*/
if (sym) {
/* there better be a keyword attached */
}
/* Try a one-character form */
/* cut lexeme back to its first character */
lexeme[1] = 0;
if (sym) {
/* there better be a keyword attached */
}
/* nothing matched: report the character and keep scanning */
compile_error("unrecognized character 0x%02x", c);
goto top;
}
static ndr_symbol_t *
{
ndr_symbol_t **pp;
ndr_symbol_t *p;
return (p);
}
return (0);
}
{
ndr_symbol_t **pp;
ndr_symbol_t *p;
return (p);
}
*pp = p;
return (p);
}
static ndr_integer_t *
{
ndr_integer_t **pp;
ndr_integer_t *p;
return (p);
}
*pp = p;
return (p);
}
void *
{
void *p;
/* NOTREACHED */
}
return (p);
}
/*
* The input context (filename, line number) is maintained by the
* lexical analysis, and we generally want such info reported for
* errors in a consistent manner.
*/
/*
 * compile_error -- report a non-fatal compilation problem.
 *
 * fmt and the variable arguments look printf-style, judging by the
 * call in yylex() ("unrecognized character 0x%02x", c).
 *
 * NOTE(review): the body is empty in this revision, so nothing is
 * printed or counted here.  Presumably it should print the current
 * file name and line number and bump n_compile_error -- confirm.
 */
void
compile_error(const char *fmt, ...)
{
}
/*
 * fatal_error -- give up: terminate the process with exit status 1.
 * Does not return.
 *
 * NOTE(review): fmt and the variable arguments are not used in the
 * visible body; presumably the message should be printed before
 * exiting -- confirm.
 */
void
fatal_error(const char *fmt, ...)
{
exit(1);
}
/*
* Setup nodes for the lexical analyzer.
*/
struct node *
{
ndr_node_t *np;
return (np);
}
/*
* list: item
* | list item ={ n_splice($1, $2); }
* ;
*/
void
{
}
/*
* Convert a string of words to a vector of strings.
* Returns the number of words.
*/
static int
{
char *p = buf;
char *q = buf;
int in_word = 0;
int c;
for (;;) {
c = *p++;
if (c == 0)
break;
if (!in_word) {
if (iswhite(c))
continue;
*pp++ = q;
in_word = 1;
}
if (isquote(c)) {
int qc = c;
while (((c = *p++) != 0) && (c != qc))
*q++ = c;
if (c == 0)
break;
} else if (iswhite(c)) {
/* end of word */
*q++ = 0;
in_word = 0;
} else {
/* still inside word */
*q++ = c;
}
}
if (in_word)
*q++ = 0;
*pp = (char *)0;
}