48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * This file and its contents are supplied under the terms of the
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * Common Development and Distribution License ("CDDL"), version 1.0.
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * You may only use this file in accordance with the terms of version
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * 1.0 of the CDDL.
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * A full copy of the text of the CDDL should have accompanied this
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * source. A copy of the CDDL is also available via the Internet at
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * This file contains the "scanner", which tokenizes charmap files
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * for iconv for processing by the higher level grammar processor.
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * Token space ... grows on demand.
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * The last keyword seen. This is useful to trigger the special lexer rules
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * for "copy" and also collating symbols and elements.
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * These are keywords used in the charmap file. Note that
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * Solaris originally used angle brackets to wrap some of them,
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * but we removed that to simplify our parser. The first of these
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * items are "global items."
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * These special words are only used in a charmap file, enclosed in <>.
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross (isdigit(x) ? (x - '0') : ((islower(x) ? (x - 'a') : (x - 'A')) + 10))
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross if (c == '\n') {
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross if (c == '\n') {
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross switch (c) {
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* put the character back so we can get it */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross switch (c) {
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross return ('\n');
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross return ('\r');
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross return ('\t');
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross return ('\f');
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross return ('\v');
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross return ('\b');
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross return ('\a');
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* NB: yylval.mbs[0] is the length */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross yyerror(_("max multibyte character size too big"));
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* result in yylval.mbs */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross if (c == '\n')
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * This restarts the token from the beginning
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * the next time we scan a character. (This
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * token is complete.)
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * A few symbols are handled as keywords outside
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * of the normal categories.
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* it's an undefined symbol */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * this one is special, because we don't want it to alter the
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * last_kw field.
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* search for reserved words first */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* clear the top level category if we're done with it */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* set the top level category if we're changing */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross for (j = 0; categories[j]; j++) {
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* maybe it's a numeric constant? */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross if (isdigit(*token) || (*token == '-' && isdigit(token[1]))) {
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * A single lone character is treated as a character literal.
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * To avoid duplication of effort, we stick it in the charmap.
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* anything else is treated as a symbolic name */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* end of file without newline! */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* special handling for quoted string */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* if newline, just eat and forget it */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross if (c == '\n')
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross switch (c) {
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* oops! should generate syntax error */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* escaped characters first */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross if (c == '\n') {
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* eat the newline */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* an escape mid-token is nonsense */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* numeric escapes are treated as wide characters */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* if it is the escape character itself, note it */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* remove from the comment char to end of line */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross while (c != '\n') {
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* end of file without newline! */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * If there were no tokens on this line,
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * then just pretend it didn't exist at all.
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross if (strchr(" \t\n;()<>,\"", c) && (tokidx != 0)) {
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * These are all token delimiters. If there
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * is a token already in progress, we need to
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * process it.
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross switch (c) {
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * If the line was completely devoid of tokens,
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross * then just ignore it.
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* we're starting a new line, reset the token state */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* symbol start! */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross /* whitespace, just ignore it */
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross (void) fprintf(stderr, _("%s: %d: error: %s\n"),
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross (void) fprintf(stderr, _("%s: %d: error: %s\n"),
48edc7cf07b5dccc3ad84bf2dafe4150bd666d60Gordon Ross (void) fprintf(stderr, _("%s: %d: warning: %s\n"),