gnu_lex.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2001, 2002 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include "gnu_msgfmt.h"
#include "gnu_lex.h"
#include "y.tab.h"
int cur_line = 1;
static char backbuf[MB_LEN_MAX];
static int backlen = 0;
/*
* get_mb() returns one multibyte character.
*
* This function uses the iconv() function to find out one
* multibyte character from a sequence of bytes in the file stream.
* The conversion from the codeset specified in the PO file to UTF-8
* is performed. The funcition reads another byte and calls iconv(),
* until iconv() successfully returns as a valid UTF-8 character has
* been converted or returns EILSEQ. If iconv() successfully returned,
* the function returns the read bytes as one character. Otherwise,
* returns error. The string converted to UTF-8 in outbuf won't be
* used at all.
*/
static size_t
get_mb(unsigned char *tmpbuf, unsigned char fc)
{
int c;
char outbuf[8]; /* max size of a UTF-8 char */
const char *inptr;
char *outptr;
size_t insize = 0, inlen, outlen, ret;
tmpbuf[insize++] = fc; /* size of tmpbuf is MB_LEN_MAX+1 */
if (cd == (iconv_t)-1) {
/* no conversion */
tmpbuf[insize] = '\0';
return (insize);
}
for (; ; ) {
inptr = (const char *)tmpbuf;
outptr = &outbuf[0];
inlen = insize;
outlen = sizeof (outbuf);
errno = 0;
ret = iconv(cd, &inptr, &inlen, &outptr, &outlen);
if (ret == (size_t)-1) {
/* iconv failed */
switch (errno) {
case EILSEQ:
/* invalid character found */
error(gettext(ERR_INVALID_CHAR),
cur_line, cur_po);
/* NOTREACHED */
case EINVAL:
/* not enough input */
if (insize == MB_LEN_MAX) {
/* invalid character found */
error(gettext(ERR_INVALID_CHAR),
cur_line, cur_po);
/* NOTREACHED */
}
c = getc(fp);
if (c == EOF) {
error(gettext(ERR_UNEXP_EOF),
cur_line, cur_po);
/* NOTREACHED */
}
tmpbuf[insize++] = (unsigned char)c;
/* initialize the conversion */
outptr = &outbuf[0];
outlen = sizeof (outbuf);
(void) iconv(cd, NULL, NULL, &outptr, &outlen);
continue;
/* NOTREACHED */
default:
/* should never happen */
error(ERR_INTERNAL,
cur_line, cur_po);
/* NOTREACHED */
}
/* NOTREACHED */
}
tmpbuf[insize] = '\0';
return (insize);
/* NOTRECHED */
}
}
static void
po_uninput(int c)
{
(void) ungetc(c, fp);
if (c == '\n')
cur_line--;
}
static void
po_ungetc(struct ch *pch)
{
if (backlen) {
error(gettext(ERR_INTERNAL), cur_line, cur_po);
/* NOTREACHED */
}
if (!pch->eof) {
backlen = pch->len;
(void) memcpy(backbuf, pch->buf, backlen);
}
}
static struct ch *
po_getc(void)
{
static struct ch och;
int c;
if (backlen) {
och.len = backlen;
(void) memcpy(och.buf, backbuf, backlen);
backlen = 0;
return (&och);
}
for (; ; ) {
c = getc(fp);
if (c == EOF) {
if (ferror(fp)) {
/* error happend */
error(gettext(ERR_READ_FAILED), cur_po);
/* NOTREACHED */
}
och.len = 0;
och.eof = 1;
return (&och);
}
if (c == '\\') {
c = getc(fp);
if (c == '\n') {
/* this newline should be escaped */
cur_line++;
continue;
} else {
po_uninput(c);
och.len = 1;
och.eof = 0;
och.buf[0] = '\\';
return (&och);
}
/* NOTREACHED */
}
if (c == '\n') {
cur_line++;
och.len = 1;
och.eof = 0;
och.buf[0] = '\n';
return (&och);
}
if (isascii((unsigned char)c)) {
/* single byte ascii */
och.len = 1;
och.eof = 0;
och.buf[0] = (unsigned char)c;
return (&och);
}
och.len = get_mb(&och.buf[0], (unsigned char)c);
och.eof = 0;
return (&och);
}
/* NOTREACHED */
}
static void
extend_buf(char **buf, size_t *size, size_t add)
{
char *tmp;
*size += add;
tmp = (char *)Xrealloc(*buf, *size);
*buf = tmp;
}
static struct ch *
expand_es(void)
{
int c, n, loop;
static struct ch och;
struct ch *pch;
pch = po_getc();
if (pch->eof) {
error(gettext(ERR_UNEXP_EOF),
cur_line, cur_po);
/* NOTREACHED */
}
if (pch->len > 1) {
/* not a valid escape sequence */
return (pch);
}
och.len = 1;
och.eof = 0;
switch (pch->buf[0]) {
case '"':
case '\\':
och.buf[0] = pch->buf[0];
break;
case 'b':
och.buf[0] = '\b';
break;
case 'f':
och.buf[0] = '\f';
break;
case 'n':
och.buf[0] = '\n';
break;
case 'r':
och.buf[0] = '\r';
break;
case 't':
och.buf[0] = '\t';
break;
case 'v':
och.buf[0] = '\v';
break;
case 'a':
och.buf[0] = '\a';
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
/* octal */
c = pch->buf[0];
for (n = 0, loop = 0; ; ) {
n = n * 8 + c - '0';
loop++;
if (loop >= 3)
break;
pch = po_getc();
if (pch->eof) {
error(gettext(ERR_UNEXP_EOF),
cur_line, cur_po);
/* NOTREACHED */
}
if ((pch->len > 1) || (pch->buf[0] < '0') ||
(pch->buf[0] > '7'))
break;
c = pch->buf[0];
}
po_ungetc(pch);
och.buf[0] = (unsigned char)n;
break;
case 'x':
/* hex */
pch = po_getc();
if (pch->eof) {
error(gettext(ERR_UNEXP_EOF),
cur_line, cur_po);
/* NOTREACHED */
}
if (pch->len > 1) {
po_ungetc(pch);
och.buf[0] = 'x';
break;
}
c = pch->buf[0];
if (!isxdigit((unsigned char)c)) {
po_ungetc(pch);
och.buf[0] = 'x';
break;
}
if (isdigit((unsigned char)c)) {
n = c - '0';
} else if (isupper((unsigned char)c)) {
n = c - 'A' + 10;
} else {
n = c - 'a' + 10;
}
pch = po_getc();
if (pch->eof) {
error(gettext(ERR_UNEXP_EOF),
cur_line, cur_po);
/* NOTREACHED */
}
if (pch->len > 1) {
po_ungetc(pch);
och.buf[0] = (unsigned char)n;
break;
}
c = pch->buf[0];
if (!isxdigit((unsigned char)c)) {
po_ungetc(pch);
och.buf[0] = (unsigned char)n;
break;
}
n *= 16;
if (isdigit((unsigned char)c)) {
n += c - '0';
} else if (isupper((unsigned char)c)) {
n += c - 'A' + 10;
} else {
n += c - 'a' + 10;
}
och.buf[0] = (unsigned char)n;
break;
default:
och.buf[0] = pch->buf[0];
break;
}
return (&och);
}
int
yylex(void)
{
unsigned int uc;
struct ch *pch;
char *buf;
size_t buf_size, buf_pos;
for (; ; ) {
pch = po_getc();
if (pch->eof) {
/* EOF */
return (0);
}
if (pch->len > 1) {
/* multi byte */
yylval.c.len = pch->len;
(void) memcpy(yylval.c.buf, pch->buf, pch->len);
return (CHR);
}
/* single byte */
switch (pch->buf[0]) {
case ' ':
case '\t':
case '\n':
break;
case '#':
/* comment start */
buf_size = CBUFSIZE;
buf = (char *)Xmalloc(buf_size);
buf_pos = 0;
pch = po_getc();
while (!pch->eof &&
((pch->len != 1) || (pch->buf[0] != '\n'))) {
if (buf_pos + pch->len + 1 > buf_size)
extend_buf(&buf, &buf_size, CBUFSIZE);
(void) memcpy(buf + buf_pos,
pch->buf, pch->len);
buf_pos += pch->len;
pch = po_getc();
}
buf[buf_pos] = '\0';
yylval.str = buf;
return (COMMENT);
/* NOTREACHED */
case '[':
case ']':
return (pch->buf[0]);
/* NOTREACHED */
case '"':
buf_size = MBUFSIZE;
buf = (char *)Xmalloc(buf_size);
buf_pos = 0;
for (; ; ) {
pch = po_getc();
if (pch->eof) {
/* EOF */
error(gettext(ERR_UNEXP_EOF),
cur_line, cur_po);
/* NOTREACHED */
}
if (pch->len == 1) {
uc = pch->buf[0];
if (uc == '\n') {
error(gettext(ERR_UNEXP_EOL),
cur_line, cur_po);
/* NOTREACHED */
}
if (uc == '"')
break;
if (uc == '\\')
pch = expand_es();
}
if (buf_pos + pch->len + 1 > buf_size)
extend_buf(&buf, &buf_size,
MBUFSIZE);
(void) memcpy(buf + buf_pos,
pch->buf, pch->len);
buf_pos += pch->len;
}
buf[buf_pos] = '\0';
yylval.str = buf;
return (STR);
/* NOTREACHED */
default:
uc = pch->buf[0];
if (isalpha(uc) || (uc == '_')) {
buf_size = KBUFSIZE;
buf = (char *)Xmalloc(buf_size);
buf_pos = 0;
buf[buf_pos++] = (char)uc;
pch = po_getc();
while (!pch->eof &&
(pch->len == 1) &&
(isalpha(uc = pch->buf[0]) ||
isdigit(uc) || (uc == '_'))) {
if (buf_pos + 1 + 1 > buf_size)
extend_buf(&buf, &buf_size,
KBUFSIZE);
buf[buf_pos++] = (char)uc;
pch = po_getc();
}
/* push back the last char */
po_ungetc(pch);
buf[buf_pos] = '\0';
yylval.str = buf;
if (buf_pos > MAX_KW_LEN) {
/* kbuf is longer than any keywords */
return (SYMBOL);
}
yylval.num = cur_line;
if (strcmp(buf, KW_DOMAIN) == 0) {
free(buf);
return (DOMAIN);
} else if (strcmp(buf, KW_MSGID) == 0) {
free(buf);
return (MSGID);
} else if (strcmp(buf, KW_MSGID_PLURAL) == 0) {
free(buf);
return (MSGID_PLURAL);
} else if (strcmp(buf, KW_MSGSTR) == 0) {
free(buf);
return (MSGSTR);
} else {
free(buf);
return (SYMBOL);
}
/* NOTREACHED */
}
if (isdigit(uc)) {
buf_size = NBUFSIZE;
buf = (char *)Xmalloc(buf_size);
buf_pos = 0;
buf[buf_pos++] = (char)uc;
pch = po_getc();
while (!pch->eof &&
(pch->len == 1) &&
isdigit(uc = pch->buf[0])) {
if (buf_pos + 1 + 1 > buf_size)
extend_buf(&buf, &buf_size,
NBUFSIZE);
buf[buf_pos++] = (char)uc;
pch = po_getc();
}
/* push back the last char */
po_ungetc(pch);
buf[buf_pos] = '\0';
yylval.num = atoi(buf);
free(buf);
return (NUM);
}
/* just a char */
yylval.c.len = 1;
yylval.c.buf[0] = uc;
return (CHR);
/* NOTREACHED */
}
}
}