lib.c revision dc5a8425272d2602e4c21b95b9eeac2b897f45a1
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
#pragma ident "%Z%%M% %I% %E% SMI"
#include <stdio.h>
#include "awk.def"
#include "awk.h"
#include <ctype.h>
#include <wctype.h>
#include "awktype.h"
#include <stdlib.h>
FILE *infile = NULL;
wchar_t *file;
#define RECSIZE (5 * 512)
wchar_t record[RECSIZE];
wchar_t fields[RECSIZE];
wchar_t L_NULL[] = L"";
#define MAXFLD 100
int donefld; /* 1 = implies rec broken into fields */
int donerec; /* 1 = record is valid (no flds have changed) */
int mustfld; /* 1 = NF seen, so always break */
static wchar_t L_record[] = L"$record";
#define FINIT { OCELL, CFLD, 0, L_NULL, 0.0, FLD|STR }
CELL fldtab[MAXFLD] = { /* room for fields */
{ OCELL, CFLD, L_record, record, 0.0, STR|FLD},
FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT
};
int maxfld = 0; /* last used field */
/* pointer to CELL for maximum field assigned to */
CELL *maxmfld = &fldtab[0];
static int isclvar(wchar_t *);
static void setclvar(wchar_t *);
void fldbld(void);
int
getrec(void)
{
wchar_t *rr, *er;
int c, sep;
FILE *inf;
extern int svargc;
extern wchar_t **svargv;
dprintf("**RS=%o, **FS=%o\n", **RS, **FS, NULL);
donefld = 0;
donerec = 1;
record[0] = 0;
er = record + RECSIZE;
while (svargc > 0) {
dprintf("svargc=%d, *svargv=%ws\n", svargc, *svargv, NULL);
if (infile == NULL) { /* have to open a new file */
/*
* If the argument contains a '=', determine if the
* argument needs to be treated as a variable assignment
* or as the pathname of a file.
*/
if (isclvar(*svargv)) {
/* it's a var=value argument */
setclvar(*svargv);
if (svargc > 1) {
svargv++;
svargc--;
continue;
}
*svargv = L"-";
}
*FILENAME = file = *svargv;
dprintf("opening file %ws\n", file, NULL, NULL);
if (*file == (wchar_t)L'-')
infile = stdin;
else if ((infile = fopen(toeuccode(file), "r")) == NULL)
error(FATAL, "can't open %ws", file);
}
if ((sep = **RS) == 0)
sep = '\n';
inf = infile;
for (rr = record; /* dummy */; /* dummy */) {
for (; (c = getwc(inf)) != sep && c != EOF && rr < er;
*rr++ = c)
;
if (rr >= er)
error(FATAL, "record `%.20ws...' too long",
record);
if (**RS == sep || c == EOF)
break;
if ((c = getwc(inf)) == '\n' || c == EOF)
/* 2 in a row */
break;
*rr++ = '\n';
*rr++ = c;
}
if (rr >= er)
error(FATAL, "record `%.20ws...' too long", record);
*rr = 0;
if (mustfld)
fldbld();
if (c != EOF || rr > record) { /* normal record */
recloc->tval &= ~NUM;
recloc->tval |= STR;
++nrloc->fval;
nrloc->tval &= ~STR;
nrloc->tval |= NUM;
return (1);
}
/* EOF arrived on this file; set up next */
if (infile != stdin)
fclose(infile);
infile = NULL;
svargc--;
svargv++;
}
return (0); /* true end of file */
}
/*
* isclvar()
*
* Returns 1 if the input string, arg, is a variable assignment,
* otherwise returns 0.
*
* An argument to awk can be either a pathname of a file, or a variable
* assignment. An operand that begins with an undersore or alphabetic
* character from the portable character set, followed by a sequence of
* underscores, digits, and alphabetics from the portable character set,
* followed by the '=' character, shall specify a variable assignment
* rather than a pathname.
*/
static int
isclvar(wchar_t *arg)
{
wchar_t *tmpptr = arg;
if (tmpptr != NULL) {
/* Begins with an underscore or alphabetic character */
if (iswalpha(*tmpptr) || *tmpptr == '_') {
/*
* followed by a sequence of underscores, digits,
* and alphabetics
*/
for (tmpptr++; *tmpptr; tmpptr++) {
if (!(isalnum(*tmpptr) || (*tmpptr == '_'))) {
break;
}
}
return (*tmpptr == '=');
}
}
return (0);
}
static void
setclvar(wchar_t *s) /* set var=value from s */
{
wchar_t *p;
CELL *q;
for (p = s; *p != '='; p++)
;
*p++ = 0;
q = setsymtab(s, tostring(p), 0.0, STR, symtab);
setsval(q, p);
dprintf("command line set %ws to |%ws|\n", s, p, NULL);
}
void
fldbld(void)
{
wchar_t *r, *fr, sep, c;
static wchar_t L_NF[] = L"NF";
CELL *p, *q;
int i, j;
r = record;
fr = fields;
i = 0; /* number of fields accumulated here */
if ((sep = **FS) == ' ')
for (i = 0; /* dummy */; /* dummy */) {
c = *r;
while (iswblank(c) || c == '\t' || c == '\n')
c = *(++r);
if (*r == 0)
break;
i++;
if (i >= MAXFLD)
error(FATAL,
"record `%.20ws...' has too many fields", record);
if (!(fldtab[i].tval&FLD))
xfree(fldtab[i].sval);
fldtab[i].sval = fr;
fldtab[i].tval = FLD | STR;
do {
*fr++ = *r++;
c = *r;
} while (! iswblank(c) && c != '\t' &&
c != '\n' && c != '\0');
*fr++ = 0;
} else if (*r != 0) /* if 0, it's a null field */
for (;;) {
i++;
if (i >= MAXFLD)
error(FATAL,
"record `%.20ws...' has too many fields", record);
if (!(fldtab[i].tval&FLD))
xfree(fldtab[i].sval);
fldtab[i].sval = fr;
fldtab[i].tval = FLD | STR;
while ((c = *r) != sep && c != '\n' && c != '\0')
/* \n always a separator */
*fr++ = *r++;
*fr++ = 0;
if (*r++ == 0)
break;
}
*fr = 0;
/* clean out junk from previous record */
for (p = maxmfld, q = &fldtab[i]; p > q; p--) {
if (!(p->tval&FLD))
xfree(p->sval);
p->tval = STR | FLD;
p->sval = L_NULL;
}
maxfld = i;
maxmfld = &fldtab[i];
donefld = 1;
for (i = 1; i <= maxfld; i++)
if (isanumber(fldtab[i].sval)) {
fldtab[i].fval = watof(fldtab[i].sval);
fldtab[i].tval |= NUM;
}
setfval(lookup(L_NF, symtab, 0), (awkfloat) maxfld);
if (dbg)
for (i = 0; i <= maxfld; i++)
printf("field %d: |%ws|\n", i, fldtab[i].sval);
}
void
recbld(void)
{
int i;
wchar_t *r, *p;
if (donefld == 0 || donerec == 1)
return;
r = record;
for (i = 1; i <= *NF; i++) {
p = getsval(&fldtab[i]);
while (*r++ = *p++)
;
*(r-1) = **OFS;
}
*(r-1) = '\0';
dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL);
recloc->tval = STR | FLD;
dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL);
if (r > record+RECSIZE)
error(FATAL, "built giant record `%.20ws...'", record);
dprintf("recbld = |%ws|\n", record, NULL, NULL);
}
CELL *
fieldadr(n)
{
if (n < 0 || n >= MAXFLD)
error(FATAL, "trying to access field %d", n);
return (&fldtab[n]);
}
int errorflag = 0;
int
yyerror(char *s)
{
fprintf(stderr,
gettext("awk: %s near line %lld\n"), gettext(s), lineno);
errorflag = 2;
return (0);
}
void
error(f, s, a1, a2, a3, a4, a5, a6, a7)
{
fprintf(stderr, "awk: ");
fprintf(stderr, gettext((char *)s), a1, a2, a3, a4, a5, a6, a7);
fprintf(stderr, "\n");
if (NR && *NR > 0)
fprintf(stderr, gettext(" record number %g\n"), *NR);
if (f)
exit(2);
}
void
PUTS(char *s)
{
dprintf("%s\n", s, NULL, NULL);
}
#define MAXEXPON 38 /* maximum exponenet for fp number */
int
isanumber(wchar_t *s)
{
int d1, d2;
int point;
wchar_t *es;
extern wchar_t radixpoint;
d1 = d2 = point = 0;
while (*s == ' ' || *s == '\t' || *s == '\n')
s++;
if (*s == '\0')
return (0); /* empty stuff isn't number */
if (*s == '+' || *s == '-')
s++;
/*
* Since, iswdigit() will include digit from other than code set 0,
* we have to check it from code set 0 or not.
*/
if (!(iswdigit(*s) && iswascii(*s)) && *s != radixpoint)
return (0);
if (iswdigit(*s) && iswascii(*s)) {
do {
d1++;
s++;
} while (iswdigit(*s) && iswascii(*s));
}
if (d1 >= MAXEXPON)
return (0); /* too many digits to convert */
if (*s == radixpoint) {
point++;
s++;
}
if (iswdigit(*s) && iswascii(*s)) {
d2++;
do {
s++;
} while (iswdigit(*s) && iswascii(*s));
}
if (!(d1 || point && d2))
return (0);
if (*s == 'e' || *s == 'E') {
s++;
if (*s == '+' || *s == '-')
s++;
if (!(iswdigit(*s) && iswascii(*s)))
return (0);
es = s;
do {
s++;
} while (iswdigit(*s) && iswascii(*s));
if (s - es > 2)
return (0);
else if (s - es == 2 &&
10 * (*es-'0') + *(es+1)-'0' >= MAXEXPON)
return (0);
}
while (*s == ' ' || *s == '\t' || *s == '\n')
s++;
if (*s == '\0')
return (1);
else
return (0);
}
char *
toeuccode(str)
wchar_t *str;
{
static char euccode[RECSIZE];
(void) wcstombs(euccode, str, RECSIZE);
return (euccode);
}