wclib.c revision 7c2fbfb345896881c631598ee3852ce9ce33fb07
/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1992-2008 AT&T Intellectual Property *
* and is licensed under the *
* Common Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* http://www.opensource.org/licenses/cpl1.0.txt *
* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* David Korn <dgk@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* David Korn
* AT&T Bell Laboratories
*
* library interface for word count
*/
#include <cmd.h>
#include <wc.h>
#include <ctype.h>
#if _hdr_wchar && _hdr_wctype && _lib_iswctype
#include <wchar.h>
#include <wctype.h>
#else
#ifndef iswspace
#define iswspace(x) isspace(x)
#endif
#endif
/*
 * endline(c): true when <c> is the newline class value, i.e. the -1 that
 * wc_init() stores in space['\n'].  The constant test on (signed char)-1
 * selects a plain sign check where that cast yields a negative value and
 * falls back to comparing against (char)-1 otherwise.
 */
#define endline(c) (((signed char)-1)<0?(c)<0:(c)==((char)-1))
/*
 * mbok(p,n): yields 0 when fewer than one byte remains (n<1).  Otherwise,
 * when mbwide() is true, yields whether (*ast.mb_towc)(NiL,p,n) returns a
 * non-negative value; when mbwide() is false it yields 1.
 * NOTE(review): presumably mb_towc returns a negative value for an invalid
 * or truncated multibyte sequence -- confirm against the ast library.
 */
#define mbok(p,n) (((n)<1)?0:mbwide()?((*ast.mb_towc)(NiL,(char*)(p),n)>=0):1)
/*
 * Create a word-count handle configured for <mode>.
 *
 * The handle is obtained with stakalloc(); 0 is returned when that fails.
 * Its space[] table gets one entry per possible byte value:
 *	 1	whitespace byte (only when WC_WORDS is set in <mode>)
 *	 0	any other byte
 *	-1	the newline byte, regardless of <mode>
 */
Wc_t *wc_init(int mode)
{
	Wc_t*	handle;
	int	count_words;
	int	ch;

	handle = (Wc_t*)stakalloc(sizeof(Wc_t));
	if(!handle)
		return(0);
	handle->mode = mode;
	count_words = (mode & WC_WORDS) != 0;
	for(ch = 0; ch < (1<<CHAR_BIT); ch++)
	{
		/* whitespace is only classified when words are being counted */
		if(count_words && isspace(ch))
			handle->space[ch] = 1;
		else
			handle->space[ch] = 0;
	}
	/* newline gets its own negative class so endline() can detect it */
	handle->space['\n'] = -1;
	return(handle);
}
/*
 * Compute the line, word, and character count for stream <fd>.
 *
 * The totals are stored in wp->lines, wp->words and wp->chars.
 * wp->longest is reset to 0 and is only updated on the
 * WC_LONGEST/WC_MBYTE path.  <file> is used solely to label the
 * "invalid multibyte character" diagnostic.  Always returns 0.
 *
 * Two strategies are used, selected by wp->mode:
 *   - WC_LONGEST or WC_MBYTE set: decode one character at a time.
 *   - otherwise: scan raw bytes through the wp->space[] class table
 *     (0 = word byte, non-zero = white space, negative = newline,
 *     as set up by wc_init()).
 */
int wc_count(Wc_t *wp, Sfio_t *fd, const char* file)
{
register signed char *space = wp->space;
register unsigned char *cp;
register Sfoff_t nchars;
register Sfoff_t nwords;
register Sfoff_t nlines;
register Sfoff_t eline;
register Sfoff_t longest;
register ssize_t c;
register unsigned char *endbuff;
/* class of the previous character; starts as "space" so the first word byte opens a word */
register int lasttype = 1;
unsigned int lastchar;
unsigned char *buff;
wchar_t x;
/*
 * NOTE(review): presumably needed because the byte path below stores a
 * sentinel into the reserved buffer -- confirm against the sfio docs.
 */
sfset(fd,SF_WRITE,1);
nlines = nwords = nchars = 0;
wp->longest = 0;
if (wp->mode & (WC_LONGEST|WC_MBYTE))
{
/* character-at-a-time path */
/* longest: value of nchars at the most recent newline */
longest = 0;
/* eline: line number of the last diagnostic, limits it to one per line */
eline = -1;
cp = buff = endbuff = 0;
for (;;)
{
/*
 * Refill when no complete, valid character is available at cp
 * (this is also the case on the first pass, where endbuff-cp is 0).
 */
if (!mbok(cp, endbuff-cp))
{
/*
 * NOTE(review): presumably this sfread releases the SF_LOCKR
 * reservation and consumes the cp-buff bytes already scanned --
 * confirm.  Also confirm that progress is made when cp == buff
 * and the data at cp is an invalid sequence.
 */
if (buff)
sfread(fd, buff, cp-buff);
/* a null reservation ends the scan */
if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, SF_LOCKR)))
break;
endbuff = (cp = buff) + sfvalue(fd);
}
/* every decoded item counts as one character, including invalid ones */
nchars++;
/*
 * cp is not advanced anywhere else in this loop, so mbchar() is
 * presumably a macro that decodes at cp and steps past the
 * character -- confirm in the ast headers.
 */
x = mbchar(cp);
if (x == -1)
{
/* report at most once per line, and not at all under WC_QUIET */
if (eline != nlines && !(wp->mode & WC_QUIET))
{
error_info.file = (char*)file;
error_info.line = eline = nlines;
error(ERROR_SYSTEM|1, "invalid multibyte character");
error_info.file = 0;
error_info.line = 0;
}
}
else if (x == '\n')
{
/*
 * nchars - longest is the number of characters since the
 * previous newline; it includes this newline because nchars
 * was already incremented above.
 */
if ((nchars - longest) > wp->longest)
wp->longest = nchars - longest;
longest = nchars;
nlines++;
lasttype = 1;
}
else if (iswspace(x))
lasttype = 1;
else if (lasttype)
{
/* transition from space to non-space: a new word starts here */
lasttype = 0;
nwords++;
}
}
}
else
{
/*
 * Byte path.  Between buffers, lasttype holds the space[] class of
 * the final byte of the previous buffer; a newline in that position
 * has not been counted yet and a word ending there is still open.
 */
for (;;)
{
/* fill next buffer and check for end-of-file */
if (!(buff = (unsigned char*)sfreserve(fd, 0, 0)) || (c = sfvalue(fd)) <= 0)
break;
sfread(fd,(char*)(cp=buff),c);
/* on this path the character count is a byte count */
nchars += c;
/*
 * A one byte buffer is handled separately: the general scan below
 * replaces the last byte with a sentinel and then looks at the
 * byte before it, so it needs at least two bytes.
 */
if(c==1)
{
/* count the newline deferred from the previous buffer */
if(endline(lasttype))
nlines++;
/* a space byte closes a word left open by the previous buffer */
if((c = space[*cp]) && !lasttype)
nwords++;
lasttype = c;
continue;
}
/* previous buffer ended inside a word and this one starts with a space: close that word */
if(!lasttype && space[*cp])
nwords++;
/*
 * Save the real last byte and overwrite it with a newline sentinel
 * so the inner loops stop without a bounds test on every byte.
 */
lastchar = cp[--c];
cp[c] = '\n';
/* endbuff addresses the sentinel */
endbuff = cp+c;
/* seed the scan with the class carried over from the previous buffer */
c = lasttype;
/* process each buffer */
for (;;)
{
/*
 * process spaces and new-lines
 * c is the class of the byte just before cp.  Each newline is
 * counted after stepping past it; if that step moved cp beyond
 * endbuff the newline was the sentinel and is not counted.
 */
do if (endline(c))
{
for (;;)
{
/* check for end of buffer */
if (cp > endbuff)
goto eob;
nlines++;
if (*cp != '\n')
break;
cp++;
}
} while (c = space[*cp++]);
/* skip over word characters */
while(!(c = space[*cp++]));
/* the word just skipped ended at a space, a newline or the sentinel */
nwords++;
}
eob:
/*
 * cp has stepped past the sentinel, so cp-2 is the byte that
 * precedes the saved last byte.
 * NOTE(review): this point is only reached for buffers of two or
 * more bytes, so the else branch looks unreachable -- confirm.
 */
if((cp -= 2) >= buff)
c = space[*cp];
else
c = lasttype;
/* carry the class of the real last byte into the next buffer */
lasttype = space[lastchar];
/*
 * see if was in word
 * The scan counted a word that ended at the sentinel, but the real
 * last byte is a word byte too, so that word is still open; undo
 * the count and let a later buffer or the end-of-file code below
 * count it.
 */
if(!c && !lasttype)
nwords--;
}
/* end of input: count the deferred newline, or else the word still open */
if(endline(lasttype))
nlines++;
else if(!lasttype)
nwords++;
}
wp->chars = nchars;
wp->words = nwords;
wp->lines = nlines;
return(0);
}