/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1992-2010 AT&T Intellectual Property *
* and is licensed under the *
* Common Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* David Korn <dgk@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* David Korn
* AT&T Bell Laboratories
*
* cut fields or columns from each line of a file
*/
/*
 * Self-documenting option and usage description for cut.
 *
 * The text uses the bracketed [flag:long-name?description] layout with
 * \a and \b as markup bytes; it is presumably handed to the AST option
 * parser by the command entry point (that call is not visible in this
 * view -- confirm).
 *
 * Only descriptive prose is corrected here ("concatenating", "option",
 * "passed").  The long option name "line-delimeter" is left misspelled
 * on purpose: it is part of the command-line interface and renaming it
 * would break existing invocations.
 */
static const char usage[] =
"[-?\n@(#)$Id: cut (AT&T Research) 2009-12-04 $\n]"
"[+NAME?cut - cut out selected columns or fields of each line of a file]"
"[+DESCRIPTION?\bcut\b bytes, characters, or character-delimited fields "
"from one or more files, concatenating them on standard output.]"
"[+?The option argument \alist\a is a comma-separated or blank-separated "
"list of positive numbers and ranges. Ranges can be of three "
"forms. The first is two positive integers separated by a hyphen "
"(\alow\a\b-\b\ahigh\a), which represents all fields from \alow\a to "
"\ahigh\a. The second is a positive number preceded by a hyphen "
"(\b-\b\ahigh\a), which represents all fields from field \b1\b to "
"\ahigh\a. The last is a positive number followed by a hyphen "
"(\alow\a\b-\b), which represents all fields from \alow\a to the "
"last field, inclusive. Elements in the \alist\a can be repeated, "
"can overlap, and can appear in any order. The order of the "
"output is that of the input.]"
"[+?One and only one of \b-b\b, \b-c\b, or \b-f\b must be specified.]"
"[+?If no \afile\a is given, or if the \afile\a is \b-\b, \bcut\b "
"cuts from standard input. The start of the file is defined "
"as the current offset.]"
"[b:bytes]:[list?\bcut\b based on a list of byte counts.]"
"[c:characters]:[list?\bcut\b based on a list of character counts.]"
"[d:delimiter]:[delim?The field character for the \b-f\b option is set "
"to \adelim\a. The default is the \btab\b character.]"
"[f:fields]:[list?\bcut\b based on fields separated by the delimiter "
"character specified with the \b-d\b option.]"
"[n!:split?Split multibyte characters selected by the \b-b\b option.]"
"[R|r:reclen]#[reclen?If \areclen\a > 0, the input will be read as fixed length "
"records of length \areclen\a when used with the \b-b\b or \b-c\b "
"option.]"
"[s:suppress|only-delimited?Suppress lines with no delimiter characters, "
"when used with the \b-f\b option. By default, lines with no "
"delimiters will be passed in untouched.]"
"[D:line-delimeter|output-delimiter]:[ldelim?The line delimiter character for "
"the \b-f\b option is set to \aldelim\a. The default is the "
"\bnewline\b character.]"
"[N!:newline?Output new-lines at end of each record when used "
"with the \b-b\b or \b-c\b option.]"
"\n"
"\n[file ...]\n"
"\n"
"[+EXIT STATUS?]{"
"[+0?All files processed successfully.]"
"[+>0?One or more files failed to open or could not be read.]"
"}"
"[+SEE ALSO?\bpaste\b(1), \bgrep\b(1)]"
;
#include <cmd.h>
#include <ctype.h>
/*
 * Description of one delimiter.
 *
 * NOTE(review): the code that fills and reads these members is not
 * visible in this view; the member notes are inferred from the names
 * only -- confirm against the users of Delim_t.
 */
typedef struct Delim_s
{
	char	*str;	/* presumably the delimiter text */
	int	len;	/* presumably the length of str */
	int	chr;	/* presumably the delimiter as a character code */
} Delim_t;
/*
 * Per-invocation state for cut.
 *
 * NOTE(review): the code that sets and tests these members is not
 * visible in this view.  The member notes are guesses based on the
 * names and on the option descriptions in usage[] -- confirm each one
 * against the code that uses it.
 */
typedef struct Cut_s
{
	int	mb;		/* presumably: multibyte handling is active */
	int	eob;		/* presumably: an end-of-buffer marker value */
	int	cflag;		/* presumably: character (-c) mode selected */
	int	nosplit;	/* presumably: do not split multibyte chars (-n) */
	int	sflag;		/* presumably: suppress undelimited lines (-s) */
	int	nlflag;		/* presumably: newline handling (-N) */
	int	reclen;		/* presumably: fixed record length (-R / -r) */
} Cut_t;
/*
 * Three-way comparison of the int stored at the start of each argument.
 *
 * a, b: pointers to objects whose first member is an int; only that
 *       leading int is examined.
 * Returns -1 when *a sorts first, 1 when *b sorts first, 0 when equal.
 *
 * The (const void*, const void*) signature is the one a generic sort
 * routine expects; the call site is not visible in this view.
 */
static int
mycomp(register const void* a, register const void* b)
{
	const int	lhs = *(const int*)a;
	const int	rhs = *(const int*)b;

	/* each relational yields 0 or 1, so the difference is -1, 0 or 1 */
	return (lhs > rhs) - (lhs < rhs);
}
/*
 * Scan a textual list one character at a time and return a Cut_t*.
 *
 * NOTE(review): in this view the definition has no name or parameter
 * list, there is an `else` with no visible `if`, several brace groups
 * are empty, and cp/cut are used without a visible declaration.
 * Statements appear to be missing, so the comments here describe only
 * what the visible statements do -- confirm against the complete source.
 *
 * Visible behavior:
 *   - digits are folded into n as a decimal number;
 *   - '-' touches range and clears n;
 *   - blank, tab, ',' and the terminating 0 close the current element
 *     by appending values through lp;
 *   - on the terminating 0 the stored values are post-processed
 *     (overlap removal, then range-to-gap conversion) and cut is
 *     returned.
 */
static Cut_t*
{
register int* lp;
register int c;
register int n = 0;
register int range = 0;
{
}
else
for (;;)
switch(c = *cp++)
{
case ' ':
case '\t':
/*
 * NOTE(review): cp was already advanced by the switch expression, so
 * as shown this steps over one more character unconditionally; a loop
 * condition may be missing here -- confirm.
 */
cp++;
/*FALLTHROUGH*/
case 0:
case ',':
/* close the current element: the values appended depend on range */
if(range)
{
--range;
*lp++ = n;
}
else
{
/* no range pending: append the decremented n followed by 1 */
*lp++ = --n;
*lp++ = 1;
}
if(c==0)
{
register int *dp;
/* eliminate overlapping regions */
{
{
{
break;
}
{
range += c;
dp[-1] += c;
}
}
else
{
{
break;
}
}
}
/* convert ranges into gaps */
{
/* remember the old *lp in c, rebase *lp by n, then set n to old *lp + lp[1] */
c = *lp;
*lp -= n;
n = c+lp[1];
}
return cut;
}
/* element stored: reset the accumulators for the next one */
n = range = 0;
break;
case '-':
/*
 * NOTE(review): as shown, range is assigned only when it is already
 * non-zero, so a '-' could never start a range; a statement between
 * the `if` and the assignment may be missing -- confirm.
 */
if(range)
range = n?n:1;
n = 0;
break;
default:
/*
 * NOTE(review): as shown, the decimal accumulation runs only when c
 * is NOT a digit, which looks inverted; the `if` may originally have
 * guarded a statement that is missing here -- confirm.
 */
if(!isdigit(c))
n = 10*n + (c-'0');
break;
}
/* NOTREACHED */
}
/*
 * cut each line of file <fdin> and put results to <fdout> using list <list>
 *
 * NOTE(review): in this view the definition has no name or parameter
 * list, and statements appear to be missing (dangling `else`, empty
 * brace groups, `w` and `skip` used with no visible declaration).  The
 * comments below cover only the visible statements -- confirm against
 * the complete source.
 *
 * Visible structure: an outer for(;;) loop that resets xx and must on
 * every pass, and an inner do-loop that works out c, a byte count
 * measured from bp, then advances bp by c and reduces len by c.
 */
static void
{
register int c;
register int len;
register int ncol = 0;
register char* bp;
int must;
char* ep;
const char* xx;
for (;;)
{
else
break;
xx = 0;
must = 1;
do
{
{
/* walk forward from bp one character at a time while w bytes remain */
register const char* s = bp;
register int z;
while (w > 0)
{
/* a byte without the high bit is taken as a one-byte character */
if (!(*s & 0x80))
z = 1;
/* otherwise ask mblen(); <= 0 means no character was decoded */
else if ((z = mblen(s, w)) <= 0)
{
{
w += s - xx;
xx = 0;
continue;
}
/* remember where decoding stopped */
xx = s;
if (skip)
s += w;
w = 0;
break;
}
s += z;
w -= z;
}
/* number of bytes covered by the walk */
c = s - bp;
}
{
/* consume up to ncol characters, bounded by the len bytes available */
register const char* s = bp;
register int w = len;
register int z;
while (w > 0 && ncol > 0)
{
ncol--;
/* a high-bit byte that mblen() cannot decode is stepped over as one byte */
if (!(*s & 0x80) || (z = mblen(s, w)) <= 0)
z = 1;
s += z;
w -= z;
}
c = s - bp;
}
else
{
c = len;
ncol++;
ncol -= c;
}
if (!skip && c)
{
/*
 * NOTE(review): as shown, `must = 0` can never execute because it
 * follows an unconditional return; the return was probably guarded by
 * a condition that is missing here -- confirm.
 */
return;
must = 0;
}
bp += c;
if (ncol)
break;
len -= c;
{
else
}
}
}
/*
 * cut each line of file <fdin> and put results to <fdout> using list <list>
 * stream <fdin> must be line buffered
 *
 * NOTE(review): in this view the definition has no name or parameter
 * list, and many statements appear to be missing (loop and switch
 * headers, dangling `else`, empty brace groups; inword, nodelim, empty,
 * fdtmp and the `failed` label have no visible declaration).  The
 * comments below cover only the visible statements -- confirm against
 * the complete source.
 */
static void
{
register unsigned char *cp;
register unsigned char *wp;
register int c, nfields;
register unsigned char *copy;
register unsigned char *ep;
int lastchar;
wchar_t w;
long offset = 0;
/* process each buffer */
{
/* process each line in the buffer */
{
if (!inword)
{
copy = 0;
else
}
else if (copy)
inword = 0;
do
{
/* skip over non-delimiter characters */
for (;;)
{
/* NOTE(review): the case labels below have no visible switch header */
{
case 0:
continue;
case SP_WIDE:
{
/* mb char possibly spanning buffer boundary -- fun stuff */
{
int i;
int j;
int k;
{
break;
}
if (copy)
{
empty = 0;
goto failed;
}
goto failed;
j = i;
k = 0;
while (j < mbmax())
{
c = i;
w = 0;
}
if (copy)
{
{
empty = 0;
goto failed;
}
}
c = 0;
}
else
{
/* single-byte case: the byte is the wide value and one byte is consumed */
w = *cp;
c = 1;
}
break;
}
/* step past the c bytes just examined and carry the wide value on in c */
cp += c;
c = w;
{
c = SP_WORD;
break;
}
{
c = SP_LINE;
break;
}
continue;
default:
break;
}
break;
}
else
{
}
/* check for end-of-line */
if (c == SP_LINE)
{
/* NOTE(review): two consecutive breaks as shown; their guarding conditions appear to be missing */
break;
break;
/* restore cut->last character */
inword++;
break;
}
nodelim = 0;
/* count this field down; keep scanning while nfields stays positive */
if (--nfields > 0)
continue;
if (copy)
{
empty = 0;
/* NOTE(review): `copy = 0` is unreachable as shown; the goto was probably conditional */
goto failed;
copy = 0;
}
else
/* set to delimiter unless the first field */
} while (!inword);
if (!inword)
{
if (!copy)
{
if (nodelim)
{
{
if (offset)
{
}
}
}
else
}
if (offset)
}
goto failed;
}
/* see whether to save in tmp file */
{
/* copy line to tmpfile in case no fields */
if(!fdtmp)
/* offset grows by c */
offset +=c;
}
}
if(fdtmp)
}
/*
 * Command entry point: option handling followed by a per-argument loop.
 *
 * NOTE(review): in this view the definition has no name or parameter
 * list, and most statements appear to be missing (no switch header,
 * most case bodies reduced to `continue`, dangling `else`, argv and
 * C_NONEWLINE not declared here).  The comments below cover only the
 * visible statements -- confirm against the complete source.
 *
 * The case labels use the same letters as the flags described in
 * usage[]: b, c, D, d, f, n, N, R/r and s.
 *
 * Returns error_info.errors != 0, i.e. 0 when no errors were counted
 * and 1 otherwise.
 */
int
{
register char* cp = 0;
char* s;
int n;
int mode = 0;
for (;;)
{
/* NOTE(review): the case labels below have no visible switch header */
{
case 0:
break;
case 'b':
case 'c':
{
continue;
}
if(n=='b')
else
continue;
case 'D':
if (mbwide())
{
{
continue;
}
}
continue;
case 'd':
if (mbwide())
{
{
continue;
}
}
continue;
case 'f':
{
continue;
}
continue;
case 'n':
continue;
case 'N':
/* the only option whose effect is visible here: set C_NONEWLINE in mode */
mode |= C_NONEWLINE;
continue;
case 'R':
case 'r':
continue;
case 's':
continue;
case ':':
break;
case '?':
break;
}
break;
}
if (error_info.errors)
/* cp starts out as 0 and is tested here; the assignments to it are not visible */
if(!cp)
{
}
if(!*cp)
argv++;
do
{
{
continue;
}
else
/* non-zero result when any error was counted in error_info.errors */
return error_info.errors != 0;
}