cut.c revision da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968
/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1992-2007 AT&T Knowledge Ventures *
* and is licensed under the *
* Common Public License, Version 1.0 *
* by AT&T Knowledge Ventures *
* *
* A copy of the License is available at *
* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* David Korn <dgk@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* David Korn
* AT&T Bell Laboratories
*
* cut [-sN] [-f flist] [-c clist] [-d delim] [-D delim] [-r reclen] [file] ...
*
* cut fields or columns from each line of a file
*/
/*
 * Self-documenting usage text for the command.
 *
 * Each bracketed entry describes one piece of the manual: the $Id line,
 * the NAME and DESCRIPTION sections, one entry per option letter
 * (b, c, d, f, n, R|r, s, D, N), the operand list, EXIT STATUS and
 * SEE ALSO.  \a...\a and \b...\b are markup escapes embedded in the text.
 * NOTE(review): the string is presumably handed to the AST option parser
 * declared through <cmd.h>; that call is not visible next to this
 * definition -- confirm.
 *
 * Spelling in the descriptive text has been corrected ("concatenating",
 * "option", "passed").  The long option name "line-delimeter" is left
 * exactly as it was: it is what users type on the command line, so
 * correcting its spelling would break existing invocations.
 */
static const char usage[] =
"[-?\n@(#)$Id: cut (AT&T Research) 2007-01-23 $\n]"
"[+NAME?cut - cut out selected columns or fields of each line of a file]"
"[+DESCRIPTION?\bcut\b bytes, characters, or character-delimited fields "
"from one or more files, concatenating them on standard output.]"
"[+?The option argument \alist\a is a comma-separated or blank-separated "
"list of positive numbers and ranges. Ranges can be of three "
"forms. The first is two positive integers separated by a hyphen "
"(\alow\a\b-\b\ahigh\a), which represents all fields from \alow\a to "
"\ahigh\a. The second is a positive number preceded by a hyphen "
"(\b-\b\ahigh\a), which represents all fields from field \b1\b to "
"\ahigh\a. The last is a positive number followed by a hyphen "
"(\alow\a\b-\b), which represents all fields from \alow\a to the "
"last field, inclusive. Elements in the \alist\a can be repeated, "
"can overlap, and can appear in any order. The order of the "
"output is that of the input.]"
"[+?One and only one of \b-b\b, \b-c\b, or \b-f\b must be specified.]"
"[+?If no \afile\a is given, or if the \afile\a is \b-\b, \bcut\b "
"cuts from standard input. The start of the file is defined "
"as the current offset.]"
"[b:bytes]:[list?\bcut\b based on a list of bytes.]"
"[c:characters]:[list?\bcut\b based on a list of characters.]"
"[d:delimiter]:[delim?The field character for the \b-f\b option is set "
"to \adelim\a. The default is the \btab\b character.]"
"[f:fields]:[list?\bcut\b based on fields separated by the delimiter "
"character specified with the \b-d\b option.]"
"[n:nosplit?Do not split characters. Currently ignored.]"
"[R|r:reclen]#[reclen?If \areclen\a > 0, the input will be read as fixed length "
"records of length \areclen\a when used with the \b-b\b or \b-c\b "
"option.]"
"[s:suppress|only-delimited?Suppress lines with no delimiter characters, "
"when used with the \b-f\b option. By default, lines with no "
"delimiters will be passed in untouched.]"
"[D:line-delimeter|output-delimiter]:[ldelim?The line delimiter character for "
"the \b-f\b option is set to \aldelim\a. The default is the "
"\bnewline\b character.]"
"[N:nonewline?Do not output new-lines at end of each record when used "
"with the \b-b\b or \b-c\b option.]"
"\n"
"\n[file ...]\n"
"\n"
"[+EXIT STATUS?]{"
"[+0?All files processed successfully.]"
"[+>0?One or more files failed to open or could not be read.]"
"}"
"[+SEE ALSO?\bpaste\b(1), \bgrep\b(1)]"
;
#include <cmd.h>
#include <ctype.h>
/*
 * Last_t: a record of four ints, in the order seqno, seq, wdelim, ldelim.
 *
 * NOTE(review): the code that fills in and reads these members is not
 * visible next to this declaration.  The names suggest a pair of saved
 * sequence numbers plus saved word (field) and line delimiter
 * characters -- confirm against the users of the type.
 */
struct Last_s
{
	int	seqno;
	int	seq;
	int	wdelim;
	int	ldelim;
};

typedef struct Last_s Last_t;
/*
 * Cut_t: a record of seven ints, in the order cflag, sflag, nlflag,
 * wdelim, ldelim, seqno, reclen.
 *
 * wdelim and ldelim carry the same names as the locals in the option
 * parsing code, which default to '\t' and '\n'.
 *
 * NOTE(review): how the remaining members are set is not visible next
 * to this declaration; cflag/sflag/nlflag look like per-option switches
 * and reclen like the -r record length -- confirm.  A comment further
 * down the file mentions "cuthdr->last", yet no member named last is
 * declared here; if cuthdr is a Cut_t, check that this declaration is
 * complete.
 */
struct Cut_s
{
	int	cflag;
	int	sflag;
	int	nlflag;
	int	wdelim;
	int	ldelim;
	int	seqno;
	int	reclen;
};

typedef struct Cut_s Cut_t;
/*
 * Mode flags.  Every name is a distinct single bit, so any combination
 * can be OR-ed into one int (the option parsing code does
 * "mode |= C_NONEWLINE").
 */
#define C_BYTES		(1<<0)
#define C_CHARS		(1<<1)
#define C_FIELDS	(1<<2)
#define C_SUPRESS	(1<<3)
#define C_NOCHOP	(1<<4)
#define C_NONEWLINE	(1<<5)
/*
 * Three-way comparison of the int that each argument points at.
 *
 * a, b:   pointers to an int; when they point at the first element of a
 *         larger record only that first int is examined.
 * return: negative if *a < *b, zero if equal, positive if *a > *b.
 *
 * The signature is the one taken by qsort(3)-style sort routines.
 *
 * The result is built from two relational tests rather than from
 * "*a - *b": the subtraction overflows (undefined behaviour, and in
 * practice the wrong sign) when the operands have opposite signs and
 * large magnitude.  The pointed-to objects are read through const
 * pointers so the const qualifier of the parameters is not cast away.
 */
static int mycomp(register const void *a,register const void *b)
{
	const int	lhs = *(const int*)a;
	const int	rhs = *(const int*)b;

	return((lhs > rhs) - (lhs < rhs));
}
/*
* NOTE(review): the function header that should precede the brace below
* is not present, lp is never visibly initialised, and cp and cuthdr are
* used without a visible declaration -- restore these from the
* authoritative copy of the file before changing any logic here.
*
* What is visible: an endless loop that reads one character at a time
* through cp and dispatches on it.  Blank, tab, ',' and NUL close the
* current list element and store ints through lp; '-' touches the range
* state; anything else reaches the default case.  On NUL the code runs
* the "eliminate overlapping regions" and "convert ranges into gaps"
* sections and returns cuthdr.
*/
{
register int *lp, c, n=0;
register int range = 0;
/* c receives the next character and cp is advanced past it */
while(1) switch(c= *cp++)
{
case ' ':
case '\t':
/*
* skips one further character unconditionally, then falls through to
* the separator handling below
* NOTE(review): a loop or test guarding this increment may be missing
*/
cp++;
case 0:
case ',':
/* non-zero range: decrement it and store n */
if(range)
{
--range;
*lp++ = n;
}
/* otherwise store the pre-decremented n followed by 1 */
else
{
*lp++ = --n;
*lp++ = 1;
}
/* NUL: end of the list string */
if(c==0)
{
register int *dp;
/* eliminate overlapping regions */
/*
* NOTE(review): the loop and if statements that control the braces
* below are not visible, and the else has no matching if as shown
*/
{
{
{
break;
}
{
range += c;
dp[-1] += c;
}
}
else
{
{
break;
}
}
}
/* convert ranges into gaps */
/*
* each step saves *lp in c, subtracts n from *lp, then sets n to the
* saved value plus lp[1]
* NOTE(review): the loop header that advances lp is not visible
*/
{
c = *lp;
*lp -= n;
n = c+lp[1];
}
return(cuthdr);
}
/* separator other than NUL: reset both accumulators */
n = range = 0;
break;
case '-':
/*
* NOTE(review): as shown, range is assigned only when it is already
* non-zero, so it appears to stay 0 throughout the scan; a statement
* between the if and the assignment looks to be missing -- confirm
*/
if(range)
range = n?n:1;
n = 0;
break;
default:
/*
* NOTE(review): as shown, the decimal accumulation runs only when c is
* NOT a digit; a rejection statement between these two lines looks to
* be missing -- confirm
*/
if(!isdigit(c))
n = 10*n + (c-'0');
}
/* NOTREACHED */
}
/*
* advance <cp> by <n> multi-byte characters
*
* NOTE(review): the function header is not present; len, n, size and
* inlen are used without a visible declaration, and nothing visible
* inside the loop moves cp, assigns size or reduces len.  Only the
* n>0 path has a visible return.
*/
{
/* runs while len is positive and n, post-decremented when tested, was positive */
while(len>0 && n-->0)
{
/* a negative size is replaced by 1 */
if(size<0)
size = 1;
}
/*
* n can still be positive only when the loop stopped on len<=0;
* in that case the result is inlen+1
*/
if(n>0)
return(inlen+1);
}
/*
* cut each line of file <fdin> and put results to <fdout> using list <list>
*
* NOTE(review): the function header is not present and most of the
* controlling statements inside the loops are missing (an else with no
* if, c/len/ncol used without a visible declaration).  The remarks
* below describe only what can be read off the remaining lines.
*/
{
register char *inp;
register int skip; /* non-zero for don't copy */
/* outer loop; the statement that pairs with the else below is not visible */
while(1)
{
else
break;
/* inner loop over the current input */
while(1)
{
c = len;
ncol++;
ncol -= c;
/*
* NOTE(review): as shown this return is unconditional, which would make
* the rest of the loop body unreachable; its guarding condition is
* presumably missing -- confirm
*/
return(-1);
/* step the input pointer by c and, further down, shrink len by c */
inp += c;
if(ncol)
break;
len -= c;
}
}
return(c);
}
/*
* cut each line of file <fdin> and put results to <fdout> using list <list>
* stream <fdin> must be line buffered
*
* NOTE(review): the function header is not present and a large part of
* the body is missing: several braces have no controlling statement,
* two else keywords have no statement after them, the failed label
* targeted by the gotos is not visible, and inword, nodelim, empty,
* fdtmp and endline are used without a visible declaration.  The code
* cannot be followed end to end from what remains; the remarks below
* are limited to individual visible lines.
*/
{
register unsigned char *cp;
register int c, nfields;
register unsigned char *copy;
register unsigned char *endbuff;
int lastchar;
/* running total; the only visible update adds c to it near the end */
long offset = 0;
{
}
/* process each buffer */
{
/* process each line in the buffer */
{
if(!inword)
{
copy = 0;
else
}
else if(copy)
inword = 0;
/* scan until inword becomes non-zero */
while(!inword)
{
/* skip over non-delimiter characters */
/* check for end-of-line */
if(endline(c))
{
break;
break;
/* restore cuthdr->last. character */
inword++;
/* c == 0 leaves the scan loop */
if(!c)
break;
}
nodelim = 0;
/* more fields still to be counted down: keep scanning */
if(--nfields >0)
continue;
if(copy)
{
empty = 0;
goto failed;
copy = 0;
}
else
/* set to delimiter unless the first field */
}
if(!inword)
{
if(!copy)
{
if(nodelim)
{
{
if(offset)
{
}
}
}
else
}
if(offset)
}
goto failed;
}
/* see whether to save in tmp file */
{
/* copy line to tmpfile in case no fields */
if(!fdtmp)
offset +=c;
}
}
if(fdtmp)
return(0);
}
/*
* Entry point that processes the command-line options and then the
* file operands.
*
* NOTE(review): the function name and parameter list after "int" are
* not present, nor is the switch header that owns the case labels.
* Almost every case has lost its action; only case 'N' still shows
* one.  The case letters b, c, D, d, f, n, N, R, r and s match the
* option letters declared in usage[].
*/
int
{
register char *cp = 0;
int n;
int mode = 0;
/* delimiter defaults: tab between fields, newline between lines */
int wdelim = '\t';
int ldelim = '\n';
{
case 'b':
case 'c':
{
break;
}
/* NOTE(review): the statements selected by this if/else are not visible */
if(n=='b')
else
break;
case 'D':
break;
case 'd':
break;
case 'f':
{
break;
}
break;
case 'n':
break;
case 'N':
/* record the no-newline request as a bit in mode */
mode |= C_NONEWLINE;
break;
case 'R':
case 'r':
break;
case 's':
break;
case ':':
break;
case '?':
break;
}
/*
* NOTE(review): the statements these tests guard are not visible; cp
* starts out as 0 and no visible line assigns it before the tests below
*/
if (error_info.errors)
if(!cp)
{
}
if(!*cp)
argv++;
/* loop body and terminating while condition are not visible */
do
{
{
continue;
}
else
}
}