cut.c revision 7c2fbfb345896881c631598ee3852ce9ce33fb07
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin/***********************************************************************
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* This software is part of the ast package *
7c2fbfb345896881c631598ee3852ce9ce33fb07April Chin* Copyright (c) 1992-2008 AT&T Intellectual Property *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* and is licensed under the *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Common Public License, Version 1.0 *
7c2fbfb345896881c631598ee3852ce9ce33fb07April Chin* by AT&T Intellectual Property *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* A copy of the License is available at *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Information and Software Systems Research *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* AT&T Research *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Florham Park NJ *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Glenn Fowler <gsf@research.att.com> *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* David Korn <dgk@research.att.com> *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin***********************************************************************/
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * David Korn
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * AT&T Bell Laboratories
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * cut [-sN] [-f flist] [-c clist] [-d delim] [-D delim] [-r reclen] [file] ...
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * cut fields or columns from each line of a file
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinstatic const char usage[] =
7c2fbfb345896881c631598ee3852ce9ce33fb07April Chin"[-?\n@(#)$Id: cut (AT&T Research) 2008-04-01 $\n]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[+NAME?cut - cut out selected columns or fields of each line of a file]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[+DESCRIPTION?\bcut\b bytes, characters, or character-delimited fields "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "from one or more files, contatenating them on standard output.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[+?The option argument \alist\a is a comma-separated or blank-separated "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "list of positive numbers and ranges. Ranges can be of three "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "forms. The first is two positive integers separated by a hyphen "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "(\alow\a\b-\b\ahigh\a), which represents all fields from \alow\a to "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "\ahigh\a. The second is a positive number preceded by a hyphen "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "(\b-\b\ahigh\a), which represents all fields from field \b1\b to "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "\ahigh\a. The last is a positive number followed by a hyphen "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "(\alow\a\b-\b), which represents all fields from \alow\a to the "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "last field, inclusive. Elements in the \alist\a can be repeated, "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "can overlap, and can appear in any order. The order of the "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "output is that of the input.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[+?One and only one of \b-b\b, \b-c\b, or \b-f\b must be specified.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[+?If no \afile\a is given, or if the \afile\a is \b-\b, \bcut\b "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "cuts from standard input. The start of the file is defined "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "as the current offset.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[b:bytes]:[list?\bcut\b based on a list of bytes.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[c:characters]:[list?\bcut\b based on a list of characters.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[d:delimiter]:[delim?The field character for the \b-f\b option is set "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "to \adelim\a. The default is the \btab\b character.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[f:fields]:[list?\bcut\b based on fields separated by the delimiter "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "character specified with the \b-d\b optiion.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[n:nosplit?Do not split characters. Currently ignored.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[R|r:reclen]#[reclen?If \areclen\a > 0, the input will be read as fixed length "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "records of length \areclen\a when used with the \b-b\b or \b-c\b "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "option.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[s:suppress|only-delimited?Suppress lines with no delimiter characters, "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "when used with the \b-f\b option. By default, lines with no "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "delimiters will be passsed in untouched.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[D:line-delimeter|output-delimiter]:[ldelim?The line delimiter character for "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "the \b-f\b option is set to \aldelim\a. The default is the "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "\bnewline\b character.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[N:nonewline?Do not output new-lines at end of each record when used "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "with the \b-b\b or \b-c\b option.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"\n[file ...]\n"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[+EXIT STATUS?]{"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "[+0?All files processed successfully.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "[+>0?One or more files failed to open or could not be read.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[+SEE ALSO?\bpaste\b(1), \bgrep\b(1)]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chintypedef struct Last_s
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chintypedef struct Cut_s
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * compare the first of an array of integers
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinstatic int mycomp(register const void *a,register const void *b)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return(*((int*)a) - *((int*)b));
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinstatic Cut_t *cutinit(int mode,char *str,int wdelim,int ldelim,size_t reclen)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register int *lp, c, n=0;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register int range = 0;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (!(cuthdr = (Cut_t*)stakalloc(sizeof(Cut_t)+strlen(cp)*sizeof(int))))
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin case '\t':
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register int *dp;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* eliminate overlapping regions */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* convert ranges into gaps */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* NOTREACHED */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * advance <cp> by <n> multi-byte characters
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinstatic int advance(const char *str, register int n, register int inlen)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin while(len>0 && n-->0)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * cut each line of file <fdin> and put results to <fdout> using the cut list in <cuthdr>
7c2fbfb345896881c631598ee3852ce9ce33fb07April Chinstatic void cutcols(Cut_t *cuthdr,Sfio_t *fdin,Sfio_t *fdout)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register char *inp;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if((c=(cuthdr->cflag?advance(inp,ncol,len):ncol)) > len)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * cut each line of file <fdin> and put results to <fdout> using the cut list in <cuthdr>
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * stream <fdin> must be line buffered
/*
 * endline(c): evaluates to (c)<0 when (signed char)-1 is negative,
 * and to (c)==((char)-1) otherwise.
 * NOTE(review): presumably <c> is an entry read from cuthdr->space[],
 * whose line-delimiter slot is set to -1 in cutfields() -- confirm
 * against the declared element type of that table.
 */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#define endline(c) (((signed char)-1)<0?(c)<0:(c)==((char)-1))
7c2fbfb345896881c631598ee3852ce9ce33fb07April Chinstatic void cutfields(Cut_t *cuthdr,Sfio_t *fdin,Sfio_t *fdout)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register unsigned char *cp;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register int c, nfields;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register unsigned char *copy;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register unsigned char *endbuff;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin cuthdr->space[cuthdr->last.ldelim=cuthdr->ldelim] = -1;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* process each buffer */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin while ((inbuff = (unsigned char*)sfreserve(fdin, SF_UNBOUND, 0)) && (c = sfvalue(fdin)) > 0)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* process each line in the buffer */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* skip over non-delimiter characters */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* check for end-of-line */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* restore cuthdr->last. character */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if((c=(cp-1)-copy)>0 && sfwrite(fdout,(char*)copy,c)< 0)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* set to delimiter unless the first field */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if(copy && (c=cp-copy)>0 && (!nodelim || !cuthdr->sflag) && sfwrite(fdout,(char*)copy,c)< 0)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* see whether to save in tmp file */
7c2fbfb345896881c631598ee3852ce9ce33fb07April Chin if(inword && nodelim && !cuthdr->sflag && (c=cp-first)>0)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* copy line to tmpfile in case no fields */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register char *cp = 0;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if(n=='b')
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin error(3, "non-empty b, c or f option must be specified");