da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin/***********************************************************************
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* This software is part of the ast package *
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner* Copyright (c) 1992-2010 AT&T Intellectual Property *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* and is licensed under the *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Common Public License, Version 1.0 *
7c2fbfb345896881c631598ee3852ce9ce33fb07April Chin* by AT&T Intellectual Property *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* A copy of the License is available at *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* http://www.opensource.org/licenses/cpl1.0.txt *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Information and Software Systems Research *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* AT&T Research *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Florham Park NJ *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Glenn Fowler <gsf@research.att.com> *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* David Korn <dgk@research.att.com> *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin***********************************************************************/
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#pragma prototyped
/*
 * David Korn
 * AT&T Bell Laboratories
 *
 * cut selected bytes, characters or fields from each line of a file
 */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinstatic const char usage[] =
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner"[-?\n@(#)$Id: cut (AT&T Research) 2009-12-04 $\n]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinUSAGE_LICENSE
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[+NAME?cut - cut out selected columns or fields of each line of a file]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[+DESCRIPTION?\bcut\b bytes, characters, or character-delimited fields "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "from one or more files, contatenating them on standard output.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[+?The option argument \alist\a is a comma-separated or blank-separated "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "list of positive numbers and ranges. Ranges can be of three "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "forms. The first is two positive integers separated by a hyphen "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "(\alow\a\b-\b\ahigh\a), which represents all fields from \alow\a to "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "\ahigh\a. The second is a positive number preceded by a hyphen "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "(\b-\b\ahigh\a), which represents all fields from field \b1\b to "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "\ahigh\a. The last is a positive number followed by a hyphen "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "(\alow\a\b-\b), which represents all fields from \alow\a to the "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "last field, inclusive. Elements in the \alist\a can be repeated, "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "can overlap, and can appear in any order. The order of the "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "output is that of the input.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[+?One and only one of \b-b\b, \b-c\b, or \b-f\b must be specified.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[+?If no \afile\a is given, or if the \afile\a is \b-\b, \bcut\b "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "cuts from standard input. The start of the file is defined "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "as the current offset.]"
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner"[b:bytes]:[list?\bcut\b based on a list of byte counts.]"
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner"[c:characters]:[list?\bcut\b based on a list of character counts.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[d:delimiter]:[delim?The field character for the \b-f\b option is set "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "to \adelim\a. The default is the \btab\b character.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[f:fields]:[list?\bcut\b based on fields separated by the delimiter "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "character specified with the \b-d\b optiion.]"
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner"[n!:split?Split multibyte characters selected by the \b-b\b option.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[R|r:reclen]#[reclen?If \areclen\a > 0, the input will be read as fixed length "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "records of length \areclen\a when used with the \b-b\b or \b-c\b "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "option.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[s:suppress|only-delimited?Suppress lines with no delimiter characters, "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "when used with the \b-f\b option. By default, lines with no "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "delimiters will be passsed in untouched.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[D:line-delimeter|output-delimiter]:[ldelim?The line delimiter character for "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "the \b-f\b option is set to \aldelim\a. The default is the "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "\bnewline\b character.]"
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner"[N!:newline?Output new-lines at end of each record when used "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "with the \b-b\b or \b-c\b option.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"\n"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"\n[file ...]\n"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"\n"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[+EXIT STATUS?]{"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "[+0?All files processed successfully.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "[+>0?One or more files failed to open or could not be read.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"}"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[+SEE ALSO?\bpaste\b(1), \bgrep\b(1)]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#include <cmd.h>
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin#include <ctype.h>
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
/*
 * Description of one delimiter (field or line).
 */
typedef struct Delim_s
{
	char*		str;	/* delimiter bytes; written out as-is when len > 1 */
	int		len;	/* number of bytes in str */
	int		chr;	/* delimiter as a single character value; indexes
				 * Cut_t.space when len == 1 and is compared
				 * against decoded wide characters */
} Delim_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chintypedef struct Cut_s
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin{
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner int mb;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner int eob;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int cflag;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner int nosplit;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int sflag;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int nlflag;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int reclen;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner Delim_t wdelim;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner Delim_t ldelim;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner unsigned char space[UCHAR_MAX+1];
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int list[2]; /* NOTE: must be last member */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin} Cut_t;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
#define HUGE		INT_MAX		/* terminates Cut_t.list */
#define BLOCK		(8*1024)	/* parenthesized so it is safe inside larger expressions */

/* mode bits passed to cutinit() */
#define C_BYTES		1
#define C_CHARS		2
#define C_FIELDS	4
#define C_SUPRESS	8
#define C_NOSPLIT	16
#define C_NONEWLINE	32

/* byte classes stored in Cut_t.space */
#define SP_LINE		1	/* line delimiter byte */
#define SP_WORD		2	/* single byte field delimiter */
#define SP_WIDE		3	/* byte in the upper half of the table (multibyte mode) */

/* decode the multibyte character at p (at most n bytes) into w */
#define mb2wc(w,p,n)	(*ast.mb_towc)(&(w),(char*)(p),(n))
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner
/*
 * qsort() comparison function: order two int arrays by their first element.
 * Returns -1, 0 or 1 when *a is less than, equal to or greater than *b.
 */

static int
mycomp(register const void* a, register const void* b)
{
	const int	lhs = *(const int*)a;
	const int	rhs = *(const int*)b;

	return (lhs > rhs) - (lhs < rhs);
}
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulknerstatic Cut_t*
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulknercutinit(int mode, char* str, Delim_t* wdelim, Delim_t* ldelim, size_t reclen)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin{
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register int* lp;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register int c;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register int n = 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register int range = 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register char* cp = str;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner Cut_t* cut;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (!(cut = (Cut_t*)stakalloc(sizeof(Cut_t) + strlen(cp) * sizeof(int))))
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin error(ERROR_exit(1), "out of space");
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (cut->mb = mbwide())
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner memset(cut->space, 0, sizeof(cut->space) / 2);
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner memset(cut->space + sizeof(cut->space) / 2, SP_WIDE, sizeof(cut->space) / 2);
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner else
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner memset(cut->space, 0, sizeof(cut->space));
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner cut->wdelim = *wdelim;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (wdelim->len == 1)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner cut->space[wdelim->chr] = SP_WORD;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner cut->ldelim = *ldelim;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner cut->eob = (ldelim->len == 1) ? ldelim->chr : 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner cut->space[cut->eob] = SP_LINE;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner cut->cflag = (mode&C_CHARS) && cut->mb;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner cut->nosplit = (mode&(C_BYTES|C_NOSPLIT)) == (C_BYTES|C_NOSPLIT) && cut->mb;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner cut->sflag = (mode&C_SUPRESS) != 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner cut->nlflag = (mode&C_NONEWLINE) != 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner cut->reclen = reclen;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner lp = cut->list;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner for (;;)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner switch(c = *cp++)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin case ' ':
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin case '\t':
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin while(*cp==' ' || *cp=='\t')
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin cp++;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner /*FALLTHROUGH*/
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin case 0:
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin case ',':
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if(range)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin --range;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if((n = (n ? (n-range) : (HUGE-1))) < 0)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin error(ERROR_exit(1),"invalid range for c/f option");
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin *lp++ = range;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin *lp++ = n;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin else
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin *lp++ = --n;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin *lp++ = 1;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if(c==0)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register int *dp;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin *lp = HUGE;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner n = 1 + (lp-cut->list)/2;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner qsort(lp=cut->list,n,2*sizeof(*lp),mycomp);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* eliminate overlapping regions */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin for(n=0,range= -2,dp=lp; *lp!=HUGE; lp+=2)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if(lp[0] <= range)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if(lp[1]==HUGE)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin dp[-1] = HUGE;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin break;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if((c = lp[0]+lp[1]-range)>0)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin range += c;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin dp[-1] += c;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin else
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin range = *dp++ = lp[0];
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if(lp[1]==HUGE)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin *dp++ = HUGE;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin break;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin range += (*dp++ = lp[1]);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin *dp = HUGE;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner lp = cut->list;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* convert ranges into gaps */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin for(n=0; *lp!=HUGE; lp+=2)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin c = *lp;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin *lp -= n;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin n = c+lp[1];
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner return cut;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin n = range = 0;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin break;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin case '-':
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if(range)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin error(ERROR_exit(1),"bad list for c/f option");
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin range = n?n:1;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin n = 0;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin break;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin default:
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if(!isdigit(c))
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin error(ERROR_exit(1),"bad list for c/f option");
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin n = 10*n + (c-'0');
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner break;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* NOTREACHED */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin}
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin/*
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * cut each line of file <fdin> and put results to <fdout> using list <list>
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulknerstatic void
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulknercutcols(Cut_t* cut, Sfio_t* fdin, Sfio_t* fdout)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin{
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register int c;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register int len;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register int ncol = 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register const int* lp = cut->list;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register char* bp;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register int skip; /* non-zero for don't copy */
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner int must;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner char* ep;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner const char* xx;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner for (;;)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (len = cut->reclen)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner bp = sfreserve(fdin, len, -1);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin else
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner bp = sfgetr(fdin, '\n', 0);
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (!bp && !(bp = sfgetr(fdin, 0, SF_LASTR)))
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin break;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin len = sfvalue(fdin);
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner ep = bp + len;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner xx = 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (!(ncol = skip = *(lp = cut->list)))
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin ncol = *++lp;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner must = 1;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner do
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (cut->nosplit)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register const char* s = bp;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register int w = len < ncol ? len : ncol;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register int z;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner while (w > 0)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (!(*s & 0x80))
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner z = 1;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner else if ((z = mblen(s, w)) <= 0)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (s == bp && xx)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner w += s - xx;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner bp = (char*)(s = xx);
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner xx = 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner continue;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner xx = s;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (skip)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner s += w;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner w = 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner break;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner s += z;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner w -= z;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner c = s - bp;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner ncol = !w && ncol >= len;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner else if (cut->cflag)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register const char* s = bp;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register int w = len;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register int z;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner while (w > 0 && ncol > 0)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner ncol--;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (!(*s & 0x80) || (z = mblen(s, w)) <= 0)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner z = 1;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner s += z;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner w -= z;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner c = s - bp;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner ncol = !w && (ncol || !skip);
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner else
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if ((c = ncol) > len)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner c = len;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner else if (c == len && !skip)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner ncol++;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner ncol -= c;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (!skip && c)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (sfwrite(fdout, (char*)bp, c) < 0)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner return;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner must = 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner bp += c;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (ncol)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin break;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin len -= c;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin ncol = *++lp;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin skip = !skip;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner } while (ncol != HUGE);
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (!cut->nlflag && (skip || must || cut->reclen))
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (cut->ldelim.len > 1)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner sfwrite(fdout, cut->ldelim.str, cut->ldelim.len);
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner else
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner sfputc(fdout, cut->ldelim.chr);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin}
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin/*
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * cut each line of file <fdin> and put results to <fdout> using list <list>
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * stream <fdin> must be line buffered
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulknerstatic void
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulknercutfields(Cut_t* cut, Sfio_t* fdin, Sfio_t* fdout)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin{
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register unsigned char *sp = cut->space;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register unsigned char *cp;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register unsigned char *wp;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register int c, nfields;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register const int *lp = cut->list;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register unsigned char *copy;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register int nodelim, empty, inword=0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register unsigned char *ep;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner unsigned char *bp, *first;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin int lastchar;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner wchar_t w;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin Sfio_t *fdtmp = 0;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin long offset = 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner unsigned char mb[8];
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* process each buffer */
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner while ((bp = (unsigned char*)sfreserve(fdin, SF_UNBOUND, -1)) && (c = sfvalue(fdin)) > 0)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner cp = bp;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner ep = cp + --c;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if((lastchar = cp[c]) != cut->eob)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner *ep = cut->eob;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* process each line in the buffer */
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner while (cp <= ep)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin first = cp;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (!inword)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin nodelim = empty = 1;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin copy = cp;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (nfields = *(lp = cut->list))
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin copy = 0;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin else
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin nfields = *++lp;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner else if (copy)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin copy = cp;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin inword = 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner do
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* skip over non-delimiter characters */
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (cut->mb)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner for (;;)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner switch (c = sp[*(unsigned char*)cp++])
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner case 0:
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner continue;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner case SP_WIDE:
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner wp = --cp;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner while ((c = mb2wc(w, cp, ep - cp)) <= 0)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner /* mb char possibly spanning buffer boundary -- fun stuff */
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if ((ep - cp) < mbmax())
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner int i;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner int j;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner int k;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (lastchar != cut->eob)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner *ep = lastchar;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if ((c = mb2wc(w, cp, ep - cp)) > 0)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner break;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (copy)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner empty = 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if ((c = cp - copy) > 0 && sfwrite(fdout, (char*)copy, c) < 0)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner goto failed;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner for (i = 0; i <= (ep - cp); i++)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner mb[i] = cp[i];
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (!(bp = (unsigned char*)sfreserve(fdin, SF_UNBOUND, -1)) || (c = sfvalue(fdin)) <= 0)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner goto failed;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner cp = bp;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner ep = cp + --c;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if ((lastchar = cp[c]) != cut->eob)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner *ep = cut->eob;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner j = i;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner k = 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner while (j < mbmax())
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner mb[j++] = cp[k++];
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if ((c = mb2wc(w, (char*)mb, j)) <= 0)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner c = i;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner w = 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner first = bp = cp += c - i;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (copy)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner copy = bp;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (w == cut->ldelim.chr)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner lastchar = cut->ldelim.chr;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner else if (w != cut->wdelim.chr)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner empty = 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (sfwrite(fdout, (char*)mb, c) < 0)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner goto failed;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner c = 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner else
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner w = *cp;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner c = 1;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner break;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner cp += c;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner c = w;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (c == cut->wdelim.chr)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner c = SP_WORD;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner break;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (c == cut->ldelim.chr)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner c = SP_LINE;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner break;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner continue;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner default:
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner wp = cp - 1;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner break;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner break;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner else
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner while (!(c = sp[*cp++]));
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner wp = cp - 1;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* check for end-of-line */
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (c == SP_LINE)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (cp <= ep)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin break;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (lastchar == cut->ldelim.chr)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin break;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner /* restore cut->last character */
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (lastchar != cut->eob)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner *ep = lastchar;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin inword++;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (!sp[lastchar])
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin break;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin nodelim = 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (--nfields > 0)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin continue;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin nfields = *++lp;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (copy)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin empty = 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if ((c = wp - copy) > 0 && sfwrite(fdout, (char*)copy, c) < 0)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin goto failed;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin copy = 0;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin else
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* set to delimiter unless the first field */
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner copy = empty ? cp : wp;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner } while (!inword);
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (!inword)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (!copy)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (nodelim)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (!cut->sflag)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (offset)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin sfseek(fdtmp,(Sfoff_t)0,SEEK_SET);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin sfmove(fdtmp,fdout,offset,-1);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin copy = first;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin else
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin sfputc(fdout,'\n');
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (offset)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin sfseek(fdtmp,offset=0,SEEK_SET);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (copy && (c=cp-copy)>0 && (!nodelim || !cut->sflag) && sfwrite(fdout,(char*)copy,c)< 0)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin goto failed;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* see whether to save in tmp file */
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if(inword && nodelim && !cut->sflag && (c=cp-first)>0)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* copy line to tmpfile in case no fields */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if(!fdtmp)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin fdtmp = sftmp(BLOCK);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin sfwrite(fdtmp,(char*)first,c);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin offset +=c;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner failed:
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if(fdtmp)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin sfclose(fdtmp);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin}
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinint
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulknerb_cut(int argc, char** argv, void* context)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin{
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register char* cp = 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner register Sfio_t* fp;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner char* s;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner int n;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner Cut_t* cut;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner int mode = 0;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner Delim_t wdelim;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner Delim_t ldelim;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner size_t reclen = 0;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin cmdinit(argc, argv, context, ERROR_CATALOG, 0);
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner wdelim.chr = '\t';
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner ldelim.chr = '\n';
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner wdelim.len = ldelim.len = 1;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner for (;;)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner switch (n = optget(argv, usage))
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner case 0:
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin break;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner case 'b':
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner case 'c':
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if(mode&C_FIELDS)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner error(2, "f option already specified");
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner continue;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner cp = opt_info.arg;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if(n=='b')
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner mode |= C_BYTES;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner else
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner mode |= C_CHARS;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner continue;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner case 'D':
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner ldelim.str = opt_info.arg;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (mbwide())
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner s = opt_info.arg;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner ldelim.chr = mbchar(s);
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if ((n = s - opt_info.arg) > 1)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner ldelim.len = n;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner continue;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner ldelim.chr = *(unsigned char*)opt_info.arg;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner ldelim.len = 1;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner continue;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner case 'd':
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner wdelim.str = opt_info.arg;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if (mbwide())
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner s = opt_info.arg;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner wdelim.chr = mbchar(s);
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if ((n = s - opt_info.arg) > 1)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner wdelim.len = n;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner continue;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner wdelim.chr = *(unsigned char*)opt_info.arg;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner wdelim.len = 1;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner continue;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner case 'f':
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if(mode&(C_CHARS|C_BYTES))
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner {
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner error(2, "c option already specified");
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner continue;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner }
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner cp = opt_info.arg;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner mode |= C_FIELDS;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner continue;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner case 'n':
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner mode |= C_NOSPLIT;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner continue;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner case 'N':
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner mode |= C_NONEWLINE;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner continue;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner case 'R':
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner case 'r':
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner if(opt_info.num>0)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner reclen = opt_info.num;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner continue;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner case 's':
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner mode |= C_SUPRESS;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner continue;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner case ':':
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner error(2, "%s", opt_info.arg);
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner break;
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner case '?':
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner error(ERROR_usage(2), "%s", opt_info.arg);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin break;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin break;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin argv += opt_info.index;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (error_info.errors)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin error(ERROR_usage(2), "%s",optusage(NiL));
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if(!cp)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin error(2, "b, c or f option must be specified");
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin error(ERROR_usage(2), "%s", optusage(NiL));
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if(!*cp)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin error(3, "non-empty b, c or f option must be specified");
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if((mode & (C_FIELDS|C_SUPRESS)) == C_SUPRESS)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin error(3, "s option requires f option");
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner cut = cutinit(mode, cp, &wdelim, &ldelim, reclen);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if(cp = *argv)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin argv++;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin do
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if(!cp || streq(cp,"-"))
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin fp = sfstdin;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin else if(!(fp = sfopen(NiL,cp,"r")))
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin {
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin error(ERROR_system(0),"%s: cannot open",cp);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin continue;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin }
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if(mode&C_FIELDS)
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner cutfields(cut,fp,sfstdout);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin else
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner cutcols(cut,fp,sfstdout);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if(fp!=sfstdin)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin sfclose(fp);
7c2fbfb345896881c631598ee3852ce9ce33fb07April Chin } while(cp = *argv++);
7c2fbfb345896881c631598ee3852ce9ce33fb07April Chin if (sfsync(sfstdout))
7c2fbfb345896881c631598ee3852ce9ce33fb07April Chin error(ERROR_system(0), "write error");
3e14f97f673e8a630f076077de35afdd43dc1587Roger A. Faulkner return error_info.errors != 0;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin}