uniq.c revision 1
1N/A/***********************************************************************
1N/A* *
1N/A* This software is part of the ast package *
1N/A* Copyright (c) 1992-2011 AT&T Intellectual Property *
1N/A* and is licensed under the *
1N/A* Common Public License, Version 1.0 *
1N/A* by AT&T Intellectual Property *
1N/A* *
1N/A* A copy of the License is available at *
1N/A* http://www.opensource.org/licenses/cpl1.0.txt *
1N/A* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
1N/A* *
1N/A* Information and Software Systems Research *
1N/A* AT&T Research *
1N/A* Florham Park NJ *
1N/A* *
1N/A* Glenn Fowler <gsf@research.att.com> *
1N/A* David Korn <dgk@research.att.com> *
1N/A* *
1N/A***********************************************************************/
1N/A#pragma prototyped
1N/A/*
1N/A * uniq
1N/A *
1N/A * Written by David Korn
1N/A */
1N/A
/*
 * Option description string for the uniq builtin.
 *
 * b_uniq() passes this string to optget() on every option-parsing
 * iteration; the same text documents the options, the operands
 * (infile and outfile), the exit status and related commands.
 * It is data read at run time, so its bytes must not be altered.
 * USAGE_LICENSE is not defined in this file; it is expected to be
 * supplied by the build environment -- TODO confirm where.
 */
static const char usage[] =
"[-n?\n@(#)$Id: uniq (AT&T Research) 2009-11-28 $\n]"
USAGE_LICENSE
"[+NAME?uniq - Report or filter out repeated lines in a file]"
"[+DESCRIPTION?\buniq\b reads the input, compares adjacent lines, and "
	"writes one copy of each input line on the output. The second "
	"and succeeding copies of the repeated adjacent lines are not "
	"written.]"
"[+?If the output file, \aoutfile\a, is not specified, \buniq\b writes "
	"to standard output. If no \ainfile\a is given, or if the \ainfile\a "
	"is \b-\b, \buniq\b reads from standard input with the start of "
	"the file defined as the current offset.]"
"[c:count?Output the number of times each line occurred along with "
	"the line.]"
"[d:repeated|duplicates?Output the first of each duplicate line.]"
"[D:all-repeated?Output all duplicate lines as a group with an empty "
	"line delimiter specified by \adelimit\a:]:?[delimit:=none]"
	"{"
		"[n:none?Do not delimit duplicate groups.]"
		"[p:prepend?Prepend an empty line before each group.]"
		"[s:separate?Separate each group with an empty line.]"
	"}"
"[f:skip-fields]#[fields?\afields\a is the number of fields to skip over "
	"before checking for uniqueness. A field is the minimal string matching "
	"the BRE \b[[:blank:]]]]*[^[:blank:]]]]*\b. -\anumber\a is equivalent to "
	"\b--skip-fields\b=\anumber\a.]"
"[i:ignore-case?Ignore case in comparisons.]"
"[s:skip-chars]#[chars?\achars\a is the number of characters to skip over "
	"before checking for uniqueness. If specified along with \b-f\b, "
	"the first \achars\a after the first \afields\a are ignored. If "
	"the \achars\a specifies more characters than are on the line, "
	"an empty string will be used for comparison. +\anumber\a is "
	"equivalent to \b--skip-chars\b=\anumber\a.]"
"[u:unique?Output unique lines.]"
"[w:check-chars]#[chars?\achars\a is the number of characters to compare "
	"after skipping any specified fields and characters.]"
"\n"
"\n[infile [outfile]]\n"
"\n"
"[+EXIT STATUS?]{"
	"[+0?The input file was successfully processed.]"
	"[+>0?An error occurred.]"
"}"
"[+SEE ALSO?\bsort\b(1), \bgrep\b(1)]"
;
1N/A
1N/A#include <cmd.h>
1N/A
/*
 * Output-mode bits combined into the `mode' argument of uniq().
 */
enum
{
	C_FLAG = 1,	/* -c: prefix each record with its occurrence count */
	D_FLAG = 2,	/* -d / -D: drop records that were not repeated */
	U_FLAG = 4	/* -u: drop records that were repeated */
};

/*
 * Layout of the count column written when C_FLAG is set.
 */
enum
{
	CWIDTH = 4,	/* digits reserved for the count field */
	MAXCNT = 9999	/* largest count formatted in place in that field */
};

/*
 * Record comparison callback: returns 0 when the first n bytes of the
 * two records are considered equal.
 */
typedef int (*Compare_f)(const char*, const char*, size_t);
1N/A
1N/Astatic int uniq(Sfio_t *fdin, Sfio_t *fdout, int fields, int chars, int width, int mode, int* all, Compare_f compare)
1N/A{
1N/A register int n, f, outsize=0, mb = mbwide();
1N/A register char *cp, *ep, *mp, *bufp, *outp;
1N/A char *orecp, *sbufp=0, *outbuff;
1N/A int reclen,oreclen= -1,count=0,cwidth=0,sep,next;
1N/A if(mode&C_FLAG)
1N/A cwidth = CWIDTH+1;
1N/A while(1)
1N/A {
1N/A if(bufp = sfgetr(fdin,'\n',0))
1N/A n = sfvalue(fdin);
1N/A else if(bufp = sfgetr(fdin,'\n',SF_LASTR))
1N/A {
1N/A n = sfvalue(fdin);
1N/A bufp = memcpy(fmtbuf(n + 1), bufp, n);
1N/A bufp[n++] = '\n';
1N/A }
1N/A else
1N/A n = 0;
1N/A if (n)
1N/A {
1N/A cp = bufp;
1N/A ep = cp + n;
1N/A if (f = fields)
1N/A while (f-->0 && cp<ep) /* skip over fields */
1N/A {
1N/A while (cp<ep && *cp==' ' || *cp=='\t')
1N/A cp++;
1N/A while (cp<ep && *cp!=' ' && *cp!='\t')
1N/A cp++;
1N/A }
1N/A if (chars)
1N/A {
1N/A if (mb)
1N/A for (f = chars; f; f--)
1N/A mbchar(cp);
1N/A else
1N/A cp += chars;
1N/A }
1N/A if ((reclen = n - (cp - bufp)) <= 0)
1N/A {
1N/A reclen = 1;
1N/A cp = bufp + n - 1;
1N/A }
1N/A else if (width >= 0 && width < reclen)
1N/A {
1N/A if (mb)
1N/A {
1N/A reclen = 0;
1N/A mp = cp;
1N/A while (reclen < width && mp < ep)
1N/A {
1N/A reclen++;
1N/A mbchar(mp);
1N/A }
1N/A reclen = mp - cp;
1N/A }
1N/A else
1N/A reclen = width;
1N/A }
1N/A }
1N/A else
1N/A reclen = -2;
1N/A if(reclen==oreclen && (!reclen || !(*compare)(cp,orecp,reclen)))
1N/A {
1N/A count++;
1N/A if (!all)
1N/A continue;
1N/A next = count;
1N/A }
1N/A else
1N/A {
1N/A next = 0;
1N/A if(outsize>0)
1N/A {
1N/A if(((mode&D_FLAG)&&count==0) || ((mode&U_FLAG)&&count))
1N/A {
1N/A if(outp!=sbufp)
1N/A sfwrite(fdout,outp,0);
1N/A }
1N/A else
1N/A {
1N/A if(cwidth)
1N/A {
1N/A if(count<9)
1N/A {
1N/A f = 0;
1N/A while(f < CWIDTH-1)
1N/A outp[f++] = ' ';
1N/A outp[f++] = '0' + count + 1;
1N/A outp[f] = ' ';
1N/A }
1N/A else if(count<MAXCNT)
1N/A {
1N/A count++;
1N/A f = CWIDTH;
1N/A outp[f--] = ' ';
1N/A do
1N/A {
1N/A outp[f--] = '0' + (count % 10);
1N/A } while (count /= 10);
1N/A while (f >= 0)
1N/A outp[f--] = ' ';
1N/A }
1N/A else
1N/A {
1N/A outsize -= (CWIDTH+1);
1N/A if(outp!=sbufp)
1N/A {
1N/A if(!(sbufp=fmtbuf(outsize)))
1N/A return(1);
1N/A memcpy(sbufp,outp+CWIDTH+1,outsize);
1N/A sfwrite(fdout,outp,0);
1N/A outp = sbufp;
1N/A }
1N/A else
1N/A outp += CWIDTH+1;
1N/A sfprintf(fdout,"%4d ",count+1);
1N/A }
1N/A }
1N/A if(sfwrite(fdout,outp,outsize) != outsize)
1N/A return(1);
1N/A }
1N/A }
1N/A }
1N/A if(n==0)
1N/A break;
1N/A if(count = next)
1N/A {
1N/A if(sfwrite(fdout,outp,outsize) != outsize)
1N/A return(1);
1N/A if(*all >= 0)
1N/A *all = 1;
1N/A sep = 0;
1N/A }
1N/A else
1N/A sep = all && *all > 0;
1N/A /* save current record */
1N/A if (!(outbuff = sfreserve(fdout, 0, 0)) || (outsize = sfvalue(fdout)) < 0)
1N/A return(1);
1N/A outp = outbuff;
1N/A if(outsize < n+cwidth+sep)
1N/A {
1N/A /* no room in outp, clear lock and use side buffer */
1N/A sfwrite(fdout,outp,0);
1N/A if(!(sbufp = outp=fmtbuf(outsize=n+cwidth+sep)))
1N/A return(1);
1N/A }
1N/A else
1N/A outsize = n+cwidth+sep;
1N/A memcpy(outp+cwidth+sep,bufp,n);
1N/A if(sep)
1N/A outp[cwidth] = '\n';
1N/A oreclen = reclen;
1N/A orecp = outp+cwidth+sep + (cp-bufp);
1N/A }
1N/A return(0);
1N/A}
1N/A
1N/Aint
1N/Ab_uniq(int argc, char** argv, void* context)
1N/A{
1N/A register int n, mode=0;
1N/A register char *cp;
1N/A int fields=0, chars=0, width=-1;
1N/A Sfio_t *fpin, *fpout;
1N/A int* all = 0;
1N/A int sep;
1N/A Compare_f compare = (Compare_f)memcmp;
1N/A
1N/A cmdinit(argc, argv, context, ERROR_CATALOG, 0);
1N/A for (;;)
1N/A {
1N/A switch (optget(argv, usage))
1N/A {
1N/A case 'c':
1N/A mode |= C_FLAG;
1N/A continue;
1N/A case 'd':
1N/A mode |= D_FLAG;
1N/A continue;
1N/A case 'D':
1N/A mode |= D_FLAG;
1N/A switch ((int)opt_info.num)
1N/A {
1N/A case 'p':
1N/A sep = 1;
1N/A break;
1N/A case 's':
1N/A sep = 0;
1N/A break;
1N/A default:
1N/A sep = -1;
1N/A break;
1N/A }
1N/A all = &sep;
1N/A continue;
1N/A case 'i':
1N/A compare = (Compare_f)strncasecmp;
1N/A continue;
1N/A case 'u':
1N/A mode |= U_FLAG;
1N/A continue;
1N/A case 'f':
1N/A if(*opt_info.option=='-')
1N/A fields = opt_info.num;
1N/A else
1N/A chars = opt_info.num;
1N/A continue;
1N/A case 's':
1N/A chars = opt_info.num;
1N/A continue;
1N/A case 'w':
1N/A width = opt_info.num;
1N/A continue;
1N/A case ':':
1N/A error(2, "%s", opt_info.arg);
1N/A break;
1N/A case '?':
1N/A error(ERROR_usage(2), "%s", opt_info.arg);
1N/A break;
1N/A }
1N/A break;
1N/A }
1N/A argv += opt_info.index;
1N/A if(all && (mode&C_FLAG))
1N/A error(2, "-c and -D are mutually exclusive");
1N/A if(error_info.errors)
1N/A error(ERROR_usage(2), "%s", optusage(NiL));
1N/A if((cp = *argv) && (argv++,!streq(cp,"-")))
1N/A {
1N/A if(!(fpin = sfopen(NiL,cp,"r")))
1N/A error(ERROR_system(1),"%s: cannot open",cp);
1N/A }
1N/A else
1N/A fpin = sfstdin;
1N/A if(cp = *argv)
1N/A {
1N/A argv++;
1N/A if(!(fpout = sfopen(NiL,cp,"w")))
1N/A error(ERROR_system(1),"%s: cannot create",cp);
1N/A }
1N/A else
1N/A fpout = sfstdout;
1N/A if(*argv)
1N/A {
1N/A error(2, "too many arguments");
1N/A error(ERROR_usage(2), "%s", optusage(NiL));
1N/A }
1N/A error_info.errors = uniq(fpin,fpout,fields,chars,width,mode,all,compare);
1N/A if(fpin!=sfstdin)
1N/A sfclose(fpin);
1N/A if(fpout!=sfstdout)
1N/A sfclose(fpout);
1N/A return(error_info.errors);
1N/A}
1N/A