uniq.c revision 34f9b3eef6fdadbda0a846aa4d68691ac40eace5
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin/***********************************************************************
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* This software is part of the ast package *
34f9b3eef6fdadbda0a846aa4d68691ac40eace5Roland Mainz* Copyright (c) 1992-2009 AT&T Intellectual Property *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* and is licensed under the *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Common Public License, Version 1.0 *
7c2fbfb345896881c631598ee3852ce9ce33fb07April Chin* by AT&T Intellectual Property *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* A copy of the License is available at *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Information and Software Systems Research *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* AT&T Research *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Florham Park NJ *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* Glenn Fowler <gsf@research.att.com> *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin* David Korn <dgk@research.att.com> *
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin***********************************************************************/
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin * Written by David Korn
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinstatic const char usage[] =
34f9b3eef6fdadbda0a846aa4d68691ac40eace5Roland Mainz"[-n?\n@(#)$Id: uniq (AT&T Research) 2009-08-10 $\n]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[+NAME?uniq - Report or filter out repeated lines in a file]"
34f9b3eef6fdadbda0a846aa4d68691ac40eace5Roland Mainz"[+DESCRIPTION?\buniq\b reads the input, compares adjacent lines, and "
34f9b3eef6fdadbda0a846aa4d68691ac40eace5Roland Mainz "writes one copy of each input line on the output. The second "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "and succeeding copies of the repeated adjacent lines are not "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "written.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[+?If the output file, \aoutfile\a, is not specified, \buniq\b writes "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "to standard output. If no \ainfile\a is given, or if the \ainfile\a "
34f9b3eef6fdadbda0a846aa4d68691ac40eace5Roland Mainz "is \b-\b, \buniq\b reads from standard input with the start of "
34f9b3eef6fdadbda0a846aa4d68691ac40eace5Roland Mainz "the file defined as the current offset.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[c:count?Output the number of times each line occurred along with "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "the line.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[d:repeated|duplicates?Output the first of each duplicate line.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[D:all-repeated?Output all duplicate lines as a group with an empty "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "line delimiter specified by \adelimit\a:]:?[delimit:=none]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "[n:none?Do not delimit duplicate groups.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "[p:prepend?Prepend an empty line before each group.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "[s:separate?Separate each group with an empty line.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[f:skip-fields]#[fields?\afields\a is the number of fields to skip over "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "before checking for uniqueness. A field is the minimal string matching "
34f9b3eef6fdadbda0a846aa4d68691ac40eace5Roland Mainz "the BRE \b[[:blank:]]]]*[^[:blank:]]]]*\b. -\anumber\a is equivalent to "
34f9b3eef6fdadbda0a846aa4d68691ac40eace5Roland Mainz "\b--skip-fields\b=\anumber\a.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[i:ignore-case?Ignore case in comparisons.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[s:skip-chars]#[chars?\achars\a is the number of characters to skip over "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "before checking for uniqueness. If specified along with \b-f\b, "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "the first \achars\a after the first \afields\a are ignored. If "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "the \achars\a specifies more characters than are on the line, "
34f9b3eef6fdadbda0a846aa4d68691ac40eace5Roland Mainz "an empty string will be used for comparison. +\anumber\a is "
34f9b3eef6fdadbda0a846aa4d68691ac40eace5Roland Mainz "equivalent to \b--skip-chars\b=\anumber\a.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[u:unique?Output unique lines.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[w:check-chars]#[chars?\achars\a is the number of characters to compare "
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "after skipping any specified fields and characters.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"\n[infile [outfile]]\n"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[+EXIT STATUS?]{"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "[+0?The input file was successfully processed.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin "[+>0?An error occurred.]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin"[+SEE ALSO?\bsort\b(1), \bgrep\b(1)]"
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chintypedef int (*Compare_f)(const char*, const char*, size_t);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chinstatic int uniq(Sfio_t *fdin, Sfio_t *fdout, int fields, int chars, int width, int mode, int* all, Compare_f compare)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register int n, f, outsize=0;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if(reclen==oreclen && (!reclen || !(*compare)(cp,orecp,reclen)))
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if(((mode&D_FLAG)&&count==0) || ((mode&U_FLAG)&&count))
34f9b3eef6fdadbda0a846aa4d68691ac40eace5Roland Mainz while (f >= 0)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return(1);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return(1);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return(1);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* save current record */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin if (!(outbuff = sfreserve(fdout, 0, 0)) || (outsize = sfvalue(fdout)) < 0)
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return(1);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin /* no room in outp, clear lock and use side buffer */
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return(1);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin return(0);
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register int n, mode=0;
da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968chin register char *cp;