uniq.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* uniq: delete repeated lines within a file.
*
* uniq [-c|-d|-u][-f fields][-s char] [input_file [output_file]]
* OR:
* uniq [-c|-d|-u][-n][+m] [input_file [output_file]]
*/
#include <stdio.h>
#include <ctype.h>
#include <locale.h>
#include <stdlib.h>
#include <libintl.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
/*
 * NOTE(review): the isWblank() definition below ends in a line
 * continuation with no replacement text after it, so the line that
 * follows is spliced into the macro body -- confirm against upstream.
 */
#define isWblank(c) \
static int mcount = 0; /* # of mutually exclusive flags used */
static int fields = 0; /* # of fields to be ignored */
static int letters = 0; /* # of letters to be ignored */
/* incremented inside the processing loops and reset to 0 after them */
static int linec;
static char mode; /* = [c, d, u] */
/* incremented when a comparison reports a match; cleared on output */
static int uniq;
static int mac; /* our modified argc, after parseargs() */
static char **mav; /* our modified argv, after parseargs() */
/* skip(): returns a position inside the line it is handed */
static char *skip();
/*
* according to spec 1170 (draft April 8, 1994), there are two
* ways to use uniq, and the two are mutually exclusive. we use modeflag
* to ensure that the user doesn't mix these mutually exclusive flags.
* if the [-f -s] flags are used, modeflag should be 1. if [-n +m] are
* used, then modeflag should be 2. so the possible values for modeflag are:
* 0: [-f,-s] && [-n, +m] weren't specified. default to XBD.
* 1: either -f or -s was specified. XBD specification.
* 2: either -n or +m was specified. obsolescent usage.
*/
static int modeflag = 0; /* 0,1 = XBD spec. 2 = Obsolescent usage */
/* synopsis text for the XBD form (-f / -s) */
static char usage0[] = "uniq [-c|-d|-u][-f fields][-s char]";
/* synopsis text for the obsolescent form (-n / +m) */
static char usage1[] = "uniq [-c|-d|-u][-n][+m]";
/* forward declarations for helpers defined later in the file */
static void printe();
static void usage();
/*
 * Program entry point: selects the message text domain, dispatches on
 * each option letter and rejects conflicting option combinations.
 *
 * Exit statuses used here:
 *   0  normal completion
 *   1  the -f or -s handling took its failure branch
 *   2  option letter not handled by the switch
 *   3  more than one of -c, -d, -u was given
 *   4  -f/-s style options were mixed with -n/+m style options
 */
int
{
int c; /* for getopt(3C) parsing */
/* fall back to a default message domain when the build supplies none */
#if !defined(TEXT_DOMAIN)
#define TEXT_DOMAIN "SYS_TEST"
#endif
(void) textdomain(TEXT_DOMAIN);
/* handle all of uniq's arguments via getopt(3C): */
switch (c) {
case 'n': /* parseargs() pseudo argument for -# */
modeflag |= MODEFLAG_NM;
break;
case 'm': /* parseargs() pseudo argument for +# */
modeflag |= MODEFLAG_NM;
break;
case 'c': /* -c: precede output lines */
/* FALLTHROUGH! */
case 'd': /* -d: suppress non-repeated lines */
/* FALLTHROUGH! */
case 'u': /* -u: suppress repeated lines */
/* count every use so that a second -c/-d/-u can be rejected below */
mcount++;
/* the option letter itself is the mode */
mode = c;
break;
case 'f': /* -f: ignore 1st fields on input lines */
modeflag |= MODEFLAG_FS;
} else {
usage();
exit(1);
}
break;
case 's': /* -s: ignore 1st chars on comparisons */
modeflag |= MODEFLAG_FS;
} else {
usage();
exit(1);
}
break;
default:
usage();
exit(2);
break;
}
}
/* see if we have any mutually exclusive options: */
if (mcount > 1) {
gettext("Mutually exclusive options were given!\n"));
usage();
exit(3);
}
/* see if the user mixed the old style usage with the new: */
/*
 * NOTE(review): modeflag is built by OR-ing MODEFLAG_FS and MODEFLAG_NM,
 * so this test presumably fires only when both bits are set (values 1
 * and 2 per the modeflag comment) -- confirm the constants' definitions.
 */
if (modeflag > MODEFLAG_NM) {
"Mutually exclusive command lines arguments!\n"));
usage();
exit(4);
}
/* if there are more arguments than getopt(3C) handled: */
/* if the user specified an input filename: */
/* if the user didn't specify stdin: */
}
}
}
/* if the user specified an output filename: */
}
}
}
exit(0);
for (; ; ) {
linec++;
exit(0);
}
linec = 0;
do {
linec++;
exit(0);
}
linec = 0;
}
}
}
/*
 * Get an input line, dynamically growing the buffer as necessary.
 *
 * Characters are taken from standard input with getchar() up to, but
 * not including, the next newline, and the stored line is then
 * NUL-terminated.
 *
 * Returns 0 once a newline-terminated line has been stored, or 1 as
 * soon as EOF is seen; in the EOF case any characters already copied
 * are left without a terminating NUL.
 */
static int
char **buf;
int *size;
{
while ((c = getchar()) != '\n')
{
if (c == EOF)
return (1);
*input++ = c;
/* presumably `left` is the free space remaining in the buffer -- TODO confirm */
if (--left == 0)
{
}
}
*input = '\0';
return (0);
}
/*
 * Output routine: uses `mode` and the `uniq` repeat flag to decide
 * whether anything is emitted for the line in buf, and clears `uniq`
 * on the paths that do not return while it is still set.
 */
static void
register char buf[];
{
switch (mode) {
case 'u':
/* -u: a line that had repeats is dropped; reset the flag and leave */
if (uniq) {
uniq = 0;
return;
}
break;
case 'd':
/* -d: only a line that had repeats goes on to be emitted */
if (uniq) break;
return;
case 'c':
}
/* start with a clean repeat flag for the next line */
uniq = 0;
/* end the output line */
(void) putchar('\n');
}
/*
* equal: see if two strings are the same, accounting for any skipping.
* similar to strcmp(), except that we call skip() first.
* output: 1 if the strings are the same. 0 otherwise.
*/
static int
{
/* record the repeat; the output routine tests and clears this flag */
uniq++;
return (1);
}
/* no match reported: uniq is left untouched */
return (0);
}
/*
 * skip: step over the leading part of a line that is to be ignored.
 *
 * s: line to scan.
 *
 * Returns a pointer into s.  Per the comments in the body, the ignored
 * fields are passed over first and the ignored characters after that.
 * clen is the byte length of the character being examined: -1 marks an
 * illegal sequence (stepped over as a single byte) and 0 marks end of
 * line, which stops the scan.
 */
char *
skip(char *s)
{
int clen; /* # bytes which comprise a mb char */
/*
* we want to skip all user-specified fields first, and then
* any specified characters. so while there're fields to be
* skipped, examine each (possible m.b.) char. for each field,
* we first skip all blanks. then we skip any non-blank chars.
*/
/* skip blank characters (single-byte or multibyte) */
s += clen;
}
if (clen == -1) {
/*
* illegal char found
* treat it as a non-blank single byte char
*/
s++;
} else if (clen == 0) {
/* EOL found */
break;
}
/* skip non-blank and illegal characters */
(clen == -1)) {
}
/* if we've encountered EOL */
if (clen == 0) {
break;
}
}
/*
* skip all user-specified letters, single-byte or multibyte.
*/
/* if we've encountered EOL */
if (clen == 0) {
break;
}
}
/* s now points past everything that was skipped */
return (s);
}
/*
 * printe: fatal exit path.
 *
 * p, s: caller-supplied strings; no statement in this body reads them.
 *
 * Does not return: the process is terminated with exit status 1.
 */
static void
printe(p, s)
char *p;
char *s;
{
	(void) p;
	(void) s;
	exit(1);
}
/*
* parseargs(): modify the args
* this routine is used to transform all arguments into a format
* which is acceptable to getopt(3C), and which retains backwards
* Solaris 2.[0-4] compatibility.
*
* This routine allows us to make full use of getopts, without any
* funny argument processing in main().
*
* The other alternative would be to hand-craft the processed arguments
* during and after getopt(3C) - which usually leads to uglier code
* in main(). I've opted to keep the ugliness isolated down here,
* instead of in main().
*
* We leave the following arguments unchanged:
* [-c | -d | -u], [-f fields] [-s char].
*
* We modify the following arguments:
* -# (a.k.a. -n) to "-n #"
* +# (a.k.a. +n) to "-m #"
*
* E.g. -3 gets changed to the pseudo argument "-n 3".
*
* N.B.: we *DON'T* map -# to -f, nor +# to -s, as -/+ usage is
* mutually exclusive with -f & -s according to the
* spec 1170 man page.
*
* Anything after the valid options is assumed to be input or
* output filenames.
*
*/
static void
int ac;
char **av;
{
int i; /* current argument */
int minusflag; /* !0 = have hit a "--": end of flags */
/* allocation failures are fatal: report through perror() and exit 1 */
perror("malloc failed");
exit(1);
}
/* for each argument, see if we need to change things: */
/*
* if we're doing argument processing, and we have
* a "+" sign, then it should be of the form: +#.
* map it to "-m #".
*/
/*
* The user did not follow the + with a
* positive decimal integer.
* Exit here because we don't want getopt() to
* print an error message about the -m option,
* since it doesn't exist in the man page!
*/
usage();
exit(1);
}
/* since we're adding an arg, need to inc mav space */
/* mav_sz is kept in bytes: make room for one more pointer */
mav_sz += sizeof (char *);
perror("realloc failed");
exit(1);
}
(char *) NULL) {
perror("malloc failed");
exit(1);
}
++mac; /* prepare for 2nd argument */
/* add the arg to our modified space */
(char *) NULL) {
perror("malloc failed");
exit(1);
}
continue;
}
/*
* Here we need to see if the user typed -#, where # is
* a positive integer.
* Allow for input file named "-" (standard input).
*/
/* this user did, so convert it to "-n #". */
/* since we're adding an arg, need to inc mav space */
/* mav_sz is kept in bytes: make room for one more pointer */
mav_sz += sizeof (char *);
perror("realloc failed");
exit(1);
}
(char *) NULL) {
perror("malloc failed");
exit(1);
}
(char *) NULL) {
perror("malloc failed");
exit(1);
}
continue;
}
/* the rest should be normal argument processing: */
/* first copy the argument: */
perror("malloc failed");
exit(1);
}
/* see if we need to do any further processing: */
(minusflag == 0)) {
switch (av[i][1]) {
/*
* start of all the other expected arguments.
* here we keep continuing - eventually we'll
* either run out of arguments, or we'll run
* into the input & output files (after which
* we terminate this loop).
*/
/* flags without subarguments: */
case 'c': /* FALLTHROUGH */
case 'd': /* FALLTHROUGH */
case 'u':
break; /* no more processing required */
/* flags with required subarguments: */
case 'f': /* FALLTHROUGH */
case 's':
/*
* The user has put white space
* between the option and its argument;
* alloc some space, & add the next
* arg.
*/
++mac; /* inc our arg count */
++i; /* move to next (sub)arg */
/*
* If there's no next argument, then
* simply return; getopt(3C) will
* print a message about the missing
* option argument.
*/
return;
else {
/* add the subargument */
perror("malloc failed");
exit(1);
}
&av[i][0]);
}
}
break;
case '-': /* --: end of arguments */
minusflag = 1;
break;
default:
/*
* fflag, so that:
* - we do no further argument processing.
* - we know a priori that there will
* be no more than 2 files.
* we leave if we hit the second file.
*/
/* fflag counts the file operands met so far */
if (++fflag >= 2) {
/*
* we've copied the file argument
* already, so leave.
*/
return;
}
break;
}
} else if (i > 0) { /* if we're not the 1st arg */
/*
* here it's not a flag, so it *must* be either
* the input or the output file, including stdin.
*
* set fflag, so we don't mishandle the -[cdu] flags.
*/
if (++fflag >= 2) {
/*
* we've copied the file argument
* already, so leave.
*/
return;
}
}
/* one more slot used in mav; step to the next original argument */
mac++;
i++;
}
}
/*
 * usage: refers to both synopsis strings, usage0 (the -f/-s form) and
 * usage1 (the -n/+m form).  It contains no exit() of its own; callers
 * in this file follow it with exit().
 */
static void
usage()
{
usage0);
usage1);
}