/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* csplit - Context or line file splitter
* Compile: cc -O -s -o csplit csplit.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <regexpr.h>
#include <signal.h>
#include <locale.h>
#include <libintl.h>
/* Line-number value that tells to_line() to copy the rest of the input. */
#define	LAST	0LL
/* Error return of asc_to_ll(); parenthesized so it is safe in expressions. */
#define	ERR	(-1)
#define	FALSE	0
#define	TRUE	1
/* Kind of the previous operand, consulted when a "{n}" repeat follows it. */
#define	EXPMODE	2	/* previous operand was /regex/ or %regex% */
#define	LINMODE	3	/* previous operand was a line number */
#define	LINSIZ	LINE_MAX	/* POSIX.2 - read lines LINE_MAX long */

/* Globals */
char linbuf[LINSIZ];		/* Input line buffer */
char *expbuf;			/* Compiled regular expression from compile() */
char tmpbuf[BUFSIZ];		/* Temporary buffer for stdin */
char file[8192] = "xx";		/* File name buffer */
char *targ;			/* Arg ptr for error messages */
char *sptr;			/* Scratch pointer used by findline() */
FILE *infile, *outfile;		/* I/O file streams */
int silent, keep, create;	/* Flags: -s(ilent), -k(eep), (create) */
int errflg;			/* Count of option errors seen by getopt() */
int fiwidth = 2;		/* file index width (output file names) */
extern int optind;
extern char *optarg;
offset_t offset;		/* Regular expression offset value */
offset_t curline;		/* Current line in input file */

/*
 * Report a regular expression compilation failure for the current operand.
 * The argument is accepted for source compatibility and is not used.
 * The expansion has no trailing semicolon so that the macro behaves like
 * an ordinary statement when the caller writes "PERROR(x);".
 */
#define	PERROR(x)	fatal("%s: Illegal Regular Expression\n", targ)

static int asc_to_ll(char *, long long *);
static void closefile(void);
static void fatal(char *, char *);
static offset_t findline(char *, offset_t);
static void flush(void);
static FILE *getfile(void);
static char *getaline(int);
static void line_arg(char *);
static void num_arg(char *, int);
static void re_arg(char *);
static void sig(int);
static void to_line(offset_t);
static void usage(void);
int
main(int argc, char **argv)
{
int ch, mode;
char *ptr;
(void) setlocale(LC_ALL, "");
#if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
#endif
(void) textdomain(TEXT_DOMAIN);
while ((ch = getopt(argc, argv, "skf:n:")) != EOF) {
switch (ch) {
case 'f':
(void) strcpy(file, optarg);
if ((ptr = strrchr(optarg, '/')) == NULL)
ptr = optarg;
else
ptr++;
break;
case 'n': /* POSIX.2 */
for (ptr = optarg; *ptr != NULL; ptr++)
if (!isdigit((int)*ptr))
fatal("-n num\n", NULL);
fiwidth = atoi(optarg);
break;
case 'k':
keep++;
break;
case 's':
silent++;
break;
case '?':
errflg++;
}
}
argv = &argv[optind];
argc -= optind;
if (argc <= 1 || errflg)
usage();
if (strcmp(*argv, "-") == 0) {
infile = tmpfile();
while (fread(tmpbuf, 1, BUFSIZ, stdin) != 0) {
if (fwrite(tmpbuf, 1, BUFSIZ, infile) == 0)
if (errno == ENOSPC) {
(void) fprintf(stderr, "csplit: ");
(void) fprintf(stderr, gettext(
"No space left on device\n"));
exit(1);
} else {
(void) fprintf(stderr, "csplit: ");
(void) fprintf(stderr, gettext(
"Bad write to temporary "
"file\n"));
exit(1);
}
/* clear the buffer to get correct size when writing buffer */
(void) memset(tmpbuf, '\0', sizeof (tmpbuf));
}
rewind(infile);
} else if ((infile = fopen(*argv, "r")) == NULL)
fatal("Cannot open %s\n", *argv);
++argv;
curline = (offset_t)1;
(void) signal(SIGINT, sig);
/*
* The following for loop handles the different argument types.
* A switch is performed on the first character of the argument
* and each case calls the appropriate argument handling routine.
*/
for (; *argv; ++argv) {
targ = *argv;
switch (**argv) {
case '/':
mode = EXPMODE;
create = TRUE;
re_arg(*argv);
break;
case '%':
mode = EXPMODE;
create = FALSE;
re_arg(*argv);
break;
case '{':
num_arg(*argv, mode);
mode = FALSE;
break;
default:
mode = LINMODE;
create = TRUE;
line_arg(*argv);
break;
}
}
create = TRUE;
to_line(LAST);
return (0);
}
/*
 * asc_to_ll converts the ASCII string "str" to a long long stored in *plc.
 *
 * Leading blanks and tabs are skipped, then one optional '+' or '-' sign
 * is accepted, followed by decimal digits up to the end of the string.
 * An empty digit string converts to 0.
 *
 * Returns TRUE on success.  Returns ERR if a non-digit character is found
 * or if the magnitude does not fit in a long long.  The value is reported
 * through *plc rather than the return value because every long long is a
 * legal result.  On ERR, *plc is left at 0.
 */
static int
asc_to_ll(char *str, long long *plc)
{
	int negative = 0;
	int digit;
	long long value = 0;

	*plc = 0;

	while (*str == ' ' || *str == '\t')
		str++;

	if (*str == '-') {
		negative = 1;
		str++;
	} else if (*str == '+') {
		str++;
	}

	for (; *str != '\0'; str++) {
		if (*str < '0' || *str > '9')
			return (ERR);
		digit = *str - '0';
		/* Refuse the digit if value * 10 + digit would overflow. */
		if (value > (LLONG_MAX - digit) / 10)
			return (ERR);
		value = value * 10 + digit;
	}

	*plc = negative ? -value : value;
	return (TRUE);	/* not error */
}
/*
 * Closefile finishes the current output file.
 * Nothing happens unless the create flag is on.  When it is, and the
 * silent flag is off, the stream is positioned at its end (fseeko) and
 * the resulting offset (ftello) is printed on stdout as the byte count
 * of the file.  The stream is then closed with fclose.
 */
static void
closefile(void)
{
	if (!create)
		return;

	if (!silent) {
		(void) fseeko(outfile, (offset_t)0, SEEK_END);
		(void) fprintf(stdout, "%lld\n", (offset_t)ftello(outfile));
	}

	(void) fclose(outfile);
}
/*
 * Fatal prints an error message, cleans up and exits with status 1.
 *
 * "string" is a printf-style format (looked up through gettext) that may
 * use one %s, filled in from "arg".  The message is printed before any
 * cleanup because "arg" may point into the global file name buffer,
 * which the cleanup rewrites.
 *
 * With -k (keep) set, the current output file, if any, is finished via
 * closefile.  Otherwise, if an output stream exists, it is closed and
 * every output file from the current index down to index 0 is unlinked;
 * the current index is read back from the last fiwidth characters of
 * the file name.
 *
 * NOTE(review): closefile() closes outfile without clearing the pointer,
 * so when fatal is reached after a file has already been completed, the
 * stream handed to fclose()/closefile() here is one that was closed
 * before.
 */
static void
fatal(char *string, char *arg)
{
	char *suffix;
	int idx;

	(void) fprintf(stderr, "csplit: ");
	/* gettext dynamically replaces string */
	(void) fprintf(stderr, gettext(string), arg);

	if (keep) {
		if (outfile)
			closefile();
		exit(1);
	}

	if (outfile) {
		(void) fclose(outfile);
		/* The numeric suffix occupies the tail of the name. */
		suffix = file + strlen(file);
		suffix -= fiwidth;
		idx = atoi(suffix);
		while (idx >= 0) {
			(void) sprintf(suffix, "%.*d", fiwidth, idx);
			(void) unlink(file);
			idx--;
		}
	}
	exit(1);
}
/*
 * Findline returns the line number referenced by a regular expression
 * operand.
 *
 * "expr" is the compiled regular expression and "oset" the offset to add
 * to the matching line number.  The current position of the input stream
 * is remembered and always restored before returning, so the search does
 * not consume input.
 *
 * Except on the very first call while still at line 1, one line is read
 * and discarded first so that the search starts after the previously
 * referenced line.  Lines are then read one at a time, the trailing
 * newline is stripped, and step() is used to test for a match.
 *
 * On a match the result is curline + lines counted + oset.  If the end
 * of input is reached without a match the same sum plus 2 is returned
 * (presumably so that the caller runs off the end of the input and
 * reports the operand as out of range).
 */
static offset_t
findline(char *expr, offset_t oset)
{
	static int visited = 0;		/* set once the first search ran */
	offset_t scanned = 0;
	offset_t startpos;
	int matched = FALSE;

	startpos = ftello(infile);

	if (curline == (offset_t)1 && !visited)
		scanned--;		/* first line, first time: no skip */
	else
		(void) getaline(FALSE);
	visited = 1;

	while (!matched && getaline(FALSE) != NULL) {
		scanned++;
		sptr = strrchr(linbuf, '\n');
		if (sptr != NULL)
			*sptr = '\0';
		if (step(linbuf, expr))
			matched = TRUE;
	}

	(void) fseeko(infile, startpos, SEEK_SET);

	if (matched)
		return (curline + scanned + oset);
	return (curline + scanned + oset + 2);
}
/*
 * Flush writes the line held in linbuf to the output stream (outfile)
 * with fputs, relying on stdio for buffering.
 * It is a no-op when the create flag is not set.
 */
static void
flush(void)
{
	if (!create)
		return;

	(void) fputs(linbuf, outfile);
}
/*
* Getfile does nothing if the create flag is not set. If the create
* flag is set, getfile positions the file pointer(fptr) at the end of
* the file name prefix on the first call(fptr=0). The file counter is
* stored in the file name and incremented. If the subsequent fopen
* fails, the file name is copied to tfile for the error message, the
* previous file name is restored for cleanup, and fatal is called. If
* the fopen succeeds, the stream(opfil) is returned.
*/
FILE *
getfile()
{
static char *fptr;
static int ctr;
FILE *opfil;
char tfile[15];
char *delim;
char savedelim;
if (create) {
if (fptr == 0)
for (fptr = file; *fptr != NULL; fptr++)
continue;
(void) sprintf(fptr, "%.*d", fiwidth, ctr++);
/* check for suffix length overflow */
if (strlen(fptr) > fiwidth) {
fatal("Suffix longer than %ld chars; increase -n\n",
(char *)fiwidth);
}
/* check for filename length overflow */
delim = strrchr(file, '/');
if (delim == (char *)NULL) {
if (strlen(file) > pathconf(".", _PC_NAME_MAX)) {
fatal("Name too long: %s\n", file);
}
} else {
/* truncate file at pathname delim to do pathconf */
savedelim = *delim;
*delim = '\0';
/*
* file: pppppppp\0fffff\0
* ..... ^ file
* ............. ^ delim
*/
if (strlen(delim + 1) > pathconf(file, _PC_NAME_MAX)) {
fatal("Name too long: %s\n", delim + 1);
}
*delim = savedelim;
}
if ((opfil = fopen(file, "w")) == NULL) {
(void) strcpy(tfile, file);
(void) sprintf(fptr, "%.*d", fiwidth, (ctr-2));
fatal("Cannot create %s\n", tfile);
}
return (opfil);
}
return (NULL);
}
/*
 * Getaline reads one line from the input stream "infile" into linbuf
 * using fgets; at most LINSIZ - 1 characters are stored per call.
 * When "bumpcur" is non-zero the current line counter is advanced
 * first; callers that are only searching (R.E. matching) pass zero.
 * Returns what fgets returns: linbuf, or NULL at end of file or error.
 */
static char *
getaline(int bumpcur)
{
	if (bumpcur != 0)
		curline += 1;

	return (fgets(linbuf, LINSIZ, infile));
}
/*
 * Line_arg handles a line number operand.
 * "line" is the operand text.  It is converted with asc_to_ll; if the
 * conversion fails the operand is reported as a bad line number and the
 * program exits.  Otherwise to_line is called with the converted value.
 */
static void
line_arg(char *line)
{
	long long target;
	int status;

	status = asc_to_ll(line, &target);
	if (status == ERR)
		fatal("%s: bad line number\n", line);

	to_line(target);
}
/*
 * Num_arg handles a repeat operand of the form "{n}".
 *
 * The text between the braces is copied, one (possibly multibyte)
 * character at a time, into a small local buffer; a missing '}' or a
 * count that does not fit in the buffer is fatal.  The copy is then
 * converted with asc_to_ll and must be a non-negative number.
 *
 * "md" is the kind of the previous operand:
 *  - LINMODE: the current line number is used as the step, and to_line
 *    is called "n" times, each time one step further on;
 *  - EXPMODE: the last compiled regular expression and offset are
 *    searched for again "n" times;
 *  - anything else: there is nothing to repeat, which is fatal.
 */
static void
num_arg(char *arg, int md)
{
	offset_t repeat, toline;
	char rep[21];
	char *ptr;
	int len;

	ptr = rep;
	for (++arg; *arg != '}'; arg += len) {
		if (*arg == '\0')
			fatal("%s: missing '}'\n", targ);
		/* Treat an invalid multibyte sequence as a single byte. */
		if ((len = mblen(arg, MB_LEN_MAX)) <= 0)
			len = 1;
		if ((ptr + len) >= &rep[20])
			fatal("%s: Repeat count too large\n", targ);
		(void) memcpy(ptr, arg, len);
		ptr += len;
	}
	*ptr = '\0';

	if ((asc_to_ll(rep, &repeat) == ERR) || repeat < 0L)
		fatal("Illegal repeat count: %s\n", targ);

	if (md == LINMODE) {
		toline = offset = curline;
		for (; repeat > 0LL; repeat--) {
			toline += offset;
			to_line(toline);
		}
	} else if (md == EXPMODE) {
		for (; repeat > 0LL; repeat--)
			to_line(findline(expbuf, offset));
	} else {
		fatal("No operation for %s\n", targ);
	}
}
/*
 * Re_arg handles a regular expression operand: /regex/[offset] or
 * %regex%[offset].
 *
 * The first character of "string" is the delimiter.  The operand is
 * scanned for the matching closing delimiter (a backslash escapes the
 * next character; multibyte characters are stepped over as a unit); a
 * missing delimiter is fatal.  The text after the closing delimiter is
 * converted to the global offset with asc_to_ll.  The expression between
 * the delimiters is then compiled, and to_line is called with the line
 * number that findline reports for the expression and offset.
 *
 * The compiled form is kept in the global expbuf so that a following
 * "{n}" operand can reuse it; the buffer from the previous regular
 * expression operand is released before the new one is compiled.
 */
static void
re_arg(char *string)
{
	char *ptr;
	char *delim;
	char ch;
	int len;

	ch = *string;
	ptr = string + 1;
	while (*ptr != ch) {
		if (*ptr == '\\')
			++ptr;
		if (*ptr == '\0')
			fatal("%s: missing delimiter\n", targ);
		if ((len = mblen(ptr, MB_LEN_MAX)) <= 0)
			len = 1;
		ptr += len;
	}

	/*
	 * Terminate the expression at the closing delimiter: compile no
	 * longer takes the delimiter ('/' or '%') as a fourth argument.
	 */
	delim = ptr;
	*delim = '\0';

	if (asc_to_ll(delim + 1, &offset) == ERR) {
		/* Put the delimiter back so the whole operand is shown. */
		*delim = ch;
		fatal("%s: illegal offset\n", string);
	}

	/*
	 * Step over the opening delimiter by hand; INIT used to do this
	 * before it was removed from compile in regexp.h.
	 */
	string++;

	/* compile() allocates a new buffer; release the previous one. */
	free(expbuf);
	expbuf = compile(string, (char *)0, (char *)0);
	if (regerrno) {
		/* Put the delimiter back so the whole operand is shown. */
		*delim = ch;
		PERROR(regerrno);
	}

	to_line(findline(expbuf, offset));
}
/*
 * Sig is the SIGINT handler installed by main.  It re-installs itself
 * for SIGINT and then calls fatal, which prints the operand that was
 * being processed when the interrupt occurred, cleans up and exits.
 * The signal number argument is not used.
 */
/* ARGSUSED */
static void
sig(int signo)
{
	(void) signal(SIGINT, &sig);
	fatal("Interrupt - program aborted at arg '%s'\n", targ);
}
/*
 * To_line creates one split file.
 *
 * "ln" is the line number the current operand refers to.  A new output
 * stream is obtained from getfile (NULL when create is off, in which
 * case the lines are read but not written).
 *
 * If "ln" is LAST, at least one more input line must exist; lines are
 * then read and flushed until end of file.
 *
 * Otherwise the current line must not already be past "ln"; lines are
 * read and flushed until the current line reaches "ln", and running out
 * of input first is an error.
 *
 * All errors are reported through fatal as "out of range" for the
 * current operand.  Finally closefile finishes the output stream.
 */
static void
to_line(offset_t ln)
{
	outfile = getfile();

	if (ln == LAST) {
		/* last file: copy everything that is left */
		if (getaline(TRUE) == NULL)
			fatal("%s - out of range\n", targ);
		do {
			flush();
		} while (getaline(TRUE) != NULL);
	} else {
		if (curline > ln)
			fatal("%s - out of range\n", targ);
		while (curline < ln) {
			if (getaline(TRUE) == NULL)
				fatal("%s - out of range\n", targ);
			flush();
		}
	}

	closefile();
}
/*
 * Usage prints the (translated) command synopsis on stderr and exits
 * with status 1.
 */
static void
usage(void)
{
	char *synopsis;

	synopsis = gettext(
	    "usage: csplit [-ks] [-f prefix] [-n number] file arg1 ...argn\n");
	(void) fprintf(stderr, synopsis);
	exit(1);
}