split.c revision 3f54fd611f536639ec30dd53c48e5ec1897cc7d9
/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1989-2011 AT&T Intellectual Property *
* and is licensed under the *
* Eclipse Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* split.c
* David Korn
* AT&T Research
*/
/*
* Usage/manual description for the split personality (selected later in
* this file with `usage = split_usage`).  The text is one long string
* literal built from adjacent pieces; bracketed groups describe sections
* and options, and \a...\a / \b...\b delimit marked-up words.
* NOTE(review): the markup is presumably the AST optget(3) format - confirm.
*
* Fixes relative to the previous text: the default prefix sentence now
* reads "is not specified" (it previously contradicted the sentence before
* it), and the misspellings "killobytes", "specified the number" and
* "a most" are corrected.
*/
static const char split_usage[] =
"[-?\n@(#)$Id: split (AT&T Research) 2006-09-19 $\n]"
"[+NAME?split - split files into pieces]"
"[+DESCRIPTION?\bsplit\b reads an input file and writes one or more"
" output files so that \bcat\b(1) on these files will produce"
" the input file. The default size for each piece is 1000 lines."
" The suffix consists of \asuffix_len\a lower case characters"
" from the POSIX locale.]"
"[+?If \aprefix\a is specified it will be used as a prefix for each"
" of the resulting files from the split operation. If \aprefix\a"
" is not specified, the prefix \bx\b will be used.]"
"[+?If no \afile\a is given, or if the \afile\a is \b-\b, \bsplit\b"
" copies from standard input starting at the current location.]"
"[+?The option arguments for \b-b\b and \b-C\b can optionally be followed"
" by one of the following characters to specify a different"
" unit other than a single byte:]{"
" [+b?512 bytes.]"
" [+k?1-kilobyte.]"
" [+m?1-megabyte.]"
" [+g?1-gigabyte.]"
" [+t?1-terabyte.]"
" }"
"[+?For backwards compatibility, \b-\b\aline_count\a is equivalent to"
" \b-l\b \aline_count\a.]"
"[l:lines]#[line_count:=1000?\aline_count\a specifies the number of lines"
" for each piece except the last. If the input does not end in"
" a newline, the partial line is included in the last piece.]"
"[a|n:suffix-length]#[suffix_len:=2?\asuffix_len\a defines the number of"
" letters that form the suffix portion of the file names for each of"
" the pieces that the file is split into.]"
"[b:bytes]#[n?Splits the file into byte size pieces defined by \an\a"
" rather than lines.]"
"[C:line-bytes]#[n?Splits the file into lines totaling at most \an\a bytes.]"
"\n"
"\n[ file [ name ] ]\n"
"\n"
"[+EXIT STATUS]{"
" [+0?Successful completion.]"
" [+>0?An error occurred.]"
"}"
"[+SEE ALSO? \bcsplit\b(1), \bcat\b(1)]"
;
/*
* Usage/manual description for the csplit personality.  Same layout as
* the split description: one string literal assembled from adjacent
* pieces, with bracketed groups for sections and options.
* NOTE(review): the markup is presumably the AST optget(3) format - confirm.
*
* Fixes relative to the previous text: the misspellings "preceeding" and
* "exausted" in the {repeat-count} paragraph are corrected.
*/
static const char csplit_usage[] =
"[-?\n@(#)$Id: csplit (AT&T Research) 2003-08-21 $\n]"
"[+NAME?csplit - split a file into sections determined by context lines]"
"[+DESCRIPTION?\bcsplit\b creates zero or more output files containing"
" sections of the given input \afile\a, or the standard input if the"
" name \b-\b is given. By default, \bcsplit\b prints the number of"
" bytes written to each output file after it has been created.]"
"[+?The contents of the output files are determined by the \apattern\a"
" arguments. An error occurs if a pattern argument refers to a"
" nonexistent line of the input file, such as if no remaining line"
" matches a given regular expression. After all the given patterns have"
" been matched, any remaining output is copied into one last output"
" file. The types of pattern arguments are:]{"
" [+line?Create an output file containing the current line up"
" to (but not including) line \aline\a (a positive"
" integer) of the input file. If followed by a repeat"
" count, also create an output file containing the"
" next \aline\a lines of the input file once for each"
" repeat.]"
" [+/regexp/[offset]]?Create an output file containing the"
" current line up to (but not including) the next line"
" of the input file that contains a match for"
" \aregexp\a. The optional \aoffset\a is a \b+\b or"
" \b-\b followed by a positive integer. If it is given,"
" the input up to the matching line plus or minus"
" \aoffset\a is put into the output file, and the line"
" after that begins the next section of input.]"
" [+%regexp%[offset]]?Like the previous type, except that it"
" does not create an output file, so that section of"
" the input file is effectively ignored.]"
" [+{repeat-count}?Repeat the previous pattern \arepeat-count\a"
" (a positive integer) additional times. An asterisk"
" may be given in place of the (integer) repeat count,"
" in which case the preceding pattern is repeated as"
" many times as necessary until the input is exhausted.]"
" }"
"[+?The output file names consist of a prefix followed by a suffix. By"
" default, the suffix is merely an ascending linear sequence of two-digit"
" decimal numbers starting with 00 and ranging up to 99, however this"
" default may be overridden by either the \b--digits\b option or by the"
" \b--suffix-format\b option (see below.) In any case, concatenating"
" the output files in sorted order by file name produces the original"
" input file, in order. The default output file name prefix is \bxx\b.]"
"[+?By default, if \bcsplit\b encounters an error or receives a hangup,"
" interrupt, quit, or terminate signal, it removes any output files"
" that it has created so far before it exits.]"
"[b:suffix-format?Use the \bprintf\b(3) \aformat\a to generate the file"
" name suffix.]:[format:=\b%02d\b]"
"[f:prefix?Use \aprefix\a to generate the file name prefix.]:[prefix:=\bxx\b]"
"[k:keep-files?Do not remove output files on errors.]"
"[a|n:digits?Use \adigits\a in the generated file name suffixes.]#[digits:=2]"
"[s:silent|quiet?Do not print output file counts and sizes.]"
"[z:elide-empty-files?Remove empty output files.]"
"\n"
"\nfile arg ...\n"
"\n"
"[+EXIT STATUS?]{"
" [+0?Successful completion.]"
" [+>0?An error occurred.]"
"}"
"[+SEE ALSO? \bsplit\b(1), \bcat\b(1)]"
;
#include <cmd.h>
#include <regex.h>
/*
* Flag bits.  Every value is a distinct single bit, so several can be
* combined in one int.  NOTE(review): presumably one bit per command
* line option or mode - confirm against the option parsing code.
*/
#define S_FLAG 0x01
#define K_FLAG 0x02
#define C_FLAG 0x04
#define B_FLAG 0x08
#define Z_FLAG 0x10
#define M_FLAG 0x20
/*
* Operation kinds, numbered consecutively from 0.
*/
#define OP_LINES 0x0
#define OP_SEARCH 0x1
#define OP_SKIP 0x2
#define OP_ABSOLUTE 0x3
/*
* Chunk size used by the newline scan in this file: the offset is
* stepped back by this amount and this many bytes are examined per step.
*/
#define BLK_SIZE 0x800
/*
* State of the output file name generator described further down in
* this file ("<prefix>... where ... is suflen characters from low..high").
* NOTE(review): the per-member notes are inferred from that description
* and from the member names - confirm against the code that fills them in.
*/
struct fname {
	char *fname;	/* presumably the buffer holding the current file name */
	char *format;	/* presumably the optional suffix format; may be null */
	char *suffix;	/* presumably where the suffix starts inside fname */
	char *last;	/* presumably the last suffix character */
	char low;	/* lowest character a suffix position may hold */
	char high;	/* highest character a suffix position may hold */
	int count;	/* counter; exact meaning not visible here */
};
/*
* One operation handled by this file (see the OP_* kinds).
* NOTE(review): only the flags member is visible in this copy.
*/
struct op {
	int flags;	/* flag bits attached to this operation */
};
/*
* create an operation structure
*
* NOTE(review): in this copy the line after the return type carries no
* function name or parameter list, and the statements that build `op`
* are not present; only the test of `re` and the final return are
* visible.
*/
static struct op*
{
{
/* `re` is tested for non-zero here; the statement it guards is not visible */
if (re)
}
/* hand the operation structure back to the caller */
return op;
}
/*
* returns new operation structure which is added to linked list
*
* NOTE(review): the function name, parameter list and most statements
* are not present in this copy.  Two results are visible: 0 from an
* inner branch and `op` at the end.
*/
static struct op*
{
char* ep;
int n;
{
{
/* null result; the condition selecting this branch is not visible */
/* (presumably a failure path - confirm) */
return 0;
}
/* taken when the character at cp is not the terminating NUL */
if (*cp)
{
/* taken when the character at ep is not the terminating NUL */
if (*ep)
}
}
return op;
}
/*
* set up file name generator whose form is <prefix>... where ... is
* suflen characters from low..high
* returns a pointer to a structure that can be used to create
* file names
*
* NOTE(review): the function name, parameter list and the statements
* that allocate and fill `fp` are not present in this copy.
*/
static struct fname*
{
int flen;
int slen;
int len;
char* cp;
if (format)
{
}
else
/* no format supplied: slen is 0 */
slen = 0;
{
if (format)
{
}
/* store 0 in suflen consecutive bytes, walking cp backwards */
while (suflen-- > 0)
*cp-- = 0;
/* decrement the byte cp points at after the backward walk */
/* NOTE(review): presumably primes the sequence so the first generated */
/* name is the first in order - confirm */
(*cp)--;
{
cp++;
{
/* clear the byte just before cp */
*(cp - 1) = 0;
}
}
else
{
}
}
return fp;
}
/*
* return next sequential file name
*
* Returns 0 after reporting "file limit reached" when no further name
* is available.
* NOTE(review): the function name, parameter list, the limit test and
* the normal return path are not present in this copy.
*/
static char*
{
{
{
/* report the exhausted name space via error() at level 0 and return null */
error(0, "file limit reached");
return 0;
}
}
}
/*
* remove all generated files
*
* NOTE(review): the function name, parameter list and the loop body
* are not present in this copy.
*/
static void
{
/* loop while the character at cp is not NUL */
while (*cp)
{
}
}
/*
* Scans backwards from `len` in BLK_SIZE steps looking for a newline.
*
* NOTE(review): the function name, parameter list, the conditions in
* front of both `return len` statements and the statements that set
* `cp` and `nlen` are not present in this copy, so the exact return
* value on a successful scan cannot be stated from here.
*/
static int
{
register char* cp;
register char* dp;
register long m;
/* n: current scan offset, starts at len */
register long n = len;
/* nlen: stays 0 until the scan finds what it is looking for */
register long nlen = 0;
return len;
while (nlen == 0 && n > 0)
{
/* step back one block, never below offset 0 */
n -= BLK_SIZE;
if (n < 0)
n = 0;
return len;
/* examine up to BLK_SIZE bytes of this block */
m = BLK_SIZE;
while (m-- > 0)
{
/* newline found at the byte cp just passed */
if (*cp++ == '\n')
}
}
if (n > 0)
}
/*
* Processes the chain of operations starting at `op` (loop `while (op)`).
* Leaves through two labels: `done`, and `err`, which returns 1.
*
* NOTE(review): the function name, parameter list and many statements
* (including the conditions in front of most `goto`s) are not present
* in this copy; comments below describe only what is visible.
*/
static int
{
register char* cp;
register char* s;
Sfoff_t z;
int c;
/* peek: cleared after being tested in both branches below, set from s further down */
register char* peek = 0;
register long n = 0;
while (op)
{
do
{
{
goto err;
{
goto err;
}
}
{
if (peek)
{
goto done;
/* pending value handled: forget it, count it against len and lineno */
peek = 0;
if (len > 0)
len--;
lineno++;
}
if (len)
{
goto done;
/* advance the line counter by n */
lineno += n;
}
}
else
{
if (peek)
{
goto done;
lineno++;
peek = 0;
}
{
break;
lineno++;
/* c is compared with REG_NOMATCH from <regex.h>; any other value goes to err */
/* NOTE(review): c presumably holds a regexec() result - confirm */
if (c != REG_NOMATCH)
{
goto err;
}
goto done;
}
/* remember s in peek; branch taken when s is null */
if (!(peek = s))
{
repeat = 1;
}
}
if (out)
{
out = 0;
}
}
/* common exit reached by every `goto done` above */
done:
if (out)
{
if (n <= 0)
}
if (n >= 0)
return 0;
/* error exit reached by every `goto err` above: result is 1 */
err:
return 1;
}
/*
* Entry point shared by the split and csplit personalities: picks the
* usage text and default prefix, walks the command line options, then
* processes the remaining arguments and returns n.
*
* NOTE(review): the function name, parameter list, the personality
* test, the option-fetching call and most per-option statements are
* not present in this copy.
*/
int
{
char* cp;
char* prefix;
const char* usage;
int flags;
ssize_t n;
/* no suffix format unless one is supplied */
char* format = 0;
/* default suffix length; matches the :=2 defaults in both usage texts */
int suflen = 2;
cp++;
else
{
/* split personality: its usage text and the default prefix "x" */
usage = split_usage;
prefix = "x";
}
else
{
/* other personality: default prefix "xx", the default documented in csplit_usage */
prefix = "xx";
}
/* option loop: each handled option continues, anything else leaves via the break after the switch */
for (;;)
{
{
/* NOTE(review): presumably "no more options" - confirm against the option parser */
case 0:
break;
case 'l':
continue;
case 'k':
continue;
case 's':
continue;
case 'z':
continue;
case 'f':
continue;
/* 'a' and 'n' are declared as alternatives (a|n) in both usage texts */
case 'a':
case 'n':
continue;
/* 'C' (line-bytes) and 'b' share one handler */
case 'C':
case 'b':
{
}
else
continue;
/* NOTE(review): ':' and '?' presumably report option errors / usage requests - confirm */
case ':':
break;
case '?':
break;
}
break;
}
{
char* sp;
{
/* dispatch on the first character at sp */
/* NOTE(review): presumably a csplit pattern argument as listed in csplit_usage - confirm */
switch (*sp)
{
case '/':
case '?':
case '%':
break;
case '{':
/* distinguishes whether an operation `op` already exists */
/* NOTE(review): presumably a repeat count needs a preceding pattern - confirm */
if (!op)
else
{
}
break;
default:
break;
}
}
}
else
{
}
return n;
}