/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1996-2011 AT&T Intellectual Property *
* and is licensed under the *
* Eclipse Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* Phong Vo <kpv@research.att.com> *
* Doug McIlroy <doug@research.bell-labs.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* sort main
*
* algorithm and interface
*
* Glenn Fowler
* Phong Vo
* AT&T Research
*
* key coders
*
* Doug McIlroy
* Bell Laboratories
*/
static const char usage[] =
"[-n?\n@(#)$Id: sort (AT&T Research) 2010-08-11 $\n]"
"[+DESCRIPTION?\bsort\b sorts lines of all the \afiles\a together and "
"writes the result on the standard output. The file name \b-\b means the "
"standard input. If no files are named, the standard input is sorted.]"
"[+?The default sort key is an entire line. Default ordering is "
"lexicographic by bytes in machine collating sequence. The ordering is "
"more of which may appear. See \brecsort\b(3) for details.]"
"[+?For backwards compatibility the \b-o\b option is allowed in any file "
"operand position when neither the \b-c\b nor the \b--\b options are "
"specified.]"
"[k:key?Restrict the sort key to a string beginning at \apos1\a and "
"ending at \apos2\a. \apos1\a and \apos2\a each have the form \am.n\a, "
"counting from 1, optionally followed by one or more of the flags "
"\bCMbdfginprZ\b; \bm\b counts fields from the beginning of the line and "
"\bn\b counts characters from the beginning of the field. If any flags "
"are present they override all the global ordering options for this key. "
"If \a.n\a is missing from \apos1\a, it is taken to be 1; if missing "
"from \apos2\a, it is taken to be the end of the field. If \apos2\a is "
"missing, it is taken to be end of line. The second form specifies a "
"fixed record length \areclen\a, and the last form specifies a fixed "
"field at byte position \aposition\a (counting from 1) of \alength\a "
"bytes. The obsolescent \breclen:fieldlen:offset\b (byte offset from 0) "
"is also accepted.]:[pos1[,pos2]]|.reclen|.position.length]]]]]"
"[K:oldkey?Specified in pairs: \b-K\b \apos1\a \b-K\b \apos2\a, where "
"positions count from 0.]# [pos]"
"[R:record|recfmt?Sets the record format to \aformat\a; newlines will be "
"treated as normal characters. The formats are:]:[format]"
"{"
"[+d[\aterminator\a]]?Variable length with record \aterminator\a "
"character, \b\\n\b by default.]"
"[+[f]]\areclen\a?Fixed record length \areclen\a.]"
"[+v[op...]]?Variable length. \bh4o0z2bi\b (4 byte IBM V format "
"descriptor) if \aop\a are omitted. \aop\a may be a combination "
"of:]"
"{"
"[+h\an\a?Header size is \an\a bytes (default 4).]"
"[+o\an\a?Size offset in header is \an\a bytes (default "
"0).]"
"[+z\an\a?Size length is \an\a bytes (default "
"min(\bh\b-\bo\b,2)).]"
"[+b?Size is big-endian (default).]"
"[+l?Size is little-endian (default \bb\b).]"
"[+i?Record length includes header (default).]"
"[+n?Record length does not include header (default "
"\bi\b).]"
"}"
"[+%?If the record format is not otherwise specified, and the "
"any input file name, from left to right, ends with "
"\b%\b\aformat\a or \b%\b\aformat\a\b.\b* then the record format "
"is set to \aformat\a. In addition, the \b-o\b path, if "
"specified and if it does not contain \b%\b and if it names a "
"regular file, is renamed to contain the input \b%\b\aformat\a.]"
"[+-?The first block of the first input file is sampled to check "
"for \bv\b variable length and \bf\b fixed length format "
"records. Not all formats are detected. \bsort\b exits with an "
"error diagnostic if the record format cannot be determined from "
"the sample.]"
"}"
"[b:ignorespace|ignore-leading-blanks?Ignore leading white space (spaces "
"and tabs) in field comparisons.]"
"[d:dictionary?`Phone directory' order: only letters, digits and white "
"space are significant in string comparisons.]"
"[E:codeset|convert?The field data codeset is \acodeset\a or the field "
"data must be converted from the \afrom\a codeset to the \ato\a codeset. "
"The codesets are:]:[codeset|from::to]"
"{\fcodesets\f}"
"[f:fold|ignorecase?Fold lower case letters onto upper case.]"
"[h:scaled|human-readable?Compare numbers scaled with IEEE 1541-2002 "
"suffixes.]"
"[i:ignorecontrol?Ignore characters outside the ASCII range 040-0176 in "
"string comparisons.]"
"[J:shuffle|jumble?Do a random shuffle of the sort keys. \aseed\a "
"specifies a pseudo random number generator seed. A \aseed\a of 0 "
"generates a seed based on time and pid.]#[seed]"
"[n:numeric?An initial numeric string, consisting of optional white "
"space, optional sign, and a nonempty string of digits with optional "
"decimal point, is sorted by value.]"
"[g:floating?Numeric, like \b-n\b, with \be\b-style exponents allowed.]"
"[p:bcd|packed-decimal?Compare packed decimal (bcd) numbers with "
"trailing sign.]"
"[M:months?Compare as month names. The first three characters after "
"optional white space are folded to lower case and compared. Invalid "
"fields compare low to \bjan\b.]"
"[r:reverse|invert?Reverse the sense of comparisons.]"
"[t:tabs?`Tab character' separating fields is \achar\a.]:[tab-char]"
"[c:check?Check that the single input file is sorted according to the "
"ordering rules; give no output on the standard output. If the input "
"is out of sort then write one diagnostic line on the standard error "
"and exit with code \b1\b.]"
"[C:silent-check?Like \b--check\b except no diagnostic is written.]"
"[j:processes|nproc|jobs?Use up to \ajobs\a separate processes to sort "
"the input. The current implementation still uses one process for the "
"final merge phase; improvements are planned.]#[processes]"
"[m:merge?Merge; the input files are already sorted.]"
"[u:unique?Unique. Keep only the first of multiple records that compare "
"equal on all keys. Implies \b-s\b.]"
"[s:stable?Stable sort. When all keys compare equal, preserve input "
"order. The default is \b--nostable\b (\aunstable\a sort): when all "
"keys compare equal, break the tie by using the entire record, ignoring "
"all but the \b-r\b option.]"
"[o:output?Place output in the designated \afile\a instead of on the "
"standard output. This file may be the same as one of the inputs. The "
"\afile\a \b-\b names the standard output. The option may appear among "
"the file arguments, except after \b--\b.]:[output]"
"[l:library?Load the external sort discipline \alibrary\a with optional "
"comma separated \aname=value\a arguments. Libraries are loaded, in left "
"to right order, after the sort method has been "
"initialized.]:[library[,name=value...]]]"
"[L:list?List the available sort methods. See the \b-x\b option.]"
"[x:method?Specify the sort method to apply:]:[method:=rasp]"
"{\fmethods\f} [v:verbose?Trace the sort progress on the standard "
"error.]"
"[P:plugins?List plugin information for each \afile\a operand in "
"--\astyle\a on the standard error. If no \afile\a operands are "
"specified then the first instance of each \bsort\b plugin installed on "
"\b$PATH\b or a sibling dir on \b$PATH\b is listed. The special "
"\astyle\a \blist\b lists a line on the standard output for each plugin "
"with the name, a tab character, and plugin specific command line "
"options parameterized by \b${style}\b (suitable for \beval\b'ing in "
"\bsh\b(1).)]:[style:=list|man|html|nroff|usage]"
"[Z:zd|zoned-decimal?Compare zoned decimal (ZD) numbers with embedded "
"trailing sign.]"
"[z:size|zip?Suggest using the specified number of bytes of internal "
"store to tune performance. Power of 2 and power of 10 size suffixes are "
"accepted. Type is a single character and may be one of:]:[type[size]]]"
"{"
"[+a?Buffer alignment.]"
"[+b?Input reserve buffer size.]"
"[+c?Input chunk size; sort chunks of this size and disable "
"merge.]"
"[+i?Input buffer size.]"
"[+m?Maximum number of intermediate merge files.]"
"[+p?Input sort size; sort chunks of this size before merge.]"
"[+o?Output buffer size.]"
"[+r?Maximum record size.]"
"[+I?Decompress the input if it is compressed.]"
"[+O?\bgzip\b(1) compress the output.]"
"}"
"[X:test?Enables implementation defined test code. Some or all of these "
"may be disabled.]:[test]"
"{"
"[+dump?List detailed information on the option settings.]"
"[+io?List io file paths.]"
"[+keys?List the canonical key for each record.]"
"[+read?Force input file read by disabling memory mapping.]"
"[+show?Show setup information and exit before sorting.]"
"[+test?Immediatly exit with status 0; used to verify this "
"implementation]"
"}"
"[D:debug?Sets the debug trace level. Higher levels produce more "
"output.]# [level]"
"[S|y?Equivalent to \b-zp\b\asize\a; if \asize\a has no suffix then \bki\b "
"is assumed.]:[size]"
"\n"
"\n[ file ... ]\n"
"\n"
"[+?+\apos1\a -\apos2\a is the classical alternative to \b-k\b, with "
"counting from 0 instead of 1, and pos2 designating next-after-last "
"instead of last character of the key. A missing character count in "
"\apos2\a means 0, which in turn excludes any \b-t\b tab character from "
"the end of the key. Thus +1 -1.3 is the same as \b-k\b 2,2.3 and +1r -3 "
"is the same as \b-k\b 2r,3.]"
"[+?Under option \b-t\b\ax\a fields are strings separated by \ax\a; "
"otherwise fields are non-empty strings separated by white space. White "
"space before a field is part of the field, except under option \b-b\b. "
"A \bb\b flag may be attached independently to \apos1\a and \apos2\a.]"
"[+?When there are multiple sort keys, later keys are compared only "
"after all earlier keys compare equal. Except under option \b-s\b, lines "
"with all keys equal are ordered with all bytes significant. \b-S\b "
"turns off \b-s\b, the last occurrence, left-to-right, takes affect.]"
"[+?Sorting is done by a method determined by the \b-x\b option. \b-L\b "
"lists the available methods. rasp (radix+splay-tree) is the default and "
"current all-around best.]"
"[+?Single-letter options may be combined into a single string, such as "
"\b-cnrt:\b. The option combination \b-di\b and the combination of "
"\b-n\b with any of \b-diM\b are improper. Posix argument conventions "
"are supported.]"
"[+?Options \b-b\b, \b-c\b, \b-d\b, \b-f\b, \b-i\b, \b-k\b, \b-m\b, "
"[+DIAGNOSTICS?\asort\a comments and exits with non-zero status for "
"various trouble conditions and for disorder discovered under option "
"\b-c\b.]"
"[+SEE ALSO?\bcomm\b(1), \bjoin\b(1), \buniq\b(1), \brecsort\b(3)]"
"[+CAVEATS?The never-documented default \apos1\a=0 for cases such as "
"\bsort -1\b has been abolished. An input file overwritten by \b-o\b is "
"not replaced until the entire output file is generated in the same "
"directory as the input, at which point the input is renamed.]"
;
#include <sfio_t.h>
#include <ast.h>
#include <error.h>
#include <ctype.h>
#include <fs3d.h>
#include <ls.h>
#include <option.h>
#include <recsort.h>
#include <recfmt.h>
#include <sfdcgzip.h>
#include <vmalloc.h>
#include <wait.h>
#include <iconv.h>
#include <dlldefs.h>
typedef struct Part_s
{
} Part_t;
typedef struct Job_s
{
} Job_t;
typedef struct Sort_s
{
} Sort_t;
/*
* optget() info discipline function
*/
static int
{
register int n;
if (streq(s, "codesets"))
{
n = 0;
return n;
}
else if (streq(s, "methods"))
return 0;
}
/*
* handle RS_VERIFY event
*/
static int
{
return 0;
}
static int
{
exit(1);
return 0;
}
/*
* return read stream for path
*/
static Sfio_t*
{
else
{
{
}
{
return 0;
}
}
return fp;
}
/*
* prevent ERROR_USAGE|4 messages from exiting
*/
static void
{
}
/*
* list info for one plugin on the standard error
*/
static void
{
char* args;
if (style)
else
}
/*
* list info for all [selected] plugins on the standard error
*/
static int
{
char* name;
void (*oexit)(int);
style = 0;
else
{
}
{
}
if (style)
return 0;
}
struct Lib_s
{
char* name;
};
/*
* process argv as in sort(1)
*/
static int
{
register int n;
register char* s;
char* e;
char* p;
char** a;
char** v;
size_t z;
int i;
int map;
char* plugins = 0;
Recfmt_t r;
for (;;)
{
{
case 0:
break;
case 'c':
case 'C':
obsolescent = 0;
continue;
case 'E':
case 'J':
return 0;
continue;
case 'j':
continue;
case 'k':
return -1;
continue;
case 'l':
if (lastlib)
else
continue;
case 'm':
continue;
case 'o':
continue;
case 's':
else
continue;
case 't':
{
n = 0;
}
continue;
case 'u':
continue;
case 'v':
continue;
case 'x':
continue;
case 'z':
s++;
else
n = 'r';
size:
if (*e == '%')
{
error(2, "%s %c%s: %% not supported -- do you really want that much memory?", opt_info.option, n, s);
return -1;
}
{
return -1;
}
switch (n)
{
case 'a':
break;
case 'b':
break;
case 'c':
break;
case 'i':
break;
case 'm':
break;
case 'p':
break;
case 'o':
break;
case 'r':
break;
case 'I':
break;
case 'O':
break;
}
continue;
case 'D':
continue;
case 'K':
{
}
return -1;
continue;
case 'L':
exit(0);
case 'P':
continue;
case 'R':
if (*e)
{
return -1;
}
continue;
case 'S':
case 'y':
n = 'p';
{
s = opt;
}
goto size;
case 'T':
continue;
case 'X':
if (*e)
{
if (streq(s, "dump"))
else if (streq(s, "io"))
else if (streq(s, "keys"))
else if (streq(s, "read"))
else if (streq(s, "reserve"))
else if (streq(s, "show"))
else if (streq(s, "test"))
{
exit(0);
}
else
}
else
continue;
case '?':
return 1;
case ':':
return -1;
default:
opt[1] = 0;
return 0;
continue;
}
break;
}
{
/*
* check for obsolescent `-o output' after first file operand
*/
a = v = argv;
while (s = *a++)
{
if (*s == '-' && *(s + 1) == 'o')
{
if (!*(s += 2) && !(s = *a++))
{
break;
}
}
else
*v++ = s;
}
*v = 0;
}
/*
* disciplines have the opportunity to modify key info
*/
{
return 1;
}
/*
* plugins list bails early
*/
if (plugins)
/*
* record format chicanery
*/
if (s = strrchr(p, '%'))
{
r = recstr(s + 1, &e);
if (!*e || *e == '.' && e > (s + 1))
{
if (r != key->disc->data && i >= 0 && (RECTYPE(r) != REC_variable || RECTYPE(key->disc->data) != REC_variable || REC_V_ATTRIBUTES(r) != REC_V_ATTRIBUTES(key->disc->data)))
{
error(2, "%s: format %s incompatible with %s format %s", p, fmtrec(r, 0), key->input[i], fmtrec(key->disc->data, 0));
return 1;
}
if (RECTYPE(r) != REC_variable || RECTYPE(key->disc->data) != REC_variable || REC_V_SIZE(key->disc->data) < REC_V_SIZE(r))
i = n;
}
}
if (RECTYPE(key->disc->data) == REC_method && ((n = REC_M_INDEX(key->disc->data)) == REC_M_path || n == REC_M_data))
{
if ((sp->opened = fileopen(sp, key->input[0])) && (s = sfreserve(sp->opened, SF_UNBOUND, SF_LOCKR)))
{
}
else
{
}
}
if (map && key->output && key->disc->data != REC_N_TYPE() && (stat(key->output, &st) || S_ISREG(st.st_mode)))
{
s = p + 1;
else
if (!strchr(s, '%'))
{
if (!(e = strrchr(s, '.')))
e = s + strlen(s);
}
}
return error_info.errors != 0;
}
/*
* dump keys to stderr
*/
static ssize_t
dumpkey(Rs_t* rs, unsigned char* dat, size_t datlen, unsigned char* key, size_t keylen, Rsdisc_t* disc)
{
ssize_t n;
int i;
{
buf[1] = 0;
for (i = 0; i < n; i++)
{
}
}
return n;
}
/*
* initialize sp from argv
*/
static int
{
register char* s;
register char** p;
char* t;
int n;
unsigned long x;
unsigned long z;
return -1;
#if 0
if (conformance(0, 0))
#endif
{
if (n < 0)
return -1;
}
/*
* finalize the buffer dimensions
*/
z = 0;
if (x > INMAX)
x = INMAX;
x = INMIN;
{
{
}
}
else
{
if (z)
if (fixed)
}
else
{
if (z)
for (;;)
{
if (fixed)
break;
if ((x >>= 1) < INMIN)
}
}
{
else
{
}
}
{
int i;
{
uno:
}
goto uno;
else
{
else
goto uno;
if (fixed)
{
i = (size + x - 1) / x;
{
goto uno;
}
offset = 0;
{
jp->intermediates = i;
}
else
{
}
}
else
{
register char* s;
register char* t;
register char* b;
char* file;
i = (size + x - 1) / x;
{
goto uno;
}
offset = 0;
{
else
{
/*UNDENT...*/
/*
* snoop around for the closest record boundary
*/
error(ERROR_SYSTEM|3, "%s: record boundary seek error at offset %lld", file, (Sflong_t)offset + size);
error(ERROR_SYSTEM|3, "%s: record boundary read error at offset %lld", file, (Sflong_t)offset + size);
while (*s++ != '\n')
{
if (t < b)
{
error(ERROR_SYSTEM|3, "%s: record boundary input seek error at %lld", file, (Sflong_t)offset + size);
t = (s = b) + scan;
do
{
if (s >= t)
goto bigger;
} while (*s++ != '\n');
break;
}
if (*t-- == '\n')
{
s = t + 2;
break;
}
}
size += (s - b);
/*...INDENT*/
}
jp->intermediates = i;
}
return -1;
key->procsize = (key->procsize > chunk) ? chunk : chunk / ((chunk + key->procsize - 1) / key->procsize);
}
}
}
/*
* check the output file for clash with the input files
*/
{
}
{
{
{
while (s = *p++)
if (!pathstdin(s))
{
{
{
*t = 0;
}
else
s = ".";
if (t) *t = '/';
break;
}
}
}
}
}
return -1;
/*
* finally ready for recsort now
*/
{
return -1;
}
{
}
return 0;
}
/*
* close sp->files and push fp if not 0
*/
static void
{
register int i;
{
}
if (fp)
{
}
else
}
/*
* flush the intermediate data
* r is the partial record offset
* updated r is returned
*/
static ssize_t
{
register size_t n;
register size_t m;
register size_t b;
{
/*
* skip merge and output sorted chunk
*/
{
if (!error_info.errors)
return -1;
}
}
{
/*
* write to an intermediate file and rewind for rsmerge
*/
{
}
{
return -1;
}
{
return -1;
}
/*
* multi-stage merge when open file limit exceeded
*/
{
{
return -1;
}
{
return -1;
}
}
}
/*
* slide over partial record data so the next read is aligned
*/
{
if (n < r)
{
m = n;
}
else
{
b = r;
r += m;
n += m;
while (r > b)
m = n;
}
}
else
return m;
}
/*
* input the records for file ip
*/
static int
{
register ssize_t n;
register ssize_t p;
register ssize_t m;
register ssize_t r;
size_t z;
char* b;
int c;
/*
* align the read buffer and
* loop on insize chunks
*/
m = -1;
z = 0;
{
else
}
p = 0;
for (;;)
{
{
break;
return -1;
}
else
{
{
{
}
n = -1;
}
}
if (n <= 0)
{
if (n < 0)
break;
{
break;
}
if (RECTYPE(sp->key->disc->data) == REC_delimited && sp->buf[sp->cur - 1] != (c = REC_D_DELIMITER(sp->key->disc->data)))
{
if (c == '\n')
else
{
del[0] = c;
del[1] = 0;
}
}
}
{
else
}
{
break;
if (p)
{
m = -(n - p + 1);
{
return -1;
}
}
{
m = -(n + 1);
}
{
break;
}
{
break;
}
else
{
if (RECTYPE(sp->key->disc->data) == REC_delimited && b[sp->cur - 1] != (c = REC_D_DELIMITER(sp->key->disc->data)))
{
if (c == '\n')
else
{
del[0] = c;
del[1] = 0;
}
}
goto process;
}
}
else
r += p;
}
error_info.file = 0;
return 0;
}
/*
* sfio part discipline read
*/
static ssize_t
{
return 0;
}
/*
* sfio part discipline seek
*/
static Sfoff_t
{
switch (op)
{
case SEEK_SET:
break;
case SEEK_CUR:
break;
case SEEK_END:
break;
}
{
}
return offset;
}
/*
* job control
* requires single named input file
* no multi-stage merge
*/
static void
{
register int i;
register int j;
register int f;
int status;
char* file;
{
error(0, "%s#%d pos %12lld : len %10lld : buf %10lld : num %2d", error_info.id, jp - sp->jobs + 1, (Sflong_t)jp->offset, (Sflong_t)jp->size, (Sflong_t)jp->chunk, jp->intermediates);
exit(0);
}
f = 0;
for (i = 0; i < jp->intermediates; i++)
j = 0;
{
switch (fork())
{
case -1:
case 0:
for (i = 0; i < jp->intermediates; i++)
while (i < f)
error(0, "%s pos %12lld : len %10lld : buf %10lld : num %2d", error_info.id, (Sflong_t)jp->offset, (Sflong_t)jp->size, (Sflong_t)jp->chunk, jp->intermediates);
}
exit(1);
j += jp->intermediates;
}
i = 0;
while (j > 0)
{
{
if (status)
i++;
j--;
}
{
break;
}
}
if (i)
}
/*
* all done
*/
static void
{
/*
* if the output would have overwritten an input
* file now is the time to commit to it
*/
{
if (error_info.errors)
{
{
{
}
else
}
else
}
}
}
int
{
register char* s;
char** merge;
exit(1);
{
}
{
{
break;
}
}
else
{
fp = 0;
exit(0);
else
{
if (merge)
{
{
merge = 0;
fp = 0;
continue;
}
}
break;
{
fp = 0;
}
}
{
return 1;
}
else
{
}
}
}