sync.c revision 3f54fd611f536639ec30dd53c48e5ec1897cc7d9
/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 2003-2011 AT&T Intellectual Property *
* and is licensed under the *
* Eclipse Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* ibm dfsort discipline
*/
/*
 * Option and help text for the sync plugin. Each bracketed group describes
 * one section or option: C (codeset), c (control), d (duplicates), j (junk),
 * l (list), o (out*) and R (reclen|lrecl).
 * NOTE(review): presumably consumed by the AST option parser at run time --
 * confirm. Only the spelling of the descriptive prose was corrected
 * ("overridden", "Auxiliary"); the option grammar is unchanged.
 */
static const char usage[] =
"[-1lp0s5P?\n@(#)$Id: dfsort (AT&T Research) 2007-01-25 $\n]"
"[+PLUGIN?sync - IBM dfsort discipline]"
"[+DESCRIPTION?The \bsync\b \bsort\b(1) discipline applies an IBM \bDFSORT\b"
" control file to the input data. Command line keys are overridden"
" by the control file. Auxiliary output files must be named by"
" \bout\b\aid\a=\apath\a options.]"
"[+?User callout functions (\aexits\a in IBM parlance) must be linked in"
" DLLs or shared libraries. If the control file library is not found"
" then the name is treated as an environment variable and searched for"
" again. If the function \brs_intercept\b exists then it is used as a"
" wrapper for the callouts:"
" rs_intercept(\acallout\a,Rsobj_t*rec,Rsobj_t*dup,void**state),"
" otherwise the callout is called directly:"
" \acallout\a(Rsobj_t*rec,Rsobj_t*dup,void**state)."
" \bRsobj_t\b and callout return values are defined in \b<recsort.h>\b"
" and described in \brecsort\b(3). The callout arguments are:]{"
" [+Rsobj_t* rec?The current record.]"
" [+Rsobj_t* dup?The record comparing equal to \arec\a just before"
" it is discarded.]"
" [+void** state?User defined state, initialized to 0 before the first"
" callout. The same \astate\a is passed to all callouts.]"
"}"
"[+?The callout return values are:]{"
" [+RS_TERMINATE?Terminate the sort and exit with non-zero exit status.]"
" [+RS_DELETE?Delete \arec\a.]"
" [+RS_ACCEPT?Accept the possibly modified \arec\a.]"
" [+RS_INSERT?Insert a new record pointed to by \arec\a.]"
"}"
"[C:codeset?The data codeset is \acodeset\a. The codesets"
" are:]:[codeset]{\fcodesets\f}"
"[c:control?Specifies the control file path name. Control file details may be"
" found in the IBM \bDFSORT\b documentation. The control file is read"
" as an 80 column punched deck. If no control file is specified then"
" the standard input is read.]:[path]"
"[d:duplicates?Print a message to the standard error containing the number"
" of records with duplicate keys.]"
"[j:junk?Print to \afile\a the number of non-SUM field byte differences"
" between retained and discarded duplicate records. Each line in the"
" report is a field byte offset followed by the number of differences"
" for that offset.]:[file]"
"[l:list?List control file information on the standard output and exit.]"
"[o:out*?\bout\b\aid\a=\apath\a assigns \apath\a to the auxiliary output"
" file \aid\a. A leading \b-\b or \b_\b in \aid\a is ignored. File"
" paths may also be assigned by exporting \bSORTOF\b\aid\a=\apath\a;"
" \b--out\b takes precedence. Unassigned auxiliary output files are"
" silently ignored.]:[path]"
"[R:reclen|lrecl?Sets the fixed record length to \areclen\a.]#[reclen]"
"[+EXAMPLES]{"
" [+sort -lsync,control=xyz.ss,out02=out.2?Sorts using the"
" control file \bxyz.ss\b and places auxiliary file"
" \b02\b in \bout.2\b.]"
"}"
"[+SEE ALSO?\bsort\b(1), \bDFSORT\b(IBM), \brecsort\b(3)]"
"\n\n--library=sync[,option[=value]...]\n\n"
;
#include <ast.h>
#include <ctype.h>
#include <ccode.h>
#include <dirent.h>
#include <error.h>
#include <recsort.h>
#include <ss.h>
/*
 * CALLOUT(s,f,r,d): invoke callout f with arguments r and d.
 * When (s)->intercept is non-null, f is handed to it as
 * intercept(f, r, d, &s->exitstate); otherwise f is called directly as
 * f(r, d, &s->exitstate). The macro's value is the callee's return value.
 * Every parameter is parenthesized in the expansion so that compound
 * argument expressions (e.g. a conditional selecting the callout) parse
 * as intended. s is expanded more than once and must be free of side
 * effects.
 */
#define CALLOUT(s,f,r,d) ((s)->intercept ? (*(s)->intercept)((f), (r), (d), &(s)->exitstate) : (*(f))((r), (d), &(s)->exitstate))
typedef struct State_s
{
int dups;
char tmp[1];
} State_t;
/*
 * NOTE(review): this definition's name and parameter list are not present
 * in this view (only "static void" and the body remain), and several body
 * statements appear to be missing as well. The notes below describe only
 * what the visible statements do.
 *
 * With a non-null r, the visible code clamps n to r->datalen, compares two
 * byte buffers over the first n positions and increments the per-offset
 * counter z[i] for every position whose bytes differ. With a null r it
 * walks the same n counters and tests each one.
 */
static void
{
register size_t i;
register size_t k;
register size_t n;
register unsigned char* b;
register unsigned char* s;
register unsigned char* t;
register Ssfield_t* f;
register Sfulong_t* z;
if (r)
{
b = r->data;
/* NOTE(review): n and z have no visible assignment before they are read */
if (n > r->datalen)
n = r->datalen;
{
/*
 * NOTE(review): as shown, s (via b) and t both point at r->data, so the
 * comparison below could never differ; presumably b is redirected to the
 * other record on a line missing from this view -- confirm.
 */
s = b;
t = r->data;
/* count one difference per byte offset */
for (i = 0; i < n; i++)
if (s[i] != t[i])
z[i]++;
}
}
else
{
/* NOTE(review): i is not visibly set before this store */
z[i] = 0;
/* visit every counter; the body of the test below is missing from this view */
for (i = 0; i < n; i++)
if (z[i])
}
}
/*
 * NOTE(review): this definition's name and parameter list are not present
 * in this view, and many body statements appear to be missing (op, ss,
 * state, save and size are used without a visible declaration).
 *
 * What is visible: the body switches on op over RS_OPEN, RS_POP, RS_READ,
 * RS_SUMMARY and RS_WRITE and returns -1 for any other op. The values
 * returned on the visible paths are -1, RS_DELETE, or c (which is set to 0
 * or RS_ACCEPT on the lines shown).
 */
static int
{
int hit;
int c;
switch (op)
{
case RS_OPEN:
return -1;
case RS_POP:
/* reports state->dupcount through the discipline's errorf at level 0, pluralizing "key" unless the count is 1 */
(*ss->disc->errorf)(NiL, ss->disc, 0, "%I*u duplicate key%s", sizeof(state->dupcount), state->dupcount, state->dupcount == 1 ? "" : "s");
c = 0;
else
case RS_READ:
{
return RS_DELETE;
}
return RS_DELETE;
return RS_DELETE;
return c;
{
return -1;
}
{
return -1;
break;
}
return c;
case RS_SUMMARY:
/* without a summary exit the record is accepted as is */
if (!ss->summaryexit)
c = RS_ACCEPT;
return c;
return -1;
return c;
case RS_WRITE:
{
c = RS_ACCEPT;
}
return c;
size = 0;
return -1;
{
return c;
}
break;
default:
/* any op not listed above is rejected */
return -1;
}
/* NOTE(review): the conditions guarding the statements below are missing from this view */
hit = 0;
save = 0;
{
{
hit = 1;
return -1;
}
}
return -1;
return c;
}
/*
 * Pair of character pointers, base and suff.
 * NOTE(review): a later function indexes an array sp[] with .base and
 * .suff members, where base points into the matching path v[j] and suff
 * lies at or after base; presumably sp is a Suf_t array -- confirm, its
 * declaration is not visible here.
 */
typedef struct Suf_s
{
char* base;
char* suff;
} Suf_t;
/*
 * NOTE(review): this definition's name and parameter list are not present
 * in this view, and many body statements appear to be missing (v, sp, dp
 * and ep are used without a visible declaration).
 *
 * What is visible: v is treated as a null-terminated vector of path
 * strings. The entries are counted, the last path component of each is
 * inspected for a '%' character, and in a later loop entries are compared
 * against ep->d_name and may be replaced by a strdup() copy stored back
 * into v[j]. Returns 0 on the normal path and -1 via the bad: label.
 */
static int
{
char** b;
char* s;
char* t;
char* z;
size_t i;
size_t j;
size_t k;
size_t m;
size_t n;
/* advance b to the terminating null entry so that b - v is the entry count */
for (b = v; *b; b++);
if (n = b - v)
{
goto bad;
for (i = 0; i < n; i++)
{
/* s = last path component of v[i] (text after the final '/', else all of it) */
if (s = strrchr(v[i], '/'))
s++;
else
s = v[i];
if (!strchr(s, '%'))
{
}
}
i = 0;
for (;;)
{
i++;
if (i >= n)
break;
else
{
}
if (dp)
{
/* z = final ".suffix" of s, or null if s has no '.' */
z = strrchr(s, '.');
for (j = i; j < n; j++)
{
/*
 * Entry j matches when it has a base, its prefix before base is k bytes
 * long and equal to v[i]'s first k bytes, the first m bytes of
 * ep->d_name equal its base, and either it has no suffix, the suffix
 * starts within those m bytes, or z equals the suffix.
 */
if (sp[j].base && (sp[j].base - v[j]) == k && (!k || !memcmp(v[i], v[j], k)) && !memcmp(ep->d_name, sp[j].base, m) && (!sp[j].suff || (sp[j].suff - sp[j].base) < m || z && !strcmp(z, sp[j].suff)))
{
else
/* the replacement path is duplicated before being stored; failure goes to bad */
if (!(t = strdup(t)))
{
goto bad;
}
v[j] = t;
}
}
}
for (j = i; j < n; j++)
if (dp)
}
}
return 0;
bad:
/* NOTE(review): the cleanup statement guarded by this test is missing from this view */
if (sp)
return -1;
}
/*
 * NOTE(review): this definition's return type, name and parameter list are
 * not present in this view, and many body statements appear to be missing
 * (options, key, ss, ssdisc and opt_info are used without a visible
 * declaration).
 *
 * What is visible: the dups, junk, list and ss locals are cleared, option
 * letters are handled when options is non-null, a space-separated string
 * is split into a null-terminated vector v in two passes, record formats
 * are checked for compatibility, and every failure jumps to the drop:
 * label, after which the function returns 0.
 */
{
char* s;
char* t;
char* u;
char* p;
char* junk;
char** v;
int n;
int m;
int list;
int dups;
Recfmt_t f;
unsigned long events;
{
return 0;
}
dups = 0;
junk = 0;
list = 0;
ss = 0;
if (options)
{
for (;;)
{
/*
 * NOTE(review): presumably the cases of a switch on the option parser's
 * return value; the switch line itself is missing from this view. The
 * letters match the options listed in the usage string: C codeset,
 * c control, d duplicates, j junk, l list, o out*, R reclen.
 */
{
case 0:
break;
case 'C':
{
goto drop;
}
continue;
case 'c':
goto drop;
continue;
case 'd':
dups = 1;
continue;
case 'j':
continue;
case 'l':
list = 1;
continue;
case 'o':
goto drop;
continue;
case 'R':
{
/* reports opt_info.num against key->fixed as a fixed record length mismatch */
(*ssdisc->errorf)(NiL, ssdisc, 2, "%d: fixed record length mismatch -- %d expected", (int)opt_info.num, key->fixed);
goto drop;
}
continue;
case '?':
goto drop;
case ':':
goto drop;
}
break;
}
}
goto drop;
{
{
/*
 * First pass over the space-separated string at u: n accumulates an
 * entry count (starting at 1) and m the bytes needed for the strings
 * including their terminators.
 */
s = u;
n = 1;
m = 0;
for (;;)
{
while (*s == ' ')
s++;
if (!(t = strchr(s, ' ')))
t = s + strlen(s);
{
n++;
m += strlen(p) + 1;
}
if (!*t)
break;
s = t + 1;
}
{
goto drop;
}
/*
 * Second pass: string storage starts right after the n pointer slots of
 * v; each string p is copied there and its address recorded in v.
 */
s = u;
u = (char*)(v + n);
n = 0;
for (;;)
{
while (*s == ' ')
s++;
if (!(t = strchr(s, ' ')))
t = s + strlen(s);
{
v[n++] = u;
u = strcopy(u, p) + 1;
}
if (!*t)
break;
s = t + 1;
}
/* terminate the vector */
v[n] = 0;
}
}
goto drop;
{
}
{
}
else
{
p = 0;
{
f = recstr(t + 1, &u);
/*
 * Once an earlier name has been recorded in p, a format f that differs
 * from ss->format is accepted only when both are REC_variable with equal
 * REC_V_ATTRIBUTES; otherwise the mismatch is reported and setup is
 * abandoned.
 */
if (f != ss->format && p && (RECTYPE(f) != REC_variable || RECTYPE(ss->format) != REC_variable || REC_V_ATTRIBUTES(f) != REC_V_ATTRIBUTES(ss->format)))
{
(*ssdisc->errorf)(NiL, ssdisc, 2, "%s: format %s incompatible with %s format %s", s, fmtrec(f, 0), p, fmtrec(ss->format, 0));
goto drop;
}
p = s;
if (RECTYPE(f) != REC_variable || RECTYPE(ss->format) != REC_variable || REC_V_SIZE(ss->format) < REC_V_SIZE(f))
}
}
goto drop;
{
{
{
/* same compatibility rule as above, applied against ss->file->format when that format is set and neither ss->in nor ss->file->out is */
if (ss->file->format != REC_N_TYPE() && f != ss->file->format && !ss->in && !ss->file->out && (RECTYPE(f) != REC_variable || RECTYPE(ss->file->format) != REC_variable || REC_V_ATTRIBUTES(f) != REC_V_ATTRIBUTES(ss->file->format)))
{
(*ssdisc->errorf)(NiL, ssdisc, 2, "%s: format %s incompatible with %s format %s", s, fmtrec(f, 0), p, fmtrec(ss->file->format, 0));
goto drop;
}
}
{
else
{
goto drop;
}
}
}
}
{
}
/* with the list option the process exits with status 0 here; the listing call itself is missing from this view */
if (list)
{
exit(0);
}
{
goto drop;
}
{
}
{
events |= RS_SUMMARY;
}
{
case 'N':
break;
default:
break;
}
{
case 'D':
break;
case 'F':
goto drop;
break;
case 'V':
case 'B':
break;
}
goto drop;
if (junk)
{
{
goto drop;
}
{
goto drop;
}
}
drop:
/* NOTE(review): the cleanup statements guarded by these two tests are missing from this view */
if (ss)
if (ssdisc)
return 0;
}