/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1992-2012 AT&T Intellectual Property *
* and is licensed under the *
* Eclipse Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* David Korn <dgk@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* David Korn
* Glenn Fowler
* AT&T Research
*
* join
*/
/*
 * Self-documenting usage/help text for join, written in bracketed
 * option-description markup.  In the text below \a...\a wraps operand and
 * argument names and \b...\b wraps command and option names.
 * NOTE(review): presumably consumed by the AST optget() parser made
 * available through <cmd.h>; the call site is not visible here -- confirm.
 */
static const char usage[] =
"[-?\n@(#)$Id: join (AT&T Research) 2009-12-10 $\n]"
"[+NAME?join - relational database operator]"
"[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a "
"and \afile2\a and writes the resulting joined files to standard "
"output. By default, a field is delimited by one or more spaces "
"and tabs with leading spaces and/or tabs ignored. The \b-t\b option "
"can be used to change the field delimiter.]"
"[+?The \ajoin field\a is a field in each file on which files are compared. "
"By default \bjoin\b writes one line in the output for each pair "
"of lines in \afile1\a and \afile2\a that have identical join "
"fields. The default output line consists of the join field, "
"then the remaining fields from \afile1\a, then the remaining "
"fields from \afile2\a, but this can be changed with the \b-o\b "
"option. The \b-a\b option can be used to add unmatched lines "
"to the output. The \b-v\b option can be used to output only "
"unmatched lines.]"
"[+?The files \afile1\a and \afile2\a must be ordered in the collating "
"sequence of \bsort -b\b on the fields on which they are to be "
"joined otherwise the results are unspecified.]"
"[+?If either \afile1\a or \afile2\a is \b-\b, \bjoin\b "
"uses standard input starting at the current location.]"
"[e:empty]:[string?Replace empty output fields in the list selected with"
" \b-o\b with \astring\a.]"
"[o:output]:[list?Construct the output line to comprise the fields specified "
"in a blank or comma separated list \alist\a. Each element in "
"\alist\a consists of a file number (either 1 or 2), a period, "
"and a field number or \b0\b representing the join field. "
"As an obsolete feature multiple occurrences of \b-o\b can "
"be specified.]"
"[t:separator|tabs]:[delim?Use \adelim\a as the field separator for both input"
" and output.]"
"[1:j1]#[field?Join on field \afield\a of \afile1\a. Fields start at 1.]"
"[2:j2]#[field?Join on field \afield\a of \afile2\a. Fields start at 1.]"
"[j:join]#[field?Equivalent to \b-1\b \afield\a \b-2\b \afield\a.]"
"[a:unpairable]#[fileno?Write a line for each unpairable line in file"
" \afileno\a, where \afileno\a is either 1 or 2, in addition to the"
" normal output. If \b-a\b options appear for both 1 and 2, then "
"all unpairable lines will be output.]"
"[v:suppress]#[fileno?Write a line for each unpairable line in file"
" \afileno\a, where \afileno\a is either 1 or 2, instead of the normal "
"output. If \b-v\b options appear for both 1 and 2, then "
"all unpairable lines will be output.] ]"
"[i:ignorecase?Ignore case in field comparisons.]"
"[B!:mmap?Enable memory mapped reads instead of buffered.]"
"[+?The following obsolete option forms are also recognized: \b-j\b \afield\a"
" is equivalent to \b-1\b \afield\a \b-2\b \afield\a, \b-j1\b \afield\a"
" is equivalent to \b-1\b \afield\a, and \b-j2\b \afield\a is"
" equivalent to \b-2\b \afield\a.]"
"\n"
"\nfile1 file2\n"
"\n"
"[+EXIT STATUS?]{"
"[+0?Both files processed successfully.]"
"[+>0?An error occurred.]"
"}"
"[+SEE ALSO?\bcut\b(1), \bcomm\b(1), \bpaste\b(1), \bsort\b(1), \buniq\b(1)]"
;
#include <cmd.h>
#include <sfdisc.h>
#include <wchar.h>
#include <wctype.h>
#else
#include <ctype.h>
#ifndef iswspace
#endif
#endif
/*
 * Field_t: one field of a record, described by a pair of pointers into
 * the record text.
 * NOTE(review): whether `end` is inclusive or one-past-the-end is not
 * visible in this chunk -- confirm against the record splitter.
 */
typedef struct Field_s
{
	char	*beg;	/* where the field text starts */
	char	*end;	/* where the field text stops */
} Field_t;
/*
 * File_t: per-input-file state.
 * NOTE(review): only the members listed here are visible; other code in
 * this file also refers to an `iop` stream member, so the structure is
 * probably truncated in this view.  Member roles marked "presumably" are
 * inferred from their names only -- confirm before relying on them.
 */
typedef struct File_s
{
	char	*name;		/* presumably the file name as given -- confirm */
	char	*recptr;	/* presumably the current record text -- confirm */
	int	reclen;		/* record length; the merge loop seeks back by this amount to re-read */
	int	field;		/* presumably the join field index for this file -- confirm */
	int	fieldlen;	/* length used as print precision for the join field in the trace macros */
	int	nfields;	/* presumably the number of fields in the current record -- confirm */
	int	maxfields;	/* presumably the capacity of the field table -- confirm */
	int	spaces;		/* presumably a leading-blank flag -- confirm */
	int	hit;		/* flag tested when the second file is drained; exact meaning not visible */
	int	discard;	/* presumably marks records to be skipped -- confirm */
} File_t;
/*
 * Join_t: state shared by the whole join operation.
 * NOTE(review): other code in this file refers to a `file[]` member that
 * is not listed here, so the structure is probably truncated in this view.
 * Member roles marked "presumably" are inferred from names only.
 */
typedef struct Join_s
{
	int	*outlist;	/* output field list; the list builder ends it with -1 */
	int	outmode;	/* output-selection bits (tested against C_COMMON) */
	int	ooutmode;	/* second copy of the mode bits (compared against C_ALL) */
	char	*nullfield;	/* presumably the -e replacement string -- confirm */
	char	*delimstr;	/* presumably the -t delimiter as a string -- confirm */
	int	delim;		/* presumably the delimiter character -- confirm */
	int	delimlen;	/* presumably the byte length of delimstr -- confirm */
	int	buffered;	/* presumably selects buffered reads (see -B) -- confirm */
	int	ignorecase;	/* nonzero: compare join fields with strncasecmp instead of memcmp */
	int	mb;		/* presumably set for multibyte locales -- confirm */
	char	*same;		/* presumably a scratch buffer for repeated keys -- confirm */
	int	samesize;	/* presumably the allocated size of `same` -- confirm */
} Join_t;
/*
 * NOTE(review): this definition is truncated in the visible source -- the
 * function name, its parameter list and every body statement are missing,
 * so it cannot compile as written.  Restore it from the upstream file
 * before making any change here.
 */
static void
{
}
/*
 * init: returns a Join_t pointer (`jp`) on the normal path and 0 on the
 * visible early-exit path.
 * NOTE(review): the declaration and setup of `jp`, and the conditions that
 * guard the inner blocks, are missing from this view.  As shown, the loop
 * over 0x80..0xff returns 0 on its first iteration.
 */
static Join_t*
init(void)
{
register int i;
{
/* walks the upper half of the single-byte range (0x80..0xff) */
for (i = 0x80; i <= 0xff; i++)
{
return 0;
}
}
return jp;
}
/*
 * Output-list parser: walks the characters at `cp`, skipping blanks, tabs
 * and commas, and stores one encoded int per list element through `outptr`.
 * The visible encoding is: field number made zero-based (c--), shifted left
 * by two, with bit 0 set when *cp is '2'; JOINFIELD stands for the join
 * field.  The list is terminated with -1.
 * NOTE(review): the function name, parameter list, several conditions and
 * the `skip2` label are missing from this view, and no return statement is
 * visible even though the return type is int.
 */
static int
{
register int c;
int* outptr;
int* outmax;
char* str;
while (c = *cp++)
{
/* separators between list elements */
if (c==' ' || c=='\t' || c==',')
continue;
{
str++;
c = JOINFIELD;
goto skip;
}
{
break;
}
/* pack: zero-based field number in the high bits, file selector in bit 0 */
c--;
c <<=2;
if (*cp=='2')
c |=1;
skip:
{
/* NOTE(review): presumably the grow step of the output array -- the condition and reallocation are not visible */
nfield *= 2;
}
*outptr++ = c;
}
/* need to accept obsolescent command syntax */
{
{
{
c = JOINFIELD;
goto skip2;
}
break;
}
if (*str || --c<0)
break;
argv++;
/* same packing as in the loop above */
c <<= 2;
if (*cp=='2')
c |=1;
{
nfield *= 2;
}
*outptr++ = c;
}
/* -1 marks the end of the list */
*outptr = -1;
}
/*
 * read in a record from file <index> and split into fields
 *
 * Returns a pointer into the record (cast to unsigned char*), the empty
 * string on one of the trailing paths, or 0 on the visible early exits.
 * The splitting loop classifies characters into states (S_SPACE, S_WIDE,
 * S_DELIM, S_NL) and runs until it sees S_NL.
 * NOTE(review): the function name, parameter list, the switch heads and
 * most conditions are missing from this view, so the control flow below is
 * only a skeleton of the real function.
 */
static unsigned char*
{
register char* cp;
register int n;
char* tp;
return 0;
{
return 0;
}
{
field++;
}
else
do /* separate into fields */
{
{
}
{
{
case S_SPACE:
cp++;
break;
case S_WIDE:
{
break;
}
/*FALLTHROUGH*/
default:
goto next;
}
/* keep consuming while the state stays S_SPACE (or a qualifying S_WIDE) */
for (;;)
{
{
case S_SPACE:
continue;
case S_WIDE:
{
continue;
}
break;
}
break;
}
else
cp--;
}
next:
{
/* scan the field body: state 0 keeps going, S_WIDE is re-examined */
for (;;)
{
{
case 0:
continue;
case S_WIDE:
cp--;
{
n = S_DELIM;
break;
}
{
n = S_SPACE;
break;
}
continue;
}
break;
}
}
else
{
}
field++;
} while (n != S_NL);
{
/* eliminate leading spaces */
{
for (;;)
{
{
case S_SPACE:
continue;
case S_WIDE:
{
continue;
}
break;
}
break;
}
else
cp--;
}
return (unsigned char*)cp;
}
return (unsigned char*)"";
}
/*
 * DEBUG_TRACE only: wrap each getrec() call so that, after the real call,
 * it prints the source line, the file index, the stream offset reported by
 * sftell() and at most 8 characters of the returned record to sfstdout.
 * The result is parked in u1 so the real getrec() runs once per use and
 * the macro still yields its return value.
 */
#if DEBUG_TRACE
static unsigned char* u1;
#define getrec(p,n,d) (u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1)
#endif
/*
 * print field <n> from file <index>
 *
 * Returns 0 on the normal path and -1 on the visible failure paths.
 * `n` is reused as the terminator to emit after the field: '\n' when
 * `last` is set, otherwise -1.
 * NOTE(review): the function name, parameter list, the write calls and
 * most conditions are missing from this view.  As shown, `if (cp)` has no
 * statement before its `else`.
 */
static int
{
register char* cp;
register char* cpmax;
register int size;
char* tp;
{
}
else
cp = 0;
{
{
/*eliminate leading spaces */
for (;;)
{
{
case S_SPACE:
continue;
case S_WIDE:
{
continue;
}
break;
}
break;
}
else
cp--;
}
n = ' ';
}
/* choose the terminator: newline for the last field, otherwise -1 */
n = -1;
if (last)
n = '\n';
if (cp)
else
size = 0;
if (n == -1)
{
if (size<=1)
{
return -1;
}
return -1;
return -1;
}
else if (size <= 1)
{
return -1;
}
else
{
return -1;
}
return 0;
}
/*
 * DEBUG_TRACE only: print the source line and the three trailing arguments
 * (captured in i1, i2, i3 so each is evaluated once) to sfstdout, then
 * forward to the real outfield().
 * NOTE(review): i1/i2/i3 are not declared anywhere in the visible source.
 */
#if DEBUG_TRACE
#define outfield(p,i,n,f) (sfprintf(sfstdout, "[F%d#%d:%d,%d]", __LINE__, i1=i, i2=n, i3=f), outfield(p, i1, i2, i3))
#endif
/*
 * Emit one output record.
 *
 * With an explicit output list (`out`), every entry up to the negative
 * terminator is decoded: JOINFIELD selects the join field, otherwise bit 0
 * is the file index and the remaining bits (value >> 2) are the field
 * number.  This mirrors the `c <<= 2; c |= 1` packing used where the list
 * is built.  Without a list, the default layout is produced: the join
 * field first, then the remaining fields.
 *
 * Returns 0 on the normal paths and -1 on the visible failure paths.
 *
 * NOTE(review): the function name, parameter list, the outfield() calls
 * and most conditions are missing from this view; everything except the
 * shift fix below is reproduced unchanged.
 */
static int
{
	register int i;
	register int j;
	register int k;
	register int n;
	int* out;
	return 0;
	return 0;
	{
		while ((n = *out++) >= 0)
		{
			if (n == JOINFIELD)
			{
				i = mode >= 0;
			}
			else
			{
				i = n & 1;
				/*
				 * Drop the two low bits to recover the field
				 * number.  The previous `n >> 2;` discarded the
				 * result, leaving the selector bits in `n`.
				 */
				n >>= 2;
			}
			return -1;
		}
		return 0;
	}
	if (mode >= 0)
		for (i=0; i<2; i++)
		{
			if (mode>0 && i==0)
			{
				continue;
			}
			if (mode||i==0)
			{
				/* output join field first */
				return -1;
				if (!k)
					return 0;
				for (j=0; j<n; j++)
				{
					return -1;
					if (!k)
						return 0;
				}
				/* resume after the join field */
				j = n + 1;
			}
			else
				j = 0;
			{
				return -1;
				if (!k)
					return 0;
			}
		}
	return 0;
}
/*
 * DEBUG_TRACE only: print the source line, the mode argument (captured in
 * i1), lo/hi, and both join fields with their lengths and hit flags to
 * sfstdout, then forward to the real outrec().
 * The expansion reads jp, lo, hi, cp1 and cp2 from the calling scope, so
 * it only works inside a function that has those names in scope.
 */
#if DEBUG_TRACE
#define outrec(p,n) (sfprintf(sfstdout, "[R#%d,%d,%lld,%lld:%-.*s{%d}:%-.*s{%d}]", __LINE__, i1=n, lo, hi, jp->file[0].fieldlen, cp1, jp->file[0].hit, jp->file[1].fieldlen, cp2, jp->file[1].hit), outrec(p, i1))
#endif
/*
 * Merge loop over the two inputs.
 *
 * Each pass compares the current join fields cp1 (read from file 0) and
 * cp2: when n is zero only the lengths n1/n2 are compared; otherwise the
 * first bytes, then n bytes via strncasecmp (ignorecase) or memcmp, then
 * the lengths.  The three outcomes (cmp == 0, cmp > 0, cmp < 0) are handled
 * by the three branches below; `same` tracks a run of equal keys and lo/hi
 * are reset to -1 once consumed.  After the loop the remaining input is
 * drained.
 *
 * Returns 0 on the normal paths and -1 on the visible failure paths.
 *
 * NOTE(review): the function name, parameter list, the declarations of
 * lo/hi/jp, most conditions and the record/output calls are missing from
 * this view; `if (hi >= 0 &&` below is an unterminated condition and the
 * trailing `do` has no matching `while`.
 */
static int
{
register unsigned char* cp1;
register unsigned char* cp2;
register int n1;
register int n2;
register int n;
register int cmp;
register int same;
int o2;
{
same = 0;
for (;;)
{
/*
 * NOTE(review): in the DEBUG_TRACE variant the `if` body is the trace
 * sfprintf and the condition lacks the final length comparison that the
 * non-debug variant has -- confirm against upstream.
 */
#if DEBUG_TRACE
if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)))
sfprintf(sfstdout, "[C#%d:%d(%c-%c),%d,%lld,%lld%s]", __LINE__, cmp, *cp1, *cp2, same, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
if (!cmp)
#else
if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)) && !(cmp = n1 - n2))
#endif
{
/* keys are equal */
{
{
same = 1;
continue;
}
break;
/* step file 0 back by one record length and read that record again */
if (sfseek(jp->file[0].iop, (Sfoff_t)-jp->file[0].reclen, SEEK_CUR) < 0 || !(cp1 = getrec(jp, 0, 0)))
{
return -1;
}
}
return -1;
{
{
return -1;
}
}
{
continue;
}
#if DEBUG_TRACE
#endif
}
else if (cmp > 0)
{
/* first key sorts after the second */
if (same)
{
same = 0;
next:
{
{
return -1;
}
}
break;
goto next;
continue;
}
if (hi >= 0)
{
{
return -1;
}
hi = -1;
}
return -1;
lo = -1;
{
continue;
}
#if DEBUG_TRACE
#endif
}
else if (same)
{
/* first key sorts before the second, ending a run of equal keys */
same = 0;
break;
continue;
}
if (lo >= 0)
{
{
return -1;
}
lo = -1;
}
else if (!cp2)
break;
return -1;
break;
}
}
#if DEBUG_TRACE
sfprintf(sfstdout, "[X#%d:?,%p,%p,%d,%d,%d%s]", __LINE__, cp1, cp2, cmp, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
#endif
/* pick which side still has data: n = 1 / cmp = 1 when cp2 is set, else n = 0 / cmp = -1 */
if (cp2)
{
if (hi >= 0 &&
{
return -1;
}
#if DEBUG_TRACE
#endif
/* reuse the record already held in cp2 when the listed conditions hold, otherwise read the next one from file 1 */
cp1 = (!cp1 && cmp && hi < 0 && !jp->file[1].hit && ((jp->ooutmode ^ C_ALL) <= 1 || jp->outmode == 2)) ? cp2 : getrec(jp, 1, 0);
cmp = 1;
n = 1;
}
else
{
cmp = -1;
n = 0;
}
#if DEBUG_TRACE
sfprintf(sfstdout, "[X#%d:%d,%p,%p,%d,%02o,%02o%s]", __LINE__, n, cp1, cp2, cmp, jp->ooutmode, jp->outmode, (jp->outmode & C_COMMON) ? ",COMMON" : "");
#endif
{
return 0;
}
return -1;
do
{
return 0;
return -1;
}
/*
 * Command entry point: an option-parsing loop with one case per option
 * letter (j, 1, 2, v, a, e, o, t, i, B, plus ':' and '?'), followed by the
 * file setup and teardown.  Returns error_info.errors.
 * NOTE(review): the function name, parameter list, the switch head, the
 * body of every option case and all of the setup/teardown statements are
 * missing from this view; both `if (sfdcseekable(sfstdin))` lines have no
 * statement before their `else`.
 */
int
{
register int n;
register char* cp;
char* e;
#if !DEBUG_TRACE
#endif
for (;;)
{
{
case 'j':
/*
* check for obsolete "-j1 field" and "-j2 field"
*/
{
n = cp[n] == 'j';
}
else
n = 0;
if (n)
{
{
/* argc = 0 followed by break leaves the option loop on this path */
argc = 0;
break;
}
if (*e)
{
argc = 0;
break;
}
}
else
{
n = '2';
}
/*FALLTHROUGH*/
case '1':
case '2':
continue;
case 'v':
/*FALLTHROUGH*/
case 'a':
continue;
case 'e':
continue;
case 'o':
/* need to accept obsolescent command syntax */
continue;
case 't':
{
{
continue;
}
}
continue;
case 'i':
continue;
case 'B':
continue;
case ':':
break;
case '?':
break;
}
break;
}
{
}
{
{
if (sfdcseekable(sfstdin))
else
}
}
{
}
{
{
if (sfdcseekable(sfstdin))
else
}
}
{
}
{
}
{
}
/* the accumulated error count from error_info is the return value */
return error_info.errors;
}