/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1998-2011 AT&T Intellectual Property *
* and is licensed under the *
* Eclipse Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* induce fixed length record groups from data
*/
/*
 * command line option descriptions and help text
 *
 * the option letters declared here (c d f g m n o r v) are the ones
 * handled by the option loop in main; the nested { ... } section under
 * --description documents the one letter ops of a dump description file
 */
static const char usage[] =
"[-?\n@(#)$Id: rectify (AT&T Research) 1999-03-22 $\n]"
"[+NAME?rectify - induce fixed length record groups from data]"
"[+DESCRIPTION?\brectify\b induces fixed length record groups from input data"
" by sampling and comparing character frequencies. The standard input is"
" read if \a-\a or no files are specified.]"
"[c:context?List \acontext\a records at the beginning and end of"
" record groups larger than 3*\acontext\a.]#[context]"
"[d:description?Specify a structured dump description file. Each line of"
" this file describes the size and content of a contiguous portion"
" of the input file. The description is applied separately to each"
" input file. Comments and optional labels in the following"
" descriptions are listed with the \b--verbose\b option. Supported"
" descriptions are:]:[file]{"
" [+c comment?comment]"
" [+d size [label]]?\asize\a bytes of data with optional label]"
" [+i size [label]]?ignore \asize\a bytes of data]"
" [+r size count [label]]?\acount\a records of length \asize\a]"
" [+t count?Match \acount\a records against the \bT\b record"
" table. \acount\a=0 continues until no record type"
" match is found.]"
" [+z size [label]]?a string with length determined by a"
" \asize\a byte binary integer]"
" [+T idlen id size unit [offset]]?Defines a sized record"
" table entry.]{"
" [+idlen?type identifier length, must be"
" <= 4 bytes]"
" [+id?type identifier, starting at record offset 0]"
" [+size?default record size]"
" [+unit?if > 0 then the record is variable length and"
" the size is the byte at \aoffset\a]"
" [+offset?if \aunit\a > 0 then this byte multiplied by"
" \aunit\a is the size of variable length data"
" appended to the record]"
" }"
"}"
"[f:format?Byte output \bprintf\b(3) format.]:[format:=02x]"
"[g!:group?Group output in 4's.]"
"[m:min?Minimum record length to consider.]#[min:=8]"
"[n:count?List the top \acount\a candidate record lengths.]#[count:=16]"
"[o:offset?Start description listing at \aoffset\a.]#[offset:=0]"
"[r:run?List runs at least as long as \arun\a.]#[run]"
"[v:verbose?Dump description labels with data.]"
"\n"
"\n[ file ... ]\n"
"\n"
"[+SEE ALSO?\bpin\b(1), \bpop\b(1)]"
;
#include <ast.h>
#include <error.h>
#include <tok.h>
typedef struct Item_s
{
unsigned long index;
unsigned long offset;
unsigned long start;
unsigned long count;
unsigned long run;
} Item_t;
/*
 * record type table entry
 *
 * NOTE(review): presumably one entry per `T idlen id size unit [offset]'
 * description line as documented in the usage text, with len holding
 * idlen -- confirm against the code that fills state.type[]
 */
typedef struct
{
int len;	/* identifier length; also used to index id[] when matching */
unsigned long id;	/* identifier value compared against the input */
int size;	/* presumably the default record size */
int unit;	/* presumably > 0 for variable length records */
int offset;	/* presumably the offset of the size byte when unit > 0 */
} Type_t;
/*
 * description loop state
 *
 * NOTE(review): the '{' and '}' description ops also print
 * loop[nest].offset, which is not declared in this view of the struct
 */
typedef struct
{
unsigned long count;	/* printed as the loop count by the '{' and '}' ops */
} Loop_t;
static struct
{
char* format1;
char* format4;
unsigned long context;
unsigned long count;
unsigned long min;
unsigned long run;
int group;
int types;
int typelen;
int typelast;
} state;
/*
 * order items by count hi to lo
 *
 * NOTE(review): no function name or parameter list appears between the
 * `static int' return type and the opening brace, and the two leading
 * returns have no visible guarding condition, which leaves the a/b
 * comparison below them unreachable as written -- confirm against the
 * upstream source
 */
static int
{
return 1;
return -1;
/* a smaller a sorts after b, a larger a sorts before b */
if (a < b)
return 1;
if (a > b)
return -1;
return 0;
}
/*
 * rectify fp open for read on file
 *
 * NOTE(review): the function name and parameter list are not present
 * after `static void', and several blocks below have no visible
 * controlling expression or body -- confirm against the upstream source
 * before relying on these notes
 */
static void
{
register unsigned char* s;
register Item_t* p;
register unsigned long* q;
register unsigned long offset;
register unsigned long i;
unsigned long n;
unsigned long cur;
unsigned long dif;
unsigned long max;
max = 0;
offset = 0;
{
for (i = 0; i < n; i++)
{
/* record cur through q */
*q = cur;
{
/* one more hit for item p */
p->count++;
{
if (!p->run++)
}
else if (p->run)
{
/* the current run for item p is over */
p->run = 0;
}
}
}
/* advance the running offset by n, the bound of the loop above */
offset += n;
}
n = 0;
{
n++;
/* one line per listed entry: index, count and offset of state.mod[i] */
sfprintf(sfstdout, "rec %7lu %7lu %7lu\n", state.mod[i].index, state.mod[i].count, state.mod[i].offset);
}
}
/*
 * dump size n buffer b to op in 4 hex byte chunks
 *
 * NOTE(review): the function name and parameter list are not present
 * after `static void', x has no visible initialization and neither loop
 * has a visible output statement -- confirm against the upstream source
 */
static void
{
/* e is the end of the last whole 4 byte chunk of b */
register unsigned char* e = b + n / 4 * 4;
register unsigned char* x;
/* whole 4 byte chunks */
while (b < e)
{
if ((b += 4) < x)
}
/* remaining bytes up to x */
while (b < x)
}
/*
 * return a number from b and advance b
 *
 * leading blanks and tabs are skipped and the value is parsed by
 * strtoul() with base 0, so decimal, 0 prefixed octal and 0x prefixed
 * hex are accepted; on return *b points past the number and any blanks
 * or tabs that follow it, so successive calls walk a blank separated
 * list; if no number is present 0 is returned and *b is left at the
 * first non-blank character
 */
static unsigned long
number(char** b)
{
    register char* s;
    unsigned long r;

    for (s = *b; *s == ' ' || *s == '\t'; s++);
    r = strtoul(s, b, 0);
    /*
     * strtoul() stored the end of the parsed text in *b; always continue
     * from there -- restarting from s only when nothing was parsed reset
     * *b to the start of the number and the caller never advanced
     */
    for (s = *b; *s == ' ' || *s == '\t'; s++);
    *b = s;
    return r;
}
/*
 * dump fp according to dp
 *
 * each description line is dispatched on a one character op; the ops
 * c d i r t z T are the ones documented under --description in the
 * usage text, and '{' / '}' report loop state from loop[nest]
 *
 * NOTE(review): the function name and parameter list are not present
 * after `static void', and many blocks below have no visible controlling
 * expression or body -- confirm against the upstream source before
 * relying on these notes
 */
static void
{
register unsigned char* p;
unsigned char* e;
long size;
long count;
unsigned long context;
int nest;
int op;
char* s;
char* t;
error_info.line = 0;
offset = 0;
/* no loop is open yet */
nest = -1;
{
/* keep error messages pointing at the current description line */
error_info.line++;
/* skip blanks and tabs */
for (; *s == ' ' || *s == '\t'; s++);
switch (op)
{
/* '#' and newline ops take no action */
case '#':
case '\n':
break;
/* loop open: report offset, nest level and the loop count/offset */
case '{':
sfprintf(sfstdout, "=== %I*d === loop %d %lu %I*d === %-.*s\n", sizeof(offset), offset, nest, loop[nest].count, sizeof(loop[nest].offset), loop[nest].offset, t - s, s);
break;
/*
 * loop close
 * NOTE(review): as written the decrement is the statement guarded by
 * `nest < 0' -- confirm whether an error report belongs there instead
 */
case '}':
if (nest < 0)
nest--;
sfprintf(sfstdout, "=== %I*d === loop %d %lu %I*d === %-.*s\n", sizeof(offset), offset, nest, loop[nest].count, sizeof(loop[nest].offset), loop[nest].offset, t - s, s);
break;
/* c: no action is visible here */
case 'c':
break;
/* d: usage text says `size' bytes of data with optional label */
case 'd':
{
if (verbose)
}
break;
/* i: usage text says ignore `size' bytes of data */
case 'i':
break;
/* r: usage text says `count' records of length `size' */
case 'r':
{
{
error(ERROR_SYSTEM|3, "%s: cannot seek %I*d bytes at %I*d", file, sizeof(skip), skip, sizeof(offset), offset);
}
}
{
if (verbose)
sfprintf(sfstdout, "=== %I*d === %ld * %ld === %-.*s\n", sizeof(offset), offset, size, count, t - s, s);
{
while (count-- > 0)
{
}
error(ERROR_SYSTEM|3, "%s: cannot seek %I*d bytes at %I*d", file, sizeof(skip), skip, sizeof(offset), offset);
}
while (count-- > 0)
{
}
}
else
{
error(ERROR_SYSTEM|3, "%s: cannot seek %I*d bytes at %I*d", file, sizeof(skip), skip, sizeof(offset), offset);
}
break;
/* t: usage text says match records against the T record table */
case 't':
context = 0;
do
{
break;
{
}
{
/* linear search of state.type[] for an entry whose id matches id[len] */
for (state.typelast = 0; state.typelast < state.types && state.type[state.typelast].id != id[state.type[state.typelast].len]; state.typelast++);
{
if (verbose)
sfprintf(sfstdout, "=== %I*d === %0*x === type not found\n", sizeof(offset), offset, 2 * state.typelen, id[state.typelen]);
break;
}
sfprintf(sfstdout, "=== %I*d === %0*x === type\n", sizeof(offset), offset, 2 * state.type[state.typelast].len, id[state.type[state.typelast].len]);
context = 0;
}
{
}
{
}
break;
/* z: usage text says a string sized by a `size' byte binary integer */
case 'z':
count = 0;
e = p + size;
while (p < e)
{
if (verbose)
}
else
{
offset += 2;
}
break;
/* T: usage text says this defines a sized record table entry */
case 'T':
break;
default:
break;
}
}
/* clear the error message file/line context */
error_info.file = 0;
error_info.line = 0;
}
/*
 * program entry: an option loop with one case per option letter declared
 * in the usage text, followed by a per file loop that branches on
 * whether a description file (desc) was given
 *
 * the return value is 1 if any errors were counted in error_info.errors,
 * 0 otherwise
 *
 * NOTE(review): the function name, parameter list, the head of the
 * option switch and most case bodies are not present in this view --
 * confirm against the upstream source
 */
int
{
register char* file;
int n;
char* desc = 0;
int verbose = 0;
for (;;)
{
{
case 'c':
continue;
case 'd':
if (desc)
else
continue;
case 'f':
continue;
case 'g':
continue;
case 'm':
continue;
case 'n':
continue;
case 'o':
continue;
case 'r':
continue;
case 'v':
continue;
case '?':
continue;
case ':':
continue;
}
break;
}
if (error_info.errors)
argv++;
do
{
if (desc)
else
return error_info.errors != 0;;
}