1N/A/***********************************************************************
1N/A* *
1N/A* This software is part of the ast package *
1N/A* Copyright (c) 1985-2011 AT&T Intellectual Property *
1N/A* and is licensed under the *
1N/A* Common Public License, Version 1.0 *
1N/A* by AT&T Intellectual Property *
1N/A* *
1N/A* A copy of the License is available at *
1N/A* http://www.opensource.org/licenses/cpl1.0.txt *
1N/A* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
1N/A* *
1N/A* Information and Software Systems Research *
1N/A* AT&T Research *
1N/A* Florham Park NJ *
1N/A* *
1N/A* Glenn Fowler <gsf@research.att.com> *
1N/A* David Korn <dgk@research.att.com> *
1N/A* Phong Vo <kpv@research.att.com> *
1N/A* *
1N/A***********************************************************************/
1N/A#pragma prototyped
1N/A
1N/A/*
1N/A * determine record format by sampling data in <buf,size>
1N/A * total is the total file size, <=0 if not available
1N/A * return r:
1N/A * -1 could not determine
1N/A * RECTYPE(r)==REC_fixed fixed length REC_F_SIZE(r)
1N/A * RECTYPE(r)==REC_delimited variable length delimiter=REC_D_DELIMITER(r)
1N/A * RECTYPE(r)==REC_variable variable length
1N/A */
1N/A
1N/A#include <recfmt.h>
1N/A
/*
 * tallies gathered while scanning the sample for fixed length records
 */

typedef struct
{
	/* rep[d]: number of bytes found d positions after the */
	/* position last recorded in hit[] for the same byte value */
	unsigned int	rep[4 * 1024];

	/* hit[c]: sample index at which byte value c was last seen */
	unsigned int	hit[UCHAR_MAX + 1];
} Sample_t;
1N/A
1N/ARecfmt_t
1N/Arecfmt(const void* buf, size_t size, off_t total)
1N/A{
1N/A register unsigned char* s;
1N/A register unsigned char* t;
1N/A register Sample_t* q;
1N/A register unsigned int* h;
1N/A register unsigned int i;
1N/A unsigned int j;
1N/A unsigned int k;
1N/A unsigned int n;
1N/A unsigned int m;
1N/A unsigned int x;
1N/A unsigned long f;
1N/A unsigned long g;
1N/A
1N/A static unsigned char terminators[] = { '\n', 0x15, 0x25 };
1N/A
1N/A /*
1N/A * check for V format
1N/A */
1N/A
1N/A s = (unsigned char*)buf;
1N/A t = s + size;
1N/A while ((k = (t - s)) >= 4 && !s[2] && !s[3])
1N/A {
1N/A if ((i = (s[0]<<8)|s[1]) > k)
1N/A break;
1N/A s += i;
1N/A }
1N/A if (!k || size > 2 * k)
1N/A return REC_V_TYPE(4, 0, 2, 0, 1);
1N/A s = (unsigned char*)buf;
1N/A
1N/A /*
1N/A * check for terminated records
1N/A */
1N/A
1N/A for (i = 0; i < elementsof(terminators); i++)
1N/A if ((t = (unsigned char*)memchr((void*)s, k = terminators[i], size / 2)) && (n = t - s + 1) > 1 && (total <= 0 || !(total % n)))
1N/A {
1N/A for (j = n - 1; j < size; j += n)
1N/A if (s[j] != k)
1N/A {
1N/A n = 0;
1N/A break;
1N/A }
1N/A if (n)
1N/A return REC_D_TYPE(terminators[i]);
1N/A }
1N/A
1N/A /*
1N/A * check fixed length record frequencies
1N/A */
1N/A
1N/A if (!(q = newof(0, Sample_t, 1, 0)))
1N/A return REC_N_TYPE();
1N/A x = 0;
1N/A for (i = 0; i < size; i++)
1N/A {
1N/A h = q->hit + s[i];
1N/A m = i - *h;
1N/A *h = i;
1N/A if (m < elementsof(q->rep))
1N/A {
1N/A if (m > x)
1N/A x = m;
1N/A q->rep[m]++;
1N/A }
1N/A }
1N/A n = 0;
1N/A m = 0;
1N/A f = ~0;
1N/A for (i = x; i > 1; i--)
1N/A {
1N/A if ((total <= 0 || !(total % i)) && q->rep[i] > q->rep[n])
1N/A {
1N/A m++;
1N/A g = 0;
1N/A for (j = i; j < size - i; j += i)
1N/A for (k = 0; k < i; k++)
1N/A if (s[j + k] != s[j + k - i])
1N/A g++;
1N/A g = (((g * 100) / i) * 100) / q->rep[i];
1N/A if (g <= f)
1N/A {
1N/A f = g;
1N/A n = i;
1N/A }
1N/A }
1N/A }
1N/A if (m <= 1 && n <= 2 && total > 1 && total < 256)
1N/A {
1N/A n = 0;
1N/A for (i = 0; i < size; i++)
1N/A for (j = 0; j < elementsof(terminators); j++)
1N/A if (s[i] == terminators[j])
1N/A n++;
1N/A n = n ? 0 : total;
1N/A }
1N/A free(q);
1N/A return n ? REC_F_TYPE(n) : REC_N_TYPE();
1N/A}
1N/A
1N/A#if MAIN
1N/A
1N/Amain()
1N/A{
1N/A void* s;
1N/A size_t size;
1N/A off_t total;
1N/A
1N/A if (!(s = sfreserve(sfstdin, SF_UNBOUND, 0)))
1N/A {
1N/A sfprintf(sfstderr, "read error\n");
1N/A return 1;
1N/A }
1N/A size = sfvalue(sfstdin);
1N/A total = sfsize(sfstdin);
1N/A sfprintf(sfstdout, "%d\n", recfmt(s, size, total));
1N/A return 0;
1N/A}
1N/A
1N/A#endif