html2db.c revision 3f54fd611f536639ec30dd53c48e5ec1897cc7d9
/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1996-2011 AT&T Intellectual Property *
* and is licensed under the *
* Eclipse Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* Glenn Fowler
* AT&T Research
*
* html2db - extract flat file database from html tables
*/
/*
 * Command usage/help text, built from adjacent string literals.
 * It carries an @(#)$Id version stamp, NAME, DESCRIPTION and SEE ALSO
 * sections, and the "[ file ... ]" operand synopsis.
 * NOTE(review): the bracketed "[+SECTION?text]" layout with \a and \b
 * markers looks like the AST optget() usage syntax -- the call that
 * consumes this string is not visible in this view; confirm there.
 * The text is runtime data: do not reflow or edit it cosmetically.
 */
static const char usage[] =
"[-?\n@(#)$Id: html2db (AT&T Research) 1998-11-10 $\n]"
"[+NAME?html2db - extract flat file database from html tables]"
"[+DESCRIPTION?\bhtml2db\b extracts a flat file database from tables in the"
" input \bhtml\b \afile\as. If \afile\a is not specified then the"
" standard input is read. The \bhtml\b parse is rudimentary; don't use"
" \bhtml2db\b to detect valid \bhtml\b files.]"
"\n"
"\n[ file ... ]\n"
"\n"
"[+SEE ALSO?\bhtml2rtf\b(1)]"
;
#include <ast.h>
#include <ctype.h>
#include <error.h>
/*
 * Tag and table-cell scanner: reacts to '<', tracks whether the scan is
 * inside a TD cell, and collapses runs of whitespace inside a cell.
 *
 * NOTE(review): this view of the function is incomplete. The function
 * name and parameter list, the switch headers that own the case labels
 * below, the statement that stores characters into tag[], and any
 * statement that emits characters are not visible here. The comments
 * describe only what the visible statements establish; confirm the rest
 * against the complete source.
 *
 * Locals:
 *   c    character under test (presumably the one most recently read)
 *   q    quote character currently open inside a tag, 0 when none
 *   p    nonzero after a tag named exactly "TD", cleared by "/TD"
 *   b    nonzero when the previous in-cell character was whitespace
 *   s    cursor into tag[]
 *   tag  256 byte buffer for the tag name
 */
static void
{
register int c;
register int q;
register int p;
register int b;
register char* s;
char tag[256];
/* start outside any cell with no pending whitespace */
b = p = 0;
for (;;)
{
/* NOTE(review): the switch header for these case labels is not visible */
{
case EOF:
/* assuming a switch body: falls out to the break after it, ending the loop */
break;
case '<':
q = 0;
s = tag;
/*
 * Tag name loop: ends on '>' or on whitespace, returns on EOF.
 * Any other character takes the continue below.
 * NOTE(review): the store of c through s is not visible here;
 * confirm it exists and is bounded by sizeof(tag).
 */
for (;;)
{
{
case EOF:
return;
case '>':
break;
default:
if (isspace(c))
break;
continue;
}
break;
}
/* terminate the tag name at the cursor */
*s = 0;
q = 0;
/*
 * Skip the rest of the tag up to a '>' that is not inside quotes.
 * A quote character closes the open quote when it matches q and opens
 * one when q is 0; a different quote inside an open quote is ignored.
 * NOTE(review): if the name loop stopped on '>', this loop would need
 * to see that '>' again; no push-back is visible in this view -- confirm.
 */
for (;;)
{
{
case EOF:
return;
case '\'':
case '"':
if (q == c)
q = 0;
else if (q == 0)
q = c;
continue;
case '>':
if (q == 0)
break;
continue;
default:
continue;
}
break;
}
/*
 * Classify the tag by exact name. Only upper case letters are compared.
 * NOTE(review): whether tag[] is case-normalized is not visible here.
 */
s = tag;
if (s[0] == 'T' && s[1] == 'D' && s[2] == 0)
p = 1;
else if (s[0] == '/' && s[1] == 'T')
{
if (s[2] == 'D' && s[3] == 0)
{
/* "/TD": leave the cell and forget pending whitespace */
b = p = 0;
}
/* NOTE(review): the statement governed by this "/TR" test is not visible */
else if (s[2] == 'R' && s[3] == 0)
}
continue;
default:
/* text is only considered while inside a cell */
if (p)
{
if (isspace(c))
{
/* second and later whitespace characters of a run are skipped */
if (b)
continue;
/* first whitespace character of a run becomes a single space */
b = 1;
c = ' ';
}
else
b = 0;
/* NOTE(review): the statement that emits c is not visible in this view */
}
continue;
}
break;
}
}
/*
 * Program entry point (presumably main -- the name and parameter list
 * are not visible in this view; argv is referenced below).
 * Returns 1 when error_info.errors is nonzero at exit, otherwise 0.
 *
 * NOTE(review): the option parser switch header, the statement governed
 * by the error check, and the bodies of the two inner blocks of the
 * do loop are not visible here; confirm against the complete source.
 */
int
{
/* no use of s is visible in this view */
register char* s;
/* option loop: both visible cases continue; anything else ends the loop */
for (;;)
{
/* NOTE(review): the switch header for these case labels is not visible */
{
case '?':
continue;
case ':':
continue;
}
break;
}
/*
 * NOTE(review): the statement governed by this test is not visible; as
 * shown here the do loop below would be its body, which is presumably
 * not the intent.
 */
if (error_info.errors)
/*
 * Operand loop: the body runs once before the first test. The condition
 * tests the current argv entry, then advances argv, so the loop ends
 * after a null entry has been tested.
 */
do
{
{
}
{
/* continue in a do loop jumps to the while condition below */
continue;
}
} while (*argv++);
return error_info.errors != 0;
}