/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1996-2011 AT&T Intellectual Property *
* and is licensed under the *
* Eclipse Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* http://www.eclipse.org/org/documents/epl-v10.html *
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* Glenn Fowler
* AT&T Research
*/
/*
 * Command usage/help text: identification string, NAME, DESCRIPTION,
 * the "[ file ... ]" operand synopsis and SEE ALSO.
 * NOTE(review): the bracketed markup ([-?...], [+NAME?...], \b..\b, \a..\a)
 * presumably follows the AST optget(3) usage-string format; the call that
 * consumes this string is not visible in this text -- confirm.
 */
static const char usage[] =
"[-?\n@(#)$Id: bb2tok (AT&T Research) 2007-12-19 $\n]"
"[+NAME?bb2tok - convert bb html to tokens]"
"[+DESCRIPTION?\bbb2tok\b extracts tokens from input \bhtml\b \afile\as. "
"If \afile\a is not specified then the standard input is read. The "
"\bhtml\b parse is rudimentary; don't use \bbb2tok\b to detect valid "
"\bhtml\b files.]"
"\n"
"\n[ file ... ]\n"
"\n"
"[+SEE ALSO?\bhtml2db\b(1), \bhtml2rtf\b(1)]"
;
#include <ast.h>
#include <ctype.h>
#include <error.h>
/*
 * NOTE(review): LINK is not referenced anywhere in the visible text; its
 * purpose cannot be determined from here.
 */
#define LINK 0
/*
 * One entry of the header[] table that the tokenizer walks with
 * elementsof(header) and points at through a const Header_t*.
 */
typedef struct Header_s
{
/* presumably the input-side name matched against the html -- TODO confirm */
char* in;
/* name printed between "<" and ">" by the tag emitter (via state->prev->out) */
char* out;
/* lexical class; the emitter compares it with HEADER and CODE */
int lex;
/* not used in the visible text; meaning unknown -- TODO confirm */
int unary;
} Header_t;
/*
 * NOTE(review): the bare "{ };" below has no declarator in this text. It
 * is presumably the remains of the header[] table definition (and the
 * HEADER/CODE/NAME constants used below are not defined in this view) --
 * confirm against the complete source.
 */
{
};
typedef struct State_s
{
int push;
int keep;
int last;
unsigned char* lex;
} State_t;
/*
 * Tag emitter. On each path visible here it writes one line to op with
 * the format "%s<%s%s>\n":
 *   - first %s: "\n" when state->push is zero and the previous entry is
 *     a HEADER, or is a CODE whose state->last is not '\n'; otherwise "";
 *   - second %s: "" when state->push is nonzero, "/" otherwise;
 *   - third %s: state->prev->out.
 * The three top-level branches are selected by !head, push, and neither.
 *
 * NOTE(review): the function name, its parameter list and most of the
 * controlling conditions are missing from this text ("static void" is
 * followed directly by "{", and several blocks open with no statement
 * in front of them). head, push, op and state are presumably parameters
 * -- confirm against the complete source before changing anything here.
 */
static void
{
/* no header entry supplied */
if (!head)
{
{
return;
{
/* emit the pending tag for state->prev (see format description above) */
sfprintf(op, "%s<%s%s>\n", (!state->push && (state->prev->lex == HEADER || state->prev->lex == CODE && state->last != '\n')) ? "\n" : "", state->push ? "" : "/", state->prev->out);
}
}
}
/* header entry supplied and push is nonzero */
else if (push)
{
{
return;
return;
{
return;
}
/* emit the pending tag for state->prev (see format description above) */
sfprintf(op, "%s<%s%s>\n", (!state->push && (state->prev->lex == HEADER || state->prev->lex == CODE && state->last != '\n')) ? "\n" : "", state->push ? "" : "/", state->prev->out);
}
/* NOTE(review): the switch head for these case labels is not visible */
{
case CODE:
return;
case NAME:
break;
}
}
/* header entry supplied and push is zero */
else
{
{
{
return;
}
/* emit the pending tag for state->prev (see format description above) */
sfprintf(op, "%s<%s%s>\n", (!state->push && (state->prev->lex == HEADER || state->prev->lex == CODE && state->last != '\n')) ? "\n" : "", state->push ? "" : "/", state->prev->out);
}
}
}
/*
 * Tokenizer loop: dispatches on each input character c, collecting html
 * tag text into tag and token text into tok.
 *
 * Visible behavior:
 *   - '<' starts a tag; characters are scanned up to an unquoted '>'.
 *   - Tags are ignored until a "body" tag has been seen (k becomes 1).
 *   - "span"/"td" opening tags increment level; "/span" and "/td"
 *     decrement it when it is positive.
 *   - While n is nonzero, "br" and "a " tags get special handling and
 *     any other tag is treated as a space.
 *   - '&' is treated as a space.
 *
 * NOTE(review): the function name, parameter list, both switch heads,
 * the "space:" label and several statements are missing from this text;
 * tok and tag are not declared in the visible part. Confirm details
 * against the complete source.
 */
static void
{
register int c;
register int i;
register int k;
register int q;
register int n;
register int x;
register int level;
register char* e;
register char* s;
register char* t;
const Header_t* h;
/* t is the cursor into the token buffer */
t = tok;
/*
 * k: becomes 1 once a body tag is seen; q: active quote character inside
 * a tag (0 when outside quotes); n and level: nesting counters updated
 * by the span/td handling below.
 */
k = q = n = level = 0;
for (;;)
{
/* NOTE(review): the switch head reading the next character is not visible */
{
case EOF:
break;
case '<':
/* collect the tag text, starting at the beginning of tag */
x = 0;
s = tag;
for (;;)
{
/* NOTE(review): the switch head reading the next tag character is not visible */
{
case EOF:
return;
case '"':
/* toggle quote state: open on the first quote, close on a matching one */
if (!q)
q = c;
else if (q == c)
q = 0;
goto keep;
case '!':
/* '!' is kept as tag text unless s is still at the start of tag */
if (s != tag)
goto keep;
x = 1;
continue;
case '\n':
x = 1;
continue;
case '>':
/* an unquoted '>' ends the tag; a quoted one falls through as tag text */
if (!q)
break;
/*FALLTHROUGH*/
default:
keep:
continue;
}
break;
}
/* terminate the collected tag and rewind s to its start */
*s = 0;
s = tag;
if (!k)
{
/* skip every tag until "body" (bare or followed by a space) shows up */
if (s[0] == 'b' && s[1] == 'o' && s[2] == 'd' && s[3] == 'y' && (!s[4] || s[4] == ' '))
k = 1;
else
continue;
}
/* opening "span" or "td": s is advanced past the tag name as a side effect */
if (s[0] == 's' && s[1] == 'p' && s[2] == 'a' && s[3] == 'n' && (!s[4] || s[4] == ' ') && (s += 4) || s[0] == 't' && s[1] == 'd' && (!s[2] || s[2] == ' ') && (s += 2))
{
h = 0;
{
/*
 * skip 8 characters and cut the string at the next '"';
 * presumably this isolates a quoted attribute value such as
 * class="..." -- the guarding test is not visible, confirm
 */
for (e = s += 8; *e && *e != '"'; e++);
*e = 0;
/* walk the header table; the match test is not visible in this text */
for (i = 0; i < elementsof(header); i++)
{
h = &header[i];
{
n++;
}
break;
}
}
level++;
}
/* closing "/span" or "/td" */
else if (s[0] == '/' && (s[1] == 's' && s[2] == 'p' && s[3] == 'a' && s[4] == 'n' && !s[5] || s[1] == 't' && s[2] == 'd' && !s[3]))
{
if (level > 0)
{
level--;
{
n--;
}
}
}
/* any other tag, only while n is nonzero */
else if (n)
{
/* "br", "br ..." or "br/" */
if (s[0] == 'b' && s[1] == 'r' && (!s[2] || s[2] == ' ' || s[2] == '/'))
{
continue;
}
/* NOTE(review): the statement controlled by this if is missing from the text */
if (s[0] == 'a' && s[1] == ' ')
else
{
/* treat the tag as a single space */
c = ' ';
goto space;
}
}
continue;
case '&':
/* '&' is handled as a space */
c = ' ';
goto space;
case ':':
case ';':
case ',':
case '.':
/*
 * NOTE(review): as shown, the unconditional goto makes the store
 * below unreachable; a guarding condition was presumably lost
 */
goto code;
*t++ = c;
continue;
case ' ':
case '\t':
case '\r':
case '\v':
goto code;
continue;
case '\n':
goto code;
continue;
default:
goto code;
*t++ = c;
continue;
code:
continue;
}
break;
}
}
/*
 * Program entry (presumably main -- the name and parameter list are
 * missing from this text). Returns 1 when error_info.errors is nonzero
 * at the end and 0 otherwise.
 *
 * NOTE(review): the first loop has case labels '?' and ':' but no visible
 * switch head; it is presumably the option-parsing loop over the usage
 * string -- confirm. The statement guarded by "if (error_info.errors)"
 * and the body and condition of the do loop are also missing.
 */
int
{
register char* s;
for (;;)
{
/* NOTE(review): switch head not visible */
{
case '?':
continue;
case ':':
continue;
}
break;
}
if (error_info.errors)
do
{
{
}
{
continue;
}
/* exit status: 1 if any error was counted, else 0 */
return error_info.errors != 0;
}