/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 1996-2011 AT&T Intellectual Property *
* and is licensed under the *
* Eclipse Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* Glenn Fowler
* AT&T Research
*
* html to rtf filter
*/
/*
 * Command line usage/help text.
 *
 * The text is a sequence of bracketed groups: `[+SECTION?text]` for
 * manual sections, `[c:long-name?text]` for options (followed by
 * `#[arg]` for numeric and `:[arg]` for string arguments), and a
 * blank-line delimited operand synopsis.  Every `[` must be closed by
 * a matching `]`; the DESCRIPTION group is terminated here so that the
 * option groups which follow are not swallowed into it.
 *
 * NOTE(review): presumably consumed by the AST optget(3) parser -- the
 * call itself is not visible in this file; the exact closing wording of
 * the DESCRIPTION sentence should be confirmed against upstream.
 */
static const char usage[] =
"[-?\n@(#)$Id: html2rtf (AT&T Research) 1999-01-01 $\n]"
"[+NAME?html2rtf - html to rtf filter]"
"[+DESCRIPTION?\bhtml2rtf\b converts input \bhtml\b documents to an \bRTF\b"
" document on the standard output. \bhtml2rtf\b expects properly nested"
" tags in the input \bhtml\b.]"
"[d:debug?Set the debug trace level to \alevel\a. Higher levels produce"
" more output.]#[level]"
"[f:font-size?Set the initial font size to \asize\a points.]#[size:=12]"
"[p:project-file?Appends MS HELP project information to the help project file"
" \afile\a. This file combines individual RTF files into a"
" hyper-linked collection. Note that MS expects \afile\a to have a"
" \b.hlp\b extension.]:[file]"
"[v:verbose?Enable verbose error and warning messages. Some \bhtml\b source"
" can't stand the heat.]"
"\n"
"\n[ file ... ]\n"
"\n"
"[+SEE ALSO?\bman\b(1), \bmm\b(1), \bmm2html\b(1), \btroff\b(1),"
" \btroff2html\b(1)]"
;
#include "html2rtf.h"
#include <error.h>
/*
 * return the attribute pointer for name in ap
 *
 * NOTE(review): the declarator line (function name and parameter list)
 * is absent in this text, so `ap` is undeclared here.  The visible body
 * performs no name comparison: it yields ap when ap is non-null and 0
 * otherwise.  A call of the form attribute(ap, "START") appears later
 * in the file -- confirm the full signature and lookup loop against the
 * complete source.
 */
static Attribute_t*
{
/* non-null attribute pointer is handed back unchanged */
if (ap)
return ap;
/* no attribute available */
return 0;
}
/*
 * new paragraph with optional hanging indent
 *
 * NOTE(review): the declarator is absent in this text; `hanging` and
 * `tail` are presumably its parameters -- confirm against the full
 * source.
 */
static void
{
/*
 * when hanging is set, write the control words \liN \txN \tx20000 to
 * state.out, where both N are twips(state.indent), followed by tail
 * (or by nothing when tail is null)
 */
if (hanging)
sfprintf(state.out, "\\li%d\\tx%d\\tx20000%s", twips(state.indent), twips(state.indent), tail ? tail : "");
}
static void
{
register int c;
if (s)
{
if (ref)
{
if (*s != '#')
{
return;
}
s++;
}
else
while (c = *s++)
if (ref)
{
}
}
}
static int
{
return 1;
}
static int
{
{
}
return 0;
}
static int
{
return 1;
}
static int
{
return 1;
}
static int
{
return 1;
}
static int
{
return 0;
}
static int
{
return 0;
}
static int
{
return 0;
}
static int
{
return 1;
}
static int
{
return 1;
}
static int
{
else
return 1;
}
static int
{
return 1;
}
static int
{
return 1;
}
static int
{
return 1;
}
static int
{
return 1;
}
static int
{
return 0;
}
static int
{
char* s;
char* e;
int n;
{
if (*s == '+' || *s == '-')
return 1;
}
return 0;
}
static int
{
return 1;
}
static int
{
return 1;
}
static int
{
return 1;
}
static int
{
return 0;
}
static int
{
return 0;
}
static int
{
return 0;
}
static int
{
char* s;
return 1;
}
static int
{
return 1;
}
static int
{
return 1;
}
static int
{
return 0;
}
/*
 * NOTE: roman() transcribed from GNU groff
 *
 * NOTE(review): the declarator, the loop header that drives the digit
 * switch, and the statements inside each case are absent in this text.
 * `n` is presumably the number to format and `i` the current decimal
 * place value -- `i` is never initialized in the visible code; confirm
 * against the full source.
 */
static void
{
register char* dig;
register int i;
register int m;
/* values at or beyond +/-40000 are rejected up front */
if (n <= -40000 || n >= 40000)
{
return;
}
/* zero takes its own early exit */
if (n == 0)
{
return;
}
/* continue with the magnitude of a negative value */
if (n < 0)
{
n = -n;
}
/* peel off whole ten-thousands one at a time */
while (n >= 10000)
{
n -= 10000;
}
{
/* m is the digit at place value i; remove it from n */
m = n / i;
n -= m * i;
/* one arm per non-zero digit; 3 and 2 share the code of 1 */
switch (m)
{
case 9:
break;
case 8:
break;
case 7:
break;
case 6:
break;
case 5:
break;
case 4:
break;
case 3:
/*FALLTHROUGH*/
case 2:
/*FALLTHROUGH*/
case 1:
break;
}
}
}
static int
{
{
case '1':
break;
case 'A':
break;
case 'a':
break;
case 'I':
case 'i':
break;
default:
break;
}
return 1;
}
static int
{
{
}
return 0;
}
static int
{
char* e;
if (!(op = attribute(ap, "START")) || !op->value || (state.sp->list_counter = strtol(op->value, &e, 10)) < 0 || *e)
return 1;
}
static int
{
register char* s;
{
if (!strcasecmp(s, "CENTER"))
else if (!strcasecmp(s, "LEFT"))
else if (!strcasecmp(s, "RIGHT"))
}
return 1;
}
static int
{
return 1;
}
static int
{
return 1;
}
static int
{
register int i;
return 1;
}
static int
{
register int i;
return 1;
}
static int
{
register char* s;
register char* e;
register int n;
{
{
}
{
for (n = 0, e = s; e && (e = strchr(e, ',')); n++, e++);
n = 0;
do
{
if (e = strchr(s, ','))
*e++ = 0;
n++;
} while (s = e);
else
{
}
}
}
return 0;
}
static int
{
return 0;
}
static int
{
return 0;
}
static int
{
return 0;
}
static int
{
return 0;
}
static int
{
return 1;
}
static int
{
return 0;
}
static int
{
return 0;
}
static int
{
return 1;
}
static int
{
return 0;
}
static int
{
return 1;
}
static int
{
return 1;
}
static int
{
return 0;
}
static int
{
return 1;
}
static int
{
return 1;
}
static int
{
{
case 'c':
break;
case 's':
break;
default:
break;
}
return 1;
}
static int
{
return 1;
}
/*
* generic tag end
*/
static int
{
return 1;
}
/*
 * convert html file in to rtf file out
 *
 * The visible code is a character-at-a-time scanner:
 *	item	opening character of the construct being collected
 *		('<' for a tag, '&' for an entity), 0 in plain text
 *	ap	non-null while tag attributes are being collected
 *	lastc	previous character, used to collapse runs of blanks
 *	cc, tc	running column counters (cc is reset at 72 and after
 *		comments, tc is reset on newline and rounded to
 *		multiples of 8 on tab)
 *
 * NOTE(review): the declarator, the input read and switch header, and
 * many guarded statements are absent in this text; `ap`, `sp` and
 * `attributes` are not declared in the visible body.  Comments below
 * describe only what the remaining lines show.
 */
static void
{
register int c;
register int lastc;
register int item;
register int cc;
register int tc;
register char* s;
int lastlastc;
int quote;
int n;
ap = 0;
item = 0;
lastc = 0;
for (;;)
{
{
case EOF:
goto done;
case '<':
/* a '<' only opens a tag when no other construct is open */
if (!item)
{
item = c;
quote = 0;
ap = attributes;
{
if (c == '!')
}
continue;
}
break;
case '>':
{
item = 0;
/* leading '!' marks a comment/declaration; input lines are still counted */
if (*s == '!')
{
{
cc = 0;
{
if (c == '\n')
error_info.line++;
else
}
}
continue;
}
/* walk the collected attributes back down to the base of the array */
for (;;)
{
{
}
if (ap == attributes)
break;
ap--;
}
/*
 * `==` binds tighter than `=`: c becomes 1 for an end tag ("/name")
 * and 0 otherwise.  As the text stands the else branch is entered
 * only with c == 0, so `!c` is always true there and the final else
 * cannot be reached -- NOTE(review): presumably a lookup statement
 * between these branches was elided; confirm.
 */
if (c = *s == '/')
s++;
else if (!c)
{
{
{
}
{
}
}
}
else
{
{
for (;;)
{
{
sp = 0;
break;
}
break;
/* NOTE(review): unreachable after the unconditional break shown above */
sp--;
}
if (sp)
{
{
{
/* level 1 diagnostic naming the unmatched open tag and its line */
error(1, "<%s> on line %d has no matching </%s>", state.sp->tag->name, state.sp->line, state.sp->tag->name);
}
}
}
}
if (sp)
{
{
}
}
}
/* attribute collection ends with the tag */
ap = 0;
continue;
}
break;
case '=':
{
continue;
}
break;
case '"':
if (ap)
{
continue;
}
break;
case '&':
/* an '&' only opens an entity when no other construct is open */
if (!item)
{
item = c;
continue;
}
break;
case ';':
/* ';' closes an entity opened by '&' */
if (item == '&')
{
item = 0;
/* "&#..." form */
if (*s == '#')
{
tc++;
if (isspace(n))
lastc = ' ';
}
{
tc++;
lastc = ' ';
}
else
{
tc++;
}
continue;
}
break;
case '{':
case '}':
case '\\':
/* these count one extra output column -- presumably for an escaping backslash; confirm */
cc++;
break;
case '\n':
/* keep the input line number current for diagnostics */
error_info.line++;
{
cc += 5;
tc = 0;
break;
}
/*FALLTHROUGH*/
case ' ':
case '\t':
case '\v':
/* inside a tag, unquoted white space moves on to the next attribute slot */
if (ap)
{
if (!quote)
{
{
ap++;
lastc = ' ';
}
continue;
}
}
{
/* collapse consecutive blanks into one space */
if (lastc == ' ')
continue;
c = ' ';
/* column counter restarts once it reaches 72 */
if (cc >= 72)
{
cc = 0;
}
}
else if (c == ' ')
{
cc += 2;
tc++;
continue;
}
else if (c == '\t')
{
/* advance tc to the next multiple of 8 */
do
{
cc += 2;
tc++;
} while (tc % 8);
continue;
}
break;
default:
/* remaining control characters are dropped */
if (iscntrl(c))
continue;
/* 0177 is octal 127: values above it are outside 7-bit ASCII */
if (c > 0177)
{
tc++;
continue;
}
break;
}
{
if (c != ' ')
{
cc++;
tc++;
}
}
lastc = c;
cc++;
tc++;
}
done:
{
/* level 1 diagnostic naming the unmatched open tag and its line */
error(1, "<%s> on line %d has no matching </%s>", state.sp->tag->name, state.sp->line, state.sp->tag->name);
}
/* clear the file/line context used by error() */
error_info.file = 0;
error_info.line = 0;
}
/*
 * project file update predicate
 *
 * s is the entry name (unused), v the value stored with the entry and
 * h an opaque handle supplied by the caller; the result is 1 when v
 * and h are the same address and 0 otherwise
 */
static int
project_update(const char* s, char* v, void* h)
{
	char*	handle = (char*)h;

	NoP(s);
	if (v != handle)
		return 0;
	return 1;
}
/*
 * list project file names
 *
 * s is the entry name, v the value stored with the entry (unused) and
 * h an opaque handle; always returns 0
 *
 * NOTE(review): no output statement is present in this body -- confirm
 * against the full source that the listing itself was not dropped
 */
static int
project_list(const char* s, char* v, void* h)
{
	int	status = 0;

	NoP(v);
	return status;
}
/*
*/
static void
{
register char* s;
{
{
{
{
if (!s)
break;
}
}
}
else
s = "\
[OPTIONS]\n\
COMPRESS=TRUE\n\
REPORT=ON\n\
TITLE=Manual\n\
";
{
else
{
}
}
}
}
/*
 * html to rtf entity reference map
 *
 * Each row supplies three initializers: the entity name as written
 * between '&' and ';', the replacement text to emit, and a 0.
 * Replacements of the form \'hh are two-digit hexadecimal character
 * escapes; amp, gt, lt and quot map to the literal character; emdash,
 * endash and middot map to named control words.  Names are case
 * sensitive (AElig vs aelig) and the rows are kept in ascending
 * strcmp() order.
 *
 * NOTE(review): the declarator (element type and array name) is absent
 * in this text; init() refers to the array as `entities`.  The hex
 * values look like ISO 8859-1 code points -- confirm.  The standard
 * HTML names for the dashes are mdash/ndash, not emdash/endash --
 * confirm whether both spellings should be accepted.
 */
{
"AElig", "\\'c6", 0,
"Aacute", "\\'c1", 0,
"Acirc", "\\'c2", 0,
"Agrave", "\\'c0", 0,
"Aring", "\\'c5", 0,
"Atilde", "\\'c3", 0,
"Auml", "\\'c4", 0,
"Ccedil", "\\'c7", 0,
"ETH", "\\'d0", 0,
"Eacute", "\\'c9", 0,
"Ecirc", "\\'ca", 0,
"Egrave", "\\'c8", 0,
"Euml", "\\'cb", 0,
"Iacute", "\\'cd", 0,
"Icirc", "\\'ce", 0,
"Igrave", "\\'cc", 0,
"Iuml", "\\'cf", 0,
"Ntilde", "\\'d1", 0,
"Oacute", "\\'d3", 0,
"Ocirc", "\\'d4", 0,
"Ograve", "\\'d2", 0,
"Oslash", "\\'d8", 0,
"Otilde", "\\'d5", 0,
"Ouml", "\\'d6", 0,
"THORN", "\\'de", 0,
"Uacute", "\\'da", 0,
"Ucirc", "\\'db", 0,
"Ugrave", "\\'d9", 0,
"Uuml", "\\'dc", 0,
"Yacute", "\\'dd", 0,
"aacute", "\\'e1", 0,
"acirc", "\\'e2", 0,
"acute", "\\'b4", 0,
"aelig", "\\'e6", 0,
"agrave", "\\'e0", 0,
"amp", "&", 0,
"aring", "\\'e5", 0,
"atilde", "\\'e3", 0,
"auml", "\\'e4", 0,
"brvbar", "\\'a6", 0,
"ccedil", "\\'e7", 0,
"cedil", "\\'b8", 0,
"cent", "\\'a2", 0,
"copy", "\\'a9", 0,
"curren", "\\'a4", 0,
"deg", "\\'b0", 0,
"divide", "\\'f7", 0,
"eacute", "\\'e9", 0,
"ecirc", "\\'ea", 0,
"egrave", "\\'e8", 0,
"emdash", "\\emdash", 0,
"endash", "\\endash", 0,
"eth", "\\'f0", 0,
"euml", "\\'eb", 0,
"frac12", "\\'bd", 0,
"frac14", "\\'bc", 0,
"frac34", "\\'be", 0,
"gt", ">", 0,
"iacute", "\\'ed", 0,
"icirc", "\\'ee", 0,
"iexcl", "\\'a1", 0,
"igrave", "\\'ec", 0,
"iquest", "\\'bf", 0,
"iuml", "\\'ef", 0,
"laquo", "\\'ab", 0,
"lt", "<", 0,
"macr", "\\'af", 0,
"micro", "\\'b5", 0,
"middot", "\\bullet", 0,
"not", "\\'ac", 0,
"ntilde", "\\'f1", 0,
"oacute", "\\'f3", 0,
"ocirc", "\\'f4", 0,
"ograve", "\\'f2", 0,
"ordf", "\\'aa", 0,
"ordm", "\\'ba", 0,
"oslash", "\\'f8", 0,
"otilde", "\\'f5", 0,
"ouml", "\\'f6", 0,
"para", "\\'b6", 0,
"plusmn", "\\'b1", 0,
"pound", "\\'a3", 0,
"quot", "\"", 0,
"raquo", "\\'bb", 0,
"reg", "\\'ae", 0,
"sect", "\\'a7", 0,
"shy", "\\'ad", 0,
"sup1", "\\'b9", 0,
"sup2", "\\'b2", 0,
"sup3", "\\'b3", 0,
"szlig", "\\'df", 0,
"thorn", "\\'fe", 0,
"times", "\\'d7", 0,
"uacute", "\\'fa", 0,
"ucirc", "\\'fb", 0,
"ugrave", "\\'f9", 0,
"uml", "\\'a8", 0,
"uuml", "\\'fc", 0,
"yacute", "\\'fd", 0,
"yen", "\\'a5", 0,
"yuml", "\\'ff", 0,
/* compiled out: this entry has an empty replacement string */
#if 0
"trademark", "", 0,
#endif
};
/*
 * html tag table
 *
 * Each row supplies five initializers: the tag name, the function run
 * when the tag starts (0 for none), and three further zero fields.
 * The rows shown are in ascending name order; "NULL" and "UNKNOWN"
 * have no start function.
 *
 * NOTE(review): the declarator (element type and array name) is absent
 * in this text; init() refers to the array as `tags`.  The third field
 * is presumably the matching end function and the last two per-tag
 * data -- confirm against the Tag_t declaration.  Only tags without an
 * end function appear here; the paired tags seem to have been dropped
 * from this text.
 */
{
"BR", start_br, 0, 0,0,
"DD", start_dd, 0, 0,0,
"DT", start_dt, 0, 0,0,
"HR", start_hr, 0, 0,0,
"IMG", start_img, 0, 0,0,
"META", start_meta, 0, 0,0,
"NULL", 0, 0, 0,0,
"RENDER", start_render, 0, 0,0,
"TH", start_th, 0, 0,0,
"UNKNOWN", 0, 0, 0,0,
};
/*
 * hash the 0-terminated string s without regard to case
 *
 * every byte is read as unsigned char, upper case bytes are folded to
 * lower case, and each byte is mixed into the running value with
 * HASHPART(); the accumulated value is returned
 */
static unsigned int
strcasehash(const char* s)
{
	const unsigned char*	cursor;
	unsigned int		hash = 0;
	unsigned int		ch;

	for (cursor = (const unsigned char*)s; (ch = *cursor) != 0; cursor++)
	{
		if (isupper(ch))
			ch = tolower(ch);
		HASHPART(hash, ch);
	}
	return hash;
}
/*
 * initialize the global data
 *
 * NOTE(review): the statements guarded by the two conditions and the
 * bodies of the two loops are absent in this text, so the function is
 * not well-formed as shown; restore them from the full source before
 * changing anything here.
 */
static void
init(void)
{
register int i;
/* NOTE(review): this value is not used by any visible statement */
i = 1024;
/* tag table: keys compared with strcasecmp and hashed with strcasehash, i.e. case-insensitive */
if (!(state.tags = hashalloc(NiL, HASH_compare, strcasecmp, HASH_hash, strcasehash, HASH_name, "tags", 0)))
/* file table: only created when a project file was requested; state.tags is passed as the first hashalloc argument */
if (state.project && !(state.files = hashalloc(state.tags, HASH_set, HASH_ALLOCATE, HASH_name, "files", 0)))
/* one pass over every row of the entity map */
for (i = 0; i < elementsof(entities); i++)
/* one pass over every row of the tag table */
for (i = 0; i < elementsof(tags); i++)
}
/*
 * program entry: parse options, initialize, then handle each file
 * operand
 *
 * NOTE(review): the declarator, the option parser call and switch
 * header, and most statement bodies are absent in this text; comments
 * below describe only what the remaining lines show.
 */
int
{
register int c;
register char* s;
register char* t;
register char* u;
/* option loop: each recognized option continues, anything else ends the loop */
for (;;)
{
{
/* d, f, p and v are the option letters declared in usage[] */
case 'd':
continue;
case 'f':
continue;
case 'p':
continue;
case 'v':
continue;
/* '?' and ':' -- presumably the help and error returns of the parser; confirm */
case '?':
continue;
case ':':
continue;
}
break;
}
/*
 * NOTE(review): as the text stands init() runs only when
 * error_info.errors is non-zero; presumably a usage-error statement
 * belonging to this `if` was elided -- confirm
 */
if (error_info.errors)
init();
/* no file operands */
if (!*argv)
{
}
/* otherwise take the operands one by one */
else while (s = *argv++)
{
{
{
/* t: last '/' in the operand, or the operand itself when it has none (t is left on the '/' as shown) */
if (!(t = strrchr(s, '/')))
t = s;
/* c: number of characters in t before its last '.', or all of t when it has no '.' */
if (u = strrchr(t, '.'))
c = u - t;
else
c = strlen(t);
{
continue;
}
/* walk the characters of t up to the terminating 0 */
while (c = *t++)
}
else
{
}
{
}
}
}
}