/***********************************************************************
* *
* This software is part of the ast package *
* Copyright (c) 2000-2011 AT&T Intellectual Property *
* and is licensed under the *
* Eclipse Public License, Version 1.0 *
* by AT&T Intellectual Property *
* *
* A copy of the License is available at *
* (with md5 checksum b35adb5213ca9657e911e9befb180842) *
* *
* Information and Software Systems Research *
* AT&T Research *
* Florham Park NJ *
* *
* Glenn Fowler <gsf@research.att.com> *
* *
***********************************************************************/
#pragma prototyped
/*
* Glenn Fowler
* AT&T Research
*/
/*
 * Usage/help text for the command. It describes three options:
 * -h/--html, -m/--msg and -r/--raw, plus DESCRIPTION and SEE ALSO
 * sections.
 * NOTE(review): presumably handed to the option parser; the call that
 * consumes it is not visible in this view -- confirm.
 */
static const char usage[] =
"[-?\n@(#)$Id: msgcvt (AT&T Research) 2000-05-01 $\n]"
"[+DESCRIPTION?\bmsgcvt\b reads a \bgencat\b(1) format file on the standard"
" input and converts it to \bhtml\b on the standard output. The input"
" file must contain the control statement \b$quote \"\b and use the \""
" character to quote message text. The output is in a form suitable for"
" automatic translation by web sites like"
" \bhttp://babelfish.altavista.com/\b or filters like"
" \btranslate\b(1).]"
"[h:html?Generate \bhtml\b from \bgencat\b(1) input. This is the default.]"
"[m:msg?Generate a \bgencat\b(1) message file from (presumably translated)"
" \bhtml\b. Wide characters are UTF-8 encoded.]"
"[r:raw?The message file is raw message text, one message per line, with no"
" quoting or line numbering.]"
"[+SEE ALSO?\bgencat\b(1), \bmsgcc\b(1), \bmsggen\b(1), \btranslate\b(1)]"
;
#include <ast.h>
#include <ctype.h>
#include <error.h>
/*
 * One table entry: a symbolic name paired with the integer character
 * code it stands for.
 */
typedef struct
{
	const char*	name;
	int		code;
} Code_t;

/*
 * Name => character code table. The lookup code in this file walks it
 * linearly with elementsof(codes), so the order of entries does not
 * matter for correctness. Each entry is braced explicitly so that the
 * name/code pairing is visible to the compiler as well as the reader.
 */
static const Code_t	codes[] =
{
	{ "aacute", 225 },
	{ "Aacute", 193 },
	{ "acirc", 226 },
	{ "Acirc", 194 },
	{ "aelig", 230 },
	{ "AElig", 198 },
	{ "agrave", 224 },
	{ "Agrave", 192 },
	{ "amp", '&' },
	{ "aring", 229 },
	{ "Aring", 197 },
	{ "atilde", 227 },
	{ "Atilde", 195 },
	{ "auml", 228 },
	{ "Auml", 196 },
	{ "ccedil", 231 },
	{ "Ccedil", 199 },
	{ "copy", 169 },
	{ "eacute", 233 },
	{ "Eacute", 201 },
	{ "ecirc", 234 },
	{ "Ecirc", 202 },
	{ "egrave", 232 },
	{ "Egrave", 200 },
	{ "euml", 235 },
	{ "Euml", 203 },
	{ "gt", '>' },
	{ "iacute", 237 },
	{ "Iacute", 205 },
	{ "icirc", 238 },
	{ "Icirc", 206 },
	{ "igrave", 236 },
	{ "Igrave", 204 },
	{ "iuml", 239 },
	{ "Iuml", 207 },
	{ "lt", '<' },
	{ "nbsp", ' ' },
	{ "ntilde", 241 },
	{ "Ntilde", 209 },
	{ "oacute", 243 },
	{ "Oacute", 211 },
	{ "ocirc", 244 },
	{ "Ocirc", 212 },
	{ "ograve", 242 },
	{ "Ograve", 210 },
	{ "oslash", 248 },
	{ "Oslash", 216 },
	{ "otilde", 245 },
	{ "Otilde", 213 },
	{ "ouml", 246 },
	{ "Ouml", 214 },
	{ "quot", '"' },
	{ "reg", 174 },
	{ "szlig", 223 },
	{ "uacute", 250 },
	{ "Uacute", 218 },
	{ "ucirc", 251 },
	{ "Ucirc", 219 },
	{ "ugrave", 249 },
	{ "Ugrave", 217 },
	{ "uuml", 252 },
	{ "Uuml", 220 },
	{ "yuml", 255 },
};
/*
 * NOTE(review): the line carrying this function's name and parameters,
 * and several statements of its body (including the declaration of
 * name[] and whatever assigns c), are not present in this view. The
 * comments below describe only what the visible statements do.
 *
 * Visible behavior: characters are collected into name[]; a name that
 * starts with '#' goes through the 91/93 cases, any other name is
 * looked up in codes[]. The bad: path ends by returning '&'.
 */
static int
{
register int c;
register int i;
return '&';
name[0] = c;
i = 1;
/* the first character must be '#' or alphabetic, otherwise give up */
if (c != '#' && !isalpha(c))
goto bad;
{
if (c == '&')
i = 0;
else
{
name[i++] = c;
goto bad;
}
}
name[i] = 0;
if (name[0] == '#')
{
/* switch expression not visible; 91 yields '[' and 93 yields ']' */
{
case 91:
c = '[';
break;
case 93:
c = ']';
break;
}
}
else
{
/* scan codes[]; running off the end means nothing matched */
for (i = 0; i < elementsof(codes); i++)
{
break;
}
if (i >= elementsof(codes))
goto bad;
}
return c;
bad:
/* failure path: terminate name[] and return a literal '&' */
name[i] = 0;
if (c == ';')
else
while (i--)
return '&';
}
/*
 * NOTE(review): the name/parameter line and every branch body are not
 * present in this view.
 *
 * Visible behavior: selects one of four branches by the magnitude of w:
 * no bits above 0x7F, no bits above 0x7FF, no bits above 0xFFFF, or
 * anything larger. These thresholds coincide with the 1/2/3/4-byte
 * UTF-8 ranges and the usage text mentions UTF-8 encoding, so this is
 * presumably the UTF-8 encoder -- confirm against the full source.
 */
static int
{
if (!(w & ~0x7F))
else if (!(w & ~0x7FF))
else if (!(w & ~0xFFFF))
{
}
else
}
/*
 * NOTE(review): the name/parameter line and the statement(s) that
 * assign c are not present in this view; the visible code only
 * declares c and returns it.
 */
static int
{
register int c;
return c;
}
/*
 * NOTE(review): the name/parameter line, the statements that read the
 * next character into c, the switch headers and all output statements
 * are not present in this view. Comments describe only visible lines.
 *
 * Visible behavior: a character-driven loop with cases for EOF, '&',
 * '<', '"', newline, blank/tab, '\r', '[' and ']'. A '<' is further
 * dispatched on '/', 'B', 'L' and 'P'. q is cleared before the main
 * loop and set to '"' in the '/' and 'L' cases. flags is tested in the
 * '"' and '\n' cases.
 */
static void
{
register int c;
register int q;
if (c == '<')
{
break;
while (c != EOF && c != '>')
}
q = 0;
for (;;)
{
{
case EOF:
break;
case '&':
if (isspace(c))
{
if (c == EOF)
break;
}
continue;
case '<':
/* nested dispatch (switch expression not visible) on '/', 'B', 'L', 'P' */
{
case '/':
{
if (q)
{
q = '"';
}
goto again;
}
break;
case 'B':
break;
case 'L':
{
if (q)
else
q = '"';
do
{
if (c == EOF)
break;
if (isspace(c))
if (c == '<' &&
/* great */;
continue;
}
break;
case 'P':
else if (c == 'C' &&
for (;;)
{
{
case EOF:
case '"':
break;
case '&':
continue;
default:
continue;
}
break;
}
break;
}
/* loop condition stops at EOF or '>' (loop body not visible) */
while (c != EOF && c != '>')
break;
continue;
case '"':
if (!flags)
continue;
case '\n':
if (flags)
{
continue;
}
/*FALLTHROUGH*/
case ' ':
case '\t':
if (c == '&')
{
if (!isspace(c))
break;
}
else if (!isspace(c))
{
if (c == '<')
{
if (c == EOF)
break;
if (c != 'L' && c != '/')
}
else
{
if (c != EOF)
}
break;
}
continue;
case '\r':
case '[':
case ']':
continue;
default:
continue;
}
break;
}
if (q)
}
/*
 * NOTE(review): the name/parameter line and every branch body are not
 * present in this view.
 *
 * Visible behavior: an if/else chain that singles out the characters
 * '<', '>', '"', '&', '[' and ']' from all other values of c. These
 * are the same characters the name table and the '#' 91/93 cases
 * elsewhere in this file map back from, so this is presumably the
 * escaping side of that mapping -- confirm.
 */
static void
{
if (c == '<')
else if (c == '>')
else if (c == '"')
else if (c == '&')
else if (c == '[')
else if (c == ']')
else
}
/*
 * NOTE(review): the name/parameter line, the header of the outer
 * per-line loop, and most output statements are not present in this
 * view. Comments describe only visible lines.
 *
 * Visible behavior: writes an HTML 4.0 DOCTYPE/HEAD/BODY prologue to op
 * with sfprintf, then for each iteration of the outer block bumps
 * error_info.line and walks the characters of s through a switch.
 * error_info.line is reset to 0 before returning. p and q are small
 * state flags, both cleared before the loop. MSG_SPLICE is set in flags
 * when a backslash is the last character of s and cleared when the end
 * of s is reached otherwise.
 */
static void
{
register char* s;
register int c;
register int q;
register int p;
sfprintf(op, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\"><HTML><HEAD><!-- text massaged for external translation --></HEAD><BODY>\n");
p = q = 0;
{
error_info.line++;
if (flags)
else
{
/* lines beginning with '$' are handled here and skip the switch below */
if (*s == '$')
{
if (p)
else
p = 1;
while (c = *s++)
continue;
}
p = 0;
/* any other line must begin with a digit, otherwise it is skipped */
if (!isdigit(*s))
continue;
while (isdigit(c = *s++))
/* advance to the opening '"' or to the end of the string */
while (c && c != '"')
c = *s++;
if (!c)
s--;
else if (isspace(*s))
{
s++;
}
}
for (;;)
{
switch (c = *s++)
{
case 0:
/* end of string: clear MSG_SPLICE and reset q */
flags &= ~MSG_SPLICE;
if (q)
{
q = 0;
}
break;
case '<':
continue;
case '>':
continue;
case '&':
continue;
case '[':
continue;
case ']':
continue;
case '$':
if (!q)
{
q = 1;
}
while (isalnum(c = *s++))
s--;
continue;
case '%':
if (!q)
{
q = 1;
}
if (*s == '%')
else
do
{
if (!(c = *s++) || c == '"')
{
s--;
break;
}
if (SPACE(s))
continue;
case '"':
{
s = "";
continue;
}
/*FALLTHROUGH*/
case '\'':
case ':':
case '/':
case '+':
case '@':
if (!q)
{
q = 1;
}
/*FALLTHROUGH*/
case '.':
case ',':
if (SPACE(s))
continue;
case '\\':
/* a backslash as the last character sets MSG_SPLICE */
if (!(c = *s++))
{
flags |= MSG_SPLICE;
break;
}
if (c != 'n' && c != 't')
{
if (!q)
{
q = 1;
}
if (c == 'b')
{
/* scan forward until end of string, '"', a lone '?', or a backslash followed by a, b or 0 */
for (;;)
{
if (!(c = *s++) || c == '"')
{
s--;
break;
}
if (c == '?')
{
if (*s != '?')
{
s--;
break;
}
continue;
}
if (c == '\\')
{
if (!*s)
break;
if (*s == 'a' || *s == 'b' || *s == '0')
{
break;
}
c = *s++;
}
}
}
{
if (isdigit(*s))
}
if (SPACE(s))
continue;
}
/*FALLTHROUGH*/
case ' ':
case '\t':
s++;
if (*s == '"')
{
if (q)
{
q = 0;
}
else
continue;
}
c = ' ';
/*FALLTHROUGH*/
default:
if (q)
{
q = 0;
}
continue;
}
break;
}
}
error_info.line = 0;
}
/*
 * NOTE(review): the name/parameter line, the option-fetching switch
 * header, every case body and the conversion call(s) are not present
 * in this view; this is presumably the program entry point -- confirm.
 *
 * Visible behavior: flags starts at 0; a loop dispatches on the option
 * cases 'h', 'm', 'r', '?' and ':' and stops on anything else. The
 * visible return yields 1 when error_info.errors is nonzero, else 0.
 */
int
{
int flags = 0;
for (;;)
{
{
case 'h':
continue;
case 'm':
continue;
case 'r':
continue;
case '?':
continue;
case ':':
continue;
}
break;
}
if (error_info.errors)
return error_info.errors != 0;
}