deroff.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
#include <sys/varargs.h>
/*
* Deroff command -- strip troff, eqn, and Tbl sequences from a file.
* Has three flags argument, -w, to cause output one word per line
* rather than in the original format.
* -mm (or -ms) causes the corresponding macro's to be interpreted
* so that just sentences are output
* -ml also gets rid of lists.
* -i causes deroff to ignore .so and .nx commands.
* Deroff follows .so and .nx commands, removes contents of macro
* definitions, equations (both .EQ ... .EN and $...$),
* Tbl command sequences, and Troff backslash constructions.
*
* All input is through the C macro; the most recently read character
* is in c.
*/
#define C ((c = getc(infile)) == EOF ? eof() : \
((c == ldelim) && (filesp == files) ? skeqn() : c))
#define C1 ((c = getc(infile)) == EOF ? eof() : c)
#define SKIP while (C != '\n')
#define SKIP_TO_COM SKIP; SKIP; pc = c; \
while ((C != '.') || (pc != '\n') || \
(C > 'Z')) { \
pc = c; \
}
#define YES 1
#define NO 0
#define MS 0
#define MM 1
#define ONE 1
#define TWO 2
#define NOCHAR -2
#define SPECIAL 0
#define APOS 1
#define DIGIT 2
#define LETTER 3
#define MAXLINESZ 512
static int wordflag = NO;
static int msflag = NO;
static int iflag = NO;
static int mac = MM;
static int disp = 0;
static int inmacro = NO;
static int intable = NO;
static int lindx;
static size_t linesize = MAXLINESZ;
static char chars[128]; /* SPECIAL, APOS, DIGIT, or LETTER */
static char *line = NULL;
static char c;
static int pc;
static int ldelim = NOCHAR;
static int rdelim = NOCHAR;
static int argc;
static char **argv;
extern int optind;
extern char *optarg;
static char fname[50];
static FILE *files[15];
static FILE **filesp;
static FILE *infile;
static void backsl(void);
static void comline(void);
static char *copys(char *);
static int eof(void);
static void eqn(void);
static void fatal(const char *, ...);
static void fatal_msg(char *);
static void getfname(void);
static void macro(void);
static FILE *opn(char *);
static void putmac(char *, int);
static void putwords(int);
static void regline(int, int);
static void sce(void);
static int skeqn();
static void sdis(char, char);
static void stbl(void);
static void tbl(void);
static void usage(void);
static void work(void);
void
main(int ac, char **av)
{
int i;
int errflg = 0;
int optchar;
(void) setlocale(LC_ALL, "");
#if !defined(TEXT_DOMAIN)
#define TEXT_DOMAIN "SYS_TEST"
#endif
(void) textdomain(TEXT_DOMAIN);
argc = ac;
argv = av;
while ((optchar = getopt(argc, argv, "wim:")) != EOF) {
switch (optchar) {
case 'w':
wordflag = YES;
break;
case 'm':
msflag = YES;
if (*optarg == 'm')
mac = MM;
else if (*optarg == 's')
mac = MS;
else if (*optarg == 'l')
disp = 1;
else
errflg++;
break;
case 'i':
iflag = YES;
break;
case '?':
errflg++;
}
}
if (errflg)
usage();
if (optind == argc)
infile = stdin;
else
infile = opn(argv[optind++]);
files[0] = infile;
filesp = &files[0];
for (i = 'a'; i <= 'z'; ++i)
chars[i] = LETTER;
for (i = 'A'; i <= 'Z'; ++i)
chars[i] = LETTER;
for (i = '0'; i <= '9'; ++i)
chars[i] = DIGIT;
chars['\''] = APOS;
chars['&'] = APOS;
work();
}
static int
skeqn()
{
while ((c = getc(infile)) != rdelim) {
if (c == EOF) {
c = eof();
} else if (c == '"') {
while ((c = getc(infile)) != '"') {
if (c == EOF) {
c = eof();
} else if (c == '\\') {
if ((c = getc(infile)) == EOF) {
c = eof();
}
}
}
}
}
if (msflag) {
return (c = 'x');
}
return (c = ' ');
}
/* Functions calling opn() should ensure 'p' is non-null */
static FILE *
opn(char *p)
{
FILE *fd;
assert(p != NULL);
if ((fd = fopen(p, "r")) == NULL)
fatal(gettext("Cannot open file %s: %s\n"), p, strerror(errno));
return (fd);
}
static int
eof(void)
{
if (infile != stdin)
(void) fclose(infile);
if (filesp > files) {
infile = *--filesp;
} else if (optind < argc) {
infile = opn(argv[optind++]);
} else {
exit(0);
}
return (C);
}
static void
getfname(void)
{
char *p;
struct chain {
struct chain *nextp;
char *datap;
};
struct chain *q;
static struct chain *namechain = NULL;
while (C == ' ')
;
for (p = fname; ((*p = c) != '\n') && (c != ' ') && (c != '\t') &&
(c != '\\'); ++p) {
(void) C;
}
*p = '\0';
while (c != '\n') {
(void) C;
}
/* see if this name has already been used */
for (q = namechain; q; q = q->nextp)
if (strcmp(fname, q->datap) != 0) {
fname[0] = '\0';
return;
}
q = (struct chain *)calloc(1, sizeof (*namechain));
q->nextp = namechain;
q->datap = copys(fname);
namechain = q;
}
/*
* Functions calling fatal() should ensure 'format' and
* arguments are non-null.
*/
static void
fatal(const char *format, ...)
{
va_list alist;
assert(format != NULL);
(void) fputs(gettext("deroff: "), stderr);
va_start(alist, format);
(void) vfprintf(stderr, format, alist);
exit(1);
}
/* Functions calling fatal_msg() should ensure 's' is non-null */
static void
fatal_msg(char *s)
{
assert(s != NULL);
(void) fprintf(stderr, gettext("deroff: %s\n"), s);
exit(1);
}
static void
usage(void)
{
(void) fputs(gettext(
"usage: deroff [ -w ] [ -m (m s l) ] [ -i ] "
"[ file ] ... \n"), stderr);
exit(1);
}
static void
work(void)
{
for (;;) {
if ((C == '.') || (c == '\''))
comline();
else
regline(NO, TWO);
}
}
static void
regline(int macline, int cnst)
{
if (line == NULL) {
if ((line = (char *)malloc(linesize * sizeof (char))) == NULL) {
fatal_msg(gettext("Cannot allocate memory"));
}
}
lindx = 0;
line[lindx] = c;
for (;;) {
if (c == '\\') {
line[lindx] = ' ';
backsl();
if (c == '%') { /* no blank for hyphenation char */
lindx--;
}
}
if (c == '\n') {
break;
}
/*
* We're just about to add another character to the line
* buffer so ensure we don't overrun it.
*/
if (++lindx >= linesize - 1) {
linesize = linesize * 2;
if ((line = (char *)realloc(line,
linesize * sizeof (char))) == NULL) {
fatal_msg(gettext("Cannot allocate memory"));
}
}
if (intable && (c == 'T')) {
line[lindx] = C;
if ((c == '{') || (c == '}')) {
line[lindx - 1] = ' ';
line[lindx] = C;
}
} else {
line[lindx] = C;
}
}
line[lindx] = '\0';
if (line[0] != '\0') {
if (wordflag) {
putwords(macline);
} else if (macline) {
putmac(line, cnst);
} else {
(void) puts(line);
}
}
}
static void
putmac(char *s, int cnst)
{
char *t;
while (*s) {
while ((*s == ' ') || (*s == '\t')) {
(void) putchar(*s++);
}
for (t = s; (*t != ' ') && (*t != '\t') && (*t != '\0'); ++t)
;
if (*s == '\"')
s++;
if ((t > s + cnst) && (chars[s[0]] == LETTER) &&
(chars[s[1]] == LETTER)) {
while (s < t) {
if (*s == '\"')
s++;
else
(void) putchar(*s++);
}
} else {
s = t;
}
}
(void) putchar('\n');
}
static void
putwords(int macline) /* break into words for -w option */
{
char *p, *p1;
int i, nlet;
for (p1 = line; ; ) {
/* skip initial specials ampersands and apostrophes */
while (chars[*p1] < DIGIT) {
if (*p1++ == '\0')
return;
}
nlet = 0;
for (p = p1; (i = chars[*p]) != SPECIAL; ++p) {
if (i == LETTER)
++nlet;
}
if ((!macline && (nlet > 1)) /* MDM definition of word */ ||
(macline && (nlet > 2) && (chars[p1[0]] == LETTER) &&
(chars[p1[1]] == LETTER))) {
/* delete trailing ampersands and apostrophes */
while ((p[-1] == '\'') || (p[-1] == '&')) {
--p;
}
while (p1 < p) {
(void) putchar(*p1++);
}
(void) putchar('\n');
} else {
p1 = p;
}
}
}
static void
comline(void)
{
int c1, c2;
com:
while ((C == ' ') || (c == '\t'))
;
comx:
if ((c1 = c) == '\n')
return;
c2 = C;
if ((c1 == '.') && (c2 != '.'))
inmacro = NO;
if (c2 == '\n')
return;
if ((c1 == 'E') && (c2 == 'Q') && (filesp == files)) {
eqn();
} else if ((c1 == 'T') && ((c2 == 'S') || (c2 == 'C') ||
(c2 == '&')) && (filesp == files)) {
if (msflag) {
stbl();
} else {
tbl();
}
} else if ((c1 == 'T') && (c2 == 'E')) {
intable = NO;
} else if (!inmacro && (c1 == 'd') && (c2 == 'e')) {
macro();
} else if (!inmacro && (c1 == 'i') && (c2 == 'g')) {
macro();
} else if (!inmacro && (c1 == 'a') && (c2 == 'm')) {
macro();
} else if ((c1 == 's') && (c2 == 'o')) {
if (iflag) {
SKIP;
} else {
getfname();
if (fname[0]) {
infile = *++filesp = opn(fname);
}
}
} else if ((c1 == 'n') && (c2 == 'x')) {
if (iflag) {
SKIP;
} else {
getfname();
if (fname[0] == '\0') {
exit(0);
}
if (infile != stdin) {
(void) fclose(infile);
}
infile = *filesp = opn(fname);
}
} else if ((c1 == 'h') && (c2 == 'w')) {
SKIP;
} else if (msflag && (c1 == 'T') && (c2 == 'L')) {
SKIP_TO_COM;
goto comx;
} else if (msflag && (c1 == 'N') && (c2 == 'R')) {
SKIP;
} else if (msflag && (c1 == 'A') && ((c2 == 'U') || (c2 == 'I'))) {
if (mac == MM) {
SKIP;
} else {
SKIP_TO_COM;
goto comx;
}
} else if (msflag && (c1 == 'F') && (c2 == 'S')) {
SKIP_TO_COM;
goto comx;
} else if (msflag && (c1 == 'S') && (c2 == 'H')) {
SKIP_TO_COM;
goto comx;
} else if (msflag && (c1 == 'N') && (c2 == 'H')) {
SKIP_TO_COM;
goto comx;
} else if (msflag && (c1 == 'O') && (c2 == 'K')) {
SKIP_TO_COM;
goto comx;
} else if (msflag && (c1 == 'N') && (c2 == 'D')) {
SKIP;
} else if (msflag && (mac == MM) && (c1 == 'H') &&
((c2 == ' ') || (c2 == 'U'))) {
SKIP;
} else if (msflag && (mac == MM) && (c2 == 'L')) {
if (disp || (c1 == 'R')) {
sdis('L', 'E');
} else {
SKIP;
(void) putchar('.');
}
} else if (msflag && ((c1 == 'D') || (c1 == 'N') ||
(c1 == 'K') || (c1 == 'P')) && (c2 == 'S')) {
sdis(c1, 'E'); /* removed RS-RE */
} else if (msflag && (c1 == 'K' && c2 == 'F')) {
sdis(c1, 'E');
} else if (msflag && (c1 == 'n') && (c2 == 'f')) {
sdis('f', 'i');
} else if (msflag && (c1 == 'c') && (c2 == 'e')) {
sce();
} else {
if ((c1 == '.') && (c2 == '.')) {
while (C == '.')
;
}
++inmacro;
if ((c1 <= 'Z') && msflag) {
regline(YES, ONE);
} else {
regline(YES, TWO);
}
--inmacro;
}
}
static void
macro(void)
{
if (msflag) {
/* look for .. */
do {
SKIP;
} while ((C != '.') || (C != '.') || (C == '.'));
if (c != '\n') {
SKIP;
}
return;
}
SKIP;
inmacro = YES;
}
static void
sdis(char a1, char a2)
{
int c1, c2;
int eqnf;
int notdone = 1;
eqnf = 1;
SKIP;
while (notdone) {
while (C != '.')
SKIP;
if ((c1 = C) == '\n')
continue;
if ((c2 = C) == '\n')
continue;
if ((c1 == a1) && (c2 == a2)) {
SKIP;
if (eqnf)
(void) putchar('.');
(void) putchar('\n');
return;
} else if ((a1 == 'D') && (c1 == 'E') && (c2 == 'Q')) {
eqn();
eqnf = 0;
} else {
SKIP;
}
}
}
static void
tbl(void)
{
while (C != '.')
;
SKIP;
intable = YES;
}
static void
stbl(void)
{
while (C != '.')
;
SKIP_TO_COM;
if ((c != 'T') || (C != 'E')) {
SKIP;
pc = c;
while ((C != '.') || (pc != '\n') ||
(C != 'T') || (C != 'E')) {
pc = c;
}
}
}
static void
eqn(void)
{
int c1, c2;
int dflg;
int last;
last = 0;
dflg = 1;
SKIP;
for (;;) {
if ((C1 == '.') || (c == '\'')) {
while ((C1 == ' ') || (c == '\t'))
;
if ((c == 'E') && (C1 == 'N')) {
SKIP;
if (msflag && dflg) {
(void) putchar('x');
(void) putchar(' ');
if (last) {
(void) putchar('.');
(void) putchar(' ');
}
}
return;
}
} else if (c == 'd') { /* look for delim */
if ((C1 == 'e') && (C1 == 'l')) {
if ((C1 == 'i') && (C1 == 'm')) {
while (C1 == ' ')
;
if (((c1 = c) == '\n') ||
((c2 = C1) == '\n') ||
((c1 == 'o') && (c2 == 'f') &&
(C1 == 'f'))) {
ldelim = NOCHAR;
rdelim = NOCHAR;
} else {
ldelim = c1;
rdelim = c2;
}
}
dflg = 0;
}
}
if (c != '\n') {
while (C1 != '\n') {
if (c == '.') {
last = 1;
} else {
last = 0;
}
}
}
}
}
static void
backsl(void) /* skip over a complete backslash construction */
{
int bdelim;
sw: switch (C) {
case '"':
SKIP;
return;
case 's':
if (C == '\\') {
backsl();
} else {
while ((C >= '0') && (c <= '9'))
;
(void) ungetc(c, infile);
c = '0';
}
lindx--;
return;
case 'f':
case 'n':
case '*':
if (C != '(')
return;
/* FALLTHROUGH */
case '(':
if (C != '\n') {
(void) C;
}
return;
case '$':
(void) C; /* discard argument number */
return;
case 'b':
case 'x':
case 'v':
case 'h':
case 'w':
case 'o':
case 'l':
case 'L':
if ((bdelim = C) == '\n')
return;
while ((C != '\n') && (c != bdelim))
if (c == '\\')
backsl();
return;
case '\\':
if (inmacro)
goto sw;
default:
return;
}
}
static char *
copys(char *s)
{
char *t, *t0;
if ((t0 = t = calloc((unsigned)(strlen(s) + 1), sizeof (*t))) == NULL)
fatal_msg(gettext("Cannot allocate memory"));
while (*t++ = *s++)
;
return (t0);
}
static void
sce(void)
{
char *ap;
int n, i;
char a[10];
for (ap = a; C != '\n'; ap++) {
*ap = c;
if (ap == &a[9]) {
SKIP;
ap = a;
break;
}
}
if (ap != a) {
n = atoi(a);
} else {
n = 1;
}
for (i = 0; i < n; ) {
if (C == '.') {
if (C == 'c') {
if (C == 'e') {
while (C == ' ')
;
if (c == '0') {
break;
} else {
SKIP;
}
} else {
SKIP;
}
} else {
SKIP;
}
} else {
SKIP;
i++;
}
}
}