fmt.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 1997 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
#pragma ident "%Z%%M% %I% %E% SMI"
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <wctype.h>
#include <widec.h>
#include <dlfcn.h>
#include <locale.h>
#include <sys/param.h>
#include <string.h>
/*
* fmt -- format the concatenation of input files or standard input
* onto standard output. Designed for use with Mail ~|
*
* Syntax: fmt [ -width | -w width ] [ -cs ] [ name ... ]
* Author: Kurt Shoens (UCB) 12/7/78
*/
/*
 * File-scope state shared by the input side (fmt, prefix, the split
 * routines) and the output side (pack, oflush, tabulate, leadin).
 */
#define NOSTR ((wchar_t *) 0) /* Null string pointer for lint */
#define MAXLINES 100 /* maximum mail header lines to verify */
wchar_t outbuf[BUFSIZ]; /* Sandbagged output line image */
wchar_t *outp; /* Pointer in above; NOSTR when no line is pending */
int filler; /* Filler amount in outbuf (set by leadin) */
int pfx; /* Current leading blank count */
int width = 72; /* Width that we will not exceed */
int nojoin = 0; /* split lines only, don't join short ones */
int errs = 0; /* Current number of errors */
/*
 * Crown margin states.  c_none is the zero value and is what crown_state
 * holds unless setopt() sees -c, which stores c_reset.  The remaining
 * states are stepped through by prefix() and leadin().
 */
enum crown_type {c_none, c_reset, c_head, c_lead, c_fixup, c_body};
enum crown_type crown_state; /* Crown margin state */
int crown_head; /* The header offset */
int crown_body; /* The body offset */
/*
 * currently-known initial strings found in mail headers;
 * the list is terminated by a null pointer
 */
wchar_t *headnames[] = {
L"Apparently-To", L"Bcc", L"bcc", L"Cc", L"cc", L"Confirmed-By",
L"Content", L"content-length", L"From", L"Date", L"id",
L"Message-I", L"MIME-Version", L"Precedence", L"Return-Path",
L"Received", L"Reply-To", L"Status", L"Subject", L"To", L"X-IMAP",
L"X-Lines", L"X-Sender", L"X-Sun", L"X-Status", L"X-UID",
0};
enum hdr_type {
off, /* mail header processing is off */
not_in_hdr, /* not currently processing a mail header */
in_hdr, /* currently filling hdrbuf with potential hdr lines */
flush_hdr, /* flush hdrbuf; not a header, no special processing */
do_hdr /* process hdrbuf as a mail header */
};
/* current state of hdrbuf */
enum hdr_type hdr_state = not_in_hdr;
wchar_t *hdrbuf[MAXLINES]; /* buffer to hold potential mail header lines */
int h_lines; /* index into lines of hdrbuf */
/* line splitter chosen by main(): csplit in the "C" locale, else msplit */
int (*(split))();
/* defined outside this file; fmt() adds its result to the column count */
extern int scrwidth(wchar_t);
static void fill_hdrbuf(wchar_t line[]);
static void header_chk(void);
static void process_hdrbuf(void);
/*
 * Drive the whole formatter by managing input files.  Also,
 * cause initialization of the output stuff and flush it out
 * at the end.
 *
 * Every argument is first offered to setopt(); anything it does not
 * consume is opened as an input file and formatted.  When no file
 * argument was given at all, standard input is formatted instead.
 *
 * Exit status: 0 when standard input was formatted, otherwise the
 * value of errs (incremented here for each file that cannot be opened).
 */
int
main(int argc, char **argv)
{
	FILE *fi;
	char sobuf[BUFSIZ];
	char *arg;
	char *locale;
	int nofile = 1;		/* cleared once a file argument is seen */
	int csplit(), msplit();
	void _wckind_init();

	outp = NOSTR;
	setbuf(stdout, sobuf);
	setlocale(LC_ALL, "");
	locale = setlocale(LC_CTYPE, "");
	/*
	 * setlocale() returns NULL when the request cannot be honored;
	 * treat that like the "C" locale rather than handing NULL to
	 * strcmp().
	 */
	if (locale == NULL || strcmp(locale, "C") == 0) {
		split = csplit;
	} else {
		split = msplit;
		(void) _wckind_init();
	}

	while (--argc > 0) {
		arg = *++argv;
		if (setopt(arg))
			continue;
		nofile = 0;
		if ((fi = fopen(arg, "r")) == NULL) {
			perror(arg);
			errs++;
			continue;
		}
		fmt(fi);
		fclose(fi);
	}

	if (nofile) {
		/* only options (or nothing) on the command line */
		fmt(stdin);
		oflush();
		exit(0);
	}
	oflush();
	exit(errs);
	/* NOTREACHED */
}
/*
 * Read up characters from the passed input file, forming lines,
 * doing ^H processing, expanding tabs, stripping trailing blanks,
 * and sending each line down for analysis.
 *
 * Each cleaned-up line is routed according to hdr_state: either straight
 * to prefix(), or into hdrbuf via fill_hdrbuf() while a possible mail
 * header is being collected.  Anything still held in hdrbuf at end of
 * file is checked and emitted before returning.
 */
fmt(FILE *fi)
{
	wchar_t linebuf[BUFSIZ], canonb[BUFSIZ];
	wchar_t *cp, *cp2;
	wchar_t wc;		/* character being copied into canonb */
	wint_t c;		/* last result of getwc(); WEOF at the end */
	int col;
	size_t n;
	char cbuf[BUFSIZ];	/* stores wchar_t string as char string */

	c = getwc(fi);
	while (c != WEOF) {
		/*
		 * Collect a line, doing ^H processing.
		 * Leave tabs for now.  Characters that are neither
		 * printable nor a tab are dropped.
		 */
		cp = linebuf;
		while (c != L'\n' && c != WEOF && cp - linebuf < BUFSIZ - 1) {
			if (c == L'\b') {
				if (cp > linebuf)
					cp--;
			} else if (iswprint(c) || c == L'\t') {
				*cp++ = (wchar_t)c;
			}
			c = getwc(fi);
		}
		*cp = L'\0';

		/*
		 * Toss anything remaining on the input line.
		 */
		while (c != L'\n' && c != WEOF)
			c = getwc(fi);

		/*
		 * Expand tabs on the way to canonb.  A separate variable
		 * is used for the copy so that c keeps remembering whether
		 * the line ended with a newline or with end of input.
		 */
		col = 0;
		cp2 = canonb;
		for (cp = linebuf; (wc = *cp) != L'\0'; cp++) {
			if (wc != L'\t') {
				col += scrwidth(wc);
				if (cp2 - canonb < BUFSIZ - 1)
					*cp2++ = wc;
				continue;
			}
			do {
				if (cp2 - canonb < BUFSIZ - 1)
					*cp2++ = L' ';
				col++;
			} while ((col & 07) != 0);
		}

		/*
		 * Swipe trailing blanks from the line.
		 */
		while (cp2 > canonb && cp2[-1] == L' ')
			cp2--;
		*cp2 = L'\0';

		/* special processing to look for mail header lines */
		switch (hdr_state) {
		case off:
			/* header detection is disabled; format normally */
			prefix(canonb);
			break;

		case not_in_hdr:
			/* look for an initial mail header line */
			/* skip initial blanks */
			for (cp = canonb; *cp == L' '; cp++)
				;
			/*
			 * Need to convert string from wchar_t to char,
			 * since this is what ishead() expects.  Only the
			 * start of the line matters, so cbuf is not sized
			 * for BUFSIZ * MB_LEN_MAX bytes.  wcstombs() does
			 * not terminate the result when it fills the
			 * buffer, so terminate it here.
			 */
			n = wcstombs(cbuf, cp, sizeof (cbuf) - 1);
			if (n != (size_t)-1)
				cbuf[n] = '\0';
			if (n != (size_t)-1 && ishead(cbuf)) {
				hdr_state = in_hdr;
				fill_hdrbuf(canonb);
			} else {
				/* no mail header line; process normally */
				prefix(canonb);
			}
			break;

		case in_hdr:
			/* already saw 1st mail header line; look for more */
			if (canonb[0] == L'\0') {
				/*
				 * blank line means end of mail header;
				 * verify current mail header buffer
				 * then process it accordingly
				 */
				header_chk();
				process_hdrbuf();
				/* now process the current blank line */
				prefix(canonb);
			} else {
				/*
				 * not a blank line--save this line as
				 * a potential mail header line
				 */
				fill_hdrbuf(canonb);
			}
			break;

		default:
			/*
			 * flush_hdr and do_hdr are only held between
			 * header_chk() and process_hdrbuf(), so they are
			 * not expected here; do not lose the line if one
			 * ever shows up.
			 */
			prefix(canonb);
			break;
		}

		/* step past the newline that ended this line, if any */
		if (c != WEOF)
			c = getwc(fi);
	}

	/*
	 * end of this file--make sure we process the stuff in
	 * hdrbuf before we're finished
	 */
	if (hdr_state == in_hdr) {
		header_chk();
		process_hdrbuf();
	}
}
/*
 * Examine one tab-free input line: measure its run of leading blanks,
 * force a line break when the indentation changes enough, and pass the
 * remaining text on to the output side.
 *
 * An empty line flushes any pending output, is echoed as an empty line
 * and, when crown margin mode is active, restarts the crown state.
 * Mail header lines (hdr_state == do_hdr) and lines whose first
 * non-blank character is '.' are emitted whole: never split and never
 * joined with their neighbours.
 */
prefix(wchar_t line[])
{
	wchar_t *text;		/* first non-blank character of line */
	int indent;		/* number of leading blanks */
	int keep_whole = 0;	/* set if line should not be split */

	if (line[0] == L'\0') {
		oflush();
		putchar('\n');
		if (crown_state != c_none)
			crown_state = c_reset;
		return;
	}

	text = line;
	while (*text == L' ')
		text++;
	indent = text - line;

	/*
	 * Outside crown mode, break the output line when the indent
	 * grows, or when it shrinks by more than 8 columns.
	 */
	if (crown_state == c_none && indent != pfx &&
	    (indent > pfx || abs(pfx - indent) > 8))
		oflush();

	/* a mail header line must not be joined to what came before */
	if (hdr_state == do_hdr) {
		keep_whole = 1;
		oflush();
	}

	/* flush previous line so we don't join this one to it */
	if (nojoin)
		oflush();

	/* nroff-type lines starting with '.' are not split nor joined */
	if (!keep_whole && *text == L'.') {
		keep_whole = 1;
		oflush();
	}

	pfx = indent;

	if (crown_state == c_reset) {
		crown_head = pfx;
		crown_state = c_head;
	} else if (crown_state == c_lead) {
		crown_body = pfx;
		crown_state = c_body;
	} else if (crown_state == c_fixup) {
		crown_body = pfx;
		crown_state = c_body;
		if (outp) {
			/*
			 * Take back the pending output line (minus its
			 * head indent) and run it through split again.
			 */
			wchar_t pending[BUFSIZ];

			*outp = L'\0';
			wscpy(pending, &outbuf[crown_head]);
			outp = NOSTR;
			split(pending);
		}
	}

	if (keep_whole) {
		/* put whole input line onto outbuf and print it out */
		pack(text);
		oflush();
	} else {
		/*
		 * split puts current line onto outbuf, but splits it
		 * at word boundaries, if it exceeds desired length
		 */
		split(text);
	}

	/*
	 * flush current line so next lines, if any,
	 * won't join to this one
	 */
	if (nojoin)
		oflush();
}
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to the output
 * line packer.
 *
 * A backslash immediately followed by white space keeps that white
 * space inside the word.  The last word of the line gets one blank
 * appended, or two when it ends in one of ".:!?".
 */
csplit(wchar_t line[])
{
	wchar_t *cp, *cp2;
	/*
	 * The lines built in this file hold at most BUFSIZ-1 characters.
	 * When such a line is a single word, the word still needs room
	 * for up to two appended blanks and the terminator, hence the
	 * extra elements beyond BUFSIZ.
	 */
	wchar_t word[BUFSIZ + 2];
	static const wchar_t *srchlist = (const wchar_t *) L".:!?";

	cp = line;
	while (*cp) {
		cp2 = word;

		/*
		 * Collect a 'word,' allowing it to contain escaped
		 * white space.
		 */
		while (*cp && !(iswspace(*cp))) {
			if (*cp == '\\' && iswspace(cp[1]))
				*cp2++ = *cp++;
			*cp2++ = *cp++;
		}

		/*
		 * Guarantee a space at end of line.
		 * Two spaces after end of sentence punctuation.
		 */
		if (*cp == L'\0') {
			*cp2++ = L' ';
			if (wschr(srchlist, cp[-1]) != NULL)
				*cp2++ = L' ';
		}

		/* carry the blank separation along with the word */
		while (iswspace(*cp))
			*cp2++ = *cp++;
		*cp2 = L'\0';
		pack(word);
	}
}
/*
 * Variant of csplit() selected by main() outside the "C" locale.
 * Besides white space, a word also ends where the _wckind() value of
 * two neighbouring characters differs and at least one of them has a
 * non-zero wcsetno().  A word that ends that way gets no blank appended.
 *
 * As in csplit(), a backslash immediately followed by white space keeps
 * that white space inside the word, and the last word of the line gets
 * one blank appended, or two when it ends in one of ".:!?".
 */
msplit(wchar_t line[])
{
	wchar_t *cp, *cp2, prev;
	/*
	 * The lines built in this file hold at most BUFSIZ-1 characters.
	 * When such a line is a single word, the word still needs room
	 * for up to two appended blanks and the terminator, hence the
	 * extra elements beyond BUFSIZ.
	 */
	wchar_t word[BUFSIZ + 2];
	static const wchar_t *srchlist = (const wchar_t *) L".:!?";

	cp = line;
	while (*cp) {
		cp2 = word;
		prev = *cp;

		/*
		 * Collect a 'word,' allowing it to contain escaped
		 * white space.
		 */
		while (*cp) {
			if (iswspace(*cp))
				break;
			if (_wckind(*cp) != _wckind(prev))
				if (wcsetno(*cp) != 0 || wcsetno(prev) != 0)
					break;
			if (*cp == '\\' && iswspace(cp[1]))
				*cp2++ = *cp++;
			prev = *cp;
			*cp2++ = *cp++;
		}

		/*
		 * Guarantee a space at end of line.
		 * Two spaces after end of sentence punctuation.
		 */
		if (*cp == L'\0') {
			*cp2++ = L' ';
			if (wschr(srchlist, cp[-1]) != NULL)
				*cp2++ = L' ';
		}

		/* carry the blank separation along with the word */
		while (iswspace(*cp))
			*cp2++ = *cp++;
		*cp2 = L'\0';
		pack(word);
	}
}
/*
* Output section.
* Build up line images from the words passed in. Prefix
* each line with correct number of blanks. The buffer "outbuf"
* contains the current partial line image, including prefixed blanks.
* "outp" points to the next available space therein. When outp is NOSTR,
* there ain't nothing in there yet. At the bottom of this whole mess,
* leading tabs are reinserted.
*/
/*
 * Pack a word onto the output line.  If this is the beginning of
 * the line, push on the appropriately-sized string of blanks first.
 * If the word won't fit on the current line, flush and begin a new
 * line.  If the word is too long to fit all by itself on a line,
 * just give it its own and hope for the best.
 *
 * outbuf has room for BUFSIZ-1 characters plus the terminator that
 * oflush() stores; characters of an over-long word that would not fit
 * are dropped instead of being written past the end of outbuf.
 */
pack(wchar_t word[])
{
	wchar_t *cp;
	wchar_t *limit = &outbuf[BUFSIZ - 1];	/* last usable slot (NUL) */
	int s, t;

	if (outp == NOSTR)
		leadin();
	t = wscol(word);
	*outp = L'\0';
	s = wscol(outbuf);

	/*
	 * Start a fresh line when the word does not fit, unless the
	 * current line holds nothing but its leading blanks.
	 */
	if (t + s > width && s > filler) {
		oflush();
		leadin();
	}

	for (cp = word; *cp != L'\0' && outp < limit; cp++)
		*outp++ = *cp;
}
/*
 * Emit the pending output line, if there is one: terminate it, hand
 * it to tabulate(), and reset outp to NOSTR so that the next word
 * starts a new line.  Does nothing when no line is pending.
 */
oflush(void)
{
	if (outp != NOSTR) {
		*outp = L'\0';
		tabulate(outbuf);
		outp = NOSTR;
	}
}
/*
 * Write one finished line to standard output.  Trailing blanks are
 * removed from the buffer first; the run of leading blanks is then
 * written as one tab per full group of eight followed by the leftover
 * blanks, then the rest of the line and a newline.
 */
tabulate(wchar_t line[])
{
	wchar_t *end;		/* one past the last character kept */
	wchar_t *text;		/* first non-blank character */
	int blanks, tabs, spaces;

	/* Toss trailing blanks in the output line */
	end = line + wslen(line);
	while (end > line && end[-1] == L' ')
		end--;
	*end = L'\0';

	/* Count the leading blank space and tabulate */
	text = line;
	while (*text == L' ')
		text++;
	blanks = text - line;

	for (tabs = blanks >> 3; tabs > 0; tabs--)
		putc('\t', stdout);
	for (spaces = blanks & 07; spaces > 0; spaces--)
		putc(' ', stdout);

	for (; *text != L'\0'; text++)
		putwc(*text, stdout);
	putc('\n', stdout);
}
/*
 * Begin a new output line in outbuf by storing its leading blanks.
 * The amount depends on the crown state: the head offset for the first
 * line of a crown paragraph, the body offset once it is known, and the
 * current prefix count otherwise.  The crown state advances as a side
 * effect, the chosen amount is recorded in filler, and outp is left
 * pointing just past the blanks.
 */
leadin()
{
	int indent;
	int i;

	if (crown_state == c_head) {
		indent = crown_head;
		crown_state = c_lead;
	} else if (crown_state == c_lead || crown_state == c_fixup) {
		indent = crown_head;
		crown_state = c_fixup;
	} else if (crown_state == c_body) {
		indent = crown_body;
	} else {
		indent = pfx;
	}

	filler = indent;
	outp = outbuf;
	for (i = 0; i < indent; i++)
		*outp++ = L' ';
}
/*
 * Is s1 a prefix of s2?
 *
 * Returns 1 when every character of s1 matches the character at the
 * same position in s2 (so an empty s1 is a prefix of anything), and 0
 * otherwise.  In particular, 0 is returned when s2 ends before s1 does.
 */
int
ispref(const wchar_t *s1, const wchar_t *s2)
{
	while (*s1 != L'\0') {
		/*
		 * If s2 has run out, its terminator cannot equal the
		 * non-null *s1, so we stop here without reading past it.
		 */
		if (*s1++ != *s2++)
			return (0);
	}
	return (1);
}
/*
 * Try to interpret one command line argument as an option.
 *
 *	-c	turn on crown margin mode
 *	-s	split long lines only, never join short ones
 *	-w	the next argument is the width
 *	-N	any other argument starting with '-': the text after
 *		the dash is converted with atoi() and used as the width
 *
 * Returns 1 when the argument was consumed as an option (or as the
 * width following -w) and 0 when it should be treated as a file name.
 * A width that is not positive or not below BUFSIZ-2 is reported on
 * stderr and ends the program with status 1.
 */
setopt(cp)
register char *cp;
{
	static int ws = 0;	/* non-zero while a -w width is awaited */

	if (*cp == '-') {
		if (cp[1] != '\0' && cp[2] == '\0') {
			switch (cp[1]) {
			case 'c':
				crown_state = c_reset;
				return (1);
			case 's':
				nojoin = 1;
				return (1);
			case 'w':
				ws++;
				return (1);
			default:
				break;
			}
		}
		width = atoi(cp + 1);
	} else {
		if (!ws)
			return (0);
		width = atoi(cp);
		ws = 0;
	}

	if (width <= 0 || width >= BUFSIZ-2) {
		fprintf(stderr, "fmt: bad width: %d\n", width);
		exit(1);
	}
	return (1);
}
/*
 * Support for _wckind(): the classifier is reached through __wckind,
 * which starts out as the built-in _wckind_c_locale() and may be
 * replaced by _wckind_init() with a function loaded at run time.
 */
/* sprintf-style template for the shared object; %s is the locale name */
#define LIB_WDRESOLVE "/usr/lib/locale/%s/LC_CTYPE/wdresolve.so"
/* symbol looked up in that shared object */
#define WCHKIND "_wdchkind_"
static int _wckind_c_locale();
/* classifier currently in use */
static int (*__wckind)() = _wckind_c_locale;
/* handle of the loaded shared object, or NULL when none is open */
static void *dlhandle = NULL;
/*
 * Select the classifier used by _wckind() for the current LC_CTYPE
 * locale.  Any shared object loaded by an earlier call is closed first.
 * Outside the "C" locale the object named by LIB_WDRESOLVE is opened
 * and its WCHKIND symbol becomes the classifier.  If the locale cannot
 * be determined, the path does not fit, or the object or symbol is
 * missing, the built-in _wckind_c_locale() is used.
 */
void
_wckind_init(void)
{
	char *locale;
	char path[MAXPATHLEN + 1];
	int len;

	if (dlhandle != NULL) {
		(void) dlclose(dlhandle);
		dlhandle = NULL;
	}

	/* the default, unless a locale-specific routine is found below */
	__wckind = _wckind_c_locale;

	locale = setlocale(LC_CTYPE, NULL);
	if (locale == NULL || strcmp(locale, "C") == 0)
		return;

	/* never write more than path can hold; give up on truncation */
	len = snprintf(path, sizeof (path), LIB_WDRESOLVE, locale);
	if (len < 0 || (size_t)len >= sizeof (path))
		return;

	if ((dlhandle = dlopen(path, RTLD_LAZY)) == NULL)
		return;

	__wckind = (int (*)(int))dlsym(dlhandle, WCHKIND);
	if (__wckind != NULL)
		return;

	/* object present but symbol missing: fall back to the default */
	(void) dlclose(dlhandle);
	dlhandle = NULL;
	__wckind = _wckind_c_locale;
}
/*
 * Return the kind of the wide character wc, as reported by whichever
 * classifier __wckind currently points to.
 */
int
_wckind(wc)
wchar_t wc;
{
	int kind;

	kind = __wckind(wc);
	return (kind);
}
/*
 * Built-in classifier used when no locale-specific one is loaded.
 * For a character accepted by iswascii(): 0 if it is alphanumeric or
 * an underscore, 1 otherwise.  For any other character: wcsetno() of
 * the character plus one.
 *
 * DEPEND_ON_ANSIC: the L'x' notation for wide character constants is
 * new in ANSI C; a K&R compiler won't accept it.
 */
static int
_wckind_c_locale(wc)
wchar_t wc;
{
	if (!iswascii(wc))
		return (wcsetno(wc) + 1);
	if (iswalnum(wc) || wc == L'_')
		return (0);
	return (1);
}
/*
 * header_chk -
 *	Decide whether the lines collected in hdrbuf form a mail header.
 *	Called once collection stops, i.e. on a blank line or at end
 *	of file.
 *
 *	The collection counts as a mail header when it has at least two
 *	lines and more than half of them, leading blanks ignored, start
 *	with one of the strings listed in headnames.
 *
 *	On return hdr_state is do_hdr for a mail header and flush_hdr
 *	otherwise.  hdrbuf and h_lines (the number of collected lines)
 *	are left untouched.
 */
static void
header_chk(void)
{
	int matched = 0;	/* lines that start with a known header name */
	int idx;

	/* a single line is never enough to be a header */
	if (h_lines < 2) {
		hdr_state = flush_hdr;
		return;
	}

	for (idx = 0; idx < h_lines; idx++) {
		wchar_t *text = hdrbuf[idx];
		wchar_t **name = headnames;

		/* skip initial blanks */
		while (*text == L' ')
			text++;

		/* stop at the first known name that starts this line */
		while (*name != (wchar_t *) 0 && !ispref(*name, text))
			name++;
		if (*name != (wchar_t *) 0)
			matched++;
	}

	/* over half must match for the lines to be treated as a header */
	hdr_state = (matched > h_lines / 2) ? do_hdr : flush_hdr;
}
/*
 * fill_hdrbuf -
 *	Save given input line into next element of hdrbuf,
 *	as a potential mail header line, to be processed later
 *	once we decide whether or not the contents of hdrbuf is
 *	really a mail header, via header_chk().
 *
 *	Does not allow hdrbuf to exceed MAXLINES lines: when it is
 *	full, the saved lines are flushed as ordinary text, followed by
 *	the current line, and header detection starts over.
 *
 *	Dynamically allocates space for each line.  If we are unable
 *	to allocate space for the current string, an error is reported
 *	and counted in errs, the saved lines and then the current line
 *	are formatted as ordinary text, and hdr_state is set to off so
 *	that no further mail header processing is attempted.
 */
static void
fill_hdrbuf(wchar_t line[])
{
	wchar_t *cp;	/* pointer to characters in input line */
	int i;		/* index into characters a hdrbuf line */

	if (h_lines >= MAXLINES) {
		/*
		 * if we run over MAXLINES potential mail header
		 * lines, stop checking--this is most likely NOT a
		 * mail header; flush out the hdrbuf, then process
		 * the current 'line' normally.
		 */
		hdr_state = flush_hdr;
		process_hdrbuf();
		prefix(line);
		return;
	}

	hdrbuf[h_lines] = (wchar_t *)malloc(sizeof (wchar_t) *
	    (wslen(line) + 1));
	if (hdrbuf[h_lines] == NULL) {
		perror("malloc");
		fprintf(stderr, "fmt: unable to do mail header preservation\n");
		errs++;

		/*
		 * Can't process mail header; flush current contents
		 * of mail header (if any) as ordinary text ...
		 */
		hdr_state = flush_hdr;
		for (i = 0; i < h_lines; i++) {
			prefix(hdrbuf[i]);
			free(hdrbuf[i]);
		}
		h_lines = 0;

		/*
		 * ... and then the current line, which could not be
		 * saved and must not be lost either.
		 */
		prefix(line);

		/* continue with no more mail header processing */
		hdr_state = off;
		return;
	}

	/* save this line as a potential mail header line */
	for (i = 0, cp = line; (hdrbuf[h_lines][i] = *cp) != L'\0'; i++, cp++)
		;
	h_lines++;
}
/*
 * process_hdrbuf -
 *	Send every line saved in hdrbuf through prefix() and release
 *	its storage.  How prefix() treats the lines depends on the
 *	hdr_state in effect on entry, which is expected to be do_hdr
 *	or flush_hdr, normally as set by a preceding header_chk().
 *	Afterwards hdrbuf is empty (h_lines is 0) and hdr_state is
 *	not_in_hdr.
 */
static void
process_hdrbuf(void)
{
	int idx = 0;

	while (idx < h_lines) {
		wchar_t *saved = hdrbuf[idx++];

		prefix(saved);
		free(saved);
	}
	h_lines = 0;
	hdr_state = not_in_hdr;
}