/* getNAME.c revision 7c478bd95313f5f23a4c958a745db2134aa03244 */
/*
* Copyright (c) 1998 by Sun Microsystems, Inc.
* All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* Copyright (c) 1980 Regents of the University of California.
* All rights reserved. The Berkeley software License Agreement
* specifies the terms and conditions for redistribution.
*/
#pragma ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.1 */
/*
* Get name sections from manual pages.
* -t builds toc (table of contents) entries
* -i builds intro entries
* otherwise builds apropos database entries
*/
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <limits.h>
#include <locale.h>
#include <wchar.h>
#include <errno.h>
/* Count of -t options seen; tested for non-zero to select toc output. */
static int tocrc;
/* Count of -i options seen; tested for non-zero to select intro output. */
static int intro;
/* Program name substituted into the usage message. */
static char *progname;
/* Forward declarations of file-local helper routines. */
static void trimln(char *);
static void doname(char *);
static void section(char *, char *);
static void split(char *, char *);
static void dorefname(char *);
static void troffpage(char *);
static void sgmlpage(char *);
/*
* Test to see if this is an SGML manpage or a regular manpage.
* Unless the first line begins with <!DOCTYPE, we assume it isn't SGML.
*/
static int
{
static const char magic[] = "<!DOCTYPE";
return (0);
}
int
{
int c;
switch (c) {
case 't':
tocrc++;
break;
case 'i':
intro++;
break;
case '?':
default:
"usage: %s [-i][-t] files..\n", progname);
exit(1);
}
exit(1);
}
continue;
}
/*
* Most of the info we care about is in the first kbyte
*/
else
}
return (0);
}
/*
* Parse a troff-format manpage
*/
static void
{
char *strptr;
int i = 0;
for (;;) {
return;
if (headbuf[0] != '.')
continue;
break;
break;
}
for (;;) {
return;
if (linbuf[0] != '.')
continue;
break;
break;
}
if (tocrc)
if (!intro)
for (;;) {
break;
if (linbuf[0] == '.') {
break;
break;
continue;
}
if (intro) {
continue;
}
if (i != 0)
(void) printf(" ");
i++;
}
(void) printf("\n");
}
/*
* Substitute section defined in page with new section spec
* of the form xx/yy, where xx is the section suffix of the
* directory and yy is the filename extension (unless xx
* and yy are equal, in which case xx is the section).
* Pages should be placed in their proper directory with the
* proper name to simplify things.
*
* For example take the following names:
*
*/
static void
{
char *p = buf;
int i;
/*
* split dirname and filename
*/
} else {
*fname = 0;
fname++;
}
for (i = 0; i < 2; i++) {
while (*p && *p != ' ' && *p != '\t')
p++;
if (!*p)
break;
while (*p && (*p == ' ' || *p == '\t'))
p++;
if (!*p)
break;
}
*p++ = 0;
else
while (*p && *p != ' ' && *p != '\t')
p++;
(void) printf("%s\t", p);
}
/*
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing between `static void' and `{'; the body uses a pointer named cp.
 * Presumably this is trimln(char *cp), the first prototype near the top of
 * the file - confirm against the complete source.
 *
 * Walks cp forward to the terminating NUL, then overwrites a final '\n',
 * if there is one, with NUL.
 * NOTE(review): for an empty string `*--cp' reads the byte before the
 * start of the buffer; presumably callers never pass one - verify.
 */
static void
{
/* advance to the terminating NUL */
while (*cp)
cp++;
/* step back one character and drop it if it is a newline */
if (*--cp == '\n')
*cp = 0;
}
static void
{
if (*cp == '.') {
}
}
while (*cp) {
}
}
}
cp++;
}
}
static void
{
if (*dp)
if (*ep == '.') {
goto again;
}
(void) putchar('(');
if (*dp)
dp++;
while (*dp)
(void) putchar(')');
(void) putchar(' ');
}
static void
{
if (cp == 0)
return;
;
*++cp = '\0';
sp++;
if (cp) {
char *tp;
;
*++tp = '\0';
;
}
}
}
static void
{
if (*dp)
if (*ep == '.') {
goto again;
}
(void) putchar('.');
if (*dp)
dp++;
while (*dp)
}
/*
* The rest of the routines in the file form a simplistic parser
* for SGML manpages. We assume the input is syntactically correct
* SGML, and that the fields occur in the input file in order.
*/
/*
* Some utilities for constructing arbitrary length wide character strings
*/
/*
 * Handle for an arbitrary length wide character string.
 * NOTE(review): only `index' is declared here, yet other code in this
 * file also uses s->str and s->size - the definition looks truncated;
 * confirm against the complete source.
 */
typedef struct {
long index;	/* initialised to 0 where a new string is set up */
} string_t;
/* Threshold the requested initial size is compared against on creation. */
#define DEF_STR_SIZE 16
/* Amount added to s->size each time a string is grown. */
#define DEF_STR_GROWTH 16
/*
 * Report an allocation failure and terminate with exit status 1.
 *
 * `where' is a short tag naming the call site that ran out of memory
 * (callers pass strings such as "new s" or "put"); it is included in the
 * diagnostic written to stderr so the failure is not silent.  A NULL
 * `where' is tolerated.  This routine does not return.
 */
static void
outofspace(char *where)
{
	(void) fprintf(stderr, "could not malloc - out of space in %s\n",
	    where != NULL ? where : "(unknown)");
	exit(1);
}
static string_t *
{
if (s == NULL)
outofspace("new s");
if (initial < DEF_STR_SIZE)
outofspace("new str");
s->index = 0;
*s->str = L'\0';
return (s);
}
static void
{
free(*s);
*s = NULL;
}
/*
 * Return the wide character buffer held by s.
 *
 * Returns NULL when s itself is NULL.  The return is explicit so that
 * control never falls off the end of this value-returning function,
 * which would leave the result undefined for the caller.
 */
static wchar_t *
getwstring(string_t *s)
{
	if (s == NULL)
		return (NULL);
	return (s->str);
}
/*
 * NOTE(review): `cstr' and `wp' are used below but are not declared in
 * the visible body, and nothing in the loop advances `wp' or `p' - lines
 * appear to be missing.  Confirm against the complete source.
 *
 * As shown: calls outofspace("getc") when cstr is NULL, and returns cstr.
 */
static char *
getcstring(string_t *s)
{
char *p = cstr;
/* a NULL buffer is treated as an allocation failure */
if (p == NULL)
outofspace("getc");
while (*wp)
*p = '\0';
return (cstr);
}
static void
{
outofspace("appendw");
}
static void
{
s->size += DEF_STR_GROWTH;
outofspace("put");
}
}
/*
 * Skip the remainder of an SGML comment block on stdin.
 *
 * Input is read one wide character at a time.  The nesting depth starts
 * at one (the opening '<' has already been consumed by the caller); each
 * further '<' deepens it and each '>' closes one level.  Returns once the
 * depth reaches zero, or immediately at end of input.
 */
static void
eatcomments(void)
{
	int depth;
	wint_t wc;

	for (depth = 1; depth != 0; ) {
		wc = getwchar();
		if (wc == WEOF)
			return;
		if (wc == L'<')
			depth++;
		else if (wc == L'>')
			depth--;
	}
}
/*
* Find the next token on stdin.
* Handles nested comment strings, and removes any trailing newlines
* from the stream after the closing '>'.
*/
static int
{
int c;
char *tokp;
top:
if (wc == L'<')
break;
return (0);
switch (c = getchar()) {
case EOF:
return (0);
default:
break;
case '!':
eatcomments();
goto top;
}
if (c == '>') {
if (c != '\n') {
break;
}
*tokp = '\0';
return (1);
}
*tokp++ = (char)c;
}
return (0);
}
/*
* This structure is filled out during the parsing of each page we encounter
*/
/*
 * NOTE(review): only `name' is declared here, but the SGML handlers in
 * this file read m->title, m->volnum, m->date, m->names and m->purpose.
 * The definition looks truncated; confirm against the complete source.
 */
typedef struct {
char *name;
} manpage_t;
static void
{
}
/*
* Fetch a string from stdin, terminated by the endtoken.
* These strings may be localized, so do this with wide characters.
* Hack: skip over (completely ignore) all other tokens
* Hack: map all &blort; constructs to spaces.
*/
static string_t *
{
switch (wc) {
case L'\n':
if (wc != L'<')
putwstring(s, L' ');
break;
case L'<':
goto done;
break;
case L'&':
if (wc == L';')
break;
wc = L' ';
/* FALLTHROUGH */
default:
putwstring(s, wc);
break;
}
done:
putwstring(s, L'\0');
return (s);
}
/*
* <refentrytitle> TITLE </refentrytitle>
*/
static int
{
warning(m, "repeated refentrytitle\n");
return (1);
}
/*
* <manvolnum> MANVOLNUM </manvolnum>
*/
static int
{
warning(m, "repeated manvolnum\n");
return (1);
}
/*
* <refmiscinfo class="date"> DATE </refmiscinfo>
*/
static int
{
warning(m, "repeated date\n");
return (1);
}
/*
* .. </refmeta>
*/
static int
{
if (tocrc)
if (!intro)
if (m->title)
if (m->volnum)
if (m->date)
return (1);
}
static int
{
if (m->names) {
delstring(&r);
} else
m->names = r;
return (1);
}
/*
* <refdescriptor> REFDESCRIPTOR </refdescriptor>
*/
static int
{
return (appendname(m, "/refdescriptor"));
}
/*
* <refname> REFNAME </refname>
*/
static int
{
return (appendname(m, "/refname"));
}
/*
* <refpurpose> PURPOSE </refpurpose>
*/
/*
 * NOTE(review): as shown, this handler warns "repeated refpurpose" every
 * time it runs and never stores anything in m, although the /refnamediv
 * handler tests m->purpose.  A guarding condition and the assignment
 * appear to be missing - confirm against the complete source.
 *
 * Always returns 1.
 */
static int
refpurpose(manpage_t *m)
{
warning(m, "repeated refpurpose\n");
return (1);
}
/*
* .. </refnamediv> - this is our chance to bail out.
*/
static int
{
if (m->names) {
if (intro) {
} else
}
if (m->names)
if (m->purpose)
(void) printf("\n");
return (0);
}
/*
* Basic control structure of the SGML "parser".
* It's very simplistic - when named tags are encountered in the
* input stream, control is transferred to the corresponding routine.
* No checking is done for correct pairing of tags. A few other hacks
* are sneaked into the lexical routines above.
* Output is generated after seeing the /refmeta and /refnamediv
* closing tags.
*/
/*
 * Tag dispatch table: each entry pairs the text of a tag with the routine
 * to run for it; the list ends with an all-zero entry.
 * NOTE(review): every initializer supplies two values but only `name' is
 * declared in the struct - the handler member appears to be missing here.
 */
static const struct {
char *name;
} acts[] = {
{ "refentrytitle", refentrytitle },
{ "manvolnum", manvolnum },
{ "refmiscinfo class=\"date\"", refmiscinfo_date },
{ "/refmeta", print_refmeta },
{ "refdescriptor", refdescriptor },
{ "refname", refname },
{ "refpurpose", refpurpose },
{ "/refnamediv", terminate },
{ 0 }	/* end-of-table marker */
};
static void
{
int rc = 1, a;
(void) memset(m, 0, sizeof (*m));
do {
break;
continue;
break;
}
} while (rc);
}