mdoc_validate.c revision 95c635efb7c3b86efc493e0447eaec7aecca3f0f
/* $Id: mdoc_validate.c,v 1.182 2012/03/23 05:50:25 kristaps Exp $ */
/*
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifndef OSNAME
#endif
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "mdoc.h"
#include "mandoc.h"
#include "libmdoc.h"
#include "libmandoc.h"
/* FIXME: .Bl -diag can't have non-text children in HEAD. */
#define NUMSIZ 32
#define DATESIZE 32
enum check_ineq {
};
enum check_lvl {
};
struct valids {
};
enum check_lvl, enum check_ineq, int);
static void check_text(struct mdoc *, int, int, char *);
static void check_argv(struct mdoc *,
static int post_bl_block(POST_ARGS);
static int post_bl_block_width(POST_ARGS);
static int post_bl_block_tag(POST_ARGS);
static int post_bl_head(POST_ARGS);
static int post_defaults(POST_ARGS);
static int post_literal(POST_ARGS);
static int post_ignpar(POST_ARGS);
static int post_sh_body(POST_ARGS);
static int post_sh_head(POST_ARGS);
static int pre_display(PRE_ARGS);
static int pre_literal(PRE_ARGS);
};
};
NULL,
"NAME",
"LIBRARY",
"SYNOPSIS",
"DESCRIPTION",
"IMPLEMENTATION NOTES",
"RETURN VALUES",
"ENVIRONMENT",
"FILES",
"EXIT STATUS",
"EXAMPLES",
"DIAGNOSTICS",
"COMPATIBILITY",
"ERRORS",
"SEE ALSO",
"STANDARDS",
"HISTORY",
"AUTHORS",
"CAVEATS",
"BUGS",
"SECURITY CONSIDERATIONS",
};
int
{
v_pre *p;
char *tp;
switch (n->type) {
case (MDOC_TEXT):
/* FALLTHROUGH */
case (MDOC_TBL):
/* FALLTHROUGH */
case (MDOC_EQN):
/* FALLTHROUGH */
case (MDOC_ROOT):
return(1);
default:
break;
}
check_args(mdoc, n);
return(1);
if ( ! (*p)(mdoc, n))
return(0);
return(1);
}
int
{
v_post *p;
return(1);
case (MDOC_TEXT):
/* FALLTHROUGH */
case (MDOC_EQN):
/* FALLTHROUGH */
case (MDOC_TBL):
return(1);
case (MDOC_ROOT):
default:
break;
}
return(1);
if ( ! (*p)(mdoc))
return(0);
return(1);
}
static int
{
const char *p;
enum mandocerr t;
return(1);
switch (ineq) {
case (CHECK_LT):
p = "less than ";
return(1);
break;
case (CHECK_GT):
p = "more than ";
return(1);
break;
case (CHECK_EQ):
p = "";
return(1);
break;
default:
abort();
/* NOTREACHED */
}
"want %s%d children (have %d)",
return(1);
}
static int
{
}
static int
{
}
static int
{
}
static int
{
}
static int
{
}
static int
{
}
static int
{
}
static int
{
}
static int
{
}
static int
{
}
static void
{
int i;
return;
}
static void
{
int i;
for (i = 0; i < (int)v->sz; i++)
/* FIXME: move to post_std(). */
mdoc_nmsg(m, n, MANDOCERR_NONAME);
}
static void
{
char *cp;
if (MDOC_LITERAL & m->flags)
return;
}
static int
{
return(1);
return(0);
}
static int
{
if (MDOC_BLOCK != n->type)
return(1);
break;
if (node)
return(1);
}
static int
{
if (MDOC_BLOCK != n->type) {
if (ENDBODY_NOT != n->end) {
} else
return(1);
}
/*
* First figure out which kind of list to use: bind ourselves to
* the first mentioned list type and warn about any remaining
* ones. If we find no list type, we default to LIST_item.
*/
/* LINTED */
lt = LIST__NONE;
/* Set list types. */
case (MDOC_Bullet):
lt = LIST_bullet;
break;
case (MDOC_Dash):
break;
case (MDOC_Enum):
break;
case (MDOC_Hyphen):
lt = LIST_hyphen;
break;
case (MDOC_Item):
break;
case (MDOC_Tag):
break;
case (MDOC_Diag):
break;
case (MDOC_Hang):
break;
case (MDOC_Ohang):
lt = LIST_ohang;
break;
case (MDOC_Inset):
lt = LIST_inset;
break;
case (MDOC_Column):
lt = LIST_column;
break;
/* Set list arguments. */
case (MDOC_Compact):
comp = 1;
break;
case (MDOC_Width):
/* NB: this can be empty! */
break;
}
break;
case (MDOC_Offset):
/* NB: this can be empty! */
break;
}
break;
default:
continue;
}
/* Check: duplicate auxiliary arguments. */
if (dup)
/* Check: multiple list types. */
/* Assign list type. */
/* Set column information, too. */
if (LIST_column == lt) {
}
}
/* The list type should come first. */
continue;
}
/* Allow lists to default to LIST_item. */
}
/*
* Validate the width field. Some list types don't use a width
* and should be warned about when one is given. Others should
* have one and must be warned about when it is missing.
*/
case (LIST_tag):
break;
break;
case (LIST_column):
/* FALLTHROUGH */
case (LIST_diag):
/* FALLTHROUGH */
case (LIST_ohang):
/* FALLTHROUGH */
case (LIST_inset):
/* FALLTHROUGH */
case (LIST_item):
break;
default:
break;
}
return(1);
}
static int
{
const char *offs;
if (MDOC_BLOCK != n->type) {
if (ENDBODY_NOT != n->end) {
} else
return(1);
}
/* LINTED */
dt = DISP__NONE;
case (MDOC_Centred):
dt = DISP_centred;
break;
case (MDOC_Ragged):
dt = DISP_ragged;
break;
case (MDOC_Unfilled):
dt = DISP_unfilled;
break;
case (MDOC_Filled):
dt = DISP_filled;
break;
case (MDOC_Literal):
dt = DISP_literal;
break;
case (MDOC_File):
return(0);
case (MDOC_Offset):
/* NB: this can be empty! */
break;
}
break;
case (MDOC_Compact):
comp = 1;
break;
default:
abort();
/* NOTREACHED */
}
/* Check whether we have duplicates. */
if (dup)
/* Make our auxiliary assignments. */
/* Check whether a type has already been assigned. */
/* Make our type assignment. */
}
}
return(1);
}
static int
{
if (MDOC_BLOCK != n->type)
return(1);
}
static int
{
if (MDOC_BLOCK != n->type)
return(1);
}
static int
{
if (MDOC_BLOCK != n->type)
return(1);
}
static int
{
int i;
return(1);
else
abort();
return(1);
}
static int
{
return(1);
return(1);
}
static int
{
return(1);
}
static int
{
return(1);
}
static int
{
return(1);
}
static int
{
/*
* Unlike other data pointers, these are "housed" by the HEAD
* element, which contains the goods.
*/
} else
return(1);
}
/*
* Cannot have both argument and parameter.
* If neither is specified, let it through with a warning.
*/
return(0);
return(1);
}
/* Extract argument into data. */
if (MDOC_Emphasis == arg)
else if (MDOC_Literal == arg)
else if (MDOC_Symbolic == arg)
else
abort();
return(1);
}
/* Extract parameter into data. */
else
return(1);
}
static int
{
const char *p;
char *buf;
/* If lookup ok, replace with table value. */
if (p) {
return(1);
}
/* If not, use "library ``xxxx''. */
return(1);
}
static int
{
return(1);
}
static int
{
const struct mdoc_node *n;
/*
* The Vt macro comes in both ELEM and BLOCK form, both of which
* have different syntaxes (yet more context-sensitive
* behaviour). ELEM types must have a child, which is already
* guaranteed by the in_line parsing routine; BLOCK types,
* specifically the BODY, should only have TEXT children.
*/
return(1);
return(1);
}
static int
{
int c;
/* If no child specified, make sure we have the meta name. */
return(1);
return(1);
/* If no meta name, set it from the child. */
buf[0] = '\0';
return(0);
}
assert(c);
return(1);
}
static int
{
/*
* The `Dl' (note "el" not "one") and `Bd' macros unset the
* MDOC_LITERAL flag as they leave. Note that `Bd' only sets
* this in literal mode, but it doesn't hurt to just switch it
* off in general since displays can't be nested.
*/
return(1);
}
static int
{
/*
* The `Ar' defaults to "file ..." if no value is provided as an
* argument; the `Mt' and `Pa' macros use "~"; the `Li' just
* gets an empty string.
*/
return(1);
case (MDOC_Ar):
return(0);
return(0);
break;
case (MDOC_At):
return(0);
return(0);
break;
case (MDOC_Li):
return(0);
break;
case (MDOC_Pa):
/* FALLTHROUGH */
case (MDOC_Mt):
return(0);
break;
default:
abort();
/* NOTREACHED */
}
return(1);
}
static int
{
const char *p, *q;
char *buf;
/*
* If we have a child, look it up in the standard keys. If a
* key exists, use that instead of the child; if it doesn't,
* prefix "AT&T UNIX " to the existing data.
*/
return(1);
if (p) {
} else {
p = "AT&T UNIX ";
}
return(1);
}
static int
{
return(1);
}
static int
{
int i, cols;
struct mdoc_node *n, *c;
return(1);
if (LIST__NONE == lt) {
return(1);
}
switch (lt) {
case (LIST_tag):
break;
/* FIXME: give this a dummy value. */
break;
case (LIST_hang):
/* FALLTHROUGH */
case (LIST_ohang):
/* FALLTHROUGH */
case (LIST_inset):
/* FALLTHROUGH */
case (LIST_diag):
break;
case (LIST_bullet):
/* FALLTHROUGH */
case (LIST_dash):
/* FALLTHROUGH */
case (LIST_enum):
/* FALLTHROUGH */
case (LIST_hyphen):
/* FALLTHROUGH */
case (LIST_item):
break;
case (LIST_column):
i++;
if (i < cols)
break;
else
"columns == %d (have %d)", cols, i);
return(MANDOCERR_ARGCOUNT == er);
default:
break;
}
return(1);
}
static int
{
struct mdoc_node *n;
/*
* These are fairly complicated, so we've broken them into two
* functions. post_bl_block_tag() is called when a -tag is
* specified, but no -width (it must be guessed).
* post_bl_block_width() is called when a -width is specified
* (macro indicators must be rewritten into real lengths).
*/
if ( ! post_bl_block_tag(mdoc))
return(0);
if ( ! post_bl_block_width(mdoc))
return(0);
} else
return(1);
return(1);
}
static int
{
int i;
struct mdoc_node *n;
/*
* Calculate the real width of a list from the -width string,
* which may contain a macro (with a known default width), a
* literal string, or a scaling width.
*
* If the value to -width is a macro, then we re-write it to be
* the macro's width as set in share/tmac/mdoc/doc-common.
*/
width = 6;
return(1);
return(1);
}
/* The value already exists: free and reallocate it. */
break;
/* Set our width! */
return(1);
}
static int
{
int i;
/*
* Calculate the -width for a `Bl -tag' list if it hasn't been
* provided. Uses the first head macro. NOTE AGAIN: this is
* ONLY if the -width argument has NOT been provided. See
* post_bl_block_width() for converting the -width string.
*/
sz = 10;
continue;
break;
break;
}
break;
}
/* Defaults to ten ens. */
/*
* We have to dynamically add this to the macro's argument list.
* We're guaranteed that a MDOC_Width doesn't already exist.
*/
/* Set our width! */
return(1);
}
static int
{
int i, j;
/* FIXME: this should be ERROR class... */
/*
* Convert old-style lists, where the column width specifiers
* trail as macro parameters, to the new-style ("normal-form")
* lists where they're argument values following -column.
*/
/* First, disallow both types and allow normal-form. */
/*
* TODO: technically, we can accept both and just merge the two
* lists, but I'll leave that for another day.
*/
return(0);
return(1);
break;
/*
* Accommodate for new-style groff column syntax. Shuffle the
* child nodes, all of which must be TEXT, as arguments for the
* column field. Then, delete the head children.
*/
}
return(1);
}
static int
{
struct mdoc_node *n;
return(post_bl_head(mdoc));
return(post_bl_block(mdoc));
return(1);
switch (n->tok) {
case (MDOC_Lp):
/* FALLTHROUGH */
case (MDOC_Pp):
/* FALLTHROUGH */
case (MDOC_It):
/* FALLTHROUGH */
case (MDOC_Sm):
continue;
default:
break;
}
return(0);
}
return(1);
}
static int
{
return(1);
}
return(1);
return(1);
return(1);
}
static int
{
int erc;
struct mdoc_node *n;
erc = 0;
/* Check that we have a finished prologue. */
erc++;
}
assert(n);
/* Check that we begin with a proper `Sh'. */
erc++;
erc++;
/* Can this be lifted? See rxdebug.1 for example. */
}
return(erc ? 0 : 1);
}
static int
{
const char *p;
return(1);
}
} else {
}
return(1);
}
static int
{
int i, j;
case (MDOC_HEAD):
return(1);
case (MDOC_BODY):
break;
return(1);
default:
return(1);
}
/*
* Make sure only certain types of nodes are allowed within
* the `Rs' body. Delete offending nodes and raise a warning.
* Do this before re-ordering for the sake of clarity.
*/
for (i = 0; i < RSORD_MAX; i++)
break;
if (i < RSORD_MAX) {
continue;
}
}
/*
* Nothing to sort if only invalid nodes were found
* inside the `Rs' body.
*/
return(1);
/*
* The full `Rs' block needs special handling to order the
* sub-elements according to `rsord'. Pick through each element
* and correctly order it. This is an insertion sort.
*/
/* Determine order of `nn'. */
for (i = 0; i < RSORD_MAX; i++)
break;
/*
* Remove `nn' from the chain. This somewhat
* repeats mdoc_node_unlink(), but since we're
* just re-ordering, there's no need for the
* full unlink process.
*/
/*
* Scan back until we reach a node that's
* ordered before `nn'.
*/
/* Determine order of `prev'. */
for (j = 0; j < RSORD_MAX; j++)
break;
if (j <= i)
break;
}
/*
* Set `nn' back into its correct place in front
* of the `prev' node.
*/
if (prev) {
} else {
}
}
return(1);
}
static int
{
return(1);
}
static int
{
return(post_sh_head(mdoc));
return(post_sh_body(mdoc));
return(1);
}
static int
{
struct mdoc_node *n;
return(1);
/*
* Warn if the NAME section doesn't contain the `Nm' and `Nd'
* macros (can have multiple `Nm' and one `Nd'). Note that the
* children of the BODY declaration can also be "text".
*/
return(1);
}
continue;
continue;
}
assert(n);
return(1);
return(1);
}
static int
{
struct mdoc_node *n;
int c;
/*
* Process a new section. Sections are either "named" or
* "custom". Custom sections are user-defined, while named ones
* follow a conventional order and may only appear in certain
* manual sections.
*/
sec = SEC_CUSTOM;
buf[0] = '\0';
return(0);
} else if (1 == c)
/* The NAME should be first. */
/* The SYNOPSIS gets special attention in other areas. */
if (SEC_SYNOPSIS == sec)
else
/* Mark our last section. */
/*
* Set the section attribute for the current HEAD, for its
* parent BLOCK, and for the HEAD children; the latter can
* only be TEXT nodes, so no recursion is needed.
* For other blocks and elements, including .Sh BODY, this is
* done when allocating the node data structures, but for .Sh
* BLOCK and HEAD, the section is still unknown at that time.
*/
/* We don't care about custom sections after this. */
if (SEC_CUSTOM == sec)
return(1);
/*
* Check whether our non-custom section is being repeated or is
* out of order.
*/
/* Mark the last named section. */
switch (sec) {
case (SEC_RETURN_VALUES):
/* FALLTHROUGH */
case (SEC_ERRORS):
/* FALLTHROUGH */
case (SEC_LIBRARY):
break;
break;
break;
break;
default:
break;
}
return(1);
}
static int
{
return(1);
}
}
return(1);
}
static int
{
return(1);
return(1);
/*
* Don't allow an `Lp' or `Pp' immediately prior to a paragraph-type
* block: `Lp', `Pp', or non-compact `Bd' or `Bl'.
*/
return(1);
return(1);
return(1);
return(1);
return(1);
}
static int
{
return(1);
/*
* The `Dl' (note "el" not "one") and `Bd -literal' and `Bd
* -unfilled' macros set MDOC_LITERAL on entrance to the body.
*/
switch (n->tok) {
case (MDOC_Dl):
break;
case (MDOC_Bd):
break;
default:
abort();
/* NOTREACHED */
}
return(1);
}
static int
{
struct mdoc_node *n;
int c;
return(1);
}
buf[0] = '\0';
return(0);
}
assert(c);
return(1);
}
static int
{
const char *cp;
char *p;
/* First make all characters uppercase. */
if (toupper((unsigned char)*p) == *p)
continue;
/*
* FIXME: don't be lazy: have this make all
* characters be uppercase and just warn once.
*/
break;
}
/* Handles: `.Dt'
* --> title = unknown, volume = local, msec = 0, arch = NULL
*/
/* XXX: make these macro values. */
/* FIXME: warn about missing values. */
return(1);
}
/* Handles: `.Dt TITLE'
* --> title = TITLE, volume = local, msec = 0, arch = NULL
*/
/* FIXME: warn about missing msec. */
/* XXX: make this a macro value. */
return(1);
}
/* Handles: `.Dt TITLE SEC'
* --> title = TITLE, volume = SEC is msec ?
* format(msec) : SEC,
* msec = SEC is msec ? atoi(msec) : 0,
* arch = NULL
*/
if (cp) {
} else {
}
return(1);
/* Handles: `.Dt TITLE SEC VOL'
* --> title = TITLE, volume = VOL is vol ?
* format(VOL) :
* VOL is arch ? format(arch) :
* VOL
*/
if (cp) {
} else {
/* FIXME: warn about bad arch. */
} else
}
/* Ignore any subsequent parameters... */
/* FIXME: warn about subsequent parameters. */
return(1);
}
static int
{
/*
* Remove prologue macros from the document after they're
* processed. The final document uses mdoc_meta for these
* values and discards the originals.
*/
return(1);
}
static int
{
struct mdoc_node *n;
/*
* Make `Bx's second argument always start with an uppercase
* letter. Groff checks if it's an "accepted" term, but we just
* uppercase blindly.
*/
((unsigned char)*n->string);
return(1);
}
static int
{
struct mdoc_node *n;
int c;
#ifndef OSNAME
#endif
/*
* Set the operating system by way of the `Os' macro. Note that
* if an argument isn't provided and -DOSNAME="\"foo\"" is
* provided during compilation, this value will be used instead
* of filling in "sysname release" from uname().
*/
buf[0] = '\0';
return(0);
}
assert(c);
/* XXX: yes, these can all be dynamically-adjusted buffers, but
* it's really not worth the extra hackery.
*/
if ('\0' == buf[0]) {
#ifdef OSNAME
return(0);
}
#else /*!OSNAME */
}
return(0);
}
return(0);
}
return(0);
}
#endif /*!OSNAME*/
}
return(1);
}
static int
{
/*
* Macros accepting `-std' as an argument have the name of the
* current document (`Nm') filled in as the argument if it's not
* provided.
*/
if (n->child)
return(1);
return(1);
nn = n;
return(0);
return(1);
}
/*
* Concatenate a node, stopping at the first non-text.
* Concatenation is separated by a single whitespace.
* Returns -1 on fatal (string overrun) error, 0 if child nodes were
* encountered, 1 otherwise.
*/
static int
{
return(0);
return(-1);
return(-1);
}
return(1);
}
/*
* Translate the string `p' into an enum mdoc_sec value; SEC_CUSTOM is
* the fallback result after the loop.
* NOTE(review): as visible here the loop body returns unconditionally,
* so `p' is never examined and, whenever the loop is entered, index 0
* is returned.  A comparison of `p' against the known section names
* looks to be missing -- confirm against the complete file.
*/
static enum mdoc_sec
a2sec(const char *p)
{
int i;
/* Walk every enum value below SEC__MAX. */
for (i = 0; i < (int)SEC__MAX; i++)
return((enum mdoc_sec)i);
/* Reached only when the loop does not return. */
return(SEC_CUSTOM);
}
/*
* Look up a fixed width for the macro token in `macro'.
* Returns the value attached to the matching case label, or 0 for a
* macro that has no entry in the switch.
* NOTE(review): the units are not stated here; presumably these are
* the default macro widths used when rewriting a -width argument --
* confirm against the caller.
*/
static size_t
{
switch (macro) {
case(MDOC_Ad):
return(12);
case(MDOC_Ao):
return(12);
case(MDOC_An):
return(12);
case(MDOC_Aq):
return(12);
case(MDOC_Ar):
return(12);
case(MDOC_Bo):
return(12);
case(MDOC_Bq):
return(12);
case(MDOC_Cd):
return(12);
case(MDOC_Cm):
return(10);
case(MDOC_Do):
return(10);
case(MDOC_Dq):
return(12);
case(MDOC_Dv):
return(12);
case(MDOC_Eo):
return(12);
case(MDOC_Em):
return(10);
case(MDOC_Er):
return(17);
case(MDOC_Ev):
return(15);
case(MDOC_Fa):
return(12);
case(MDOC_Fl):
return(10);
case(MDOC_Fo):
return(16);
case(MDOC_Fn):
return(16);
case(MDOC_Ic):
return(10);
case(MDOC_Li):
return(16);
case(MDOC_Ms):
return(6);
case(MDOC_Nm):
return(10);
case(MDOC_No):
return(12);
case(MDOC_Oo):
return(10);
case(MDOC_Op):
return(14);
case(MDOC_Pa):
return(32);
case(MDOC_Pf):
return(12);
case(MDOC_Po):
return(12);
case(MDOC_Pq):
return(12);
case(MDOC_Ql):
return(16);
case(MDOC_Qo):
return(12);
case(MDOC_So):
return(12);
case(MDOC_Sq):
return(12);
case(MDOC_Sy):
return(6);
case(MDOC_Sx):
return(16);
case(MDOC_Tn):
return(10);
case(MDOC_Va):
return(12);
case(MDOC_Vt):
return(12);
case(MDOC_Xr):
return(10);
default:
/* No width listed for this macro: drop out to the 0 below. */
break;
};
return(0);
}