371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov/* $Id: mdoc_argv.c,v 1.107 2015/10/17 00:21:07 schwarze Exp $ */
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov * Copyright (c) 2012, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore * Permission to use, copy, modify, and distribute this software for any
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore * purpose with or without fee is hereby granted, provided that the above
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore * copyright notice and this permission notice appear in all copies.
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore#define MULTI_STEP 5 /* pre-allocate argument values */
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov#define DELIMSZ 6 /* max possible size of a delimiter */
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore ARGSFL_DELIM, /* handle delimiters of [[::delim::][ ]+]+ */
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore ARGSFL_TABSEP /* handle tab/`Ta' separated phrases */
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore ARGV_NONE, /* no args to flag (e.g., -split) */
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore ARGV_SINGLE, /* one arg to flag (e.g., -file xxx) */
698f87a48e2e945bfe5493ce168e0d0ae1cedd5cGarrett D'Amore ARGV_MULTI /* multiple args (e.g., -column xxx yyy) */
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amorestatic void argn_free(struct mdoc_arg *, int);
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankovstatic enum margserr args(struct roff_man *, int, int *,
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore char *, enum argsflag, char **);
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amorestatic int args_checkpunct(const char *, int);
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore struct mdoc_argv *, int *, char *);
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore struct mdoc_argv *, int *, char *);
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amorestatic const enum argvflag argvflags[MDOC_ARG_MAX] = {
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amorestatic const struct mdocarg mdocargs[MDOC_MAX] = {
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov * Parse flags and their arguments from the input line.
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov * These come in the form -flag [argument ...].
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov * Some flags take no argument, some one, some multiple.
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankovmdoc_argv(struct roff_man *mdoc, int line, int tok,
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov /* Which flags does this macro support? */
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov /* Loop over the flags on the input line. */
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov /* Seek to the first unescaped space. */
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov for (argname = buf + ++ipos; buf[ipos] != '\0'; ipos++)
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov * We want to nil-terminate the word to look it up.
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov * But we may not have a flag, in which case we need
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov * to restore the line as-is. So keep around the
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov * stray byte, which we'll reset upon exiting.
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov * Now look up the word as a flag. Use temporary
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov * storage that we'll copy into the node's flags.
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov while ((tmpv.arg = *argtable++) != MDOC_ARG_MAX)
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov if ( ! strcmp(argname, mdoc_argnames[tmpv.arg]))
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov /* If it isn't a flag, restore the saved byte. */
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov /* Read to the next word (the first argument). */
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov /* Parse the arguments of the flag. */
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov /* Append to the return values. */
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov *retv = mandoc_reallocarray(*retv, retc, sizeof(**retv));
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov memcpy(*retv + retc - 1, &tmpv, sizeof(**retv));
260e9a87725c090ba5835b1f9f0b62fa2f96036fYuri Pankov /* Prepare for parsing the next flag. */
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore for (--p->argc; iarg < (int)p->argc; iarg++)
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankovmdoc_args(struct roff_man *mdoc, int line, int *pos,
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov fl = tok == TOKEN_NONE ? ARGSFL_NONE : mdocargs[tok].flags;
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore * We know that we're in an `It', so it's reasonable to expect
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore * us to be sitting in a `Bl'. Someday this may not be the case
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore * (if we allow random `It's sitting out there), so provide a
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore * safe fall-back into the default behaviour.
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov if (fl == ARGSFL_DELIM && args_checkpunct(buf, *pos))
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov * Tabs in `It' lines in `Bl -column' can't be escaped.
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov * Phrases are reparsed for `Ta' and other macros later.
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov * Words right before and right after
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov * tab characters are not parsed,
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov * unless there is a blank in between.
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov * One or more blanks after a tab cause
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov * one leading blank in the next column.
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov * So skip all but one of them.
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov while (buf[*pos] == ' ' && buf[*pos + 1] == ' ')
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov * A tab at the end of an input line
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov * switches to the next column.
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov *pos += (int)(p - *v);
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov /* Skip any trailing blank characters. */
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore * Process a quoted literal. A quote begins with a double-quote
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore * and ends with a double-quote NOT preceded by a double-quote.
698f87a48e2e945bfe5493ce168e0d0ae1cedd5cGarrett D'Amore * NUL-terminate the literal in place.
698f87a48e2e945bfe5493ce168e0d0ae1cedd5cGarrett D'Amore * Collapse pairs of quotes inside quoted literals.
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore * Whitespace is NOT involved in literal termination.
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov if (mdoc->flags & MDOC_PHRASELIT || buf[*pos] == '\"') {
698f87a48e2e945bfe5493ce168e0d0ae1cedd5cGarrett D'Amore /* Move following text left after quoted quotes. */
698f87a48e2e945bfe5493ce168e0d0ae1cedd5cGarrett D'Amore /* Unquoted quotes end quoted args. */
698f87a48e2e945bfe5493ce168e0d0ae1cedd5cGarrett D'Amore /* Quoted quotes collapse. */
698f87a48e2e945bfe5493ce168e0d0ae1cedd5cGarrett D'Amore *v = mandoc_getarg(mdoc->parse, &p, line, pos);
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov * After parsing the last word in this phrase,
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov * tell lookup() whether or not to interpret it.
371584c2eae4cf827fd406ba26c14f021adaaa70Yuri Pankov if (*p == '\0' && mdoc->flags & MDOC_PHRASEQL) {
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore * Check if the string consists only of space-separated closing
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore * delimiters. This is a bit of a dance: the first must be a close
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore * delimiter, but it may be followed by middle delimiters. Arbitrary
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore * whitespace may separate these tokens.
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore /* First token must be a close-delimiter. */
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore for (j = 0; buf[i] && ' ' != buf[i] && j < DELIMSZ; j++, i++)
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore /* Remaining must NOT be open/none. */
95c635efb7c3b86efc493e0447eaec7aecca3f0fGarrett D'Amore while (buf[i] && ' ' != buf[i] && j < DELIMSZ)
698f87a48e2e945bfe5493ce168e0d0ae1cedd5cGarrett D'Amore ac = args(mdoc, line, pos, buf, ARGSFL_NONE, &p);