common/util/sscanf.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1990, 1993
 *  The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Chris Torek.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp
 * From: static char sccsid[] = "@(#)strtol.c   8.1 (Berkeley) 6/4/93";
 * From: static char sccsid[] = "@(#)strtoul.c  8.1 (Berkeley) 6/4/93";
 */


/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident   "%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/ctype.h>
#include <sys/sunddi.h>
#include <util/sscanf.h>

#define BUF     32  /* size of the numeric scan buffer, including the NUL */

/*
 * Flags used during conversion.  They are collected in the `flags'
 * local of vsscanf() and reset for every `%' directive.
 */
#define LONG        0x01    /* l: result is stored through a long * */
#define SHORT       0x04    /* h: result is stored through a short * */
#define SUPPRESS    0x08    /* *: parse the field but do not assign it */
#define POINTER     0x10    /* %p: hex text stored through a void ** */
#define NOSKIP      0x20    /* do not skip leading white space (%c, %[) */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * DPTOK and EXPOK are inherited from the BSD original; nothing visible
 * in this file references them (there is no floating point conversion
 * here), which is why they can share bit values with PFXOK and NZDIGITS.
 */
#define SIGNOK      0x40    /* +/- is (still) legal */
#define NDIGITS     0x80    /* no digits detected */

#define DPTOK       0x100   /* (float) decimal point is still legal */
#define EXPOK       0x200   /* (float) exponent (e+3, etc) still legal */

#define PFXOK       0x100   /* 0x prefix is (still) legal */
#define NZDIGITS    0x200   /* no zero digits detected */

/*
 * Conversion types.  After the directive has been parsed, the format
 * character is replaced by one of these to select the scanner to run.
 */
#define CT_CHAR     0   /* %c conversion */
#define CT_CCL      1   /* %[...] conversion */
#define CT_STRING   2   /* %s conversion */
#define CT_INT      3   /* integer, i.e., ddi_strtol or ddi_strtoul */

/* Builds the 256-entry membership table for a %[...] scanset. */
static const uchar_t *set_ccl(char *, const uchar_t *);

/*
 * Local white space test: space, CR, LF, TAB and FF.  Vertical tab is
 * not treated as white space.  The argument is evaluated several times,
 * so it must not have side effects.
 * NOTE(review): <sys/ctype.h> is included above -- confirm that it does
 * not provide a conflicting isspace definition.
 */
#define isspace(ch) (((ch) == ' ') || ((ch) == '\r') || ((ch) == '\n') || \
            ((ch) == '\t') || ((ch) == '\f'))

int
vsscanf(const char *inp, char const *fmt0, va_list ap)
{
    int inr;
    const uchar_t *fmt = (const uchar_t *)fmt0;
    int c;          /* character from format, or conversion */
    size_t width;       /* field width, or 0 */
    char *p;        /* points into all kinds of strings */
    int n;          /* handy integer */
    int flags;      /* flags as defined above */
    char *p0;       /* saves original value of p when necessary */
    int nassigned;      /* number of fields assigned */
    int nconversions;   /* number of conversions */
    int nread;      /* number of characters consumed from fp */
    int base;       /* base argument to strtoq/strtouq */
    int sconv;      /* do signed conversion */
    char ccltab[256];   /* character class table for %[...] */
    char buf[BUF];      /* buffer for numeric conversions */

    /* `basefix' is used to avoid `if' tests in the integer scanner */
    static short basefix[17] =
        { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };

    inr = strlen(inp);

    sconv = 0;
    nassigned = 0;
    nconversions = 0;
    nread = 0;
    base = 0;
    for (;;) {
        c = *fmt++;
        if (c == 0)
            return (nassigned);
        if (isspace(c)) {
            while (inr > 0 && isspace(*inp))
                nread++, inr--, inp++;
            continue;
        }
        if (c != '%')
            goto literal;
        width = 0;
        flags = 0;
        /*
         * switch on the format.  continue if done;
         * break once format type is derived.
         */
again:      c = *fmt++;
        switch (c) {
        case '%':
literal:
            if (inr <= 0)
                goto input_failure;
            if (*inp != c)
                goto match_failure;
            inr--, inp++;
            nread++;
            continue;

        case '*':
            flags |= SUPPRESS;
            goto again;
        case 'l':
            flags |= LONG;
            goto again;
        case 'h':
            flags |= SHORT;
            goto again;

        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            width = width * 10 + c - '0';
            goto again;

        /*
         * Conversions.
         *
         */
        case 'd':
            c = CT_INT;
            sconv = 1;
            base = 10;
            break;

        case 'i':
            c = CT_INT;
            sconv = 1;
            base = 0;
            break;

        case 'o':
            c = CT_INT;
            base = 8;
            break;

        case 'u':
            c = CT_INT;
            base = 10;
            break;

        case 'x':
            flags |= PFXOK; /* enable 0x prefixing */
            c = CT_INT;
            base = 16;
            break;

        case 's':
            c = CT_STRING;
            break;

        case '[':
            fmt = set_ccl(ccltab, fmt);
            flags |= NOSKIP;
            c = CT_CCL;
            break;

        case 'c':
            flags |= NOSKIP;
            c = CT_CHAR;
            break;

        case 'p':   /* pointer format is like hex */
            flags |= POINTER | PFXOK;
            c = CT_INT;
            base = 16;
            break;

        case 'n':
            nconversions++;
            if (flags & SUPPRESS)   /* ??? */
                continue;
            if (flags & SHORT)
                *va_arg(ap, short *) = (short)nread;
            else if (flags & LONG)
                *va_arg(ap, long *) = (long)nread;
            else
                *va_arg(ap, int *) = nread;
            continue;
        }

        /*
         * We have a conversion that requires input.
         */
        if (inr <= 0)
            goto input_failure;

        /*
         * Consume leading white space, except for formats
         * that suppress this.
         */
        if ((flags & NOSKIP) == 0) {
            while (isspace(*inp)) {
                nread++;
                if (--inr > 0)
                    inp++;
                else
                    goto input_failure;
            }
            /*
             * Note that there is at least one character in
             * the buffer, so conversions that do not set NOSKIP
             * can no longer result in an input failure.
             */
        }

        /*
         * Do the conversion.
         */
        switch (c) {

        case CT_CHAR:
            /* scan arbitrary characters (sets NOSKIP) */
            if (width == 0)
                width = 1;
            if (flags & SUPPRESS) {
                size_t sum = 0;

                if ((n = inr) < width) {
                    sum += n;
                    width -= n;
                    inp += n;
                    if (sum == 0)
                        goto input_failure;
                } else {
                    sum += width;
                    inr -= width;
                    inp += width;
                }
                nread += sum;
            } else {
                bcopy(inp, va_arg(ap, char *), width);
                inr -= width;
                inp += width;
                nread += width;
                nassigned++;
            }
            nconversions++;
            break;

        case CT_CCL:
            /* scan a (nonempty) character class (sets NOSKIP) */
            if (width == 0)
                width = (size_t)~0; /* `infinity' */
            /* take only those things in the class */
            if (flags & SUPPRESS) {
                n = 0;
                while (ccltab[(unsigned char)*inp]) {
                    n++, inr--, inp++;
                    if (--width == 0)
                        break;
                    if (inr <= 0) {
                        if (n == 0)
                            goto input_failure;
                        break;
                    }
                }
                if (n == 0)
                    goto match_failure;
            } else {
                p0 = p = va_arg(ap, char *);
                while (ccltab[(unsigned char)*inp]) {
                    inr--;
                    *p++ = *inp++;
                    if (--width == 0)
                        break;
                    if (inr <= 0) {
                        if (p == p0)
                            goto input_failure;
                        break;
                    }
                }
                n = p - p0;
                if (n == 0)
                    goto match_failure;
                *p = 0;
                nassigned++;
            }
            nread += n;
            nconversions++;
            break;

        case CT_STRING:
            /* like CCL, but zero-length string OK, & no NOSKIP */
            if (width == 0)
                width = (size_t)~0;
            if (flags & SUPPRESS) {
                n = 0;
                while (!isspace(*inp)) {
                    n++, inr--, inp++;
                    if (--width == 0)
                        break;
                    if (inr <= 0)
                        break;
                }
                nread += n;
            } else {
                p0 = p = va_arg(ap, char *);
                while (!isspace(*inp)) {
                    inr--;
                    *p++ = *inp++;
                    if (--width == 0)
                        break;
                    if (inr <= 0)
                        break;
                }
                *p = 0;
                nread += p - p0;
                nassigned++;
            }
            nconversions++;
            continue;

        case CT_INT:
            /* scan an integer as if by strtoq/strtouq */
            /* size_t is unsigned, hence this optimisation */
            if (--width > sizeof (buf) - 2)
                width = sizeof (buf) - 2;
            width++;
            flags |= SIGNOK | NDIGITS | NZDIGITS;
            for (p = buf; width; width--) {
                c = *inp;
                /*
                 * Switch on the character; `goto ok'
                 * if we accept it as a part of number.
                 */
                switch (c) {

                /*
                 * The digit 0 is always legal, but is
                 * special.  For %i conversions, if no
                 * digits (zero or nonzero) have been
                 * scanned (only signs), we will have
                 * base==0.  In that case, we should set
                 * it to 8 and enable 0x prefixing.
                 * Also, if we have not scanned zero digits
                 * before this, do not turn off prefixing
                 * (someone else will turn it off if we
                 * have scanned any nonzero digits).
                 */
                case '0':
                    if (base == 0) {
                        base = 8;
                        flags |= PFXOK;
                    }
                    if (flags & NZDIGITS)
                        flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
                    else
                        flags &= ~(SIGNOK|PFXOK|NDIGITS);
                    goto ok;

                /* 1 through 7 always legal */
                case '1': case '2': case '3':
                case '4': case '5': case '6': case '7':
                    base = basefix[base];
                    flags &= ~(SIGNOK | PFXOK | NDIGITS);
                    goto ok;

                /* digits 8 and 9 ok iff decimal or hex */
                case '8': case '9':
                    base = basefix[base];
                    if (base <= 8)
                        break;  /* not legal here */
                    flags &= ~(SIGNOK | PFXOK | NDIGITS);
                    goto ok;

                /* letters ok iff hex */
                case 'A': case 'B': case 'C':
                case 'D': case 'E': case 'F':
                case 'a': case 'b': case 'c':
                case 'd': case 'e': case 'f':
                    /* no need to fix base here */
                    if (base <= 10)
                        break;  /* not legal here */
                    flags &= ~(SIGNOK | PFXOK | NDIGITS);
                    goto ok;

                /* sign ok only as first character */
                case '+': case '-':
                    if (flags & SIGNOK) {
                        flags &= ~SIGNOK;
                        goto ok;
                    }
                    break;

                /* x ok iff flag still set & 2nd char */
                case 'x': case 'X':
                    if (flags & PFXOK && p == buf + 1) {
                        base = 16;  /* if %i */
                        flags &= ~PFXOK;
                        goto ok;
                    }
                    break;
                }

                /*
                 * If we got here, c is not a legal character
                 * for a number.  Stop accumulating digits.
                 */
                break;
        ok:
                /*
                 * c is legal: store it and look at the next.
                 */
                *p++ = c;
                if (--inr > 0)
                    inp++;
                else
                    break;      /* end of input */
            }
            /*
             * If we had only a sign, it is no good; push
             * back the sign.  If the number ends in `x',
             * it was [sign] '0' 'x', so push back the x
             * and treat it as [sign] '0'.
             */
            if (flags & NDIGITS) {
                if (p > buf) {
                    inp--;
                    inr++;
                }
                goto match_failure;
            }
            c = ((uchar_t *)p)[-1];
            if (c == 'x' || c == 'X') {
                --p;
                inp--;
                inr++;
            }
            if ((flags & SUPPRESS) == 0) {
                ulong_t res;

                *p = 0;
                if (sconv)
                    (void) ddi_strtol(buf, (char **)NULL,
                        base, (long *)(&res));
                else
                    (void) ddi_strtoul(buf, (char **)NULL,
                        base, &res);
                if (flags & POINTER)
                    *va_arg(ap, void **) =
                        (void *)(uintptr_t)res;
                else if (flags & SHORT)
                    *va_arg(ap, short *) = (short)res;
                else if (flags & LONG)
                    *va_arg(ap, long *) = (long)res;
                else
                    *va_arg(ap, int *) = (int)res;
                nassigned++;
            }
            nread += p - buf;
            nconversions++;
            break;

        }
    }
input_failure:
    return (nconversions != 0 ? nassigned : -1);
match_failure:
    return (nassigned);
}

/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]'.  The table has a 1 wherever characters should be
 * considered part of the scanset.
 */
/*
 * set_ccl() -- build the membership table for a %[...] scanset.
 *
 * `fmt' points just after the opening `['; `tab' must have room for 256
 * entries, one per possible character value.  On return tab[ch] is 1 if
 * ch belongs to the scanset and 0 if it does not (a leading `^' inverts
 * the set).
 *
 * Returns a pointer to the character after the closing `]', or to the
 * terminating NUL if the format ends before the scanset is closed.
 */
static const uchar_t *
set_ccl(char *tab, const uchar_t *fmt)
{
    int c, n, v;

    c = *fmt++;     /* first char hat => negated scanset */
    if (c == '^') {
        v = 1;      /* default => accept */
        c = *fmt++; /* get new first char */
    } else
        v = 0;      /* default => reject */

    /* put every table entry into the default state */
    for (n = 0; n < 256; n++)
        tab[n] = v;

    if (c == 0)
        return (fmt - 1); /* format ended before closing ] */

    /*
     * Now set the entries corresponding to the actual scanset
     * to the opposite of the above.
     *
     * The first character may be ']' (or '-') without being special;
     * the last character may be '-'.
     */
    v = 1 - v;
    for (;;) {
        tab[c] = v;     /* take character c */
doswitch:
        n = *fmt++;     /* and examine the next */
        switch (n) {

        case 0:         /* format ended too soon */
            return (fmt - 1);

        case '-':
            /*
             * A scanset of the form
             *  [01+-]
             * is defined as `the digit 0, the digit 1,
             * the character +, the character -', but
             * the effect of a scanset such as
             *  [a-zA-Z0-9]
             * is implementation defined.  The V7 Unix
             * scanf treats `a-z' as `the letters a through
             * z', but treats `a-a' as `the letter a, the
             * character -, and the letter a'.
             *
             * For compatibility, the `-' is not considered
             * to define a range if the character following
             * it is either a close bracket (required by ANSI)
             * or is not numerically greater than the character
             * we just stored in the table (c).
             *
             * The test must reject n == c as well: the fill
             * loop below increments before it stores, so an
             * `a-a' range would mark `b', and a range starting
             * and ending at character 255 would write one
             * entry past the end of the table.
             */
            n = *fmt;
            if (n == ']' || n <= c) {
                c = '-';
                break;  /* resume the for(;;) */
            }
            fmt++;
            /* fill in the range; c < n <= 255 holds here */
            do {
                tab[++c] = v;
            } while (c < n);
            c = n;
            /*
             * Alas, the V7 Unix scanf also treats formats
             * such as [a-c-e] as `the letters a through e'.
             * This too is permitted by the standard....
             */
            goto doswitch;
            /* NOTREACHED */

        case ']':       /* end of scanset */
            return (fmt);

        default:        /* just another character */
            c = n;
            break;
        }
    }
    /* NOTREACHED */
}

/*
 * sscanf() -- variadic front end to vsscanf().  Gathers the trailing
 * arguments into a va_list, scans `ibuf' according to `fmt', and hands
 * back whatever vsscanf() returned.
 */
int
sscanf(const char *ibuf, const char *fmt, ...)
{
    int converted;
    va_list args;

    va_start(args, fmt);
    converted = vsscanf(ibuf, fmt, args);
    va_end(args);

    return (converted);
}