/* port/fp/char_to_decimal.h */

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident   "%Z%%M% %I% %E% SMI"

/*
 * This file contains the common part of the functions string_to_decimal,
 * func_to_decimal, and file_to_decimal.  Much of this code has been
 * duplicated in wstring_to_decimal (see wstod.c) with some simplifications
 * and appropriate modifications for wide characters.  DO NOT fix a bug here
 * without fixing the same bug in wstring_to_decimal, if it applies.
 *
 * The code below makes the following assumptions.
 *
 * 1. The first six parameters to the function are declared with the
 *    following names and types:
 *
 *    char **ppc;
 *    int nmax;
 *    int fortran_conventions;
 *    decimal_record *pd;
 *    enum decimal_string_form *pform;
 *    char **pechar;
 *
 * 2. Before this file is #included, the following variables have been
 *    defined and initialized as shown:
 *
 *    char *cp;
 *    char *good = *ppc - 1;
 *    int current;
 *    int nread;
 *
 *    If the first character can be read successfully, then current is set
 *    to the value of the first character, cp is set to *ppc, (char)current
 *    is stored at *cp, and nread = 1.  If the first character cannot be
 *    read successfully, then current = EOF and nread = 0.
 *
 * 3. The macro NEXT is defined to expand to code that implements
 *    the following logic:
 *
 *      if (nread < nmax) {
 *          current = <next character>;
 *          if (current != EOF) {
 *             *++cp = (char)current;
 *             nread++;
 *          }
 *      } else
 *          current = EOF;
 *
 *    Note that nread always reflects the number of characters successfully
 *    read, the buffer pointed to by *ppc gets filled only with characters
 *    that have been successfully read, and cp always points to the location
 *    in the buffer that was filled by the last character successfully read.
 *    current == EOF if and only if we can't read any more, either because
 *    we've reached the end of the input file or the buffer is full (i.e.,
 *    we've read nmax characters).
 *
 * 4. After this file is #included, the following variables may be used
 *    and will have the specified values:
 *
 *    *ppc, *pd, *pform, and *pechar will be set as documented in the
 *      manual page;
 *    nmax and fortran_conventions will be unchanged;
 *    nread will be the number of characters actually read;
 *    cp will point to the last character actually read, provided at least
 *      one character was read successfully (in which case cp >= *ppc).
 */

/*
 * UCASE(c): map a lower-case ASCII letter to its upper-case counterpart
 * by subtracting 32; any other value is returned unchanged.  The argument
 * is evaluated more than once, so it must not have side effects.  Every
 * use of the parameter is parenthesized so that an argument containing
 * low-precedence operators is still treated as a single operand.
 */
#define UCASE(c) ((('a' <= (c)) && ((c) <= 'z')) ? (c) - 32 : (c))

/*
 * NZDIGIT(c): true if c is a nonzero digit in the current radix.  '1'
 * through '9' always qualify; 'a'-'f' and 'A'-'F' qualify only while a
 * variable named form, which must be in scope at the point of use, holds
 * a negative value (the code below uses that to mark hex form).  The
 * argument is evaluated more than once, so it must not have side effects.
 * Every use of the parameter is parenthesized so that an argument
 * containing low-precedence operators is treated as a single operand.
 */
#define NZDIGIT(c)  (('1' <= (c) && (c) <= '9') || ((int)form < 0 && \
            (('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F'))))

{
    /*
     * Common scanner body.  It consumes characters through current and
     * NEXT (see the assumptions at the top of this file), fills in *pd,
     * and reports the recognized syntax through *pform and the start of
     * the exponent field through *pechar.
     *
     * The scan is a goto-driven state machine:
     *
     *   (prologue)   white space, optional sign, first character
     *   nextnumber   characters before the radix point
     *   afterpoint   characters after the radix point
     *   exponent     the exponent field
     *   done         finalize pd->exponent, pd->ds, *ppc, and *pform
     *
     * Bookkeeping used throughout:
     *
     *   good       last character accepted so far; the including code
     *              initializes it to *ppc - 1, so good < *ppc at done
     *              means nothing was accepted
     *   sigfound   -1 = no digits yet, 0 = only zeros, 1 = significant
     *              digits found
     *   nzbp/nzap  zeros seen before/after the radix point that have
     *              not yet been stored in pd->ds or folded into
     *              pd->exponent
     *   expshift   0 for decimal input, 2 once "0x" has been seen, so
     *              digit counts are multiplied by 4 when they are
     *              applied to pd->exponent
     *   form       a negative value marks hex form while scanning
     */
    static const char    *infstring = "INFINITY";
    static const char    *nanstring = "NAN";

    int sigfound, spacefound = 0;
    /*
     * ids is the next index to fill in pd->ds; it is set to
     * DECIMAL_STRING_LENGTH once we stop storing digits.
     */
    int ids = 0;
    int i, agree;
    int nzbp = 0; /* number of zeros before point */
    int nzap = 0; /* number of zeros after point */
    char    decpt;
    int nfast, nfastlimit;
    char    *pfast;
    int e, esign;
    int expshift = 0;
    enum decimal_string_form    form;

    /*
     * This routine assumes that the radix point is a single
     * ASCII character, so that following this assignment, the
     * condition (current == decpt) will correctly detect it.
     */
    if (fortran_conventions > 0)
        decpt = '.';
    else
        decpt = *(localeconv()->decimal_point);

    /* input is invalid until we find something */
    pd->fpclass = fp_signaling;
    pd->sign = 0;
    pd->exponent = 0;
    pd->ds[0] = '\0';
    pd->more = 0;
    pd->ndigits = 0;
    *pform = form = invalid_form;
    *pechar = NULL;

    /* skip white space */
    while (isspace(current)) {
        spacefound = 1;
        NEXT;
    }

    if (fortran_conventions >= 2 && spacefound) {
        /*
         * We found at least one white space character.  For
         * Fortran formatted input, accept this; if we don't
         * find anything else, we'll interpret it as a valid zero.
         */
        pd->fpclass = fp_zero;
        form = whitespace_form;
        sigfound = 0;       /* 0 = only zeros found so far */
        if (current == EOF) {
            /* cp is the last white space character read */
            good = cp;
            goto done;
        } else {
            /* cp is the first non-white-space character */
            good = cp - 1;
        }
    } else {
        sigfound = -1;      /* -1 = no digits found yet */
    }

    /* look for optional leading sign */
    if (current == '+') {
        NEXT;
    } else if (current == '-') {
        pd->sign = 1;
        NEXT;
    }

    /*
     * Admissible first non-white-space, non-sign characters are
     * 0-9, i, I, n, N, or the radix point (and a blank when
     * fortran_conventions >= 2).
     */
    if ('1' <= current && current <= '9') {
        good = cp;
        pd->fpclass = fp_normal;
        form = fixed_int_form;
        sigfound = 1;       /* 1 = significant digits found */
        pd->ds[ids++] = (char)current;
        NEXT;
    } else {
        switch (current) {
        case ' ':
            if (fortran_conventions < 2)
                goto done;
            /*
             * When fortran_conventions >= 2, treat leading
             * blanks the same as leading zeroes.
             */
            /*FALLTHRU*/

        case '0':
            /*
             * Accept the leading zero and set pd->fpclass
             * accordingly, but don't set sigfound until we
             * determine that this isn't a "fake" hex string
             * (i.e., 0x.p...).
             */
            good = cp;
            pd->fpclass = fp_zero;
            if (fortran_conventions < 0) {
                /* look for a hex fp string */
                NEXT;
                if (current == 'X' || current == 'x') {
                    /* assume hex fp form */
                    form = (enum decimal_string_form)-1;
                    expshift = 2;
                    NEXT;
                    /*
                     * Only a digit or radix point can
                     * follow "0x".
                     */
                    if (NZDIGIT(current)) {
                        pd->fpclass = fp_normal;
                        good = cp;
                        sigfound = 1;
                        pd->ds[ids++] = (char)current;
                        NEXT;
                        break;
                    } else if (current == decpt) {
                        NEXT;
                        goto afterpoint;
                    } else if (current != '0') {
                        /*
                         * not hex fp after all; good
                         * still marks the leading zero,
                         * so only that is accepted
                         */
                        form = fixed_int_form;
                        expshift = 0;
                        goto done;
                    }
                } else {
                    form = fixed_int_form;
                }
            } else {
                form = fixed_int_form;
            }

            /* skip all leading zeros */
            while (current == '0' || (current == ' ' &&
                fortran_conventions >= 2)) {
                NEXT;
            }
            sigfound = 0;   /* 0 = only zeros found so far */
            if (current == EOF) {
                good = cp;
                goto done;
            } else {
                good = cp - 1;
            }
            break;

        case 'i':
        case 'I':
            /*
             * look for inf or infinity; agree counts how many
             * leading characters of infstring have matched
             */
            NEXT;
            agree = 1;
            while (agree <= 7 &&
                UCASE(current) == infstring[agree]) {
                NEXT;
                agree++;
            }
            if (agree < 3)
                goto done;
            /* found valid infinity */
            pd->fpclass = fp_infinity;
            sigfound = 1;
            __inf_read = 1;
            if (agree < 8) {
                /*
                 * Matched "inf" plus only part of "inity":
                 * back good up to the third character.  cp is
                 * on the last matched character at EOF, and
                 * on the first mismatched one otherwise.
                 */
                good = (current == EOF)? cp + 3 - agree :
                    cp + 2 - agree;
                form = inf_form;
            } else {
                good = (current == EOF)? cp : cp - 1;
                form = infinity_form;
            }
            /*
             * Accept trailing blanks if no extra characters
             * intervene.
             */
            if (fortran_conventions >= 2 && (agree == 3 ||
                agree == 8)) {
                while (current == ' ') {
                    NEXT;
                }
                good = (current == EOF)? cp : cp - 1;
            }
            goto done;

        case 'n':
        case 'N':
            /* look for nan or nan(string) */
            NEXT;
            agree = 1;
            while (agree <= 2 &&
                UCASE(current) == nanstring[agree]) {
                NEXT;
                agree++;
            }
            if (agree < 3)
                goto done;
            /* found valid NaN */
            good = (current == EOF)? cp : cp - 1;
            pd->fpclass = fp_quiet;
            form = nan_form;
            sigfound = 1;
            __nan_read = 1;
            if (current == '(') {
                /*
                 * accept parenthesized string: store as much
                 * of it as fits in pd->ds and set pd->more if
                 * any of it had to be dropped
                 */
                NEXT;
                if (fortran_conventions < 0) {
                    /* only alphanumerics and '_' allowed */
                    while ((isalnum(current) ||
                        current == '_') &&
                        ids < DECIMAL_STRING_LENGTH - 1) {
                        pd->ds[ids++] = (char)current;
                        NEXT;
                    }
                    while (isalnum(current) ||
                        current == '_') {
                        pd->more = 1;
                        NEXT;
                    }
                } else {
                    /* any positive character except ')' */
                    while (current > 0 && current != ')' &&
                        ids < DECIMAL_STRING_LENGTH - 1) {
                        pd->ds[ids++] = (char)current;
                        NEXT;
                    }
                    while (current > 0 && current != ')') {
                        pd->more = 1;
                        NEXT;
                    }
                }
                /*
                 * Without the closing parenthesis, good still
                 * marks the end of "nan", so only that much
                 * is accepted.
                 */
                if (current != ')')
                    goto done;
                good = cp;
                form = nanstring_form;
                /* prepare for loop below */
                if (fortran_conventions >= 2) {
                    NEXT;
                }
            }
            /* accept trailing blanks */
            if (fortran_conventions >= 2) {
                while (current == ' ') {
                    NEXT;
                }
                good = (current == EOF)? cp : cp - 1;
            }
            goto done;

        default:
            if (current == decpt) {
                /*
                 * Don't accept the radix point just yet;
                 * we need to see at least one digit.
                 */
                NEXT;
                goto afterpoint;
            }
            goto done;
        }
    }

nextnumber:
    /*
     * Admissible characters after the first digit are a valid digit,
     * an exponent delimiter (E or e for any decimal form; +, -, D, d,
     * Q, or q when fortran_conventions >= 2; P or p for hex form),
     * or the radix point.  (Note that we can't get here unless we've
     * already found a digit.)
     */
    if (NZDIGIT(current)) {
        /*
         * Found another nonzero digit.  If there's enough room
         * in pd->ds, store any intervening zeros we've found so far
         * and then store this digit.  Otherwise, stop storing
         * digits in pd->ds and set pd->more.
         */
        if (ids + nzbp + 2 < DECIMAL_STRING_LENGTH) {
            for (i = 0; i < nzbp; i++)
                pd->ds[ids++] = '0';
            pd->ds[ids++] = (char)current;
        } else {
            /*
             * The pending zeros and this digit are dropped, so
             * account for their positions in the exponent.
             */
            pd->exponent += (nzbp + 1) << expshift;
            pd->more = 1;
            if (ids < DECIMAL_STRING_LENGTH) {
                pd->ds[ids] = '\0';
                pd->ndigits = ids;
                /* don't store any more digits */
                ids = DECIMAL_STRING_LENGTH;
            }
        }
        pd->fpclass = fp_normal;
        sigfound = 1;
        nzbp = 0;
        NEXT;

        /*
         * Use an optimized loop to grab a consecutive sequence
         * of nonzero digits quickly.  (nfastlimit is negative once
         * ids has been set to DECIMAL_STRING_LENGTH, so the loop
         * stores nothing in that case.)
         */
        nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids;
        for (nfast = 0, pfast = &(pd->ds[ids]);
            nfast < nfastlimit && NZDIGIT(current);
            nfast++) {
            *pfast++ = (char)current;
            NEXT;
        }
        ids += nfast;
        if (current == '0')
            goto nextnumberzero;    /* common case */
        /* advance good to the last accepted digit */
        good = (current == EOF)? cp : cp - 1;
        goto nextnumber;
    } else {
        switch (current) {
        case ' ':
            if (fortran_conventions < 2)
                goto done;
            if (fortran_conventions == 2) {
                /* skip internal blanks */
                while (current == ' ') {
                    NEXT;
                }
                good = (current == EOF)? cp : cp - 1;
                goto nextnumber;
            }
            /*
             * When fortran_conventions > 2, treat internal
             * blanks the same as zeroes.
             */
            /*FALLTHRU*/

        case '0':
nextnumberzero:
            /*
             * Count zeros before the radix point.  Later we
             * will either put these zeros into pd->ds or add
             * nzbp to pd->exponent to account for them.
             */
            while (current == '0' || (current == ' ' &&
                fortran_conventions > 2)) {
                nzbp++;
                NEXT;
            }
            good = (current == EOF)? cp : cp - 1;
            goto nextnumber;

        case '+':
        case '-':
        case 'D':
        case 'd':
        case 'Q':
        case 'q':
            /*
             * Only accept these as the start of the exponent
             * field if fortran_conventions is positive.
             */
            if (fortran_conventions <= 0)
                goto done;
            /*FALLTHRU*/

        case 'E':
        case 'e':
            /* decimal exponent delimiters are invalid in hex form */
            if ((int)form < 0)
                goto done;
            goto exponent;

        case 'P':
        case 'p':
            /* P and p delimit the exponent only in hex form */
            if ((int)form > 0)
                goto done;
            goto exponent;

        default:
            if (current == decpt) {
                /* accept the radix point */
                good = cp;
                if (form == fixed_int_form)
                    form = fixed_intdot_form;
                NEXT;
                goto afterpoint;
            }
            goto done;
        }
    }

afterpoint:
    /*
     * Admissible characters after the radix point are a valid digit
     * or an exponent delimiter.  (Note that it is possible to get
     * here even though we haven't found any digits yet.)
     */
    if (NZDIGIT(current)) {
        /* found a digit after the point; revise form */
        if (form == invalid_form || form == whitespace_form)
            form = fixed_dotfrac_form;
        else if (form == fixed_intdot_form)
            form = fixed_intdotfrac_form;
        good = cp;
        if (sigfound < 1) {
            /* no significant digits found until now */
            pd->fpclass = fp_normal;
            sigfound = 1;
            pd->ds[ids++] = (char)current;
            /*
             * Negate after shifting rather than before:
             * left-shifting a negative value is undefined
             * behavior in C.  NOTE(review): the file header
             * asks that fixes made here also be applied to
             * wstring_to_decimal (wstod.c) if it has the
             * same expression.
             */
            pd->exponent = -((nzap + 1) << expshift);
        } else {
            /* significant digits have been found */
            if (ids + nzbp + nzap + 2 < DECIMAL_STRING_LENGTH) {
                for (i = 0; i < nzbp + nzap; i++)
                    pd->ds[ids++] = '0';
                pd->ds[ids++] = (char)current;
                pd->exponent -= (nzap + 1) << expshift;
            } else {
                /*
                 * No room: only the dropped zeros before
                 * the point affect the exponent.
                 */
                pd->exponent += nzbp << expshift;
                pd->more = 1;
                if (ids < DECIMAL_STRING_LENGTH) {
                    pd->ds[ids] = '\0';
                    pd->ndigits = ids;
                    /* don't store any more digits */
                    ids = DECIMAL_STRING_LENGTH;
                }
            }
        }
        nzbp = 0;
        nzap = 0;
        NEXT;

        /*
         * Use an optimized loop to grab a consecutive sequence
         * of nonzero digits quickly.  Each digit stored here lies
         * after the radix point, so the exponent is decreased to
         * match.
         */
        nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids;
        for (nfast = 0, pfast = &(pd->ds[ids]);
            nfast < nfastlimit && NZDIGIT(current);
            nfast++) {
            *pfast++ = (char)current;
            NEXT;
        }
        ids += nfast;
        pd->exponent -= nfast << expshift;
        if (current == '0')
            goto zeroafterpoint;
        /* advance good to the last accepted digit */
        good = (current == EOF)? cp : cp - 1;
        goto afterpoint;
    } else {
        switch (current) {
        case ' ':
            if (fortran_conventions < 2)
                goto done;
            if (fortran_conventions == 2) {
                /*
                 * Treat a radix point followed by blanks
                 * but no digits as zero so we'll pass FCVS.
                 */
                if (sigfound == -1) {
                    pd->fpclass = fp_zero;
                    sigfound = 0;
                }
                while (current == ' ') {
                    NEXT;
                }
                good = (current == EOF)? cp : cp - 1;
                goto afterpoint;
            }
            /*
             * when fortran_conventions > 2, treat internal
             * blanks the same as zeroes
             */
            /*FALLTHRU*/

        case '0':
            /* found a digit after the point; revise form */
            if (form == invalid_form || form == whitespace_form)
                form = fixed_dotfrac_form;
            else if (form == fixed_intdot_form)
                form = fixed_intdotfrac_form;
            if (sigfound == -1) {
                pd->fpclass = fp_zero;
                sigfound = 0;
            }
zeroafterpoint:
            /*
             * Count zeros after the radix point.  If we find
             * any more nonzero digits later, we will put these
             * zeros into pd->ds and decrease pd->exponent by
             * nzap.
             */
            while (current == '0' || (current == ' ' &&
                fortran_conventions > 2)) {
                nzap++;
                NEXT;
            }
            if (current == EOF) {
                good = cp;
                goto done;
            } else {
                good = cp - 1;
            }
            goto afterpoint;

        case '+':
        case '-':
        case 'D':
        case 'd':
        case 'Q':
        case 'q':
            /*
             * Only accept these as the start of the exponent
             * field if fortran_conventions is positive.
             */
            if (fortran_conventions <= 0)
                goto done;
            /*FALLTHRU*/

        case 'E':
        case 'e':
            /* don't accept exponent without preceding digits */
            if (sigfound == -1 || (int)form < 0)
                goto done;
            break;

        case 'P':
        case 'p':
            /* don't accept exponent without preceding digits */
            if (sigfound == -1 || (int)form > 0)
                goto done;
            break;

        default:
            goto done;
        }
    }

exponent:
    /*
     * Set *pechar to point to the character that looks like the
     * beginning of the exponent field, then attempt to parse it.
     */
    *pechar = cp;
    if (current != '+' && current != '-') {
        /* skip the exponent character and following blanks */
        NEXT;
        if (fortran_conventions >= 2 && current == ' ') {
            while (current == ' ') {
                NEXT;
            }
            if (fortran_conventions > 2)
                good = (current == EOF)? cp : cp - 1;
        }
    }

    e = 0;
    esign = 0;

    /* look for optional exponent sign */
    if (current == '+') {
        NEXT;
    } else if (current == '-') {
        esign = 1;
        NEXT;
    }

    /*
     * Accumulate explicit exponent.  Note that if we don't find at
     * least one digit, good won't be updated and e will remain 0.
     * Also, we keep e from getting too large so we don't overflow
     * the range of int (but notice that the threshold is large
     * enough that any larger e would cause the result to underflow
     * or overflow anyway).
     */
    while (('0' <= current && current <= '9') || current == ' ') {
        if (current == ' ') {
            if (fortran_conventions < 2)
                break;
            if (fortran_conventions == 2) {
                /* skip the blank without accepting it */
                NEXT;
                continue;
            }
            /* fortran_conventions > 2: a blank counts as a zero */
            current = '0';
        }
        good = cp;
        if (e <= 1000000)
            e = 10 * e + current - '0';
        NEXT;
        if (fortran_conventions == 2 && current == ' ') {
            /* accept trailing blanks */
            while (current == ' ') {
                NEXT;
            }
            good = (current == EOF)? cp : cp - 1;
        }
    }
    if (esign == 1)
        pd->exponent -= e;
    else
        pd->exponent += e;

    /*
     * If we successfully parsed an exponent field, update form
     * accordingly.  If we didn't, don't set *pechar.
     */
    if (good >= *pechar) {
        switch (form) {
        case whitespace_form:
        case fixed_int_form:
            form = floating_int_form;
            break;

        case fixed_intdot_form:
            form = floating_intdot_form;
            break;

        case fixed_dotfrac_form:
            form = floating_dotfrac_form;
            break;

        case fixed_intdotfrac_form:
            form = floating_intdotfrac_form;
            break;

        default:
            /* any other form (e.g., hex) is left unchanged */
            break;
        }
    } else {
        *pechar = NULL;
    }

done:
    /*
     * If we found any zeros before the radix point that were not
     * accounted for earlier, adjust the exponent.  (This is only
     * relevant when pd->fpclass == fp_normal, but it's harmless
     * in all other cases.)
     */
    pd->exponent += nzbp << expshift;

    /* terminate pd->ds if we haven't already */
    if (ids < DECIMAL_STRING_LENGTH) {
        pd->ds[ids] = '\0';
        pd->ndigits = ids;
    }

    /*
     * If we accepted any characters, advance *ppc to point to the
     * first character we didn't accept; otherwise, pass back a
     * signaling nan.
     */
    if (good >= *ppc) {
        *ppc = good + 1;
    } else {
        pd->fpclass = fp_signaling;
        pd->sign = 0;
        form = invalid_form;
    }

    *pform = form;
}