cmd/filesync/files.c

	files.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1995 Sun Microsystems, Inc.  All Rights Reserved
 *
 * module:
 *  files.c
 *
 * purpose:
 *  routines to examine and manipulate file names
 *
 * contents:
 *  qualify ... ensure that a name is fully qualified
 *  expand  ... expand env variables within a string or file name
 *  noblanks .. ensure that a name contains no embedded unescaped blanks
 *  lex ....... a lexer that can handle escaped/embedded blanks
 *  wildcards . see whether or not a name contains wild cards
 *  prefix .... does one string begin with another
 *  suffix .... does one string end with another
 *  contains .. does one string contain another
 *
 *  cannonize (static) ...  compress redundant "." and ".." out of name
 *
 * notes:
 *  we are interested in embedded blanks because international
 *  character sets and non-unix file systems can both contain
 *  the byte 0x20.  Thus, whenever we record a filename in a
 *  file, we must be careful to escape any embedded blanks that
 *  would cause trouble when we re-lex that file later.
 */
#ident  "%W%    %E% SMI"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <unistd.h>

#include "filesync.h"
#include "messages.h"

static void cannonize(char *name);

/*
 * routine:
 *  qualify
 *
 * purpose:
 *  to fully qualify a name
 *
 * parameters:
 *  name to be qualified
 *
 * returns:
 *  either original pointer or copy to a new (malloced) buffer
 *
 * notes:
 *  someday I may conclude that I should always make a copy
 *  so that the caller can know that it is safe to free the parm
 *
 *  I thought about this and concluded that there is never a need
 *  to fully qualify a string containing variables.  If the string
 *  came from the command line, the variables were already expanded
 *  and if it came from the rules data base it is required to already
 *  be fully qualified.
 */
char *
qualify(char *name)
{
    char path[ MAX_PATH ];
    size_t cwdlen;

    /*
     * Names that are already absolute, and names that start with a
     * variable reference, are only cleaned up in place; the caller
     * gets back the very pointer it passed in.
     */
    if (*name == '/' || *name == '$') {
        cannonize(name);
        return (name);
    }

    /* everything else is taken relative to the working directory */
    if (getcwd(path, sizeof (path)) == 0) {
        fprintf(stderr, gettext(ERR_nocwd), name);
        exit(ERR_OTHER);
    }

    /* directory + '/' + name + terminator must fit in the buffer */
    cwdlen = strlen(path);
    if ((cwdlen + strlen(name) + 2) >= sizeof (path)) {
        fprintf(stderr, gettext(ERR_longname), name);
        exit(ERR_OTHER);
    }

    /* glue the pieces together: "<cwd>/<name>" */
    path[cwdlen] = '/';
    strcpy(&path[cwdlen + 1], name);

    /* squeeze out redundant "." and ".." components */
    cannonize(path);

    if (opt_debug & DBG_VARS)
        fprintf(stderr, "VARS: QUALIFY %s to %s\n", name, path);

    /* the qualified name is handed back in freshly allocated storage */
    return (strdup(path));
}

/*
 * routine:
 *  expand
 *
 * purpose:
 *  to expand variable names within a string
 *
 * parameters:
 *  string to be expanded.  Variable references always begin
 *  with a $ and are delimited by parens or curleys.
 *
 * returns:
 *  either original pointer or a copy to a new (malloced) buffer
 *
 * notes:
 *  someday I may conclude that I should always make a copy
 *  so that the caller can know that it is safe to free the parm
 *
 *  someday I may decide to support escape conventions for embedding
 *  $(){} in file names, but I suspect that day will never come.
 *
 *  I thought about this and concluded there was no reason to
 *  fully qualify these names, because the only names that should
 *  need qualification are src/dst lines from the command line,
 *  and the shell should have handled those for me.  Once something
 *  makes it into the database, it is expected to be fully qualified
 *  already.
 *
 *  We are limited to producing strings of length MAX_PATH or less
 *  and variable names of length MAX_NAME or less.  In practice,
 *  these limitations should not be a problem.
 */
char *
expand(char *name)
{
    /*
     * Returns name itself when it contains no '$', a newly strdup'd
     * expanded and canonized copy otherwise, or 0 (after printing
     * ERR_undef) when a referenced variable is unset or empty.
     * Exits with ERR_OTHER if the result or a variable name does not
     * fit in the local buffers.
     */
    const char *s;
    char *p, *v;
    char delim;
    char namebuf[ MAX_PATH ];
    char varbuf[ MAX_NAME ];
    /* last slot of namebuf; always kept free for the terminating null */
    char *const limit = &namebuf[ MAX_PATH - 1 ];

    /* first see if there are no variables to be bound */
    if (strchr(name, '$') == 0)
        return (name);

    /* move through the string, copying and expanding   */
    for (s = name, p = namebuf; *s; s++) {

        /* normal characters, we just copy (if there is room) */
        if (*s != '$') {
            if (p >= limit) {
                fprintf(stderr, gettext(ERR_longname), name);
                exit(ERR_OTHER);
            }
            *p++ = *s;
            continue;
        }

        /* figure out how the variable name is delimited */
        delim = *++s;
        if (delim == '(') {
            delim = ')';
            s++;
        } else if (delim == '{') {
            delim = '}';
            s++;
        } else
            delim = 0;

        /*
         * copy the variable name up to the closing delimiter.
         * Undelimited names end at the first character that is
         * not alphanumeric or '_'; delimited names run to the
         * delimiter.  The cast keeps isalnum() defined for bytes
         * with the high bit set.
         */
        for (v = varbuf; *s; s++) {
            if (isalnum((unsigned char)*s) || (*s == '_') ||
                (delim && *s != delim))
                *v++ = *s;
            else
                break;

            /* make sure we don't overflow var name buffer  */
            if (v >= &varbuf[MAX_NAME - 1]) {
                *v = 0;
                fprintf(stderr, gettext(ERR_longname), varbuf);
                exit(ERR_OTHER);
            }
        }

        *v = 0;

        /*
         * the enclosing for loop is about to bump s, so unless s
         * sits on the closing delimiter (which should be consumed)
         * back up one so the current character is not skipped.
         */
        if (delim == 0 || *s != delim)
            s--;

        /* look up the variable             */
        v = getenv(varbuf);
        if (v == 0 || *v == 0) {
            fprintf(stderr, gettext(ERR_undef), varbuf);
            return (0);
        }

        /*
         * copy the value into the buffer, checking every byte: the
         * value comes from the environment and can be any length.
         */
        while (*v) {
            if (p >= limit) {
                fprintf(stderr, gettext(ERR_longname), name);
                exit(ERR_OTHER);
            }
            *p++ = *v++;
        }
    }

    /* null terminate the copy (p never exceeds limit) */
    *p = 0;

    /* compress out any redundant dots and dot-dots */
    cannonize(namebuf);

    if (opt_debug & DBG_VARS)
        fprintf(stderr, "VARS: EXPAND %s to %s\n", name, namebuf);

    /* and return a newly malloc'd copy */
    return (strdup(namebuf));
}

/*
 * routine:
 *  noblanks
 *
 * purpose:
 *  to ensure that a name contains no unescaped embedded blanks
 *
 * parameters:
 *  pointer to name
 *
 * returns:
 *  pointer to name or pointer to buffer containing escaped version of name
 *
 * notes:
 *  this routine can be called on full file names, and so can
 *  conceivably require an arbitrarily large buffer.
 */
const char *
noblanks(const char *name)
{
    /*
     * Returns name itself when it holds no blank; otherwise returns
     * a double-quoted copy, with '"' and '\\' backslash-escaped, held
     * in a static buffer that the next quoting call overwrites (and
     * may move).
     */
    const char *s;
    char *p;
    static char *namebuf = 0;
    static size_t buflen = 0;
    size_t need, newlen;
    char *newbuf;

    /* first see if there are no embedded blanks    */
    if (strchr(name, ' ') == 0)
        return (name);

    /*
     * worst case: every character escaped, two quotes, a null.
     * The buffer only ever grows, in MAX_NAME steps above MAX_PATH.
     */
    need = 4 + (2 * strlen(name));
    if (need > buflen) {
        for (newlen = MAX_PATH; newlen < need; newlen += MAX_NAME)
            ;

        /* keep the old pointer until we know realloc succeeded */
        newbuf = realloc(namebuf, newlen);
        if (newbuf == 0) {
            /*
             * NOTE(review): other fatal paths in this file print a
             * gettext message and exit(ERR_OTHER); switch to the
             * project's out-of-memory handling if one exists.
             */
            perror("noblanks");
            abort();
        }
        namebuf = newbuf;
        buflen = newlen;
    }

    /* quote the name, and copy it, escaping quotes and backslashes */
    p = namebuf;
    *p++ = '"';

    for (s = name; *s; s++) {
        if (*s == '"' || *s == '\\')
            *p++ = '\\';
        *p++ = *s;
    }

    *p++ = '"';
    *p = 0;

    return (namebuf);
}

/*
 * routine:
 *  lex
 *
 * purpose:
 *  my own version of strtok that handles quoting and escaping
 *
 * parameters:
 *  FILE structure for file to read (0 for same string, same file)
 *
 * returns:
 *  pointer to next token
 *
 * notes:
 *  this routine makes no changes to the string it is passed,
 *  copying tokens into a static buffer.
 *
 *  this routine handles continuation lines after reading and
 *  before the lexing even starts.  This limits continued lines
 *  to a length of MAX_LINE, but keeps everything else very simple.
 *  We also, therefore, limit tokens to a maximum length of MAX_LINE.
 */
int lex_linenum;        /* line number in current input file    */

char *
lex(FILE *file)
{
    /*
     * With a non-null file, reads the next line (joining lines that
     * end in a backslash) and returns its first token; with a null
     * file, returns the next token of the line read previously.
     * Returns 0 at end of file or when the line has no more tokens.
     * The token lives in a static buffer that the next call reuses.
     */
    char c, delim;
    char *p;
    char *s;
    static char *savep;
    static char namebuf[ MAX_LINE ];
    static char inbuf[ MAX_LINE ];

    if (file) {         /* read a new line      */
        p = inbuf + sizeof (inbuf);

        /* read the next input line, with all continuations */
        for (s = inbuf; (savep = fgets(s, (int)(p - s), file)) != 0; ) {
            lex_linenum++;

            /* nothing was stored, so there is nothing to examine */
            if (*s == 0)
                break;

            /* go find the last character of the input line */
            while (s[1])
                s++;

            /*
             * step back over the newline; a line that is only a
             * newline at the very start of the buffer has nothing
             * before it, so it cannot be a continuation.
             */
            if (*s == '\n') {
                if (s == inbuf)
                    break;
                s--;
            }

            /*
             * a trailing backslash asks for a continuation; s is
             * left on the backslash so the next read overwrites it.
             */
            if (*s != '\\')
                break;
        }

        /* end of file (a pending continuation is discarded too) */
        if (savep == 0)
            return (0);

        s = inbuf;
    } else {            /* continue with old line   */
        if (savep == 0)
            return (0);
        s = savep;
    }
    savep = 0;

    /*
     * skip over leading white space.  The cast keeps isspace()
     * defined for bytes with the high bit set.
     */
    while (isspace((unsigned char)*s))
        s++;
    if (*s == 0)
        return (0);

    /* see if this is a quoted string   */
    c = *s;
    if (c == '\'' || c == '"') {
        delim = c;
        s++;
    } else
        delim = 0;

    /* copy the token into the buffer   */
    for (p = namebuf; (c = *s) != 0; s++) {
        /* literal escape       */
        if (c == '\\') {
            /*
             * a backslash at the very end of the line has nothing
             * to escape; stop on the terminator instead of
             * stepping over it.
             */
            if (s[1] == 0) {
                s++;
                break;
            }
            *p++ = *++s;
            continue;
        }

        /* closing delimiter        */
        if (c == delim) {
            s++;
            break;
        }

        /* delimiting white space   */
        if (delim == 0 && isspace((unsigned char)c))
            break;

        /* ordinary characters      */
        *p++ = c;
    }

    /* remember where we left off       */
    savep = *s ? s : 0;

    /* null terminate and return the buffer */
    *p = 0;
    return (namebuf);
}

/*
 * routine:
 *  wildcards
 *
 * purpose:
 *  determine whether or not there are any wild cards in a name
 *
 * parameters:
 *  name to be checked
 *
 * returns:
 *  true/false
 *
 * notes:
 *  we use this to take shortcuts
 */
bool_t
wildcards(const char *name)
{
    /*
     * Returns TRUE if name holds an unescaped '*', '[', '{' or '?'
     * outside of single quotes, FALSE otherwise.
     */
    const char *s;
    int literal = 0;    /* non-zero while inside '...' */

    for (s = name; *s; s++) {
        /* a backslash hides the following character in either mode */
        if (*s == '\\') {
            /* nothing follows a trailing backslash; stop here */
            if (s[1] == 0)
                break;
            s++;
            continue;
        }

        /* a single quote opens or closes a literal string */
        if (*s == '\'') {
            literal = !literal;
            continue;
        }

        /* inside a literal string nothing is a wild card */
        if (literal)
            continue;

        switch (*s) {
        case '*':
        case '[':
        case '{':
        case '?':
            /* any of these is a wild card  */
            return (TRUE);
        }
    }

    return (FALSE);
}

/*
 * routine:
 *  cannonize
 *
 * purpose:
 *  to compress redundant dots out of a path
 *
 * parameters:
 *  file name in an editable buffer
 *
 * returns:
 *  void
 *
 * notes:
 *  because we compress the string in place, there is no danger
 *  of our overflowing any fixed sized buffer.
 */
static void
cannonize(char *name)
{
    /*
     * Edits name in place: strips leading "./", removes embedded
     * "/./", and collapses "<component>/../".  Only components that
     * are followed by a slash are examined, so a trailing "/." or
     * "/.." is left alone.  When a "/../" has no earlier slash to back
     * up to, everything from the start of the name is dropped and the
     * result begins with '/'.
     *
     * All shifts use memmove because source and destination overlap.
     */
    char *s, *p;

    /* leading dot-slashes  */
    while (name[0] == '.' && name[1] == '/')
        memmove(name, name + 2, strlen(name + 2) + 1);

    s = name;
    while (*s) {
        /* interesting things happen after slashes  */
        if (*s != '/') {
            s++;
            continue;
        }

        /* embedded dot-slashes */
        while (s[1] == '.' && s[2] == '/')
            memmove(s + 1, s + 3, strlen(s + 3) + 1);

        /* anything but slash-dot-dot-slash needs no more work */
        if (strncmp(s, "/../", 4) != 0) {
            s++;
            continue;
        }

        /* scan backwards to the slash that starts the last directory */
        p = s;
        while (p > name && *--p != '/')
            ;

        /* splice that directory and the "/.." out of the name */
        memmove(p, s + 3, strlen(s + 3) + 1);

        /*
         * resume at the splice point: what now follows may itself be
         * a "/./" or "/../", and nothing past the new terminator
         * should ever be looked at.
         */
        s = p;
    }
}

/*
 * routine:
 *  prefix
 *
 * purpose:
 *  determine whether or not one string begins with another
 *
 * parameters:
 *  string to be tested
 *  suspected prefix
 *
 * returns:
 *  no  0
 *  yes pointer to the character after the prefix
 */
const char *
prefix(const char *s, const char *p)
{
    /* number of leading characters of s that have to agree with p */
    size_t plen = strlen(p);

    /* any disagreement (including s running out early) means "no" */
    if (strncmp(s, p, plen) != 0)
        return (0);

    /* otherwise hand back the first character beyond the prefix */
    return (s + plen);
}

/*
 * routine:
 *  suffix
 *
 * purpose:
 *  determine whether or not one string ends with another
 *
 * parameters:
 *  string to be tested
 *  suspected suffix
 *
 * returns:
 *  true/false
 */
bool_t
suffix(const char *str, const char *suf)
{
    /*
     * Returns TRUE if str ends with suf (an empty suf always
     * matches), FALSE otherwise.
     */
    size_t slen = strlen(str);
    size_t suflen = strlen(suf);

    /*
     * compare lengths rather than pointers: backing a pointer up
     * past the start of str would be undefined.
     */
    if (suflen > slen)
        return (FALSE);

    /* the tail of str that is as long as suf must equal suf */
    if (strcmp(str + (slen - suflen), suf) != 0)
        return (FALSE);

    return (TRUE);
}

/*
 * routine:
 *  contains
 *
 * purpose:
 *  determine whether or not one string contains another
 *
 * parameters:
 *  string to be checked
 *  pattern we are seeking
 *
 * returns:
 *  true/false
 */
bool_t
contains(const char *str, const char *pat)
{
    /*
     * Returns TRUE if pat occurs anywhere in str, FALSE otherwise.
     * An empty pattern is reported as not contained, as it always
     * has been.
     */
    if (*pat == 0)
        return (FALSE);

    /*
     * strstr handles patterns of every length, including the
     * one-character patterns the hand-rolled scan got wrong.
     */
    if (strstr(str, pat) != 0)
        return (TRUE);

    return (FALSE);
}