fs/nfs/nfs_auth.c

	nfs_auth.c revision 323a81d93e2f58a7d62f6e523f9fddbc029d3d0b
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident   "%Z%%M% %I% %E% SMI"

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/cred.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/pathname.h>
#include <sys/utsname.h>
#include <sys/debug.h>
#include <sys/door.h>
#include <sys/sdt.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>

#include <nfs/nfs.h>
#include <nfs/export.h>
#include <nfs/nfs_clnt.h>
#include <nfs/auth.h>

/*
 * Compare two addresses (anything with `len' and `buf' members, e.g. a
 * struct netbuf) for equality.
 *
 * The lengths are compared first so that bcmp() is only ever asked to
 * look at (a1)->len bytes when both buffers are known to be that long;
 * comparing the bytes first could read past the end of a shorter
 * (a2)->buf.  Note that both arguments are evaluated more than once.
 */
#define EQADDR(a1, a2)  \
    ((a1)->len == (a2)->len && \
    bcmp((char *)(a1)->buf, (char *)(a2)->buf, (a1)->len) == 0)

/*
 * NOTE(review): auth_knconf, svp and ci are not referenced by any of the
 * code visible in this file -- presumably leftovers from an earlier
 * upcall mechanism; confirm before removing.
 */
static struct knetconfig auth_knconf;
static servinfo_t svp;
static clinfo_t ci;

/*
 * kmem cache from which the struct auth_cache entries are allocated.
 * It is created in nfsauth_init() with exi_cache_reclaim() registered
 * as its reclaim callback, and destroyed in nfsauth_fini().
 */
static struct kmem_cache *exi_cache_handle;
static void exi_cache_reclaim(void *);
static void exi_cache_trim(struct exportinfo *exi);

/*
 * Simple event counters: cache hits and misses are counted in
 * nfsauth_cache_get(), reclaim callbacks in exi_cache_reclaim().
 * They are incremented with plain ++ (no atomics).
 */
int nfsauth_cache_hit;
int nfsauth_cache_miss;
int nfsauth_cache_reclaim;

/*
 * Number of seconds to wait for an NFSAUTH upcall.
 *
 * NOTE(review): nothing in the code visible in this file reads
 * nfsauth_timeout -- confirm whether it is still used.
 */
static int nfsauth_timeout = 20;

/*
 * mountd is a server-side only daemon. This will need to be
 * revisited if the NFS server is ever made zones-aware.
 *
 * mountd_dh is the door handle used for upcalls to mountd; it is set
 * by mountd_args() and read (and possibly cleared) by
 * nfsauth_cache_get(), always under mountd_lock.
 */
kmutex_t    mountd_lock;
door_handle_t   mountd_dh;

/*
 * Install the door identified by `did' as the rendezvous point for
 * upcalls to mountd.  A previously stored handle, if any, is released
 * before the new one is looked up; the whole swap happens with
 * mountd_lock held.
 */
void
mountd_args(uint_t did)
{
    mutex_enter(&mountd_lock);

    if (mountd_dh != NULL) {
        door_ki_rele(mountd_dh);
    }
    mountd_dh = door_ki_lookup(did);

    mutex_exit(&mountd_lock);
}

/*
 * One-time initialization of the nfsauth subsystem: sets up the lock
 * guarding the mountd door handle and creates the kmem cache used for
 * auth_cache entries.
 */
void
nfsauth_init(void)
{
    /*
     * smf(5) may restart mountd at any time, which replaces the
     * door handle in mountd_dh; mountd_lock serializes those
     * updates against readers of the handle.
     */
    mutex_init(&mountd_lock, NULL, MUTEX_DEFAULT, NULL);

    /*
     * Cache of struct auth_cache objects.  exi_cache_reclaim() is
     * registered as the reclaim callback.
     */
    exi_cache_handle = kmem_cache_create("exi_cache_handle",
        sizeof (struct auth_cache),
        0, NULL, NULL, exi_cache_reclaim, NULL, NULL, 0);
}

/*
 * Tear down what nfsauth_init() set up for the auth cache.  This must
 * be called before the exported_lock is destroyed.
 */
void
nfsauth_fini(void)
{
    /* Release the kmem cache backing the auth_cache entries. */
    kmem_cache_destroy(exi_cache_handle);
}

/*
 * Map the address held in a netbuf to a bucket index of the
 * auth_cache table: all address bytes are XOR-ed together and the
 * result is reduced with the (AUTH_TABLESIZE - 1) mask.
 */
static int
hash(struct netbuf *a)
{
    int folded = 0;
    int pos = 0;

    while (pos < a->len) {
        folded ^= a->buf[pos];
        pos++;
    }

    return (folded & (AUTH_TABLESIZE - 1));
}

/*
 * Strip from `addr' everything that does not identify a host by
 * AND-ing it, byte by byte and in place, with `mask'.  For socket
 * addresses this removes the port number.  The loop runs over
 * addr->len bytes of both buffers.
 */
static void
addrmask(struct netbuf *addr, struct netbuf *mask)
{
    int pos = 0;

    while (pos < addr->len) {
        addr->buf[pos] &= mask->buf[pos];
        pos++;
    }
}

/*
 * NFS V4 flavor of the access check.  It starts from the result of the
 * common nfsauth_access() and, when that result says the client is
 * denied or used the wrong security flavor, still grants a LIMITED
 * view if something below vp is visible to the client.
 *
 * Example:
 *   /export       is shared as "sec=sys,rw=dfs-test-4,sec=krb5,rw"
 *   /export/home  is shared as "sec=sys,rw"
 *
 * A client mounting /export with sec=sys gets a limited, read-only
 * view of /export in which only "home" shows up, because auth_sys is
 * good enough for /export/home.
 */
int
nfsauth4_access(struct exportinfo *exi, vnode_t *vp, struct svc_req *req)
{
    int access = nfsauth_access(exi, req);

    /* Neither denied nor wrong flavor: the common answer stands. */
    if ((access & (NFSAUTH_DENIED | NFSAUTH_WRONGSEC)) == 0)
        return (access);

    /*
     * Refused by the common check; fall back to a read-only LIMITED
     * view when a sub-directory exported under vp is visible.
     */
    if (has_visible(exi, vp))
        return (NFSAUTH_LIMITED);

    return (access);
}

/*
 * Log `msg' as a CE_WARN message, rate limited to (at most) one
 * message per minute.  The timestamp is a single function-static
 * variable, so the limit is shared by every caller and every message
 * text, and it is updated without any locking.
 *
 * msg is always passed to cmn_err() as an argument of a "%s" format,
 * never as the format string itself, so a '%' in the text cannot be
 * misinterpreted as a conversion specification.
 */
static void
sys_log(const char *msg)
{
    static time_t   tstamp = 0;
    time_t      now;

    /*
     * msg is shown (at most) once per minute
     */
    now = gethrestime_sec();
    if ((tstamp + 60) < now) {
        tstamp = now;
        cmn_err(CE_WARN, "%s", msg);
    }
}

/*
 * Get the access information from the cache or callup to the mountd
 * to get and cache the access information in the kernel.
 */
int
nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor)
{
    struct netbuf         addr;
    struct netbuf        *claddr;
    struct auth_cache   **head;
    struct auth_cache    *ap;
    int           access;
    varg_t            varg = {0};
    nfsauth_res_t         res = {0};
    XDR           xdrs_a;
    XDR           xdrs_r;
    size_t            absz;
    caddr_t           abuf;
    size_t            rbsz = (size_t)(BYTES_PER_XDR_UNIT * 2);
    char              result[BYTES_PER_XDR_UNIT * 2] = {0};
    caddr_t           rbuf = (caddr_t)&result;
    int           last = 0;
    door_arg_t        da;
    door_info_t       di;
    door_handle_t         dh;
    uint_t            ntries = 0;

    /*
     * Now check whether this client already
     * has an entry for this flavor in the cache
     * for this export.
     * Get the caller's address, mask off the
     * parts of the address that do not identify
     * the host (port number, etc), and then hash
     * it to find the chain of cache entries.
     */

    claddr = svc_getrpccaller(req->rq_xprt);
    addr = *claddr;
    addr.buf = kmem_alloc(addr.len, KM_SLEEP);
    bcopy(claddr->buf, addr.buf, claddr->len);
    addrmask(&addr, svc_getaddrmask(req->rq_xprt));
    head = &exi->exi_cache[hash(&addr)];

    rw_enter(&exi->exi_cache_lock, RW_READER);
    for (ap = *head; ap; ap = ap->auth_next) {
        if (EQADDR(&addr, &ap->auth_addr) && flavor == ap->auth_flavor)
            break;
    }
    if (ap) {               /* cache hit */
        access = ap->auth_access;
        ap->auth_time = gethrestime_sec();
        nfsauth_cache_hit++;
    }

    rw_exit(&exi->exi_cache_lock);

    if (ap) {
        kmem_free(addr.buf, addr.len);
        return (access);
    }

    nfsauth_cache_miss++;

    /*
     * No entry in the cache for this client/flavor
     * so we need to call the nfsauth service in the
     * mount daemon.
     */
retry:
    mutex_enter(&mountd_lock);
    dh = mountd_dh;
    if (dh)
        door_ki_hold(dh);
    mutex_exit(&mountd_lock);

    if (dh == NULL) {
        /*
         * The rendezvous point has not been established yet !
         * This could mean that either mountd(1m) has not yet
         * been started or that _this_ routine nuked the door
         * handle after receiving an EINTR for a REVOKED door.
         *
         * Returning NFSAUTH_DROP will cause the NFS client
         * to retransmit the request, so let's try to be more
         * rescillient and attempt for ntries before we bail.
         */
        if (++ntries % NFSAUTH_DR_TRYCNT) {
            delay(hz);
            goto retry;
        }
        sys_log("nfsauth: mountd has not established door");
        kmem_free(addr.buf, addr.len);
        return (NFSAUTH_DROP);
    }
    ntries = 0;
    varg.vers = V_PROTO;
    varg.arg_u.arg.cmd = NFSAUTH_ACCESS;
    varg.arg_u.arg.areq.req_client.n_len = addr.len;
    varg.arg_u.arg.areq.req_client.n_bytes = addr.buf;
    varg.arg_u.arg.areq.req_netid = svc_getnetid(req->rq_xprt);
    varg.arg_u.arg.areq.req_path = exi->exi_export.ex_path;
    varg.arg_u.arg.areq.req_flavor = flavor;

    /*
     * Setup the XDR stream for encoding the arguments. Notice that
     * in addition to the args having variable fields (req_netid and
     * req_path), the argument data structure is itself versioned,
     * so we need to make sure we can size the arguments buffer
     * appropriately to encode all the args. If we can't get sizing
     * info _or_ properly encode the arguments, there's really no
     * point in continuting, so we fail the request.
     */
    DTRACE_PROBE1(nfsserv__func__nfsauth__varg, varg_t *, &varg);
    if ((absz = xdr_sizeof(xdr_varg, (void *)&varg)) == 0) {
        door_ki_rele(dh);
        kmem_free(addr.buf, addr.len);
        return (NFSAUTH_DENIED);
    }
    abuf = (caddr_t)kmem_alloc(absz, KM_SLEEP);
    xdrmem_create(&xdrs_a, abuf, absz, XDR_ENCODE);
    if (!xdr_varg(&xdrs_a, &varg)) {
        door_ki_rele(dh);
        goto fail;
    }
    XDR_DESTROY(&xdrs_a);

    /*
     * The result (nfsauth_res_t) is always two int's, so we don't
     * have to dynamically size (or allocate) the results buffer.
     * Now that we've got what we need, we prep the door arguments
     * and place the call.
     */
    da.data_ptr = (char *)abuf;
    da.data_size = absz;
    da.desc_ptr = NULL;
    da.desc_num = 0;
    da.rbuf = (char *)rbuf;
    da.rsize = rbsz;

    switch (door_ki_upcall_limited(dh, &da, NULL, SIZE_MAX, 0)) {
        case 0:             /* Success */
            if (da.data_ptr != da.rbuf && da.data_size == 0) {
                /*
                 * The door_return that contained the data
                 * failed ! We're here because of the 2nd
                 * door_return (w/o data) such that we can
                 * get control of the thread (and exit
                 * gracefully).
                 */
                DTRACE_PROBE1(nfsserv__func__nfsauth__door__nil,
                    door_arg_t *, &da);
                door_ki_rele(dh);
                goto fail;

            } else if (rbuf != da.rbuf) {
                /*
                 * The only time this should be true
                 * is iff userland wanted to hand us
                 * a bigger response than what we
                 * expect; that should not happen
                 * (nfsauth_res_t is only 2 int's),
                 * but we check nevertheless.
                 */
                rbuf = da.rbuf;
                rbsz = da.rsize;

            } else if (rbsz > da.data_size) {
                /*
                 * We were expecting two int's; but if
                 * userland fails in encoding the XDR
                 * stream, we detect that here, since
                 * the mountd forces down only one byte
                 * in such scenario.
                 */
                door_ki_rele(dh);
                goto fail;
            }
            door_ki_rele(dh);
            break;

        case EAGAIN:
            /*
             * Server out of resources; back off for a bit
             */
            door_ki_rele(dh);
            kmem_free(abuf, absz);
            delay(hz);
            goto retry;
            /* NOTREACHED */

        case EINTR:
            if (!door_ki_info(dh, &di)) {
                if (di.di_attributes & DOOR_REVOKED) {
                    /*
                     * The server barfed and revoked
                     * the (existing) door on us; we
                     * want to wait to give smf(5) a
                     * chance to restart mountd(1m)
                     * and establish a new door handle.
                     */
                    mutex_enter(&mountd_lock);
                    if (dh == mountd_dh)
                        mountd_dh = NULL;
                    mutex_exit(&mountd_lock);
                    door_ki_rele(dh);
                    kmem_free(abuf, absz);
                    delay(hz);
                    goto retry;
                }
                /*
                 * If the door was _not_ revoked on us,
                 * then more than likely we took an INTR,
                 * so we need to fail the operation.
                 */
                door_ki_rele(dh);
                goto fail;
            }
            /*
             * The only failure that can occur from getting
             * the door info is EINVAL, so we let the code
             * below handle it.
             */
            /* FALLTHROUGH */

        case EBADF:
        case EINVAL:
        default:
            /*
             * If we have a stale door handle, give smf a last
             * chance to start it by sleeping for a little bit.
             * If we're still hosed, we'll fail the call.
             *
             * Since we're going to reacquire the door handle
             * upon the retry, we opt to sleep for a bit and
             * _not_ to clear mountd_dh. If mountd restarted
             * and was able to set mountd_dh, we should see
             * the new instance; if not, we won't get caught
             * up in the retry/DELAY loop.
             */
            door_ki_rele(dh);
            if (!last) {
                delay(hz);
                last++;
                goto retry;
            }
            sys_log("nfsauth: stale mountd door handle");
            goto fail;
    }

    /*
     * No door errors encountered; setup the XDR stream for decoding
     * the results. If we fail to decode the results, we've got no
     * other recourse than to fail the request.
     */
    xdrmem_create(&xdrs_r, rbuf, rbsz, XDR_DECODE);
    if (!xdr_nfsauth_res(&xdrs_r, &res))
        goto fail;
    XDR_DESTROY(&xdrs_r);

    DTRACE_PROBE1(nfsserv__func__nfsauth__results, nfsauth_res_t *, &res);
    switch (res.stat) {
        case NFSAUTH_DR_OKAY:
            access = res.ares.auth_perm;
            kmem_free(abuf, absz);
            break;

        case NFSAUTH_DR_EFAIL:
        case NFSAUTH_DR_DECERR:
        case NFSAUTH_DR_BADCMD:
        default:
fail:
            kmem_free(addr.buf, addr.len);
            kmem_free(abuf, absz);
            return (NFSAUTH_DENIED);
            /* NOTREACHED */
    }

    /*
     * Now cache the result on the cache chain
     * for this export (if there's enough memory)
     */
    ap = kmem_cache_alloc(exi_cache_handle, KM_NOSLEEP);
    if (ap) {
        ap->auth_addr = addr;
        ap->auth_flavor = flavor;
        ap->auth_access = access;
        ap->auth_time = gethrestime_sec();
        rw_enter(&exi->exi_cache_lock, RW_WRITER);
        ap->auth_next = *head;
        *head = ap;
        rw_exit(&exi->exi_cache_lock);
    } else {
        kmem_free(addr.buf, addr.len);
    }

    return (access);
}

/*
 * Decide whether the requesting client may use the filesystem with the
 * given nfs flavor number.  `perm' holds the flags of that flavor; a
 * flavor that was not explicitly shared (M_4SEC_EXPORTED clear) is
 * denied outright.
 */
int
nfsauth4_secinfo_access(struct exportinfo *exi, struct svc_req *req,
            int flavor, int perm)
{
    int lists;

    if ((perm & M_4SEC_EXPORTED) == 0)
        return (NFSAUTH_DENIED);

    /*
     * Shortcut: without M_ROOT, a plain M_RO or M_RW share has no
     * lists to consult and can be answered right here.
     */
    if (!(perm & M_ROOT)) {
        lists = perm & ~M_4SEC_EXPORTED;
        if (lists == M_RO)
            return (NFSAUTH_RO);
        if (lists == M_RW)
            return (NFSAUTH_RW);
    }

    /* Otherwise the answer comes from the cache or from mountd. */
    return (nfsauth_cache_get(exi, req, flavor));
}

/*
 * Determine the access the sender of `req' has to export `exi'.
 *
 * The security flavor of the request is looked up in the export's
 * ex_secinfo list.  If it is not listed but an AUTH_NONE entry was
 * seen, that entry is used instead and NFSAUTH_MAPNONE is or-ed into
 * the result; if neither is available the request is denied.  Shares
 * without access lists are answered directly, everything else goes
 * through nfsauth_cache_get().
 */
int
nfsauth_access(struct exportinfo *exi, struct svc_req *req)
{
    struct secinfo *sp = exi->exi_export.ex_secinfo;
    int authnone_entry = -1;
    int mapaccess = 0;
    int flavor;
    int perm;
    int i;

    /* The nfs flavor number travels in the transport's cookie. */
    flavor = (int)(uintptr_t)req->rq_xprt->xp_cookie;

    /*
     * Find the entry for this flavor, remembering on the way the
     * position of an AUTH_NONE entry in case there is no match.
     */
    for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
        if (sp[i].s_secinfo.sc_nfsnum == flavor)
            break;
        if (sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
            authnone_entry = i;
    }

    if (i >= exi->exi_export.ex_seccnt) {
        /* No match: map to AUTH_NONE when the export offers it. */
        if (authnone_entry == -1)
            return (NFSAUTH_DENIED);
        flavor = AUTH_NONE;
        mapaccess = NFSAUTH_MAPNONE;
        i = authnone_entry;
    }

    /*
     * A flavor that is listed without having been explicitly shared
     * by the user got there through the nfsv4 server namespace
     * setup.  Such a node is treated like a pseudo node (RO only),
     * except that it is a shared one.  In the picture below a
     * flavor in parentheses is one that was not explicitly shared:
     *
     *      /   (sys, krb5)
     *      |
     *      export  #share -o sec=sys (krb5)
     *      |
     *      secure  #share -o sec=krb5
     *
     * Here a krb5 request for /export is granted RO permission.
     */
    if ((sp[i].s_flags & M_4SEC_EXPORTED) == 0)
        return (mapaccess | NFSAUTH_RO);

    /*
     * Without M_ROOT a plain M_RO or M_RW share has no lists, so
     * the expensive nfsauth call (and the caching) can be skipped.
     */
    perm = sp[i].s_flags;
    if (!(perm & M_ROOT)) {
        perm &= ~M_4SEC_EXPORTED;
        if (perm == M_RO)
            return (mapaccess | NFSAUTH_RO);
        if (perm == M_RW)
            return (mapaccess | NFSAUTH_RW);
    }

    return (nfsauth_cache_get(exi, req, flavor) | mapaccess);
}

/*
 * Free the nfsauth cache for a given export
 */
void
nfsauth_cache_free(struct exportinfo *exi)
{
    int i;
    struct auth_cache *p, *next;

    for (i = 0; i < AUTH_TABLESIZE; i++) {
        for (p = exi->exi_cache[i]; p; p = next) {
            kmem_free(p->auth_addr.buf, p->auth_addr.len);
            next = p->auth_next;
            kmem_cache_free(exi_cache_handle, (void *)p);
        }
    }
}

/*
 * Called by the kernel memory allocator when
 * memory is low. Free unused cache entries.
 * If that's not enough, the VM system will
 * call again for some more.
 */
/*ARGSUSED*/
void
exi_cache_reclaim(void *cdrarg)
{
    int i;
    struct exportinfo *exi;

    rw_enter(&exported_lock, RW_READER);

    for (i = 0; i < EXPTABLESIZE; i++) {
        for (exi = exptable[i]; exi; exi = exi->exi_hash) {
            exi_cache_trim(exi);
        }
    }
    nfsauth_cache_reclaim++;

    rw_exit(&exported_lock);
}

/*
 * Don't reclaim entries until they have gone
 * unused (no cache hit) for at least
 * exi_cache_time seconds.
 */
time_t exi_cache_time = 60 * 60;

/*
 * Drop from the auth cache of `exi' every entry whose auth_time is not
 * newer than (now - exi_cache_time).  The export's exi_cache_lock is
 * held as writer for the duration of the sweep.
 */
void
exi_cache_trim(struct exportinfo *exi)
{
    struct auth_cache **linkp;
    struct auth_cache *entry;
    time_t cutoff;
    int bucket;

    cutoff = gethrestime_sec() - exi_cache_time;

    rw_enter(&exi->exi_cache_lock, RW_WRITER);

    for (bucket = 0; bucket < AUTH_TABLESIZE; bucket++) {
        /*
         * linkp always addresses the pointer that refers to the
         * entry under inspection (the bucket head or the
         * auth_next of the last entry kept), so unlinking is a
         * single store.
         */
        linkp = &exi->exi_cache[bucket];
        while ((entry = *linkp) != NULL) {
            if (entry->auth_time > cutoff) {
                /* recently used: keep it and move on */
                linkp = &entry->auth_next;
                continue;
            }

            /* stale: unlink, then release buffer and entry */
            *linkp = entry->auth_next;
            kmem_free(entry->auth_addr.buf, entry->auth_addr.len);
            kmem_cache_free(exi_cache_handle, (void *)entry);
        }
    }

    rw_exit(&exi->exi_cache_lock);
}