/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015 by Delphix. All rights reserved.
*/
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/cred.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/pathname.h>
#include <sys/utsname.h>
#include <sys/debug.h>
#include <sys/door.h>
#include <sys/sdt.h>
#include <sys/thread.h>
#include <sys/avl.h>
#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>
#include <nfs/nfs.h>
#include <nfs/export.h>
#include <nfs/nfs_clnt.h>
#include <nfs/auth.h>
/*
* kmem cache from which struct auth_cache entries are allocated. It is
* created in nfsauth_init() and destroyed in nfsauth_fini().
*/
static struct kmem_cache *exi_cache_handle;
/*
* exi_cache_reclaim() is registered as the reclaim callback of
* exi_cache_handle; it calls exi_cache_trim() for every export.
*/
static void exi_cache_reclaim(void *);
static void exi_cache_trim(struct exportinfo *exi);
/* Priority passed to zthread_create() for the refresh thread. */
extern pri_t minclsyspri;
/* Lookups in nfsauth_cache_get() served from an existing cache entry. */
volatile uint_t nfsauth_cache_hit;
/* Lookups in nfsauth_cache_get() that had to call nfsauth_retrieve(). */
volatile uint_t nfsauth_cache_miss;
/* Entries switched from NFS_AUTH_FRESH to NFS_AUTH_STALE by a lookup. */
volatile uint_t nfsauth_cache_refresh;
/* Completed exi_cache_reclaim() invocations. */
volatile uint_t nfsauth_cache_reclaim;
/* exi_cache_trim() bailed out because a client's authc_lock was busy. */
volatile uint_t exi_cache_auth_reclaim_failed;
/* exi_cache_trim() could not upgrade exi_cache_lock to free empty clients. */
volatile uint_t exi_cache_clnt_reclaim_failed;
/*
* The lifetime of an auth cache entry:
* ------------------------------------
*
* An auth cache entry is created with both the auth_time
* and auth_freshness times set to the current time.
*
* Upon every client access which results in a hit, the
* auth_time will be updated.
*
* If a client access determines that the auth_freshness
* indicates that the entry is STALE, then it will be
* refreshed. Note that this will explicitly reset
* auth_time.
*
* When the REFRESH successfully occurs, then the
* auth_freshness is updated.
*
* There are two ways for an entry to leave the cache:
*
* 1) Purged by an action on the export (remove or changed)
* 2) Memory backpressure from the kernel (check against NFSAUTH_CACHE_TRIM)
*
* For 2) we check the timeout value against auth_time.
*/
/*
* Number of seconds until we mark for refresh an auth cache entry.
* nfsauth_cache_get() compares this against the time elapsed since the
* entry's auth_freshness.
*/
#define NFSAUTH_CACHE_REFRESH 600
/*
* Number of idle seconds until we yield to backpressure
* to trim a cache entry. exi_cache_trim() only considers entries whose
* auth_time is at least this many seconds in the past.
*/
#define NFSAUTH_CACHE_TRIM 3600
/*
* While we could encapsulate the exi_list inside the
* exi structure, we can't do that for the auth_list.
* So, to keep things looking clean, we keep them both
* in these external lists.
*/
/*
* One element of refreshq_queue: an export together with the list of its
* auth cache entries waiting to be refreshed.
*/
typedef struct refreshq_exi_node {
/* Export; held with exi_hold() when the node is created, exi_rele()d when freed */
struct exportinfo *ren_exi;
/* List of refreshq_auth_node_t (linked through ran_node) */
list_t ren_authlist;
/* Linkage on refreshq_queue */
list_node_t ren_node;
} refreshq_exi_node_t;
/*
* One element of a refreshq_exi_node_t's ren_authlist: a single auth cache
* entry queued for refresh.
*/
typedef struct refreshq_auth_node {
/* The cache entry to refresh */
struct auth_cache *ran_auth;
/* strdup()ed netid of the request that triggered the refresh; strfree()d by the consumer */
char *ran_netid;
/* Linkage on ren_authlist */
list_node_t ran_node;
} refreshq_auth_node_t;
/*
* Used to manipulate things on the refreshq_queue.
* Note that the refresh thread will effectively
* pop a node off of the queue, at which point it
* will no longer need to hold the mutex.
*
* refreshq_cv is signalled both when work is queued and when
* refreshq_thread_state changes (shutdown request / thread halted).
*/
static kmutex_t refreshq_lock;
static list_t refreshq_queue;
static kcondvar_t refreshq_cv;
/*
* If there is ever a problem with loading the
* module, then nfsauth_fini() needs to be called
* to remove state. In that event, since the
* refreshq thread has been started, they need to
* work together to get rid of state.
*/
typedef enum nfsauth_refreshq_thread_state {
/* Set by nfsauth_init() just before the refresh thread is created */
REFRESHQ_THREAD_RUNNING,
/* Set by nfsauth_fini() to ask the refresh thread to exit */
REFRESHQ_THREAD_FINI_REQ,
/* Initial value; also set by the refresh thread right before it exits */
REFRESHQ_THREAD_HALTED
} nfsauth_refreshq_thread_state_t;
nfsauth_refreshq_thread_state_t
refreshq_thread_state = REFRESHQ_THREAD_HALTED;
/* Forward declarations of file-local helpers defined further below. */
static void nfsauth_free_node(struct auth_cache *);
static void nfsauth_refresh_thread(void);
static int nfsauth_cache_compar(const void *, const void *);
/*
* mountd is a server-side only daemon. This will need to be
* revisited if the NFS server is ever made zones-aware.
*
* mountd_dh is the door handle used for upcalls to mountd; it is NULL until
* mountd_args() installs one. mountd_lock protects reads and updates of
* mountd_dh.
*/
kmutex_t mountd_lock;
door_handle_t mountd_dh;
/*
 * Register the door identified by `did' as the rendezvous point with
 * mountd.  A previously registered door handle, if any, is released before
 * the new one is looked up; the whole swap happens under mountd_lock.
 */
void
mountd_args(uint_t did)
{
	door_handle_t prev;

	mutex_enter(&mountd_lock);

	prev = mountd_dh;
	if (prev != NULL) {
		door_ki_rele(prev);
	}
	mountd_dh = door_ki_lookup(did);

	mutex_exit(&mountd_lock);
}
/*
 * One-time setup of the nfsauth subsystem: initializes the locks, the
 * refresh queue and its condition variable, creates the kmem cache used for
 * auth_cache entries and starts the refresh thread.
 */
void
nfsauth_init(void)
{
	/*
	 * Work queue shared with nfsauth_refresh_thread() and the
	 * primitives that guard it.
	 */
	mutex_init(&refreshq_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&refreshq_cv, NULL, CV_DEFAULT, NULL);
	list_create(&refreshq_queue, sizeof (refreshq_exi_node_t),
	    offsetof(refreshq_exi_node_t, ren_node));

	/*
	 * mountd can be restarted by smf(5), so the door handle kept in
	 * mountd_dh may be replaced at any time; mountd_lock makes that
	 * replacement safe.
	 */
	mutex_init(&mountd_lock, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * Cache for auth_cache entries.  exi_cache_reclaim() is the
	 * callback used to give memory back under pressure.
	 */
	exi_cache_handle = kmem_cache_create("exi_cache_handle",
	    sizeof (struct auth_cache), 0, NULL, NULL,
	    exi_cache_reclaim, NULL, NULL, 0);

	/*
	 * Mark the refresh thread as running before it is created, then
	 * start it.
	 */
	refreshq_thread_state = REFRESHQ_THREAD_RUNNING;
	(void) zthread_create(NULL, 0, nfsauth_refresh_thread,
	    NULL, 0, minclsyspri);
}
/*
 * Tear down everything nfsauth_init() set up.  This must be called before
 * the exported_lock is destroyed.
 *
 * The refresh thread is asked to stop and waited for, whatever is still
 * sitting on the refresh queue is released, and finally the locks, the
 * condition variable and the kmem cache are destroyed.
 */
void
nfsauth_fini(void)
{
	refreshq_exi_node_t *exi_node;
	refreshq_auth_node_t *auth_node;

	/*
	 * Stop the refresh thread from picking up new work and wait until
	 * it reports that it has halted.
	 */
	mutex_enter(&refreshq_lock);
	if (refreshq_thread_state != REFRESHQ_THREAD_HALTED) {
		refreshq_thread_state = REFRESHQ_THREAD_FINI_REQ;
		cv_broadcast(&refreshq_cv);

		/*
		 * The state was just set to FINI_REQ under the lock, so at
		 * least one wait is always needed.
		 */
		do {
			cv_wait(&refreshq_cv, &refreshq_lock);
		} while (refreshq_thread_state != REFRESHQ_THREAD_HALTED);
	}
	mutex_exit(&refreshq_lock);

	/*
	 * Drain the queue: for every export node drain its auth list,
	 * freeing INVALID auth_cache entries along the way, then drop the
	 * export hold and free the node itself.
	 */
	for (exi_node = list_remove_head(&refreshq_queue);
	    exi_node != NULL;
	    exi_node = list_remove_head(&refreshq_queue)) {
		for (auth_node = list_remove_head(&exi_node->ren_authlist);
		    auth_node != NULL;
		    auth_node = list_remove_head(&exi_node->ren_authlist)) {
			struct auth_cache *entry = auth_node->ran_auth;

			if (entry->auth_state == NFS_AUTH_INVALID)
				nfsauth_free_node(entry);
			strfree(auth_node->ran_netid);
			kmem_free(auth_node, sizeof (refreshq_auth_node_t));
		}

		list_destroy(&exi_node->ren_authlist);
		exi_rele(exi_node->ren_exi);
		kmem_free(exi_node, sizeof (refreshq_exi_node_t));
	}
	list_destroy(&refreshq_queue);

	cv_destroy(&refreshq_cv);
	mutex_destroy(&refreshq_lock);
	mutex_destroy(&mountd_lock);

	/* Finally get rid of the auth_cache kmem cache. */
	kmem_cache_destroy(exi_cache_handle);
}
/*
 * Fold the bytes of the address held in a netbuf into an index
 * for the auth_cache table (0 .. AUTH_TABLESIZE - 1).
 */
static int
hash(struct netbuf *a)
{
	int folded = 0;
	int pos = 0;

	while (pos < a->len) {
		folded ^= a->buf[pos];
		pos++;
	}

	return (folded & (AUTH_TABLESIZE - 1));
}
/*
 * Strip from an address the parts that do not identify a host by
 * AND-ing every byte of `addr' with the corresponding byte of `mask'
 * (in place).  For socket addresses this removes the port number.
 */
static void
addrmask(struct netbuf *addr, struct netbuf *mask)
{
	int pos = 0;

	while (pos < addr->len) {
		addr->buf[pos] = addr->buf[pos] & mask->buf[pos];
		pos++;
	}
}
/*
 * NFS V4 flavor of the auth check.  It performs the common
 * nfsauth_access() check and, when that check denies access or reports a
 * wrong security flavor, still grants a LIMITED view if has_visible()
 * reports something visible below vp.
 *
 * Example of why the limited view is needed:
 *
 *	/export      is shared as "sec=sys,rw=dfs-test-4,sec=krb5,rw"
 *	/export/home is shared as "sec=sys,rw"
 *
 * A client mounting /export with sec=sys gets a limited, read-only view of
 * /export showing only "home", because it is allowed to access
 * /export/home with auth_sys.
 */
int
nfsauth4_access(struct exportinfo *exi, vnode_t *vp, struct svc_req *req,
    cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
{
	int rights = nfsauth_access(exi, req, cr, uid, gid, ngids, gids);

	/* Neither denied nor wrong flavor: the common answer stands. */
	if ((rights & (NFSAUTH_DENIED | NFSAUTH_WRONGSEC)) == 0)
		return (rights);

	/*
	 * Allow ro permission with LIMITED view if there is a sub-dir
	 * exported under vp.
	 */
	if (has_visible(exi, vp))
		return (NFSAUTH_LIMITED);

	return (rights);
}
/*
 * Emit `msg' as a CE_WARN console/log message, rate limited to at most one
 * message per minute.
 *
 * The rate limit uses a single timestamp shared by all callers, so it
 * applies across all messages, not per message text.  The timestamp is
 * updated without any locking.
 *
 * The message is passed to cmn_err() as an argument of a "%s" format
 * rather than as the format string itself, so a '%' in msg can never be
 * interpreted as a conversion specification.
 */
static void
sys_log(const char *msg)
{
	static time_t tstamp = 0;
	time_t now;

	/*
	 * msg is shown (at most) once per minute
	 */
	now = gethrestime_sec();
	if ((tstamp + 60) < now) {
		tstamp = now;
		cmn_err(CE_WARN, "%s", msg);
	}
}
/*
* Callup to the mountd to get access information in the kernel.
*
* Builds an NFSAUTH_ACCESS request from the export path, the client address
* (addr), req_netid, flavor and the uid/gid/groups of clnt_cred, XDR-encodes
* it and sends it through the mountd door (mountd_dh).
*
* On success returns TRUE with *access, *srv_uid, *srv_gid, *srv_gids_cnt
* and *srv_gids filled in from the reply. *srv_gids is allocated here with
* kmem_alloc() (*srv_gids_cnt * sizeof (gid_t) bytes); the caller owns it.
*
* On failure returns FALSE and only *access is set: NFSAUTH_DROP when no
* door handle became available after retrying, NFSAUTH_DENIED otherwise.
*
* This function may sleep (KM_SLEEP allocations, delay()).
*/
static bool_t
nfsauth_retrieve(struct exportinfo *exi, char *req_netid, int flavor,
struct netbuf *addr, int *access, cred_t *clnt_cred, uid_t *srv_uid,
gid_t *srv_gid, uint_t *srv_gids_cnt, gid_t **srv_gids)
{
varg_t varg = {0};
nfsauth_res_t res = {0};
XDR xdrs;
size_t absz;
caddr_t abuf;
int last = 0;
door_arg_t da;
door_info_t di;
door_handle_t dh;
uint_t ntries = 0;
/*
* No entry in the cache for this client/flavor
* so we need to call the nfsauth service in the
* mount daemon.
*/
varg.vers = V_PROTO;
varg.arg_u.arg.cmd = NFSAUTH_ACCESS;
varg.arg_u.arg.areq.req_client.n_len = addr->len;
varg.arg_u.arg.areq.req_client.n_bytes = addr->buf;
varg.arg_u.arg.areq.req_netid = req_netid;
varg.arg_u.arg.areq.req_path = exi->exi_export.ex_path;
varg.arg_u.arg.areq.req_flavor = flavor;
varg.arg_u.arg.areq.req_clnt_uid = crgetuid(clnt_cred);
varg.arg_u.arg.areq.req_clnt_gid = crgetgid(clnt_cred);
varg.arg_u.arg.areq.req_clnt_gids.len = crgetngroups(clnt_cred);
varg.arg_u.arg.areq.req_clnt_gids.val = (gid_t *)crgetgroups(clnt_cred);
DTRACE_PROBE1(nfsserv__func__nfsauth__varg, varg_t *, &varg);
/*
* Setup the XDR stream for encoding the arguments. Notice that
* in addition to the args having variable fields (req_netid and
* req_path), the argument data structure is itself versioned,
* so we need to make sure we can size the arguments buffer
* appropriately to encode all the args. If we can't get sizing
* info _or_ properly encode the arguments, there's really no
* point in continuing, so we fail the request.
*/
if ((absz = xdr_sizeof(xdr_varg, &varg)) == 0) {
/* Nothing has been allocated yet, so return directly. */
*access = NFSAUTH_DENIED;
return (FALSE);
}
abuf = (caddr_t)kmem_alloc(absz, KM_SLEEP);
xdrmem_create(&xdrs, abuf, absz, XDR_ENCODE);
if (!xdr_varg(&xdrs, &varg)) {
XDR_DESTROY(&xdrs);
goto fail;
}
XDR_DESTROY(&xdrs);
/*
* Prepare the door arguments
*
* We don't know the size of the message the daemon
* will pass back to us. By setting rbuf to NULL,
* we force the door code to allocate a buf of the
* appropriate size. We must set rsize > 0, however,
* else the door code acts as if no response was
* expected and doesn't pass the data to us.
*/
da.data_ptr = (char *)abuf;
da.data_size = absz;
da.desc_ptr = NULL;
da.desc_num = 0;
da.rbuf = NULL;
da.rsize = 1;
retry:
/* Take our own hold on the current door handle under mountd_lock. */
mutex_enter(&mountd_lock);
dh = mountd_dh;
if (dh != NULL)
door_ki_hold(dh);
mutex_exit(&mountd_lock);
if (dh == NULL) {
/*
* The rendezvous point has not been established yet!
* This could mean that either mountd(1m) has not yet
* been started or that _this_ routine nuked the door
* handle after receiving an EINTR for a REVOKED door.
*
* Returning NFSAUTH_DROP will cause the NFS client
* to retransmit the request, so let's try to be more
* resilient and attempt for ntries before we bail.
*/
if (++ntries % NFSAUTH_DR_TRYCNT) {
delay(hz);
goto retry;
}
kmem_free(abuf, absz);
sys_log("nfsauth: mountd has not established door");
*access = NFSAUTH_DROP;
return (FALSE);
}
/* We have a door handle; restart the "no door" retry count. */
ntries = 0;
/*
* Now that we've got what we need, place the call.
*/
switch (door_ki_upcall_limited(dh, &da, NULL, SIZE_MAX, 0)) {
case 0: /* Success */
door_ki_rele(dh);
if (da.data_ptr == NULL && da.data_size == 0) {
/*
* The door_return that contained the data
* failed! We're here because of the 2nd
* door_return (w/o data) such that we can
* get control of the thread (and exit
* gracefully).
*/
DTRACE_PROBE1(nfsserv__func__nfsauth__door__nil,
door_arg_t *, &da);
goto fail;
}
break;
case EAGAIN:
/*
* Server out of resources; back off for a bit
*/
door_ki_rele(dh);
delay(hz);
goto retry;
/* NOTREACHED */
case EINTR:
if (!door_ki_info(dh, &di)) {
door_ki_rele(dh);
if (di.di_attributes & DOOR_REVOKED) {
/*
* The server barfed and revoked
* the (existing) door on us; we
* want to wait to give smf(5) a
* chance to restart mountd(1m)
* and establish a new door handle.
*
* mountd_dh is only cleared if it is
* still the handle we used, i.e. nobody
* installed a new one in the meantime.
*/
mutex_enter(&mountd_lock);
if (dh == mountd_dh) {
door_ki_rele(mountd_dh);
mountd_dh = NULL;
}
mutex_exit(&mountd_lock);
delay(hz);
goto retry;
}
/*
* If the door was _not_ revoked on us,
* then more than likely we took an INTR,
* so we need to fail the operation.
*/
goto fail;
}
/*
* The only failure that can occur from getting
* the door info is EINVAL, so we let the code
* below handle it.
*/
/* FALLTHROUGH */
case EBADF:
case EINVAL:
default:
/*
* If we have a stale door handle, give smf a last
* chance to start it by sleeping for a little bit.
* If we're still hosed, we'll fail the call.
*
* Since we're going to reacquire the door handle
* upon the retry, we opt to sleep for a bit and
* _not_ to clear mountd_dh. If mountd restarted
* and was able to set mountd_dh, we should see
* the new instance; if not, we won't get caught
* up in the retry/DELAY loop.
*/
door_ki_rele(dh);
if (!last) {
delay(hz);
last++;
goto retry;
}
sys_log("nfsauth: stale mountd door handle");
goto fail;
}
ASSERT(da.rbuf != NULL);
/*
* No door errors encountered; setup the XDR stream for decoding
* the results. If we fail to decode the results, we've got no
* other recourse than to fail the request.
*/
xdrmem_create(&xdrs, da.rbuf, da.rsize, XDR_DECODE);
if (!xdr_nfsauth_res(&xdrs, &res)) {
xdr_free(xdr_nfsauth_res, (char *)&res);
XDR_DESTROY(&xdrs);
kmem_free(da.rbuf, da.rsize);
goto fail;
}
XDR_DESTROY(&xdrs);
kmem_free(da.rbuf, da.rsize);
DTRACE_PROBE1(nfsserv__func__nfsauth__results, nfsauth_res_t *, &res);
switch (res.stat) {
case NFSAUTH_DR_OKAY:
*access = res.ares.auth_perm;
*srv_uid = res.ares.auth_srv_uid;
*srv_gid = res.ares.auth_srv_gid;
*srv_gids_cnt = res.ares.auth_srv_gids.len;
/*
* Hand the caller its own copy of the group list; the decoded
* result is released with xdr_free() below.
*/
*srv_gids = kmem_alloc(*srv_gids_cnt * sizeof (gid_t),
KM_SLEEP);
bcopy(res.ares.auth_srv_gids.val, *srv_gids,
*srv_gids_cnt * sizeof (gid_t));
break;
case NFSAUTH_DR_EFAIL:
case NFSAUTH_DR_DECERR:
case NFSAUTH_DR_BADCMD:
default:
xdr_free(xdr_nfsauth_res, (char *)&res);
/*
* Common failure exit, also reached by goto from above: deny access
* and release the encoded argument buffer.
*/
fail:
*access = NFSAUTH_DENIED;
kmem_free(abuf, absz);
return (FALSE);
/* NOTREACHED */
}
xdr_free(xdr_nfsauth_res, (char *)&res);
kmem_free(abuf, absz);
return (TRUE);
}
/*
* Body of the refresh thread started by nfsauth_init().
*
* While refreshq_thread_state is REFRESHQ_THREAD_RUNNING the thread takes
* one refreshq_exi_node_t at a time off refreshq_queue (sleeping on
* refreshq_cv when the queue is empty) and, for every auth_cache entry on
* its ren_authlist, calls nfsauth_retrieve() and stores the result in the
* entry. Entries found in the NFS_AUTH_INVALID state are freed instead.
*
* Once the state is no longer RUNNING the thread sets it to
* REFRESHQ_THREAD_HALTED, wakes up waiters on refreshq_cv and exits.
*/
static void
nfsauth_refresh_thread(void)
{
refreshq_exi_node_t *ren;
refreshq_auth_node_t *ran;
struct exportinfo *exi;
int access;
bool_t retrieval;
callb_cpr_t cprinfo;
CALLB_CPR_INIT(&cprinfo, &refreshq_lock, callb_generic_cpr,
"nfsauth_refresh");
for (;;) {
mutex_enter(&refreshq_lock);
if (refreshq_thread_state != REFRESHQ_THREAD_RUNNING) {
/* Keep the hold on the lock! */
break;
}
ren = list_remove_head(&refreshq_queue);
if (ren == NULL) {
/* Nothing queued: sleep until work or a state change is signalled. */
CALLB_CPR_SAFE_BEGIN(&cprinfo);
cv_wait(&refreshq_cv, &refreshq_lock);
CALLB_CPR_SAFE_END(&cprinfo, &refreshq_lock);
mutex_exit(&refreshq_lock);
continue;
}
mutex_exit(&refreshq_lock);
exi = ren->ren_exi;
ASSERT(exi != NULL);
/*
* Since the ren was removed from the refreshq_queue above,
* this is the only thread aware about the ren existence, so we
* have the exclusive ownership of it and we do not need to
* protect it by any lock.
*/
while ((ran = list_remove_head(&ren->ren_authlist))) {
uid_t uid;
gid_t gid;
uint_t ngids;
gid_t *gids;
struct auth_cache *p = ran->ran_auth;
char *netid = ran->ran_netid;
ASSERT(p != NULL);
ASSERT(netid != NULL);
/* p and netid were copied out above; the list node is no longer needed. */
kmem_free(ran, sizeof (refreshq_auth_node_t));
mutex_enter(&p->auth_lock);
/*
* Once the entry goes INVALID, it can not change
* state.
*
* No need to refresh entries also in a case we are
* just shutting down.
*
* In general, there is no need to hold the
* refreshq_lock to test the refreshq_thread_state. We
* do hold it at other places because there is some
* related thread synchronization (or some other tasks)
* close to the refreshq_thread_state check.
*
* The check for the refreshq_thread_state value here
* is purely advisory to allow the faster
* nfsauth_refresh_thread() shutdown. In a case we
* will miss such advisory, nothing catastrophic
* happens: we will just spin longer here before the
* shutdown.
*/
if (p->auth_state == NFS_AUTH_INVALID ||
refreshq_thread_state != REFRESHQ_THREAD_RUNNING) {
mutex_exit(&p->auth_lock);
if (p->auth_state == NFS_AUTH_INVALID)
nfsauth_free_node(p);
strfree(netid);
continue;
}
/*
* Make sure the state is valid. Note that once we
* change the state to NFS_AUTH_REFRESHING, no other
* thread will be able to work on this entry.
*/
ASSERT(p->auth_state == NFS_AUTH_STALE);
p->auth_state = NFS_AUTH_REFRESHING;
mutex_exit(&p->auth_lock);
DTRACE_PROBE2(nfsauth__debug__cache__refresh,
struct exportinfo *, exi,
struct auth_cache *, p);
/*
* The first caching of the access rights
* is done with the netid pulled out of the
* request from the client. All subsequent
* users of the cache may or may not have
* the same netid. It doesn't matter. So
* when we refresh, we simply use the netid
* of the request which triggered the
* refresh attempt.
*/
retrieval = nfsauth_retrieve(exi, netid,
p->auth_flavor, &p->auth_clnt->authc_addr, &access,
p->auth_clnt_cred, &uid, &gid, &ngids, &gids);
/*
* This can only be set in one other place
* and the state has to be NFS_AUTH_FRESH.
*/
strfree(netid);
mutex_enter(&p->auth_lock);
if (p->auth_state == NFS_AUTH_INVALID) {
/*
* The entry was invalidated while we were retrieving;
* free it together with the group list we just got.
*/
mutex_exit(&p->auth_lock);
nfsauth_free_node(p);
if (retrieval == TRUE)
kmem_free(gids, ngids * sizeof (gid_t));
} else {
/*
* If we got an error, do not reset the
* time. This will cause the next access
* check for the client to reschedule this
* node.
*/
if (retrieval == TRUE) {
p->auth_access = access;
p->auth_srv_uid = uid;
p->auth_srv_gid = gid;
/* Replace the old group list with the new one. */
kmem_free(p->auth_srv_gids,
p->auth_srv_ngids * sizeof (gid_t));
p->auth_srv_ngids = ngids;
p->auth_srv_gids = gids;
p->auth_freshness = gethrestime_sec();
}
p->auth_state = NFS_AUTH_FRESH;
cv_broadcast(&p->auth_cv);
mutex_exit(&p->auth_lock);
}
}
/* The auth list is drained; drop the export hold and free the node. */
list_destroy(&ren->ren_authlist);
exi_rele(ren->ren_exi);
kmem_free(ren, sizeof (refreshq_exi_node_t));
}
/*
* We get here with refreshq_lock still held (see the break above).
* NOTE(review): CALLB_CPR_EXIT() is presumably what drops refreshq_lock
* here - confirm against the callb documentation.
*/
refreshq_thread_state = REFRESHQ_THREAD_HALTED;
cv_broadcast(&refreshq_cv);
CALLB_CPR_EXIT(&cprinfo);
zthread_exit();
}
/*
 * AVL comparator for auth_cache_clnt nodes.  Clients are ordered first by
 * the length of their address and then by the address bytes themselves.
 * Returns -1, 0 or 1.
 */
int
nfsauth_cache_clnt_compar(const void *v1, const void *v2)
{
	const struct auth_cache_clnt *lhs = (const struct auth_cache_clnt *)v1;
	const struct auth_cache_clnt *rhs = (const struct auth_cache_clnt *)v2;
	int diff;

	if (lhs->authc_addr.len != rhs->authc_addr.len)
		return (lhs->authc_addr.len < rhs->authc_addr.len ? -1 : 1);

	diff = memcmp(lhs->authc_addr.buf, rhs->authc_addr.buf,
	    lhs->authc_addr.len);
	if (diff == 0)
		return (0);

	return (diff < 0 ? -1 : 1);
}
static int
nfsauth_cache_compar(const void *v1, const void *v2)
{
int c;
const struct auth_cache *a1 = (const struct auth_cache *)v1;
const struct auth_cache *a2 = (const struct auth_cache *)v2;
if (a1->auth_flavor < a2->auth_flavor)
return (-1);
if (a1->auth_flavor > a2->auth_flavor)
return (1);
if (crgetuid(a1->auth_clnt_cred) < crgetuid(a2->auth_clnt_cred))
return (-1);
if (crgetuid(a1->auth_clnt_cred) > crgetuid(a2->auth_clnt_cred))
return (1);
if (crgetgid(a1->auth_clnt_cred) < crgetgid(a2->auth_clnt_cred))
return (-1);
if (crgetgid(a1->auth_clnt_cred) > crgetgid(a2->auth_clnt_cred))
return (1);
if (crgetngroups(a1->auth_clnt_cred) < crgetngroups(a2->auth_clnt_cred))
return (-1);
if (crgetngroups(a1->auth_clnt_cred) > crgetngroups(a2->auth_clnt_cred))
return (1);
c = memcmp(crgetgroups(a1->auth_clnt_cred),
crgetgroups(a2->auth_clnt_cred), crgetngroups(a1->auth_clnt_cred));
if (c < 0)
return (-1);
if (c > 0)
return (1);
return (0);
}
/*
* Get the access information from the cache or callup to the mountd
* to get and cache the access information in the kernel.
*
* The cache is two-level: exi->exi_cache[] holds AVL trees of clients
* (struct auth_cache_clnt, keyed by masked address), and every client has
* an AVL tree of entries (struct auth_cache, keyed by flavor and
* credential).
*
* Returns the access bits. uid, gid, ngids and gids may be NULL; when
* non-NULL they receive the server-side identity. When both ngids and gids
* are non-NULL, *gids receives a kmem_alloc()ed array of *ngids gid_t that
* the caller must free.
*
* If memory for a new cache node cannot be allocated without sleeping, the
* data is retrieved from mountd without caching (label "retrieve").
*/
static int
nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor,
cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
{
struct netbuf *taddrmask;
struct netbuf addr; /* temporary copy of client's address */
const struct netbuf *claddr;
avl_tree_t *tree;
struct auth_cache ac; /* used as a template for avl_find() */
struct auth_cache_clnt *c;
struct auth_cache_clnt acc; /* used as a template for avl_find() */
struct auth_cache *p = NULL;
int access;
uid_t tmpuid;
gid_t tmpgid;
uint_t tmpngids;
gid_t *tmpgids;
avl_index_t where; /* used for avl_find()/avl_insert() */
ASSERT(cr != NULL);
/*
* Now check whether this client already
* has an entry for this flavor in the cache
* for this export.
* Get the caller's address, mask off the
* parts of the address that do not identify
* the host (port number, etc), and then hash
* it to find the tree of cached clients.
*/
claddr = svc_getrpccaller(req->rq_xprt);
addr = *claddr;
addr.buf = kmem_alloc(addr.maxlen, KM_SLEEP);
bcopy(claddr->buf, addr.buf, claddr->len);
SVC_GETADDRMASK(req->rq_xprt, SVC_TATTR_ADDRMASK, (void **)&taddrmask);
ASSERT(taddrmask != NULL);
addrmask(&addr, taddrmask);
/* Search templates: ac for the entry lookup, acc for the client lookup. */
ac.auth_flavor = flavor;
ac.auth_clnt_cred = crdup(cr);
acc.authc_addr = addr;
tree = exi->exi_cache[hash(&addr)];
rw_enter(&exi->exi_cache_lock, RW_READER);
c = (struct auth_cache_clnt *)avl_find(tree, &acc, NULL);
if (c == NULL) {
struct auth_cache_clnt *nc;
/*
* Unknown client. Drop the lock while the new node is prepared,
* then look again as writer before inserting.
*/
rw_exit(&exi->exi_cache_lock);
nc = kmem_alloc(sizeof (*nc), KM_NOSLEEP | KM_NORMALPRI);
if (nc == NULL)
goto retrieve;
/*
* Initialize the new auth_cache_clnt
*/
nc->authc_addr = addr;
nc->authc_addr.buf = kmem_alloc(addr.maxlen,
KM_NOSLEEP | KM_NORMALPRI);
if (addr.maxlen != 0 && nc->authc_addr.buf == NULL) {
kmem_free(nc, sizeof (*nc));
goto retrieve;
}
bcopy(addr.buf, nc->authc_addr.buf, addr.len);
rw_init(&nc->authc_lock, NULL, RW_DEFAULT, NULL);
avl_create(&nc->authc_tree, nfsauth_cache_compar,
sizeof (struct auth_cache),
offsetof(struct auth_cache, auth_link));
rw_enter(&exi->exi_cache_lock, RW_WRITER);
c = (struct auth_cache_clnt *)avl_find(tree, &acc, &where);
if (c == NULL) {
avl_insert(tree, nc, where);
rw_downgrade(&exi->exi_cache_lock);
c = nc;
} else {
/* Somebody else inserted the client meanwhile; discard ours. */
rw_downgrade(&exi->exi_cache_lock);
avl_destroy(&nc->authc_tree);
rw_destroy(&nc->authc_lock);
kmem_free(nc->authc_addr.buf, nc->authc_addr.maxlen);
kmem_free(nc, sizeof (*nc));
}
}
/* On both paths exi_cache_lock is held as reader at this point. */
ASSERT(c != NULL);
rw_enter(&c->authc_lock, RW_READER);
p = (struct auth_cache *)avl_find(&c->authc_tree, &ac, NULL);
if (p == NULL) {
struct auth_cache *np;
/*
* No entry for this flavor/credential. Same pattern as above:
* prepare the node unlocked, re-check as writer, then insert.
*/
rw_exit(&c->authc_lock);
np = kmem_cache_alloc(exi_cache_handle,
KM_NOSLEEP | KM_NORMALPRI);
if (np == NULL) {
rw_exit(&exi->exi_cache_lock);
goto retrieve;
}
/*
* Initialize the new auth_cache
*/
np->auth_clnt = c;
np->auth_flavor = flavor;
/* The credential copy made for the template is handed to the entry. */
np->auth_clnt_cred = ac.auth_clnt_cred;
np->auth_srv_ngids = 0;
np->auth_srv_gids = NULL;
np->auth_time = np->auth_freshness = gethrestime_sec();
np->auth_state = NFS_AUTH_NEW;
mutex_init(&np->auth_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&np->auth_cv, NULL, CV_DEFAULT, NULL);
rw_enter(&c->authc_lock, RW_WRITER);
rw_exit(&exi->exi_cache_lock);
p = (struct auth_cache *)avl_find(&c->authc_tree, &ac, &where);
if (p == NULL) {
avl_insert(&c->authc_tree, np, where);
rw_downgrade(&c->authc_lock);
p = np;
} else {
/* Lost the race; use the existing entry and discard ours. */
rw_downgrade(&c->authc_lock);
cv_destroy(&np->auth_cv);
mutex_destroy(&np->auth_lock);
crfree(ac.auth_clnt_cred);
kmem_cache_free(exi_cache_handle, np);
}
} else {
rw_exit(&exi->exi_cache_lock);
crfree(ac.auth_clnt_cred);
}
/* Take the entry lock before letting go of the client's tree lock. */
mutex_enter(&p->auth_lock);
rw_exit(&c->authc_lock);
/*
* If the entry is in the WAITING state then some other thread is just
* retrieving the required info. The entry was either NEW, or the list
* of client's supplemental groups is going to be changed (either by
* this thread, or by some other thread). We need to wait until the
* nfsauth_retrieve() is done.
*/
while (p->auth_state == NFS_AUTH_WAITING)
cv_wait(&p->auth_cv, &p->auth_lock);
/*
* Here the entry cannot be in WAITING or INVALID state.
*/
ASSERT(p->auth_state != NFS_AUTH_WAITING);
ASSERT(p->auth_state != NFS_AUTH_INVALID);
/*
* If the cache entry is not valid yet, we need to retrieve the
* info ourselves.
*/
if (p->auth_state == NFS_AUTH_NEW) {
bool_t res;
/*
* NFS_AUTH_NEW is the default output auth_state value in a
* case we failed somewhere below.
*/
auth_state_t state = NFS_AUTH_NEW;
p->auth_state = NFS_AUTH_WAITING;
mutex_exit(&p->auth_lock);
/*
* Our private address copy is no longer needed; use the address
* stored in the client node from here on.
*/
kmem_free(addr.buf, addr.maxlen);
addr = p->auth_clnt->authc_addr;
atomic_inc_uint(&nfsauth_cache_miss);
res = nfsauth_retrieve(exi, svc_getnetid(req->rq_xprt), flavor,
&addr, &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids);
/*
* The entry is in the WAITING state here, so other lookups block in
* the cv_wait() loop above until we broadcast below.
*/
p->auth_access = access;
p->auth_time = p->auth_freshness = gethrestime_sec();
if (res == TRUE) {
if (uid != NULL)
*uid = tmpuid;
if (gid != NULL)
*gid = tmpgid;
if (ngids != NULL && gids != NULL) {
/* The caller gets the array returned by nfsauth_retrieve(). */
*ngids = tmpngids;
*gids = tmpgids;
/*
* We need a copy of gids for the
* auth_cache entry
*/
tmpgids = kmem_alloc(tmpngids * sizeof (gid_t),
KM_NOSLEEP | KM_NORMALPRI);
if (tmpgids != NULL)
bcopy(*gids, tmpgids,
tmpngids * sizeof (gid_t));
}
/*
* Only mark the entry FRESH if it has a group list to keep
* (or there are no groups at all); otherwise it stays NEW.
*/
if (tmpgids != NULL || tmpngids == 0) {
p->auth_srv_uid = tmpuid;
p->auth_srv_gid = tmpgid;
p->auth_srv_ngids = tmpngids;
p->auth_srv_gids = tmpgids;
state = NFS_AUTH_FRESH;
}
}
/*
* Set the auth_state and notify waiters.
*/
mutex_enter(&p->auth_lock);
p->auth_state = state;
cv_broadcast(&p->auth_cv);
mutex_exit(&p->auth_lock);
} else {
uint_t nach;
time_t refresh;
/* Cache hit: answer from the entry; p->auth_lock is still held. */
refresh = gethrestime_sec() - p->auth_freshness;
p->auth_time = gethrestime_sec();
if (uid != NULL)
*uid = p->auth_srv_uid;
if (gid != NULL)
*gid = p->auth_srv_gid;
if (ngids != NULL && gids != NULL) {
*ngids = p->auth_srv_ngids;
*gids = kmem_alloc(*ngids * sizeof (gid_t), KM_SLEEP);
bcopy(p->auth_srv_gids, *gids, *ngids * sizeof (gid_t));
}
access = p->auth_access;
if ((refresh > NFSAUTH_CACHE_REFRESH) &&
p->auth_state == NFS_AUTH_FRESH) {
refreshq_auth_node_t *ran;
uint_t nacr;
/*
* The entry is older than NFSAUTH_CACHE_REFRESH: mark it
* STALE and queue it for the refresh thread. The cached
* answer is still returned to this caller.
*/
p->auth_state = NFS_AUTH_STALE;
mutex_exit(&p->auth_lock);
nacr = atomic_inc_uint_nv(&nfsauth_cache_refresh);
DTRACE_PROBE3(nfsauth__debug__cache__stale,
struct exportinfo *, exi,
struct auth_cache *, p,
uint_t, nacr);
ran = kmem_alloc(sizeof (refreshq_auth_node_t),
KM_SLEEP);
ran->ran_auth = p;
ran->ran_netid = strdup(svc_getnetid(req->rq_xprt));
mutex_enter(&refreshq_lock);
/*
* We should not add a work queue
* item if the thread is not
* accepting them.
*/
if (refreshq_thread_state == REFRESHQ_THREAD_RUNNING) {
refreshq_exi_node_t *ren;
/*
* Is there an existing exi_list?
*/
for (ren = list_head(&refreshq_queue);
ren != NULL;
ren = list_next(&refreshq_queue, ren)) {
if (ren->ren_exi == exi) {
list_insert_tail(
&ren->ren_authlist, ran);
break;
}
}
if (ren == NULL) {
/*
* First queued entry for this export: create its
* queue node, which holds the export until the
* node is freed.
*/
ren = kmem_alloc(
sizeof (refreshq_exi_node_t),
KM_SLEEP);
exi_hold(exi);
ren->ren_exi = exi;
list_create(&ren->ren_authlist,
sizeof (refreshq_auth_node_t),
offsetof(refreshq_auth_node_t,
ran_node));
list_insert_tail(&ren->ren_authlist,
ran);
list_insert_tail(&refreshq_queue, ren);
}
cv_broadcast(&refreshq_cv);
} else {
strfree(ran->ran_netid);
kmem_free(ran, sizeof (refreshq_auth_node_t));
}
mutex_exit(&refreshq_lock);
} else {
mutex_exit(&p->auth_lock);
}
nach = atomic_inc_uint_nv(&nfsauth_cache_hit);
DTRACE_PROBE2(nfsauth__debug__cache__hit,
uint_t, nach,
time_t, refresh);
kmem_free(addr.buf, addr.maxlen);
}
return (access);
retrieve:
/* The template credential was never handed to a cache entry; drop it. */
crfree(ac.auth_clnt_cred);
/*
* Retrieve the required data without caching.
*/
ASSERT(p == NULL);
atomic_inc_uint(&nfsauth_cache_miss);
if (nfsauth_retrieve(exi, svc_getnetid(req->rq_xprt), flavor, &addr,
&access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids)) {
if (uid != NULL)
*uid = tmpuid;
if (gid != NULL)
*gid = tmpgid;
if (ngids != NULL && gids != NULL) {
*ngids = tmpngids;
*gids = tmpgids;
} else {
/* Nobody wants the group list; free it here. */
kmem_free(tmpgids, tmpngids * sizeof (gid_t));
}
}
kmem_free(addr.buf, addr.maxlen);
return (access);
}
/*
 * Decide whether the requesting client may access the filesystem with the
 * given nfs flavor number.  Only explicitly shared flavors (those carrying
 * M_4SEC_EXPORTED in perm) are considered; anything else is denied.
 */
int
nfsauth4_secinfo_access(struct exportinfo *exi, struct svc_req *req,
    int flavor, int perm, cred_t *cr)
{
	if ((perm & M_4SEC_EXPORTED) == 0)
		return (NFSAUTH_DENIED);

	/*
	 * Shortcut when the share has no access lists: a plain "ro" or
	 * "rw" share can be answered without consulting the cache.
	 */
	if ((perm & (M_ROOT | M_NONE | M_MAP)) == 0) {
		int plain = perm & ~M_4SEC_EXPORTED;

		if (plain == M_RO)
			return (NFSAUTH_RO);
		if (plain == M_RW)
			return (NFSAUTH_RW);
	}

	return (nfsauth_cache_get(exi, req, flavor, cr, NULL, NULL, NULL,
	    NULL));
}
/*
 * Work out the access rights of a request against an export.
 *
 * The flavor of the request is looked up in the export's secinfo list
 * (falling back to an AUTH_NONE entry when the flavor itself is not
 * listed).  Shares without access lists are answered directly; everything
 * else goes through nfsauth_cache_get().
 *
 * uid, gid, ngids and gids are optional outputs.  They are first set to
 * defaults derived from cr and may then be overridden by
 * nfsauth_cache_get().  Returns NFSAUTH_* bits, possibly or-ed with
 * NFSAUTH_MAPNONE.
 */
int
nfsauth_access(struct exportinfo *exi, struct svc_req *req, cred_t *cr,
    uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
{
	struct secinfo *secs;
	int access, mapaccess;
	int idx, flavor, perm;
	int none_idx = -1;	/* index of an AUTH_NONE entry seen so far */
	int have_none;

	/*
	 * Default identity: root is mapped to the anonymous user, everybody
	 * else keeps the uid/gid from the credential.  This might get
	 * overridden later in nfsauth_cache_get().
	 */
	if (crgetuid(cr) != 0) {
		if (uid != NULL)
			*uid = crgetuid(cr);
		if (gid != NULL)
			*gid = crgetgid(cr);
	} else {
		if (uid != NULL)
			*uid = exi->exi_export.ex_anon;
		if (gid != NULL)
			*gid = exi->exi_export.ex_anon;
	}
	if (ngids != NULL)
		*ngids = 0;
	if (gids != NULL)
		*gids = NULL;

	/* The nfs flavor number travels in the transport's cookie. */
	flavor = (int)(uintptr_t)req->rq_xprt->xp_cookie;

	/*
	 * First check the access restrictions on the filesystem.  If there
	 * are no lists associated with this flavor then there's no need to
	 * make an expensive call to the nfsauth service or to cache
	 * anything.
	 *
	 * Find the entry for the request's flavor, remembering any
	 * AUTH_NONE entry passed on the way.
	 */
	secs = exi->exi_export.ex_secinfo;
	for (idx = 0; idx < exi->exi_export.ex_seccnt; idx++) {
		if (flavor == secs[idx].s_secinfo.sc_nfsnum)
			break;
		if (secs[idx].s_secinfo.sc_nfsnum == AUTH_NONE)
			none_idx = idx;
	}

	mapaccess = 0;
	if (idx >= exi->exi_export.ex_seccnt) {
		/*
		 * Flavor not found, but use AUTH_NONE if it exists
		 */
		if (none_idx == -1)
			return (NFSAUTH_DENIED);

		flavor = AUTH_NONE;
		mapaccess = NFSAUTH_MAPNONE;
		idx = none_idx;
	}

	/*
	 * If the flavor is in the ex_secinfo list, but not an explicitly
	 * shared flavor by the user, it is a result of the nfsv4 server
	 * namespace setup.  We will grant an RO permission similar for
	 * a pseudo node except that this node is a shared one.
	 *
	 * e.g. flavor in (flavor) indicates that it is not explicitly
	 *	shared by the user:
	 *
	 *		/	(sys, krb5)
	 *		|
	 *		export	#share -o sec=sys (krb5)
	 *		|
	 *		secure	#share -o sec=krb5
	 *
	 * In this case, when a krb5 request coming in to access
	 * /export, RO permission is granted.
	 */
	if ((secs[idx].s_flags & M_4SEC_EXPORTED) == 0)
		return (mapaccess | NFSAUTH_RO);

	/*
	 * Optimize if there are no lists.
	 * We cannot optimize for AUTH_SYS with NGRPS (16) supplemental
	 * groups.
	 */
	perm = secs[idx].s_flags;
	if ((perm & (M_ROOT | M_NONE | M_MAP)) == 0 &&
	    (ngroups_max <= NGRPS || flavor != AUTH_SYS ||
	    crgetngroups(cr) < NGRPS)) {
		perm &= ~M_4SEC_EXPORTED;
		if (perm == M_RO)
			return (mapaccess | NFSAUTH_RO);
		if (perm == M_RW)
			return (mapaccess | NFSAUTH_RW);
	}

	access = nfsauth_cache_get(exi, req, flavor, cr, uid, gid, ngids, gids);

	/*
	 * For both NFSAUTH_DENIED and NFSAUTH_WRONGSEC we do not care about
	 * the supplemental groups.
	 */
	if ((access & (NFSAUTH_DENIED | NFSAUTH_WRONGSEC)) != 0 &&
	    ngids != NULL && gids != NULL) {
		kmem_free(*gids, *ngids * sizeof (gid_t));
		*ngids = 0;
		*gids = NULL;
	}

	/*
	 * Client's security flavor doesn't match with "ro" or
	 * "rw" list.  Try again using AUTH_NONE if present.
	 */
	if ((access & NFSAUTH_WRONGSEC) && (flavor != AUTH_NONE)) {
		/*
		 * Either AUTH_NONE was already encountered above, or the
		 * rest of the list (from the matched entry on) has to be
		 * checked for it.
		 */
		have_none = (none_idx != -1);
		if (!have_none) {
			for (; idx < exi->exi_export.ex_seccnt; idx++) {
				if (secs[idx].s_secinfo.sc_nfsnum ==
				    AUTH_NONE) {
					have_none = 1;
					break;
				}
			}
		}

		if (have_none) {
			mapaccess = NFSAUTH_MAPNONE;
			access = nfsauth_cache_get(exi, req, AUTH_NONE, cr,
			    NULL, NULL, NULL, NULL);
		}
	}

	if (access & NFSAUTH_DENIED)
		access = NFSAUTH_DENIED;

	return (access | mapaccess);
}
/*
 * Free a client node: every auth_cache entry still in its tree, the tree
 * itself, the copy of the client address, the lock and finally the node.
 * The node must already be unlinked from the export's client tree.
 */
static void
nfsauth_free_clnt_node(struct auth_cache_clnt *p)
{
	struct auth_cache *entry;
	void *walk = NULL;

	for (;;) {
		entry = avl_destroy_nodes(&p->authc_tree, &walk);
		if (entry == NULL)
			break;
		nfsauth_free_node(entry);
	}
	avl_destroy(&p->authc_tree);

	rw_destroy(&p->authc_lock);
	kmem_free(p->authc_addr.buf, p->authc_addr.maxlen);
	kmem_free(p, sizeof (struct auth_cache_clnt));
}
/*
 * Free a single auth_cache entry: its server-side group list, its
 * credential reference and its synchronization objects, then give the
 * entry back to exi_cache_handle.
 */
static void
nfsauth_free_node(struct auth_cache *p)
{
	size_t gids_size = p->auth_srv_ngids * sizeof (gid_t);

	kmem_free(p->auth_srv_gids, gids_size);
	crfree(p->auth_clnt_cred);

	cv_destroy(&p->auth_cv);
	mutex_destroy(&p->auth_lock);

	kmem_cache_free(exi_cache_handle, p);
}
/*
 * Release the whole nfsauth cache of an export: every client node in
 * every bucket of exi_cache[], together with all of its entries.
 *
 * The only way to get here is through exi_rele(), which means that no
 * auth cache entry of this export is being refreshed.
 */
void
nfsauth_cache_free(struct exportinfo *exi)
{
	int bucket = 0;

	while (bucket < AUTH_TABLESIZE) {
		avl_tree_t *clients = exi->exi_cache[bucket];
		struct auth_cache_clnt *clnt;
		void *walk = NULL;

		for (;;) {
			clnt = avl_destroy_nodes(clients, &walk);
			if (clnt == NULL)
				break;
			nfsauth_free_clnt_node(clnt);
		}
		bucket++;
	}
}
/*
* Called by the kernel memory allocator when
* memory is low. Free unused cache entries.
* If that's not enough, the VM system will
* call again for some more.
*/
/*ARGSUSED*/
void
exi_cache_reclaim(void *cdrarg)
{
int i;
struct exportinfo *exi;
rw_enter(&exported_lock, RW_READER);
for (i = 0; i < EXPTABLESIZE; i++) {
for (exi = exptable[i]; exi; exi = exi->fid_hash.next) {
exi_cache_trim(exi);
}
}
rw_exit(&exported_lock);
atomic_inc_uint(&nfsauth_cache_reclaim);
}
/*
* Trim the auth cache of one export. For every bucket of exi->exi_cache[]:
*
* 1) with exi_cache_lock held as reader, remove from each client's tree
* the entries that have not been used for NFSAUTH_CACHE_TRIM seconds
* (WAITING entries are left alone);
* 2) try to upgrade exi_cache_lock to writer and, if that works, free the
* client nodes whose entry tree is empty.
*
* The function never blocks on a client's authc_lock or on the lock
* upgrade: if a client's lock cannot be taken the whole trim is abandoned,
* and if the upgrade fails only step 2 of that bucket is skipped.
*/
void
exi_cache_trim(struct exportinfo *exi)
{
struct auth_cache_clnt *c;
struct auth_cache_clnt *nextc;
struct auth_cache *p;
struct auth_cache *next;
int i;
time_t stale_time;
avl_tree_t *tree;
for (i = 0; i < AUTH_TABLESIZE; i++) {
tree = exi->exi_cache[i];
/* Entries last used at or before stale_time are candidates for trimming. */
stale_time = gethrestime_sec() - NFSAUTH_CACHE_TRIM;
rw_enter(&exi->exi_cache_lock, RW_READER);
/*
* Free entries that have not been
* used for NFSAUTH_CACHE_TRIM seconds.
*/
for (c = avl_first(tree); c != NULL; c = AVL_NEXT(tree, c)) {
/*
* We are being called by the kmem subsystem to reclaim
* memory so don't block if we can't get the lock.
*/
if (rw_tryenter(&c->authc_lock, RW_WRITER) == 0) {
exi_cache_auth_reclaim_failed++;
rw_exit(&exi->exi_cache_lock);
return;
}
/* next is fetched up front because p may be removed below. */
for (p = avl_first(&c->authc_tree); p != NULL;
p = next) {
next = AVL_NEXT(&c->authc_tree, p);
ASSERT(p->auth_state != NFS_AUTH_INVALID);
mutex_enter(&p->auth_lock);
/*
* We won't trim recently used and/or WAITING
* entries.
*/
if (p->auth_time > stale_time ||
p->auth_state == NFS_AUTH_WAITING) {
mutex_exit(&p->auth_lock);
continue;
}
DTRACE_PROBE1(nfsauth__debug__trim__state,
auth_state_t, p->auth_state);
/*
* STALE and REFRESHING entries needs to be
* marked INVALID only because they are
* referenced by some other structures or
* threads. They will be freed later.
*/
if (p->auth_state == NFS_AUTH_STALE ||
p->auth_state == NFS_AUTH_REFRESHING) {
p->auth_state = NFS_AUTH_INVALID;
mutex_exit(&p->auth_lock);
avl_remove(&c->authc_tree, p);
} else {
mutex_exit(&p->auth_lock);
avl_remove(&c->authc_tree, p);
nfsauth_free_node(p);
}
}
rw_exit(&c->authc_lock);
}
/*
* Removing client nodes needs the writer lock. If the upgrade fails,
* leave the (possibly empty) clients in place and go to the next bucket.
*/
if (rw_tryupgrade(&exi->exi_cache_lock) == 0) {
rw_exit(&exi->exi_cache_lock);
exi_cache_clnt_reclaim_failed++;
continue;
}
for (c = avl_first(tree); c != NULL; c = nextc) {
nextc = AVL_NEXT(tree, c);
if (avl_is_empty(&c->authc_tree) == B_FALSE)
continue;
avl_remove(tree, c);
nfsauth_free_clnt_node(c);
}
rw_exit(&exi->exi_cache_lock);
}
}