nfs4_subr.c revision 8cd69bcfc9077598908e35956fca8ab65aeddb4a
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright 2012 Nexenta Systems, Inc. All rights reserved.
*/
/*
* Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
* All Rights Reserved
*/
#include <nfs/nfs_clnt.h>
#include <nfs/nfs4_clnt.h>
/*
* Client-side statistics.
*
* clstat4_tmpl lists the named client counters; every entry pairs a
* statistic name with the KSTAT_DATA_UINT64 data type. The initializers
* are positional, so their order has to follow the member order of
* struct clstat4 (declared elsewhere, not visible in this file).
*/
static const struct clstat4 clstat4_tmpl = {
{ "calls", KSTAT_DATA_UINT64 },
{ "badcalls", KSTAT_DATA_UINT64 },
{ "referrals", KSTAT_DATA_UINT64 },
{ "referlinks", KSTAT_DATA_UINT64 },
{ "clgets", KSTAT_DATA_UINT64 },
{ "cltoomany", KSTAT_DATA_UINT64 },
#ifdef DEBUG
/* The remaining counters are only compiled into DEBUG builds. */
{ "clalloc", KSTAT_DATA_UINT64 },
{ "noresponse", KSTAT_DATA_UINT64 },
{ "failover", KSTAT_DATA_UINT64 },
{ "remap", KSTAT_DATA_UINT64 },
#endif
};
#ifdef DEBUG
/*
* DEBUG-only statistics. This object is neither static nor const, and
* exists only when DEBUG is defined. Every entry pairs a statistic name
* with the KSTAT_DATA_UINT64 data type; the initializers are positional,
* so their order has to follow the member order of struct clstat4_debug.
* The kstat snapshot code later in this file treats these values as
* global, i.e. applying to the system as a whole rather than to one zone.
*/
struct clstat4_debug clstat4_debug = {
{ "nrnode", KSTAT_DATA_UINT64 },
{ "access", KSTAT_DATA_UINT64 },
{ "dirent", KSTAT_DATA_UINT64 },
{ "dirents", KSTAT_DATA_UINT64 },
{ "reclaim", KSTAT_DATA_UINT64 },
{ "clreclaim", KSTAT_DATA_UINT64 },
{ "f_reclaim", KSTAT_DATA_UINT64 },
{ "a_reclaim", KSTAT_DATA_UINT64 },
{ "r_reclaim", KSTAT_DATA_UINT64 },
{ "r_path", KSTAT_DATA_UINT64 },
};
#endif
/*
* We keep a global list of per-zone client data, so we can clean up all zones
* if we get low on memory.
*/
static list_t nfs4_clnt_list;
/* NOTE(review): presumably serializes access to nfs4_clnt_list -- confirm. */
static kmutex_t nfs4_clnt_list_lock;
/*
* kmem cache pointer. NOTE(review): going by its name and the "client handle
* cache" comments in nfs4_subr_init()/nfs4_subr_fini(), this is the cache
* backing the cached client handle entries -- confirm.
*/
static struct kmem_cache *chtab4_cache;
#ifdef DEBUG
/*
* Debug switches, present only in DEBUG builds:
* - nfs4_rfscall_debug: no use is visible in this view -- TODO confirm.
* - nfs4_try_failover_any: when non-zero, try_failover() takes its
*   "goto done" shortcut for any non-zero rpc status.
* - nfs4_utf8_debug: when non-zero, enables the "Invalid UTF-8 filename"
*   debug message; non-static, so it is visible outside this file.
*/
static int nfs4_rfscall_debug;
static int nfs4_try_failover_any;
int nfs4_utf8_debug = 0;
#endif
/*
* NFSv4 readdir cache implementation
*/
typedef struct rddir4_cache_impl {
static int rddir4_cache_compar(const void *, const void *);
static void rddir4_cache_free(rddir4_cache_impl *);
static rddir4_cache *rddir4_cache_alloc(int);
static void rddir4_cache_hold(rddir4_cache *);
static int try_failover(enum clnt_stat);
static int nfs4_readdir_cache_hits = 0;
static int nfs4_readdir_cache_waits = 0;
static int nfs4_readdir_cache_misses = 0;
/*
* Shared nfs4 functions
*/
/*
* Copy an nfs_fh4. The destination storage (to->nfs_fh4_val) must already
* be allocated.
*/
void
{
}
/*
* nfs4cmpfh - compare 2 filehandles.
* Returns 0 if the two nfsv4 filehandles are the same, -1 if the first is
* "less" than the second, +1 if the first is "greater" than the second.
*/
int
{
return (-1);
return (1);
return (-1);
return (1);
}
return (0);
}
/*
* Compare two v4 filehandles. Return zero if they're the same, non-zero
* if they're not. Like nfs4cmpfh(), but different filehandle
* representation, and doesn't provide information about greater than or
* less than.
*/
int
{
return (1);
}
int
{
return (1);
else
return (0);
}
{
switch (error) {
case 0:
return (NFS4_OK);
case EPERM:
return (NFS4ERR_PERM);
case ENOENT:
return (NFS4ERR_NOENT);
case EINTR:
return (NFS4ERR_IO);
case EIO:
return (NFS4ERR_IO);
case ENXIO:
return (NFS4ERR_NXIO);
case ENOMEM:
return (NFS4ERR_RESOURCE);
case EACCES:
return (NFS4ERR_ACCESS);
case EBUSY:
return (NFS4ERR_IO);
case EEXIST:
return (NFS4ERR_EXIST);
case EXDEV:
return (NFS4ERR_XDEV);
case ENODEV:
return (NFS4ERR_IO);
case ENOTDIR:
return (NFS4ERR_NOTDIR);
case EISDIR:
return (NFS4ERR_ISDIR);
case EINVAL:
return (NFS4ERR_INVAL);
case EMFILE:
return (NFS4ERR_RESOURCE);
case EFBIG:
return (NFS4ERR_FBIG);
case ENOSPC:
return (NFS4ERR_NOSPC);
case EROFS:
return (NFS4ERR_ROFS);
case EMLINK:
return (NFS4ERR_MLINK);
case EDEADLK:
return (NFS4ERR_DEADLOCK);
case ENOLCK:
return (NFS4ERR_DENIED);
case EREMOTE:
return (NFS4ERR_SERVERFAULT);
case ENOTSUP:
return (NFS4ERR_NOTSUPP);
case EDQUOT:
return (NFS4ERR_DQUOT);
case ENAMETOOLONG:
return (NFS4ERR_NAMETOOLONG);
case EOVERFLOW:
return (NFS4ERR_INVAL);
case ENOSYS:
return (NFS4ERR_NOTSUPP);
case ENOTEMPTY:
return (NFS4ERR_NOTEMPTY);
case EOPNOTSUPP:
return (NFS4ERR_NOTSUPP);
case ESTALE:
return (NFS4ERR_STALE);
case EAGAIN:
return (NFS4ERR_DELAY);
}
return (NFS4ERR_LOCKED);
default:
}
}
int
{
switch (status) {
case NFS4_OK:
return (0);
case NFS4ERR_PERM:
return (EPERM);
case NFS4ERR_NOENT:
return (ENOENT);
case NFS4ERR_IO:
return (EIO);
case NFS4ERR_NXIO:
return (ENXIO);
case NFS4ERR_ACCESS:
return (EACCES);
case NFS4ERR_EXIST:
return (EEXIST);
case NFS4ERR_XDEV:
return (EXDEV);
case NFS4ERR_NOTDIR:
return (ENOTDIR);
case NFS4ERR_ISDIR:
return (EISDIR);
case NFS4ERR_INVAL:
return (EINVAL);
case NFS4ERR_FBIG:
return (EFBIG);
case NFS4ERR_NOSPC:
return (ENOSPC);
case NFS4ERR_ROFS:
return (EROFS);
case NFS4ERR_MLINK:
return (EMLINK);
case NFS4ERR_NAMETOOLONG:
return (ENAMETOOLONG);
case NFS4ERR_NOTEMPTY:
return (ENOTEMPTY);
case NFS4ERR_DQUOT:
return (EDQUOT);
case NFS4ERR_STALE:
return (ESTALE);
case NFS4ERR_BADHANDLE:
return (ESTALE);
case NFS4ERR_BAD_COOKIE:
return (EINVAL);
case NFS4ERR_NOTSUPP:
return (EOPNOTSUPP);
case NFS4ERR_TOOSMALL:
return (EINVAL);
case NFS4ERR_SERVERFAULT:
return (EIO);
case NFS4ERR_BADTYPE:
return (EINVAL);
case NFS4ERR_DELAY:
return (ENXIO);
case NFS4ERR_SAME:
return (EPROTO);
case NFS4ERR_DENIED:
return (ENOLCK);
case NFS4ERR_EXPIRED:
return (EPROTO);
case NFS4ERR_LOCKED:
return (EACCES);
case NFS4ERR_GRACE:
return (EAGAIN);
case NFS4ERR_FHEXPIRED: /* if got here, failed to get a new fh */
return (ESTALE);
case NFS4ERR_SHARE_DENIED:
return (EACCES);
case NFS4ERR_WRONGSEC:
return (EPERM);
case NFS4ERR_CLID_INUSE:
return (EAGAIN);
case NFS4ERR_RESOURCE:
return (EAGAIN);
case NFS4ERR_MOVED:
return (EPROTO);
case NFS4ERR_NOFILEHANDLE:
return (EIO);
return (ENOTSUP);
case NFS4ERR_STALE_CLIENTID:
return (EIO);
case NFS4ERR_STALE_STATEID:
return (EIO);
case NFS4ERR_OLD_STATEID:
return (EIO);
case NFS4ERR_BAD_STATEID:
return (EIO);
case NFS4ERR_BAD_SEQID:
return (EIO);
case NFS4ERR_NOT_SAME:
return (EPROTO);
case NFS4ERR_LOCK_RANGE:
return (EPROTO);
case NFS4ERR_SYMLINK:
return (EPROTO);
case NFS4ERR_RESTOREFH:
return (EPROTO);
case NFS4ERR_LEASE_MOVED:
return (EPROTO);
case NFS4ERR_ATTRNOTSUPP:
return (ENOTSUP);
case NFS4ERR_NO_GRACE:
return (EPROTO);
case NFS4ERR_RECLAIM_BAD:
return (EPROTO);
case NFS4ERR_RECLAIM_CONFLICT:
return (EPROTO);
case NFS4ERR_BADXDR:
return (EINVAL);
case NFS4ERR_LOCKS_HELD:
return (EIO);
case NFS4ERR_OPENMODE:
return (EACCES);
case NFS4ERR_BADOWNER:
/*
* Client and server are in different DNS domains and the
* NFSMAPID_DOMAIN doesn't match. No good answer here. Return
* EACCES, which translates to "permission denied".
*/
return (EACCES);
case NFS4ERR_BADCHAR:
return (EINVAL);
case NFS4ERR_BADNAME:
return (EINVAL);
case NFS4ERR_BAD_RANGE:
return (EIO);
case NFS4ERR_LOCK_NOTSUPP:
return (ENOTSUP);
case NFS4ERR_OP_ILLEGAL:
return (EINVAL);
case NFS4ERR_DEADLOCK:
return (EDEADLK);
case NFS4ERR_FILE_OPEN:
return (EACCES);
case NFS4ERR_ADMIN_REVOKED:
return (EPROTO);
case NFS4ERR_CB_PATH_DOWN:
return (EPROTO);
default:
#ifdef DEBUG
status);
#endif
return ((int)status);
}
}
void
{
/*
* for this mount point.
*/
return;
/*
* Happens once per client <-> server pair.
*/
return;
return;
}
"!NFSMAPID_DOMAIN does not match"
" the server: %s domain.\n"
"Please check configuration",
}
/*
* Happens once per mntinfo4_t.
* This error is deemed as one of the recovery facts "RF_BADOWNER",
* queue this in the mesg queue for this mount_info. This message
* is not printed, meaning it's absent from id_to_dump_solo_fact()
*/
}
}
int
{
/*
* Here check that the nfsv4 time is valid for the system.
* nfsv4 time value is a signed 64-bit, and the system time
* may be either int64_t or int32_t (depends on the kernel),
* so if the kernel is 32-bit, the nfsv4 time value may not fit.
*/
#ifndef _LP64
return (EOVERFLOW);
}
#endif
/* Invalid to specify 1 billion (or more) nsecs */
return (EINVAL);
} else {
}
return (0);
}
int
{
/*
* nfsv4 time value is a signed 64-bit, and the system time
* may be either int64_t or int32_t (depends on the kernel),
* so all system time values will fit.
*/
} else {
}
return (0);
}
/*
* Converts a utf8 string to a valid null terminated filename string.
*
* XXX - Not actually translating the UTF-8 string as per RFC 2279.
* For now, just validate that the UTF-8 string off the wire
* does not have characters that will freak out UFS, and leave
* it at that.
*/
char *
{
return (NULL);
/*
* Check for obvious illegal filename chars
*/
#ifdef DEBUG
if (nfs4_utf8_debug) {
char *path;
"Invalid UTF-8 filename: %s", path);
}
#endif
return (NULL);
}
}
/*
* Converts a utf8 string to a C string.
* kmem_allocs a new string if not supplied
*/
char *
{
char *sp;
char *u8p;
int len;
int i;
return (NULL);
if (s)
*s = '\0';
return (NULL);
}
sp = s;
/*
* At least check for embedded nulls
*/
for (i = 0; i < len; i++) {
if (u8p[i] == '\0') {
#ifdef DEBUG
"Embedded NULL in UTF-8 string");
#endif
if (s == NULL)
return (NULL);
}
}
return (sp);
}
/*
* str_to_utf8 - converts a null-terminated C string to a utf8 string
*/
{
int len;
return (NULL);
str->utf8string_len = 0;
}
return (str);
}
{
return (NULL);
return (NULL);
if (src->utf8string_len > 0) {
KM_SLEEP);
} else {
dest->utf8string_len = 0;
}
return (dest);
}
int
{
return (0);
else if (a == NULL)
return (-1);
else if (b == NULL)
return (1);
alen = a->utf8string_len;
blen = b->utf8string_len;
aval = a->utf8string_val;
bval = b->utf8string_val;
return (0);
return (-1);
return (1);
return (0);
return (-1);
else if (cmp == 0)
return (1);
else if (cmp < 0)
return (-1);
return (1);
}
/*
* utf8_dir_verify - checks that the utf8 string is valid
*/
{
char *nm;
int len;
return (NFS4ERR_INVAL);
return (NFS4ERR_INVAL);
}
return (NFS4ERR_BADNAME);
return (NFS4ERR_BADNAME);
return (NFS4ERR_BADNAME);
return (NFS4ERR_BADNAME);
return (NFS4_OK);
}
/*
*/
extern void sec_clnt_freeh(AUTH *);
extern void sec_clnt_freeinfo(struct sec_data *);
/*
* authget() gets an auth handle based on the security
* information from the servinfo in mountinfo.
* The auth handle is stored in ch_client->cl_auth.
*
* First security flavor of choice is to use sv_secdata
* which is initiated by the client. If that fails, get
* secinfo from the server and then select one from the
* server secinfo list.
*
* For RPCSEC_GSS flavor, upon success, a secure context is
* established between client and server.
*/
int
{
int error, i;
/*
* SV4_TRYSECINFO indicates to try the secinfo list from
* sv_secinfo until a successful one is reached. Point
* sv_currsec to the selected security mechanism for
* later sessions.
*/
i++) {
/* done */
break;
}
/*
* Allow the caller to retry with the security flavor
* pointed by svp->sv_secinfo->index when
* ETIMEDOUT/ECONNRESET occurs.
*/
break;
}
}
} else {
/* sv_currsec points to one of the entries in sv_secinfo */
if (svp->sv_currsec) {
} else {
/* If it's null, use sv_secdata. */
}
}
return (error);
}
/*
* Common handle get program for NFS, NFS ACL, and NFS AUTH client.
*/
int
{
int error;
return (EINVAL);
/*
* Find an unused handle or create one
*/
top:
/*
* Find the correct entry in the cache to check for free
* client handles. The search is based on the RPC program
* number, program version number, dev_t for the transport
* device, and the protocol family.
*/
break;
}
/*
* If we didn't find a cache entry for this quadruple, then
* create one. If we don't have one already preallocated,
* then drop the cache lock, create one, and then start over.
* If we did have a preallocated entry, then just add it to
* the front of the list.
*/
newch->ch_timesused = 0;
KM_SLEEP);
goto top;
}
/*
* We found a cache entry, but if it isn't on the front of the
* list, then move it to the front of the list to try to take
* advantage of locality of operations.
*/
}
/*
* If there was a free client handle cached, then remove it
* from the list, init it, and use it.
*/
}
/*
* Get an auth handle.
*/
}
ch->ch_timesused++;
return (0);
}
/*
* There weren't any free client handles which fit, so allocate
* a new one and use that.
*/
#ifdef DEBUG
#endif
}
if (error != 0) {
#ifdef DEBUG
#endif
/*
* Warning is unnecessary if error is EINTR.
*/
"clget: couldn't create handle: %m\n");
}
return (error);
}
/*
* Get an auth handle.
*/
#ifdef DEBUG
#endif
}
ch->ch_timesused++;
return (0);
}
static int
{
/*
* Set read buffer size to rsize
* and add room for RPC headers.
*/
if (ci.cl_readsize != 0)
/*
* If soft mount and server is down just try once.
* meaning: do not retransmit.
*/
ci.cl_retrans = 0;
else
/*
* clget4 calls authget() to get an auth handle. For RPCSEC_GSS
* security flavor, the client tries to establish a security context
* by contacting the server. If the connection is timed out or reset,
* e.g. server reboot, we will try again.
*/
firstcall = 1;
do {
if (error == 0)
break;
/*
* For forced unmount and zone shutdown, bail out but
* let the recovery thread do one more transmission.
*/
break;
}
/* do not retry for soft mount */
break;
/* let the caller deal with the failover case */
if (FAILOVER_MOUNT4(mi))
break;
firstcall = 0;
return (error);
}
void
{
}
/*
* Timestamp this cache entry so that we know when it was last
* used.
*/
/*
* Add the free client handle to the front of the list.
* This way, the list will be sorted in youngest to oldest
* order.
*/
}
static void
{
#ifdef DEBUG
int n = 0;
#endif
/*
* Need to reclaim some memory, so step through the cache
* looking through the lists for entries which can be freed.
*/
/*
* Here we step through each non-NULL quadruple and start to
* construct the reclaim list pointed to by cp. Note that
* cp will contain all eligible chtab entries. When this traversal
* completes, chtab entries from the last quadruple will be at the
* front of cp and entries from previously inspected quadruples have
* been appended to the rear of cp.
*/
continue;
/*
* Search each list for entries older than
* cl_holdtime seconds. The lists are maintained
* in youngest to oldest order so that when the
* first entry is found which is old enough, then
* all of the rest of the entries on the list will
* be old enough as well.
*/
}
}
}
}
/*
* If cp is empty, then there is nothing to reclaim here.
*/
return;
/*
* Step through the list of entries to free, destroying each client
* handle and kmem_free'ing the memory for each entry.
*/
#ifdef DEBUG
n++;
#endif
}
#ifdef DEBUG
/*
* Update clalloc so that nfsstat shows the current number
* of allocated client handles.
*/
#endif
}
/*
* clreclaim4 - low-memory reclaim entry point.
*
* all: not referenced by the function (hence the ARGSUSED lint directive).
*
* NOTE(review): only the explanatory comment of the body is visible in this
* view; no statement that walks the zones is shown here -- confirm against
* the complete source before relying on this description.
*/
/* ARGSUSED */
static void
clreclaim4(void *all)
{
/*
* The system is low on memory; go through and try to reclaim some from
* every zone on the system.
*/
}
/*
* Minimum time-out values indexed by call type.
* These units are in "eighths" of a second to avoid multiplies, so the
* three entries below stand for 6/8, 7/8 and 10/8 of a second.
* NOTE(review): which call type maps to which index is not visible
* here -- confirm against the callers.
*/
static unsigned int minimum_timeo[] = {
6, 7, 10
};
/*
* Back off for retransmission timeout, MAXTIMO is in hz of a sec
*/
static int
{
int error;
int timeo; /* in units of hz */
#ifdef DEBUG
char *bufp;
#endif
int firstcall;
/*
* If we know that we are rebooting then let's
* not bother with doing any over the wireness.
*/
return (EIO);
}
/* For TSOL, use a new cred which has net_mac_aware flag */
if (!cred_cloned && is_system_labeled()) {
cred_cloned = TRUE;
}
/*
* clget() calls clnt_tli_kinit() which clears the xid, so we
* are guaranteed to reprocess the retry as a new request.
*/
/*
* If hard mounted fs, retry call forever unless hard error
* occurs.
*
* For forced unmount, let the recovery thread through but return
* an error for all others. This is so that user processes can
* exit quickly. The recovery thread bails out after one
* transmission so that it can tell if it needs to continue.
*
* For zone shutdown, behave as above to encourage quick
* process exit, but also fail quickly when servers have
* timed out before and reduce the timeouts.
*/
firstcall = 1;
do {
"nfs4_rfscall: vfs_flag=0x%x, %s",
/*
* It's possible while we're retrying the admin
* decided to reboot.
*/
if (cred_cloned)
return (EIO);
}
if (cred_cloned)
return (EIO);
}
if (cred_cloned)
return (EIO);
}
}
firstcall = 0;
/*
* Mask out all signals except SIGHUP, SIGINT, SIGQUIT
* and SIGTERM. (Preserving the existing masks).
* Mask out SIGINT if mount option nointr is specified.
*/
/*
* If there is a current signal, then don't bother
* even trying to send out the request because we
* won't be able to block waiting for the response.
* Simply assume RPC_INTR and get on with it.
*/
else {
}
/*
* restore original signal mask
*/
switch (status) {
case RPC_SUCCESS:
break;
case RPC_INTR:
/*
* There is no way to recover from this error,
* even if mount option nointr is specified.
* SIGKILL, for example, cannot be blocked.
*/
break;
case RPC_UDERROR:
/*
* If the NFS server is local (vold) and
* it goes away then we get RPC_UDERROR.
* This is a retryable error, so we would
* loop, so check to see if the specific
* error was ECONNRESET, indicating that
* target did not exist at all. If so,
* return with RPC_PROGUNAVAIL and
* ECONNRESET to indicate why.
*/
break;
}
/*FALLTHROUGH*/
default: /* probably RPC_TIMEDOUT */
if (IS_UNRECOVERABLE_RPC(status))
break;
/*
* increment server not responding count
*/
mi->mi_noresponse++;
#ifdef DEBUG
#endif
/*
* On zone shutdown, mark server dead and move on.
*/
if (cred_cloned)
return (EIO);
}
/*
* NFS client failover support:
* return and let the caller take care of
* failover. We only return for failover mounts
* because otherwise we want the "not responding"
* message, the timer updates, etc.
*/
if (cred_cloned)
*rpc_statusp = status;
return (error);
}
if (flags & RFSCALL_SOFT)
break;
/*
* The call is in progress (over COTS).
* Try the CLNT_CALL again, but don't
* print a noisy error message.
*/
if (status == RPC_INPROGRESS)
break;
if ((status == RPC_CANTSEND) &&
else
} else
if (*doqueue && nfs_has_ctty()) {
*doqueue = 0;
if ((status == RPC_CANTSEND) &&
0, NULL);
else
RF_SRV_NOT_RESPOND, mi, 0,
}
}
}
} while (tryagain);
if (status != RPC_SUCCESS) {
/*
* Let soft mounts use the timed out message.
*/
if (status == RPC_INPROGRESS)
#ifdef DEBUG
if (nfs_has_ctty()) {
uprintf("NFS%d %s failed for %s\n",
bufp);
}
}
#else
"NFS %s failed for server %s: error %d (%s)\n",
if (nfs_has_ctty()) {
"NFS %s failed for server %s: error %d (%s)\n",
}
}
#endif
/*
* when CLNT_CALL() fails with RPC_AUTHERROR,
* re_errno is set appropriately depending on
* the authentication error
*/
if (status == RPC_VERSMISMATCH ||
}
} else {
/*
* Test the value of mi_down and mi_printed without
* holding the mi_lock mutex. If they are both zero,
* then it is okay to skip the down and printed
* processing. This saves on a mutex_enter and
* mutex_exit pair for a normal, successful RPC.
* This was just complete overhead.
*/
} else
}
if (*doqueue == 0) {
*doqueue = 1;
}
}
if (cred_cloned)
*rpc_statusp = status;
}
/*
* rfs4call - general wrapper for RPC calls initiated by the client
*/
void
{
int i, error;
int num_resops;
/* Set up the results struct for XDR usage */
resp->decode_len = 0;
/* Return now if it was an RPC error */
if (error) {
return;
}
/* else we'll count the processed operations */
for (i = 0; i < num_resops; i++) {
/*
* Count the individual operations
* processed by the server.
*/
}
}
/*
* nfs4rename_update - updates stored state after a rename. Currently this
* is the path of the object and anything under it, and the filehandle of
* the renamed object.
*/
void
{
}
/*
* Routine to look up the filehandle for the given path and rootvp.
*
* Return values:
* - success: returns zero and *statp is set to NFS4_OK, and *fhp is
* updated.
*/
/*
* Selectors for the kind of object being looked up. The lookup routine
* that follows switches on its filetype value and accepts exactly these
* three values; any other value makes it return immediately.
*/
#define RML_ORDINARY 1
#define RML_NAMED_ATTR 2
#define RML_ATTRDIR 3
static void
{
int num_argops;
int doqueue = 1;
char *path;
mntinfo4_t *mi;
switch (filetype) {
case RML_NAMED_ATTR:
break;
case RML_ATTRDIR:
break;
case RML_ORDINARY:
break;
default:
return;
}
lookuparg.trailer_len = 0;
/* 0: putfh directory */
goto exit;
/* get the object filehandle */
goto exit;
}
TAG_NONE, 0, 0);
goto exit;
}
/* get the object attributes */
/* See if there are enough fields in the response for parent info */
goto exit;
/* get the parent filehandle */
goto exit;
}
TAG_NONE, 0, 0);
goto exit;
}
/* get the parent attributes */
exit:
/*
* It is too hard to remember where all the OP_LOOKUPs are
*/
}
/*
* NFS client failover / volatile filehandle support
*
* Recover the filehandle for the given rnode.
*
* Errors are returned via the nfs4_error_t parameter.
*/
void
{
int is_stub;
int filetype = RML_ORDINARY;
int badfhcount = 0;
return;
}
/*
* Given the root fh, use the path stored in
* the rnode to find the fh for the new server.
*/
return;
/*
* Releases the osp, if it is provided.
* Puts a hold on the cred_otw and the new osp (if found).
*/
&first_time, &last_time);
}
}
/* file mount, doesn't need a remap */
goto done;
}
"nfs4_remap_file: remap_lookup returned %d/%d",
goto get_remap_cred;
}
goto done;
case NFS4_OK:
badfhcount = 0;
rp->r_delay_interval = 0;
uprintf("NFS File Available..\n");
}
break;
case NFS4ERR_FHEXPIRED:
case NFS4ERR_BADHANDLE:
case NFS4ERR_STALE:
/*
* If we ran into filehandle problems, we should try to
* remap the root vnode first and hope life gets better.
* But we need to avoid loops.
*/
if (badfhcount++ > 0)
goto done;
if (newfh.nfs_fh4_len != 0) {
newfh.nfs_fh4_len = 0;
}
if (newpfh.nfs_fh4_len != 0) {
newpfh.nfs_fh4_len = 0;
}
/* relative path - remap rootvp then retry */
goto done;
goto done;
goto again;
case NFS4ERR_DELAY:
badfhcount = 0;
goto done;
goto again;
case NFS4ERR_ACCESS:
/* get new cred, try again */
goto done;
goto get_remap_cred;
default:
goto done;
}
/*
* Check on the new and old rnodes before updating;
* if the vnode type or size changes, issue a warning
* and mark the file dead.
*/
if (flags & NFS4_REMAP_CKATTRS) {
"nfs4_remap_file: size %d vs. %d, type %d vs. %d",
goto done;
}
}
/*
* Turn this object into a "stub" object if we
* crossed an underlying server fs boundary.
*
* This stub will be for a mirror-mount.
* A referral would look like a boundary crossing
* as well, but would not be the same type of object,
* so we would expect to mark the object dead.
*
* See comment in r4_do_attrcache() for more details.
*/
is_stub = 0;
if (gar.n4g_fsid_valid) {
is_stub = 1;
#ifdef DEBUG
} else {
"remap_file: fsid attr not provided by server. rp=%p",
(void *)rp));
#endif
}
if (is_stub)
else
/*
* If we got parent info, use it to update the parent
*/
if (newpfh.nfs_fh4_len != 0) {
/* force update of attrs */
}
}
done:
if (newfh.nfs_fh4_len != 0)
if (newpfh.nfs_fh4_len != 0)
}
/*
* Client-side failover support: remap the filehandle for vp if it appears
* necessary. Errors are returned via the nfs4_error_t parameter; though,
* if there is a problem, we will just try again later.
*/
void
{
return;
return;
return;
}
/*
* nfs4_make_dotdot() - find or create a parent vnode of a non-root node.
*
* Our caller has a filehandle for ".." relative to a particular
* directory object. We want to find or create a parent vnode
* with that filehandle and return it. We can of course create
* a vnode from this filehandle, but we need to also make sure
* that if ".." is a regular file (i.e. dvp is a V_XATTRDIR)
* that we have a parent FH for future reopens as well. If
* we have a remap failure, we won't be able to reopen this
* file, but we won't treat that as fatal because a reopen
* is at least unlikely. Someday nfs4_reopen() should look
* for a missing parent FH and try a remap to recover from it.
*
* need_start_op argument indicates whether this function should
* do a start_op before calling remap_lookup(). This should
* be FALSE, if you are the recovery thread or in an op; otherwise,
* set it to TRUE.
*/
int
{
#ifdef DEBUG
/*
* ensure need_start_op is correct
*/
{
/* C needs a ^^ operator! */
((! need_start_op) && (no_need_start_op)));
}
#endif
"nfs4_make_dotdot: called with fhp %p, dvp %s", (void *)fhp,
/*
* rootvp might be needed eventually. Holding it now will
* ensure that r4find_unlocked() will find it, if ".." is the root.
*/
if (e.error != 0)
goto out;
return (0);
}
/*
* Since we don't have the rnode, we have to go over the wire.
* remap_lookup() can get all of the filehandles and attributes
* we need in one operation.
*/
/* if a parent was not found return an error */
goto out;
}
recov_state.rs_flags = 0;
if (need_start_op) {
&recov_state, NULL);
if (e.error != 0) {
goto out;
}
}
if (need_start_op) {
NULL);
if (abort) {
&recov_state, FALSE);
if (e.error == 0)
goto out;
}
&recov_state, TRUE);
goto recov_retry;
}
if (e.error == 0)
goto out;
}
if ((e.error != 0) ||
if (need_start_op)
&recov_state, FALSE);
if (e.error == 0)
goto out;
}
if (need_start_op)
&recov_state, FALSE);
goto out;
}
/*
* It is possible for remap_lookup() to return with no error,
* but without providing the parent filehandle and attrs.
*/
/*
* Call remap_lookup() again, this time with the
* newpfh and pgar args in the first position.
*/
/*
* This remap_lookup call modifies pgar. The following
* line prevents trouble when checking the va_type of
* pva later in this code.
*/
if (nfs4_needs_recovery(&e, FALSE,
if (need_start_op) {
if (abort) {
FALSE);
if (e.error == 0)
goto out;
}
goto recov_retry;
}
if (e.error == 0)
goto out;
}
if (need_start_op)
goto out;
}
}
(e.error != 0) ||
if (need_start_op)
&recov_state, FALSE);
if (e.error == 0)
goto out;
}
}
if (need_start_op)
out:
if (newfh.nfs_fh4_len != 0)
if (newpfh.nfs_fh4_len != 0)
return (e.error);
}
#ifdef DEBUG
size_t r_path_memuse = 0;
#endif
/*
* NFS client failover support
*
* sv4_free() frees the malloc'd portion of a "servinfo_t".
*/
void
{
struct knetconfig *knconf;
if (svp->sv_secdata)
if (svp->sv_save_secinfo &&
if (svp->sv_secinfo)
}
}
}
}
}
void
{
int *ip;
char *buf;
char *cp;
/*
* 13 == "(file handle:"
* maximum of NFS_FHANDLE / sizeof (*ip) elements in fh_buf times
* 1 == ' '
* 8 == maximum strlen of "%x"
* 3 == ")\n\0"
*/
return;
while (*cp != '\0')
cp++;
ip++) {
while (*cp != '\0')
cp++;
}
}
/*
* The NFSv4 readdir cache subsystem.
*
* We provide a set of interfaces to allow the rest of the system to utilize
* a caching mechanism while encapsulating the details of the actual
* implementation. This should allow for better maintainability and
* extensibility by consolidating the implementation details in one location.
*/
/*
* Comparator used by AVL routines.
*/
static int
rddir4_cache_compar(const void *x, const void *y)
{
if (a->nfs4_cookie == b->nfs4_cookie) {
return (0);
return (-1);
return (1);
}
if (a->nfs4_cookie < b->nfs4_cookie)
return (-1);
return (1);
}
/*
* Allocate an opaque handle for the readdir cache.
*/
void
{
}
/*
* Purge the cache of all cached readdir responses.
*/
void
{
return;
}
}
/*
* Destroy the readdir cache.
*/
void
{
return;
}
/*
* Locate a readdir response from the readdir cache.
*
* Return values:
*
* NULL - If there is an unrecoverable situation, e.g. the operation was
* interrupted.
*
* rddir4_cache * - A pointer to a rddir4_cache is returned to the caller.
* The flags are set appropriately, such that the caller knows
* what state the entry is in.
*/
{
top:
/*
* Check to see if the readdir cache has been disabled. If so, then
* simply allocate an rddir4_cache entry and return it, since caching
* operations do not apply.
*/
/*
* Drop the lock because we are doing a sleeping
* allocation.
*/
return (rdc);
}
return (nrdc);
}
/*
* If we didn't find an entry then create one and insert it
* into the cache.
*/
/*
* Check for the case where we have made a second pass through
* the cache due to a lockless allocation. If we find that no
* thread has already inserted this entry, do the insert now
* and return.
*/
return (nrdc);
}
#ifdef DEBUG
#endif
/*
* First, try to allocate an entry without sleeping. If that
* fails then drop the lock and do a sleeping allocation.
*/
return (nrdc);
}
/*
* Drop the lock and do a sleeping allocation. We incur
* additional overhead by having to search the cache again,
* but this case should be rare.
*/
/*
* We need to take another pass through the cache
* since we dropped our lock to perform the alloc.
* Another thread may have come by and inserted the
* entry we are interested in.
*/
goto top;
}
/*
* Check to see if we need to free our entry. This can happen if
* another thread came along and beat us to the insert. We can
* safely call rddir4_cache_free directly because no other thread
* would have a reference to this entry.
*/
#ifdef DEBUG
#endif
/*
* Found something. Make sure it's ready to return.
*/
/*
* If the cache entry is in the process of being filled in, wait
* until this completes. The RDDIRWAIT bit is set to indicate that
* someone is waiting and when the thread currently filling the entry
* is done, it should do a cv_broadcast to wakeup all of the threads
* waiting for it to finish. If the thread wakes up to find that
* someone new is now trying to complete the entry, go back
* to sleep.
*/
/*
* The entry is not complete.
*/
#ifdef DEBUG
#endif
/*
* We got interrupted, probably the user
* typed ^C or an alarm fired. We free the
* new entry if we allocated one.
*/
return (NULL);
}
}
}
/*
* The entry we were waiting on may have been purged from
* the cache and should no longer be used, release it and
* start over.
*/
goto top;
}
/*
* The entry is completed. Return it.
*/
return (rdc);
}
/*
* Allocate a cache element and return it. Can return NULL if memory is
* low.
*/
static rddir4_cache *
rddir4_cache_alloc(int flags)
{
rc->nfs4_cookie = 0;
rc->nfs4_ncookie = 0;
/*
* A readdir is required so set the flag.
*/
#ifdef DEBUG
#endif
}
return (rc);
}
/*
* Increment the reference count to this cache element.
*/
static void
{
}
/*
* Release a reference to this cache element. If the count is zero then
* free the element.
*/
void
{
/*
* Check to see if we have any waiters. If so, we can wake them
* so that they can proceed.
*/
}
} else
}
/*
* Free a cache element.
*/
static void
{
#ifdef DEBUG
#endif
}
/*
* Snapshot callback for nfs:0:nfs4_client as registered with the kstat
* framework.
*/
static int
{
if (rw == KSTAT_WRITE) {
#ifdef DEBUG
/*
* Currently only the global zone can write to kstats, but we
* add the check just for paranoia.
*/
if (INGLOBALZONE(curproc))
&clstat4_debug, sizeof (clstat4_debug));
#endif
} else {
#ifdef DEBUG
/*
* If we're displaying the "global" debug kstat values, we
* display them as-is to all zones since in fact they apply to
* the system as a whole.
*/
sizeof (clstat4_debug));
#endif
}
return (0);
}
/*
* Zone support
*/
static void *
{
#ifdef DEBUG
#endif
}
return (nfscl);
}
/*ARGSUSED*/
static void
{
return;
clreclaim4_zone(nfscl, 0);
}
}
/*
* Called by endpnt_destructor to make sure the client handles are
* cleaned up before the RPC endpoints. This becomes a no-op if
* clfini_zone (above) is called first. This function is needed
* (rather than relying on clfini_zone to clean up) because the ZSD
* callbacks have no ordering mechanism, so we have no way to ensure
* that clfini_zone is called before endpnt_destructor.
*/
void
{
clreclaim4_zone(nfscl, 0);
break;
}
}
}
int
nfs4_subr_init(void)
{
/*
* Allocate and initialize the client handle cache
*/
NULL, 0);
/*
* Initialize the list of per-zone client handles (and associated data).
* This needs to be done before we call zone_key_create().
*/
/*
* Initialize the zone_key for per-zone client handle lists.
*/
if (nfs4err_delay_time == 0)
return (0);
}
/*
* nfs4_subr_fini - teardown routine paired (by name) with nfs4_subr_init().
*
* Deletes the zone key nfs4clnt_zone_key; the result of zone_key_delete()
* is deliberately discarded via the (void) cast. Takes no arguments and
* always returns 0.
*
* NOTE(review): the "Deallocate the client handle cache" comment below has
* no statement under it in this view -- confirm that the cache is actually
* destroyed in the complete source.
*/
int
nfs4_subr_fini(void)
{
/*
* Deallocate the client handle cache
*/
/*
* Destroy the zone_key
*/
(void) zone_key_delete(nfs4clnt_zone_key);
return (0);
}
/*
* Set or Clear direct I/O flag
* VOP_RWLOCK() is held for write access to prevent a race condition
* which would occur if a process is in the middle of a write when
* directio flag gets set. It is possible that all pages may not get flushed.
*
* This is a copy of nfs_directio, changes here may need to be made
* there and vice versa.
*/
int
{
int error = 0;
if (cmd == DIRECTIO_ON) {
return (0);
/*
* Flush the page cache.
*/
return (0);
}
if (nfs4_has_pages(vp) &&
if (error) {
}
return (error);
}
}
return (0);
}
if (cmd == DIRECTIO_OFF) {
return (0);
}
return (EINVAL);
}
/*
* Return TRUE if the file has any pages. Always go back to
* the master vnode to check v_pages since none of the shadows
* can have pages.
*/
{
return (vn_has_cached_data(vp));
}
/*
* This table is used to determine whether the client should attempt
* failover based on the clnt_stat value returned by CLNT_CALL. The
* clnt_stat is used as an index into the table. If
* the error value that corresponds to the clnt_stat value in the
* table is non-zero, then that is the error to be returned AND
* that signals that failover should be attempted.
*
* Special note: If the RPC_ values change, then direct indexing of the
* table is no longer valid, but having the RPC_ values in the table
* allows the functions to detect the change and issue a warning.
* In this case, the code will always attempt failover as a defensive
* measure.
*/
static struct try_failover_tab {
int error;
} try_failover_table [] = {
RPC_SUCCESS, 0,
RPC_VERSMISMATCH, 0,
RPC_AUTHERROR, 0,
RPC_PROGUNAVAIL, 0,
RPC_PROCUNAVAIL, 0,
RPC_INTR, 0,
RPC_TLIERROR, 0,
RPC_UDERROR, 0,
RPC_INPROGRESS, 0,
};
/*
* nfs4_try_failover - determine whether the client should
* attempt failover based on the values stored in the nfs4_error_t.
*/
int
{
return (TRUE);
return (FALSE);
}
/*
* try_failover - internal version of nfs4_try_failover, called
* only by rfscall and aclcall. Determine if failover is warranted
* based on the clnt_stat and return the error number if it is.
*/
static int
{
int err = 0;
if (rpc_status == RPC_SUCCESS)
return (0);
#ifdef DEBUG
if (rpc_status != 0 && nfs4_try_failover_any) {
goto done;
}
#endif
/*
* The rpc status is used as an index into the table.
* If the rpc status is outside of the range of the
* table or if the rpc error numbers have been changed
* since the table was constructed, then print a warning
* (DEBUG only) and try failover anyway. Otherwise, just
* grab the resulting error number out of the table.
*/
sizeof (try_failover_table)/sizeof (try_failover_table[0]) ||
#ifdef DEBUG
#endif
} else
done:
if (rpc_status)
"nfs4_try_failover: %strying failover on error %d",
return (err);
}
void
{
}
void
{
}
#ifdef DEBUG
/*
* Return a 16-bit hash for filehandle, stateid, clientid, owner.
* Uses the same algorithm as for NFS v3.
*
*/
int
{
int i, rem;
/* protect against non word aligned */
len &= ~3;
}
/* hash left-over bytes */
for (i = 0; i < rem; i++)
return (key & 0xffff);
}
/*
* rnode4info - return filehandle and path information for an rnode.
* XXX MT issues: uses a single static buffer, no locking of path.
*/
char *
{
static char buf[80];
char *path;
char *type;
return ("null");
type = "attr";
type = "attrdir";
type = "root";
type = "dir";
type = "file";
else
type = "other";
return (buf);
}
#endif