/* nfs4_vfsops.c revision eabd0450c0ea06b7993daac8f9545c7061ae7cae */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
* All Rights Reserved
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/vfs_opreg.h>
#include <sys/pathname.h>
#include <sys/sysmacros.h>
#include <sys/bootconf.h>
#include <sys/netconfig.h>
#include <rpc/rpcsec_gss.h>
#include <nfs/nfs_clnt.h>
#include <nfs/nfs4_clnt.h>
/*
* Arguments passed to thread to free data structures from forced unmount.
*/
typedef struct {
	vfs_t	*fm_vfsp;
	int	fm_flag;
	cred_t	*fm_cr;
} freemountargs_t;
static void nfs4_free_mount_thread(freemountargs_t *);
/*
 * From rpcsec module (common/rpcsec).
 */
extern void sec_clnt_freeinfo(struct sec_data *);
/*
* The order and contents of this structure must be kept in sync with that of
* rfsreqcnt_v4_tmpl in nfs_stats.c
*/
static char *rfsnames_v4[] = {
"null", "compound", "reserved", "access", "close", "commit", "create",
"delegpurge", "delegreturn", "getattr", "getfh", "link", "lock",
"lockt", "locku", "lookup", "lookupp", "nverify", "open", "openattr",
"open_confirm", "open_downgrade", "putfh", "putpubfh", "putrootfh",
"read", "readdir", "readlink", "remove", "rename", "renew",
"restorefh", "savefh", "secinfo", "setattr", "setclientid",
"setclientid_confirm", "verify", "write"
};
/*
* nfs4_max_mount_retry is the number of times the client will redrive
* a mount compound before giving up and returning failure. The intent
* is to redrive mount compounds which fail NFS4ERR_STALE so that
* if a component of the server path being mounted goes stale, it can
* "recover" by redriving the mount compund (LOOKUP ops). This recovery
* code is needed outside of the recovery framework because mount is a
* of the server path being mounted. The recovery code recovers real
* client objects, not STALE FHs which map to components of the server
* path being mounted.
*
* We could just fail the mount on the first time, but that would
* instantly trigger failover (from nfs4_mount), and the client should
* try to re-lookup the STALE FH before doing failover. The easiest
* way to "re-lookup" is to simply redrive the mount compound.
*/
static int nfs4_max_mount_retry = 2;
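/*
 * Illustrative sketch (standalone C, not part of this kernel module): the
 * redrive pattern described above.  An operation is retried up to
 * "max_retry" times as long as it keeps failing with a stale-style error.
 * The op() callback and the stale_errno value are assumptions made only
 * for this example.
 */
static int
example_redrive_until_not_stale(int (*op)(void *), void *arg, int max_retry,
    int stale_errno)
{
	int num_retry = max_retry;
	int error;

	do {
		error = op(arg);
		if (error != stale_errno)
			break;	/* success, or an error we will not retry */
	} while (num_retry-- > 0);

	return (error);
}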
/*
* nfs4 vfs operations.
*/
static void nfs4_freevfs(vfs_t *);
int nfs4_vfsinit(void);
void nfs4_vfsfini(void);
static void nfs4setclientid_init(void);
static void nfs4setclientid_fini(void);
static void nfs4setclientid_otw(mntinfo4_t *, servinfo4_t *, cred_t *,
    struct nfs4_server *, nfs4_error_t *, int *);
static void destroy_nfs4_server(nfs4_server_t *);
extern void nfs4_ephemeral_init(void);
extern void nfs4_ephemeral_fini(void);
/*
* Initialize the vfs structure
*/
static int nfs4fstyp;
/*
* Debug variable to check for rdma based
 * transport startup and cleanup. Controlled
 * through /etc/system.
*/
extern int rdma_debug;
int
nfs4init(int fstyp, char *name)
{
static const fs_operation_def_t nfs4_vfsops_template[] = {
};
int error;
nfs4_vfsops = NULL;
if (error != 0) {
"nfs4init: bad vfs ops template");
goto out;
}
if (error != 0) {
"nfs4init: bad vnode ops template");
goto out;
}
if (error != 0) {
"nfs4init: bad trigger vnode ops template");
goto out;
}
(void) nfs4_vfsinit();
(void) nfs4_init_dot_entries();
out:
if (error) {
if (nfs4_trigger_vnodeops != NULL)
if (nfs4_vnodeops != NULL)
(void) vfs_freevfsops_by_type(fstyp);
}
return (error);
}
void
nfs4fini(void)
{
(void) nfs4_destroy_dot_entries();
nfs4_vfsfini();
}
/*
* Create a new sec_data structure to store AUTH_DH related data:
* netname, syncaddr, knetconfig. There is no AUTH_F_RPCTIMESYNC
 * flag set for NFS V4 since we avoid contacting the rpcbind
 * daemon and use the IP time service (IPPORT_TIMESERVER) instead.
*
* sec_data can be freed by sec_clnt_freeinfo().
*/
static struct sec_data *
struct knetconfig *knconf) {
char *pf, *p;
return (NULL);
/*
* duplicate the knconf information for the
* new opaque data.
*/
/* move server netname to the sec_data structure */
return (secdata);
}
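/*
 * Illustrative sketch (standalone C): the general shape of what a routine
 * like the one above does -- bundle the mechanism-specific AUTH_DH pieces
 * (server netname, sync address, transport config) behind an opaque
 * pointer inside a generic security-data wrapper, so a single free routine
 * can later release it.  The types, field names, and helper below are
 * invented for the example; they are not the kernel's sec_data or AUTH_DH
 * client-data definitions.
 */
#include <stdlib.h>
#include <string.h>

struct example_dh_data {
	char	*netname;	/* server's netname, heap copy */
	void	*syncaddr;	/* time-sync address, opaque here */
	void	*knconf;	/* transport config, opaque here */
};

struct example_sec_data {
	int	 rpcflavor;	/* e.g. AUTH_DH */
	void	*data;		/* points at mechanism-specific data */
};

#define	EXAMPLE_AUTH_DH	3	/* AUTH_DH/AUTH_DES flavor number in ONC RPC */

static struct example_sec_data *
example_create_authdh_data(const char *netname, void *syncaddr, void *knconf)
{
	struct example_sec_data *sd;
	struct example_dh_data *dh;

	if (netname == NULL)
		return (NULL);

	sd = calloc(1, sizeof (*sd));
	dh = calloc(1, sizeof (*dh));
	if (sd == NULL || dh == NULL) {
		free(sd);
		free(dh);
		return (NULL);
	}

	dh->netname = strdup(netname);		/* deep copy of the netname */
	if (dh->netname == NULL) {
		free(dh);
		free(sd);
		return (NULL);
	}
	dh->syncaddr = syncaddr;
	dh->knconf = knconf;

	sd->rpcflavor = EXAMPLE_AUTH_DH;
	sd->data = dh;				/* opaque mechanism data */
	return (sd);
}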
/*
* Returns (deep) copy of sec_data_t. Allocates all memory required; caller
* is responsible for freeing.
*/
return (NULL);
return (NULL);
return (tsecdata);
}
} else {
}
return (tsecdata);
}
{
return (NULL);
KM_SLEEP);
return (tdata);
}
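/*
 * Illustrative sketch (standalone C): the deep-copy pattern the comment
 * above describes -- duplicate both the wrapper structure and every heap
 * buffer it points at, so the caller owns an independent copy and can
 * free it without touching the original.  The example type is invented
 * for illustration.
 */
#include <stdlib.h>
#include <string.h>

struct example_blob {
	size_t	len;
	char	*buf;
};

static struct example_blob *
example_blob_dup(const struct example_blob *src)
{
	struct example_blob *dst;

	if (src == NULL)
		return (NULL);

	dst = malloc(sizeof (*dst));
	if (dst == NULL)
		return (NULL);

	dst->len = src->len;
	dst->buf = malloc(src->len);
	if (dst->buf == NULL) {
		free(dst);
		return (NULL);
	}
	memcpy(dst->buf, src->buf, src->len);	/* copy the pointed-to data */
	return (dst);
}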
static int
{
/*
* Iterate over the servinfo4 list to make sure
* we do not have a duplicate. Skip any servinfo4
* that has been marked "NOT IN USE"
*/
continue;
}
continue;
/* it's a duplicate */
return (1);
}
}
/* it's not a duplicate */
return (0);
}
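/*
 * Illustrative sketch (standalone C): the duplicate check described in the
 * comment above -- walk a singly linked server list, skip entries flagged
 * as not in use, and report whether a candidate's address already appears.
 * The types and the "not in use" flag are invented for the example.
 */
#include <string.h>

struct example_srv {
	struct example_srv *next;
	int	flags;		/* EX_NOT_IN_USE means skip this entry */
	char	*addr;		/* printable server address */
};

#define	EX_NOT_IN_USE	0x1

/* Returns 1 if "cand" duplicates an in-use entry in "list", else 0. */
static int
example_chkdup(const struct example_srv *list, const struct example_srv *cand)
{
	const struct example_srv *sp;

	for (sp = list; sp != NULL; sp = sp->next) {
		if (sp == cand || (sp->flags & EX_NOT_IN_USE))
			continue;
		if (strcmp(sp->addr, cand->addr) == 0)
			return (1);	/* it's a duplicate */
	}
	return (0);	/* it's not a duplicate */
}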
void
{
}
}
}
}
}
}
}
}
}
}
int
{
int error;
int flags;
char *p, *pf;
char *userbufptr;
STRUCT_SIZE(args))))
return (EFAULT);
/*
* Allocate space for a knetconfig structure and
* its strings and copy in from user-land.
*/
STRUCT_SIZE(knconf_tmp))) {
return (EFAULT);
}
if (get_udatamodel() != DATAMODEL_LP64) {
} else {
}
if (error) {
kmem_free(p, KNC_STRSIZE);
return (error);
}
if (error) {
kmem_free(p, KNC_STRSIZE);
return (error);
}
/*
* Get server address
*/
STRUCT_SIZE(addr_tmp))) {
goto errout;
}
goto errout;
}
/*
* Get the root fhandle
*/
if (error)
goto errout;
/* Volatile fh: keep server paths, so use actual-size strings */
/*
* Get server's hostname
*/
if (flags & NFSMNT_HOSTNAME) {
if (error)
goto errout;
} else {
}
/*
* If there are syncaddr and netname data, load them in. This is
* to support data needed for NFSV4 when AUTH_DH is the negotiated
 * flavor via SECINFO (instead of using the MOUNT protocol as in V3).
*/
netname[0] = '\0';
if (flags & NFSMNT_SECURE) {
/* get syncaddr */
STRUCT_SIZE(addr_tmp))) {
goto errout;
}
goto errout;
}
/* get server's netname */
goto errout;
}
}
/*
 * Get the extension data which has the security data structure.
* This includes data for AUTH_SYS as well.
*/
if (flags & NFSMNT_NEWARGS) {
/*
* Indicating the application is using the new
* sec_data structure to pass in the security
* data.
*/
if (STRUCT_FGETP(args,
&secdata, get_udatamodel());
}
}
}
if (error)
goto errout;
/*
* Failover support:
*
* We may have a linked list of nfs_args structures,
* which means the user is looking for failover. If
* the mount is either not "read-only" or "soft",
* we want to bail out with EINVAL.
*/
if (error)
return (error);
}
/*
* nfs mount vfsop
* Set up mount info record and attach it to vfs struct.
*/
int
{
int error;
return (EPERM);
return (ENOTDIR);
/*
* get arguments
*
* nfs_args is now versioned and is extensible, so
* uap->datalen might be different from sizeof (args)
* in a compatible situation.
*/
more:
else
if (error) {
if (args) {
}
return (error);
}
} else {
}
/*
* If the request changes the locking type, disallow the remount,
* because it's questionable whether we can transfer the
* locking state correctly.
*/
}
if (old_mi_llock != new_mi_llock)
return (EBUSY);
}
return (0);
}
/*
* For ephemeral mount trigger stub vnodes, we have two problems
* to solve: racing threads will likely fail the v_count check, and
* we want only one to proceed with the mount.
*
* For stubs, if the mount has already occurred (via a racing thread),
* just return success. If not, skip the v_count check and proceed.
* Note that we are already serialised at this point.
*/
/* mntpt is a v4 stub vnode */
/* ephemeral mount has already occurred */
return (0);
}
} else {
/* mntpt is a non-v4 or v4 non-stub vnode */
}
return (EBUSY);
}
}
/* make sure things are zeroed for errout: */
/*
* A valid knetconfig structure is required.
*/
if (!(flags & NFSMNT_KNCONF) ||
}
return (EINVAL);
}
}
return (EINVAL);
}
/*
* Allocate a servinfo4 struct.
*/
if (svp_tail) {
} else {
svp_2ndlast = svp;
}
/*
* Get server address
*/
goto errout;
}
/*
* Get the root fhandle
*/
goto errout;
}
/*
* Get server's hostname
*/
if (flags & NFSMNT_HOSTNAME) {
MAXNETNAMELEN)) {
goto errout;
}
} else {
char *p = "unknown-host";
}
/*
* RDMA MOUNT SUPPORT FOR NFS v4.
 * Establish whether it is possible to use RDMA; if so, overload the
 * knconf with an RDMA-specific knconf and free the original knconf.
*/
/*
* Determine the addr type for RDMA, IPv4 or v6.
*/
&rdma_knconf) == 0) {
/*
 * If successful, hijack the original knconf and
* replace with the new one, depending on the flags.
*/
} else {
if (flags & NFSMNT_TRYRDMA) {
#ifdef DEBUG
if (rdma_debug)
"no RDMA onboard, revert\n");
#endif
}
if (flags & NFSMNT_DORDMA) {
/*
* If proto=rdma is specified and no RDMA
 * path to this server is available then
* ditch this server.
* This is not included in the mountable
* server list or the replica list.
 * Check if more servers are specified (the
 * failover case); otherwise bail out of the mount.
*/
data = (char *)
!(flags & NFSMNT_SOFT)) {
svp_2ndlast = NULL;
goto more;
} else {
NULL;
goto more;
}
}
} else {
/*
* This is the last server specified
* in the nfs_args list passed down
 * and it's not RDMA capable.
*/
/*
 * Is this the only one?
*/
#ifdef DEBUG
if (rdma_debug)
"No RDMA srv");
#endif
goto errout;
} else {
/*
 * There is a list, since some
 * servers specified before
 * this one passed all requirements.
*/
goto proceed;
}
}
}
}
}
/*
* If there are syncaddr and netname data, load them in. This is
* to support data needed for NFSV4 when AUTH_DH is the negotiated
 * flavor via SECINFO (instead of using the MOUNT protocol as in V3).
*/
}
/*
 * Get the extension data which has the security data structure.
* This includes data for AUTH_SYS as well.
*/
if (flags & NFSMNT_NEWARGS) {
switch (args->nfs_args_ext) {
case NFS_ARGS_EXTA:
case NFS_ARGS_EXTB:
/*
* Indicating the application is using the new
* sec_data structure to pass in the security
* data.
*/
/*
 * Need to validate the flavor here if called
 * from system space; user space was already
 * validated by the nfs_copyin function.
*/
case AUTH_NONE:
case AUTH_UNIX:
case AUTH_LOOPBACK:
case AUTH_DES:
case RPCSEC_GSS:
break;
default:
goto errout;
}
}
break;
default:
break;
}
} else if (flags & NFSMNT_SECURE) {
/*
* NFSMNT_SECURE is deprecated but we keep it
* to support the rogue user-generated application
* that may use this undocumented interface to do
* AUTH_DH security, e.g. our own rexd.
*
* Also note that NFSMNT_SECURE is used for passing
* AUTH_DH info to be used in negotiation.
*/
} else {
}
/*
 * The user does not explicitly specify a flavor, and a user
* defined default flavor is passed down.
*/
if (flags & NFSMNT_SECDEFAULT) {
}
/*
* Failover support:
*
* We may have a linked list of nfs_args structures,
* which means the user is looking for failover. If
* the mount is either not "read-only" or "soft",
* we want to bail out with EINVAL.
*/
goto more;
}
goto errout;
}
/*
* Determine the zone we're being mounted into.
*/
if (getzoneid() == GLOBAL_ZONEID) {
goto errout;
}
}
if (is_system_labeled()) {
if (error > 0)
goto errout;
if (error == -1) {
/* change mount to read-only to prevent write-down */
}
}
/*
* Stop the mount from going any further if the zone is going away.
*/
goto errout;
}
/*
* Get root vnode.
*/
if (error) {
/* if nfs4rootvp failed, it will free svp_head */
goto errout;
}
/*
* Send client id to the server, if necessary
*/
if (error)
goto errout;
/*
* Set option fields in the mount info record
*/
}
if (error)
goto errout;
/*
* Time to tie in the mirror mount info at last!
*/
if (flags & NFSMNT_EPHEMERAL)
if (error) {
rp4_rmhash(rp);
}
/* need to remove it from the zone */
if (removed)
}
return (error);
}
if (svp_head)
}
}
return (error);
}
#ifdef DEBUG
#define VERS_MSG "NFS4 server "
#else
#define VERS_MSG "NFS server "
#endif
#define READ_MSG \
VERS_MSG "%s returned 0 for read transfer size"
#define WRITE_MSG \
VERS_MSG "%s returned 0 for write transfer size"
#define SIZE_MSG \
VERS_MSG "%s returned 0 for maximum file size"
/*
* Get the symbolic link text from the server for a given filehandle
* of that symlink.
*
* (get symlink text) PUTFH READLINK
*/
static int
int flags)
{
int doqueue;
nfs4_error_t e;
int num_retry = nfs4_max_mount_retry;
recov_state.rs_flags = 0;
nfs4_error_zinit(&e);
if (! recovery) {
if (e.error) {
return (e.error);
}
}
/* 0. putfh symlink fh */
/* 1. readlink */
doqueue = 1;
"getlinktext_otw: initiating recovery\n"));
if (!e.error)
(void) xdr_free(xdr_COMPOUND4res_clnt,
goto recov_retry;
}
}
/*
*/
if (e.error != 0) {
if (! recovery)
return (e.error);
}
if (! recovery)
return (e.error);
}
/* res.status == NFS4_OK */
/* treat symlink name as data */
if (! recovery)
return (0);
}
/*
* Skip over consecutive slashes and "/./" in a pathname.
*/
void
{
} else {
pnp->pn_pathlen--;
}
}
}
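/*
 * Illustrative sketch (standalone C): skipping consecutive '/' characters
 * and "./" path segments at the front of a path string, as the comment
 * above describes for the kernel pathname structure.
 */
static const char *
example_skip_slash_dot(const char *p)
{
	while (p[0] == '/' || (p[0] == '.' && p[1] == '/')) {
		if (p[0] == '/')
			p++;		/* skip a redundant slash */
		else
			p += 2;		/* skip a "./" segment */
	}
	return (p);
}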
/*
* Resolve a symbolic link path. The symlink is in the nth component of
* svp->sv_path and has an nfs4 file handle "fh".
* Upon return, the sv_path will point to the new path that has the nth
* component resolved to its symlink text.
*/
int
{
char *oldpath;
char component[MAXNAMELEN];
int oldpathlen;
/* Get the symbolic link text over the wire. */
return (error);
/*
* Compose the new pathname.
* Note:
* - only the nth component is resolved for the pathname.
* - pathname.pn_pathlen does not count the ending null byte.
*/
return (error);
}
/*
* Skip over previous components from the oldpath so that the
* oldpn.pn_path will point to the symlink component. Skip
* leading slashes and "/./" (no OP_LOOKUP on ".") so that
 * pn_getcomponent can get the component.
*/
for (i = 1; i < nth; i++) {
if (error)
goto out;
}
/*
 * Copy the old path up to the component right before the symlink
* if the symlink is not an absolute path.
*/
if (symlink[0] != '/') {
newpn.pn_pathlen++;
}
/* copy the resolved symbolic link text */
goto out;
}
/*
* Check if there is any remaining path after the symlink component.
* First, skip the symlink component.
*/
goto out;
/*
* Copy the remaining path to the new pathname if there is any.
*/
if (addlen > 0) {
goto out;
}
}
/* get the newpath and store it in the servinfo4_t */
out:
return (error);
}
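/*
 * Illustrative sketch (standalone C): composing the new path described
 * above.  The nth component of "oldpath" is replaced by "symlink"; if the
 * symlink text is relative, everything before that component is kept, and
 * whatever followed the component is appended either way.  Buffer handling
 * is simplified for the example.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *
example_resolve_component(const char *oldpath, int nth, const char *symlink)
{
	const char *p = oldpath;
	const char *comp_start, *comp_end;
	int i;
	size_t len;
	char *newpath;

	/* Find the start of the nth component (1-based). */
	for (i = 1; ; i++) {
		while (*p == '/')
			p++;			/* skip leading slashes */
		if (*p == '\0')
			return (NULL);		/* fewer than nth components */
		if (i == nth)
			break;
		while (*p != '\0' && *p != '/')
			p++;			/* skip this component */
	}
	comp_start = p;
	comp_end = comp_start;
	while (*comp_end != '\0' && *comp_end != '/')
		comp_end++;

	len = strlen(oldpath) + strlen(symlink) + 2;
	newpath = malloc(len);
	if (newpath == NULL)
		return (NULL);

	if (symlink[0] == '/') {
		/* Absolute symlink: it replaces everything before it. */
		(void) snprintf(newpath, len, "%s%s", symlink, comp_end);
	} else {
		/* Relative symlink: keep the prefix before the component. */
		(void) snprintf(newpath, len, "%.*s%s%s",
		    (int)(comp_start - oldpath), oldpath, symlink, comp_end);
	}
	return (newpath);
}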
/*
* Get the root filehandle for the given filesystem and server, and update
* svp.
*
* If NFS4_GETFH_NEEDSOP is set, then use nfs4_start_fop and nfs4_end_fop
* to coordinate with recovery. Otherwise, the caller is assumed to be
* the recovery thread or have already done a start_fop.
*
* Errors are returned by the nfs4_error_t parameter.
*/
static void
{
int doqueue = 1;
int num_argops;
int llndx;
int nthcomp;
return;
}
recov_state.rs_flags = 0;
if (!recovery) {
&recov_state, NULL);
/*
 * If recovery has been started and this request was
* initiated by a mount, then we must wait for recovery
* to finish before proceeding, otherwise, the error
* cleanup would remove data structures needed by the
* recovery thread.
*/
"nfs4getfh_otw: waiting 4 recovery\n"));
}
return;
}
/*
* If the client does not specify a specific flavor to use
* and has not gotten a secinfo list from the server yet,
* retrieve the secinfo list from the server and use a
* flavor from the list to mount.
*
 * If we fail to get the secinfo list from the server, then
* try the default flavor.
*/
}
}
if (recovery)
else
lookuparg.trailer_len = 0;
/* choose public or root filehandle */
if (flags & NFS4_GETFH_PUBLIC)
else
/* get fh */
"nfs4getfh_otw: %s call, mi 0x%p",
if (needrecov) {
if (recovery) {
(void) xdr_free(xdr_COMPOUND4res_clnt,
return;
}
(CE_NOTE, "nfs4getfh_otw: initiating recovery\n"));
}
/* have another go? */
goto recov_retry;
return;
}
/*
* No recovery, but check if error is set.
*/
if (!recovery)
return;
}
/* for non-recovery errors */
if (!recovery) {
}
return;
}
/*
* If any intermediate component in the path is a symbolic link,
* resolve the symlink, then try mount again using the new path.
*/
int where;
/*
* This must be from OP_LOOKUP failure. The (cfh) for this
 * OP_LOOKUP is a symlink node. Find out where the
* OP_GETFH is for the (cfh) that is a symlink node.
*
* Example:
* (mount) PUTROOTFH, GETFH, LOOKUP comp1, GETFH, GETATTR,
* LOOKUP comp2, GETFH, GETATTR, LOOKUP comp3, GETFH, GETATTR
*
* LOOKUP comp3 fails with SYMLINK because comp2 is a symlink.
* In this case, where = 7, nthcomp = 2.
*/
/*
* Need to call nfs4_end_op before resolve_sympath to avoid
* potential nfs4_start_op deadlock.
*/
if (!recovery)
flags);
return;
goto recov_retry;
}
/* getfh */
/* getattr fsinfo res */
resop++;
else
else
mi->mi_maxfilesize =
mi->mi_maxfilesize);
/*
 * If the final component is a symbolic link, resolve the symlink,
* then try mount again using the new path.
*
 * Assume no symbolic link for the root filesystem "/".
*/
/*
* nthcomp is the total result length minus
* the 1st 2 OPs (PUTROOTFH, GETFH),
* then divided by 3 (LOOKUP,GETFH,GETATTR)
*
* e.g. PUTROOTFH GETFH LOOKUP 1st-comp GETFH GETATTR
* LOOKUP 2nd-comp GETFH GETATTR
*
* (8 - 2)/3 = 2
*/
/*
* Need to call nfs4_end_op before resolve_sympath to avoid
* potential nfs4_start_op deadlock. See RFE 4777612.
*/
if (!recovery)
flags);
return;
goto recov_retry;
}
/*
* We need to figure out where in the compound the getfh
* for the parent directory is. If the object to be mounted is
* the root, then there is no lookup at all:
* PUTROOTFH, GETFH.
* If the object to be mounted is in the root, then the compound is:
* PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR.
* In either of these cases, the index of the GETFH is 1.
* If it is not at the root, then it's something like:
* PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR,
* LOOKUP, GETFH, GETATTR
* In this case, the index is llndx (last lookup index) - 2.
*/
else {
}
/* save the filehandles for the replica */
/* initialize fsid and supp_attrs for server fs */
svp->sv_supp_attrs =
if (!recovery)
}
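/*
 * Illustrative sketch (standalone C): the index arithmetic described in
 * the comments above for a mount compound laid out as
 * PUTROOTFH, GETFH, then (LOOKUP, GETFH, GETATTR) per path component.
 */

/* Number of path components represented in "num_results" compound results. */
static int
example_nthcomp(int num_results)
{
	return ((num_results - 2) / 3);	/* e.g. (8 - 2) / 3 == 2 */
}

/*
 * Index of the GETFH for the parent directory: 1 when the object is the
 * root or directly under it, otherwise two slots before the last LOOKUP.
 */
static int
example_parent_getfh_index(int last_lookup_index)
{
	if (last_lookup_index <= 2)
		return (1);
	return (last_lookup_index - 2);
}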
/*
* Remap the root filehandle for the given filesystem.
*
 * Results are returned via the nfs4_error_t parameter.
*/
void
{
int getfh_flags;
char *orig_sv_path;
int orig_sv_pathlen, num_retry;
getfh_flags |=
/*
 * Just in case server path being mounted contains
 * symlinks and fails w/STALE, save the initial sv_path
 * so we can redrive the initial mount compound with the
* initial sv_path -- not a symlink-expanded version.
*
* This could only happen if a symlink was expanded
* and the expanded mount compound failed stale. Because
 * it could be the case that the symlink was removed at
 * the server (and possibly replaced with something else),
 * we need to use the initial sv_path when attempting
* to re-lookup everything and recover.
*/
do {
/*
* Get the root fh from the server. Retry nfs4_max_mount_retry
* (2) times if it fails with STALE since the recovery
* infrastructure doesn't do STALE recovery for components
* of the server path to the object being mounted.
*/
break;
/*
* For some reason, the mount compound failed. Before
* retrying, we need to restore the original sv_path
* because it might have contained symlinks that were
 * expanded by nfs4getfh_otw before the failure occurred.
 * Replace current sv_path with orig sv_path -- just in case
* it changed due to embedded symlinks.
*/
}
} while (num_retry-- > 0);
return;
}
/* shouldn't happen */
"nfs4_remap_root: server root vnode type (%d) doesn't "
}
/*
* It's possible that recovery took place on the filesystem
* and the server has been updated between the time we did
* the nfs4getfh_otw and now. Re-drive the otw operation
* to make sure we have a good fh.
*/
goto remap_retry;
}
static int
{
mntinfo4_t *mi;
int error = 0;
int i;
char *droptext = "";
nfs4_error_t e;
char *orig_sv_path;
/*
* Create a mount record and link it to the vfs struct.
*/
if (!(flags & NFSMNT_SOFT))
if ((flags & NFSMNT_NOPRINT))
if (flags & NFSMNT_INT)
if (flags & NFSMNT_PUBLIC)
if (flags & NFSMNT_MIRRORMOUNT)
else
mi->mi_grace_wait = 0;
mi->mi_srvsettime = 0;
if (flags & NFSMNT_DIRECTIO)
/*
* Make a vfs struct for nfs. We do this here instead of below
* because rtvp needs a vfs before we can do a getattr on it.
*
* Assign a unique device id to the mount
*/
do {
} while (vfs_devismounted(nfs_dev));
/*
* Initialize fields used to support async putpage operations.
*/
for (i = 0; i < NFS4_ASYNC_TYPES; i++)
/*
 * Initialize the open owner hash buckets.
 */
for (i = 0; i < NFS4_NUM_OO_BUCKETS; i++) {
sizeof (nfs4_open_owner_t),
}
/*
* Initialize the freed open owner list.
*/
mi->mi_foo_num = 0;
/*
* Initialize the msg buffer.
*/
mi->mi_msg_count = 0;
/*
* Initialize kstats
*/
/*
* Initialize the shared filehandle pool, and get the fname for
* the filesystem root.
*/
/*
* Save server path we're attempting to mount.
*/
/*
* Make the GETFH call to get root fh for each replica.
*/
droptext = ", dropping replica";
/*
* If the uid is set then set the creds for secure mounts
* by proxy processes such as automountd.
*/
}
VERS_MSG "Host %s is a duplicate%s",
continue;
}
/*
 * Just in case server path being mounted contains
 * symlinks and fails w/STALE, save the initial sv_path
 * so we can redrive the initial mount compound with the
* initial sv_path -- not a symlink-expanded version.
*
* This could only happen if a symlink was expanded
* and the expanded mount compound failed stale. Because
 * it could be the case that the symlink was removed at
 * the server (and possibly replaced with something else),
 * we need to use the initial sv_path when attempting
* to re-lookup everything and recover.
*
 * Other mount errors should eventually be handled here also
* (NFS4ERR_DELAY, NFS4ERR_RESOURCE). For now, all mount
* failures will result in mount being redriven a few times.
*/
do {
NFS4_GETFH_NEEDSOP, tcr, &e);
break;
/*
 * Replace current sv_path with orig sv_path -- just in
* case it changed due to embedded symlinks.
*/
KM_SLEEP);
}
} while (num_retry-- > 0);
if (error) {
VERS_MSG "initial call to %s failed%s: %m",
continue;
}
VERS_MSG "%s returned a bad file type for "
continue;
}
VERS_MSG "%s returned a different file type "
continue;
}
}
if (error == 0)
goto bad;
}
/*
* Make the root vnode without attributes.
*/
/*
* Start the manager thread responsible for handling async worker
* threads.
*/
vfsp, 0, minclsyspri);
/*
* Create the thread that handles over-the-wire calls for
* VOP_INACTIVE.
* This needs to happen after the manager thread is created.
*/
mi, 0, minclsyspri);
/* If we didn't get a type, get one now */
if (error)
goto bad;
}
return (0);
bad:
/*
* An error occurred somewhere, need to clean up...
*/
/*
* We need to release our reference to the root vnode and
* destroy the mntinfo4 struct that we just created.
*/
rp4_rmhash(rp);
}
if (removed)
/*
* This releases the initial "hold" of the mi since it will never
* be referenced by the vfsp. Also, when mount returns to vfs.c
* with an error, the vfsp will be destroyed, not rele'd.
*/
return (error);
}
/*
* vfs operations
*/
static int
{
mntinfo4_t *mi;
int removed;
return (EPERM);
/*
* If the request is coming from the wrong zone,
* we don't want to create any new threads, and
* performance is not a concern. Do everything
* inline.
*/
"nfs4_unmount x-zone forced unmount of vfs %p\n",
(void *)vfsp));
} else {
/*
* Free data structures asynchronously, to avoid
* blocking the current thread (for performance
* reasons only).
*/
}
return (0);
}
/*
* Wait until all asynchronous putpage operations on
* this file system are complete before flushing rnodes
* from the cache.
*/
if (nfs4_async_stop_sig(vfsp))
return (EINTR);
/*
* About the only reason that this would fail would be
* that the harvester is already busy tearing down this
* node. So we fail back to the caller and let them try
* again when needed.
*/
&must_unlock, &eph_tree)) {
/*
* Note that we ignore must_unlock
* because it is garbage at this point.
* I.e., it only has meaning upon
* success.
*/
return (EBUSY);
}
/*
* If there are any active vnodes on this file system,
* then the file system is busy and can't be unmounted.
*/
if (check_rtable4(vfsp)) {
return (EBUSY);
}
/*
* The unmount can't fail from now on, so record any
* ephemeral changes.
*/
/*
* There are no active files that could require over-the-wire
* calls to the server, so stop the async manager and the
* inactive thread.
*/
/*
* Destroy all rnodes belonging to this file system from the
* rnode hash queues and purge any resources allocated to
* them.
*/
if (removed)
return (0);
}
/*
* find root of nfs
*/
static int
{
mntinfo4_t *mi;
return (EPERM);
if (svp) {
return (ENOENT);
}
} else
}
return (ENOENT);
}
return (0);
}
static int
{
int error;
return (error);
return (0);
}
/*
* Get file system statistics.
*/
static int
{
int error;
if (error)
return (error);
if (!error) {
} else {
}
return (error);
}
static kmutex_t nfs4_syncbusy;
/*
* Flush dirty nfs files for file system vfsp.
* If vfsp == NULL, all nfs files are flushed.
*
* SYNC_CLOSE in flag is passed to us to
 * indicate that we are shutting down and/or
* rebooting.
*/
static int
{
/*
* Cross-zone calls are OK here, since this translates to a
* VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone.
*/
}
/*
 * If SYNC_CLOSE is set then we know that
 * the system is rebooting; mark the mntinfo
* for later examination.
*/
mntinfo4_t *mi;
}
}
return (0);
}
/*
* vget is difficult, if not impossible, to support in v4 because we don't
* know the parent directory or name, which makes it impossible to create a
* useful shadow vnode. And we need the shadow vnode for things like
* OPEN.
*/
/* ARGSUSED */
/*
* XXX Check nfs4_vget_pseudo() for dependency.
*/
static int
{
return (EREMOTE);
}
/*
 * nfs4_mountroot gets called in the case where we are diskless booting. All
* we need from here is the ability to get the server info and from there we
* can simply call nfs4_rootvp.
*/
/* ARGSUSED */
static int
{
int error;
int vfsflags;
char *root_path;
char *name;
mntinfo4_t *mi;
static char token[10];
/* do this BEFORE getfile which causes xid stamps to be initialized */
if (why == ROOT_REMOUNT) {
/*
* Shouldn't happen.
*/
panic("nfs4_mountroot: why == ROOT_REMOUNT");
}
if (why == ROOT_UNMOUNT) {
/*
* Nothing to do for NFS.
*/
return (0);
}
/*
* why == ROOT_INIT
*/
*name = 0;
/*
* Get server address
* Get the root path
* Get server's transport
* Get server's hostname
* Get options
*/
vfsflags = 0;
if (error == EPROTONOSUPPORT)
"mount_root failed: server doesn't support NFS V4");
else
"nfs4_mountroot: mount_root failed: %m");
return (error);
}
/*
* Force root partition to always be mounted with AUTH_UNIX for now
*/
if (error) {
return (error);
}
/*
* Send client id to the server, if necessary
*/
if (error) {
goto errout;
}
if (error) {
"nfs4_mountroot: invalid root mount options");
goto errout;
}
(void) vfs_lock_wait(vfsp);
if (error) {
}
return (error);
}
/*
* Initialization routine for VFS routines. Should only be called once
*/
int
nfs4_vfsinit(void)
{
return (0);
}
void
nfs4_vfsfini(void)
{
}
void
{
mntinfo4_t *mi;
/* need to release the initial hold */
}
/*
* Client side SETCLIENTID and SETCLIENTID_CONFIRM
*/
struct nfs4_server nfs4_server_lst =
{ &nfs4_server_lst, &nfs4_server_lst };
static void
nfs4setclientid_init(void)
{
}
static void
nfs4setclientid_fini(void)
{
}
/*
* Set the clientid for the server for "mi". No-op if the clientid is
* already set.
*
* The recovery boolean should be set to TRUE if this function was called
* by the recovery code, and FALSE otherwise. This is used to determine
* if we need to call nfs4_start/end_op as well as grab the mi_recovlock
* for adding a mntinfo4_t to a nfs4_server_t.
*
* Error is returned via 'n4ep'. If there was a 'n4ep->stat' error, then
* 'n4ep->error' is set to geterrno4(n4ep->stat).
*/
void
{
struct nfs4_server *np;
int num_retries = 0;
time_t lease_time = 0;
recov_state.rs_flags = 0;
if (!recovery)
if (!np) {
struct nfs4_server *tnp;
if (tnp) {
/*
 * Another thread snuck in and put the server on the list.
 * Since we aren't adding it to the nfs4_server_list,
* we need to set the ref count to 0 and destroy it.
*/
} else {
/*
* do not give list a reference until everything
* succeeds
*/
}
}
/*
* If we find the server already has N4S_CLIENTID_SET, then
 * just return; we've already done SETCLIENTID to that server.
*/
/* add mi to np's mntinfo4_list */
if (!recovery)
return;
}
/*
* Drop the mi_recovlock since nfs4_start_op will
* acquire it again for us.
*/
if (!recovery) {
return;
}
}
if (!recovery)
recovery);
return;
}
}
/* add mi to np's mntinfo4_list */
if (!recovery)
return;
}
/*
* Reset the N4S_CB_PINGED flag. This is used to
* indicate if we have received a CB_NULL from the
* server. Also we reset the waiter flag.
*/
/* any failure must now clear this flag */
/*
* If the uid is set then set the creds for secure mounts
* by proxy processes such as automountd.
*/
}
&retry_inuse);
}
}
/*
* Start recovery if failover is a possibility. If
* invoked by the recovery thread itself, then just
* return and let it handle the failover first. NB:
* recovery is not allowed if the mount is in progress
* since the infrastructure is not sufficiently setup
* to allow it. Just return the error (after suitable
* retries).
*/
/*
* Don't retry here, just return and let
* recovery take over.
*/
if (recovery)
/*
* Always retry if in recovery or once had
* contact with the server (but now it's
* overloaded).
*/
num_retries = 0;
num_retries = 0;
}
} else {
/*
 * Since everything succeeded, give the list a reference count if
* it hasn't been given one by add_new_nfs4_server() or if this
* is not a recovery situation in which case it is already on
* the list.
*/
}
}
if (!recovery)
if (retry_inuse) {
retry_inuse = 0;
} else
goto recov_retry;
}
/* broadcast before release in case no other threads are waiting */
}
int nfs4setclientid_otw_debug = 0;
/*
 * This function handles the recovery of STALE_CLIENTID for SETCLIENTID_CONFIRM,
* but nothing else; the calling function must be designed to handle those
* other errors.
*/
static void
{
int doqueue = 1;
/* PUTROOTFH */
/* GETATTR */
/* SETCLIENTID */
/*
 * Callback needs to happen on a non-RDMA transport.
 * Check if we have saved the original knetconfig;
* if so, use that instead.
*/
else
return;
/* getattr lease_time res */
#ifndef _LP64
/*
* The 32 bit client cannot handle a lease time greater than
* (INT32_MAX/1000000). This is due to the use of the
* lease_time in calls to drv_usectohz() in
* nfs4_renew_lease_thread(). The problem is that
* drv_usectohz() takes a time_t (which is just a long = 4
* bytes) as its parameter. The lease_time is multiplied by
* 1000000 to convert seconds to usecs for the parameter. If
* a number bigger than (INT32_MAX/1000000) is used then we
* overflow on the 32bit client.
*/
}
#endif
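/*
 * Illustrative sketch (standalone C): the clamp described above for
 * 32-bit kernels -- any lease period larger than INT32_MAX / 1000000
 * seconds would overflow when converted to microseconds in a 32-bit
 * long, so it is capped at that bound before use.
 */
#include <limits.h>

static long
example_clamp_lease_time(long lease_time_secs)
{
	const long max_secs = INT_MAX / 1000000;	/* about 2147 seconds */

	if (lease_time_secs > max_secs)
		return (max_secs);
	return (lease_time_secs);
}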
/*
* Keep track of the lease period for the mi's
 * mi_msg_list. We need an appropriate time
* bound to associate past facts with a current
* event. The lease period is perfect for this.
*/
}
if (!(*retry_inusep)) {
"NFS4 mount (SETCLIENTID failed)."
" nfs4_client_id.id is in"
"use already by: r_netid<%s> r_addr<%s>",
}
/*
* XXX - The client should be more robust in its
* handling of clientid in use errors (regen another
* clientid and try again?)
*/
return;
}
return;
}
#ifdef DEBUG
if (nfs4setclientid_otw_debug) {
union {
int foo[2];
} cid;
"nfs4setclientid_otw: OK, clientid = %x,%x, "
}
#endif
/* Confirm the client id and get the lease_time attribute */
/* used to figure out RTT for np */
"propagation delay set to %ld sec",
return;
if (!(*retry_inusep)) {
"SETCLIENTID_CONFIRM failed. "
"nfs4_client_id.id is in use already by: "
"r_netid<%s> r_addr<%s>",
}
return;
}
return;
}
/* Add mi to np's mntinfo4 list */
/*
* Start lease management thread.
* Keep trying until we succeed.
*/
}
}
/*
* Add mi to sp's mntinfo4_list if it isn't already in the list. Makes
* mi's clientid the same as sp's.
* Assumes sp is locked down.
*/
void
{
int in_list = 0;
"nfs4_add_mi_to_server: add mi %p to sp %p",
(CE_NOTE,
"nfs4_add_mi_to_server: mi in list"));
in_list = 1;
}
}
/*
* First put a hold on the mntinfo4's vfsp so that references via
* mntinfo4_list will be valid.
*/
if (!in_list)
if (!in_list) {
if (sp->mntinfo4_list)
}
/* set mi's clientid to that of sp's for later matching */
/*
* Update the clientid for any other mi's belonging to sp. This
* must be done here while we hold sp->s_lock, so that
* find_nfs4_server() continues to work.
*/
}
}
}
/*
* Remove the mi from sp's mntinfo4_list and release its reference.
* Exception: if mi still has open files, flag it for later removal (when
* all the files are closed).
*
* If this is the last mntinfo4 in sp's list then tell the lease renewal
* thread to exit.
*/
static void
{
"nfs4_remove_mi_from_server_nolock: remove mi %p from sp %p",
/*
* First make sure this mntinfo4 can be taken off of the list,
* ie: it doesn't have any open files remaining.
*/
if (mi->mi_open_files > 0) {
"nfs4_remove_mi_from_server_nolock: don't "
"remove mi since it still has files open"));
return;
}
/* last fs unmounted, kill the thread */
"remove_mi_from_nfs4_server_nolock: kill the thread"));
}
}
/*
* Remove mi from sp's mntinfo4_list and release the vfs reference.
*/
static void
{
/*
* We release a reference, and the caller must still have a
* reference.
*/
if (mi->mi_clientid_prev) {
} else {
/* This is the first mi in sp's mntinfo4_list */
/*
* Make sure the first mntinfo4 in the list is the actual
* mntinfo4 passed in.
*/
}
if (mi->mi_clientid_next)
/* Now mark the mntinfo4's links as being removed */
}
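/*
 * Illustrative sketch (standalone C): the doubly linked list unlink
 * pattern used above -- fix up the previous node (or the list head),
 * fix up the next node, then clear the removed node's own links.
 * Types are invented for the example.
 */
#include <stddef.h>

struct example_node {
	struct example_node *prev;
	struct example_node *next;
};

static void
example_list_remove(struct example_node **headp, struct example_node *n)
{
	if (n->prev != NULL)
		n->prev->next = n->next;
	else
		*headp = n->next;	/* n was the first node in the list */
	if (n->next != NULL)
		n->next->prev = n->prev;

	/* Mark the node's links as removed. */
	n->prev = NULL;
	n->next = NULL;
}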
/*
* Free all the entries in sp's mntinfo4_list.
*/
static void
{
mntinfo4_t *mi;
/*
* Grab a reference in case there is only one left (which
* remove_mi() frees).
*/
}
}
/*
* Remove the mi from sp's mntinfo4_list as above, and rele the vfs.
*
* This version can be called with a null nfs4_server_t arg,
* and will either find the right one and handle locking, or
* do nothing because the mi wasn't added to an sp's mntinfo4_list.
*/
void
{
} else
/*
* If we had a valid esp as input, the calling function will be
* responsible for unlocking the esp nfs4_server.
*/
}
}
/*
* Return TRUE if the given server has any non-unmounted filesystems.
*/
{
mntinfo4_t *mi;
return (TRUE);
}
return (FALSE);
}
/*
* Mark sp as finished and notify any waiters.
*/
void
{
}
/*
* Create a new nfs4_server_t structure.
* Returns new node unlocked and not in list, but with a reference count of
* 1.
*/
struct nfs4_server *
{
struct nfs4_server *np;
union {
struct {
} un_curtime;
char id_val[] = "Solaris: %s, NFSv4 kernel client";
int len;
/*
* Build the nfs_client_id4 for this server mount. Ensure
* the verifier is useful and that the identification is
* somehow based on the server's address for the case of
* multi-homed servers.
*/
gethrestime(&tt);
/*
* calculate the length of the opaque identifier. Subtract 2
* for the "%s" and add the traditional +1 for null
* termination.
*/
/* save cred for issuing rfs4calls inside the renew thread */
np->s_thread_exit = 0;
np->state_ref_count = 0;
np->s_otw_call_count = 0;
return (np);
}
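/*
 * Illustrative sketch (standalone C): sizing and building the opaque
 * client identifier from the "Solaris: %s, NFSv4 kernel client" template
 * above -- subtract 2 for the "%s" that gets replaced, add the length of
 * the substituted string, and add 1 for the terminating NUL.  The
 * node-name argument is an assumption made only for this example.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *
example_build_client_id(const char *nodename)
{
	size_t len;
	char *id;

	/* strlen of the template includes the "%s"; subtract its 2 chars. */
	len = strlen("Solaris: %s, NFSv4 kernel client") - 2 +
	    strlen(nodename) + 1;
	id = malloc(len);
	if (id == NULL)
		return (NULL);
	(void) snprintf(id, len, "Solaris: %s, NFSv4 kernel client", nodename);
	return (id);
}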
/*
* Create a new nfs4_server_t structure and add it to the list.
* Returns new node locked; reference must eventually be freed.
*/
static struct nfs4_server *
{
return (sp);
}
int nfs4_server_t_debug = 0;
#ifdef lint
extern void
#endif
#ifndef lint
#ifdef DEBUG
void
{
"dumping nfs4_server_t list in %s", txt));
"mi 0x%p, want clientid %llx, addr %d/%04X",
"node 0x%p, clientid %llx, addr %d/%04X, cnt %d",
np->state_ref_count));
" - address matches"));
" - clientid matches"));
" - thread not exiting"));
}
}
#endif
#endif
/*
* Move a mntinfo4_t from one server list to another.
* Locking of the two nfs4_server_t nodes will be done in list order.
*
* Returns NULL if the current nfs4_server_t for the filesystem could not
* be found (e.g., due to forced unmount). Otherwise returns a reference
* to the new nfs4_server_t, which must eventually be freed.
*/
{
int num_open;
#ifdef DEBUG
if (nfs4_server_t_debug)
#endif
continue;
p->s_thread_exit != NFS4_THREAD_EXIT) {
op = p;
}
p->s_thread_exit != NFS4_THREAD_EXIT) {
np = p;
}
break;
}
/*
* Filesystem has been forcibly unmounted. Bail out.
*/
return (NULL);
}
} else {
#ifdef DEBUG
"nfs4_move_mi: no target nfs4_server, will create."));
#endif
}
"nfs4_move_mi: for mi 0x%p, "
"old servinfo4 0x%p, new servinfo4 0x%p, "
"old nfs4_server 0x%p, new nfs4_server 0x%p, ",
/* discard any delegations */
mi->mi_open_files = 0;
"nfs4_move_mi: mi_open_files %d, op->cnt %d, np->cnt %d",
return (np);
}
/*
* Need to have the nfs4_server_lst_lock.
* Search the nfs4_server list to find a match on this servinfo4
* based on its address.
*
* Returns NULL if no match is found. Otherwise returns a reference (which
* must eventually be freed) to a locked nfs4_server.
*/
{
return (np);
}
}
return (NULL);
}
/*
* Search the nfs4_server_lst to find a match based on clientid and
* addr.
* Locks the nfs4_server down if it is found and returns a reference that
* must eventually be freed.
*
 * Returns NULL if no match is found. This means one of two things: either
* mi is in the process of being mounted, or mi has been unmounted.
*
* The caller should be holding mi->mi_recovlock, and it should continue to
* hold the lock until done with the returned nfs4_server_t. Once
* mi->mi_recovlock is released, there is no guarantee that the returned
* mi->nfs4_server_t will continue to correspond to mi.
*/
{
return (find_nfs4_server_all(mi, 0));
}
/*
* Same as above, but takes an "all" parameter which can be
* set to 1 if the caller wishes to find nfs4_server_t's which
* have been marked for termination by the exit of the renew
* thread. This should only be used by operations which are
* cleaning up and will not cause an OTW op.
*/
{
/*
* This can be called from nfs4_unmount() which can be called from the
* global zone, hence it's legal for the global zone to muck with
* another zone's server list, as long as it doesn't try to contact
* them.
*/
nfs_global_client_only != 0);
/*
* The nfs4_server_lst_lock global lock is held when we get a new
* clientid (via SETCLIENTID OTW). Holding this global lock and
* mi_recovlock (READER is fine) ensures that the nfs4_server
* and this mntinfo4 can't get out of sync, so the following search is
* always valid.
*/
#ifdef DEBUG
if (nfs4_server_t_debug) {
/* mi->mi_clientid is unprotected, ok for debug output */
mi->mi_curr_serv);
}
#endif
return (np);
}
}
return (NULL);
}
/*
* Release the reference to sp and destroy it if that's the last one.
*/
void
{
return;
}
return;
}
}
static void
{
/* destroy the nfs4_server */
}
/*
* Lock sp, but only if it's still active (in the list and hasn't been
* flagged as exiting) or 'all' is non-zero.
* Returns TRUE if sp got locked and adds a reference to sp.
*/
{
all != 0)) {
return (TRUE);
}
}
return (FALSE);
}
/*
* Fork off a thread to free the data structures for a mount.
*/
static void
{
}
static void
{
mntinfo4_t *mi;
zthread_exit();
/* NOTREACHED */
}
/*
* Thread to free the data structures for a given filesystem.
*/
static void
{
int removed;
/*
* We need to participate in the CPR framework if this is a kernel
* thread.
*/
if (async_thread) {
"nfsv4AsyncUnmount");
}
/*
* We need to wait for all outstanding OTW calls
* and recovery to finish before we remove the mi
* from the nfs4_server_t, as current pending
* calls might still need this linkage (in order
* to find a nfs4_server_t from a mntinfo4_t).
*/
if (sp) {
while (sp->s_otw_call_count != 0) {
if (async_thread) {
}
if (async_thread) {
}
}
}
while (mi->mi_in_recovery != 0) {
if (async_thread) {
}
if (async_thread) {
}
}
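/*
 * Illustrative sketch (standalone C, pthreads): the drain pattern used
 * above -- sleep on a condition variable until an in-flight counter drops
 * to zero before tearing the structure down.  The counter and its lock
 * are invented for the example; the kernel version also brackets the wait
 * with CPR callbacks when running in a kernel thread.
 */
#include <pthread.h>

struct example_drain {
	pthread_mutex_t	lock;
	pthread_cond_t	cv;
	int		in_flight;	/* outstanding calls */
};

static void
example_wait_for_drain(struct example_drain *d)
{
	pthread_mutex_lock(&d->lock);
	while (d->in_flight != 0)
		pthread_cond_wait(&d->cv, &d->lock);
	pthread_mutex_unlock(&d->lock);
}

/* Callers finishing a call decrement the counter and wake any waiters. */
static void
example_call_done(struct example_drain *d)
{
	pthread_mutex_lock(&d->lock);
	if (--d->in_flight == 0)
		pthread_cond_broadcast(&d->cv);
	pthread_mutex_unlock(&d->lock);
}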
/*
* If we got an error, then do not nuke the
* tree. Either the harvester is busy reclaiming
* this node or we ran into some busy condition.
*
* The harvester will eventually come along and cleanup.
* The only problem would be the root mount point.
*
* Since the busy node can occur for a variety
* of reasons and can result in an entry staying
* in df output but no longer accessible from the
* directory tree, we are okay.
*/
&must_unlock, &eph_tree))
&eph_tree);
/*
* The original purge of the dnlc via 'dounmount'
* doesn't guarantee that another dnlc entry was not
 * added while we waited for all outstanding OTW
* and recovery calls to finish. So re-purge the
* dnlc now.
*/
(void) dnlc_purge_vfsp(vfsp, 0);
/*
 * We need to explicitly stop the manager thread; the async worker
 * threads can time out and exit on their own.
*/
mi->mi_max_threads = 0;
if (mi->mi_manager_thread)
if (async_thread) {
}
if (removed)
}