nfs4_clnt.h revision 3fd6cc295d1c8c721b4b8abb49bbe0fefe51d034
2N/A * The contents of this file are subject to the terms of the 2N/A * Common Development and Distribution License, Version 1.0 only 2N/A * (the "License"). You may not use this file except in compliance 2N/A * See the License for the specific language governing permissions 2N/A * and limitations under the License. 2N/A * When distributing Covered Code, include this CDDL HEADER in each 2N/A * If applicable, add the following below this CDDL HEADER, with the 2N/A * fields enclosed by brackets "[]" replaced with your own identifying 2N/A * information: Portions Copyright [yyyy] [name of copyright owner] 2N/A * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 2N/A * Use is subject to license terms. 2N/A/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 2N/A/* All Rights Reserved */ 2N/A * Portions of this source code were derived from Berkeley 4.3 BSD 2N/A * under license from the Regents of the University of California. 2N/A#
pragma ident "%Z%%M% %I% %E% SMI" 2N/A/* Four states of nfs4_server's lease_valid */ 2N/A/* flag to tell the renew thread it should exit */ 2N/A/* Default number of seconds to wait on GRACE and DELAY errors */ 2N/A/* Number of hash buckets for open owners for each nfs4_server */ 2N/A/* Number of freed open owners (per mntinfo4_t) to keep around */ 2N/A/* Number of seconds to wait before retrying a SETCLIENTID(_CONFIRM) op */ 2N/A/* Number of times we should retry a SETCLIENTID(_CONFIRM) op */ 2N/A/* Number of times we should retry on open after getting NFS4ERR_BAD_SEQID */ 2N/A * Is the attribute cache valid? If client holds a delegation, then attrs 2N/A * are by definition valid. If not, then check to see if attrs have timed out. 2N/A * Flags to indicate whether to purge the DNLC for non-directory vnodes 2N/A * in a call to nfs_purge_caches. 2N/A * Swap is always valid, if no attributes (attrtime == 0) or 2N/A * if mtime matches cached mtime it is valid 2N/A * NOTE: mtime is now a timestruc_t. 2N/A * Caller should be holding the rnode r_statelock mutex. 2N/A * Macro to detect forced unmount or a zone shutdown. * Macro to help determine whether a request failed because the underlying * filesystem has been forcibly unmounted or because of zone shutdown. * Due to the way the address space callbacks are used to execute a delmap, * we must keep track of how many times the same thread has called * VOP_DELMAP()->nfs4_delmap(). This is done by having a list of * nfs4_delmapcall_t's associated with each rnode4_t. This list is protected * by the rnode4_t's r_statelock. The individual elements do not need to be * protected as they will only ever be created, modified and destroyed by * one thread (the call_id). * See nfs4_delmap() for further explanation. int error;
/* error from delmap */ * delmap address space callback args * The following are statistics that describe the behavior of the system as a * whole and don't correspond to any particular zone. * The NFS specific async_reqs structure. void (*
readahead)();
/* pointer to readahead function */ struct seg *
seg;
/* segment to do i/o to */ int (*
pageio)();
/* pointer to pageio function */ int (*
readdir)();
/* pointer to readdir function */ void (*
commit)();
/* pointer to commit function */ * Hash bucket for the mi's open owner list (mi_oo_list). {
0x20202020,
0x20202020,
0x20202020}}, \
{
0x61636365,
0x73732020,
0x20202020}}, \
{
0x636c6f73,
0x65202020,
0x20202020}}, \
{
0x6c6f7374,
0x20636c6f,
0x73652020}}, \
{
0x756e646f,
0x20636c6f,
0x73652020}}, \
{
0x636f6d6d,
0x69742020,
0x20202020}}, \
{
0x64656c65,
0x67726574,
0x75726e20}}, \
{
0x6673696e,
0x666f2020,
0x20202020}}, \
{
0x67657420,
0x736c6e6b,
0x20747874}}, \
{
0x67657461,
0x74747220,
0x20202020}}, \
{
0x696e6163,
0x74697665,
0x20202020}}, \
{
0x6c696e6b,
0x20202020,
0x20202020}}, \
{
0x6c6f636b,
0x20202020,
0x20202020}}, \
{
0x7265636c,
0x61696d20,
0x6c6f636b}}, \
{
0x72657365,
0x6e64206c,
0x6f636b20}}, \
{
0x7265696e,
0x7374206c,
0x6f636b20}}, \
{
0x756e6b6e,
0x6f776e20,
0x6c6f636b}}, \
{
0x6c6f636b,
0x5f746573,
0x74202020}}, \
{
0x756e6c6f,
0x636b2020,
0x20202020}}, \
{
0x72657365,
0x6e64206c,
0x6f636b75}}, \
{
0x7265696e,
0x73742075,
0x6e6c636b}}, \
{
0x6c6f6f6b,
0x75702020,
0x20202020}}, \
{
0x6c6f6f6b,
0x75702070,
0x6172656e}}, \
{
0x6c6f6f6b,
0x75702076,
0x616c6964}}, \
{
0x6c6f6f6b,
0x766c6420,
0x7061726e}}, \
{
0x6d6b6469,
0x72202020,
0x20202020}}, \
{
0x6d6b6e6f,
0x64202020,
0x20202020}}, \
{
0x6d6f756e,
0x74202020,
0x20202020}}, \
{
0x6f70656e,
0x20202020,
0x20202020}}, \
{
0x6f70656e,
0x5f636f6e,
0x6669726d}}, \
{
0x6c6f7374,
0x206f7065,
0x6e5f636f}}, \
{
0x6f70656e,
0x20646772,
0x61646520}}, \
{
0x6c737420,
0x6f70656e,
0x20646772}}, \
{
0x6c6f7374,
0x206f7065,
0x6e202020}}, \
{
0x6f70656e,
0x61747472,
0x20202020}}, \
{
0x70617468,
0x636f6e66,
0x20202020}}, \
{
0x70757472,
0x6f6f7466,
0x68202020}}, \
{
0x72656164,
0x20202020,
0x20202020}}, \
{
0x72656164,
0x61686561,
0x64202020}}, \
{
0x72656164,
0x64697220,
0x20202020}}, \
{
0x72656164,
0x6c696e6b,
0x20202020}}, \
{
0x72656c6f,
0x636b2020,
0x20202020}}, \
{
0x72656d61,
0x70206c6f,
0x6f6b7570}}, \
{
0x72656d70,
0x206c6b75,
0x70206164}}, \
{
0x72656d70,
0x206c6b75,
0x70206e61}}, \
{
0x72656d61,
0x70206d6f,
0x756e7420}}, \
{
0x726d6469,
0x72202020,
0x20202020}}, \
{
0x72656d6f,
0x76652020,
0x20202020}}, \
{
0x72656e61,
0x6d652020,
0x20202020}}, \
{
0x72656e61,
0x6d652028,
0x76666829}}, \
{
0x72656e65,
0x77202020,
0x20202020}}, \
{
0x72656f70,
0x656e2020,
0x20202020}}, \
{
0x6c6f7374,
0x2072656f,
0x70656e20}}, \
{
0x73656369,
0x6e666f20,
0x20202020}}, \
{
0x73657461,
0x74747220,
0x20202020}}, \
{
0x73657463,
0x6c69656e,
0x74696420}}, \
{
0x73636c6e,
0x7469645f,
0x636f6e66}}, \
{
0x73796d6c,
0x696e6b20,
0x20202020}}, \
{
0x77726974,
0x65202020,
0x20202020}} \
* These flags are for differentiating the search criteria for * find_open_owner(). The comparison is done with the open owner's * 'oo_just_created' flag. * Hashed by the cr_uid and cr_ruid of credential 'oo_cred'. 'oo_cred_otw' * is stored upon a successful OPEN. This is needed when the user's effective * and real uid's don't match. The 'oo_cred_otw' overrides the credential * passed down by VFS for async read/write, commit, lock, and close operations. * The oo_ref_count keeps track of the number of active references on this * data structure + number of nfs4_open_streams point to this structure. * 'oo_valid' tells whether this struct is about to be freed or not. * 'oo_just_created' tells us whether this struct has just been created but * not been fully finalized (that is created upon an OPEN request and * finalized upon the OPEN success). * The 'oo_seqid_inuse' is for the open seqid synchronization. If a thread * is currently using the open owner and its open_seqid, then it sets the * oo_seqid_inuse to true if it currently is not set. If it is set then it * does a cv_wait on the oo_cv_seqid_sync condition variable. When the thread * is done it unsets the oo_seqid_inuse and does a cv_signal to wake a process * waiting on the condition variable. * 'oo_last_good_seqid' is the last valid seqid this open owner sent OTW, * and 'oo_last_good_op' is the operation that issued the last valid seqid. * mntinfo4_t::mi_lock > oo_lock (for searching mi_oo_list) * oo_seqid_inuse > mntinfo4_t::mi_lock * oo_seqid_inuse > rnode4_t::r_statelock * oo_seqid_inuse > rnode4_t::r_statev4_lock * oo_seqid_inuse > nfs4_open_stream_t::os_sync_lock * The 'oo_seqid_inuse'/'oo_cv_seqid_sync' protects: * The 'oo_lock' protects: * Fix this to always be 8 bytes * Static server information. * These fields are read-only once they are initialized: * These fields are protected by sv_lock: * nfs_rtable4_lock > sv_lock * rnode4_t::r_statelock > sv_lock char *
sv_path;
/* Path name on server */ /* points to one of the sec_data */ /* entries in sv_secinfo */ #
define SV4_NOTINUSE 0x004 /* servinfo4_t had fatal errors */ * Lock call types. See nfs4frlock(). * This structure holds the information for a lost open/close/open downgrade/ * lock/locku request. It is also used for requests that are queued up so * that the recovery thread can release server state after a forced * "lr_op" is 0 if the struct is uninitialized. Otherwise, it is set to * the proper OP_* nfs_opnum4 number. The other fields contain information * to reconstruct the call. * lr_dvp is used for OPENs with CREATE, so that we can do a PUTFH of the * parent directory without relying on vtodv (since we may not have a vp * for the file we wish to create). * lr_putfirst means that the request should go to the front of the resend * queue, rather than the end. * Recovery actions. Some actions can imply further recovery using a * different recovery action (e.g., recovering the clientid leads to * recovering open files and locks). * Administrative and debug message framework. * NFS private data per mounted file system * The mi_lock mutex protects the following fields: * Normally the netconfig information for the mount comes from * mi_curr_serv and mi_klmconfig is NULL. If NLM calls need to use a * different transport, mi_klmconfig contains the necessary netconfig * The mi_async_lock mutex protects the following fields: * The nfs4_server_t::s_lock protects the following fields: * The mntinfo4_t::mi_recovlock protects the following fields: * mi4_globals::mig_lock > mi_async_lock * mi_async_lock > nfs4_server_t::s_lock > mi_lock * mi_recovlock > mi_rename_lock > nfs_rtable4_lock * nfs4_server_t::s_recovlock > mi_recovlock * rnode4_t::r_rwlock > mi_rename_lock * nfs_rtable4_lock > mi_lock * nfs4_server_t::s_lock > mi_msg_list_lock * mi_recovlock > nfs4_server_t::s_lock * mi_recovlock > nfs4_server_lst_lock * The 'mi_oo_list' represents the hash buckets that contain the * nfs4_open_owners for this particular mntinfo4. 
* The 'mi_foo_list' represents the freed nfs4_open_owners for this mntinfo4. * 'mi_foo_num' is the current number of freed open owners on the list, * 'mi_foo_max' is the maximum number of freed open owners that are allowable * mi_rootfh and mi_srvparentfh are read-only once created, but that just * refers to the pointer. The contents must be updated to keep in sync * The mi_msg_list_lock protects against adding/deleting entries to the * 'mi_zone' is initialized at structure creation time, and never * changes; it may be read without a lock. * mi_zone_node is linkage into the mi4_globals.mig_list, and is * protected by mi4_globals.mig_list_lock. int mi_tsize;
/* transfer size (bytes) */ int mi_stsize;
/* server's max transfer size (bytes) */ int mi_timeo;
/* initial timeout in 10th sec */ * async I/O management. There may be a pool of threads to handle * async I/O requests, etc., plus there is always one thread that * handles over-the-wire requests for VOP_INACTIVE. The async pool * can also help out with VOP_INACTIVE. * Client Side Failover stats * Volatile fh flags (nfsv4) * Shared filehandle pool. * Used on servers with volatile filehandles, to check for * pre-existing rnodes with a given fileid, before we create * another rnode for the same file on the server. * not guaranteed to be accurate. * only should be used by debug queue. * The values for mi_flags. * MI4_HARD hard or soft mount * MI4_PRINTED responding message printed * MI4_INT allow INTR on hard mount * MI4_DOWN server is down * MI4_NOAC don't cache attributes * MI4_NOCTO no close-to-open consistency * MI4_LLOCK local locking only (no lockmgr) * MI4_GRPID System V group id inheritance * MI4_SHUTDOWN System is rebooting or shutting down * MI4_LINK server supports link * MI4_SYMLINK server supports symlink * MI4_ACL server supports NFSv4 ACLs * MI4_NOPRINT don't print messages * MI4_DIRECTIO do direct I/O * MI4_RECOV_ACTIV filesystem has a recovery thread * MI4_REMOVE_ON_LAST_CLOSE remove from server's list * MI4_RECOV_FAIL client recovery failed * MI4_MOUNTING mount in progress, don't failover * MI4_POSIX_LOCK if server is using POSIX locking * MI4_LOCK_DEBUG cmn_err'd posix lock err msg * MI4_DEAD mount has been terminated * MI4_INACTIVE_IDLE inactive thread idle * MI4_BADOWNER_DEBUG badowner error msg per mount * MI4_ASYNC_MGR_STOP tell async manager to die * MI4_TIMEDOUT saw a timeout during zone shutdown /* 0x1000 is available */ /* 0x4000 is available */ /* 0x8000 is available */ /* 0x10000 is available */ /* 0x80000 is available */ * MI4R_NEED_CLIENTID is sort of redundant (it's the nfs4_server_t flag * that's important), but some flag is needed to indicate that recovery is * going on for the filesystem. 
* vfs pointer to mount info * vnode pointer to mount info * lease_valid is initially set to NFS4_LEASE_NOT_STARTED. This is when the * nfs4_server is first created. lease_valid is then set to * NFS4_LEASE_UNITIALIZED when the renew thread is started. The extra state of * NFS4_LEASE_NOT_STARTED is needed for client recovery (so we know if a thread * already exists when we do SETCLIENTID). lease_valid is then set to * NFS4_LEASE_VALID (if it is at NFS4_LEASE_UNITIALIZED) when a state creating * operation (OPEN) is done. lease_valid stays at NFS4_LEASE_VALID as long as * the lease is renewed. It is set to NFS4_LEASE_INVALID when the lease * expires. Client recovery is needed to set the lease back to * NFS4_LEASE_VALID from NFS4_LEASE_INVALID. * The s_cred is the credential used to mount the first file system for this * server. It used as the credential for the renew thread's calls to the * The renew thread waits on the condition variable cv_thread_exit. If the cv * is signalled, then the thread knows it must check s_thread_exit to see if * it should exit. The cv is signaled when the last file system is unmounted * from a particular server. s_thread_exit is set to 0 upon thread startup, * and set to NFS4_THREAD_EXIT, when the last file system is unmounted thereby * telling the thread to exit. s_thread_exit is needed to avoid spurious * state_ref_count is incremented every time a new file is opened and * decremented every time a file is closed otw. This keeps track of whether * the nfs4_server has state associated with it or not. * s_refcnt is the reference count for storage management of the struct * mntinfo4_list points to the doubly linked list of mntinfo4s that share * this nfs4_server (ie: <clientid, saddr> pair) in the current zone. This is * needed for a nfs4_server to get a mntinfo4 for use in rfs4call. * s_recovlock is used to synchronize recovery operations. The thread * that is recovering the client must acquire it as a writer. 
If the * thread is using the clientid (including recovery operations on other * state), acquire it as a reader. * The 's_otw_call_count' keeps track of the number of outstanding over the * wire requests for this structure. The struct will not go away as long * as this is non-zero (or s_refcnt is non-zero). * The 's_cv_otw_count' is used in conjunction with the 's_otw_call_count' * variable to let the renew thread know when an outstanding otw request has * 'zoneid' and 'zone_globals' are set at creation of this structure * and are read-only after that; no lock is required to read them. * s_lock protects: everything except cv_thread_exit and s_recovlock. * s_program is used as the index into the nfs4_callback_globals's * nfs4prog2server table. When a callback request comes in, we can * use that request's program number (minus NFS4_CALLBACK) as an index * into the nfs4prog2server. That entry will hold the nfs4_server_t ptr. * We can then access that nfs4_server_t and its 's_deleg_list' (its list of * nfs4_server::s_lock > mntinfo4::mi_lock * nfs_rtable4_lock > s_lock * nfs4_server_lst_lock > s_lock #
define N4S_INSERTED 0x2 /* server has been put in global list */ * Operation hints for the recovery framework (mostly). * OH_ACCESS, OH_GETACL, OH_GETATTR, OH_LOOKUP, OH_READDIR * These hints exist to allow user visit/readdir a R4SRVSTUB dir. * (dir represents the root of a server fs that has not yet been * This macro evaluates to non-zero if the given op releases state at the * Flags for nfs4getfh_otw. * The os_open_ref_count keeps track of the number of open file descriptor * references on this data structure. It will be bumped for any successful * OTW OPEN call and any OPEN call that determines the OTW call is not * necessary and the open stream hasn't just been created (see * nfs4_is_otw_open_necessary). * os_mapcnt is a count of the number of mmapped pages for a particular * open stream; this in conjunction w/ os_open_ref_count is used to * determine when to do a close to the server. This is necessary because * of the semantics of doing open, mmap, close; the OTW close must wait * until all open and mmap references have vanished. * 'os_valid' tells us whether this structure is about to be freed or not, * if it is then don't return it in find_open_stream(). * 'os_final_close' is set when a CLOSE OTW was attempted. This is needed * so we can properly count the os_open_ref_count in cases where we VOP_CLOSE * without a VOP_OPEN, and have nfs4_inactive() drive the OTW CLOSE. It * that tried to close OTW but failed, and left the state cleanup to * 'os_force_close' is used to let us know if an intervening thread came * and reopened the open stream after we decided to issue a CLOSE_FORCE, * but before we could actually process the CLOSE_FORCE. * 'os_pending_close' is set when an over-the-wire CLOSE is deferred to the * 'open_stateid' is set to the last open stateid returned by the server unless * 'os_delegation' is 1, in which case 'open_stateid' refers to the * delegation stateid returned by the server. 
This is used in cases where the * client tries to OPEN a file but already has a suitable delegation, so we * just stick the delegation stateid in the open stream. * os_dc_openacc are open access bits which have been granted to the * open stream by virtue of a delegation, but which have not been seen * by the server. This applies even if the open stream does not have * os_delegation set. These bits are used when setting file locks to * determine whether an open with CLAIM_DELEGATE_CUR needs to be done * before the lock request can be sent to the server. See * nfs4frlock_check_deleg(). * maps require. We need to keep track of this so we can provide the proper * 'os_failed_reopen' tells us that we failed to successfully reopen this * open stream; therefore, we should not use this open stateid as it is * not valid anymore. This flag is also used to indicate an unsuccessful * attempt to reopen a delegation open stream with CLAIM_DELEGATE_CUR. * If 'os_orig_oo_name' is different than os_open_owner's oo_name * then this tells us that this open stream's open owner used a * bad seqid (that is, got NFS4ERR_BAD_SEQID). If different, this open * stream will no longer be used for future OTW state releasing calls. * rnode4_t::r_os_lock > os_sync_lock * os_sync_lock > rnode4_t::r_statelock * os_sync_lock > rnode4_t::r_statev4_lock * os_sync_lock > mntinfo4_t::mi_lock (via hold over rfs4call) * The 'os_sync_lock' protects: * The rnode4_t::r_os_lock protects: * These fields are set at creation time and * This structure describes the format of the lock_owner_name * field of the lock owner. * The lo_prev_rnode and lo_next_rnode are for a circular list that hangs * off the rnode. If the links are NULL it means this object is not on the * 'lo_pending_rqsts' is non-zero if we ever tried to send a request and * didn't get a response back. This is used to figure out if we have * possible remote v4 locks, so that we can clean up at process exit. 
In * theory, the client should be able to figure out if the server received * the request (based on what seqid works), so maybe we can get rid of this * structure. The rnode's list accounts for one reference. * 'lo_just_created' is set to NFS4_JUST_CREATED when we first create the * data structure. It is then set to NFS4_PERM_CREATED when a lock request * is successful using this lock owner structure. We need to keep 'temporary' * lock owners around so we can properly keep the lock seqid synchronization * first time (especially with the DENIED error case). Once * 'lo_just_created' is set to NFS4_PERM_CREATED, it doesn't change. * 'lo_valid' tells us whether this structure is about to be freed or not, * if it is then don't return it from find_lock_owner(). * Retrieving and setting of 'lock_seqid' is protected by the * NFS4_LOCK_SEQID_INUSE flag. Waiters for NFS4_LOCK_SEQID_INUSE should * use 'lo_cv_seqid_sync'. * The setting of 'lock_stateid' is protected by the * NFS4_LOCK_SEQID_INUSE flag and 'lo_lock'. The retrieving of the * 'lock_stateid' is protected by 'lo_lock', with the additional * requirement that the calling function can handle NFS4ERR_OLD_STATEID and * NFS4ERR_BAD_STATEID as appropriate. * The setting of NFS4_BAD_SEQID_LOCK to lo_flags tells us whether this lock * owner used a bad seqid (that is, got NFS4ERR_BAD_SEQID). With this set, * this lock owner will no longer be used for future OTW calls. Once set, * rnode4_t::r_statev4_lock > lo_lock * Fix this to always be 12 bytes /* for nfs4_lock_owner_t lookups */ /* Number of times to retry a call that fails with state independent error */ * Per-zone data for dealing with callbacks. Included here solely for the /* this table maps the program number to the nfs4_server structure */ * Structure to hold the bad seqid information that is passed * to the recovery framework. * The following defines are used for rs_flags in * a nfs4_recov_state_t structure. 
* NFS4_RS_RENAME_HELD Indicates that the mi_rename_lock was held. * NFS4_RS_GRACE_MSG Set once we have uprintf'ed a grace message. * NFS4_RS_DELAY_MSG Set once we have uprintf'ed a delay message. * NFS4_RS_RECALL_HELD1 r_deleg_recall_lock for vp1 was held. * NFS4_RS_RECALL_HELD2 r_deleg_recall_lock for vp2 was held. * Information that is retrieved from nfs4_start_op() and that is * passed into nfs4_end_op(). * rs_sp is a reference to the nfs4_server that was found, or NULL. * rs_num_retry_despite_err is the number times client retried an * OTW op despite a recovery error. It is only incremented for hints * exempt to normal R4RECOVERR processing * needs review for possible removal.) * It is initialized wherever nfs4_recov_state_t is declared -- usually * very near initialization of rs_flags. * Flags for nfs4_check_remap, nfs4_remap_file and nfs4_remap_root. /* Used for preformed "." and ".." dirents */ * Filehandles can change in v4, so rather than storing the filehandle * directly in the rnode, etc., we manage the filehandle through one of * Locking: sfh_fh and sfh_tree is protected by the filesystem's * mi_fh_lock. The reference count and flags are protected by sfh_lock. * mntinfo4_t::mi_fh_lock > sfh_lock. * Path and file name management. * This type stores the name of an entry in the filesystem and keeps enough * information that it can provide a complete path. All fields are * protected by fn_lock, except for the reference count, which is managed * Lock order: child and then parent. char *
fn_name;
/* the actual name */ * Per-zone data for managing client handles, included in this file for the #
endif /* _NFS4_CLNT_H */