rnode.h revision 108322fb1c3ed341aba9c80c9774df0ed9e35768
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
#ifndef _NFS_RNODE_H
#define _NFS_RNODE_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/avl.h>
#include <sys/list.h>
#include <nfs/nfs.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef enum nfs_access_type {
NFS_ACCESS_UNKNOWN,
NFS_ACCESS_ALLOWED,
NFS_ACCESS_DENIED
} nfs_access_type_t;
typedef struct acache_hash {
struct acache *next; /* next and prev must be first */
struct acache *prev;
krwlock_t lock;
} acache_hash_t;
typedef struct acache {
struct acache *next; /* next and prev must be first */
struct acache *prev;
uint32_t known;
uint32_t allowed;
struct rnode *rnode;
cred_t *cred;
struct acache *list;
struct acache_hash *hashq;
} acache_t;
#define NFS_FHANDLE_LEN 64
typedef struct nfs_fhandle {
int fh_len;
char fh_buf[NFS_FHANDLE_LEN];
} nfs_fhandle;
typedef struct rddir_cache {
lloff_t _cookie; /* cookie used to find this cache entry */
lloff_t _ncookie; /* cookie used to find the next cache entry */
char *entries; /* buffer containing dirent entries */
int eof; /* EOF reached after this request */
int entlen; /* size of dirent entries in buf */
int buflen; /* size of the buffer used to store entries */
int flags; /* control flags, see below */
kcondvar_t cv; /* cv for blocking */
int error; /* error from RPC operation */
kmutex_t lock;
uint_t count; /* reference count */
avl_node_t tree; /* AVL tree links */
} rddir_cache;
#define nfs_cookie _cookie._p._l
#define nfs_ncookie _ncookie._p._l
#define nfs3_cookie _cookie._f
#define nfs3_ncookie _ncookie._f
#define RDDIR 0x1 /* readdir operation in progress */
#define RDDIRWAIT 0x2 /* waiting on readdir in progress */
#define RDDIRREQ 0x4 /* a new readdir is required */
#define RDDIRCACHED 0x8 /* entry is in the cache */
#define HAVE_RDDIR_CACHE(rp) (avl_numnodes(&(rp)->r_dir) > 0)
typedef struct symlink_cache {
char *contents; /* contents of the symbolic link */
int len; /* length of the contents */
int size; /* size of the allocated buffer */
} symlink_cache;
typedef struct commit {
page_t *c_pages; /* list of pages to commit */
offset3 c_commbase; /* base offset to do commit from */
count3 c_commlen; /* len to commit */
kcondvar_t c_cv; /* condvar for waiting for commit */
} commit_t;
/*
* The various values for the commit states. These are stored in
* the p_fsdata byte in the page struct.
*/
#define C_NOCOMMIT 0 /* no commit is required */
#define C_COMMIT 1 /* a commit is required so do it now */
#define C_DELAYCOMMIT 2 /* a commit is required, but can be delayed */
/*
* The lock manager holds state making it possible for the client
* and server to be out of sync. For example, if the response from
* the server granting a lock request is lost, the server will think
* the lock is granted and the client will think the lock is lost.
* To deal with this, a list of processes for which the client is
* not sure if the server holds a lock is attached to the rnode.
* When such a process closes the rnode, an unlock request is sent
* to the server to unlock the entire file.
*
* The list is kept as a singularly linked NULL terminated list.
* Because it is only added to under extreme error conditions, the
* list shouldn't get very big. DEBUG kernels print a console warning
* when the number of entries on a list go beyond nfs_lmpl_high_water
* an arbitrary number defined in nfs_add_locking_id()
*/
#define RLMPL_PID 1
#define RLMPL_OWNER 2
typedef struct lock_manager_pid_list {
int lmpl_type;
pid_t lmpl_pid;
union {
pid_t _pid;
struct {
int len;
char *owner;
} _own;
} un;
struct lock_manager_pid_list *lmpl_next;
} lmpl_t;
#define lmpl_opid un._pid
#define lmpl_own_len un._own.len
#define lmpl_owner un._own.owner
/*
* A homegrown reader/writer lock implementation. It addresses
* two requirements not addressed by the system primitives. They
* are that the `enter" operation is optionally interruptible and
* that that they can be re`enter'ed by writers without deadlock.
*/
typedef struct nfs_rwlock {
int count;
int waiters;
kthread_t *owner;
kmutex_t lock;
kcondvar_t cv;
} nfs_rwlock_t;
/*
* The format of the hash bucket used to lookup rnodes from a file handle.
*/
typedef struct rhashq {
struct rnode *r_hashf;
struct rnode *r_hashb;
krwlock_t r_lock;
} rhashq_t;
/*
* Remote file information structure.
*
* The rnode is the "inode" for remote files. It contains all the
* information necessary to handle remote file on the client side.
*
* Note on file sizes: we keep two file sizes in the rnode: the size
* according to the client (r_size) and the size according to the server
* (r_attr.va_size). They can differ because we modify r_size during a
* write system call (nfs_rdwr), before the write request goes over the
* wire (before the file is actually modified on the server). If an OTW
* request occurs before the cached data is written to the server the file
* size returned from the server (r_attr.va_size) may not match r_size.
* r_size is the one we use, in general. r_attr.va_size is only used to
* determine whether or not our cached data is valid.
*
* Each rnode has 3 locks associated with it (not including the rnode
* hash table and free list locks):
*
* r_rwlock: Serializes nfs_write and nfs_setattr requests
* and allows nfs_read requests to proceed in parallel.
* Serializes reads/updates to directories.
*
* r_lkserlock: Serializes lock requests with map, write, and
* readahead operations.
*
* r_statelock: Protects all fields in the rnode except for
* those listed below. This lock is intented
* to be held for relatively short periods of
* time (not accross entire putpage operations,
* for example).
*
* The following members are protected by the mutex rpfreelist_lock:
* r_freef
* r_freeb
*
* The following members are protected by the hash bucket rwlock:
* r_hashf
* r_hashb
*
* Note: r_modaddr is only accessed when the r_statelock mutex is held.
* Its value is also controlled via r_rwlock. It is assumed that
* there will be only 1 writer active at a time, so it safe to
* set r_modaddr and release r_statelock as long as the r_rwlock
* writer lock is held.
*
* 64-bit offsets: the code formerly assumed that atomic reads of
* r_size were safe and reliable; on 32-bit architectures, this is
* not true since an intervening bus cycle from another processor
* could update half of the size field. The r_statelock must now
* be held whenever any kind of access of r_size is made.
*
* Lock ordering:
* r_rwlock > r_lkserlock > r_statelock
*/
struct exportinfo; /* defined in nfs/export.h */
struct servinfo; /* defined in nfs/nfs_clnt.h */
struct failinfo; /* defined in nfs/nfs_clnt.h */
struct mntinfo; /* defined in nfs/nfs_clnt.h */
#ifdef _KERNEL
typedef struct rnode {
/* the hash fields must be first to match the rhashq_t */
struct rnode *r_hashf; /* hash queue forward pointer */
struct rnode *r_hashb; /* hash queue back pointer */
struct rnode *r_freef; /* free list forward pointer */
struct rnode *r_freeb; /* free list back pointer */
rhashq_t *r_hashq; /* pointer to the hash bucket */
vnode_t *r_vnode; /* vnode for remote file */
nfs_rwlock_t r_rwlock; /* serializes write/setattr requests */
nfs_rwlock_t r_lkserlock; /* serialize lock with other ops */
kmutex_t r_statelock; /* protects (most of) rnode contents */
nfs_fhandle r_fh; /* file handle */
struct servinfo *r_server; /* current server */
char *r_path; /* path to this rnode */
u_offset_t r_nextr; /* next byte read offset (read-ahead) */
cred_t *r_cred; /* current credentials */
cred_t *r_unlcred; /* unlinked credentials */
char *r_unlname; /* unlinked file name */
vnode_t *r_unldvp; /* parent dir of unlinked file */
len_t r_size; /* client's view of file size */
struct vattr r_attr; /* cached vnode attributes */
hrtime_t r_attrtime; /* time attributes become invalid */
hrtime_t r_mtime; /* client time file last modified */
long r_mapcnt; /* count of mmapped pages */
uint_t r_count; /* # of refs not reflect in v_count */
uint_t r_awcount; /* # of outstanding async write */
uint_t r_gcount; /* getattrs waiting to flush pages */
ushort_t r_flags; /* flags, see below */
short r_error; /* async write error */
kcondvar_t r_cv; /* condvar for blocked threads */
int (*r_putapage) /* address of putapage routine */
(vnode_t *, page_t *, u_offset_t *, size_t *, int, cred_t *);
avl_tree_t r_dir; /* cache of readdir responses */
rddir_cache *r_direof; /* pointer to the EOF entry */
symlink_cache r_symlink; /* cached readlink response */
writeverf3 r_verf; /* version 3 write verifier */
u_offset_t r_modaddr; /* address for page in writerp */
commit_t r_commit; /* commit information */
u_offset_t r_truncaddr; /* base for truncate operation */
vsecattr_t *r_secattr; /* cached security attributes (acls) */
cookieverf3 r_cookieverf; /* version 3 readdir cookie verifier */
lmpl_t *r_lmpl; /* pids that may be holding locks */
nfs3_pathconf_info *r_pathconf; /* cached pathconf information */
acache_t *r_acache; /* list of access cache entries */
kthread_t *r_serial; /* id of purging thread */
list_t r_indelmap; /* list of delmap callers */
} rnode_t;
#endif /* _KERNEL */
/*
* Flags
*/
#define RREADDIRPLUS 0x1 /* issue a READDIRPLUS instead of READDIR */
#define RDIRTY 0x2 /* dirty pages from write operation */
#define RSTALE 0x4 /* file handle is stale */
#define RMODINPROGRESS 0x8 /* page modification happening */
#define RTRUNCATE 0x10 /* truncating, don't commit */
#define RHAVEVERF 0x20 /* have a write verifier to compare against */
#define RCOMMIT 0x40 /* commit in progress */
#define RCOMMITWAIT 0x80 /* someone is waiting to do a commit */
#define RHASHED 0x100 /* rnode is in hash queues */
#define ROUTOFSPACE 0x200 /* an out of space error has happened */
#define RDIRECTIO 0x400 /* bypass the buffer cache */
#define RLOOKUP 0x800 /* a lookup has been performed */
#define RWRITEATTR 0x1000 /* attributes came from WRITE */
#define RINDNLCPURGE 0x2000 /* in the process of purging DNLC references */
#define RDELMAPLIST 0x4000 /* delmap callers tracking for as callback */
/*
* Convert between vnode and rnode
*/
#define RTOV(rp) ((rp)->r_vnode)
#define VTOR(vp) ((rnode_t *)((vp)->v_data))
#define VTOFH(vp) (RTOFH(VTOR(vp)))
#define RTOFH(rp) ((fhandle_t *)(&(rp)->r_fh.fh_buf))
#define VTOFH3(vp) (RTOFH3(VTOR(vp)))
#define RTOFH3(rp) ((nfs_fh3 *)(&(rp)->r_fh))
#ifdef _KERNEL
extern int nfs_async_readahead(vnode_t *, u_offset_t, caddr_t,
struct seg *, cred_t *,
void (*)(vnode_t *, u_offset_t,
caddr_t, struct seg *, cred_t *));
extern int nfs_async_putapage(vnode_t *, page_t *, u_offset_t, size_t,
int, cred_t *, int (*)(vnode_t *, page_t *,
u_offset_t, size_t, int, cred_t *));
extern int nfs_async_pageio(vnode_t *, page_t *, u_offset_t, size_t,
int, cred_t *, int (*)(vnode_t *, page_t *,
u_offset_t, size_t, int, cred_t *));
extern void nfs_async_readdir(vnode_t *, rddir_cache *,
cred_t *, int (*)(vnode_t *,
rddir_cache *, cred_t *));
extern void nfs_async_commit(vnode_t *, page_t *, offset3, count3,
cred_t *, void (*)(vnode_t *, page_t *,
offset3, count3, cred_t *));
extern void nfs_async_inactive(vnode_t *, cred_t *, void (*)(vnode_t *,
cred_t *));
extern int writerp(rnode_t *, caddr_t, int, struct uio *, int);
extern int nfs_putpages(vnode_t *, u_offset_t, size_t, int, cred_t *);
extern void nfs_invalidate_pages(vnode_t *, u_offset_t, cred_t *);
extern int rfs2call(struct mntinfo *, rpcproc_t, xdrproc_t, caddr_t,
xdrproc_t, caddr_t, cred_t *, int *, enum nfsstat *,
int, struct failinfo *);
extern int rfs3call(struct mntinfo *, rpcproc_t, xdrproc_t, caddr_t,
xdrproc_t, caddr_t, cred_t *, int *, nfsstat3 *,
int, struct failinfo *);
extern void nfs_setswaplike(vnode_t *, vattr_t *);
extern vnode_t *makenfsnode(fhandle_t *, struct nfsfattr *, struct vfs *,
hrtime_t, cred_t *, char *, char *);
extern vnode_t *makenfs3node_va(nfs_fh3 *, vattr_t *, struct vfs *, hrtime_t,
cred_t *, char *, char *);
extern vnode_t *makenfs3node(nfs_fh3 *, fattr3 *, struct vfs *, hrtime_t,
cred_t *, char *, char *);
extern void rp_addfree(rnode_t *, cred_t *);
extern void rp_rmhash(rnode_t *);
extern int check_rtable(struct vfs *);
extern void destroy_rtable(struct vfs *, cred_t *);
extern void rflush(struct vfs *, cred_t *);
extern nfs_access_type_t nfs_access_check(rnode_t *, uint32_t, cred_t *);
extern void nfs_access_cache(rnode_t *rp, uint32_t, uint32_t, cred_t *);
extern int nfs_access_purge_rp(rnode_t *);
extern int nfs_putapage(vnode_t *, page_t *, u_offset_t *, size_t *,
int, cred_t *);
extern int nfs3_putapage(vnode_t *, page_t *, u_offset_t *, size_t *,
int, cred_t *);
extern void nfs_printfhandle(nfs_fhandle *);
extern void nfs_write_error(vnode_t *, int, cred_t *);
extern rddir_cache *rddir_cache_alloc(int);
extern void rddir_cache_hold(rddir_cache *);
extern void rddir_cache_rele(rddir_cache *);
#ifdef DEBUG
extern char *rddir_cache_buf_alloc(size_t, int);
extern void rddir_cache_buf_free(void *, size_t);
#endif
extern int nfs_rw_enter_sig(nfs_rwlock_t *, krw_t, int);
extern int nfs_rw_tryenter(nfs_rwlock_t *, krw_t);
extern void nfs_rw_exit(nfs_rwlock_t *);
extern int nfs_rw_lock_held(nfs_rwlock_t *, krw_t);
extern void nfs_rw_init(nfs_rwlock_t *, char *, krw_type_t, void *);
extern void nfs_rw_destroy(nfs_rwlock_t *);
extern int nfs_directio(vnode_t *, int, cred_t *);
extern int nfs3_rddir_compar(const void *, const void *);
extern int nfs_rddir_compar(const void *, const void *);
extern struct zone *nfs_zone(void);
extern zoneid_t nfs_zoneid(void);
#endif
#ifdef __cplusplus
}
#endif
#endif /* _NFS_RNODE_H */