/* nfs_server.c revision bd6f16401fad6aa1309b1415c995fe7394dff03a */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
* All rights reserved.
* Use is subject to license terms.
*/
#include <sys/pathname.h>
#include <sys/sysmacros.h>
#include <rpc/auth_unix.h>
#include <rpc/auth_des.h>
#include <rpc/rpc_rdma.h>
#include <nfs/nfs_clnt.h>
#include <nfs/nfs_dispatch.h>
#include <nfs/nfs4_drc.h>
#define MAXHOST 32
/*
* Module linkage information.
*/
&mod_miscops, "NFS server module"
};
static struct modlinkage modlinkage = {
};
char _depends_on[] = "misc/klmmod";
/*
 * Module load entry point (modload(9)).
 * NOTE(review): this body appears truncated -- the mod_install() call that
 * normally assigns `status` between the nfs_srvinit() check and the
 * cleanup branch below is missing, so `if (status != 0)` can never be
 * true here (status is 0 after the first check passes). Confirm against
 * the original source before relying on this flow.
 */
int
_init(void)
{
int status;
if ((status = nfs_srvinit()) != 0) {
return (status);
}
/* NOTE(review): mod_install(&modlinkage) presumably belongs here. */
if (status != 0) {
/*
* Could not load module, cleanup previous
* initialization work.
*/
nfs_srvfini();
}
/*
* Initialise some placeholders for nfssys() calls. These have
* to be declared by the nfs module, since that handles nfssys()
* calls - also used by NFS clients - but are provided by this
* nfssrv module. These also then serve as confirmation to the
* relevant code in nfs that nfssrv has been loaded, as they're
* initially NULL.
*/
/* setup DSS paths here; must be done before initial server startup */
return (status);
}
int
_fini()
{
return (EBUSY);
}
/*
 * NOTE(review): truncation residue -- the declarator line (function name
 * and parameter list) of this definition was lost, so its purpose cannot
 * be determined or documented from this view.
 */
int
{
}
/*
* PUBLICFH_CHECK() checks if the dispatch routine supports
* RPC_PUBLICFH_OK, if the filesystem is exported public, and if the
* incoming request is using the public filehandle. The check duplicates
* the exportmatch() call done in checkexport(), and we should consider
* modifying those routines to avoid the duplication. For now, we optimize
* by calling exportmatch() only after checking that the dispatch routine
* supports RPC_PUBLICFH_OK, and if the filesystem is explicitly exported
* public (i.e., not the placeholder).
*/
static void nfs_srv_shutdown_all(int);
static void rfs4_server_start(int);
static void nullfree(void);
struct rpc_disptable *);
static void hanfsv4_failover(void);
bool_t);
}
/*
* Public Filehandle common nfs routines
*/
static int MCLpath(char **);
static void URLparse(char *);
/*
* NFS callout table.
* This table is used by svc_getreq() to dispatch a request with
* dispatch routine.
*
* NFS_PROGRAM must be the first entry in this table; be careful if this
* ordering is ever changed.
*/
/*
 * Service callout tables registered with the RPC framework so that
 * svc_getreq() can dispatch incoming requests: one pair (callout array
 * plus table) for connectionless transports (clts), one for
 * connection-oriented transports (cots), and one for RDMA.
 * NOTE(review): the SVC_CALLOUT entries and SVC_CALLOUT_TABLE fields
 * appear to have been lost to truncation -- the initializers below are
 * empty. Restore from the original source.
 */
static SVC_CALLOUT __nfs_sc_clts[] = {
};
static SVC_CALLOUT_TABLE nfs_sct_clts = {
};
static SVC_CALLOUT __nfs_sc_cots[] = {
};
static SVC_CALLOUT_TABLE nfs_sct_cots = {
};
static SVC_CALLOUT __nfs_sc_rdma[] = {
};
static SVC_CALLOUT_TABLE nfs_sct_rdma = {
};
/*
* Used to track the state of the server so that initialization
* can be done properly.
*/
/*
 * Run-state of the NFS server, used to sequence startup and shutdown.
 * NOTE(review): the original typedef was truncated -- it was never
 * closed and lacked NFS_SERVER_RUNNING, which later code (e.g.
 * nfs_srv_offline()) compares against. Reconstructed here; confirm the
 * member order against the original source.
 */
typedef enum {
	NFS_SERVER_STOPPED,	/* server state destroyed */
	NFS_SERVER_STOPPING,	/* server state being destroyed */
	NFS_SERVER_RUNNING,	/* server up and servicing requests */
	NFS_SERVER_QUIESCED,	/* server state preserved */
	NFS_SERVER_OFFLINE	/* server pool offline */
} nfs_server_running_t;

/* Current run-state; protected by nfs_server_upordown_lock below. */
static nfs_server_running_t nfs_server_upordown;
static kmutex_t nfs_server_upordown_lock;
static kcondvar_t nfs_server_upordown_cv;
/*
* DSS: distributed stable storage
* lists of all DSS paths: current, and before last warmstart
*/
/*
* RDMA wait variables.
*/
static kcondvar_t rdma_wait_cv;
static kmutex_t rdma_wait_mutex;
/*
* Will be called at the point the server pool is being unregistered
* from the pool list. From that point onwards, the pool is waiting
* to be drained and as such the server state is stale and pertains
* to the old instantiation of the NFS server pool.
*/
void
nfs_srv_offline(void)
{
if (nfs_server_upordown == NFS_SERVER_RUNNING) {
}
}
/*
* Will be called at the point the server pool is being destroyed so
* all transports have been closed and no service threads are in
* existence.
*
* If we quiesce the server, we're shutting it down without destroying the
* server state. This allows it to warm start subsequently.
*/
/*
 * Full (cold) shutdown: destroy the server state so the next start is a
 * cold start. Called when the server pool is being destroyed (see the
 * block comment above).
 * NOTE(review): the call into nfs_srv_shutdown_all() was missing from
 * the truncated original, leaving `quiesce` unused; restored based on
 * the forward declaration and the parallel nfs_srv_quiesce_all().
 */
void
nfs_srv_stop_all(void)
{
	int quiesce = 0;	/* 0 = destroy state (cold stop) */

	nfs_srv_shutdown_all(quiesce);
}
/*
* This alternative shutdown routine can be requested via nfssys()
*/
/*
 * Quiescing shutdown, requested via nfssys(): preserve the server state
 * so a subsequent start can warm-start from it.
 * NOTE(review): the call into nfs_srv_shutdown_all() was missing from
 * the truncated original, leaving `quiesce` unused; restored based on
 * the forward declaration and the parallel nfs_srv_stop_all().
 */
void
nfs_srv_quiesce_all(void)
{
	int quiesce = 1;	/* 1 = preserve state (warm restart later) */

	nfs_srv_shutdown_all(quiesce);
}
/*
 * Common shutdown path for nfs_srv_stop_all() / nfs_srv_quiesce_all().
 * quiesce != 0 preserves NFSv4 state for a later warm restart;
 * quiesce == 0 destroys it.
 * NOTE(review): this body is heavily truncated -- the condition at the
 * first inner `if` is unterminated, the state transitions, cv signaling
 * and cmn_err() calls are missing, and the braces do not balance. Do
 * not modify without consulting the original source.
 */
static void
nfs_srv_shutdown_all(int quiesce) {
if (quiesce) {
if (nfs_server_upordown == NFS_SERVER_RUNNING ||
/* reset DSS state, for subsequent warm restart */
rfs4_dss_numnewpaths = 0;
"NFSv4 state has been preserved");
}
} else {
if (nfs_server_upordown == NFS_SERVER_OFFLINE) {
}
}
}
static int
{
struct T_info_ack tinfo;
/*
* Find out what type of transport this is.
*/
return (error);
/*
* Based on our query of the transport type...
*
* NOTE: This assumes that NFS_PROGRAM is first in the array!!
* And the second entry is the NFS_ACL_PROGRAM.
*/
case T_CLTS:
return (EINVAL);
*sctpp = &nfs_sct_clts;
break;
case T_COTS:
case T_COTS_ORD:
/* For the NFS_ACL program, check the max version */
if (versmax > NFS_ACL_VERSMAX)
*sctpp = &nfs_sct_cots;
break;
default:
}
return (error);
}
/*
* NFS Server system call.
* Does all of the work of running a NFS server.
* uap->fd is the fd of an open transport provider
*/
int
{
int error;
int readsize;
char buf[KNC_STRSIZE];
#ifdef lint
#endif
/* Check privileges in nfssys() */
return (EBADF);
/*
* Set read buffer size to rsize
* and add room for RPC headers.
*/
if (readsize < RPC_MAXDATASIZE)
KNC_STRSIZE, &len);
if (error) {
return (error);
}
if (error) {
return (error);
}
if ((nfs_versmin > nfs_versmax) ||
(nfs_versmin < NFS_VERSMIN) ||
(nfs_versmax > NFS_VERSMAX)) {
}
if (error =
return (error);
}
/* Initialize nfsv4 server */
/* Create a transport handle. */
if (error)
/* HA-NFSv4: save the cluster nodeid */
if (cluster_bootflags & CLUSTER_BOOTED)
return (error);
}
static void
{
/*
* Determine if the server has previously been "started" and
* if not, do the per instance initialization
*/
if (nfs_server_upordown != NFS_SERVER_RUNNING) {
/* Do we need to stop and wait on the previous server? */
while (nfs_server_upordown == NFS_SERVER_STOPPING ||
if (nfs_server_upordown != NFS_SERVER_RUNNING) {
(void) svc_pool_control(NFS_SVCPOOL_ID,
SVCPSET_UNREGISTER_PROC, (void *)&nfs_srv_offline);
(void) svc_pool_control(NFS_SVCPOOL_ID,
SVCPSET_SHUTDOWN_PROC, (void *)&nfs_srv_stop_all);
/* is this an nfsd warm start? */
if (nfs_server_upordown == NFS_SERVER_QUIESCED) {
"server was previously quiesced; "
"existing NFSv4 state will be re-used");
/*
* HA-NFSv4: this is also the signal
* that a Resource Group failover has
* occurred.
*/
if (cluster_bootflags & CLUSTER_BOOTED)
} else {
/* cold start */
}
/*
* Check to see if delegation is to be
* enabled at the server
*/
if (nfs4_srv_delegation != FALSE)
}
}
}
/*
* If RDMA device available,
* start RDMA listener.
*/
int
{
int error;
int svc_state = 0;
}
/* Set the versions in the callout table */
/* For the NFS_ACL program, check the max version */
else
/* Initialize nfsv4 server */
while (!error) {
/*
* wait till either interrupted by a signal on
*/
stat = rdma_kwait();
/*
* stop services if running -- either on a HCA detach event
*/
svc_state) {
svc_state = 0;
}
/*
* wait loop and return;
*/
return (0);
/*
* restart stopped services on a HCA attach event
* (if not already running)
*/
goto restart;
/*
*/
}
return (error);
}
/* ARGSUSED */
void
{
}
/* ARGSUSED */
void
{
}
/* ARGSUSED */
static void
{
/* return (EOPNOTSUPP); */
}
/*
 * No-op results-free routine, installed in the dispatch tables for
 * procedures whose results hold no memory that needs releasing.
 */
static void
nullfree(void)
{
}
/*
 * NFS version 2 procedure names, indexed by procedure number; used by
 * the dispatch code for error and log messages.
 */
static char *rfscallnames_v2[] = {
"RFS2_NULL",
"RFS2_GETATTR",
"RFS2_SETATTR",
"RFS2_ROOT",
"RFS2_LOOKUP",
"RFS2_READLINK",
"RFS2_READ",
"RFS2_WRITECACHE",
"RFS2_WRITE",
"RFS2_CREATE",
"RFS2_REMOVE",
"RFS2_RENAME",
"RFS2_LINK",
"RFS2_SYMLINK",
"RFS2_MKDIR",
"RFS2_RMDIR",
"RFS2_READDIR",
"RFS2_STATFS"
};
static struct rpcdisp rfsdisptab_v2[] = {
/*
* NFS VERSION 2
*/
/* RFS_NULL = 0 */
{rpc_null,
xdr_void, NULL_xdrproc_t, 0,
xdr_void, NULL_xdrproc_t, 0,
0},
/* RFS_GETATTR = 1 */
/* RFS_SETATTR = 2 */
/* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */
xdr_void, NULL_xdrproc_t, 0,
xdr_void, NULL_xdrproc_t, 0,
0},
/* RFS_LOOKUP = 4 */
/* RFS_READLINK = 5 */
/* RFS_READ = 6 */
{rfs_read,
/* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */
xdr_void, NULL_xdrproc_t, 0,
xdr_void, NULL_xdrproc_t, 0,
0},
/* RFS_WRITE = 8 */
/* RFS_CREATE = 9 */
/* RFS_REMOVE = 10 */
#ifdef _LITTLE_ENDIAN
#else
#endif
/* RFS_RENAME = 11 */
#ifdef _LITTLE_ENDIAN
#else
#endif
/* RFS_LINK = 12 */
{rfs_link,
#ifdef _LITTLE_ENDIAN
#else
#endif
/* RFS_SYMLINK = 13 */
#ifdef _LITTLE_ENDIAN
#else
#endif
/* RFS_MKDIR = 14 */
/* RFS_RMDIR = 15 */
#ifdef _LITTLE_ENDIAN
#else
#endif
/* RFS_READDIR = 16 */
/* RFS_STATFS = 17 */
};
/*
 * NFS version 3 procedure names, indexed by procedure number; used by
 * the dispatch code for error and log messages.
 */
static char *rfscallnames_v3[] = {
"RFS3_NULL",
"RFS3_GETATTR",
"RFS3_SETATTR",
"RFS3_LOOKUP",
"RFS3_ACCESS",
"RFS3_READLINK",
"RFS3_READ",
"RFS3_WRITE",
"RFS3_CREATE",
"RFS3_MKDIR",
"RFS3_SYMLINK",
"RFS3_MKNOD",
"RFS3_REMOVE",
"RFS3_RMDIR",
"RFS3_RENAME",
"RFS3_LINK",
"RFS3_READDIR",
"RFS3_READDIRPLUS",
"RFS3_FSSTAT",
"RFS3_FSINFO",
"RFS3_PATHCONF",
"RFS3_COMMIT"
};
static struct rpcdisp rfsdisptab_v3[] = {
/*
* NFS VERSION 3
*/
/* RFS_NULL = 0 */
xdr_void, NULL_xdrproc_t, 0,
xdr_void, NULL_xdrproc_t, 0,
0},
/* RFS3_GETATTR = 1 */
/* RFS3_SETATTR = 2 */
nullfree, 0,
/* RFS3_LOOKUP = 3 */
/* RFS3_ACCESS = 4 */
/* RFS3_READLINK = 5 */
/* RFS3_READ = 6 */
/* RFS3_WRITE = 7 */
nullfree, 0,
/* RFS3_CREATE = 8 */
nullfree, 0,
/* RFS3_MKDIR = 9 */
nullfree, 0,
/* RFS3_SYMLINK = 10 */
nullfree, 0,
/* RFS3_MKNOD = 11 */
nullfree, 0,
/* RFS3_REMOVE = 12 */
nullfree, 0,
/* RFS3_RMDIR = 13 */
nullfree, 0,
/* RFS3_RENAME = 14 */
nullfree, 0,
/* RFS3_LINK = 15 */
nullfree, 0,
/* RFS3_READDIR = 16 */
/* RFS3_READDIRPLUS = 17 */
/* RFS3_FSSTAT = 18 */
/* RFS3_FSINFO = 19 */
/* RFS3_PATHCONF = 20 */
/* RFS3_COMMIT = 21 */
};
/*
 * NFS version 4 procedure names, indexed by procedure number.
 * NOTE(review): NFSv4 defines only two RPC procedures (NULL and
 * COMPOUND), and rfsdisptab_v4 below has only two entries; the extra
 * entries here (including the final "RFS4_CREATE") look like
 * extraction/truncation residue -- verify against the original source.
 */
static char *rfscallnames_v4[] = {
"RFS4_NULL",
"RFS4_COMPOUND",
"RFS4_NULL",
"RFS4_NULL",
"RFS4_NULL",
"RFS4_NULL",
"RFS4_NULL",
"RFS4_NULL",
"RFS4_CREATE"
};
static struct rpcdisp rfsdisptab_v4[] = {
/*
* NFS VERSION 4
*/
/* RFS_NULL = 0 */
{rpc_null,
xdr_void, NULL_xdrproc_t, 0,
xdr_void, NULL_xdrproc_t, 0,
nullfree, RPC_IDEMPOTENT, 0},
/* RFS4_compound = 1 */
rfs4_compound_free, 0, 0},
};
union rfs_args {
/*
* NFS VERSION 2
*/
/* RFS_NULL = 0 */
/* RFS_GETATTR = 1 */
/* RFS_SETATTR = 2 */
struct nfssaargs nfs2_setattr_args;
/* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */
/* RFS_LOOKUP = 4 */
struct nfsdiropargs nfs2_lookup_args;
/* RFS_READLINK = 5 */
/* RFS_READ = 6 */
struct nfsreadargs nfs2_read_args;
/* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */
/* RFS_WRITE = 8 */
struct nfswriteargs nfs2_write_args;
/* RFS_CREATE = 9 */
struct nfscreatargs nfs2_create_args;
/* RFS_REMOVE = 10 */
struct nfsdiropargs nfs2_remove_args;
/* RFS_RENAME = 11 */
struct nfsrnmargs nfs2_rename_args;
/* RFS_LINK = 12 */
struct nfslinkargs nfs2_link_args;
/* RFS_SYMLINK = 13 */
struct nfsslargs nfs2_symlink_args;
/* RFS_MKDIR = 14 */
struct nfscreatargs nfs2_mkdir_args;
/* RFS_RMDIR = 15 */
struct nfsdiropargs nfs2_rmdir_args;
/* RFS_READDIR = 16 */
struct nfsrddirargs nfs2_readdir_args;
/* RFS_STATFS = 17 */
/*
* NFS VERSION 3
*/
/* RFS_NULL = 0 */
/* RFS3_GETATTR = 1 */
/* RFS3_SETATTR = 2 */
/* RFS3_LOOKUP = 3 */
/* RFS3_ACCESS = 4 */
/* RFS3_READLINK = 5 */
/* RFS3_READ = 6 */
/* RFS3_WRITE = 7 */
/* RFS3_CREATE = 8 */
/* RFS3_MKDIR = 9 */
/* RFS3_SYMLINK = 10 */
/* RFS3_MKNOD = 11 */
/* RFS3_REMOVE = 12 */
/* RFS3_RMDIR = 13 */
/* RFS3_RENAME = 14 */
/* RFS3_LINK = 15 */
/* RFS3_READDIR = 16 */
/* RFS3_READDIRPLUS = 17 */
/* RFS3_FSSTAT = 18 */
/* RFS3_FSINFO = 19 */
/* RFS3_PATHCONF = 20 */
/* RFS3_COMMIT = 21 */
/*
* NFS VERSION 4
*/
/* RFS_NULL = 0 */
/* COMPUND = 1 */
};
union rfs_res {
/*
* NFS VERSION 2
*/
/* RFS_NULL = 0 */
/* RFS_GETATTR = 1 */
struct nfsattrstat nfs2_getattr_res;
/* RFS_SETATTR = 2 */
struct nfsattrstat nfs2_setattr_res;
/* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */
/* RFS_LOOKUP = 4 */
struct nfsdiropres nfs2_lookup_res;
/* RFS_READLINK = 5 */
struct nfsrdlnres nfs2_readlink_res;
/* RFS_READ = 6 */
struct nfsrdresult nfs2_read_res;
/* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */
/* RFS_WRITE = 8 */
struct nfsattrstat nfs2_write_res;
/* RFS_CREATE = 9 */
struct nfsdiropres nfs2_create_res;
/* RFS_REMOVE = 10 */
enum nfsstat nfs2_remove_res;
/* RFS_RENAME = 11 */
enum nfsstat nfs2_rename_res;
/* RFS_LINK = 12 */
enum nfsstat nfs2_link_res;
/* RFS_SYMLINK = 13 */
enum nfsstat nfs2_symlink_res;
/* RFS_MKDIR = 14 */
struct nfsdiropres nfs2_mkdir_res;
/* RFS_RMDIR = 15 */
enum nfsstat nfs2_rmdir_res;
/* RFS_READDIR = 16 */
struct nfsrddirres nfs2_readdir_res;
/* RFS_STATFS = 17 */
struct nfsstatfs nfs2_statfs_res;
/*
* NFS VERSION 3
*/
/* RFS_NULL = 0 */
/* RFS3_GETATTR = 1 */
/* RFS3_SETATTR = 2 */
/* RFS3_LOOKUP = 3 */
/* RFS3_ACCESS = 4 */
/* RFS3_READLINK = 5 */
/* RFS3_READ = 6 */
/* RFS3_WRITE = 7 */
/* RFS3_CREATE = 8 */
/* RFS3_MKDIR = 9 */
/* RFS3_SYMLINK = 10 */
/* RFS3_MKNOD = 11 */
/* RFS3_REMOVE = 12 */
/* RFS3_RMDIR = 13 */
/* RFS3_RENAME = 14 */
/* RFS3_LINK = 15 */
/* RFS3_READDIR = 16 */
/* RFS3_READDIRPLUS = 17 */
/* RFS3_FSSTAT = 18 */
/* RFS3_FSINFO = 19 */
/* RFS3_PATHCONF = 20 */
/* RFS3_COMMIT = 21 */
/*
* NFS VERSION 4
*/
/* RFS_NULL = 0 */
/* RFS4_COMPOUND = 1 */
};
/*
 * Per-version NFS dispatch tables (v2, v3, v4, in that order), each
 * entry starting with its procedure count.
 * NOTE(review): the remaining fields of each initializer (the
 * procedure-name and dispatch-table pointers) and the closing braces of
 * the inner initializers appear lost to truncation.
 */
static struct rpc_disptable rfs_disptable[] = {
{sizeof (rfsdisptab_v2) / sizeof (rfsdisptab_v2[0]),
{sizeof (rfsdisptab_v3) / sizeof (rfsdisptab_v3[0]),
{sizeof (rfsdisptab_v4) / sizeof (rfsdisptab_v4[0]),
};
/*
* If nfs_portmon is set, then clients are required to use privileged
* ports (ports < IPPORT_RESERVED) in order to get NFS services.
*
* N.B.: this attempt to carry forward the already ill-conceived notion
* of privileged ports is largely ineffectual: not only is it
* transport-dependent, it's laughably easy to spoof. If you're
* really interested in security, you must start with secure RPC instead.
*/
static int nfs_portmon = 0;
#ifdef DEBUG
static int cred_hits = 0;
static int cred_misses = 0;
#endif
#ifdef DEBUG
/*
* Debug code to allow disabling of rfs_dispatch() use of
* fastxdrargs() and fastxdrres() calls for testing purposes.
*/
static int rfs_no_fast_xdrargs = 0;
static int rfs_no_fast_xdrres = 0;
#endif
union acl_args {
/*
* ACL VERSION 2
*/
/* ACL2_NULL = 0 */
/* ACL2_GETACL = 1 */
/* ACL2_SETACL = 2 */
/* ACL2_GETATTR = 3 */
/* ACL2_ACCESS = 4 */
/* ACL2_GETXATTRDIR = 5 */
/*
* ACL VERSION 3
*/
/* ACL3_NULL = 0 */
/* ACL3_GETACL = 1 */
/* ACL3_SETACL = 2 */
/* ACL3_GETXATTRDIR = 3 */
};
union acl_res {
/*
* ACL VERSION 2
*/
/* ACL2_NULL = 0 */
/* ACL2_GETACL = 1 */
/* ACL2_SETACL = 2 */
/* ACL2_GETATTR = 3 */
/* ACL2_ACCESS = 4 */
/* ACL2_GETXATTRDIR = 5 */
/*
* ACL VERSION 3
*/
/* ACL3_NULL = 0 */
/* ACL3_GETACL = 1 */
/* ACL3_SETACL = 2 */
/* ACL3_GETXATTRDIR = 3 */
};
static bool_t
{
return (TRUE);
return (TRUE);
}
return (FALSE);
}
static void
struct rpc_disptable *disptable)
{
int which;
char *args;
union {
} args_buf;
char *res;
union {
} res_buf;
int dis_flags = 0;
int error = 0;
int anon_ok;
unsigned int nfslog_rec_id;
int dupstat;
int authres;
char **procnames;
error++;
goto done;
}
error++;
goto done;
}
/*
* Deserialize into the args struct.
*/
#ifdef DEBUG
#else
if ((auth_flavor == RPCSEC_GSS) ||
#endif
{
error++;
/*
* Check if we are outside our capabilities.
*/
goto done;
"Failed to decode arguments for %s version %u "
"procedure %s client %s%s",
goto done;
}
}
/*
* If Version 4 use that specific dispatch function.
*/
goto done;
}
/*
* Find export information and check authentication,
* setting the credential if everything is ok.
*/
void *fh;
case NFS_VERSION:
break;
case NFS_V3:
break;
}
/*
* Fix for bug 1038302 - corbin
* There is a problem here if anonymous access is
* disallowed. If the current request is part of the
* client's mount process for the requested filesystem,
* then it will carry root (uid 0) credentials on it, and
* will be denied by checkauth if that client does not
* have explicit root=0 permission. This will cause the
* client's mount operation to fail. As a work-around,
* we check here to see if the request is a getattr or
* statfs operation on the exported vnode itself, and
* pass a flag to checkauth with the result of this test.
*
* The filehandle refers to the mountpoint itself if
* the fh_data and fh_xdata portions of the filehandle
* are equal.
*
* Added anon_ok argument to checkauth().
*/
anon_ok = 1;
else
anon_ok = 0;
#ifdef DEBUG
cred_misses++;
} else
cred_hits++;
#else
}
#endif
/*
* Don't allow non-V4 clients access
* to pseudo exports
*/
error++;
goto done;
}
/*
* authres > 0: authentication OK - proceed
* authres == 0: authentication weak - return error
* authres < 0: authentication timeout - drop
*/
if (authres <= 0) {
if (authres == 0) {
error++;
}
goto done;
}
/* check to see if we might need charmap */
}
}
} else
} else
if (!(dis_flags & RPC_IDEMPOTENT)) {
&dupcached);
switch (dupstat) {
case DUP_ERROR:
error++;
goto done;
/* NOTREACHED */
case DUP_INPROGRESS:
error++;
goto done;
/* NOTREACHED */
case DUP_NEW:
case DUP_DROP:
error++;
goto done;
}
if (dis_flags & RPC_AVOIDWORK) {
} else {
}
break;
case DUP_DONE:
break;
}
} else {
error++;
goto done;
}
}
error++;
goto done;
}
/*
* Check to see if logging has been enabled on the server.
* If so, then obtain the export info struct to be used for
* the later writing of the log record. This is done for
* the case that a lookup is done across a non-logged public
* file system.
*/
if (nfslog_buffer_list != NULL) {
/*
* Is logging enabled?
*/
/*
* Copy the netbuf for logging purposes, before it is
* freed by svc_sendreply().
*/
if (logging_enabled) {
/*
* If RPC_MAPRESP flag set (i.e. in V2 ops) the
* res gets copied directly into the mbuf and
* may be freed soon after the sendreply. So we
* must copy it here to a safe place...
*/
}
}
}
/*
* Serialize and send results struct
*/
#ifdef DEBUG
#else
#endif
{
error++;
}
} else {
error++;
}
}
/*
* Log if needed
*/
if (logging_enabled) {
}
/*
* Free results struct. With the addition of NFS V4 we can
* have non-idempotent procedures with functions.
*/
}
done:
/*
* Free arguments struct
*/
if (disp) {
error++;
}
} else {
error++;
}
}
}
static void
{
"NFS", rfs_disptable);
}
/*
 * NFS_ACL version 2 procedure names, indexed by procedure number; used
 * by the dispatch code for error and log messages.
 */
static char *aclcallnames_v2[] = {
"ACL2_NULL",
"ACL2_GETACL",
"ACL2_SETACL",
"ACL2_GETATTR",
"ACL2_ACCESS",
"ACL2_GETXATTRDIR"
};
static struct rpcdisp acldisptab_v2[] = {
/*
* ACL VERSION 2
*/
/* ACL2_NULL = 0 */
{rpc_null,
xdr_void, NULL_xdrproc_t, 0,
xdr_void, NULL_xdrproc_t, 0,
0},
/* ACL2_GETACL = 1 */
/* ACL2_SETACL = 2 */
#ifdef _LITTLE_ENDIAN
#else
#endif
/* ACL2_GETATTR = 3 */
#ifdef _LITTLE_ENDIAN
#else
#endif
/* ACL2_ACCESS = 4 */
#ifdef _LITTLE_ENDIAN
#else
#endif
/* ACL2_GETXATTRDIR = 5 */
};
/*
 * NFS_ACL version 3 procedure names, indexed by procedure number; used
 * by the dispatch code for error and log messages.
 */
static char *aclcallnames_v3[] = {
"ACL3_NULL",
"ACL3_GETACL",
"ACL3_SETACL",
"ACL3_GETXATTRDIR"
};
static struct rpcdisp acldisptab_v3[] = {
/*
* ACL VERSION 3
*/
/* ACL3_NULL = 0 */
{rpc_null,
xdr_void, NULL_xdrproc_t, 0,
xdr_void, NULL_xdrproc_t, 0,
0},
/* ACL3_GETACL = 1 */
/* ACL3_SETACL = 2 */
nullfree, 0,
/* ACL3_GETXATTRDIR = 3 */
};
/*
 * Per-version NFS_ACL dispatch tables (v2, v3), each entry starting
 * with its procedure count.
 * NOTE(review): as with rfs_disptable above, the remaining initializer
 * fields and closing braces appear lost to truncation.
 */
static struct rpc_disptable acl_disptable[] = {
{sizeof (acldisptab_v2) / sizeof (acldisptab_v2[0]),
{sizeof (acldisptab_v3) / sizeof (acldisptab_v3[0]),
};
static void
{
"ACL", acl_disptable);
}
int
{
struct authdes_cred *adc;
switch (flavor) {
case AUTH_DES:
return (0);
break;
default:
break;
}
return (1);
}
/*
* checkauth() will check the access permission against the export
*
* This routine is used by NFS V3 and V2 code.
*/
static int
{
int anon_res = 0;
/*
* Check for privileged port number
* N.B.: this assumes that we know the format of a netbuf.
*/
if (nfs_portmon) {
return (0);
IPPORT_RESERVED) ||
IPPORT_RESERVED)) {
"nfs_server: client %s%ssent NFS request from "
"unprivileged port",
return (0);
}
}
/*
* return 1 on success or 0 on failure
*/
/*
* A failed AUTH_UNIX svc_get_cred() implies we couldn't set
* the credentials; below we map that to anonymous.
*/
"nfs_server: couldn't get unix cred for %s",
client_name(req));
return (0);
}
/*
* Short circuit checkauth() on operations that support the
* public filehandle, and if the request for that operation
* is using the public filehandle. Note that we must call
* sec_svc_getcred() first so that xp_cookie is set to the
* right value. Normally xp_cookie is just the RPC flavor
* of the the request, but in the case of RPCSEC_GSS it
* could be a pseudo flavor.
*/
if (publicfh_ok)
return (1);
/*
* Check if the auth flavor is valid for this export
*/
if (access & NFSAUTH_DROP)
return (-1); /* drop the request */
if (access & NFSAUTH_DENIED) {
/*
* If anon_ok == 1 and we got NFSAUTH_DENIED, it was
* probably due to the flavor not matching during the
* the mount attempt. So map the flavor to AUTH_NONE
* so that the credentials get mapped to the anonymous
* user.
*/
if (anon_ok == 1)
else
return (0); /* deny access */
} else if (access & NFSAUTH_MAPNONE) {
/*
* Access was granted even though the flavor mismatched
* because AUTH_NONE was one of the exported flavors.
*/
} else if (access & NFSAUTH_WRONGSEC) {
/*
* NFSAUTH_WRONGSEC is used for NFSv4. If we get here,
* it means a client ignored the list of allowed flavors
* returned via the MOUNT protocol. So we just disallow it!
*/
return (0);
}
switch (rpcflavor) {
case AUTH_NONE:
break;
case AUTH_UNIX:
access & NFSAUTH_ROOT) {
/*
* It is root, so apply rootid to get real UID
* Find the secinfo structure. We should be able
* to find it by the time we reach here.
* nfsauth_access() has done the checking.
*/
break;
}
}
}
}
break;
case AUTH_DES:
case RPCSEC_GSS:
/*
* Find the secinfo structure. We should be able
* to find it by the time we reach here.
* nfsauth_access() has done the checking.
*/
nfsflavor) {
break;
}
}
if (!secp) {
"no secinfo data for flavor %d",
return (0);
}
"nfs_server: client %s%sused invalid "
"auth window value",
return (0);
}
/*
* Map root principals listed in the share's root= list to root,
* and map any others principals that were mapped to root by RPC
* to anon.
*/
return (1);
/*
* NOTE: If and when kernel-land privilege tracing is
* added this may have to be replaced with code that
* retrieves root's supplementary groups (e.g., using
* kgss_get_group_info(). In the meantime principals
* mapped to uid 0 get all privileges, so setting cr's
* supplementary groups for them does nothing.
*/
return (1);
}
/*
* Not a root princ, or not in root list, map UID 0/nobody to
* the anon ID for the share. (RPC sets cr's UIDs and GIDs to
* UID_NOBODY and GID_NOBODY, respectively.)
*/
return (1);
break;
default:
return (0);
} /* switch on rpcflavor */
/*
* Even if anon access is disallowed via ex_anon == -1, we allow
* this access if anon_ok is set. So set creds to the default
* "nobody" id.
*/
if (anon_res != 0) {
if (anon_ok == 0) {
"nfs_server: client %s%ssent wrong "
"authentication for %s",
return (0);
}
return (0);
}
return (1);
}
/*
* returns 0 on failure, -1 on a drop, -2 on wrong security flavor,
* and 1 on success
*/
int
{
struct exportinfo *exi;
/*
* Check the access right per auth flavor on the vnode of
* this export for the given request.
*/
if (access & NFSAUTH_WRONGSEC)
return (-2); /* no access for this security flavor */
if (access & NFSAUTH_DROP)
return (-1); /* drop the request */
if (access & NFSAUTH_DENIED) {
return (0); /* deny access */
} else if (access & NFSAUTH_LIMITED) {
} else if (access & NFSAUTH_MAPNONE) {
/*
* Access was granted even though the flavor mismatched
* because AUTH_NONE was one of the exported flavors.
*/
}
/*
* XXX probably need to redo some of it for nfsv4?
* return 1 on success or 0 on failure
*/
switch (rpcflavor) {
case AUTH_NONE:
break;
case AUTH_UNIX:
/*
* It is root, so apply rootid to get real UID
* Find the secinfo structure. We should be able
* to find it by the time we reach here.
* nfsauth_access() has done the checking.
*/
break;
}
}
}
}
break;
default:
/*
* Find the secinfo structure. We should be able
* to find it by the time we reach here.
* nfsauth_access() has done the checking.
*/
nfsflavor) {
break;
}
}
if (!secp) {
"no secinfo data for flavor %d",
return (0);
}
"nfs_server: client %s%sused invalid "
"auth window value",
return (0);
}
/*
* Map root principals listed in the share's root= list to root,
* and map any others principals that were mapped to root by RPC
* to anon. If not going to anon, set to rootid (root_mapping).
*/
return (1);
/*
* NOTE: If and when kernel-land privilege tracing is
* added this may have to be replaced with code that
* retrieves root's supplementary groups (e.g., using
* kgss_get_group_info(). In the meantime principals
* mapped to uid 0 get all privileges, so setting cr's
* supplementary groups for them does nothing.
*/
return (1);
}
/*
* Not a root princ, or not in root list, map UID 0/nobody to
* the anon ID for the share. (RPC sets cr's UIDs and GIDs to
* UID_NOBODY and GID_NOBODY, respectively.)
*/
return (1);
break;
} /* switch on rpcflavor */
/*
* Even if anon access is disallowed via ex_anon == -1, we allow
* this access if anon_ok is set. So set creds to the default
* "nobody" id.
*/
if (anon_res != 0) {
"nfs_server: client %s%ssent wrong "
"authentication for %s",
return (0);
}
return (1);
}
static char *
{
/*
* If it's a Unix cred then use the
* hostname from the credential.
*/
hostname = ((struct authunix_parms *)
}
hostname = "";
return (hostname);
}
static char *
{
uchar_t *b;
char *frontspace = "";
/*
* We assume we are called in tandem with client_name and the
* format string looks like "...client %s%sblah blah..."
*
* If it's a Unix cred then client_name returned
* a host name, so we need insert a space between host name
* and IP address.
*/
frontspace = " ";
/*
* Convert the caller's IP address to a dotted string
*/
b[0] & 0xFF, b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
struct sockaddr_in6 *sin6;
} else {
/*
* No IP address to print. If there was a host name
* printed, then we print a space.
*/
}
return (buf);
}
/*
* NFS Server initialization routine. This routine should only be called
* once. It performs the following tasks:
* - Call sub-initialization routines (localize access to variables)
* - Initialize all locks
* - initialize the version 3 write verifier
*/
/*
 * One-time NFS server initialization (see the block comment above):
 * bring up the export tables, the NFSv4 server state, the common
 * server state and the auth cache, in that order.
 *
 * Returns 0 on success, or the error from the failing sub-init; on
 * failure no partial state is left behind.
 */
int
nfs_srvinit(void)
{
	int error;

	error = nfs_exportinit();
	if (error != 0)
		return (error);

	error = rfs4_srvrinit();
	if (error != 0) {
		/*
		 * Undo the export-table initialization above so a
		 * failed load leaves no state behind (this cleanup was
		 * missing, leaking nfs_exportinit()'s work).
		 */
		nfs_exportfini();
		return (error);
	}

	rfs_srvrinit();
	nfsauth_init();

	return (0);
}
/*
* NFS Server finalization routine. This routine is called to cleanup the
* initialization work previously performed if the NFS server module could
* not be loaded correctly.
*/
/* Tear down, in reverse order of initialization in nfs_srvinit(). */
void
nfs_srvfini(void)
{
nfsauth_fini();
rfs_srvrfini();
/*
 * NOTE(review): nfs_srvinit() also runs nfs_exportinit() and
 * rfs4_srvrinit(), but no matching fini calls appear here -- possibly
 * lost to truncation. Confirm against the original source.
 */
}
/*
* Set up an iovec array of up to cnt pointers.
*/
void
{
iovp++;
m = m->b_cont;
}
}
/*
* Common code between NFS Version 2 and NFS Version 3 for the public
* filehandle multicomponent lookups.
*/
/*
* Public filehandle evaluation of a multi-component lookup, following
* symbolic links, if necessary. This may result in a vnode in another
* filesystem, which is OK as long as the other filesystem is exported.
*
* Note that the exi will be set either to NULL or a new reference to the
* exportinfo struct that corresponds to the vnode of the multi-component path.
* It is the callers responsibility to release this reference.
*/
int
{
int pathflag;
int error;
/*
* check if the given path is a url or native path. Since p is
* modified by MCLpath(), it may be empty after returning from
* there, and should be checked.
*/
return (EIO);
/*
* If pathflag is SECURITY_QUERY, turn the SEC_QUERY bit
* on in sec->sec_flags. This bit will later serve as an
* indication in makefh_ol() or makefh3_ol() to overload the
* filehandle to contain the sec modes used by the server for
* the path.
*/
if (pathflag == SECURITY_QUERY) {
p++;
return (EIO);
} else {
"nfs_server: invalid security index %d, "
return (EIO);
}
}
if (p[0] == '\0') {
goto publicfh_done;
}
/*
* If name resolves to "/" we get EINVAL since we asked for
* the vnode of the directory that the file is in. Try again
* with NULL directory vnode.
*/
if (!error) {
} else {
/*
* This should not happen, the filesystem is
* in an inconsistent state. Fail the lookup
* at this point.
*/
}
}
}
if (error)
goto publicfh_done;
goto publicfh_done;
}
do {
/*
* *vpp may be an AutoFS node, so we perform
* a VOP_ACCESS() to trigger the mount of the intended
* filesystem, so we can perform the lookup in the
* intended filesystem.
*/
/*
* If vnode is covered, get the
* the topmost vnode.
*/
if (error) {
goto publicfh_done;
}
}
/*
* If realvp is different from *vpp
* then release our reference on *vpp, so that
* the export access check be performed on the
* real filesystem instead.
*/
} else {
break;
}
/* LINTED */
} while (TRUE);
/*
* Let nfs_vptexi() figure what the real parent is.
*/
} else {
/*
* If vnode is covered, get the
* the topmost vnode.
*/
if (error) {
goto publicfh_done;
}
}
/*
* *vpp is a file, obtain realvp of the parent
* directory vnode.
*/
}
}
/*
* The pathname may take us from the public filesystem to another.
* If that's the case then just set the exportinfo to the new export
* and build filehandle for it. Thanks to per-access checking there's
* no security issues with doing this. If the client is not allowed
* access to this new export then it will get an access error when it
* tries to use the filehandle
*/
goto publicfh_done;
}
/*
* Not allowed access to pseudo exports.
*/
goto publicfh_done;
}
/*
* Do a lookup for the index file. We know the index option doesn't
* allow paths through handling in the share command, so mc_dvp will
* be the parent for the index file vnode, if its present. Use
* temporary pointers to preserve and reuse the vnode pointers of the
* original directory in case there's no index file. Note that the
* index file is a native path, and should not be interpreted by
* the URL parser in rfs_pathname()
*/
error = 0;
} else { /* ok or error other than ENOENT */
if (tmc_dvp)
if (error)
goto publicfh_done;
/*
* Found a valid vp for index "filename". Sanity check
* for odd case where a directory is provided as index
* option argument and leads us to another filesystem
*/
/* Release the reference on the old exi value */
goto publicfh_done;
}
}
}
if (mc_dvp)
return (error);
}
/*
* Evaluate a multi-component path
*/
int
char *path, /* pathname to evaluate */
int pathflag) /* flag to identify path, e.g. URL */
{
char namebuf[TYPICALMAXPATHLEN];
int error;
/*
* If pathname starts with '/', then set startdvp to root.
*/
if (*path == '/') {
while (*path == '/')
path++;
}
if (error == 0) {
/*
* Call the URL parser for URL paths to modify the original
* string to handle any '%' encoded characters that exist.
* Done here to avoid an extra bcopy in the lookup.
* We need to be careful about pathlen's. We know that
* rfs_pathname() is called with a non-empty path. However,
* it could be emptied due to the path simply being all /'s,
* which is valid to proceed with the lookup, or due to the
* URL parser finding an encoded null character at the
* beginning of path which should not proceed with the lookup.
*/
return (ENOENT);
}
}
if (error == ENAMETOOLONG) {
/*
* This thread used a pathname > TYPICALMAXPATHLEN bytes long.
*/
return (error);
return (ENOENT);
}
}
}
return (error);
}
/*
* Adapt the multicomponent lookup path depending on the pathtype
*/
static int
{
unsigned char c = (unsigned char)**path;
/*
* If the MCL path is between 0x20 and 0x7E (graphic printable
* character of the US-ASCII coded character set), its a URL path,
* per RFC 1738.
*/
if (c >= 0x20 && c <= 0x7E)
return (URLPATH);
/*
* If the first octet of the MCL path is not an ASCII character
* then it must be interpreted as a tag value that describes the
* format of the remaining octets of the MCL path.
*
* If the first octet of the MCL path is 0x81 it is a query
* for the security info.
*/
switch (c) {
case 0x80: /* native path, i.e. MCL via mount protocol */
(*path)++;
return (NATIVEPATH);
case 0x81: /* security query */
(*path)++;
return (SECURITY_QUERY);
default:
return (-1);
}
}
/*
 * fromhex -- convert a single hexadecimal digit character ('0'-'9',
 * 'A'-'F', 'a'-'f') to its numeric value (0-15).  Any other character
 * maps to 0.
 *
 * NOTE(review): the first line of this macro was missing from this
 * revision of the text; the '0'-'9' arm is reconstructed to match the
 * visible 'A'-'F' / 'a'-'f' continuation lines -- confirm against
 * upstream.
 */
#define	fromhex(c)  ((c >= '0' && c <= '9') ? (c - '0') : \
	((c >= 'A' && c <= 'F') ? (c - 'A' + 10) :\
	((c >= 'a' && c <= 'f') ? (c - 'a' + 10) : 0)))
/*
* The implementation of URLparse guarantees that the final string will
* fit in the original one. Replaces '%' occurrences followed by 2 characters
* with its corresponding hexadecimal character.
*/
/*
 * Decode '%' hex escapes in str in place (RFC 1738 URL encoding).
 * Each "%XY" sequence is replaced by the single byte fromhex(X)*16 +
 * fromhex(Y); the result is always no longer than the input, so the
 * rewrite is done with two cursors over the same buffer.  A trailing
 * '%' with fewer than two following characters consumes whatever is
 * available.
 *
 * NOTE(review): the declarator line was missing from this revision of
 * the text; the name URLparse comes from the block comment above, and
 * the (char *str) parameter from the body's `p = q = str`.
 */
static void
URLparse(char *str)
{
	char *p, *q;

	p = q = str;
	while (*p) {
		*q = *p;
		if (*p++ == '%') {
			if (*p) {
				/* high nibble from the first hex digit */
				*q = fromhex(*p) * 16;
				p++;
				if (*p) {
					/* low nibble from the second */
					*q += fromhex(*p);
					p++;
				}
			}
		}
		q++;
	}
	*q = '\0';
}
/*
* Get the export information for the lookup vnode, and verify its
* useable.
*/
int
/*
 * Look up and validate the export information for a lookup vnode
 * (see the comment above).
 *
 * NOTE(review): this span has lost lines in extraction: the declarator
 * line and earlier parameters are missing, `walk` is declared but never
 * used in the visible code, and the `else` below has no matching `if`.
 * Restore from the upstream revision before relying on this text.
 */
struct exportinfo **exi)
{
int walk;
int error = 0;
else {
/*
* If nosub is set for this export then
* a lookup relative to the public fh
* must not terminate below the
* exported directory.
*/
}
return (error);
}
/*
* Do the main work of handling HA-NFSv4 Resource Group failover on
* Sun Cluster.
* We need to detect whether any RG admin paths have been added or removed,
* and adjust resources accordingly.
* Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
* order to scale, the list and array of paths need to be held in more
* suitable data structures.
*/
/*
 * NOTE(review): the body of this function has lost many lines in
 * extraction: `dss_path` is used but never declared, `cmpret` is tested
 * but never assigned, the added_paths allocation/free and the servinst
 * manipulation calls are absent, and the brace nesting is unbalanced.
 * The comments preserved below describe the intended algorithm; restore
 * the code from the upstream revision before relying on this text.
 */
static void
hanfsv4_failover(void)
{
int i, start_grace, numadded_paths = 0;
char **added_paths = NULL;
/*
* Note: currently, rfs4_dss_pathlist cannot be NULL, since
* it will always include an entry for NFS4_DSS_VAR_DIR. If we
* make the latter dynamically specified too, the following will
* need to be adjusted.
*/
/*
* First, look for removed paths: RGs that have been failed-over
* away from this node.
* Walk the "currently-serving" rfs4_dss_pathlist and, for each
* path, check if it is on the "passed-in" rfs4_dss_newpaths array
* from nfsd. If not, that RG path has been removed.
*
* Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
* any duplicates.
*/
do {
int found = 0;
/* used only for non-HA so may not be removed */
continue;
}
for (i = 0; i < rfs4_dss_numnewpaths; i++) {
int cmpret;
char *newpath = rfs4_dss_newpaths[i];
/*
* Since nfsd has sorted rfs4_dss_newpaths for us,
* once the return from strcmp is negative we know
* we've passed the point where "path" should be,
* and can stop searching: "path" has been removed.
*/
/* NOTE(review): the strcmp assigning cmpret is missing here */
if (cmpret < 0)
break;
if (cmpret == 0) {
found = 1;
break;
}
}
if (found == 0) {
/*
* This path has been removed.
* We must clear out the servinst reference to
* it, since it's now owned by another
* node: we should not attempt to touch it.
*/
/* remove from "currently-serving" list, and destroy */
/* allow for NUL */
} else {
/* path was found; not removed */
}
} while (dss_path != rfs4_dss_pathlist);
/*
* Now, look for added paths: RGs that have been failed-over
* to this node.
* Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
* for each path, check if it is on the "currently-serving"
* rfs4_dss_pathlist. If not, that RG path has been added.
*
* Note: we don't do duplicate detection here; nfsd does that for us.
*
* Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
* an upper bound for the size needed for added_paths[numadded_paths].
*/
/* probably more space than we need, but guaranteed to be enough */
if (rfs4_dss_numnewpaths > 0) {
}
/* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
for (i = 0; i < rfs4_dss_numnewpaths; i++) {
int found = 0;
char *newpath = rfs4_dss_newpaths[i];
do {
/* used only for non-HA */
continue;
}
found = 1;
break;
}
} while (dss_path != rfs4_dss_pathlist);
if (found == 0) {
}
}
/* did we find any added paths? */
if (numadded_paths > 0) {
/* create a new server instance, and start its grace period */
start_grace = 1;
/* read in the stable storage state from these paths */
/*
* Multiple failovers during a grace period will cause
* clients of the same resource group to be partitioned
* into different server instances, with different
* grace periods. Since clients of the same resource
* group must be subject to the same grace period,
* we need to reset all currently active grace periods.
*/
}
if (rfs4_dss_numnewpaths > 0)
}
/*
* Used by NFSv3 and NFSv4 server to query label of
* an NFS object (file/directory).
*/
/*
 * NOTE(review): the declarator line(s) of this function and most of its
 * body are missing from this text: the `} else {` below has no matching
 * `if`, `path` is declared but never assigned in the visible code, and
 * `zone_label` is returned without a visible declaration.  Restore from
 * the upstream revision before relying on this span.
 */
{
char *path;
} else {
/*
* v_path not cached. Fall back on pathname of exported
* file system as we rely on pathname from which we can
* derive a label. The exported file system portion of
* path is sufficient to obtain a label.
*/
return (NULL);
}
}
/*
* Caller has verified that the file is either
* exported or visible. So if the path falls in
* global zone, admin_low is returned; otherwise
* the zone's label is returned.
*/
return (zone_label);
}
/*
* TX NFS routine used by NFSv3 and NFSv4 to do label check
* on client label and server's file object label.
*/
/*
 * NOTE(review): the declarator line and most of this function's body are
 * missing from this text: `flag`, `result`, and the surrounding label
 * comparison logic have no visible declarations, and the string literal
 * below is an orphaned argument of a call whose first line was lost.
 * Restore from the upstream revision before relying on this span.
 */
struct exportinfo *exi)
{
return (B_FALSE);
}
"comparing server's file label(1) with client label(2) (vp(3))",
if (flag == EQUALITY_CHECK)
else
return (result);
}