nfs_tbind.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* nfs_tbind.c, common part for nfsd and lockd.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <tiuser.h>
#include <fcntl.h>
#include <netconfig.h>
#include <stropts.h>
#include <errno.h>
#include <syslog.h>
#include <sys/resource.h>
#include <signal.h>
#include <netdir.h>
#include <unistd.h>
#include <string.h>
#include <malloc.h>
#include <stdlib.h>
#include "nfs_tbind.h"
/*
* Determine valid semantics for most applications.
*/
#define OK_TPI_TYPE(_nconf) \
#define BE32_TO_U32(a) \
/*
* Number of elements to add to the poll array on each allocation.
*/
#define POLL_ARRAY_INC_SIZE 64
/*
* Number of file descriptors by which the process soft limit may be
* increased on each call to nofile_increase(0).
*/
#define NOFILE_INC_SIZE 64
struct conn_ind {
};
struct conn_entry {
};
/*
* this file contains transport routines common to nfsd and lockd
*/
static int nofile_increase(int);
static int reuseaddr(int);
static void add_to_poll_list(int, struct netconfig *);
static char *serv_name_to_port_name(char *);
static int bind_to_proto(char *, char *, struct netbuf **,
struct netconfig **);
static int bind_to_provider(char *, char *, struct netbuf **,
struct netconfig **);
static void conn_close_oldest(void);
static void cots_listen_event(int, int);
static int do_poll_clts_action(int, int);
static int do_poll_cots_action(int, int);
static void remove_from_poll_list(int);
static int is_listen_fd_index(int);
static struct pollfd *poll_array;
static struct conn_entry *conn_polled;
static int num_conns; /* Current number of connections */
struct netbuf *);
/*
* Called to create and prepare a transport descriptor for in-kernel
* RPC service.
* Returns -1 on failure and a valid descriptor on success.
*/
int
{
int fd;
return (-1);
}
/*
* Open the transport device.
*/
if (fd == -1) {
(nofile_increase(0) == 0)) {
/* Try again with a higher NOFILE limit. */
}
if (fd == -1) {
return (-1);
}
}
/*
* Pop timod because the RPC module must be as close as possible
* to the transport.
*/
return (-1);
}
/*
* Common code for CLTS and COTS transports
*/
return (-1);
}
/* Tell rpcmod to act like a server stream. */
return (-1);
}
/*
* Re-push timod so that we will still be doing TLI
* operations on the descriptor.
*/
return (-1);
}
return (fd);
}
/*
* Adjust the process limit on open file descriptors.
* NOTE(review): the calls that read and set the limit are not visible
* in this copy; the purpose is taken from the NOFILE_INC_SIZE comment
* near the top of the file - confirm against the complete source.
*
* limit: a value > 0 takes the first branch below; anything else
*        (callers in this file pass 0) takes the else branch.
*
* Returns 0 on success and -1 on failure.
*/
static int
nofile_increase(int limit)
{
return (-1);
}
/* The two branches choose how the new limit is derived from "limit". */
if (limit > 0)
else
return (-1);
}
return (0);
}
int
{
int fd;
struct nd_addrlist *addrlist;
char reqbuf[128];
return (-1);
}
/* The nfs4_callback service does not use a fixed port number. */
"Cannot get address for transport %s host %s service %s",
return (-1);
}
/*
* If we're running over TCP, then set the
* SO_REUSEADDR option so that we can bind
* to our preferred address even if previously
* left connections exist in FIN_WAIT states.
* This is somewhat bogus, but otherwise you have
* to wait 2 minutes to restart after killing it.
*/
"couldn't set SO_REUSEADDR option on transport");
}
}
else
/* LINTED pointer alignment */
return (-1);
}
/*
* XXX - what about the space tb->addr.buf points to? This
* shouldn't be called with T_ALL.
*/
if (addrlist)
return (-1);
}
/* make sure we bound to the right address */
return (-1);
}
/*
* Call nfs4svc_setport so that the kernel can be
* informed what port number the daemon is listening on
* for incoming connection requests.
*/
/*
* Disable the Nagle algorithm on TCP connections.
* Connections accepted from this listener will
* inherit the listener options.
*/
/* LINTED pointer alignment */
/* LINTED pointer alignment */
"couldn't set NODELAY option for proto %s: t_errno = %d, %m",
}
}
return (fd);
}
static int
{
char reqbuf[128];
int *ip;
/* LINTED pointer alignment */
/* LINTED pointer alignment */
*ip = 1;
t_error("t_optmgmt");
return (-1);
}
return (0);
}
void
{
int error;
/*
* Save the error code across syslog(), just in case syslog()
* gets its own error and, therefore, overwrites errno.
*/
} else {
}
}
/*
* Called to set up service over a particular transport.
*/
void
{
register int sock;
int vers;
int err;
int l;
if (provider)
&retnconf);
else
&retnconf);
if (sock == -1) {
"Cannot establish %s service over %s: transport setup problem.",
return;
}
return;
}
/*
* Register all versions of the programs in the protocol block list.
*/
vers++) {
continue;
retaddr);
}
}
/* Don't drop core if supporting module(s) aren't loaded. */
/*
* svc() doesn't block, it returns success or failure.
*/
else
if (err < 0) {
"Cannot establish %s service over <file desc."
" %d, protocol %s> : %m. Exiting",
exit(1);
}
}
/*
* We successfully set up the server over this transport.
* Add this descriptor to the one being polled on.
*/
}
/*
* Set up the NFS service over all the available transports.
* Returns -1 for failure, 0 for success.
*/
int
{
int l;
return (-1);
}
OK_TPI_TYPE(nconf) &&
}
(void) endnetconfig(nc);
return (0);
}
/*
* poll on the open transport descriptors for events and errors.
*/
void
poll_for_action(void)
{
int nfds;
int i;
/*
* Keep polling until all transports have been closed. When this
* happens, we return.
*/
while ((int)num_fds > 0) {
switch (nfds) {
case 0:
/* Nothing to process this time round; go back and poll again. */
continue;
case -1:
/*
* Some errors from poll could be
* due to temporary conditions, and we try to
* be robust in the face of them. Other
* errors (should never happen in theory)
* are fatal (e.g. EINVAL, EFAULT).
*/
switch (errno) {
case EINTR:
/* Interrupted; simply retry. */
continue;
case EAGAIN:
case ENOMEM:
/* Treated as temporary: wait a while, then retry. */
(void) sleep(10);
continue;
default:
"poll failed: %m. Exiting");
exit(1);
}
default:
/* Fall out of the switch to scan the poll list below. */
break;
}
/*
* Go through the poll list looking for events.
*/
if (poll_array[i].revents) {
/* One fewer descriptor with pending events left to handle. */
nfds--;
/*
* We have a message, so try to read it.
* Record the error return in errno,
* so that syslog(LOG_ERR, "...%m")
* dumps the corresponding error string.
*/
NC_TPI_CLTS) {
poll_array[i].fd, i);
} else {
poll_array[i].fd, i);
}
/* No error recorded for this descriptor; move on to the next. */
if (errno == 0)
continue;
/*
* Most returned error codes mean that there is
* a fatal condition which we can only deal with
* by closing the transport.
*/
poll_array[i].fd,
(void) sleep(5);
}
}
}
"All transports have been closed with errors. Exiting.");
}
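/*
* Add a descriptor to the poll arrays, growing the arrays first if they
* are full, and count it in num_fds.
*/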
static void
{
static int poll_array_size = 0;
/*
* If the arrays are full, allocate new ones.
*/
if (num_fds == poll_array_size) {
struct conn_entry *tnp;
if (poll_array_size != 0) {
tpa = poll_array;
tnp = conn_polled;
} else
/*
* Allocate new arrays.
*/
poll_array = (struct pollfd *)
conn_polled = (struct conn_entry *)
exit(1);
}
/*
* Copy the data of the old ones into new arrays, and
* free the old ones.
*/
if (tpa) {
num_fds * sizeof (struct conn_entry));
}
}
/*
* Set the descriptor and event list. All possible events are
* polled for.
*/
/*
* Copy the transport data over too.
*/
/*
* Set the descriptor to non-blocking. Avoids a race
* between data arriving on the stream and then having it
* flushed before we can read it.
*/
exit(1);
}
/*
* Count this descriptor.
*/
++num_fds;
}
/*
* Drop one slot from the parallel arrays poll_array and conn_polled:
* the entries that follow slot i are moved down by one position and
* num_fds is decremented.
* NOTE(review): the test that picks slot i for "fd" is not visible in
* this copy; presumably it compares poll_array[i].fd with fd - confirm.
*/
static void
remove_from_poll_list(int fd)
{
int i;
int num_to_copy;
for (i = 0; i < num_fds; i++) {
--num_fds;
/* Number of entries located after the slot being removed. */
num_to_copy = num_fds - i;
/*
* NOTE(review): source and destination overlap whenever
* num_to_copy > 1. memcpy() is undefined for overlapping
* regions; memmove() is the well-defined call for this shift.
* The same applies to the conn_polled copy below.
*/
(void) memcpy((void *)&poll_array[i],
(void *)&poll_array[i+1],
num_to_copy * sizeof (struct pollfd));
sizeof (struct pollfd));
(void) memcpy((void *)&conn_polled[i],
(void *)&conn_polled[i+1],
num_to_copy * sizeof (struct conn_entry));
sizeof (struct conn_entry));
/* Only one slot is removed per call. */
return;
}
}
}
/*
* Called to read and interpret the event on a connectionless descriptor.
* Returns 0 if successful, or a UNIX error code if failure.
*/
static int
{
int error;
int ret;
int flags;
static int oldfd = -1;
/*
* We just need to have some space to consume the
* message in the event we can't use the TLI interface to do the
* job.
*
* We flush the message using getmsg(). For the control part
* we allocate enough for any TPI header plus 32 bytes for address
* and options. For the data part, there is nothing magic about
* the size of the array, but 256 bytes is probably better than
* 1 byte, and we don't expect any data portion anyway.
*
* If the array sizes are too small, we handle this because getmsg()
* (called to consume the message) will return MOREDATA|MORECTL.
* Thus we just call getmsg() until it's read the message.
*/
char databuf[256];
/*
* If this is the same descriptor as the last time
* do_poll_clts_action was called, we can save some
* de-allocation and allocation.
*/
if (unitdata) {
}
if (uderr) {
}
}
/*
* Allocate a unitdata structure for receiving the event.
*/
/* LINTED pointer alignment */
/*
* Save the error code across
* syslog(), just in case
* syslog() gets its own error
* and therefore overwrites errno.
*/
return (error);
}
goto flush_it;
}
}
flags = 0;
/*
* The idea is we wait for T_UNITDATA_IND's. Of course,
* we don't get any, because rpcmod filters them out.
* However, we need to call t_rcvudata() to let TLI
* tell us we have a T_UDERROR_IND.
*
* algorithm is:
* t_rcvudata(), expecting TLOOK.
* t_look(), expecting T_UDERR.
* t_rcvuderr(), expecting success (0).
* expand destination address into ASCII,
* and dump it.
*/
(void) syslog(LOG_WARNING,
/*
* Even though we don't expect any data, in case we do,
* keep reading until there is no more.
*/
goto try_again;
return (0);
}
switch (t_errno) {
case TNODATA:
return (0);
case TSYSERR:
/*
* System errors are returned to caller.
* Save the error code across
* syslog(), just in case
* syslog() gets its own error
* and therefore overwrites errno.
*/
return (error);
case TLOOK:
break;
default:
goto flush_it;
}
switch (ret) {
case 0:
return (0);
case -1:
/*
* System errors are returned to caller.
*/
/*
* Save the error code across
* syslog(), just in case
* syslog() gets its own error
* and therefore overwrites errno.
*/
return (error);
}
goto flush_it;
case T_UDERR:
break;
default:
(void) syslog(LOG_WARNING,
}
/* LINTED pointer alignment */
/*
* Save the error code across
* syslog(), just in case
* syslog() gets its own error
* and therefore overwrites errno.
*/
return (error);
}
goto flush_it;
}
}
if (ret == 0) {
/*
* Save the datagram error in errno, so that the
* %m argument to syslog picks up the error string.
*/
/*
* Log the datagram error, then log the host that
* probably triggered it. We cannot log both in the
* same transaction because of packet size limitations.
*/
/*
* Try to map the client's address back to a
* name.
*/
host->h_hostservs) {
"Bad NFS response was sent to client with host name: %s; service port: %s",
} else {
int i, j;
char *buf;
char *hex = "0123456789abcdef";
/*
* Mapping failed, print the whole thing
* in ASCII hex.
*/
}
buf[j] = '\0';
"Bad NFS response was sent to client with transport address: 0x%s",
buf);
}
return (0);
}
switch (t_errno) {
case TNOUDERR:
goto flush_it;
case TSYSERR:
/*
* System errors are returned to caller.
* Save the error code across
* syslog(), just in case
* syslog() gets its own error
* and therefore overwrites errno.
*/
return (error);
default:
goto flush_it;
}
/*
* If we get here, then we could not cope with whatever message
* we attempted to read, so flush it. If we did read a message,
* and one isn't present, that is all right, because fd is in
* nonblocking mode.
*/
/*
* Read and discard the message. Do this until there is
* nothing more to read or an error occurs.
*/
do {
flags = 0;
if (ret == -1)
return (errno);
} while (ret != 0);
return (0);
}
/*
* Release the oldest connection that is not already shutting down.
* Takes no arguments and returns nothing; on the path that closes the
* stream it decrements num_conns.
* NOTE(review): the search loop and the TLI calls are not visible in
* this copy; the early return below presumably covers the case where
* no suitable connection exists - confirm against the complete source.
*/
static void
conn_close_oldest(void)
{
int fd;
int i1;
/*
* Find the oldest connection that is not already in the
* process of shutting down.
*/
return;
break;
}
#ifdef DEBUG
printf("too many connections (%d), releasing oldest (%d)\n",
#else
#endif
/*
* For politeness, send a T_DISCON_REQ to the transport
* provider. We close the stream anyway.
*/
/* The stream is closed on this path, so stop counting it. */
num_conns--;
} else {
/*
* For orderly release, we do not close the stream
* until the T_ORDREL_IND arrives to complete
* the handshake.
*/
}
}
static boolean_t
{
return (FALSE);
}
/* LINTED pointer alignment */
return (FALSE);
}
return (FALSE);
}
"rejecting inbound connection(%s) with %d bytes of connect data",
return (FALSE);
}
} else {
}
return (TRUE);
}
static int
{
return (-1);
}
return (0);
do {
else {
}
}
break;
}
return (0);
}
static void
{
int event;
int new_fd;
int ret = 0;
char *clnt;
char *clnt_uaddr = NULL;
else {
}
/*
* If we have already accepted the maximum number of
* connections allowed on the command line, then drop
* the oldest connection (for any protocol) before
* accepting the new connection. Unless explicitly
* set on the command line, max_conns_allowed is -1.
*/
/*
* Create a new transport endpoint for the same proto as
* the listener.
*/
if (new_fd == -1) {
continue;
}
continue;
}
#ifdef DEBUG
#endif
goto do_next_conn;
}
switch (event) {
case T_LISTEN:
#ifdef DEBUG
#endif
continue;
case T_DISCONNECT:
#ifdef DEBUG
"cots_listen_event(%s): T_DISCONNECT during accept processing\n",
#endif
&conn_head);
continue;
default:
"unexpected event 0x%x during accept processing (%s)",
goto do_next_conn;
}
}
}
"Cannot set address mask for %s",
return;
}
/* Tell KRPC about the new stream. */
else
if (ret < 0) {
"unable to register new connection: %m");
} else {
/*
* This is the only error that could be
* caused by the client, so who was it?
*/
else
/*
* If we don't know who the client was,
* remain silent.
*/
if (clnt)
"unable to register new connection: client %s has dropped connection", clnt);
if (clnt_serv)
if (clnt_uaddr)
}
goto do_next_conn;
}
/*
* Poll on the new descriptor so that we get disconnect
* and orderly release indications.
*/
num_conns++;
/* Reset nconf in case it has been moved. */
}
}
static int
{
char buf[256];
int event;
int i1;
int flags;
const char *errorstr;
switch (event) {
case T_LISTEN:
#ifdef DEBUG
#endif
break;
case T_DATA:
#ifdef DEBUG
#endif
/*
* Receive a private notification from CONS rpcmod.
*/
if (i1 == -1) {
break;
}
if (i1 < sizeof (int))
break;
/*
* This connection has been idle for too long,
* so release it as politely as we can. If we
* have already initiated an orderly release
* and we get notified that the stream is
* still idle, pull the plug. This prevents
* hung connections from continuing to consume
* resources.
*/
#ifdef DEBUG
printf("initiating orderly release of idle connection\n");
#endif
goto fdclose;
}
/*
* For NC_TPI_COTS_ORD, the stream is closed
* and removed from the poll list when the
* T_ORDREL is received from the provider. We
* don't wait for it here because it may take
* a while for the transport to shut down.
*/
"unable to send orderly release %m");
}
} else
"unexpected event from CONS rpcmod %d", i1);
break;
case T_ORDREL:
#ifdef DEBUG
#endif
/* Perform an orderly release. */
/* T_ORDREL on listen fd's should be ignored */
if (!is_listen_fd_index(conn_index)) {
goto fdclose;
}
break;
break;
} else {
/*
* check to make sure we do not close
* listen fd
*/
if (is_listen_fd_index(conn_index))
break;
else
goto fdclose;
}
case T_DISCONNECT:
#ifdef DEBUG
#endif
/*
* T_DISCONNECT on listen fd's should be ignored.
*/
if (is_listen_fd_index(conn_index))
break;
else
goto fdclose;
case T_ERROR:
default:
errno);
}
} else if (event == -1)
else
errorstr = "";
"unexpected TLI event (0x%x) on "
"connection-oriented transport(%s,%d):%s",
num_conns--;
return (0);
}
}
return (0);
}
/*
* Translate a service name into the matching RPC port name.
*
* name: the service name to translate.
*
* Returns a pointer to a string literal: "nfs", "lockd" or
* "nfs4_callback", or "unrecognized" when no earlier return is taken.
* The result is a literal, so the caller must not modify or free it.
* NOTE(review): the comparisons that select each literal are not
* visible in this copy.
*/
static char *
serv_name_to_port_name(char *name)
{
/*
* Map service names (used primarily in logging) to
* RPC port names (used by netdir_*() routines).
*/
return ("nfs");
return ("lockd");
return ("nfs4_callback");
}
/* Fallback when none of the known services matched. */
return ("unrecognized");
}
static int
{
struct nd_hostserv hs;
return (-1);
}
if (OK_TPI_TYPE(nconf) &&
}
}
(void) endnetconfig(nc);
provider);
return (-1);
}
static int
{
struct nd_hostserv hs;
return (-1);
}
}
}
(void) endnetconfig(nc);
proto);
return (-1);
}
/*
* Create an address mask appropriate for the transport.
* The mask is used to obtain the host-specific part of
* a network address when comparing addresses.
* For an internet address the host-specific part is just
* the 32 bit IP address and this part of the mask is set
* to all-ones. The port number part of the mask is zeroes.
*/
static int
{
/*
* Find the size of the address we need to mask.
*/
t_error("t_getinfo");
return (-1);
}
return (-1);
}
return (-1);
}
/*
* Set the mask so that the port is ignored.
*/
/* LINTED pointer alignment */
(ulong_t)~0;
/* LINTED pointer alignment */
(ushort_t)~0;
/* LINTED pointer alignment */
/* LINTED pointer alignment */
(ushort_t)~0;
} else {
/*
* Set all mask bits.
*/
}
return (0);
}
/*
* For listen fd's index is always less than end_listen_fds.
* end_listen_fds is defined externally in the daemon that uses this library.
* Its value is equal to the number of open file descriptors after the
* last listen end point was opened but before any connection was accepted.
*/
static int
is_listen_fd_index(int index)
{
	/*
	 * Slots below end_listen_fds belong to listen endpoints; every
	 * slot at or above it is not a listen endpoint.
	 * Returns 1 for a listen slot, 0 otherwise.
	 */
	if (index >= end_listen_fds)
		return (0);
	return (1);
}