socksubr.c revision d28d4716f00b89f935829f300be20bd9116569dc
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
#include <sys/sysmacros.h>
#include <sys/vfs_opreg.h>
#include <sys/pathname.h>
#include <sys/socketvar.h>
#define _SUN_TPI_VERSION 2
/*
* Macros that operate on struct cmsghdr.
* The CMSG_VALID macro does not assume that the last option buffer is padded.
*/
(ISALIGNED_cmsghdr(cmsg) && \
int sockfs_defer_nl7c_init = 0;
struct kmem_cache *socket_cache;
/*
* sockconf_lock protects the socket configuration (socket types and
* socket filters) which is changed via the sockconfig system call.
*/
static int sockfs_update(kstat_t *, int);
static int sockfs_snapshot(kstat_t *, void *, int);
extern smod_info_t *sotpi_smod_create(void);
extern void sendfile_init();
extern void nl7c_init(void);
extern int modrootloaded;
/*
* Kernel structure for passing the sockinfo data back up to the user.
* The strings array allows us to convert AF_UNIX addresses into strings
* with a common method regardless of which n-bit kernel we're running.
*/
struct k_sockinfo {
};
/*
* Returns with the vnode held.
*/
int
{
int error;
/*
* Lookup the underlying filesystem vnode.
*/
if (error)
return (error);
/* Check that it is the correct vnode */
return (ENOTSOCK);
}
/*
* If devpath went through devfs, the device should already
* be configured. If devpath is a mknod file, however, we
* need to make sure the device is properly configured.
* To do this, we do something similar to spec_open()
* we need to return a vnode.
*/
if (error == 0)
if (error != 0)
return (ENXIO);
}
/* device is configured at this point */
if (!STREAMSTAB(maj)) {
return (ENOSTR);
}
return (0);
}
/*
* Update the accessed, updated, or changed times in an sonode
* with the current time.
*
* Note that both SunOS 4.X and 4.4BSD sockets do not present reasonable
* attributes in a fstat call. (They return the current time and 0 for
* all timestamps, respectively.) We maintain the current timestamps
* here primarily so that should sockmod be popped the resulting
* file descriptor will behave like a stream w.r.t. the timestamps.
*/
void
{
if (SOCK_IS_NONSTR(so))
return;
}
/*
* Init function called when sockfs is loaded.
*/
int
{
static const fs_operation_def_t sock_vfsops_template[] = {
};
int error;
char *err_str;
if (error != 0) {
"sockinit: bad vfs ops template");
return (error);
}
if (error != 0) {
err_str = "sockinit: bad socket vnode ops template";
/* vn_make_ops() does not reset socktpi_vnodeops on failure. */
goto failure;
}
sizeof (struct sonode), 0, sonode_constructor,
error = socktpi_init();
if (error != 0) {
goto failure;
}
if (error != 0) {
goto failure;
}
/*
* Set up the default create and destroy functions
*/
/*
* Build initial list mapping socket parameters to vnode.
*/
smod_init();
/*
* it is possible to preload the sockparams list here using
* calls like:
*/
/*
* Create a unique dev_t for use in so_fsid.
*/
dev = 0;
if (!modrootloaded) {
} else {
nl7c_init();
}
/* Initialize socket filters */
sof_init();
return (0);
(void) vfs_freevfsops_by_type(fstype);
if (socket_vnodeops != NULL)
return (error);
}
/*
* Caller must hold the mutex. Used to set SOLOCKED.
*/
void
{
}
}
/*
* Caller must hold the mutex and pass in SOLOCKED or SOASYNC_UNBIND.
* Used to clear SOLOCKED or SOASYNC_UNBIND.
*/
void
{
/*
* Process the T_DISCON_IND on sti_discon_ind_mp.
*
* Call to so_drain_discon_ind will result in so_lock
* being dropped and re-acquired later.
*/
if (!SOCK_IS_NONSTR(so)) {
}
}
/*
* Caller must hold the mutex. Used to set SOREADLOCKED.
* If the caller wants nonblocking behavior it should set fmode.
*/
int
{
return (EWOULDBLOCK);
}
return (0);
}
/*
* Like so_lock_read above but allows signals.
*/
int
{
return (EWOULDBLOCK);
return (EINTR);
}
return (0);
}
/*
* Caller must hold the mutex. Used to clear SOREADLOCKED,
* set in so_lock_read() or so_lock_read_intr().
*/
void
{
}
/*
* Verify that the specified offset falls within the mblk and
* that the resulting pointer is aligned.
* Returns NULL if not.
*/
void *
{
eprintline(0);
return (NULL);
}
eprintline(0);
return (NULL);
}
return ((void *)ptr1);
}
/*
* Return the AF_UNIX underlying filesystem vnode matching a given name.
* Makes sure the sending and the destination sonodes are compatible.
* The vnode is returned held.
*
* The underlying filesystem VSOCK vnode has a v_stream pointer that
* references the actual stream head (hence indirectly the actual sonode).
*/
static int
{
int error;
if (error) {
return (error);
}
/*
* Traverse lofs mounts to get the real vnode
*/
}
goto done2;
}
if (checkaccess) {
/*
* Check that we have permissions to access the destination
* vnode. This check is not done in BSD but it is required
*/
goto done2;
}
}
/*
* Check if the remote socket has been closed.
*
* Synchronize with vn_rele_stream by holding v_lock while traversing
* v_stream->sd_vnode.
*/
else
goto done2;
}
/*
* holding v_lock on underlying filesystem vnode and acquiring
* it on sockfs vnode. Assumes that no code ever attempts to
* acquire these locks in the reverse order.
*/
goto done;
}
error = EPROTOTYPE;
goto done;
}
return (0);
done:
return (error);
}
/*
* provider we have to do these ugly checks in the socket layer to
* preserve compatibility with SunOS 4.X.
*/
int
{
int family;
switch (family) {
case AF_INET:
return (EAFNOSUPPORT);
}
return (EINVAL);
}
break;
case AF_INET6: {
#ifdef DEBUG
struct sockaddr_in6 *sin6;
#endif /* DEBUG */
return (EAFNOSUPPORT);
}
return (EINVAL);
}
#ifdef DEBUG
/* Verify that apps don't forget to clear sin6_scope_id etc */
if (sin6->sin6_scope_id != 0 &&
"(%d) on socket. Pid = %d\n",
}
#endif /* DEBUG */
break;
}
case AF_UNIX:
return (0);
}
return (ENOENT);
}
return (EAFNOSUPPORT);
}
/* MAXPATHLEN + soun_family + nul termination */
return (ENAMETOOLONG);
}
break;
default:
/*
* Default is don't do any length or sa_family check
* to allow non-sockaddr style addresses.
*/
break;
}
return (0);
}
/*
* Translate an AF_UNIX sockaddr_un to the transport internal name.
* Assumes caller has called so_addr_verify first.
*/
/*ARGSUSED*/
int
{
int error;
struct sockaddr_un *soun;
void *addr;
/*
* Lookup vnode for the specified path name and verify that
* it is a socket.
*/
if (error) {
return (error);
}
/*
* Use the address of the peer vnode as the address to send
* to. We release the peer vnode here. In case it has been
* closed by the time the T_CONN_REQ or T_UNITDATA_REQ reaches the
* transport the message will get an error or be dropped.
*/
return (0);
}
/*
* Esballoc free function for messages that contain SO_FILEP option.
* Decrement the reference count on the file pointers using closef.
*/
void
{
int i;
/*
* We need pointer size alignment for fd_fds. On a LP64
* kernel, the required alignment is 8 bytes while
* the option headers and values are only 4 bytes
* aligned. So it's safer to do a bcopy compared to
* assigning fdbuf->fd_fds[i] to fp.
*/
}
}
/*
* Allocate an esballoc'ed message for AF_UNIX file descriptor passing.
* Waits if memory is not available.
*/
mblk_t *
{
return (mp);
}
/*
* Extract file descriptors from a fdbuf.
*/
/*ARGSUSED*/
static int
{
int i, fd;
int *rp;
int numfd;
/*
* Allocate a file descriptor and increment the f_count.
* The latter is needed since we always call fdbuf_free
* which performs a closef.
*/
for (i = 0; i < numfd; i++) {
goto cleanup;
/*
* We need pointer size alignment for fd_fds. On a LP64
* kernel, the required alignment is 8 bytes while
* the option headers and values are only 4 bytes
* aligned. So it's safer to do a bcopy compared to
* assigning fdbuf->fd_fds[i] to fp.
*/
if (AU_AUDITING())
}
return (0);
/*
* Undo whatever partial work the loop above has done.
*/
{
int j;
for (j = 0; j < i; j++) {
dprint(0,
("fdbuf_extract: cleanup[%d] = %d\n", j, *rp));
}
}
return (EMFILE);
}
/*
* Insert file descriptors into an fdbuf.
* Returns a kmem_alloc'ed fdbuf. The fdbuf should be freed
* by calling fdbuf_free().
*/
int
{
int numfd, i;
int *fds;
int fdbufsize;
fdbuf->fd_ebuflen = 0;
for (i = 0; i < numfd; i++) {
return (EBADF);
}
/*
* The maximum alignment for fdbuf (or any option header
* and its value) is 4 bytes. On a LP64 kernel, the alignment
* is not sufficient for pointers (fd_fds in this case). Since
* we just did a kmem_alloc (we get a double word alignment),
* we don't need to do anything on the send side (we lose
* the double word alignment because fdbuf goes after an
* option header (e.g. T_unitdata_req) which is only 4 byte
* aligned). We take care of this when we extract the file
* descriptor in fdbuf_extract or fdbuf_free.
*/
if (AU_AUDITING())
}
return (0);
}
static int
fdbuf_optlen(int rightslen)
{
int numfd;
}
static t_uscalar_t
fdbuf_cmsglen(int fdbuflen)
{
(int)sizeof (struct file *) * (int)sizeof (int));
}
/*
* Return non-zero if the mblk and fdbuf are consistent.
*/
static int
{
if (fdbuflen >= FDBUF_HDRSIZE &&
/*
* Check that the SO_FILEP portion of the
* message has not been modified by
* the loopback transport. The sending sockfs generates
* a message that is esballoc'ed with the free function
* being fdbuf_free() and where free_arg contains the
* identical information as the SO_FILEP content.
*
* If any of these constraints are not satisfied we
* silently ignore the option.
*/
return (1);
} else {
"sockfs: mismatched fdbuf content (%p)",
(void *)mp);
return (0);
}
} else {
"sockfs: mismatched fdbuf len %d, %d\n",
return (0);
}
}
/*
* When the file descriptors returned by sorecvmsg cannot be passed
* to the application this routine will cleanup the references on
* the files. Start at startoff bytes into the buffer.
*/
static void
{
int i;
for (i = 0; i < numfd; i++) {
if (startoff < 0)
startoff = 0;
if (startoff < (int)sizeof (int)) {
/*
* This file descriptor is partially or fully after
* the offset
*/
dprint(0,
("close_fds: cleanup[%d] = %d\n", i, fds[i]));
}
startoff -= (int)sizeof (int);
}
}
/*
* Close all file descriptors contained in the control part starting at
* the startoffset.
*/
void
int startoff)
{
return;
if (oldflg) {
return;
}
/* Scan control part for file descriptors. */
(int)CMSG_CONTENTLEN(cmsg),
}
}
}
/*
* in the control buffer. Returns with *fdlenp == -1 if there are no
* file descriptor options present. This is different than there being
* a zero-length file descriptor option.
* Fail if there are multiple SCM_RIGHTS cmsgs.
*/
int
{
void *fds;
int fdlen;
*fdlenp = -1;
return (0);
}
if (oldflg) {
if (controllen == 0)
*fdlenp = -1;
else
*fdlenp = controllen;
return (0);
}
fdlen = 0;
return (EINVAL);
}
}
*fdlenp = -1;
} else
return (0);
}
/*
* Return the length of the options including any file descriptor options.
*/
{
t_uscalar_t optlen = 0;
return (0);
if (oldflg)
return ((t_uscalar_t)(sizeof (struct T_opthdr) +
} else {
}
sizeof (struct T_opthdr));
}
return (optlen);
}
/*
* Copy options from control to the mblk. Skip any file descriptor options.
*/
void
{
return;
if (oldflg) {
/* No real options - caller has handled file descriptors */
return;
}
/*
* Note: The caller handles file descriptors prior
* to calling this function.
*/
continue;
}
}
/*
* Return the length of the control message derived from the options.
* Exclude SO_SRCADDR and SO_UNIX_CLOSE options. Include SO_FILEP.
* When oldflg is set only include SO_FILEP.
* so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
* allocates the space that so_opt2cmsg fills. If one changes, the other should
* also be checked for any possible impacts.
*/
{
t_uscalar_t cmsglen = 0;
t_uscalar_t last_roundup = 0;
continue;
}
int fdbuflen;
continue;
if (oldflg) {
continue;
}
if (oldflg)
continue;
if (get_udatamodel() == DATAMODEL_NATIVE) {
} else {
}
} else {
if (oldflg)
continue;
}
/*
* Exclude roundup for last option to not set
* MSG_CTRUNC when the cmsg fits but the padding doesn't fit.
*/
}
cmsglen -= last_roundup;
return (cmsglen);
}
/*
* Copy options from options to the control. Convert SO_FILEP to
* file descriptors.
* Returns errno or zero.
* so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
* allocates the space that so_opt2cmsg fills. If one changes, the other should
* also be checked for any possible impacts.
*/
int
{
int fdbuflen;
int error;
#endif
continue;
}
return (EPROTO);
if (oldflg) {
(int)controllen);
if (error != 0)
return (error);
continue;
} else {
int fdlen;
fdlen = (int)fdbuf_cmsglen(
(int)_TPI_TOPT_DATALEN(tohp));
sizeof (struct cmsghdr));
if (error != 0)
return (error);
}
if (oldflg)
continue;
sizeof (intptr_t));
if (get_udatamodel() == DATAMODEL_NATIVE) {
sizeof (struct cmsghdr);
/*
* on LP64 systems, the struct timeval in
* the destination will not be 8-byte aligned,
* so use bcopy to avoid alignment trouble
*/
} else {
sizeof (struct cmsghdr);
}
} else {
if (oldflg)
continue;
sizeof (struct cmsghdr));
/* copy content to control data part */
}
/* move to next CMSG structure! */
}
return (0);
}
/*
* Extract the SO_SRCADDR option value if present.
*/
void
{
*srclenp = 0;
}
}
}
/*
* Verify if the SO_UNIX_CLOSE option is present.
*/
int
{
dprint(1,
("so_getopt_unix_close: level 0x%x, name %d, len %d\n",
return (1);
}
return (0);
}
/*
* Allocate an M_PROTO message.
*
* If allocation fails the behavior depends on sleepflg:
* _ALLOC_NOSLEEP fail immediately
* _ALLOC_INTR sleep for memory until a signal is caught
* _ALLOC_SLEEP sleep forever. Don't return NULL.
*/
mblk_t *
{
/* Round up size for reuse */
else
int error; /* Dummy - error not returned to caller */
switch (sleepflg) {
case _ALLOC_SLEEP:
} else {
&error);
}
break;
case _ALLOC_INTR:
} else {
}
/* Caught signal while sleeping for memory */
return (NULL);
}
break;
case _ALLOC_NOSLEEP:
default:
return (NULL);
}
}
return (mp);
}
/*
* Allocate an M_PROTO message with a single component.
* len is the length of buf. size is the amount to allocate.
*
* buf can be NULL with a non-zero len.
* This results in a bzero'ed chunk being placed in the message.
*/
mblk_t *
{
if (size == 0)
/* Round up size for reuse */
return (NULL);
if (len != 0) {
else
}
return (mp);
}
/*
* The caller has to ensure that there is enough room in the mblk.
*
* buf can be NULL with a non-zero len.
* This results in a bzero'ed chunk being placed in the message.
*/
void
{
if (len != 0) {
/* Assert for room left */
else
}
}
/*
* Create a message using two kernel buffers.
* If size is set that will determine the allocation size (e.g. for future
* soappendmsg calls). If size is zero it is derived from the buffer
* lengths.
*/
mblk_t *
{
if (size == 0)
if (mp)
return (mp);
}
/*
* Create a message using three kernel buffers.
* If size is set that will determine the allocation size (for future
* soappendmsg calls). If size is zero it is derived from the buffer
* lengths.
*/
mblk_t *
{
if (size == 0)
}
return (mp);
}
#ifdef DEBUG
char *
{
static char buf[1024];
buf[0] = 0;
if (state & SS_ISCONNECTED)
if (state & SS_ISCONNECTING)
if (state & SS_ISDISCONNECTING)
if (state & SS_CANTSENDMORE)
if (state & SS_CANTRCVMORE)
if (state & SS_ISBOUND)
if (state & SS_NONBLOCK)
if (state & SS_ACCEPTCONN)
if (state & SS_SAVEDEOR)
if (state & SS_RCVATMARK)
if (state & SS_OOBPEND)
if (state & SS_HAVEOOBDATA)
if (state & SS_HADOOBDATA)
if (mode & SM_CONNREQUIRED)
if (mode & SM_FDPASSING)
if (mode & SM_OPTDATA)
if (mode & SM_BYTESTREAM)
return (buf);
}
char *
{
static char buf[1024];
return (buf);
}
switch (family) {
case AF_INET: {
struct sockaddr_in sin;
break;
}
case AF_INET6: {
struct sockaddr_in6 sin6;
break;
}
case AF_UNIX: {
break;
}
default:
break;
}
return (buf);
}
/*
 * The logical equivalence operator (a if-and-only-if b).
 *
 * Evaluates to 1 when a and b have the same truth value (both zero or
 * both non-zero) and to 0 otherwise.
 *
 * Each argument is evaluated exactly once.  Normalizing both sides with
 * '!' before comparing avoids re-evaluating an argument, which could
 * otherwise give a wrong answer if the argument has side effects.
 */
#define	EQUIVALENT(a, b)	((!(a)) == (!(b)))
/*
* Verify limitations and invariants on oob state.
* Return 1 if OK, otherwise 0 so that it can be used as
* ASSERT(verify_oobstate(so));
*/
int
{
/*
* The possible state combinations are:
* 0
* SS_OOBPEND
* SS_OOBPEND|SS_HAVEOOBDATA
* SS_OOBPEND|SS_HADOOBDATA
* SS_HADOOBDATA
*/
case 0:
case SS_OOBPEND:
case SS_OOBPEND|SS_HAVEOOBDATA:
case SS_OOBPEND|SS_HADOOBDATA:
case SS_HADOOBDATA:
break;
default:
printf("Bad oob state 1 (%p): state %s\n",
return (0);
}
/* SS_RCVATMARK should only be set when SS_OOBPEND is set */
printf("Bad oob state 2 (%p): state %s\n",
return (0);
}
/*
* (havemark != 0 or SS_RCVATMARK) iff SS_OOBPEND
* For TPI, the presence of a "mark" is indicated by sti_oobsigcnt.
*/
printf("Bad oob state 3 (%p): state %s\n",
return (0);
}
/*
* Unless SO_OOBINLINE we have so_oobmsg != NULL iff SS_HAVEOOBDATA
*/
printf("Bad oob state 4 (%p): state %s\n",
return (0);
}
if (!SOCK_IS_NONSTR(so) &&
printf("Bad oob state 5 (%p): counts %d/%d state %s\n",
return (0);
}
return (1);
}
#endif /* DEBUG */
/* initialize sockfs zone specific kstat related items */
void *
{
}
return (ksp);
}
/* tear down sockfs zone specific kstat related items */
/*ARGSUSED*/
void
{
}
}
/*
* Zones:
* Note that nactive is going to be different for each zone.
* This means we require kstat to call sockfs_update and then sockfs_snapshot
* for the same zone, or sockfs_snapshot will be taken into the wrong size
* buffer. This is safe, but if the buffer is too small, user will not be
* given details of all sockets. However, as this kstat has a ks_lock, kstat
* driver will keep it locked between the update and the snapshot, so no
* other process (zone) can currently get in between, resulting in a wrong size
* buffer allocation.
*/
static int
{
return (EACCES);
}
nactive++;
}
}
return (0);
}
static int
{
int ns; /* # of sonodes we've copied */
return (EACCES);
}
/*
* for each sonode on the socklist, we massage the important
* info into buf, in k_sockinfo format.
*/
ns = 0;
/* only stuff active sonodes and the same zone: */
continue;
}
/*
* If the sonode was activated between the update and the
* snapshot, we're done - as this is only a snapshot.
*/
break;
}
/* copy important info into buf: */
if (sn_len != 0) {
/* AF_UNIX socket names are NUL terminated */
}
}
if (sn_len != 0) {
}
}
ns++;
pksi++;
}
return (0);
}
{
int error = 0;
int iovcnt = 0;
short fflag;
rwflag = 0;
iovcnt = 1;
/* If read sync is not asked for, filter sync flags */
error = 0;
out:
if (error != 0) {
return (0);
} else {
*err = 0;
return (cnt);
}
}
int
{
if (fromkernel) {
return (0);
}
}
int
{
if (tokernel) {
return (0);
}
}