sockvnops.c revision 7d6c035b71d7c7b33a49c71bff266bf8aa9e0c24
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/sysmacros.h>
#include <sys/socketvar.h>
#define _SUN_TPI_VERSION 2
#include <sys/autoconf.h>
#include <inet/udp_impl.h>
#include <inet/tcp_impl.h>
/*
* NOTE(review): the next three lines are the tails of forward declarations
* whose leading lines are not visible in this view.
*/
struct caller_context *);
struct caller_context *);
int32_t *);
/*
* Forward declaration of the static poll routine; presumably the
* definition is the poll wrapper near the end of this file.
*/
static int socktpi_poll(struct vnode *, short, int, short *,
struct pollhead **);
/* Pointer to the vnode operations for sockets; no initializer here. */
struct vnodeops *socktpi_vnodeops;
/*
* Presumably the template from which socktpi_vnodeops is built.
* NOTE(review): the table entries are not visible in this view.
*/
const fs_operation_def_t socktpi_vnodeops_template[] = {
};
/*
* Do direct function call to the transport layer below; this would
* also allow the transport to utilize the read-side synchronous stream
* interface if necessary. This is a tunable that must
* not be modified on a running system. By default this is enabled
* for performance reasons and may be disabled for debugging purposes.
*/
/*
* Open routine used by socket() call. Note that vn_open checks for
* VSOCK and fails the open (and VOP_OPEN is fs_nosys). The VSOCK check is
* needed since VSOCK type vnodes exist in various underlying filesystems as
* a result of an AF_UNIX bind to a pathname.
*
* Sockets assume that the driver will clone (either itself
* or by using the clone driver) i.e. a socket() call will always
* result in a new vnode being created. This routine single-threads
* opens and closes of a given vnode.
*
* Returns 0 on success or an errno value (e.g. ENOTTY) on failure.
*
* NOTE(review): the function signature and most of the body, including
* the serialization mentioned above, are not visible in this view;
* confirm the details against the complete file.
*/
int
{
int error = 0;
if (error == 0) {
/*
* this is a post SVR4 tty driver - a socket can not
* be a controlling terminal. Fail the open.
*/
return (ENOTTY); /* XXX */
}
/*
* If caller is interested in doing direct function call
* interface to/from the transport module, probe the module
* directly beneath the streamhead to see if it qualifies.
*
* We turn off the direct interface when qualifications fail.
* In the acceptor case, we simply turn off the SS_DIRECT
* flag on the socket. We do the fallback after the accept
* has completed, before the new socket is returned to the
* application.
*/
/*
* SS_DIRECT is currently supported and tested
* only for tcp/udp; this is the main reason to
* have the following assertions.
*/
/*
* Abort direct call interface if the module directly
* underneath the stream head is not defined with the
* _D_DIRECT flag. This could happen in the tcp or
* udp case, when some other module is autopushed
* above it, or for some reasons the expected module
* isn't purely D_MP (which is the main requirement).
*/
int rval;
/* Continue on without direct calls */
if (!(flag & SO_ACCEPTOR)) {
/* Non-acceptor case: a _SIOCSOCKFALLBACK request is issued here. */
_SIOCSOCKFALLBACK, 0, 0, K_TO_K,
return (error);
}
}
}
}
} else {
/*
* While the same socket can not be reopened (unlike specfs)
* the stream head sets STREOPENFAIL when the autopush fails.
*/
/*
* Open failed part way through.
*/
return (error);
/*NOTREACHED*/
}
}
"sockfs open:maj %d vp %p so %p error %d", maj,
return (error);
}
/*
* Close routine for a socket vnode.
*
* When count is greater than one the routine returns 0 immediately, so
* the teardown described in the comments below happens only for the
* last reference. Otherwise returns the accumulated error value.
*
* NOTE(review): the function name, leading parameters and most of the
* body are not visible in this view; presumably this is socktpi_close.
*/
/*ARGSUSED2*/
static int
int flag,
int count,
{
int error = 0;
if (count > 1)
return (0);
/*
* Only call NL7C's close on last open reference.
*/
so->so_nl7c_flags = 0;
nl7c_close(so);
}
/*
* Only call the close routine when the last open reference through
* any [s, v]node goes away.
*/
/* Could avoid this when CANTSENDMORE for !dgram */
}
/*
* Disassemble the linkage from the AF_UNIX underlying file
* system vnode to this socket (by atomically clearing
* v_stream in vn_rele_stream) before strclose clears sd_vnode
* and frees the stream head.
*/
}
so->so_kssl_type);
}
}
}
}
/*
* Flush the T_DISCON_IND on so_discon_ind_mp.
*/
/*
* Decrement the device driver's reference count for streams
* opened via the clone dip. The driver was held in clone_open().
* The absence of clone_close() forces this asymmetry.
*/
return (error);
}
/*
* Read routine for a socket vnode.
*
* NOTE(review): the function name, leading parameters and most of the
* body are not visible in this view; presumably this is socktpi_read.
*/
/*ARGSUSED2*/
static int
int ioflag,
struct caller_context *ct)
{
/* The imaginary "sockmod" has been popped - act as a stream */
}
/*
* Zero the name and control lengths of the local message header;
* presumably a plain read wants neither an address nor control data.
*/
lmsg.msg_namelen = 0;
lmsg.msg_controllen = 0;
}
/*
* Write routine for a socket vnode.
*
* Visible error returns: EPIPE when SS_CANTSENDMORE is set in the sampled
* socket state; ENOTCONN (connection-oriented modes, SM_CONNREQUIRED) or
* EDESTADDRREQ (other modes) when the connected/bound state check fails.
*
* NOTE(review): the function name, leading parameters and most of the
* body are not visible in this view; presumably this is socktpi_write.
*/
/* ARGSUSED2 */
static int
int ioflag,
struct caller_context *ct)
{
int so_state;
int so_mode;
int error;
else
/* The imaginary "sockmod" has been popped - act as a stream */
}
/* State checks */
if (so_state & SS_CANTSENDMORE) {
return (EPIPE);
}
if (error != 0) {
return (error);
}
}
(SS_ISCONNECTED|SS_ISBOUND)) {
/* The errno depends on whether the mode requires a connection. */
if (so_mode & SM_CONNREQUIRED)
return (ENOTCONN);
else
return (EDESTADDRREQ);
}
if (!(so_mode & SM_CONNREQUIRED)) {
/*
* Note that this code does not prevent so_faddr_sa
* from changing while it is being used. Thus
* if an "unconnect"+connect occurs concurrently with
* this write the datagram might be delivered to a
* garbled address.
*/
}
if (so_mode & SM_BYTESTREAM) {
/* Send M_DATA messages */
/* NL7C consumed the data */
return (error);
}
}
} else {
/* Send T_DATA_REQ messages without MORE_flag set */
}
}
/*
* Copy-in helper with a kernel-source shortcut: when fromkernel is set
* the visible code returns 0.
*
* NOTE(review): the function name, parameters and the copy statements
* are not visible in this view; presumably this is so_copyin.
*/
int
{
if (fromkernel) {
return (0);
}
}
/*
* Copy-out helper with a kernel-destination shortcut: when tokernel is
* set the visible code returns 0.
*
* NOTE(review): the function name, parameters and the copy statements
* are not visible in this view; presumably this is so_copyout.
*/
int
{
if (tokernel) {
return (0);
}
}
/*
* ioctl routine for a socket vnode.
*
* The first switch rejects _I_INSERT/_I_REMOVE with EOPNOTSUPP and handles
* the module-list ioctls (I_FIND, I_LIST, I_LOOK, I_POP, I_PUSH). The
* second switch processes the socket-specific ioctls; STREAMS ioctls that
* do not apply to sockets fail with EOPNOTSUPP.
*
* NOTE(review): the function name, parameters and many statements are not
* visible in this view; a surviving debug string refers to this routine
* as socktpi_ioctl. The conditions guarding the EFAULT and EINVAL returns
* are among the missing lines.
*/
int
{
int error = 0;
switch (cmd) {
case _I_INSERT:
case _I_REMOVE:
/*
* Since there's no compelling reason to support these ioctls
* on sockets, and doing so would increase the complexity
* markedly, prevent it.
*/
return (EOPNOTSUPP);
case I_FIND:
case I_LIST:
case I_LOOK:
case I_POP:
case I_PUSH:
/*
* To prevent races and inconsistencies between the actual
* state of the stream and the state according to the sonode,
* we serialize all operations which modify or operate on the
* list of modules on the socket's stream.
*/
return (error);
default:
break;
/*
* The imaginary "sockmod" has been popped; act as a stream.
*/
}
/*
* Process socket-specific ioctls.
*/
switch (cmd) {
case FIONBIO: {
return (EFAULT);
if (value) {
} else {
}
return (0);
}
case FIOASYNC: {
return (EFAULT);
/*
* SS_ASYNC flag not already set correctly?
* (!value != !(so->so_state & SS_ASYNC))
* but some engineers find that too hard to read.
*/
return (error);
}
case SIOCSPGRP:
case FIOSETOWN: {
return (EFAULT);
/* Any change? */
return (error);
}
case SIOCGPGRP:
case FIOGETOWN:
return (EFAULT);
return (0);
case SIOCATMARK: {
int retval;
/*
* strwaitmark has a finite timeout after which it
* returns -1 if the mark state is undetermined.
* In order to avoid any race between the mark state
* in sockfs and the mark state in the stream head this
* routine loops until the mark state can be determined
* (or the urgent data indication has been removed by some
* other thread).
*/
do {
if (so_state & SS_RCVATMARK) {
retval = 1;
} else if (!(so_state & SS_OOBPEND)) {
/*
* No SIGURG has been generated -- there is no
* pending or present urgent data. Thus can't
* possibly be at the mark.
*/
retval = 0;
} else {
/*
* Have the stream head wait until there is
* either some messages on the read queue, or
* STRATMARK or STRNOTATMARK gets set. The
* STRNOTATMARK flag is used so that the
* transport can send up a MSGNOTMARKNEXT
* M_DATA to indicate that it is not
* at the mark and additional data is not about
* to be sent upstream.
*
* If the mark state is undetermined this will
* return -1 and we will loop rechecking the
* socket state.
*/
}
} while (retval == -1);
return (EFAULT);
return (0);
}
case I_FDINSERT:
case I_SENDFD:
case I_RECVFD:
case I_ATMARK:
case _SIOCSOCKFALLBACK:
/*
* These ioctls do not apply to sockets. I_FDINSERT can be
* used to send M_PROTO messages without modifying the socket
* state. I_SENDFD/I_RECVFD should not be used for socket file
* descriptor passing since they assume a twisted stream.
* SIOCATMARK must be used instead of I_ATMARK.
*
* _SIOCSOCKFALLBACK from an application should never be
* processed. It is only generated by socktpi_open() or
* in response to I_POP or I_PUSH.
*/
#ifdef DEBUG
"Unsupported STREAMS ioctl 0x%x on socket. "
#endif /* DEBUG */
return (EOPNOTSUPP);
case _I_GETPEERCRED:
return (EINVAL);
} else {
}
return (error);
default:
/*
* Do the higher-order bits of the ioctl cmd indicate
* that it is an I_* streams ioctl?
*/
#ifdef DEBUG
"Unsupported STREAMS ioctl 0x%x on socket. "
#endif /* DEBUG */
return (EOPNOTSUPP);
}
}
}
/*
* Handle plumbing-related ioctls.
*
* so_pushcnt is incremented after a successful I_PUSH and decremented
* after a successful I_POP. An I_POP while so_pushcnt is zero emulates
* popping "sockmod" by converting the socket to a stream with
* so_sock2stream(). For I_LIST, "sockmod" is accounted for as one extra
* module.
*
* Returns 0 on success or an errno value on failure.
*
* NOTE(review): the function name, parameters and many statements are not
* visible in this view; presumably this is socktpi_plumbioctl.
*/
static int
{
static const char sockmod_name[] = "sockmod";
int error;
return (EOPNOTSUPP);
/*
* The imaginary "sockmod" has been popped - act as a stream.
* If this is a push of sockmod then change back to a socket.
*/
"socket version\n"));
return (0);
}
}
}
switch (cmd) {
case I_PUSH:
if (error == 0)
if (error != 0)
return (error);
}
/* Count only pushes that succeeded. */
if (error == 0)
so->so_pushcnt++;
return (error);
case I_POP:
if (so->so_pushcnt == 0) {
/* Emulate sockmod being popped */
("socktpi_ioctl: going to STREAMS version\n"));
return (so_sock2stream(so));
}
/* Count only pops that succeeded. */
if (error == 0)
so->so_pushcnt--;
return (error);
case I_LIST: {
int i, nmods;
if (error == 0)
(*rvalp)++; /* Add one for sockmod */
return (error);
}
if (error != 0)
return (error);
if (nmods <= 0)
return (EINVAL);
/*
* Ceiling nmods at nstrpush to prevent someone from
* maliciously consuming lots of kernel memory.
*/
if (error != 0)
goto done;
/*
* Considering the module list as a 0-based array of sl_nmods
* modules, sockmod should conceptually exist at slot
* so_pushcnt. Insert sockmod at this location by sliding all
* of the module names after so_pushcnt over by one. We know
* that there will be room to do this since we allocated
* sl_modlist with an additional slot.
*/
/*
* Copy all of the entries out to ustrlist.
*/
if (error != 0)
goto done;
}
if (error == 0)
*rvalp = 0;
done:
return (error);
}
case I_LOOK:
if (so->so_pushcnt == 0) {
}
case I_FIND:
return (error);
/* if not found and string was sockmod return 1 */
if (error == ENAMETOOLONG)
*rvalp = 1;
}
return (error);
default:
break;
}
return (0);
}
/*
* Allow any flags. Record FNDELAY and FNONBLOCK so that they can be inherited
* from listener to acceptor.
*
* Returns the error value, which is initialized to 0.
*
* NOTE(review): the function name, parameters and most statements are not
* visible in this view; presumably this is socktpi_setfl.
*/
/* ARGSUSED */
int
{
int error = 0;
else
else
/*
* Set the state of the SS_ASYNC flag to match the state
* of the FASYNC flag passed to fcntl(F_SETFL).
* This exists solely for BSD fcntl() FASYNC compatibility.
*/
/*
* SS_ASYNC flag not already set correctly?
* (!(nflags & FASYNC) != !(so->so_state & SS_ASYNC))
* but some engineers find that too hard to read.
*/
}
return (error);
}
/*
* Get the made up attributes for the vnode.
* 4.3BSD returns the current time for all the timestamps.
* 4.4BSD returns 0 for all the timestamps.
* Here we use the access and modified times recorded in the sonode.
*
* Just like in BSD there is no effect on the underlying file system node
* bound to an AF_UNIX pathname.
*
* When sockmod has been popped this will act just like a stream. Since
* a socket is always a clone there is no need to inspect the attributes
* of the "realvp".
*
* The visible return path returns 0.
*
* NOTE(review): the function name, leading parameters and most statements
* are not visible in this view; presumably this is socktpi_getattr.
*/
/* ARGSUSED */
int
int flags,
{
/* Computed once on first use; 0 means "not yet computed". */
static int sonode_shift = 0;
/*
* Calculate the amount of bitshift to a sonode pointer which will
* still keep it unique. See below.
*/
if (sonode_shift == 0)
ASSERT(sonode_shift > 0);
/*
* The imaginary "sockmod" has been popped - act
* as a stream
*/
} else {
}
/*
* If the va_nodeid is > MAX_USHORT, then i386 stats might fail.
* So we shift down the sonode pointer to try and get the most
* uniqueness into 16-bits.
*/
/*
* We need to zero out the va_rdev to avoid some fstats getting
* EOVERFLOW. This also mimics SunOS 4.x and BSD behavior.
*/
return (0);
}
/*
* Set attributes.
* Just like in BSD there is no effect on the underlying file system node
* bound to an AF_UNIX pathname.
*
* When sockmod has been popped this will act just like a stream. Since
* a socket is always a clone there is no need to modify the attributes
* of the "realvp".
*
* The visible return path returns 0.
*
* NOTE(review): the function name, leading parameters and most statements
* are not visible in this view; presumably this is socktpi_setattr.
*/
/* ARGSUSED */
int
int flags,
{
/*
* If times were changed, update sonode.
*/
}
return (0);
}
/*
* Access check for a socket vnode; the visible else branch returns 0,
* i.e. all access is allowed.
*
* NOTE(review): the function name, parameters and the condition guarding
* the other branch are not visible in this view; presumably this is
* socktpi_access.
*/
int
{
else
return (0); /* Allow all access. */
}
/*
* 4.3BSD and 4.4BSD fail a fsync on a socket with EINVAL.
* This code does the same to be compatible and also to not give an
* application the impression that the data has actually been "synced"
* to the other end of the connection.
*
* Always returns EINVAL.
*
* NOTE(review): the function name and parameters are not visible in this
* view; presumably this is socktpi_fsync.
*/
/* ARGSUSED */
int
{
return (EINVAL);
}
/*
* Routine run when a socket vnode becomes inactive. Returns nothing.
*
* NOTE(review): the function name, parameters and all statements other
* than the early return are not visible in this view; presumably this is
* socktpi_inactive.
*/
/* ARGSUSED */
static void
{
/*
* If no one has reclaimed the vnode, remove from the
* cache now.
*/
/*
* Drop the temporary hold by vn_rele now
*/
return;
}
/* We are the sole owner of so now */
}
/*
* Always returns EINVAL.
*
* NOTE(review): the function name and parameters are not visible in this
* view; presumably this is the file-identifier vnode operation
* (socktpi_fid), which sockets do not support.
*/
/* ARGSUSED */
int
{
return (EINVAL);
}
/*
* Sockets are not seekable.
* (and there is a bug to fix STREAMS to make them fail this as well).
*
* Always returns ESPIPE.
*
* NOTE(review): the function name and parameters are not visible in this
* view; presumably this is socktpi_seek.
*/
/*ARGSUSED*/
int
{
return (ESPIPE);
}
/*
* Wrapper around the streams poll routine that implements socket poll
* semantics.
* The sockfs never calls pollwakeup itself - the stream head takes care
* of all pollwakeups. Since sockfs never holds so_lock when calling the
* stream head there can never be a deadlock due to holding so_lock across
* pollwakeup and acquiring so_lock in this routine.
*
* However, since the performance of VOP_POLL is critical we avoid
* acquiring so_lock here. This is based on two assumptions:
* - The poll implementation holds locks to serialize the VOP_POLL call
* and a pollwakeup for the same pollhead. This ensures that should
* e.g. so_state change during a socktpi_poll call the pollwakeup
* (which strsock_* and strrput conspire to issue) is issued after
* the state change. Thus the pollwakeup will block until VOP_POLL has
* returned and then wake up poll and have it call VOP_POLL again.
* - The reading of so_state without holding so_lock does not result in
* stale data that is older than the latest state change that has dropped
* so_lock. This is ensured by the mutex_exit issuing the appropriate
* memory barrier to force the data into the coherency domain.
*
* Returns 0 on success or the error from the underlying poll call.
*
* NOTE(review): the name line, the first and last parameters and many
* statements are not visible in this view; the visible parameters match
* the socktpi_poll prototype near the top of the file.
*/
static int
short events,
int anyyet,
short *reventsp,
{
/* Keep the caller's requested events; events itself is modified below. */
short origevents = events;
int error;
/* The imaginary "sockmod" has been popped - act as a stream */
}
if (!(so_state & SS_ISCONNECTED) &&
/* Not connected yet - turn off write side events */
}
/*
* Check for errors without calling strpoll if the caller wants them.
* In sockets the errors are represented as input/output events
* and there is no need to ask the stream head for this information.
*/
return (0);
}
/*
* Ignore M_PROTO only messages such as the T_EXDATA_IND messages.
* These messages with only an M_PROTO/M_PCPROTO part and no M_DATA
* will not trigger a POLLIN event with POLLRDDATA set.
* The handling of urgent data (causing POLLRDBAND) is done by
* inspecting SS_OOBPEND below.
*/
events |= POLLRDDATA;
/*
* After shutdown(output) a stream head write error is set.
* However, we should not return output events.
*/
if (error)
return (error);
if (so_state & SS_HASCONNIND)
if (so_state & SS_OOBPEND)
}
}
}
return (0);
}
/*
* Wrapper for getmsg. If the socket has been converted to a stream
* pass the request to the stream head.
*
* Both visible return paths yield ENOSTR.
*
* NOTE(review): the function name, leading parameters and most statements
* are not visible in this view; presumably this is sock_getmsg.
*/
int
int *flagsp,
int fmode,
)
{
/*
* Use the stream head to find the real socket vnode.
* This is needed when namefs sits above sockfs. Some
* sockets (like SCTP) are not streams.
*/
return (ENOSTR);
}
/* The imaginary "sockmod" has been popped - act as a stream */
}
return (ENOSTR);
}
/*
* Wrapper for putmsg. If the socket has been converted to a stream
* pass the request to the stream head.
*
* Note that while a regular socket (SOV_SOCKSTREAM) does support the
* streams ioctl set it does not support putmsg and getmsg.
* Allowing putmsg would prevent sockfs from tracking the state of
* the socket/transport and would also invalidate the locking in sockfs.
*
* Both visible return paths yield ENOSTR.
*
* NOTE(review): the function name, leading parameters and most statements
* are not visible in this view; presumably this is sock_putmsg.
*/
int
int flag,
int fmode
)
{
/*
* Use the stream head to find the real socket vnode.
* This is needed when namefs sits above sockfs.
*/
return (ENOSTR);
}
/* The imaginary "sockmod" has been popped - act as a stream */
}
return (ENOSTR);
}
/*
* Special function called only from f_getfl().
* Returns FASYNC if the SS_ASYNC flag is set on a socket, else 0.
* No locks are acquired here, so it is safe to use while uf_lock is held.
* This exists solely for BSD fcntl() FASYNC compatibility.
*
* NOTE(review): the function name, parameter and the condition selecting
* between the two returns are not visible in this view; presumably this
* is sock_getfasync.
*/
int
{
return (0);
return (FASYNC);
}