socktpi.c revision bd118333506194b55077122465f5051a4e3ac349
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/sysmacros.h>
#include <sys/pathname.h>
#include <sys/socketvar.h>
#define _SUN_TPI_VERSION 2
/*
* Possible failures when memory can't be allocated. The documented behavior:
*
* 5.5: 4.X: XNET:
* EINTR
* (4.X does not document EINTR but returns it)
* (4.X getpeername and getsockname do not fail in practice)
* listen: - - ENOBUFS
* EINTR
* EINTR
*
* Resolution. When allocation fails:
* recv: return EINTR
* send: return EINTR
* connect, accept: EINTR
* bind, listen, shutdown (unbind, unix_close, disconnect): sleep
* socket, socketpair: ENOBUFS
* getpeername, getsockname: sleep
* getsockopt, setsockopt: sleep
*/
#ifdef SOCK_TEST
/*
* Variables that make sockfs do something other than the standard TPI
* for the AF_INET transports.
*
* solisten_tpi_tcp:
* TCP can handle a O_T_BIND_REQ with an increased backlog even though
* the transport is already bound. This is needed to avoid losing the
* port number should listen() do a T_UNBIND_REQ followed by a
* O_T_BIND_REQ.
*
* soconnect_tpi_udp:
* UDP and ICMP can handle a T_CONN_REQ.
* This is needed to make the sequence of connect(), getsockname()
* return the local IP address used to send packets to the connected to
* destination.
*
* soconnect_tpi_tcp:
* TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
* Set this to non-zero to send TPI conformant messages to TCP in this
* respect. This is a performance optimization.
*
* soaccept_tpi_tcp:
* TCP can handle a T_CONN_REQ without the acceptor being bound.
* This is a performance optimization that has been picked up in XTI.
*
* soaccept_tpi_multioptions:
* When inheriting SOL_SOCKET options from the listener to the accepting
* socket send them as a single message for AF_INET{,6}.
*/
int solisten_tpi_tcp = 0;
int soconnect_tpi_udp = 0;
int soconnect_tpi_tcp = 0;
int soaccept_tpi_tcp = 0;
int soaccept_tpi_multioptions = 1;
#else /* SOCK_TEST */
#define soconnect_tpi_tcp 0
#define soconnect_tpi_udp 0
#define solisten_tpi_tcp 0
#define soaccept_tpi_tcp 0
#define soaccept_tpi_multioptions 1
#endif /* SOCK_TEST */
#ifdef SOCK_TEST
extern int do_useracc;
extern clock_t sock_test_timelimit;
#endif /* SOCK_TEST */
/*
* NOTE(review): the beginning of this comment appears to be missing from
* the text as extracted; the surviving sentence is kept verbatim below.
*
* applications working. Turn on this flag to disable these checks.
*
* xnet_skip_checks: tested as !xnet_skip_checks in the shutdown path, in the
* branch that also contains the "caused ENOTCONN" diagnostic; a non-zero
* value therefore bypasses that branch.
* xnet_check_print: when non-zero, a diagnostic whose text ends in
* "caused EINVAL" or "caused ENOTCONN" is emitted at the guarded sites.
* xnet_truncate_print: not referenced in the visible part of this file;
* presumably enables a similar diagnostic for truncation - TODO confirm.
*/
int xnet_skip_checks = 0;
int xnet_check_print = 0;
int xnet_truncate_print = 0;
extern void sigintr(k_sigset_t *, int);
extern void sigunintr(k_sigset_t *);
extern void *nl7c_lookup_addr(void *, t_uscalar_t);
extern void *nl7c_add_addr(void *, t_uscalar_t);
extern void nl7c_listener_addr(void *, queue_t *);
static int sotpi_unbind(struct sonode *, int);
/* TPI sockfs sonode operations */
int);
socklen_t, int, int);
static int sotpi_listen(struct sonode *, int);
struct uio *);
static int sotpi_shutdown(struct sonode *, int);
static int sotpi_getsockname(struct sonode *);
sotpi_accept, /* sop_accept */
sotpi_bind, /* sop_bind */
sotpi_listen, /* sop_listen */
sotpi_connect, /* sop_connect */
sotpi_recvmsg, /* sop_recvmsg */
sotpi_sendmsg, /* sop_sendmsg */
sotpi_getpeername, /* sop_getpeername */
sotpi_getsockname, /* sop_getsockname */
sotpi_shutdown, /* sop_shutdown */
sotpi_getsockopt, /* sop_getsockopt */
sotpi_setsockopt /* sop_setsockopt */
};
/*
* Common create code for socket and accept. If tso is set the values
* from that node are used instead of issuing a T_INFO_REQ.
*
* Assumes that the caller has a VN_HOLD on accessvp.
* The VN_RELE will occur either when sotpi_create() fails or when
* the returned sonode is freed.
*/
struct sonode *
{
}
} else {
flags |= SO_SOCKSTR;
}
}
/*
* Tell local transport that it is talking to sockets.
*/
flags |= SO_SOCKSTR;
}
return (NULL);
}
return (NULL);
}
if (version == SOV_DEFAULT)
return (so);
}
/*
* Bind the socket to an unspecified address in sockfs only.
* required in all cases.
*/
static void
{
}
/*
* bind the socket.
*
* If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2
* are passed in we allow rebinding. Note that for backwards compatibility
* even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind.
* Thus the rebinding code is currently not executed.
*
* The constraints for rebinding are:
* - it is a SOCK_DGRAM, or
* - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
* and no listen() has been done.
* This rebinding code was added based on some language in the XNET book
* about not returning EINVAL if the protocol allows rebinding. However,
* this language is not present in the Posix socket draft. Thus maybe the
* rebinding logic should be deleted from the source.
*
* A null "name" can be used to unbind the socket if:
* - it is a SOCK_DGRAM, or
* - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
* and no listen() has been done.
*/
static int
{
struct T_bind_req bind_req;
struct T_bind_ack *bind_ack;
int error = 0;
void *addr;
int unbind_on_err = 1;
int save_so_backlog;
if (!(flags & _SOBIND_LOCK_HELD)) {
} else {
}
/*
* Make sure that there is a preallocated unbind_req message
* before binding. This message is allocated when the socket is
* created but it might have been consumed.
*/
/* NOTE: holding so_lock while sleeping */
so->so_unbind_mp =
}
if (flags & _SOBIND_REBIND) {
/*
* Called from solisten after doing an sotpi_unbind() or
* potentially without the unbind (latter for AF_INET{,6}).
*/
("sobind rebind UNIX: addrlen %d, addr 0x%p, vp %p\n",
so->so_ux_bound_vp));
} else {
}
} else if (flags & _SOBIND_UNSPEC) {
/*
* The caller checked SS_ISBOUND but not necessarily
* under so_lock
*/
/* No error */
goto done;
}
/* Set an initial local address */
case AF_UNIX:
/*
* Use an address with same size as struct sockaddr
* just like BSD.
*/
so->so_laddr_len =
/*
* Pass down an address with the implicit bind
* magic number and the rest all zeros.
* The transport will return a unique address.
*/
break;
case AF_INET:
case AF_INET6:
/*
* An unspecified bind in TPI has a NULL address.
* Set the address in sockfs to have the sa_family.
*/
addrlen = 0;
break;
default:
/*
* An unspecified bind in TPI has a NULL address.
* Set the address in sockfs to be zero length.
*
* Can not assume there is a sa_family for all
* protocol families. For example, AF_X25 does not
* have a family field.
*/
addrlen = 0;
break;
}
} else {
/*
* If it is ok to rebind the socket, first unbind
* with the transport. A rebind to the NULL address
* is interpreted as an unbind.
* Note that a bind to NULL in BSD does unbind the
* socket but it fails with EINVAL.
* Note that regular sockets set SOV_SOCKBSD i.e.
* _SOBIND_SOCKBSD gets set here hence no type of
* socket does currently allow rebinding.
*
* If the name is NULL just do an unbind.
*/
unbind_on_err = 0;
goto done;
}
unbind_on_err = 0;
goto done;
}
if (error) {
goto done;
}
goto done;
}
}
if (xnet_check_print) {
"caused EINVAL\n");
}
goto done;
}
case AF_UNIX:
/*
* All AF_UNIX addresses are nul terminated
* when copied (copyin_name) in so the minimum
* length is 3 bytes.
*/
goto done;
}
/*
* Verify so_family matches the bound family.
* BSD does not check this for AF_UNIX resulting
* in funny mknods.
*/
goto done;
}
break;
case AF_INET:
goto done;
}
goto done;
}
if ((flags & _SOBIND_XPG4_2) &&
/*
* sockets however application failures have
* been observed when it is applied to
* all sockets.
*/
goto done;
}
/*
* Force a zero sa_family to match so_family.
*
* Some programs like inetd(1M) don't set the
* family field. Other programs leave
* sin_family set to garbage - SunOS 4.X does
* not check the family field on a bind.
* We use the family field that
* was passed in to the socket() call.
*/
break;
case AF_INET6: {
#ifdef DEBUG
#endif /* DEBUG */
goto done;
}
goto done;
}
/*
* With IPv6 we require the family to match
* unlike in IPv4.
*/
goto done;
}
#ifdef DEBUG
/*
* Verify that apps don't forget to clear
* sin6_scope_id etc
*/
if (sin6->sin6_scope_id != 0 &&
"bind with uninitialized sin6_scope_id "
"(%d) on socket. Pid = %d\n",
(int)sin6->sin6_scope_id,
}
if (sin6->__sin6_src_id != 0) {
"bind with uninitialized __sin6_src_id "
"(%d) on socket. Pid = %d\n",
(int)sin6->__sin6_src_id,
}
#endif /* DEBUG */
break;
}
default:
/*
* Don't do any length or sa_family check to allow
* non-sockaddr style addresses.
*/
goto done;
}
break;
}
goto done;
}
/*
* Save local address.
*/
case AF_INET6:
case AF_INET:
break;
case AF_UNIX: {
struct sockaddr_un *soun =
/*
* Create vnode for the specified path name.
* Keep vnode held with a reference in so_ux_bound_vp.
* Use the vnode pointer as the address used in the
* bind with the transport.
*
* Use the same mode as in BSD. In particular this does
* not observe the umask.
*/
/* MAXPATHLEN + soun_family + nul termination */
if (so->so_laddr_len >
goto done;
}
/* NOTE: holding so_lock */
if (error) {
error = EADDRINUSE;
goto done;
}
/*
* Establish pointer from the underlying filesystem
* vnode to the socket node.
* so_ux_bound_vp and v_stream->sd_vnode form the
* cross-linkage between the underlying filesystem
* node and the socket node.
*/
/*
* Use the vnode pointer value as a unique address
* (together with the magic number to avoid conflicts
* with implicit binds) in the transport provider.
*/
break;
}
} /* end switch (so->so_family) */
}
/*
* set SS_ACCEPTCONN before sending down O_T_BIND_REQ since
* the transport can start passing up T_CONN_IND messages
* as soon as it receives the bind req and strsock_proto()
* insists that SS_ACCEPTCONN is set when processing T_CONN_INDs.
*/
if (flags & _SOBIND_LISTEN) {
}
/*
* or if an implicit NL7C socket via AF_NCA mark socket as NL7C.
*
* NL7C supports the TCP transport only so check AF_INET and AF_INET6
* family sockets only. If match mark as such.
*/
/*
* NL7C is not supported in non-global zones,
* we enforce this restriction here.
*/
/* An NL7C socket, mark it */
} else
}
/*
* for other transports we will send in a O_T_BIND_REQ.
*/
if (tcp_udp_xport &&
/* NOTE: holding so_lock while sleeping */
/* Done using so_laddr_sa - can drop the lock */
if (error) {
goto done;
}
if (error) {
goto done;
}
/*
* Even if some TPI message (e.g. T_DISCON_IND) was received in
* strsock_proto while the lock was dropped above, the bind
* is allowed to complete.
*/
/* Mark as bound. This will be undone if we detect errors below. */
if (flags & _SOBIND_NOXLATE) {
}
/* note that we've already set SS_ACCEPTCONN above */
/*
* Recompute addrlen - an unspecified bind sent down an
* address of length zero but we expect the appropriate length
* in return.
*/
/*
* The alignment restriction is really too strict but
* we want enough alignment to inspect the fields of
* a sockaddr_in.
*/
goto done;
}
if (!(flags & _SOBIND_UNSPEC)) {
/*
* Verify that the transport didn't return something we
* did not want e.g. an address other than what we asked for.
*
* using the new TPI (in which the transport would fail
* the request instead of assigning a different address).
*
* NOTE2: For protocols that we don't know (i.e. any
* other than AF_INET6, AF_INET and AF_UNIX), we
* cannot know if the transport should be expected to
* return the same address as that requested.
*
* down a T_BIND_REQ. We use O_T_BIND_REQ for others.
*
* For example, in the case of netatalk it may be
* inappropriate for the transport to return the
* requested address (as it may have allocated a local
* port number in behaviour similar to that of an
* AF_INET bind request with a port number of zero).
*
* Given the definition of O_T_BIND_REQ, where the
* transport may bind to an address other than the
* requested address, it's not possible to determine
* whether a returned address that differs from the
* requested address is a reason to fail (because the
* requested address was not available) or succeed
* (because the transport allocated an appropriate
*
* sockfs currently requires that the transport return
* the requested address in the T_BIND_ACK, unless
* there is code here to allow for any discrepancy.
* Such code exists for AF_INET and AF_INET6.
*
* Netatalk chooses to return the requested address
* rather than the (correct) allocated address. This
* means that netatalk violates the TPI specification
* (and would not function correctly if used from a
* TLI application), but it does mean that it works
* with sockfs.
*
* As noted above, using the newer XTI bind primitive
* (T_BIND_REQ) in preference to O_T_BIND_REQ would
* allow sockfs to be more sure about whether or not
* the bind request had succeeded (as transports are
* not permitted to bind to a different address than
* that requested - they must return failure).
* Unfortunately, support for T_BIND_REQ may not be
* present in all transport implementations (netatalk,
* for example, doesn't have it), making the
* transition difficult.
*/
/* Assumes that the requested address was in use */
error = EADDRINUSE;
goto done;
}
case AF_INET6:
case AF_INET: {
/*
* Take advantage of the alignment
* of sin_port and sin6_port which fall
* in the same place in their data structures.
* Just use sin_port for either address family.
*
* This may become a problem if (heaven forbid)
* there's a separate ipv6port_reserved... :-P
*
* Binding to port 0 has the semantics of letting
* the transport bind to any port.
*
* If the transport is TCP or UDP since we had sent
* a T_BIND_REQ we would not get a port other than
* what we asked for.
*/
if (tcp_udp_xport) {
/*
* Pick up the new port number if we bound to
* port 0.
*/
break;
}
error = EADDRINUSE;
goto done;
}
/*
* Pick up the new port number if we bound to port 0.
*/
/*
* Unfortunately, addresses aren't _quite_ the same.
*/
goto done;
}
} else {
goto done;
}
}
break;
}
case AF_UNIX:
error = EADDRINUSE;
("addrlen %d, addr 0x%x, vp %p\n",
so->so_ux_bound_vp));
goto done;
}
break;
default:
/*
* NOTE: This assumes that addresses can be
* byte-compared for equivalence.
*/
error = EADDRINUSE;
goto done;
}
/*
* Don't mark SS_LADDR_VALID, as we cannot be
* sure that the returned address is the real
* bound address when talking to an unknown
* transport.
*/
break;
}
} else {
/*
* Save the returned address for getsockname.
* Needed for unspecific bind unless transport supports
* the TI_GETMYNAME ioctl.
* Do this for AF_INET{,6} even though they do, as
* caching info here is much better performance than
* Any which can't for some reason _must_ _not_ set
* LADDR_VALID here for the caching version of getsockname
* to not break;
*/
case AF_UNIX:
/*
* Record the address bound with the transport
* for use by socketpair.
*/
break;
case AF_INET:
case AF_INET6:
break;
default:
/*
* Don't mark SS_LADDR_VALID, as we cannot be
* sure that the returned address is the real
* bound address when talking to an unknown
* transport.
*/
break;
}
}
/*
* Was an AF_NCA bind() so add it to the addr list for
* reporting purposes.
*/
}
}
done:
if (error) {
/* reset state & backlog to values held on entry */
if (clear_acceptconn_on_err == B_TRUE)
if (restore_backlog_on_err == B_TRUE)
int err;
/* LINTED - statement has no consequent: if */
if (err) {
} else {
}
}
}
if (!(flags & _SOBIND_LOCK_HELD)) {
} else {
/* If the caller held the lock don't release it here */
}
return (error);
}
/* bind the socket */
int
int flags)
{
if ((flags & _SOBIND_SOCKETPAIR) == 0)
flags &= ~_SOBIND_SOCKETPAIR;
}
/*
* Unbind a socket - used when bind() fails, when bind() specifies a NULL
* address, or when listen needs to unbind and bind.
* If the _SOUNBIND_REBIND flag is specified the addresses are retained
* so that a sobind can pick them up.
*/
static int
{
struct T_unbind_req unbind_req;
int error = 0;
goto done;
}
/*
* Flush the read and write side (except stream head read queue)
* and send down T_UNBIND_REQ.
*/
0, _ALLOC_SLEEP);
if (error) {
goto done;
}
if (error) {
goto done;
}
/*
* Even if some TPI message (e.g. T_DISCON_IND) was received in
* strsock_proto while the lock was dropped above, the unbind
* is allowed to complete.
*/
if (!(flags & _SOUNBIND_REBIND)) {
/*
* Clear out bound address.
*/
}
/* Clear out address */
so->so_laddr_len = 0;
}
done:
/* If the caller held the lock don't release it here */
return (error);
}
/*
* listen on the socket.
* For TPI conforming transports this has to first unbind with the transport
* and then bind again using the new backlog.
*/
int
{
int error = 0;
return (EOPNOTSUPP);
/*
* If the socket is ready to accept connections already, then
* return without doing anything. This avoids a problem where
* a second listen() call fails if a connection is pending and
* leaves the socket unbound. Only when we are not unbinding
* with the transport can we safely increase the backlog.
*/
/*CONSTCOND*/
return (0);
return (EINVAL);
if (backlog < 0)
backlog = 0;
/*
* Use the same qlimit as in BSD. BSD checks the qlimit
* before queuing the next connection implying that a
* listen(sock, 0) allows one connection to be queued.
* BSD also uses 1.5 times the requested backlog.
*
* XNS Issue 4 required a strict interpretation of the backlog.
* This has been waived subsequently for Issue 4 and the change
* incorporated in XNS Issue 5. So we aren't required to do
* anything special for XPG apps.
*/
else
/*
* If the listen doesn't change the backlog we do nothing.
* This avoids an EPROTO error from the transport.
*/
goto done;
/*
* Must have been explicitly bound in the UNIX domain.
*/
goto done;
}
} else if (backlog > 0) {
/*
* AF_INET{,6} hack to avoid losing the port.
* Assumes that all AF_INET{,6} transports can handle a
* O_T_BIND_REQ with a non-zero CONIND_number when the TPI
* has already bound thus it is possible to avoid the unbind.
*/
/*CONSTCOND*/
!solisten_tpi_tcp)) {
if (error)
goto done;
}
} else {
}
if (error)
goto done;
done:
return (error);
}
/*
* Disconnect either a specified seqno or all (-1).
* The former is used on listening sockets only.
*
* When seqno == -1 sodisconnect could call sotpi_unbind. However,
* the current use of sodisconnect(seqno == -1) is only for shutdown
* so there is no point (and potentially incorrect) to unbind.
*/
int
{
struct T_discon_req discon_req;
int error = 0;
if (!(flags & _SODISCONNECT_LOCK_HELD)) {
} else {
}
goto done;
}
/*
* Flush the write side (unless this is a listener)
* and then send down a T_DISCON_REQ.
* (Don't flush on listener since it could flush {O_}T_CONN_RES
* and other messages.)
*/
0, _ALLOC_SLEEP);
if (error) {
goto done;
}
if (error) {
goto done;
}
/*
* Even if some TPI message (e.g. T_DISCON_IND) was received in
* strsock_proto while the lock was dropped above, the disconnect
* is allowed to complete. However, it is not possible to
* assert that SS_ISCONNECTED|SS_ISCONNECTING are set.
*/
done:
if (!(flags & _SODISCONNECT_LOCK_HELD)) {
} else {
/* If the caller held the lock don't release it here */
}
return (error);
}
int
{
struct T_conn_ind *conn_ind;
struct T_conn_res *conn_res;
int error = 0;
void *src;
void *opt;
/*
* Defer single-threading the accepting socket until
* the T_CONN_IND has been received and parsed and the
* new sonode has been opened.
*/
/* Check that we are not already connected */
goto conn_bad;
goto e_bad;
/*
* Save SEQ_number for error paths.
*/
goto disconnect_unlocked;
}
case AF_INET:
case AF_INET6:
} else {
/*
* The transport (in this case TCP) hasn't sent up
* a pointer to an instance for the accept fast-path.
* Disable fast-path completely because the call to
* sotpi_create() below would otherwise create an
* incomplete TCP instance, which would lead to
* problems when sockfs sends a normal T_CONN_RES
* message down the new stream.
*/
optlen = 0;
}
break;
case AF_UNIX:
default:
if (optlen != 0) {
goto disconnect_unlocked;
}
}
srclen = 0;
}
/* Extract src address from options */
if (optlen != 0)
}
break;
}
/*
* Create the new socket.
*/
/*
* Accept can not fail with ENOBUFS. sotpi_create
* sleeps waiting for memory until a signal is caught
* so return EINTR.
*/
goto e_disc_unl;
}
#ifdef DEBUG
/*
* SO_DEBUG is used to trigger the dprint* and eprint* macros thus
* it's inherited early to allow debugging of the accept code itself.
*/
#endif /* DEBUG */
/*
* Save the SRC address from the T_CONN_IND
* for getpeername to work on AF_UNIX and on transports that do not
* support TI_GETPEERNAME.
*
* NOTE: AF_UNIX NUL termination is ensured by the sender's
* copyin_name().
*/
goto disconnect_vp_unlocked;
}
(sizeof (struct T_conn_res) + sizeof (intptr_t))) {
}
sizeof (intptr_t), 0, _ALLOC_INTR);
/*
* Accept can not fail with ENOBUFS.
* A signal was caught so return EINTR.
*/
goto disconnect_vp_unlocked;
}
} else {
}
/*
* New socket must be bound at least in sockfs and, except for AF_INET,
* (or AF_INET6) it also has to be bound in the transport provider.
* After accepting the connection on nso so_laddr_sa will be set to
* contain the same address as the listener's local address
* so the address we bind to isn't important.
*/
/*CONSTCOND*/
/*
* Optimization for AF_INET{,6} transports
* that can handle a T_CONN_RES without being bound.
*/
} else {
/* Perform NULL bind with the transport provider. */
goto disconnect_vp_unlocked;
}
}
/*
* Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES
* so that any data arriving on the new socket will cause the
* appropriate signals to be delivered for the new socket.
*
* No other thread (except strsock_proto and strsock_misc)
* can access the new socket thus we relax the locking.
*/
error = 0;
}
}
/*
* Make note of the socket level options. TCP and IP level options
* are already inherited. We could do all this after accept is
* successful but doing it here simplifies code and no harm done
* for error case.
*/
conn_res->ACCEPTOR_id = 0;
/* Send down the T_CONN_RES on acceptor STREAM */
if (error) {
goto disconnect_vp;
}
if (error) {
goto disconnect_vp;
}
sizeof (struct T_ok_ack));
} else {
sizeof (struct T_ok_ack));
}
/*
* An NL7C marked listen()er so the new socket
* inherits the listen()er's NL7C state.
*
* When calling NL7C to process the new socket
* pass the nonblocking i/o state of the listen
* socket as this is the context we are in.
*/
if (nl7c_process(nso,
/*
* NL7C has completed processing on the
* socket, close the socket and back to
* the top to await the next T_CONN_IND.
*/
CRED());
goto again;
}
/* Pass the new socket out */
}
/*
* Pass out new socket.
*/
return (0);
}
/*
* Copy local address from listener.
*/
/*
* This is the non-performance case for sockets (e.g. AF_UNIX sockets)
* which don't support the FireEngine accept fast-path. It is also
* used when the virtual "sockmod" has been I_POP'd and I_PUSH'd
* again. Neither sockfs nor TCP attempt to find out if some other
* random module has been inserted in between (in which case we
* should follow TLI accept behaviour). We blindly assume the worst
* case and revert back to old behaviour i.e. TCP will not send us
* any option (eager) and the accept should happen on the listener
* queue. Any queued T_conn_ind have already got their options removed
* by so_sock2_stream() when "sockmod" was I_POP'd.
*/
/*
* Fill in the {O_}T_CONN_RES before getting SOLOCKED.
*/
#ifdef _ILP32
queue_t *q;
/*
* Find read queue in driver
*/
while (SAMESTR(q))
q = q->q_next;
q = RD(q);
#else
#endif /* _ILP32 */
} else {
}
conn_res->OPT_length = 0;
conn_res->OPT_offset = 0;
if (error) {
goto disconnect_vp;
}
if (error) {
goto disconnect_vp;
}
/*
* Pass out new socket.
*/
return (0);
goto disconnect_unlocked;
return (error);
return (error);
conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */
? EOPNOTSUPP : EINVAL;
return (error);
}
/*
* connect a socket.
*
* Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to
* unconnect (by specifying a null address).
*/
int
int fflag,
int flags)
{
struct T_conn_req conn_req;
int error = 0;
void *src;
void *addr;
/*
* Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to
* avoid sleeping for memory with SOLOCKED held.
* We know that the T_CONN_REQ can't be larger than 2 * so_faddr_maxlen
* + sizeof (struct T_opthdr).
* (the AF_UNIX so_ux_addr_xlate() does not make the address
* exceed so_faddr_maxlen).
*/
/*
* Connect can not fail with ENOBUFS. A signal was
* caught so return EINTR.
*/
return (error);
}
/*
* Make sure that there is a preallocated unbind_req
* message before any binding. This message is allocated when
* the socket is created but it might have been
* consumed.
*/
/* NOTE: holding so_lock while sleeping */
so->so_unbind_mp =
goto done;
}
}
/*
* Can't have done a listen before connecting.
*/
error = EOPNOTSUPP;
goto done;
}
/*
* Must be bound with the transport
*/
/*CONSTCOND*/
/*
* Optimization for AF_INET{,6} transports
* that can handle a T_CONN_REQ without being bound.
*/
} else {
if (error)
goto done;
}
}
/*
* Handle a connect to a name parameter of type AF_UNSPEC like a
* connect to a null address. This is the portable method to
* unconnect a socket.
*/
if ((namelen >= sizeof (sa_family_t)) &&
namelen = 0;
}
/*
* Check that we are not already connected.
* A connection-oriented socket cannot be reconnected.
* A connected connection-less socket can be
* - connected to a different address by a subsequent connect
* - "unconnected" by a connect to the NULL address
*/
/* Connection-oriented socket */
goto done;
}
/* Connection-less socket */
/*
* Remove the connected state and clear SO_DGRAM_ERRIND
* since it was set when the socket was connected.
* If this is UDP also send down a T_DISCON_REQ.
*/
int val;
/*CONSTCOND*/
/* XXX What about implicitly unbinding here? */
} else {
so->so_faddr_len = 0;
}
val = 0;
goto done;
}
}
goto done;
}
/*
* Mark the socket if so_faddr_sa represents the transport level
* address.
*/
if (flags & _SOCONNECT_NOXLATE) {
struct sockaddr_ux *soaddr_ux;
if (namelen != sizeof (struct sockaddr_ux)) {
goto done;
}
}
/*
* Length and family checks.
*/
if (error)
goto bad;
/*
* Save foreign address. Needed for AF_UNIX as well as
* transport providers that do not support TI_GETPEERNAME.
* Also used for cached foreign address for TCP and UDP.
*/
goto done;
}
/*
* Already have a transport internal address. Do not
* pass any (transport internal) source address.
*/
srclen = 0;
} else {
/*
* Pass the sockaddr_un source address as an option
* and translate the remote address.
* Holding so_lock thus so_laddr_sa can not change.
*/
("sotpi_connect UNIX: srclen %d, src %p\n",
(flags & _SOCONNECT_XPG4_2),
if (error)
goto bad;
}
} else {
srclen = 0;
}
/*
* When connecting a datagram socket we issue the SO_DGRAM_ERRIND
* option which asks the transport provider to send T_UDERR_IND
* messages. These T_UDERR_IND messages are used to return connected
* style errors (e.g. ECONNRESET) for connected datagram sockets.
*
* In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets)
* we send down a T_CONN_REQ. This is needed to let the
* transport assign a local address that is consistent with
* the remote address. Applications depend on a getsockname()
* after a connect() to retrieve the "source" IP address for
* the connected socket. Invalidate the cached local address
* to force getsockname() to enquire of the transport.
*/
/*
* Datagram socket.
*/
val = 1;
goto done;
}
/*
* Send down T_CONN_REQ etc.
* Clear fflag to avoid returning EWOULDBLOCK.
*/
fflag = 0;
} else if (so->so_laddr_len != 0) {
/*
* If the local address or port was "any" then it may be
* changed by the transport as a result of the
* connect. Invalidate the cached version if we have one.
*/
case AF_INET:
INADDR_ANY ||
break;
case AF_INET6:
break;
default:
break;
}
}
/*
* Check for failure of an earlier call
*/
goto so_bad;
/*
* Send down T_CONN_REQ. Message was allocated above.
*/
if (srclen == 0) {
conn_req.OPT_length = 0;
conn_req.OPT_offset = 0;
} else {
/*
* There is an AF_UNIX sockaddr_un to include as a source
* address option.
*/
}
/*
* Set SS_ISCONNECTING before sending down the T_CONN_REQ
* in order to have the right state when the T_CONN_CON shows up.
*/
#ifdef C2_AUDIT
if (audit_active)
#endif /* C2_AUDIT */
if (error != 0)
goto bad;
goto bad;
/* Allow other threads to access the socket */
/*
* Wait until we get a T_CONN_CON or an error
*/
}
done:
switch (error) {
case EINPROGRESS:
case EALREADY:
case EISCONN:
case EINTR:
/* Non-fatal errors */
/* FALLTHRU */
case 0:
break;
case EHOSTUNREACH:
if (flags & _SOCONNECT_XPG4_2) {
/*
* ENETUNREACH be returned but does not require
* EHOSTUNREACH. In order to keep the test suite
* happy we mess with the errno here.
*/
error = ENETUNREACH;
}
/* FALLTHRU */
default:
/*
* Fatal errors: clear SS_ISCONNECTING in case it was set,
* and invalidate local-address cache
*/
/* A discon_ind might have already unbound us */
if ((flags & _SOCONNECT_DID_BIND) &&
int err;
/* LINTED - statement has no conseq */
if (err) {
}
}
break;
}
if (need_unlock)
return (error);
goto done;
}
int
{
struct T_ordrel_req ordrel_req;
int error = 0;
/*
* SunOS 4.X has no check for datagram sockets.
* 5.X checks that it is connected (ENOTCONN)
*/
if (!xnet_skip_checks) {
if (xnet_check_print) {
"caused ENOTCONN\n");
}
}
goto done;
}
/*
* Record the current state and then perform any state changes.
* Then use the difference between the old and new states to
* determine which messages need to be sent.
* This prevents e.g. duplicate T_ORDREL_REQ when there are
* duplicate calls to shutdown().
*/
switch (how) {
case 0:
break;
case 1:
break;
case 2:
break;
default:
goto done;
}
/*
* Assumes that the SS_CANT* flags are never cleared in the above code.
*/
switch (state_change) {
case 0:
("sotpi_shutdown: nothing to send in state 0x%x\n",
goto done;
case SS_CANTRCVMORE:
/*
* strseteof takes care of read side wakeups,
* pollwakeups, and signals.
*/
/*
* Get the read lock before flushing data to avoid problems
* with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg.
*/
/* Flush read side queue */
break;
case SS_CANTSENDMORE:
break;
/*
* strseteof takes care of read side wakeups,
* pollwakeups, and signals.
*/
/*
* Get the read lock before flushing data to avoid problems
* with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg.
*/
/* Flush read side queue */
break;
}
/*
* If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them
* was set due to this call and the new state has both of them set:
* Send the AF_UNIX close indication
* For T_COTS send a discon_ind
*
* If cantsend was set due to this call:
* For T_COTSORD send an ordrel_ind
*
* Note that for T_CLTS there is no message sent here.
*/
/*
* For SunOS 4.X compatibility we tell the other end
* that we are unable to receive at this point.
*/
}
if ((state_change & SS_CANTSENDMORE) &&
/* Send an orderly release */
0, _ALLOC_SLEEP);
/*
* Send down the T_ORDREL_REQ even if there is flow control.
* This prevents shutdown from blocking.
* Note that there is no T_OK_ACK for ordrel_req.
*/
if (error) {
goto done;
}
}
done:
return (error);
}
/*
* For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send
* a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer
* that we have closed.
* Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length
* T_UNITDATA_REQ containing the same option.
*
* For SOCK_DGRAM half-connections (somebody connected to this end
* but this end is not connected) we don't know where to send any
* SO_UNIX_CLOSE.
*
* We have to ignore stream head errors just in case there has been
* a shutdown(output).
* Ignore any flow control to try to get the message more quickly to the peer.
* While locally ignoring flow control solves the problem when there
* is only the loopback transport on the stream it would not provide
* the correct AF_UNIX socket semantics when one or more modules have
* been pushed.
*/
void
{
int error;
return;
/* zero length + header */
struct T_optdata_req tdr;
/* NOTE: holding so_lock while sleeping */
} else {
struct T_unitdata_req tudr;
void *addr;
void *src;
/* Connected DGRAM socket */
/*
* For AF_UNIX the destination address is translated to
* an internal name and the source address is passed as
* an option.
*/
/*
* Length and family checks.
*/
if (error) {
return;
}
/*
* Already have a transport internal address. Do not
* pass any (transport internal) source address.
*/
srclen = 0;
} else {
/*
* Pass the sockaddr_un source address as an option
* and translate the remote address.
* Holding so_lock thus so_laddr_sa can not change.
*/
("so_ux_close: srclen %d, src %p\n",
if (error) {
return;
}
}
if (srclen == 0) {
/* NOTE: holding so_lock while sleeping */
} else {
/*
* There is an AF_UNIX sockaddr_un to include as a
* source address option.
*/
sizeof (struct T_opthdr));
/* NOTE: holding so_lock while sleeping */
}
}
}
/*
* Handle recv* calls that set MSG_OOB or MSG_OOB together with MSG_PEEK.
*/
int
{
int error;
/*
* There is never any oob data with addresses or control since
* the T_EXDATA_IND does not carry any options.
*/
msg->msg_controllen = 0;
msg->msg_namelen = 0;
return (EINVAL);
}
return (EWOULDBLOCK);
}
/*
* Since recv* can not return ENOBUFS we can not use dupmsg.
* Instead we revert to the consolidation private
* allocb_wait plus bcopy.
*/
}
} else {
/*
* Update the state indicating that the data has been consumed.
* Keep SS_OOBPEND set until data is consumed past the mark.
*/
}
("after recvoob(%p): counts %d/%d state %s\n",
error = 0;
if (n > 0)
if (error)
break;
}
return (error);
}
/*
* Called by sotpi_recvmsg when reading a non-zero amount of data.
* In addition, the caller typically verifies that there is some
* potential state to clear by checking
* if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK))
* before calling this routine.
* Note that such a check can be made without holding so_lock since
* sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg
* decrements so_oobsigcnt.
*
* When data is read *after* the point that all pending
* oob data has been consumed the oob indication is cleared.
*
* SIOCATMARK keeps returning true until we have read past
* the mark.
*/
static void
{
("sorecv_update_oobstate: counts %d/%d state %s\n",
if (so->so_oobsigcnt == 0) {
/* No more pending oob indications */
}
}
/*
* Handle recv* calls for an so which has NL7C saved recv mblk_t(s).
*/
static int
{
int error = 0;
ssize_t n;
/*
* We have some data, uiomove up to resid bytes.
*/
if (n > 0)
if (error)
break;
}
} else {
/*
* We only handle data, save for caller to handle.
*/
}
} else {
}
}
}
/* Free any mblk_t(s) which we have consumed */
}
/* Last mblk_t so return the saved rval from kstrgetmsg() */
so->so_nl7c_rcv_rval = 0;
} else {
/* More mblk_t(s) to process so no rval to return */
}
return (error);
}
/*
* Receive the next message on the queue.
* If msg_controllen is non-zero when called the caller is interested in
* any received control info (options).
* If msg_namelen is non-zero when called the caller is interested in
* any received source address.
* The routine returns with msg_control and msg_name pointing to
* kmem_alloc'ed memory which the caller has to free.
*/
int
{
union T_primitives *tpr;
void *control;
int error;
int flags;
int first;
/*
* If we are not connected because we have never been connected
* we return ENOTCONN. If we have been connected (but are no longer
* connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return
* the EOF.
*
* An alternative would be to post an ENOTCONN error in stream head
* (read+write) and clear it when we're connected. However, that error
* would cause incorrect poll/select behavior.
*/
return (ENOTCONN);
}
/*
* Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but
* after checking that the read queue is empty) and returns zero.
* This implementation will sleep (in kstrgetmsg) even if uio_resid
* is zero.
*/
/* Check that the transport supports OOB */
return (EOPNOTSUPP);
}
/*
* Set msg_controllen and msg_namelen to zero here to make it
* simpler in the cases that no control or name is returned.
*/
msg->msg_controllen = 0;
msg->msg_namelen = 0;
namelen, controllen));
/*
* If an NL7C enabled socket and not waiting for write data.
*/
NL7C_ENABLED) {
if (so->so_nl7c_uri) {
/*
* Close uri processing for a previous request.
*/
nl7c_close(so);
}
if (nl7c_process(so,
/*
* NL7C has completed processing on the socket,
* clear the enabled bit as no further NL7C
* processing will be needed.
*/
so->so_nl7c_flags = 0;
}
}
/*
* Only one reader is allowed at any given time. This is needed
* for T_EXDATA handling and, in the future, MSG_WAITALL.
*
* This is slightly different than BSD behavior in that it fails with
* EWOULDBLOCK when using nonblocking io. In BSD the read queue access
* is single-threaded using sblock(), which is dropped while waiting
* for data to appear. The difference shows up e.g. if one
* file descriptor does not have O_NONBLOCK but a dup'ed file descriptor
* does use nonblocking io and different threads are reading each
* file descriptor. In BSD there would never be an EWOULDBLOCK error
* in this case as long as the read queue doesn't get empty.
* In this implementation the thread using nonblocking io can
* get an EWOULDBLOCK error due to the blocking thread executing
* e.g. in the uiomove in kstrgetmsg.
* This difference is not believed to be significant.
*/
if (error)
return (error);
/*
* Tell kstrgetmsg to not inspect the stream head errors until all
* queued data has been consumed.
* Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set.
* Also, if uio_fmode indicates nonblocking, kstrgetmsg will not block.
*
* MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and
* to T_OPTDATA_IND that do not contain any user-visible control msg.
* Note that MSG_WAITALL set with MSG_PEEK is a noop.
*/
flags &= ~MSG_WAITALL;
}
pflag |= MSG_DISCARDTAIL;
if (flags & MSG_DONTWAIT)
timout = 0;
else
timout = -1;
first = 1;
/*
* If the socket has saved NL7C rcv mblk_t(s), uiomove them first;
* otherwise get them from the stream head.
*/
pri = 0;
} else {
}
if (error) {
switch (error) {
case EINTR:
case EWOULDBLOCK:
if (!first)
error = 0;
break;
case ETIME:
/* Returned from kstrgetmsg when timeout expires */
if (!first)
error = 0;
else
error = EWOULDBLOCK;
break;
default:
break;
}
return (error);
}
/*
* For datagrams the MOREDATA flag is used to set MSG_TRUNC.
* For non-datagrams MOREDATA is used to set MSG_EOR.
*/
/*
* 4.3BSD and 4.4BSD clear the mark when peeking across it.
* The draft Posix socket spec states that the mark should
* not be cleared when peeking. We follow the latter.
*/
}
/* Set MSG_EOR based on MOREDATA */
}
}
/*
* If some data was received (i.e. not EOF) and the
* read/recv* has not been satisfied, wait for some more.
*/
first = 0;
goto retry;
}
return (0);
}
/* strsock_proto has already verified length and alignment */
case T_DATA_IND: {
}
/*
* Set msg_flags to MSG_EOR based on
* MORE_flag and MOREDATA.
*/
else
}
/*
* If some data was received (i.e. not EOF) and the
* read/recv* has not been satisfied, wait for some more.
*/
first = 0;
goto retry;
}
return (0);
}
case T_UNITDATA_IND: {
void *addr;
void *abuf;
void *opt;
}
if (namelen != 0) {
/* Caller wants source address */
addrlen, 1);
goto err;
}
/*
* Can not use the transport level address.
* If there is a SO_SRCADDR option carrying
* the socket level address it will be
* extracted below.
*/
addrlen = 0;
}
}
if (optlen != 0) {
/*
* Extract any source address option.
* Determine how large cmsg buffer is needed.
*/
goto err;
}
!(flags & MSG_XPG4_2));
if (controllen != 0)
else if (ncontrollen != 0)
} else {
controllen = 0;
}
if (namelen != 0) {
/*
* Return address to caller.
* Caller handles truncation if length
* exceeds msg_namelen.
* NOTE: AF_UNIX NUL termination is ensured by
* the sender's copyin_name().
*/
}
if (controllen != 0) {
/*
* Return control msg to caller.
* Caller handles truncation if length
* exceeds msg_controllen.
*/
!(flags & MSG_XPG4_2),
if (error) {
if (msg->msg_namelen != 0)
msg->msg_namelen);
goto err;
}
}
return (0);
}
case T_OPTDATA_IND: {
struct T_optdata_req *tdr;
void *opt;
}
if (optlen != 0) {
/*
* Determine how large cmsg buffer is needed.
*/
goto err;
}
!(flags & MSG_XPG4_2));
if (controllen != 0)
else if (ncontrollen != 0)
} else {
controllen = 0;
}
if (controllen != 0) {
/*
* Return control msg to caller.
* Caller handles truncation if length
* exceeds msg_controllen.
*/
!(flags & MSG_XPG4_2),
if (error) {
goto err;
}
}
/*
* Set msg_flags to MSG_EOR based on
* DATA_flag and MOREDATA.
*/
else
}
/*
* If some data was received (i.e. not EOF) and the
* read/recv* has not been satisfied, wait for some more.
* Not possible to wait if control info was received.
*/
controllen == 0 &&
first = 0;
goto retry;
}
return (0);
}
case T_EXDATA_IND: {
("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld "
"state %s\n",
/*
* kstrgetmsg handles MSGMARK so there is nothing to
* inspect in the T_EXDATA_IND.
* strsock_proto makes the stream head queue the T_EXDATA_IND
* as a separate message with no M_DATA component. Furthermore,
* the stream head does not consolidate M_DATA messages onto
* an MSGMARK'ed message ensuring that the T_EXDATA_IND
* remains a message by itself. This is needed since MSGMARK
* marks both the whole message as well as the last byte
* of the message.
*/
/*
* Even though we are peeking we consume the
* T_EXDATA_IND thereby moving the mark information
* to SS_RCVATMARK. Then the oob code below will
* retry the peeking kstrgetmsg.
* Note that the stream head read queue is
* never flushed without holding SOREADLOCKED
* thus the T_EXDATA_IND can not disappear
* underneath us.
*/
("sotpi_recvmsg: consume EXDATA_IND "
"counts %d/%d state %s\n",
pflag |= MSG_DISCARDTAIL;
pri = 0;
if (error) {
#ifdef SOCK_DEBUG
}
#endif /* SOCK_DEBUG */
return (error);
}
} /* end "if (flags & MSG_PEEK)" */
/*
* Decrement the number of queued and pending oob.
*
* SS_RCVATMARK is cleared when we read past a mark.
* SS_HAVEOOBDATA is cleared when we've read past the
* last mark.
* SS_OOBPEND is cleared if we've read past the last
* mark and no (new) SIGURG has been posted.
*/
so->so_oobsigcnt--;
/*
* Since the T_EXDATA_IND has been removed from the stream
* head, but we have not read data past the mark,
* sockfs needs to track that the socket is still at the mark.
*
* Since no data was received call kstrgetmsg again to wait
* for data.
*/
("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n",
goto retry;
}
default:
ASSERT(0);
goto err;
}
/* NOTREACHED */
err:
return (error);
}
/*
* Sending data with options on a datagram socket.
* Assumes caller has verified that SS_ISBOUND etc. are set.
*/
static int
void *control,
int flags)
{
struct T_unitdata_req tudr;
int error;
void *addr;
void *src;
int size;
void *fds;
int fdlen;
return (EMSGSIZE);
}
/*
* For AF_UNIX the destination address is translated to an internal
* name and the source address is passed as an option.
* Also, file descriptors are passed as file pointers in an
* option.
*/
/*
* Length and family checks.
*/
if (error) {
return (error);
}
/*
* Already have a transport internal address. Do not
* pass any (transport internal) source address.
*/
srclen = 0;
} else {
/*
* Pass the sockaddr_un source address as an option
* and translate the remote address.
*
* Note that this code does not prevent so_laddr_sa
* from changing while it is being used. Thus
* if an unbind+bind occurs concurrently with this
* send the peer might see a partially new and a
* partially old "from" address.
*/
("sosend_dgramcmsg UNIX: srclen %d, src %p\n",
(flags & MSG_XPG4_2),
if (error) {
return (error);
}
}
} else {
srclen = 0;
}
!(flags & MSG_XPG4_2));
if (srclen != 0)
else
/*
* File descriptors only when SM_FDPASSING set.
*/
if (error)
return (error);
if (fdlen != -1) {
return (EOPNOTSUPP);
if (error)
return (error);
} else {
/*
* Caught a signal waiting for memory.
* Let send* return EINTR.
*/
return (EINTR);
}
}
if (fdlen != -1) {
(t_uscalar_t)sizeof (struct T_opthdr);
}
if (srclen != 0) {
/*
* There is an AF_UNIX sockaddr_un to include as a source
* address option.
*/
}
/* At most 3 bytes left in the message */
#ifdef C2_AUDIT
if (audit_active)
#endif /* C2_AUDIT */
#ifdef SOCK_DEBUG
if (error) {
}
#endif /* SOCK_DEBUG */
return (error);
}
/*
* Sending data with options on a connected stream socket.
* Assumes caller has verified that SS_ISCONNECTED is set.
*/
static int
int more,
void *control,
int flags)
{
struct T_optdata_req tdr;
int error;
int first = 1;
int size;
void *fds;
int fdlen;
/*
* Has to be bound and connected. However, since no locks are
* held the state could have changed after sotpi_sendmsg checked it
* thus it is not possible to ASSERT on the state.
*/
/* Options on connection-oriented only when SM_OPTDATA set. */
return (EOPNOTSUPP);
do {
/*
* Set the MORE flag if uio_resid does not fit in this
* message or if the caller passed in "more".
* Error for transports with zero tidu_size.
*/
if (iosize <= 0)
return (EMSGSIZE);
} else {
if (more)
else
}
/*
* File descriptors only when SM_FDPASSING set.
*/
if (error)
return (error);
if (fdlen != -1) {
return (EOPNOTSUPP);
if (error)
return (error);
} else {
/*
* Caught a signal waiting for memory.
* Let send* return EINTR.
*/
}
}
if (fdlen != -1) {
(t_uscalar_t)sizeof (struct T_opthdr);
}
/* At most 3 bytes left in the message */
0, MSG_BAND, 0);
if (error) {
return (0);
return (error);
}
first = 0;
/*
* Recheck for fatal errors. Fail write even though
* some data have been written. This is consistent
* with strwrite semantics and BSD sockets semantics.
*/
return (EPIPE);
}
if (error != 0) {
return (error);
}
}
}
return (0);
}
/*
* Sending data on a datagram socket.
* Assumes caller has verified that SS_ISBOUND etc. are set.
*
* For AF_UNIX the destination address is translated to an internal
* name and the source address is passed as an option.
*/
int
int flags)
{
struct T_unitdata_req tudr;
int error;
void *addr;
void *src;
goto done;
}
/*
* Length and family checks.
*/
if (error) {
goto done;
}
/*
* Already have a transport internal address. Do not
* pass any (transport internal) source address.
*/
srclen = 0;
} else {
/*
* Pass the sockaddr_un source address as an option
* and translate the remote address.
*
* Note that this code does not prevent so_laddr_sa
* from changing while it is being used. Thus
* if an unbind+bind occurs concurrently with this
* send the peer might see a partially new and a
* partially old "from" address.
*/
("sosend_dgram UNIX: srclen %d, src %p\n",
(flags & MSG_XPG4_2),
if (error) {
goto done;
}
}
} else {
srclen = 0;
}
if (srclen == 0) {
tudr.OPT_length = 0;
tudr.OPT_offset = 0;
/*
* Caught a signal waiting for memory.
* Let send* return EINTR.
*/
goto done;
}
} else {
/*
* There is an AF_UNIX sockaddr_un to include as a source
* address option.
*/
/*
* Caught a signal waiting for memory.
* Let send* return EINTR.
*/
goto done;
}
}
#ifdef C2_AUDIT
if (audit_active)
#endif /* C2_AUDIT */
done:
#ifdef SOCK_DEBUG
if (error) {
}
#endif /* SOCK_DEBUG */
return (error);
}
/*
* Sending data on a connected stream socket.
* Assumes caller has verified that SS_ISCONNECTED is set.
*/
int
int more,
int sflag)
{
struct T_data_req tdr;
int error;
int first = 1;
("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n",
/*
* Has to be bound and connected. However, since no locks are
* held the state could have changed after sotpi_sendmsg checked it
* thus it is not possible to ASSERT on the state.
*/
do {
/*
* Set the MORE flag if uio_resid does not fit in this
* message or if the caller passed in "more".
* Error for transports with zero tidu_size.
*/
if (iosize <= 0)
return (EMSGSIZE);
} else {
if (more)
else
}
/*
* Caught a signal waiting for memory.
* Let send* return EINTR.
*/
if (first)
return (EINTR);
else
return (0);
}
if (error) {
return (0);
return (error);
}
first = 0;
/*
* Recheck for fatal errors. Fail write even though
* some data have been written. This is consistent
* with strwrite semantics and BSD sockets semantics.
*/
return (EPIPE);
}
if (error != 0) {
return (error);
}
}
}
return (0);
}
/*
* Check the state for errors and call the appropriate send function.
*
* If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set)
* this function issues a setsockopt to toggle SO_DONTROUTE before and
* after sending the message.
*/
static int
{
int so_state;
int so_mode;
int error;
int dontroute;
int flags;
if (so_state & SS_CANTSENDMORE) {
return (EPIPE);
}
if (error != 0) {
return (error);
}
}
if (!(so_state & SS_ISCONNECTED)) {
if (so_mode & SM_CONNREQUIRED)
return (ENOTCONN);
else
return (EDESTADDRREQ);
}
if (so_mode & SM_CONNREQUIRED) {
namelen = 0;
} else {
/*
* Note that this code does not prevent so_faddr_sa
* from changing while it is being used. Thus
* if an "unconnect"+connect occurs concurrently with
* this send the datagram might be delivered to a
* garbled address.
*/
}
} else {
if (!(so_state & SS_ISCONNECTED) &&
(so_mode & SM_CONNREQUIRED)) {
/* Required but not connected */
return (ENOTCONN);
}
/*
* Ignore the address on connection-oriented sockets.
* Just like BSD this code does not generate an error for
* TCP (a CONNREQUIRED socket) when sending to an address
* delivered on the connection as if no address had been
* supplied.
*/
if ((so_state & SS_ISCONNECTED) &&
!(so_mode & SM_CONNREQUIRED)) {
return (EISCONN);
}
if (!(so_state & SS_ISBOUND)) {
if (error) {
return (error);
}
}
/*
* Handle delayed datagram errors. These are only queued
* when the application sets SO_DGRAM_ERRIND.
* Return the error if we are sending to the address
* that was returned in the last T_UDERROR_IND.
* If sending to some other address discard the delayed
* error indication.
*/
if (so->so_delayed_error) {
struct T_uderror_ind *tudi;
void *addr;
so->so_delayed_error = 0;
addrlen, 1);
case AF_INET: {
/* Compare just IP address and port */
break;
}
case AF_INET6: {
/* Compare just IP address and port. Not flow */
break;
}
case AF_UNIX:
default:
}
if (match) {
#ifdef DEBUG
("sockfs delayed error %d for %s\n",
#endif /* DEBUG */
return (error);
}
}
}
dontroute = 0;
val = 1;
if (error)
return (error);
dontroute = 1;
}
error = EOPNOTSUPP;
goto done;
}
if (msg->msg_controllen != 0) {
if (!(so_mode & SM_CONNREQUIRED)) {
flags);
} else {
/* Can't generate T_EXDATA_REQ with options */
error = EOPNOTSUPP;
goto done;
}
flags);
}
goto done;
}
if (!(so_mode & SM_CONNREQUIRED)) {
/*
* If there is no SO_DONTROUTE to turn off return immediately
* from sosend_dgram. This can allow tail-call optimizations.
*/
if (!dontroute) {
}
} else {
int sflag;
/* Ignore msg_name in the connected state */
prim = T_EXDATA_REQ;
/*
* Send down T_EXDATA_REQ even if there is flow
* control for data.
*/
sflag = MSG_IGNFLOW;
} else {
if (so_mode & SM_BYTESTREAM) {
/* Byte stream transport - use write */
/*
* If there is no SO_DONTROUTE to turn off
* return immediately from strwrite. This can
* allow tail-call optimizations.
*/
if (!dontroute)
CRED()));
goto done;
}
prim = T_DATA_REQ;
sflag = 0;
}
/*
* If there is no SO_DONTROUTE to turn off return immediately
* from sosend_svc. This can allow tail-call optimizations.
*/
if (!dontroute)
}
done:
if (dontroute) {
val = 0;
}
return (error);
}
/*
* Update so_faddr by asking the transport (unless AF_UNIX).
*/
int
{
void *addr;
goto done;
}
if (xnet_check_print) {
}
goto done;
}
#ifdef DEBUG
#endif /* DEBUG */
/* Transport has different name space - return local info */
error = 0;
goto done;
}
/* Allocate local buffer to use with ioctl */
/*
* Issue TI_GETPEERNAME with signals masked.
* Put the result in so_faddr_sa so that getpeername works after
* a shutdown(output).
* If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted
* back to the socket.
*/
res = 0;
/*
* If there is an error record the error in so_error but don't fail
* the getpeername. Instead fallback on the recorded
* so->so_faddr_sa.
*/
if (error) {
/*
* Various stream head errors can be returned to the ioctl.
* However, it is impossible to determine which ones of
* these are really socket level errors that were incorrectly
* consumed by the ioctl. Thus this code silently ignores the
* error - the code explicitly does not reinstate the error
* using soseterror().
* Experiments have shown that at least this set of
* errors are reported and should not be reinstated on the
* socket:
* EINVAL E.g. if an I_LINK was in effect when
* getpeername was called.
* EPIPE The ioctl error semantics prefer the write
* side error over the read side error.
* ENOTCONN The transport just got disconnected but
* sockfs had not yet seen the T_DISCON_IND
* when issuing the ioctl.
*/
error = 0;
}
#ifdef DEBUG
#endif /* DEBUG */
done:
return (error);
}
/*
* Update so_laddr by asking the transport (unless AF_UNIX).
*/
int
{
void *addr;
/* Return an all zero address except for the family */
/*
* Can not assume there is a sa_family for all
* protocol families.
*/
}
#ifdef DEBUG
#endif /* DEBUG */
/* Transport has different name space - return local info */
error = 0;
goto done;
}
/* Allocate local buffer to use with ioctl */
/*
* Issue TI_GETMYNAME with signals masked.
* Put the result in so_laddr_sa so that getsockname works after
* a shutdown(output).
* If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted
* back to the socket.
*/
res = 0;
/*
* If there is an error record the error in so_error but don't fail
* the getsockname. Instead fallback on the recorded
* so->so_laddr_sa.
*/
if (error) {
/*
* Various stream head errors can be returned to the ioctl.
* However, it is impossible to determine which ones of
* these are really socket level errors that were incorrectly
* consumed by the ioctl. Thus this code silently ignores the
* error - the code explicitly does not reinstate the error
* using soseterror().
* Experiments have shown that at least this set of
* errors are reported and should not be reinstated on the
* socket:
* EINVAL E.g. if an I_LINK was in effect when
* getsockname was called.
* EPIPE The ioctl error semantics prefer the write
* side error over the read side error.
*/
error = 0;
}
#ifdef DEBUG
#endif /* DEBUG */
done:
return (error);
}
/*
* Get socket options. For SOL_SOCKET options some options are handled
* by the sockfs while others use the value recorded in the sonode as a
* fallback should the T_SVR4_OPTMGMT_REQ fail.
*
* On return at most *optlenp bytes are copied to optval.
*/
int
{
struct T_optmgmt_req optmgmt_req;
struct T_optmgmt_ack *optmgmt_ack;
int error = 0;
/*
* Check for SOL_SOCKET options.
* Certain SOL_SOCKET options are returned directly whereas
* others only provide a default (fallback) value should
* the T_SVR4_OPTMGMT_REQ fail.
*/
if (level == SOL_SOCKET) {
/* Check parameters */
switch (option_name) {
case SO_TYPE:
case SO_ERROR:
case SO_DEBUG:
case SO_ACCEPTCONN:
case SO_REUSEADDR:
case SO_KEEPALIVE:
case SO_DONTROUTE:
case SO_BROADCAST:
case SO_USELOOPBACK:
case SO_OOBINLINE:
case SO_SNDBUF:
case SO_RCVBUF:
#ifdef notyet
case SO_SNDLOWAT:
case SO_RCVLOWAT:
case SO_SNDTIMEO:
case SO_RCVTIMEO:
#endif /* notyet */
case SO_DGRAM_ERRIND:
goto done2;
}
break;
case SO_LINGER:
goto done2;
}
break;
}
switch (option_name) {
case SO_TYPE:
goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
case SO_ERROR:
goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
case SO_ACCEPTCONN:
else
value = 0;
#ifdef DEBUG
if (value) {
("sotpi_getsockopt: 0x%x is set\n",
option_name));
} else {
("sotpi_getsockopt: 0x%x not set\n",
option_name));
}
#endif /* DEBUG */
goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
case SO_DEBUG:
case SO_REUSEADDR:
case SO_KEEPALIVE:
case SO_DONTROUTE:
case SO_BROADCAST:
case SO_USELOOPBACK:
case SO_OOBINLINE:
case SO_DGRAM_ERRIND:
#ifdef DEBUG
if (value) {
("sotpi_getsockopt: 0x%x is set\n",
option_name));
} else {
("sotpi_getsockopt: 0x%x not set\n",
option_name));
}
#endif /* DEBUG */
goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */
/*
* The following options are only returned by sockfs when the
* T_SVR4_OPTMGMT_REQ fails.
*/
case SO_LINGER:
break;
case SO_SNDBUF: {
/*
* If the option has not been set then get a default
* value from the read queue. This value is
* returned if the transport fails
* the T_SVR4_OPTMGMT_REQ.
*/
if (lvalue == 0) {
("got SO_SNDBUF %ld from q\n", lvalue));
}
break;
}
case SO_RCVBUF: {
/*
* If the option has not been set then get a default
* value from the read queue. This value is
* returned if the transport fails
* the T_SVR4_OPTMGMT_REQ.
*
* XXX If SO_RCVBUF has been set and this is an
* XPG 4.2 application then do not ask the transport
* since the transport might adjust the value and not
* return exactly what was set by the application.
* For non-XPG 4.2 application we return the value
* that the transport is actually using.
*/
if (lvalue == 0) {
("got SO_RCVBUF %ld from q\n", lvalue));
} else if (flags & _SOGETSOCKOPT_XPG4_2) {
goto copyout; /* skip asking transport */
}
break;
}
#ifdef notyet
/*
* We do not implement the semantics of these options
* thus we shouldn't implement the options either.
*/
case SO_SNDLOWAT:
break;
case SO_RCVLOWAT:
break;
case SO_SNDTIMEO:
break;
case SO_RCVTIMEO:
break;
#endif /* notyet */
}
}
goto done2;
}
/* Send request */
/* Let option management work in the presence of data flow control */
if (error) {
goto done2;
}
if (error) {
/* We have a fallback value */
error = 0;
goto copyout;
}
goto done2;
}
/* We have a fallback value */
error = 0;
goto copyout;
}
goto done;
}
/* check to ensure that the option is within bounds */
/* We have a fallback value */
error = 0;
goto copyout;
}
goto done;
}
copyout: {
}
done:
return (error);
}
/*
* Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ.
* SOL_SOCKET options are also recorded in the sonode. A setsockopt for
* SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails -
* setsockopt has to work even if the transport does not support the option.
*/
int
{
struct T_optmgmt_req optmgmt_req;
int error = 0;
if (xnet_check_print)
return (EINVAL);
}
/* Caller allocates aligned optval, or passes null */
/* If optval is null optlen is 0, and vice-versa */
/* Ignore any flow control problems with the transport. */
goto done;
}
/*
* For SOCKET or TCP level options, try to set it here itself
* provided socket has not been popped and we know the tcp
* structure (stored in so_priv).
*/
switch (level) {
case SOL_SOCKET:
switch (option_name) { /* Check length param */
case SO_DEBUG:
case SO_REUSEADDR:
case SO_DONTROUTE:
case SO_BROADCAST:
case SO_USELOOPBACK:
case SO_OOBINLINE:
case SO_DGRAM_ERRIND:
goto done2;
}
break;
case SO_LINGER:
if (optlen !=
(t_uscalar_t)sizeof (struct linger)) {
goto done2;
}
break;
}
switch (option_name) { /* Do actions */
case SO_LINGER: {
} else {
tcp->tcp_linger = 0;
tcp->tcp_lingertime = 0;
}
break;
}
case SO_DEBUG:
#ifdef SOCK_TEST
if (intvalue & 2)
else
sock_test_timelimit = 0;
if (intvalue & 4)
do_useracc = 0;
else
do_useracc = 1;
#endif /* SOCK_TEST */
break;
case SO_DONTROUTE:
/*
* SO_DONTROUTE, SO_USELOOPBACK and
* SO_BROADCAST are only of interest to IP.
* We track them here only so
* that we can report their current value.
*/
if (onoff)
else
break;
case SO_USELOOPBACK:
if (onoff)
else
break;
case SO_BROADCAST:
if (onoff)
else
break;
case SO_REUSEADDR:
if (onoff)
else
break;
case SO_OOBINLINE:
if (onoff)
else
break;
case SO_DGRAM_ERRIND:
if (onoff)
else
break;
}
break;
case IPPROTO_TCP:
switch (option_name) {
case TCP_NODELAY:
goto done2;
}
break;
}
break;
default:
break;
}
}
if (handled) {
goto done2;
}
/* Let option management work in the presence of data flow control */
if (error) {
goto done;
}
if (error) {
goto done;
}
/* No need to verify T_optmgmt_ack */
done:
/*
* Check for SOL_SOCKET options and record their values.
* If we know about a SOL_SOCKET parameter and the transport
* failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or
* EPROTO) we let the setsockopt succeed.
*/
if (level == SOL_SOCKET) {
/* Check parameters */
switch (option_name) {
case SO_DEBUG:
case SO_REUSEADDR:
case SO_KEEPALIVE:
case SO_DONTROUTE:
case SO_BROADCAST:
case SO_USELOOPBACK:
case SO_OOBINLINE:
case SO_SNDBUF:
case SO_RCVBUF:
#ifdef notyet
case SO_SNDLOWAT:
case SO_RCVLOWAT:
case SO_SNDTIMEO:
case SO_RCVTIMEO:
#endif /* notyet */
case SO_DGRAM_ERRIND:
goto done2;
}
break;
case SO_LINGER:
goto done2;
}
break;
}
switch (option_name) {
case SO_TYPE:
case SO_ERROR:
case SO_ACCEPTCONN:
/* Can't be set */
error = ENOPROTOOPT;
goto done2;
case SO_LINGER: {
if (l->l_onoff) {
} else {
}
break;
}
case SO_DEBUG:
#ifdef SOCK_TEST
if (intvalue & 2)
else
sock_test_timelimit = 0;
if (intvalue & 4)
do_useracc = 0;
else
do_useracc = 1;
#endif /* SOCK_TEST */
/* FALLTHRU */
case SO_REUSEADDR:
case SO_KEEPALIVE:
case SO_DONTROUTE:
case SO_BROADCAST:
case SO_USELOOPBACK:
case SO_OOBINLINE:
case SO_DGRAM_ERRIND:
if (intvalue != 0) {
("sotpi_setsockopt: setting 0x%x\n",
option_name));
} else {
("sotpi_setsockopt: clearing 0x%x\n",
option_name));
}
break;
/*
* The following options are only returned by us when the
* T_SVR4_OPTMGMT_REQ fails.
* XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs
* since the transport might adjust the value and not
* return exactly what was set by the application.
*/
case SO_SNDBUF:
break;
case SO_RCVBUF:
break;
#ifdef notyet
/*
* We do not implement the semantics of these options
* thus we shouldn't implement the options either.
*/
case SO_SNDLOWAT:
break;
case SO_RCVLOWAT:
break;
case SO_SNDTIMEO:
break;
case SO_RCVTIMEO:
break;
#endif /* notyet */
}
if (error) {
("setsockopt: ignoring error %d for 0x%x\n",
error, option_name));
error = 0;
}
}
}
ret:
return (error);
}