tcp_socket.c revision dd49f125507979bb2ab505a8daf2a46d1be27051
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
/* This file contains all TCP kernel socket related functions. */
#include <sys/squeue_impl.h>
#define _SUN_TPI_VERSION 2
#include <sys/tpicommon.h>
#include <sys/socketvar.h>
#include <inet/proto_set.h>
#include <inet/tcp_impl.h>
sock_upcalls_t *, int, cred_t *);
static int tcp_getsockopt(sock_lower_handle_t, int, int, void *,
static int tcp_setsockopt(sock_lower_handle_t, int, int, const void *,
static void tcp_clr_flowctrl(sock_lower_handle_t);
cred_t *);
NULL,
NULL,
NULL,
};
/* ARGSUSED */
static void
{
struct sock_proto_props sopp;
extern struct module_info tcp_rinfo;
/* All Solaris components should pass a cred for this operation. */
}
/*ARGSUSED*/
static int
{
/*
* KSSL can move a socket from one listener to another, in which
* case `lproto_handle' points to the new listener. To ensure that
* the original listener is used the information is obtained from
* the eager.
*/
/*
* It is possible for the accept thread to race with the thread that
* made the su_newconn upcall in tcp_newconn_notify. Both
* tcp_newconn_notify and tcp_accept require that conn_upper_handle
* and conn_upcalls be set before returning, so they both write to
* them. However, we're guaranteed that the value written is the same
* for both threads.
*/
/*
* We should have a minimum of 2 references on the conn at this
* point. One for TCP and one for the newconn notification
* (which is now taken over by IP). In the normal case we would
* also have another reference (making a total of 3) for the conn
* being in the classifier hash list. However the eager could have
* received an RST subsequently and tcp_closei_local could have
* removed the eager from the classifier hash list, hence we can't
* assert that reference.
*/
/*
* An error is returned if this conn has been reset, which will
* cause the socket to be closed immediately. The eager will be
* unlinked from the listener during close.
*/
return (ECONNABORTED);
/*
* Non-STREAMS listeners never defer the notification of new
* connections.
*/
return (0);
}
static int
{
int error;
/* All Solaris components should pass a cred for this operation. */
if (error != 0) {
/* failed to enter */
return (ENOSR);
}
/* binding to a NULL address really means unbind */
else
} else {
}
if (error < 0) {
else
}
return (error);
}
/* ARGSUSED */
static int
{
int error;
/* All Solaris components should pass a cred for this operation. */
if (error != 0) {
/* failed to enter */
return (ENOBUFS);
}
if (error == 0) {
/*
* sockfs needs to know the maximum number of sockets
* that can be queued on the listener.
*/
} else if (error < 0) {
else
}
return (error);
}
static int
{
int error;
/* All Solaris components should pass a cred for this operation. */
if (error != 0) {
return (error);
}
if (error != 0) {
/* failed to enter */
return (ENOSR);
}
/*
* TCP supports quick connect, so no need to do an implicit bind
*/
if (error == 0) {
} else if (error < 0) {
case TCPS_SYN_SENT:
break;
case TCPS_ESTABLISHED:
break;
case TCPS_LISTEN:
error = EOPNOTSUPP;
break;
default:
break;
}
} else {
}
}
struct sock_proto_props sopp;
}
done:
}
/* ARGSUSED3 */
int
{
/* All Solaris components should pass a cred for this operation. */
return (ENOTCONN);
}
/* ARGSUSED3 */
int
{
/* All Solaris components should pass a cred for this operation. */
}
/* returns UNIX error, the optlen is a value-result arg */
static int
{
int error;
void *optvalp_buf;
int len;
if (error != 0) {
if (error < 0) {
}
return (error);
}
return (ENOMEM);
}
if (len == -1) {
return (EINVAL);
}
/*
* update optlen and copy option value
*/
return (0);
}
static int
{
int error;
/*
* Entering the squeue synchronously can result in a context switch,
* which can cause a rather severe performance degradation. So we try to
* handle whatever options we can without entering the squeue.
*/
if (level == IPPROTO_TCP) {
switch (option_name) {
case TCP_NODELAY:
return (EINVAL);
return (0);
default:
break;
}
}
return (ENOMEM);
}
if (error != 0) {
if (error < 0) {
}
return (error);
}
return (error);
}
/* ARGSUSED */
static int
{
/* All Solaris components should pass a cred for this operation. */
if (msg->msg_controllen != 0) {
return (EOPNOTSUPP);
}
case M_DATA:
if (tcpstate < TCPS_ESTABLISHED) {
/*
* We return ENOTCONN if the endpoint is trying to
* connect or has never been connected, and EPIPE if it
* has been disconnected. The connection id helps us
* distinguish between the last two cases.
*/
} else if (tcpstate > TCPS_CLOSE_WAIT) {
return (EPIPE);
}
/*
* Squeue Flow Control
*/
}
/*
* The application may pass in an address in the msghdr, but
* we ignore the address on connection-oriented sockets.
* Just like BSD, this code does not generate an error for
* TCP (a CONNREQUIRED socket) when sending to an address;
* the data is delivered on the connection as if no address
* had been supplied.
*/
} else {
}
return (0);
default:
ASSERT(0);
}
return (0);
}
/* ARGSUSED */
static int
{
/* All Solaris components should pass a cred for this operation. */
/*
*/
return (ENOTCONN);
/* shutdown the send side */
SOCK_OPCTL_SHUT_SEND, 0);
}
/* shutdown the recv side */
SOCK_OPCTL_SHUT_RECV, 0);
return (0);
}
static void
{
int error;
/*
* If tcp->tcp_rsrv_mp == NULL, it means that tcp_clr_flowctrl()
* is currently running.
*/
return;
}
} else {
/*
* Send back a window update immediately if TCP is above
* ESTABLISHED state and the increase of the rcv window
* that the other side knows is at least 1 MSS after flow
* control is lifted.
*/
}
}
}
/* ARGSUSED */
static int
{
int error;
/* All Solaris components should pass a cred for this operation. */
/*
* If we don't have a helper stream then create one.
* ip_create_helper_stream takes care of locking the conn_t,
* so this check for NULL is just a performance optimization.
*/
/*
* Create a helper stream for non-STREAMS socket.
*/
if (error != 0) {
ip0dbg(("tcp_ioctl: create of IP helper stream "
"failed %d\n", error));
return (error);
}
}
switch (cmd) {
case ND_SET:
case ND_GET:
case _SIOCSOCKFALLBACK:
case TCP_IOC_ABORT_CONN:
case TI_GETPEERNAME:
case TI_GETMYNAME:
ip1dbg(("tcp_ioctl: cmd 0x%x on non streams socket",
cmd));
break;
default:
/*
* If the conn is not closing, pass on to IP using
* helper stream. Bump the ioctlref to prevent tcp_close
* if it ends up queued or aborted/interrupted.
*/
break;
}
break;
}
return (error);
}
/* ARGSUSED */
static int
{
/* All Solaris components should pass a cred for this operation. */
/*
* Drop IP's reference on the conn. This is the last reference
* on the connp if the state was less than established. If the
* connection has gone into timewait state, then we will have
* one ref for the TCP and one more ref (total of two) for the
* classifier connected hash list (a timewait connection stays
* in the connected hash until closed).
*
* We can't assert the references because there might be other
* transient reference places because of some walkers or queued
* packets in squeue for the timewait state.
*/
/*
* EINPROGRESS tells sockfs to wait for a 'closed' upcall before
* freeing the socket.
*/
return (EINPROGRESS);
}
/* ARGSUSED */
{
return (NULL);
}
return (NULL);
}
/*
* Put the ref for TCP. Ref for IP was already put
* by ipcl_conn_create. Also make the conn_t globally
* visible to walkers.
*/
*errorp = 0;
return ((sock_lower_handle_t)connp);
}
/*
* tcp_fallback
*
* A direct socket is falling back to using STREAMS. The queue
* that is being passed down was created using tcp_open() with
* the SO_FALLBACK flag set. As a result, the queue is not
* associated with a conn, and the q_ptrs instead contain the
* dev and minor area that should be used.
*
* The 'issocket' flag indicates whether the FireEngine
* optimizations should be used. The common case would be that
* optimizations are enabled, and they might be subsequently
* disabled using the _SIOCSOCKFALLBACK ioctl.
*/
/*
* An active connection is falling back to TPI. Gather all the information
* required by the STREAM head and TPI sonode and send it up.
*/
static void
{
struct stroptions *stropt;
struct T_capability_ack tca;
short opts;
int error;
if (!issocket)
/*
* free the helper stream
*/
/*
* Notify the STREAM head about options
*/
if (tcp->tcp_snd_sack_ok)
/*
* Collect the information needed to sync with the sonode
*/
if (error != 0)
faddrlen = 0;
opts = 0;
if (connp->conn_oobinline)
opts |= SO_OOBINLINE;
opts |= SO_DONTROUTE;
/*
* Notify the socket that the protocol is now quiescent,
* and it's therefore safe to move data from the socket
* to the stream head.
*/
}
/*
* All eagers in q0 are marked as being non-STREAM, so they will
* make su_newconn upcalls when the handshake completes, which
* will fail (resulting in the conn being closed). So we just blow
* off everything in q0 instead of waiting for the inevitable.
*/
if (tcp->tcp_conn_req_cnt_q0 != 0)
}
/*
* An eager is falling back to TPI. All we have to do is send
* up a T_CONN_IND.
*/
static void
{
/*
* Notify the socket that the protocol is now quiescent,
* and it's therefore safe to move data from the socket
* to tcp's rcv queue.
*/
NULL, 0, 0);
}
else
}
if (!issocket)
/*
* The stream for this eager does not yet exist, so mark it as
* being detached.
*/
/* Send up the connection indication */
/*
* Sending the eager as an option violates the option
* semantics, so remove the eager from the options.
*/
if (!issocket) {
struct T_conn_ind *conn_ind;
conn_ind->OPT_length = 0;
conn_ind->OPT_offset = 0;
}
/*
* Sockfs guarantees that the listener will not be closed
* during fallback. So we can safely use the listener's queue.
*/
}
int
{
int error;
NULL);
/* Pre-allocate the T_ordrel_ind mblk. */
/*
* Enter the squeue so that no new packets can come in
*/
if (error != 0) {
/* failed to enter, free all the pre-allocated messages. */
return (ENOMEM);
}
/*
* Both endpoints must be of the same type (either STREAMS or
* non-STREAMS) for fusion to be enabled. So if we are fused,
* we have to unfuse.
*/
/* The eager will deal with opts when accept() is called */
} else {
quiesced_cb, arg);
}
/*
* No longer a direct socket
*
* Note that we intentionally leave the upper_handle and upcalls
* intact, since eagers may still be using them.
*/
/*
* There should be at least two refs (IP + TCP).
*/
return (0);
}
/*
* Notifies a non-STREAMS based listener about a new connection. This
* function is executed on the *eager*'s squeue once the 3 way handshake
* has completed. Note that the behavior differs from STREAMS, where the
* T_CONN_IND is sent up by tcp_send_conn_ind while on the *listener*'s
* squeue.
*
* Returns B_TRUE if the notification succeeded, in which case `tcp' will
* be moved over to the ESTABLISHED list (q) of the listener. Otherwise,
* B_FALSE is returned and `tcp' is killed.
*/
{
struct sock_proto_props sopp;
/*
* Take the eager out, if it is in the list of droppable eagers
* as we are here because the 3W handshake is over.
*/
/*
* The eager already has an extra ref put in tcp_input_data
* so that it stays till accept comes back even though it
* might get into TCPS_CLOSED as a result of a TH_RST etc.
*/
/* Move from SYN_RCVD to ESTABLISHED list */
/*
* Insert at end of the queue because connections are accepted
* in chronological order. Leaving the older connections at front
* of the queue helps reduce search time.
*/
else
/* we have timed out before */
if (tcp->tcp_syn_rcvd_timeout != 0) {
tcp->tcp_syn_rcvd_timeout = 0;
if (listener->tcp_syn_defense &&
/*
* Turn off the defense mode if we
* believe the SYN attack is over.
*/
if (listener->tcp_ip_addr_cache) {
IP_ADDR_CACHE_SIZE * sizeof (ipaddr_t));
}
}
}
if (addr_cache != NULL) {
/*
* We have finished a 3-way handshake with this
* remote host. This proves the IP addr is good.
* Cache it!
*/
}
/*
* Notify the ULP about the newconn. It is guaranteed that no
* tcp_accept() call will be made for the eager if the
* notification fails.
*/
/*
* Normally this should not happen, but the listener might
* have done a fallback to TPI followed by a close(), in
* which case tcp_closemp for this conn might have been
* used by tcp_eager_cleanup().
*/
if (tcp->tcp_closemp_used) {
return (B_FALSE);
}
return (B_FALSE);
}
if (econnp->conn_keepalive) {
tcp->tcp_ka_last_intrvl = 0;
}
/* Update the necessary parameters */
return (B_TRUE);
}