721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * CDDL HEADER START
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * The contents of this file are subject to the terms of the
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * Common Development and Distribution License (the "License").
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * You may not use this file except in compliance with the License.
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * See the License for the specific language governing permissions
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * and limitations under the License.
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * When distributing Covered Code, include this CDDL HEADER in each
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * If applicable, add the following below this CDDL HEADER, with the
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * fields enclosed by brackets "[]" replaced with your own identifying
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * information: Portions Copyright [yyyy] [name of copyright owner]
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * CDDL HEADER END
9ee3959a9e59422575d29f2ec8ffb62ce0918ac8Anders Persson * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon/* This file contains all TCP kernel socket related functions. */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poonstatic void tcp_activate(sock_lower_handle_t, sock_upper_handle_t,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poonstatic int tcp_accept(sock_lower_handle_t, sock_lower_handle_t,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poonstatic int tcp_bind(sock_lower_handle_t, struct sockaddr *,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poonstatic int tcp_listen(sock_lower_handle_t, int, cred_t *);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poonstatic int tcp_connect(sock_lower_handle_t, const struct sockaddr *,
ca3c8f41e333bc4783da7cb75b242843c8846c8aDavid Höppnerstatic int tcp_getpeername(sock_lower_handle_t, struct sockaddr *,
ca3c8f41e333bc4783da7cb75b242843c8846c8aDavid Höppnerstatic int tcp_getsockname(sock_lower_handle_t, struct sockaddr *,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poonstatic int tcp_getsockopt(sock_lower_handle_t, int, int, void *,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poonstatic int tcp_setsockopt(sock_lower_handle_t, int, int, const void *,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poonstatic int tcp_sendmsg(sock_lower_handle_t, mblk_t *, struct nmsghdr *,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poonstatic int tcp_shutdown(sock_lower_handle_t, int, cred_t *);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poonstatic void tcp_clr_flowctrl(sock_lower_handle_t);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poonstatic int tcp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poonstatic int tcp_close(sock_lower_handle_t, int, cred_t *);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon/* ARGSUSED */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poontcp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon /* All Solaris components should pass a cred for this operation. */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon sopp.sopp_flags = SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon SOCKOPT_MAXPSZ | SOCKOPT_MAXBLK | SOCKOPT_RCVTIMER |
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ;
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon sopp.sopp_rcvthresh = SOCKET_RECVHIWATER >> 3;
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon sopp.sopp_minpsz = (tcp_rinfo.mi_minpsz == 1) ? 0 :
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon connp->conn_rcvbuf == connp->conn_tcp->tcp_rwnd);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon (*sock_upcalls->su_set_proto_props)(sock_handle, &sopp);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
dd49f125507979bb2ab505a8daf2a46d1be27051Anders Persson * KSSL can move a socket from one listener to another, in which
dd49f125507979bb2ab505a8daf2a46d1be27051Anders Persson * case `lproto_handle' points to the new listener. To ensure that
dd49f125507979bb2ab505a8daf2a46d1be27051Anders Persson * the original listener is used the information is obtained from
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * It is possible for the accept thread to race with the thread that
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * made the su_newconn upcall in tcp_newconn_notify. Both
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * tcp_newconn_notify and tcp_accept require that conn_upper_handle
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * and conn_upcalls be set before returning, so they both write to
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * them. However, we're guaranteed that the value written is the same
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * for both threads.
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson econnp->conn_upcalls == lconnp->conn_upcalls);
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson ASSERT(eager->tcp_tcps == listener->tcp_tcps);
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * We should have a minimum of 2 references on the conn at this
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * point. One for TCP and one for the newconn notification
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * (which is now taken over by IP). In the normal case we would
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * also have another reference (making a total of 3) for the conn
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * being in the classifier hash list. However the eager could have
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * received an RST subsequently and tcp_closei_local could have
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * removed the eager from the classifier hash list, hence we can't
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * assert that reference.
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * Non-STREAMS listeners never defer the notification of new
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * connections.
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson ASSERT(!listener->tcp_eager_prev_q0->tcp_conn_def_q0);
b1cd7879d8fc9fb80c59139db15960e8e6d6bf93Anders Persson return ((eager->tcp_state < TCPS_ESTABLISHED) ? ECONNABORTED : 0);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poontcp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon /* All Solaris components should pass a cred for this operation. */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon /* failed to enter */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon /* binding to a NULL address really means unbind */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon error = tcp_do_bind(connp, sa, len, cr, B_TRUE);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon/* ARGSUSED */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poontcp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon /* All Solaris components should pass a cred for this operation. */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon /* failed to enter */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon error = tcp_do_listen(connp, NULL, 0, backlog, cr, B_FALSE);
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * sockfs needs to know what's the maximum number of sockets
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * that can be queued on the listener.
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon } else if (error < 0) {
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poontcp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon /* All Solaris components should pass a cred for this operation. */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon error = proto_verify_ip_addr(connp->conn_family, sa, len);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon /* failed to enter */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * TCP supports quick connect, so no need to do an implicit bind
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon error = tcp_do_connect(connp, sa, len, cr, curproc->p_pid);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon } else if (error < 0) {
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon/* ARGSUSED3 */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poontcp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *addr,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon /* All Solaris components should pass a cred for this operation. */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon return (conn_getpeername(connp, addr, addrlenp));
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon/* ARGSUSED3 */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poontcp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *addr,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon /* All Solaris components should pass a cred for this operation. */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon return (conn_getsockname(connp, addr, addrlenp));
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon/* returns UNIX error, the optlen is a value-result arg */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poontcp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon len = tcp_opt_get(connp, level, option_name, optvalp_buf);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * update optlen and copy option value
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poontcp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon const void *optvalp, socklen_t optlen, cred_t *cr)
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * Entering the squeue synchronously can result in a context switch,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * which can cause a rather severe performance degradation. So we try to
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * handle whatever options we can without entering the squeue.
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon mutex_enter(&connp->conn_tcp->tcp_non_sq_lock);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon connp->conn_tcp->tcp_naglim = *(int *)optvalp ? 1 :
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon mutex_exit(&connp->conn_tcp->tcp_non_sq_lock);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon error = proto_opt_check(level, option_name, optlen, NULL,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon error = tcp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon/* ARGSUSED */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poontcp_sendmsg(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon /* All Solaris components should pass a cred for this operation. */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * We return ENOTCONN if the endpoint is trying to
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * connect or has never been connected, and EPIPE if it
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * has been disconnected. The connection id helps us
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * distinguish between the last two cases.
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon return ((tcpstate == TCPS_SYN_SENT) ? ENOTCONN :
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * Squeue Flow Control
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon if (TCP_UNSENT_BYTES(tcp) > connp->conn_sndbuf) {
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * The application may pass in an address in the msghdr, but
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * we ignore the address on connection-oriented sockets.
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * Just like BSD this code does not generate an error for
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * TCP (a CONNREQUIRED socket) when sending to an address
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * passed in with sendto/sendmsg. Instead the data is
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * delivered on the connection as if no address had been
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output_urgent,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon connp, NULL, tcp_squeue_flag, SQTAG_TCP_OUTPUT);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon connp, NULL, tcp_squeue_flag, SQTAG_TCP_OUTPUT);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon/* ARGSUSED */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poontcp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon /* All Solaris components should pass a cred for this operation. */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * X/Open requires that we check the connected state.
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon /* shutdown the send side */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon bp = allocb_wait(0, BPRI_HI, STR_NOSIG, NULL);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon SQUEUE_ENTER_ONE(connp->conn_sqp, bp, tcp_shutdown_output,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon connp, NULL, SQ_NODRAIN, SQTAG_TCP_SHUTDOWN_OUTPUT);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon /* shutdown the recv side */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poontcp_clr_flowctrl(sock_lower_handle_t proto_handle)
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * If tcp->tcp_rsrv_mp == NULL, it means that tcp_clr_flowctrl()
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * is currently running.
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * Send back a window update immediately if TCP is above
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * ESTABLISHED state and the increase of the rcv window
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * that the other side knows is at least 1 MSS after flow
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * control is lifted.
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon/* ARGSUSED */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poontcp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon /* All Solaris components should pass a cred for this operation. */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * If we don't have a helper stream then create one.
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * ip_create_helper_stream takes care of locking the conn_t,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * so this check for NULL is just a performance optimization.
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon tcp_stack_t *tcps = connp->conn_tcp->tcp_tcps;
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * Create a helper stream for non-STREAMS socket.
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon error = ip_create_helper_stream(connp, tcps->tcps_ldi_ident);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon ip0dbg(("tcp_ioctl: create of IP helper stream "
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon ip1dbg(("tcp_ioctl: cmd 0x%x on non streams socket",
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * If the conn is not closing, pass on to IP using
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * helper stream. Bump the ioctlref to prevent tcp_close
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * from closing the rq/wq out from underneath the ioctl
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * if it ends up queued or aborted/interrupted.
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon if (connp->conn_state_flags & (CONN_CLOSING)) {
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon/* ARGSUSED */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poontcp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon /* All Solaris components should pass a cred for this operation. */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * Drop IP's reference on the conn. This is the last reference
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * on the connp if the state was less than established. If the
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * connection has gone into timewait state, then we will have
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * one ref for the TCP and one more ref (total of two) for the
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * classifier connected hash list (a timewait connection stays
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * in connected hash till closed).
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * We can't assert the references because there might be other
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * transient reference places because of some walkers or queued
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * packets in squeue for the timewait state.
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * EINPROGRESS tells sockfs to wait for a 'closed' upcall before
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * freeing the socket.
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon/* ARGSUSED */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poontcp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon uint_t *smodep, int *errorp, int flags, cred_t *credp)
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon if (type != SOCK_STREAM || (family != AF_INET && family != AF_INET6) ||
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon connp = tcp_create_common(credp, isv6, B_TRUE, errorp);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * Put the ref for TCP. Ref for IP was already put
ca3c8f41e333bc4783da7cb75b242843c8846c8aDavid Höppner * by ipcl_conn_create. Also make the conn_t globally
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * visible to walkers
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon *smodep = SM_CONNREQUIRED | SM_EXDATA | SM_ACCEPTSUPP |
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * tcp_fallback
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * A direct socket is falling back to using STREAMS. The queue
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * that is being passed down was created using tcp_open() with
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * the SO_FALLBACK flag set. As a result, the queue is not
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * associated with a conn, and the q_ptrs instead contain the
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * dev and minor area that should be used.
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * The 'issocket' flag indicates whether the FireEngine
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * optimizations should be used. The common case would be that
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * optimizations are enabled, and they might be subsequently
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * disabled using the _SIOCSOCKFALLBACK ioctl.
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * An active connection is falling back to TPI. Gather all the information
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * required by the STREAM head and TPI sonode and send it up.
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Perssontcp_fallback_noneager(tcp_t *tcp, mblk_t *stropt_mp, queue_t *q,
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb,
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * free the helper stream
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * Notify the STREAM head about options
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson stropt = (struct stroptions *)stropt_mp->b_rptr;
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson stropt_mp->b_wptr += sizeof (struct stroptions);
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson stropt->so_flags = SO_HIWAT | SO_WROFF | SO_MAXBLK;
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson stropt->so_wroff = connp->conn_ht_iphc_len + (tcp->tcp_loopback ? 0 :
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson stropt->so_maxblk = tcp_maxpsz_set(tcp, B_FALSE);
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * Collect the information needed to sync with the sonode
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson tcp_do_capability_ack(tcp, &tca, TC1_INFO|TC1_ACCEPTOR_ID);
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson (void) tcp_getsockname((sock_lower_handle_t)connp,
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson (struct sockaddr *)&laddr, &laddrlen, CRED());
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson error = tcp_getpeername((sock_lower_handle_t)connp,
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson (struct sockaddr *)&faddr, &faddrlen, CRED());
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE)
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * Notify the socket that the protocol is now quiescent,
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * and it's therefore safe to move data from the socket
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * to the stream head.
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca,
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * All eagers in q0 are marked as being non-STREAM, so they will
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * make su_newconn upcalls when the handshake completes, which
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * will fail (resulting in the conn being closed). So we just blow
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * off everything in q0 instead of waiting for the inevitable.
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * An eager is falling back to TPI. All we have to do is send
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * up a T_CONN_IND.
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Perssontcp_fallback_eager(tcp_t *eager, boolean_t issocket,
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson so_proto_quiesced_cb_t quiesced_cb, sock_quiesce_arg_t *arg)
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * Notify the socket that the protocol is now quiescent,
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * and it's therefore safe to move data from the socket
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * to tcp's rcv queue.
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson mp = (*quiesced_cb)(connp->conn_upper_handle, arg, NULL, NULL, 0,
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * The stream for this eager does not yet exist, so mark it as
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * being detached.
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson connp->conn_rq = listener->tcp_connp->conn_rq;
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson connp->conn_wq = listener->tcp_connp->conn_wq;
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson /* Send up the connection indication */
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * TLI/XTI applications will get confused by
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * sending eager as an option since it violates
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * the option semantics. So remove the eager as
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * option since TLI/XTI app doesn't need it anyway.
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * Sockfs guarantees that the listener will not be closed
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * during fallback. So we can safely use the listener's queue.
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poontcp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon stropt_mp = allocb_wait(sizeof (struct stroptions), BPRI_HI, STR_NOSIG,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon /* Pre-allocate the T_ordrel_ind mblk. */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon ordrel_mp = allocb_wait(sizeof (struct T_ordrel_ind), BPRI_HI,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon ((struct T_ordrel_ind *)ordrel_mp->b_rptr)->PRIM_type = T_ORDREL_IND;
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon ordrel_mp->b_wptr += sizeof (struct T_ordrel_ind);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * Enter the squeue so that no new packets can come in
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon /* failed to enter, free all the pre-allocated messages. */
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * Both endpoints must be of the same type (either STREAMS or
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * non-STREAMS) for fusion to be enabled. So if we are fused,
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * we have to unfuse.
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon /* The eager will deal with opts when accept() is called */
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson tcp_fallback_eager(tcp, direct_sockfs, quiesced_cb, arg);
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon tcp_fallback_noneager(tcp, stropt_mp, q, direct_sockfs,
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * No longer a direct socket
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * Note that we intentionally leave the upper_handle and upcalls
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * intact, since eagers may still be using them.
721fffe35d40e548a5a58dc53a2ec9c6762172d9Kacheong Poon * There should be at least two refs (IP + TCP)
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * Notifies a non-STREAMS based listener about a new connection. This
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * function is executed on the *eager*'s squeue once the 3 way handshake
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * has completed. Note that the behavior differs from STREAMS, where the
e82bc0ba9649a7146fdab88089eaa4b8502b2da4Anders Persson * T_CONN_IND is sent up by tcp_send_conn_ind() while on the *listener*'s
e82bc0ba9649a7146fdab88089eaa4b8502b2da4Anders Persson * Returns B_TRUE if the notification succeeded and an upper handle was
e82bc0ba9649a7146fdab88089eaa4b8502b2da4Anders Persson * obtained. `tcp' should be closed on failure.
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Perssontcp_newconn_notify(tcp_t *tcp, ip_recv_attr_t *ira)
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * Take the eager out, if it is in the list of droppable eagers
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * as we are here because the 3W handshake is over.
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * The eager already has an extra ref put in tcp_input_data
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * so that it stays till accept comes back even though it
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * might get into TCPS_CLOSED as a result of a TH_RST etc.
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson /* Move from SYN_RCVD to ESTABLISHED list */
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson tcp->tcp_eager_next_q0->tcp_eager_prev_q0 = tcp->tcp_eager_prev_q0;
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson tcp->tcp_eager_prev_q0->tcp_eager_next_q0 = tcp->tcp_eager_next_q0;
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * Insert at end of the queue because connections are accepted
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * in chronological order. Leaving the older connections at front
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * of the queue helps reduce search time.
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson /* we have timed out before */
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson (listener->tcp_tcps->tcps_conn_req_max_q0 >> 5) &&
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson 10*MINUTES < TICK_TO_MSEC(ddi_get_lbolt64() -
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * Turn off the defense mode if we
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * believe the SYN attack is over.
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson kmem_free((void *)listener->tcp_ip_addr_cache,
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson addr_cache = (ipaddr_t *)(listener->tcp_ip_addr_cache);
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * We have finished a 3-way handshake with this
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * remote host. This proves the IP addr is good.
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson addr_cache[IP_ADDR_CACHE_HASH(tcp->tcp_connp->conn_faddr_v4)] =
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * Notify the ULP about the newconn. It is guaranteed that no
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * tcp_accept() call will be made for the eager if the
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson * notification fails.
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson if ((upper = (*lconnp->conn_upcalls->su_newconn)
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson (lconnp->conn_upper_handle, (sock_lower_handle_t)econnp,
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson &sock_tcp_downcalls, ira->ira_cred, ira->ira_cpid,
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson tcp->tcp_ka_tid = TCP_TIMER(tcp, tcp_keepalive_timer,
3e95bd4ab92abca814bd28e854607d1975c7dc88Anders Persson /* Update the necessary parameters */