/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
#include <sys/squeue_impl.h>
#include <inet/tcp_impl.h>
/* Control whether TCP can enter defensive mode when under memory pressure. */
/*
* Routines related to the TCP_IOC_ABORT_CONN ioctl command.
*
* TCP_IOC_ABORT_CONN is a non-transparent ioctl command used for aborting
* TCP connections. To invoke this ioctl, a tcp_ioc_abort_conn_t structure
* (defined in tcp.h) needs to be filled in and passed into the kernel
* via an I_STR ioctl command (see streamio(7I)). The tcp_ioc_abort_conn_t
* structure contains the four-tuple of a TCP connection and a range of TCP
* states (specified by ac_start and ac_end). The use of wildcard addresses
* and ports is allowed. Connections with a matching four tuple and a state
* within the specified range will be aborted. The valid states for the
* ac_start and ac_end fields are in the range TCPS_SYN_SENT to TCPS_TIME_WAIT,
* inclusive.
*
* An application which has its connection aborted by this ioctl will receive
* an error that is dependent on the connection state at the time of the abort.
* If the connection state is < TCPS_TIME_WAIT, an application should behave as
* though a RST packet has been received. If the connection state is equal to
* TCPS_TIME_WAIT, the 2MSL timeout will immediately be canceled by the kernel
* and all resources associated with the connection will be freed.
*/
/*
* Forward declarations for file-local (static) helpers that operate on a
* tcp_ioc_abort_conn_t.
*/
static void tcp_ioctl_abort_dump(tcp_ioc_abort_conn_t *);
static int tcp_ioctl_abort_bucket(tcp_ioc_abort_conn_t *, int, int *,
boolean_t, tcp_stack_t *);
/*
* Macros used for accessing the different types of sockaddr
* structures inside a tcp_ioc_abort_conn_t.
*/
/*
* Return the correct error code to mimic the behavior
* of a connection reset.
*
* Mapping from the connection state to the value stored in err:
*   TCPS_SYN_SENT, TCPS_SYN_RCVD                 -> ECONNREFUSED
*   TCPS_ESTABLISHED, TCPS_FIN_WAIT_1,
*   TCPS_FIN_WAIT_2, TCPS_CLOSE_WAIT             -> ECONNRESET
*   TCPS_CLOSING, TCPS_LAST_ACK, TCPS_TIME_WAIT  -> 0
*
* NOTE(review): the macro header and the statement under "default:" are
* not visible in this copy of the file; confirm both against the full
* source before relying on the behavior for other states.
*/
switch ((state)) { \
case TCPS_SYN_SENT: \
case TCPS_SYN_RCVD: \
(err) = ECONNREFUSED; \
break; \
case TCPS_ESTABLISHED: \
case TCPS_FIN_WAIT_1: \
case TCPS_FIN_WAIT_2: \
case TCPS_CLOSE_WAIT: \
(err) = ECONNRESET; \
break; \
case TCPS_CLOSING: \
case TCPS_LAST_ACK: \
case TCPS_TIME_WAIT: \
(err) = 0; \
break; \
default: \
} \
}
/*
* Check if a tcp structure matches the info in acp.
*
* NOTE(review): only fragments of the matching expression are visible in
* this copy.  From what remains, a port value of 0 in acp short-circuits
* the corresponding port test (so it acts as a wildcard) for each of the
* V4/V6 local and remote ports, and the address tests reference
* connp->conn_laddr_v6 and connp->conn_faddr_v6.  Confirm the complete
* expression against the full source.
*/
(TCP_AC_V4LPORT((acp)) == 0 || \
(TCP_AC_V4RPORT((acp)) == 0 || \
&(connp)->conn_laddr_v6)) && \
&(connp)->conn_faddr_v6)) && \
(TCP_AC_V6LPORT((acp)) == 0 || \
(TCP_AC_V6RPORT((acp)) == 0 || \
/*
* Build a message containing a tcp_ioc_abort_conn_t structure
* which is filled in with information from acp and tp.
*
* Returns the message (mp).  There is also a path that returns NULL;
* presumably that is the allocation-failure case, but the guarding
* condition is not visible in this copy -- TODO confirm.
*
* NOTE(review): the function name/parameter line and most of the body
* are missing from this copy of the file.
*/
static mblk_t *
{
return (NULL);
sizeof (uint32_t));
} else {
}
return (mp);
}
/*
* Print a tcp_ioc_abort_conn_t structure.
*
* The logged text begins with the local and remote address:port pairs
* (see the format string below).  SL_CONSOLE is OR-ed into logflags so the
* message may also reach the console; per the comment in the body this is
* meant to be skipped for non-global zones, but the guarding condition is
* not visible in this copy -- TODO confirm.
*
* NOTE(review): the function name/parameter line and most of the body
* are missing from this copy of the file.
*/
static void
{
} else {
}
/*
* Don't print this message to the console if the operation was done
* to a non-global zone.
*/
logflags |= SL_CONSOLE;
"TCP_IOC_ABORT_CONN: local = %s:%d, remote = %s:%d, "
}
/*
* Called using SQ_FILL when a message built using
* tcp_ioctl_abort_build_msg is put into a queue.
* Note that when we get here there is no wildcard in acp any more.
*
* The routine bails out early for a tcp that is already closed (see the
* first comment in the body); otherwise it proceeds with the abort on the
* current squeue.  errcode presumably receives the error to report for
* the aborted connection -- its computation is not visible in this copy.
*
* NOTE(review): the function name/parameter line and most of the body
* are missing from this copy of the file.
*/
/* ARGSUSED2 */
static void
{
/*
* Don't accept any input on a closed tcp as this TCP logically does
* not exist on the system. Don't proceed further with this TCP.
* For example, this packet could trigger another close of this tcp
* which would be disastrous for tcp_refcnt. tcp_close_detached /
* tcp_clean_death / tcp_closei_local must be called at most once
* on a TCP.
*/
return;
}
/*
* If we get here, we are already on the correct
* squeue. This ioctl follows the following path
* tcp_wput -> tcp_wput_ioctl -> tcp_ioctl_abort_conn
* ->tcp_ioctl_abort->squeue_enter (if on a
* different squeue)
*/
int errcode;
}
}
/*
* Abort all matching connections on a hash chain.
*
* nmatch counts the matches found during the current pass over the chain.
* When exact is set, the scan stops at the first match.  Otherwise the
* scan stops once 500 matches have accumulated so that the lock is not
* held for too long; the code can then jump back to the startover label
* for another pass (the condition guarding that jump is not visible in
* this copy).  Returns err.
*
* NOTE(review): the function name/parameter line and most of the body
* are missing from this copy of the file.
*/
static int
{
nmatch = 0;
/*
* We are missing a check on sin6_scope_id for linklocals here,
* but current usage is just for aborting based on zoneid
* for shared-IP zones.
*/
break;
}
} else {
}
nmatch++;
if (exact)
break;
}
/* Avoid holding lock for too long. */
if (nmatch >= 500)
break;
}
/* Pass mp into the correct tcp */
}
goto startover;
return (err);
}
/*
* Abort all connections that match the attributes specified in acp.
*
* When a single bucket index can be determined (index != -1), only that
* bucket is processed.  Otherwise every index is visited in turn and the
* loop stops at the first non-zero err.  Returns err.
*
* NOTE(review): the function name/parameter line and most of the body
* are missing from this copy of the file, including the condition that
* guards the SL_CONSOLE assignment near the end.
*/
static int
{
}
} else {
}
}
/*
* For cases where remote addr, local port, and remote port are non-
* wildcards, tcp_ioctl_abort_bucket will only be called once.
*/
if (index != -1) {
} else {
/*
* loop through all entries for wildcard case
*/
for (index = 0;
index++) {
if (err != 0)
break;
}
}
/*
* Don't print this message to the console if the operation was done
* to a non-global zone.
*/
logflags |= SL_CONSOLE;
return (err);
}
/*
* Process the TCP_IOC_ABORT_CONN ioctl request.
*
* The visible steps are, in order: an initial check, a permission check,
* a check that the supplied zoneid names an existing zone, and the zoneid
* adjustment for exclusive stacks.  Each failing step jumps to the common
* exit label "out", after which err selects between the failure and the
* success completion (the completion calls themselves are not visible in
* this copy).
*
* NOTE(review): the function name/parameter line and most of the body
* are missing from this copy of the file.
*/
void
{
int err;
goto out;
}
/* check permissions */
goto out;
}
}
/* check that a zone with the supplied zoneid exists */
} else {
goto out;
}
}
/*
* For exclusive stacks we set the zoneid to zero
* to make TCP operate as if in the global zone.
*/
goto out;
}
out:
}
if (err != 0)
else
}
/*
* Timeout function to reset the TCP stack variable tcps_reclaim to false.
*
* The routine walks the tcps_sc_cnt entries of the stack (the loop body is
* not visible in this copy).  If tcps_reclaim is already false it returns
* without touching tcps_reclaim_tid.  Otherwise it either clears
* tcps_reclaim_tid or, in the else branch, stays in defensive mode and
* restarts the timer; the condition choosing between the two is not
* visible here -- TODO confirm.
*
* NOTE(review): the function name/parameter line and most of the body
* are missing from this copy of the file.
*/
void
{
int i;
for (i = 0; i < tcps->tcps_sc_cnt; i++)
/*
* This happens only when a stack is going away. tcps_reclaim_tid
* should not be reset to 0 when returning in this case.
*/
if (!tcps->tcps_reclaim) {
return;
}
tcps->tcps_reclaim_tid = 0;
} else {
/* Stay in defensive mode and restart the timer */
}
}
/*
* Kmem reclaim call back function. When the system is under memory
* pressure, we set the TCP stack variable tcps_reclaim to true. This
* variable is reset to false after tcps_reclaim_period msecs. During this
* period, TCP will be more aggressive in aborting connections not making
* progress, meaning retransmitting for some time (tcp_early_abort seconds).
* TCP will also not accept new connection requests for those listeners whose
* q or q0 is not empty.
*
* Nothing is done when tcp_do_reclaim is clear.  A second early return
* covers the case where the system is not really under memory pressure
* (its condition is not visible in this copy).  Stacks whose TCP state is
* not yet initialized, and stacks that fail the connection-count check,
* are skipped with "continue".
*
* NOTE(review): the function name/parameter line and most of the body
* are missing from this copy of the file.
*/
/* ARGSUSED */
void
{
if (!tcp_do_reclaim)
return;
/*
* The reclaim function may be called even when the system is not
* really under memory pressure.
*/
return;
int i;
/*
* During boot time, the first netstack_t is created and
* initialized before TCP has registered with the netstack
* framework. If this reclaim function is called before TCP
* has finished its initialization, netstack_next() will
* return the first netstack_t (since its netstack_flags is
* not NSF_UNINIT). And its netstack_tcp will be NULL. We
* need to catch it.
*
* All subsequent netstack_t creation will not have this
* problem since the initialization is not finished until TCP
* has finished its own tcp_stack_t initialization. Hence
* netstack_next() will not return one with NULL netstack_tcp.
*/
continue;
}
/*
* Even if the system is under memory pressure, the reason may
* not be because of TCP activity. Check the number of
* connections in each stack. If the number exceeds the
* threshold (maxusers), turn on defensive mode.
*/
for (i = 0; i < tcps->tcps_sc_cnt; i++)
continue;
}
if (!tcps->tcps_reclaim) {
}
}
}
/*
* Given a tcp_stack_t and a port (in host byte order), find a listener
* configuration for that port and return the ratio.
*
* The search stops at the first matching entry (break) and ratio is
* returned.  The value returned when no entry matches depends on how
* ratio is initialized, which is not visible in this copy -- TODO confirm.
*
* NOTE(review): the return type, the function name/parameter line and
* most of the body are missing from this copy of the file.
*/
{
break;
}
}
return (ratio);
}
/*
* Remove all listener limit configuration from a tcp_stack_t.
*
* NOTE(review): the function name/parameter line and the entire body are
* missing from this copy of the file; nothing about how the entries are
* released can be confirmed from here.
*/
void
{
}
}
/*
* When a CPU is added, we need to allocate the per CPU stats struct.
*
* There is an early return (presumably when nothing needs to be
* allocated; the condition is not visible in this copy), and the
* allocation that remains visible is made with KM_SLEEP.
*
* NOTE(review): the function name/parameter line and most of the body
* are missing from this copy of the file.
*/
void
{
int i;
return;
KM_SLEEP);
}
}
/*
* Diagnostic routine used to return a string associated with the tcp state.
* Note that if the caller does not supply a buffer, it will use an internal
* static string. This means that if multiple threads call this function at
* the same time, output can be corrupted... Note also that this function
* does not check the size of the supplied buffer. The caller has to make
* sure that it is big enough.
*
* Each state name is the TCPS_ constant with the "TCPS_" prefix replaced
* by "TCP_" (for example TCPS_LISTEN gives "TCP_LISTEN").  The literal
* "NULL_TCP" is returned on one early path, presumably when no tcp was
* supplied -- the test is not visible in this copy.  The format argument
* selects DISP_ADDR_AND_PORT output; DISP_PORT_ONLY and any other value
* share the default case.  Returns buf.
*
* NOTE(review): the function name/parameter line, the switch header for
* the state, the body of its "default:" case and the formatting calls are
* missing from this copy of the file.
*/
char *
{
char *buf;
char *cp;
else
return ("NULL_TCP");
case TCPS_CLOSED:
cp = "TCP_CLOSED";
break;
case TCPS_IDLE:
cp = "TCP_IDLE";
break;
case TCPS_BOUND:
cp = "TCP_BOUND";
break;
case TCPS_LISTEN:
cp = "TCP_LISTEN";
break;
case TCPS_SYN_SENT:
cp = "TCP_SYN_SENT";
break;
case TCPS_SYN_RCVD:
cp = "TCP_SYN_RCVD";
break;
case TCPS_ESTABLISHED:
cp = "TCP_ESTABLISHED";
break;
case TCPS_CLOSE_WAIT:
cp = "TCP_CLOSE_WAIT";
break;
case TCPS_FIN_WAIT_1:
cp = "TCP_FIN_WAIT_1";
break;
case TCPS_CLOSING:
cp = "TCP_CLOSING";
break;
case TCPS_LAST_ACK:
cp = "TCP_LAST_ACK";
break;
case TCPS_FIN_WAIT_2:
cp = "TCP_FIN_WAIT_2";
break;
case TCPS_TIME_WAIT:
cp = "TCP_TIME_WAIT";
break;
default:
break;
}
switch (format) {
case DISP_ADDR_AND_PORT:
/*
* Note that we use the remote address in the tcp_b
* structure. This means that it will print out
* the real destination address, not the next hop's
* address if source routing is used.
*/
} else {
}
sizeof (local_addrbuf));
sizeof (remote_addrbuf));
break;
case DISP_PORT_ONLY:
default:
break;
}
return (buf);
}