tcp_time_wait.c revision 66cd0f60c3182913d379abb730ae755bf6367126
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
/*
* This file contains functions related to TCP time wait processing. Also
* refer to the time wait handling comments in tcp_impl.h.
*/
#include <sys/squeue_impl.h>
#include <inet/tcp_impl.h>
#include <inet/tcp_cluster.h>
/*
* TCP_TIME_WAIT_DELAY governs how often the time_wait_collector runs.
* Running it every 5 seconds seems to give the best results.
*/
/*
* Remove a connection from the list of detached TIME_WAIT connections.
* It returns B_FALSE if it can't remove the connection from the list
* as the connection has already been removed from the list due to an
* earlier call to tcp_time_wait_remove(); otherwise it returns B_TRUE.
*/
{
if (tcp_time_wait == NULL) {
tcp_time_wait = *((tcp_squeue_priv_t **)
} else {
}
/* 0 means that the tcp_t has not been added to the time wait list. */
if (tcp->tcp_time_wait_expire == 0) {
if (locked)
return (B_FALSE);
}
NULL;
} else {
}
} else {
}
tcp->tcp_time_wait_expire = 0;
if (locked)
return (B_TRUE);
}
/*
* Add a connection to the list of detached TIME_WAIT connections
* and set its time to expire.
*/
void
{
/* Freed above */
/* must have happened at the time of detaching the tcp */
/*
* Since tcp_time_wait_expire is lbolt64, it should not wrap around
* in practice. Hence it cannot be 0. Note that zero means that the
* tcp_t is not in the TIME_WAIT list.
*/
/*
* Even if the list was empty before, there may be a timer
* running since a tcp_t can be removed from the list
* in other places, such as tcp_clean_death(). So check if
* a timer is needed.
*/
if (tcp_time_wait->tcp_time_wait_tid == 0) {
}
} else {
/* The list is not empty, so a timer must be running. */
}
}
/*
* Wrapper to call tcp_close_detached() via squeue to clean up TIME-WAIT
* tcp_t. Used in tcp_time_wait_collector().
*/
/* ARGSUSED */
static void
{
return;
}
/*
* Because they have no upstream client to rebind or tcp_close()
* them later, we axe the connection here and now.
*/
}
/*
* Blows away all tcps whose TIME_WAIT has expired. List traversal
* is done forwards from the head.
* This walks all stack instances since
* tcp_time_wait remains global across all stacks.
*/
/* ARGSUSED */
void
tcp_time_wait_collector(void *arg)
{
}
}
/*
* In order to reap time waits reliably, we should use a
* source of time that is not adjustable by the user -- hence
* the call to ddi_get_lbolt64().
*/
now = ddi_get_lbolt64();
/*
* lbolt64 should not wrap around in practice... So we can
* do a direct comparison.
*/
break;
/*
* This is essentially a TW reclaim fast path optimization for
* performance where the timewait collector checks under the
* fanout lock (so that no one else can get access to the
* conn_t) that the refcnt is 2 i.e. one for TCP and one for
* the classifier hash list. If ref count is indeed 2, we can
* just remove the conn under the fanout lock and avoid
* cleaning up the conn under the squeue, provided that
* clustering callbacks are not enabled. If clustering is
* enabled, we need to make the clustering callback before
* setting the CONDEMNED flag and after dropping all locks and
* so we forego this optimization and fall back to the slow
* path. Also please see the comments in tcp_closei_local
* regarding the refcnt logic.
*
* Since we are holding the tcp_time_wait_lock, it's better
* not to block on the fanout_lock because other connections
* can't add themselves to the time_wait list. So we do a
* tryenter instead of mutex_enter.
*/
if (mutex_tryenter(lock)) {
(cl_inet_disconnect == NULL)) {
connp->conn_fanout);
/*
* Set the CONDEMNED flag right away so that
* the refcnt cannot be increased by any
* walker.
*/
if (tcp_time_wait->tcp_free_list_cnt <
/* Add to head of tcp_free_list */
continue;
} else {
/* Do not add to tcp_free_list */
}
} else {
/*
* We can reuse the closemp here since conn has
* detached (otherwise we wouldn't even be in
* time_wait list). tcp_closemp_used can safely
* be changed without taking a lock as no other
* thread can concurrently access it at this
* point in the connection lifecycle.
*/
else
"tcp_timewait_collector: "
"concurrent use of tcp_closemp: "
"connp %p tcp %p\n", (void *)connp,
(void *)tcp);
}
} else {
/*
* We can reuse the closemp here since conn has
* detached (otherwise we wouldn't even be in
* time_wait list). tcp_closemp_used can safely
* be changed without taking a lock as no other
* thread can concurrently access it at this
* point in the connection lifecycle.
*/
else
"concurrent use of tcp_closemp: "
"connp %p tcp %p\n", (void *)connp,
(void *)tcp);
}
}
/*
* If the time wait list is not empty and there is no timer running,
* restart it.
*/
tcp_time_wait->tcp_time_wait_tid == 0) {
/* This ensures that we won't wake up too often. */
}
}
/*
* tcp_time_wait_processing() handles processing of incoming packets when
* the tcp_t is in the TIME_WAIT state.
*
* A TIME_WAIT tcp_t that has an associated open TCP end point (not in
* detached state) is never put on the time wait list.
*/
void
{
if (tcp->tcp_snd_ts_ok) {
goto done;
}
}
if (gap < 0) {
goto done;
}
/*
* When TCP receives a duplicate FIN in
* TIME_WAIT state, restart the 2 MSL timer.
* See page 73 in RFC 793. Make sure this TCP
* is already on the TIME_WAIT list. If not,
* just restart the timer.
*/
if (TCP_IS_DETACHED(tcp)) {
B_TRUE) {
}
} else {
}
goto done;
}
flags |= TH_ACK_NEEDED;
seg_len = 0;
goto process_ack;
}
/* Fix seg_seq, and chew the gap off the front. */
}
/*
* Make sure that when we accept the connection, we pick
* an ISS greater than (tcp_snxt + ISS_INCR/2) of the
* old connection.
*
* The next ISS generated is equal to tcp_iss_incr_extra
* + ISS_INCR/2 + other components depending on the
* value of tcp_strong_iss. We pre-calculate the new
* ISS here and compare with tcp_snxt to determine if
* we need to make adjustment to tcp_iss_incr_extra.
*
* The above calculation is ugly and is a
* waste of CPU cycles...
*/
switch (tcps->tcps_strong_iss) {
case 2: {
/* Add time and MD5 components. */
struct {
} arg;
/* We use MAPPED addresses in tcp_iss_init */
sizeof (arg));
break;
}
case 1:
/* Add time component and min random (i.e. 1). */
break;
default:
/* Add only time component. */
break;
}
/*
* New ISS not guaranteed to be ISS_INCR/2
* ahead of the current tcp_snxt, so add the
* difference to tcp_iss_incr_extra.
*/
}
/*
* If tcp_clean_death() cannot perform the task now,
* drop the SYN packet and let the other side retransmit.
* Otherwise pass the SYN packet back in, since the
* old tcp state has been cleaned up or freed.
*/
goto done;
/* Drops ref on nconnp */
return;
}
goto done;
}
/*
* rgap tells how much of the received data falls out of window: when
* it is negative, its magnitude is the amount that is out of window.
*/
if (rgap < 0) {
/* Fix seg_len and make sure there is something left. */
if (seg_len <= 0) {
goto done;
}
flags |= TH_ACK_NEEDED;
seg_len = 0;
goto process_ack;
}
}
/*
* Check whether we can update tcp_ts_recent. This test is
* NOT the one in RFC 1323 3.4. It is from Braden, 1993, "TCP
* Extensions for High Performance: An Update", Internet Draft.
*/
if (tcp->tcp_snd_ts_ok &&
}
/* Always ack out of order packets */
flags |= TH_ACK_NEEDED;
seg_len = 0;
} else if (seg_len > 0) {
}
(void) tcp_clean_death(tcp, 0);
goto done;
}
/*
* Do not delete the TCP structure if it is in
* TIME_WAIT state. Refer to RFC 1122, 4.2.2.13.
*/
goto done;
}
if (bytes_acked <= 0) {
if (bytes_acked == 0 && seg_len == 0 &&
} else {
/* Acks something not sent */
flags |= TH_ACK_NEEDED;
}
}
if (flags & TH_ACK_NEEDED) {
/*
* Time to send an ack for some reason.
*/
}
done:
}