/* tcp_misc.c revision 721fffe35d40e548a5a58dc53a2ec9c6762172d9 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/types.h>
#include <sys/strlog.h>
#include <sys/policy.h>
#include <sys/strsun.h>
#include <sys/squeue_impl.h>
#include <sys/squeue.h>
#include <inet/common.h>
#include <inet/ip.h>
#include <inet/tcp.h>
#include <inet/tcp_impl.h>
/*
* Control whether TCP can enter defensive mode when under memory pressure.
* When this is B_FALSE, tcp_conn_reclaim() returns immediately without
* looking at any TCP stack.
*/
static boolean_t tcp_do_reclaim = B_TRUE;
/*
* Routines related to the TCP_IOC_ABORT_CONN ioctl command.
*
* TCP_IOC_ABORT_CONN is a non-transparent ioctl command used for aborting
* TCP connections. To invoke this ioctl, a tcp_ioc_abort_conn_t structure
* (defined in tcp.h) needs to be filled in and passed into the kernel
* via an I_STR ioctl command (see streamio(7I)). The tcp_ioc_abort_conn_t
* structure contains the four-tuple of a TCP connection and a range of TCP
* states (specified by ac_start and ac_end). The use of wildcard addresses
* and ports is allowed. Connections with a matching four tuple and a state
* within the specified range will be aborted. The valid states for the
* ac_start and ac_end fields are in the range TCPS_SYN_SENT to TCPS_TIME_WAIT,
* inclusive.
*
* An application which has its connection aborted by this ioctl will receive
* an error that is dependent on the connection state at the time of the abort.
* If the connection state is < TCPS_TIME_WAIT, an application should behave as
* though a RST packet has been received. If the connection state is equal to
* TCPS_TIME_WAIT, the 2MSL timeout will immediately be canceled by the kernel
* and all resources associated with the connection will be freed.
*/
/*
* Forward declarations for the TCP_IOC_ABORT_CONN routines defined in this
* file. tcp_ioctl_abort_conn() is the only one declared without static
* linkage; the others are private helpers.
*/
static mblk_t *tcp_ioctl_abort_build_msg(tcp_ioc_abort_conn_t *, tcp_t *);
static void tcp_ioctl_abort_dump(tcp_ioc_abort_conn_t *);
static void tcp_ioctl_abort_handler(void *arg, mblk_t *mp, void *arg2,
ip_recv_attr_t *dummy);
static int tcp_ioctl_abort(tcp_ioc_abort_conn_t *, tcp_stack_t *tcps);
void tcp_ioctl_abort_conn(queue_t *, mblk_t *);
static int tcp_ioctl_abort_bucket(tcp_ioc_abort_conn_t *, int, int *,
boolean_t, tcp_stack_t *);
/*
* Macros used for accessing the different types of sockaddr
* structures inside a tcp_ioc_abort_conn_t.
*
* The xLADDR/xRADDR macros reinterpret the ac_local/ac_remote members as
* sin_t (V4) or sin6_t (V6) pointers. The LOCAL/REMOTE/LPORT/RPORT macros
* expand to the address or port member of that view; they are used both
* for reading and as assignment targets (see tcp_ioctl_abort_build_msg()).
* The port members are compared directly against conn_lport/conn_fport and
* are passed through ntohs() by tcp_ioctl_abort_dump() before printing.
*/
#define TCP_AC_V4LADDR(acp) ((sin_t *)&(acp)->ac_local)
#define TCP_AC_V4RADDR(acp) ((sin_t *)&(acp)->ac_remote)
#define TCP_AC_V4LOCAL(acp) (TCP_AC_V4LADDR(acp)->sin_addr.s_addr)
#define TCP_AC_V4REMOTE(acp) (TCP_AC_V4RADDR(acp)->sin_addr.s_addr)
#define TCP_AC_V4LPORT(acp) (TCP_AC_V4LADDR(acp)->sin_port)
#define TCP_AC_V4RPORT(acp) (TCP_AC_V4RADDR(acp)->sin_port)
#define TCP_AC_V6LADDR(acp) ((sin6_t *)&(acp)->ac_local)
#define TCP_AC_V6RADDR(acp) ((sin6_t *)&(acp)->ac_remote)
#define TCP_AC_V6LOCAL(acp) (TCP_AC_V6LADDR(acp)->sin6_addr)
#define TCP_AC_V6REMOTE(acp) (TCP_AC_V6RADDR(acp)->sin6_addr)
#define TCP_AC_V6LPORT(acp) (TCP_AC_V6LADDR(acp)->sin6_port)
#define TCP_AC_V6RPORT(acp) (TCP_AC_V6RADDR(acp)->sin6_port)
/*
 * Return the correct error code to mimic the behavior
 * of a connection reset.
 *
 *	state	TCP state of the connection being aborted (a TCPS_* value)
 *	err	lvalue that receives the errno to report:
 *		  SYN_SENT, SYN_RCVD			-> ECONNREFUSED
 *		  ESTABLISHED, FIN_WAIT_1/2, CLOSE_WAIT	-> ECONNRESET
 *		  CLOSING, LAST_ACK, TIME_WAIT		-> 0
 *		  anything else				-> ENXIO
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement and "TCP_AC_GET_ERRCODE(s, e);" can be used safely as
 * the unbraced body of an if/else.
 */
#define	TCP_AC_GET_ERRCODE(state, err) do {	\
	switch ((state)) {			\
	case TCPS_SYN_SENT:			\
	case TCPS_SYN_RCVD:			\
		(err) = ECONNREFUSED;		\
		break;				\
	case TCPS_ESTABLISHED:			\
	case TCPS_FIN_WAIT_1:			\
	case TCPS_FIN_WAIT_2:			\
	case TCPS_CLOSE_WAIT:			\
		(err) = ECONNRESET;		\
		break;				\
	case TCPS_CLOSING:			\
	case TCPS_LAST_ACK:			\
	case TCPS_TIME_WAIT:			\
		(err) = 0;			\
		break;				\
	default:				\
		(err) = ENXIO;			\
		break;				\
	}					\
} while (0)
/*
* Check if a tcp structure matches the info in acp.
*
* The comparison is selected by acp->ac_local.ss_family: AF_INET uses the
* IPv4 fields, any other value uses the IPv6 fields. Each of local address,
* remote address, local port and remote port matches when the acp field is
* a wildcard (INADDR_ANY / unspecified IPv6 address / port 0) or is equal
* to the corresponding field of connp. In addition (tcp)->tcp_state must
* lie within [ac_start, ac_end]. The arguments are evaluated more than once.
*/
#define TCP_AC_ADDR_MATCH(acp, connp, tcp) \
(((acp)->ac_local.ss_family == AF_INET) ? \
((TCP_AC_V4LOCAL((acp)) == INADDR_ANY || \
TCP_AC_V4LOCAL((acp)) == (connp)->conn_laddr_v4) && \
(TCP_AC_V4REMOTE((acp)) == INADDR_ANY || \
TCP_AC_V4REMOTE((acp)) == (connp)->conn_faddr_v4) && \
(TCP_AC_V4LPORT((acp)) == 0 || \
TCP_AC_V4LPORT((acp)) == (connp)->conn_lport) && \
(TCP_AC_V4RPORT((acp)) == 0 || \
TCP_AC_V4RPORT((acp)) == (connp)->conn_fport) && \
(acp)->ac_start <= (tcp)->tcp_state && \
(acp)->ac_end >= (tcp)->tcp_state) : \
((IN6_IS_ADDR_UNSPECIFIED(&TCP_AC_V6LOCAL((acp))) || \
IN6_ARE_ADDR_EQUAL(&TCP_AC_V6LOCAL((acp)), \
&(connp)->conn_laddr_v6)) && \
(IN6_IS_ADDR_UNSPECIFIED(&TCP_AC_V6REMOTE((acp))) || \
IN6_ARE_ADDR_EQUAL(&TCP_AC_V6REMOTE((acp)), \
&(connp)->conn_faddr_v6)) && \
(TCP_AC_V6LPORT((acp)) == 0 || \
TCP_AC_V6LPORT((acp)) == (connp)->conn_lport) && \
(TCP_AC_V6RPORT((acp)) == 0 || \
TCP_AC_V6RPORT((acp)) == (connp)->conn_fport) && \
(acp)->ac_start <= (tcp)->tcp_state && \
(acp)->ac_end >= (tcp)->tcp_state))
/*
* Zone-aware match: evaluates to 0 unless acp->ac_zoneid is ALL_ZONES or
* equals (connp)->conn_zoneid; otherwise the result is that of
* TCP_AC_ADDR_MATCH().
*/
#define TCP_AC_MATCH(acp, connp, tcp) \
(((acp)->ac_zoneid == ALL_ZONES || \
(acp)->ac_zoneid == (connp)->conn_zoneid) ? \
TCP_AC_ADDR_MATCH(acp, connp, tcp) : 0)
/*
 * Allocate the message that asks one specific connection to abort itself.
 *
 * The message consists of a uint32_t holding TCP_IOC_ABORT_CONN followed by
 * a tcp_ioc_abort_conn_t.  The state range and zone id are copied from acp;
 * the addresses and ports are taken from tp's conn_t, using the address
 * family found in acp->ac_local.
 *
 * Returns the new mblk, or NULL if allocb() fails.
 */
static mblk_t *
tcp_ioctl_abort_build_msg(tcp_ioc_abort_conn_t *acp, tcp_t *tp)
{
	size_t			msglen = sizeof (uint32_t) + sizeof (*acp);
	conn_t			*connp;
	tcp_ioc_abort_conn_t	*req;
	mblk_t			*msg;

	if ((msg = allocb(msglen, BPRI_LO)) == NULL)
		return (NULL);

	/* Command word first, request structure right behind it. */
	*((uint32_t *)msg->b_rptr) = TCP_IOC_ABORT_CONN;
	req = (tcp_ioc_abort_conn_t *)((uchar_t *)msg->b_rptr +
	    sizeof (uint32_t));

	req->ac_start = acp->ac_start;
	req->ac_end = acp->ac_end;
	req->ac_zoneid = acp->ac_zoneid;

	connp = tp->tcp_connp;
	if (acp->ac_local.ss_family != AF_INET) {
		req->ac_local.ss_family = AF_INET6;
		req->ac_remote.ss_family = AF_INET6;
		TCP_AC_V6LOCAL(req) = connp->conn_laddr_v6;
		TCP_AC_V6REMOTE(req) = connp->conn_faddr_v6;
		TCP_AC_V6LPORT(req) = connp->conn_lport;
		TCP_AC_V6RPORT(req) = connp->conn_fport;
	} else {
		req->ac_local.ss_family = AF_INET;
		req->ac_remote.ss_family = AF_INET;
		TCP_AC_V4LOCAL(req) = connp->conn_laddr_v4;
		TCP_AC_V4REMOTE(req) = connp->conn_faddr_v4;
		TCP_AC_V4LPORT(req) = connp->conn_lport;
		TCP_AC_V4RPORT(req) = connp->conn_fport;
	}

	msg->b_wptr = (uchar_t *)msg->b_rptr + msglen;
	return (msg);
}
/*
 * Log the contents of a tcp_ioc_abort_conn_t through strlog().
 *
 * Addresses are rendered with inet_ntop() according to
 * acp->ac_local.ss_family and ports are converted with ntohs().  The
 * message also goes to the console when the request targets the global
 * zone or all zones.
 */
static void
tcp_ioctl_abort_dump(tcp_ioc_abort_conn_t *acp)
{
	char		local_str[128];
	char		remote_str[128];
	const void	*laddr;
	const void	*raddr;
	in_port_t	lport, rport;
	sa_family_t	family = acp->ac_local.ss_family;
	ushort_t	flags = SL_TRACE | SL_NOTE;

	if (family == AF_INET) {
		laddr = (const void *)&TCP_AC_V4LOCAL(acp);
		raddr = (const void *)&TCP_AC_V4REMOTE(acp);
		lport = ntohs(TCP_AC_V4LPORT(acp));
		rport = ntohs(TCP_AC_V4RPORT(acp));
	} else {
		laddr = (const void *)&TCP_AC_V6LOCAL(acp);
		raddr = (const void *)&TCP_AC_V6REMOTE(acp);
		lport = ntohs(TCP_AC_V6LPORT(acp));
		rport = ntohs(TCP_AC_V6RPORT(acp));
	}
	(void) inet_ntop(family, laddr, local_str, sizeof (local_str));
	(void) inet_ntop(family, raddr, remote_str, sizeof (remote_str));

	/*
	 * Operations aimed at a non-global zone are kept off the console.
	 */
	if (acp->ac_zoneid == ALL_ZONES || acp->ac_zoneid == GLOBAL_ZONEID)
		flags |= SL_CONSOLE;

	(void) strlog(TCP_MOD_ID, 0, 1, flags,
	    "TCP_IOC_ABORT_CONN: local = %s:%d, remote = %s:%d, "
	    "start = %d, end = %d\n", local_str, lport, remote_str, rport,
	    acp->ac_start, acp->ac_end);
}
/*
 * Squeue callback for a message produced by tcp_ioctl_abort_build_msg();
 * tcp_ioctl_abort_bucket() queues it with SQ_FILL.  By the time the message
 * reaches this point it names one specific connection, i.e. it carries no
 * wildcards any more.
 *
 *	arg	the conn_t of the connection to abort
 *	mp	the abort message; always freed here
 *	arg2, dummy	unused
 */
/* ARGSUSED2 */
static void
tcp_ioctl_abort_handler(void *arg, mblk_t *mp, void *arg2,
    ip_recv_attr_t *dummy)
{
	tcp_t			*tcp = ((conn_t *)arg)->conn_tcp;
	tcp_ioc_abort_conn_t	*req;
	int			errcode;

	/*
	 * A tcp that is CLOSED or merely BOUND logically does not exist on
	 * the system, so nothing is done for it.  Acting on it could lead
	 * to a second close of the same tcp, which would be disastrous for
	 * tcp_refcnt: tcp_close_detached / tcp_clean_death /
	 * tcp_closei_local must be called at most once on a TCP.
	 */
	if (tcp->tcp_state != TCPS_CLOSED && tcp->tcp_state != TCPS_BOUND) {
		req = (tcp_ioc_abort_conn_t *)(mp->b_rptr + sizeof (uint32_t));

		/*
		 * We are already running on the right squeue here.  The
		 * ioctl travels tcp_wput -> tcp_wput_ioctl ->
		 * tcp_ioctl_abort_conn -> tcp_ioctl_abort -> squeue_enter
		 * (if on a different squeue).
		 */
		if (tcp->tcp_state <= req->ac_end) {
			TCP_AC_GET_ERRCODE(tcp->tcp_state, errcode);
			(void) tcp_clean_death(tcp, errcode);
		}
	}
	freemsg(mp);
}
/*
* Abort all matching connections on a hash chain.
*
* acp - abort request; connections are selected with TCP_AC_MATCH().
* index - index into ipst->ips_ipcl_conn_fanout[] of the chain to scan.
* count - incremented by the number of abort messages queued.
* exact - when true, the scan of the chain stops after the first match.
* tcps - TCP stack; its netstack's IP stack owns the fanout table.
*
* Returns 0, or ENOMEM if a message could not be allocated. Messages that
* were built before the allocation failure are still dispatched.
*
* The chain is walked with connf_lock held, collecting at most 500 matches
* per pass. The collected messages are handed to each connection's squeue
* only after the lock has been dropped; after a full pass of 500 the walk
* starts over from the head of the chain.
*/
static int
tcp_ioctl_abort_bucket(tcp_ioc_abort_conn_t *acp, int index, int *count,
boolean_t exact, tcp_stack_t *tcps)
{
int nmatch, err = 0;
tcp_t *tcp;
/* listhead/last form a singly linked list of messages through b_next */
MBLKP mp, last, listhead = NULL;
conn_t *tconnp;
connf_t *connfp;
ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;
connfp = &ipst->ips_ipcl_conn_fanout[index];
startover:
nmatch = 0;
mutex_enter(&connfp->connf_lock);
for (tconnp = connfp->connf_head; tconnp != NULL;
tconnp = tconnp->conn_next) {
tcp = tconnp->conn_tcp;
/*
* We are missing a check on sin6_scope_id for linklocals here,
* but current usage is just for aborting based on zoneid
* for shared-IP zones.
*/
if (TCP_AC_MATCH(acp, tconnp, tcp)) {
/*
* Hold the conn while its message is pending. The hold is
* dropped here only if the message cannot be allocated.
* NOTE(review): the hold for a queued message is presumably
* released by the squeue processing - confirm.
*/
CONN_INC_REF(tconnp);
mp = tcp_ioctl_abort_build_msg(acp, tcp);
if (mp == NULL) {
err = ENOMEM;
CONN_DEC_REF(tconnp);
break;
}
/* Remember the target tcp in b_prev until the lock is dropped. */
mp->b_prev = (mblk_t *)tcp;
if (listhead == NULL) {
listhead = mp;
last = mp;
} else {
last->b_next = mp;
last = mp;
}
nmatch++;
if (exact)
break;
}
/* Avoid holding lock for too long. */
if (nmatch >= 500)
break;
}
mutex_exit(&connfp->connf_lock);
/* Pass mp into the correct tcp */
while ((mp = listhead) != NULL) {
listhead = listhead->b_next;
tcp = (tcp_t *)mp->b_prev;
/* Clear the list linkage before the message leaves this function. */
mp->b_next = mp->b_prev = NULL;
SQUEUE_ENTER_ONE(tcp->tcp_connp->conn_sqp, mp,
tcp_ioctl_abort_handler, tcp->tcp_connp, NULL,
SQ_FILL, SQTAG_TCP_ABORT_BUCKET);
}
*count += nmatch;
/* A full batch means the walk was cut short: scan the chain again. */
if (nmatch >= 500 && err == 0)
goto startover;
return (err);
}
/*
 * Abort all connections that match the attributes specified in acp.
 *
 *	acp	validated abort request (see tcp_ioctl_abort_conn())
 *	tcps	TCP stack to operate on
 *
 * Returns 0 if at least one connection was asked to abort, ENOENT if
 * nothing matched, or the error reported by tcp_ioctl_abort_bucket().
 *
 * Every bucket of the connection fanout table is visited.  Earlier
 * versions of this function prepared a "ports" word for a single-bucket
 * lookup but never turned it into a bucket index, so the single-bucket
 * path could never be taken; that dead code (which also stored into a
 * uint32_t through a uint16_t pointer) has been removed.  Behaviour is
 * unchanged.
 */
static int
tcp_ioctl_abort(tcp_ioc_abort_conn_t *acp, tcp_stack_t *tcps)
{
	int		err = 0, count = 0;
	boolean_t	exact;	/* set when there is no wildcard */
	int		index;
	ushort_t	logflags;
	ip_stack_t	*ipst = tcps->tcps_netstack->netstack_ip;

	/*
	 * The request is "exact" when remote address, both ports and the
	 * local address are all specified; tcp_ioctl_abort_bucket() then
	 * stops at the first match within each chain.
	 */
	if (acp->ac_local.ss_family == AF_INET) {
		exact = (TCP_AC_V4REMOTE(acp) != INADDR_ANY &&
		    TCP_AC_V4LPORT(acp) != 0 && TCP_AC_V4RPORT(acp) != 0 &&
		    TCP_AC_V4LOCAL(acp) != INADDR_ANY);
	} else {
		exact = (!IN6_IS_ADDR_UNSPECIFIED(&TCP_AC_V6REMOTE(acp)) &&
		    TCP_AC_V6LPORT(acp) != 0 && TCP_AC_V6RPORT(acp) != 0 &&
		    !IN6_IS_ADDR_UNSPECIFIED(&TCP_AC_V6LOCAL(acp)));
	}

	/* Walk every hash chain; stop at the first failure. */
	for (index = 0; index < ipst->ips_ipcl_conn_fanout_size; index++) {
		err = tcp_ioctl_abort_bucket(acp, index, &count, exact, tcps);
		if (err != 0)
			break;
	}

	logflags = SL_TRACE | SL_NOTE;
	/*
	 * Don't print this message to the console if the operation was done
	 * to a non-global zone.
	 */
	if (acp->ac_zoneid == GLOBAL_ZONEID || acp->ac_zoneid == ALL_ZONES)
		logflags |= SL_CONSOLE;
	(void) strlog(TCP_MOD_ID, 0, 1, logflags, "TCP_IOC_ABORT_CONN: "
	    "aborted %d connection%c\n", count, ((count > 1) ? 's' : ' '));
	if (err == 0 && count == 0)
		err = ENOENT;
	return (err);
}
/*
 * Process the TCP_IOC_ABORT_CONN ioctl request.
 *
 *	q	queue the ioctl arrived on
 *	mp	the M_IOCTL message; mp->b_cont carries a
 *		tcp_ioc_abort_conn_t
 *
 * The request is always answered on q: miocack() on success, miocnak()
 * with one of the following otherwise:
 *	EINVAL	missing, short or wrongly sized payload, unknown zone id,
 *		bad state range, or mismatching/unsupported address
 *		families
 *	EPERM	the caller fails secpolicy_ip_config()
 *	other	whatever tcp_ioctl_abort() returns (e.g. ENOENT, ENOMEM)
 *
 * The payload attached to mp is freed before the reply is sent.
 */
void
tcp_ioctl_abort_conn(queue_t *q, mblk_t *mp)
{
	int			err;
	IOCP			iocp;
	MBLKP			mp1;
	sa_family_t		laf, raf;
	tcp_ioc_abort_conn_t	*acp;
	zone_t			*zptr;
	conn_t			*connp = Q_TO_CONN(q);
	tcp_t			*tcp = connp->conn_tcp;
	tcp_stack_t		*tcps = tcp->tcp_tcps;

	iocp = (IOCP)mp->b_rptr;

	/*
	 * The structure is read straight out of the first data block, so
	 * besides the advertised size, that block itself must be large
	 * enough to hold it.
	 */
	if ((mp1 = mp->b_cont) == NULL ||
	    iocp->ioc_count != sizeof (tcp_ioc_abort_conn_t) ||
	    MBLKL(mp1) < sizeof (tcp_ioc_abort_conn_t)) {
		err = EINVAL;
		goto out;
	}

	/* check permissions */
	if (secpolicy_ip_config(iocp->ioc_cr, B_FALSE) != 0) {
		err = EPERM;
		goto out;
	}

	/* Anything chained behind the request structure is not needed. */
	if (mp1->b_cont != NULL) {
		freemsg(mp1->b_cont);
		mp1->b_cont = NULL;
	}

	acp = (tcp_ioc_abort_conn_t *)mp1->b_rptr;
	laf = acp->ac_local.ss_family;
	raf = acp->ac_remote.ss_family;

	/*
	 * Check that a zone with the supplied zoneid exists.  The lookup
	 * must use the id from the request; looking up the caller's own
	 * zone would always succeed and validate nothing.
	 */
	if (acp->ac_zoneid != GLOBAL_ZONEID && acp->ac_zoneid != ALL_ZONES) {
		zptr = zone_find_by_id(acp->ac_zoneid);
		if (zptr != NULL) {
			zone_rele(zptr);
		} else {
			err = EINVAL;
			goto out;
		}
	}

	/*
	 * For exclusive stacks we set the zoneid to zero
	 * to make TCP operate as if in the global zone.
	 */
	if (tcps->tcps_netstack->netstack_stackid != GLOBAL_NETSTACKID)
		acp->ac_zoneid = GLOBAL_ZONEID;

	if (acp->ac_start < TCPS_SYN_SENT || acp->ac_end > TCPS_TIME_WAIT ||
	    acp->ac_start > acp->ac_end || laf != raf ||
	    (laf != AF_INET && laf != AF_INET6)) {
		err = EINVAL;
		goto out;
	}

	tcp_ioctl_abort_dump(acp);
	err = tcp_ioctl_abort(acp, tcps);

out:
	if (mp1 != NULL) {
		freemsg(mp1);
		mp->b_cont = NULL;
	}

	if (err != 0)
		miocnak(q, mp, 0, err);
	else
		miocack(q, mp, 0, 0);
}
/*
 * Timeout callback armed by tcp_conn_reclaim() (and by itself).  It decides
 * whether the stack passed in arg may leave defensive mode: tcps_reclaim is
 * cleared once free memory has recovered or the connection count has
 * dropped below maxusers; otherwise the timer is armed again for another
 * tcps_reclaim_period milliseconds.
 */
void
tcp_reclaim_timer(void *arg)
{
	extern pgcnt_t	lotsfree, needfree;
	tcp_stack_t	*tcps = (tcp_stack_t *)arg;
	int64_t		nconn = 0;
	int		slot = 0;

	/* Add up the per-CPU connection counters. */
	while (slot < tcps->tcps_sc_cnt) {
		nconn += tcps->tcps_sc[slot]->tcp_sc_conn_cnt;
		slot++;
	}

	mutex_enter(&tcps->tcps_reclaim_lock);
	/*
	 * Finding tcps_reclaim already cleared happens only when a stack is
	 * going away.  tcps_reclaim_tid must not be reset to 0 in that
	 * case, so nothing at all is touched.
	 */
	if (tcps->tcps_reclaim) {
		if (freemem < lotsfree + needfree && nconn >= maxusers) {
			/* Stay in defensive mode and restart the timer */
			tcps->tcps_reclaim_tid = timeout(tcp_reclaim_timer,
			    tcps, MSEC_TO_TICK(tcps->tcps_reclaim_period));
		} else {
			tcps->tcps_reclaim = B_FALSE;
			tcps->tcps_reclaim_tid = 0;
		}
	}
	mutex_exit(&tcps->tcps_reclaim_lock);
}
/*
 * Kmem reclaim call back function.  When the system is under memory
 * pressure, we set the TCP stack variable tcps_reclaim to true.  This
 * variable is reset to false after tcps_reclaim_period msecs.  During this
 * period, TCP will be more aggressive in aborting connections not making
 * progress, meaning retransmitting for some time (tcp_early_abort seconds).
 * TCP will also not accept new connection request for those listeners whose
 * q or q0 is not empty.
 *
 * Nothing is done when tcp_do_reclaim is false or when free memory is at
 * or above lotsfree + needfree.  Otherwise every netstack is visited and
 * defensive mode is switched on for each TCP stack whose connection count
 * has reached maxusers and that is not in defensive mode already.
 */
/* ARGSUSED */
void
tcp_conn_reclaim(void *arg)
{
	netstack_handle_t nh;
	netstack_t *ns;
	tcp_stack_t *tcps;
	extern pgcnt_t lotsfree, needfree;

	if (!tcp_do_reclaim)
		return;

	/*
	 * The reclaim function may be called even when the system is not
	 * really under memory pressure.
	 */
	if (freemem >= lotsfree + needfree)
		return;

	netstack_next_init(&nh);
	while ((ns = netstack_next(&nh)) != NULL) {
		int i;
		int64_t tot_conn = 0;

		/*
		 * A netstack can be visible to netstack_next() before its
		 * TCP instance has been attached (for instance if this
		 * callback fires while TCP is still initializing), in which
		 * case netstack_tcp is NULL.  Skip such a stack instead of
		 * dereferencing a NULL pointer below.
		 */
		if ((tcps = ns->netstack_tcp) == NULL) {
			netstack_rele(ns);
			continue;
		}

		/*
		 * Even if the system is under memory pressure, the reason may
		 * not be because of TCP activity.  Check the number of
		 * connections in each stack.  If the number exceeds the
		 * threshold (maxusers), turn on defensive mode.
		 */
		for (i = 0; i < tcps->tcps_sc_cnt; i++)
			tot_conn += tcps->tcps_sc[i]->tcp_sc_conn_cnt;
		if (tot_conn < maxusers) {
			netstack_rele(ns);
			continue;
		}

		mutex_enter(&tcps->tcps_reclaim_lock);
		if (!tcps->tcps_reclaim) {
			tcps->tcps_reclaim = B_TRUE;
			tcps->tcps_reclaim_tid = timeout(tcp_reclaim_timer,
			    tcps, MSEC_TO_TICK(tcps->tcps_reclaim_period));
			TCP_STAT(tcps, tcp_reclaim_cnt);
		}
		mutex_exit(&tcps->tcps_reclaim_lock);
		netstack_rele(ns);
	}
	netstack_next_fini(&nh);
}
/*
 * Look up the listener limit configured for a port.
 *
 *	tcps	TCP stack whose configuration list is searched
 *	port	port number, in host byte order
 *
 * Returns the ratio of the first entry whose tl_port equals port, or 0 if
 * there is no such entry.  The list is searched under
 * tcps_listener_conf_lock.
 */
uint32_t
tcp_find_listener_conf(tcp_stack_t *tcps, in_port_t port)
{
	tcp_listener_t	*entry;
	uint32_t	found = 0;

	mutex_enter(&tcps->tcps_listener_conf_lock);
	entry = list_head(&tcps->tcps_listener_conf);
	while (entry != NULL && entry->tl_port != port)
		entry = list_next(&tcps->tcps_listener_conf, entry);
	if (entry != NULL)
		found = entry->tl_ratio;
	mutex_exit(&tcps->tcps_listener_conf_lock);

	return (found);
}
/*
 * Ndd "get" helper for the listener limit configuration: appends one
 * "port:ratio " pair to mp for every entry on the list of the TCP stack
 * that q belongs to.  Always returns 0.
 */
/* ARGSUSED */
int
tcp_listener_conf_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	tcp_stack_t	*tcps = Q_TO_TCP(q)->tcp_tcps;
	tcp_listener_t	*entry;

	mutex_enter(&tcps->tcps_listener_conf_lock);
	entry = list_head(&tcps->tcps_listener_conf);
	while (entry != NULL) {
		(void) mi_mpprintf(mp, "%d:%d ", entry->tl_port,
		    entry->tl_ratio);
		entry = list_next(&tcps->tcps_listener_conf, entry);
	}
	mutex_exit(&tcps->tcps_listener_conf_lock);

	return (0);
}
/*
 * Ndd param helper routine to add a new listener limit configuration.
 *
 *	value	string of the form "<port>:<ratio>", both decimal
 *
 * If an entry for the port already exists its ratio is replaced, otherwise
 * a new entry is appended to the list of the TCP stack that q belongs to.
 *
 * Returns 0 on success, EINVAL if value cannot be parsed, the port is not
 * in 1..USHRT_MAX, or the ratio is not positive or does not fit in 32
 * bits, and ENOMEM if a new entry cannot be allocated.
 */
/* ARGSUSED */
int
tcp_listener_conf_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
    cred_t *cr)
{
	tcp_listener_t	*new_tl;
	tcp_listener_t	*tl;
	long		lport;
	long		ratio;
	char		*colon;
	tcp_stack_t	*tcps = Q_TO_TCP(q)->tcp_tcps;

	if (ddi_strtol(value, &colon, 10, &lport) != 0 || lport <= 0 ||
	    lport > USHRT_MAX || *colon != ':') {
		return (EINVAL);
	}

	/*
	 * The ratio is handed back as a uint32_t by
	 * tcp_find_listener_conf(), so a value that does not survive the
	 * round trip through uint32_t would be silently truncated (possibly
	 * to 0, which that function also uses for "no configuration").
	 * Reject such values.
	 */
	if (ddi_strtol(colon + 1, NULL, 10, &ratio) != 0 || ratio <= 0 ||
	    (long)(uint32_t)ratio != ratio)
		return (EINVAL);

	mutex_enter(&tcps->tcps_listener_conf_lock);
	for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
	    tl = list_next(&tcps->tcps_listener_conf, tl)) {
		/* There is an existing entry, so update its ratio value. */
		if (tl->tl_port == lport) {
			tl->tl_ratio = ratio;
			mutex_exit(&tcps->tcps_listener_conf_lock);
			return (0);
		}
	}

	/* The lock is held here, so the allocation must not sleep. */
	if ((new_tl = kmem_alloc(sizeof (tcp_listener_t), KM_NOSLEEP)) ==
	    NULL) {
		mutex_exit(&tcps->tcps_listener_conf_lock);
		return (ENOMEM);
	}

	new_tl->tl_port = lport;
	new_tl->tl_ratio = ratio;
	list_insert_tail(&tcps->tcps_listener_conf, new_tl);
	mutex_exit(&tcps->tcps_listener_conf_lock);
	return (0);
}
/*
 * Ndd helper that deletes the listener limit configured for one port.
 *
 *	value	decimal port number
 *
 * Returns 0 after removing and freeing the entry, EINVAL if value cannot
 * be parsed or is outside 1..USHRT_MAX, and ESRCH if no entry exists for
 * the port.
 */
/* ARGSUSED */
int
tcp_listener_conf_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
    cred_t *cr)
{
	tcp_stack_t	*tcps = Q_TO_TCP(q)->tcp_tcps;
	tcp_listener_t	*victim;
	long		lport;

	if (ddi_strtol(value, NULL, 10, &lport) != 0 || lport <= 0 ||
	    lport > USHRT_MAX) {
		return (EINVAL);
	}

	/* Find the entry and unlink it while holding the lock. */
	mutex_enter(&tcps->tcps_listener_conf_lock);
	victim = list_head(&tcps->tcps_listener_conf);
	while (victim != NULL && victim->tl_port != lport)
		victim = list_next(&tcps->tcps_listener_conf, victim);
	if (victim != NULL)
		list_remove(&tcps->tcps_listener_conf, victim);
	mutex_exit(&tcps->tcps_listener_conf_lock);

	if (victim == NULL)
		return (ESRCH);

	/* The entry is off the list, so it is freed without the lock. */
	kmem_free(victim, sizeof (tcp_listener_t));
	return (0);
}
/*
 * To remove all listener limit configuration in a tcp_stack_t.
 *
 * Every entry is unlinked and freed, then the list and its lock are
 * destroyed; the configuration must not be used again afterwards.  The
 * lock is released before mutex_destroy() is called so that the mutex is
 * not destroyed while it is still held.
 */
void
tcp_listener_conf_cleanup(tcp_stack_t *tcps)
{
	tcp_listener_t *tl;

	mutex_enter(&tcps->tcps_listener_conf_lock);
	while ((tl = list_head(&tcps->tcps_listener_conf)) != NULL) {
		list_remove(&tcps->tcps_listener_conf, tl);
		kmem_free(tl, sizeof (tcp_listener_t));
	}
	mutex_exit(&tcps->tcps_listener_conf_lock);

	mutex_destroy(&tcps->tcps_listener_conf_lock);
	list_destroy(&tcps->tcps_listener_conf);
}
/*
 * CPU state change callback; must be called with cpu_lock held.
 *
 * For the events that bring a CPU in (CPU_CONFIG, CPU_ON, CPU_INIT and
 * CPU_CPUPART_IN) every TCP stack gets its per-CPU statistics array
 * extended so that it covers the CPU's cpu_seqid.  All other events are
 * ignored.  Always returns 0.
 */
/* ARGSUSED */
int
tcp_cpu_update(cpu_setup_t what, int id, void *arg)
{
	netstack_handle_t	handle;
	netstack_t		*stack;
	tcp_stack_t		*tcps;
	cpu_t			*cpup;
	int			slot;

	ASSERT(MUTEX_HELD(&cpu_lock));
	cpup = cpu[id];

	/* Nothing to do unless a CPU is coming in. */
	if (what != CPU_CONFIG && what != CPU_ON && what != CPU_INIT &&
	    what != CPU_CPUPART_IN) {
		return (0);
	}

	netstack_next_init(&handle);
	while ((stack = netstack_next(&handle)) != NULL) {
		tcps = stack->netstack_tcp;
		if (cpup->cpu_seqid >= tcps->tcps_sc_cnt) {
			/* Fill every missing slot up to this CPU's. */
			slot = tcps->tcps_sc_cnt;
			while (slot <= cpup->cpu_seqid) {
				ASSERT(tcps->tcps_sc[slot] == NULL);
				tcps->tcps_sc[slot] = kmem_zalloc(
				    sizeof (tcp_stats_cpu_t), KM_SLEEP);
				slot++;
			}
			/*
			 * The barrier sits between filling the slots and
			 * raising the count.
			 */
			membar_producer();
			tcps->tcps_sc_cnt = cpup->cpu_seqid + 1;
		}
		netstack_rele(stack);
	}
	netstack_next_fini(&handle);

	return (0);
}
/*
 * Diagnostic helper that formats a description of a tcp_t.
 *
 *	tcp	connection to describe; NULL yields the string "NULL_TCP"
 *	sup_buf	buffer to format into, or NULL to use an internal static
 *		buffer
 *	format	DISP_ADDR_AND_PORT prints "[laddr.lport, faddr.fport] STATE";
 *		any other value prints "[lport, fport] STATE"
 *
 * Returns the buffer that was written (or the "NULL_TCP" literal).
 *
 * The internal static buffer is shared, so concurrent callers that pass a
 * NULL sup_buf can corrupt each other's output.  The size of a supplied
 * buffer is not checked; the caller has to make sure it is big enough.
 */
char *
tcp_display(tcp_t *tcp, char *sup_buf, char format)
{
	static char	shared_buf[INET6_ADDRSTRLEN * 2 + 80];
	char		unknown[30];
	char		laddr_str[INET6_ADDRSTRLEN];
	char		faddr_str[INET6_ADDRSTRLEN];
	in6_addr_t	laddr, faddr;
	conn_t		*connp;
	char		*state_name;
	char		*out;

	if (tcp == NULL)
		return ("NULL_TCP");

	out = (sup_buf != NULL) ? sup_buf : shared_buf;
	connp = tcp->tcp_connp;

	switch (tcp->tcp_state) {
	case TCPS_CLOSED:
		state_name = "TCP_CLOSED";
		break;
	case TCPS_IDLE:
		state_name = "TCP_IDLE";
		break;
	case TCPS_BOUND:
		state_name = "TCP_BOUND";
		break;
	case TCPS_LISTEN:
		state_name = "TCP_LISTEN";
		break;
	case TCPS_SYN_SENT:
		state_name = "TCP_SYN_SENT";
		break;
	case TCPS_SYN_RCVD:
		state_name = "TCP_SYN_RCVD";
		break;
	case TCPS_ESTABLISHED:
		state_name = "TCP_ESTABLISHED";
		break;
	case TCPS_CLOSE_WAIT:
		state_name = "TCP_CLOSE_WAIT";
		break;
	case TCPS_FIN_WAIT_1:
		state_name = "TCP_FIN_WAIT_1";
		break;
	case TCPS_FIN_WAIT_2:
		state_name = "TCP_FIN_WAIT_2";
		break;
	case TCPS_CLOSING:
		state_name = "TCP_CLOSING";
		break;
	case TCPS_LAST_ACK:
		state_name = "TCP_LAST_ACK";
		break;
	case TCPS_TIME_WAIT:
		state_name = "TCP_TIME_WAIT";
		break;
	default:
		(void) mi_sprintf(unknown, "TCPUnkState(%d)", tcp->tcp_state);
		state_name = unknown;
		break;
	}

	/* DISP_PORT_ONLY and any unrecognized format: ports only. */
	if (format != DISP_ADDR_AND_PORT) {
		(void) mi_sprintf(out, "[%u, %u] %s",
		    ntohs(connp->conn_lport), ntohs(connp->conn_fport),
		    state_name);
		return (out);
	}

	/*
	 * Both address families are printed through the AF_INET6 path; IPv4
	 * addresses are turned into v4-mapped form first.  The foreign
	 * address comes from the conn_t; according to the note this code
	 * has always carried, that is the real destination address and not
	 * the next hop's address if source routing is used.
	 */
	if (connp->conn_ipversion != IPV4_VERSION) {
		laddr = connp->conn_laddr_v6;
		faddr = connp->conn_faddr_v6;
	} else {
		IN6_IPADDR_TO_V4MAPPED(connp->conn_laddr_v4, &laddr);
		IN6_IPADDR_TO_V4MAPPED(connp->conn_faddr_v4, &faddr);
	}
	(void) inet_ntop(AF_INET6, &laddr, laddr_str, sizeof (laddr_str));
	(void) inet_ntop(AF_INET6, &faddr, faddr_str, sizeof (faddr_str));
	(void) mi_sprintf(out, "[%s.%u, %s.%u] %s",
	    laddr_str, ntohs(connp->conn_lport), faddr_str,
	    ntohs(connp->conn_fport), state_name);

	return (out);
}