sctp.c revision 5dddb8bae488ea9cca9f94c51a6cc9396c43dba2
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#define _SUN_TPI_VERSION 2
#include <sys/xti_inet.h>
#include <sys/isa_defs.h>
#include <inet/ipclassifier.h>
#include <inet/ipsec_impl.h>
#include <inet/sctp_crc32.h>
#include "sctp_impl.h"
#include "sctp_addr.h"
#include "sctp_asconf.h"
int sctpdebug;
void sctp_display_all();
static void sctp_process_recvq(void *);
static void sctp_rq_tq_init(void);
static void sctp_rq_tq_fini(void);
static void sctp_conn_cache_init();
static void sctp_conn_cache_fini();
static int sctp_conn_cache_constructor();
static void sctp_conn_cache_destructor();
void sctp_inc_taskq(void);
/*
* SCTP receive queue taskq
*
* At SCTP initialization time, a default taskq is created for
* servicing packets received when the interrupt thread cannot
* get a hold on the sctp_t. The number of taskq can be increased in
* sctp_find_next_tq() when an existing taskq cannot be dispatched.
* The taskqs are never removed. But the max number of taskq which
* can be created is controlled by sctp_recvq_tq_list_max_sz. Note
* that SCTP recvq taskq is not tied to any specific CPU or ill.
*
* Those taskqs are stored in an array recvq_tq_list. And they are
* used in a round robin fashion. The current taskq being used is
* determined by recvq_tq_list_cur.
*/
/* This lock protects the SCTP recvq_tq_list array and recvq_tq_list_cur_sz. */
static kmutex_t sctp_rq_tq_lock;
/* Controls the maximum number of recvq taskqs which can be created. */
int sctp_recvq_tq_list_max_sz = 16;
/* Array in which the recvq taskqs are stored; used in a round robin fashion. */
static taskq_t **recvq_tq_list;
/* Current number of recvq taskq. At least 1 for the default taskq. */
/*
 * NOTE(review): the sentence above reads like a description of
 * recvq_tq_list_cur_sz.  Per the overview comment, recvq_tq_list_cur
 * determines which taskq is currently being used -- confirm.
 */
static uint32_t recvq_tq_list_cur = 0;
/* The minimum number of threads for each taskq. */
int sctp_recvq_tq_thr_min = 4;
/* The maximum number of threads for each taskq. */
int sctp_recvq_tq_thr_max = 16;
/* The minimum number of tasks for each taskq. */
int sctp_recvq_tq_task_min = 5;
/* The maximum number of tasks for each taskq. */
int sctp_recvq_tq_task_max = 50;
/*
* Default queue used for sending packets. No need to have lock for it
* as it should never be changed.
*/
int sctp_g_q_fd;
/* The default sctp_t for responding out of the blue packets. */
/* Protected by sctp_g_lock */
struct kmem_cache *sctp_conn_cache;
#define SCTP_CONDEMNED(sctp) \
mutex_enter(&sctp_g_lock); \
#define SCTP_UNLINK(sctp) \
mutex_enter(&sctp_g_lock); \
/*
* Hooks for Sun Cluster. On non-clustered nodes these will remain NULL.
* PSARC/2005/602.
*/
/*
 * Report the version number of the SCTP kernel interface.
 *
 * cl_ver is the interface version the caller expects.  When it equals
 * SCTP_ITF_VER that same value is handed back; any other value yields -1.
 */
int
sctp_itf_ver(int cl_ver)
{
	return ((cl_ver == SCTP_ITF_VER) ? SCTP_ITF_VER : -1);
}
/*
* Called when we need a new sctp instantiation but don't really have a
* new q to hang it off of. Copy the priv flag from the passed in structure.
*/
sctp_t *
{
return (NULL);
return (NULL);
}
/* Inherit information from the "parent" */
return (NULL);
}
}
/*
* Link to the global as soon as possible so that this sctp_t
* can be found.
*/
return (sctp);
}
/*
* We are dying for some reason. Try to do it gracefully.
*/
void
{
/*
* Check to see if we need to notify upper layer.
*/
!SCTP_IS_DETACHED(sctp)) {
}
/*
* Socket is gone, detach.
*/
}
}
/* Remove this sctp from all hashes. */
/*
* If the sctp_t is detached, we need to finish freeing up
* the resources. At this point, ip_fanout_sctp() should have
* a hold on this sctp_t. Some thread doing snmp stuff can
* have a hold. And a taskq can also have a hold waiting to
* work. sctp_unlink() the sctp_t from the global list so
* that no new thread can find it. Then do a SCTP_REFRELE().
* The sctp_t will be freed after all those threads are done.
*/
if (SCTP_IS_DETACHED(sctp)) {
}
}
/*
* Called by upper layer when it wants to close this association.
* Depending on the state of this association, we need to do
* different things.
*
* If the state is below COOKIE_ECHOED or it is COOKIE_ECHOED but with
* no sent data, just remove this sctp from all the hashes. This
* makes sure that all packets from the other end will go to the default
* sctp handling. The upper layer will then do a sctp_close() to clean
* up.
*
* Otherwise, check and see if SO_LINGER is set. If it is set, check
* the value. If the value is 0, consider this an abortive close. Send
* an ABORT message and kill the association.
*
*/
int
{
int error = 0;
switch (sctp->sctp_state) {
case SCTPS_IDLE:
case SCTPS_BOUND:
case SCTPS_LISTEN:
break;
case SCTPS_COOKIE_WAIT:
case SCTPS_COOKIE_ECHOED:
/*
* Close during the connect 3-way handshake
* but here there may or may not be pending data
* already on queue. Process almost same as in
* the ESTABLISHED state.
*/
break;
}
/* FALLTHRU */
default:
/*
* If SO_LINGER has set a zero linger time, abort the
* connection with a reset.
*/
break;
}
/*
* If there is unread data, send an ABORT
*/
break;
}
/*
* Transmit the shutdown before detaching the sctp_t.
* no longer owns the sctp_t thus others can modify it.
*/
sctp_send_shutdown(sctp, 0);
/* Pass gathered wisdom to IP for keeping */
/*
* If lingering on close then wait until the shutdown
* is complete, or the SO_LINGER time passes, or an
* can be called more than once. Make sure that only
* one thread waits.
*/
!sctp->sctp_lingering) {
/*
* Process the sendq to send the SHUTDOWN out
* before waiting.
*/
sctp->sctp_client_errno = 0;
sctp->sctp_client_errno == 0) {
if (ret < 0) {
/* Stoptime has reached. */
break;
} else if (ret == 0) {
/* Got a signal. */
break;
}
}
sctp->sctp_client_errno = 0;
}
return (error);
}
/* Remove this sctp from all hashes so nobody can find it. */
return (error);
}
void
{
/* If the graceful shutdown has not been completed, just return. */
return;
}
/*
* Since sctp_t is in SCTPS_IDLE state, so the only thread which
* can have a hold on the sctp_t is doing snmp stuff. Just do
* a SCTP_REFRELE() here after the SCTP_UNLINK(). It will
* be freed when the other thread is done.
*/
}
/*
* Unlink from global list and do the eager close.
* Remove the refhold implicit in being on the global list.
*/
void
{
}
/*
* The sctp_t is going away. Remove it from all lists and set it
* to SCTPS_IDLE. The caller has to remove it from the
* global list. The freeing up of memory is deferred until
* sctp_free(). This is needed since a thread in sctp_input() might have
* done a SCTP_REFHOLD on this structure before it was removed from the
* hashes.
*/
static void
{
/* Stop and free the timers */
}
}
/* Set the CONN_CLOSING flag so that IP will not cache IRE again. */
/* Remove from all hashes. */
/*
* Clean up the recvq as much as possible. All those packets
* will be silently dropped as this sctp_t is now in idle state.
*/
}
}
}
/*
*/
static void
{
sctp->sctp_hdr_len = 0;
sctp->sctp_ip_hdr_len = 0;
sctp->sctp_iphc_len = 0;
sctp->sctp_hdr_len = 0;
}
sctp->sctp_hdr6_len = 0;
sctp->sctp_ip_hdr6_len = 0;
sctp->sctp_iphc6_len = 0;
sctp->sctp_hdr6_len = 0;
}
}
static void
{
}
}
return;
}
goto free_unsent;
}
/*
* Cleanup all the messages in the stream queue and the reassembly lists.
* If 'free' is true, then delete the streams as well.
*/
void
{
int i;
/* walk thru and flush out anything remaining in the Q */
for (i = 0; i < sctp->sctp_num_istr; i++) {
}
}
if (free) {
sctp->sctp_num_istr = 0;
}
}
/* un-ordered fragments */
}
}
}
/*
* Last reference to the sctp_t is gone. Free all memory associated with it.
* Called from SCTP_REFRELE. Called inline in sctp_close()
*/
void
{
int cnt;
/* Unlink it from the global list */
/* Free up all the resources. */
/* blow away sctp stream management */
}
/* Remove all data transfer resources. */
sctp->sctp_istr_nmsgs = 0;
sctp->sctp_rxqueued = 0;
sctp->sctp_unacked = 0;
sctp->sctp_unsent = 0;
/* Clear out default xmit settings */
sctp->sctp_def_stream = 0;
sctp->sctp_def_flags = 0;
sctp->sctp_def_ppid = 0;
sctp->sctp_def_context = 0;
sctp->sctp_def_timetolive = 0;
}
sctp->sctp_sack_gaps = 0;
}
/* Remove all the address resources. */
}
if (ipp->ipp_rthdrlen != 0) {
ipp->ipp_rthdrlen = 0;
}
if (ipp->ipp_dstoptslen != 0) {
ipp->ipp_dstoptslen = 0;
}
if (ipp->ipp_rtdstoptslen != 0) {
ipp->ipp_rtdstoptslen = 0;
}
if (ipp->ipp_hopoptslen != 0) {
ipp->ipp_hopoptslen = 0;
}
sctp->sctp_hopoptslen = 0;
}
sctp->sctp_dstoptslen = 0;
}
sctp->sctp_rtdstoptslen = 0;
}
sctp->sctp_rthdrlen = 0;
}
/* Clear all the bitfields. */
/* It is time to update the global statistics. */
sctp->sctp_opkts = 0;
sctp->sctp_obchunks = 0;
sctp->sctp_odchunks = 0;
sctp->sctp_oudchunks = 0;
sctp->sctp_rxtchunks = 0;
sctp->sctp_ipkts = 0;
sctp->sctp_ibchunks = 0;
sctp->sctp_idchunks = 0;
sctp->sctp_iudchunks = 0;
sctp->sctp_fragdmsgs = 0;
sctp->sctp_reassmsgs = 0;
sctp->sctp_autoclose = 0;
sctp->sctp_tx_adaption_code = 0;
/* Clean up conn_t stuff */
}
}
}
}
}
/* Diagnostic routine used to return a string associated with the sctp state. */
char *
{
char *buf;
char buf1[30];
char *cp;
return ("NULL_SCTP");
switch (sctp->sctp_state) {
case SCTPS_IDLE:
cp = "SCTP_IDLE";
break;
case SCTPS_BOUND:
cp = "SCTP_BOUND";
break;
case SCTPS_LISTEN:
cp = "SCTP_LISTEN";
break;
case SCTPS_COOKIE_WAIT:
cp = "SCTP_COOKIE_WAIT";
break;
case SCTPS_COOKIE_ECHOED:
cp = "SCTP_COOKIE_ECHOED";
break;
case SCTPS_ESTABLISHED:
cp = "SCTP_ESTABLISHED";
break;
case SCTPS_SHUTDOWN_PENDING:
cp = "SCTP_SHUTDOWN_PENDING";
break;
case SCTPS_SHUTDOWN_SENT:
cp = "SCTPS_SHUTDOWN_SENT";
break;
case SCTPS_SHUTDOWN_RECEIVED:
cp = "SCTPS_SHUTDOWN_RECEIVED";
break;
case SCTPS_SHUTDOWN_ACK_SENT:
cp = "SCTPS_SHUTDOWN_ACK_SENT";
break;
default:
break;
}
return (buf);
}
/*
* Initialize protocol control block. If a parent exists, inherit
* all values set through setsockopt().
*/
static int
{
int err;
int cnt;
sctp->sctp_nsaddrs = 0;
saddr_ipif));
}
sctp->sctp_ports = 0;
sctp->sctp_strikes = 0;
sctp->sctp_sack_gaps = 0;
/*
* Inherit from parent
*/
return (ENOMEM);
sctp->sctp_iphc6_len = 0;
return (ENOMEM);
}
/*
* Copy the IP+SCTP header templates from listener
*/
sizeof (ip6i_t));
} else {
}
/* xxx should be a better way to copy these flags xxx */
} else {
/*
* Initialize the header template
*/
return (err);
}
return (err);
}
/*
* Set to system defaults
*/
}
sctp->sctp_prsctpdrop = 0;
sctp->sctp_msgcount = 0;
return (0);
}
/*
* Extracts the init tag from an INIT chunk and checks if it matches
* the sctp's verification tag. Returns 0 if it doesn't match, 1 if
* it does.
*/
static boolean_t
{
/* Need at least the data chunk hdr and the first 4 bytes of INIT */
return (B_FALSE);
}
return (B_TRUE);
}
return (B_FALSE);
}
/*
* sctp_icmp_error is called by sctp_input() to process ICMP error messages
* passed up by IP. The queue is the default queue. We need to find a sctp_t
* that corresponds to the returned datagram. Passes the message back in on
* the correct queue once it has located the connection.
* Assumes that IP has pulled up everything up to and including
* the ICMP header.
*/
void
{
int iph_hdr_length;
return;
}
/* not enough data for SCTP header */
return;
}
switch (icmph->icmph_type) {
case ICMP_DEST_UNREACHABLE:
switch (icmph->icmph_code) {
/*
* Reduce the MSS based on the new MTU. This will
* eliminate any fragmentation locally.
* N.B. There may well be some funny side-effects on
* the local send policy and the remote receive policy.
* Pending further research, we provide
* sctp_ignore_path_mtu just in case this proves
* disastrous somewhere.
*
* After updating the MSS, retransmit part of the
* dropped segment using the new mss by calling
* sctp_wput_slow(). Need to adjust all those
* params to make sure sctp_wput_slow() work properly.
*/
if (sctp_ignore_path_mtu)
break;
/* find the offending faddr */
break;
}
break;
/*
* Make sure that sfa_pmss is a multiple of
* SCTP_ALIGN.
*/
~(SCTP_ALIGN - 1);
break;
case ICMP_PORT_UNREACHABLE:
switch (sctp->sctp_state) {
case SCTPS_COOKIE_WAIT:
case SCTPS_COOKIE_ECHOED:
/* make sure the verification tag matches */
break;
}
break;
}
break;
case ICMP_HOST_UNREACHABLE:
case ICMP_NET_UNREACHABLE:
/* Record the error in case we finally time out. */
break;
default:
break;
}
break;
case ICMP_SOURCE_QUENCH: {
/* Reduce the sending rate as if we got a retransmit timeout */
break;
}
}
}
/*
* sctp_icmp_error_ipv6() is called by sctp_icmp_error() to process ICMPv6
* error messages passed up by IP.
* Assumes that IP has pulled up all the extension headers as well
* as the ICMPv6 header.
*/
static void
{
return;
}
/* XXX need ifindex to find connection */
/* not enough data for SCTP header */
return;
}
switch (icmp6->icmp6_type) {
case ICMP6_PACKET_TOO_BIG:
/*
* Reduce the MSS based on the new MTU. This will
* eliminate any fragmentation locally.
* N.B. There may well be some funny side-effects on
* the local send policy and the remote receive policy.
* Pending further research, we provide
* sctp_ignore_path_mtu just in case this proves
* disastrous somewhere.
*
* After updating the MSS, retransmit part of the
* dropped segment using the new mss by calling
* sctp_wput_slow(). Need to adjust all those
* params to make sure sctp_wput_slow() work properly.
*/
if (sctp_ignore_path_mtu)
break;
/* find the offending faddr */
break;
}
break;
/* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */
~(SCTP_ALIGN - 1);
break;
case ICMP6_DST_UNREACH:
switch (icmp6->icmp6_code) {
case ICMP6_DST_UNREACH_NOPORT:
/* make sure the verification tag matches */
break;
}
}
break;
case ICMP6_DST_UNREACH_ADMIN:
case ICMP6_DST_UNREACH_ADDR:
/* Record the error in case we finally time out. */
break;
default:
break;
}
break;
case ICMP6_PARAM_PROB:
/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
/* make sure the verification tag matches */
break;
}
}
break;
}
break;
case ICMP6_TIME_EXCEEDED:
default:
break;
}
}
/*
* Called by sockfs to create a new sctp instance.
*
* If parent pointer is passed in, inherit settings from it.
*/
sctp_t *
{
/* User must supply a credential. */
return (NULL);
return (NULL);
return (NULL);
}
switch (family) {
case AF_INET6:
break;
case AF_INET:
break;
default:
ASSERT(0);
break;
}
return (NULL);
}
/*
* Inherit local address list, local port. Parent is either
* in SCTPS_BOUND, or SCTPS_LISTEN state.
*/
return (NULL);
}
/*
* If the parent is specified, it'll be immediately
* followed by sctp_connect(). So don't add this guy to
* bind hash.
*/
} else {
}
/* Initialize SCTP instance values, our verf tag must never be 0 */
sizeof (sctp->sctp_lvtag));
if (sctp->sctp_lvtag == 0)
/* Information required by upper layer */
sizeof (sctp_upcalls_t));
/* Fill in the socket buffer limits for sctpsockfs */
}
/* If no sctp_ulpd, must be creating the default sctp */
/* Insert this in the global list. */
return (sctp);
}
/*
 * Global set-up for SCTP.  The step comments in the body list what is
 * initialized: locks, hash arrays, the faddr / sets / PR-SCTP sets
 * caches, the recvq taskq, saddr, the global PCB list, the CRC tables
 * and the kernel stats.
 *
 * If sctp_nd_init() returns zero/false, sctp_nd_free() is called.
 * NOTE(review): presumably run once when the module is loaded -- confirm
 * against the caller.
 */
void
sctp_ddi_init(void)
{
/* Initialize locks */
/* Initialize SCTP hash arrays. */
if (!sctp_nd_init()) {
sctp_nd_free();
}
/* Create the faddr cache */
/* Create the sets cache */
/* Create the PR-SCTP sets cache */
/* Initialize the recvq taskq. */
/* saddr init */
/* Global SCTP PCB list. */
/* Initialize tables used for CRC calculation */
/* Initialize sctp kernel stats. */
}
/*
 * Global tear-down for SCTP.  Calls sctp_nd_free() first; the step
 * comments in the body then list what is destroyed: the faddr / sets /
 * PR-SCTP sets caches, the recvq taskqs, saddr, the global PCB list,
 * the hash arrays and the kernel stats.
 */
void
sctp_ddi_destroy(void)
{
sctp_nd_free();
/* Destroy the faddr cache */
/* Destroy the sets cache */
/* Destroy the PR-SCTP sets cache */
/* Destroy the recvq taskqs. */
/* Destroy saddr */
/* Global SCTP PCB list. */
/* Destroy SCTP hash arrays. */
/* Destroy SCTP kernel stats. */
}
void
{
}
}
/*
 * Set up the SCTP receive queue taskq list.  Takes no arguments and
 * returns nothing.
 */
static void
sctp_rq_tq_init(void)
{
/*
 * Initialize the recvq_tq_list and create the first recvq taskq.
 * XXX Open question: what should be done if this fails?
 */
}
static void
sctp_rq_tq_fini(void)
{
int i;
for (i = 0; i < recvq_tq_list_cur_sz; i++) {
}
sizeof (taskq_t *));
}
/* Add another taskq for a new ill. */
void
sctp_inc_taskq(void)
{
char tq_name[TASKQ_NAMELEN];
return;
}
return;
}
}
#ifdef DEBUG
uint32_t sendq_loop_cnt = 0;
uint32_t sendq_collision = 0;
uint32_t sendq_empty = 0;
#endif
void
{
} else {
}
}
void
{
#ifdef DEBUG
#endif
#ifdef DEBUG
sendq_empty++;
else
#endif
return;
}
/*
* Note that while we are in this loop, other thread can put
* new packets in the receive queue. We may be looping for
* quite a while. This is OK even for an interrupt thread.
* The reason is that SCTP should only be able to send a limited
* number of packets out in a burst. So the number of times
* we go through this loop should not be many.
*/
/* If we don't have sctp_current, default to IPv4 */
#ifdef DEBUG
loop_cnt++;
#endif
}
#ifdef DEBUG
if (loop_cnt > sendq_loop_cnt)
#endif
}
#ifdef DEBUG
uint32_t recvq_loop_cnt = 0;
uint32_t recvq_call = 0;
#endif
/*
* Find the next recvq_tq to use. This routine will go thru all the
* taskqs until it can dispatch a job for the sctp. If this fails,
* it will create a new taskq and try it.
*/
static boolean_t
{
/*
* Note that since we don't hold a lock on sctp_rq_tq_lock for
* performance reason, recvq_tq_list_cur_sz can be changed during
* this loop. The problem this will create is that the loop may
* not have tried all the recvq_tq. This should be OK.
*/
TQ_NOSLEEP) != NULL) {
return (B_TRUE);
}
}
/*
* Create one more taskq and try it. Note that sctp_inc_taskq()
* may not have created another taskq if the number of recvq
* taskqs is at the maximum. We are probably in a pretty bad
* shape if this actually happens...
*/
return (B_TRUE);
}
return (B_FALSE);
}
/*
* To add a message to the recvq. Note that the sctp_timer_fire()
* routine also uses this function to add the timer message to the
* receive queue for later processing. And it should be the only
* caller of sctp_add_recvq() which sets the try_harder argument
* to B_TRUE.
*
* If the try_harder argument is B_TRUE, this routine sctp_find_next_tq()
* will try very hard to dispatch the task. Refer to the comment
* for that routine on how it does that.
*/
{
if (!caller_hold_lock)
/* If the taskq dispatch has not been scheduled, do it now. */
if (!sctp_find_next_tq(sctp)) {
if (!caller_hold_lock)
return (B_FALSE);
}
/* Make sure the sctp_t will not go away. */
}
} else {
}
if (!caller_hold_lock)
return (B_TRUE);
}
static void
sctp_process_recvq(void *arg)
{
#ifdef DEBUG
#endif
#ifdef _BIG_ENDIAN
#else
#endif
#ifdef DEBUG
recvq_call++;
#endif
/*
* Note that while we are in this loop, other thread can put
* new packets in the receive queue. We may be looping for
* quite a while.
*/
#ifdef DEBUG
loop_cnt++;
#endif
}
/* We may have sent something when processing the receive queue. */
#ifdef DEBUG
if (loop_cnt > recvq_loop_cnt)
#endif
/* Now it can go away. */
}
/* ARGSUSED */
static int
{
return (0);
}
/* ARGSUSED */
static void
{
}
static void
{
}
static void
{
}