/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
#include <sys/xti_inet.h>
#include <sys/isa_defs.h>
#include <inet/kstatcom.h>
#include <inet/ipclassifier.h>
#include <inet/ipsec_impl.h>
#include <inet/sctp_crc32.h>
#include "sctp_impl.h"
#include "sctp_addr.h"
#include "sctp_asconf.h"
int sctpdebug;
static void sctp_process_recvq(void *);
static void sctp_rq_tq_init(sctp_stack_t *);
static void sctp_rq_tq_fini(sctp_stack_t *);
static void sctp_conn_cache_init();
static void sctp_conn_cache_fini();
static int sctp_conn_cache_constructor();
static void sctp_conn_cache_destructor();
static void sctp_conn_clear(conn_t *);
/*
* SCTP receive queue taskq
*
* At SCTP initialization time, a default taskq is created for
* servicing packets received when the interrupt thread cannot
* get a hold on the sctp_t. The number of taskqs can be increased in
* sctp_find_next_tq() when an existing taskq cannot be dispatched.
* The taskqs are never removed, but the maximum number of taskqs which
* can be created is controlled by sctp_recvq_tq_list_max_sz. Note
* that an SCTP recvq taskq is not tied to any specific CPU or ill.
*
* Those taskqs are stored in an array recvq_tq_list. And they are
* used in a round robin fashion. The current taskq being used is
* determined by recvq_tq_list_cur.
*/
/* The minimum number of threads for each taskq. */
/* The maximum number of threads for each taskq. */
/* The minimum number of tasks for each taskq. */
/* Default value of sctp_recvq_tq_list_max_sz. */
/*
* SCTP tunables related declarations. Definitions are in sctp_tunables.c
*/
extern mod_prop_info_t sctp_propinfo_tbl[];
extern int sctp_propinfo_count;
/*
* Hooks for Sun Cluster. On non-clustered nodes these will remain NULL.
* PSARC/2005/602.
*/
/*
* Return the version number of the SCTP kernel interface.
*/
int
{
if (cl_ver != SCTP_ITF_VER)
return (-1);
return (SCTP_ITF_VER);
}
/*
* Called when we need a new sctp instantiation but don't really have a
* new q to hang it off of. Copy the priv flag from the passed in structure.
*/
sctp_t *
{
return (NULL);
}
KM_NOSLEEP)) == NULL ||
KM_NOSLEEP)) == NULL) {
return (NULL);
}
return (NULL);
}
}
/*
* Link to the global as soon as possible so that this sctp_t
* can be found.
*/
/* If the listener has a limit, inherit the counter info. */
return (sctp);
}
/*
* We are dying for some reason. Try to do it gracefully.
*/
void
{
sctp->sctp_state));
/*
* Check to see if we need to notify upper layer.
*/
!SCTP_IS_DETACHED(sctp)) {
}
/*
* Socket is gone, detach.
*/
}
}
/* Remove this sctp from all hashes. */
/*
* If the sctp_t is detached, we need to finish freeing up
* the resources. At this point, ip_fanout_sctp() should have
* a hold on this sctp_t. Some thread doing snmp stuff can
* have a hold. And a taskq can also have a hold waiting to
* work. sctp_unlink() the sctp_t from the global list so
* that no new thread can find it. Then do a SCTP_REFRELE().
* The sctp_t will be freed after all those threads are done.
*/
if (SCTP_IS_DETACHED(sctp)) {
}
}
/*
* Called by upper layer when it wants to close this association.
* Depending on the state of this association, we need to do
* different things.
*
* If the state is below COOKIE_ECHOED or it is COOKIE_ECHOED but with
* no sent data, just remove this sctp from all the hashes. This
* makes sure that all packets from the other end will go to the default
* sctp handling. The upper layer will then do a sctp_close() to clean
* up.
*
* Otherwise, check and see if SO_LINGER is set. If it is set, check
* the value. If the value is 0, consider this an abortive close. Send
* an ABORT message and kill the association.
*
*/
int
{
int error = 0;
sctp->sctp_state));
switch (sctp->sctp_state) {
case SCTPS_IDLE:
case SCTPS_BOUND:
case SCTPS_LISTEN:
break;
case SCTPS_COOKIE_WAIT:
case SCTPS_COOKIE_ECHOED:
/*
* Close during the connect 3-way handshake
* but here there may or may not be pending data
* already on queue. Process almost same as in
* the ESTABLISHED state.
*/
break;
}
/* FALLTHRU */
default:
/*
* If SO_LINGER has set a zero linger time, terminate the
* association and send an ABORT.
*/
return (error);
}
/*
* If there is unread data, send an ABORT and terminate the
* association.
*/
return (error);
}
/*
* Transmit the shutdown before detaching the sctp_t.
* After the detach we no longer own the sctp_t, thus
* others can modify it.
*/
sctp_send_shutdown(sctp, 0);
/* Pass gathered wisdom to IP for keeping */
/*
* If lingering on close then wait until the shutdown
* is complete, or the SO_LINGER time passes, or an
* error or signal ends the wait. Note that this routine
* can be called more than once. Make sure that only
* one thread waits.
*/
!sctp->sctp_lingering) {
sctp->sctp_client_errno = 0;
stoptime = ddi_get_lbolt() +
sctp->sctp_client_errno == 0) {
if (ret < 0) {
/* Stoptime has reached. */
break;
} else if (ret == 0) {
/* Got a signal. */
break;
}
}
sctp->sctp_client_errno = 0;
}
return (error);
}
/* Remove this sctp from all hashes so nobody can find it. */
return (error);
}
void
{
sctp->sctp_state));
/* If the graceful shutdown has not been completed, just return. */
return;
}
/*
* Since the sctp_t is in SCTPS_IDLE state, the only thread which
* can have a hold on the sctp_t is doing snmp stuff. Just do
* a SCTP_REFRELE() here after the SCTP_UNLINK(). It will
* be freed when the other thread is done.
*/
}
/*
* Unlink from global list and do the eager close.
* Remove the refhold implicit in being on the global list.
*/
void
{
}
/*
* The sctp_t is going away. Remove it from all lists and set it
* to SCTPS_IDLE. The caller has to remove it from the
* global list. The freeing up of memory is deferred until
* sctp_free(). This is needed since a thread in sctp_input() might have
* done a SCTP_REFHOLD on this structure before it was removed from the
* hashes.
*/
static void
{
/* The counter is incremented only for established associations. */
/* Sanity check, don't do the same thing twice. */
return;
}
/* Stop and free the timers */
}
}
/* Set the CONN_CLOSING flag so that IP will not cache IRE again. */
/* Remove from all hashes. */
/*
* Clean up the recvq as much as possible. All those packets
* will be silently dropped as this sctp_t is now in idle state.
*/
if (ip_recv_attr_is_mblk(mp))
}
}
/*
*/
static void
{
sctp->sctp_hdr_len = 0;
sctp->sctp_ip_hdr_len = 0;
sctp->sctp_iphc_len = 0;
sctp->sctp_hdr_len = 0;
}
sctp->sctp_hdr6_len = 0;
sctp->sctp_ip_hdr6_len = 0;
sctp->sctp_iphc6_len = 0;
sctp->sctp_hdr6_len = 0;
}
}
static void
{
}
}
return;
}
goto free_unsent;
}
/*
* Cleanup all the messages in the stream queue and the reassembly lists.
* If 'free' is true, then delete the streams as well.
*/
void
{
int i;
/* walk thru and flush out anything remaining in the Q */
for (i = 0; i < sctp->sctp_num_istr; i++) {
}
}
if (free) {
sctp->sctp_num_istr = 0;
}
}
/* un-ordered fragments */
}
}
}
/*
* Last reference to the sctp_t is gone. Free all memory associated with it.
* Called from SCTP_REFRELE. Called inline in sctp_close()
*/
void
{
int cnt;
/* Unlink it from the global list */
/* Free up all the resources. */
/* blow away sctp stream management */
}
/* Remove all data transfer resources. */
sctp->sctp_istr_nmsgs = 0;
sctp->sctp_rxqueued = 0;
sctp->sctp_unacked = 0;
sctp->sctp_unsent = 0;
/* Clear out default xmit settings */
sctp->sctp_def_stream = 0;
sctp->sctp_def_flags = 0;
sctp->sctp_def_ppid = 0;
sctp->sctp_def_context = 0;
sctp->sctp_def_timetolive = 0;
}
sctp->sctp_sack_gaps = 0;
}
/* Remove all the address resources. */
}
sctp->sctp_hopoptslen = 0;
}
sctp->sctp_dstoptslen = 0;
}
sctp->sctp_rthdrdstoptslen = 0;
}
sctp->sctp_rthdrlen = 0;
}
sctp->sctp_err_len = 0;
}
/* Clear all the bitfields. */
/* It is time to update the global statistics. */
sctp->sctp_opkts = 0;
sctp->sctp_obchunks = 0;
sctp->sctp_odchunks = 0;
sctp->sctp_oudchunks = 0;
sctp->sctp_rxtchunks = 0;
sctp->sctp_ipkts = 0;
sctp->sctp_ibchunks = 0;
sctp->sctp_idchunks = 0;
sctp->sctp_iudchunks = 0;
sctp->sctp_fragdmsgs = 0;
sctp->sctp_reassmsgs = 0;
sctp->sctp_outseqtsns = 0;
sctp->sctp_osacks = 0;
sctp->sctp_isacks = 0;
sctp->sctp_idupchunks = 0;
sctp->sctp_gapcnt = 0;
sctp->sctp_cum_obchunks = 0;
sctp->sctp_cum_odchunks = 0;
sctp->sctp_cum_oudchunks = 0;
sctp->sctp_cum_rxtchunks = 0;
sctp->sctp_cum_ibchunks = 0;
sctp->sctp_cum_idchunks = 0;
sctp->sctp_cum_iudchunks = 0;
sctp->sctp_autoclose = 0;
sctp->sctp_tx_adaptation_code = 0;
sctp->sctp_v6label_len = 0;
sctp->sctp_v4label_len = 0;
}
/*
* Initialize protocol control block. If a parent exists, inherit
* all values set through setsockopt().
*/
static int
{
int err;
int cnt;
sctp->sctp_nsaddrs = 0;
saddr_ipif));
}
connp->conn_ports = 0;
sctp->sctp_strikes = 0;
sctp->sctp_sack_gaps = 0;
/* So we will not delay sending the first SACK. */
/* Only need to do the allocation if there is no "cached" one. */
} else {
return (ENOMEM);
}
}
/*
* Inherit from parent
*
* Start by inheriting from the conn_t, including conn_ixa and
* conn_xmit_ipp.
*/
if (err != 0)
goto failure;
/* xxx should be a better way to copy these flags xxx */
} else {
/*
* Set to system defaults
*/
else
/*
* Initialize the header template
*/
goto failure;
}
}
sctp->sctp_prsctpdrop = 0;
sctp->sctp_msgcount = 0;
return (0);
return (err);
}
/*
* Extracts the init tag from an INIT chunk and checks if it matches
* the sctp's verification tag. Returns B_FALSE if it doesn't match,
* B_TRUE if it does.
*/
static boolean_t
{
/* Need at least the data chunk hdr and the first 4 bytes of INIT */
return (B_FALSE);
}
return (B_TRUE);
}
return (B_FALSE);
}
/*
* Update the SCTP state according to change of PMTU.
*
* Path MTU might have changed by either increase or decrease, so need to
* adjust the MSS based on the value of ixa_pmtu.
*/
static void
{
return;
/*
* Always call ip_get_pmtu() to make sure that IP has updated
* ixa_flags properly.
*/
/*
* Calculate the MSS by decreasing the PMTU by sctp_hdr_len and
* IPsec overhead if applied. Make sure to use the most recent
* IPsec information.
*/
else
/*
* Nothing to change, so just return.
*/
return;
/*
* Currently, for ICMP errors, only PMTU decrease is handled.
*/
return;
#ifdef DEBUG
(void) printf("sctp_update_pmtu mss from %d to %d\n",
#endif
/*
* Update ixa_fragsize and ixa_pmtu.
*/
/*
* Make sure that sfa_pmss is a multiple of
* SCTP_ALIGN.
*/
#ifdef notyet
#endif
/*
* If below the min size then ip_get_pmtu cleared IXAF_PMTU_IPV4_DF.
* Make sure to clear IXAF_DONTFRAG, which is used by IP to decide
* whether to fragment the packet.
*/
}
}
}
}
/*
* Notify function registered with ip_xmit_attr_t. It's called in the context
* of conn_ip_output so it's safe to update the SCTP state.
* Currently only used for pmtu changes.
*/
/* ARGSUSED1 */
static void
{
switch (ntype) {
case IXAN_PMTU:
/* Find the faddr based on the ip_xmit_attr_t pointer */
break;
}
break;
default:
break;
}
}
/*
* sctp_icmp_error is called by sctp_input() to process ICMP error messages
* passed up by IP. We need to find a sctp_t
* that corresponds to the returned datagram. Passes the message back in on
* the correct queue once it has located the connection.
* Assumes that IP has pulled up everything up to and including
* the ICMP header.
*/
void
{
int iph_hdr_length;
(void *)mp));
return;
}
/* account for the ip hdr from the icmp message */
/* now the ip hdr of message resulting in this icmp */
/* first_mp must expose the full sctp header. */
/* not enough data for SCTP header */
return;
}
switch (icmph->icmph_type) {
case ICMP_DEST_UNREACHABLE:
switch (icmph->icmph_code) {
/*
* Reduce the MSS based on the new MTU. This will
* eliminate any fragmentation locally.
* N.B. There may well be some funny side-effects on
* the local send policy and the remote receive policy.
* Pending further research, we provide
* sctp_ignore_path_mtu just in case this proves
* disastrous somewhere.
*
* After updating the MSS, retransmit part of the
* dropped segment using the new mss by calling
* sctp_wput_slow(). Need to adjust all those
* params to make sure sctp_wput_slow() works properly.
*/
if (sctps->sctps_ignore_path_mtu)
break;
/* find the offending faddr */
break;
}
/*
* It is possible, even likely, that a fast retransmit
* attempt has been dropped by ip as a result of this
* error. Retransmission bundles as much as possible.
* A retransmit here prevents significant delays waiting
* on the timer. Analogous to behaviour of TCP after
* ICMP too big.
*/
break;
case ICMP_PORT_UNREACHABLE:
switch (sctp->sctp_state) {
case SCTPS_COOKIE_WAIT:
case SCTPS_COOKIE_ECHOED:
/* make sure the verification tag matches */
break;
}
NULL);
break;
}
break;
case ICMP_HOST_UNREACHABLE:
case ICMP_NET_UNREACHABLE:
/* Record the error in case we finally time out. */
break;
default:
break;
}
break;
case ICMP_SOURCE_QUENCH: {
/* Reduce the sending rate as if we got a retransmit timeout */
break;
}
}
}
/*
* sctp_icmp_error_ipv6() is called by sctp_icmp_error() to process ICMPv6
* error messages passed up by IP.
* Assumes that IP has pulled up all the extension headers as well
* as the ICMPv6 header.
*/
static void
{
return;
}
/* XXX need ifindex to find connection */
/* not enough data for SCTP header */
return;
}
switch (icmp6->icmp6_type) {
case ICMP6_PACKET_TOO_BIG:
/*
* Reduce the MSS based on the new MTU. This will
* eliminate any fragmentation locally.
* N.B. There may well be some funny side-effects on
* the local send policy and the remote receive policy.
* Pending further research, we provide
* sctp_ignore_path_mtu just in case this proves
* disastrous somewhere.
*
* After updating the MSS, retransmit part of the
* dropped segment using the new mss by calling
* sctp_wput_slow(). Need to adjust all those
* params to make sure sctp_wput_slow() works properly.
*/
if (sctps->sctps_ignore_path_mtu)
break;
/* find the offending faddr */
break;
}
/*
* It is possible, even likely, that a fast retransmit
* attempt has been dropped by ip as a result of this
* error. Retransmission bundles as much as possible.
* A retransmit here prevents significant delays waiting
* on the timer. Analogous to behaviour of TCP after
* ICMP too big.
*/
break;
case ICMP6_DST_UNREACH:
switch (icmp6->icmp6_code) {
case ICMP6_DST_UNREACH_NOPORT:
/* make sure the verification tag matches */
break;
}
NULL);
}
break;
case ICMP6_DST_UNREACH_ADMIN:
case ICMP6_DST_UNREACH_ADDR:
/* Record the error in case we finally time out. */
break;
default:
break;
}
break;
case ICMP6_PARAM_PROB:
/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
/* make sure the verification tag matches */
break;
}
NULL);
}
break;
}
break;
case ICMP6_TIME_EXCEEDED:
default:
break;
}
}
/*
* Called by sockfs to create a new sctp instance.
*
* If parent pointer is passed in, inherit settings from it.
*/
sctp_t *
{
/* User must supply a credential. */
return (NULL);
/* Increase here to have common decrease at end */
} else {
/*
* Check if the receive queue taskq for this sctp_stack_t has
* been set up.
*/
if (sctps->sctps_recvq_tq_list_cur_sz == 0)
/*
* For exclusive stacks we set the zoneid to zero
* to make SCTP operate as if in the global zone.
*/
else
}
return (NULL);
}
/*
* ipcl_conn_create did a netstack_hold. Undo the hold that was
* done at top of sctp_create.
*/
return (NULL);
}
/*
* Have conn_ip_output drop packets should our outer source
* go invalid, and tell us about mtu changes.
*/
return (NULL);
}
/*
* Inherit local address list, local port. Parent is either
* in SCTPS_BOUND, or SCTPS_LISTEN state.
*/
return (NULL);
}
/*
* If the parent is specified, it'll be immediately
* followed by sctp_connect(). So don't add this guy to
* bind hash.
*/
} else {
/*
* conn_allzones can not be set this early, hence
* no IPCL_ZONEID
*/
/*
* If the caller has the process-wide flag set, then default to
* MAC exempt mode. This allows read-down to unlabeled hosts.
*/
}
/* Initialize SCTP instance values, our verf tag must never be 0 */
sizeof (sctp->sctp_lvtag));
if (sctp->sctp_lvtag == 0)
/* Information required by upper layer */
/* Fill in the socket buffer limits for sctpsockfs */
/* Insert this in the global list. */
return (sctp);
}
/*
* Run at module load time.
*
* The steps listed in the body are: create the faddr, sets and PR-SCTP
* sets caches, initialize the tables used for CRC calculation, and ask
* to be informed of stack creation and destruction so that the set of
* sctp_stack_t's can be maintained.
*
* NOTE(review): only the step comments are visible in this copy of the
* function; confirm the corresponding calls against the full source.
*/
void
sctp_ddi_g_init(void)
{
/* Create the faddr cache */
/* Create the sets cache */
/* Create the PR-SCTP sets cache */
/* Initialize tables used for CRC calculation */
/*
* We want to be informed each time a stack is created or
* destroyed in the kernel, so we can maintain the
* set of sctp_stack_t's.
*/
}
static void *
{
int i;
/* Initialize locks */
/* Initialize SCTP hash arrays. */
KM_SLEEP);
/* saddr init */
/* Global SCTP PCB list. */
/* Initialize SCTP kstats. */
sctps->sctps_reclaim_tid = 0;
/* Allocate the per netstack stats */
KM_SLEEP);
for (i = 0; i < sctps->sctps_sc_cnt; i++) {
KM_SLEEP);
}
return (sctps);
}
/*
* Called when the module is about to be unloaded.
*
* The steps listed in the body destroy the same three caches that
* sctp_ddi_g_init() lists as created: the faddr cache, the sets cache
* and the PR-SCTP sets cache.
*
* NOTE(review): only the step comments are visible in this copy of the
* function; confirm the corresponding calls against the full source.
*/
void
sctp_ddi_g_destroy(void)
{
/* Destroy the faddr cache */
/* Destroy the sets cache */
/* Destroy the PR-SCTP sets cache */
}
/*
* Free the SCTP stack instance.
*/
static void
{
int i;
/*
* Setting sctps_reclaim to false tells sctp_reclaim_timer() not to restart
* the timer.
*/
if (sctps->sctps_reclaim_tid != 0)
sctp_propinfo_count * sizeof (mod_prop_info_t));
/* Destroy the recvq taskqs. */
/* Destroy saddr */
/* Global SCTP PCB list. */
/* Destroy SCTP hash arrays. */
/* Destroy SCTP kernel stats. */
for (i = 0; i < sctps->sctps_sc_cnt; i++)
}
static void
{
int thrs;
int max_tasks;
/* Someone may have beaten us in creating the taskqs. */
if (sctps->sctps_recvq_tq_list_cur_sz > 0) {
return;
}
/*
* Make sure that the maximum number of tasks is at least thrice as
* large as the number of threads.
*/
/*
* This helps differentiate the default taskqs in different IP stacks.
*/
/*
* Initialize the recvq_tq_list and create the first recvq taskq.
* What to do if it fails?
*/
KM_SLEEP);
}
static void
{
int i;
if (sctps->sctps_recvq_tq_list_cur_sz == 0)
return;
for (i = 0; i < sctps->sctps_recvq_tq_list_cur_sz; i++) {
}
}
/* Add another taskq for a new ill. */
void
{
int thrs;
int max_tasks;
/*
* Make sure that the maximum number of tasks is at least thrice as
* large as the number of threads.
*/
return;
}
return;
}
}
#ifdef DEBUG
#endif
/*
* Find the next recvq_tq to use. This routine will go thru all the
* taskqs until it can dispatch a job for the sctp. If this fails,
* it will create a new taskq and try it.
*/
static boolean_t
{
/*
* Note that since we don't hold a lock on sctp_rq_tq_lock for
* performance reasons, recvq_tq_list_cur_sz can be changed during
* this loop. The problem this will create is that the loop may
* not have tried all the recvq_tq. This should be OK.
*/
TQ_NOSLEEP) != NULL) {
return (B_TRUE);
}
}
/*
* Create one more taskq and try it. Note that sctp_inc_taskq()
* may not have created another taskq if the number of recvq
* taskqs is at the maximum. We are probably in a pretty bad
* shape if this actually happens...
*/
return (B_TRUE);
}
return (B_FALSE);
}
/*
* To add a message to the recvq. Note that the sctp_timer_fire()
* routine also uses this function to add the timer message to the
* receive queue for later processing. And it should be the only
* caller of sctp_add_recvq() which sets the try_harder argument
* to B_TRUE.
*
* If the try_harder argument is B_TRUE, this routine sctp_find_next_tq()
* will try very hard to dispatch the task. Refer to the comment
* for that routine on how it does that.
*
* On failure the message has been freed i.e., this routine always consumes the
* message. It bumps ipIfStatsInDiscards and uses ip_drop_input to drop.
*/
void
{
if (!caller_hold_lock)
/* If the taskq dispatch has not been scheduled, do it now. */
if (!sctp_find_next_tq(sctp)) {
if (!caller_hold_lock)
return;
}
/* Make sure the sctp_t will not go away. */
}
if (!caller_hold_lock)
return;
}
} else {
}
if (!caller_hold_lock)
}
static void
{
#ifdef DEBUG
#endif
#ifdef _BIG_ENDIAN
#else
#endif
#ifdef DEBUG
recvq_call++;
#endif
/*
* Note that while we are in this loop, other thread can put
* new packets in the receive queue. We may be looping for
* quite a while.
*/
#ifdef DEBUG
loop_cnt++;
#endif
continue;
}
else
}
#ifdef DEBUG
if (loop_cnt > recvq_loop_cnt)
#endif
/* Now it can go away. */
}
/* ARGSUSED */
static int
{
int cnt;
RW_DEFAULT, NULL);
}
return (ENOMEM);
}
return (0);
}
/* ARGSUSED */
static void
{
int cnt;
/*
* sctp_pad_mp can be NULL if the memory allocation fails
* in sctp_init_values() and the conn_t is freed.
*/
}
}
/* Can be NULL if constructor failed */
}
}
static void
{
}
static void
{
}
void
{
/*
* Register sctp_notify to listen to capability changes detected by IP.
* This upcall is made in the context of the call to conn_ip_output
* thus it holds whatever locks sctp holds across conn_ip_output.
*/
}
static void
{
/* Clean up conn_t stuff */
}
}
}
}
}
/* Leave conn_ixa and other constructed fields in place */
}