/* sctp_output.c, revision 1d8c40254918b6a58c4b2132dafb3e012ccd66f0 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/stream.h>
#include <sys/cmn_err.h>
#define _SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/socket.h>
#include <sys/stropts.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/socketvar.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/tcp_seq.h>
#include <netinet/sctp.h>
#include <inet/common.h>
#include <inet/mi.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/ip_ire.h>
#include <inet/sctp_ip.h>
#include <inet/ipclassifier.h>
/*
* PR-SCTP comments.
*
* A message can expire before it gets to the transmit list (i.e. it is still
* in the unsent list - unchunked), after it gets to the transmit list, but
* before transmission has actually started, or after transmission has begun.
* Accordingly, we check for the status of a message in sctp_chunkify() when
* the message is being transferred from the unsent list to the transmit list;
* in sctp_get_msg_to_send(), when we get the next chunk from the transmit
* list and in sctp_rexmit() when we get the next chunk to be (re)transmitted.
 * When we nuke a message in sctp_chunkify(), all we need to do is take it
 * out of the unsent list and update sctp_unsent; when a message is deemed
 * timed-out in sctp_get_msg_to_send() we can take it out of the transmit
 * list and update sctp_unsent, IFF transmission for the message has not yet
 * begun (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission
 * for the message has started, then we cannot just take it out of the list;
 * we need to send a Forward TSN chunk to the peer so that the peer can clear
 * its fragment list for this message. We cannot send the Forward TSN in
 * sctp_get_msg_to_send() itself because there might be unacked chunks for
 * messages preceding this abandoned message. So, we send a Forward TSN
 * IFF all messages prior to this abandoned message have been SACK'd; if not,
 * we defer sending the Forward TSN to sctp_cumack(), which will check for
 * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In
* sctp_rexmit() when we check for retransmissions, we need to determine if
* the advanced peer ack point can be moved ahead, and if so, send a Forward
* TSN to the peer instead of retransmitting the chunk. Note that when
 * we send a Forward TSN for a message, there may be yet-unsent chunks for
 * this message; we need to mark all such chunks as abandoned, so that
 * sctp_cumack() can take the message out of the transmit list; additionally,
 * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e.
 * decremented when a message/chunk is deemed abandoned), sockfs needs to
 * be notified so that it can adjust its idea of the amount of queued data.
*/
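/*
 * A made-up illustration of the above: suppose a message was chunked
 * into three chunks and only the first, say TSN 10, had been sent when
 * the message timed out.  Since transmission has begun, the message
 * cannot simply be unlinked; the two unsent chunks are marked abandoned
 * and, once every message before this one has been SACK'd, a Forward
 * TSN advancing the cumulative TSN to 10 is sent so that the peer can
 * purge any fragments of this message from its reassembly list.
 */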
#include "sctp_impl.h"
static struct kmem_cache *sctp_kmem_ftsn_set_cache;
/* Padding mblk for SCTP chunks. */
mblk_t *sctp_pad_mp;
#ifdef DEBUG
static boolean_t sctp_verify_chain(mblk_t *, mblk_t *);
#endif
/*
* Called to allocate a header mblk when sending data to SCTP.
* Data will follow in b_cont of this mblk.
*/
mblk_t *
sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen,
int flags)
{
mblk_t *mp;
struct T_unitdata_req *tudr;
size_t size;
int error;
size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen;
size = MAX(size, sizeof (sctp_msg_hdr_t));
if (flags & SCTP_CAN_BLOCK) {
mp = allocb_wait(size, BPRI_MED, 0, &error);
} else {
mp = allocb(size, BPRI_MED);
}
if (mp) {
tudr = (struct T_unitdata_req *)mp->b_rptr;
tudr->PRIM_type = T_UNITDATA_REQ;
tudr->DEST_length = nlen;
tudr->DEST_offset = sizeof (*tudr);
tudr->OPT_length = clen;
tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) +
_TPI_ALIGN_TOPT(nlen));
if (nlen > 0)
bcopy(name, tudr + 1, nlen);
if (clen > 0)
bcopy(control, (char *)tudr + tudr->OPT_offset, clen);
mp->b_wptr += (tudr->OPT_offset + clen);
mp->b_datap->db_type = M_PROTO;
}
return (mp);
}
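/*
 * Send-side entry point for user data.  mp is the M_PROTO mblk built by
 * sctp_alloc_hdr() with the data chained off b_cont.  Roughly: resolve
 * the destination address and any SCTP_SNDRCV ancillary data, rewrite
 * the proto header in place as a sctp_msg_hdr_t, queue the message on
 * the unsent list, and call sctp_output() if the association is
 * established.
 */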
/*ARGSUSED2*/
int
sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags)
{
sctp_faddr_t *fp = NULL;
struct T_unitdata_req *tudr;
int error = 0;
mblk_t *mproto = mp;
in6_addr_t *addr;
in6_addr_t tmpaddr;
uint16_t sid = sctp->sctp_def_stream;
uint32_t ppid = sctp->sctp_def_ppid;
uint32_t context = sctp->sctp_def_context;
uint16_t msg_flags = sctp->sctp_def_flags;
sctp_msg_hdr_t *sctp_msg_hdr;
uint32_t msg_len = 0;
uint32_t timetolive = sctp->sctp_def_timetolive;
ASSERT(DB_TYPE(mproto) == M_PROTO);
mp = mp->b_cont;
ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA);
tudr = (struct T_unitdata_req *)mproto->b_rptr;
ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
/* Get destination address, if specified */
if (tudr->DEST_length > 0) {
sin_t *sin;
sin6_t *sin6;
sin = (struct sockaddr_in *)
(mproto->b_rptr + tudr->DEST_offset);
switch (sin->sin_family) {
case AF_INET:
if (tudr->DEST_length < sizeof (*sin)) {
return (EINVAL);
}
IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr);
addr = &tmpaddr;
break;
case AF_INET6:
if (tudr->DEST_length < sizeof (*sin6)) {
return (EINVAL);
}
sin6 = (struct sockaddr_in6 *)
(mproto->b_rptr + tudr->DEST_offset);
addr = &sin6->sin6_addr;
break;
default:
return (EAFNOSUPPORT);
}
fp = sctp_lookup_faddr(sctp, addr);
if (fp == NULL) {
return (EINVAL);
}
}
/* Ancillary Data? */
if (tudr->OPT_length > 0) {
struct cmsghdr *cmsg;
char *cend;
struct sctp_sndrcvinfo *sndrcv;
cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset);
cend = ((char *)cmsg + tudr->OPT_length);
ASSERT(cend <= (char *)mproto->b_wptr);
for (;;) {
if ((char *)(cmsg + 1) > cend ||
((char *)cmsg + cmsg->cmsg_len) > cend) {
break;
}
if ((cmsg->cmsg_level == IPPROTO_SCTP) &&
(cmsg->cmsg_type == SCTP_SNDRCV)) {
if (cmsg->cmsg_len <
(sizeof (*sndrcv) + sizeof (*cmsg))) {
return (EINVAL);
}
sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1);
sid = sndrcv->sinfo_stream;
msg_flags = sndrcv->sinfo_flags;
ppid = sndrcv->sinfo_ppid;
context = sndrcv->sinfo_context;
timetolive = sndrcv->sinfo_timetolive;
break;
}
if (cmsg->cmsg_len > 0)
cmsg = CMSG_NEXT(cmsg);
else
break;
}
}
if (msg_flags & MSG_ABORT) {
if (mp && mp->b_cont) {
mblk_t *pump = msgpullup(mp, -1);
if (!pump) {
return (ENOMEM);
}
freemsg(mp);
mp = pump;
mproto->b_cont = mp;
}
RUN_SCTP(sctp);
sctp_user_abort(sctp, mp, B_TRUE);
sctp_clean_death(sctp, ECONNRESET);
freemsg(mproto);
goto process_sendq;
}
if (mp == NULL)
goto done;
RUN_SCTP(sctp);
/* Reject any new data requests if we are shutting down */
if (sctp->sctp_state > SCTPS_ESTABLISHED) {
error = EPIPE;
goto unlock_done;
}
/* Re-use the mproto to store relevant info. */
ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr));
mproto->b_rptr = mproto->b_datap->db_base;
mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr);
sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr;
bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr));
sctp_msg_hdr->smh_context = context;
sctp_msg_hdr->smh_sid = sid;
sctp_msg_hdr->smh_ppid = ppid;
sctp_msg_hdr->smh_flags = msg_flags;
sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive);
sctp_msg_hdr->smh_tob = lbolt64;
for (; mp != NULL; mp = mp->b_cont)
msg_len += MBLKL(mp);
sctp_msg_hdr->smh_msglen = msg_len;
/* User requested specific destination */
SCTP_SET_CHUNK_DEST(mproto, fp);
if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED &&
sid >= sctp->sctp_num_ostr) {
/* Send sendfail event */
sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID,
B_FALSE);
error = EINVAL;
goto unlock_done;
}
/* no data */
if (msg_len == 0) {
sctp_sendfail_event(sctp, dupmsg(mproto),
SCTP_ERR_NO_USR_DATA, B_FALSE);
error = EINVAL;
goto unlock_done;
}
/* Add it to the unsent list */
if (sctp->sctp_xmit_unsent == NULL) {
sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto;
} else {
sctp->sctp_xmit_unsent_tail->b_next = mproto;
sctp->sctp_xmit_unsent_tail = mproto;
}
sctp->sctp_unsent += msg_len;
BUMP_LOCAL(sctp->sctp_msgcount);
if (sctp->sctp_state == SCTPS_ESTABLISHED)
sctp_output(sctp);
process_sendq:
WAKE_SCTP(sctp);
sctp_process_sendq(sctp);
return (0);
unlock_done:
WAKE_SCTP(sctp);
done:
return (error);
}
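/*
 * Chunkify messages on the unsent list into DATA chunks and move them
 * to the transmit list.  The first chunk is limited to first_len bytes
 * (so that it can share a packet with whatever the caller intends to
 * bundle with it); subsequent chunks are sized to the destination's
 * PMSS.  Chunking continues until bytes_to_send is consumed or the
 * unsent list is drained.  Messages that are due to be abandoned are
 * dropped here with a send-failure event.
 */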
void
sctp_chunkify(sctp_t *sctp, int first_len, int bytes_to_send)
{
mblk_t *mp;
mblk_t *chunk_mp;
mblk_t *chunk_head;
mblk_t *chunk_hdr;
mblk_t *chunk_tail = NULL;
int count;
int chunksize;
sctp_data_hdr_t *sdc;
mblk_t *mdblk = sctp->sctp_xmit_unsent;
sctp_faddr_t *fp;
sctp_faddr_t *fp1;
size_t xtralen;
sctp_msg_hdr_t *msg_hdr;
fp = SCTP_CHUNK_DEST(mdblk);
if (fp == NULL)
fp = sctp->sctp_current;
if (fp->isv4)
xtralen = sctp->sctp_hdr_len + sctp_wroff_xtra + sizeof (*sdc);
else
xtralen = sctp->sctp_hdr6_len + sctp_wroff_xtra + sizeof (*sdc);
count = chunksize = first_len - sizeof (*sdc);
nextmsg:
chunk_mp = mdblk->b_cont;
/*
 * If this message is partially chunked, we ignore first_len for now
 * and keep the chunk size already in use. For the unchunked
 * remainder, the next chunk is sized by whatever of the PMSS the
 * last chunk left unused.
*/
if (SCTP_IS_MSG_CHUNKED(mdblk)) {
int chunk_len;
ASSERT(chunk_mp->b_next != NULL);
mdblk->b_cont = chunk_mp->b_next;
chunk_mp->b_next = NULL;
SCTP_MSG_CLEAR_CHUNKED(mdblk);
mp = mdblk->b_cont;
while (mp->b_next != NULL)
mp = mp->b_next;
chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len);
if (fp->sfa_pmss - chunk_len > sizeof (*sdc))
count = chunksize = fp->sfa_pmss - chunk_len;
else
count = chunksize = fp->sfa_pmss;
count = chunksize = count - sizeof (*sdc);
} else {
msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) {
sctp->sctp_xmit_unsent = mdblk->b_next;
if (sctp->sctp_xmit_unsent == NULL)
sctp->sctp_xmit_unsent_tail = NULL;
ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
sctp->sctp_unsent -= msg_hdr->smh_msglen;
mdblk->b_next = NULL;
BUMP_LOCAL(sctp->sctp_prsctpdrop);
/*
* Update the ULP with the amount of queued data, which is
* sent-unack'ed + unsent.
*/
if (!SCTP_IS_DETACHED(sctp)) {
sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
sctp->sctp_unacked + sctp->sctp_unsent);
}
sctp_sendfail_event(sctp, mdblk, 0, B_FALSE);
goto try_next;
}
mdblk->b_cont = NULL;
}
msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
nextchunk:
chunk_head = chunk_mp;
chunk_tail = NULL;
/* Skip as many mblk's as we need */
while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) {
count -= MBLKL(chunk_mp);
chunk_tail = chunk_mp;
chunk_mp = chunk_mp->b_cont;
}
/* Split the chain, if needed */
if (chunk_mp != NULL) {
if (count > 0) {
mblk_t *split_mp = dupb(chunk_mp);
if (split_mp == NULL) {
if (mdblk->b_cont == NULL) {
mdblk->b_cont = chunk_head;
} else {
SCTP_MSG_SET_CHUNKED(mdblk);
ASSERT(chunk_head->b_next == NULL);
chunk_head->b_next = mdblk->b_cont;
mdblk->b_cont = chunk_head;
}
return;
}
if (chunk_tail != NULL) {
chunk_tail->b_cont = split_mp;
chunk_tail = chunk_tail->b_cont;
} else {
chunk_head = chunk_tail = split_mp;
}
chunk_tail->b_wptr = chunk_tail->b_rptr + count;
chunk_mp->b_rptr = chunk_tail->b_wptr;
count = 0;
} else if (chunk_tail == NULL) {
goto next;
} else {
chunk_tail->b_cont = NULL;
}
}
/*
 * Allocate a separate chunk header mblk if the head is shared,
 * misaligned, or lacks headroom for a data chunk header;
 * otherwise build the header in place.
 */
if (DB_REF(chunk_head) > 1 ||
((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) ||
MBLKHEAD(chunk_head) < sizeof (*sdc)) {
if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) {
if (mdblk->b_cont == NULL) {
if (chunk_mp != NULL)
linkb(chunk_head, chunk_mp);
mdblk->b_cont = chunk_head;
} else {
SCTP_MSG_SET_CHUNKED(mdblk);
if (chunk_mp != NULL)
linkb(chunk_head, chunk_mp);
ASSERT(chunk_head->b_next == NULL);
chunk_head->b_next = mdblk->b_cont;
mdblk->b_cont = chunk_head;
}
return;
}
chunk_hdr->b_rptr += xtralen - sizeof (*sdc);
chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc);
chunk_hdr->b_cont = chunk_head;
} else {
chunk_hdr = chunk_head;
chunk_hdr->b_rptr -= sizeof (*sdc);
}
ASSERT(chunk_hdr->b_datap->db_ref == 1);
sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr;
sdc->sdh_id = CHUNK_DATA;
sdc->sdh_flags = 0;
sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count);
ASSERT(sdc->sdh_len);
sdc->sdh_sid = htons(msg_hdr->smh_sid);
/*
 * We defer assigning the SSN until just before sending the chunk;
 * otherwise, if we drop the chunk in sctp_get_msg_to_send(), we
 * would need to send a Forward TSN to let the peer know. See the
 * comments for SCTP_CHUNK_SENT in sctp_impl.h.
*/
sdc->sdh_payload_id = msg_hdr->smh_ppid;
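/*
 * The first chunk of a message carries the B (beginning) bit; the
 * E (end) bit is set further below once the last chunk is built.
 */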
if (mdblk->b_cont == NULL) {
mdblk->b_cont = chunk_hdr;
SCTP_DATA_SET_BBIT(sdc);
} else {
mp = mdblk->b_cont;
while (mp->b_next != NULL)
mp = mp->b_next;
mp->b_next = chunk_hdr;
}
bytes_to_send -= (chunksize - count);
if (chunk_mp != NULL) {
next:
count = chunksize = fp->sfa_pmss - sizeof (*sdc);
goto nextchunk;
}
SCTP_DATA_SET_EBIT(sdc);
sctp->sctp_xmit_unsent = mdblk->b_next;
if (mdblk->b_next == NULL) {
sctp->sctp_xmit_unsent_tail = NULL;
}
mdblk->b_next = NULL;
if (sctp->sctp_xmit_tail == NULL) {
sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk;
} else {
mp = sctp->sctp_xmit_tail;
while (mp->b_next != NULL)
mp = mp->b_next;
mp->b_next = mdblk;
mdblk->b_prev = mp;
}
try_next:
if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) {
mdblk = sctp->sctp_xmit_unsent;
fp1 = SCTP_CHUNK_DEST(mdblk);
if (fp1 == NULL)
fp1 = sctp->sctp_current;
if (fp == fp1) {
size_t len = MBLKL(mdblk->b_cont);
if ((count > 0) &&
((len > fp->sfa_pmss - sizeof (*sdc)) ||
(len <= count))) {
count -= sizeof (*sdc);
count = chunksize = count - (count & 0x3);
} else {
count = chunksize = fp->sfa_pmss -
sizeof (*sdc);
}
} else {
if (fp1->isv4)
xtralen = sctp->sctp_hdr_len;
else
xtralen = sctp->sctp_hdr6_len;
xtralen += sctp_wroff_xtra + sizeof (*sdc);
count = chunksize = fp1->sfa_pmss - sizeof (*sdc);
fp = fp1;
}
goto nextmsg;
}
}
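/*
 * Free an entire message: each chunk mblk linked off the meta mblk's
 * b_cont via b_next, and then the meta mblk itself.
 */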
void
sctp_free_msg(mblk_t *ump)
{
mblk_t *mp, *nmp;
for (mp = ump->b_cont; mp; mp = nmp) {
nmp = mp->b_next;
mp->b_next = mp->b_prev = NULL;
freemsg(mp);
}
ASSERT(!ump->b_prev);
ump->b_next = NULL;
freeb(ump);
}
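/*
 * Prepend the IP/SCTP header template for destination fp to mp,
 * filling in a SACK of sacklen bytes if requested.  If the existing
 * headroom is too small or the dblk is shared, a separate header mblk
 * is allocated and chained in front.  Returns NULL (setting *error if
 * error is non-NULL) when no usable source address exists for fp or
 * when allocation fails.
 */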
mblk_t *
sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen,
int *error)
{
int hdrlen;
char *hdr;
int isv4 = fp->isv4;
if (error != NULL)
*error = 0;
if (isv4) {
hdrlen = sctp->sctp_hdr_len;
hdr = sctp->sctp_iphc;
} else {
hdrlen = sctp->sctp_hdr6_len;
hdr = sctp->sctp_iphc6;
}
/*
* A null fp->ire could mean that the address is 'down'. Similarly,
 * it is possible that the address went down, we tried to send a
 * heartbeat and ended up setting fp->saddr as unspec because we
 * didn't have any usable source address. In either case
 * sctp_ire2faddr() will try to find an IRE, if available, and set
* the source address, if needed. If we still don't have any
* usable source address, fp->state will be SCTP_FADDRS_UNREACH and
* we return EHOSTUNREACH.
*/
if (fp->ire == NULL || SCTP_IS_ADDR_UNSPEC(fp->isv4, fp->saddr)) {
sctp_ire2faddr(sctp, fp);
if (fp->state == SCTP_FADDRS_UNREACH) {
if (error != NULL)
*error = EHOSTUNREACH;
return (NULL);
}
}
/* Copy in IP header. */
if ((mp->b_rptr - mp->b_datap->db_base) <
(sctp_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2) {
mblk_t *nmp;
/*
* This can happen if IP headers are adjusted after
 * data was moved into chunks, during retransmission,
 * or when something like snoop is running.
*/
nmp = allocb(sctp_wroff_xtra + hdrlen + sacklen, BPRI_MED);
if (nmp == NULL) {
if (error != NULL)
*error = ENOMEM;
return (NULL);
}
nmp->b_rptr += sctp_wroff_xtra;
nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen;
nmp->b_cont = mp;
mp = nmp;
} else {
mp->b_rptr -= (hdrlen + sacklen);
}
bcopy(hdr, mp->b_rptr, hdrlen);
if (sacklen) {
sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen);
}
if (fp != sctp->sctp_current) {
/* change addresses in header */
if (isv4) {
ipha_t *iph = (ipha_t *)mp->b_rptr;
IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst);
if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) {
IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
iph->ipha_src);
} else if (sctp->sctp_bound_to_all) {
iph->ipha_src = INADDR_ANY;
}
} else {
((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr;
if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) {
((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr;
} else if (sctp->sctp_bound_to_all) {
V6_SET_ZERO(((ip6_t *)(mp->b_rptr))->ip6_src);
}
}
}
/*
* IP will not free this IRE if it is condemned. SCTP needs to
* free it.
*/
if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) {
IRE_REFRELE_NOTR(fp->ire);
fp->ire = NULL;
}
/* Stash the conn and ire ptr info for IP */
SCTP_STASH_IPINFO(mp, fp->ire);
return (mp);
}
/*
* SCTP requires every chunk to be padded so that the total length
* is a multiple of SCTP_ALIGN. This function returns a mblk with
* the specified pad length.
*/
static mblk_t *
sctp_get_padding(int pad)
{
mblk_t *fill;
ASSERT(pad < SCTP_ALIGN);
if ((fill = dupb(sctp_pad_mp)) != NULL) {
fill->b_wptr += pad;
return (fill);
}
/*
 * The memory-saving path of reusing sctp_pad_mp has failed,
 * possibly because it has been dupb()'d too many times
 * (DBLK_REFMAX). Fall back to the memory-consuming path of
 * allocating a fresh pad mblk.
*/
if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) {
/* Zero it out. SCTP_ALIGN is sizeof (int32_t) */
*(int32_t *)fill->b_rptr = 0;
fill->b_wptr += pad;
}
return (fill);
}
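/*
 * Walk the transmit list and collect chunks marked for fast
 * retransmission into a new chain, all destined for the same peer
 * address and padded to SCTP_ALIGN, accumulating the byte count in
 * *total.  Collection stops once the next chunk would overflow the
 * destination's PMSS, except that the first chunk is always returned
 * (see the comment below about PMTU changes).
 */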
static mblk_t *
sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp)
{
mblk_t *meta;
mblk_t *start_mp = NULL;
mblk_t *end_mp = NULL;
mblk_t *mp, *nmp;
mblk_t *fill;
sctp_data_hdr_t *sdh;
int msglen;
int extra;
sctp_msg_hdr_t *msg_hdr;
for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
if (SCTP_IS_MSG_ABANDONED(meta) ||
SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
continue;
}
for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
if (SCTP_CHUNK_WANT_REXMIT(mp)) {
/*
* Use the same peer address to do fast
* retransmission.
*/
if (*fp == NULL) {
*fp = SCTP_CHUNK_DEST(mp);
if ((*fp)->state != SCTP_FADDRS_ALIVE)
*fp = sctp->sctp_current;
} else if (*fp != SCTP_CHUNK_DEST(mp)) {
continue;
}
sdh = (sctp_data_hdr_t *)mp->b_rptr;
msglen = ntohs(sdh->sdh_len);
if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) {
extra = SCTP_ALIGN - extra;
}
/*
* We still return at least the first message
 * even if that message cannot fit, as the
 * PMTU may have changed.
*/
if (*total + msglen + extra >
(*fp)->sfa_pmss && start_mp != NULL) {
return (start_mp);
}
if ((nmp = dupmsg(mp)) == NULL)
return (start_mp);
if (extra > 0) {
fill = sctp_get_padding(extra);
if (fill != NULL) {
linkb(nmp, fill);
} else {
return (start_mp);
}
}
BUMP_MIB(&sctp_mib, sctpOutFastRetrans);
SCTP_CHUNK_CLEAR_REXMIT(mp);
if (start_mp == NULL) {
start_mp = nmp;
} else {
linkb(end_mp, nmp);
}
end_mp = nmp;
*total += msglen + extra;
dprint(2, ("sctp_find_fast_rexmit_mblks: "
"tsn %x\n", sdh->sdh_tsn));
}
}
}
/* Clear the flag as there is no more message to be fast rexmitted. */
sctp->sctp_chk_fast_rexmit = B_FALSE;
return (start_mp);
}
/* A debug function to make sure that an mblk chain is not broken */
#ifdef DEBUG
static boolean_t
sctp_verify_chain(mblk_t *head, mblk_t *tail)
{
mblk_t *mp = head;
if (head == NULL || tail == NULL)
return (B_TRUE);
while (mp != NULL) {
if (mp == tail)
return (B_TRUE);
mp = mp->b_next;
}
return (B_FALSE);
}
#endif
/*
* Gets the next unsent chunk to transmit. Messages that are abandoned are
* skipped. A message can be abandoned if it has a non-zero timetolive and
* transmission has not yet started or if it is a partially reliable
* message and its time is up (assuming we are PR-SCTP aware).
 * 'cansend' is used to determine whether we need to try to chunkify
 * messages from the unsent list, if any, and also as an input to
 * sctp_chunkify() if so.
* the unsent list, if any, and also as an input to sctp_chunkify() if so.
* When called from sctp_rexmit(), we don't want to chunkify, so 'cansend'
* will be set to 0.
*/
mblk_t *
sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int *error,
int32_t firstseg, uint32_t cansend, sctp_faddr_t *fp)
{
mblk_t *mp1;
sctp_msg_hdr_t *msg_hdr;
mblk_t *tmp_meta;
sctp_faddr_t *fp1;
ASSERT(error != NULL && mp != NULL);
*error = 0;
ASSERT(sctp->sctp_current != NULL);
chunkified:
while (meta != NULL) {
tmp_meta = meta->b_next;
msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
mp1 = meta->b_cont;
if (SCTP_IS_MSG_ABANDONED(meta))
goto next_msg;
if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
while (mp1 != NULL) {
if (SCTP_CHUNK_CANSEND(mp1)) {
*mp = mp1;
#ifdef DEBUG
ASSERT(sctp_verify_chain(
sctp->sctp_xmit_head, meta));
#endif
return (meta);
}
mp1 = mp1->b_next;
}
goto next_msg;
}
/*
 * If we come here and the first chunk is sent, then
 * we are PR-SCTP aware, in which case if the cumulative
 * TSN has moved up to or beyond the first chunk (which
* means all the previous messages have been cumulative
* SACK'd), then we send a Forward TSN with the last
* chunk that was sent in this message. If we can't send
* a Forward TSN because previous non-abandoned messages
* have not been acked then we will defer the Forward TSN
* to sctp_rexmit() or sctp_cumack().
*/
if (SCTP_CHUNK_ISSENT(mp1)) {
*error = sctp_check_abandoned_msg(sctp, meta);
if (*error != 0) {
#ifdef DEBUG
ASSERT(sctp_verify_chain(sctp->sctp_xmit_head,
sctp->sctp_xmit_tail));
#endif
return (NULL);
}
goto next_msg;
}
BUMP_LOCAL(sctp->sctp_prsctpdrop);
ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
if (meta->b_prev == NULL) {
ASSERT(sctp->sctp_xmit_head == meta);
sctp->sctp_xmit_head = tmp_meta;
if (sctp->sctp_xmit_tail == meta)
sctp->sctp_xmit_tail = tmp_meta;
meta->b_next = NULL;
if (tmp_meta != NULL)
tmp_meta->b_prev = NULL;
} else if (meta->b_next == NULL) {
if (sctp->sctp_xmit_tail == meta)
sctp->sctp_xmit_tail = meta->b_prev;
meta->b_prev->b_next = NULL;
meta->b_prev = NULL;
} else {
meta->b_prev->b_next = tmp_meta;
tmp_meta->b_prev = meta->b_prev;
if (sctp->sctp_xmit_tail == meta)
sctp->sctp_xmit_tail = tmp_meta;
meta->b_prev = NULL;
meta->b_next = NULL;
}
sctp->sctp_unsent -= msg_hdr->smh_msglen;
/*
* Update the ULP with the amount of queued data, which is
* sent-unack'ed + unsent.
*/
if (!SCTP_IS_DETACHED(sctp)) {
sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
sctp->sctp_unacked + sctp->sctp_unsent);
}
sctp_sendfail_event(sctp, meta, 0, B_TRUE);
next_msg:
meta = tmp_meta;
}
/* chunkify, if needed */
if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) {
ASSERT(sctp->sctp_unsent > 0);
if (fp == NULL) {
fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
fp = sctp->sctp_current;
} else {
/*
* If the user specified a destination, try to honor it.
*/
fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
if (fp1 != NULL && fp1->state == SCTP_FADDRS_ALIVE &&
fp1 != fp) {
goto chunk_done;
}
}
sctp_chunkify(sctp, fp->sfa_pmss - firstseg, cansend);
if ((meta = sctp->sctp_xmit_tail) == NULL)
goto chunk_done;
/*
* sctp_chunkify() won't advance sctp_xmit_tail if it adds
 * new chunk(s) to the tail, so we need to skip past
 * sctp_xmit_tail, which would have already been processed.
 * This can happen when there are unacked chunks, but
 * nothing new to send.
 * If sctp_chunkify() was called while the transmit queue
 * was empty, then we need to start from sctp_xmit_tail itself.
*/
if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) {
#ifdef DEBUG
mp1 = sctp->sctp_xmit_tail->b_cont;
while (mp1 != NULL) {
ASSERT(!SCTP_CHUNK_CANSEND(mp1));
mp1 = mp1->b_next;
}
#endif
if ((meta = sctp->sctp_xmit_tail->b_next) == NULL)
goto chunk_done;
}
goto chunkified;
}
chunk_done:
#ifdef DEBUG
ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail));
#endif
return (NULL);
}
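/*
 * Fast retransmit: bundle the chunks flagged for fast retransmission
 * into a single packet, clear the DF bit if the bundle exceeds the
 * path MTU, and queue the packet for transmission.
 */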
void
sctp_fast_rexmit(sctp_t *sctp)
{
mblk_t *mp, *head;
int pktlen = 0;
sctp_faddr_t *fp = NULL;
ASSERT(sctp->sctp_xmit_head != NULL);
mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp);
if (mp == NULL)
return;
if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) {
freemsg(mp);
return;
}
if ((pktlen > fp->sfa_pmss) && fp->isv4) {
ipha_t *iph = (ipha_t *)head->b_rptr;
iph->ipha_fragment_offset_and_flags = 0;
}
sctp_set_iplen(sctp, head);
sctp_add_sendq(sctp, head);
sctp->sctp_active = fp->lastactive = lbolt64;
}
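/*
 * Main transmit routine.  Sends as much queued data as the peer's
 * receive window and the per-destination congestion window allow,
 * bundling multiple data chunks (and a SACK, when one is owed and
 * fits) into each packet up to the PMSS.  If data remains unsent
 * because a window closed, the persist/RTO timer is armed at
 * unsent_data below.
 */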
void
sctp_output(sctp_t *sctp)
{
mblk_t *mp = NULL;
mblk_t *nmp;
mblk_t *head;
mblk_t *meta = sctp->sctp_xmit_tail;
mblk_t *fill = NULL;
uint16_t chunklen;
uint32_t cansend;
int32_t seglen;
int32_t xtralen;
int32_t sacklen;
int32_t pad = 0;
int32_t pathmax;
int extra;
int64_t now = lbolt64;
sctp_faddr_t *fp;
sctp_faddr_t *lfp;
sctp_data_hdr_t *sdc;
int error;
boolean_t notsent = B_TRUE;
if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
sacklen = 0;
} else {
/* Send a SACK chunk: header + fixed part + one gap-ack fragment per gap */
sacklen = sizeof (sctp_chunk_hdr_t) +
sizeof (sctp_sack_chunk_t) +
(sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
lfp = sctp->sctp_lastdata;
ASSERT(lfp != NULL);
if (lfp->state != SCTP_FADDRS_ALIVE)
lfp = sctp->sctp_current;
}
cansend = sctp->sctp_frwnd;
if (sctp->sctp_unsent < cansend)
cansend = sctp->sctp_unsent;
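/*
 * A Nagle-style check (our reading of the test below): if less than
 * half a PMSS can be sent and a small amount of data is already in
 * flight unacknowledged, hold off transmission unless the user has
 * disabled the delay (sctp_ndelay).
 */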
if ((cansend < sctp->sctp_current->sfa_pmss / 2) &&
sctp->sctp_unacked &&
(sctp->sctp_unacked < sctp->sctp_current->sfa_pmss) &&
!sctp->sctp_ndelay) {
head = NULL;
fp = sctp->sctp_current;
goto unsent_data;
}
if (meta != NULL)
mp = meta->b_cont;
while (cansend > 0) {
pad = 0;
/*
* Find first segment eligible for transmit.
*/
while (mp != NULL) {
if (SCTP_CHUNK_CANSEND(mp))
break;
mp = mp->b_next;
}
if (mp == NULL) {
meta = sctp_get_msg_to_send(sctp, &mp,
meta == NULL ? NULL : meta->b_next, &error, sacklen,
cansend, NULL);
if (error != 0 || meta == NULL) {
head = NULL;
fp = sctp->sctp_current;
goto unsent_data;
}
sctp->sctp_xmit_tail = meta;
}
sdc = (sctp_data_hdr_t *)mp->b_rptr;
seglen = ntohs(sdc->sdh_len);
xtralen = sizeof (*sdc);
chunklen = seglen - xtralen;
/*
* Check rwnd.
*/
if (chunklen > cansend) {
head = NULL;
fp = SCTP_CHUNK_DEST(meta);
if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
fp = sctp->sctp_current;
goto unsent_data;
}
if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
extra = SCTP_ALIGN - extra;
/*
* Pick destination address, and check cwnd.
*/
if (sacklen > 0 && (seglen + extra <= lfp->cwnd - lfp->suna) &&
(seglen + sacklen + extra <= lfp->sfa_pmss)) {
/*
* Only include SACK chunk if it can be bundled
* with a data chunk, and sent to sctp_lastdata.
*/
pathmax = lfp->cwnd - lfp->suna;
fp = lfp;
if ((nmp = dupmsg(mp)) == NULL) {
head = NULL;
goto unsent_data;
}
SCTP_CHUNK_CLEAR_FLAGS(nmp);
head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen,
&error);
if (head == NULL) {
/*
* If none of the source addresses are
 * available (i.e., error == EHOSTUNREACH),
 * pretend we have sent the data. We will
 * eventually time out trying to retransmit
 * the data if the interface never comes up.
* If we have already sent some stuff (i.e.,
* notsent is B_FALSE) then we are fine, else
* just mark this packet as sent.
*/
if (notsent && error == EHOSTUNREACH) {
SCTP_CHUNK_SENT(sctp, mp, sdc,
fp, chunklen, meta);
}
freemsg(nmp);
goto unsent_data;
}
seglen += sacklen;
xtralen += sacklen;
sacklen = 0;
} else {
fp = SCTP_CHUNK_DEST(meta);
if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
fp = sctp->sctp_current;
/*
* If we haven't sent data to this destination for
* a while, do slow start again.
*/
if (now - fp->lastactive > fp->rto) {
fp->cwnd = sctp_slow_start_after_idle *
fp->sfa_pmss;
}
fp->lastactive = now;
pathmax = fp->cwnd - fp->suna;
if (seglen + extra > pathmax) {
head = NULL;
goto unsent_data;
}
if ((nmp = dupmsg(mp)) == NULL) {
head = NULL;
goto unsent_data;
}
SCTP_CHUNK_CLEAR_FLAGS(nmp);
head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error);
if (head == NULL) {
/*
* If none of the source addresses are
 * available (i.e., error == EHOSTUNREACH),
 * pretend we have sent the data. We will
 * eventually time out trying to retransmit
 * the data if the interface never comes up.
* If we have already sent some stuff (i.e.,
* notsent is B_FALSE) then we are fine, else
* just mark this packet as sent.
*/
if (notsent && error == EHOSTUNREACH) {
SCTP_CHUNK_SENT(sctp, mp, sdc,
fp, chunklen, meta);
}
freemsg(nmp);
goto unsent_data;
}
}
if (pathmax > fp->sfa_pmss)
pathmax = fp->sfa_pmss;
SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
mp = mp->b_next;
/* Use this chunk to measure RTT? */
if (sctp->sctp_out_time == 0) {
sctp->sctp_out_time = now;
sctp->sctp_rtt_tsn = sctp->sctp_ltsn - 1;
}
if (extra > 0) {
fill = sctp_get_padding(extra);
if (fill != NULL) {
linkb(head, fill);
pad = extra;
seglen += extra;
} else {
goto unsent_data;
}
}
/* See if we can bundle more. */
while (seglen < pathmax) {
int32_t new_len;
int32_t new_xtralen;
while (mp != NULL) {
if (SCTP_CHUNK_CANSEND(mp))
break;
mp = mp->b_next;
}
if (mp == NULL) {
meta = sctp_get_msg_to_send(sctp, &mp,
meta->b_next, &error, seglen,
(seglen - xtralen) >= cansend ? 0 :
cansend - seglen, fp);
if (error != 0 || meta == NULL)
break;
sctp->sctp_xmit_tail = meta;
}
ASSERT(mp != NULL);
if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) &&
fp != SCTP_CHUNK_DEST(meta)) {
break;
}
sdc = (sctp_data_hdr_t *)mp->b_rptr;
chunklen = ntohs(sdc->sdh_len);
if ((extra = chunklen & (SCTP_ALIGN - 1)) != 0)
extra = SCTP_ALIGN - extra;
new_len = seglen + chunklen;
new_xtralen = xtralen + sizeof (*sdc);
chunklen -= sizeof (*sdc);
if (new_len - new_xtralen > cansend ||
new_len + extra > pathmax) {
break;
}
if ((nmp = dupmsg(mp)) == NULL)
break;
if (extra > 0) {
fill = sctp_get_padding(extra);
if (fill != NULL) {
pad += extra;
new_len += extra;
linkb(nmp, fill);
} else {
freemsg(nmp);
break;
}
}
seglen = new_len;
xtralen = new_xtralen;
SCTP_CHUNK_CLEAR_FLAGS(nmp);
SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
linkb(head, nmp);
mp = mp->b_next;
}
if ((seglen > fp->sfa_pmss) && fp->isv4) {
ipha_t *iph = (ipha_t *)head->b_rptr;
/*
* Path MTU is different from what we thought it would
* be when we created chunks, or IP headers have grown.
* Need to clear the DF bit.
*/
iph->ipha_fragment_offset_and_flags = 0;
}
/* xmit segment */
ASSERT(cansend >= seglen - pad - xtralen);
cansend -= (seglen - pad - xtralen);
dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x "
"ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n",
seglen - xtralen, ntohl(sdc->sdh_tsn),
ntohs(sdc->sdh_ssn), fp, sctp->sctp_frwnd, cansend,
sctp->sctp_lastack_rxd));
sctp_set_iplen(sctp, head);
sctp_add_sendq(sctp, head);
/* arm rto timer (if not set) */
if (!fp->timer_running)
SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
notsent = B_FALSE;
}
sctp->sctp_active = now;
return;
unsent_data:
/* arm persist timer (if rto timer not set) */
if (!fp->timer_running)
SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
if (head != NULL)
freemsg(head);
}
/*
* The following two functions initialize and destroy the cache
* associated with the sets used for PR-SCTP.
*/
void
sctp_ftsn_sets_init(void)
{
sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache",
sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL,
NULL, 0);
}
void
sctp_ftsn_sets_fini(void)
{
kmem_cache_destroy(sctp_kmem_ftsn_set_cache);
}
/* Free PR-SCTP sets */
void
sctp_free_ftsn_set(sctp_ftsn_set_t *s)
{
sctp_ftsn_set_t *p;
while (s != NULL) {
p = s->next;
s->next = NULL;
kmem_cache_free(sctp_kmem_ftsn_set_cache, s);
s = p;
}
}
/*
* Given a message meta block, meta, this routine creates or modifies
* the set that will be used to generate a Forward TSN chunk. If the
* entry for stream id, sid, for this message already exists, the
* sequence number, ssn, is updated if it is greater than the existing
* one. If an entry for this sid does not exist, one is created if
 * the size does not exceed fp->sfa_pmss. We return B_FALSE in case
 * of an error.
*/
boolean_t
sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta,
uint_t *nsets, uint32_t *slen)
{
sctp_ftsn_set_t *p;
sctp_msg_hdr_t *msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
uint16_t sid = htons(msg_hdr->smh_sid);
/* msg_hdr->smh_ssn is already in NBO */
uint16_t ssn = msg_hdr->smh_ssn;
ASSERT(s != NULL && nsets != NULL);
ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL));
if (*s == NULL) {
ASSERT((*slen + sizeof (uint32_t)) <= fp->sfa_pmss);
*s = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP);
if (*s == NULL)
return (B_FALSE);
(*s)->ftsn_entries.ftsn_sid = sid;
(*s)->ftsn_entries.ftsn_ssn = ssn;
(*s)->next = NULL;
*nsets = 1;
*slen += sizeof (uint32_t);
return (B_TRUE);
}
for (p = *s; p->next != NULL; p = p->next) {
if (p->ftsn_entries.ftsn_sid == sid) {
if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
p->ftsn_entries.ftsn_ssn = ssn;
return (B_TRUE);
}
}
/* the last one */
if (p->ftsn_entries.ftsn_sid == sid) {
if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
p->ftsn_entries.ftsn_ssn = ssn;
} else {
if ((*slen + sizeof (uint32_t)) > fp->sfa_pmss)
return (B_FALSE);
p->next = kmem_cache_alloc(sctp_kmem_ftsn_set_cache,
KM_NOSLEEP);
if (p->next == NULL)
return (B_FALSE);
p = p->next;
p->ftsn_entries.ftsn_sid = sid;
p->ftsn_entries.ftsn_ssn = ssn;
p->next = NULL;
(*nsets)++;
*slen += sizeof (uint32_t);
}
return (B_TRUE);
}
/*
* Given a set of stream id - sequence number pairs, this routine creates
* a Forward TSN chunk. The cumulative TSN (advanced peer ack point)
* for the chunk is obtained from sctp->sctp_adv_pap. The caller
* will add the IP/SCTP header.
*/
mblk_t *
sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets,
uint_t nsets, uint32_t seglen)
{
mblk_t *ftsn_mp;
sctp_chunk_hdr_t *ch_hdr;
uint32_t *advtsn;
uint16_t schlen;
size_t xtralen;
ftsn_entry_t *ftsn_entry;
seglen += sizeof (sctp_chunk_hdr_t);
if (fp->isv4)
xtralen = sctp->sctp_hdr_len + sctp_wroff_xtra;
else
xtralen = sctp->sctp_hdr6_len + sctp_wroff_xtra;
ftsn_mp = allocb(xtralen + seglen, BPRI_MED);
if (ftsn_mp == NULL)
return (NULL);
ftsn_mp->b_rptr += xtralen;
ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen;
ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr;
ch_hdr->sch_id = CHUNK_FORWARD_TSN;
ch_hdr->sch_flags = 0;
/*
* The cast here should not be an issue since seglen is
* the length of the Forward TSN chunk.
*/
schlen = (uint16_t)seglen;
U16_TO_ABE16(schlen, &(ch_hdr->sch_len));
advtsn = (uint32_t *)(ch_hdr + 1);
U32_TO_ABE32(sctp->sctp_adv_pap, advtsn);
ftsn_entry = (ftsn_entry_t *)(advtsn + 1);
while (nsets > 0) {
ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr);
ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid;
ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn;
ftsn_entry++;
sets = sets->next;
nsets--;
}
return (ftsn_mp);
}
/*
* Given a starting message, the routine steps through all the
* messages whose TSN is less than sctp->sctp_adv_pap and creates
 * ftsn sets. The ftsn sets are then used to create a Forward TSN
 * chunk. All the messages that have chunks included in the
 * ftsn sets are flagged abandoned. If a message is partially sent
* and is deemed abandoned, all remaining unsent chunks are marked
* abandoned and are deducted from sctp_unsent.
*/
void
sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp,
sctp_faddr_t *fp, uint32_t *seglen)
{
mblk_t *mp1 = mp;
mblk_t *mp_head = mp;
mblk_t *meta_head = meta;
mblk_t *head;
sctp_ftsn_set_t *sets = NULL;
uint_t nsets = 0;
uint16_t clen;
sctp_data_hdr_t *sdc;
uint32_t sacklen;
uint32_t adv_pap = sctp->sctp_adv_pap;
uint32_t unsent = 0;
boolean_t ubit;
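/* Start with the size of the cumulative TSN field of the chunk. */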
*seglen = sizeof (uint32_t);
sdc = (sctp_data_hdr_t *)mp1->b_rptr;
while (meta != NULL &&
SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
/*
* Skip adding FTSN sets for un-ordered messages as they do
* not have SSNs.
*/
ubit = SCTP_DATA_GET_UBIT(sdc);
if (!ubit &&
!sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) {
meta = NULL;
sctp->sctp_adv_pap = adv_pap;
goto ftsn_done;
}
while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
sdc = (sctp_data_hdr_t *)mp1->b_rptr;
adv_pap = ntohl(sdc->sdh_tsn);
mp1 = mp1->b_next;
}
meta = meta->b_next;
if (meta != NULL) {
mp1 = meta->b_cont;
if (!SCTP_CHUNK_ISSENT(mp1))
break;
sdc = (sctp_data_hdr_t *)mp1->b_rptr;
}
}
ftsn_done:
/*
* Can't compare with sets == NULL, since we don't add any
* sets for un-ordered messages.
*/
if (meta == meta_head)
return;
*nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen);
sctp_free_ftsn_set(sets);
if (*nmp == NULL)
return;
if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
sacklen = 0;
} else {
sacklen = sizeof (sctp_chunk_hdr_t) +
sizeof (sctp_sack_chunk_t) +
(sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
if (*seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
/* piggybacked SACK doesn't fit */
sacklen = 0;
} else {
fp = sctp->sctp_lastdata;
}
}
head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL);
if (head == NULL) {
freemsg(*nmp);
*nmp = NULL;
return;
}
*seglen += sacklen;
*nmp = head;
/*
 * XXX Need to optimise this; it is done here so
 * that we don't have to undo it in case of failure.
*/
mp1 = mp_head;
sdc = (sctp_data_hdr_t *)mp1->b_rptr;
while (meta_head != NULL &&
SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
if (!SCTP_IS_MSG_ABANDONED(meta_head))
SCTP_MSG_SET_ABANDONED(meta_head);
while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
sdc = (sctp_data_hdr_t *)mp1->b_rptr;
if (!SCTP_CHUNK_ISACKED(mp1)) {
clen = ntohs(sdc->sdh_len) - sizeof (*sdc);
SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen,
meta_head);
}
mp1 = mp1->b_next;
}
while (mp1 != NULL) {
sdc = (sctp_data_hdr_t *)mp1->b_rptr;
if (!SCTP_CHUNK_ABANDONED(mp1)) {
ASSERT(!SCTP_CHUNK_ISSENT(mp1));
unsent += ntohs(sdc->sdh_len) - sizeof (*sdc);
SCTP_ABANDON_CHUNK(mp1);
}
mp1 = mp1->b_next;
}
meta_head = meta_head->b_next;
if (meta_head != NULL) {
mp1 = meta_head->b_cont;
if (!SCTP_CHUNK_ISSENT(mp1))
break;
sdc = (sctp_data_hdr_t *)mp1->b_rptr;
}
}
if (unsent > 0) {
ASSERT(sctp->sctp_unsent >= unsent);
sctp->sctp_unsent -= unsent;
/*
* Update the ULP with the amount of queued data, which is
* sent-unack'ed + unsent.
*/
if (!SCTP_IS_DETACHED(sctp)) {
sctp->sctp_ulp_xmitted(sctp->sctp_ulpd,
sctp->sctp_unacked + sctp->sctp_unsent);
}
}
}
/*
* This function steps through messages starting at meta and checks if
* the message is abandoned. It stops when it hits an unsent chunk or
 * a message that has all its chunks acked. This is the only place
 * where the sctp_adv_pap is moved forward to indicate abandoned
* messages.
*/
void
sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp)
{
uint32_t tsn = sctp->sctp_adv_pap;
sctp_data_hdr_t *sdc;
sctp_msg_hdr_t *msg_hdr;
ASSERT(mp != NULL);
sdc = (sctp_data_hdr_t *)mp->b_rptr;
ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd));
msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
if (!SCTP_IS_MSG_ABANDONED(meta) &&
!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
return;
}
while (meta != NULL) {
while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) {
sdc = (sctp_data_hdr_t *)mp->b_rptr;
tsn = ntohl(sdc->sdh_tsn);
mp = mp->b_next;
}
if (mp != NULL)
break;
/*
* We continue checking for successive messages only if there
 * is a chunk marked for retransmission. Otherwise, we might
 * end up sending an FTSN prematurely for chunks that have been
* sent, but not yet acked.
*/
if ((meta = meta->b_next) != NULL) {
msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
if (!SCTP_IS_MSG_ABANDONED(meta) &&
!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
break;
}
for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
if (!SCTP_CHUNK_ISSENT(mp)) {
sctp->sctp_adv_pap = tsn;
return;
}
if (SCTP_CHUNK_WANT_REXMIT(mp))
break;
}
if (mp == NULL)
break;
}
}
sctp->sctp_adv_pap = tsn;
}
/*
* Retransmit first segment which hasn't been acked with cumtsn or send
* a Forward TSN chunk, if appropriate.
*/
void
sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp)
{
mblk_t *mp;
mblk_t *nmp = NULL;
mblk_t *head;
mblk_t *meta = sctp->sctp_xmit_head;
mblk_t *fill;
uint32_t seglen = 0;
uint32_t sacklen;
uint16_t chunklen;
int extra;
sctp_data_hdr_t *sdc;
sctp_faddr_t *fp;
int error;
uint32_t adv_pap = sctp->sctp_adv_pap;
boolean_t do_ftsn = B_FALSE;
boolean_t ftsn_check = B_TRUE;
while (meta != NULL) {
for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
uint32_t tsn;
if (!SCTP_CHUNK_ISSENT(mp))
goto window_probe;
/*
* We break in the following cases -
*
* if the advanced peer ack point includes the next
 * chunk to be retransmitted - possibly the Forward
 * TSN was lost.
 *
 * if we are PR-SCTP aware and the next chunk to be
* retransmitted is now abandoned
*
* if the next chunk to be retransmitted is for
* the dest on which the timer went off. (this
* message is not abandoned).
*
* We check for Forward TSN only for the first
* eligible chunk to be retransmitted. The reason
* being if the first eligible chunk is skipped (say
* it was sent to a destination other than oldfp)
* then we cannot advance the cum TSN via Forward
* TSN chunk.
*
* Also, ftsn_check is B_TRUE only for the first
* eligible chunk, it will be B_FALSE for all
* subsequent candidate messages for retransmission.
*/
sdc = (sctp_data_hdr_t *)mp->b_rptr;
tsn = ntohl(sdc->sdh_tsn);
if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) {
if (sctp->sctp_prsctp_aware && ftsn_check) {
if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) {
ASSERT(sctp->sctp_prsctp_aware);
do_ftsn = B_TRUE;
goto out;
} else {
sctp_check_adv_ack_pt(sctp,
meta, mp);
if (SEQ_GT(sctp->sctp_adv_pap,
adv_pap)) {
do_ftsn = B_TRUE;
goto out;
}
}
ftsn_check = B_FALSE;
}
if (SCTP_CHUNK_DEST(mp) == oldfp)
goto out;
}
}
meta = meta->b_next;
if (meta != NULL && sctp->sctp_prsctp_aware) {
sctp_msg_hdr_t *mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) ||
SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) {
meta = meta->b_next;
}
}
}
window_probe:
/*
* The retransmit timer fired for a destination which didn't have
* any unacked data pending.
*/
if (!sctp->sctp_unacked && sctp->sctp_unsent) {
/*
* Send a window probe. Inflate frwnd to allow
* sending one segment.
*/
if (sctp->sctp_frwnd < (oldfp->sfa_pmss - sizeof (*sdc))) {
sctp->sctp_frwnd = oldfp->sfa_pmss - sizeof (*sdc);
}
BUMP_MIB(&sctp_mib, sctpOutWinProbe);
sctp_output(sctp);
}
return;
out:
/*
 * Enter slow start for this destination: ssthresh is halved
 * (with a floor of two segments) and cwnd collapses to a
 * single segment, per standard timeout handling.
*/
oldfp->ssthresh = oldfp->cwnd / 2;
if (oldfp->ssthresh < 2 * oldfp->sfa_pmss)
oldfp->ssthresh = 2 * oldfp->sfa_pmss;
oldfp->cwnd = oldfp->sfa_pmss;
oldfp->pba = 0;
fp = sctp_rotate_faddr(sctp, oldfp);
ASSERT(fp != NULL);
sdc = (sctp_data_hdr_t *)mp->b_rptr;
if (do_ftsn) {
sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen);
if (nmp == NULL) {
sctp->sctp_adv_pap = adv_pap;
goto restart_timer;
}
head = nmp;
mp = NULL;
meta = sctp->sctp_xmit_tail;
if (meta != NULL)
mp = meta->b_cont;
goto try_bundle;
}
seglen = ntohs(sdc->sdh_len);
chunklen = seglen - sizeof (*sdc);
if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
extra = SCTP_ALIGN - extra;
/*
* Cancel RTT measurement if the retransmitted TSN is before the
* TSN used for timing.
*/
if (sctp->sctp_out_time != 0 &&
SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) {
sctp->sctp_out_time = 0;
}
/* Clear the counter as the RTT calculation may be off. */
fp->rtt_updates = 0;
if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
sacklen = 0;
} else {
sacklen = sizeof (sctp_chunk_hdr_t) +
sizeof (sctp_sack_chunk_t) +
(sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
if (seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
/* piggybacked SACK doesn't fit */
sacklen = 0;
} else {
fp = sctp->sctp_lastdata;
}
}
nmp = dupmsg(mp);
if (nmp == NULL)
goto restart_timer;
if (extra > 0) {
fill = sctp_get_padding(extra);
if (fill != NULL) {
linkb(nmp, fill);
seglen += extra;
} else {
freemsg(nmp);
goto restart_timer;
}
}
SCTP_CHUNK_CLEAR_FLAGS(nmp);
head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL);
if (head == NULL) {
freemsg(nmp);
goto restart_timer;
}
seglen += sacklen;
SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
mp = mp->b_next;
try_bundle:
while (seglen < fp->sfa_pmss) {
int32_t new_len;
while (mp != NULL) {
if (SCTP_CHUNK_CANSEND(mp))
break;
mp = mp->b_next;
}
if (mp == NULL) {
meta = sctp_get_msg_to_send(sctp, &mp, meta->b_next,
&error, 0, 0, oldfp);
if (error != 0 || meta == NULL)
break;
ASSERT(mp != NULL);
sctp->sctp_xmit_tail = meta;
}
sdc = (sctp_data_hdr_t *)mp->b_rptr;
chunklen = ntohs(sdc->sdh_len) - sizeof (*sdc);
new_len = seglen + ntohs(sdc->sdh_len);
if (seglen & (SCTP_ALIGN - 1)) {
extra = SCTP_ALIGN - (seglen & (SCTP_ALIGN - 1));
if (new_len + extra > fp->sfa_pmss) {
break;
}
fill = sctp_get_padding(extra);
if (fill != NULL) {
new_len += extra;
linkb(head, fill);
} else {
break;
}
} else {
if (new_len > fp->sfa_pmss) {
break;
}
}
if ((nmp = dupmsg(mp)) == NULL) {
break;
}
seglen = new_len;
SCTP_CHUNK_CLEAR_FLAGS(nmp);
SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
linkb(head, nmp);
mp = mp->b_next;
}
if ((seglen > fp->sfa_pmss) && fp->isv4) {
ipha_t *iph = (ipha_t *)head->b_rptr;
/*
* Path MTU is different from what we thought it would
* be when we created chunks, or IP headers have grown.
* Need to clear the DF bit.
*/
iph->ipha_fragment_offset_and_flags = 0;
}
dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x "
"ssn %d to %p (rwnd %d, lastack_rxd %x)\n",
seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn), fp,
sctp->sctp_frwnd, sctp->sctp_lastack_rxd));
sctp_set_iplen(sctp, head);
sctp_add_sendq(sctp, head);
/*
* Restart timer with exponential backoff
*/
restart_timer:
oldfp->strikes++;
sctp->sctp_strikes++;
SCTP_CALC_RXT(oldfp, sctp->sctp_rto_max);
SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
if (oldfp->suna != 0)
SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->rto);
sctp->sctp_active = lbolt64;
}
/*
* The SCTP write put procedure called from IP.
*/
void
sctp_wput(queue_t *q, mblk_t *mp)
{
uchar_t *rptr;
t_scalar_t type;
switch (mp->b_datap->db_type) {
case M_IOCTL:
sctp_wput_ioctl(q, mp);
break;
case M_DATA:
/* Should be handled in sctp_output() */
ASSERT(0);
freemsg(mp);
break;
case M_PROTO:
case M_PCPROTO:
rptr = mp->b_rptr;
if ((mp->b_wptr - rptr) >= sizeof (t_scalar_t)) {
type = ((union T_primitives *)rptr)->type;
/*
* There is no "standard" way on how to respond
* to T_CAPABILITY_REQ if a module does not
* understand it. And the current TI mod
* has problems handling an error ack. So we
* catch the request here and reply with a response
* which the TI mod knows how to respond to.
*/
switch (type) {
case T_CAPABILITY_REQ:
(void) putnextctl1(RD(q), M_ERROR, EPROTO);
break;
default:
if ((mp = mi_tpi_err_ack_alloc(mp,
TNOTSUPPORT, 0)) != NULL) {
qreply(q, mp);
return;
}
}
}
/* FALLTHRU */
default:
freemsg(mp);
return;
}
}