/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/kmem.h>
#include <sys/socket.h>
#include <sys/random.h>
#include <sys/tsol/tndb.h>
#include <sys/tsol/tnet.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/sctp.h>
#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/ip_ire.h>
#include <inet/ip_if.h>
#include <inet/ip_ndp.h>
#include <inet/mib2.h>
#include <inet/nd.h>
#include <inet/optcom.h>
#include <inet/sctp_ip.h>
#include <inet/ipclassifier.h>
#include "sctp_impl.h"
#include "sctp_addr.h"
#include "sctp_asconf.h"
static struct kmem_cache *sctp_kmem_faddr_cache;
static void sctp_init_faddr(sctp_t *, sctp_faddr_t *, in6_addr_t *, mblk_t *);
/* Set the source address. Refer to comments in sctp_get_dest(). */
void
sctp_set_saddr(sctp_t *sctp, sctp_faddr_t *fp)
{
boolean_t v6 = !fp->sf_isv4;
boolean_t addr_set;
fp->sf_saddr = sctp_get_valid_addr(sctp, v6, &addr_set);
/*
* If there is no source address available, mark this peer address
* as unreachable for now. When the heartbeat timer fires, it will
* call sctp_get_dest() to re-check if there is any source address
* available.
*/
if (!addr_set)
fp->sf_state = SCTP_FADDRS_UNREACH;
}
/*
* Call this function to get information about a peer addr fp.
*
* Uses ip_attr_connect to avoid explicit use of ire and source address
* selection.
*/
void
sctp_get_dest(sctp_t *sctp, sctp_faddr_t *fp)
{
in6_addr_t laddr;
in6_addr_t nexthop;
sctp_saddr_ipif_t *sp;
int hdrlen;
sctp_stack_t *sctps = sctp->sctp_sctps;
conn_t *connp = sctp->sctp_connp;
iulp_t uinfo;
uint_t pmtu;
int error;
uint32_t flags = IPDF_VERIFY_DST | IPDF_IPSEC |
IPDF_SELECT_SRC | IPDF_UNIQUE_DCE;
/*
* Tell sctp_make_mp it needs to call us again should we not
* complete and set the saddr.
*/
fp->sf_saddr = ipv6_all_zeros;
/*
* If this addr is not reachable, mark it as unconfirmed for now; the
* state will be changed back to unreachable later in this function
* if that is still the case.
*/
if (fp->sf_state == SCTP_FADDRS_UNREACH) {
fp->sf_state = SCTP_FADDRS_UNCONFIRMED;
}
/*
* Socket is connected - enable PMTU discovery.
*/
if (!sctps->sctps_ignore_path_mtu)
fp->sf_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
ip_attr_nexthop(&connp->conn_xmit_ipp, fp->sf_ixa, &fp->sf_faddr,
&nexthop);
laddr = fp->sf_saddr;
error = ip_attr_connect(connp, fp->sf_ixa, &laddr, &fp->sf_faddr,
&nexthop, connp->conn_fport, &laddr, &uinfo, flags);
if (error != 0) {
dprint(3, ("sctp_get_dest: no ire for %x:%x:%x:%x\n",
SCTP_PRINTADDR(fp->sf_faddr)));
/*
* It is tempting to just leave the src addr
* unspecified and let IP figure it out, but we
* *cannot* do this, since IP may choose a src addr
* that is not part of this association... unless
* this sctp has bound to all addrs. So if the dest
* lookup fails, try to find one in our src addr
* list, unless the sctp has bound to all addrs, in
* which case we change the src addr to unspec.
*
* Note that if this is a v6 endpoint but it does
* not have any v4 address at this point (e.g. may
* have been deleted), sctp_get_valid_addr() will
* return mapped INADDR_ANY. In this case, this
* address should be marked not reachable so that
* it won't be used to send data.
*/
sctp_set_saddr(sctp, fp);
if (fp->sf_state == SCTP_FADDRS_UNREACH)
return;
goto check_current;
}
ASSERT(fp->sf_ixa->ixa_ire != NULL);
ASSERT(!(fp->sf_ixa->ixa_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)));
if (!sctp->sctp_loopback)
sctp->sctp_loopback = uinfo.iulp_loopback;
/* Make sure the laddr is part of this association */
if ((sp = sctp_saddr_lookup(sctp, &laddr, 0)) != NULL &&
!sp->saddr_ipif_dontsrc) {
if (sp->saddr_ipif_unconfirmed == 1)
sp->saddr_ipif_unconfirmed = 0;
/* We did IPsec policy lookup for laddr already */
fp->sf_saddr = laddr;
} else {
dprint(2, ("sctp_get_dest: src addr is not part of assoc "
"%x:%x:%x:%x\n", SCTP_PRINTADDR(laddr)));
/*
* Set the src to the first saddr and hope for the best.
* Note that this case should happen only rarely.  One
* scenario where it can happen is when an app explicitly
* binds to an address that is not the preferred source
* address for reaching the peer.
*/
sctp_set_saddr(sctp, fp);
if (fp->sf_state == SCTP_FADDRS_UNREACH) {
return;
}
}
/*
* Pull out RTO information for this faddr and use it if we don't
* have any yet.
*/
if (fp->sf_srtt == -1 && uinfo.iulp_rtt != 0) {
/* The cached value is in ms. */
fp->sf_srtt = MSEC_TO_TICK(uinfo.iulp_rtt);
fp->sf_rttvar = MSEC_TO_TICK(uinfo.iulp_rtt_sd);
fp->sf_rto = 3 * fp->sf_srtt;
/* Bound the RTO by configured min and max values */
if (fp->sf_rto < sctp->sctp_rto_min) {
fp->sf_rto = sctp->sctp_rto_min;
}
if (fp->sf_rto > sctp->sctp_rto_max) {
fp->sf_rto = sctp->sctp_rto_max;
}
SCTP_MAX_RTO(sctp, fp);
}
pmtu = uinfo.iulp_mtu;
/*
* Record the MTU for this faddr. If the MTU for this faddr has
* changed, check if the association MTU will also change.
*/
if (fp->sf_isv4) {
hdrlen = sctp->sctp_hdr_len;
} else {
hdrlen = sctp->sctp_hdr6_len;
}
if ((fp->sf_pmss + hdrlen) != pmtu) {
/* Make sure that sf_pmss is a multiple of SCTP_ALIGN. */
fp->sf_pmss = (pmtu - hdrlen) & ~(SCTP_ALIGN - 1);
if (fp->sf_cwnd < (fp->sf_pmss * 2)) {
SET_CWND(fp, fp->sf_pmss,
sctps->sctps_slow_start_initial);
}
}
check_current:
if (fp == sctp->sctp_current)
sctp_set_faddr_current(sctp, fp);
}
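/*
 * Update IP's destination cache entries with the PMTU and RTT information
 * collected for each peer address of this association.
 */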
void
sctp_update_dce(sctp_t *sctp)
{
sctp_faddr_t *fp;
sctp_stack_t *sctps = sctp->sctp_sctps;
iulp_t uinfo;
ip_stack_t *ipst = sctps->sctps_netstack->netstack_ip;
uint_t ifindex;
for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
bzero(&uinfo, sizeof (uinfo));
/*
* Only record the PMTU for this faddr if we actually have
* done discovery.  This prevents the initialized default from
* clobbering any real info that IP may have.
*/
if (fp->sf_pmtu_discovered) {
if (fp->sf_isv4) {
uinfo.iulp_mtu = fp->sf_pmss +
sctp->sctp_hdr_len;
} else {
uinfo.iulp_mtu = fp->sf_pmss +
sctp->sctp_hdr6_len;
}
}
if (sctps->sctps_rtt_updates != 0 &&
fp->sf_rtt_updates >= sctps->sctps_rtt_updates) {
/*
* dce_update_uinfo() merges these values with the
* old values.
*/
uinfo.iulp_rtt = TICK_TO_MSEC(fp->sf_srtt);
uinfo.iulp_rtt_sd = TICK_TO_MSEC(fp->sf_rttvar);
fp->sf_rtt_updates = 0;
}
ifindex = 0;
if (IN6_IS_ADDR_LINKSCOPE(&fp->sf_faddr)) {
/*
* If we are going to create a DCE we'd better have
* an ifindex
*/
if (fp->sf_ixa->ixa_nce != NULL) {
ifindex = fp->sf_ixa->ixa_nce->nce_common->
ncec_ill->ill_phyint->phyint_ifindex;
} else {
continue;
}
}
(void) dce_update_uinfo(&fp->sf_faddr, ifindex, &uinfo, ipst);
}
}
/*
* The sender must later set the total length in the IP header.
*/
mblk_t *
sctp_make_mp(sctp_t *sctp, sctp_faddr_t *fp, int trailer)
{
mblk_t *mp;
size_t ipsctplen;
int isv4;
sctp_stack_t *sctps = sctp->sctp_sctps;
boolean_t src_changed = B_FALSE;
ASSERT(fp != NULL);
isv4 = fp->sf_isv4;
if (SCTP_IS_ADDR_UNSPEC(isv4, fp->sf_saddr) ||
(fp->sf_ixa->ixa_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
/* Need to pick a source */
sctp_get_dest(sctp, fp);
/*
* Although we still may not get an IRE, the source address
* may be changed in sctp_get_dest().  Set src_changed to
* true so that the source address is copied again.
*/
src_changed = B_TRUE;
}
/* There is no suitable source address to use, return. */
if (fp->sf_state == SCTP_FADDRS_UNREACH)
return (NULL);
ASSERT(fp->sf_ixa->ixa_ire != NULL);
ASSERT(!SCTP_IS_ADDR_UNSPEC(isv4, fp->sf_saddr));
if (isv4) {
ipsctplen = sctp->sctp_hdr_len;
} else {
ipsctplen = sctp->sctp_hdr6_len;
}
mp = allocb(ipsctplen + sctps->sctps_wroff_xtra + trailer, BPRI_MED);
if (mp == NULL) {
ip1dbg(("sctp_make_mp: error making mp..\n"));
return (NULL);
}
mp->b_rptr += sctps->sctps_wroff_xtra;
mp->b_wptr = mp->b_rptr + ipsctplen;
ASSERT(OK_32PTR(mp->b_wptr));
if (isv4) {
ipha_t *iph = (ipha_t *)mp->b_rptr;
bcopy(sctp->sctp_iphc, mp->b_rptr, ipsctplen);
if (fp != sctp->sctp_current || src_changed) {
/* Fix the source and destination addresses. */
IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr, iph->ipha_dst);
IN6_V4MAPPED_TO_IPADDR(&fp->sf_saddr, iph->ipha_src);
}
/* set or clear the don't fragment bit */
if (fp->sf_df) {
iph->ipha_fragment_offset_and_flags = htons(IPH_DF);
} else {
iph->ipha_fragment_offset_and_flags = 0;
}
} else {
bcopy(sctp->sctp_iphc6, mp->b_rptr, ipsctplen);
if (fp != sctp->sctp_current || src_changed) {
/* Fix the source and destination addresses. */
((ip6_t *)(mp->b_rptr))->ip6_dst = fp->sf_faddr;
((ip6_t *)(mp->b_rptr))->ip6_src = fp->sf_saddr;
}
}
ASSERT(sctp->sctp_connp != NULL);
return (mp);
}
/*
* Notify upper layers about preferred write offset, write size.
*/
void
sctp_set_ulp_prop(sctp_t *sctp)
{
int hdrlen;
struct sock_proto_props sopp;
sctp_stack_t *sctps = sctp->sctp_sctps;
if (sctp->sctp_current->sf_isv4) {
hdrlen = sctp->sctp_hdr_len;
} else {
hdrlen = sctp->sctp_hdr6_len;
}
ASSERT(sctp->sctp_ulpd);
sctp->sctp_connp->conn_wroff = sctps->sctps_wroff_xtra + hdrlen +
sizeof (sctp_data_hdr_t);
ASSERT(sctp->sctp_current->sf_pmss == sctp->sctp_mss);
bzero(&sopp, sizeof (sopp));
sopp.sopp_flags = SOCKOPT_MAXBLK|SOCKOPT_WROFF;
sopp.sopp_wroff = sctp->sctp_connp->conn_wroff;
sopp.sopp_maxblk = sctp->sctp_mss - sizeof (sctp_data_hdr_t);
sctp->sctp_ulp_prop(sctp->sctp_ulpd, &sopp);
}
/*
* Set the lengths in the packet and the transmit attributes.
*/
void
sctp_set_iplen(sctp_t *sctp, mblk_t *mp, ip_xmit_attr_t *ixa)
{
uint16_t sum = 0;
ipha_t *iph;
ip6_t *ip6h;
mblk_t *pmp = mp;
boolean_t isv4;
isv4 = (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION);
for (; pmp; pmp = pmp->b_cont)
sum += pmp->b_wptr - pmp->b_rptr;
ixa->ixa_pktlen = sum;
if (isv4) {
iph = (ipha_t *)mp->b_rptr;
iph->ipha_length = htons(sum);
ixa->ixa_ip_hdr_length = sctp->sctp_ip_hdr_len;
} else {
ip6h = (ip6_t *)mp->b_rptr;
ip6h->ip6_plen = htons(sum - IPV6_HDR_LEN);
ixa->ixa_ip_hdr_length = sctp->sctp_ip_hdr6_len;
}
}
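/*
 * Compare the peer address list a1 against a2 and return one of
 * SCTP_ADDR_EQUAL, SCTP_ADDR_SUBSET, SCTP_ADDR_OVERLAP or
 * SCTP_ADDR_DISJOINT describing how the two sets relate.
 */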
int
sctp_compare_faddrsets(sctp_faddr_t *a1, sctp_faddr_t *a2)
{
int na1 = 0;
int overlap = 0;
int equal = 1;
int onematch;
sctp_faddr_t *fp1, *fp2;
for (fp1 = a1; fp1; fp1 = fp1->sf_next) {
onematch = 0;
for (fp2 = a2; fp2; fp2 = fp2->sf_next) {
if (IN6_ARE_ADDR_EQUAL(&fp1->sf_faddr,
&fp2->sf_faddr)) {
overlap++;
onematch = 1;
break;
}
if (!onematch) {
equal = 0;
}
}
na1++;
}
if (equal) {
return (SCTP_ADDR_EQUAL);
}
if (overlap == na1) {
return (SCTP_ADDR_SUBSET);
}
if (overlap) {
return (SCTP_ADDR_OVERLAP);
}
return (SCTP_ADDR_DISJOINT);
}
/*
* Returns 0 on success, ENOMEM on memory allocation failure, EHOSTUNREACH
* if the connection credentials fail remote host accreditation or
* if the new destination does not support the previously established
* connection security label. If sleep is true, this function should
* never fail for a memory allocation failure. The boolean parameter
* "first" decides whether the newly created faddr structure should be
* added at the beginning of the list or at the end.
*
* Note: caller must hold conn fanout lock.
*/
int
sctp_add_faddr(sctp_t *sctp, in6_addr_t *addr, int sleep, boolean_t first)
{
sctp_faddr_t *faddr;
mblk_t *timer_mp;
int err;
conn_t *connp = sctp->sctp_connp;
if (is_system_labeled()) {
ip_xmit_attr_t *ixa = connp->conn_ixa;
ts_label_t *effective_tsl = NULL;
ASSERT(ixa->ixa_tsl != NULL);
/*
* Verify the destination is allowed to receive packets
* at the security label of the connection we are initiating.
*
* tsol_check_dest() will create a new effective label for
* this connection with a modified label or label flags only
* if there are changes from the original label.
*
* Accept whatever label we get if this is the first
* destination address for this connection. The security
* label and label flags must match any previous settings
* for all subsequent destination addresses.
*/
if (IN6_IS_ADDR_V4MAPPED(addr)) {
uint32_t dst;
IN6_V4MAPPED_TO_IPADDR(addr, dst);
err = tsol_check_dest(ixa->ixa_tsl,
&dst, IPV4_VERSION, connp->conn_mac_mode,
connp->conn_zone_is_global, &effective_tsl);
} else {
err = tsol_check_dest(ixa->ixa_tsl,
addr, IPV6_VERSION, connp->conn_mac_mode,
connp->conn_zone_is_global, &effective_tsl);
}
if (err != 0)
return (err);
if (sctp->sctp_faddrs == NULL && effective_tsl != NULL) {
ip_xmit_attr_replace_tsl(ixa, effective_tsl);
} else if (effective_tsl != NULL) {
label_rele(effective_tsl);
return (EHOSTUNREACH);
}
}
if ((faddr = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep)) == NULL)
return (ENOMEM);
bzero(faddr, sizeof (*faddr));
timer_mp = sctp_timer_alloc((sctp), sctp_rexmit_timer, sleep);
if (timer_mp == NULL) {
kmem_cache_free(sctp_kmem_faddr_cache, faddr);
return (ENOMEM);
}
((sctpt_t *)(timer_mp->b_rptr))->sctpt_faddr = faddr;
/* Start with any options set on the conn */
faddr->sf_ixa = conn_get_ixa_exclusive(connp);
if (faddr->sf_ixa == NULL) {
freemsg(timer_mp);
kmem_cache_free(sctp_kmem_faddr_cache, faddr);
return (ENOMEM);
}
faddr->sf_ixa->ixa_notify_cookie = connp->conn_sctp;
sctp_init_faddr(sctp, faddr, addr, timer_mp);
ASSERT(faddr->sf_ixa->ixa_cred != NULL);
/* ip_attr_connect didn't allow broadcast/multicast dest */
ASSERT(faddr->sf_next == NULL);
if (sctp->sctp_faddrs == NULL) {
ASSERT(sctp->sctp_lastfaddr == NULL);
/* only element on list; first and last are same */
sctp->sctp_faddrs = sctp->sctp_lastfaddr = faddr;
} else if (first) {
ASSERT(sctp->sctp_lastfaddr != NULL);
faddr->sf_next = sctp->sctp_faddrs;
sctp->sctp_faddrs = faddr;
} else {
sctp->sctp_lastfaddr->sf_next = faddr;
sctp->sctp_lastfaddr = faddr;
}
sctp->sctp_nfaddrs++;
return (0);
}
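/*
 * Return the peer address entry matching addr in the association's faddr
 * list, or NULL if it is not present.
 */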
sctp_faddr_t *
sctp_lookup_faddr(sctp_t *sctp, in6_addr_t *addr)
{
sctp_faddr_t *fp;
for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
if (IN6_ARE_ADDR_EQUAL(&fp->sf_faddr, addr))
break;
}
return (fp);
}
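/*
 * Same as sctp_lookup_faddr() except that the search starts from the given
 * faddr list rather than from an sctp_t.
 */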
sctp_faddr_t *
sctp_lookup_faddr_nosctp(sctp_faddr_t *fp, in6_addr_t *addr)
{
for (; fp; fp = fp->sf_next) {
if (IN6_ARE_ADDR_EQUAL(&fp->sf_faddr, addr)) {
break;
}
}
return (fp);
}
/*
* To change the currently used peer address to the specified one.
*/
void
sctp_set_faddr_current(sctp_t *sctp, sctp_faddr_t *fp)
{
/* Now setup the composite header. */
if (fp->sf_isv4) {
IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr,
sctp->sctp_ipha->ipha_dst);
IN6_V4MAPPED_TO_IPADDR(&fp->sf_saddr,
sctp->sctp_ipha->ipha_src);
/* update don't fragment bit */
if (fp->sf_df) {
sctp->sctp_ipha->ipha_fragment_offset_and_flags =
htons(IPH_DF);
} else {
sctp->sctp_ipha->ipha_fragment_offset_and_flags = 0;
}
} else {
sctp->sctp_ip6h->ip6_dst = fp->sf_faddr;
sctp->sctp_ip6h->ip6_src = fp->sf_saddr;
}
sctp->sctp_current = fp;
sctp->sctp_mss = fp->sf_pmss;
/* Update the upper layer for the change. */
if (!SCTP_IS_DETACHED(sctp))
sctp_set_ulp_prop(sctp);
}
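/*
 * Re-run destination/source address selection for every peer address.
 */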
void
sctp_redo_faddr_srcs(sctp_t *sctp)
{
sctp_faddr_t *fp;
for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
sctp_get_dest(sctp, fp);
}
}
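/*
 * Mark the peer address fp as alive: clear its strike count, reschedule its
 * heartbeat and, if it was previously down, refresh its routing information
 * and switch back to it if it is the primary address.
 */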
void
sctp_faddr_alive(sctp_t *sctp, sctp_faddr_t *fp)
{
int64_t now = LBOLT_FASTPATH64;
/*
* If we are under memory pressure, we abort associations that have been
* waiting in the zero window probing state for too long.  We do this by
* not resetting sctp_strikes.  So if sctp_zero_win_probe continues while
* we are under memory pressure, this association will eventually time
* out.
*/
if (!sctp->sctp_zero_win_probe || !sctp->sctp_sctps->sctps_reclaim) {
sctp->sctp_strikes = 0;
}
fp->sf_strikes = 0;
fp->sf_lastactive = now;
fp->sf_hb_expiry = now + SET_HB_INTVL(fp);
fp->sf_hb_pending = B_FALSE;
if (fp->sf_state != SCTP_FADDRS_ALIVE) {
fp->sf_state = SCTP_FADDRS_ALIVE;
sctp_intf_event(sctp, fp->sf_faddr, SCTP_ADDR_AVAILABLE, 0);
/* Should have a full IRE now */
sctp_get_dest(sctp, fp);
/*
* If this is the primary, switch back to it now. And
* we probably want to reset the source addr used to reach
* it.
* Note that if we didn't find a source in sctp_get_dest
* then we'd be unreachable at this point in time.
*/
if (fp == sctp->sctp_primary &&
fp->sf_state != SCTP_FADDRS_UNREACH) {
sctp_set_faddr_current(sctp, fp);
return;
}
}
}
/*
* Return B_TRUE if there is still an active peer address with zero strikes;
* otherwise return B_FALSE.
*/
boolean_t
sctp_is_a_faddr_clean(sctp_t *sctp)
{
sctp_faddr_t *fp;
for (fp = sctp->sctp_faddrs; fp; fp = fp->sf_next) {
if (fp->sf_state == SCTP_FADDRS_ALIVE && fp->sf_strikes == 0) {
return (B_TRUE);
}
}
return (B_FALSE);
}
/*
* Returns 0 if there is at least one other active faddr, -1 if there
* are none. If there are none left, faddr_dead() will start killing the
* association.
* If the downed faddr was the current faddr, a new current faddr
* will be chosen.
*/
int
sctp_faddr_dead(sctp_t *sctp, sctp_faddr_t *fp, int newstate)
{
sctp_faddr_t *ofp;
sctp_stack_t *sctps = sctp->sctp_sctps;
if (fp->sf_state == SCTP_FADDRS_ALIVE) {
sctp_intf_event(sctp, fp->sf_faddr, SCTP_ADDR_UNREACHABLE, 0);
}
fp->sf_state = newstate;
dprint(1, ("sctp_faddr_dead: %x:%x:%x:%x down (state=%d)\n",
SCTP_PRINTADDR(fp->sf_faddr), newstate));
if (fp == sctp->sctp_current) {
/* Current faddr down; need to switch it */
sctp->sctp_current = NULL;
}
/* Find next alive faddr */
ofp = fp;
for (fp = fp->sf_next; fp != NULL; fp = fp->sf_next) {
if (fp->sf_state == SCTP_FADDRS_ALIVE) {
break;
}
}
if (fp == NULL) {
/* Continue from beginning of list */
for (fp = sctp->sctp_faddrs; fp != ofp; fp = fp->sf_next) {
if (fp->sf_state == SCTP_FADDRS_ALIVE) {
break;
}
}
}
/*
* If we found another alive faddr and the current faddr is dead, use
* the new fp as the current one.
*/
if (fp != ofp) {
if (sctp->sctp_current == NULL) {
dprint(1, ("sctp_faddr_dead: failover->%x:%x:%x:%x\n",
SCTP_PRINTADDR(fp->sf_faddr)));
/*
* Note that we don't need to reset the source addr
* of the new fp.
*/
sctp_set_faddr_current(sctp, fp);
}
return (0);
}
/* All faddrs are down; kill the association */
dprint(1, ("sctp_faddr_dead: all faddrs down, killing assoc\n"));
SCTPS_BUMP_MIB(sctps, sctpAborted);
sctp_assoc_event(sctp, sctp->sctp_state < SCTPS_ESTABLISHED ?
SCTP_CANT_STR_ASSOC : SCTP_COMM_LOST, 0, NULL);
sctp_clean_death(sctp, sctp->sctp_client_errno ?
sctp->sctp_client_errno : ETIMEDOUT);
return (-1);
}
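/*
 * Choose the peer address to use next, starting the search after ofp.
 * Prefer an alive address with zero strikes; failing that, use the alive
 * address with the fewest strikes; otherwise stay with ofp.
 */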
sctp_faddr_t *
sctp_rotate_faddr(sctp_t *sctp, sctp_faddr_t *ofp)
{
sctp_faddr_t *nfp = NULL;
sctp_faddr_t *saved_fp = NULL;
int min_strikes;
if (ofp == NULL) {
ofp = sctp->sctp_current;
}
/* Nothing to do */
if (sctp->sctp_nfaddrs < 2)
return (ofp);
/*
* Find the next live peer address with zero strikes. In case
* there is none, find the one with the lowest number of strikes.
*/
min_strikes = ofp->sf_strikes;
nfp = ofp->sf_next;
while (nfp != ofp) {
/* If reached end of list, continue scan from the head */
if (nfp == NULL) {
nfp = sctp->sctp_faddrs;
continue;
}
if (nfp->sf_state == SCTP_FADDRS_ALIVE) {
if (nfp->sf_strikes == 0)
break;
if (nfp->sf_strikes < min_strikes) {
min_strikes = nfp->sf_strikes;
saved_fp = nfp;
}
}
nfp = nfp->sf_next;
}
/* If reached the old address, there is no zero strike path */
if (nfp == ofp)
nfp = NULL;
/*
* If there is a peer address with zero strikes, use that.  If not,
* return a peer address with fewer strikes than the one last used.
* If neither exists, we may as well stay with the old one.
*/
if (nfp != NULL)
return (nfp);
if (saved_fp != NULL)
return (saved_fp);
return (ofp);
}
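/*
 * Remove fp from the association's peer address list and free it together
 * with its timers and transmit attributes.
 */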
void
sctp_unlink_faddr(sctp_t *sctp, sctp_faddr_t *fp)
{
sctp_faddr_t *fpp;
if (!sctp->sctp_faddrs) {
return;
}
if (fp->sf_timer_mp != NULL) {
sctp_timer_free(fp->sf_timer_mp);
fp->sf_timer_mp = NULL;
fp->sf_timer_running = 0;
}
if (fp->sf_rc_timer_mp != NULL) {
sctp_timer_free(fp->sf_rc_timer_mp);
fp->sf_rc_timer_mp = NULL;
fp->sf_rc_timer_running = 0;
}
if (fp->sf_ixa != NULL) {
ixa_refrele(fp->sf_ixa);
fp->sf_ixa = NULL;
}
if (fp == sctp->sctp_faddrs) {
goto gotit;
}
for (fpp = sctp->sctp_faddrs; fpp->sf_next != fp; fpp = fpp->sf_next)
;
gotit:
ASSERT(sctp->sctp_conn_tfp != NULL);
mutex_enter(&sctp->sctp_conn_tfp->tf_lock);
if (fp == sctp->sctp_faddrs) {
sctp->sctp_faddrs = fp->sf_next;
} else {
fpp->sf_next = fp->sf_next;
}
mutex_exit(&sctp->sctp_conn_tfp->tf_lock);
kmem_cache_free(sctp_kmem_faddr_cache, fp);
sctp->sctp_nfaddrs--;
}
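/*
 * Free the entire peer address list.  If the association is in the conn
 * fanout and the caller does not already hold the fanout lock, take it
 * while the list is being torn down.
 */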
void
sctp_zap_faddrs(sctp_t *sctp, int caller_holds_lock)
{
sctp_faddr_t *fp, *fpn;
if (sctp->sctp_faddrs == NULL) {
ASSERT(sctp->sctp_lastfaddr == NULL);
return;
}
ASSERT(sctp->sctp_lastfaddr != NULL);
sctp->sctp_lastfaddr = NULL;
sctp->sctp_current = NULL;
sctp->sctp_primary = NULL;
sctp_free_faddr_timers(sctp);
if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) {
/* in conn fanout; need to hold lock */
mutex_enter(&sctp->sctp_conn_tfp->tf_lock);
}
for (fp = sctp->sctp_faddrs; fp; fp = fpn) {
fpn = fp->sf_next;
if (fp->sf_ixa != NULL) {
ixa_refrele(fp->sf_ixa);
fp->sf_ixa = NULL;
}
kmem_cache_free(sctp_kmem_faddr_cache, fp);
sctp->sctp_nfaddrs--;
}
sctp->sctp_faddrs = NULL;
ASSERT(sctp->sctp_nfaddrs == 0);
if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) {
mutex_exit(&sctp->sctp_conn_tfp->tf_lock);
}
}
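/*
 * Free both the peer (faddr) and the local (saddr) address lists.
 */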
void
sctp_zap_addrs(sctp_t *sctp)
{
sctp_zap_faddrs(sctp, 0);
sctp_free_saddrs(sctp);
}
/*
* Build two SCTP header templates; one for IPv4 and one for IPv6.
* Store them in sctp_iphc and sctp_iphc6 respectively (and related fields).
* There are no IP addresses in the templates, but the port numbers and
* verification tag are filled in from the conn_t and sctp_t.
*
* Returns failure if memory can't be allocated, or if there is a problem
* with a routing header/option.
*
* We allocate space for the minimum sctp header (sctp_hdr_t).
*
* We massage any routing option/header.  There is no checksum implication
* for a routing header for sctp.
*
* Caller needs to update conn_wroff if desired.
*
* TSol notes: This assumes that an SCTP association has a single peer label
* since we only track a single pair of ipp_label_v4/v6 and not a separate
* one for each faddr.
*/
int
sctp_build_hdrs(sctp_t *sctp, int sleep)
{
conn_t *connp = sctp->sctp_connp;
ip_pkt_t *ipp = &connp->conn_xmit_ipp;
uint_t ip_hdr_length;
uchar_t *hdrs;
uint_t hdrs_len;
uint_t ulp_hdr_length = sizeof (sctp_hdr_t);
ipha_t *ipha;
ip6_t *ip6h;
sctp_hdr_t *sctph;
in6_addr_t v6src, v6dst;
ipaddr_t v4src, v4dst;
v4src = connp->conn_saddr_v4;
v4dst = connp->conn_faddr_v4;
v6src = connp->conn_saddr_v6;
v6dst = connp->conn_faddr_v6;
/* First do IPv4 header */
ip_hdr_length = ip_total_hdrs_len_v4(ipp);
/* In case of TX label and IP options it can be too much */
if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
/* Preserves existing TX errno for this */
return (EHOSTUNREACH);
}
hdrs_len = ip_hdr_length + ulp_hdr_length;
ASSERT(hdrs_len != 0);
if (hdrs_len != sctp->sctp_iphc_len) {
/* Allocate new before we free any old */
hdrs = kmem_alloc(hdrs_len, sleep);
if (hdrs == NULL)
return (ENOMEM);
if (sctp->sctp_iphc != NULL)
kmem_free(sctp->sctp_iphc, sctp->sctp_iphc_len);
sctp->sctp_iphc = hdrs;
sctp->sctp_iphc_len = hdrs_len;
} else {
hdrs = sctp->sctp_iphc;
}
sctp->sctp_hdr_len = sctp->sctp_iphc_len;
sctp->sctp_ip_hdr_len = ip_hdr_length;
sctph = (sctp_hdr_t *)(hdrs + ip_hdr_length);
sctp->sctp_sctph = sctph;
sctph->sh_sport = connp->conn_lport;
sctph->sh_dport = connp->conn_fport;
sctph->sh_verf = sctp->sctp_fvtag;
sctph->sh_chksum = 0;
ipha = (ipha_t *)hdrs;
sctp->sctp_ipha = ipha;
ipha->ipha_src = v4src;
ipha->ipha_dst = v4dst;
ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
ipha->ipha_length = htons(hdrs_len);
ipha->ipha_fragment_offset_and_flags = 0;
if (ipp->ipp_fields & IPPF_IPV4_OPTIONS)
(void) ip_massage_options(ipha, connp->conn_netstack);
/* Now IPv6 */
ip_hdr_length = ip_total_hdrs_len_v6(ipp);
hdrs_len = ip_hdr_length + ulp_hdr_length;
ASSERT(hdrs_len != 0);
if (hdrs_len != sctp->sctp_iphc6_len) {
/* Allocate new before we free any old */
hdrs = kmem_alloc(hdrs_len, sleep);
if (hdrs == NULL)
return (ENOMEM);
if (sctp->sctp_iphc6 != NULL)
kmem_free(sctp->sctp_iphc6, sctp->sctp_iphc6_len);
sctp->sctp_iphc6 = hdrs;
sctp->sctp_iphc6_len = hdrs_len;
} else {
hdrs = sctp->sctp_iphc6;
}
sctp->sctp_hdr6_len = sctp->sctp_iphc6_len;
sctp->sctp_ip_hdr6_len = ip_hdr_length;
sctph = (sctp_hdr_t *)(hdrs + ip_hdr_length);
sctp->sctp_sctph6 = sctph;
sctph->sh_sport = connp->conn_lport;
sctph->sh_dport = connp->conn_fport;
sctph->sh_verf = sctp->sctp_fvtag;
sctph->sh_chksum = 0;
ip6h = (ip6_t *)hdrs;
sctp->sctp_ip6h = ip6h;
ip6h->ip6_src = v6src;
ip6h->ip6_dst = v6dst;
ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
connp->conn_flowinfo);
ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);
if (ipp->ipp_fields & IPPF_RTHDR) {
uint8_t *end;
ip6_rthdr_t *rth;
end = (uint8_t *)ip6h + ip_hdr_length;
rth = ip_find_rthdr_v6(ip6h, end);
if (rth != NULL) {
(void) ip_massage_options_v6(ip6h, rth,
connp->conn_netstack);
}
/*
* Verify that the first hop isn't a mapped address.
* Routers along the path need to do this verification
* for subsequent hops.
*/
if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
return (EADDRNOTAVAIL);
}
return (0);
}
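/*
 * Update the Trusted Extensions label in conn_xmit_ipp for an IPv4 or an
 * IPv6 peer address respectively.
 */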
static int
sctp_v4_label(sctp_t *sctp, sctp_faddr_t *fp)
{
conn_t *connp = sctp->sctp_connp;
ASSERT(fp->sf_ixa->ixa_flags & IXAF_IS_IPV4);
return (conn_update_label(connp, fp->sf_ixa, &fp->sf_faddr,
&connp->conn_xmit_ipp));
}
static int
sctp_v6_label(sctp_t *sctp, sctp_faddr_t *fp)
{
conn_t *connp = sctp->sctp_connp;
ASSERT(!(fp->sf_ixa->ixa_flags & IXAF_IS_IPV4));
return (conn_update_label(connp, fp->sf_ixa, &fp->sf_faddr,
&connp->conn_xmit_ipp));
}
/*
* XXX implement more sophisticated logic
*
* Tsol note: We have already verified the addresses using tsol_check_dest
* in sctp_add_faddr, thus no need to redo that here.
* We do setup ipp_label_v4 and ipp_label_v6 based on which addresses
* we have.
*/
int
sctp_set_hdraddrs(sctp_t *sctp)
{
sctp_faddr_t *fp;
int gotv4 = 0;
int gotv6 = 0;
conn_t *connp = sctp->sctp_connp;
ASSERT(sctp->sctp_faddrs != NULL);
ASSERT(sctp->sctp_nsaddrs > 0);
/* Set up using the primary first */
connp->conn_faddr_v6 = sctp->sctp_primary->sf_faddr;
/* saddr may be unspec; make_mp() will handle this */
connp->conn_saddr_v6 = sctp->sctp_primary->sf_saddr;
connp->conn_laddr_v6 = connp->conn_saddr_v6;
if (IN6_IS_ADDR_V4MAPPED(&sctp->sctp_primary->sf_faddr)) {
if (!is_system_labeled() ||
sctp_v4_label(sctp, sctp->sctp_primary) == 0) {
gotv4 = 1;
if (connp->conn_family == AF_INET) {
goto done;
}
}
} else {
if (!is_system_labeled() ||
sctp_v6_label(sctp, sctp->sctp_primary) == 0) {
gotv6 = 1;
}
}
for (fp = sctp->sctp_faddrs; fp; fp = fp->sf_next) {
if (!gotv4 && IN6_IS_ADDR_V4MAPPED(&fp->sf_faddr)) {
if (!is_system_labeled() ||
sctp_v4_label(sctp, fp) == 0) {
gotv4 = 1;
if (connp->conn_family == AF_INET || gotv6) {
break;
}
}
} else if (!gotv6 && !IN6_IS_ADDR_V4MAPPED(&fp->sf_faddr)) {
if (!is_system_labeled() ||
sctp_v6_label(sctp, fp) == 0) {
gotv6 = 1;
if (gotv4)
break;
}
}
}
done:
if (!gotv4 && !gotv6)
return (EACCES);
return (0);
}
/*
* got_errchunk is set B_TRUE only if called from validate_init_params(), when
* an ERROR chunk has already been prepended, the size of which needs updating
* for additional unrecognized parameters.  Other callers either prepend the
* ERROR chunk with the correct size after calling this function, or they are
* calling to add an invalid parameter to an INIT_ACK chunk, in which case no
* ERROR chunk exists and the CAUSE blocks go into the INIT_ACK directly.
*
* *errmp will be non-NULL both when adding an additional CAUSE block to an
* existing prepended COOKIE ERROR chunk (processing params of an INIT_ACK),
* and when adding unrecognized parameters after the first, to an INIT_ACK
* (processing params of an INIT chunk).
*/
void
sctp_add_unrec_parm(sctp_parm_hdr_t *uph, mblk_t **errmp,
boolean_t got_errchunk)
{
mblk_t *mp;
sctp_parm_hdr_t *ph;
size_t len;
int pad;
sctp_chunk_hdr_t *ecp;
len = sizeof (*ph) + ntohs(uph->sph_len);
if ((pad = len % SCTP_ALIGN) != 0) {
pad = SCTP_ALIGN - pad;
len += pad;
}
mp = allocb(len, BPRI_MED);
if (mp == NULL) {
return;
}
ph = (sctp_parm_hdr_t *)(mp->b_rptr);
ph->sph_type = htons(PARM_UNRECOGNIZED);
ph->sph_len = htons(len - pad);
/* copy in the unrecognized parameter */
bcopy(uph, ph + 1, ntohs(uph->sph_len));
if (pad != 0)
bzero((mp->b_rptr + len - pad), pad);
mp->b_wptr = mp->b_rptr + len;
if (*errmp != NULL) {
/*
* Update total length if an ERROR chunk, then link
* this CAUSE block to the possible chain of CAUSE
* blocks attached to the ERROR chunk or INIT_ACK
* being created.
*/
if (got_errchunk) {
/* ERROR chunk already prepended */
ecp = (sctp_chunk_hdr_t *)((*errmp)->b_rptr);
ecp->sch_len = htons(ntohs(ecp->sch_len) + len);
}
linkb(*errmp, mp);
} else {
*errmp = mp;
}
}
/*
* o Bounds checking
* o Updates remaining
* o Checks alignment
*/
sctp_parm_hdr_t *
sctp_next_parm(sctp_parm_hdr_t *current, ssize_t *remaining)
{
int pad;
uint16_t len;
len = ntohs(current->sph_len);
*remaining -= len;
if (*remaining < sizeof (*current) || len < sizeof (*current)) {
return (NULL);
}
if ((pad = len & (SCTP_ALIGN - 1)) != 0) {
pad = SCTP_ALIGN - pad;
*remaining -= pad;
}
/*LINTED pointer cast may result in improper alignment*/
current = (sctp_parm_hdr_t *)((char *)current + len + pad);
return (current);
}
/*
* Sets the address parameters given in the INIT chunk into sctp's
* faddrs; if psctp is non-NULL, copies psctp's saddrs. If there are
* no address parameters in the INIT chunk, a single faddr is created
* from the ip hdr at the beginning of pkt.
* If there already are existing addresses hanging from sctp, merge
* them in; if the old info contains addresses which are not present
* in this new info, get rid of them, and clear the pointers of any
* messages which have one of them as their target address.
*
* We also re-adjust the source address list here since the list may
* contain more than what is actually part of the association. If
* we get here from sctp_send_cookie_echo(), we are on the active
* side and psctp will be NULL and ich will be the INIT-ACK chunk.
* If we get here from sctp_accept_comm(), ich will be the INIT chunk
* and psctp will be the listening endpoint.
*
* INIT processing: When processing the INIT we inherit the src address
* list from the listener. For a loopback or linklocal association, we
* delete the list and just take the address from the IP header (since
* that's how we created the INIT-ACK). Additionally, for loopback we
* ignore the address params in the INIT. For determining which address
* types were sent in the INIT-ACK we follow the same logic as in
* creating the INIT-ACK. We delete addresses of the type that are not
* supported by the peer.
*
* INIT-ACK processing: When processing the INIT-ACK since we had not
* included addr params for loopback or linklocal addresses when creating
* the INIT, we just use the address from the IP header. Further, for
* loopback we ignore the addr param list. We mark addresses of the
* type not supported by the peer as unconfirmed.
*
* In case of INIT processing we look for supported address types in the
* supported address param, if present. In both cases the address type in
* the IP header is supported as well as types for addresses in the param
* list, if any.
*
* Once we have the supported address types sctp_check_saddr() runs through
* the source address list and deletes or marks as unconfirmed address of
* types not supported by the peer.
*
* Returns 0 on success, sys errno on failure
*/
int
sctp_get_addrparams(sctp_t *sctp, sctp_t *psctp, mblk_t *pkt,
sctp_chunk_hdr_t *ich, uint_t *sctp_options)
{
sctp_init_chunk_t *init;
ipha_t *iph;
ip6_t *ip6h;
in6_addr_t hdrsaddr[1];
in6_addr_t hdrdaddr[1];
sctp_parm_hdr_t *ph;
ssize_t remaining;
int isv4;
int err;
sctp_faddr_t *fp;
int supp_af = 0;
boolean_t check_saddr = B_TRUE;
in6_addr_t curaddr;
sctp_stack_t *sctps = sctp->sctp_sctps;
conn_t *connp = sctp->sctp_connp;
if (sctp_options != NULL)
*sctp_options = 0;
/* extract the address from the IP header */
isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION);
if (isv4) {
iph = (ipha_t *)pkt->b_rptr;
IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdrsaddr);
IN6_IPADDR_TO_V4MAPPED(iph->ipha_dst, hdrdaddr);
supp_af |= PARM_SUPP_V4;
} else {
ip6h = (ip6_t *)pkt->b_rptr;
hdrsaddr[0] = ip6h->ip6_src;
hdrdaddr[0] = ip6h->ip6_dst;
supp_af |= PARM_SUPP_V6;
}
/*
* Unfortunately, we can't delay this because adding an faddr
* looks for the presence of the source address (from the ire
* for the faddr) in the source address list. We could have
* delayed this if, say, this was a loopback/linklocal connection.
* Now, we just end up nuking this list and taking the addr from
* the IP header for loopback/linklocal.
*/
if (psctp != NULL && psctp->sctp_nsaddrs > 0) {
ASSERT(sctp->sctp_nsaddrs == 0);
err = sctp_dup_saddrs(psctp, sctp, KM_NOSLEEP);
if (err != 0)
return (err);
}
/*
* We will add the faddr before parsing the address list as this
* might be a loopback connection and we would not have to
* go through the list.
*
* Make sure the header's addr is in the list
*/
fp = sctp_lookup_faddr(sctp, hdrsaddr);
if (fp == NULL) {
/* not included; add it now */
err = sctp_add_faddr(sctp, hdrsaddr, KM_NOSLEEP, B_TRUE);
if (err != 0)
return (err);
/* sctp_faddrs will be the hdr addr */
fp = sctp->sctp_faddrs;
}
/* make the header addr the primary */
if (cl_sctp_assoc_change != NULL && psctp == NULL)
curaddr = sctp->sctp_current->sf_faddr;
sctp->sctp_primary = fp;
sctp->sctp_current = fp;
sctp->sctp_mss = fp->sf_pmss;
/* For loopback connections & linklocal get address from the header */
if (sctp->sctp_loopback || sctp->sctp_linklocal) {
if (sctp->sctp_nsaddrs != 0)
sctp_free_saddrs(sctp);
if ((err = sctp_saddr_add_addr(sctp, hdrdaddr, 0)) != 0)
return (err);
/* For loopback ignore address list */
if (sctp->sctp_loopback)
return (0);
check_saddr = B_FALSE;
}
/* Walk the params in the INIT [ACK], pulling out addr params */
remaining = ntohs(ich->sch_len) - sizeof (*ich) -
sizeof (sctp_init_chunk_t);
if (remaining < sizeof (*ph)) {
if (check_saddr) {
sctp_check_saddr(sctp, supp_af, psctp == NULL ?
B_FALSE : B_TRUE, hdrdaddr);
}
ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL);
return (0);
}
init = (sctp_init_chunk_t *)(ich + 1);
ph = (sctp_parm_hdr_t *)(init + 1);
/* params will have already been byteordered when validating */
while (ph != NULL) {
if (ph->sph_type == htons(PARM_SUPP_ADDRS)) {
int plen;
uint16_t *p;
uint16_t addrtype;
ASSERT(psctp != NULL);
plen = ntohs(ph->sph_len);
p = (uint16_t *)(ph + 1);
while (plen > 0) {
addrtype = ntohs(*p);
switch (addrtype) {
case PARM_ADDR6:
supp_af |= PARM_SUPP_V6;
break;
case PARM_ADDR4:
supp_af |= PARM_SUPP_V4;
break;
default:
break;
}
p++;
plen -= sizeof (*p);
}
} else if (ph->sph_type == htons(PARM_ADDR4)) {
if (remaining >= PARM_ADDR4_LEN) {
in6_addr_t addr;
ipaddr_t ta;
supp_af |= PARM_SUPP_V4;
/*
* Screen out broad/multicasts & loopback.
* If the endpoint only accepts v6 addresses,
* go to the next one.
*
* Subnet broadcast check is done in
* sctp_add_faddr(). If the address is
* a broadcast address, it won't be added.
*/
bcopy(ph + 1, &ta, sizeof (ta));
if (ta == 0 ||
ta == INADDR_BROADCAST ||
ta == htonl(INADDR_LOOPBACK) ||
CLASSD(ta) || connp->conn_ipv6_v6only) {
goto next;
}
IN6_INADDR_TO_V4MAPPED((struct in_addr *)
(ph + 1), &addr);
/* Check for duplicate. */
if (sctp_lookup_faddr(sctp, &addr) != NULL)
goto next;
/* OK, add it to the faddr set */
err = sctp_add_faddr(sctp, &addr, KM_NOSLEEP,
B_FALSE);
/* Something is wrong... Try the next one. */
if (err != 0)
goto next;
}
} else if (ph->sph_type == htons(PARM_ADDR6) &&
connp->conn_family == AF_INET6) {
/* A v4 socket should not take v6 addresses. */
if (remaining >= PARM_ADDR6_LEN) {
in6_addr_t *addr6;
supp_af |= PARM_SUPP_V6;
addr6 = (in6_addr_t *)(ph + 1);
/*
* Screen out link locals, mcast, loopback
* and bogus v6 addresses.
*/
if (IN6_IS_ADDR_LINKLOCAL(addr6) ||
IN6_IS_ADDR_MULTICAST(addr6) ||
IN6_IS_ADDR_LOOPBACK(addr6) ||
IN6_IS_ADDR_V4MAPPED(addr6)) {
goto next;
}
/* Check for duplicate. */
if (sctp_lookup_faddr(sctp, addr6) != NULL)
goto next;
err = sctp_add_faddr(sctp,
(in6_addr_t *)(ph + 1), KM_NOSLEEP,
B_FALSE);
/* Something is wrong... Try the next one. */
if (err != 0)
goto next;
}
} else if (ph->sph_type == htons(PARM_FORWARD_TSN)) {
if (sctp_options != NULL)
*sctp_options |= SCTP_PRSCTP_OPTION;
} /* else; skip */
next:
ph = sctp_next_parm(ph, &remaining);
}
if (check_saddr) {
sctp_check_saddr(sctp, supp_af, psctp == NULL ? B_FALSE :
B_TRUE, hdrdaddr);
}
ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL);
/*
* We have the right address list now, update clustering's
* knowledge because when we sent the INIT we had just added
* the address the INIT was sent to.
*/
if (psctp == NULL && cl_sctp_assoc_change != NULL) {
uchar_t *alist;
size_t asize;
uchar_t *dlist;
size_t dsize;
asize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
alist = kmem_alloc(asize, KM_NOSLEEP);
if (alist == NULL) {
SCTP_KSTAT(sctps, sctp_cl_assoc_change);
return (ENOMEM);
}
/*
* Just include the address the INIT was sent to in the
* delete list and send the entire faddr list. We could
* do it differently (i.e include all the addresses in the
* add list even if it contains the original address OR
* remove the original address from the add list etc.), but
* this seems reasonable enough.
*/
dsize = sizeof (in6_addr_t);
dlist = kmem_alloc(dsize, KM_NOSLEEP);
if (dlist == NULL) {
kmem_free(alist, asize);
SCTP_KSTAT(sctps, sctp_cl_assoc_change);
return (ENOMEM);
}
bcopy(&curaddr, dlist, sizeof (curaddr));
sctp_get_faddr_list(sctp, alist, asize);
(*cl_sctp_assoc_change)(connp->conn_family, alist, asize,
sctp->sctp_nfaddrs, dlist, dsize, 1, SCTP_CL_PADDR,
(cl_sctp_handle_t)sctp);
/* alist and dlist will be freed by the clustering module */
}
return (0);
}
/*
* Returns 0 if the check failed and the restart should be refused,
* 1 if the check succeeded.
*/
int
sctp_secure_restart_check(mblk_t *pkt, sctp_chunk_hdr_t *ich, uint32_t ports,
int sleep, sctp_stack_t *sctps, ip_recv_attr_t *ira)
{
sctp_faddr_t *fp, *fphead = NULL;
sctp_parm_hdr_t *ph;
ssize_t remaining;
int isv4;
ipha_t *iph;
ip6_t *ip6h;
in6_addr_t hdraddr[1];
int retval = 0;
sctp_tf_t *tf;
sctp_t *sctp;
int compres;
sctp_init_chunk_t *init;
int nadded = 0;
/* extract the address from the IP header */
isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION);
if (isv4) {
iph = (ipha_t *)pkt->b_rptr;
IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdraddr);
} else {
ip6h = (ip6_t *)pkt->b_rptr;
hdraddr[0] = ip6h->ip6_src;
}
/* Walk the params in the INIT [ACK], pulling out addr params */
remaining = ntohs(ich->sch_len) - sizeof (*ich) -
sizeof (sctp_init_chunk_t);
if (remaining < sizeof (*ph)) {
/* no parameters; restart OK */
return (1);
}
init = (sctp_init_chunk_t *)(ich + 1);
ph = (sctp_parm_hdr_t *)(init + 1);
while (ph != NULL) {
sctp_faddr_t *fpa = NULL;
/* params will have already been byteordered when validating */
if (ph->sph_type == htons(PARM_ADDR4)) {
if (remaining >= PARM_ADDR4_LEN) {
in6_addr_t addr;
IN6_INADDR_TO_V4MAPPED((struct in_addr *)
(ph + 1), &addr);
fpa = kmem_cache_alloc(sctp_kmem_faddr_cache,
sleep);
if (fpa == NULL) {
goto done;
}
bzero(fpa, sizeof (*fpa));
fpa->sf_faddr = addr;
fpa->sf_next = NULL;
}
} else if (ph->sph_type == htons(PARM_ADDR6)) {
if (remaining >= PARM_ADDR6_LEN) {
fpa = kmem_cache_alloc(sctp_kmem_faddr_cache,
sleep);
if (fpa == NULL) {
goto done;
}
bzero(fpa, sizeof (*fpa));
bcopy(ph + 1, &fpa->sf_faddr,
sizeof (fpa->sf_faddr));
fpa->sf_next = NULL;
}
}
/* link in the new addr, if it was an addr param */
if (fpa != NULL) {
if (fphead == NULL) {
fphead = fpa;
} else {
fpa->sf_next = fphead;
fphead = fpa;
}
}
ph = sctp_next_parm(ph, &remaining);
}
if (fphead == NULL) {
/* no addr parameters; restart OK */
return (1);
}
/*
* got at least one; make sure the header's addr is
* in the list
*/
fp = sctp_lookup_faddr_nosctp(fphead, hdraddr);
if (fp == NULL) {
/* not included; add it now */
fp = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep);
if (fp == NULL) {
goto done;
}
bzero(fp, sizeof (*fp));
fp->sf_faddr = *hdraddr;
fp->sf_next = fphead;
fphead = fp;
}
/*
* Now, we can finally do the check: For each sctp instance
* on the hash line for ports, compare its faddr set against
* the new one. If the new one is a strict subset of any
* existing sctp's faddrs, the restart is OK. However, if there
* is an overlap, this could be an attack, so return failure.
* If all sctp's faddrs are disjoint, this is a legitimate new
* association.
*/
tf = &(sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, ports)]);
mutex_enter(&tf->tf_lock);
for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
if (ports != sctp->sctp_connp->conn_ports) {
continue;
}
compres = sctp_compare_faddrsets(fphead, sctp->sctp_faddrs);
if (compres <= SCTP_ADDR_SUBSET) {
retval = 1;
mutex_exit(&tf->tf_lock);
goto done;
}
if (compres == SCTP_ADDR_OVERLAP) {
dprint(1,
("new assoc from %x:%x:%x:%x overlaps with %p\n",
SCTP_PRINTADDR(*hdraddr), (void *)sctp));
/*
* While we still hold the lock, we need to
* figure out which addresses have been
* added so we can include them in the abort
* we will send back. Since these faddrs will
* never be used, we overload the rto field
* here, setting it to 0 if the address was
* not added, 1 if it was added.
*/
for (fp = fphead; fp; fp = fp->sf_next) {
if (sctp_lookup_faddr(sctp, &fp->sf_faddr)) {
fp->sf_rto = 0;
} else {
fp->sf_rto = 1;
nadded++;
}
}
mutex_exit(&tf->tf_lock);
goto done;
}
}
mutex_exit(&tf->tf_lock);
/* All faddrs are disjoint; legit new association */
retval = 1;
done:
/* If there are attempted adds, send back an abort listing the addrs */
if (nadded > 0) {
void *dtail;
size_t dlen;
dtail = kmem_alloc(PARM_ADDR6_LEN * nadded, KM_NOSLEEP);
if (dtail == NULL) {
goto cleanup;
}
ph = dtail;
dlen = 0;
for (fp = fphead; fp; fp = fp->sf_next) {
if (fp->sf_rto == 0) {
continue;
}
if (IN6_IS_ADDR_V4MAPPED(&fp->sf_faddr)) {
ipaddr_t addr4;
ph->sph_type = htons(PARM_ADDR4);
ph->sph_len = htons(PARM_ADDR4_LEN);
IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr, addr4);
ph++;
bcopy(&addr4, ph, sizeof (addr4));
ph = (sctp_parm_hdr_t *)
((char *)ph + sizeof (addr4));
dlen += PARM_ADDR4_LEN;
} else {
ph->sph_type = htons(PARM_ADDR6);
ph->sph_len = htons(PARM_ADDR6_LEN);
ph++;
bcopy(&fp->sf_faddr, ph, sizeof (fp->sf_faddr));
ph = (sctp_parm_hdr_t *)
((char *)ph + sizeof (fp->sf_faddr));
dlen += PARM_ADDR6_LEN;
}
}
/* Send off the abort */
sctp_send_abort(sctp, sctp_init2vtag(ich),
SCTP_ERR_RESTART_NEW_ADDRS, dtail, dlen, pkt, 0, B_TRUE,
ira);
kmem_free(dtail, PARM_ADDR6_LEN * nadded);
}
cleanup:
/* Clean up */
if (fphead) {
sctp_faddr_t *fpn;
for (fp = fphead; fp; fp = fpn) {
fpn = fp->sf_next;
if (fp->sf_ixa != NULL) {
ixa_refrele(fp->sf_ixa);
fp->sf_ixa = NULL;
}
kmem_cache_free(sctp_kmem_faddr_cache, fp);
}
}
return (retval);
}
/*
* Reset any state related to transmitted chunks.
*/
void
sctp_congest_reset(sctp_t *sctp)
{
sctp_faddr_t *fp;
sctp_stack_t *sctps = sctp->sctp_sctps;
mblk_t *mp;
for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
fp->sf_ssthresh = sctps->sctps_initial_mtu;
SET_CWND(fp, fp->sf_pmss, sctps->sctps_slow_start_initial);
fp->sf_suna = 0;
fp->sf_pba = 0;
}
/*
* Clean up the transmit list as well since we have reset accounting
* on all the fps. Send event upstream, if required.
*/
while ((mp = sctp->sctp_xmit_head) != NULL) {
sctp->sctp_xmit_head = mp->b_next;
mp->b_next = NULL;
if (sctp->sctp_xmit_head != NULL)
sctp->sctp_xmit_head->b_prev = NULL;
sctp_sendfail_event(sctp, mp, 0, B_TRUE);
}
sctp->sctp_xmit_head = NULL;
sctp->sctp_xmit_tail = NULL;
sctp->sctp_xmit_unacked = NULL;
sctp->sctp_unacked = 0;
/*
* Clean up the control message list as well.  It contains any pending
* ASCONF requests that we have queued/sent.  If we do get an ACK we
* will just drop it.  However, given that we are restarting, chances
* are we aren't going to get any.
*/
if (sctp->sctp_cxmit_list != NULL)
sctp_asconf_free_cxmit(sctp, NULL);
sctp->sctp_cxmit_list = NULL;
sctp->sctp_cchunk_pend = 0;
sctp->sctp_rexmitting = B_FALSE;
sctp->sctp_rxt_nxttsn = 0;
sctp->sctp_rxt_maxtsn = 0;
sctp->sctp_zero_win_probe = B_FALSE;
}
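/*
 * Initialize a newly added peer address: record the address, set up the
 * initial per-path MSS, congestion and RTO state, start it off as
 * unconfirmed and kick off destination/source address selection.
 */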
static void
sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr,
mblk_t *timer_mp)
{
sctp_stack_t *sctps = sctp->sctp_sctps;
ASSERT(fp->sf_ixa != NULL);
bcopy(addr, &fp->sf_faddr, sizeof (*addr));
if (IN6_IS_ADDR_V4MAPPED(addr)) {
fp->sf_isv4 = 1;
/* Make sure that sf_pmss is a multiple of SCTP_ALIGN. */
fp->sf_pmss =
(sctps->sctps_initial_mtu - sctp->sctp_hdr_len) &
~(SCTP_ALIGN - 1);
fp->sf_ixa->ixa_flags |= IXAF_IS_IPV4;
} else {
fp->sf_isv4 = 0;
fp->sf_pmss =
(sctps->sctps_initial_mtu - sctp->sctp_hdr6_len) &
~(SCTP_ALIGN - 1);
fp->sf_ixa->ixa_flags &= ~IXAF_IS_IPV4;
}
fp->sf_cwnd = sctps->sctps_slow_start_initial * fp->sf_pmss;
fp->sf_rto = MIN(sctp->sctp_rto_initial, sctp->sctp_rto_max_init);
SCTP_MAX_RTO(sctp, fp);
fp->sf_srtt = -1;
fp->sf_rtt_updates = 0;
fp->sf_strikes = 0;
fp->sf_max_retr = sctp->sctp_pp_max_rxt;
/* Mark it as not confirmed. */
fp->sf_state = SCTP_FADDRS_UNCONFIRMED;
fp->sf_hb_interval = sctp->sctp_hb_interval;
fp->sf_ssthresh = sctps->sctps_initial_ssthresh;
fp->sf_suna = 0;
fp->sf_pba = 0;
fp->sf_acked = 0;
fp->sf_lastactive = fp->sf_hb_expiry = ddi_get_lbolt64();
fp->sf_timer_mp = timer_mp;
fp->sf_hb_pending = B_FALSE;
fp->sf_hb_enabled = B_TRUE;
fp->sf_df = 1;
fp->sf_pmtu_discovered = 0;
fp->sf_next = NULL;
fp->sf_T3expire = 0;
(void) random_get_pseudo_bytes((uint8_t *)&fp->sf_hb_secret,
sizeof (fp->sf_hb_secret));
fp->sf_rxt_unacked = 0;
sctp_get_dest(sctp, fp);
}
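/*
 * kmem cache constructor and destructor for sctp_faddr_t; the timer
 * message pointers must be NULL when an object is freed back to the cache.
 */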
/*ARGSUSED*/
static int
faddr_constructor(void *buf, void *arg, int flags)
{
sctp_faddr_t *fp = buf;
fp->sf_timer_mp = NULL;
fp->sf_timer_running = 0;
fp->sf_rc_timer_mp = NULL;
fp->sf_rc_timer_running = 0;
return (0);
}
/*ARGSUSED*/
static void
faddr_destructor(void *buf, void *arg)
{
sctp_faddr_t *fp = buf;
ASSERT(fp->sf_timer_mp == NULL);
ASSERT(fp->sf_timer_running == 0);
ASSERT(fp->sf_rc_timer_mp == NULL);
ASSERT(fp->sf_rc_timer_running == 0);
}
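/*
 * Create and destroy the kmem cache backing sctp_faddr_t allocations.
 */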
void
sctp_faddr_init(void)
{
sctp_kmem_faddr_cache = kmem_cache_create("sctp_faddr_cache",
sizeof (sctp_faddr_t), 0, faddr_constructor, faddr_destructor,
NULL, NULL, NULL, 0);
}
void
sctp_faddr_fini(void)
{
kmem_cache_destroy(sctp_kmem_faddr_cache);
}