/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Copyright (c) 1990 Mentat Inc. */
/*
* Internet Group Management Protocol (IGMP) routines.
* Multicast Listener Discovery Protocol (MLD) routines.
*
* Written by Steve Deering, Stanford, May 1988.
* Modified by Rosen Sharma, Stanford, Aug 1994.
* Modified by Bill Fenner, Xerox PARC, Feb. 1995.
*
* MULTICAST 3.5.1.1
*/
#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cmn_err.h>
#include <sys/atomic.h>
#include <sys/zone.h>
#include <sys/callb.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <inet/ipclassifier.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/igmp_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <inet/ipsec_impl.h>
#include <inet/common.h>
#include <inet/mi.h>
#include <inet/nd.h>
#include <inet/tunables.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/ip_multi.h>
#include <inet/ip_listutils.h>
#include <netinet/igmp.h>
#include <inet/ip_ndp.h>
#include <inet/ip_if.h>
static uint_t igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill);
static uint_t igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen);
static uint_t mld_query_in(mld_hdr_t *mldh, ill_t *ill);
static uint_t mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen);
static void igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr);
static void mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr);
static void igmpv3_sendrpt(ill_t *ill, mrec_t *reclist);
static void mldv2_sendrpt(ill_t *ill, mrec_t *reclist);
static mrec_t *mcast_bldmrec(mcast_record_t type, in6_addr_t *grp,
slist_t *srclist, mrec_t *next);
static void mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp,
mcast_record_t rtype, slist_t *flist);
static mrec_t *mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist);
/*
* Macros used to do timer len conversions. Timer values are always
* stored and passed to the timer functions as milliseconds; but the
* default values and values from the wire may not be.
*
* And yes, it's obscure, but decisecond is easier to abbreviate than
* "tenths of a second".
*/
#define DSEC_TO_MSEC(dsec) ((dsec) * 100)
#define SEC_TO_MSEC(sec) ((sec) * 1000)
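/*
* For example, an IGMPv2 Max Response Time of 100 deciseconds becomes
* DSEC_TO_MSEC(100) == 10000 ms, and the second-valued
* IGMP_MAX_HOST_REPORT_DELAY is converted with SEC_TO_MSEC() before
* being used as a timer bound.
*/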
/*
* A running timer (scheduled thru timeout) can be cancelled if another
* timer with a shorter timeout value is scheduled before it has timed
* out. When the shorter timer expires, the original timer is updated
* to account for the time elapsed while the shorter timer ran; but this
* does not account for the time the original timer had already spent
* pending before it was preempted, i.e. the interval between when it
* was scheduled and when it was cancelled. This can cause delays in
* sending out multicast membership reports. To resolve this problem,
* wallclock time (absolute time) is used instead of deltas (relative
* time) to track timers.
*
* The macro below reads the current lbolt value, which is used for
* timer scheduling and firing so that multicast membership reports go
* out on time. A timer does not fire at exactly the time it was
* scheduled for; a difference of a few milliseconds is observed in
* practice, and CURRENT_OFFSET absorbs that difference.
*/
#define CURRENT_MSTIME ((uint_t)TICK_TO_MSEC(ddi_get_lbolt()))
#define CURRENT_OFFSET (999)
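/*
* Illustrative use: a timer armed "delay" ms from now is stored as an
* absolute value, e.g. ilm->ilm_timer = CURRENT_MSTIME + delay, and
* the timeout handlers treat it as expired once
* ilm->ilm_timer <= CURRENT_MSTIME + CURRENT_OFFSET.
*/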
/*
* The first multicast join will trigger the igmp / mld timers.
* The unit for next is milliseconds.
*/
void
igmp_start_timers(unsigned next, ip_stack_t *ipst)
{
int time_left;
int ret;
timeout_id_t id;
ASSERT(next != 0 && next != INFINITY);
mutex_enter(&ipst->ips_igmp_timer_lock);
if (ipst->ips_igmp_timer_setter_active) {
/*
* Serialize timer setters, one at a time. If the
* timer is currently being set by someone,
* just record the next time when it has to be
* invoked and return. The current setter will
* take care of it.
*/
ipst->ips_igmp_time_to_next =
MIN(ipst->ips_igmp_time_to_next, next);
mutex_exit(&ipst->ips_igmp_timer_lock);
return;
} else {
ipst->ips_igmp_timer_setter_active = B_TRUE;
}
if (ipst->ips_igmp_timeout_id == 0) {
/*
* The timer is inactive. We need to start a timer if we haven't
* been asked to quiesce.
*/
ipst->ips_igmp_time_to_next = next;
if (ipst->ips_igmp_timer_quiesce != B_TRUE) {
ipst->ips_igmp_timeout_id =
timeout(igmp_timeout_handler, (void *)ipst,
MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
}
ipst->ips_igmp_timer_setter_active = B_FALSE;
mutex_exit(&ipst->ips_igmp_timer_lock);
return;
}
/*
* The timer was scheduled sometime back for firing in
* 'igmp_time_to_next' ms and is active. We need to
* reschedule the timeout if the new 'next' will happen
* earlier than the currently scheduled timeout
*/
time_left = ipst->ips_igmp_timer_scheduled_last +
MSEC_TO_TICK(ipst->ips_igmp_time_to_next) - ddi_get_lbolt();
if (time_left < MSEC_TO_TICK(next)) {
ipst->ips_igmp_timer_setter_active = B_FALSE;
mutex_exit(&ipst->ips_igmp_timer_lock);
return;
}
id = ipst->ips_igmp_timeout_id;
mutex_exit(&ipst->ips_igmp_timer_lock);
ret = untimeout(id);
mutex_enter(&ipst->ips_igmp_timer_lock);
/*
* The timeout was cancelled, or the timeout handler
* completed, while we were blocked in the untimeout.
* No other thread could have set the timer meanwhile
* since we serialized all the timer setters. Thus
* no timer is currently active or executing, and none
* will fire in the future. We start the timer now
* if needed.
*/
if (ret == -1) {
ASSERT(ipst->ips_igmp_timeout_id == 0);
} else {
ASSERT(ipst->ips_igmp_timeout_id != 0);
ipst->ips_igmp_timeout_id = 0;
}
if (ipst->ips_igmp_time_to_next != 0 &&
ipst->ips_igmp_timer_quiesce != B_TRUE) {
ipst->ips_igmp_time_to_next =
MIN(ipst->ips_igmp_time_to_next, next);
ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
(void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
}
ipst->ips_igmp_timer_setter_active = B_FALSE;
mutex_exit(&ipst->ips_igmp_timer_lock);
}
/*
* mld_start_timers:
* The unit for next is milliseconds.
*/
void
mld_start_timers(unsigned next, ip_stack_t *ipst)
{
int time_left;
int ret;
timeout_id_t id;
ASSERT(next != 0 && next != INFINITY);
mutex_enter(&ipst->ips_mld_timer_lock);
if (ipst->ips_mld_timer_setter_active) {
/*
* Serialize timer setters, one at a time. If the
* timer is currently being set by someone,
* just record the next time when it has to be
* invoked and return. The current setter will
* take care of it.
*/
ipst->ips_mld_time_to_next =
MIN(ipst->ips_mld_time_to_next, next);
mutex_exit(&ipst->ips_mld_timer_lock);
return;
} else {
ipst->ips_mld_timer_setter_active = B_TRUE;
}
if (ipst->ips_mld_timeout_id == 0) {
/*
* The timer is inactive. We need to start a timer, if we
* haven't been asked to quiesce.
*/
ipst->ips_mld_time_to_next = next;
if (ipst->ips_mld_timer_quiesce != B_TRUE) {
ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
(void *)ipst,
MSEC_TO_TICK(ipst->ips_mld_time_to_next));
ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
}
ipst->ips_mld_timer_setter_active = B_FALSE;
mutex_exit(&ipst->ips_mld_timer_lock);
return;
}
/*
* The timer was scheduled sometime back for firing in
* 'mld_time_to_next' ms and is active. We need to
* reschedule the timeout if the new 'next' will happen
* earlier than the currently scheduled timeout
*/
time_left = ipst->ips_mld_timer_scheduled_last +
MSEC_TO_TICK(ipst->ips_mld_time_to_next) - ddi_get_lbolt();
if (time_left < MSEC_TO_TICK(next)) {
ipst->ips_mld_timer_setter_active = B_FALSE;
mutex_exit(&ipst->ips_mld_timer_lock);
return;
}
id = ipst->ips_mld_timeout_id;
mutex_exit(&ipst->ips_mld_timer_lock);
ret = untimeout(id);
mutex_enter(&ipst->ips_mld_timer_lock);
/*
* The timeout was cancelled, or the timeout handler
* completed, while we were blocked in the untimeout.
* No other thread could have set the timer meanwhile
* since we serialized all the timer setters. Thus
* no timer is currently active or executing, and none
* will fire in the future. We start the timer now
* if needed.
*/
if (ret == -1) {
ASSERT(ipst->ips_mld_timeout_id == 0);
} else {
ASSERT(ipst->ips_mld_timeout_id != 0);
ipst->ips_mld_timeout_id = 0;
}
if (ipst->ips_mld_time_to_next != 0 &&
ipst->ips_mld_timer_quiesce == B_FALSE) {
ipst->ips_mld_time_to_next =
MIN(ipst->ips_mld_time_to_next, next);
ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
(void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
}
ipst->ips_mld_timer_setter_active = B_FALSE;
mutex_exit(&ipst->ips_mld_timer_lock);
}
/*
* igmp_input:
* Return NULL for a bad packet that is discarded here.
* Return mp if the message is OK and should be handed to "raw" receivers.
* Callers of igmp_input() may need to reinitialize variables that were copied
* from the mblk, as this may replace the mblk via msgpullup().
*/
mblk_t *
igmp_input(mblk_t *mp, ip_recv_attr_t *ira)
{
igmpa_t *igmpa;
ipha_t *ipha = (ipha_t *)(mp->b_rptr);
int iphlen, igmplen, mblklen;
ilm_t *ilm;
uint32_t src, dst;
uint32_t group;
in6_addr_t v6group;
uint_t next;
ipif_t *ipif;
ill_t *ill = ira->ira_ill;
ip_stack_t *ipst = ill->ill_ipst;
ASSERT(!ill->ill_isv6);
++ipst->ips_igmpstat.igps_rcv_total;
mblklen = MBLKL(mp);
iphlen = ira->ira_ip_hdr_length;
if (mblklen < 1 || mblklen < iphlen) {
++ipst->ips_igmpstat.igps_rcv_tooshort;
goto bad_pkt;
}
igmplen = ira->ira_pktlen - iphlen;
/*
* Since msg sizes are more variable with v3, just pullup the
* whole thing now.
*/
if (MBLKL(mp) < (igmplen + iphlen)) {
mblk_t *mp1;
if ((mp1 = msgpullup(mp, -1)) == NULL) {
++ipst->ips_igmpstat.igps_rcv_tooshort;
goto bad_pkt;
}
freemsg(mp);
mp = mp1;
ipha = (ipha_t *)(mp->b_rptr);
}
/*
* Validate lengths
*/
if (igmplen < IGMP_MINLEN) {
++ipst->ips_igmpstat.igps_rcv_tooshort;
goto bad_pkt;
}
igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]);
src = ipha->ipha_src;
dst = ipha->ipha_dst;
if (ip_debug > 1)
(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
"igmp_input: src 0x%x, dst 0x%x on %s\n",
(int)ntohl(src), (int)ntohl(dst),
ill->ill_name);
switch (igmpa->igmpa_type) {
case IGMP_MEMBERSHIP_QUERY:
/*
* Packet length differentiates between v1/v2 and v3:
* v1/v2 queries should be exactly 8 octets long; v3 is >= 12.
*/
if ((igmplen == IGMP_MINLEN) ||
(ipst->ips_igmp_max_version <= IGMP_V2_ROUTER)) {
next = igmp_query_in(ipha, igmpa, ill);
} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
next = igmpv3_query_in((igmp3qa_t *)igmpa, ill,
igmplen);
} else {
++ipst->ips_igmpstat.igps_rcv_tooshort;
goto bad_pkt;
}
if (next == 0)
goto bad_pkt;
if (next != INFINITY)
igmp_start_timers(next, ipst);
break;
case IGMP_V1_MEMBERSHIP_REPORT:
case IGMP_V2_MEMBERSHIP_REPORT:
/*
* For fast leave to work, we have to know that we are the
* last person to send a report for this group. Reports
* generated by us are looped back since we could potentially
* be a multicast router, so discard reports sourced by me.
*/
mutex_enter(&ill->ill_lock);
for (ipif = ill->ill_ipif; ipif != NULL;
ipif = ipif->ipif_next) {
if (ipif->ipif_lcl_addr == src) {
if (ip_debug > 1) {
(void) mi_strlog(ill->ill_rq,
1,
SL_TRACE,
"igmp_input: we are only "
"member src 0x%x\n",
(int)ntohl(src));
}
mutex_exit(&ill->ill_lock);
return (mp);
}
}
mutex_exit(&ill->ill_lock);
++ipst->ips_igmpstat.igps_rcv_reports;
group = igmpa->igmpa_group;
if (!CLASSD(group)) {
++ipst->ips_igmpstat.igps_rcv_badreports;
goto bad_pkt;
}
/*
* KLUDGE: if the IP source address of the report has an
* unspecified (i.e., zero) subnet number, as is allowed for
* a booting host, replace it with the correct subnet number
* so that a process-level multicast routing daemon can
* determine which subnet it arrived from. This is necessary
* to compensate for the lack of any way for a process to
* determine the arrival interface of an incoming packet.
*
* Requires that a copy of *this* message is passed up
* to the raw interface which is done by our caller.
*/
if ((src & htonl(0xFF000000U)) == 0) { /* Minimum net mask */
/* Pick the first ipif on this ill */
mutex_enter(&ill->ill_lock);
src = ill->ill_ipif->ipif_subnet;
mutex_exit(&ill->ill_lock);
ip1dbg(("igmp_input: changed src to 0x%x\n",
(int)ntohl(src)));
ipha->ipha_src = src;
}
/*
* If our ill has ILMs that belong to the group being
* reported, and we are a 'Delaying Member' in the RFC
* terminology, stop our timer for that group and 'clear
* flag' i.e. mark as IGMP_OTHERMEMBER.
*/
rw_enter(&ill->ill_mcast_lock, RW_WRITER);
IN6_IPADDR_TO_V4MAPPED(group, &v6group);
for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group))
continue;
++ipst->ips_igmpstat.igps_rcv_ourreports;
ilm->ilm_timer = INFINITY;
ilm->ilm_state = IGMP_OTHERMEMBER;
} /* for */
rw_exit(&ill->ill_mcast_lock);
ill_mcast_timer_start(ill->ill_ipst);
break;
case IGMP_V3_MEMBERSHIP_REPORT:
/*
* Currently nothing to do here; IGMP router is not
* implemented in ip, and v3 hosts don't pay attention
* to membership reports.
*/
break;
}
/*
* Pass all valid IGMP packets up to any process(es) listening
* on a raw IGMP socket. Do not free the packet.
*/
return (mp);
bad_pkt:
freemsg(mp);
return (NULL);
}
static uint_t
igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill)
{
ilm_t *ilm;
int timer;
uint_t next, current;
ip_stack_t *ipst;
ipst = ill->ill_ipst;
++ipst->ips_igmpstat.igps_rcv_queries;
rw_enter(&ill->ill_mcast_lock, RW_WRITER);
/*
* In the IGMPv2 specification, there are 3 states and a flag.
*
* In Non-Member state, we simply don't have a membership record.
* In Delaying Member state, our timer is running (ilm->ilm_timer
* < INFINITY). In Idle Member state, our timer is not running
* (ilm->ilm_timer == INFINITY).
*
* The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
* we have heard a report from another member, or IGMP_IREPORTEDLAST
* if I sent the last report.
*/
if ((igmpa->igmpa_code == 0) ||
(ipst->ips_igmp_max_version == IGMP_V1_ROUTER)) {
/*
* Query from an old router.
* Remember that the querier on this interface is old,
* and set the timer to the value in RFC 1112.
*/
ill->ill_mcast_v1_time = 0;
ill->ill_mcast_v1_tset = 1;
if (ill->ill_mcast_type != IGMP_V1_ROUTER) {
ip1dbg(("Received IGMPv1 Query on %s, switching mode "
"to IGMP_V1_ROUTER\n", ill->ill_name));
atomic_inc_16(&ill->ill_ifptr->illif_mcast_v1);
ill->ill_mcast_type = IGMP_V1_ROUTER;
}
timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY);
if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) ||
igmpa->igmpa_group != 0) {
++ipst->ips_igmpstat.igps_rcv_badqueries;
rw_exit(&ill->ill_mcast_lock);
ill_mcast_timer_start(ill->ill_ipst);
return (0);
}
} else {
in_addr_t group;
/*
* Query from a new router.
* Simply do a validity check.
*/
group = igmpa->igmpa_group;
if (group != 0 && (!CLASSD(group))) {
++ipst->ips_igmpstat.igps_rcv_badqueries;
rw_exit(&ill->ill_mcast_lock);
ill_mcast_timer_start(ill->ill_ipst);
return (0);
}
/*
* Switch interface state to v2 on receipt of a v2 query
* ONLY IF current state is v3. Leave things alone if the current
* state is v1, but do reset the v2-querier-present timer.
*/
if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
ip1dbg(("Received IGMPv2 Query on %s, switching mode "
"to IGMP_V2_ROUTER", ill->ill_name));
atomic_inc_16(&ill->ill_ifptr->illif_mcast_v2);
ill->ill_mcast_type = IGMP_V2_ROUTER;
}
ill->ill_mcast_v2_time = 0;
ill->ill_mcast_v2_tset = 1;
timer = DSEC_TO_MSEC((int)igmpa->igmpa_code);
}
if (ip_debug > 1) {
(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
"igmp_input: TIMER = igmp_code %d igmp_type 0x%x",
(int)igmpa->igmpa_code,
(int)igmpa->igmpa_type);
}
/*
* -Start the timers in all of our membership records
* for the physical interface on which the query
* arrived, excluding those that belong to the "all
* hosts" group (224.0.0.1).
*
* -Restart any timer that is already running but has
* a value longer than the requested timeout.
*
* -Use the value specified in the query message as
* the maximum timeout.
*/
next = (unsigned)INFINITY;
current = CURRENT_MSTIME;
for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
/*
* A multicast router joins INADDR_ANY address
* to enable promiscuous reception of all
* mcasts from the interface. This INADDR_ANY
* is stored in the ilm_v6addr as V6 unspec addr
*/
if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr))
continue;
if (ilm->ilm_addr == htonl(INADDR_ANY))
continue;
if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) &&
((igmpa->igmpa_group == 0) ||
(igmpa->igmpa_group == ilm->ilm_addr))) {
if (ilm->ilm_timer > timer) {
MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
if (ilm->ilm_timer < next)
next = ilm->ilm_timer;
ilm->ilm_timer += current;
}
}
}
rw_exit(&ill->ill_mcast_lock);
/*
* No packets have been sent above - no
* ill_mcast_send_queued is needed.
*/
ill_mcast_timer_start(ill->ill_ipst);
return (next);
}
static uint_t
igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen)
{
uint_t i, next, mrd, qqi, timer, delay, numsrc;
uint_t current;
ilm_t *ilm;
ipaddr_t *src_array;
uint8_t qrv;
ip_stack_t *ipst;
ipst = ill->ill_ipst;
/* make sure numsrc matches packet size */
numsrc = ntohs(igmp3qa->igmp3qa_numsrc);
if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) {
++ipst->ips_igmpstat.igps_rcv_tooshort;
return (0);
}
src_array = (ipaddr_t *)&igmp3qa[1];
++ipst->ips_igmpstat.igps_rcv_queries;
rw_enter(&ill->ill_mcast_lock, RW_WRITER);
if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) {
uint_t hdrval, mant, exp;
hdrval = (uint_t)igmp3qa->igmp3qa_mxrc;
mant = hdrval & IGMP_V3_MAXRT_MANT_MASK;
exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4;
mrd = (mant | 0x10) << (exp + 3);
}
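/*
* Worked example of the RFC 3376 fixed-point decode above: a Max Resp
* Code of 0x8F has mant == 0xF and exp == 0, giving
* mrd = (0xF | 0x10) << (0 + 3) = 248 deciseconds (24.8 seconds).
*/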
if (mrd == 0)
mrd = MCAST_DEF_QUERY_RESP_INTERVAL;
timer = DSEC_TO_MSEC(mrd);
MCAST_RANDOM_DELAY(delay, timer);
next = (unsigned)INFINITY;
current = CURRENT_MSTIME;
if ((qrv = igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0)
ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
else
ill->ill_mcast_rv = qrv;
if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) {
uint_t hdrval, mant, exp;
hdrval = (uint_t)igmp3qa->igmp3qa_qqic;
mant = hdrval & IGMP_V3_QQI_MANT_MASK;
exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4;
qqi = (mant | 0x10) << (exp + 3);
}
ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;
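/*
* The QQIC field uses the same fixed-point encoding as the Max Resp
* Code, but in units of seconds; e.g. a QQIC of 0x8F decodes to
* (0xF | 0x10) << 3 = 248 seconds.
*/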
/*
* If we have a pending general query response that's scheduled
* sooner than the delay we calculated for this response, then
* no action is required (RFC3376 section 5.2 rule 1)
*/
if (ill->ill_global_timer < (current + delay)) {
rw_exit(&ill->ill_mcast_lock);
ill_mcast_timer_start(ill->ill_ipst);
return (next);
}
/*
* Now take action depending upon query type:
* general, group specific, or group/source specific.
*/
if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) {
/*
* general query
* We know global timer is either not running or is
* greater than our calculated delay, so reset it to
* our delay (random value in range [0, response time]).
*/
ill->ill_global_timer = current + delay;
next = delay;
} else {
/* group or group/source specific query */
for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) ||
(ilm->ilm_addr == htonl(INADDR_ANY)) ||
(ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) ||
(igmp3qa->igmp3qa_group != ilm->ilm_addr))
continue;
/*
* If the query is group specific or we have a
* pending group specific query, the response is
* group specific (pending sources list should be
* empty). Otherwise, need to update the pending
* sources list for the group and source specific
* response.
*/
if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
FREE_SLIST(ilm->ilm_pendsrcs);
ilm->ilm_pendsrcs = NULL;
} else {
boolean_t overflow;
slist_t *pktl;
if (numsrc > MAX_FILTER_SIZE ||
(ilm->ilm_pendsrcs == NULL &&
(ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
/*
* We've been sent more sources than
* we can deal with; or we can't deal
* with a source list at all. Revert
* to a group specific query.
*/
goto group_query;
}
if ((pktl = l_alloc()) == NULL)
goto group_query;
pktl->sl_numsrc = numsrc;
for (i = 0; i < numsrc; i++)
IN6_IPADDR_TO_V4MAPPED(src_array[i],
&(pktl->sl_addr[i]));
l_union_in_a(ilm->ilm_pendsrcs, pktl,
&overflow);
l_free(pktl);
if (overflow)
goto group_query;
}
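/*
* ilm_timer is stored as an absolute lbolt-based time; convert it
* back to a relative value, clamp it to the delay computed for
* this query, then re-base it on the current time.
*/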
ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
INFINITY : (ilm->ilm_timer - current);
/* choose soonest timer */
ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
if (ilm->ilm_timer < next)
next = ilm->ilm_timer;
ilm->ilm_timer += current;
}
}
rw_exit(&ill->ill_mcast_lock);
/*
* No packets have been sent above - no
* ill_mcast_send_queued is needed.
*/
ill_mcast_timer_start(ill->ill_ipst);
return (next);
}
/*
* Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
* and it gets sent after the lock is dropped.
*/
void
igmp_joingroup(ilm_t *ilm)
{
uint_t timer = INFINITY;
ill_t *ill;
ip_stack_t *ipst = ilm->ilm_ipst;
ill = ilm->ilm_ill;
ASSERT(!ill->ill_isv6);
ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) {
ilm->ilm_rtx.rtx_timer = INFINITY;
ilm->ilm_state = IGMP_OTHERMEMBER;
} else {
ip1dbg(("Querier mode %d, sending report, group %x\n",
ill->ill_mcast_type, htonl(ilm->ilm_addr)));
if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
} else if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
mrec_t *rp;
mcast_record_t rtype;
/*
* The possible state changes we need to handle here:
* Old State New State Report
*
* INCLUDE(0) INCLUDE(X) ALLOW(X),BLOCK(0)
* INCLUDE(0) EXCLUDE(X) TO_EX(X)
*
* No need to send the BLOCK(0) report; ALLOW(X)
* is enough.
*/
rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
ilm->ilm_filter, NULL);
igmpv3_sendrpt(ill, rp);
/*
* Set up retransmission state. Timer is set below,
* for both v3 and older versions.
*/
mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
ilm->ilm_filter);
}
/* Set the ilm timer value */
ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
timer = ilm->ilm_rtx.rtx_timer;
ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
ilm->ilm_state = IGMP_IREPORTEDLAST;
/*
* We are holding ill_mcast_lock here and the timeout
* handler (igmp_timeout_handler_per_ill) acquires that
* lock. Hence we can't call igmp_start_timers since it could
* deadlock in untimeout().
* Instead the thread which drops ill_mcast_lock will have
* to call ill_mcast_timer_start().
*/
mutex_enter(&ipst->ips_igmp_timer_lock);
ipst->ips_igmp_deferred_next = MIN(timer,
ipst->ips_igmp_deferred_next);
mutex_exit(&ipst->ips_igmp_timer_lock);
}
if (ip_debug > 1) {
(void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
"igmp_joingroup: multicast_type %d timer %d",
(ilm->ilm_ill->ill_mcast_type),
(int)timer);
}
}
/*
* Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
* and it gets sent after the lock is dropped.
*/
void
mld_joingroup(ilm_t *ilm)
{
uint_t timer = INFINITY;
ill_t *ill;
ip_stack_t *ipst = ilm->ilm_ipst;
ill = ilm->ilm_ill;
ASSERT(ill->ill_isv6);
ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) {
ilm->ilm_rtx.rtx_timer = INFINITY;
ilm->ilm_state = IGMP_OTHERMEMBER;
} else {
if (ill->ill_mcast_type == MLD_V1_ROUTER) {
mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
} else {
mrec_t *rp;
mcast_record_t rtype;
/*
* The possible state changes we need to handle here:
* Old State New State Report
*
* INCLUDE(0) INCLUDE(X) ALLOW(X),BLOCK(0)
* INCLUDE(0) EXCLUDE(X) TO_EX(X)
*
* No need to send the BLOCK(0) report; ALLOW(X)
* is enough
*/
rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
ilm->ilm_filter, NULL);
mldv2_sendrpt(ill, rp);
/*
* Set up retransmission state. Timer is set below,
* for both v2 and v1.
*/
mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
ilm->ilm_filter);
}
/* Set the ilm timer value */
ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER ||
ilm->ilm_rtx.rtx_cnt > 0);
ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
timer = ilm->ilm_rtx.rtx_timer;
ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
ilm->ilm_state = IGMP_IREPORTEDLAST;
/*
* We are holding ill_mcast_lock here and the timeout
* handler (mld_timeout_handler_per_ill) acquires that
* lock. Hence we can't call mld_start_timers since it could
* deadlock in untimeout().
* Instead the thread which drops ill_mcast_lock will have
* to call ill_mcast_timer_start().
*/
mutex_enter(&ipst->ips_mld_timer_lock);
ipst->ips_mld_deferred_next = MIN(timer,
ipst->ips_mld_deferred_next);
mutex_exit(&ipst->ips_mld_timer_lock);
}
if (ip_debug > 1) {
(void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
"mld_joingroup: multicast_type %d timer %d",
(ilm->ilm_ill->ill_mcast_type),
(int)timer);
}
}
/*
* Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
* and it gets sent after the lock is dropped.
*/
void
igmp_leavegroup(ilm_t *ilm)
{
ill_t *ill = ilm->ilm_ill;
ASSERT(!ill->ill_isv6);
ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
ill->ill_mcast_type == IGMP_V2_ROUTER &&
(ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP,
(htonl(INADDR_ALLRTRS_GROUP)));
return;
}
if ((ill->ill_mcast_type == IGMP_V3_ROUTER) &&
(ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
mrec_t *rp;
/*
* The possible state changes we need to handle here:
* Old State New State Report
*
* INCLUDE(X) INCLUDE(0) ALLOW(0),BLOCK(X)
* EXCLUDE(X) INCLUDE(0) TO_IN(0)
*
* No need to send the ALLOW(0) report; BLOCK(X) is enough
*/
if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
ilm->ilm_filter, NULL);
} else {
rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
NULL, NULL);
}
igmpv3_sendrpt(ill, rp);
return;
}
}
/*
* Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
* and it gets sent after the lock is dropped.
*/
void
mld_leavegroup(ilm_t *ilm)
{
ill_t *ill = ilm->ilm_ill;
ASSERT(ill->ill_isv6);
ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
ill->ill_mcast_type == MLD_V1_ROUTER &&
(!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast);
return;
}
if ((ill->ill_mcast_type == MLD_V2_ROUTER) &&
(!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
mrec_t *rp;
/*
* The possible state changes we need to handle here:
* Old State New State Report
*
* INCLUDE(X) INCLUDE(0) ALLOW(0),BLOCK(X)
* EXCLUDE(X) INCLUDE(0) TO_IN(0)
*
* No need to send the ALLOW(0) report; BLOCK(X) is enough
*/
if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
ilm->ilm_filter, NULL);
} else {
rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
NULL, NULL);
}
mldv2_sendrpt(ill, rp);
return;
}
}
/*
* Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
* and it gets sent after the lock is dropped.
*/
void
igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
ill_t *ill;
mrec_t *rp;
ip_stack_t *ipst = ilm->ilm_ipst;
ASSERT(ilm != NULL);
/* state change reports should only be sent if the router is v3 */
if (ilm->ilm_ill->ill_mcast_type != IGMP_V3_ROUTER)
return;
ill = ilm->ilm_ill;
ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
/*
* Compare existing(old) state with the new state and prepare
* State Change Report, according to the rules in RFC 3376:
*
* Old State New State State Change Report
*
* INCLUDE(A) INCLUDE(B) ALLOW(B-A),BLOCK(A-B)
* EXCLUDE(A) EXCLUDE(B) ALLOW(A-B),BLOCK(B-A)
* INCLUDE(A) EXCLUDE(B) TO_EX(B)
* EXCLUDE(A) INCLUDE(B) TO_IN(B)
*/
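/*
* For example (illustrative): staying in INCLUDE mode while moving
* from source set A = {s1, s2} to B = {s2, s3} gives A-B = {s1} and
* B-A = {s3}, so the report sent is ALLOW({s3}),BLOCK({s1}).
*/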
if (ilm->ilm_fmode == fmode) {
slist_t *a_minus_b = NULL, *b_minus_a = NULL;
slist_t *allow, *block;
if (((a_minus_b = l_alloc()) == NULL) ||
((b_minus_a = l_alloc()) == NULL)) {
l_free(a_minus_b);
if (ilm->ilm_fmode == MODE_IS_INCLUDE)
goto send_to_ex;
else
goto send_to_in;
}
l_difference(ilm->ilm_filter, flist, a_minus_b);
l_difference(flist, ilm->ilm_filter, b_minus_a);
if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
allow = b_minus_a;
block = a_minus_b;
} else {
allow = a_minus_b;
block = b_minus_a;
}
rp = NULL;
if (!SLIST_IS_EMPTY(allow))
rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
allow, rp);
if (!SLIST_IS_EMPTY(block))
rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
block, rp);
l_free(a_minus_b);
l_free(b_minus_a);
} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
NULL);
} else {
send_to_in:
rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
NULL);
}
/*
* Need to set up retransmission state; merge the new info with the
* current state (which may be null). If the timer is not currently
* running, the caller will start it when dropping ill_mcast_lock.
*/
rp = mcast_merge_rtx(ilm, rp, flist);
if (ilm->ilm_rtx.rtx_timer == INFINITY) {
ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
mutex_enter(&ipst->ips_igmp_timer_lock);
ipst->ips_igmp_deferred_next = MIN(ipst->ips_igmp_deferred_next,
ilm->ilm_rtx.rtx_timer);
ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
mutex_exit(&ipst->ips_igmp_timer_lock);
}
igmpv3_sendrpt(ill, rp);
}
/*
* Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
* and it gets sent after the lock is dropped.
*/
void
mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
ill_t *ill;
mrec_t *rp = NULL;
ip_stack_t *ipst = ilm->ilm_ipst;
ASSERT(ilm != NULL);
ill = ilm->ilm_ill;
ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
/* only need to send if we have an mldv2-capable router */
if (ill->ill_mcast_type != MLD_V2_ROUTER) {
return;
}
/*
* Compare existing (old) state with the new state passed in
* and send appropriate MLDv2 State Change Report.
*
* Old State New State State Change Report
*
* INCLUDE(A) INCLUDE(B) ALLOW(B-A),BLOCK(A-B)
* EXCLUDE(A) EXCLUDE(B) ALLOW(A-B),BLOCK(B-A)
* INCLUDE(A) EXCLUDE(B) TO_EX(B)
* EXCLUDE(A) INCLUDE(B) TO_IN(B)
*/
if (ilm->ilm_fmode == fmode) {
slist_t *a_minus_b = NULL, *b_minus_a = NULL;
slist_t *allow, *block;
if (((a_minus_b = l_alloc()) == NULL) ||
((b_minus_a = l_alloc()) == NULL)) {
l_free(a_minus_b);
if (ilm->ilm_fmode == MODE_IS_INCLUDE)
goto send_to_ex;
else
goto send_to_in;
}
l_difference(ilm->ilm_filter, flist, a_minus_b);
l_difference(flist, ilm->ilm_filter, b_minus_a);
if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
allow = b_minus_a;
block = a_minus_b;
} else {
allow = a_minus_b;
block = b_minus_a;
}
if (!SLIST_IS_EMPTY(allow))
rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
allow, rp);
if (!SLIST_IS_EMPTY(block))
rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
block, rp);
l_free(a_minus_b);
l_free(b_minus_a);
} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
NULL);
} else {
send_to_in:
rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
NULL);
}
/*
* Need to set up retransmission state; merge the new info with the
* current state (which may be null). If the timer is not currently
* running, the caller will start it when dropping ill_mcast_lock.
*/
rp = mcast_merge_rtx(ilm, rp, flist);
ASSERT(ilm->ilm_rtx.rtx_cnt > 0);
if (ilm->ilm_rtx.rtx_timer == INFINITY) {
ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
mutex_enter(&ipst->ips_mld_timer_lock);
ipst->ips_mld_deferred_next =
MIN(ipst->ips_mld_deferred_next, ilm->ilm_rtx.rtx_timer);
ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
mutex_exit(&ipst->ips_mld_timer_lock);
}
mldv2_sendrpt(ill, rp);
}
uint_t
igmp_timeout_handler_per_ill(ill_t *ill)
{
uint_t next = INFINITY, current;
ilm_t *ilm;
mrec_t *rp = NULL;
mrec_t *rtxrp = NULL;
rtx_state_t *rtxp;
mcast_record_t rtype;
rw_enter(&ill->ill_mcast_lock, RW_WRITER);
current = CURRENT_MSTIME;
/* First check the global timer on this interface */
if (ill->ill_global_timer == INFINITY)
goto per_ilm_timer;
if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
ill->ill_global_timer = INFINITY;
/*
* Send report for each group on this interface.
* Since we just set the global timer (received a v3 general
* query), need to skip the all hosts addr (224.0.0.1), per
* RFC 3376 section 5.
*/
for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP))
continue;
rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
ilm->ilm_filter, rp);
/*
* Since we're sending a report on this group, okay
* to delete pending group-specific timers. Note
* that group-specific retransmit timers still need
* to be checked in the per_ilm_timer for-loop.
*/
ilm->ilm_timer = INFINITY;
ilm->ilm_state = IGMP_IREPORTEDLAST;
FREE_SLIST(ilm->ilm_pendsrcs);
ilm->ilm_pendsrcs = NULL;
}
igmpv3_sendrpt(ill, rp);
rp = NULL;
} else {
if ((ill->ill_global_timer - current) < next)
next = ill->ill_global_timer - current;
}
per_ilm_timer:
for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
if (ilm->ilm_timer == INFINITY)
goto per_ilm_rtxtimer;
if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
if ((ilm->ilm_timer - current) < next)
next = ilm->ilm_timer - current;
if (ip_debug > 1) {
(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
"igmp_timo_hlr 2: ilm_timr %d "
"typ %d nxt %d",
(int)(ilm->ilm_timer - current),
(ill->ill_mcast_type), next);
}
goto per_ilm_rtxtimer;
}
/* the timer has expired, need to take action */
ilm->ilm_timer = INFINITY;
ilm->ilm_state = IGMP_IREPORTEDLAST;
if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
} else {
slist_t *rsp;
if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
(rsp = l_alloc()) != NULL) {
/*
* Contents of reply depend on pending
* requested source list.
*/
if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
l_intersection(ilm->ilm_filter,
ilm->ilm_pendsrcs, rsp);
} else {
l_difference(ilm->ilm_pendsrcs,
ilm->ilm_filter, rsp);
}
FREE_SLIST(ilm->ilm_pendsrcs);
ilm->ilm_pendsrcs = NULL;
if (!SLIST_IS_EMPTY(rsp))
rp = mcast_bldmrec(MODE_IS_INCLUDE,
&ilm->ilm_v6addr, rsp, rp);
FREE_SLIST(rsp);
} else {
/*
* Either the pending request is just group-
* specific, or we couldn't get the resources
* (rsp) to build a source-specific reply.
*/
rp = mcast_bldmrec(ilm->ilm_fmode,
&ilm->ilm_v6addr, ilm->ilm_filter, rp);
}
igmpv3_sendrpt(ill, rp);
rp = NULL;
}
per_ilm_rtxtimer:
rtxp = &ilm->ilm_rtx;
if (rtxp->rtx_timer == INFINITY)
continue;
if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
if ((rtxp->rtx_timer - current) < next)
next = rtxp->rtx_timer - current;
continue;
}
rtxp->rtx_timer = INFINITY;
ilm->ilm_state = IGMP_IREPORTEDLAST;
if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
continue;
}
if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
continue;
}
/*
* The retransmit timer has popped, and our router is
* IGMPv3. We have to delve into the retransmit state
* stored in the ilm.
*
* Decrement the retransmit count. If the fmode rtx
* count is active, decrement it, and send a filter
* mode change report with the ilm's source list.
* Otherwise, send a source list change report with
* the current retransmit lists.
*/
ASSERT(rtxp->rtx_cnt > 0);
ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
rtxp->rtx_cnt--;
if (rtxp->rtx_fmode_cnt > 0) {
rtxp->rtx_fmode_cnt--;
rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
ilm->ilm_filter, rtxrp);
} else {
rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
&ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
&ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
}
if (rtxp->rtx_cnt > 0) {
MCAST_RANDOM_DELAY(rtxp->rtx_timer,
SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
if (rtxp->rtx_timer < next)
next = rtxp->rtx_timer;
rtxp->rtx_timer += current;
} else {
ASSERT(rtxp->rtx_timer == INFINITY);
CLEAR_SLIST(rtxp->rtx_allow);
CLEAR_SLIST(rtxp->rtx_block);
}
igmpv3_sendrpt(ill, rtxrp);
rtxrp = NULL;
}
rw_exit(&ill->ill_mcast_lock);
/* Send any deferred/queued IP packets */
ill_mcast_send_queued(ill);
/* Defer ill_mcast_timer_start() until the caller is done */
return (next);
}
/*
* igmp_timeout_handler:
* Called by the timeout facility when the next IGMP timer event is due.
*
* As part of multicast join and leave, igmp may need to send out an
* igmp request. The igmp-related state variables in the ilm are protected
* by ill_mcast_lock. A single global igmp timer is used to track igmp
* timeouts. igmp_timer_lock protects the global igmp_timeout_id.
* igmp_start_timers starts the igmp timer if needed. It serializes
* multiple threads trying to simultaneously start the timer using the
* igmp_timer_setter_active flag.
*
* igmp_input() receives igmp queries and responds to them in a delayed
* fashion by posting a timer, i.e. it calls igmp_start_timers(). Later,
* when the igmp timer fires, the timeout handler igmp_timeout_handler()
* performs the action exclusively after acquiring ill_mcast_lock.
*
* The igmp_slowtimo() function is called through another timer.
* igmp_slowtimeout_lock protects igmp_slowtimeout_id.
*/
void
igmp_timeout_handler(void *arg)
{
ill_t *ill;
uint_t global_next = INFINITY;
uint_t next;
ill_walk_context_t ctx;
ip_stack_t *ipst = arg;
ASSERT(arg != NULL);
mutex_enter(&ipst->ips_igmp_timer_lock);
ASSERT(ipst->ips_igmp_timeout_id != 0);
ipst->ips_igmp_timeout_id = 0;
ipst->ips_igmp_timer_scheduled_last = 0;
ipst->ips_igmp_time_to_next = 0;
mutex_exit(&ipst->ips_igmp_timer_lock);
rw_enter(&ipst->ips_ill_g_lock, RW_READER);
ill = ILL_START_WALK_V4(&ctx, ipst);
for (; ill != NULL; ill = ill_next(&ctx, ill)) {
ASSERT(!ill->ill_isv6);
/* Make sure the ill isn't going away. */
if (!ill_check_and_refhold(ill))
continue;
rw_exit(&ipst->ips_ill_g_lock);
next = igmp_timeout_handler_per_ill(ill);
if (next < global_next)
global_next = next;
ill_refrele(ill);
rw_enter(&ipst->ips_ill_g_lock, RW_READER);
}
rw_exit(&ipst->ips_ill_g_lock);
if (global_next != INFINITY)
igmp_start_timers(global_next, ipst);
}
/*
* mld_timeout_handler_per_ill:
* Called from mld_timeout_handler for each ill that has pending timer
* events. Returns the number of milliseconds until the next event on
* this ill (or INFINITY if none).
*/
uint_t
mld_timeout_handler_per_ill(ill_t *ill)
{
ilm_t *ilm;
uint_t next = INFINITY, current;
mrec_t *rp, *rtxrp;
rtx_state_t *rtxp;
mcast_record_t rtype;
rw_enter(&ill->ill_mcast_lock, RW_WRITER);
current = CURRENT_MSTIME;
/*
* First check the global timer on this interface; the global timer
* is not used for MLDv1, so if it's set we can assume we're v2.
*/
if (ill->ill_global_timer == INFINITY)
goto per_ilm_timer;
if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
ill->ill_global_timer = INFINITY;
/*
* Send report for each group on this interface.
* Since we just set the global timer (received a v2 general
* query), need to skip the all hosts addr (ff02::1), per
* RFC 3810 section 6.
*/
rp = NULL;
for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
&ipv6_all_hosts_mcast))
continue;
rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
ilm->ilm_filter, rp);
/*
* Since we're sending a report on this group, okay
* to delete pending group-specific timers. Note
* that group-specific retransmit timers still need
* to be checked in the per_ilm_timer for-loop.
*/
ilm->ilm_timer = INFINITY;
ilm->ilm_state = IGMP_IREPORTEDLAST;
FREE_SLIST(ilm->ilm_pendsrcs);
ilm->ilm_pendsrcs = NULL;
}
mldv2_sendrpt(ill, rp);
} else {
if ((ill->ill_global_timer - current) < next)
next = ill->ill_global_timer - current;
}
per_ilm_timer:
rp = rtxrp = NULL;
for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
if (ilm->ilm_timer == INFINITY)
goto per_ilm_rtxtimer;
if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
if ((ilm->ilm_timer - current) < next)
next = ilm->ilm_timer - current;
if (ip_debug > 1) {
(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
"igmp_timo_hlr 2: ilm_timr"
" %d typ %d nxt %d",
(int)ntohl(ilm->ilm_timer - current),
(ill->ill_mcast_type), next);
}
goto per_ilm_rtxtimer;
}
/* the timer has expired, need to take action */
ilm->ilm_timer = INFINITY;
ilm->ilm_state = IGMP_IREPORTEDLAST;
if (ill->ill_mcast_type == MLD_V1_ROUTER) {
mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
} else {
slist_t *rsp;
if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
(rsp = l_alloc()) != NULL) {
/*
* Contents of reply depend on pending
* requested source list.
*/
if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
l_intersection(ilm->ilm_filter,
ilm->ilm_pendsrcs, rsp);
} else {
l_difference(ilm->ilm_pendsrcs,
ilm->ilm_filter, rsp);
}
FREE_SLIST(ilm->ilm_pendsrcs);
ilm->ilm_pendsrcs = NULL;
if (!SLIST_IS_EMPTY(rsp))
rp = mcast_bldmrec(MODE_IS_INCLUDE,
&ilm->ilm_v6addr, rsp, rp);
FREE_SLIST(rsp);
} else {
rp = mcast_bldmrec(ilm->ilm_fmode,
&ilm->ilm_v6addr, ilm->ilm_filter, rp);
}
}
per_ilm_rtxtimer:
rtxp = &ilm->ilm_rtx;
if (rtxp->rtx_timer == INFINITY)
continue;
if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
if ((rtxp->rtx_timer - current) < next)
next = rtxp->rtx_timer - current;
continue;
}
rtxp->rtx_timer = INFINITY;
ilm->ilm_state = IGMP_IREPORTEDLAST;
if (ill->ill_mcast_type == MLD_V1_ROUTER) {
mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
continue;
}
/*
* The retransmit timer has popped, and our router is
* MLDv2. We have to delve into the retransmit state
* stored in the ilm.
*
* Decrement the retransmit count. If the fmode rtx
* count is active, decrement it, and send a filter
* mode change report with the ilm's source list.
* Otherwise, send a source list change report with
* the current retransmit lists.
*/
ASSERT(rtxp->rtx_cnt > 0);
ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
rtxp->rtx_cnt--;
if (rtxp->rtx_fmode_cnt > 0) {
rtxp->rtx_fmode_cnt--;
rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
ilm->ilm_filter, rtxrp);
} else {
rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
&ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
&ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
}
if (rtxp->rtx_cnt > 0) {
MCAST_RANDOM_DELAY(rtxp->rtx_timer,
SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
if (rtxp->rtx_timer < next)
next = rtxp->rtx_timer;
rtxp->rtx_timer += current;
} else {
ASSERT(rtxp->rtx_timer == INFINITY);
CLEAR_SLIST(rtxp->rtx_allow);
CLEAR_SLIST(rtxp->rtx_block);
}
}
if (ill->ill_mcast_type == MLD_V2_ROUTER) {
mldv2_sendrpt(ill, rp);
mldv2_sendrpt(ill, rtxrp);
}
rw_exit(&ill->ill_mcast_lock);
/* Send any deferred/queued IP packets */
ill_mcast_send_queued(ill);
/* Defer ill_mcast_timer_start() until the caller is done */
return (next);
}
/*
* mld_timeout_handler:
* Called by the timeout facility when the next MLD timer event is due.
* MT issues are the same as for igmp_timeout_handler.
*/
void
mld_timeout_handler(void *arg)
{
ill_t *ill;
uint_t global_next = INFINITY;
uint_t next;
ill_walk_context_t ctx;
ip_stack_t *ipst = arg;
ASSERT(arg != NULL);
mutex_enter(&ipst->ips_mld_timer_lock);
ASSERT(ipst->ips_mld_timeout_id != 0);
ipst->ips_mld_timeout_id = 0;
ipst->ips_mld_timer_scheduled_last = 0;
ipst->ips_mld_time_to_next = 0;
mutex_exit(&ipst->ips_mld_timer_lock);
rw_enter(&ipst->ips_ill_g_lock, RW_READER);
ill = ILL_START_WALK_V6(&ctx, ipst);
for (; ill != NULL; ill = ill_next(&ctx, ill)) {
ASSERT(ill->ill_isv6);
/* Make sure the ill isn't going away. */
if (!ill_check_and_refhold(ill))
continue;
rw_exit(&ipst->ips_ill_g_lock);
next = mld_timeout_handler_per_ill(ill);
if (next < global_next)
global_next = next;
ill_refrele(ill);
rw_enter(&ipst->ips_ill_g_lock, RW_READER);
}
rw_exit(&ipst->ips_ill_g_lock);
if (global_next != INFINITY)
mld_start_timers(global_next, ipst);
}
/*
* Calculate the Older Version Querier Present timeout value, in number
* of slowtimo intervals, for the given ill.
*/
#define OVQP(ill) \
((1000 * (((ill)->ill_mcast_rv * (ill)->ill_mcast_qi) \
+ MCAST_QUERY_RESP_INTERVAL)) / MCAST_SLOWTIMO_INTERVAL)
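/*
* This corresponds to the "Older Version Querier Present Timeout" of
* RFC 3376 (and its MLD counterpart): (Robustness Variable * Query
* Interval) plus a query response interval, converted from seconds to
* slowtimo ticks. E.g., with a robustness of 2 and a query interval of
* 125 seconds, the timeout is a bit over 250 seconds' worth of ticks.
*/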
/*
* igmp_slowtimo:
* - Reverts to a newer IGMP version if we didn't hear from the older
* version router within IGMP_AGE_THRESHOLD seconds.
* - Reschedules the slow timeout.
* The check against ips_igmp_max_version ensures that we don't revert
* to a higher IGMP version than configured.
*/
void
igmp_slowtimo(void *arg)
{
ill_t *ill;
ill_if_t *ifp;
avl_tree_t *avl_tree;
ip_stack_t *ipst = (ip_stack_t *)arg;
ASSERT(arg != NULL);
/*
* The ill_if_t list is circular, hence the odd loop parameters.
*
* We can't use the ILL_START_WALK and ill_next() wrappers for this
* walk, as we need to check the illif_mcast_* fields in the ill_if_t
* structure (allowing us to skip if none of the instances have timers
* running).
*/
rw_enter(&ipst->ips_ill_g_lock, RW_READER);
for (ifp = IP_V4_ILL_G_LIST(ipst);
ifp != (ill_if_t *)&IP_V4_ILL_G_LIST(ipst);
ifp = ifp->illif_next) {
/*
* illif_mcast_v[12] are set using atomics. If an ill hears
* a V1 or V2 query now and we miss seeing the count now,
* we will see it the next time igmp_slowtimo is called.
*/
if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0)
continue;
avl_tree = &ifp->illif_avl_by_ppa;
for (ill = avl_first(avl_tree); ill != NULL;
ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
/* Make sure the ill isn't going away. */
if (!ill_check_and_refhold(ill))
continue;
rw_exit(&ipst->ips_ill_g_lock);
rw_enter(&ill->ill_mcast_lock, RW_WRITER);
if (ill->ill_mcast_v1_tset == 1)
ill->ill_mcast_v1_time++;
if (ill->ill_mcast_v2_tset == 1)
ill->ill_mcast_v2_time++;
if ((ill->ill_mcast_type == IGMP_V1_ROUTER) &&
(ipst->ips_igmp_max_version >= IGMP_V2_ROUTER) &&
(ill->ill_mcast_v1_time >= OVQP(ill))) {
if ((ill->ill_mcast_v2_tset > 0) ||
(ipst->ips_igmp_max_version ==
IGMP_V2_ROUTER)) {
ip1dbg(("V1 query timer "
"expired on %s; switching "
"mode to IGMP_V2\n",
ill->ill_name));
ill->ill_mcast_type =
IGMP_V2_ROUTER;
} else {
ip1dbg(("V1 query timer "
"expired on %s; switching "
"mode to IGMP_V3\n",
ill->ill_name));
ill->ill_mcast_type =
IGMP_V3_ROUTER;
}
ill->ill_mcast_v1_time = 0;
ill->ill_mcast_v1_tset = 0;
atomic_dec_16(&ifp->illif_mcast_v1);
}
if ((ill->ill_mcast_type == IGMP_V2_ROUTER) &&
(ipst->ips_igmp_max_version >= IGMP_V3_ROUTER) &&
(ill->ill_mcast_v2_time >= OVQP(ill))) {
ip1dbg(("V2 query timer expired on "
"%s; switching mode to IGMP_V3\n",
ill->ill_name));
ill->ill_mcast_type = IGMP_V3_ROUTER;
ill->ill_mcast_v2_time = 0;
ill->ill_mcast_v2_tset = 0;
atomic_dec_16(&ifp->illif_mcast_v2);
}
rw_exit(&ill->ill_mcast_lock);
ill_refrele(ill);
rw_enter(&ipst->ips_ill_g_lock, RW_READER);
}
}
rw_exit(&ipst->ips_ill_g_lock);
ill_mcast_timer_start(ipst);
mutex_enter(&ipst->ips_igmp_slowtimeout_lock);
if (ipst->ips_igmp_slowtimeout_quiesce != B_TRUE) {
ipst->ips_igmp_slowtimeout_id = timeout(igmp_slowtimo,
(void *)ipst, MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
} else {
ipst->ips_igmp_slowtimeout_id = 0;
}
mutex_exit(&ipst->ips_igmp_slowtimeout_lock);
}
/*
* mld_slowtimo:
* - Reverts to a newer MLD version if we didn't hear from the older
* version router within MLD_AGE_THRESHOLD seconds.
* - Reschedules the slow timeout.
* The check against ips_mld_max_version ensures that we don't revert
* to a higher MLD version than configured.
*/
void
mld_slowtimo(void *arg)
{
ill_t *ill;
ill_if_t *ifp;
avl_tree_t *avl_tree;
ip_stack_t *ipst = (ip_stack_t *)arg;
ASSERT(arg != NULL);
/* See comments in igmp_slowtimo() above... */
rw_enter(&ipst->ips_ill_g_lock, RW_READER);
for (ifp = IP_V6_ILL_G_LIST(ipst);
ifp != (ill_if_t *)&IP_V6_ILL_G_LIST(ipst);
ifp = ifp->illif_next) {
if (ifp->illif_mcast_v1 == 0)
continue;
avl_tree = &ifp->illif_avl_by_ppa;
for (ill = avl_first(avl_tree); ill != NULL;
ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
/* Make sure the ill isn't going away. */
if (!ill_check_and_refhold(ill))
continue;
rw_exit(&ipst->ips_ill_g_lock);
rw_enter(&ill->ill_mcast_lock, RW_WRITER);
if (ill->ill_mcast_v1_tset == 1)
ill->ill_mcast_v1_time++;
if ((ill->ill_mcast_type == MLD_V1_ROUTER) &&
(ipst->ips_mld_max_version >= MLD_V2_ROUTER) &&
(ill->ill_mcast_v1_time >= OVQP(ill))) {
ip1dbg(("MLD query timer expired on"
" %s; switching mode to MLD_V2\n",
ill->ill_name));
ill->ill_mcast_type = MLD_V2_ROUTER;
ill->ill_mcast_v1_time = 0;
ill->ill_mcast_v1_tset = 0;
atomic_dec_16(&ifp->illif_mcast_v1);
}
rw_exit(&ill->ill_mcast_lock);
ill_refrele(ill);
rw_enter(&ipst->ips_ill_g_lock, RW_READER);
}
}
rw_exit(&ipst->ips_ill_g_lock);
ill_mcast_timer_start(ipst);
mutex_enter(&ipst->ips_mld_slowtimeout_lock);
if (ipst->ips_mld_slowtimeout_quiesce != B_TRUE) {
ipst->ips_mld_slowtimeout_id = timeout(mld_slowtimo,
(void *)ipst, MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
} else {
ipst->ips_mld_slowtimeout_id = 0;
}
mutex_exit(&ipst->ips_mld_slowtimeout_lock);
}
/*
* igmp_sendpkt:
* This will send to ip_output_simple just like icmp_inbound.
*/
static void
igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
{
mblk_t *mp;
igmpa_t *igmpa;
uint8_t *rtralert;
ipha_t *ipha;
int hdrlen = sizeof (ipha_t) + RTRALERT_LEN;
size_t size = hdrlen + sizeof (igmpa_t);
ill_t *ill = ilm->ilm_ill;
ip_stack_t *ipst = ill->ill_ipst;
ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
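/*
* Wire layout assembled below:
* [ IPv4 header | 4-byte Router Alert option | 8-byte IGMP header ]
* hdrlen covers the IP header plus the option, so the IP_CSUM() call
* below checksums only the IGMP portion of the message.
*/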
mp = allocb(size, BPRI_HI);
if (mp == NULL) {
return;
}
mp->b_wptr = mp->b_rptr + size;
ipha = (ipha_t *)mp->b_rptr;
rtralert = (uint8_t *)&(ipha[1]);
igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
igmpa->igmpa_type = type;
igmpa->igmpa_code = 0;
igmpa->igmpa_group = ilm->ilm_addr;
igmpa->igmpa_cksum = 0;
igmpa->igmpa_cksum = IP_CSUM(mp, hdrlen, 0);
rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
rtralert[1] = RTRALERT_LEN;
rtralert[2] = 0;
rtralert[3] = 0;
ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
| (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
ipha->ipha_type_of_service = 0;
ipha->ipha_length = htons(size);
ipha->ipha_ident = 0;
ipha->ipha_fragment_offset_and_flags = 0;
ipha->ipha_ttl = IGMP_TTL;
ipha->ipha_protocol = IPPROTO_IGMP;
ipha->ipha_hdr_checksum = 0;
ipha->ipha_dst = addr ? addr : igmpa->igmpa_group;
ipha->ipha_src = INADDR_ANY;
ill_mcast_queue(ill, mp);
++ipst->ips_igmpstat.igps_snd_reports;
}
/*
* Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill.
* The report will contain one group record
* for each element of reclist. If this causes packet length to
* exceed ill->ill_mc_mtu, multiple reports are sent.
* reclist is assumed to be made up of buffers allocated by mcast_bldmrec(),
* and those buffers are freed here.
*/
static void
igmpv3_sendrpt(ill_t *ill, mrec_t *reclist)
{
igmp3ra_t *igmp3ra;
grphdra_t *grphdr;
mblk_t *mp;
ipha_t *ipha;
uint8_t *rtralert;
ipaddr_t *src_array;
int i, j, numrec, more_src_cnt;
size_t hdrsize, size, rsize;
mrec_t *rp, *cur_reclist;
mrec_t *next_reclist = reclist;
boolean_t morepkts;
ip_stack_t *ipst = ill->ill_ipst;
ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
/* if there aren't any records, there's nothing to send */
if (reclist == NULL)
return;
hdrsize = sizeof (ipha_t) + RTRALERT_LEN;
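/*
* Each pass through the loop below assembles one report:
* [ IPv4 header | Router Alert | igmp3ra_t | group records ... ]
* sized to fit within ill_mc_mtu; if the remaining records don't fit,
* we jump back to nextpkt and emit another report.
*/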
nextpkt:
size = hdrsize + sizeof (igmp3ra_t);
morepkts = B_FALSE;
more_src_cnt = 0;
cur_reclist = next_reclist;
numrec = 0;
for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
rsize = sizeof (grphdra_t) +
(rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t));
if (size + rsize > ill->ill_mc_mtu) {
if (rp == cur_reclist) {
/*
* If the first mrec we looked at is too big
* to fit in a single packet (i.e the source
* list is too big), we must either truncate
* the list (if TO_EX or IS_EX), or send
* multiple reports for the same group (all
* other types).
*/
int srcspace, srcsperpkt;
srcspace = ill->ill_mc_mtu - (size +
sizeof (grphdra_t));
/*
* Skip if there's not even enough room in
* a single packet to send something useful.
*/
if (srcspace <= sizeof (ipaddr_t))
continue;
srcsperpkt = srcspace / sizeof (ipaddr_t);
/*
* Increment size and numrec, because we will
* be sending a record for the mrec we're
* looking at now.
*/
size += sizeof (grphdra_t) +
(srcsperpkt * sizeof (ipaddr_t));
numrec++;
if (rp->mrec_type == MODE_IS_EXCLUDE ||
rp->mrec_type == CHANGE_TO_EXCLUDE) {
rp->mrec_srcs.sl_numsrc = srcsperpkt;
if (rp->mrec_next == NULL) {
/* no more packets to send */
break;
} else {
/*
* more packets, but we're
* done with this mrec.
*/
next_reclist = rp->mrec_next;
}
} else {
more_src_cnt = rp->mrec_srcs.sl_numsrc
- srcsperpkt;
rp->mrec_srcs.sl_numsrc = srcsperpkt;
/*
* We'll fix up this mrec (remove the
* srcs we've already sent) before
* returning to nextpkt above.
*/
next_reclist = rp;
}
} else {
next_reclist = rp;
}
morepkts = B_TRUE;
break;
}
size += rsize;
numrec++;
}
mp = allocb(size, BPRI_HI);
if (mp == NULL) {
goto free_reclist;
}
bzero((char *)mp->b_rptr, size);
mp->b_wptr = (uchar_t *)(mp->b_rptr + size);
ipha = (ipha_t *)mp->b_rptr;
rtralert = (uint8_t *)&(ipha[1]);
igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]);
grphdr = (grphdra_t *)&(igmp3ra[1]);
rp = cur_reclist;
for (i = 0; i < numrec; i++) {
grphdr->grphdra_type = rp->mrec_type;
grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc);
grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group);
src_array = (ipaddr_t *)&(grphdr[1]);
for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++)
src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]);
grphdr = (grphdra_t *)&(src_array[j]);
rp = rp->mrec_next;
}
igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT;
igmp3ra->igmp3ra_numrec = htons(numrec);
igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0);
rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
rtralert[1] = RTRALERT_LEN;
rtralert[2] = 0;
rtralert[3] = 0;
ipha->ipha_version_and_hdr_length = IP_VERSION << 4
| (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL;
ipha->ipha_length = htons(size);
ipha->ipha_ttl = IGMP_TTL;
ipha->ipha_protocol = IPPROTO_IGMP;
ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP);
ipha->ipha_src = INADDR_ANY;
ill_mcast_queue(ill, mp);
++ipst->ips_igmpstat.igps_snd_reports;
if (morepkts) {
if (more_src_cnt > 0) {
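/*
* The packet just sent carried only the first sl_numsrc
* sources of this record; slide the unsent tail down to the
* front so the next pass resends just the remainder.
*/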
int index, mvsize;
slist_t *sl = &next_reclist->mrec_srcs;
index = sl->sl_numsrc;
mvsize = more_src_cnt * sizeof (in6_addr_t);
(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
mvsize);
sl->sl_numsrc = more_src_cnt;
}
goto nextpkt;
}
free_reclist:
while (reclist != NULL) {
rp = reclist->mrec_next;
mi_free(reclist);
reclist = rp;
}
}
/*
* mld_input:
* Return NULL for a bad packet that is discarded here.
* Return mp if the message is OK and should be handed to "raw" receivers.
* Callers of mld_input() may need to reinitialize variables that were copied
* from the mblk, as the message may be pulled up.
*/
mblk_t *
mld_input(mblk_t *mp, ip_recv_attr_t *ira)
{
ip6_t *ip6h = (ip6_t *)(mp->b_rptr);
mld_hdr_t *mldh;
ilm_t *ilm;
ipif_t *ipif;
uint16_t hdr_length, exthdr_length;
in6_addr_t *v6group_ptr;
uint_t next;
int mldlen;
ill_t *ill = ira->ira_ill;
ip_stack_t *ipst = ill->ill_ipst;
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal);
/* Make sure the src address of the packet is link-local */
if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
freemsg(mp);
return (NULL);
}
if (ip6h->ip6_hlim != 1) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit);
freemsg(mp);
return (NULL);
}
/* Get to the icmp header part */
hdr_length = ira->ira_ip_hdr_length;
exthdr_length = hdr_length - IPV6_HDR_LEN;
mldlen = ntohs(ip6h->ip6_plen) - exthdr_length;
/* An MLD packet must be at least 24 octets long to be valid */
if (mldlen < MLD_MINLEN) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
freemsg(mp);
return (NULL);
}
mldh = (mld_hdr_t *)(&mp->b_rptr[hdr_length]);
switch (mldh->mld_type) {
case MLD_LISTENER_QUERY:
/*
* packet length differentiates between v1 and v2. v1
* query should be exactly 24 octets long; v2 is >= 28.
*/
if ((mldlen == MLD_MINLEN) ||
(ipst->ips_mld_max_version < MLD_V2_ROUTER)) {
next = mld_query_in(mldh, ill);
} else if (mldlen >= MLD_V2_QUERY_MINLEN) {
next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen);
} else {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
freemsg(mp);
return (NULL);
}
if (next == 0) {
return (mp);
}
if (next != INFINITY)
mld_start_timers(next, ipst);
break;
case MLD_LISTENER_REPORT:
/*
 * For fast leave to work, we have to know that we are the
 * last host to send a report for this group.  Reports
 * generated by us are looped back since we could potentially
 * be a multicast router, so discard reports that we sourced
 * ourselves.
 */
mutex_enter(&ill->ill_lock);
for (ipif = ill->ill_ipif; ipif != NULL;
ipif = ipif->ipif_next) {
if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
&ip6h->ip6_src)) {
if (ip_debug > 1) {
char buf1[INET6_ADDRSTRLEN];
(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
    "mld_input: we are only "
    "member src %s\n",
    inet_ntop(AF_INET6, &ip6h->ip6_src,
    buf1, sizeof (buf1)));
}
mutex_exit(&ill->ill_lock);
return (mp);
}
}
mutex_exit(&ill->ill_lock);
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses);
v6group_ptr = &mldh->mld_addr;
if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) {
BUMP_MIB(ill->ill_icmp6_mib,
ipv6IfIcmpInGroupMembBadReports);
freemsg(mp);
return (NULL);
}
/*
* If we belong to the group being reported, and we are a
* 'Delaying member' per the RFC terminology, stop our timer
* for that group and 'clear flag' i.e. mark ilm_state as
* IGMP_OTHERMEMBER. With zones, there can be multiple group
* membership entries for the same group address (one per zone)
* so we need to walk the ill_ilm list.
*/
rw_enter(&ill->ill_mcast_lock, RW_WRITER);
for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr))
continue;
BUMP_MIB(ill->ill_icmp6_mib,
ipv6IfIcmpInGroupMembOurReports);
ilm->ilm_timer = INFINITY;
ilm->ilm_state = IGMP_OTHERMEMBER;
}
rw_exit(&ill->ill_mcast_lock);
/*
* No packets have been sent above - no
* ill_mcast_send_queued is needed.
*/
ill_mcast_timer_start(ill->ill_ipst);
break;
case MLD_LISTENER_REDUCTION:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions);
break;
}
return (mp);
}
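/*
 * A minimal standalone sketch of the length-based classification used
 * in mld_input() above: an MLDv1 query carries exactly 24 octets of
 * ICMPv6 payload, an MLDv2 query carries 28 or more, and anything in
 * between is malformed.  The two constants mirror MLD_MINLEN and
 * MLD_V2_QUERY_MINLEN, but are written out here as assumed values.
 */
#if 0
#include <stdio.h>
#define	V1_QUERY_LEN	24	/* assumed value of MLD_MINLEN */
#define	V2_QUERY_MINLEN	28	/* assumed value of MLD_V2_QUERY_MINLEN */
static const char *
mld_query_version(int mldlen)
{
	if (mldlen == V1_QUERY_LEN)
		return ("MLDv1");
	if (mldlen >= V2_QUERY_MINLEN)
		return ("MLDv2");
	return ("malformed");
}
int
main(void)
{
	int lens[] = { 24, 26, 28, 40 };
	int i;
	for (i = 0; i < 4; i++)
		printf("%d octets -> %s\n", lens[i], mld_query_version(lens[i]));
	return (0);
}
#endif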
/*
* Handles an MLDv1 Listener Query. Returns 0 on error, or the appropriate
* (non-zero, unsigned) timer value (which may be INFINITY) to be set on
* success.
*/
static uint_t
mld_query_in(mld_hdr_t *mldh, ill_t *ill)
{
ilm_t *ilm;
int timer;
uint_t next, current;
in6_addr_t *v6group;
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);
/*
* In the MLD specification, there are 3 states and a flag.
*
* In Non-Listener state, we simply don't have a membership record.
* In Delaying state, our timer is running (ilm->ilm_timer < INFINITY)
* In Idle Member state, our timer is not running (ilm->ilm_timer ==
* INFINITY)
*
* The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
* we have heard a report from another member, or IGMP_IREPORTEDLAST
* if I sent the last report.
*/
v6group = &mldh->mld_addr;
if (!IN6_IS_ADDR_UNSPECIFIED(v6group) &&
    !IN6_IS_ADDR_MULTICAST(v6group)) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries);
return (0);
}
/* Need to do compatibility mode checking */
rw_enter(&ill->ill_mcast_lock, RW_WRITER);
ill->ill_mcast_v1_time = 0;
ill->ill_mcast_v1_tset = 1;
if (ill->ill_mcast_type == MLD_V2_ROUTER) {
ip1dbg(("Received MLDv1 Query on %s, switching mode to "
"MLD_V1_ROUTER\n", ill->ill_name));
atomic_inc_16(&ill->ill_ifptr->illif_mcast_v1);
ill->ill_mcast_type = MLD_V1_ROUTER;
}
timer = (int)ntohs(mldh->mld_maxdelay);
if (ip_debug > 1) {
(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
"mld_input: TIMER = mld_maxdelay %d mld_type 0x%x",
timer, (int)mldh->mld_type);
}
/*
 * -Start the timers in all of our membership records for
 * the physical interface on which the query arrived,
 * excluding those that belong to the "all hosts" group and
 * those with 0 (reserved) or 1 (node-local) scope.
 *
 * -Restart any timer that is already running but has a value
 * longer than the requested timeout.
 * -Use the value specified in the query message as the
 * maximum timeout.
 */
next = INFINITY;
current = CURRENT_MSTIME;
for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
ASSERT(!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr));
if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr))
continue;
if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
    &ipv6_all_hosts_mcast) &&
    (IN6_IS_ADDR_UNSPECIFIED(v6group) ||
    IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))) {
if (timer == 0) {
/* Respond immediately */
ilm->ilm_timer = INFINITY;
ilm->ilm_state = IGMP_IREPORTEDLAST;
mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
break;
}
if (ilm->ilm_timer > timer) {
MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
if (ilm->ilm_timer < next)
next = ilm->ilm_timer;
ilm->ilm_timer += current;
}
break;
}
}
rw_exit(&ill->ill_mcast_lock);
/* Send any deferred/queued IP packets */
ill_mcast_send_queued(ill);
ill_mcast_timer_start(ill->ill_ipst);
return (next);
}
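/*
 * For context, the rescheduling above only ever shortens a running
 * timer, using MCAST_RANDOM_DELAY to spread responses over the queried
 * maximum delay.  A hypothetical userland analogue follows; rand() is
 * a stand-in, as the kernel macro derives its randomness differently.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/* illustrative stand-in for MCAST_RANDOM_DELAY */
#define	RANDOM_DELAY(var, max)	((var) = rand() % ((max) + 1))
int
main(void)
{
	unsigned timer = 5000;		/* running report timer, ms (assumed) */
	unsigned maxdelay = 1000;	/* max delay from the query, ms */
	unsigned newtimer;
	srand((unsigned)time(NULL));
	if (timer > maxdelay) {		/* only shorten running timers */
		RANDOM_DELAY(newtimer, maxdelay);
		printf("timer rescheduled: %u -> %u ms\n", timer, newtimer);
	}
	return (0);
}
#endif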
/*
* Handles an MLDv2 Listener Query. On error, returns 0; on success,
* returns the appropriate (non-zero, unsigned) timer value (which may
* be INFINITY) to be set.
*/
static uint_t
mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen)
{
ilm_t *ilm;
in6_addr_t *v6group, *src_array;
uint_t next, numsrc, i, mrd, delay, qqi, current;
uint8_t qrv;
v6group = &mld2q->mld2q_addr;
numsrc = ntohs(mld2q->mld2q_numsrc);
/* make sure numsrc matches packet size */
if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
return (0);
}
src_array = (in6_addr_t *)&mld2q[1];
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);
/* extract Maximum Response Delay from code in header */
mrd = ntohs(mld2q->mld2q_mxrc);
if (mrd >= MLD_V2_MAXRT_FPMIN) {
uint_t hdrval, mant, exp;
hdrval = mrd;
mant = hdrval & MLD_V2_MAXRT_MANT_MASK;
exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12;
mrd = (mant | 0x1000) << (exp + 3);
}
if (mrd == 0)
mrd = DSEC_TO_MSEC(MCAST_DEF_QUERY_RESP_INTERVAL);
MCAST_RANDOM_DELAY(delay, mrd);
next = (unsigned)INFINITY;
current = CURRENT_MSTIME;
if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0)
ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
else
ill->ill_mcast_rv = qrv;
if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) {
uint_t mant, exp;
mant = qqi & MLD_V2_QQI_MANT_MASK;
exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 4;
qqi = (mant | 0x10) << (exp + 3);
}
ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;
/*
* If we have a pending general query response that's scheduled
* sooner than the delay we calculated for this response, then
* no action is required (MLDv2 draft section 6.2 rule 1)
*/
rw_enter(&ill->ill_mcast_lock, RW_WRITER);
if (ill->ill_global_timer < (current + delay)) {
rw_exit(&ill->ill_mcast_lock);
return (next);
}
/*
* Now take action depending on query type: general,
* group specific, or group/source specific.
*/
if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) {
/*
* general query
* We know global timer is either not running or is
* greater than our calculated delay, so reset it to
* our delay (random value in range [0, response time])
*/
ill->ill_global_timer = current + delay;
next = delay;
} else {
/* group or group/source specific query */
for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) ||
!IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))
continue;
/*
* If the query is group specific or we have a
* pending group specific query, the response is
* group specific (pending sources list should be
* empty). Otherwise, need to update the pending
* sources list for the group and source specific
* response.
*/
if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
FREE_SLIST(ilm->ilm_pendsrcs);
ilm->ilm_pendsrcs = NULL;
} else {
boolean_t overflow;
slist_t *pktl;
if (numsrc > MAX_FILTER_SIZE ||
(ilm->ilm_pendsrcs == NULL &&
(ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
/*
* We've been sent more sources than
* we can deal with; or we can't deal
* with a source list at all. Revert
* to a group specific query.
*/
goto group_query;
}
if ((pktl = l_alloc()) == NULL)
goto group_query;
pktl->sl_numsrc = numsrc;
for (i = 0; i < numsrc; i++)
pktl->sl_addr[i] = src_array[i];
l_union_in_a(ilm->ilm_pendsrcs, pktl,
&overflow);
l_free(pktl);
if (overflow)
goto group_query;
}
ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
INFINITY : (ilm->ilm_timer - current);
/* set timer to soonest value */
ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
if (ilm->ilm_timer < next)
next = ilm->ilm_timer;
ilm->ilm_timer += current;
break;
}
}
rw_exit(&ill->ill_mcast_lock);
/*
* No packets have been sent above - no
* ill_mcast_send_queued is needed.
*/
ill_mcast_timer_start(ill->ill_ipst);
return (next);
}
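/*
 * The two fixed-point decodings above come from RFC 3810 (sections
 * 5.1.3 and 5.1.9).  This standalone sketch writes the masks and
 * thresholds out numerically (the kernel gets them from header
 * constants, so treat these literals as assumptions): values below
 * the threshold are used as-is, larger ones encode mantissa and
 * exponent fields.
 */
#if 0
#include <stdio.h>
#include <stdint.h>
/* Maximum Response Code (16 bits) -> milliseconds */
static unsigned
mld2_decode_mrc(uint16_t mrc)
{
	if (mrc < 0x8000)	/* below the fixed-point threshold */
		return (mrc);
	return (((mrc & 0x0fff) | 0x1000) << (((mrc & 0x7000) >> 12) + 3));
}
/* Querier's Query Interval Code (8 bits) -> seconds */
static unsigned
mld2_decode_qqic(uint8_t qqic)
{
	if (qqic < 0x80)
		return (qqic);
	return (((qqic & 0x0f) | 0x10) << (((qqic & 0x70) >> 4) + 3));
}
int
main(void)
{
	/* 0x8abc: mant 0xabc, exp 0 -> (0x1abc << 3) = 54752 ms */
	printf("mrc 0x8abc -> %u ms\n", mld2_decode_mrc(0x8abc));
	/* 0x95: mant 0x5, exp 1 -> (0x15 << 4) = 336 s */
	printf("qqic 0x95 -> %u s\n", mld2_decode_qqic(0x95));
	return (0);
}
#endif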
/*
* Send MLDv1 response packet with hoplimit 1
*/
static void
mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr)
{
mblk_t *mp;
mld_hdr_t *mldh;
ip6_t *ip6h;
ip6_hbh_t *ip6hbh;
struct ip6_opt_router *ip6router;
size_t size = IPV6_HDR_LEN + sizeof (mld_hdr_t);
ill_t *ill = ilm->ilm_ill;
ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
/*
* We need to place a router alert option in this packet. The length
* of the options must be a multiple of 8. The hbh option header is 2
* bytes followed by the 4 byte router alert option. That leaves
* 2 bytes of pad for a total of 8 bytes.
*/
const int router_alert_length = 8;
ASSERT(ill->ill_isv6);
size += router_alert_length;
mp = allocb(size, BPRI_HI);
if (mp == NULL)
return;
bzero(mp->b_rptr, size);
mp->b_wptr = mp->b_rptr + size;
ip6h = (ip6_t *)mp->b_rptr;
ip6hbh = (struct ip6_hbh *)&ip6h[1];
ip6router = (struct ip6_opt_router *)&ip6hbh[1];
/*
* A zero is a pad option of length 1. The bzero of the whole packet
* above will pad between ip6router and mld.
*/
mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length);
mldh->mld_type = type;
mldh->mld_addr = ilm->ilm_v6addr;
ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
ip6router->ip6or_len = 2;
ip6router->ip6or_value[0] = 0;
ip6router->ip6or_value[1] = IP6_ALERT_MLD;
ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
ip6hbh->ip6h_len = 0;
ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length);
ip6h->ip6_nxt = IPPROTO_HOPOPTS;
ip6h->ip6_hops = MLD_HOP_LIMIT;
if (v6addr == NULL)
ip6h->ip6_dst = ilm->ilm_v6addr;
else
ip6h->ip6_dst = *v6addr;
ip6h->ip6_src = ipv6_all_zeros;
/*
* Prepare for checksum by putting icmp length in the icmp
* checksum field. The checksum is calculated in ip_output.
*/
mldh->mld_cksum = htons(sizeof (*mldh));
ill_mcast_queue(ill, mp);
}
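/*
 * The 8-byte hop-by-hop block constructed above has a fixed layout.
 * This standalone sketch fills the same eight bytes explicitly; the
 * numeric option codes (58 for ICMPv6, 0x05 for router alert, alert
 * value 0 for MLD) match the symbolic constants used in the kernel,
 * but are spelled out here as assumptions.
 */
#if 0
#include <stdio.h>
#include <stdint.h>
#include <string.h>
int
main(void)
{
	uint8_t hbh[8];
	int i;
	memset(hbh, 0, sizeof (hbh));
	hbh[0] = 58;	/* next header: ICMPv6 */
	hbh[1] = 0;	/* ext hdr len: 0 extra 8-byte units, 8 bytes total */
	hbh[2] = 0x05;	/* option type: router alert */
	hbh[3] = 2;	/* option data length */
	hbh[4] = 0;	/* alert value, high byte */
	hbh[5] = 0;	/* alert value, low byte: 0 means MLD */
	/* hbh[6] and hbh[7] stay zero: two Pad1 options */
	for (i = 0; i < 8; i++)
		printf("%02x ", hbh[i]);
	printf("\n");
	return (0);
}
#endif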
/*
* Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill. The
* report will contain one multicast address record for each element of
* reclist. If this causes packet length to exceed ill->ill_mc_mtu,
* multiple reports are sent. reclist is assumed to be made up of
* buffers allocated by mcast_bldmrec(), and those buffers are freed here.
*/
static void
mldv2_sendrpt(ill_t *ill, mrec_t *reclist)
{
mblk_t *mp;
mld2r_t *mld2r;
mld2mar_t *mld2mar;
in6_addr_t *srcarray;
ip6_t *ip6h;
ip6_hbh_t *ip6hbh;
struct ip6_opt_router *ip6router;
size_t size, optlen, padlen, icmpsize, rsize;
int i, numrec, more_src_cnt;
mrec_t *rp, *cur_reclist;
mrec_t *next_reclist = reclist;
boolean_t morepkts;
/* If there aren't any records, there's nothing to send */
if (reclist == NULL)
return;
ASSERT(ill->ill_isv6);
ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
/*
* Total option length (optlen + padlen) must be a multiple of
* 8 bytes. We assume here that optlen <= 8, so the total option
* length will be 8. Assert this in case anything ever changes.
*/
optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router);
ASSERT(optlen <= 8);
padlen = 8 - optlen;
nextpkt:
icmpsize = sizeof (mld2r_t);
size = IPV6_HDR_LEN + optlen + padlen + icmpsize;
morepkts = B_FALSE;
more_src_cnt = 0;
for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL;
rp = rp->mrec_next, numrec++) {
rsize = sizeof (mld2mar_t) +
(rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t));
if (size + rsize > ill->ill_mc_mtu) {
if (rp == cur_reclist) {
/*
* If the first mrec we looked at is too big
* to fit in a single packet (i.e the source
* list is too big), we must either truncate
* the list (if TO_EX or IS_EX), or send
* multiple reports for the same group (all
* other types).
*/
int srcspace, srcsperpkt;
srcspace = ill->ill_mc_mtu -
(size + sizeof (mld2mar_t));
/*
* Skip if there's not even enough room in
* a single packet to send something useful.
*/
if (srcspace <= sizeof (in6_addr_t))
continue;
srcsperpkt = srcspace / sizeof (in6_addr_t);
/*
* Increment icmpsize and size, because we will
* be sending a record for the mrec we're
* looking at now.
*/
rsize = sizeof (mld2mar_t) +
(srcsperpkt * sizeof (in6_addr_t));
icmpsize += rsize;
size += rsize;
if (rp->mrec_type == MODE_IS_EXCLUDE ||
rp->mrec_type == CHANGE_TO_EXCLUDE) {
rp->mrec_srcs.sl_numsrc = srcsperpkt;
if (rp->mrec_next == NULL) {
/* no more packets to send */
break;
} else {
/*
* more packets, but we're
* done with this mrec.
*/
next_reclist = rp->mrec_next;
}
} else {
more_src_cnt = rp->mrec_srcs.sl_numsrc
- srcsperpkt;
rp->mrec_srcs.sl_numsrc = srcsperpkt;
/*
* We'll fix up this mrec (remove the
* srcs we've already sent) before
* returning to nextpkt above.
*/
next_reclist = rp;
}
} else {
next_reclist = rp;
}
morepkts = B_TRUE;
break;
}
icmpsize += rsize;
size += rsize;
}
mp = allocb(size, BPRI_HI);
if (mp == NULL)
goto free_reclist;
bzero(mp->b_rptr, size);
mp->b_wptr = mp->b_rptr + size;
ip6h = (ip6_t *)mp->b_rptr;
ip6hbh = (ip6_hbh_t *)&(ip6h[1]);
ip6router = (struct ip6_opt_router *)&(ip6hbh[1]);
mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen);
mld2mar = (mld2mar_t *)&(mld2r[1]);
ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
ip6h->ip6_plen = htons(optlen + padlen + icmpsize);
ip6h->ip6_nxt = IPPROTO_HOPOPTS;
ip6h->ip6_hops = MLD_HOP_LIMIT;
ip6h->ip6_dst = ipv6_all_v2rtrs_mcast;
ip6h->ip6_src = ipv6_all_zeros;
ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
/*
* ip6h_len is the number of 8-byte words, not including the first
* 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0.
*/
ip6hbh->ip6h_len = 0;
ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
ip6router->ip6or_len = 2;
ip6router->ip6or_value[0] = 0;
ip6router->ip6or_value[1] = IP6_ALERT_MLD;
mld2r->mld2r_type = MLD_V2_LISTENER_REPORT;
mld2r->mld2r_nummar = htons(numrec);
/*
* Prepare for the checksum by putting icmp length in the icmp
* checksum field. The checksum is calculated in ip_output_simple.
*/
mld2r->mld2r_cksum = htons(icmpsize);
for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
mld2mar->mld2mar_type = rp->mrec_type;
mld2mar->mld2mar_auxlen = 0;
mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc);
mld2mar->mld2mar_group = rp->mrec_group;
srcarray = (in6_addr_t *)&(mld2mar[1]);
for (i = 0; i < rp->mrec_srcs.sl_numsrc; i++)
srcarray[i] = rp->mrec_srcs.sl_addr[i];
mld2mar = (mld2mar_t *)&(srcarray[i]);
}
ill_mcast_queue(ill, mp);
if (morepkts) {
if (more_src_cnt > 0) {
int index, mvsize;
slist_t *sl = &next_reclist->mrec_srcs;
index = sl->sl_numsrc;
mvsize = more_src_cnt * sizeof (in6_addr_t);
(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
mvsize);
sl->sl_numsrc = more_src_cnt;
}
goto nextpkt;
}
free_reclist:
while (reclist != NULL) {
rp = reclist->mrec_next;
mi_free(reclist);
reclist = rp;
}
}
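/*
 * Each multicast address record in the v2 report is variable length,
 * so the fill loop above advances its record pointer past the source
 * array it just wrote.  A hypothetical flat-buffer walk of the same
 * shape, with an invented record type standing in for mld2mar_t:
 */
#if 0
#include <stdio.h>
#include <stdint.h>
typedef struct rec {		/* invented stand-in for mld2mar_t */
	uint8_t		r_type;
	uint8_t		r_auxlen;
	uint16_t	r_numsrc;
} rec_t;
int
main(void)
{
	uint8_t buf[256] = { 0 };
	rec_t *r = (rec_t *)buf;
	uint32_t *src;
	int nsrc[] = { 3, 1 }, i, j;
	for (i = 0; i < 2; i++) {
		r->r_numsrc = (uint16_t)nsrc[i];
		src = (uint32_t *)&r[1];
		for (j = 0; j < nsrc[i]; j++)
			src[j] = (uint32_t)j;
		/* step past header plus source array, as above */
		r = (rec_t *)&src[j];
	}
	printf("packed %ld bytes\n", (long)((uint8_t *)r - buf));
	return (0);
}
#endif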
static mrec_t *
mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist,
mrec_t *next)
{
mrec_t *rp;
int i;
if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) &&
SLIST_IS_EMPTY(srclist))
return (next);
rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI);
if (rp == NULL)
return (next);
rp->mrec_next = next;
rp->mrec_type = type;
rp->mrec_auxlen = 0;
rp->mrec_group = *grp;
if (srclist == NULL) {
rp->mrec_srcs.sl_numsrc = 0;
} else {
rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc;
for (i = 0; i < srclist->sl_numsrc; i++)
rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i];
}
return (rp);
}
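/*
 * Note that mcast_bldmrec() prepends to the list it is given, and on
 * an empty ALLOW/BLOCK source list or allocation failure simply
 * returns the list unchanged, so callers can chain calls to
 * accumulate records.  A standalone model of that prepend-or-
 * passthrough pattern (invented node type, not kernel code):
 */
#if 0
#include <stdio.h>
#include <stdlib.h>
typedef struct node {
	struct node	*n_next;
	int		n_val;
} node_t;
static node_t *
prepend(int val, node_t *next)
{
	node_t *n = malloc(sizeof (*n));
	if (n == NULL)		/* allocation failure: list unchanged */
		return (next);
	n->n_next = next;
	n->n_val = val;
	return (n);
}
int
main(void)
{
	node_t *list = NULL, *n;
	list = prepend(1, list);
	list = prepend(2, list);	/* 2 now heads the list */
	for (n = list; n != NULL; n = n->n_next)
		printf("%d\n", n->n_val);
	return (0);
}
#endif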
/*
* Set up initial retransmit state. If memory cannot be allocated for
* the source lists, simply create as much state as is possible; memory
* allocation failures are considered one type of transient error that
* the retransmissions are designed to overcome (and if they aren't
* transient, there are bigger problems than failing to notify the
* router about multicast group membership state changes).
*/
static void
mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype,
slist_t *flist)
{
/*
* There are only three possibilities for rtype:
* New join, transition from INCLUDE {} to INCLUDE {flist}
* => rtype is ALLOW_NEW_SOURCES
* New join, transition from INCLUDE {} to EXCLUDE {flist}
* => rtype is CHANGE_TO_EXCLUDE
* State change that involves a filter mode change
* => rtype is either CHANGE_TO_INCLUDE or CHANGE_TO_EXCLUDE
*/
ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE ||
rtype == ALLOW_NEW_SOURCES);
rtxp->rtx_cnt = ill->ill_mcast_rv;
switch (rtype) {
case CHANGE_TO_EXCLUDE:
rtxp->rtx_fmode_cnt = ill->ill_mcast_rv;
CLEAR_SLIST(rtxp->rtx_allow);
COPY_SLIST(flist, rtxp->rtx_block);
break;
case ALLOW_NEW_SOURCES:
case CHANGE_TO_INCLUDE:
rtxp->rtx_fmode_cnt =
rtype == ALLOW_NEW_SOURCES ? 0 : ill->ill_mcast_rv;
CLEAR_SLIST(rtxp->rtx_block);
COPY_SLIST(flist, rtxp->rtx_allow);
break;
}
}
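/*
 * The switch above reduces to a small mapping, summarized by this
 * standalone sketch (invented names and an assumed robustness value
 * of 2): TO_EX records the filter list as sources to block, TO_IN and
 * ALLOW_NEW record it as sources to allow, and only a pure ALLOW_NEW
 * (no filter mode change) leaves the filter-mode counter at zero.
 */
#if 0
#include <stdio.h>
#define	ROBUSTNESS	2	/* stand-in for ill->ill_mcast_rv */
/* invented stand-ins for the three legal mcast_record_t values */
enum rtype { R_TO_EX, R_TO_IN, R_ALLOW };
int
main(void)
{
	enum rtype t;
	for (t = R_TO_EX; t <= R_ALLOW; t++) {
		int fmode_cnt = (t == R_ALLOW) ? 0 : ROBUSTNESS;
		const char *list = (t == R_TO_EX) ? "rtx_block" : "rtx_allow";
		printf("rtype %d: rtx_fmode_cnt=%d, flist -> %s\n",
		    (int)t, fmode_cnt, list);
	}
	return (0);
}
#endif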
/*
* The basic strategy here, as extrapolated from RFC 3810 section 6.1 and
* RFC 3376 section 5.1, covers three cases:
* * The current state change is a filter mode change
* Set filter mode retransmit counter; set retransmit allow or
* block list to new source list as appropriate, and clear the
* retransmit list that was not set; send TO_IN or TO_EX with
* new source list.
* * The current state change is a source list change, but the filter
* mode retransmit counter is > 0
* Decrement filter mode retransmit counter; set retransmit
* allow or block list to new source list as appropriate,
* and clear the retransmit list that was not set; send TO_IN
* or TO_EX with new source list.
* * The current state change is a source list change, and the filter
* mode retransmit counter is 0.
* Merge existing rtx allow and block lists with new state:
* rtx_allow = (new allow + rtx_allow) - new block
* rtx_block = (new block + rtx_block) - new allow
* Send ALLOW and BLOCK records for new retransmit lists;
* decrement retransmit counter.
*
* As is the case for mcast_init_rtx(), memory allocation failures are
* acceptable; we just create as much state as we can.
*/
static mrec_t *
mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist)
{
ill_t *ill;
rtx_state_t *rtxp = &ilm->ilm_rtx;
mcast_record_t txtype;
mrec_t *rp, *rpnext, *rtnmrec;
boolean_t ovf;
ill = ilm->ilm_ill;
if (mreclist == NULL)
return (mreclist);
/*
* A filter mode change is indicated by a single mrec, which is
* either TO_IN or TO_EX. In this case, we just need to set new
* retransmit state as if this were an initial join. There is
* no change to the mrec list.
*/
if (mreclist->mrec_type == CHANGE_TO_INCLUDE ||
mreclist->mrec_type == CHANGE_TO_EXCLUDE) {
mcast_init_rtx(ill, rtxp, mreclist->mrec_type,
&mreclist->mrec_srcs);
return (mreclist);
}
/*
* Only the source list has changed
*/
rtxp->rtx_cnt = ill->ill_mcast_rv;
if (rtxp->rtx_fmode_cnt > 0) {
/* but we're still sending filter mode change reports */
rtxp->rtx_fmode_cnt--;
if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
CLEAR_SLIST(rtxp->rtx_block);
COPY_SLIST(flist, rtxp->rtx_allow);
txtype = CHANGE_TO_INCLUDE;
} else {
CLEAR_SLIST(rtxp->rtx_allow);
COPY_SLIST(flist, rtxp->rtx_block);
txtype = CHANGE_TO_EXCLUDE;
}
/* overwrite first mrec with new info */
mreclist->mrec_type = txtype;
l_copy(flist, &mreclist->mrec_srcs);
/* then free any remaining mrecs */
for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) {
rpnext = rp->mrec_next;
mi_free(rp);
}
mreclist->mrec_next = NULL;
rtnmrec = mreclist;
} else {
mrec_t *allow_mrec, *block_mrec;
/*
* Just send the source change reports; but we need to
* recalculate the ALLOW and BLOCK lists based on previous
* state and new changes.
*/
rtnmrec = mreclist;
allow_mrec = block_mrec = NULL;
for (rp = mreclist; rp != NULL; rp = rp->mrec_next) {
ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES ||
rp->mrec_type == BLOCK_OLD_SOURCES);
if (rp->mrec_type == ALLOW_NEW_SOURCES)
allow_mrec = rp;
else
block_mrec = rp;
}
/*
* Perform calculations:
* new_allow = mrec_allow + (rtx_allow - mrec_block)
* new_block = mrec_block + (rtx_block - mrec_allow)
*
* Each calc requires two steps, for example:
* rtx_allow = rtx_allow - mrec_block;
* new_allow = mrec_allow + rtx_allow;
*
* Store results in mrec lists, and then copy into rtx lists.
* We do it in this order in case the rtx list hasn't been
* alloc'd yet; if it hasn't and our alloc fails, that's okay (as
* with mcast_init_rtx(), we just keep whatever state we managed
* to build).  Overflows are also okay.
*/
if (block_mrec != NULL) {
l_difference_in_a(rtxp->rtx_allow,
&block_mrec->mrec_srcs);
}
if (allow_mrec != NULL) {
l_difference_in_a(rtxp->rtx_block,
&allow_mrec->mrec_srcs);
l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow,
&ovf);
}
if (block_mrec != NULL) {
l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block,
&ovf);
COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block);
} else {
rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES,
&ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec);
}
if (allow_mrec != NULL) {
COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow);
} else {
rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES,
&ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec);
}
}
return (rtnmrec);
}
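/*
 * The two-step list calculations above are plain set algebra:
 *	new_allow = mrec_allow + (rtx_allow - mrec_block)
 *	new_block = mrec_block + (rtx_block - mrec_allow)
 * The standalone sketch below models source lists as bitmasks (purely
 * illustrative; the kernel uses slist_t and the l_* helpers) and
 * performs the same difference-then-union sequence.
 */
#if 0
#include <stdio.h>
int
main(void)
{
	unsigned rtx_allow = 0x03;	/* previously allowed: {0,1} */
	unsigned rtx_block = 0x0c;	/* previously blocked: {2,3} */
	unsigned mrec_allow = 0x04;	/* newly allowed:      {2}   */
	unsigned mrec_block = 0x01;	/* newly blocked:      {0}   */
	unsigned new_allow, new_block;
	rtx_allow &= ~mrec_block;	/* rtx_allow -= mrec_block */
	new_allow = mrec_allow | rtx_allow;
	rtx_block &= ~mrec_allow;	/* rtx_block -= mrec_allow */
	new_block = mrec_block | rtx_block;
	printf("new_allow 0x%x, new_block 0x%x\n", new_allow, new_block);
	return (0);
}
#endif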