ip_multi.c revision 0e0e37a8f0c38eb919c913bbb67030114a6b74a9
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1990 Mentat Inc. */
#include <sys/types.h>
#include <sys/stream.h>
#include <sys/dlpi.h>
#include <sys/stropts.h>
#include <sys/strsun.h>
#include <sys/ddi.h>
#include <sys/cmn_err.h>
#include <sys/sdt.h>
#include <sys/zone.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>
#include <sys/systm.h>
#include <sys/strsubr.h>
#include <net/route.h>
#include <netinet/in.h>
#include <net/if_dl.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <inet/common.h>
#include <inet/mi.h>
#include <inet/nd.h>
#include <inet/arp.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/ip_if.h>
#include <inet/ip_ndp.h>
#include <inet/ip_multi.h>
#include <inet/ipclassifier.h>
#include <inet/ipsec_impl.h>
#include <inet/sctp_ip.h>
#include <inet/ip_listutils.h>
#include <inet/udp_impl.h>
/* igmpv3/mldv2 source filter manipulation */
static void ilm_bld_flists(conn_t *conn, void *arg);
static void ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
slist_t *flist);
static ilm_t *ilm_add(ill_t *ill, const in6_addr_t *group,
ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
zoneid_t zoneid);
static void ilm_delete(ilm_t *ilm);
static int ilm_numentries(ill_t *, const in6_addr_t *);
static ilm_t *ip_addmulti_serial(const in6_addr_t *, ill_t *, zoneid_t,
ilg_stat_t, mcast_record_t, slist_t *, int *);
static ilm_t *ip_addmulti_impl(const in6_addr_t *, ill_t *,
zoneid_t, ilg_stat_t, mcast_record_t, slist_t *, int *);
static int ip_delmulti_serial(ilm_t *, boolean_t, boolean_t);
static int ip_delmulti_impl(ilm_t *, boolean_t, boolean_t);
static int ip_ll_multireq(ill_t *ill, const in6_addr_t *group,
t_uscalar_t);
static ilg_t *ilg_lookup(conn_t *, const in6_addr_t *, ipaddr_t ifaddr,
uint_t ifindex);
static int ilg_add(conn_t *connp, const in6_addr_t *group,
ipaddr_t ifaddr, uint_t ifindex, ill_t *ill, mcast_record_t fmode,
const in6_addr_t *v6src);
static void ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);
static mblk_t *ill_create_dl(ill_t *ill, uint32_t dl_primitive,
uint32_t *addr_lenp, uint32_t *addr_offp);
static int ip_opt_delete_group_excl(conn_t *connp,
const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
mcast_record_t fmode, const in6_addr_t *v6src);
static ilm_t *ilm_lookup(ill_t *, const in6_addr_t *, zoneid_t);
static int ip_msfilter_ill(conn_t *, mblk_t *, const ip_ioctl_cmd_t *,
ill_t **);
static void ilg_check_detach(conn_t *, ill_t *);
static void ilg_check_reattach(conn_t *);
/*
* MT notes:
*
* Multicast joins operate on both the ilg and ilm structures. Multiple
* threads operating on an conn (socket) trying to do multicast joins
* need to synchronize when operating on the ilg. Multiple threads
* potentially operating on different conn (socket endpoints) trying to
* do multicast joins could eventually end up trying to manipulate the
* ilm simulatenously and need to synchronize on the access to the ilm.
* The access and lookup of the ilm, as well as other ill multicast state,
* is under ill_mcast_lock.
* The modifications and lookup of ilg entries is serialized using conn_ilg_lock
* rwlock. An ilg will not be freed until ilg_refcnt drops to zero.
*
* In some cases we hold ill_mcast_lock and then acquire conn_ilg_lock, but
* never the other way around.
*
* An ilm is an IP data structure used to track multicast join/leave.
* An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
* with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
* referencing the ilm.
* The modifications and lookup of ilm entries is serialized using the
* ill_mcast_lock rwlock; that lock handles all the igmp/mld modifications
* of the ilm state.
* ilms are created / destroyed only as writer. ilms
* are not passed around. The datapath (anything outside of this file
* and igmp.c) use functions that do not return ilms - just the number
* of members. So we don't need a dynamic refcount of the number
* of threads holding reference to an ilm.
*
* In the cases where we serially access the ilg and ilm, which happens when
* we handle the applications requests to join or leave groups and sources,
* we use the ill_mcast_serializer mutex to ensure that a multithreaded
* application which does concurrent joins and/or leaves on the same group on
* the same socket always results in a consistent order for the ilg and ilm
* modifications.
*
* When a multicast operation results in needing to send a message to
* the driver (to join/leave a L2 multicast address), we use ill_dlpi_queue()
* which serialized the DLPI requests. The IGMP/MLD code uses ill_mcast_queue()
* to send IGMP/MLD IP packet to avoid dropping the lock just to send a packet.
*/
#define GETSTRUCT(structure, number) \
((structure *)mi_zalloc(sizeof (structure) * (number)))
/*
 * Bump the reference count on an ilg.
 *
 * The caller must ensure that the ilg has not been condemned; the
 * condemned flag is only set in ilg_delete under conn_ilg_lock.
 * The caller must hold conn_ilg_lock as writer.
 */
static void
ilg_refhold(ilg_t *ilg)
{
	ASSERT(RW_WRITE_HELD(&ilg->ilg_connp->conn_ilg_lock));
	ASSERT(!ilg->ilg_condemned);
	ASSERT(ilg->ilg_refcnt != 0);
	++ilg->ilg_refcnt;
}
/*
 * Final teardown of an ilg once its last reference is dropped: unlink it
 * from the conn's ilg list and free it.  All of the ilg's associations
 * (ill, ilm, filter) must already have been cleared, and the entry must
 * be condemned.
 */
static void
ilg_inactive(ilg_t *ilg)
{
	ilg_t	*next = ilg->ilg_next;

	ASSERT(ilg->ilg_condemned);
	ASSERT(ilg->ilg_ill == NULL);
	ASSERT(ilg->ilg_ilm == NULL);
	ASSERT(ilg->ilg_filter == NULL);

	/* Unlink from the conn_ilg list */
	*ilg->ilg_ptpn = next;
	if (next != NULL)
		next->ilg_ptpn = ilg->ilg_ptpn;
	ilg->ilg_next = NULL;
	ilg->ilg_ptpn = NULL;

	ilg->ilg_connp = NULL;
	kmem_free(ilg, sizeof (*ilg));
}
/*
 * Drop a reference on the ilg; when the last reference goes away the
 * ilg is unlinked and freed via ilg_inactive().
 *
 * The caller must hold conn_ilg_lock as writer.
 */
static void
ilg_refrele(ilg_t *ilg)
{
	ASSERT(RW_WRITE_HELD(&ilg->ilg_connp->conn_ilg_lock));
	ASSERT(ilg->ilg_refcnt != 0);
	ilg->ilg_refcnt--;
	if (ilg->ilg_refcnt == 0)
		ilg_inactive(ilg);
}
/*
 * Acquire a reference on `ilg' and drop the reference on `held_ilg'.
 * When held_ilg is the same as ilg we already have a reference, but
 * held_ilg might be condemned.  In that case we skip the
 * ilg_refhold/ilg_refrele pair so that ilg_refhold can continue to
 * assert that the ilg isn't condemned.
 */
static void
ilg_transfer_hold(ilg_t *held_ilg, ilg_t *ilg)
{
	if (held_ilg != ilg) {
		ilg_refhold(ilg);
		if (held_ilg != NULL)
			ilg_refrele(held_ilg);
	}
}
/*
 * Allocate a new ilg_t and link it at the head of conn_ilg.
 * Returns NULL on failure, in which case `*errp' is filled in with the
 * reason (EINVAL if the conn is closing, ENOMEM on allocation failure).
 *
 * Assumes connp->conn_ilg_lock is held as writer.
 */
static ilg_t *
conn_ilg_alloc(conn_t *connp, int *errp)
{
	ilg_t	*new_ilg;

	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));

	/*
	 * Once CONN_CLOSING is set, conn_ilg cleanup has begun and no
	 * further ilgs may be created on this conn.
	 */
	if (connp->conn_state_flags & CONN_CLOSING) {
		*errp = EINVAL;
		return (NULL);
	}

	new_ilg = kmem_zalloc(sizeof (ilg_t), KM_NOSLEEP);
	if (new_ilg == NULL) {
		*errp = ENOMEM;
		return (NULL);
	}
	new_ilg->ilg_refcnt = 1;

	/* Insert at the head of the conn's list */
	new_ilg->ilg_next = connp->conn_ilg;
	if (new_ilg->ilg_next != NULL)
		new_ilg->ilg_next->ilg_ptpn = &new_ilg->ilg_next;
	new_ilg->ilg_ptpn = &connp->conn_ilg;
	connp->conn_ilg = new_ilg;
	new_ilg->ilg_connp = connp;
	return (new_ilg);
}
typedef struct ilm_fbld_s {
ilm_t *fbld_ilm;
int fbld_in_cnt;
int fbld_ex_cnt;
slist_t fbld_in;
slist_t fbld_ex;
boolean_t fbld_in_overflow;
} ilm_fbld_t;
/*
 * ipcl_walk() callback used by ilm_gen_filter(): merge the filter state
 * of any ilg on `connp' that matches the ilm recorded in `arg' (an
 * ilm_fbld_t) into the master include/exclude source lists being
 * accumulated there.
 *
 * Caller must hold ill_mcast_lock.
 */
static void
ilm_bld_flists(conn_t *connp, void *arg)
{
	ilg_t	*ilg;
	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
	ilm_t	*ilm = fbld->fbld_ilm;
	in6_addr_t *v6group = &ilm->ilm_v6addr;

	/* A conn with no ilgs can't contribute any filter state. */
	if (connp->conn_ilg == NULL)
		return;

	/*
	 * Since we can't break out of the ipcl_walk once started, we still
	 * have to look at every conn.  But if we've already found one
	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
	 * ilgs--that will be our state.
	 */
	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
		return;

	/*
	 * Check this conn's ilgs to see if any are interested in our
	 * ilm (group, interface match).  If so, update the master
	 * include and exclude lists we're building in the fbld struct
	 * with this ilg's filter info.
	 *
	 * Note that the caller has already serialized on the ill we care
	 * about.
	 */
	ASSERT(MUTEX_HELD(&ilm->ilm_ill->ill_mcast_serializer));

	rw_enter(&connp->conn_ilg_lock, RW_READER);
	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
		/* Entries being torn down contribute nothing. */
		if (ilg->ilg_condemned)
			continue;

		/*
		 * Since we are under the ill_mcast_serializer we know
		 * that any ilg+ilm operations on this ilm have either
		 * not started or completed, except for the last ilg
		 * (the one that caused us to be called) which doesn't
		 * have ilg_ilm set yet.  Hence we compare using ilg_ill
		 * and the address.
		 */
		if ((ilg->ilg_ill == ilm->ilm_ill) &&
		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
				fbld->fbld_in_cnt++;
				if (!fbld->fbld_in_overflow)
					l_union_in_a(&fbld->fbld_in,
					    ilg->ilg_filter,
					    &fbld->fbld_in_overflow);
			} else {
				fbld->fbld_ex_cnt++;
				/*
				 * On the first exclude list, don't try to do
				 * an intersection, as the master exclude list
				 * is intentionally empty.  If the master list
				 * is still empty on later iterations, that
				 * means we have at least one ilg with an empty
				 * exclude list, so that should be reflected
				 * when we take the intersection.
				 */
				if (fbld->fbld_ex_cnt == 1) {
					if (ilg->ilg_filter != NULL)
						l_copy(ilg->ilg_filter,
						    &fbld->fbld_ex);
				} else {
					l_intersection_in_a(&fbld->fbld_ex,
					    ilg->ilg_filter);
				}
			}
			/* there will only be one match, so break now. */
			break;
		}
	}
	rw_exit(&connp->conn_ilg_lock);
}
/*
 * Compute the interface-wide filter state for `ilm' by walking all conns
 * in the stack (via ipcl_walk/ilm_bld_flists) and merging the filter
 * state of every interested ilg.  On return, *fmode is MODE_IS_INCLUDE
 * or MODE_IS_EXCLUDE and `flist' holds the corresponding source list
 * (possibly empty).
 *
 * Caller must hold ill_mcast_lock.
 */
static void
ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
{
	ilm_fbld_t fbld;
	ip_stack_t *ipst = ilm->ilm_ipst;

	fbld.fbld_ilm = ilm;
	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
	fbld.fbld_in_overflow = B_FALSE;

	/* first, construct our master include and exclude lists */
	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);

	/* now use those master lists to generate the interface filter */

	/* if include list overflowed, filter is (EXCLUDE, NULL) */
	if (fbld.fbld_in_overflow) {
		*fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
		return;
	}

	/* if nobody interested, interface filter is (INCLUDE, NULL) */
	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
		*fmode = MODE_IS_INCLUDE;
		flist->sl_numsrc = 0;
		return;
	}

	/*
	 * If there are no exclude lists, then the interface filter
	 * is INCLUDE, with its filter list equal to fbld_in.  A single
	 * exclude list makes the interface filter EXCLUDE, with its
	 * filter list equal to (fbld_ex - fbld_in).
	 */
	if (fbld.fbld_ex_cnt == 0) {
		*fmode = MODE_IS_INCLUDE;
		l_copy(&fbld.fbld_in, flist);
	} else {
		*fmode = MODE_IS_EXCLUDE;
		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
	}
}
/*
 * Account for a new join on an existing ilm: bump the membership counts
 * (unless this is an ILGSTAT_CHANGE to an existing ilg), recompute the
 * interface filter state, and if the state changed send the appropriate
 * IGMP/MLD state-change report and update the ilm.
 *
 * Returns 0 on success or ENOMEM if a needed source list could not be
 * allocated (in which case no counters or state are modified).
 *
 * Caller must hold ill_mcast_lock.
 */
static int
ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist)
{
	mcast_record_t fmode;
	slist_t *flist;
	boolean_t fdefault;
	char buf[INET6_ADDRSTRLEN];
	ill_t *ill = ilm->ilm_ill;

	/*
	 * There are several cases where the ilm's filter state
	 * defaults to (EXCLUDE, NULL):
	 *	- we've had previous joins without associated ilgs
	 *	- this join has no associated ilg
	 *	- the ilg's filter state is (EXCLUDE, NULL)
	 */
	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);

	/* attempt mallocs (if needed) before doing anything else */
	if ((flist = l_alloc()) == NULL)
		return (ENOMEM);
	if (!fdefault && ilm->ilm_filter == NULL) {
		ilm->ilm_filter = l_alloc();
		if (ilm->ilm_filter == NULL) {
			l_free(flist);
			return (ENOMEM);
		}
	}

	/* ILGSTAT_CHANGE modifies an existing membership; no new refs. */
	if (ilgstat != ILGSTAT_CHANGE)
		ilm->ilm_refcnt++;

	/* ilm_no_ilg_cnt forces the default (EXCLUDE, NULL) filter state. */
	if (ilgstat == ILGSTAT_NONE)
		ilm->ilm_no_ilg_cnt++;

	/*
	 * Determine new filter state.  If it's not the default
	 * (EXCLUDE, NULL), we must walk the conn list to find
	 * any ilgs interested in this group, and re-build the
	 * ilm filter.
	 */
	if (fdefault) {
		fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
	} else {
		ilm_gen_filter(ilm, &fmode, flist);
	}

	/* make sure state actually changed; nothing to do if not. */
	if ((ilm->ilm_fmode == fmode) &&
	    !lists_are_different(ilm->ilm_filter, flist)) {
		l_free(flist);
		return (0);
	}

	/* send the state change report */
	if (!IS_LOOPBACK(ill)) {
		if (ill->ill_isv6)
			mld_statechange(ilm, fmode, flist);
		else
			igmp_statechange(ilm, fmode, flist);
	}

	/* update the ilm state */
	ilm->ilm_fmode = fmode;
	if (flist->sl_numsrc > 0)
		l_copy(flist, ilm->ilm_filter);
	else
		CLEAR_SLIST(ilm->ilm_filter);

	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));

	l_free(flist);
	return (0);
}
/*
 * Recompute and apply the ilm's filter state after a leave that did not
 * remove the last membership (ilm_refcnt is still > 0).  Sends an
 * IGMP/MLD state-change report if the filter state changed.
 *
 * Returns 0 on success or ENOMEM if a scratch source list could not be
 * allocated.
 *
 * Caller must hold ill_mcast_lock.
 */
static int
ilm_update_del(ilm_t *ilm)
{
	mcast_record_t fmode;
	slist_t *flist;
	ill_t *ill = ilm->ilm_ill;

	ip1dbg(("ilm_update_del: still %d left; updating state\n",
	    ilm->ilm_refcnt));

	if ((flist = l_alloc()) == NULL)
		return (ENOMEM);

	/*
	 * If present, the ilg in question has already either been
	 * updated or removed from our list; so all we need to do
	 * now is walk the list to update the ilm filter state.
	 *
	 * Skip the list walk if we have any no-ilg joins, which
	 * cause the filter state to revert to (EXCLUDE, NULL).
	 */
	if (ilm->ilm_no_ilg_cnt != 0) {
		fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
	} else {
		ilm_gen_filter(ilm, &fmode, flist);
	}

	/* check to see if state needs to be updated */
	if ((ilm->ilm_fmode == fmode) &&
	    (!lists_are_different(ilm->ilm_filter, flist))) {
		l_free(flist);
		return (0);
	}

	/* Loopback interfaces never send IGMP/MLD reports. */
	if (!IS_LOOPBACK(ill)) {
		if (ill->ill_isv6)
			mld_statechange(ilm, fmode, flist);
		else
			igmp_statechange(ilm, fmode, flist);
	}

	ilm->ilm_fmode = fmode;
	if (flist->sl_numsrc > 0) {
		if (ilm->ilm_filter == NULL) {
			ilm->ilm_filter = l_alloc();
			if (ilm->ilm_filter == NULL) {
				char buf[INET6_ADDRSTRLEN];

				/*
				 * Allocation failure: fall back to
				 * (EXCLUDE, NULL), i.e. no source
				 * filtering for this group.  Not an
				 * error from the caller's perspective.
				 */
				ip1dbg(("ilm_update_del: failed to alloc ilm "
				    "filter; no source filtering for %s on %s",
				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
				    buf, sizeof (buf)), ill->ill_name));
				ilm->ilm_fmode = MODE_IS_EXCLUDE;
				l_free(flist);
				return (0);
			}
		}
		l_copy(flist, ilm->ilm_filter);
	} else {
		CLEAR_SLIST(ilm->ilm_filter);
	}

	l_free(flist);
	return (0);
}
/*
 * Create/update the ilm for the group/ill.  Used by other parts of IP to
 * do the ILGSTAT_NONE (no ilg), MODE_IS_EXCLUDE, with no slist join.
 * Returns with a refhold on the ilm; NULL on failure with *errorp set.
 *
 * The unspecified address means all multicast addresses in both the
 * case of IPv4 and IPv6.
 *
 * The caller should have already mapped an IPMP under ill to the upper.
 */
ilm_t *
ip_addmulti(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
    int *errorp)
{
	ilm_t *ilm;

	/* Acquire serializer to keep assert in ilm_bld_flists happy */
	mutex_enter(&ill->ill_mcast_serializer);
	ilm = ip_addmulti_serial(v6group, ill, zoneid, ILGSTAT_NONE,
	    MODE_IS_EXCLUDE, NULL, errorp);
	mutex_exit(&ill->ill_mcast_serializer);
	return (ilm);
}
/*
 * Create/update the ilm for the group/ill.  If ILGSTAT_CHANGE is not set
 * then this returns with a refhold on the ilm.
 *
 * Internal routine which assumes the caller has already acquired
 * ill_mcast_serializer.
 *
 * The unspecified address means all multicast addresses in both the
 * case of IPv4 and IPv6.
 *
 * ilgstat tells us if there's an ilg associated with this join,
 * and if so, if it's a new ilg or a change to an existing one.
 * ilg_fmode and ilg_flist give us the current filter state of
 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
 *
 * The caller should have already mapped an IPMP under ill to the upper.
 */
static ilm_t *
ip_addmulti_serial(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
    ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
    int *errorp)
{
	ilm_t *ilm;

	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));

	/* Validate the group address for the ill's address family. */
	if (ill->ill_isv6) {
		/* IPv6: must be multicast or the unspecified (allmulti). */
		if (!IN6_IS_ADDR_MULTICAST(v6group) &&
		    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
			*errorp = EINVAL;
			return (NULL);
		}
	} else {
		/* IPv4: must be a v4-mapped class D, or unspecified. */
		if (IN6_IS_ADDR_V4MAPPED(v6group)) {
			ipaddr_t v4group;

			IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
			if (!CLASSD(v4group)) {
				*errorp = EINVAL;
				return (NULL);
			}
		} else if (!IN6_IS_ADDR_UNSPECIFIED(v6group)) {
			*errorp = EINVAL;
			return (NULL);
		}
	}

	/* Joins must go through the upper IPMP ill, never the underlying. */
	if (IS_UNDER_IPMP(ill)) {
		*errorp = EINVAL;
		return (NULL);
	}

	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
	/*
	 * We do the equivalent of a lookup by checking after we get the lock
	 * This is needed since the ill could have been condemned after
	 * we looked it up, and we need to check condemned after we hold
	 * ill_mcast_lock to synchronize with the unplumb code.
	 */
	if (ill->ill_state_flags & ILL_CONDEMNED) {
		rw_exit(&ill->ill_mcast_lock);
		*errorp = ENXIO;
		return (NULL);
	}
	ilm = ip_addmulti_impl(v6group, ill, zoneid, ilgstat, ilg_fmode,
	    ilg_flist, errorp);
	rw_exit(&ill->ill_mcast_lock);

	/* Send any deferred/queued DLPI or IP packets */
	ill_mcast_send_queued(ill);
	ill_dlpi_send_queued(ill);
	ill_mcast_timer_start(ill->ill_ipst);
	return (ilm);
}
/*
 * Guts of the join: look up or create the ilm for (group, ill, zoneid),
 * and when a new ilm is created notify IGMP/MLD and (for the first
 * membership of a group on the ill) the driver.
 *
 * Returns the ilm on success; NULL with *errorp set on failure.
 * Caller must hold ill_mcast_lock as writer.
 */
static ilm_t *
ip_addmulti_impl(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
    ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
    int *errorp)
{
	ilm_t *ilm;
	int ret = 0;

	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
	*errorp = 0;

	/*
	 * An ilm is uniquely identified by the tuple of (group, ill) where
	 * `group' is the multicast group address, and `ill' is the interface
	 * on which it is currently joined.
	 */
	ilm = ilm_lookup(ill, v6group, zoneid);
	if (ilm != NULL) {
		/* ilm_update_add bumps ilm_refcnt unless ILGSTAT_CHANGE */
		ret = ilm_update_add(ilm, ilgstat, ilg_flist);
		if (ret == 0)
			return (ilm);

		*errorp = ret;
		return (NULL);
	}

	/*
	 * The callers checks on the ilg and the ilg+ilm consistency under
	 * ill_mcast_serializer ensures that we can not have ILGSTAT_CHANGE
	 * and no ilm.
	 */
	ASSERT(ilgstat != ILGSTAT_CHANGE);
	ilm = ilm_add(ill, v6group, ilgstat, ilg_fmode, ilg_flist, zoneid);
	if (ilm == NULL) {
		*errorp = ENOMEM;
		return (NULL);
	}

	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		/*
		 * If we have more then one we should not tell the driver
		 * to join this time.
		 */
		if (ilm_numentries(ill, v6group) == 1) {
			ret = ill_join_allmulti(ill);
		}
	} else {
		/* Loopback interfaces never send IGMP/MLD reports. */
		if (!IS_LOOPBACK(ill)) {
			if (ill->ill_isv6)
				mld_joingroup(ilm);
			else
				igmp_joingroup(ilm);
		}

		/*
		 * If we have more then one we should not tell the driver
		 * to join this time.
		 */
		if (ilm_numentries(ill, v6group) == 1) {
			ret = ip_ll_multireq(ill, v6group, DL_ENABMULTI_REQ);
		}
	}
	if (ret != 0) {
		if (ret == ENETDOWN) {
			char buf[INET6_ADDRSTRLEN];

			ip0dbg(("ip_addmulti: ENETDOWN for %s on %s",
			    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
			    buf, sizeof (buf)), ill->ill_name));
		}
		/* Undo the ilm creation on driver failure. */
		ilm_delete(ilm);
		*errorp = ret;
		return (NULL);
	} else {
		return (ilm);
	}
}
/*
 * Send a multicast request to the driver for enabling or disabling
 * multicast reception for v6groupp address.  The caller has already
 * checked whether it is appropriate to send one or not.
 *
 * For IPMP we switch to the cast_ill since it has the right hardware
 * information.
 *
 * Returns 0 on success (including the ipmpstub no-op case) or ENOMEM.
 */
static int
ip_ll_send_multireq(ill_t *ill, const in6_addr_t *v6groupp, t_uscalar_t prim)
{
	mblk_t	*mp;
	uint32_t addrlen, addroff;
	ill_t *release_ill = NULL;
	int err = 0;

	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

	if (IS_IPMP(ill)) {
		/* On the upper IPMP ill. */
		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
		if (release_ill == NULL) {
			/*
			 * Avoid sending it down to the ipmpstub.
			 * We will be called again once the members of the
			 * group are in place
			 */
			ip1dbg(("ip_ll_send_multireq: no cast_ill for %s %d\n",
			    ill->ill_name, ill->ill_isv6));
			return (0);
		}
		/* Operate on the cast_ill until `done'. */
		ill = release_ill;
	}

	/* Create a DL_ENABMULTI_REQ or DL_DISABMULTI_REQ message. */
	mp = ill_create_dl(ill, prim, &addrlen, &addroff);
	if (mp == NULL) {
		err = ENOMEM;
		goto done;
	}

	/* Fill in the link-layer multicast address for the group. */
	mp = ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp);
	if (mp == NULL) {
		ip0dbg(("null from ndp_mcastreq(ill %s)\n", ill->ill_name));
		err = ENOMEM;
		goto done;
	}

	switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) {
	case DL_ENABMULTI_REQ:
		mutex_enter(&ill->ill_lock);
		/* Track the state if this is the first enabmulti */
		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
		mutex_exit(&ill->ill_lock);
		break;
	}
	/* Queue for serialized transmission to the driver. */
	ill_dlpi_queue(ill, mp);
done:
	if (release_ill != NULL)
		ill_refrele(release_ill);
	return (err);
}
/*
 * Send a multicast request to the driver for enabling multicast
 * membership for v6group if appropriate for this interface type.
 * Non-resolver, point-to-point, and MULTI_BCAST interfaces need no
 * driver request; those cases succeed trivially.
 */
static int
ip_ll_multireq(ill_t *ill, const in6_addr_t *v6groupp, t_uscalar_t prim)
{
	if (ill->ill_net_type != IRE_IF_RESOLVER ||
	    (ill->ill_ipif->ipif_flags & IPIF_POINTOPOINT)) {
		/* Must be IRE_IF_NORESOLVER */
		ip1dbg(("ip_ll_multireq: not resolver\n"));
		return (0);
	}

	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
		ip1dbg(("ip_ll_multireq: MULTI_BCAST\n"));
		return (0);
	}

	return (ip_ll_send_multireq(ill, v6groupp, prim));
}
/*
 * Delete the ilm.  Used by other parts of IP for the case of no_ilg/leaving
 * being true, i.e. memberships created by ip_addmulti().
 * Returns 0 on success, otherwise an errno from the driver request.
 */
int
ip_delmulti(ilm_t *ilm)
{
	ill_t *ill = ilm->ilm_ill;
	int error;

	/* Acquire serializer to keep assert in ilm_bld_flists happy */
	mutex_enter(&ill->ill_mcast_serializer);
	error = ip_delmulti_serial(ilm, B_TRUE, B_TRUE);
	mutex_exit(&ill->ill_mcast_serializer);
	return (error);
}
/*
 * Delete the ilm under ill_mcast_lock, then flush any DLPI/IP packets
 * that the deletion queued.
 * Assumes ill_mcast_serializer is held by the caller.
 */
static int
ip_delmulti_serial(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving)
{
	ill_t *ill = ilm->ilm_ill;
	int ret;

	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
	/* Memberships always live on the upper IPMP ill. */
	ASSERT(!(IS_UNDER_IPMP(ill)));

	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
	ret = ip_delmulti_impl(ilm, no_ilg, leaving);
	rw_exit(&ill->ill_mcast_lock);
	/* Send any deferred/queued DLPI or IP packets */
	ill_mcast_send_queued(ill);
	ill_dlpi_send_queued(ill);
	ill_mcast_timer_start(ill->ill_ipst);
	return (ret);
}
/*
 * Guts of the leave: drop the membership counts on the ilm and, when the
 * last reference goes away, notify IGMP/MLD, delete the ilm, and (when it
 * was the last ilm for the group on this ill) tell the driver to leave.
 *
 * no_ilg:  this membership was created without an ilg (decrement
 *          ilm_no_ilg_cnt).
 * leaving: drop a reference on the ilm.
 *
 * Caller must hold ill_mcast_lock as writer.
 */
static int
ip_delmulti_impl(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving)
{
	ill_t *ill = ilm->ilm_ill;
	int error;
	in6_addr_t v6group;

	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

	/* Update counters */
	if (no_ilg)
		ilm->ilm_no_ilg_cnt--;

	if (leaving)
		ilm->ilm_refcnt--;

	/* Others still joined: just recompute the filter state. */
	if (ilm->ilm_refcnt > 0)
		return (ilm_update_del(ilm));

	/* Save the group; ilm_delete frees the ilm. */
	v6group = ilm->ilm_v6addr;

	if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
		ilm_delete(ilm);
		/*
		 * If other ilms for this group remain (e.g., other zones)
		 * we should not tell the driver to leave.
		 */
		if (ilm_numentries(ill, &v6group) != 0)
			return (0);

		ill_leave_allmulti(ill);
		return (0);
	}

	/* Loopback interfaces never send IGMP/MLD reports. */
	if (!IS_LOOPBACK(ill)) {
		if (ill->ill_isv6)
			mld_leavegroup(ilm);
		else
			igmp_leavegroup(ilm);
	}

	ilm_delete(ilm);
	/*
	 * If other ilms for this group remain (e.g., other zones)
	 * we should not tell the driver to leave.
	 */
	if (ilm_numentries(ill, &v6group) != 0)
		return (0);

	error = ip_ll_multireq(ill, &v6group, DL_DISABMULTI_REQ);
	/* We ignore the case when ill_dl_up is not set */
	if (error == ENETDOWN) {
		char buf[INET6_ADDRSTRLEN];

		ip0dbg(("ip_delmulti: ENETDOWN for %s on %s",
		    inet_ntop(AF_INET6, &v6group, buf, sizeof (buf)),
		    ill->ill_name));
	}
	return (error);
}
/*
 * Make the driver pass up all multicast packets by queueing a
 * DL_PROMISCON_REQ (level DL_PROMISC_MULTI).  For IPMP the request is
 * sent on the cast_ill.  A DL_PROMISCOFF_REQ is pre-allocated here so
 * that the later ill_leave_allmulti() cannot fail for lack of memory.
 *
 * Returns 0 on success, ENETDOWN if the interface is not bound,
 * or ENOMEM on allocation failure.
 * Caller must hold ill_mcast_lock as writer.
 */
int
ill_join_allmulti(ill_t *ill)
{
	mblk_t		*promiscon_mp, *promiscoff_mp = NULL;
	uint32_t	addrlen, addroff;
	ill_t		*release_ill = NULL;

	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

	/* Loopback receives everything already; nothing to do. */
	if (IS_LOOPBACK(ill))
		return (0);

	if (!ill->ill_dl_up) {
		/*
		 * Nobody there.  All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		return (ENETDOWN);
	}

	if (IS_IPMP(ill)) {
		/* On the upper IPMP ill. */
		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
		if (release_ill == NULL) {
			/*
			 * Avoid sending it down to the ipmpstub.
			 * We will be called again once the members of the
			 * group are in place
			 */
			ip1dbg(("ill_join_allmulti: no cast_ill for %s %d\n",
			    ill->ill_name, ill->ill_isv6));
			return (0);
		}
		/* Operate on the cast_ill from here on. */
		ill = release_ill;
		if (!ill->ill_dl_up) {
			ill_refrele(ill);
			return (ENETDOWN);
		}
	}

	/*
	 * Create a DL_PROMISCON_REQ message and send it directly to the DLPI
	 * provider.  We don't need to do this for certain media types for
	 * which we never need to turn promiscuous mode on.  While we're here,
	 * pre-allocate a DL_PROMISCOFF_REQ message to make sure that
	 * ill_leave_allmulti() will not fail due to low memory conditions.
	 */
	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
		promiscon_mp = ill_create_dl(ill, DL_PROMISCON_REQ,
		    &addrlen, &addroff);
		if (ill->ill_promiscoff_mp == NULL)
			promiscoff_mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
			    &addrlen, &addroff);
		if (promiscon_mp == NULL ||
		    (ill->ill_promiscoff_mp == NULL && promiscoff_mp == NULL)) {
			/* freemsg() handles NULL arguments. */
			freemsg(promiscon_mp);
			freemsg(promiscoff_mp);
			if (release_ill != NULL)
				ill_refrele(release_ill);
			return (ENOMEM);
		}
		/* Stash the off message for ill_leave_allmulti(). */
		if (ill->ill_promiscoff_mp == NULL)
			ill->ill_promiscoff_mp = promiscoff_mp;
		ill_dlpi_queue(ill, promiscon_mp);
	}
	if (release_ill != NULL)
		ill_refrele(release_ill);
	return (0);
}
/*
 * Make the driver stop passing up all multicast packets by queueing the
 * DL_PROMISCOFF_REQ that was pre-allocated in ill_join_allmulti().
 * For IPMP the request is sent on the cast_ill.
 * Caller must hold ill_mcast_lock as writer.
 */
void
ill_leave_allmulti(ill_t *ill)
{
	mblk_t	*promiscoff_mp;
	ill_t	*release_ill = NULL;

	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

	/* Loopback never turned promiscuous mode on; nothing to undo. */
	if (IS_LOOPBACK(ill))
		return;

	if (!ill->ill_dl_up) {
		/*
		 * Nobody there.  All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		return;
	}

	if (IS_IPMP(ill)) {
		/* On the upper IPMP ill. */
		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
		if (release_ill == NULL) {
			/*
			 * Avoid sending it down to the ipmpstub.
			 * We will be called again once the members of the
			 * group are in place
			 */
			ip1dbg(("ill_leave_allmulti: no cast_ill on %s %d\n",
			    ill->ill_name, ill->ill_isv6));
			return;
		}
		/* Operate on the cast_ill until `done'. */
		ill = release_ill;
		if (!ill->ill_dl_up)
			goto done;
	}

	/*
	 * In the case of IPMP and ill_dl_up not being set when we joined
	 * we didn't allocate a promiscoff_mp.  In that case we have
	 * nothing to do when we leave.
	 * Ditto for PHYI_MULTI_BCAST
	 */
	promiscoff_mp = ill->ill_promiscoff_mp;
	if (promiscoff_mp != NULL) {
		ill->ill_promiscoff_mp = NULL;
		ill_dlpi_queue(ill, promiscoff_mp);
	}
done:
	if (release_ill != NULL)
		ill_refrele(release_ill);
}
/*
 * Join allmulti on the interface with the given ifindex, tracking
 * repeated joins with ill_ipallmulti_cnt so that only the first join
 * creates an ilm.  Used by IPobs.
 * Returns 0 on success or an errno.
 */
int
ip_join_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
{
	ill_t		*ill;
	int		ret;
	ilm_t		*ilm;

	ill = ill_lookup_on_ifindex(ifindex, isv6, ipst);
	if (ill == NULL)
		return (ENODEV);

	/*
	 * The ip_addmulti() function doesn't allow IPMP underlying interfaces
	 * to join allmulti since only the nominated underlying interface in
	 * the group should receive multicast.  We silently succeed to avoid
	 * having to teach IPobs (currently the only caller of this routine)
	 * to ignore failures in this case.
	 */
	if (IS_UNDER_IPMP(ill)) {
		ill_refrele(ill);
		return (0);
	}
	mutex_enter(&ill->ill_lock);
	if (ill->ill_ipallmulti_cnt > 0) {
		/* Already joined */
		ASSERT(ill->ill_ipallmulti_ilm != NULL);
		ill->ill_ipallmulti_cnt++;
		mutex_exit(&ill->ill_lock);
		goto done;
	}
	mutex_exit(&ill->ill_lock);

	/*
	 * ip_addmulti() may block, so the ill_lock is dropped across the
	 * call; recheck ill_ipallmulti_cnt afterwards for a racing join.
	 */
	ilm = ip_addmulti(&ipv6_all_zeros, ill, ill->ill_zoneid, &ret);
	if (ilm == NULL) {
		ASSERT(ret != 0);
		ill_refrele(ill);
		return (ret);
	}

	mutex_enter(&ill->ill_lock);
	if (ill->ill_ipallmulti_cnt > 0) {
		/* Another thread added it concurrently */
		(void) ip_delmulti(ilm);
		mutex_exit(&ill->ill_lock);
		goto done;
	}
	ASSERT(ill->ill_ipallmulti_ilm == NULL);
	ill->ill_ipallmulti_ilm = ilm;
	ill->ill_ipallmulti_cnt++;
	mutex_exit(&ill->ill_lock);
done:
	ill_refrele(ill);
	return (0);
}
/*
 * Undo one ip_join_allmulti(); the ilm is only deleted when the last
 * join is dropped.  Returns 0 on success or ENODEV.
 */
int
ip_leave_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
{
	ill_t		*ill;
	ilm_t		*ilm;

	ill = ill_lookup_on_ifindex(ifindex, isv6, ipst);
	if (ill == NULL)
		return (ENODEV);

	/* Mirror the silent-success behavior of ip_join_allmulti(). */
	if (IS_UNDER_IPMP(ill)) {
		ill_refrele(ill);
		return (0);
	}

	mutex_enter(&ill->ill_lock);
	if (ill->ill_ipallmulti_cnt == 0) {
		/* ip_purge_allmulti could have removed them all */
		mutex_exit(&ill->ill_lock);
		goto done;
	}
	ill->ill_ipallmulti_cnt--;
	if (ill->ill_ipallmulti_cnt == 0) {
		/* Last one */
		ilm = ill->ill_ipallmulti_ilm;
		ill->ill_ipallmulti_ilm = NULL;
	} else {
		ilm = NULL;
	}
	mutex_exit(&ill->ill_lock);
	/* Delete the ilm outside ill_lock; ip_delmulti takes other locks. */
	if (ilm != NULL)
		(void) ip_delmulti(ilm);

done:
	ill_refrele(ill);
	return (0);
}
/*
 * Delete the allmulti memberships that were added as part of
 * ip_join_allmulti().  Called as writer during ill teardown.
 */
void
ip_purge_allmulti(ill_t *ill)
{
	ilm_t	*allmulti_ilm;

	ASSERT(IAM_WRITER_ILL(ill));

	/* Detach the ilm under ill_lock, then release it without the lock. */
	mutex_enter(&ill->ill_lock);
	allmulti_ilm = ill->ill_ipallmulti_ilm;
	ill->ill_ipallmulti_ilm = NULL;
	ill->ill_ipallmulti_cnt = 0;
	mutex_exit(&ill->ill_lock);

	if (allmulti_ilm != NULL)
		(void) ip_delmulti(allmulti_ilm);
}
/*
 * Create a dlpi message with room for phys+sap.  Later
 * we will strip the sap for those primitives which
 * only need a physical address.
 *
 * Supports DL_ENABMULTI_REQ, DL_DISABMULTI_REQ, DL_PROMISCON_REQ and
 * DL_PROMISCOFF_REQ.  For the multi requests, *addr_lenp/*addr_offp are
 * set to the length and offset of the address area the caller (via
 * ndp_mcastreq) must fill in; for the promisc requests they stay 0.
 * Returns NULL on unknown primitive, zero-length hardware address, or
 * allocation failure.
 */
static mblk_t *
ill_create_dl(ill_t *ill, uint32_t dl_primitive,
    uint32_t *addr_lenp, uint32_t *addr_offp)
{
	mblk_t	*mp;
	uint32_t	hw_addr_length;
	char		*cp;
	uint32_t	offset;
	uint32_t	length;
	uint32_t	size;

	*addr_lenp = *addr_offp = 0;

	hw_addr_length = ill->ill_phys_addr_length;
	if (!hw_addr_length) {
		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
		return (NULL);
	}

	/* Size the message: fixed primitive plus room for the hw address. */
	switch (dl_primitive) {
	case DL_ENABMULTI_REQ:
		length = sizeof (dl_enabmulti_req_t);
		size = length + hw_addr_length;
		break;
	case DL_DISABMULTI_REQ:
		length = sizeof (dl_disabmulti_req_t);
		size = length + hw_addr_length;
		break;
	case DL_PROMISCON_REQ:
	case DL_PROMISCOFF_REQ:
		/* Promisc requests carry no address. */
		size = length = sizeof (dl_promiscon_req_t);
		break;
	default:
		return (NULL);
	}
	mp = allocb(size, BPRI_HI);
	if (!mp)
		return (NULL);
	mp->b_wptr += size;
	mp->b_datap->db_type = M_PROTO;

	cp = (char *)mp->b_rptr;
	/* The hw address immediately follows the fixed-size primitive. */
	offset = length;

	switch (dl_primitive) {
	case DL_ENABMULTI_REQ: {
		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_addr_offset = offset;
		*addr_lenp = dl->dl_addr_length = hw_addr_length;
		*addr_offp = offset;
		break;
	}
	case DL_DISABMULTI_REQ: {
		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_addr_offset = offset;
		*addr_lenp = dl->dl_addr_length = hw_addr_length;
		*addr_offp = offset;
		break;
	}
	case DL_PROMISCON_REQ:
	case DL_PROMISCOFF_REQ: {
		/* DL_PROMISCOFF_REQ has the same layout as the on request. */
		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_level = DL_PROMISC_MULTI;
		break;
	}
	}
	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
	    *addr_lenp, *addr_offp));
	return (mp);
}
/*
 * Rejoin any groups for which we have ilms.
 *
 * This is only needed for IPMP when the cast_ill changes since that
 * change is invisible to the ilm.  Other interface changes are handled
 * by conn_update_ill.
 */
void
ill_recover_multicast(ill_t *ill)
{
	ilm_t	*ilm;
	char    addrbuf[INET6_ADDRSTRLEN];

	ill->ill_need_recover_multicast = 0;

	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		/*
		 * If we have more then one ilm for the group (e.g., with
		 * different zoneid) then we should not tell the driver
		 * to join unless this is the first ilm for the group.
		 */
		if (ilm_numentries(ill, &ilm->ilm_v6addr) > 1 &&
		    ilm_lookup(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) {
			continue;
		}

		ip1dbg(("ill_recover_multicast: %s\n", inet_ntop(AF_INET6,
		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));

		/* Unspecified group means allmulti; otherwise a real join. */
		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
			(void) ill_join_allmulti(ill);
		} else {
			if (ill->ill_isv6)
				mld_joingroup(ilm);
			else
				igmp_joingroup(ilm);

			(void) ip_ll_multireq(ill, &ilm->ilm_v6addr,
			    DL_ENABMULTI_REQ);
		}
	}
	rw_exit(&ill->ill_mcast_lock);
	/* Send any deferred/queued DLPI or IP packets */
	ill_mcast_send_queued(ill);
	ill_dlpi_send_queued(ill);
	ill_mcast_timer_start(ill->ill_ipst);
}
/*
 * The opposite of ill_recover_multicast() -- leaves all multicast groups
 * that were explicitly joined.  Sets ill_need_recover_multicast so a
 * later ill_recover_multicast() will rejoin them.
 *
 * This is only needed for IPMP when the cast_ill changes since that
 * change is invisible to the ilm.  Other interface changes are handled
 * by conn_update_ill.
 */
void
ill_leave_multicast(ill_t *ill)
{
	ilm_t	*ilm;
	char    addrbuf[INET6_ADDRSTRLEN];

	ill->ill_need_recover_multicast = 1;

	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		/*
		 * If we have more then one ilm for the group (e.g., with
		 * different zoneid) then we should not tell the driver
		 * to leave unless this is the first ilm for the group.
		 */
		if (ilm_numentries(ill, &ilm->ilm_v6addr) > 1 &&
		    ilm_lookup(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) {
			continue;
		}

		ip1dbg(("ill_leave_multicast: %s\n", inet_ntop(AF_INET6,
		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));

		/* Unspecified group means allmulti; otherwise a real leave. */
		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
			ill_leave_allmulti(ill);
		} else {
			if (ill->ill_isv6)
				mld_leavegroup(ilm);
			else
				igmp_leavegroup(ilm);

			(void) ip_ll_multireq(ill, &ilm->ilm_v6addr,
			    DL_DISABMULTI_REQ);
		}
	}
	rw_exit(&ill->ill_mcast_lock);
	/* Send any deferred/queued DLPI or IP packets */
	ill_mcast_send_queued(ill);
	ill_dlpi_send_queued(ill);
	ill_mcast_timer_start(ill->ill_ipst);
}
/*
* Interface used by IP input/output.
* Returns true if there is a member on the ill for any zoneid.
*/
boolean_t
ill_hasmembers_v6(ill_t *ill, const in6_addr_t *v6group)
{
	boolean_t found;

	/* A reader hold suffices; we only inspect the ilm list. */
	rw_enter(&ill->ill_mcast_lock, RW_READER);
	found = (ilm_lookup(ill, v6group, ALL_ZONES) != NULL);
	rw_exit(&ill->ill_mcast_lock);
	return (found);
}
/*
* Interface used by IP input/output.
* Returns true if there is a member on the ill for any zoneid.
*
* The group and source can't be INADDR_ANY here so no need to translate to
* the unspecified IPv6 address.
*/
boolean_t
ill_hasmembers_v4(ill_t *ill, ipaddr_t group)
{
	in6_addr_t v6grp;

	/* Map the IPv4 group to v4-mapped IPv6 and share the v6 path. */
	IN6_IPADDR_TO_V4MAPPED(group, &v6grp);
	return (ill_hasmembers_v6(ill, &v6grp));
}
/*
* Interface used by IP input/output.
* Returns true if there is a member on the ill for any zoneid except skipzone.
*/
boolean_t
ill_hasmembers_otherzones_v6(ill_t *ill, const in6_addr_t *v6group,
    zoneid_t skipzone)
{
	ilm_t *ilm;
	boolean_t found = B_FALSE;

	rw_enter(&ill->ill_mcast_lock, RW_READER);
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		/* Ignore the caller's own zone. */
		if (ilm->ilm_zoneid == skipzone)
			continue;
		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
			found = B_TRUE;
			break;
		}
	}
	rw_exit(&ill->ill_mcast_lock);
	return (found);
}
/*
* Interface used by IP input/output.
* Returns true if there is a member on the ill for any zoneid except skipzone.
*
* The group and source can't be INADDR_ANY here so no need to translate to
* the unspecified IPv6 address.
*/
boolean_t
ill_hasmembers_otherzones_v4(ill_t *ill, ipaddr_t group, zoneid_t skipzone)
{
	in6_addr_t v6grp;

	/* Map the IPv4 group to v4-mapped IPv6 and share the v6 path. */
	IN6_IPADDR_TO_V4MAPPED(group, &v6grp);
	return (ill_hasmembers_otherzones_v6(ill, &v6grp, skipzone));
}
/*
* Interface used by IP input.
* Returns the next numerically larger zoneid that has a member. If none exist
* then returns -1 (ALL_ZONES).
* The normal usage is for the caller to start with a -1 zoneid (ALL_ZONES)
* to find the first zoneid which has a member, and then pass that in for
* subsequent calls until ALL_ZONES is returned.
*
* The implementation of ill_hasmembers_nextzone() assumes the ilms
* are sorted by zoneid for efficiency.
*/
zoneid_t
ill_hasmembers_nextzone_v6(ill_t *ill, const in6_addr_t *v6group,
    zoneid_t zoneid)
{
	ilm_t *ilm;
	zoneid_t next = ALL_ZONES;

	rw_enter(&ill->ill_mcast_lock, RW_READER);
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group))
			continue;
		if (ilm->ilm_zoneid > zoneid) {
			/*
			 * The ilm list is sorted by zoneid, so the first
			 * match is the next larger zoneid.
			 */
			next = ilm->ilm_zoneid;
			break;
		}
	}
	rw_exit(&ill->ill_mcast_lock);
	return (next);
}
/*
* Interface used by IP input.
* Returns the next numerically larger zoneid that has a member. If none exist
* then returns -1 (ALL_ZONES).
*
* The group and source can't be INADDR_ANY here so no need to translate to
* the unspecified IPv6 address.
*/
zoneid_t
ill_hasmembers_nextzone_v4(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
{
	in6_addr_t v6grp;

	/* Map the IPv4 group to v4-mapped IPv6 and share the v6 path. */
	IN6_IPADDR_TO_V4MAPPED(group, &v6grp);
	return (ill_hasmembers_nextzone_v6(ill, &v6grp, zoneid));
}
/*
* Find an ilm matching the ill, group, and zoneid.
*/
static ilm_t *
ilm_lookup(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
{
	ilm_t *ilm;

	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		/* ALL_ZONES matches any entry for the group. */
		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid)) {
			ASSERT(ilm->ilm_ill == ill);
			return (ilm);
		}
	}
	return (NULL);
}
/*
* How many members on this ill?
* Since each shared-IP zone has a separate ilm for the same group/ill
* we can have several.
*/
static int
ilm_numentries(ill_t *ill, const in6_addr_t *v6group)
{
	ilm_t *ilm;
	int count = 0;

	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

	/* One entry per shared-IP zone is possible for the same group. */
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group))
			count++;
	}
	return (count);
}
/* Caller guarantees that the group is not already on the list */
static ilm_t *
ilm_add(ill_t *ill, const in6_addr_t *v6group, ilg_stat_t ilgstat,
    mcast_record_t ilg_fmode, slist_t *ilg_flist, zoneid_t zoneid)
{
	ilm_t	*ilm;
	ilm_t	*ilm_cur;
	ilm_t	**ilm_ptpn;

	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

	ilm = GETSTRUCT(ilm_t, 1);
	if (ilm == NULL)
		return (NULL);
	/* Only allocate a filter when the ilg supplies a non-empty one. */
	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
		ilm->ilm_filter = l_alloc();
		if (ilm->ilm_filter == NULL) {
			mi_free(ilm);
			return (NULL);
		}
	}
	ilm->ilm_v6addr = *v6group;
	ilm->ilm_refcnt = 1;
	ilm->ilm_zoneid = zoneid;
	/* No report or retransmit timers pending yet. */
	ilm->ilm_timer = INFINITY;
	ilm->ilm_rtx.rtx_timer = INFINITY;

	ilm->ilm_ill = ill;
	DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
	    (char *), "ilm", (void *), ilm);
	/* The ilm counts against the ill; see ilm_delete()/ILL_FREE_OK. */
	ill->ill_ilm_cnt++;

	ASSERT(ill->ill_ipst);
	ilm->ilm_ipst = ill->ill_ipst;	/* No netstack_hold */

	/* The ill/ipif could have just been marked as condemned */

	/*
	 * To make ill_hasmembers_nextzone_v6 work we keep the list
	 * sorted by zoneid.
	 */
	ilm_cur = ill->ill_ilm;
	ilm_ptpn = &ill->ill_ilm;
	while (ilm_cur != NULL && ilm_cur->ilm_zoneid < ilm->ilm_zoneid) {
		ilm_ptpn = &ilm_cur->ilm_next;
		ilm_cur = ilm_cur->ilm_next;
	}
	ilm->ilm_next = ilm_cur;
	*ilm_ptpn = ilm;

	/*
	 * If we have an associated ilg, use its filter state; if not,
	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
	 */
	if (ilgstat != ILGSTAT_NONE) {
		if (!SLIST_IS_EMPTY(ilg_flist))
			l_copy(ilg_flist, ilm->ilm_filter);
		ilm->ilm_fmode = ilg_fmode;
	} else {
		ilm->ilm_no_ilg_cnt = 1;
		ilm->ilm_fmode = MODE_IS_EXCLUDE;
	}

	return (ilm);
}
/*
 * Release all source filter lists attached to the ilm and then the
 * ilm itself.  Caller is responsible for having unlinked it first.
 */
void
ilm_inactive(ilm_t *ilm)
{
	FREE_SLIST(ilm->ilm_rtx.rtx_block);
	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
	FREE_SLIST(ilm->ilm_pendsrcs);
	FREE_SLIST(ilm->ilm_filter);
	ilm->ilm_ipst = NULL;
	mi_free((char *)ilm);
}
/*
* Unlink ilm and free it.
*/
static void
ilm_delete(ilm_t *ilm)
{
	ill_t		*ill = ilm->ilm_ill;
	ilm_t		**ilmp;
	boolean_t	need_wakeup;

	/*
	 * Delete under lock protection so that readers don't stumble
	 * on bad ilm_next
	 */
	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

	/* Find the list pointer that references this ilm and unlink it. */
	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
		;
	*ilmp = ilm->ilm_next;

	mutex_enter(&ill->ill_lock);
	/*
	 * if we are the last reference to the ill, we may need to wakeup any
	 * pending FREE or unplumb operations. This is because conn_update_ill
	 * bails if there is a ilg_delete_all in progress.
	 */
	need_wakeup = B_FALSE;
	DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
	    (char *), "ilm", (void *), ilm);
	ASSERT(ill->ill_ilm_cnt > 0);
	ill->ill_ilm_cnt--;
	if (ILL_FREE_OK(ill))
		need_wakeup = B_TRUE;

	ilm_inactive(ilm); /* frees this ilm */

	if (need_wakeup) {
		/* drops ill lock */
		ipif_ill_refrele_tail(ill);
	} else {
		mutex_exit(&ill->ill_lock);
	}
}
/*
* Lookup an ill based on the group, ifindex, ifaddr, and zoneid.
* Applies to both IPv4 and IPv6, although ifaddr is only used with
* IPv4.
* Returns an error for IS_UNDER_IPMP and VNI interfaces.
* On error it sets *errorp.
*/
static ill_t *
ill_mcast_lookup(const in6_addr_t *group, ipaddr_t ifaddr, uint_t ifindex,
    zoneid_t zoneid, ip_stack_t *ipst, int *errorp)
{
	ill_t *ill;
	ipaddr_t v4group;

	if (IN6_IS_ADDR_V4MAPPED(group)) {
		IN6_V4MAPPED_TO_IPADDR(group, v4group);

		/* Selection priority: ifindex, then ifaddr, then the group */
		if (ifindex != 0) {
			ill = ill_lookup_on_ifindex_zoneid(ifindex, zoneid,
			    B_FALSE, ipst);
		} else if (ifaddr != INADDR_ANY) {
			ipif_t *ipif;

			ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, ipst);
			if (ipif == NULL) {
				ill = NULL;
			} else {
				/* Hold the ill before dropping the ipif */
				ill = ipif->ipif_ill;
				ill_refhold(ill);
				ipif_refrele(ipif);
			}
		} else {
			ill = ill_lookup_group_v4(v4group, zoneid, ipst, NULL,
			    NULL);
		}
	} else {
		/* IPv6: only ifindex or a group-based lookup applies */
		if (ifindex != 0) {
			ill = ill_lookup_on_ifindex_zoneid(ifindex, zoneid,
			    B_TRUE, ipst);
		} else {
			ill = ill_lookup_group_v6(group, zoneid, ipst, NULL,
			    NULL);
		}
	}
	if (ill == NULL) {
		/* A specific ifindex that matched nothing is ENXIO */
		if (ifindex != 0)
			*errorp = ENXIO;
		else
			*errorp = EADDRNOTAVAIL;
		return (NULL);
	}

	/* operation not supported on the virtual network interface */
	if (IS_UNDER_IPMP(ill) || IS_VNI(ill)) {
		ill_refrele(ill);
		*errorp = EINVAL;
		return (NULL);
	}
	/* Returned ill is held; caller must ill_refrele() it. */
	return (ill);
}
/*
* Looks up the appropriate ill given an interface index (or interface address)
* and multicast group. On success, returns 0, with *illpp pointing to the
* found struct. On failure, returns an errno and *illpp is set to NULL.
*
* Returns an error for IS_UNDER_IPMP and VNI interfaces.
*
* Handles both IPv4 and IPv6. The ifaddr argument only applies in the
* case of IPv4.
*/
int
ip_opt_check(conn_t *connp, const in6_addr_t *v6group,
    const in6_addr_t *v6src, ipaddr_t ifaddr, uint_t ifindex, ill_t **illpp)
{
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	boolean_t	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);
	int		error = 0;

	*illpp = NULL;

	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
		ipaddr_t v4group;
		ipaddr_t v4src;

		/* A v4-mapped group requires a v4 (or unspecified) source. */
		if (!src_unspec && !IN6_IS_ADDR_V4MAPPED(v6src))
			return (EINVAL);

		IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
		if (src_unspec)
			v4src = INADDR_ANY;
		else
			IN6_V4MAPPED_TO_IPADDR(v6src, v4src);

		/* Group must be multicast; source must not be. */
		if (!CLASSD(v4group) || CLASSD(v4src))
			return (EINVAL);
	} else {
		/* A v6 group cannot take a v4-mapped source. */
		if (!src_unspec && IN6_IS_ADDR_V4MAPPED(v6src))
			return (EINVAL);
		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
		    IN6_IS_ADDR_MULTICAST(v6src))
			return (EINVAL);
	}

	*illpp = ill_mcast_lookup(v6group, ifaddr, ifindex, IPCL_ZONEID(connp),
	    ipst, &error);
	return (error);
}
static int
ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
    struct ip_msfilter *imsf, const struct in6_addr *group, boolean_t issin6)
{
	ilg_t *ilg;
	int i, numsrc, fmode, outsrcs;
	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct in_addr *addrp;
	slist_t *fp;
	boolean_t is_v4only_api;
	ipaddr_t ifaddr;
	uint_t ifindex;

	/* Decode the request from whichever API structure was passed in. */
	if (gf == NULL) {
		ASSERT(imsf != NULL);
		ASSERT(!issin6);
		is_v4only_api = B_TRUE;
		outsrcs = imsf->imsf_numsrc;
		ifaddr = imsf->imsf_interface.s_addr;
		ifindex = 0;
	} else {
		ASSERT(imsf == NULL);
		is_v4only_api = B_FALSE;
		outsrcs = gf->gf_numsrc;
		ifaddr = INADDR_ANY;
		ifindex = gf->gf_interface;
	}

	/* No need to use ill_mcast_serializer for the reader */
	rw_enter(&connp->conn_ilg_lock, RW_READER);
	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
	if (ilg == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		return (EADDRNOTAVAIL);
	}

	/*
	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
	 * So we need to translate here.
	 */
	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
	    MCAST_INCLUDE : MCAST_EXCLUDE;
	if ((fp = ilg->ilg_filter) == NULL) {
		numsrc = 0;
	} else {
		/* Copy out at most outsrcs source addresses. */
		for (i = 0; i < outsrcs; i++) {
			if (i == fp->sl_numsrc)
				break;
			if (issin6) {
				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
				sin6->sin6_family = AF_INET6;
				sin6->sin6_addr = fp->sl_addr[i];
			} else {
				if (is_v4only_api) {
					addrp = &imsf->imsf_slist[i];
				} else {
					sin = (struct sockaddr_in *)
					    &gf->gf_slist[i];
					sin->sin_family = AF_INET;
					addrp = &sin->sin_addr;
				}
				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
			}
		}
		/* Report the full count even if the copy was truncated. */
		numsrc = fp->sl_numsrc;
	}

	if (is_v4only_api) {
		imsf->imsf_numsrc = numsrc;
		imsf->imsf_fmode = fmode;
	} else {
		gf->gf_numsrc = numsrc;
		gf->gf_fmode = fmode;
	}

	rw_exit(&connp->conn_ilg_lock);

	return (0);
}
/*
* Common for IPv4 and IPv6.
*/
static int
ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
    struct ip_msfilter *imsf, const struct in6_addr *group, ill_t *ill,
    boolean_t issin6)
{
	ilg_t *ilg;
	int i, err, infmode, new_fmode;
	uint_t insrcs;
	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct in_addr *addrp;
	slist_t *orig_filter = NULL;
	slist_t *new_filter = NULL;
	mcast_record_t orig_fmode;
	boolean_t leave_group, is_v4only_api;
	ilg_stat_t ilgstat;
	ilm_t *ilm;
	ipaddr_t ifaddr;
	uint_t ifindex;

	/* Decode the request from whichever API structure was passed in. */
	if (gf == NULL) {
		ASSERT(imsf != NULL);
		ASSERT(!issin6);
		is_v4only_api = B_TRUE;
		insrcs = imsf->imsf_numsrc;
		infmode = imsf->imsf_fmode;
		ifaddr = imsf->imsf_interface.s_addr;
		ifindex = 0;
	} else {
		ASSERT(imsf == NULL);
		is_v4only_api = B_FALSE;
		insrcs = gf->gf_numsrc;
		infmode = gf->gf_fmode;
		ifaddr = INADDR_ANY;
		ifindex = gf->gf_interface;
	}

	/* Make sure we can handle the source list */
	if (insrcs > MAX_FILTER_SIZE)
		return (ENOBUFS);

	/*
	 * setting the filter to (INCLUDE, NULL) is treated
	 * as a request to leave the group.
	 */
	leave_group = (infmode == MCAST_INCLUDE && insrcs == 0);

	mutex_enter(&ill->ill_mcast_serializer);
	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
	if (ilg == NULL) {
		/*
		 * if the request was actually to leave, and we
		 * didn't find an ilg, there's nothing to do.
		 */
		if (leave_group) {
			rw_exit(&connp->conn_ilg_lock);
			mutex_exit(&ill->ill_mcast_serializer);
			return (0);
		}
		ilg = conn_ilg_alloc(connp, &err);
		if (ilg == NULL) {
			rw_exit(&connp->conn_ilg_lock);
			mutex_exit(&ill->ill_mcast_serializer);
			return (err);
		}
		ilgstat = ILGSTAT_NEW;
		ilg->ilg_v6group = *group;
		ilg->ilg_ill = ill;
		ilg->ilg_ifaddr = ifaddr;
		ilg->ilg_ifindex = ifindex;
	} else if (leave_group) {
		/*
		 * Make sure we have the correct serializer. The ill argument
		 * might not match ilg_ill.
		 */
		ilg_refhold(ilg);
		mutex_exit(&ill->ill_mcast_serializer);
		/* Still holding conn_ilg_lock here, so ilg_ill is stable. */
		ill = ilg->ilg_ill;
		rw_exit(&connp->conn_ilg_lock);

		mutex_enter(&ill->ill_mcast_serializer);
		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
		/* Detach the ilm so we can drop the lock before deleting it */
		ilm = ilg->ilg_ilm;
		ilg->ilg_ilm = NULL;
		ilg_delete(connp, ilg, NULL);
		ilg_refrele(ilg);
		rw_exit(&connp->conn_ilg_lock);
		if (ilm != NULL)
			(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
		mutex_exit(&ill->ill_mcast_serializer);
		return (0);
	} else {
		ilgstat = ILGSTAT_CHANGE;
		/* Preserve existing state in case ip_addmulti() fails */
		orig_fmode = ilg->ilg_fmode;
		if (ilg->ilg_filter == NULL) {
			orig_filter = NULL;
		} else {
			orig_filter = l_alloc_copy(ilg->ilg_filter);
			if (orig_filter == NULL) {
				rw_exit(&connp->conn_ilg_lock);
				mutex_exit(&ill->ill_mcast_serializer);
				return (ENOMEM);
			}
		}
	}

	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		err = ENOMEM;
		goto free_and_exit;
	}

	if (insrcs == 0) {
		CLEAR_SLIST(ilg->ilg_filter);
	} else {
		slist_t *fp;
		if (ilg->ilg_filter == NULL) {
			fp = l_alloc();
			if (fp == NULL) {
				if (ilgstat == ILGSTAT_NEW)
					ilg_delete(connp, ilg, NULL);
				rw_exit(&connp->conn_ilg_lock);
				err = ENOMEM;
				goto free_and_exit;
			}
		} else {
			fp = ilg->ilg_filter;
		}
		/* Copy the user-supplied source list into the ilg filter. */
		for (i = 0; i < insrcs; i++) {
			if (issin6) {
				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
				fp->sl_addr[i] = sin6->sin6_addr;
			} else {
				if (is_v4only_api) {
					addrp = &imsf->imsf_slist[i];
				} else {
					sin = (struct sockaddr_in *)
					    &gf->gf_slist[i];
					addrp = &sin->sin_addr;
				}
				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
			}
		}
		fp->sl_numsrc = insrcs;
		ilg->ilg_filter = fp;
	}
	/*
	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
	 * So we need to translate here.
	 */
	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;

	/*
	 * Save copy of ilg's filter state to pass to other functions,
	 * so we can release conn_ilg_lock now.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);

	rw_exit(&connp->conn_ilg_lock);

	/*
	 * Now update the ill. We wait to do this until after the ilg
	 * has been updated because we need to update the src filter
	 * info for the ill, which involves looking at the status of
	 * all the ilgs associated with this group/interface pair.
	 */
	ilm = ip_addmulti_serial(group, ill, connp->conn_zoneid, ilgstat,
	    new_fmode, new_filter, &err);

	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	/*
	 * Must look up the ilg again since we've not been holding
	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
	 * having called conn_update_ill, which can run once we dropped the
	 * conn_ilg_lock above.
	 */
	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
	if (ilg == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		if (ilm != NULL) {
			(void) ip_delmulti_serial(ilm, B_FALSE,
			    (ilgstat == ILGSTAT_NEW));
		}
		err = ENXIO;
		goto free_and_exit;
	}

	if (ilm != NULL) {
		/* Succeeded. Update the ilg to point at the ilm */
		if (ilgstat == ILGSTAT_NEW) {
			ASSERT(ilg->ilg_ilm == NULL);
			ilg->ilg_ilm = ilm;
			ilm->ilm_ifaddr = ifaddr;	/* For netstat */
		} else {
			/*
			 * ip_addmulti didn't get a held ilm for
			 * ILGSTAT_CHANGE; ilm_refcnt was unchanged.
			 */
			ASSERT(ilg->ilg_ilm == ilm);
		}
	} else {
		ASSERT(err != 0);
		/*
		 * Failed to allocate the ilm.
		 * Restore the original filter state, or delete the
		 * newly-created ilg.
		 * If ENETDOWN just clear ill_ilg since so that we
		 * will rejoin when the ill comes back; don't report ENETDOWN
		 * to application.
		 */
		if (ilgstat == ILGSTAT_NEW) {
			if (err == ENETDOWN) {
				ilg->ilg_ill = NULL;
				err = 0;
			} else {
				ilg_delete(connp, ilg, NULL);
			}
		} else {
			ilg->ilg_fmode = orig_fmode;
			if (SLIST_IS_EMPTY(orig_filter)) {
				CLEAR_SLIST(ilg->ilg_filter);
			} else {
				/*
				 * We didn't free the filter, even if we
				 * were trying to make the source list empty;
				 * so if orig_filter isn't empty, the ilg
				 * must still have a filter alloc'd.
				 */
				l_copy(orig_filter, ilg->ilg_filter);
			}
		}
	}
	rw_exit(&connp->conn_ilg_lock);

free_and_exit:
	mutex_exit(&ill->ill_mcast_serializer);
	l_free(orig_filter);
	l_free(new_filter);

	return (err);
}
/*
* Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
*/
/* ARGSUSED */
int
ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
    ip_ioctl_cmd_t *ipip, void *ifreq)
{
	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
	/* existence verified in ip_wput_nondata() */
	mblk_t *data_mp = mp->b_cont->b_cont;
	int datalen, err, cmd, minsize;
	uint_t expsize = 0;
	conn_t *connp;
	boolean_t isv6, is_v4only_api, getcmd;
	struct sockaddr_in *gsin;
	struct sockaddr_in6 *gsin6;
	ipaddr_t v4group;
	in6_addr_t v6group;
	struct group_filter *gf = NULL;
	struct ip_msfilter *imsf = NULL;
	mblk_t *ndp;
	ill_t *ill;

	connp = Q_TO_CONN(q);
	err = ip_msfilter_ill(connp, mp, ipip, &ill);
	if (err != 0)
		return (err);

	/*
	 * ip_msfilter_ill() returned a held ill; every error return
	 * from here on must ill_refrele() it or the reference (and
	 * hence the interface) is leaked.
	 */
	if (data_mp->b_cont != NULL) {
		if ((ndp = msgpullup(data_mp, -1)) == NULL) {
			ill_refrele(ill);
			return (ENOMEM);
		}
		freemsg(data_mp);
		data_mp = ndp;
		mp->b_cont->b_cont = data_mp;
	}

	cmd = iocp->ioc_cmd;
	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
	datalen = MBLKL(data_mp);
	if (datalen < minsize) {
		ill_refrele(ill);
		return (EINVAL);
	}

	/*
	 * now we know we have at least have the initial structure,
	 * but need to check for the source list array.
	 */
	if (is_v4only_api) {
		imsf = (struct ip_msfilter *)data_mp->b_rptr;
		isv6 = B_FALSE;
		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
	} else {
		gf = (struct group_filter *)data_mp->b_rptr;
		if (gf->gf_group.ss_family == AF_INET6) {
			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
		} else {
			isv6 = B_FALSE;
		}
		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
	}
	if (datalen < expsize) {
		ill_refrele(ill);
		return (EINVAL);
	}

	if (isv6) {
		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
		v6group = gsin6->sin6_addr;
		if (getcmd) {
			err = ip_get_srcfilter(connp, gf, NULL, &v6group,
			    B_TRUE);
		} else {
			err = ip_set_srcfilter(connp, gf, NULL, &v6group, ill,
			    B_TRUE);
		}
	} else {
		boolean_t issin6 = B_FALSE;

		/* Extract the v4 group from whichever structure holds it. */
		if (is_v4only_api) {
			v4group = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
		} else {
			if (gf->gf_group.ss_family == AF_INET) {
				gsin = (struct sockaddr_in *)&gf->gf_group;
				v4group = (ipaddr_t)gsin->sin_addr.s_addr;
				IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
			} else {
				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
				    v4group);
				issin6 = B_TRUE;
			}
		}
		/*
		 * INADDR_ANY is represented as the IPv6 unspecifed addr.
		 */
		if (v4group == INADDR_ANY)
			v6group = ipv6_all_zeros;
		else
			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);

		if (getcmd) {
			err = ip_get_srcfilter(connp, gf, imsf, &v6group,
			    issin6);
		} else {
			err = ip_set_srcfilter(connp, gf, imsf, &v6group, ill,
			    issin6);
		}
	}
	ill_refrele(ill);

	return (err);
}
/*
* Determine the ill for the SIOC*MSFILTER ioctls
*
* Returns an error for IS_UNDER_IPMP interfaces.
*
* Finds the ill based on information in the ioctl headers.
*/
static int
ip_msfilter_ill(conn_t *connp, mblk_t *mp, const ip_ioctl_cmd_t *ipip,
    ill_t **illp)
{
	int cmd = ipip->ipi_cmd;
	int err = 0;
	ill_t *ill;
	/* caller has verified this mblk exists */
	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
	struct ip_msfilter *imsf;
	struct group_filter *gf;
	ipaddr_t v4addr, v4group;
	in6_addr_t v6group;
	uint32_t index;
	ip_stack_t *ipst;

	ipst = connp->conn_netstack->netstack_ip;

	*illp = NULL;

	/* don't allow multicast operations on a tcp conn */
	if (IPCL_IS_TCP(connp))
		return (ENOPROTOOPT);

	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
		/* don't allow v4-specific ioctls on v6 socket */
		if (connp->conn_family == AF_INET6)
			return (EAFNOSUPPORT);
		imsf = (struct ip_msfilter *)dbuf;
		v4addr = imsf->imsf_interface.s_addr;
		v4group = imsf->imsf_multiaddr.s_addr;
		IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
		ill = ill_mcast_lookup(&v6group, v4addr, 0, IPCL_ZONEID(connp),
		    ipst, &err);
		/* A specific interface address that matched nothing: ENXIO */
		if (ill == NULL && v4addr != INADDR_ANY)
			err = ENXIO;
	} else {
		/* SIOC[GS]MSFILTER: group_filter form, v4 or v6 group. */
		gf = (struct group_filter *)dbuf;
		index = gf->gf_interface;
		if (gf->gf_group.ss_family == AF_INET6) {
			struct sockaddr_in6 *sin6;

			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
			v6group = sin6->sin6_addr;
		} else if (gf->gf_group.ss_family == AF_INET) {
			struct sockaddr_in *sin;

			sin = (struct sockaddr_in *)&gf->gf_group;
			v4group = sin->sin_addr.s_addr;
			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
		} else {
			return (EAFNOSUPPORT);
		}
		ill = ill_mcast_lookup(&v6group, INADDR_ANY, index,
		    IPCL_ZONEID(connp), ipst, &err);
	}
	/* On success *illp holds a reference the caller must release. */
	*illp = ill;
	return (err);
}
/*
* The structures used for the SIOC*MSFILTER ioctls usually must be copied
* in in two stages, as the first copyin tells us the size of the attached
* source buffer. This function is called by ip_wput_nondata() after the
* first copyin has completed; it figures out how big the second stage
* needs to be, and kicks it off.
*
* In some cases (numsrc < 2), the second copyin is not needed as the
* first one gets a complete structure containing 1 source addr.
*
* The function returns 0 if a second copyin has been started (i.e. there's
* no more work to be done right now), or 1 if the second copyin is not
* needed and ip_wput_nondata() can continue its processing.
*/
int
ip_copyin_msfilter(queue_t *q, mblk_t *mp)
{
	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
	/* validity of this checked in ip_wput_nondata() */
	mblk_t *mp1 = mp->b_cont->b_cont;
	int offset = 0;
	int copysize = 0;

	switch (iocp->ioc_cmd) {
	case SIOCSMSFILTER:
	case SIOCGMSFILTER: {
		struct group_filter *gf = (struct group_filter *)mp1->b_rptr;

		/* One source fits in the base struct; more need a copyin. */
		if (gf->gf_numsrc >= 2) {
			offset = sizeof (struct group_filter);
			copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
		}
		break;
	}
	default: {
		struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;

		if (imsf->imsf_numsrc >= 2) {
			offset = sizeof (struct ip_msfilter);
			copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) -
			    offset;
		}
		break;
	}
	}
	if (copysize <= 0)
		return (1);

	/* Kick off the second-stage copyin of the source list. */
	mi_copyin_n(q, mp, offset, copysize);
	return (0);
}
/*
* Handle the following optmgmt:
* IP_ADD_MEMBERSHIP must not have joined already
* IPV6_JOIN_GROUP must not have joined already
* MCAST_JOIN_GROUP must not have joined already
* IP_BLOCK_SOURCE must have joined already
* MCAST_BLOCK_SOURCE must have joined already
* IP_JOIN_SOURCE_GROUP may have joined already
* MCAST_JOIN_SOURCE_GROUP may have joined already
*
* fmode and src parameters may be used to determine which option is
* being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
* are functionally equivalent):
* opt fmode v6src
* IP_ADD_MEMBERSHIP MODE_IS_EXCLUDE unspecified
* IPV6_JOIN_GROUP MODE_IS_EXCLUDE unspecified
* MCAST_JOIN_GROUP MODE_IS_EXCLUDE unspecified
* IP_BLOCK_SOURCE MODE_IS_EXCLUDE IPv4-mapped addr
* MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v6 addr
* IP_JOIN_SOURCE_GROUP MODE_IS_INCLUDE IPv4-mapped addr
* MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v6 addr
*
* Changing the filter mode is not allowed; if a matching ilg already
* exists and fmode != ilg->ilg_fmode, EINVAL is returned.
*
* Verifies that there is a source address of appropriate scope for
* the group; if not, EADDRNOTAVAIL is returned.
*
* The interface to be used may be identified by an IPv4 address or by an
* interface index.
*
* Handles IPv4-mapped IPv6 multicast addresses by associating them
* with the IPv4 address. Assumes that if v6group is v4-mapped,
* v6src is also v4-mapped.
*/
int
ip_opt_add_group(conn_t *connp, boolean_t checkonly,
    const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
    mcast_record_t fmode, const in6_addr_t *v6src)
{
	ill_t	*ill;
	int	error;
	char	abuf[INET6_ADDRSTRLEN];

	/* Validate the arguments and locate (and hold) the interface. */
	error = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex, &ill);
	if (error != 0) {
		ip1dbg(("ip_opt_add_group: no ill for group %s/"
		    "index %d\n", inet_ntop(AF_INET6, v6group, abuf,
		    sizeof (abuf)), ifindex));
		return (error);
	}

	if (checkonly) {
		/*
		 * do not do operation, just pretend to - new T_CHECK
		 * semantics. The error return case above if encountered
		 * considered a good enough "check" here.
		 */
		ill_refrele(ill);
		return (0);
	}

	/* The serializer orders this join with other mcast ops on the ill. */
	mutex_enter(&ill->ill_mcast_serializer);
	error = ilg_add(connp, v6group, ifaddr, ifindex, ill, fmode, v6src);
	mutex_exit(&ill->ill_mcast_serializer);

	ill_refrele(ill);
	return (error);
}
/*
* Common for IPv6 and IPv4.
* Here we handle ilgs that are still attached to their original ill
* (the one ifaddr/ifindex points at), as well as detached ones.
* The detached ones might have been attached to some other ill.
*/
static int
ip_opt_delete_group_excl(conn_t *connp, const in6_addr_t *v6group,
    ipaddr_t ifaddr, uint_t ifindex, mcast_record_t fmode,
    const in6_addr_t *v6src)
{
	ilg_t	*ilg;
	boolean_t leaving;
	ilm_t *ilm;
	ill_t *ill;
	int err = 0;

retry:
	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
	if (ilg == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		/*
		 * Since we didn't have any ilg we now do the error checks
		 * to determine the best errno.
		 */
		err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex,
		    &ill);
		if (ill != NULL) {
			/* The only error was a missing ilg for the group */
			ill_refrele(ill);
			err = EADDRNOTAVAIL;
		}
		return (err);
	}

	/* If the ilg is attached then we serialize using that ill */
	ill = ilg->ilg_ill;
	if (ill != NULL) {
		/* Prevent the ill and ilg from being freed */
		ill_refhold(ill);
		ilg_refhold(ilg);
		rw_exit(&connp->conn_ilg_lock);
		mutex_enter(&ill->ill_mcast_serializer);
		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
		/* The ilg could have been condemned while we had no locks */
		if (ilg->ilg_condemned) {
			/* Disappeared */
			ilg_refrele(ilg);
			rw_exit(&connp->conn_ilg_lock);
			mutex_exit(&ill->ill_mcast_serializer);
			ill_refrele(ill);
			goto retry;
		}
	}

	/*
	 * Decide if we're actually deleting the ilg or just removing a
	 * source filter address; if just removing an addr, make sure we
	 * aren't trying to change the filter mode, and that the addr is
	 * actually in our filter list already. If we're removing the
	 * last src in an include list, just delete the ilg.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
		leaving = B_TRUE;
	} else {
		if (fmode != ilg->ilg_fmode)
			err = EINVAL;
		else if (ilg->ilg_filter == NULL ||
		    !list_has_addr(ilg->ilg_filter, v6src))
			err = EADDRNOTAVAIL;
		if (err != 0) {
			if (ill != NULL)
				ilg_refrele(ilg);
			rw_exit(&connp->conn_ilg_lock);
			goto done;
		}
		if (fmode == MODE_IS_INCLUDE &&
		    ilg->ilg_filter->sl_numsrc == 1) {
			/* Removing the last include source means leaving */
			leaving = B_TRUE;
			v6src = NULL;
		} else {
			leaving = B_FALSE;
		}
	}

	/* Detach the ilm so we can release conn_ilg_lock before deleting it */
	ilm = ilg->ilg_ilm;
	if (leaving)
		ilg->ilg_ilm = NULL;

	ilg_delete(connp, ilg, v6src);
	if (ill != NULL)
		ilg_refrele(ilg);
	rw_exit(&connp->conn_ilg_lock);

	if (ilm != NULL) {
		ASSERT(ill != NULL);
		(void) ip_delmulti_serial(ilm, B_FALSE, leaving);
	}
done:
	if (ill != NULL) {
		mutex_exit(&ill->ill_mcast_serializer);
		ill_refrele(ill);
	}
	return (err);
}
/*
* Handle the following optmgmt:
* IP_DROP_MEMBERSHIP will leave
* IPV6_LEAVE_GROUP will leave
* MCAST_LEAVE_GROUP will leave
* IP_UNBLOCK_SOURCE will not leave
* MCAST_UNBLOCK_SOURCE will not leave
* IP_LEAVE_SOURCE_GROUP may leave (if leaving last source)
* MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source)
*
* fmode and src parameters may be used to determine which option is
* being set, as follows:
* opt fmode v6src
* IP_DROP_MEMBERSHIP MODE_IS_INCLUDE unspecified
* IPV6_LEAVE_GROUP MODE_IS_INCLUDE unspecified
* MCAST_LEAVE_GROUP MODE_IS_INCLUDE unspecified
* IP_UNBLOCK_SOURCE MODE_IS_EXCLUDE IPv4-mapped addr
* MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v6 addr
* IP_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE IPv4-mapped addr
* MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v6 addr
*
* Changing the filter mode is not allowed; if a matching ilg already
* exists and fmode != ilg->ilg_fmode, EINVAL is returned.
*
* The interface to be used may be identified by an IPv4 address or by an
* interface index.
*
* Handles IPv4-mapped IPv6 multicast addresses by associating them
* with the IPv4 address. Assumes that if v6group is v4-mapped,
* v6src is also v4-mapped.
*/
int
ip_opt_delete_group(conn_t *connp, boolean_t checkonly,
    const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
    mcast_record_t fmode, const in6_addr_t *v6src)
{
	ill_t	*ill;
	int	error;

	if (!checkonly) {
		/*
		 * Normal case: don't verify the ill here; _excl finds the
		 * existing ilg (attached or detached) and works from that.
		 */
		return (ip_opt_delete_group_excl(connp, v6group, ifaddr,
		    ifindex, fmode, v6src));
	}

	/*
	 * checkonly: do not do the operation, just sanity check the
	 * arguments - new T_CHECK semantics. ip_opt_check is considered
	 * a good enough "check" here.
	 */
	error = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex, &ill);
	if (ill != NULL)
		ill_refrele(ill);
	return (error);
}
/*
* Group mgmt for upper conn that passes things down
* to the interface multicast list (and DLPI)
* These routines can handle new style options that specify an interface name
* as opposed to an interface address (needed for general handling of
* unnumbered interfaces.)
*/
/*
* Add a group to an upper conn group data structure and pass things down
* to the interface multicast list (and DLPI)
* Common for IPv4 and IPv6; for IPv4 we can have an ifaddr.
*/
static int
ilg_add(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
uint_t ifindex, ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
{
int error = 0;
ilg_t *ilg;
ilg_stat_t ilgstat;
slist_t *new_filter = NULL;
int new_fmode;
ilm_t *ilm;
if (!(ill->ill_flags & ILLF_MULTICAST))
return (EADDRNOTAVAIL);
/* conn_ilg_lock protects the ilg list. */
ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
rw_enter(&connp->conn_ilg_lock, RW_WRITER);
ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
/*
* Depending on the option we're handling, may or may not be okay
* if group has already been added. Figure out our rules based
* on fmode and src params. Also make sure there's enough room
* in the filter if we're adding a source to an existing filter.
*/
if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
/* we're joining for all sources, must not have joined */
if (ilg != NULL)
error = EADDRINUSE;
} else {
if (fmode == MODE_IS_EXCLUDE) {
/* (excl {addr}) => block source, must have joined */
if (ilg == NULL)
error = EADDRNOTAVAIL;
}
/* (incl {addr}) => join source, may have joined */
if (ilg != NULL &&
SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
error = ENOBUFS;
}
if (error != 0) {
rw_exit(&connp->conn_ilg_lock);
return (error);
}
/*
* Alloc buffer to copy new state into (see below) before
* we make any changes, so we can bail if it fails.
*/
if ((new_filter = l_alloc()) == NULL) {
rw_exit(&connp->conn_ilg_lock);
return (ENOMEM);
}
if (ilg == NULL) {
if ((ilg = conn_ilg_alloc(connp, &error)) == NULL) {
rw_exit(&connp->conn_ilg_lock);
l_free(new_filter);
return (error);
}
ilg->ilg_ifindex = ifindex;
ilg->ilg_ifaddr = ifaddr;
if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
ilg->ilg_filter = l_alloc();
if (ilg->ilg_filter == NULL) {
ilg_delete(connp, ilg, NULL);
rw_exit(&connp->conn_ilg_lock);
l_free(new_filter);
return (ENOMEM);
}
ilg->ilg_filter->sl_numsrc = 1;
ilg->ilg_filter->sl_addr[0] = *v6src;
}
ilgstat = ILGSTAT_NEW;
ilg->ilg_v6group = *v6group;
ilg->ilg_fmode = fmode;
ilg->ilg_ill = ill;
} else {
int index;
if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
rw_exit(&connp->conn_ilg_lock);
l_free(new_filter);
return (EINVAL);
}
if (ilg->ilg_filter == NULL) {
ilg->ilg_filter = l_alloc();
if (ilg->ilg_filter == NULL) {
rw_exit(&connp->conn_ilg_lock);
l_free(new_filter);
return (ENOMEM);
}
}
if (list_has_addr(ilg->ilg_filter, v6src)) {
rw_exit(&connp->conn_ilg_lock);
l_free(new_filter);
return (EADDRNOTAVAIL);
}
ilgstat = ILGSTAT_CHANGE;
index = ilg->ilg_filter->sl_numsrc++;
ilg->ilg_filter->sl_addr[index] = *v6src;
}
/*
* Save copy of ilg's filter state to pass to other functions,
* so we can release conn_ilg_lock now.
*/
new_fmode = ilg->ilg_fmode;
l_copy(ilg->ilg_filter, new_filter);
rw_exit(&connp->conn_ilg_lock);
/*
* Now update the ill. We wait to do this until after the ilg
* has been updated because we need to update the src filter
* info for the ill, which involves looking at the status of
* all the ilgs associated with this group/interface pair.
*/
ilm = ip_addmulti_serial(v6group, ill, connp->conn_zoneid, ilgstat,
new_fmode, new_filter, &error);
rw_enter(&connp->conn_ilg_lock, RW_WRITER);
/*
* Must look up the ilg again since we've not been holding
* conn_ilg_lock. The ilg could have disappeared due to an unplumb
* having called conn_update_ill, which can run once we dropped the
* conn_ilg_lock above.
*/
ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
if (ilg == NULL) {
rw_exit(&connp->conn_ilg_lock);
if (ilm != NULL) {
(void) ip_delmulti_serial(ilm, B_FALSE,
(ilgstat == ILGSTAT_NEW));
}
error = ENXIO;
goto free_and_exit;
}
if (ilm != NULL) {
/* Succeeded. Update the ilg to point at the ilm */
if (ilgstat == ILGSTAT_NEW) {
ASSERT(ilg->ilg_ilm == NULL);
ilg->ilg_ilm = ilm;
ilm->ilm_ifaddr = ifaddr; /* For netstat */
} else {
/*
* ip_addmulti didn't get a held ilm for
* ILGSTAT_CHANGE; ilm_refcnt was unchanged.
*/
ASSERT(ilg->ilg_ilm == ilm);
}
} else {
ASSERT(error != 0);
/*
* Failed to allocate the ilm.
* Need to undo what we did before calling ip_addmulti()
* If ENETDOWN just clear ill_ilg since so that we
* will rejoin when the ill comes back; don't report ENETDOWN
* to application.
*/
if (ilgstat == ILGSTAT_NEW && error == ENETDOWN) {
ilg->ilg_ill = NULL;
error = 0;
} else {
in6_addr_t delsrc =
(ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
ilg_delete(connp, ilg, &delsrc);
}
}
rw_exit(&connp->conn_ilg_lock);
free_and_exit:
l_free(new_filter);
return (error);
}
/*
 * Find an IPv4 ilg matching group, ill and source.
 * The group and source can't be INADDR_ANY here so no need to translate to
 * the unspecified IPv6 address.
 */
boolean_t
conn_hasmembers_ill_withsrc_v4(conn_t *connp, ipaddr_t group, ipaddr_t src,
    ill_t *ill)
{
	in6_addr_t v6group, v6src;
	boolean_t match = B_FALSE;
	boolean_t srcpresent = B_FALSE;
	ilg_t *ilg;
	int idx;

	IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	rw_enter(&connp->conn_ilg_lock, RW_READER);

	/* Locate the ilg for this ill/group pair, if any. */
	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
		if (ilg->ilg_condemned)
			continue;
		/* ilg_ill could be NULL if an add is in progress */
		if (ilg->ilg_ill != ill)
			continue;
		/* The callers use upper ill for IPMP */
		ASSERT(!IS_UNDER_IPMP(ill));
		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group))
			break;
	}
	if (ilg == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		return (B_FALSE);
	}

	/* With no source filter, any source is a member. */
	if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
		rw_exit(&connp->conn_ilg_lock);
		return (B_TRUE);
	}

	/*
	 * We have an ilg with matching ill and group, but it carries a
	 * source filter; check whether src appears in it, then combine
	 * that with the filter mode.
	 */
	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
	for (idx = 0; idx < ilg->ilg_filter->sl_numsrc; idx++) {
		if (IN6_ARE_ADDR_EQUAL(&v6src,
		    &ilg->ilg_filter->sl_addr[idx])) {
			srcpresent = B_TRUE;
			break;
		}
	}

	if ((srcpresent && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
	    (!srcpresent && ilg->ilg_fmode == MODE_IS_EXCLUDE))
		match = B_TRUE;

	rw_exit(&connp->conn_ilg_lock);
	return (match);
}
/*
 * Find an IPv6 ilg matching group, ill, and source
 */
boolean_t
conn_hasmembers_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
    const in6_addr_t *v6src, ill_t *ill)
{
	boolean_t match = B_FALSE;
	boolean_t srcpresent = B_FALSE;
	ilg_t *ilg;
	int idx;

	rw_enter(&connp->conn_ilg_lock, RW_READER);

	/* Locate the ilg for this ill/group pair, if any. */
	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
		if (ilg->ilg_condemned)
			continue;
		/* ilg_ill could be NULL if an add is in progress */
		if (ilg->ilg_ill != ill)
			continue;
		/* The callers use upper ill for IPMP */
		ASSERT(!IS_UNDER_IPMP(ill));
		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
			break;
	}
	if (ilg == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		return (B_FALSE);
	}

	/* With no source filter, any source is a member. */
	if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
		rw_exit(&connp->conn_ilg_lock);
		return (B_TRUE);
	}

	/*
	 * We have an ilg with matching ill and group, but it carries a
	 * source filter; check whether v6src appears in it, then combine
	 * that with the filter mode.
	 */
	for (idx = 0; idx < ilg->ilg_filter->sl_numsrc; idx++) {
		if (IN6_ARE_ADDR_EQUAL(v6src,
		    &ilg->ilg_filter->sl_addr[idx])) {
			srcpresent = B_TRUE;
			break;
		}
	}

	if ((srcpresent && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
	    (!srcpresent && ilg->ilg_fmode == MODE_IS_EXCLUDE))
		match = B_TRUE;

	rw_exit(&connp->conn_ilg_lock);
	return (match);
}
/*
 * Find an ilg matching group and ifaddr/ifindex.
 * We check both ifaddr and ifindex even though at most one of them
 * will be non-zero; that way we always find the right one.
 */
static ilg_t *
ilg_lookup(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
    uint_t ifindex)
{
	ilg_t *cur;

	ASSERT(RW_LOCK_HELD(&connp->conn_ilg_lock));

	for (cur = connp->conn_ilg; cur != NULL; cur = cur->ilg_next) {
		if (cur->ilg_condemned)
			continue;
		/* Skip entries whose ifaddr/ifindex key doesn't match. */
		if (cur->ilg_ifaddr != ifaddr || cur->ilg_ifindex != ifindex)
			continue;
		if (IN6_ARE_ADDR_EQUAL(&cur->ilg_v6group, v6group))
			return (cur);
	}
	return (NULL);
}
/*
 * If a source address is passed in (src != NULL and src is not
 * unspecified), remove the specified src addr from the given ilg's
 * filter list, else delete the ilg.
 */
static void
ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
{
	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
	ASSERT(ilg->ilg_ptpn != NULL);
	ASSERT(!ilg->ilg_condemned);

	if (src != NULL && !IN6_IS_ADDR_UNSPECIFIED(src)) {
		/* Only remove the one source from the filter list. */
		l_remove(ilg->ilg_filter, src);
		return;
	}

	/* Full delete: discard the filter and condemn the ilg. */
	FREE_SLIST(ilg->ilg_filter);
	ilg->ilg_filter = NULL;

	ASSERT(ilg->ilg_ilm == NULL);
	ilg->ilg_ill = NULL;
	ilg->ilg_condemned = B_TRUE;

	/* ilg_inactive will unlink from the list */
	ilg_refrele(ilg);
}
/*
 * Called from conn close. No new ilg can be added or removed
 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
 * will return error if conn has started closing.
 *
 * We handle locking as follows.
 * Under conn_ilg_lock we get the first ilg. As we drop the conn_ilg_lock to
 * proceed with the ilm part of the delete we hold a reference on both the ill
 * and the ilg. This doesn't prevent changes to the ilg, but prevents it from
 * being deleted.
 *
 * Since the ilg_add code path uses two locks (conn_ilg_lock for the ilg part,
 * and ill_mcast_lock for the ip_addmulti part) we can run at a point between
 * the two. At that point ilg_ill is set, but ilg_ilm hasn't yet been set. In
 * that case we delete the ilg here, which makes ilg_add discover that the ilg
 * has disappeared when ip_addmulti returns, so it will discard the ilm it just
 * added.
 */
void
ilg_delete_all(conn_t *connp)
{
	ilg_t *ilg, *next_ilg, *held_ilg;
	ilm_t *ilm;
	ill_t *ill;
	boolean_t need_refrele;

	/*
	 * Can not run if there is a conn_update_ill already running.
	 * Wait for it to complete. Caller should have already set CONN_CLOSING
	 * which prevents any new threads to run in conn_update_ill.
	 */
	mutex_enter(&connp->conn_lock);
	ASSERT(connp->conn_state_flags & CONN_CLOSING);
	while (connp->conn_state_flags & CONN_UPDATE_ILL)
		cv_wait(&connp->conn_cv, &connp->conn_lock);
	mutex_exit(&connp->conn_lock);

	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	ilg = connp->conn_ilg;
	held_ilg = NULL;
	while (ilg != NULL) {
		if (ilg->ilg_condemned) {
			ilg = ilg->ilg_next;
			continue;
		}
		/* If the ilg is detached then no need to serialize */
		if (ilg->ilg_ilm == NULL) {
			next_ilg = ilg->ilg_next;
			ilg_delete(connp, ilg, NULL);
			ilg = next_ilg;
			continue;
		}
		ill = ilg->ilg_ilm->ilm_ill;

		/*
		 * In order to serialize on the ill we try to enter
		 * and if that fails we unlock and relock and then
		 * check that we still have an ilm.
		 */
		need_refrele = B_FALSE;
		if (!mutex_tryenter(&ill->ill_mcast_serializer)) {
			ill_refhold(ill);
			need_refrele = B_TRUE;
			ilg_refhold(ilg);
			if (held_ilg != NULL)
				ilg_refrele(held_ilg);
			held_ilg = ilg;
			rw_exit(&connp->conn_ilg_lock);
			mutex_enter(&ill->ill_mcast_serializer);
			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
			if (ilg->ilg_condemned) {
				/*
				 * Somebody else deleted the ilg while we
				 * dropped conn_ilg_lock. Advance to the next
				 * ilg (held_ilg keeps the current one, and
				 * hence its ilg_next pointer, alive).
				 *
				 * BUG FIX: we must drop conn_ilg_lock here
				 * because the code at the "next" label
				 * re-acquires it after dropping the
				 * serializer; jumping there while still
				 * holding it would recursively rw_enter the
				 * same lock as writer and deadlock.
				 */
				ilg = ilg->ilg_next;
				rw_exit(&connp->conn_ilg_lock);
				goto next;
			}
		}
		ilm = ilg->ilg_ilm;
		ilg->ilg_ilm = NULL;
		next_ilg = ilg->ilg_next;
		ilg_delete(connp, ilg, NULL);
		ilg = next_ilg;
		rw_exit(&connp->conn_ilg_lock);

		if (ilm != NULL)
			(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
	next:
		mutex_exit(&ill->ill_mcast_serializer);
		if (need_refrele) {
			/* Drop ill reference while we hold no locks */
			ill_refrele(ill);
		}
		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	}
	if (held_ilg != NULL)
		ilg_refrele(held_ilg);
	rw_exit(&connp->conn_ilg_lock);
}
/*
 * Attach the ilg to an ilm on the ill. If it fails we leave ilg_ill as NULL so
 * that a subsequent attempt can attach it.
 * Drops and reacquires conn_ilg_lock.
 *
 * Caller holds conn_ilg_lock as writer (asserted below); the caller is also
 * expected to hold the ill's ill_mcast_serializer (see ilg_check_reattach).
 */
static void
ilg_attach(conn_t *connp, ilg_t *ilg, ill_t *ill)
{
	ilg_stat_t ilgstat;
	slist_t *new_filter;
	int new_fmode;
	in6_addr_t v6group;
	ipaddr_t ifaddr;
	uint_t ifindex;
	ilm_t *ilm;
	int error = 0;

	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL)
		return;

	/*
	 * Save copy of ilg's filter state to pass to other functions, so
	 * we can release conn_ilg_lock now.
	 * Set ilg_ill so that an unplumb can find us.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);
	/* Snapshot the lookup key so we can re-find this ilg below. */
	v6group = ilg->ilg_v6group;
	ifaddr = ilg->ilg_ifaddr;
	ifindex = ilg->ilg_ifindex;
	ilgstat = ILGSTAT_NEW;

	ilg->ilg_ill = ill;
	ASSERT(ilg->ilg_ilm == NULL);
	rw_exit(&connp->conn_ilg_lock);

	/* Create/join the ilm for this group on the chosen ill. */
	ilm = ip_addmulti_serial(&v6group, ill, connp->conn_zoneid, ilgstat,
	    new_fmode, new_filter, &error);
	l_free(new_filter);

	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	/*
	 * Must look up the ilg again since we've not been holding
	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
	 * having called conn_update_ill, which can run once we dropped the
	 * conn_ilg_lock above.
	 */
	ilg = ilg_lookup(connp, &v6group, ifaddr, ifindex);
	if (ilg == NULL) {
		/* The ilg went away; undo the join if it succeeded. */
		if (ilm != NULL) {
			rw_exit(&connp->conn_ilg_lock);
			(void) ip_delmulti_serial(ilm, B_FALSE,
			    (ilgstat == ILGSTAT_NEW));
			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
		}
		return;
	}
	if (ilm == NULL) {
		/* Join failed; clear ilg_ill so a later attempt can retry. */
		ilg->ilg_ill = NULL;
		return;
	}
	ASSERT(ilg->ilg_ilm == NULL);
	ilg->ilg_ilm = ilm;
	ilm->ilm_ifaddr = ifaddr;	/* For netstat */
}
/*
 * Called when an ill is unplumbed to make sure that there are no
 * dangling conn references to that ill. In that case ill is non-NULL and
 * we make sure we remove all references to it.
 * Also called when we should revisit the ilg_ill used for multicast
 * memberships, in which case ill is NULL.
 *
 * conn is held by caller.
 *
 * Note that ipcl_walk only walks conns that are not yet condemned.
 * condemned conns can't be refheld. For this reason, conn must become clean
 * first, i.e. it must not refer to any ill/ire and then only set
 * condemned flag.
 *
 * We leave ixa_multicast_ifindex in place. We prefer dropping
 * packets instead of sending them out the wrong interface.
 *
 * We keep the ilg around in a detached state (with ilg_ill and ilg_ilm being
 * NULL) so that the application can leave it later. Also, if ilg_ifaddr and
 * ilg_ifindex are zero, indicating that the system should pick the interface,
 * then we attempt to reselect the ill and join on it.
 *
 * Locking notes:
 * Under conn_ilg_lock we get the first ilg. As we drop the conn_ilg_lock to
 * proceed with the ilm part of the delete we hold a reference on both the ill
 * and the ilg. This doesn't prevent changes to the ilg, but prevents it from
 * being deleted.
 *
 * Note: if this function is called when new ill/ipif's arrive or change status
 * (SIOCSLIFINDEX, SIOCSLIFADDR) then we will attempt to attach any ilgs with
 * a NULL ilg_ill to an ill/ilm.
 */
static void
conn_update_ill(conn_t *connp, caddr_t arg)
{
	ill_t *ill = (ill_t *)arg;
	boolean_t busy;

	/*
	 * We have to prevent ip_close/ilg_delete_all from running at
	 * the same time. ip_close sets CONN_CLOSING before doing the ilg_delete
	 * all, and we set CONN_UPDATE_ILL. That ensures that only one of
	 * ilg_delete_all and conn_update_ill run at a time for a given conn.
	 * If ilg_delete_all got here first, then we have nothing to do.
	 */
	mutex_enter(&connp->conn_lock);
	busy = (connp->conn_state_flags &
	    (CONN_CLOSING|CONN_UPDATE_ILL)) != 0;
	if (!busy)
		connp->conn_state_flags |= CONN_UPDATE_ILL;
	mutex_exit(&connp->conn_lock);
	if (busy) {
		/* Caller has to wait for ill_ilm_cnt to drop to zero */
		return;
	}

	if (ill != NULL)
		ilg_check_detach(connp, ill);

	ilg_check_reattach(connp);

	/* Do we need to wake up a thread in ilg_delete_all? */
	mutex_enter(&connp->conn_lock);
	connp->conn_state_flags &= ~CONN_UPDATE_ILL;
	if (connp->conn_state_flags & CONN_CLOSING)
		cv_broadcast(&connp->conn_cv);
	mutex_exit(&connp->conn_lock);
}
/*
 * Detach from an ill that is going away: clear ilg_ill/ilg_ilm on every ilg
 * that points at it and release the corresponding ilms.
 * Called from conn_update_ill, which serializes against ilg_delete_all via
 * the CONN_UPDATE_ILL flag.
 */
static void
ilg_check_detach(conn_t *connp, ill_t *ill)
{
	char group_buf[INET6_ADDRSTRLEN];
	ilg_t *ilg, *held_ilg;
	ilm_t *ilm;

	/* Serialize with ilg_add/ip_addmulti on this ill. */
	mutex_enter(&ill->ill_mcast_serializer);
	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	held_ilg = NULL;
	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
		if (ilg->ilg_condemned)
			continue;

		if (ilg->ilg_ill != ill)
			continue;

		/* Detach from current ill */
		ip1dbg(("ilg_check_detach: detach %s on %s\n",
		    inet_ntop(AF_INET6, &ilg->ilg_v6group,
		    group_buf, sizeof (group_buf)),
		    ilg->ilg_ill->ill_name));

		/* Detach this ilg from the ill/ilm */
		ilm = ilg->ilg_ilm;
		ilg->ilg_ilm = NULL;
		ilg->ilg_ill = NULL;
		if (ilm == NULL)
			continue;

		/*
		 * Prevent ilg from disappearing while we drop conn_ilg_lock
		 * for ip_delmulti_serial; held_ilg carries the reference
		 * hand-over-hand between loop iterations.
		 */
		ilg_transfer_hold(held_ilg, ilg);
		held_ilg = ilg;
		rw_exit(&connp->conn_ilg_lock);

		(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	}
	if (held_ilg != NULL)
		ilg_refrele(held_ilg);
	rw_exit(&connp->conn_ilg_lock);
	mutex_exit(&ill->ill_mcast_serializer);
}
/*
 * Check if there is a place to attach the conn_ilgs. We do this for both
 * detached ilgs and attached ones, since for the latter there could be
 * a better ill to attach them to.
 *
 * Locking: walks the conn_ilg list under conn_ilg_lock (writer), repeatedly
 * dropping and re-acquiring it around ill_refrele/ip_delmulti_serial calls.
 * While the lock is dropped, held_ilg carries a hand-over-hand reference on
 * the current ilg so it cannot be freed (though it may become condemned or
 * be re-attached by another thread — hence the re-checks after each relock).
 */
static void
ilg_check_reattach(conn_t *connp)
{
	ill_t *ill;
	char group_buf[INET6_ADDRSTRLEN];
	ilg_t *ilg, *held_ilg;
	ilm_t *ilm;
	zoneid_t zoneid = IPCL_ZONEID(connp);
	int error;
	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;

	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	held_ilg = NULL;
	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
		if (ilg->ilg_condemned)
			continue;

		/* Check if the conn_ill matches what we would pick now */
		ill = ill_mcast_lookup(&ilg->ilg_v6group, ilg->ilg_ifaddr,
		    ilg->ilg_ifindex, zoneid, ipst, &error);

		/*
		 * Make sure the ill is usable for multicast and that
		 * we can send the DL_ADDMULTI_REQ before we create an
		 * ilm.
		 */
		if (ill != NULL &&
		    (!(ill->ill_flags & ILLF_MULTICAST) || !ill->ill_dl_up)) {
			/* Drop locks across ill_refrele */
			ilg_transfer_hold(held_ilg, ilg);
			held_ilg = ilg;
			rw_exit(&connp->conn_ilg_lock);
			ill_refrele(ill);
			ill = NULL;
			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
			/* Note that ilg could have become condemned */
		}

		/* Is the ill unchanged, even if both are NULL? */
		if (ill == ilg->ilg_ill) {
			if (ill != NULL) {
				/* Drop locks across ill_refrele */
				ilg_transfer_hold(held_ilg, ilg);
				held_ilg = ilg;
				rw_exit(&connp->conn_ilg_lock);
				ill_refrele(ill);
				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
			}
			continue;
		}

		/* Something changed; detach from old first if needed */
		if (ilg->ilg_ill != NULL) {
			ill_t *ill2 = ilg->ilg_ill;
			boolean_t need_refrele = B_FALSE;

			/*
			 * In order to serialize on the ill we try to enter
			 * and if that fails we unlock and relock.
			 */
			if (!mutex_tryenter(&ill2->ill_mcast_serializer)) {
				ill_refhold(ill2);
				need_refrele = B_TRUE;
				ilg_transfer_hold(held_ilg, ilg);
				held_ilg = ilg;
				rw_exit(&connp->conn_ilg_lock);
				mutex_enter(&ill2->ill_mcast_serializer);
				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
				/* Note that ilg could have become condemned */
			}
			/*
			 * Check that nobody else re-attached the ilg while we
			 * dropped the lock.
			 */
			if (ilg->ilg_ill == ill2) {
				ASSERT(!ilg->ilg_condemned);

				/* Detach from current ill */
				ip1dbg(("conn_check_reattach: detach %s/%s\n",
				    inet_ntop(AF_INET6, &ilg->ilg_v6group,
				    group_buf, sizeof (group_buf)),
				    ill2->ill_name));

				ilm = ilg->ilg_ilm;
				ilg->ilg_ilm = NULL;
				ilg->ilg_ill = NULL;
			} else {
				ilm = NULL;
			}
			rw_exit(&connp->conn_ilg_lock);
			if (ilm != NULL)
				(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
			mutex_exit(&ill2->ill_mcast_serializer);
			if (need_refrele) {
				/* Drop ill reference while we hold no locks */
				ill_refrele(ill2);
			}
			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
			/*
			 * While we dropped conn_ilg_lock some other thread
			 * could have attached this ilg, thus we check again.
			 */
			if (ilg->ilg_ill != NULL) {
				if (ill != NULL) {
					/* Drop locks across ill_refrele */
					ilg_transfer_hold(held_ilg, ilg);
					held_ilg = ilg;
					rw_exit(&connp->conn_ilg_lock);
					ill_refrele(ill);
					rw_enter(&connp->conn_ilg_lock,
					    RW_WRITER);
				}
				continue;
			}
		}
		/* Attach to the newly-picked ill, if we found one. */
		if (ill != NULL) {
			/*
			 * In order to serialize on the ill we try to enter
			 * and if that fails we unlock and relock.
			 */
			if (!mutex_tryenter(&ill->ill_mcast_serializer)) {
				/* Already have a refhold on ill */
				ilg_transfer_hold(held_ilg, ilg);
				held_ilg = ilg;
				rw_exit(&connp->conn_ilg_lock);
				mutex_enter(&ill->ill_mcast_serializer);
				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
				/* Note that ilg could have become condemned */
			}
			/*
			 * Check that nobody else attached the ilg and that
			 * it wasn't condemned while we dropped the lock.
			 */
			if (ilg->ilg_ill == NULL && !ilg->ilg_condemned) {
				/*
				 * Attach to the new ill. Can fail in which
				 * case ilg_ill will remain NULL. ilg_attach
				 * drops and reacquires conn_ilg_lock.
				 */
				ip1dbg(("conn_check_reattach: attach %s/%s\n",
				    inet_ntop(AF_INET6, &ilg->ilg_v6group,
				    group_buf, sizeof (group_buf)),
				    ill->ill_name));
				ilg_attach(connp, ilg, ill);
				ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
			}
			mutex_exit(&ill->ill_mcast_serializer);
			/* Drop locks across ill_refrele */
			ilg_transfer_hold(held_ilg, ilg);
			held_ilg = ilg;
			rw_exit(&connp->conn_ilg_lock);
			ill_refrele(ill);
			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
		}
	}
	if (held_ilg != NULL)
		ilg_refrele(held_ilg);
	rw_exit(&connp->conn_ilg_lock);
}
/*
 * Called when an ill is unplumbed to make sure that there are no
 * dangling conn references to that ill. In that case ill is non-NULL and
 * we make sure we remove all references to it.
 * Also called when we should revisit the ilg_ill used for multicast
 * memberships, in which case ill is NULL.
 */
void
update_conn_ill(ill_t *ill, ip_stack_t *ipst)
{
	/* conn_update_ill does the per-conn work; ill is passed as the arg */
	ipcl_walk(conn_update_ill, (caddr_t)ill, ipst);
}