ip_ndp.c revision 2b24ab6b3865caeede9eeb9db6b83e1d89dcd1ea
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/strsun.h>
#include <sys/sysmacros.h>
#include <sys/errno.h>
#include <sys/dlpi.h>
#include <sys/socket.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/vtrace.h>
#include <sys/kmem.h>
#include <sys/zone.h>
#include <sys/ethernet.h>
#include <sys/sdt.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <inet/common.h>
#include <inet/mi.h>
#include <inet/mib2.h>
#include <inet/nd.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <inet/ipclassifier.h>
#include <inet/ip_if.h>
#include <inet/ip_ire.h>
#include <inet/ip_rts.h>
#include <inet/ip6.h>
#include <inet/ip_ndp.h>
#include <inet/ipsec_impl.h>
#include <inet/ipsec_info.h>
#include <inet/sctp_ip.h>
#include <inet/ip2mac_impl.h>
/*
* Function names with nce_ prefix are static while function
* names with ndp_ prefix are used by rest of the IP.
*
* Lock ordering:
*
* ndp_g_lock -> ill_lock -> nce_lock
*
* The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and
* nce_next. Nce_lock protects the contents of the NCE (particularly
* nce_refcnt).
*/
static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr,
uint32_t ll_addr_len);
static void nce_ire_delete(nce_t *nce);
static void nce_ire_delete1(ire_t *ire, char *nce_arg);
static void nce_set_ll(nce_t *nce, uchar_t *ll_addr);
static nce_t *nce_lookup_addr(ill_t *, boolean_t, const in6_addr_t *,
nce_t *);
static nce_t *nce_lookup_mapping(ill_t *, const in6_addr_t *);
static void nce_make_mapping(nce_t *nce, uchar_t *addrpos,
uchar_t *addr);
static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr);
static void nce_queue_mp(nce_t *nce, mblk_t *mp);
static mblk_t *nce_udreq_alloc(ill_t *ill);
static void nce_update(nce_t *nce, uint16_t new_state,
uchar_t *new_ll_addr);
static uint32_t nce_solicit(nce_t *nce, in6_addr_t src);
static boolean_t nce_xmit(ill_t *ill, uint8_t type,
boolean_t use_lla_addr, const in6_addr_t *sender,
const in6_addr_t *target, int flag);
static boolean_t nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla,
const in6_addr_t *target, uint_t flags);
static boolean_t nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla,
const in6_addr_t *src, uint_t flags);
static int ndp_add_v4(ill_t *, const in_addr_t *, uint16_t,
nce_t **, nce_t *);
static ipif_t *ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill);
#ifdef DEBUG
static void nce_trace_cleanup(const nce_t *);
#endif
#define NCE_HASH_PTR_V4(ipst, addr) \
(&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)]))
#define NCE_HASH_PTR_V6(ipst, addr) \
(&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \
NCE_TABLE_SIZE)]))
/* Non-tunable probe interval, based on link capabilities */
#define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500)
/*
* NDP Cache Entry creation routine.
* Mapped entries will never do NUD .
* This routine must always be called with ndp6->ndp_g_lock held.
* Prior to return, nce_refcnt is incremented.
*/
int
ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr,
const in6_addr_t *mask, const in6_addr_t *extract_mask,
uint32_t hw_extract_start, uint16_t flags, uint16_t state,
nce_t **newnce)
{
static nce_t nce_nil;
nce_t *nce;
mblk_t *mp;
mblk_t *template;
nce_t **ncep;
int err;
boolean_t dropped = B_FALSE;
ip_stack_t *ipst = ill->ill_ipst;
ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock));
ASSERT(ill != NULL && ill->ill_isv6);
if (IN6_IS_ADDR_UNSPECIFIED(addr)) {
ip0dbg(("ndp_add_v6: no addr\n"));
return (EINVAL);
}
if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) {
ip0dbg(("ndp_add_v6: flags = %x\n", (int)flags));
return (EINVAL);
}
if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) &&
(flags & NCE_F_MAPPING)) {
ip0dbg(("ndp_add_v6: extract mask zero for mapping"));
return (EINVAL);
}
/*
* Allocate the mblk to hold the nce.
*
* XXX This can come out of a separate cache - nce_cache.
* We don't need the mp anymore as there are no more
* "qwriter"s
*/
mp = allocb(sizeof (nce_t), BPRI_MED);
if (mp == NULL)
return (ENOMEM);
nce = (nce_t *)mp->b_rptr;
mp->b_wptr = (uchar_t *)&nce[1];
*nce = nce_nil;
/*
* This one holds link layer address
*/
if (ill->ill_net_type == IRE_IF_RESOLVER) {
template = nce_udreq_alloc(ill);
} else {
if (ill->ill_phys_addr_length == IPV6_ADDR_LEN &&
ill->ill_mactype != DL_IPV6) {
/*
* We create a nce_res_mp with the IP nexthop address
* as the destination address if the physical length
* is exactly 16 bytes for point-to-multipoint links
* that do their own resolution from IP to link-layer
* address.
*/
template = ill_dlur_gen((uchar_t *)addr,
ill->ill_phys_addr_length, ill->ill_sap,
ill->ill_sap_length);
} else {
if (ill->ill_resolver_mp == NULL) {
freeb(mp);
return (EINVAL);
}
ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER));
template = copyb(ill->ill_resolver_mp);
}
}
if (template == NULL) {
freeb(mp);
return (ENOMEM);
}
nce->nce_ill = ill;
nce->nce_ipversion = IPV6_VERSION;
nce->nce_flags = flags;
nce->nce_state = state;
nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
nce->nce_rcnt = ill->ill_xmit_count;
nce->nce_addr = *addr;
nce->nce_mask = *mask;
nce->nce_extract_mask = *extract_mask;
nce->nce_ll_extract_start = hw_extract_start;
nce->nce_fp_mp = NULL;
nce->nce_res_mp = template;
if (state == ND_REACHABLE)
nce->nce_last = TICK_TO_MSEC(lbolt64);
else
nce->nce_last = 0;
nce->nce_qd_mp = NULL;
nce->nce_mp = mp;
if (hw_addr != NULL)
nce_set_ll(nce, hw_addr);
/* This one is for nce getting created */
nce->nce_refcnt = 1;
mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL);
if (nce->nce_flags & NCE_F_MAPPING) {
ASSERT(IN6_IS_ADDR_MULTICAST(addr));
ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask));
ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask));
ncep = &ipst->ips_ndp6->nce_mask_entries;
} else {
ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
}
nce->nce_trace_disable = B_FALSE;
list_create(&nce->nce_cb, sizeof (nce_cb_t),
offsetof(nce_cb_t, nce_cb_node));
/*
* Atomically ensure that the ill is not CONDEMNED, before
* adding the NCE.
*/
mutex_enter(&ill->ill_lock);
if (ill->ill_state_flags & ILL_CONDEMNED) {
mutex_exit(&ill->ill_lock);
freeb(mp);
freeb(template);
return (EINVAL);
}
if ((nce->nce_next = *ncep) != NULL)
nce->nce_next->nce_ptpn = &nce->nce_next;
*ncep = nce;
nce->nce_ptpn = ncep;
*newnce = nce;
/* This one is for nce being used by an active thread */
NCE_REFHOLD(*newnce);
/* Bump up the number of nce's referencing this ill */
DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
(char *), "nce", (void *), nce);
ill->ill_nce_cnt++;
mutex_exit(&ill->ill_lock);
err = 0;
if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) {
mutex_enter(&nce->nce_lock);
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
mutex_exit(&nce->nce_lock);
dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE);
if (dropped) {
mutex_enter(&nce->nce_lock);
nce->nce_pcnt++;
mutex_exit(&nce->nce_lock);
}
NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill));
mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
err = EINPROGRESS;
} else if (flags & NCE_F_UNSOL_ADV) {
/*
* We account for the transmit below by assigning one
* less than the ndd variable. Subsequent decrements
* are done in ndp_timer.
*/
mutex_enter(&nce->nce_lock);
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1;
mutex_exit(&nce->nce_lock);
dropped = nce_xmit_advert(nce, B_TRUE, &ipv6_all_hosts_mcast,
0);
mutex_enter(&nce->nce_lock);
if (dropped)
nce->nce_unsolicit_count++;
if (nce->nce_unsolicit_count != 0) {
ASSERT(nce->nce_timeout_id == 0);
nce->nce_timeout_id = timeout(ndp_timer, nce,
MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval));
}
mutex_exit(&nce->nce_lock);
mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
}
/*
* If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then
* we call nce_fastpath as soon as the nce is resolved in ndp_process.
* We call nce_fastpath from nce_update if the link layer address of
* the peer changes from nce_update
*/
if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER)
nce_fastpath(nce);
return (err);
}
int
ndp_lookup_then_add_v6(ill_t *ill, boolean_t match_illgrp, uchar_t *hw_addr,
const in6_addr_t *addr, const in6_addr_t *mask,
const in6_addr_t *extract_mask, uint32_t hw_extract_start, uint16_t flags,
uint16_t state, nce_t **newnce)
{
int err = 0;
nce_t *nce;
ip_stack_t *ipst = ill->ill_ipst;
ASSERT(ill->ill_isv6);
mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
/* Get head of v6 hash table */
nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
nce = nce_lookup_addr(ill, match_illgrp, addr, nce);
if (nce == NULL) {
err = ndp_add_v6(ill,
hw_addr,
addr,
mask,
extract_mask,
hw_extract_start,
flags,
state,
newnce);
} else {
*newnce = nce;
err = EEXIST;
}
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
return (err);
}
/*
* Remove all the CONDEMNED nces from the appropriate hash table.
* We create a private list of NCEs, these may have ires pointing
* to them, so the list will be passed through to clean up dependent
* ires and only then we can do NCE_REFRELE which can make NCE inactive.
*/
static void
nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list)
{
nce_t *nce1;
nce_t **ptpn;
ASSERT(MUTEX_HELD(&ndp->ndp_g_lock));
ASSERT(ndp->ndp_g_walker == 0);
for (; nce; nce = nce1) {
nce1 = nce->nce_next;
mutex_enter(&nce->nce_lock);
if (nce->nce_flags & NCE_F_CONDEMNED) {
ptpn = nce->nce_ptpn;
nce1 = nce->nce_next;
if (nce1 != NULL)
nce1->nce_ptpn = ptpn;
*ptpn = nce1;
nce->nce_ptpn = NULL;
nce->nce_next = NULL;
nce->nce_next = *free_nce_list;
*free_nce_list = nce;
}
mutex_exit(&nce->nce_lock);
}
}
/*
* 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup()
* will return this NCE. Also no new IREs will be created that
* point to this NCE (See ire_add_v6). Also no new timeouts will
* be started (See NDP_RESTART_TIMER).
* 2. Cancel any currently running timeouts.
* 3. If there is an ndp walker, return. The walker will do the cleanup.
* This ensures that walkers see a consistent list of NCEs while walking.
* 4. Otherwise remove the NCE from the list of NCEs
* 5. Delete all IREs pointing to this NCE.
*/
void
ndp_delete(nce_t *nce)
{
nce_t **ptpn;
nce_t *nce1;
int ipversion = nce->nce_ipversion;
ndp_g_t *ndp;
ip_stack_t *ipst = nce->nce_ill->ill_ipst;
if (ipversion == IPV4_VERSION)
ndp = ipst->ips_ndp4;
else
ndp = ipst->ips_ndp6;
/* Serialize deletes */
mutex_enter(&nce->nce_lock);
if (nce->nce_flags & NCE_F_CONDEMNED) {
/* Some other thread is doing the delete */
mutex_exit(&nce->nce_lock);
return;
}
/*
* Caller has a refhold. Also 1 ref for being in the list. Thus
* refcnt has to be >= 2
*/
ASSERT(nce->nce_refcnt >= 2);
nce->nce_flags |= NCE_F_CONDEMNED;
mutex_exit(&nce->nce_lock);
nce_fastpath_list_delete(nce);
/* Complete any waiting callbacks */
nce_cb_dispatch(nce);
/*
* Cancel any running timer. Timeout can't be restarted
* since CONDEMNED is set. Can't hold nce_lock across untimeout.
* Passing invalid timeout id is fine.
*/
if (nce->nce_timeout_id != 0) {
(void) untimeout(nce->nce_timeout_id);
nce->nce_timeout_id = 0;
}
mutex_enter(&ndp->ndp_g_lock);
if (nce->nce_ptpn == NULL) {
/*
* The last ndp walker has already removed this nce from
* the list after we marked the nce CONDEMNED and before
* we grabbed the global lock.
*/
mutex_exit(&ndp->ndp_g_lock);
return;
}
if (ndp->ndp_g_walker > 0) {
/*
* Can't unlink. The walker will clean up
*/
ndp->ndp_g_walker_cleanup = B_TRUE;
mutex_exit(&ndp->ndp_g_lock);
return;
}
/*
* Now remove the nce from the list. NDP_RESTART_TIMER won't restart
* the timer since it is marked CONDEMNED.
*/
ptpn = nce->nce_ptpn;
nce1 = nce->nce_next;
if (nce1 != NULL)
nce1->nce_ptpn = ptpn;
*ptpn = nce1;
nce->nce_ptpn = NULL;
nce->nce_next = NULL;
mutex_exit(&ndp->ndp_g_lock);
nce_ire_delete(nce);
}
void
ndp_inactive(nce_t *nce)
{
mblk_t **mpp;
ill_t *ill;
ASSERT(nce->nce_refcnt == 0);
ASSERT(MUTEX_HELD(&nce->nce_lock));
ASSERT(nce->nce_fastpath == NULL);
/* Free all nce allocated messages */
mpp = &nce->nce_first_mp_to_free;
do {
while (*mpp != NULL) {
mblk_t *mp;
mp = *mpp;
*mpp = mp->b_next;
inet_freemsg(mp);
}
} while (mpp++ != &nce->nce_last_mp_to_free);
if (nce->nce_ipversion == IPV6_VERSION) {
/*
* must have been cleaned up in nce_delete
*/
ASSERT(list_is_empty(&nce->nce_cb));
list_destroy(&nce->nce_cb);
}
#ifdef DEBUG
nce_trace_cleanup(nce);
#endif
ill = nce->nce_ill;
mutex_enter(&ill->ill_lock);
DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
(char *), "nce", (void *), nce);
ill->ill_nce_cnt--;
/*
* If the number of nce's associated with this ill have dropped
* to zero, check whether we need to restart any operation that
* is waiting for this to happen.
*/
if (ILL_DOWN_OK(ill)) {
/* ipif_ill_refrele_tail drops the ill_lock */
ipif_ill_refrele_tail(ill);
} else {
mutex_exit(&ill->ill_lock);
}
mutex_destroy(&nce->nce_lock);
if (nce->nce_mp != NULL)
inet_freemsg(nce->nce_mp);
}
/*
* ndp_walk routine. Delete the nce if it is associated with the ill
* that is going away. Always called as a writer.
*/
void
ndp_delete_per_ill(nce_t *nce, uchar_t *arg)
{
if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) {
ndp_delete(nce);
}
}
/*
* Walk a list of to be inactive NCEs and blow away all the ires.
*/
static void
nce_ire_delete_list(nce_t *nce)
{
nce_t *nce_next;
ASSERT(nce != NULL);
while (nce != NULL) {
nce_next = nce->nce_next;
nce->nce_next = NULL;
/*
* It is possible for the last ndp walker (this thread)
* to come here after ndp_delete has marked the nce CONDEMNED
* and before it has removed the nce from the fastpath list
* or called untimeout. So we need to do it here. It is safe
* for both ndp_delete and this thread to do it twice or
* even simultaneously since each of the threads has a
* reference on the nce.
*/
nce_fastpath_list_delete(nce);
/*
* Cancel any running timer. Timeout can't be restarted
* since CONDEMNED is set. Can't hold nce_lock across untimeout.
* Passing invalid timeout id is fine.
*/
if (nce->nce_timeout_id != 0) {
(void) untimeout(nce->nce_timeout_id);
nce->nce_timeout_id = 0;
}
/*
* We might hit this func thus in the v4 case:
* ipif_down->ipif_ndp_down->ndp_walk
*/
if (nce->nce_ipversion == IPV4_VERSION) {
ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE,
IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill);
} else {
ASSERT(nce->nce_ipversion == IPV6_VERSION);
ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE,
IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill);
}
NCE_REFRELE_NOTR(nce);
nce = nce_next;
}
}
/*
* Delete an ire when the nce goes away.
*/
/* ARGSUSED */
static void
nce_ire_delete(nce_t *nce)
{
if (nce->nce_ipversion == IPV6_VERSION) {
ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE,
nce_ire_delete1, (char *)nce, nce->nce_ill);
NCE_REFRELE_NOTR(nce);
} else {
ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE,
nce_ire_delete1, (char *)nce, nce->nce_ill);
NCE_REFRELE_NOTR(nce);
}
}
/*
* ire_walk routine used to delete every IRE that shares this nce
*/
static void
nce_ire_delete1(ire_t *ire, char *nce_arg)
{
nce_t *nce = (nce_t *)nce_arg;
ASSERT(ire->ire_type == IRE_CACHE);
if (ire->ire_nce == nce) {
ASSERT(ire->ire_ipversion == nce->nce_ipversion);
ire_delete(ire);
}
}
/*
* Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted.
*/
boolean_t
ndp_restart_dad(nce_t *nce)
{
boolean_t started;
boolean_t dropped;
if (nce == NULL)
return (B_FALSE);
mutex_enter(&nce->nce_lock);
if (nce->nce_state == ND_PROBE) {
mutex_exit(&nce->nce_lock);
started = B_TRUE;
} else if (nce->nce_state == ND_REACHABLE) {
nce->nce_state = ND_PROBE;
nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1;
mutex_exit(&nce->nce_lock);
dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE);
if (dropped) {
mutex_enter(&nce->nce_lock);
nce->nce_pcnt++;
mutex_exit(&nce->nce_lock);
}
NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill));
started = B_TRUE;
} else {
mutex_exit(&nce->nce_lock);
started = B_FALSE;
}
return (started);
}
/*
* IPv6 Cache entry lookup. Try to find an nce matching the parameters passed.
* If one is found, the refcnt on the nce will be incremented.
*/
nce_t *
ndp_lookup_v6(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr,
boolean_t caller_holds_lock)
{
nce_t *nce;
ip_stack_t *ipst = ill->ill_ipst;
ASSERT(ill->ill_isv6);
if (!caller_holds_lock)
mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
/* Get head of v6 hash table */
nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
nce = nce_lookup_addr(ill, match_illgrp, addr, nce);
if (nce == NULL)
nce = nce_lookup_mapping(ill, addr);
if (!caller_holds_lock)
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
return (nce);
}
/*
* IPv4 Cache entry lookup. Try to find an nce matching the parameters passed.
* If one is found, the refcnt on the nce will be incremented.
* Since multicast mappings are handled in arp, there are no nce_mcast_entries
* so we skip the nce_lookup_mapping call.
* XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL
*/
nce_t *
ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock)
{
nce_t *nce;
in6_addr_t addr6;
ip_stack_t *ipst = ill->ill_ipst;
if (!caller_holds_lock)
mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
/* Get head of v4 hash table */
nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr));
IN6_IPADDR_TO_V4MAPPED(*addr, &addr6);
/*
* NOTE: IPv4 never matches across the illgrp since the NCE's we're
* looking up have fastpath headers that are inherently per-ill.
*/
nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce);
if (!caller_holds_lock)
mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
return (nce);
}
/*
* Cache entry lookup. Try to find an nce matching the parameters passed.
* Look only for exact entries (no mappings). If an nce is found, increment
* the hold count on that nce. The caller passes in the start of the
* appropriate hash table, and must be holding the appropriate global
* lock (ndp_g_lock).
*/
static nce_t *
nce_lookup_addr(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr,
nce_t *nce)
{
ndp_g_t *ndp;
ip_stack_t *ipst = ill->ill_ipst;
if (ill->ill_isv6)
ndp = ipst->ips_ndp6;
else
ndp = ipst->ips_ndp4;
ASSERT(MUTEX_HELD(&ndp->ndp_g_lock));
if (IN6_IS_ADDR_UNSPECIFIED(addr))
return (NULL);
for (; nce != NULL; nce = nce->nce_next) {
if (nce->nce_ill == ill ||
match_illgrp && IS_IN_SAME_ILLGRP(ill, nce->nce_ill)) {
if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) &&
IN6_ARE_ADDR_EQUAL(&nce->nce_mask,
&ipv6_all_ones)) {
mutex_enter(&nce->nce_lock);
if (!(nce->nce_flags & NCE_F_CONDEMNED)) {
NCE_REFHOLD_LOCKED(nce);
mutex_exit(&nce->nce_lock);
break;
}
mutex_exit(&nce->nce_lock);
}
}
}
return (nce);
}
/*
* Cache entry lookup. Try to find an nce matching the parameters passed.
* Look only for mappings.
*/
static nce_t *
nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr)
{
nce_t *nce;
ip_stack_t *ipst = ill->ill_ipst;
ASSERT(ill != NULL && ill->ill_isv6);
ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock));
if (!IN6_IS_ADDR_MULTICAST(addr))
return (NULL);
nce = ipst->ips_ndp6->nce_mask_entries;
for (; nce != NULL; nce = nce->nce_next)
if (nce->nce_ill == ill &&
(V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) {
mutex_enter(&nce->nce_lock);
if (!(nce->nce_flags & NCE_F_CONDEMNED)) {
NCE_REFHOLD_LOCKED(nce);
mutex_exit(&nce->nce_lock);
break;
}
mutex_exit(&nce->nce_lock);
}
return (nce);
}
/*
* Process passed in parameters either from an incoming packet or via
* user ioctl.
*/
static void
nce_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv)
{
ill_t *ill = nce->nce_ill;
uint32_t hw_addr_len = ill->ill_nd_lla_len;
mblk_t *mp;
boolean_t ll_updated = B_FALSE;
boolean_t ll_changed;
ip_stack_t *ipst = ill->ill_ipst;
ASSERT(nce->nce_ipversion == IPV6_VERSION);
/*
* No updates of link layer address or the neighbor state is
* allowed, when the cache is in NONUD state. This still
* allows for responding to reachability solicitation.
*/
mutex_enter(&nce->nce_lock);
if (nce->nce_state == ND_INCOMPLETE) {
if (hw_addr == NULL) {
mutex_exit(&nce->nce_lock);
return;
}
nce_set_ll(nce, hw_addr);
/*
* Update nce state and send the queued packets
* back to ip this time ire will be added.
*/
if (flag & ND_NA_FLAG_SOLICITED) {
nce_update(nce, ND_REACHABLE, NULL);
} else {
nce_update(nce, ND_STALE, NULL);
}
mutex_exit(&nce->nce_lock);
nce_fastpath(nce);
nce_cb_dispatch(nce); /* complete callbacks */
mutex_enter(&nce->nce_lock);
mp = nce->nce_qd_mp;
nce->nce_qd_mp = NULL;
mutex_exit(&nce->nce_lock);
while (mp != NULL) {
mblk_t *nxt_mp, *data_mp;
nxt_mp = mp->b_next;
mp->b_next = NULL;
if (mp->b_datap->db_type == M_CTL)
data_mp = mp->b_cont;
else
data_mp = mp;
if (data_mp->b_prev != NULL) {
ill_t *inbound_ill;
queue_t *fwdq = NULL;
uint_t ifindex;
ifindex = (uint_t)(uintptr_t)data_mp->b_prev;
inbound_ill = ill_lookup_on_ifindex(ifindex,
B_TRUE, NULL, NULL, NULL, NULL, ipst);
if (inbound_ill == NULL) {
data_mp->b_prev = NULL;
freemsg(mp);
return;
} else {
fwdq = inbound_ill->ill_rq;
}
data_mp->b_prev = NULL;
/*
* Send a forwarded packet back into ip_rput_v6
* just as in ire_send_v6().
* Extract the queue from b_prev (set in
* ip_rput_data_v6).
*/
if (fwdq != NULL) {
/*
* Forwarded packets hop count will
* get decremented in ip_rput_data_v6
*/
if (data_mp != mp)
freeb(mp);
put(fwdq, data_mp);
} else {
/*
* Send locally originated packets back
* into ip_wput_v6.
*/
put(ill->ill_wq, mp);
}
ill_refrele(inbound_ill);
} else {
put(ill->ill_wq, mp);
}
mp = nxt_mp;
}
return;
}
ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len);
if (!is_adv) {
/* If this is a SOLICITATION request only */
if (ll_changed)
nce_update(nce, ND_STALE, hw_addr);
mutex_exit(&nce->nce_lock);
nce_cb_dispatch(nce);
return;
}
if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) {
/* If in any other state than REACHABLE, ignore */
if (nce->nce_state == ND_REACHABLE) {
nce_update(nce, ND_STALE, NULL);
}
mutex_exit(&nce->nce_lock);
nce_cb_dispatch(nce);
return;
} else {
if (ll_changed) {
nce_update(nce, ND_UNCHANGED, hw_addr);
ll_updated = B_TRUE;
}
if (flag & ND_NA_FLAG_SOLICITED) {
nce_update(nce, ND_REACHABLE, NULL);
} else {
if (ll_updated) {
nce_update(nce, ND_STALE, NULL);
}
}
mutex_exit(&nce->nce_lock);
if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags &
NCE_F_ISROUTER)) {
ire_t *ire;
/*
* Router turned to host. We need to remove the
* entry as well as any default route that may be
* using this as a next hop. This is required by
* section 7.2.5 of RFC 2461.
*/
ire = ire_ftable_lookup_v6(&ipv6_all_zeros,
&ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT,
nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL,
MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW |
MATCH_IRE_DEFAULT, ipst);
if (ire != NULL) {
ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst);
ire_delete(ire);
ire_refrele(ire);
}
ndp_delete(nce); /* will do nce_cb_dispatch */
} else {
nce_cb_dispatch(nce);
}
}
}
/*
* Walker state structure used by ndp_process() / ndp_process_entry().
*/
typedef struct ndp_process_data {
ill_t *np_ill; /* ill/illgrp to match against */
const in6_addr_t *np_addr; /* IPv6 address to match */
uchar_t *np_hw_addr; /* passed to nce_process() */
uint32_t np_flag; /* passed to nce_process() */
boolean_t np_is_adv; /* passed to nce_process() */
} ndp_process_data_t;
/*
* Walker callback used by ndp_process() for IPMP groups: calls nce_process()
* for each NCE with a matching address that's in the same IPMP group.
*/
static void
ndp_process_entry(nce_t *nce, void *arg)
{
ndp_process_data_t *npp = arg;
if (IS_IN_SAME_ILLGRP(nce->nce_ill, npp->np_ill) &&
IN6_ARE_ADDR_EQUAL(&nce->nce_addr, npp->np_addr) &&
IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) {
nce_process(nce, npp->np_hw_addr, npp->np_flag, npp->np_is_adv);
}
}
/*
* Wrapper around nce_process() that handles IPMP. In particular, for IPMP,
* NCEs are per-underlying-ill (because of nce_fp_mp) and thus we may have
* more than one NCE for a given IPv6 address to tend to. In that case, we
* need to walk all NCEs and callback nce_process() for each one. Since this
* is expensive, in the non-IPMP case we just directly call nce_process().
* Ultimately, nce_fp_mp needs to be moved out of the nce_t so that all IP
* interfaces in an IPMP group share the same NCEs -- at which point this
* function can be removed entirely.
*/
void
ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv)
{
ill_t *ill = nce->nce_ill;
struct ndp_g_s *ndp = ill->ill_ipst->ips_ndp6;
ndp_process_data_t np;
if (ill->ill_grp == NULL) {
nce_process(nce, hw_addr, flag, is_adv);
return;
}
/* IPMP case: walk all NCEs */
np.np_ill = ill;
np.np_addr = &nce->nce_addr;
np.np_flag = flag;
np.np_is_adv = is_adv;
np.np_hw_addr = hw_addr;
ndp_walk_common(ndp, NULL, (pfi_t)ndp_process_entry, &np, ALL_ZONES);
}
/*
* Pass arg1 to the pfi supplied, along with each nce in existence.
* ndp_walk() places a REFHOLD on the nce and drops the lock when
* walking the hash list.
*/
void
ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1,
boolean_t trace)
{
nce_t *nce;
nce_t *nce1;
nce_t **ncep;
nce_t *free_nce_list = NULL;
mutex_enter(&ndp->ndp_g_lock);
/* Prevent ndp_delete from unlink and free of NCE */
ndp->ndp_g_walker++;
mutex_exit(&ndp->ndp_g_lock);
for (ncep = ndp->nce_hash_tbl;
ncep < A_END(ndp->nce_hash_tbl); ncep++) {
for (nce = *ncep; nce != NULL; nce = nce1) {
nce1 = nce->nce_next;
if (ill == NULL || nce->nce_ill == ill) {
if (trace) {
NCE_REFHOLD(nce);
(*pfi)(nce, arg1);
NCE_REFRELE(nce);
} else {
NCE_REFHOLD_NOTR(nce);
(*pfi)(nce, arg1);
NCE_REFRELE_NOTR(nce);
}
}
}
}
for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) {
nce1 = nce->nce_next;
if (ill == NULL || nce->nce_ill == ill) {
if (trace) {
NCE_REFHOLD(nce);
(*pfi)(nce, arg1);
NCE_REFRELE(nce);
} else {
NCE_REFHOLD_NOTR(nce);
(*pfi)(nce, arg1);
NCE_REFRELE_NOTR(nce);
}
}
}
mutex_enter(&ndp->ndp_g_lock);
ndp->ndp_g_walker--;
/*
* While NCE's are removed from global list they are placed
* in a private list, to be passed to nce_ire_delete_list().
* The reason is, there may be ires pointing to this nce
* which needs to cleaned up.
*/
if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) {
/* Time to delete condemned entries */
for (ncep = ndp->nce_hash_tbl;
ncep < A_END(ndp->nce_hash_tbl); ncep++) {
nce = *ncep;
if (nce != NULL) {
nce_remove(ndp, nce, &free_nce_list);
}
}
nce = ndp->nce_mask_entries;
if (nce != NULL) {
nce_remove(ndp, nce, &free_nce_list);
}
ndp->ndp_g_walker_cleanup = B_FALSE;
}
mutex_exit(&ndp->ndp_g_lock);
if (free_nce_list != NULL) {
nce_ire_delete_list(free_nce_list);
}
}
/*
* Walk everything.
* Note that ill can be NULL hence can't derive the ipst from it.
*/
void
ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst)
{
ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE);
ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE);
}
/*
* Process resolve requests. Handles both mapped entries
* as well as cases that needs to be send out on the wire.
* Lookup a NCE for a given IRE. Regardless of whether one exists
* or one is created, we defer making ire point to nce until the
* ire is actually added at which point the nce_refcnt on the nce is
* incremented. This is done primarily to have symmetry between ire_add()
* and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
*/
int
ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid)
{
nce_t *nce, *hw_nce = NULL;
int err;
ill_t *ipmp_ill;
uint16_t nce_flags;
mblk_t *mp_nce = NULL;
ip_stack_t *ipst = ill->ill_ipst;
uchar_t *hwaddr = NULL;
ASSERT(ill->ill_isv6);
if (IN6_IS_ADDR_MULTICAST(dst))
return (nce_set_multicast(ill, dst));
nce_flags = (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0;
/*
* If `ill' is under IPMP, then first check to see if there's an NCE
* for `dst' on the IPMP meta-interface (e.g., because an application
* explicitly did an SIOCLIFSETND to tie a hardware address to `dst').
* If so, we use that hardware address when creating the NCE below.
* Note that we don't yet have a mechanism to remove these NCEs if the
* NCE for `dst' on the IPMP meta-interface is subsequently removed --
* but rather than build such a beast, we should fix NCEs so that they
* can be properly shared across an IPMP group.
*/
if (IS_UNDER_IPMP(ill)) {
if ((ipmp_ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) {
hw_nce = ndp_lookup_v6(ipmp_ill, B_FALSE, dst, B_FALSE);
if (hw_nce != NULL && hw_nce->nce_res_mp != NULL) {
hwaddr = hw_nce->nce_res_mp->b_rptr +
NCE_LL_ADDR_OFFSET(ipmp_ill);
nce_flags |= hw_nce->nce_flags;
}
ill_refrele(ipmp_ill);
}
}
err = ndp_lookup_then_add_v6(ill,
B_FALSE, /* NCE fastpath is per ill; don't match across group */
hwaddr,
dst,
&ipv6_all_ones,
&ipv6_all_zeros,
0,
nce_flags,
hwaddr != NULL ? ND_REACHABLE : ND_INCOMPLETE,
&nce);
if (hw_nce != NULL)
NCE_REFRELE(hw_nce);
switch (err) {
case 0:
/*
* New cache entry was created. Make sure that the state
* is not ND_INCOMPLETE. It can be in some other state
* even before we send out the solicitation as we could
* get un-solicited advertisements.
*
* If this is an XRESOLV interface, simply return 0,
* since we don't want to solicit just yet.
*/
if (ill->ill_flags & ILLF_XRESOLV) {
NCE_REFRELE(nce);
return (0);
}
mutex_enter(&nce->nce_lock);
if (nce->nce_state != ND_INCOMPLETE) {
mutex_exit(&nce->nce_lock);
NCE_REFRELE(nce);
return (0);
}
if (nce->nce_rcnt == 0) {
/* The caller will free mp */
mutex_exit(&nce->nce_lock);
ndp_delete(nce);
NCE_REFRELE(nce);
return (ESRCH);
}
mp_nce = ip_prepend_zoneid(mp, zoneid, ipst);
if (mp_nce == NULL) {
/* The caller will free mp */
mutex_exit(&nce->nce_lock);
ndp_delete(nce);
NCE_REFRELE(nce);
return (ENOMEM);
}
nce_queue_mp(nce, mp_nce);
ip_ndp_resolve(nce);
mutex_exit(&nce->nce_lock);
NCE_REFRELE(nce);
return (EINPROGRESS);
case EEXIST:
/* Resolution in progress just queue the packet */
mutex_enter(&nce->nce_lock);
if (nce->nce_state == ND_INCOMPLETE) {
mp_nce = ip_prepend_zoneid(mp, zoneid, ipst);
if (mp_nce == NULL) {
err = ENOMEM;
} else {
nce_queue_mp(nce, mp_nce);
err = EINPROGRESS;
}
} else {
/*
* Any other state implies we have
* a nce but IRE needs to be added ...
* ire_add_v6() will take care of the
* the case when the nce becomes CONDEMNED
* before the ire is added to the table.
*/
err = 0;
}
mutex_exit(&nce->nce_lock);
NCE_REFRELE(nce);
break;
default:
ip1dbg(("ndp_resolver: Can't create NCE %d\n", err));
break;
}
return (err);
}
/*
* When there is no resolver, the link layer template is passed in
* the IRE.
* Lookup a NCE for a given IRE. Regardless of whether one exists
* or one is created, we defer making ire point to nce until the
* ire is actually added at which point the nce_refcnt on the nce is
* incremented. This is done primarily to have symmetry between ire_add()
* and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
*/
int
ndp_noresolver(ill_t *ill, const in6_addr_t *dst)
{
nce_t *nce;
int err = 0;
ASSERT(ill != NULL);
ASSERT(ill->ill_isv6);
if (IN6_IS_ADDR_MULTICAST(dst)) {
err = nce_set_multicast(ill, dst);
return (err);
}
err = ndp_lookup_then_add_v6(ill,
B_FALSE, /* NCE fastpath is per ill; don't match across group */
ill->ill_dest_addr, /* hardware address is NULL in most cases */
dst,
&ipv6_all_ones,
&ipv6_all_zeros,
0,
(ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0,
ND_REACHABLE,
&nce);
switch (err) {
case 0:
/*
* Cache entry with a proper resolver cookie was
* created.
*/
NCE_REFRELE(nce);
break;
case EEXIST:
err = 0;
NCE_REFRELE(nce);
break;
default:
ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err));
break;
}
return (err);
}
/*
* For each interface an entry is added for the unspecified multicast group.
* Here that mapping is used to form the multicast cache entry for a particular
* multicast destination.
*/
static int
nce_set_multicast(ill_t *ill, const in6_addr_t *dst)
{
nce_t *mnce; /* Multicast mapping entry */
nce_t *nce;
uchar_t *hw_addr = NULL;
int err = 0;
ip_stack_t *ipst = ill->ill_ipst;
ASSERT(ill != NULL);
ASSERT(ill->ill_isv6);
ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst)));
mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst));
nce = nce_lookup_addr(ill, B_FALSE, dst, nce);
if (nce != NULL) {
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
NCE_REFRELE(nce);
return (0);
}
/* No entry, now lookup for a mapping this should never fail */
mnce = nce_lookup_mapping(ill, dst);
if (mnce == NULL) {
/* Something broken for the interface. */
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
return (ESRCH);
}
ASSERT(mnce->nce_flags & NCE_F_MAPPING);
if (ill->ill_net_type == IRE_IF_RESOLVER) {
/*
* For IRE_IF_RESOLVER a hardware mapping can be
* generated, for IRE_IF_NORESOLVER, resolution cookie
* in the ill is copied in ndp_add_v6().
*/
hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP);
if (hw_addr == NULL) {
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
NCE_REFRELE(mnce);
return (ENOMEM);
}
nce_make_mapping(mnce, hw_addr, (uchar_t *)dst);
}
NCE_REFRELE(mnce);
/*
* IRE_IF_NORESOLVER type simply copies the resolution
* cookie passed in. So no hw_addr is needed.
*/
err = ndp_add_v6(ill,
hw_addr,
dst,
&ipv6_all_ones,
&ipv6_all_zeros,
0,
NCE_F_NONUD,
ND_REACHABLE,
&nce);
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
if (hw_addr != NULL)
kmem_free(hw_addr, ill->ill_nd_lla_len);
if (err != 0) {
ip1dbg(("nce_set_multicast: create failed" "%d\n", err));
return (err);
}
NCE_REFRELE(nce);
return (0);
}
/*
* Return the link layer address, and any flags of a nce.
*/
int
ndp_query(ill_t *ill, struct lif_nd_req *lnr)
{
nce_t *nce;
in6_addr_t *addr;
sin6_t *sin6;
dl_unitdata_req_t *dl;
ASSERT(ill != NULL && ill->ill_isv6);
sin6 = (sin6_t *)&lnr->lnr_addr;
addr = &sin6->sin6_addr;
/*
* NOTE: if the ill is an IPMP interface, then match against the whole
* illgrp. This e.g. allows in.ndpd to retrieve the link layer
* addresses for the data addresses on an IPMP interface even though
* ipif_ndp_up() created them with an nce_ill of ipif_bound_ill.
*/
nce = ndp_lookup_v6(ill, IS_IPMP(ill), addr, B_FALSE);
if (nce == NULL)
return (ESRCH);
/* If in INCOMPLETE state, no link layer address is available yet */
if (!NCE_ISREACHABLE(nce)) {
NCE_REFRELE(nce);
return (ESRCH);
}
dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr;
if (ill->ill_flags & ILLF_XRESOLV)
lnr->lnr_hdw_len = dl->dl_dest_addr_length;
else
lnr->lnr_hdw_len = ill->ill_nd_lla_len;
ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <=
sizeof (lnr->lnr_hdw_addr));
bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill),
(uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len);
if (nce->nce_flags & NCE_F_ISROUTER)
lnr->lnr_flags = NDF_ISROUTER_ON;
if (nce->nce_flags & NCE_F_ANYCAST)
lnr->lnr_flags |= NDF_ANYCAST_ON;
NCE_REFRELE(nce);
return (0);
}
/*
* Send Enable/Disable multicast reqs to driver.
*/
int
ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len,
uint32_t hw_addr_offset, mblk_t *mp)
{
nce_t *nce;
uchar_t *hw_addr;
ip_stack_t *ipst = ill->ill_ipst;
ASSERT(ill != NULL && ill->ill_isv6);
ASSERT(ill->ill_net_type == IRE_IF_RESOLVER);
hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len);
if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) {
freemsg(mp);
return (EINVAL);
}
mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
nce = nce_lookup_mapping(ill, addr);
if (nce == NULL) {
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
freemsg(mp);
return (ESRCH);
}
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
/*
* Update dl_addr_length and dl_addr_offset for primitives that
* have physical addresses as opposed to full saps
*/
switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) {
case DL_ENABMULTI_REQ:
/* Track the state if this is the first enabmulti */
if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
ip1dbg(("ndp_mcastreq: ENABMULTI\n"));
break;
case DL_DISABMULTI_REQ:
ip1dbg(("ndp_mcastreq: DISABMULTI\n"));
break;
default:
NCE_REFRELE(nce);
ip1dbg(("ndp_mcastreq: default\n"));
return (EINVAL);
}
nce_make_mapping(nce, hw_addr, (uchar_t *)addr);
NCE_REFRELE(nce);
ill_dlpi_send(ill, mp);
return (0);
}
/*
* Send out a NS for resolving the ip address in nce.
*/
void
ip_ndp_resolve(nce_t *nce)
{
in6_addr_t sender6 = ipv6_all_zeros;
uint32_t ms;
mblk_t *mp;
ip6_t *ip6h;
ASSERT(MUTEX_HELD(&nce->nce_lock));
/*
* Pick the src from outgoing packet, if one is available.
* Otherwise let nce_xmit figure out the src.
*/
if ((mp = nce->nce_qd_mp) != NULL) {
/* Handle ip_newroute_v6 giving us IPSEC packets */
if (mp->b_datap->db_type == M_CTL)
mp = mp->b_cont;
ip6h = (ip6_t *)mp->b_rptr;
if (ip6h->ip6_nxt == IPPROTO_RAW) {
/*
* This message should have been pulled up already in
* ip_wput_v6. We can't do pullups here because
* the message could be from the nce_qd_mp which could
* have b_next/b_prev non-NULL.
*/
ASSERT(MBLKL(mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN);
ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t));
}
sender6 = ip6h->ip6_src;
}
ms = nce_solicit(nce, sender6);
mutex_exit(&nce->nce_lock);
if (ms == 0) {
if (nce->nce_state != ND_REACHABLE) {
nce_resolv_failed(nce);
ndp_delete(nce);
}
} else {
NDP_RESTART_TIMER(nce, (clock_t)ms);
}
mutex_enter(&nce->nce_lock);
}
/*
* Send a neighbor solicitation.
* Returns number of milliseconds after which we should either rexmit or abort.
* Return of zero means we should abort.
* The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt.
*
* NOTE: This routine drops nce_lock (and later reacquires it) when sending
* the packet.
*/
uint32_t
nce_solicit(nce_t *nce, in6_addr_t sender)
{
boolean_t dropped;
ASSERT(nce->nce_ipversion == IPV6_VERSION);
ASSERT(MUTEX_HELD(&nce->nce_lock));
if (nce->nce_rcnt == 0)
return (0);
nce->nce_rcnt--;
mutex_exit(&nce->nce_lock);
dropped = nce_xmit_solicit(nce, B_TRUE, &sender, 0);
mutex_enter(&nce->nce_lock);
if (dropped)
nce->nce_rcnt++;
return (nce->nce_ill->ill_reachable_retrans_time);
}
/*
* Attempt to recover an address on an interface that's been marked as a
* duplicate. Because NCEs are destroyed when the interface goes down, there's
* no easy way to just probe the address and have the right thing happen if
* it's no longer in use. Instead, we just bring it up normally and allow the
* regular interface start-up logic to probe for a remaining duplicate and take
* us back down if necessary.
* Neither DHCP nor temporary addresses arrive here; they're excluded by
* ip_ndp_excl.
*/
/* ARGSUSED */
static void
ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
{
ill_t *ill = rq->q_ptr;
ipif_t *ipif;
in6_addr_t *addr = (in6_addr_t *)mp->b_rptr;
for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
/*
* We do not support recovery of proxy ARP'd interfaces,
* because the system lacks a complete proxy ARP mechanism.
*/
if ((ipif->ipif_flags & IPIF_POINTOPOINT) ||
!IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) {
continue;
}
/*
* If we have already recovered or if the interface is going
* away, then ignore.
*/
mutex_enter(&ill->ill_lock);
if (!(ipif->ipif_flags & IPIF_DUPLICATE) ||
(ipif->ipif_state_flags & IPIF_CONDEMNED)) {
mutex_exit(&ill->ill_lock);
continue;
}
ipif->ipif_flags &= ~IPIF_DUPLICATE;
ill->ill_ipif_dup_count--;
mutex_exit(&ill->ill_lock);
ipif->ipif_was_dup = B_TRUE;
VERIFY(ipif_ndp_up(ipif, B_TRUE) != EINPROGRESS);
(void) ipif_up_done_v6(ipif);
}
freeb(mp);
}
/*
* Attempt to recover an IPv6 interface that's been shut down as a duplicate.
* As long as someone else holds the address, the interface will stay down.
* When that conflict goes away, the interface is brought back up. This is
* done so that accidental shutdowns of addresses aren't made permanent. Your
* server will recover from a failure.
*
* For DHCP and temporary addresses, recovery is not done in the kernel.
* Instead, it's handled by user space processes (dhcpagent and in.ndpd).
*
* This function is entered on a timer expiry; the ID is in ipif_recovery_id.
*/
static void
ipif6_dup_recovery(void *arg)
{
ipif_t *ipif = arg;
ipif->ipif_recovery_id = 0;
if (!(ipif->ipif_flags & IPIF_DUPLICATE))
return;
/*
* No lock, because this is just an optimization.
*/
if (ipif->ipif_state_flags & IPIF_CONDEMNED)
return;
/* If the link is down, we'll retry this later */
if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING))
return;
ndp_do_recovery(ipif);
}
/*
* Perform interface recovery by forcing the duplicate interfaces up and
* allowing the system to determine which ones should stay up.
*
* Called both by recovery timer expiry and link-up notification.
*/
void
ndp_do_recovery(ipif_t *ipif)
{
ill_t *ill = ipif->ipif_ill;
mblk_t *mp;
ip_stack_t *ipst = ill->ill_ipst;
mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED);
if (mp == NULL) {
mutex_enter(&ill->ill_lock);
if (ipif->ipif_recovery_id == 0 &&
!(ipif->ipif_state_flags & IPIF_CONDEMNED)) {
ipif->ipif_recovery_id = timeout(ipif6_dup_recovery,
ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery));
}
mutex_exit(&ill->ill_lock);
} else {
/*
* A recovery timer may still be running if we got here from
* ill_restart_dad(); cancel that timer.
*/
if (ipif->ipif_recovery_id != 0)
(void) untimeout(ipif->ipif_recovery_id);
ipif->ipif_recovery_id = 0;
bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr,
sizeof (ipif->ipif_v6lcl_addr));
ill_refhold(ill);
qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_recover, NEW_OP,
B_FALSE);
}
}
/*
* Find the MAC and IP addresses in an NA/NS message.
*/
static void
ip_ndp_find_addresses(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, in6_addr_t *targp,
uchar_t **haddr, uint_t *haddrlenp)
{
ip6_t *ip6h = (ip6_t *)mp->b_rptr;
icmp6_t *icmp6 = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6;
nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6;
uchar_t *addr;
int alen = 0;
if (dl_mp == NULL) {
nd_opt_hdr_t *opt = NULL;
int len;
/*
* If it's from the fast-path, then it can't be a probe
* message, and thus must include a linkaddr option.
* Extract that here.
*/
switch (icmp6->icmp6_type) {
case ND_NEIGHBOR_SOLICIT:
len = mp->b_wptr - (uchar_t *)ns;
if ((len -= sizeof (*ns)) > 0) {
opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1),
len, ND_OPT_SOURCE_LINKADDR);
}
break;
case ND_NEIGHBOR_ADVERT:
len = mp->b_wptr - (uchar_t *)na;
if ((len -= sizeof (*na)) > 0) {
opt = ndp_get_option((nd_opt_hdr_t *)(na + 1),
len, ND_OPT_TARGET_LINKADDR);
}
break;
}
if (opt != NULL && opt->nd_opt_len * 8 - sizeof (*opt) >=
ill->ill_nd_lla_len) {
addr = (uchar_t *)(opt + 1);
alen = ill->ill_nd_lla_len;
}
/*
* We cheat a bit here for the sake of printing usable log
* messages in the rare case where the reply we got was unicast
* without a source linkaddr option, and the interface is in
* fastpath mode. (Sigh.)
*/
if (alen == 0 && ill->ill_type == IFT_ETHER &&
MBLKHEAD(mp) >= sizeof (struct ether_header)) {
struct ether_header *pether;
pether = (struct ether_header *)((char *)ip6h -
sizeof (*pether));
addr = pether->ether_shost.ether_addr_octet;
alen = ETHERADDRL;
}
} else {
dl_unitdata_ind_t *dlu;
dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr;
alen = dlu->dl_src_addr_length;
if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) &&
dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) {
addr = dl_mp->b_rptr + dlu->dl_src_addr_offset;
if (ill->ill_sap_length < 0) {
alen += ill->ill_sap_length;
} else {
addr += ill->ill_sap_length;
alen -= ill->ill_sap_length;
}
}
}
if (alen > 0) {
*haddr = addr;
*haddrlenp = alen;
} else {
*haddr = NULL;
*haddrlenp = 0;
}
/* nd_ns_target and nd_na_target are at the same offset, so we cheat */
*targp = ns->nd_ns_target;
}
/*
* This is for exclusive changes due to NDP duplicate address detection
* failure.
*/
/* ARGSUSED */
static void
ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
{
ill_t *ill = rq->q_ptr;
ipif_t *ipif;
mblk_t *dl_mp = NULL;
uchar_t *haddr;
uint_t haddrlen;
ip_stack_t *ipst = ill->ill_ipst;
in6_addr_t targ;
if (DB_TYPE(mp) != M_DATA) {
dl_mp = mp;
mp = mp->b_cont;
}
ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen);
if (haddr != NULL && haddrlen == ill->ill_phys_addr_length) {
/*
* Ignore conflicts generated by misbehaving switches that
* just reflect our own messages back to us. For IPMP, we may
* see reflections across any ill in the illgrp.
*/
if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 ||
IS_UNDER_IPMP(ill) &&
ipmp_illgrp_find_ill(ill->ill_grp, haddr, haddrlen) != NULL)
goto ignore_conflict;
}
/*
* Look up the appropriate ipif.
*/
ipif = ipif_lookup_addr_v6(&targ, ill, ALL_ZONES, NULL, NULL, NULL,
NULL, ipst);
if (ipif == NULL)
goto ignore_conflict;
/* Reload the ill to match the ipif */
ill = ipif->ipif_ill;
/* If it's already duplicate or ineligible, then don't do anything. */
if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) {
ipif_refrele(ipif);
goto ignore_conflict;
}
/*
* If this is a failure during duplicate recovery, then don't
* complain. It may take a long time to recover.
*/
if (!ipif->ipif_was_dup) {
char ibuf[LIFNAMSIZ];
char hbuf[MAC_STR_LEN];
char sbuf[INET6_ADDRSTRLEN];
ipif_get_name(ipif, ibuf, sizeof (ibuf));
cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);"
" disabled", ibuf,
inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)),
mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf)));
}
mutex_enter(&ill->ill_lock);
ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE));
ipif->ipif_flags |= IPIF_DUPLICATE;
ill->ill_ipif_dup_count++;
mutex_exit(&ill->ill_lock);
(void) ipif_down(ipif, NULL, NULL);
ipif_down_tail(ipif);
mutex_enter(&ill->ill_lock);
if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) &&
ill->ill_net_type == IRE_IF_RESOLVER &&
!(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
ipst->ips_ip_dup_recovery > 0) {
ASSERT(ipif->ipif_recovery_id == 0);
ipif->ipif_recovery_id = timeout(ipif6_dup_recovery,
ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery));
}
mutex_exit(&ill->ill_lock);
ipif_refrele(ipif);
ignore_conflict:
if (dl_mp != NULL)
freeb(dl_mp);
freemsg(mp);
}
/*
* Handle failure by tearing down the ipifs with the specified address. Note
* that tearing down the ipif also means deleting the nce through ipif_down, so
* it's not possible to do recovery by just restarting the nce timer. Instead,
* we start a timer on the ipif.
*/
static void
ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
{
if ((mp = copymsg(mp)) != NULL) {
if (dl_mp == NULL)
dl_mp = mp;
else if ((dl_mp = copyb(dl_mp)) != NULL)
dl_mp->b_cont = mp;
if (dl_mp == NULL) {
freemsg(mp);
} else {
ill_refhold(ill);
qwriter_ip(ill, ill->ill_rq, dl_mp, ip_ndp_excl, NEW_OP,
B_FALSE);
}
}
}
/*
* Handle a discovered conflict: some other system is advertising that it owns
* one of our IP addresses. We need to defend ourselves, or just shut down the
* interface.
*/
static void
ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce)
{
ipif_t *ipif;
uint32_t now;
uint_t maxdefense;
uint_t defs;
ip_stack_t *ipst = ill->ill_ipst;
ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL,
NULL, NULL, ipst);
if (ipif == NULL)
return;
/*
* First, figure out if this address is disposable.
*/
if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY))
maxdefense = ipst->ips_ip_max_temp_defend;
else
maxdefense = ipst->ips_ip_max_defend;
/*
* Now figure out how many times we've defended ourselves. Ignore
* defenses that happened long in the past.
*/
now = gethrestime_sec();
mutex_enter(&nce->nce_lock);
if ((defs = nce->nce_defense_count) > 0 &&
now - nce->nce_defense_time > ipst->ips_ip_defend_interval) {
nce->nce_defense_count = defs = 0;
}
nce->nce_defense_count++;
nce->nce_defense_time = now;
mutex_exit(&nce->nce_lock);
ipif_refrele(ipif);
/*
* If we've defended ourselves too many times already, then give up and
* tear down the interface(s) using this address. Otherwise, defend by
* sending out an unsolicited Neighbor Advertisement.
*/
if (defs >= maxdefense) {
ip_ndp_failure(ill, mp, dl_mp);
} else {
char hbuf[MAC_STR_LEN];
char sbuf[INET6_ADDRSTRLEN];
uchar_t *haddr;
uint_t haddrlen;
in6_addr_t targ;
ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen);
cmn_err(CE_WARN, "node %s is using our IP address %s on %s",
mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf)),
inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)),
ill->ill_name);
(void) nce_xmit_advert(nce, B_FALSE, &ipv6_all_hosts_mcast, 0);
}
}
static void
ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
{
nd_neighbor_solicit_t *ns;
uint32_t hlen = ill->ill_nd_lla_len;
uchar_t *haddr = NULL;
icmp6_t *icmp_nd;
ip6_t *ip6h;
nce_t *our_nce = NULL;
in6_addr_t target;
in6_addr_t src;
int len;
int flag = 0;
nd_opt_hdr_t *opt = NULL;
boolean_t bad_solicit = B_FALSE;
mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib;
ip6h = (ip6_t *)mp->b_rptr;
icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
src = ip6h->ip6_src;
ns = (nd_neighbor_solicit_t *)icmp_nd;
target = ns->nd_ns_target;
if (IN6_IS_ADDR_MULTICAST(&target)) {
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ndp_input_solicit: Target is"
" multicast! %s\n", AF_INET6, &target);
}
bad_solicit = B_TRUE;
goto done;
}
if (len > sizeof (nd_neighbor_solicit_t)) {
/* Options present */
opt = (nd_opt_hdr_t *)&ns[1];
len -= sizeof (nd_neighbor_solicit_t);
if (!ndp_verify_optlen(opt, len)) {
ip1dbg(("ndp_input_solicit: Bad opt len\n"));
bad_solicit = B_TRUE;
goto done;
}
}
if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
/* Check to see if this is a valid DAD solicitation */
if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) {
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ndp_input_solicit: IPv6 "
"Destination is not solicited node "
"multicast %s\n", AF_INET6,
&ip6h->ip6_dst);
}
bad_solicit = B_TRUE;
goto done;
}
}
/*
* NOTE: with IPMP, it's possible the nominated multicast ill (which
* received this packet if it's multicast) is not the ill tied to
* e.g. the IPMP ill's data link-local. So we match across the illgrp
* to ensure we find the associated NCE.
*/
our_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE);
/*
* If this is a valid Solicitation, a permanent
* entry should exist in the cache
*/
if (our_nce == NULL ||
!(our_nce->nce_flags & NCE_F_PERMANENT)) {
ip1dbg(("ndp_input_solicit: Wrong target in NS?!"
"ifname=%s ", ill->ill_name));
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg(" dst %s\n", AF_INET6, &target);
}
bad_solicit = B_TRUE;
goto done;
}
/* At this point we should have a verified NS per spec */
if (opt != NULL) {
opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR);
if (opt != NULL) {
haddr = (uchar_t *)&opt[1];
if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) ||
hlen == 0) {
ip1dbg(("ndp_input_solicit: bad SLLA\n"));
bad_solicit = B_TRUE;
goto done;
}
}
}
/* If sending directly to peer, set the unicast flag */
if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))
flag |= NDP_UNICAST;
/*
* Create/update the entry for the soliciting node.
* or respond to outstanding queries, don't if
* the source is unspecified address.
*/
if (!IN6_IS_ADDR_UNSPECIFIED(&src)) {
int err;
nce_t *nnce;
ASSERT(ill->ill_isv6);
/*
* Regular solicitations *must* include the Source Link-Layer
* Address option. Ignore messages that do not.
*/
if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
ip1dbg(("ndp_input_solicit: source link-layer address "
"option missing with a specified source.\n"));
bad_solicit = B_TRUE;
goto done;
}
/*
* This is a regular solicitation. If we're still in the
* process of verifying the address, then don't respond at all
* and don't keep track of the sender.
*/
if (our_nce->nce_state == ND_PROBE)
goto done;
/*
* If the solicitation doesn't have sender hardware address
* (legal for unicast solicitation), then process without
* installing the return NCE. Either we already know it, or
* we'll be forced to look it up when (and if) we reply to the
* packet.
*/
if (haddr == NULL)
goto no_source;
err = ndp_lookup_then_add_v6(ill,
B_FALSE,
haddr,
&src, /* Soliciting nodes address */
&ipv6_all_ones,
&ipv6_all_zeros,
0,
0,
ND_STALE,
&nnce);
switch (err) {
case 0:
/* done with this entry */
NCE_REFRELE(nnce);
break;
case EEXIST:
/*
* B_FALSE indicates this is not an an advertisement.
*/
ndp_process(nnce, haddr, 0, B_FALSE);
NCE_REFRELE(nnce);
break;
default:
ip1dbg(("ndp_input_solicit: Can't create NCE %d\n",
err));
goto done;
}
no_source:
flag |= NDP_SOLICITED;
} else {
/*
* No source link layer address option should be present in a
* valid DAD request.
*/
if (haddr != NULL) {
ip1dbg(("ndp_input_solicit: source link-layer address "
"option present with an unspecified source.\n"));
bad_solicit = B_TRUE;
goto done;
}
if (our_nce->nce_state == ND_PROBE) {
/*
* Internally looped-back probes won't have DLPI
* attached to them. External ones (which are sent by
* multicast) always will. Just ignore our own
* transmissions.
*/
if (dl_mp != NULL) {
/*
* If someone else is probing our address, then
* we've crossed wires. Declare failure.
*/
ip_ndp_failure(ill, mp, dl_mp);
}
goto done;
}
/*
* This is a DAD probe. Multicast the advertisement to the
* all-nodes address.
*/
src = ipv6_all_hosts_mcast;
}
/* Response to a solicitation */
(void) nce_xmit_advert(our_nce, B_TRUE, &src, flag);
done:
if (bad_solicit)
BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations);
if (our_nce != NULL)
NCE_REFRELE(our_nce);
}
void
ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
{
nd_neighbor_advert_t *na;
uint32_t hlen = ill->ill_nd_lla_len;
uchar_t *haddr = NULL;
icmp6_t *icmp_nd;
ip6_t *ip6h;
nce_t *dst_nce = NULL;
in6_addr_t target;
nd_opt_hdr_t *opt = NULL;
int len;
ip_stack_t *ipst = ill->ill_ipst;
mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib;
ip6h = (ip6_t *)mp->b_rptr;
icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
na = (nd_neighbor_advert_t *)icmp_nd;
if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
(na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) {
ip1dbg(("ndp_input_advert: Target is multicast but the "
"solicited flag is not zero\n"));
BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
return;
}
target = na->nd_na_target;
if (IN6_IS_ADDR_MULTICAST(&target)) {
ip1dbg(("ndp_input_advert: Target is multicast!\n"));
BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
return;
}
if (len > sizeof (nd_neighbor_advert_t)) {
opt = (nd_opt_hdr_t *)&na[1];
if (!ndp_verify_optlen(opt,
len - sizeof (nd_neighbor_advert_t))) {
ip1dbg(("ndp_input_advert: cannot verify SLLA\n"));
BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
return;
}
/* At this point we have a verified NA per spec */
len -= sizeof (nd_neighbor_advert_t);
opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR);
if (opt != NULL) {
haddr = (uchar_t *)&opt[1];
if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) ||
hlen == 0) {
ip1dbg(("ndp_input_advert: bad SLLA\n"));
BUMP_MIB(mib,
ipv6IfIcmpInBadNeighborAdvertisements);
return;
}
}
}
/*
* NOTE: we match across the illgrp since we need to do DAD for all of
* our local addresses, and those are spread across all the active
* ills in the group.
*/
if ((dst_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE)) == NULL)
return;
if (dst_nce->nce_flags & NCE_F_PERMANENT) {
/*
* Someone just advertised one of our local addresses. First,
* check it it was us -- if so, we can safely ignore it.
*/
if (haddr != NULL) {
if (!nce_cmp_ll_addr(dst_nce, haddr, hlen))
goto out; /* from us -- no conflict */
/*
* If we're in an IPMP group, check if this is an echo
* from another ill in the group. Use the double-
* checked locking pattern to avoid grabbing
* ill_g_lock in the non-IPMP case.
*/
if (IS_UNDER_IPMP(ill)) {
rw_enter(&ipst->ips_ill_g_lock, RW_READER);
if (IS_UNDER_IPMP(ill) && ipmp_illgrp_find_ill(
ill->ill_grp, haddr, hlen) != NULL) {
rw_exit(&ipst->ips_ill_g_lock);
goto out;
}
rw_exit(&ipst->ips_ill_g_lock);
}
}
/*
* Our own (looped-back) unsolicited neighbor advertisements
* will get here with dl_mp == NULL. (These will usually be
* filtered by the `haddr' checks above, but point-to-point
* links have no hardware address and thus make it here.)
*/
if (dl_mp == NULL && dst_nce->nce_state != ND_PROBE)
goto out;
/*
* This appears to be a real conflict. If we're trying to
* configure this NCE (ND_PROBE), then shut it down.
* Otherwise, handle the discovered conflict.
*
* In the ND_PROBE case, dl_mp might be NULL if we're getting
* a unicast reply. This isn't typically done (multicast is
* the norm in response to a probe), but we can handle it.
*/
if (dst_nce->nce_state == ND_PROBE)
ip_ndp_failure(ill, mp, dl_mp);
else
ip_ndp_conflict(ill, mp, dl_mp, dst_nce);
} else {
if (na->nd_na_flags_reserved & ND_NA_FLAG_ROUTER)
dst_nce->nce_flags |= NCE_F_ISROUTER;
/* B_TRUE indicates this an advertisement */
ndp_process(dst_nce, haddr, na->nd_na_flags_reserved, B_TRUE);
}
out:
NCE_REFRELE(dst_nce);
}
/*
* Process NDP neighbor solicitation/advertisement messages.
* The checksum has already checked o.k before reaching here.
*/
void
ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
{
icmp6_t *icmp_nd;
ip6_t *ip6h;
int len;
mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib;
if (!pullupmsg(mp, -1)) {
ip1dbg(("ndp_input: pullupmsg failed\n"));
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
goto done;
}
ip6h = (ip6_t *)mp->b_rptr;
if (ip6h->ip6_hops != IPV6_MAX_HOPS) {
ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n"));
BUMP_MIB(mib, ipv6IfIcmpBadHoplimit);
goto done;
}
/*
* NDP does not accept any extension headers between the
* IP header and the ICMP header since e.g. a routing
* header could be dangerous.
* This assumes that any AH or ESP headers are removed
* by ip prior to passing the packet to ndp_input.
*/
if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
ip1dbg(("ndp_input: Wrong next header 0x%x\n",
ip6h->ip6_nxt));
BUMP_MIB(mib, ipv6IfIcmpInErrors);
goto done;
}
icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT ||
icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT);
if (icmp_nd->icmp6_code != 0) {
ip1dbg(("ndp_input: icmp6 code != 0 \n"));
BUMP_MIB(mib, ipv6IfIcmpInErrors);
goto done;
}
len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
/*
* Make sure packet length is large enough for either
* a NS or a NA icmp packet.
*/
if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) {
ip1dbg(("ndp_input: packet too short\n"));
BUMP_MIB(mib, ipv6IfIcmpInErrors);
goto done;
}
if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) {
ndp_input_solicit(ill, mp, dl_mp);
} else {
ndp_input_advert(ill, mp, dl_mp);
}
done:
freemsg(mp);
}
/*
* Utility routine to send an advertisement. Assumes that the NCE cannot
* go away (e.g., because it's refheld).
*/
static boolean_t
nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *target,
uint_t flags)
{
ASSERT((flags & NDP_PROBE) == 0);
if (nce->nce_flags & NCE_F_ISROUTER)
flags |= NDP_ISROUTER;
if (!(nce->nce_flags & NCE_F_ANYCAST))
flags |= NDP_ORIDE;
return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_ADVERT, use_nd_lla,
&nce->nce_addr, target, flags));
}
/*
* Utility routine to send a solicitation. Assumes that the NCE cannot
* go away (e.g., because it's refheld).
*/
static boolean_t
nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *sender,
uint_t flags)
{
if (flags & NDP_PROBE)
sender = &ipv6_all_zeros;
return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, use_nd_lla,
sender, &nce->nce_addr, flags));
}
/*
* nce_xmit is called to form and transmit a ND solicitation or
* advertisement ICMP packet.
*
* If the source address is unspecified and this isn't a probe (used for
* duplicate address detection), an appropriate source address and link layer
* address will be chosen here. The link layer address option is included if
* the source is specified (i.e., all non-probe packets), and omitted (per the
* specification) otherwise.
*
* It returns B_FALSE only if it does a successful put() to the
* corresponding ill's ill_wq otherwise returns B_TRUE.
*/
static boolean_t
nce_xmit(ill_t *ill, uint8_t type, boolean_t use_nd_lla,
const in6_addr_t *sender, const in6_addr_t *target, int flag)
{
ill_t *hwaddr_ill;
uint32_t len;
icmp6_t *icmp6;
mblk_t *mp;
ip6_t *ip6h;
nd_opt_hdr_t *opt;
uint_t plen, maxplen;
ip6i_t *ip6i;
ipif_t *src_ipif = NULL;
uint8_t *hw_addr;
zoneid_t zoneid = GLOBAL_ZONEID;
char buf[INET6_ADDRSTRLEN];
ASSERT(!IS_IPMP(ill));
/*
* Check that the sender is actually a usable address on `ill', and if
* so, track that as the src_ipif. If not, for solicitations, set the
* sender to :: so that a new one will be picked below; for adverts,
* drop the packet since we expect nce_xmit_advert() to always provide
* a valid sender.
*/
if (!IN6_IS_ADDR_UNSPECIFIED(sender)) {
if ((src_ipif = ip_ndp_lookup_addr_v6(sender, ill)) == NULL ||
!src_ipif->ipif_addr_ready) {
if (src_ipif != NULL) {
ipif_refrele(src_ipif);
src_ipif = NULL;
}
if (type == ND_NEIGHBOR_ADVERT) {
ip1dbg(("nce_xmit: No source ipif for src %s\n",
inet_ntop(AF_INET6, sender, buf,
sizeof (buf))));
return (B_TRUE);
}
sender = &ipv6_all_zeros;
}
}
/*
* If we still have an unspecified source (sender) address and this
* isn't a probe, select a source address from `ill'.
*/
if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) {
ASSERT(type != ND_NEIGHBOR_ADVERT);
/*
* Pick a source address for this solicitation, but restrict
* the selection to addresses assigned to the output
* interface. We do this because the destination will create
* a neighbor cache entry for the source address of this
* packet, so the source address needs to be a valid neighbor.
*/
src_ipif = ipif_select_source_v6(ill, target, B_TRUE,
IPV6_PREFER_SRC_DEFAULT, ALL_ZONES);
if (src_ipif == NULL) {
ip1dbg(("nce_xmit: No source ipif for dst %s\n",
inet_ntop(AF_INET6, target, buf, sizeof (buf))));
return (B_TRUE);
}
sender = &src_ipif->ipif_v6src_addr;
}
/*
* We're either sending a probe or we have a source address.
*/
ASSERT((flag & NDP_PROBE) || src_ipif != NULL);
maxplen = roundup(sizeof (nd_opt_hdr_t) + ND_MAX_HDW_LEN, 8);
len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) +
maxplen;
mp = allocb(len, BPRI_LO);
if (mp == NULL) {
if (src_ipif != NULL)
ipif_refrele(src_ipif);
return (B_TRUE);
}
bzero((char *)mp->b_rptr, len);
mp->b_wptr = mp->b_rptr + len;
ip6i = (ip6i_t *)mp->b_rptr;
ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
ip6i->ip6i_nxt = IPPROTO_RAW;
ip6i->ip6i_flags = IP6I_HOPLIMIT;
if (flag & NDP_PROBE)
ip6i->ip6i_flags |= IP6I_UNSPEC_SRC;
ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t));
ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t));
ip6h->ip6_nxt = IPPROTO_ICMPV6;
ip6h->ip6_hops = IPV6_MAX_HOPS;
ip6h->ip6_src = *sender;
ip6h->ip6_dst = *target;
icmp6 = (icmp6_t *)&ip6h[1];
opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN +
sizeof (nd_neighbor_advert_t));
if (type == ND_NEIGHBOR_SOLICIT) {
nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6;
if (!(flag & NDP_PROBE))
opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR;
ns->nd_ns_target = *target;
if (!(flag & NDP_UNICAST)) {
/* Form multicast address of the target */
ip6h->ip6_dst = ipv6_solicited_node_mcast;
ip6h->ip6_dst.s6_addr32[3] |=
ns->nd_ns_target.s6_addr32[3];
}
} else {
nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6;
ASSERT(!(flag & NDP_PROBE));
opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
na->nd_na_target = *sender;
if (flag & NDP_ISROUTER)
na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER;
if (flag & NDP_SOLICITED)
na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED;
if (flag & NDP_ORIDE)
na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE;
}
hw_addr = NULL;
if (!(flag & NDP_PROBE)) {
/*
* Use our source address to find the hardware address to put
* in the packet, so that the hardware address and IP address
* will match up -- even if that hardware address doesn't
* match the ill we actually transmit the packet through.
*/
if (IS_IPMP(src_ipif->ipif_ill)) {
hwaddr_ill = ipmp_ipif_hold_bound_ill(src_ipif);
if (hwaddr_ill == NULL) {
ip1dbg(("nce_xmit: no bound ill!\n"));
ipif_refrele(src_ipif);
freemsg(mp);
return (B_TRUE);
}
} else {
hwaddr_ill = src_ipif->ipif_ill;
ill_refhold(hwaddr_ill); /* for symmetry */
}
plen = roundup(sizeof (nd_opt_hdr_t) +
hwaddr_ill->ill_nd_lla_len, 8);
hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla :
hwaddr_ill->ill_phys_addr;
if (hw_addr != NULL) {
/* Fill in link layer address and option len */
opt->nd_opt_len = (uint8_t)(plen / 8);
bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len);
}
ill_refrele(hwaddr_ill);
}
if (hw_addr == NULL)
plen = 0;
/* Fix up the length of the packet now that plen is known */
len -= (maxplen - plen);
mp->b_wptr = mp->b_rptr + len;
ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t));
icmp6->icmp6_type = type;
icmp6->icmp6_code = 0;
/*
* Prepare for checksum by putting icmp length in the icmp
* checksum field. The checksum is calculated in ip_wput_v6.
*/
icmp6->icmp6_cksum = ip6h->ip6_plen;
/*
* Before we toss the src_ipif, look up the zoneid to pass to
* ip_output_v6(). This is to ensure unicast ND_NEIGHBOR_ADVERT
* packets to be routed correctly by IP (we cannot guarantee that the
* global zone has an interface route to the destination).
*/
if (src_ipif != NULL) {
if ((zoneid = src_ipif->ipif_zoneid) == ALL_ZONES)
zoneid = GLOBAL_ZONEID;
ipif_refrele(src_ipif);
}
ip_output_v6((void *)(uintptr_t)zoneid, mp, ill->ill_wq, IP_WPUT);
return (B_FALSE);
}
/*
* Make a link layer address (does not include the SAP) from an nce.
* To form the link layer address, use the last four bytes of ipv6
* address passed in and the fixed offset stored in nce.
*/
static void
nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr)
{
uchar_t *mask, *to;
ill_t *ill = nce->nce_ill;
int len;
if (ill->ill_net_type == IRE_IF_NORESOLVER)
return;
ASSERT(nce->nce_res_mp != NULL);
ASSERT(ill->ill_net_type == IRE_IF_RESOLVER);
ASSERT(nce->nce_flags & NCE_F_MAPPING);
ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask));
ASSERT(addr != NULL);
bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill),
addrpos, ill->ill_nd_lla_len);
len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start,
IPV6_ADDR_LEN);
mask = (uchar_t *)&nce->nce_extract_mask;
mask += (IPV6_ADDR_LEN - len);
addr += (IPV6_ADDR_LEN - len);
to = addrpos + nce->nce_ll_extract_start;
while (len-- > 0)
*to++ |= *mask++ & *addr++;
}
mblk_t *
nce_udreq_alloc(ill_t *ill)
{
mblk_t *template_mp = NULL;
dl_unitdata_req_t *dlur;
int sap_length;
ASSERT(ill->ill_isv6);
sap_length = ill->ill_sap_length;
template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) +
ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ);
if (template_mp == NULL)
return (NULL);
dlur = (dl_unitdata_req_t *)template_mp->b_rptr;
dlur->dl_priority.dl_min = 0;
dlur->dl_priority.dl_max = 0;
dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len;
dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t);
/* Copy in the SAP value. */
NCE_LL_SAP_COPY(ill, template_mp);
return (template_mp);
}
/*
* NDP retransmit timer.
* This timer goes off when:
* a. It is time to retransmit NS for resolver.
* b. It is time to send reachability probes.
*/
void
ndp_timer(void *arg)
{
nce_t *nce = arg;
ill_t *ill = nce->nce_ill;
char addrbuf[INET6_ADDRSTRLEN];
boolean_t dropped = B_FALSE;
ip_stack_t *ipst = ill->ill_ipst;
/*
* The timer has to be cancelled by ndp_delete before doing the final
* refrele. So the NCE is guaranteed to exist when the timer runs
* until it clears the timeout_id. Before clearing the timeout_id
* bump up the refcnt so that we can continue to use the nce
*/
ASSERT(nce != NULL);
mutex_enter(&nce->nce_lock);
NCE_REFHOLD_LOCKED(nce);
nce->nce_timeout_id = 0;
/*
* Check the reachability state first.
*/
switch (nce->nce_state) {
case ND_DELAY:
nce->nce_state = ND_PROBE;
mutex_exit(&nce->nce_lock);
(void) nce_xmit_solicit(nce, B_FALSE, &ipv6_all_zeros,
NDP_UNICAST);
if (ip_debug > 3) {
/* ip2dbg */
pr_addr_dbg("ndp_timer: state for %s changed "
"to PROBE\n", AF_INET6, &nce->nce_addr);
}
NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time);
NCE_REFRELE(nce);
return;
case ND_PROBE:
/* must be retransmit timer */
nce->nce_pcnt--;
ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT &&
nce->nce_pcnt >= -1);
if (nce->nce_pcnt > 0) {
/*
* As per RFC2461, the nce gets deleted after
* MAX_UNICAST_SOLICIT unsuccessful re-transmissions.
* Note that the first unicast solicitation is sent
* during the DELAY state.
*/
ip2dbg(("ndp_timer: pcount=%x dst %s\n",
nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr,
addrbuf, sizeof (addrbuf))));
mutex_exit(&nce->nce_lock);
dropped = nce_xmit_solicit(nce, B_FALSE,
&ipv6_all_zeros,
(nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE :
NDP_UNICAST);
if (dropped) {
mutex_enter(&nce->nce_lock);
nce->nce_pcnt++;
mutex_exit(&nce->nce_lock);
}
NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill));
} else if (nce->nce_pcnt < 0) {
/* No hope, delete the nce */
nce->nce_state = ND_UNREACHABLE;
mutex_exit(&nce->nce_lock);
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ndp_timer: Delete IRE for"
" dst %s\n", AF_INET6, &nce->nce_addr);
}
ndp_delete(nce);
} else if (!(nce->nce_flags & NCE_F_PERMANENT)) {
/* Wait RetransTimer, before deleting the entry */
ip2dbg(("ndp_timer: pcount=%x dst %s\n",
nce->nce_pcnt, inet_ntop(AF_INET6,
&nce->nce_addr, addrbuf, sizeof (addrbuf))));
mutex_exit(&nce->nce_lock);
/* Wait one interval before killing */
NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time);
} else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) {
ipif_t *ipif;
/*
* We're done probing, and we can now declare this
* address to be usable. Let IP know that it's ok to
* use.
*/
nce->nce_state = ND_REACHABLE;
mutex_exit(&nce->nce_lock);
ipif = ip_ndp_lookup_addr_v6(&nce->nce_addr,
nce->nce_ill);
if (ipif != NULL) {
if (ipif->ipif_was_dup) {
char ibuf[LIFNAMSIZ + 10];
char sbuf[INET6_ADDRSTRLEN];
ipif->ipif_was_dup = B_FALSE;
(void) inet_ntop(AF_INET6,
&ipif->ipif_v6lcl_addr,
sbuf, sizeof (sbuf));
ipif_get_name(ipif, ibuf,
sizeof (ibuf));
cmn_err(CE_NOTE, "recovered address "
"%s on %s", sbuf, ibuf);
}
if ((ipif->ipif_flags & IPIF_UP) &&
!ipif->ipif_addr_ready)
ipif_up_notify(ipif);
ipif->ipif_addr_ready = 1;
ipif_refrele(ipif);
}
/* Begin defending our new address */
nce->nce_unsolicit_count = 0;
dropped = nce_xmit_advert(nce, B_FALSE,
&ipv6_all_hosts_mcast, 0);
if (dropped) {
nce->nce_unsolicit_count = 1;
NDP_RESTART_TIMER(nce,
ipst->ips_ip_ndp_unsolicit_interval);
} else if (ipst->ips_ip_ndp_defense_interval != 0) {
NDP_RESTART_TIMER(nce,
ipst->ips_ip_ndp_defense_interval);
}
} else {
/*
* This is an address we're probing to be our own, but
* the ill is down. Wait until it comes back before
* doing anything, but switch to reachable state so
* that the restart will work.
*/
nce->nce_state = ND_REACHABLE;
mutex_exit(&nce->nce_lock);
}
NCE_REFRELE(nce);
return;
case ND_INCOMPLETE: {
ip6_t *ip6h;
ip6i_t *ip6i;
mblk_t *mp, *datamp, *nextmp, **prevmpp;
/*
* Per case (2) in the nce_queue_mp() comments, scan nce_qd_mp
* for any IPMP probe packets, and toss 'em. IPMP probe
* packets will always be at the head of nce_qd_mp and always
* have an ip6i_t header, so we can stop at the first queued
* ND packet without an ip6i_t.
*/
prevmpp = &nce->nce_qd_mp;
for (mp = nce->nce_qd_mp; mp != NULL; mp = nextmp) {
nextmp = mp->b_next;
datamp = (DB_TYPE(mp) == M_CTL) ? mp->b_cont : mp;
ip6h = (ip6_t *)datamp->b_rptr;
if (ip6h->ip6_nxt != IPPROTO_RAW)
break;
ip6i = (ip6i_t *)ip6h;
if (ip6i->ip6i_flags & IP6I_IPMP_PROBE) {
inet_freemsg(mp);
*prevmpp = nextmp;
} else {
prevmpp = &mp->b_next;
}
}
ip_ndp_resolve(nce);
mutex_exit(&nce->nce_lock);
NCE_REFRELE(nce);
break;
}
case ND_REACHABLE:
if (((nce->nce_flags & NCE_F_UNSOL_ADV) &&
nce->nce_unsolicit_count != 0) ||
((nce->nce_flags & NCE_F_PERMANENT) &&
ipst->ips_ip_ndp_defense_interval != 0)) {
if (nce->nce_unsolicit_count > 0)
nce->nce_unsolicit_count--;
mutex_exit(&nce->nce_lock);
dropped = nce_xmit_advert(nce, B_FALSE,
&ipv6_all_hosts_mcast, 0);
if (dropped) {
mutex_enter(&nce->nce_lock);
nce->nce_unsolicit_count++;
mutex_exit(&nce->nce_lock);
}
if (nce->nce_unsolicit_count != 0) {
NDP_RESTART_TIMER(nce,
ipst->ips_ip_ndp_unsolicit_interval);
} else {
NDP_RESTART_TIMER(nce,
ipst->ips_ip_ndp_defense_interval);
}
} else {
mutex_exit(&nce->nce_lock);
}
NCE_REFRELE(nce);
break;
default:
mutex_exit(&nce->nce_lock);
NCE_REFRELE(nce);
break;
}
}
/*
* Set a link layer address from the ll_addr passed in.
* Copy SAP from ill.
*/
static void
nce_set_ll(nce_t *nce, uchar_t *ll_addr)
{
ill_t *ill = nce->nce_ill;
uchar_t *woffset;
ASSERT(ll_addr != NULL);
/* Always called before fast_path_probe */
ASSERT(nce->nce_fp_mp == NULL);
if (ill->ill_sap_length != 0) {
/*
* Copy the SAP type specified in the
* request into the xmit template.
*/
NCE_LL_SAP_COPY(ill, nce->nce_res_mp);
}
if (ill->ill_phys_addr_length > 0) {
/*
* The bcopy() below used to be called for the physical address
* length rather than the link layer address length. For
* ethernet and many other media, the phys_addr and lla are
* identical.
* However, with xresolv interfaces being introduced, the
* phys_addr and lla are no longer the same, and the physical
* address may not have any useful meaning, so we use the lla
* for IPv6 address resolution and destination addressing.
*
* For PPP or other interfaces with a zero length
* physical address, don't do anything here.
* The bcopy() with a zero phys_addr length was previously
* a no-op for interfaces with a zero-length physical address.
* Using the lla for them would change the way they operate.
* Doing nothing in such cases preserves expected behavior.
*/
woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
bcopy(ll_addr, woffset, ill->ill_nd_lla_len);
}
}
static boolean_t
nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len)
{
ill_t *ill = nce->nce_ill;
uchar_t *ll_offset;
ASSERT(nce->nce_res_mp != NULL);
if (ll_addr == NULL)
return (B_FALSE);
ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0)
return (B_TRUE);
return (B_FALSE);
}
/*
* Updates the link layer address or the reachability state of
* a cache entry. Reset probe counter if needed.
*/
static void
nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr)
{
ill_t *ill = nce->nce_ill;
boolean_t need_stop_timer = B_FALSE;
boolean_t need_fastpath_update = B_FALSE;
ASSERT(MUTEX_HELD(&nce->nce_lock));
ASSERT(nce->nce_ipversion == IPV6_VERSION);
/*
* If this interface does not do NUD, there is no point
* in allowing an update to the cache entry. Although
* we will respond to NS.
* The only time we accept an update for a resolver when
* NUD is turned off is when it has just been created.
* Non-Resolvers will always be created as REACHABLE.
*/
if (new_state != ND_UNCHANGED) {
if ((nce->nce_flags & NCE_F_NONUD) &&
(nce->nce_state != ND_INCOMPLETE))
return;
ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN);
ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX);
need_stop_timer = B_TRUE;
if (new_state == ND_REACHABLE)
nce->nce_last = TICK_TO_MSEC(lbolt64);
else {
/* We force NUD in this case */
nce->nce_last = 0;
}
nce->nce_state = new_state;
nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
}
/*
* In case of fast path we need to free the the fastpath
* M_DATA and do another probe. Otherwise we can just
* overwrite the DL_UNITDATA_REQ data, noting we'll lose
* whatever packets that happens to be transmitting at the time.
*/
if (new_ll_addr != NULL) {
ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) +
ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr);
bcopy(new_ll_addr, nce->nce_res_mp->b_rptr +
NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len);
if (nce->nce_fp_mp != NULL) {
freemsg(nce->nce_fp_mp);
nce->nce_fp_mp = NULL;
}
need_fastpath_update = B_TRUE;
}
mutex_exit(&nce->nce_lock);
if (need_stop_timer) {
(void) untimeout(nce->nce_timeout_id);
nce->nce_timeout_id = 0;
}
if (need_fastpath_update)
nce_fastpath(nce);
mutex_enter(&nce->nce_lock);
}
void
nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert)
{
uint_t count = 0;
mblk_t **mpp, *tmp;
ASSERT(MUTEX_HELD(&nce->nce_lock));
for (mpp = &nce->nce_qd_mp; *mpp != NULL; mpp = &(*mpp)->b_next) {
if (++count > nce->nce_ill->ill_max_buf) {
tmp = nce->nce_qd_mp->b_next;
nce->nce_qd_mp->b_next = NULL;
nce->nce_qd_mp->b_prev = NULL;
freemsg(nce->nce_qd_mp);
nce->nce_qd_mp = tmp;
}
}
if (head_insert) {
mp->b_next = nce->nce_qd_mp;
nce->nce_qd_mp = mp;
} else {
*mpp = mp;
}
}
static void
nce_queue_mp(nce_t *nce, mblk_t *mp)
{
boolean_t head_insert = B_FALSE;
ip6_t *ip6h;
ip6i_t *ip6i;
mblk_t *data_mp;
ASSERT(MUTEX_HELD(&nce->nce_lock));
if (mp->b_datap->db_type == M_CTL)
data_mp = mp->b_cont;
else
data_mp = mp;
ip6h = (ip6_t *)data_mp->b_rptr;
if (ip6h->ip6_nxt == IPPROTO_RAW) {
/*
* This message should have been pulled up already in
* ip_wput_v6. We can't do pullups here because the message
* could be from the nce_qd_mp which could have b_next/b_prev
* non-NULL.
*/
ip6i = (ip6i_t *)ip6h;
ASSERT(MBLKL(data_mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN);
/*
* If this packet is marked IP6I_IPMP_PROBE, then we need to:
*
* 1. Insert it at the head of the nce_qd_mp list. Consider
* the normal (non-probe) load-speading case where the
* source address of the ND packet is not tied to nce_ill.
* If the ill bound to the source address cannot receive,
* the response to the ND packet will not be received.
* However, if ND packets for nce_ill's probes are queued
* behind that ND packet, those probes will also fail to
* be sent, and thus in.mpathd will erroneously conclude
* that nce_ill has also failed.
*
* 2. Drop the probe packet in ndp_timer() if the ND did
* not succeed on the first attempt. This ensures that
* ND problems do not manifest as probe RTT spikes.
*/
if (ip6i->ip6i_flags & IP6I_IPMP_PROBE)
head_insert = B_TRUE;
}
nce_queue_mp_common(nce, mp, head_insert);
}
/*
* Called when address resolution failed due to a timeout.
* Send an ICMP unreachable in response to all queued packets.
*/
void
nce_resolv_failed(nce_t *nce)
{
mblk_t *mp, *nxt_mp, *first_mp;
char buf[INET6_ADDRSTRLEN];
ip6_t *ip6h;
zoneid_t zoneid = GLOBAL_ZONEID;
ip_stack_t *ipst = nce->nce_ill->ill_ipst;
ip1dbg(("nce_resolv_failed: dst %s\n",
inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf))));
mutex_enter(&nce->nce_lock);
mp = nce->nce_qd_mp;
nce->nce_qd_mp = NULL;
mutex_exit(&nce->nce_lock);
while (mp != NULL) {
nxt_mp = mp->b_next;
mp->b_next = NULL;
mp->b_prev = NULL;
first_mp = mp;
if (mp->b_datap->db_type == M_CTL) {
ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr;
ASSERT(io->ipsec_out_type == IPSEC_OUT);
zoneid = io->ipsec_out_zoneid;
ASSERT(zoneid != ALL_ZONES);
mp = mp->b_cont;
mp->b_next = NULL;
mp->b_prev = NULL;
}
ip6h = (ip6_t *)mp->b_rptr;
if (ip6h->ip6_nxt == IPPROTO_RAW) {
ip6i_t *ip6i;
/*
* This message should have been pulled up already
* in ip_wput_v6. ip_hdr_complete_v6 assumes that
* the header is pulled up.
*/
ip6i = (ip6i_t *)ip6h;
ASSERT((mp->b_wptr - (uchar_t *)ip6i) >=
sizeof (ip6i_t) + IPV6_HDR_LEN);
mp->b_rptr += sizeof (ip6i_t);
}
/*
* Ignore failure since icmp_unreachable_v6 will silently
* drop packets with an unspecified source address.
*/
(void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid, ipst);
icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp,
ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid, ipst);
mp = nxt_mp;
}
nce_cb_dispatch(nce);
}
/*
* Called by SIOCSNDP* ioctl to add/change an nce entry
* and the corresponding attributes.
* Disallow states other than ND_REACHABLE or ND_STALE.
*/
int
ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr)
{
sin6_t *sin6;
in6_addr_t *addr;
nce_t *nce;
int err;
uint16_t new_flags = 0;
uint16_t old_flags = 0;
int inflags = lnr->lnr_flags;
ip_stack_t *ipst = ill->ill_ipst;
ASSERT(ill->ill_isv6);
if ((lnr->lnr_state_create != ND_REACHABLE) &&
(lnr->lnr_state_create != ND_STALE))
return (EINVAL);
if (lnr->lnr_hdw_len > ND_MAX_HDW_LEN)
return (EINVAL);
sin6 = (sin6_t *)&lnr->lnr_addr;
addr = &sin6->sin6_addr;
mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
/* We know it can not be mapping so just look in the hash table */
nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
/* See comment in ndp_query() regarding IS_IPMP(ill) usage */
nce = nce_lookup_addr(ill, IS_IPMP(ill), addr, nce);
if (nce != NULL)
new_flags = nce->nce_flags;
switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) {
case NDF_ISROUTER_ON:
new_flags |= NCE_F_ISROUTER;
break;
case NDF_ISROUTER_OFF:
new_flags &= ~NCE_F_ISROUTER;
break;
case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON):
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
if (nce != NULL)
NCE_REFRELE(nce);
return (EINVAL);
}
switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) {
case NDF_ANYCAST_ON:
new_flags |= NCE_F_ANYCAST;
break;
case NDF_ANYCAST_OFF:
new_flags &= ~NCE_F_ANYCAST;
break;
case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON):
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
if (nce != NULL)
NCE_REFRELE(nce);
return (EINVAL);
}
if (nce == NULL) {
err = ndp_add_v6(ill,
(uchar_t *)lnr->lnr_hdw_addr,
addr,
&ipv6_all_ones,
&ipv6_all_zeros,
0,
new_flags,
lnr->lnr_state_create,
&nce);
if (err != 0) {
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err));
return (err);
}
}
old_flags = nce->nce_flags;
if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) {
/*
* Router turned to host, delete all ires.
* XXX Just delete the entry, but we need to add too.
*/
nce->nce_flags &= ~NCE_F_ISROUTER;
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
ndp_delete(nce);
NCE_REFRELE(nce);
return (0);
}
mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
mutex_enter(&nce->nce_lock);
nce->nce_flags = new_flags;
mutex_exit(&nce->nce_lock);
/*
* Note that we ignore the state at this point, which
* should be either STALE or REACHABLE. Instead we let
* the link layer address passed in to determine the state
* much like incoming packets.
*/
nce_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE);
NCE_REFRELE(nce);
return (0);
}
/*
* If the device driver supports it, we make nce_fp_mp to have
* an M_DATA prepend. Otherwise nce_fp_mp will be null.
* The caller ensures there is hold on nce for this function.
* Note that since ill_fastpath_probe() copies the mblk there is
* no need for the hold beyond this function.
*/
void
nce_fastpath(nce_t *nce)
{
ill_t *ill = nce->nce_ill;
int res;
ASSERT(ill != NULL);
ASSERT(nce->nce_state != ND_INITIAL && nce->nce_state != ND_INCOMPLETE);
if (nce->nce_fp_mp != NULL) {
/* Already contains fastpath info */
return;
}
if (nce->nce_res_mp != NULL) {
nce_fastpath_list_add(nce);
res = ill_fastpath_probe(ill, nce->nce_res_mp);
/*
* EAGAIN is an indication of a transient error
* i.e. allocation failure etc. leave the nce in the list it
* will be updated when another probe happens for another ire
* if not it will be taken out of the list when the ire is
* deleted.
*/
if (res != 0 && res != EAGAIN)
nce_fastpath_list_delete(nce);
}
}
/*
* Drain the list of nce's waiting for fastpath response.
*/
void
nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *),
void *arg)
{
nce_t *next_nce;
nce_t *current_nce;
nce_t *first_nce;
nce_t *prev_nce = NULL;
mutex_enter(&ill->ill_lock);
first_nce = current_nce = (nce_t *)ill->ill_fastpath_list;
while (current_nce != (nce_t *)&ill->ill_fastpath_list) {
next_nce = current_nce->nce_fastpath;
/*
* Take it off the list if we're flushing, or if the callback
* routine tells us to do so. Otherwise, leave the nce in the
* fastpath list to handle any pending response from the lower
* layer. We can't drain the list when the callback routine
* comparison failed, because the response is asynchronous in
* nature, and may not arrive in the same order as the list
* insertion.
*/
if (func == NULL || func(current_nce, arg)) {
current_nce->nce_fastpath = NULL;
if (current_nce == first_nce)
ill->ill_fastpath_list = first_nce = next_nce;
else
prev_nce->nce_fastpath = next_nce;
} else {
/* previous element that is still in the list */
prev_nce = current_nce;
}
current_nce = next_nce;
}
mutex_exit(&ill->ill_lock);
}
/*
* Add nce to the nce fastpath list.
*/
void
nce_fastpath_list_add(nce_t *nce)
{
ill_t *ill;
ill = nce->nce_ill;
mutex_enter(&ill->ill_lock);
mutex_enter(&nce->nce_lock);
/*
* if nce has not been deleted and
* is not already in the list add it.
*/
if (!(nce->nce_flags & NCE_F_CONDEMNED) &&
(nce->nce_fastpath == NULL)) {
nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list;
ill->ill_fastpath_list = nce;
}
mutex_exit(&nce->nce_lock);
mutex_exit(&ill->ill_lock);
}
/*
* remove nce from the nce fastpath list.
*/
void
nce_fastpath_list_delete(nce_t *nce)
{
nce_t *nce_ptr;
ill_t *ill;
ill = nce->nce_ill;
ASSERT(ill != NULL);
mutex_enter(&ill->ill_lock);
if (nce->nce_fastpath == NULL)
goto done;
ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list);
if (ill->ill_fastpath_list == nce) {
ill->ill_fastpath_list = nce->nce_fastpath;
} else {
nce_ptr = ill->ill_fastpath_list;
while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) {
if (nce_ptr->nce_fastpath == nce) {
nce_ptr->nce_fastpath = nce->nce_fastpath;
break;
}
nce_ptr = nce_ptr->nce_fastpath;
}
}
nce->nce_fastpath = NULL;
done:
mutex_exit(&ill->ill_lock);
}
/*
* Update all NCE's that are not in fastpath mode and
* have an nce_fp_mp that matches mp. mp->b_cont contains
* the fastpath header.
*
* Returns TRUE if entry should be dequeued, or FALSE otherwise.
*/
boolean_t
ndp_fastpath_update(nce_t *nce, void *arg)
{
mblk_t *mp, *fp_mp;
uchar_t *mp_rptr, *ud_mp_rptr;
mblk_t *ud_mp = nce->nce_res_mp;
ptrdiff_t cmplen;
if (nce->nce_flags & NCE_F_MAPPING)
return (B_TRUE);
if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL))
return (B_TRUE);
ip2dbg(("ndp_fastpath_update: trying\n"));
mp = (mblk_t *)arg;
mp_rptr = mp->b_rptr;
cmplen = mp->b_wptr - mp_rptr;
ASSERT(cmplen >= 0);
ud_mp_rptr = ud_mp->b_rptr;
/*
* The nce is locked here to prevent any other threads
* from accessing and changing nce_res_mp when the IPv6 address
* becomes resolved to an lla while we're in the middle
* of looking at and comparing the hardware address (lla).
* It is also locked to prevent multiple threads in nce_fastpath_update
* from examining nce_res_mp atthe same time.
*/
mutex_enter(&nce->nce_lock);
if (ud_mp->b_wptr - ud_mp_rptr != cmplen ||
bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) {
mutex_exit(&nce->nce_lock);
/*
* Don't take the ire off the fastpath list yet,
* since the response may come later.
*/
return (B_FALSE);
}
/* Matched - install mp as the fastpath mp */
ip1dbg(("ndp_fastpath_update: match\n"));
fp_mp = dupb(mp->b_cont);
if (fp_mp != NULL) {
nce->nce_fp_mp = fp_mp;
}
mutex_exit(&nce->nce_lock);
return (B_TRUE);
}
/*
* This function handles the DL_NOTE_FASTPATH_FLUSH notification from
* driver. Note that it assumes IP is exclusive...
*/
/* ARGSUSED */
void
ndp_fastpath_flush(nce_t *nce, char *arg)
{
if (nce->nce_flags & NCE_F_MAPPING)
return;
/* No fastpath info? */
if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL)
return;
if (nce->nce_ipversion == IPV4_VERSION &&
nce->nce_flags & NCE_F_BCAST) {
/*
* IPv4 BROADCAST entries:
* We can't delete the nce since it is difficult to
* recreate these without going through the
* ipif down/up dance.
*
* All access to nce->nce_fp_mp in the case of these
* is protected by nce_lock.
*/
mutex_enter(&nce->nce_lock);
if (nce->nce_fp_mp != NULL) {
freeb(nce->nce_fp_mp);
nce->nce_fp_mp = NULL;
mutex_exit(&nce->nce_lock);
nce_fastpath(nce);
} else {
mutex_exit(&nce->nce_lock);
}
} else {
/* Just delete the NCE... */
ndp_delete(nce);
}
}
/*
* Return a pointer to a given option in the packet.
* Assumes that option part of the packet have already been validated.
*/
nd_opt_hdr_t *
ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type)
{
while (optlen > 0) {
if (opt->nd_opt_type == opt_type)
return (opt);
optlen -= 8 * opt->nd_opt_len;
opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len);
}
return (NULL);
}
/*
* Verify all option lengths present are > 0, also check to see
* if the option lengths and packet length are consistent.
*/
boolean_t
ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen)
{
ASSERT(opt != NULL);
while (optlen > 0) {
if (opt->nd_opt_len == 0)
return (B_FALSE);
optlen -= 8 * opt->nd_opt_len;
if (optlen < 0)
return (B_FALSE);
opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len);
}
return (B_TRUE);
}
/*
* ndp_walk function.
* Free a fraction of the NCE cache entries.
* A fraction of zero means to not free any in that category.
*/
void
ndp_cache_reclaim(nce_t *nce, char *arg)
{
nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg;
uint_t rand;
if (nce->nce_flags & NCE_F_PERMANENT)
return;
rand = (uint_t)lbolt +
NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE);
if (ncr->ncr_host != 0 &&
(rand/ncr->ncr_host)*ncr->ncr_host == rand) {
ndp_delete(nce);
return;
}
}
/*
* ndp_walk function.
* Count the number of NCEs that can be deleted.
* These would be hosts but not routers.
*/
void
ndp_cache_count(nce_t *nce, char *arg)
{
ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg;
if (nce->nce_flags & NCE_F_PERMANENT)
return;
ncc->ncc_total++;
if (!(nce->nce_flags & NCE_F_ISROUTER))
ncc->ncc_host++;
}
#ifdef DEBUG
void
nce_trace_ref(nce_t *nce)
{
ASSERT(MUTEX_HELD(&nce->nce_lock));
if (nce->nce_trace_disable)
return;
if (!th_trace_ref(nce, nce->nce_ill->ill_ipst)) {
nce->nce_trace_disable = B_TRUE;
nce_trace_cleanup(nce);
}
}
void
nce_untrace_ref(nce_t *nce)
{
ASSERT(MUTEX_HELD(&nce->nce_lock));
if (!nce->nce_trace_disable)
th_trace_unref(nce);
}
static void
nce_trace_cleanup(const nce_t *nce)
{
th_trace_cleanup(nce, nce->nce_trace_disable);
}
#endif
/*
* Called when address resolution fails due to a timeout.
* Send an ICMP unreachable in response to all queued packets.
*/
void
arp_resolv_failed(nce_t *nce)
{
mblk_t *mp, *nxt_mp, *first_mp;
char buf[INET6_ADDRSTRLEN];
zoneid_t zoneid = GLOBAL_ZONEID;
struct in_addr ipv4addr;
ip_stack_t *ipst = nce->nce_ill->ill_ipst;
IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr);
ip3dbg(("arp_resolv_failed: dst %s\n",
inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf))));
mutex_enter(&nce->nce_lock);
mp = nce->nce_qd_mp;
nce->nce_qd_mp = NULL;
mutex_exit(&nce->nce_lock);
while (mp != NULL) {
nxt_mp = mp->b_next;
mp->b_next = NULL;
mp->b_prev = NULL;
first_mp = mp;
/*
* Send icmp unreachable messages
* to the hosts.
*/
(void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst);
ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n"));
icmp_unreachable(nce->nce_ill->ill_wq, first_mp,
ICMP_HOST_UNREACHABLE, zoneid, ipst);
mp = nxt_mp;
}
}
int
ndp_lookup_then_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags,
nce_t **newnce, nce_t *src_nce)
{
int err;
nce_t *nce;
in6_addr_t addr6;
ip_stack_t *ipst = ill->ill_ipst;
mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr));
IN6_IPADDR_TO_V4MAPPED(*addr, &addr6);
/*
* NOTE: IPv4 never matches across the illgrp since the NCE's we're
* looking up have fastpath headers that are inherently per-ill.
*/
nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce);
if (nce == NULL) {
err = ndp_add_v4(ill, addr, flags, newnce, src_nce);
} else {
*newnce = nce;
err = EEXIST;
}
mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
return (err);
}
/*
* NDP Cache Entry creation routine for IPv4.
* Mapped entries are handled in arp.
* This routine must always be called with ndp4->ndp_g_lock held.
* Prior to return, nce_refcnt is incremented.
*/
static int
ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags,
nce_t **newnce, nce_t *src_nce)
{
static nce_t nce_nil;
nce_t *nce;
mblk_t *mp;
mblk_t *template = NULL;
nce_t **ncep;
ip_stack_t *ipst = ill->ill_ipst;
uint16_t state = ND_INITIAL;
int err;
ASSERT(MUTEX_HELD(&ipst->ips_ndp4->ndp_g_lock));
ASSERT(!ill->ill_isv6);
ASSERT((flags & NCE_F_MAPPING) == 0);
if (ill->ill_resolver_mp == NULL)
return (EINVAL);
/*
* Allocate the mblk to hold the nce.
*/
mp = allocb(sizeof (nce_t), BPRI_MED);
if (mp == NULL)
return (ENOMEM);
nce = (nce_t *)mp->b_rptr;
mp->b_wptr = (uchar_t *)&nce[1];
*nce = nce_nil;
nce->nce_ill = ill;
nce->nce_ipversion = IPV4_VERSION;
nce->nce_flags = flags;
nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
nce->nce_rcnt = ill->ill_xmit_count;
IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr);
nce->nce_mask = ipv6_all_ones;
nce->nce_extract_mask = ipv6_all_zeros;
nce->nce_ll_extract_start = 0;
nce->nce_qd_mp = NULL;
nce->nce_mp = mp;
/* This one is for nce getting created */
nce->nce_refcnt = 1;
mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL);
ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr));
nce->nce_trace_disable = B_FALSE;
if (src_nce != NULL) {
/*
* src_nce has been provided by the caller. The only
* caller who provides a non-null, non-broadcast
* src_nce is from ip_newroute() which must pass in
* a ND_REACHABLE src_nce (this condition is verified
* via an ASSERT for the save_ire->ire_nce in ip_newroute())
*/
mutex_enter(&src_nce->nce_lock);
state = src_nce->nce_state;
if ((src_nce->nce_flags & NCE_F_CONDEMNED) ||
(ipst->ips_ndp4->ndp_g_hw_change > 0)) {
/*
* src_nce has been deleted, or
* ip_arp_news is in the middle of
* flushing entries in the the nce.
* Fail the add, since we don't know
* if it is safe to copy the contents of
* src_nce
*/
DTRACE_PROBE2(nce__bad__src__nce,
nce_t *, src_nce, ill_t *, ill);
mutex_exit(&src_nce->nce_lock);
err = EINVAL;
goto err_ret;
}
template = copyb(src_nce->nce_res_mp);
mutex_exit(&src_nce->nce_lock);
if (template == NULL) {
err = ENOMEM;
goto err_ret;
}
} else if (flags & NCE_F_BCAST) {
/*
* broadcast nce.
*/
template = copyb(ill->ill_bcast_mp);
if (template == NULL) {
err = ENOMEM;
goto err_ret;
}
state = ND_REACHABLE;
} else if (ill->ill_net_type == IRE_IF_NORESOLVER) {
/*
* NORESOLVER entries are always created in the REACHABLE
* state.
*/
if (ill->ill_phys_addr_length == IP_ADDR_LEN &&
ill->ill_mactype != DL_IPV4 &&
ill->ill_mactype != DL_6TO4) {
/*
* We create a nce_res_mp with the IP nexthop address
* as the destination address if the physical length
* is exactly 4 bytes for point-to-multipoint links
* that do their own resolution from IP to link-layer
* address (e.g. IP over X.25).
*/
template = ill_dlur_gen((uchar_t *)addr,
ill->ill_phys_addr_length,
ill->ill_sap, ill->ill_sap_length);
} else {
template = copyb(ill->ill_resolver_mp);
}
if (template == NULL) {
err = ENOMEM;
goto err_ret;
}
state = ND_REACHABLE;
}
nce->nce_fp_mp = NULL;
nce->nce_res_mp = template;
nce->nce_state = state;
if (state == ND_REACHABLE) {
nce->nce_last = TICK_TO_MSEC(lbolt64);
nce->nce_init_time = TICK_TO_MSEC(lbolt64);
} else {
nce->nce_last = 0;
if (state == ND_INITIAL)
nce->nce_init_time = TICK_TO_MSEC(lbolt64);
}
ASSERT((nce->nce_res_mp == NULL && nce->nce_state == ND_INITIAL) ||
(nce->nce_res_mp != NULL && nce->nce_state == ND_REACHABLE));
/*
* Atomically ensure that the ill is not CONDEMNED, before
* adding the NCE.
*/
mutex_enter(&ill->ill_lock);
if (ill->ill_state_flags & ILL_CONDEMNED) {
mutex_exit(&ill->ill_lock);
err = EINVAL;
goto err_ret;
}
if ((nce->nce_next = *ncep) != NULL)
nce->nce_next->nce_ptpn = &nce->nce_next;
*ncep = nce;
nce->nce_ptpn = ncep;
*newnce = nce;
/* This one is for nce being used by an active thread */
NCE_REFHOLD(*newnce);
/* Bump up the number of nce's referencing this ill */
DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
(char *), "nce", (void *), nce);
ill->ill_nce_cnt++;
mutex_exit(&ill->ill_lock);
DTRACE_PROBE1(ndp__add__v4, nce_t *, nce);
return (0);
err_ret:
freeb(mp);
freemsg(template);
return (err);
}
/*
* ndp_walk routine to delete all entries that have a given destination or
* gateway address and cached link layer (MAC) address. This is used when ARP
* informs us that a network-to-link-layer mapping may have changed.
*/
void
nce_delete_hw_changed(nce_t *nce, void *arg)
{
nce_hw_map_t *hwm = arg;
mblk_t *mp;
dl_unitdata_req_t *dlu;
uchar_t *macaddr;
ill_t *ill;
int saplen;
ipaddr_t nce_addr;
if (nce->nce_state != ND_REACHABLE)
return;
IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr);
if (nce_addr != hwm->hwm_addr)
return;
mutex_enter(&nce->nce_lock);
if ((mp = nce->nce_res_mp) == NULL) {
mutex_exit(&nce->nce_lock);
return;
}
dlu = (dl_unitdata_req_t *)mp->b_rptr;
macaddr = (uchar_t *)(dlu + 1);
ill = nce->nce_ill;
if ((saplen = ill->ill_sap_length) > 0)
macaddr += saplen;
else
saplen = -saplen;
/*
* If the hardware address is unchanged, then leave this one alone.
* Note that saplen == abs(saplen) now.
*/
if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen &&
bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) {
mutex_exit(&nce->nce_lock);
return;
}
mutex_exit(&nce->nce_lock);
DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce);
ndp_delete(nce);
}
/*
* This function verifies whether a given IPv4 address is potentially known to
* the NCE subsystem. If so, then ARP must not delete the corresponding ace_t,
* so that it can continue to look for hardware changes on that address.
*/
boolean_t
ndp_lookup_ipaddr(in_addr_t addr, netstack_t *ns)
{
nce_t *nce;
struct in_addr nceaddr;
ip_stack_t *ipst = ns->netstack_ip;
if (addr == INADDR_ANY)
return (B_FALSE);
mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
nce = *(nce_t **)NCE_HASH_PTR_V4(ipst, addr);
for (; nce != NULL; nce = nce->nce_next) {
/* Note that only v4 mapped entries are in the table. */
IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr);
if (addr == nceaddr.s_addr &&
IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) {
/* Single flag check; no lock needed */
if (!(nce->nce_flags & NCE_F_CONDEMNED))
break;
}
}
mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
return (nce != NULL);
}
/*
* Wrapper around ipif_lookup_addr_exact_v6() that allows ND to work properly
* with IPMP. Specifically, since neighbor discovery is always done on
* underlying interfaces (even for addresses owned by an IPMP interface), we
* need to check for `v6addrp' on both `ill' and on the IPMP meta-interface
* associated with `ill' (if it exists).
*/
static ipif_t *
ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill)
{
ipif_t *ipif;
ip_stack_t *ipst = ill->ill_ipst;
ipif = ipif_lookup_addr_exact_v6(v6addrp, ill, ipst);
if (ipif == NULL && IS_UNDER_IPMP(ill)) {
if ((ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) {
ipif = ipif_lookup_addr_exact_v6(v6addrp, ill, ipst);
ill_refrele(ill);
}
}
return (ipif);
}