ip_ndp.c revision 2b24ab6b3865caeede9eeb9db6b83e1d89dcd1ea
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/sysmacros.h>
#include <sys/ethernet.h>
#include <net/if_types.h>
#include <inet/ipclassifier.h>
#include <inet/ipsec_impl.h>
#include <inet/ipsec_info.h>
#include <inet/ip2mac_impl.h>
/*
* Function names with nce_ prefix are static while function
* names with ndp_ prefix are used by rest of the IP.
*
* Lock ordering:
*
* ndp_g_lock -> ill_lock -> nce_lock
*
* The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and
* nce_next. Nce_lock protects the contents of the NCE (particularly
* nce_refcnt).
*/
nce_t *);
#ifdef DEBUG
static void nce_trace_cleanup(const nce_t *);
#endif
NCE_TABLE_SIZE)]))
/* Non-tunable probe interval, based on link capabilities */
/*
* NDP Cache Entry creation routine.
* Mapped entries will never do NUD.
* This routine must always be called with ndp6->ndp_g_lock held.
* Prior to return, nce_refcnt is incremented.
*/
int
{
int err;
if (IN6_IS_ADDR_UNSPECIFIED(addr)) {
ip0dbg(("ndp_add_v6: no addr\n"));
return (EINVAL);
}
if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) {
return (EINVAL);
}
if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) &&
(flags & NCE_F_MAPPING)) {
ip0dbg(("ndp_add_v6: extract mask zero for mapping"));
return (EINVAL);
}
/*
* Allocate the mblk to hold the nce.
*
* XXX This can come out of a separate cache - nce_cache.
* We don't need the mp anymore as there are no more
* "qwriter"s
*/
return (ENOMEM);
/*
* This one holds link layer address
*/
} else {
/*
* We create a nce_res_mp with the IP nexthop address
* as the destination address if the physical length
* is exactly 16 bytes for point-to-multipoint links
* that do their own resolution from IP to link-layer
* address.
*/
} else {
return (EINVAL);
}
}
}
return (ENOMEM);
}
if (state == ND_REACHABLE)
else
/* This one is for nce getting created */
} else {
}
/*
* Atomically ensure that the ill is not CONDEMNED, before
* adding the NCE.
*/
return (EINVAL);
}
/* This one is for nce being used by an active thread */
/* Bump up the number of nce's referencing this ill */
(char *), "nce", (void *), nce);
ill->ill_nce_cnt++;
err = 0;
if (dropped) {
}
err = EINPROGRESS;
} else if (flags & NCE_F_UNSOL_ADV) {
/*
* We account for the transmit below by assigning one
* less than the ndd variable. Subsequent decrements
* are done in ndp_timer.
*/
0);
if (dropped)
if (nce->nce_unsolicit_count != 0) {
}
}
/*
* If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then
* we call nce_fastpath as soon as the nce is resolved in ndp_process.
* We call nce_fastpath from nce_update if the link layer address of
* the peer changes.
*/
return (err);
}
int
{
int err = 0;
/* Get head of v6 hash table */
addr,
mask,
newnce);
} else {
}
return (err);
}
/*
* Remove all the CONDEMNED nces from the appropriate hash table.
* We create a private list of NCEs; these may have ires pointing
* to them, so the list will be passed through to clean up dependent
* ires, and only then can we do NCE_REFRELE, which can make the NCE inactive.
*/
static void
{
*free_nce_list = nce;
}
}
}
/*
* 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup()
* will return this NCE. Also no new IREs will be created that
* point to this NCE (See ire_add_v6). Also no new timeouts will
* be started (See NDP_RESTART_TIMER).
* 2. Cancel any currently running timeouts.
* 3. If there is an ndp walker, return. The walker will do the cleanup.
* This ensures that walkers see a consistent list of NCEs while walking.
* 4. Otherwise remove the NCE from the list of NCEs
* 5. Delete all IREs pointing to this NCE.
*/
void
{
if (ipversion == IPV4_VERSION)
else
/* Serialize deletes */
/* Some other thread is doing the delete */
return;
}
/*
* Caller has a refhold. Also 1 ref for being in the list. Thus
* refcnt has to be >= 2
*/
/* Complete any waiting callbacks */
/*
* Cancel any running timer. Timeout can't be restarted
* since CONDEMNED is set. Can't hold nce_lock across untimeout.
* Passing invalid timeout id is fine.
*/
if (nce->nce_timeout_id != 0) {
nce->nce_timeout_id = 0;
}
/*
* The last ndp walker has already removed this nce from
* the list after we marked the nce CONDEMNED and before
* we grabbed the global lock.
*/
return;
}
if (ndp->ndp_g_walker > 0) {
/*
* Can't unlink. The walker will clean up
*/
return;
}
/*
* Now remove the nce from the list. NDP_RESTART_TIMER won't restart
* the timer since it is marked CONDEMNED.
*/
}
void
{
/* Free all nce allocated messages */
do {
}
/*
* must have been cleaned up in nce_delete
*/
}
#ifdef DEBUG
#endif
(char *), "nce", (void *), nce);
ill->ill_nce_cnt--;
/*
* If the number of nce's associated with this ill have dropped
* to zero, check whether we need to restart any operation that
* is waiting for this to happen.
*/
if (ILL_DOWN_OK(ill)) {
/* ipif_ill_refrele_tail drops the ill_lock */
} else {
}
}
/*
* ndp_walk routine. Delete the nce if it is associated with the ill
* that is going away. Always called as a writer.
*/
void
{
}
}
/*
* Walk a list of soon-to-be-inactive NCEs and blow away all the ires.
*/
static void
{
/*
* It is possible for the last ndp walker (this thread)
* to come here after ndp_delete has marked the nce CONDEMNED
* and before it has removed the nce from the fastpath list
* or called untimeout. So we need to do it here. It is safe
* for both ndp_delete and this thread to do it twice or
* even simultaneously since each of the threads has a
* reference on the nce.
*/
/*
* Cancel any running timer. Timeout can't be restarted
* since CONDEMNED is set. Can't hold nce_lock across untimeout.
* Passing invalid timeout id is fine.
*/
if (nce->nce_timeout_id != 0) {
nce->nce_timeout_id = 0;
}
/*
* We might hit this func thus in the v4 case:
* ipif_down->ipif_ndp_down->ndp_walk
*/
} else {
}
}
}
/*
* Delete an ire when the nce goes away.
*/
/* ARGSUSED */
static void
{
} else {
}
}
/*
* ire_walk routine used to delete every IRE that shares this nce
*/
static void
{
}
}
/*
* Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted.
*/
{
return (B_FALSE);
if (dropped) {
}
} else {
}
return (started);
}
/*
* IPv6 Cache entry lookup. Try to find an nce matching the parameters passed.
* If one is found, the refcnt on the nce will be incremented.
*/
nce_t *
{
if (!caller_holds_lock)
/* Get head of v6 hash table */
if (!caller_holds_lock)
return (nce);
}
/*
* IPv4 Cache entry lookup. Try to find an nce matching the parameters passed.
* If one is found, the refcnt on the nce will be incremented.
* Since multicast mappings are handled in arp, there are no nce_mcast_entries
* so we skip the nce_lookup_mapping call.
* XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL
*/
nce_t *
{
if (!caller_holds_lock)
/* Get head of v4 hash table */
/*
* NOTE: IPv4 never matches across the illgrp since the NCE's we're
* looking up have fastpath headers that are inherently per-ill.
*/
if (!caller_holds_lock)
return (nce);
}
/*
* Cache entry lookup. Try to find an nce matching the parameters passed.
* Look only for exact entries (no mappings). If an nce is found, increment
* the hold count on that nce. The caller passes in the start of the
* appropriate hash table, and must be holding the appropriate global
* lock (ndp_g_lock).
*/
static nce_t *
{
else
if (IN6_IS_ADDR_UNSPECIFIED(addr))
return (NULL);
&ipv6_all_ones)) {
break;
}
}
}
}
return (nce);
}
/*
* Cache entry lookup. Try to find an nce matching the parameters passed.
* Look only for mappings.
*/
static nce_t *
{
if (!IN6_IS_ADDR_MULTICAST(addr))
return (NULL);
break;
}
}
return (nce);
}
/*
* Process passed in parameters either from an incoming packet or via
* user ioctl.
*/
static void
{
/*
* No updates of link layer address or the neighbor state is
* allowed, when the cache is in NONUD state. This still
* allows for responding to reachability solicitation.
*/
return;
}
/*
* Update nce state and send the queued packets
* back to ip; this time the ire will be added.
*/
if (flag & ND_NA_FLAG_SOLICITED) {
} else {
}
else
if (inbound_ill == NULL) {
return;
} else {
}
/*
* Send a forwarded packet back into ip_rput_v6
* just as in ire_send_v6().
* Extract the queue from b_prev (set in
* ip_rput_data_v6).
*/
/*
* Forwarded packets hop count will
* get decremented in ip_rput_data_v6
*/
} else {
/*
* Send locally originated packets back
* into ip_wput_v6.
*/
}
} else {
}
}
return;
}
if (!is_adv) {
/* If this is a SOLICITATION request only */
if (ll_changed)
return;
}
/* If in any other state than REACHABLE, ignore */
}
return;
} else {
if (ll_changed) {
ll_updated = B_TRUE;
}
if (flag & ND_NA_FLAG_SOLICITED) {
} else {
if (ll_updated) {
}
}
NCE_F_ISROUTER)) {
/*
* Router turned to host. We need to remove the
* entry as well as any default route that may be
* using this as a next hop. This is required by
* section 7.2.5 of RFC 2461.
*/
}
} else {
}
}
}
/*
* Walker state structure used by ndp_process() / ndp_process_entry().
*/
typedef struct ndp_process_data {
/*
* Walker callback used by ndp_process() for IPMP groups: calls nce_process()
* for each NCE with a matching address that's in the same IPMP group.
*/
static void
{
}
}
/*
* Wrapper around nce_process() that handles IPMP. In particular, for IPMP,
* NCEs are per-underlying-ill (because of nce_fp_mp) and thus we may have
* more than one NCE for a given IPv6 address to tend to. In that case, we
* need to walk all NCEs and callback nce_process() for each one. Since this
* is expensive, in the non-IPMP case we just directly call nce_process().
* Ultimately, nce_fp_mp needs to be moved out of the nce_t so that all IP
* interfaces in an IPMP group share the same NCEs -- at which point this
* function can be removed entirely.
*/
void
{
return;
}
/* IPMP case: walk all NCEs */
}
/*
* Pass arg1 to the pfi supplied, along with each nce in existence.
* ndp_walk() places a REFHOLD on the nce and drops the lock when
* walking the hash list.
*/
void
{
/* Prevent ndp_delete from unlink and free of NCE */
ndp->ndp_g_walker++;
if (trace) {
} else {
}
}
}
}
if (trace) {
} else {
}
}
}
ndp->ndp_g_walker--;
/*
* While NCE's are removed from global list they are placed
* in a private list, to be passed to nce_ire_delete_list().
* The reason is, there may be ires pointing to this nce
* which need to be cleaned up.
*/
/* Time to delete condemned entries */
}
}
}
}
if (free_nce_list != NULL) {
}
}
/*
* Walk everything.
* Note that ill can be NULL hence can't derive the ipst from it.
*/
void
{
}
/*
* Process resolve requests. Handles both mapped entries
* as well as cases that need to be sent out on the wire.
* Lookup a NCE for a given IRE. Regardless of whether one exists
* or one is created, we defer making ire point to nce until the
* ire is actually added at which point the nce_refcnt on the nce is
* incremented. This is done primarily to have symmetry between ire_add()
* and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
*/
int
{
int err;
if (IN6_IS_ADDR_MULTICAST(dst))
/*
* If `ill' is under IPMP, then first check to see if there's an NCE
* for `dst' on the IPMP meta-interface (e.g., because an application
* explicitly did an SIOCLIFSETND to tie a hardware address to `dst').
* If so, we use that hardware address when creating the NCE below.
* Note that we don't yet have a mechanism to remove these NCEs if the
* NCE for `dst' on the IPMP meta-interface is subsequently removed --
* but rather than build such a beast, we should fix NCEs so that they
* can be properly shared across an IPMP group.
*/
if (IS_UNDER_IPMP(ill)) {
}
}
}
B_FALSE, /* NCE fastpath is per ill; don't match across group */
dst,
0,
&nce);
switch (err) {
case 0:
/*
* New cache entry was created. Make sure that the state
* is not ND_INCOMPLETE. It can be in some other state
* even before we send out the solicitation as we could
* get un-solicited advertisements.
*
* If this is an XRESOLV interface, simply return 0,
* since we don't want to solicit just yet.
*/
return (0);
}
return (0);
}
/* The caller will free mp */
return (ESRCH);
}
/* The caller will free mp */
return (ENOMEM);
}
return (EINPROGRESS);
case EEXIST:
/* Resolution in progress just queue the packet */
} else {
err = EINPROGRESS;
}
} else {
/*
* Any other state implies we have
* a nce but IRE needs to be added ...
* ire_add_v6() will take care of
* the case when the nce becomes CONDEMNED
* before the ire is added to the table.
*/
err = 0;
}
break;
default:
break;
}
return (err);
}
/*
* When there is no resolver, the link layer template is passed in
* the IRE.
* Lookup a NCE for a given IRE. Regardless of whether one exists
* or one is created, we defer making ire point to nce until the
* ire is actually added at which point the nce_refcnt on the nce is
* incremented. This is done primarily to have symmetry between ire_add()
* and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
*/
int
{
int err = 0;
if (IN6_IS_ADDR_MULTICAST(dst)) {
return (err);
}
B_FALSE, /* NCE fastpath is per ill; don't match across group */
dst,
0,
&nce);
switch (err) {
case 0:
/*
* Cache entry with a proper resolver cookie was
* created.
*/
break;
case EEXIST:
err = 0;
break;
default:
break;
}
return (err);
}
/*
* For each interface an entry is added for the unspecified multicast group.
* Here that mapping is used to form the multicast cache entry for a particular
* multicast destination.
*/
static int
{
int err = 0;
return (0);
}
/* No entry; now look up a mapping. This should never fail. */
/* Something broken for the interface. */
return (ESRCH);
}
/*
* For IRE_IF_RESOLVER a hardware mapping can be
* generated, for IRE_IF_NORESOLVER, resolution cookie
* in the ill is copied in ndp_add_v6().
*/
return (ENOMEM);
}
}
/*
* IRE_IF_NORESOLVER type simply copies the resolution
* cookie passed in. So no hw_addr is needed.
*/
dst,
0,
&nce);
if (err != 0) {
return (err);
}
return (0);
}
/*
* Return the link layer address, and any flags of a nce.
*/
int
{
/*
* NOTE: if the ill is an IPMP interface, then match against the whole
* illgrp. This e.g. allows in.ndpd to retrieve the link layer
* addresses for the data addresses on an IPMP interface even though
* ipif_ndp_up() created them with an nce_ill of ipif_bound_ill.
*/
return (ESRCH);
/* If in INCOMPLETE state, no link layer address is available yet */
if (!NCE_ISREACHABLE(nce)) {
return (ESRCH);
}
else
sizeof (lnr->lnr_hdw_addr));
return (0);
}
/*
*/
int
{
return (EINVAL);
}
return (ESRCH);
}
/*
* Update dl_addr_length and dl_addr_offset for primitives that
* have physical addresses as opposed to full saps
*/
case DL_ENABMULTI_REQ:
/* Track the state if this is the first enabmulti */
ip1dbg(("ndp_mcastreq: ENABMULTI\n"));
break;
case DL_DISABMULTI_REQ:
ip1dbg(("ndp_mcastreq: DISABMULTI\n"));
break;
default:
ip1dbg(("ndp_mcastreq: default\n"));
return (EINVAL);
}
return (0);
}
/*
* Send out a NS for resolving the ip address in nce.
*/
void
{
/*
* Pick the src from outgoing packet, if one is available.
* Otherwise let nce_xmit figure out the src.
*/
/* Handle ip_newroute_v6 giving us IPSEC packets */
/*
* This message should have been pulled up already in
* ip_wput_v6. We can't do pullups here because
* the message could be from the nce_qd_mp which could
* have b_next/b_prev non-NULL.
*/
}
}
if (ms == 0) {
}
} else {
}
}
/*
* Send a neighbor solicitation.
* Returns number of milliseconds after which we should either rexmit or abort.
* Return of zero means we should abort.
* The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt.
*
* NOTE: This routine drops nce_lock (and later reacquires it) when sending
* the packet.
*/
{
return (0);
if (dropped)
}
/*
* Attempt to recover an address on an interface that's been marked as a
* duplicate. Because NCEs are destroyed when the interface goes down, there's
* no easy way to just probe the address and have the right thing happen if
* it's no longer in use. Instead, we just bring it up normally and allow the
* regular interface start-up logic to probe for a remaining duplicate and take
* us back down if necessary.
* Neither DHCP nor temporary addresses arrive here; they're excluded by
* ip_ndp_excl.
*/
/* ARGSUSED */
static void
{
/*
* We do not support recovery of proxy ARP'd interfaces,
* because the system lacks a complete proxy ARP mechanism.
*/
continue;
}
/*
* If we have already recovered or if the interface is going
* away, then ignore.
*/
continue;
}
(void) ipif_up_done_v6(ipif);
}
}
/*
* Attempt to recover an IPv6 interface that's been shut down as a duplicate.
* As long as someone else holds the address, the interface will stay down.
* When that conflict goes away, the interface is brought back up. This is
* done so that accidental shutdowns of addresses aren't made permanent. Your
* server will recover from a failure.
*
* For DHCP and temporary addresses, recovery is not done in the kernel.
* Instead, it's handled by user space processes (dhcpagent and in.ndpd).
*
* This function is entered on a timer expiry; the ID is in ipif_recovery_id.
*/
static void
ipif6_dup_recovery(void *arg)
{
ipif->ipif_recovery_id = 0;
return;
/*
* No lock, because this is just an optimization.
*/
return;
/* If the link is down, we'll retry this later */
return;
}
/*
* Perform interface recovery by forcing the duplicate interfaces up and
* allowing the system to determine which ones should stay up.
*
* Called both by recovery timer expiry and link-up notification.
*/
void
{
if (ipif->ipif_recovery_id == 0 &&
}
} else {
/*
* A recovery timer may still be running if we got here from
* ill_restart_dad(); cancel that timer.
*/
if (ipif->ipif_recovery_id != 0)
ipif->ipif_recovery_id = 0;
sizeof (ipif->ipif_v6lcl_addr));
B_FALSE);
}
}
/*
*/
static void
{
int alen = 0;
int len;
/*
* If it's from the fast-path, then it can't be a probe
* message, and thus must include a linkaddr option.
* Extract that here.
*/
switch (icmp6->icmp6_type) {
case ND_NEIGHBOR_SOLICIT:
}
break;
case ND_NEIGHBOR_ADVERT:
}
break;
}
ill->ill_nd_lla_len) {
}
/*
* We cheat a bit here for the sake of printing usable log
* messages in the rare case where the reply we got was unicast
* without a source linkaddr option, and the interface is in
* fastpath mode. (Sigh.)
*/
struct ether_header *pether;
sizeof (*pether));
alen = ETHERADDRL;
}
} else {
if (ill->ill_sap_length < 0) {
} else {
}
}
}
if (alen > 0) {
} else {
*haddrlenp = 0;
}
/* nd_ns_target and nd_na_target are at the same offset, so we cheat */
}
/*
* This is for exclusive changes due to NDP duplicate address detection
* failure.
*/
/* ARGSUSED */
static void
{
}
/*
* Ignore conflicts generated by misbehaving switches that
* just reflect our own messages back to us. For IPMP, we may
* see reflections across any ill in the illgrp.
*/
IS_UNDER_IPMP(ill) &&
goto ignore_conflict;
}
/*
* Look up the appropriate ipif.
*/
goto ignore_conflict;
/* Reload the ill to match the ipif */
/* If it's already duplicate or ineligible, then don't do anything. */
goto ignore_conflict;
}
/*
* If this is a failure during duplicate recovery, then don't
* complain. It may take a long time to recover.
*/
if (!ipif->ipif_was_dup) {
char hbuf[MAC_STR_LEN];
char sbuf[INET6_ADDRSTRLEN];
" disabled", ibuf,
}
ipst->ips_ip_dup_recovery > 0) {
}
}
/*
* Handle failure by tearing down the ipifs with the specified address. Note
* that tearing down the ipif also means deleting the nce through ipif_down, so
* it's not possible to do recovery by just restarting the nce timer. Instead,
* we start a timer on the ipif.
*/
static void
{
} else {
B_FALSE);
}
}
}
/*
* Handle a discovered conflict: some other system is advertising that it owns
* one of our IP addresses. We need to defend ourselves, or just shut down the
* interface.
*/
static void
{
return;
/*
* First, figure out if this address is disposable.
*/
else
/*
* Now figure out how many times we've defended ourselves. Ignore
* defenses that happened long in the past.
*/
now = gethrestime_sec();
}
nce->nce_defense_count++;
/*
* If we've defended ourselves too many times already, then give up and
* tear down the interface(s) using this address. Otherwise, defend by
* sending out an unsolicited Neighbor Advertisement.
*/
if (defs >= maxdefense) {
} else {
char hbuf[MAC_STR_LEN];
char sbuf[INET6_ADDRSTRLEN];
}
}
static void
{
int len;
int flag = 0;
if (IN6_IS_ADDR_MULTICAST(&target)) {
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ndp_input_solicit: Target is"
}
goto done;
}
if (len > sizeof (nd_neighbor_solicit_t)) {
/* Options present */
len -= sizeof (nd_neighbor_solicit_t);
ip1dbg(("ndp_input_solicit: Bad opt len\n"));
goto done;
}
}
if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
/* Check to see if this is a valid DAD solicitation */
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ndp_input_solicit: IPv6 "
"Destination is not solicited node "
"multicast %s\n", AF_INET6,
}
goto done;
}
}
/*
* NOTE: with IPMP, it's possible the nominated multicast ill (which
* received this packet if it's multicast) is not the ill tied to
* e.g. the IPMP ill's data link-local. So we match across the illgrp
* to ensure we find the associated NCE.
*/
/*
* If this is a valid Solicitation, a permanent
* entry should exist in the cache
*/
ip1dbg(("ndp_input_solicit: Wrong target in NS?!"
if (ip_debug > 2) {
/* ip1dbg */
}
goto done;
}
/* At this point we should have a verified NS per spec */
hlen == 0) {
ip1dbg(("ndp_input_solicit: bad SLLA\n"));
goto done;
}
}
}
/* If sending directly to peer, set the unicast flag */
flag |= NDP_UNICAST;
/*
* Create/update the entry for the soliciting node,
* or respond to outstanding queries; don't if
* the source is the unspecified address.
*/
if (!IN6_IS_ADDR_UNSPECIFIED(&src)) {
int err;
/*
* Regular solicitations *must* include the Source Link-Layer
* Address option. Ignore messages that do not.
*/
ip1dbg(("ndp_input_solicit: source link-layer address "
"option missing with a specified source.\n"));
goto done;
}
/*
* This is a regular solicitation. If we're still in the
* process of verifying the address, then don't respond at all
* and don't keep track of the sender.
*/
goto done;
/*
* If the solicitation doesn't have sender hardware address
* (legal for unicast solicitation), then process without
* installing the return NCE. Either we already know it, or
* we'll be forced to look it up when (and if) we reply to the
* packet.
*/
goto no_source;
&src, /* Soliciting nodes address */
0,
0,
&nnce);
switch (err) {
case 0:
/* done with this entry */
break;
case EEXIST:
/*
* B_FALSE indicates this is not an advertisement.
*/
break;
default:
ip1dbg(("ndp_input_solicit: Can't create NCE %d\n",
err));
goto done;
}
flag |= NDP_SOLICITED;
} else {
/*
* No source link layer address option should be present in a
* valid DAD request.
*/
ip1dbg(("ndp_input_solicit: source link-layer address "
"option present with an unspecified source.\n"));
goto done;
}
/*
* Internally looped-back probes won't have DLPI
* attached to them. External ones (which are sent by
* multicast) always will. Just ignore our own
* transmissions.
*/
/*
* If someone else is probing our address, then
* we've crossed wires. Declare failure.
*/
}
goto done;
}
/*
* This is a DAD probe. Multicast the advertisement to the
* all-nodes address.
*/
}
/* Response to a solicitation */
done:
if (bad_solicit)
}
void
{
int len;
ip1dbg(("ndp_input_advert: Target is multicast but the "
"solicited flag is not zero\n"));
return;
}
if (IN6_IS_ADDR_MULTICAST(&target)) {
ip1dbg(("ndp_input_advert: Target is multicast!\n"));
return;
}
if (len > sizeof (nd_neighbor_advert_t)) {
if (!ndp_verify_optlen(opt,
len - sizeof (nd_neighbor_advert_t))) {
ip1dbg(("ndp_input_advert: cannot verify SLLA\n"));
return;
}
/* At this point we have a verified NA per spec */
len -= sizeof (nd_neighbor_advert_t);
hlen == 0) {
ip1dbg(("ndp_input_advert: bad SLLA\n"));
return;
}
}
}
/*
* NOTE: we match across the illgrp since we need to do DAD for all of
* our local addresses, and those are spread across all the active
* ills in the group.
*/
return;
/*
* Someone just advertised one of our local addresses. First,
* check if it was us -- if so, we can safely ignore it.
*/
goto out; /* from us -- no conflict */
/*
* If we're in an IPMP group, check if this is an echo
* from another ill in the group. Use the double-
* checked locking pattern to avoid grabbing
* ill_g_lock in the non-IPMP case.
*/
if (IS_UNDER_IPMP(ill)) {
goto out;
}
}
}
/*
* Our own (looped-back) unsolicited neighbor advertisements
* will get here with dl_mp == NULL. (These will usually be
* filtered by the `haddr' checks above, but point-to-point
* links have no hardware address and thus make it here.)
*/
goto out;
/*
* This appears to be a real conflict. If we're trying to
* configure this NCE (ND_PROBE), then shut it down.
* Otherwise, handle the discovered conflict.
*
* In the ND_PROBE case, dl_mp might be NULL if we're getting
* a unicast reply. This isn't typically done (multicast is
* the norm in response to a probe), but we can handle it.
*/
else
} else {
/* B_TRUE indicates this is an advertisement */
}
out:
}
/*
* Process NDP neighbor solicitation/advertisement messages.
* The checksum has already been verified before reaching here.
*/
void
{
int len;
ip1dbg(("ndp_input: pullupmsg failed\n"));
goto done;
}
ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n"));
goto done;
}
/*
* NDP does not accept any extension headers between the
* IP header and the ICMP header since e.g. a routing
* header could be dangerous.
* This assumes that any AH or ESP headers are removed
* by ip prior to passing the packet to ndp_input.
*/
ip1dbg(("ndp_input: Wrong next header 0x%x\n",
goto done;
}
if (icmp_nd->icmp6_code != 0) {
ip1dbg(("ndp_input: icmp6 code != 0 \n"));
goto done;
}
/*
* Make sure packet length is large enough for either
* a NS or a NA icmp packet.
*/
ip1dbg(("ndp_input: packet too short\n"));
goto done;
}
} else {
}
done:
}
/*
* Utility routine to send an advertisement. Assumes that the NCE cannot
* go away (e.g., because it's refheld).
*/
static boolean_t
{
flags |= NDP_ISROUTER;
}
/*
* Utility routine to send a solicitation. Assumes that the NCE cannot
* go away (e.g., because it's refheld).
*/
static boolean_t
{
sender = &ipv6_all_zeros;
}
/*
* nce_xmit is called to form and transmit a ND solicitation or
* advertisement ICMP packet.
*
* If the source address is unspecified and this isn't a probe (used for
* duplicate address detection), an appropriate source address and link layer
* address will be chosen here. The link layer address option is included if
* the source is specified (i.e., all non-probe packets), and omitted (per the
* specification) otherwise.
*
* It returns B_FALSE only if it does a successful put() to the
* corresponding ill's ill_wq; otherwise it returns B_TRUE.
*/
static boolean_t
{
char buf[INET6_ADDRSTRLEN];
/*
* Check that the sender is actually a usable address on `ill', and if
* so, track that as the src_ipif. If not, for solicitations, set the
* sender to :: so that a new one will be picked below; for adverts,
* drop the packet since we expect nce_xmit_advert() to always provide
* a valid sender.
*/
if (!IN6_IS_ADDR_UNSPECIFIED(sender)) {
!src_ipif->ipif_addr_ready) {
}
if (type == ND_NEIGHBOR_ADVERT) {
ip1dbg(("nce_xmit: No source ipif for src %s\n",
sizeof (buf))));
return (B_TRUE);
}
sender = &ipv6_all_zeros;
}
}
/*
* If we still have an unspecified source (sender) address and this
* isn't a probe, select a source address from `ill'.
*/
/*
* Pick a source address for this solicitation, but restrict
* the selection to addresses assigned to the output
* interface. We do this because the destination will create
* a neighbor cache entry for the source address of this
* packet, so the source address needs to be a valid neighbor.
*/
ip1dbg(("nce_xmit: No source ipif for dst %s\n",
return (B_TRUE);
}
}
/*
* We're either sending a probe or we have a source address.
*/
return (B_TRUE);
}
sizeof (nd_neighbor_advert_t));
if (type == ND_NEIGHBOR_SOLICIT) {
if (!(flag & NDP_UNICAST)) {
/* Form multicast address of the target */
}
} else {
if (flag & NDP_ISROUTER)
if (flag & NDP_SOLICITED)
}
/*
* Use our source address to find the hardware address to put
* in the packet, so that the hardware address and IP address
* will match up -- even if that hardware address doesn't
* match the ill we actually transmit the packet through.
*/
if (hwaddr_ill == NULL) {
ip1dbg(("nce_xmit: no bound ill!\n"));
return (B_TRUE);
}
} else {
}
/* Fill in link layer address and option len */
}
}
plen = 0;
/* Fix up the length of the packet now that plen is known */
icmp6->icmp6_code = 0;
/*
* Prepare for checksum by putting icmp length in the icmp
* checksum field. The checksum is calculated in ip_wput_v6.
*/
/*
* Before we toss the src_ipif, look up the zoneid to pass to
* ip_output_v6(). This is to ensure unicast ND_NEIGHBOR_ADVERT
* packets to be routed correctly by IP (we cannot guarantee that the
* global zone has an interface route to the destination).
*/
}
return (B_FALSE);
}
/*
* Make a link layer address (does not include the SAP) from an nce.
* To form the link layer address, use the last four bytes of the IPv6
* address passed in and the fixed offset stored in nce.
*/
static void
{
int len;
return;
while (len-- > 0)
}
mblk_t *
{
int sap_length;
if (template_mp == NULL)
return (NULL);
/* Copy in the SAP value. */
return (template_mp);
}
/*
* NDP retransmit timer.
* This timer goes off when:
* a. It is time to retransmit NS for resolver.
* b. It is time to send reachability probes.
*/
void
{
char addrbuf[INET6_ADDRSTRLEN];
/*
* The timer has to be cancelled by ndp_delete before doing the final
* refrele. So the NCE is guaranteed to exist when the timer runs
* until it clears the timeout_id. Before clearing the timeout_id
* bump up the refcnt so that we can continue to use the nce
*/
nce->nce_timeout_id = 0;
/*
* Check the reachability state first.
*/
case ND_DELAY:
if (ip_debug > 3) {
/* ip2dbg */
pr_addr_dbg("ndp_timer: state for %s changed "
}
return;
case ND_PROBE:
/* must be retransmit timer */
/*
* As per RFC2461, the nce gets deleted after
* MAX_UNICAST_SOLICIT unsuccessful re-transmissions.
* Note that the first unicast solicitation is sent
* during the DELAY state.
*/
ip2dbg(("ndp_timer: pcount=%x dst %s\n",
if (dropped) {
}
/* No hope, delete the nce */
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ndp_timer: Delete IRE for"
}
/* Wait RetransTimer, before deleting the entry */
ip2dbg(("ndp_timer: pcount=%x dst %s\n",
/* Wait one interval before killing */
/*
* We're done probing, and we can now declare this
* address to be usable. Let IP know that it's ok to
* use.
*/
if (ipif->ipif_was_dup) {
char sbuf[INET6_ADDRSTRLEN];
sizeof (ibuf));
}
}
/* Begin defending our new address */
nce->nce_unsolicit_count = 0;
&ipv6_all_hosts_mcast, 0);
if (dropped) {
} else if (ipst->ips_ip_ndp_defense_interval != 0) {
}
} else {
/*
* This is an address we're probing to be our own, but
* the ill is down. Wait until it comes back before
* doing anything, but switch to reachable state so
* that the restart will work.
*/
}
return;
case ND_INCOMPLETE: {
/*
* Per case (2) in the nce_queue_mp() comments, scan nce_qd_mp
* for any IPMP probe packets, and toss 'em. IPMP probe
* packets will always be at the head of nce_qd_mp and always
* have an ip6i_t header, so we can stop at the first queued
* ND packet without an ip6i_t.
*/
break;
} else {
}
}
break;
}
case ND_REACHABLE:
nce->nce_unsolicit_count != 0) ||
ipst->ips_ip_ndp_defense_interval != 0)) {
if (nce->nce_unsolicit_count > 0)
&ipv6_all_hosts_mcast, 0);
if (dropped) {
}
if (nce->nce_unsolicit_count != 0) {
} else {
}
} else {
}
break;
default:
break;
}
}
/*
* Set a link layer address from the ll_addr passed in.
* Copy SAP from ill.
*/
static void
{
/* Always called before fast_path_probe */
if (ill->ill_sap_length != 0) {
/*
* Copy the SAP type specified in the
* request into the xmit template.
*/
}
if (ill->ill_phys_addr_length > 0) {
/*
* The bcopy() below used to be called for the physical address
* length rather than the link layer address length. For
* ethernet and many other media, the phys_addr and lla are
* identical.
* However, with xresolv interfaces being introduced, the
* phys_addr and lla are no longer the same, and the physical
* address may not have any useful meaning, so we use the lla
* for IPv6 address resolution and destination addressing.
*
* For PPP or other interfaces with a zero length
* physical address, don't do anything here.
* The bcopy() with a zero phys_addr length was previously
* a no-op for interfaces with a zero-length physical address.
* Using the lla for them would change the way they operate.
* Doing nothing in such cases preserves expected behavior.
*/
}
}
static boolean_t
{
return (B_FALSE);
return (B_TRUE);
return (B_FALSE);
}
/*
* Updates the link layer address or the reachability state of
* a cache entry. Reset probe counter if needed.
*/
static void
{
/*
* If this interface does not do NUD, there is no point
* in allowing an update to the cache entry. Although
* we will respond to NS.
* The only time we accept an update for a resolver when
* NUD is turned off is when it has just been created.
* Non-Resolvers will always be created as REACHABLE.
*/
if (new_state != ND_UNCHANGED) {
return;
if (new_state == ND_REACHABLE)
else {
/* We force NUD in this case */
}
}
/*
* In case of fast path we need to free the fastpath
* M_DATA and do another probe. Otherwise we can just
* overwrite the DL_UNITDATA_REQ data, noting we'll lose
* whatever packets happen to be transmitting at the time.
*/
if (new_ll_addr != NULL) {
}
}
if (need_stop_timer) {
nce->nce_timeout_id = 0;
}
if (need_fastpath_update)
}
void
{
}
}
if (head_insert) {
} else {
}
}
static void
{
else
/*
* This message should have been pulled up already in
* ip_wput_v6. We can't do pullups here because the message
* could be from the nce_qd_mp which implies b_next is
* non-NULL.
*/
/*
* If this packet is marked IP6I_IPMP_PROBE, then we need to:
*
* 1. Insert it at the head of the nce_qd_mp list. Consider
* the normal (non-probe) load-spreading case where the
* source address of the ND packet is not tied to nce_ill.
* If the ill bound to the source address cannot receive,
* the response to the ND packet will not be received.
* However, if ND packets for nce_ill's probes are queued
* behind that ND packet, those probes will also fail to
* be sent, and thus in.mpathd will erroneously conclude
* that nce_ill has also failed.
*
* 2. Drop the probe packet in ndp_timer() if the ND did
* not succeed on the first attempt. This ensures that
* ND problems do not manifest as probe RTT spikes.
*/
}
}
/*
* Called when address resolution failed due to a timeout.
* Send an ICMP unreachable in response to all queued packets.
*/
void
{
char buf[INET6_ADDRSTRLEN];
ip1dbg(("nce_resolv_failed: dst %s\n",
}
/*
* This message should have been pulled up already
* in ip_wput_v6. ip_hdr_complete_v6 assumes that
* the header is pulled up.
*/
sizeof (ip6i_t) + IPV6_HDR_LEN);
}
/*
* Ignore failure since icmp_unreachable_v6 will silently
* drop packets with an unspecified source address.
*/
}
}
/*
* Add a new NCE, or change an existing one,
* and the corresponding attributes.
* Disallow states other than ND_REACHABLE or ND_STALE.
*/
int
{
int err;
return (EINVAL);
return (EINVAL);
/* We know it can not be mapping so just look in the hash table */
/* See comment in ndp_query() regarding IS_IPMP(ill) usage */
case NDF_ISROUTER_ON:
break;
case NDF_ISROUTER_OFF:
new_flags &= ~NCE_F_ISROUTER;
break;
case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON):
return (EINVAL);
}
case NDF_ANYCAST_ON:
break;
case NDF_ANYCAST_OFF:
new_flags &= ~NCE_F_ANYCAST;
break;
case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON):
return (EINVAL);
}
addr,
0,
&nce);
if (err != 0) {
return (err);
}
}
/*
* Router turned to host, delete all ires.
* XXX Just delete the entry, but we need to add too.
*/
return (0);
}
/*
* Note that we ignore the state at this point, which
* should be either STALE or REACHABLE. Instead we let
* the link layer address passed in to determine the state
* much like incoming packets.
*/
return (0);
}
/*
* If the device driver supports it, we make nce_fp_mp to have
* an M_DATA prepend. Otherwise nce_fp_mp will be null.
* The caller ensures there is hold on nce for this function.
* Note that since ill_fastpath_probe() copies the mblk there is
* no need for the hold beyond this function.
*/
void
{
int res;
/* Already contains fastpath info */
return;
}
/*
* EAGAIN is an indication of a transient error,
* i.e. allocation failure etc. Leave the nce in the list; it
* will be updated when another probe happens for another ire.
* If not, it will be taken out of the list when the ire is
* deleted.
*/
}
}
/*
* Drain the list of nce's waiting for fastpath response.
*/
void
void *arg)
{
/*
* Take it off the list if we're flushing, or if the callback
* routine tells us to do so. Otherwise, leave the nce in the
* fastpath list to handle any pending response from the lower
* layer. We can't drain the list when the callback routine
* comparison failed, because the response is asynchronous in
* nature, and may not arrive in the same order as the list
* insertion.
*/
if (current_nce == first_nce)
else
} else {
/* previous element that is still in the list */
}
}
}
/*
* Add nce to the nce fastpath list.
*/
void
{
/*
* if nce has not been deleted and
* is not already in the list add it.
*/
}
}
/*
* remove nce from the nce fastpath list.
*/
void
{
goto done;
} else {
break;
}
}
}
done:
}
/*
* Update all NCE's that are not in fastpath mode and
* have an nce_fp_mp that matches mp. mp->b_cont contains
* the fastpath header.
*
* Returns TRUE if entry should be dequeued, or FALSE otherwise.
*/
{
return (B_TRUE);
return (B_TRUE);
ip2dbg(("ndp_fastpath_update: trying\n"));
/*
* The nce is locked here to prevent any other threads
* from accessing and changing nce_res_mp when the IPv6 address
* becomes resolved to an lla while we're in the middle
* of looking at and comparing the hardware address (lla).
* It is also locked to prevent multiple threads in nce_fastpath_update
* from examining nce_res_mp at the same time.
*/
/*
* Don't take the ire off the fastpath list yet,
* since the response may come later.
*/
return (B_FALSE);
}
/* Matched - install mp as the fastpath mp */
ip1dbg(("ndp_fastpath_update: match\n"));
}
return (B_TRUE);
}
/*
* This function handles the DL_NOTE_FASTPATH_FLUSH notification from
* driver. Note that it assumes IP is exclusive...
*/
/* ARGSUSED */
void
{
return;
/* No fastpath info? */
return;
/*
* IPv4 BROADCAST entries:
* We can't delete the nce since it is difficult to
* recreate these without going through the
* ipif down->up dance.
*
* All access to nce->nce_fp_mp in the case of these
* is protected by nce_lock.
*/
} else {
}
} else {
/* Just delete the NCE... */
}
}
/*
* Return a pointer to a given option in the packet.
* Assumes that the option part of the packet has already been validated.
*/
{
while (optlen > 0) {
return (opt);
}
return (NULL);
}
/*
* Verify all option lengths present are > 0, also check to see
* if the option lengths and packet length are consistent.
*/
{
while (optlen > 0) {
if (opt->nd_opt_len == 0)
return (B_FALSE);
if (optlen < 0)
return (B_FALSE);
}
return (B_TRUE);
}
/*
* ndp_walk function.
* Free a fraction of the NCE cache entries.
* A fraction of zero means to not free any in that category.
*/
void
{
return;
return;
}
}
/*
* ndp_walk function.
* Count the number of NCEs that can be deleted.
* These would be hosts but not routers.
*/
void
{
return;
}
#ifdef DEBUG
void
{
if (nce->nce_trace_disable)
return;
}
}
void
{
if (!nce->nce_trace_disable)
}
static void
{
}
#endif
/*
* Called when address resolution fails due to a timeout.
* Send an ICMP unreachable in response to all queued packets.
*/
void
{
char buf[INET6_ADDRSTRLEN];
ip3dbg(("arp_resolv_failed: dst %s\n",
/*
* Send icmp unreachable messages
* to the hosts.
*/
ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n"));
}
}
int
{
int err;
/*
* NOTE: IPv4 never matches across the illgrp since the NCE's we're
* looking up have fastpath headers that are inherently per-ill.
*/
} else {
}
return (err);
}
/*
* NDP Cache Entry creation routine for IPv4.
* Mapped entries are handled in arp.
* This routine must always be called with ndp4->ndp_g_lock held.
* Prior to return, nce_refcnt is incremented.
*/
static int
{
int err;
return (EINVAL);
/*
* Allocate the mblk to hold the nce.
*/
return (ENOMEM);
nce->nce_ll_extract_start = 0;
/* This one is for nce getting created */
/*
* src_nce has been provided by the caller. The only
* caller that provides a non-null, non-broadcast
* src_nce is ip_newroute(), which must pass in
* a ND_REACHABLE src_nce (this condition is verified
* via an ASSERT for the save_ire->ire_nce in ip_newroute()).
*/
/*
* src_nce has been deleted, or
* ip_arp_news is in the middle of
* flushing entries in the nce.
* Fail the add, since we don't know
* if it is safe to copy the contents of
* src_nce
*/
goto err_ret;
}
goto err_ret;
}
} else if (flags & NCE_F_BCAST) {
/*
* broadcast nce.
*/
goto err_ret;
}
/*
* NORESOLVER entries are always created in the REACHABLE
* state.
*/
/*
* We create a nce_res_mp with the IP nexthop address
* as the destination address if the physical length
* is exactly 4 bytes for point-to-multipoint links
* that do their own resolution from IP to link-layer
* address (e.g. IP over X.25).
*/
} else {
}
goto err_ret;
}
}
if (state == ND_REACHABLE) {
} else {
if (state == ND_INITIAL)
}
/*
* Atomically ensure that the ill is not CONDEMNED, before
* adding the NCE.
*/
goto err_ret;
}
/* This one is for nce being used by an active thread */
/* Bump up the number of nce's referencing this ill */
(char *), "nce", (void *), nce);
ill->ill_nce_cnt++;
return (0);
return (err);
}
/*
* ndp_walk routine to delete all entries that have a given destination or
* gateway address and cached link layer (MAC) address. This is used when ARP
* informs us that a network-to-link-layer mapping may have changed.
*/
void
{
int saplen;
return;
return;
return;
}
else
/*
* If the hardware address is unchanged, then leave this one alone.
* Note that saplen == abs(saplen) now.
*/
return;
}
}
/*
* This function verifies whether a given IPv4 address is potentially known to
* the NCE subsystem. If so, then ARP must not delete the corresponding ace_t,
* so that it can continue to look for hardware changes on that address.
*/
{
if (addr == INADDR_ANY)
return (B_FALSE);
/* Note that only v4 mapped entries are in the table. */
/* Single flag check; no lock needed */
break;
}
}
}
/*
* Wrapper around ipif_lookup_addr_exact_v6() that allows ND to work properly
* with IPMP. Specifically, since neighbor discovery is always done on
* underlying interfaces (even for addresses owned by an IPMP interface), we
* need to check for `v6addrp' on both `ill' and on the IPMP meta-interface
* associated with `ill' (if it exists).
*/
static ipif_t *
{
}
}
return (ipif);
}