ip_ndp.c revision 309fa1740d090ebe5cd183c34c7bc282f94856fa
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/sysmacros.h>
#include <inet/ipsec_impl.h>
#include <inet/ipsec_info.h>
/*
* Function names with nce_ prefix are static while function
* names with ndp_ prefix are used by rest of the IP.
*/
extern void th_trace_rrecord(th_trace_t *);
#ifdef NCE_DEBUG
void nce_trace_inactive(nce_t *);
#endif
/* NDP Cache Entry Hash Table */
#define NCE_TABLE_SIZE 256
static int ndp_g_walker = 0; /* # of active thread */
/* walking nce hash list */
/* ndp_g_walker_cleanup will be true when deletion has to be deferred */
#define NCE_HASH_PTR(addr) \
/*
* NDP Cache Entry creation routine.
* Mapped entries will never do NUD.
* This routine must always be called with ndp_g_lock held.
* Prior to return, nce_refcnt is incremented.
*/
int
{
if (IN6_IS_ADDR_UNSPECIFIED(addr)) {
ip0dbg(("ndp_add: no addr\n"));
return (EINVAL);
}
if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) {
return (EINVAL);
}
if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) &&
(flags & NCE_F_MAPPING)) {
ip0dbg(("ndp_add: extract mask zero for mapping"));
return (EINVAL);
}
/*
* Allocate the mblk to hold the nce.
*
* XXX This can come out of a separate cache - nce_cache.
* We don't need the mp anymore as there are no more
* "qwriter"s
*/
return (ENOMEM);
/*
* This one holds link layer address
*/
} else {
}
return (ENOMEM);
}
if (state == ND_REACHABLE)
else
/* This one is for nce getting created */
ncep = &nce_mask_entries;
} else {
}
#ifdef NCE_DEBUG
#endif
/*
* Atomically ensure that the ill is not CONDEMNED, before
* adding the NCE.
*/
return (EINVAL);
}
/* This one is for nce being used by an active thread */
/* Bump up the number of nce's referencing this ill */
ill->ill_nce_cnt++;
/*
* Before we insert the nce, honor the UNSOL_ADV flag.
* We cannot hold the ndp_g_lock and call nce_xmit
* which does a putnext.
*/
if (flags & NCE_F_UNSOL_ADV) {
/*
* We account for the transmit below by assigning one
* less than the ndd variable. Subsequent decrements
* are done in ndp_timer.
*/
ill, /* ill to be used for extracting ill_nd_lla */
B_TRUE, /* use ill_nd_lla */
addr, /* Source and target of the advertisement pkt */
&ipv6_all_hosts_mcast, /* Destination of the packet */
flags);
if (dropped)
if (nce->nce_unsolicit_count != 0) {
}
}
/*
* If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then
* we call nce_fastpath as soon as the nce is resolved in ndp_process.
* We also call nce_fastpath from nce_update if the link layer address
* of the peer changes.
*/
return (0);
}
int
{
int err = 0;
addr,
mask,
newnce);
} else {
}
return (err);
}
/*
* Remove all the CONDEMNED nces from the appropriate hash table.
* We create a private list of NCEs, these may have ires pointing
* to them, so the list will be passed through to clean up dependent
* ires and only then we can do NCE_REFRELE which can make NCE inactive.
*/
static void
{
ASSERT(ndp_g_walker == 0);
*free_nce_list = nce;
}
}
}
/*
* 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup()
* will return this NCE. Also no new IREs will be created that
* point to this NCE (See ire_add_v6). Also no new timeouts will
* be started (See NDP_RESTART_TIMER).
* 2. Cancel any currently running timeouts.
* 3. If there is an ndp walker, return. The walker will do the cleanup.
* This ensures that walkers see a consistent list of NCEs while walking.
* 4. Otherwise remove the NCE from the list of NCEs
* 5. Delete all IREs pointing to this NCE.
*/
void
{
/* Serialize deletes */
/* Some other thread is doing the delete */
return;
}
/*
* Caller has a refhold. Also 1 ref for being in the list. Thus
* refcnt has to be >= 2
*/
/*
* Cancel any running timer. Timeout can't be restarted
* since CONDEMNED is set. Can't hold nce_lock across untimeout.
* Passing invalid timeout id is fine.
*/
if (nce->nce_timeout_id != 0) {
nce->nce_timeout_id = 0;
}
/*
* The last ndp walker has already removed this nce from
* the list after we marked the nce CONDEMNED and before
* we grabbed the ndp_g_lock.
*/
return;
}
if (ndp_g_walker > 0) {
/*
* Can't unlink. The walker will clean up
*/
return;
}
/*
* Now remove the nce from the list. NDP_RESTART_TIMER won't restart
* the timer since it is marked CONDEMNED.
*/
}
void
{
/* Free all nce allocated messages */
do {
}
#ifdef NCE_DEBUG
#endif
ill->ill_nce_cnt--;
/*
* If the number of nce's associated with this ill has dropped
* to zero, check whether we need to restart any operation that
* is waiting for this to happen.
*/
if (ill->ill_nce_cnt == 0) {
/* ipif_ill_refrele_tail drops the ill_lock */
} else {
}
}
/*
* ndp_walk routine. Delete the nce if it is associated with the ill
* that is going away. Always called as a writer.
*/
void
{
}
}
/*
* Walk a list of to be inactive NCEs and blow away all the ires.
*/
static void
{
/*
* It is possible for the last ndp walker (this thread)
* to come here after ndp_delete has marked the nce CONDEMNED
* and before it has removed the nce from the fastpath list
* or called untimeout. So we need to do it here. It is safe
* for both ndp_delete and this thread to do it twice or
* even simultaneously since each of the threads has a
* reference on the nce.
*/
/*
* Cancel any running timer. Timeout can't be restarted
* since CONDEMNED is set. Can't hold nce_lock across untimeout.
* Passing invalid timeout id is fine.
*/
if (nce->nce_timeout_id != 0) {
nce->nce_timeout_id = 0;
}
}
}
/*
* Delete an ire when the nce goes away.
*/
/* ARGSUSED */
static void
{
}
/*
* ire_walk routine used to delete every IRE that shares this nce
*/
static void
{
}
/*
* Cache entry lookup. Try to find an nce matching the parameters passed.
* If one is found, the refcnt on the nce will be incremented.
*/
nce_t *
{
if (!caller_holds_lock)
if (!caller_holds_lock)
return (nce);
}
/*
* Cache entry lookup. Try to find an nce matching the parameters passed.
* Look only for exact entries (no mappings). If an nce is found, increment
* the hold count on that nce.
*/
static nce_t *
{
if (IN6_IS_ADDR_UNSPECIFIED(addr))
return (NULL);
&ipv6_all_ones)) {
break;
}
}
}
}
return (nce);
}
/*
* Cache entry lookup. Try to find an nce matching the parameters passed.
* Look only for mappings.
*/
static nce_t *
{
if (!IN6_IS_ADDR_MULTICAST(addr))
return (NULL);
break;
}
}
return (nce);
}
/*
* Process passed in parameters either from an incoming packet or via
* user ioctl.
*/
void
{
/*
* No updates of link layer address or the neighbor state is
* allowed, when the cache is in NONUD state. This still
* allows for responding to reachability solicitation.
*/
return;
}
/*
* Update nce state and send the queued packets
* back to ip; this time the ire will be added.
*/
if (flag & ND_NA_FLAG_SOLICITED) {
} else {
}
if (inbound_ill == NULL) {
return;
} else {
}
/*
* Send a forwarded packet back into ip_rput_v6
* just as in ire_send_v6().
* Extract the queue from b_prev (set in
* ip_rput_data_v6).
*/
/*
* Forwarded packets hop count will
* get decremented in ip_rput_data_v6
*/
} else {
/*
* Send locally originated packets back
* into ip_wput_v6.
*/
}
} else {
}
}
return;
}
if (!is_adv) {
/* If this is a SOLICITATION request only */
if (ll_changed)
return;
}
/* If in any other state than REACHABLE, ignore */
}
return;
} else {
if (ll_changed) {
ll_updated = B_TRUE;
}
if (flag & ND_NA_FLAG_SOLICITED) {
} else {
if (ll_updated) {
}
}
NCE_F_ISROUTER)) {
/*
* Router turned to host. We need to remove the
* entry as well as any default route that may be
* using this as a next hop. This is required by
* section 7.2.5 of RFC 2461.
*/
}
}
}
}
/*
* Pass arg1 to the pfi supplied, along with each nce in existence.
* ndp_walk() places a REFHOLD on the nce and drops the lock when
* walking the hash list.
*/
void
{
ndp_g_walker++; /* Prevent ndp_delete from unlink and free of NCE */
if (trace) {
} else {
}
}
}
}
if (trace) {
} else {
}
}
}
ndp_g_walker--;
/*
* While NCE's are removed from global list they are placed
* in a private list, to be passed to nce_ire_delete_list().
* The reason is that there may be ires pointing to this nce
* which need to be cleaned up.
*/
if (ndp_g_walker_cleanup && ndp_g_walker == 0) {
/* Time to delete condemned entries */
}
}
}
}
if (free_nce_list != NULL) {
}
}
void
{
}
/*
* Prepend the zoneid using an ipsec_out_t for later use by functions like
* ip_rput_v6() after neighbor discovery has taken place. If the message
* block already has a M_CTL at the front of it, then simply set the zoneid
* appropriately.
*/
static mblk_t *
{
return (mp);
}
return (NULL);
/* This is not a secure packet */
return (first_mp);
}
/*
* Process resolve requests. Handles both mapped entries
* as well as cases that need to be sent out on the wire.
* Lookup a NCE for a given IRE. Regardless of whether one exists
* or one is created, we defer making ire point to nce until the
* ire is actually added at which point the nce_refcnt on the nce is
* incremented. This is done primarily to have symmetry between ire_add()
* and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
*/
int
{
int err = 0;
if (IN6_IS_ADDR_MULTICAST(dst)) {
return (err);
}
NULL, /* No hardware address */
dst,
0,
&nce);
switch (err) {
case 0:
/*
* New cache entry was created. Make sure that the state
* is not ND_INCOMPLETE. It can be in some other state
* even before we send out the solicitation as we could
* get un-solicited advertisements.
*
* If this is an XRESOLV interface, simply return 0,
* since we don't want to solicit just yet.
*/
return (0);
}
return (0);
}
/* The caller will free mp */
return (ENOMEM);
}
if (ms == 0) {
/* The caller will free mp */
return (EBUSY);
}
return (EINPROGRESS);
case EEXIST:
/* Resolution in progress just queue the packet */
} else {
err = EINPROGRESS;
}
} else {
/*
* Any other state implies we have
* a nce but IRE needs to be added ...
* ire_add_v6() will take care of the
* case when the nce becomes CONDEMNED
* before the ire is added to the table.
*/
err = 0;
}
break;
default:
break;
}
return (err);
}
/*
* When there is no resolver, the link layer template is passed in
* the IRE.
* Lookup a NCE for a given IRE. Regardless of whether one exists
* or one is created, we defer making ire point to nce until the
* ire is actually added at which point the nce_refcnt on the nce is
* incremented. This is done primarily to have symmetry between ire_add()
* and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
*/
int
{
int err = 0;
if (IN6_IS_ADDR_MULTICAST(dst)) {
return (err);
}
NULL, /* hardware address */
dst,
0,
&nce);
switch (err) {
case 0:
/*
* Cache entry with a proper resolver cookie was
* created.
*/
break;
case EEXIST:
err = 0;
break;
default:
break;
}
return (err);
}
/*
* For each interface an entry is added for the unspecified multicast group.
* Here that mapping is used to form the multicast cache entry for a particular
* multicast destination.
*/
static int
{
int err = 0;
return (0);
}
/* No entry; now look up a mapping. This should never fail. */
/* Something broken for the interface. */
return (ESRCH);
}
/*
* For IRE_IF_RESOLVER a hardware mapping can be
* generated, for IRE_IF_NORESOLVER, resolution cookie
* in the ill is copied in ndp_add().
*/
return (ENOMEM);
}
}
/*
* IRE_IF_NORESOLVER type simply copies the resolution
* cookie passed in. So no hw_addr is needed.
*/
dst,
0,
&nce);
if (err != 0) {
return (err);
}
return (0);
}
/*
* Return the link layer address, and any flags of a nce.
*/
int
{
return (ESRCH);
/* If in INCOMPLETE state, no link layer address is available yet */
goto done;
else
sizeof (lnr->lnr_hdw_addr));
done:
return (0);
}
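/*
* Handle a DL_ENABMULTI_REQ or DL_DISABMULTI_REQ multicast request;
* any other primitive is rejected with EINVAL.
*/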
int
{
return (EINVAL);
}
return (ESRCH);
}
/*
* Update dl_addr_length and dl_addr_offset for primitives that
* have physical addresses as opposed to full saps
*/
case DL_ENABMULTI_REQ:
/* Track the state if this is the first enabmulti */
ip1dbg(("ndp_mcastreq: ENABMULTI\n"));
break;
case DL_DISABMULTI_REQ:
ip1dbg(("ndp_mcastreq: DISABMULTI\n"));
break;
default:
ip1dbg(("ndp_mcastreq: default\n"));
return (EINVAL);
}
return (0);
}
/*
* Send a neighbor solicitation.
* Returns number of milliseconds after which we should either rexmit or abort.
* Return of zero means we should abort.
* The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt.
*
* NOTE: This routine drops nce_lock (and later reacquires it) when sending
* the packet.
* NOTE: This routine does not consume mp.
*/
{
return (0);
}
} else {
}
/* Handle ip_newroute_v6 giving us IPSEC packets */
/*
* This message should have been pulled up already in
* ip_wput_v6. We can't do pullups here because the message
* could be from the nce_qd_mp which could have b_next/b_prev
* non-NULL.
*/
sizeof (ip6i_t) + IPV6_HDR_LEN);
}
/*
* If the src of outgoing packet is one of the assigned interface
* addresses use it, otherwise we will pick the source address below.
*/
if (!IN6_IS_ADDR_UNSPECIFIED(&src)) {
if (IN6_ARE_ADDR_EQUAL(&src,
&ipif->ipif_v6lcl_addr)) {
break;
}
}
break;
}
/* May be a forwarding packet */
}
}
/*
* If source address is unspecified, nce_xmit will choose
* one for us and initialize the hardware address also
* appropriately.
*/
if (IN6_IS_ADDR_UNSPECIFIED(&src))
&dst, 0);
if (dropped)
return (ill->ill_reachable_retrans_time);
}
void
{
int len;
int flag = 0;
if (IN6_IS_ADDR_MULTICAST(&target)) {
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ndp_input_solicit: Target is"
}
goto done;
}
if (len > sizeof (nd_neighbor_solicit_t)) {
/* Options present */
len -= sizeof (nd_neighbor_solicit_t);
ip1dbg(("ndp_input_solicit: Bad opt len\n"));
goto done;
}
}
if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
/* Check to see if this is a valid DAD solicitation */
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ndp_input_solicit: IPv6 "
"Destination is not solicited node "
"multicast %s\n", AF_INET6,
}
goto done;
}
}
/*
* If this is a valid Solicitation, a permanent
* entry should exist in the cache
*/
ip1dbg(("ndp_input_solicit: Wrong target in NS?!"
if (ip_debug > 2) {
/* ip1dbg */
}
goto done;
}
/* At this point we should have a verified NS per spec */
/*
* No source link layer address option should
* be present in a valid DAD request.
*/
if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
ip1dbg(("ndp_input_solicit: source link-layer "
"address option present with an "
"unspecified source. \n"));
goto done;
}
hlen == 0) {
goto done;
}
}
}
/*
* haddr can be NULL if no options are present, or if no
* source link layer address option is present in the
* received NDP options of the solicitation message.
*/
/* Form solicited node multicast address */
ill,
&target,
&dst,
flag);
goto done;
}
}
/* Set override flag, it will be reset later if need be. */
flag |= NDP_UNICAST;
}
/*
* Create or update the entry for the soliciting node,
* or respond to outstanding queries; don't if
* the source is the unspecified address.
*/
if (!IN6_IS_ADDR_UNSPECIFIED(&src)) {
int err = 0;
&src, /* Soliciting nodes address */
0,
0,
&nnce);
switch (err) {
case 0:
/* done with this entry */
break;
case EEXIST:
/*
* B_FALSE indicates this is not an
* advertisement.
*/
break;
default:
ip1dbg(("ndp_input_solicit: Can't create NCE %d\n",
err));
goto done;
}
flag |= NDP_SOLICITED;
} else {
/*
* This is a DAD req, multicast the advertisement
* to the all-nodes address.
*/
}
flag |= NDP_ISROUTER;
/* Response to a solicitation */
ill, /* ill to be used for extracting ill_nd_lla */
B_TRUE, /* use ill_nd_lla */
&target, /* Source and target of the advertisement pkt */
&src, /* IP Destination (source of original pkt) */
flag);
done:
if (bad_solicit)
}
void
{
int len;
ip1dbg(("ndp_input_advert: Target is multicast but the "
"solicited flag is not zero\n"));
return;
}
if (IN6_IS_ADDR_MULTICAST(&target)) {
ip1dbg(("ndp_input_advert: Target is multicast!\n"));
return;
}
if (len > sizeof (nd_neighbor_advert_t)) {
if (!ndp_verify_optlen(opt,
len - sizeof (nd_neighbor_advert_t))) {
return;
}
/* At this point we have a verified NA per spec */
len -= sizeof (nd_neighbor_advert_t);
hlen == 0) {
return;
}
}
}
/*
* If this interface is part of the group look at all the
* ills in the group.
*/
if (!ILL_CAN_LOOKUP(ill)) {
continue;
}
/* We have to drop the lock since ndp_process calls put* */
if (na->nd_na_flags_reserved &
}
/* B_TRUE indicates this is an advertisement */
}
}
}
/*
* Process NDP neighbor solicitation/advertisement messages.
* The checksum has already been verified before reaching here.
*/
void
{
int len;
ip1dbg(("ndp_input: pullupmsg failed\n"));
goto done;
}
ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n"));
goto done;
}
/*
* NDP does not accept any extension headers between the
* IP header and the ICMP header since e.g. a routing
* header could be dangerous.
* This assumes that any AH or ESP headers are removed
* by ip prior to passing the packet to ndp_input.
*/
ip1dbg(("ndp_input: Wrong next header 0x%x\n",
goto done;
}
if (icmp_nd->icmp6_code != 0) {
ip1dbg(("ndp_input: icmp6 code != 0 \n"));
goto done;
}
/*
* Make sure packet length is large enough for either
* a NS or a NA icmp packet.
*/
ip1dbg(("ndp_input: packet too short\n"));
goto done;
}
} else {
}
done:
}
/*
* nce_xmit is called to form and transmit a ND solicitation or
* advertisement ICMP packet.
* If source address is unspecified, appropriate source address
* and link layer address will be chosen here. This function
* *always* sends the link layer option.
* It returns B_FALSE only if it does a successful put() to the
* corresponding ill's ill_wq otherwise returns B_TRUE.
*/
static boolean_t
int flag)
{
/*
* If we have an unspecified source (sender) address, select a
* proper source address for the solicitation here itself so
* that we can initialize the h/w address correctly. This is
* needed for interface groups as source address can come from
* the whole group and the h/w address initialized from ill will
* be wrong if the source address comes from a different ill.
*
* Note that the NA never comes here with the unspecified source
* address. The following asserts that whenever the source
* address is specified, the haddr also should be specified.
*/
if (IN6_IS_ADDR_UNSPECIFIED(sender)) {
/*
* Pick a source address for this solicitation, but
* restrict the selection to addresses assigned to the
* output interface (or interface group). We do this
* because the destination will create a neighbor cache
* entry for the source address of this packet, so the
* source address had better be a valid neighbor.
*/
char buf[INET6_ADDRSTRLEN];
ip1dbg(("nce_xmit: No source ipif for dst %s\n",
sizeof (buf))));
return (B_TRUE);
}
}
/*
* Always make sure that the NS/NA packets don't get load
* spread. This is needed so that the probe packets sent
* by the in.mpathd daemon can really go out on the desired
* interface. Probe packets are made to go out on a desired
* interface by including an ip6i with ATTACH_IF flag. As these
* packets indirectly end up sending/receiving NS/NA packets
* (neighbor doing NUD), we have to make sure that NA
* also go out on the same interface.
*/
plen * 8;
return (B_TRUE);
}
sizeof (nd_neighbor_advert_t));
if (operation == ND_NEIGHBOR_SOLICIT) {
if (!(flag & NDP_UNICAST)) {
/* Form multicast address of the target */
}
} else {
if (flag & NDP_ISROUTER)
if (flag & NDP_SOLICITED)
}
/* Fill in link layer address and option len */
icmp6->icmp6_code = 0;
/*
* Prepare for checksum by putting icmp length in the icmp
* checksum field. The checksum is calculated in ip_wput_v6.
*/
return (B_FALSE);
}
return (B_TRUE);
}
/*
* Make a link layer address (does not include the SAP) from an nce.
* To form the link layer address, use the last four bytes of ipv6
* address passed in and the fixed offset stored in nce.
*/
static void
{
int len;
return;
while (len-- > 0)
}
/*
* Pass a cache report back out via NDD.
*/
/* ARGSUSED */
int
{
return (0);
}
/*
* Convert a link level address of arbitrary length
* to an ASCII string.
* The caller *must* have already verified that the string buffer
* is large enough to hold the entire string, including the trailing NUL.
*/
static void
{
int i;
buf[0] = '\0';
for (i = 0; i < addrlen; i++) {
addrbyte[0] = '\0';
}
*--buf = '\0';
}
/*
* Add a single line to the NDP Cache Entry Report.
*/
static void
{
char local_buf[INET6_ADDRSTRLEN];
uchar_t *h;
/*
* Lock the nce to protect nce_res_mp from being changed
* if an external resolver address resolution completes
* while nce_res_mp is being accessed here.
*
* Deal with all address formats, not just Ethernet-specific ones.
* In addition, make sure that the mblk has enough space
* before writing to it. If it doesn't, allocate a new one.
*/
if (flags & NCE_F_PERMANENT)
*m++ = 'P';
if (flags & NCE_F_ISROUTER)
*m++ = 'R';
if (flags & NCE_F_MAPPING)
*m++ = 'M';
*m = '\0';
else
if (addrlen <= 0) {
(void) mi_mpprintf(mp,
"%8s %9s %5s %s/%d",
"None",
} else {
/*
*/
return;
}
addr_buf);
else
addr_buf);
}
} else {
(void) mi_mpprintf(mp,
"%8s %9s %5s %s/%d",
"None",
}
}
mblk_t *
{
int sap_length;
if (template_mp == NULL)
return (NULL);
/* Copy in the SAP value. */
return (template_mp);
}
/*
* NDP retransmit timer.
* This timer goes off when:
* a. It is time to retransmit NS for resolver.
* b. It is time to send reachability probes.
*/
void
{
char addrbuf[INET6_ADDRSTRLEN];
/*
* The timer has to be cancelled by ndp_delete before doing the final
* refrele. So the NCE is guaranteed to exist when the timer runs
* until it clears the timeout_id. Before clearing the timeout_id
* bump up the refcnt so that we can continue to use the nce
*/
/*
* Grab the ill_g_lock now itself to avoid lock order problems.
* nce_solicit needs ill_g_lock to be able to traverse ills
*/
nce->nce_timeout_id = 0;
/*
* Check the reachability state first.
*/
case ND_DELAY:
if (ip_debug > 3) {
/* ip2dbg */
pr_addr_dbg("ndp_timer: state for %s changed "
}
return;
case ND_PROBE:
/* must be retransmit timer */
/* Wait RetransTimer, before deleting the entry */
ip2dbg(("ndp_timer: pcount=%x dst %s\n",
} else {
/*
* As per RFC2461, the nce gets deleted after
* MAX_UNICAST_SOLICIT unsuccessful re-transmissions.
* Note that the first unicast solicitation is sent
* during the DELAY state.
*/
ip2dbg(("ndp_timer: pcount=%x dst %s\n",
if (dropped) {
}
} else {
/* No hope, delete the nce */
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ndp_timer: Delete IRE for"
" dst %s\n", AF_INET6,
}
}
}
return;
case ND_INCOMPLETE:
/*
* Must be resolver's retransmit timer.
*/
/*
* Walk the list of packets queued, and see if there
* are any multipathing probe packets. Such packets
* are always queued at the head. Since this is a
* retransmit timer firing, mark such packets as
* delayed in ND resolution. This info will be used
* in ip_wput_v6(). Multipathing probe packets will
* always have an ip6i_t. Once we hit a packet without
* it, we can break out of this loop.
*/
else
break;
/*
* This message should have been pulled up already in
* ip_wput_v6. We can't do pullups here because the
* b_next/b_prev is non-NULL.
*/
sizeof (ip6i_t) + IPV6_HDR_LEN);
/* Mark this packet as delayed due to ND resolution */
}
if (ms == 0) {
} else {
}
} else {
}
return;
}
break;
case ND_REACHABLE :
nce->nce_unsolicit_count != 0) {
ill, /* ill to be used for hw addr */
B_FALSE, /* use ill_phys_addr */
if (dropped) {
}
if (nce->nce_unsolicit_count != 0) {
}
} else {
}
break;
default:
break;
}
}
/*
* Set a link layer address from the ll_addr passed in.
* Copy SAP from ill.
*/
static void
{
/* Always called before fast_path_probe */
if (ill->ill_sap_length != 0) {
/*
* Copy the SAP type specified in the
* request into the xmit template.
*/
}
if (ill->ill_phys_addr_length > 0) {
/*
* The bcopy() below used to be called for the physical address
* length rather than the link layer address length. For
* ethernet and many other media, the phys_addr and lla are
* identical.
* However, with xresolv interfaces being introduced, the
* phys_addr and lla are no longer the same, and the physical
* address may not have any useful meaning, so we use the lla
* for IPv6 address resolution and destination addressing.
*
* For PPP or other interfaces with a zero length
* physical address, don't do anything here.
* The bcopy() with a zero phys_addr length was previously
* a no-op for interfaces with a zero-length physical address.
* Using the lla for them would change the way they operate.
* Doing nothing in such cases preserves expected behavior.
*/
}
}
static boolean_t
{
return (B_FALSE);
return (B_TRUE);
return (B_FALSE);
}
/*
* Updates the link layer address or the reachability state of
* a cache entry. Reset probe counter if needed.
*/
static void
{
/*
* If this interface does not do NUD, there is no point
* in allowing an update to the cache entry. Although
* we will respond to NS.
* The only time we accept an update for a resolver when
* NUD is turned off is when it has just been created.
* Non-Resolvers will always be created as REACHABLE.
*/
if (new_state != ND_UNCHANGED) {
return;
if (new_state == ND_REACHABLE)
else {
/* We force NUD in this case */
}
}
/*
* In case of fast path we need to free the fastpath
* M_DATA and do another probe. Otherwise we can just
* overwrite the DL_UNITDATA_REQ data, noting we'll lose
* whatever packets happen to be transmitting at the time.
*/
if (new_ll_addr != NULL) {
}
}
if (need_stop_timer) {
nce->nce_timeout_id = 0;
}
if (need_fastpath_update)
}
static void
{
else
/*
* This message should have been pulled up already in
* ip_wput_v6. We can't do pullups here because the message
* could be from the nce_qd_mp which could have b_next/b_prev
* non-NULL.
*/
sizeof (ip6i_t) + IPV6_HDR_LEN);
/*
* Multipathing probe packets have IP6I_DROP_IFDELAYED set.
* This has 2 aspects mentioned below.
* 1. Perform head insertion in the nce_qd_mp for these packets.
* This ensures that next retransmit of ND solicitation
* will use the interface specified by the probe packet,
* for both NS and NA. This corresponds to the src address
* in the IPv6 packet. If we insert at tail, we will be
* depending on the packet at the head for successful
* ND resolution. This is not reliable, because the interface
* on which the NA arrives could be different from the interface
* on which the NS was sent, and if the receiving interface is
* failed, it will appear that the sending interface is also
* failed, causing in.mpathd to misdiagnose this as link
* failure.
* 2. Drop the original packet, if the ND resolution did not
* succeed in the first attempt. However we will create the
* nce and the ire, as soon as the ND resolution succeeds.
* We don't gain anything by queueing multiple probe packets
* and sending them back-to-back once resolution succeeds.
* It is sufficient to send just 1 packet after ND resolution
* succeeds. Since mpathd is sending down probe packets at a
* constant rate, we don't need to send the queued packet. We
* need to queue it only for NDP resolution. The benefit of
* dropping the probe packets that were delayed in ND
* resolution, is that in.mpathd will not see inflated
* RTT. If the ND resolution does not succeed within
* in.mpathd's failure detection time, mpathd may detect
* a failure, and it does not matter whether the packet
* was queued or dropped.
*/
}
if (++count >
ip1dbg(("nce_queue_mp: pkt dropped\n"));
}
}
/* put this on the list */
if (head_insert) {
} else {
}
}
/*
* Called when address resolution failed due to a timeout.
* Send an ICMP unreachable in response to all queued packets.
*/
void
{
char buf[INET6_ADDRSTRLEN];
ip1dbg(("nce_resolv_failed: dst %s\n",
}
/*
* This message should have been pulled up already
* in ip_wput_v6. ip_hdr_complete_v6 assumes that
* the header is pulled up.
*/
sizeof (ip6i_t) + IPV6_HDR_LEN);
}
/*
* Ignore failure since icmp_unreachable_v6 will silently
* drop packets with an unspecified source address.
*/
}
}
/*
* Add or change an nce entry
* and the corresponding attributes.
* Disallow states other than ND_REACHABLE or ND_STALE.
*/
int
{
int err;
return (EINVAL);
/* We know it can not be mapping so just look in the hash table */
case NDF_ISROUTER_ON:
break;
case NDF_ISROUTER_OFF:
new_flags &= ~NCE_F_ISROUTER;
break;
case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON):
return (EINVAL);
}
case NDF_ANYCAST_ON:
break;
case NDF_ANYCAST_OFF:
new_flags &= ~NCE_F_ANYCAST;
break;
case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON):
return (EINVAL);
}
case NDF_PROXY_ON:
new_flags |= NCE_F_PROXY;
break;
case NDF_PROXY_OFF:
new_flags &= ~NCE_F_PROXY;
break;
case (NDF_PROXY_OFF|NDF_PROXY_ON):
return (EINVAL);
}
addr,
0,
&nce);
if (err != 0) {
return (err);
}
}
/*
* Router turned to host, delete all ires.
* XXX Just delete the entry, but we need to add too.
*/
return (0);
}
/*
* Note that we ignore the state at this point, which
* should be either STALE or REACHABLE. Instead we let
* the link layer address passed in to determine the state
* much like incoming packets.
*/
return (0);
}
/*
* If the device driver supports it, we make nce_fp_mp to have
* an M_DATA prepend. Otherwise nce_fp_mp will be null.
* The caller ensures there is a hold on the nce for this function.
* Note that since ill_fastpath_probe() copies the mblk there is
* no need for the hold beyond this function.
*/
static void
{
int res;
/* Already contains fastpath info */
return;
}
/*
* EAGAIN is an indication of a transient error
* i.e. allocation failure etc. leave the nce in the list it
* will be updated when another probe happens for another ire
* if not it will be taken out of the list when the ire is
* deleted.
*/
}
}
/*
* Drain the list of nce's waiting for fastpath response.
*/
void
void *arg)
{
/*
* Take it off the list if we're flushing, or if the callback
* routine tells us to do so. Otherwise, leave the nce in the
* fastpath list to handle any pending response from the lower
* layer. We can't drain the list when the callback routine
* comparison failed, because the response is asynchronous in
* nature, and may not arrive in the same order as the list
* insertion.
*/
if (current_nce == first_nce)
else
} else {
/* previous element that is still in the list */
}
}
}
/*
* Add nce to the nce fastpath list.
*/
void
{
/*
* if nce has not been deleted and
* is not already in the list add it.
*/
}
}
/*
* remove nce from the nce fastpath list.
*/
void
{
goto done;
} else {
break;
}
}
}
done:
}
/*
* Update all NCE's that are not in fastpath mode and
* have an nce_fp_mp that matches mp. mp->b_cont contains
* the fastpath header.
*
* Returns TRUE if entry should be dequeued, or FALSE otherwise.
*/
{
return (B_TRUE);
return (B_TRUE);
ip2dbg(("ndp_fastpath_update: trying\n"));
/*
* The nce is locked here to prevent any other threads
* from accessing and changing nce_res_mp when the IPv6 address
* becomes resolved to an lla while we're in the middle
* of looking at and comparing the hardware address (lla).
* It is also locked to prevent multiple threads in nce_fastpath_update
* from examining nce_res_mp at the same time.
*/
/*
* Don't take the ire off the fastpath list yet,
* since the response may come later.
*/
return (B_FALSE);
}
/* Matched - install mp as the fastpath mp */
ip1dbg(("ndp_fastpath_update: match\n"));
}
return (B_TRUE);
}
/*
* This function handles the DL_NOTE_FASTPATH_FLUSH notification from
* driver. Note that it assumes IP is exclusive...
*/
/* ARGSUSED */
void
{
return;
/* No fastpath info? */
return;
/* Just delete the NCE... */
}
/*
* Return a pointer to a given option in the packet.
* Assumes that the option part of the packet has already been validated.
*/
{
while (optlen > 0) {
return (opt);
}
return (NULL);
}
/*
* Verify all option lengths present are > 0, also check to see
* if the option lengths and packet length are consistent.
*/
{
while (optlen > 0) {
if (opt->nd_opt_len == 0)
return (B_FALSE);
if (optlen < 0)
return (B_FALSE);
}
return (B_TRUE);
}
/*
* ndp_walk function.
* Free a fraction of the NCE cache entries.
* A fraction of zero means to not free any in that category.
*/
void
{
return;
return;
}
}
/*
* ndp_walk function.
* Count the number of NCEs that can be deleted.
* These would be hosts but not routers.
*/
void
{
return;
}
#ifdef NCE_DEBUG
{
int bucket_id;
return (th_trace);
}
return (NULL);
}
void
{
int bucket_id;
/*
* Attempt to locate the trace buffer for the curthread.
* If it does not exist, then allocate a new trace buffer
* and link it in list of trace bufs for this ipif, at the head
*/
return;
return;
}
}
}
void
{
return;
}
void
{
int i;
for (i = 0; i < IP_TR_HASH_MAX; i++) {
/* unlink th_trace and free it */
}
}
}
/* ARGSUSED */
int
{
return (0);
}
/* unlink th_trace and free it */
return (0);
}
#endif