ip6_ire.c revision 50f05cf1b823982cdf5d78591f761b56ea921bfd
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 1990 Mentat Inc.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* This file contains routines that manipulate Internet Routing Entries (IREs).
*/
#include <inet/ipclassifier.h>
/*
* Named Dispatch routine to produce a formatted report on all IREs.
* This report is accessed by using the ndd utility to "get" ND variable
* "ip_ire_status_v6".
*/
/* ARGSUSED */
int
{
(void) mi_mpprintf(mp,
"IRE " MI_COL_HDRPAD_STR
"rfq " MI_COL_HDRPAD_STR
"stq " MI_COL_HDRPAD_STR
" zone mxfrg rtt rtt_sd ssthresh ref "
"rtomax tstamp_ok wscale_ok ecn_ok pmtud_ok sack sendpipe recvpipe "
"src gateway");
/*
* 01234567 01234567 01234567 12345 12345 12345 12345 12345678 123
* 123456 123456789 123456789 123456 12345678 1234 12345678 12345678
* xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx
* xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx
* xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx
* xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx
*/
/*
* Because of the ndd constraint, we can have at most a 64K buffer
* to hold all the IRE info. So to be more efficient, just
* allocate a 64K buffer here, assuming we need a buffer that large.
*/
/* The following may work even if we cannot get a large buf. */
return (0);
}
if (zoneid == GLOBAL_ZONEID)
ipst = CONNQ_TO_IPST(q);
return (0);
}
/*
* ire_walk routine invoked for ip_ire_report_v6 for each IRE.
*/
static void
{
char buf1[INET6_ADDRSTRLEN];
char buf2[INET6_ADDRSTRLEN];
char buf3[INET6_ADDRSTRLEN];
char buf4[INET6_ADDRSTRLEN];
int ref;
return;
if (buf_len <= 0)
return;
/* Number of active references of this ire */
/* "inbound" to a non local address is a forward */
fo_pkt_count = 0;
ib_pkt_count = 0;
}
"%05d %05ld %06ld %08d %03d %06d %09d %09d %06d %08d "
"%04d %08d %08d %d/%d/%d %s\n\t%s\n\t%s\n\t%s\n\t%s\n",
(int)ire->ire_zoneid,
} else {
}
}
/* ire_walk routine invoked for ip_ire_report_v6 for each IRE. */
static void
{
char buf1[INET6_ADDRSTRLEN];
char buf2[INET6_ADDRSTRLEN];
char buf3[INET6_ADDRSTRLEN];
char buf4[INET6_ADDRSTRLEN];
int ref;
return;
if (buf_len <= 0)
return;
/* Number of active references of this ire */
/* "inbound" to a non local address is a forward */
fo_pkt_count = 0;
ib_pkt_count = 0;
}
"%05d %05ld %06ld %08d %03d %06d %09d %09d %06d %08d "
"%04d %08d %08d %d/%d/%d %s\n\t%s\n\t%s\n\t%s\n\t%s\n",
(int)ire->ire_zoneid,
} else {
}
}
/*
* Initialize the ire that is specific to IPv6 part and call
* ire_init_common to finish it.
*/
ire_t *
{
/*
* Reject IRE security attribute creation/initialization
* if system is not running in Trusted mode.
*/
return (NULL);
/*
* We can't dupb() here as multiple threads could be
* calling dupb on the same mp which is incorrect.
* First dupb() should be called only by one thread.
*/
return (NULL);
}
/*
* We can't dupb() here as multiple threads could be
* calling dupb on the same mp which is incorrect.
* First dupb() should be called only by one thread.
*/
return (NULL);
}
}
if (v6src_addr != NULL)
}
/*
* Multirouted packets need to have a fragment header added so that
* the receiver is able to discard duplicates according to their
* fragment identifier.
*/
}
/* ire_init_common will free the mblks upon encountering any failure */
return (NULL);
return (ire);
}
/*
* Similar to ire_create_v6 except that it is called only when
* we want to allocate the ire as an mblk, e.g. when we have an external
* resolver. Do we need this in IPv6?
*/
ire_t *
{
/* Allocate the new IRE. */
ip1dbg(("ire_create_mp_v6: alloc failed\n"));
return (NULL);
}
/* Start clean. */
return (NULL);
}
return (ire);
}
/*
* ire_create_v6 is called to allocate and initialize a new IRE.
*
* NOTE : This is called as writer sometimes though not required
* by this function.
*/
ire_t *
{
ip1dbg(("ire_create_v6: alloc failed\n"));
return (NULL);
}
return (NULL);
}
return (ire);
}
/*
* Find an IRE_INTERFACE for the multicast group.
* Allows different routes for multicast addresses
* in the unicast routing table (akin to FF::0/8 but could be more specific)
* which point at different interfaces. This is used when IPV6_MULTICAST_IF
* isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't
* specify the interface to join on.
*
*/
ire_t *
{
int match_flags = MATCH_IRE_TYPE;
/* We search a resolvable ire in case of multirouting. */
/*
* If the route is not resolvable, the looked up ire
* may be changed here. In that case, ire_multirt_lookup()
* IRE_REFRELEs the original ire and changes it.
*/
}
return (NULL);
/*
* Make sure we follow ire_ipif.
*
* We need to determine the interface route through
* which the gateway will be reached. We don't really
* care which interface is picked if the interface is
* part of a group.
*/
}
case IRE_DEFAULT:
case IRE_PREFIX:
case IRE_HOST:
return (ire);
case IRE_IF_NORESOLVER:
case IRE_IF_RESOLVER:
return (ire);
default:
return (NULL);
}
}
/*
* Return any local address. We use this to target ourselves
* when the src address was specified as 'default'.
* Preference for IRE_LOCAL entries.
*/
ire_t *
{
int i;
for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) {
continue;
continue;
case IRE_LOOPBACK:
}
break;
case IRE_LOCAL:
}
return (ire);
}
}
}
return (maybe);
}
/*
* This function takes a mask and returns number of bits set in the
* mask (the represented prefix length). Assumes a contiguous mask.
*/
int
{
int bits;
int plen = IPV6_ABITS;
int i;
for (i = 3; i >= 0; i--) {
plen -= 32;
continue;
}
if (bits == 0)
break;
}
return (plen);
}
/*
* Convert a prefix length to the mask for that prefix.
* Returns the argument bitmask.
*/
{
return (NULL);
while (plen > 32) {
*ptr++ = 0xffffffffU;
plen -= 32;
}
return (bitmask);
}
/*
* Add a fully initialized IRE to an appropriate
* table based on ire_type.
*
* The forward table contains IRE_PREFIX/IRE_HOST,
* IRE_IF_RESOLVER/IRE_IF_NORESOLVER and IRE_DEFAULT.
*
* The cache table contains IRE_BROADCAST/IRE_LOCAL/IRE_LOOPBACK
* and IRE_CACHE.
*
* NOTE : This function is called as writer though not required
* by this function.
*/
int
{
int mask_table_index;
int flags;
int error;
/* Find the appropriate list head. */
case IRE_HOST:
break;
case IRE_CACHE:
case IRE_LOCAL:
case IRE_LOOPBACK:
break;
case IRE_PREFIX:
break;
case IRE_DEFAULT:
break;
case IRE_IF_RESOLVER:
case IRE_IF_NORESOLVER:
break;
default:
printf("ire_add_v6: ire %p has unrecognized IRE type (%d)\n",
return (EINVAL);
}
/* Make sure the address is properly masked. */
/* IRE goes into Forward Table */
NULL) {
int i;
return (ENOMEM);
}
for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) {
RW_DEFAULT, NULL);
}
mask_table_index] == NULL) {
mask_table_index] = ptr;
} else {
/*
* Some other thread won the race in
* initializing the forwarding table at the
* same index.
*/
for (i = 0; i < ipst->ips_ip6_ftable_hash_size;
i++) {
}
}
}
} else {
}
/*
* For xresolv interfaces (v6 interfaces with an external
* address resolver), ip_newroute_v6/ip_newroute_ipif_v6
* are unable to prevent the deletion of the interface route
* while adding an IRE_CACHE for an on-link destination
* in the IRE_IF_RESOLVER case, since the ire has to go to
* the external resolver and return. We can't do a REFHOLD on the
* associated interface ire for fear of the message being freed
* if the external resolver can't resolve the address.
* Here we look up the interface ire in the forwarding table
* and make sure that the interface route has not been deleted.
*/
return (EINVAL);
}
/* Prevent pire from getting deleted */
/* Has it been removed already? */
return (EINVAL);
}
}
/*
* For IRE_CACHES, MATCH_IRE_IPIF is not enough to check
* for duplicates because :
*
* 1) ire_ipif->ipif_ill and ire_stq->q_ptr could be
* pointing at different ills. A real duplicate is
* a match on both ire_ipif and ire_stq.
*
* 2) We could have multiple packets trying to create
* an IRE_CACHE for the same ill.
*
* Moreover, IPIF_NOFAILOVER and IPV6_BOUND_PIF endpoints want
* to go out on a particular ill. Rather than looking at the
* packet, we depend on the above for MATCH_IRE_ILL here.
*
* Unlike IPv4, MATCH_IRE_IPIF is needed here as we could have
* multiple IRE_CACHES for an ill for the same destination
* with various scoped addresses i.e represented by ipifs.
*
* MATCH_IRE_ILL is done implicitly below for IRE_CACHES.
*/
flags |= MATCH_IRE_IPIF;
/*
* If we are creating hidden ires, make sure we search on
* this ill (MATCH_IRE_ILL) and a hidden ire, while we are
* searching for duplicates below. Otherwise we could
* potentially find an IRE on some other interface
* and it may not be an IRE marked with IRE_MARK_HIDDEN. We
* shouldn't do this as this will lead to an infinite loop as
* eventually we need a hidden ire for this packet to go
* out. MATCH_IRE_ILL is already marked above.
*/
}
/*
* Start the atomic add of the ire. Grab the ill locks,
* ill_g_usesrc_lock and the bucket lock. Check for condemned.
* To avoid lock order problems, get the ndp6.ndp_g_lock now itself.
*/
}
/*
* If ipif or ill is changing ire_atomic_start() may queue the
* request and return EINPROGRESS.
*/
if (error != 0) {
if (ndp_g_lock_held)
/*
* We don't know whether it is a valid ipif or not.
* So, set it to NULL. This assumes that the ire has not added
* a reference to the ipif.
*/
}
return (error);
}
/*
* To avoid creating ires having stale values for the ire_max_frag
* we get the latest value atomically here. For more details
* see the block comment in ip_sioctl_mtu and in DL_NOTE_SDU_CHANGE
* in ip_rput_dlpi_writer
*/
else
} else {
}
/*
* Atomically check for duplicate and insert in the table.
*/
continue;
/*
* We do MATCH_IRE_ILL implicitly here for IRE_CACHES.
* As ire_ipif and ire_stq could point to two
* different ills, we can't pass just ire_ipif to
* ire_match_args and get a match on both ills.
* This is just needed for duplicate checks here and
* so we don't add an extra argument to
* ire_match_args for this. Do it locally.
*
* NOTE : Currently there is no part of the code
* that asks for both MATCH_IRE_IPIF and MATCH_IRE_ILL
* match for IRE_CACHEs. Thus we don't want to
* extend the arguments to ire_match_args_v6.
*/
continue;
/*
* Multiroute IRE_CACHEs for a given destination can
* have the same ire_ipif, typically if their source
* address is forced using RTF_SETSRC, and the same
* send-to queue. We differentiate them using the parent
* handle.
*/
continue;
}
continue;
flags)) {
/*
* Return the old ire after doing a REFHOLD.
* As most of the callers continue to use the IRE
* after adding, we return a held ire. This will
* avoid a lookup in the caller again. If the callers
* don't want to use it, they need to do a REFRELE.
*/
ip1dbg(("found dup ire existing %p new %p",
if (ndp_g_lock_held)
/*
* Assert that it is
* not yet removed from the list.
*/
}
return (0);
}
}
char buf[INET6_ADDRSTRLEN];
/*
* All IRE_CACHE types must have an nce. If this is
* not the case the entry will not be added. We need
* to make sure that if somebody deletes the nce
* after we looked up, they will find this ire and
* delete the ire. To delete this ire one needs the
* bucket lock which we are still holding here. So,
* even if the nce gets deleted after we looked up,
* this ire will get deleted.
*
* NOTE : Don't need the ire_lock for accessing
* ire_gateway_addr_v6 as it is appearing first
* time on the list and rts_setgwr_v6 could not
* be changing this.
*/
if (IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) {
} else {
}
goto failed;
/* Pair of refhold, refrele just to get the tracing right */
/*
* Atomically make sure that new IREs don't point
* to an NCE that is logically deleted (CONDEMNED).
* ndp_delete() first marks the NCE CONDEMNED.
* This ensures that the nce_refcnt won't increase
* due to new nce_lookups or due to addition of new IREs
* pointing to this NCE. Then ndp_delete() cleans up
* existing references. If we don't do it atomically here,
* ndp_delete() -> nce_ire_delete() will not be able to
* clean up the IRE list completely, and the nce_refcnt
* won't go down to zero.
*/
/*
* If we used an external resolver, we may not
* have gone through neighbor discovery to get here.
* Must update the nce_state before the next check.
*/
}
if (ndp_g_lock_held)
ip1dbg(("ire_add_v6: No nce for dst %s \n",
/*
* Assert that it is
* not yet removed from the list.
*/
}
return (EINVAL);
} else {
}
}
/*
* Find the first entry that matches ire_addr - provides
* tail insertion. *irep will be null if no match.
*/
/*
* Find the last ire which matches ire_addr_v6.
* Needed to do tail insertion among entries with the same
* ire_addr_v6.
*/
&ire1->ire_addr_v6)) {
break;
}
}
/*
* We keep a count of default gateways which is used when
* assigning them as routes.
*/
}
/* Insert at *irep */
/* Link the new one in. */
/*
* ire_walk routines de-reference ire_next without holding
* a lock. Before we point to the new ire, we want to make
* sure the store that sets the ire_next of the new ire
* reaches global visibility, so that ire_walk routines
* don't see a truncated list of ires i.e if the ire_next
* of the new ire gets set after we do "*irep = ire" due
* to re-ordering, the ire_walk thread will see a NULL
* once it accesses the ire_next of the new ire.
* membar_producer() makes sure that the following store
* happens *after* all of the above stores.
*/
/*
* We return a bumped up IRE above. Keep it symmetrical
* so that the callers will always have to release. This
* helps the callers of this function because they continue
* to use the IRE after adding and hence they don't have to
* lookup again after we return the IRE.
*
* NOTE : We don't have to use atomics as this is appearing
* in the list for the first time and no one else can bump
* up the reference count on this yet.
*/
irb_ptr->irb_ire_cnt++;
stq_ill->ill_ire_cnt++;
}
} else {
}
if (ndp_g_lock_held)
/* Assert that it is not removed from the list yet */
}
/*
* For ires with a host mask, see if there is an entry
* in the cache. If there is one, flush the whole cache as
* there might be multiple entries due to RTF_MULTIRT (CGTP).
* If no entry is found then there is no need to flush the
* cache.
*/
ipst);
}
} else {
}
}
return (0);
}
/*
* Search for all HOST REDIRECT routes that are
* pointing at the specified gateway and
* delete them. This routine is called only
* when a default gateway is going away.
*/
static void
{
int i;
/* get the hash table for HOST routes */
return;
for (i = 0; (i < ipst->ips_ip6_ftable_hash_size); i++) {
continue;
}
}
}
/*
* Delete all the cache entries with this 'addr'. This is the IPv6 counterpart
* of ip_ire_clookup_and_delete. The difference being this function does not
* return any value. IPv6 processing of a gratuitous ARP, as it stands, is
* different than IPv4 in that, regardless of the presence of a cache entry
* for this address, an ire_walk_v6 is done. Another difference is that unlike
* in the case of IPv4 this does not take an ipif_t argument, since it is only
* called by ip_arp_news and the match is always only on the address.
*/
void
{
continue;
/* This signifies start of a match */
if (!found)
}
/* End of the match */
} else if (found)
break;
}
}
/*
* Delete the specified IRE.
* All calls should use ire_delete().
* Sometimes called as writer though not required by this function.
*
* NOTE : This function is called only if the ire was added
* in the list.
*/
void
{
/*
* when a default gateway is going away
* delete all the host redirects pointing at that
* gateway.
*/
}
}
/*
* ire_walk routine to delete all IRE_CACHE and IRE_HOST type redirect
* entries.
*/
/*ARGSUSED1*/
void
{
char addrstr1[INET6_ADDRSTRLEN];
char addrstr2[INET6_ADDRSTRLEN];
ip1dbg(("ire_delete_cache_v6: deleted %s type %d through %s\n",
}
}
/*
* Delete the IREs that have a given gateway address.
*/
void
{
char buf1[INET6_ADDRSTRLEN];
char buf2[INET6_ADDRSTRLEN];
return;
ip1dbg(("ire_delete_cache_gw_v6: deleted %s type %d to %s\n",
}
}
/*
* Remove all IRE_CACHE entries that match
* the ire specified. (Sometimes called
* as writer though not required by this function.)
*
* The flag argument indicates if the
* flush request is due to addition
* of new route (IRE_FLUSH_ADD) or deletion of old
* route (IRE_FLUSH_DELETE).
*
* This routine takes only the IREs from the forwarding
* table and flushes the corresponding entries from
* the cache table.
*
* When flushing due to the deletion of an old route, it
* just checks the cache handles (ire_phandle and ire_ihandle) and
* deletes the ones that match.
*
* When flushing due to the creation of a new route, it checks
* if a cache entry's address matches the one in the IRE and
* that the cache entry's parent has a less specific mask than the
* one in IRE. The destination of such a cache entry could be the
* gateway for other cache entries, so we need to flush those as
* well by looking for gateway addresses matching the IRE's address.
*/
void
{
int i;
return;
/*
* If a default is just created, there is no point
* in going through the cache, as there will not be any
* cached ires.
*/
return;
if (flag == IRE_FLUSH_ADD) {
/*
* This selective flush is
* due to the addition of
* new IRE.
*/
for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) {
continue;
continue;
/*
* If 'cire' belongs to the same subnet
* as the new ire being added, and 'cire'
* is derived from a prefix that is less
* specific than the new ire being added,
* we need to flush 'cire'; for instance,
* when a new interface comes up.
*/
ire->ire_masklen))) {
continue;
}
/*
* This is the case when the ire_gateway_addr
* of 'cire' belongs to the same subnet as
* the new ire being added.
* Flushing such ires is sometimes required to
* avoid misrouting: say we have a machine with
* two interfaces (I1 and I2), a default router
* R on the I1 subnet, and a host route to an
* off-link destination D with a gateway G on
* the I2 subnet.
* Under normal operation, we will have an
* on-link cache entry for G and an off-link
* cache entry for D with G as ire_gateway_addr,
* traffic to D will reach its destination
* through gateway G.
* If the administrator does 'ifconfig I2 down',
* the cache entries for D and G will be
* flushed. However, G will now be resolved as
* an off-link destination using R (the default
* router) as gateway. Then D will also be
* resolved as an off-link destination using G
* as gateway - this behavior is due to
* compatibility reasons, see comment in
* ire_ihandle_lookup_offlink(). Traffic to D
* will go to the router R and probably won't
* reach the destination.
* The administrator then does 'ifconfig I2 up'.
* Since G is on the I2 subnet, this routine
* will flush its cache entry. It must also
* flush the cache entry for D, otherwise
* traffic will stay misrouted until the IRE
* times out.
*/
continue;
}
}
}
} else {
/*
* Delete the cache entries based on the
* handles in the IRE, as this IRE is
* being deleted.
*/
for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) {
continue;
continue;
if ((cire->ire_phandle == 0 ||
(cire->ire_ihandle == 0 ||
continue;
}
}
}
}
/*
* Matches the arguments passed with the values in the ire.
*
* Note: for match types that match using "ipif" passed in, ipif
* must be checked for non-NULL before calling this routine.
*/
static boolean_t
{
/*
* HIDDEN cache entries have to be looked up specifically with
* MATCH_IRE_MARK_HIDDEN. MATCH_IRE_MARK_HIDDEN is usually set
* when the interface is FAILED or INACTIVE. In that case,
* any IRE_CACHES that exists should be marked with
* IRE_MARK_HIDDEN. So, we don't really need to match below
* for IRE_MARK_HIDDEN. But we do so for consistency.
*/
if (!(match_flags & MATCH_IRE_MARK_HIDDEN) &&
return (B_FALSE);
/*
* If MATCH_IRE_ZONEONLY has been set and the supplied zoneid is
* valid and does not match that of ire_zoneid, a failure to
* match is reported at this point. Otherwise, since some IREs
* that are available in the global zone can be used in local
* zones, additional checks need to be performed:
*
* IRE_CACHE and IRE_LOOPBACK entries should
* never be matched in this situation.
*
* IRE entries that have an interface associated with them
* should in general not match unless they are an IRE_LOCAL
* or in the case when MATCH_IRE_DEFAULT has been set in
* the caller. In the case of the former, checking of the
* other fields supplied should take place.
*
* In the case where MATCH_IRE_DEFAULT has been set,
* all of the ipif's associated with the IRE's ill are
* checked to see if there is a matching zoneid. If any
* one ipif has a matching zoneid, this IRE is a
* potential candidate so checking of the other fields
* takes place.
*
* In the case where the IRE_INTERFACE has a usable source
* address (indicated by ill_usesrc_ifindex) in the
* correct zone then it's permitted to return this IRE
*/
if (match_flags & MATCH_IRE_ZONEONLY)
return (B_FALSE);
return (B_FALSE);
/*
* Note, IRE_INTERFACE can have the stq as NULL. For
* example, if the default multicast route is tied to
* the loopback address.
*/
/*
* If there is a usable source address in the
* zone, then it's ok to return an
* IRE_INTERFACE
*/
if ((dst_ill->ill_usesrc_ifindex != 0) &&
!= NULL) {
ip3dbg(("ire_match_args: src_ipif %p"
" dst_ill %p", (void *)src_ipif,
(void *)dst_ill));
} else {
ip3dbg(("ire_match_args: src_ipif NULL"
" dst_ill %p\n", (void *)dst_ill));
return (B_FALSE);
}
}
if ((match_flags & MATCH_IRE_DEFAULT) == 0)
return (B_FALSE);
if (IPIF_CAN_LOOKUP(tipif) &&
break;
}
return (B_FALSE);
}
}
if (match_flags & MATCH_IRE_GW) {
}
/*
* For IRE_CACHES, MATCH_IRE_ILL/ILL_GROUP really means that
* somebody wants to send out on a particular interface which
* is given by ire_stq and hence use ire_stq to derive the ill
* value. ire_ipif for IRE_CACHES is just the
* means of getting a source address i.e ire_src_addr_v6 =
* ire->ire_ipif->ipif_src_addr_v6.
*/
}
/* No ire_addr_v6 bits set past the mask */
ire->ire_addr_v6));
((!(match_flags & MATCH_IRE_GW)) ||
((!(match_flags & MATCH_IRE_TYPE)) ||
((!(match_flags & MATCH_IRE_SRC)) ||
&ipif->ipif_v6src_addr)) &&
((!(match_flags & MATCH_IRE_IPIF)) ||
((!(match_flags & MATCH_IRE_MARK_HIDDEN)) ||
((!(match_flags & MATCH_IRE_ILL)) ||
((!(match_flags & MATCH_IRE_IHANDLE)) ||
((!(match_flags & MATCH_IRE_ILL_GROUP)) ||
(ire_ill_group != NULL &&
ire_ill_group == ipif_ill_group)) &&
((!(match_flags & MATCH_IRE_SECATTR)) ||
(!is_system_labeled()) ||
/* We found the matched IRE */
return (B_TRUE);
}
return (B_FALSE);
}
/*
* Lookup for a route in all the tables
*/
ire_t *
{
/*
* ire_match_args_v6() will dereference ipif if MATCH_IRE_SRC or
* MATCH_IRE_ILL is set.
*/
return (NULL);
/*
* The caller might be asking for a cache lookup.
* This is not the best way to look up the cache;
* the caller should call ire_cache_lookup directly.
*
* If MATCH_IRE_TYPE was set, first lookup in the cache table and then
* in the forwarding table, if the applicable type flags were set.
*/
return (ire);
}
}
return (ire);
}
/*
* Look up a route in the forwarding table.
* A specific lookup is indicated by passing the
* required parameters and indicating the
* match required in the flags field.
*
* Looking for default route can be done in three ways
* 1) pass mask as ipv6_all_zeros and set MATCH_IRE_MASK in flags field
* along with other matches.
* 2) pass type as IRE_DEFAULT and set MATCH_IRE_TYPE in flags
* field along with other matches.
* 3) if the destination and mask are passed as zeros.
*
* A request to return a default route if no route
* is found, can be specified by setting MATCH_IRE_DEFAULT
* in flags.
*
* It does not support recursion more than one level. It
* will do recursive lookup only when the lookup maps to
* a prefix or default route and MATCH_IRE_RECURSIVE flag is passed.
*
* If the routing table is set up to allow more than one level
* of recursion, cleaning up the cache table will not work, resulting
* in invalid routing.
*
*
* NOTE : When this function returns NULL, pire has already been released.
* pire is valid only when this function successfully returns an
* ire.
*/
ire_t *
{
int i;
/*
* When we return NULL from this function, we should make
* sure that *pire is NULL so that the callers will not
* wrongly REFRELE the pire.
*/
/*
* ire_match_args_v6() will dereference ipif if MATCH_IRE_SRC or
* MATCH_IRE_ILL is set.
*/
return (NULL);
/*
* If the mask is known, the lookup
* is simple, if the mask is not known
* we need to search.
*/
if (flags & MATCH_IRE_MASK) {
return (NULL);
continue;
goto found_ire;
}
} else {
/*
* In this case we don't know the mask, we need to
* search the table assuming different mask sizes.
* we start with 128 bit mask, we don't allow default here.
*/
for (i = (IP6_MASK_TABLE_SIZE - 1); i > 0; i--) {
continue;
(void) ip_plen_to_mask_v6(i, &tmpmask);
continue;
goto found_ire;
}
}
}
/*
* We come here if no route has yet been found.
*
* Handle the case where a default route is
* requested by specifying the type as one of the possible
* types that can have a zero mask (IRE_DEFAULT and IRE_INTERFACE).
*
* If MATCH_IRE_MASK is specified, then the appropriate default route
* would have been found above if it exists so it isn't looked up here.
* If MATCH_IRE_DEFAULT was also specified, then a default route will be
* searched for later.
*/
/* addr & mask is zero for defaults */
continue;
goto found_ire;
}
}
}
/*
* We come here only if no route is found.
* see if the default route can be used which is allowed
* only if the default matching criteria is specified.
* The ipv6_ire_default_count tracks the number of IRE_DEFAULT
* entries. However, the ip_forwarding_table_v6[0] also contains
* interface routes thus the count can be zero.
*/
return (NULL);
/*
* Keep a tab on the bucket while looking at the IRE_DEFAULT
* entries. We need to keep track of a particular IRE
* (ire_origin) so this ensures that it will not be unlinked
* from the hash list during the recursive lookup below.
*/
return (NULL);
}
/*
* Get the index first, since it can be changed by other
* threads. Then get to the right default route skipping
* default interface routes if any. As we hold a reference on
* the IRE bucket, ipv6_ire_default_count can only increase so
* we can't reach the end of the hash list unexpectedly.
*/
if (ipst->ips_ipv6_ire_default_count != 0) {
while (index != 0) {
index--;
}
} else {
/*
* No default route, so we only have default interface
* routes: don't enter the first loop.
*/
}
/*
* Round-robin the default routers list looking for a neighbor
* that matches the passed in parameters and is reachable. If
* none found, just return a route from the default router list
* if it exists. If we can't find a default route (IRE_DEFAULT),
* look for interface default routes.
* We start with the ire we found above and we walk the hash
* list until we're back where we started, see
* ire_get_next_default_ire(). It doesn't matter if default
* routes are added or deleted by other threads - we know this
* ire will stay in the list because we hold a reference on the
* ire bucket.
* NB: if we only have interface default routes, ire is NULL so
* we don't even enter this loop (see above).
*/
ire_origin = ire;
int match_flags;
/*
* We have something to work with.
* If this is a reachable
* entry, we will use this. Otherwise
* we'll try to find an entry that has
* a resolved cache entry. We will fall back
* on this if we don't find anything else.
*/
ipst);
NCE_ISREACHABLE(nce) &&
goto found_ire_held;
NCE_F_ISROUTER)) {
/*
* Make sure we don't use
* this ire
*/
}
} else if (ipst->
ips_ipv6_ire_default_count > 1 &&
zoneid != GLOBAL_ZONEID) {
/*
* When we're in a local zone, we're
* only interested in default routers
* that are reachable through ipifs
* within our zone.
* The potentially expensive call to
* ire_route_lookup_v6() is avoided when
* we have only one default route.
*/
int ire_match_flags = MATCH_IRE_TYPE |
}
/*
* Make sure we don't use
* this ire
*/
}
}
}
}
goto found_ire_held;
} else {
/*
* Look for an interface default route matching the
* args passed in. No round robin here. Just pick
* the right one.
*/
continue;
continue;
goto found_ire_held;
}
}
}
}
ip1dbg(("ire_ftable_lookup_v6: returning NULL ire"));
return (NULL);
if ((flags & MATCH_IRE_RJ_BHOLE) &&
return (ire);
}
/*
* At this point, IRE that was found must be an IRE_FORWARDTABLE
* or IRE_CACHETABLE type. If this is a recursive lookup and an
* IRE_INTERFACE type was found, return that. If it was some other
* IRE_FORWARDTABLE type of IRE (one of the prefix types), then it
* is necessary to fill in the parent IRE pointed to by pire, and
* then lookup the gateway address of the parent. For backwards
* compatibility, if this lookup returns an
* IRE other than an IRE_CACHETABLE or IRE_INTERFACE, then one more level
* of lookup is done.
*/
if (flags & MATCH_IRE_RECURSIVE) {
int match_flags = MATCH_IRE_DSTONLY;
return (ire);
/*
* If we can't find an IRE_INTERFACE or the caller has not
* asked for pire, we need to REFRELE the saved_ire.
*/
/*
* Currently MATCH_IRE_ILL is never used with
* (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT) while
* sending out packets as MATCH_IRE_ILL is used only
* for communicating with on-link hosts. We can't assert
* that here as RTM_GET calls this function with
* MATCH_IRE_ILL | MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE.
* We have already used the MATCH_IRE_ILL in determining
* the right prefix route at this point. To match the
* behavior of how we locate routes while sending out
* packets, we don't want to use MATCH_IRE_ILL below
* while locating the interface route.
*/
/*
* In this case we have to deal with the
* MATCH_IRE_PARENT flag, which means the
* parent has to be returned if ire is NULL.
* The aim of this is to have (at least) a starting
* ire when we want to look at all of the ires in a
* bucket aimed at a single destination (as is the
* case in ip_newroute_v6 for the RTF_MULTIRT
* flagged routes).
*/
if (flags & MATCH_IRE_PARENT) {
/*
* Need an extra REFHOLD, if the
* parent ire is returned via both
* ire and pire.
*/
}
} else {
}
return (ire);
}
/*
* If the caller did not ask for pire, release
* it now.
*/
}
return (ire);
}
/*
* In this case we have to deal with the
* MATCH_IRE_PARENT flag, which means the
* parent has to be returned if ire is NULL.
* The aim of this is to have (at least) a starting
* ire when we want to look at all of the ires in a
* bucket aimed at a single destination (as is the
* case in ip_newroute_v6 for the RTF_MULTIRT
* flagged routes).
*/
if (flags & MATCH_IRE_PARENT) {
/*
* Need an extra REFHOLD, if the
* parent ire is returned via both
* ire and pire.
*/
}
} else {
}
return (ire);
/*
* If the caller did not ask for pire, release
* it now.
*/
}
return (ire);
}
return (ire);
}
/*
* Delete the IRE cache for the gateway and all IRE caches whose
* ire_gateway_addr_v6 points to this gateway, and allow them to
* be created on demand by ip_newroute_v6.
*/
void
{
continue;
}
}
}
/*
* Looks up the cache table for a route.
* A specific lookup can be indicated by
* passing the MATCH_* flags and the
* necessary parameters.
*/
ire_t *
{
/*
* ire_match_args_v6() will dereference ipif if MATCH_IRE_SRC or
* MATCH_IRE_ILL is set.
*/
return (NULL);
continue;
return (ire);
}
}
return (NULL);
}
/*
* Lookup cache. Don't return IRE_MARK_HIDDEN entries. Callers
* should use ire_ctable_lookup with MATCH_IRE_MARK_HIDDEN to get
* to the hidden ones.
*
* In general the zoneid has to match (where ALL_ZONES match all of them).
* But for IRE_LOCAL we also need to handle the case where L2 should
* conceptually loop back the packet. This is necessary since neither
* Ethernet drivers nor Ethernet hardware loops back packets sent to their
* own MAC address. This loopback is needed when the normal
* routes (ignoring IREs with different zoneids) would send out the packet on
* the same ill (or ill group) as the ill with which this IRE_LOCAL is
* associated.
*
* Earlier versions of this code always matched an IRE_LOCAL independently of
* the zoneid. We preserve that earlier behavior when
* ip_restrict_interzone_loopback is turned off.
*/
ire_t *
{
continue;
/*
* Finally, check if the security policy has any
* restriction on using this route for the specified
* message.
*/
continue;
}
return (ire);
}
continue;
return (ire);
}
}
}
return (NULL);
}
/*
* Locate the interface ire that is tied to the cache ire 'cire' via
* cire->ire_ihandle.
*
* We are trying to create the cache ire for an onlink destn. or
* gateway in 'cire'. We are called from ire_add_v6() in the IRE_IF_RESOLVER
* case for xresolv interfaces, after the ire has come back from
* an external resolver.
*/
static ire_t *
{
int match_flags;
int i;
int j;
/*
* We know that the mask of the interface ire equals cire->ire_cmask.
* (When ip_newroute_v6() created 'cire' for an on-link destn.
* it set its cmask from the interface ire's mask)
*/
return (ire);
/*
* If we didn't find an interface ire above, we can't declare failure.
* For backwards compatibility, we need to support prefix routes
* pointing to next hop gateways that are not on-link.
*
* In the resolver/noresolver case, ip_newroute_v6() thinks
* it is creating the cache ire for an onlink destination in 'cire'.
* But 'cire' is not actually onlink, because ire_ftable_lookup_v6()
* cheated it, by doing ire_route_lookup_v6() twice and returning an
* interface ire.
*
* Eg. default - gw1 (line 1)
* gw1 - gw2 (line 2)
* gw2 - hme0 (line 3)
*
* In the above example, ip_newroute_v6() tried to create the cache ire
* 'cire' for gw1, based on the interface route in line 3. The
* ire_ftable_lookup_v6() above fails, because there is no interface route
* to reach gw1 (the one in line 3 reaches gw2). We fall thru below.
*
* Do a brute force search based on the ihandle in a subset of the
* forwarding tables, corresponding to cire->ire_cmask_v6. Otherwise
* things become very complex, since we don't have 'pire' in this
* case. (Also note that this method is not possible in the offlink
* case because we don't know the mask)
*/
return (NULL);
for (j = 0; j < ipst->ips_ip6_ftable_hash_size; j++) {
continue;
return (ire);
}
}
}
return (NULL);
}
/*
* Locate the interface ire that is tied to the cache ire 'cire' via
* cire->ire_ihandle.
*
* We are trying to create the cache ire for an offlink destn based
* on the cache ire of the gateway in 'cire'. 'pire' is the prefix ire
* as found by ip_newroute_v6(). We are called from ip_newroute_v6() in
* the IRE_CACHE case.
*/
ire_t *
{
int match_flags;
/*
* ip_newroute_v6 calls ire_ftable_lookup with MATCH_IRE_ILL only
* for on-link hosts. We should never be here for onlink.
* Thus, use MATCH_IRE_ILL_GROUP.
*/
/*
* We know that the mask of the interface ire equals cire->ire_cmask.
* (When ip_newroute_v6() created 'cire' for an on-link destn. it set
* its cmask from the interface ire's mask)
*/
return (ire);
/*
* If we didn't find an interface ire above, we can't declare failure.
* For backwards compatibility, we need to support prefix routes
* pointing to next hop gateways that are not on-link.
*
* Assume we are trying to ping some offlink destn, and we have the
* routing table below.
*
* Eg. default - gw1 <--- pire (line 1)
* gw1 - gw2 (line 2)
* gw2 - hme0 (line 3)
*
* If we already have a cache ire for gw1 in 'cire', the
* ire_ftable_lookup_v6 above would have failed, since there is no
* interface ire to reach gw1. We will fallthru below.
*
* Here we duplicate the steps that ire_ftable_lookup_v6() did in
* getting 'cire' from 'pire', in the MATCH_IRE_RECURSIVE case.
* The differences are the following
* i. We want the interface ire only, so we call
* ire_ftable_lookup_v6() instead of ire_route_lookup_v6()
* ii. We look for only prefix routes in the 1st call below.
* iii. We want to match on the ihandle in the 2nd call below.
*/
return (NULL);
/*
* At this point 'ire' corresponds to the entry shown in line 2.
* gw_addr is 'gw2' in the example above.
*/
return (ire);
}
/*
* Return the IRE_LOOPBACK, IRE_IF_RESOLVER or IRE_IF_NORESOLVER
* ire associated with the specified ipif.
*
* This might occasionally be called when IPIF_UP is not set, since
* both IPV6_MULTICAST_IF and the creation of interface routes
* allow specifying a down ipif (ipif_lookup* match ipifs that are down).
*
* Note that if IPIF_NOLOCAL, IPIF_NOXMIT, or IPIF_DEPRECATED is set on
* the ipif this routine might return NULL.
* (Sometimes called as writer though not required by this function.)
*/
ire_t *
{
/* In this case we need to lookup destination address. */
MATCH_IRE_MASK), ipst);
} else {
MATCH_IRE_MASK), ipst);
}
return (ire);
}
/*
* Return B_TRUE if a multirt route is resolvable
* (or if no route is resolved yet), B_FALSE otherwise.
* This only works in the global zone.
*/
{
int unres_cnt = 0;
/* Retrieve the first IRE_HOST that matches the destination */
/* No route at all */
if (first_fire == NULL) {
return (B_TRUE);
}
/* Retrieve the first IRE_CACHE ire for that destination. */
/* No resolved route. */
if (first_cire == NULL) {
return (B_TRUE);
}
/* At least one route is resolved. */
/* Count the number of routes to that dest that are declared. */
continue;
continue;
unres_cnt++;
}
/* Then subtract the number of routes to that dst that are resolved */
continue;
continue;
continue;
unres_cnt--;
}
/* At least one route is unresolved; search for a resolvable route. */
if (unres_cnt > 0)
if (first_fire)
if (first_cire)
return (resolvable);
}
/*
* Return B_TRUE and update *ire_arg and *fire_arg
* if at least one resolvable route is found.
* Return B_FALSE otherwise (all routes are resolved or
* the remaining unresolved routes are all unresolvable).
* This only works in the global zone.
*/
{
ip2dbg(("ire_multirt_lookup_v6: *ire_arg %p, *fire_arg %p, "
/* Not an IRE_HOST ire; give up. */
return (B_FALSE);
}
/* This is the first IRE_HOST ire for that destination. */
first_fire = *fire_arg;
ip2dbg(("ire_multirt_lookup_v6: dst %08x\n",
/*
* Retrieve the first IRE_CACHE ire for that destination;
* if we don't find one, no route for that dest is
* resolved yet.
*/
if (first_cire) {
}
/*
* Search for a resolvable route, giving the top priority
* to routes that can be resolved without any call to the resolver.
*/
if (!IN6_IS_ADDR_MULTICAST(&v6dst)) {
/*
* For all multiroute IRE_HOST ires for that destination,
* check if the route via the IRE_HOST's gateway is
* resolved yet.
*/
continue;
continue;
continue;
}
ip2dbg(("ire_multirt_lookup_v6: fire %p, "
"ire_addr %08x, ire_gateway_addr %08x\n",
(void *)fire,
if (first_cire) {
/*
* For all IRE_CACHE ires for that
* destination.
*/
for (cire = first_cire;
continue;
if (!IN6_ARE_ADDR_EQUAL(
continue;
continue;
tsl) != 0) {
continue;
}
/*
* Check if the IRE_CACHE's gateway
* matches the IRE_HOST's gateway.
*/
if (IN6_ARE_ADDR_EQUAL(
&v6gw)) {
break;
}
}
}
/*
* This route is already resolved;
* proceed with next one.
*/
if (already_resolved) {
ip2dbg(("ire_multirt_lookup_v6: found cire %p, "
"already resolved\n", (void *)cire));
continue;
}
/*
* The route is unresolved; is it actually
* resolvable, i.e. is there a cache or a resolver
* for the gateway?
*/
ip2dbg(("ire_multirt_lookup_v6: looked up gw_ire %p\n",
(void *)gw_ire));
/*
* This route can be resolved without any call to the
* resolver; if the MULTIRT_CACHEGW flag is set,
* give the top priority to this ire and exit the
* loop.
* This occurs when a resolver reply is processed
* through ip_wput_nondata()
*/
if ((flags & MULTIRT_CACHEGW) &&
/*
* Release the resolver associated to the
* previous candidate best ire, if any.
*/
if (best_cire) {
}
ip2dbg(("ire_multirt_lookup_v6: found top prio "
"best_fire %p, best_cire %p\n",
break;
}
/*
* Compute the time elapsed since our preceding
* attempt to resolve that route.
* If the MULTIRT_USESTAMP flag is set, we take that
* route into account only if this time interval
* exceeds ip_multirt_resolution_interval;
* this prevents us from attempting to resolve a
* broken route upon each sending of a packet.
*/
(!(flags & MULTIRT_USESTAMP)));
ip2dbg(("ire_multirt_lookup_v6: fire %p, delta %lu, "
"res %d\n",
if (res) {
/*
* A resolver exists for the gateway: save
* the current IRE_HOST ire as a candidate
* best ire. If we later discover that a
* top priority ire exists (i.e. no need to
* call the resolver), then this new ire
* will be preferred to the current one.
*/
ip2dbg(("ire_multirt_lookup_v6:"
"found candidate "
"best_fire %p, "
"best_cire %p\n",
(void *)best_fire,
(void *)best_cire));
/*
* If MULTIRT_CACHEGW is not
* set, we ignore the top
* priority ires that can
* be resolved without any
* call to the resolver;
* In that case, there is
* actually no need
* to continue the loop.
*/
if (!(flags &
MULTIRT_CACHEGW)) {
break;
}
continue;
}
} else {
/*
* No resolver for the gateway: the
* route is not resolvable.
* If the MULTIRT_SETSTAMP flag is
* set, we stamp the IRE_HOST ire,
* so we will not select it again
* during this resolution interval.
*/
if (flags & MULTIRT_SETSTAMP)
}
}
}
} else { /* IN6_IS_ADDR_MULTICAST(&v6dst) */
for (fire = first_fire;
continue;
continue;
continue;
}
/* No resolver for the gateway; we skip this ire. */
continue;
}
if (first_cire) {
/*
* For all IRE_CACHE ires for that
* destination.
*/
for (cire = first_cire;
continue;
if (!IN6_ARE_ADDR_EQUAL(
continue;
continue;
tsl) != 0) {
continue;
}
/*
* Cache entries are linked to the
* parent routes using the parent handle
* (ire_phandle). If no cache entry has
* the same handle as fire, fire is
* still unresolved.
*/
if (cire->ire_phandle ==
fire->ire_phandle) {
break;
}
}
}
/*
* This route is already resolved; proceed with
* next one.
*/
if (already_resolved) {
continue;
}
/*
* Compute the time elapsed since our preceding
* attempt to resolve that route.
* If the MULTIRT_USESTAMP flag is set, we take
* that route into account only if this time
* interval exceeds ip_multirt_resolution_interval;
* this prevents us from attempting to resolve a
* broken route upon each sending of a packet.
*/
(!(flags & MULTIRT_USESTAMP)));
ip3dbg(("ire_multirt_lookup_v6: fire %p, delta %lx, "
"flags %04x, res %d\n",
if (res) {
if (best_cire) {
/*
* Release the resolver associated
* to the preceding candidate best
* ire, if any.
*/
}
continue;
}
}
}
if (best_fire) {
}
/* Release the first IRE_CACHE we initially looked up, if any. */
if (first_cire)
/* Found a resolvable route. */
if (best_fire) {
if (*fire_arg)
if (*ire_arg)
/*
* Update the passed arguments with the
* resolvable multirt route we found
*/
ip2dbg(("ire_multirt_lookup_v6: returning B_TRUE, "
"*fire_arg %p, *ire_arg %p\n",
return (B_TRUE);
}
ip2dbg(("ire_multirt_lookup_v6: returning B_FALSE, *fire_arg %p, "
"*ire_arg %p\n",
/* No resolvable route. */
return (B_FALSE);
}
/*
* Find an IRE_OFFSUBNET IRE entry for the multicast address 'v6dstp'
* that goes through 'ipif'. As a fallback, a route that goes through
* ipif->ipif_ill can be returned.
*/
ire_t *
{
return (NULL);
continue;
}
case IRE_DEFAULT:
case IRE_PREFIX:
case IRE_HOST:
}
return (ire);
}
}
break;
case IRE_IF_NORESOLVER:
case IRE_IF_RESOLVER:
}
return (ire);
}
break;
}
}
return (save_ire);
}