ip_ire.c revision 83c269170dc796b2d060c751afdc7059c0dc8c1b
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 1990 Mentat Inc.
*/
/*
* This file contains routines that manipulate Internet Routing Entries (IREs).
*/
#include <inet/ip_ftable.h>
#include <inet/tunables.h>
#include <inet/ipclassifier.h>
struct kmem_cache *rt_entry_cache;
typedef struct nce_clookup_s {
/*
* Synchronization notes:
*
* The fields of the ire_t struct are protected in the following way :
*
*
* - bucket lock of the forwarding table in which the ire is stored.
*
* ire_ill, ire_u *except* ire_gateway_addr[v6], ire_mask,
* ire_type, ire_create_time, ire_masklen, ire_ipversion, ire_flags,
* ire_bucket
*
* - Set in ire_create_v4/v6 and never changes after that. Thus,
* we don't need a lock whenever these fields are accessed.
*
* - ire_bucket and ire_masklen (also set in ire_create) is set in
* ire_add before inserting in the bucket and never
* changes after that. Thus we don't need a lock whenever these
* fields are accessed.
*
* ire_gateway_addr_v4[v6]
*
* - ire_gateway_addr_v4[v6] is set during ire_create and later modified
* by rts_setgwr[v6]. As ire_gateway_addr is a uint32_t, updates to
* it are assumed to be atomic and hence the other parts of the code
* do not use any locks. ire_gateway_addr_v6 updates are not atomic
*
* ire_refcnt, ire_identical_ref
*
* - Updated atomically using atomic_add_32
*
* ire_ssthresh, ire_rtt_sd, ire_rtt, ire_ib_pkt_count, ire_ob_pkt_count
*
* - Assumes that 32 bit writes are atomic. No locks. ire_lock is
* used to serialize updates to ire_ssthresh, ire_rtt_sd, ire_rtt.
*
* ire_generation
* - Under ire_lock
*
* ire_nce_cache
* - Under ire_lock
*
* ire_dep_parent (To next IRE in recursive lookup chain)
* - Under ips_ire_dep_lock. Write held when modifying. Read held when
* walking. We also hold ire_lock when modifying to allow the data path
* to only acquire ire_lock.
*
* ire_dep_parent_generation (Generation number from ire_dep_parent)
* and ire_lock held when modifying)
*
* ire_dep_children (From parent to first child)
* ire_dep_sib_next (linked list of siblings)
* ire_dep_sib_ptpn (linked list of siblings)
* - Under ips_ire_dep_lock. Write held when modifying. Read held when
* walking.
*
* As we always hold the bucket locks in all the places while accessing
* the above values, it is natural to use them for protecting them.
*
* We have a forwarding table for IPv4 and IPv6. The IPv6 forwarding table
* (ip_forwarding_table_v6) is an array of pointers to arrays of irb_t
* structures. ip_forwarding_table_v6 is allocated dynamically in
* ire_add_v6. ire_ft_init_lock is used to serialize multiple threads
* initializing the same bucket. Once a bucket is initialized, it is never
* de-allocated. This assumption enables us to access
* ip_forwarding_table_v6[i] without any locks.
*
* The forwarding table for IPv4 is a radix tree whose leaves
* are rt_entry structures containing the irb_t for the rt_dst. The irb_t
* for IPv4 is dynamically allocated and freed.
*
* Each irb_t - ire bucket structure has a lock to protect
* a bucket and the ires residing in the bucket have a back pointer to
* the bucket structure. It also has a reference count for the number
* of threads walking the bucket - irb_refcnt which is bumped up
* using the irb_refhold function. The flags irb_marks can be
* set to IRB_MARK_CONDEMNED indicating that there are some ires
* in this bucket that are IRE_IS_CONDEMNED and the
* last thread to leave the bucket should delete the ires. Usually
* this is done by the irb_refrele function which is used to decrement
* the reference count on a bucket. See comments above irb_t structure
* definition in ip.h for further details.
*
* The ire_refhold/ire_refrele functions operate on the ire which increments/
* decrements the reference count, ire_refcnt, atomically on the ire.
* ire_refcnt is modified only using those functions. Operations on the IRE
* could be described as follows :
*
* CREATE an ire with reference count initialized to 1.
*
* ADDITION of an ire holds the bucket lock, checks for duplicates
* and then adds the ire. ire_add returns the ire after
* bumping up once more i.e the reference count is 2. This is to avoid
* an extra lookup in the functions calling ire_add which want to
* work with the ire after adding.
*
* LOOKUP of an ire bumps up the reference count using ire_refhold
* function. It is valid to bump up the reference count of the IRE,
* after the lookup has returned an ire. Following are the lookup
* functions that return a HELD ire :
*
* ire_ftable_lookup[_v6], ire_lookup_multi_ill[_v6]
*
* DELETION of an ire holds the bucket lock, removes it from the list
* and then decrements the reference count for having removed from the list
* by using the ire_refrele function. If some other thread has looked up
* the ire, the reference count would have been bumped up and hence
* this ire will not be freed once deleted. It will be freed once the
* reference count drops to zero.
*
* Add and Delete acquire the bucket lock as RW_WRITER, while all the
* lookups acquire the bucket lock as RW_READER.
*
* The general rule is to do the ire_refrele in the function
* that is passing the ire as an argument.
*
* In trying to locate ires the following points are to be noted.
*
* IRE_IS_CONDEMNED signifies that the ire has been logically deleted and is
* to be ignored when walking the ires using ire_next.
*
* Zones note:
* Walking IREs within a given zone also walks certain ires in other
* zones. This is done intentionally. IRE walks with a specified
* zoneid are used only when doing informational reports, and
* zone users want to see things that they can access. See block
* comment in ire_walk_ill_match().
*/
/*
* The size of the forwarding table. We will make sure that it is a
* power of 2 in ip_ire_init().
*/
struct kmem_cache *ire_cache;
struct kmem_cache *ncec_cache;
struct kmem_cache *nce_cache;
#ifdef DEBUG
static void ire_trace_cleanup(const ire_t *);
#endif
static void ire_dep_incr_generation_locked(ire_t *);
/*
* count of the IREs and IRBs (ire bucket).
*
* 1) We bump up the reference count of an IRE to make sure that
* it does not get deleted and freed while we are using it.
* Typically all the lookup functions hold the bucket lock,
* and look for the IRE. If it finds an IRE, it bumps up the
* reference count before dropping the lock. Sometimes we *may* want
* to bump up the reference count after we *looked* up i.e without
* holding the bucket lock. So, the ire_refhold function does not assert
* on the bucket lock being held. Any thread trying to delete from
* the hash bucket can still do so but cannot free the IRE if
* ire_refcnt is not 0.
*
* 2) We bump up the reference count on the bucket where the IRE resides
* (IRB), when we want to prevent the IREs getting deleted from a given
* hash bucket. This makes life easier for ire_walk type functions which
* want to walk the IRE list, call a function, but need to drop
* the bucket lock to prevent recursive rw_enters. While the
* lock is dropped, the list could be changed by other threads or
* the same thread could end up deleting the ire or the ire pointed by
* ire_next. ire_refholding the ire or ire_next is not sufficient as
* a delete will still remove the ire from the bucket while we have
* dropped the lock and hence the ire_next would be NULL. Thus, we
* need a mechanism to prevent deletions from a given bucket.
*
* To prevent deletions, we bump up the reference count on the
* bucket. If the bucket is held, ire_delete just marks both
* the ire and irb as CONDEMNED. When the
* reference count on the bucket drops to zero, all the CONDEMNED ires
* are deleted. We don't have to bump up the reference count on the
* bucket if we are walking the bucket and never have to drop the bucket
* lock. Note that irb_refhold does not prevent addition of new ires
* in the list. It is okay because addition of new ires will not cause
* ire_next to point to freed memory. We do irb_refhold only when
* all of the 3 conditions are true :
*
* 1) The code needs to walk the IRE bucket from start to end.
* 2) It may have to drop the bucket lock sometimes while doing (1)
* 3) It does not want any ires to be deleted meanwhile.
*/
/*
* Bump up the reference count on the hash bucket - IRB to
* prevent ires from being deleted in this bucket.
*/
void
{
irb->irb_refcnt++;
}
void
{
irb->irb_refcnt++;
}
/*
* Note: when IRB_MARK_DYNAMIC is not set the irb_t
* is statically allocated, so that when the irb_refcnt goes to 0,
* we simply clean up the ire list and continue.
*/
void
{
} else {
if (--irb->irb_refcnt == 0 &&
} else {
}
}
}
/*
* Bump up the reference count on the IRE. We cannot assert that the
* bucket lock is being held as it is legal to bump up the reference
* count after the first lookup has returned the IRE without
* holding the lock.
*/
void
{
#ifdef DEBUG
#endif
}
void
{
}
void
{
#ifdef DEBUG
#endif
ire->ire_refcnt++;
}
/*
* Release a ref on an IRE.
*
* Must not be called while holding any locks. Otherwise if this is
* the last reference to be released there is a chance of recursive mutex
* panic due to ire_refrele -> ipif_ill_refrele_tail -> qwriter_ip trying
* to restart an ioctl. The one exception is when the caller is sure that
* this is not the last reference to be released. Eg. if the caller is
* sure that the ire has not been deleted and won't be deleted.
*
* In architectures e.g sun4u, where atomic_add_32_nv is just
* a cas, we need to maintain the right memory barrier semantics
* as that of mutex_exit i.e all the loads and stores should complete
* before the cas is executed. membar_exit() does that here.
*/
void
{
#ifdef DEBUG
#endif
membar_exit();
}
void
{
membar_exit();
}
/*
* This function is associated with the IP_IOC_IRE_DELETE[_NO_REPLY]
* IOCTL[s]. The NO_REPLY form is used by TCP to tell IP that it is
* having problems reaching a particular destination.
* This will make IP consider alternate routes (e.g., when there are
* multiple default routes), and it will also make IP discard any (potentially)
* stale redirect.
* Management processes may want to use the version that generates a reply.
*
* this function shouldn't be necessary for IP to recover from a bad redirect,
* a bad default router (when there are multiple default routers), or
* For instance, this is helpful when TCP suspects a failure before NUD does.
*/
int
{
ipst = CONNQ_TO_IPST(q);
/*
* Check privilege using the ioctl credential; if it is NULL
* then this is a kernel message and therefore privileged.
*/
return (EPERM);
return (EINVAL);
switch (ipid->ipid_addr_length) {
case sizeof (sin_t):
/*
* got complete (sockaddr) address - increment addr_ucp to point
* at the ip_addr field.
*/
break;
case sizeof (sin6_t):
/*
* got complete (sockaddr) address - increment addr_ucp to point
* at the ip_addr field.
*/
break;
default:
return (EINVAL);
}
if (ipversion == IPV4_VERSION) {
/* Extract the destination address. */
} else {
/* Extract the destination address. */
}
if (ipversion == IPV4_VERSION) {
(Q_TO_CONN(q))->conn_laddr_v4, 0, 0, 0,
}
(void) ire_no_good(ire);
}
return (0);
}
/*
* Initialize the ire that is specific to IPv4 part and call
* ire_init_common to finish it.
* Returns zero or errno.
*/
int
{
int error;
/*
* Reject IRE security attribute creation/initialization
* if system is not running in Trusted mode.
*/
return (EINVAL);
/* Make sure we don't have stray values in some fields */
switch (type) {
case IRE_LOOPBACK:
case IRE_HOST:
case IRE_BROADCAST:
case IRE_LOCAL:
case IRE_IF_CLONE:
break;
case IRE_PREFIX:
case IRE_DEFAULT:
case IRE_IF_RESOLVER:
case IRE_IF_NORESOLVER:
}
break;
case IRE_MULTICAST:
case IRE_NOROUTE:
break;
default:
ASSERT(0);
return (EINVAL);
}
return (error);
/* Determine which function pointers to use */
case IRE_LOCAL:
break;
case IRE_LOOPBACK:
break;
case IRE_BROADCAST:
break;
case IRE_MULTICAST:
break;
default:
/*
* For IRE_IF_ALL and IRE_OFFLINK we forward received
* packets by default.
*/
break;
}
/* Multirt receive of broadcast uses ire_recv_broadcast_v4 */
}
return (0);
}
/*
* Determine ire_nce_capable
*/
{
int max_masklen;
return (B_TRUE);
else
return (B_TRUE);
return (B_FALSE);
}
/*
* ire_create is called to allocate and initialize a new IRE.
*
* NOTE : This is called as writer sometimes though not required
* by this function.
*/
ire_t *
{
int error;
return (NULL);
}
if (error != 0) {
return (NULL);
}
return (ire);
}
/*
* Common to IPv4 and IPv6
* Returns zero or errno.
*/
int
{
int error;
#ifdef DEBUG
else
}
#endif /* DEBUG */
/*
* Create/initialize IRE security attribute only in Trusted mode;
* if the passed in gc is non-NULL, we expect that the caller
* has held a reference to it and will release it when this routine
* returns a failure, otherwise we own the reference. We do this
* prior to initializing the rest IRE fields.
*/
if (is_system_labeled()) {
/* release references on behalf of caller */
GC_REFRELE(gc);
} else {
if (error != 0)
return (error);
}
}
/*
* The ill_ire_cnt isn't increased until
* the IRE is added to ensure that a walker will find
* all IREs that hold a reference on an ill.
*
* Note that ill_ire_multicast doesn't hold a ref on the ill since
* ire_add() is not called for the IRE_MULTICAST.
*/
return (0);
}
/*
* This creates an IRE_BROADCAST based on the arguments.
* A mirror is ire_lookup_bcast().
*
* Any suppression of unneeded ones is done in ire_add_v4.
* We add one IRE_BROADCAST per address. ire_send_broadcast_v4()
* takes care of generating a loopback copy of the packet.
*/
ire_t **
{
*irep++ = ire_create(
NULL, /* no gateway */
ill,
NULL,
ipst);
return (irep);
}
/*
* This looks up an IRE_BROADCAST based on the arguments.
* Mirrors ire_create_bcast().
*/
ire_t *
{
int match_args;
if (IS_UNDER_IPMP(ill))
addr, /* dest addr */
ip_g_all_ones, /* mask */
0, /* no gateway */
ill,
NULL,
0,
NULL);
return (ire);
}
/* Arrange to call the specified function for every IRE in the world. */
void
{
}
void
{
}
void
{
}
/*
* Walk a particular version. version == 0 means both v4 and v6.
*/
static void
{
if (vers != IPV6_VERSION) {
/*
* ip_forwarding_table variable doesn't matter for IPv4 since
* ire_walk_ill_tables uses ips_ip_ftable for IPv4.
*/
0, NULL,
}
if (vers != IPV4_VERSION) {
}
}
/*
* Arrange to call the specified function for every IRE that matches the ill.
*/
void
{
}
/*
* Walk a particular ill and version.
*/
static void
{
if (vers == IPV4_VERSION) {
0, NULL,
}
if (vers != IPV4_VERSION) {
}
}
/*
* Do the specific matching of IREs to shared-IP zones.
*
* We have the same logic as in ire_match_args but implemented slightly
* differently.
*/
{
/*
* We're walking the IREs for a specific zone. The only relevant
* IREs are:
* - all IREs with a matching ire_zoneid
* - IRE_IF_ALL IREs for interfaces with a usable source addr
* with a matching zone
* - IRE_OFFLINK with a gateway reachable from the zone
* Note that earlier we only did the IRE_OFFLINK check for
* IRE_DEFAULT (and only when we had multiple IRE_DEFAULTs).
*/
/*
* Note there is no IRE_INTERFACE on vniN thus
* can't do an IRE lookup for a matching route.
*/
if (ifindex == 0)
return (B_FALSE);
/*
* If there is a usable source address in the
* zone, then it's ok to return an
* IRE_INTERFACE
*/
return (B_FALSE);
}
}
if (!IPIF_IS_CONDEMNED(tipif) &&
break;
}
return (B_FALSE);
}
}
}
/*
* Except for ALL_ZONES, we only match the offlink routes
* where ire_gateway_addr has an IRE_INTERFACE for the zoneid.
* Since we can have leftover routes after the IP addresses have
* changed, the global zone will also match offlink routes where the
* gateway is unreachable from any zone.
*/
} else {
}
if (!reach) {
if (zoneid != GLOBAL_ZONEID)
return (B_FALSE);
/*
* Check if ALL_ZONES reachable - if not then let the
* global zone see it.
*/
} else {
}
if (reach) {
/*
* Some other zone could see it, hence hide it
* in the global zone.
*/
return (B_FALSE);
}
}
}
if (((!(match_flags & MATCH_IRE_TYPE)) ||
((!(match_flags & MATCH_IRE_ILL)) ||
return (B_TRUE);
}
return (B_FALSE);
}
int
{
if ((rtf->rt_match_flags != 0) ||
} else {
}
if (ret)
}
return (0);
}
/*
* Walk the ftable entries that match the ill.
*/
void
{
int i, j;
/* knobs such that routine is called only for v6 case */
for (i = (ftbl_sz - 1); i >= 0; i--) {
continue;
for (j = 0; j < htbl_sz; j++) {
continue;
if (match_flags == 0 &&
} else {
ret =
}
if (ret)
}
}
}
} else {
if (match_flags != 0) {
}
}
}
/*
* This function takes a mask and returns
* number of bits set in the mask. If no
* bit is set it returns 0.
* Assumes a contiguous mask.
*/
int
{
}
/*
* Convert length for a mask to the mask.
*/
{
if (masklen == 0)
return (0);
}
void
{
}
/*
* ire_add_v[46] atomically make sure that the ill associated
* with the new ire is not going away i.e., we check ILL_CONDEMNED.
*/
int
{
/*
* Don't allow IRE's to be created on dying ills.
*/
return (ENXIO);
}
if (IS_UNDER_IPMP(ill)) {
int error = 0;
if (!ipmp_ill_is_active(ill) &&
!ire->ire_testhidden) {
}
if (error != 0) {
return (error);
}
}
}
return (0);
}
/*
* Add a fully initialized IRE to the forwarding table.
* This returns NULL on failure, or a held IRE on success.
* Normally the returned IRE is the same as the argument. But a different
* IRE will be returned if the added IRE is deemed identical to an existing
* one. In that case ire_identical_ref will be increased.
* The caller always needs to do an ire_refrele() on the returned IRE.
*/
ire_t *
{
/*
* IREs hosted on interfaces that are under IPMP
* should be hidden so that applications don't
* accidentally end up sending packets with test
* addresses as their source addresses, or
* sending out interfaces that are e.g. IFF_INACTIVE.
* Hide them here.
*/
}
return (ire_add_v6(ire));
else
return (ire_add_v4(ire));
}
/*
* Add a fully initialized IPv4 IRE to the forwarding table.
* This returns NULL on failure, or a held IRE on success.
* Normally the returned IRE is the same as the argument. But a different
* IRE will be returned if the added IRE is deemed identical to an existing
* one. In that case ire_identical_ref will be increased.
* The caller always needs to do an ire_refrele() on the returned IRE.
*/
static ire_t *
{
int match_flags;
int error;
/* Make sure the address is properly masked. */
}
return (NULL);
}
/*
* Start the atomic add of the ire. Grab the ill lock,
* the bucket lock. Check for condemned.
*/
if (error != 0) {
return (NULL);
}
/*
* If we are creating a hidden IRE, make sure we search for
* hidden IREs when searching for duplicates below.
* Otherwise, we might find an IRE on some other interface
* that's not marked hidden.
*/
if (ire->ire_testhidden)
/*
* Atomically check for duplicate and insert in the table.
*/
if (IRE_IS_CONDEMNED(ire1))
continue;
/*
* Here we need an exact match on zoneid, i.e.,
* ire_match_args doesn't fit.
*/
continue;
continue;
/*
* Note: We do not allow multiple routes that differ only
* in the gateway security attributes; such routes are
* considered duplicates.
* To change that we explicitly have to treat them as
* different here.
*/
/*
* Return the old ire after doing a REFHOLD.
* As most of the callers continue to use the IRE
* after adding, we return a held ire. This will
* avoid a lookup in the caller again. If the callers
* don't want to use it, they need to do a REFRELE.
*
* We only allow exactly one IRE_IF_CLONE for any dst,
* so, if this is an IF_CLONE, return the ire without
* an identical_ref, but with an ire_ref held.
*/
}
return (ire1);
}
}
/*
* Normally we do head insertion since most things do not care about
* the order of the IREs in the bucket. Note that ip_cgtp_bcast_add
* assumes we at least do head insertion so that its IRE_BROADCAST
* arrive ahead of existing IRE_HOST for the same address.
* However, due to shared-IP zones (and restrict_interzone_loopback)
* we can have an IRE_LOCAL as well as IRE_IF_CLONE for the same
* address. For that reason we do tail insertion for IRE_IF_CLONE.
* Due to the IRE_BROADCAST on cgtp0, which must be last in the bucket,
* we do tail insertion of IRE_BROADCASTs that do not have RTF_MULTIRT
* set.
*/
}
/* Insert at *irep */
/* Link the new one in. */
/*
* ire_walk routines de-reference ire_next without holding
* a lock. Before we point to the new ire, we want to make
* sure the store that sets the ire_next of the new ire
* reaches global visibility, so that ire_walk routines
* don't see a truncated list of ires i.e if the ire_next
* of the new ire gets set after we do "*irep = ire" due
* to re-ordering, the ire_walk thread will see a NULL
* once it accesses the ire_next of the new ire.
* membar_producer() makes sure that the following store
* happens *after* all of the above stores.
*/
/*
* We return a bumped up IRE above. Keep it symmetrical
* so that the callers will always have to release. This
* helps the callers of this function because they continue
* to use the IRE after adding and hence they don't have to
* lookup again after we return the IRE.
*
* NOTE : We don't have to use atomics as this is appearing
* in the list for the first time and no one else can bump
* up the reference count on this yet.
*/
irb_ptr->irb_ire_cnt++;
}
/* Make any caching of the IREs be notified or updated */
return (ire);
}
/*
* irb_refrele is the only caller of the function. ire_unlink calls to
* do the final cleanup for this ire.
*/
void
{
} else {
}
/*
* Now it's really out of the list. Before doing the
* REFRELE, set ire_next to NULL as ire_inactive asserts
* so.
*/
}
}
/*
* irb_refrele is the only caller of the function. It calls to unlink
* all the CONDEMNED ires from this bucket.
*/
ire_t *
{
(irb->irb_refcnt == 0));
if (IRE_IS_CONDEMNED(ire)) {
if (ire1)
/*
* We need to call ire_delete_v4 or ire_delete_v6 to
* clean up dependents and the redirects pointing at
* the default gateway. We need to drop the lock
* as ire_flush_cache/ire_delete_host_redirects require
* so. But we can't drop the lock, as ire_unlink needs
* to atomically remove the ires from the list.
* So, create a temporary list of CONDEMNED ires
* for doing ire_delete_v4/ire_delete_v6 operations
* later on.
*/
}
}
return (ire_list);
}
/*
* Clean up the radix node for this ire. Must be called by irb_refrele
* when there are no ire's left in the bucket. Returns TRUE if the bucket
* is deleted and freed.
*/
{
struct radix_node *rn;
/* first remove it from the radix tree. */
/* irb_lock is freed */
return (B_TRUE);
}
return (B_FALSE);
}
/*
* Delete the specified IRE.
* We assume that if ire_bucket is not set then ire_ill->ill_ire_cnt was
* not incremented i.e., that the insertion in the bucket and the increment
* of that counter is done atomically.
*/
void
{
/*
* It was never inserted in the list. Should call REFRELE
* to free this IRE.
*/
return;
}
/*
* Move the use counts from an IRE_IF_CLONE to its parent
* IRE_INTERFACE.
* We need to do this before acquiring irb_lock.
*/
ire->ire_ob_pkt_count = 0;
ire->ire_ib_pkt_count = 0;
}
}
/*
* Some other thread has removed us from the list.
* It should have done the REFRELE for us.
*/
return;
}
if (!IRE_IS_CONDEMNED(ire)) {
/* Is this an IRE representing multiple duplicate entries? */
/* Removed one of the identical parties */
return;
}
irb->irb_ire_cnt--;
}
if (irb->irb_refcnt != 0) {
/*
* The last thread to leave this bucket will
* delete this ire.
*/
return;
}
/*
* Normally to delete an ire, we walk the bucket. While we
* walk the bucket, we normally bump up irb_refcnt and hence
* we return from above where we mark CONDEMNED and the ire
* gets deleted from ire_unlink. This case is where somebody
* knows the ire e.g by doing a lookup, and wants to delete the
* IRE. irb_refcnt would be 0 in this case if nobody is walking
* the bucket.
*/
} else {
}
/* Cleanup dependents and related stuff */
} else {
}
/*
* We removed it from the list. Decrement the
* reference count.
*/
}
/*
* Delete the specified IRE.
* All calls should use ire_delete().
* Sometimes called as writer though not required by this function.
*
* NOTE : This function is called only if the ire was added
* in the list.
*/
static void
{
/*
* when a default gateway is going away
* delete all the host redirects pointing at that
* gateway.
*/
}
/*
* If we are deleting an IRE_INTERFACE then we make sure we also
* delete any IRE_IF_CLONE that has been created from it.
* Those are always in ire_dep_children.
*/
/* Remove from parent dependencies and child */
}
/*
* ire_refrele is the only caller of the function. It calls
* to free the ire when the reference count goes to zero.
*/
void
{
/* Count how many condemned ires for kmem_cache callback */
}
/*
* ire_nce_cache is cleared in ire_delete, and we make sure we don't
* set it once the ire is marked condemned.
*/
/*
* Since any parent would have a refhold on us they would already
* have been removed.
*/
/*
* Since any children would have a refhold on us they should have
* already been removed.
*/
/*
* ill_ire_ref is increased when the IRE is inserted in the
* bucket - not when the IRE is created.
*/
(char *), "ire", (void *), ire);
ill->ill_ire_cnt--;
if (ILL_DOWN_OK(ill)) {
/* Drops the ill lock */
} else {
}
}
/* This should be true for both V4 and V6 */
/*
* Instead of examining the conditions for freeing
* the radix node here, we do it by calling
* irb_refrele which is a single point in the code
* that embeds that logic. Bump up the refcnt to
* be able to call irb_refrele
*/
}
#ifdef DEBUG
#endif
} else {
}
}
/*
* ire_update_generation is the callback function provided by
* ire_get_bucket() to update the generation number of any
* matching shorter route when a new route is added.
*
* This function always returns a failure return (B_FALSE)
* to force the caller (rn_matchaddr_args)
* to back-track up the tree looking for shorter matches.
*/
/* ARGSUSED */
static boolean_t
{
/* We need to handle all in the same bucket */
return (B_FALSE);
}
/*
* Take care of all the generation numbers in the bucket.
*/
void
{
return;
/*
* we cannot do an irb_refhold/irb_refrele here as the caller
* already has the global RADIX_NODE_HEAD_WLOCK, and the irb_refrele
* may result in an attempt to free the irb_t, which also needs
* the RADIX_NODE_HEAD lock. However, since we want to traverse the
* irb_ire list without fear of having a condemned ire removed from
* the list, we acquire the irb_lock as WRITER. Moreover, since
* the ire_generation increments are done under the ire_dep_lock,
* acquire the locks in the prescribed lock order first.
*/
if (!IRE_IS_CONDEMNED(ire))
}
}
/*
* When an IRE is added or deleted this routine is called to make sure
* any caching of IRE information is notified or updated.
*
* The flag argument indicates if the flush request is due to addition
* of new route (IRE_FLUSH_ADD), deletion of old route (IRE_FLUSH_DELETE),
* or a change to ire_gateway_addr (IRE_FLUSH_GWCHANGE).
*/
void
{
/*
* IRE_IF_CLONE ire's don't provide any new information
* than the parent from which they are cloned, so don't
* perturb the generation numbers.
*/
return;
/*
* Ensure that an ire_add during a lookup serializes the updates of the
* generation numbers under the radix head lock so that the lookup gets
* either the old ire and old generation number, or a new ire and new
* generation number.
*/
/*
* If a route was just added, we need to notify everybody that
* has cached an IRE_NOROUTE since there might now be a better
* route for them.
*/
if (flag == IRE_FLUSH_ADD) {
}
/* Adding a default can't otherwise provide a better route */
return;
}
switch (flag) {
case IRE_FLUSH_DELETE:
case IRE_FLUSH_GWCHANGE:
/*
* Update ire_generation for all ire_dep_children chains
* starting with this IRE
*/
break;
case IRE_FLUSH_ADD:
/*
* Update the generation numbers of all shorter matching routes.
* ire_update_generation takes care of the dependants by
* using ire_dep_incr_generation.
*/
break;
}
}
/*
* Matches the arguments passed with the values in the ire.
*
* Note: for match types that match using "ill" passed in, ill
* must be checked for non-NULL before calling this routine.
*/
{
/*
* If MATCH_IRE_TESTHIDDEN is set, then only return the IRE if it is
* in fact hidden, to ensure the caller gets the right one.
*/
if (ire->ire_testhidden) {
if (!(match_flags & MATCH_IRE_TESTHIDDEN))
return (B_FALSE);
}
/*
* If MATCH_IRE_ZONEONLY has been set and the supplied zoneid
* does not match that of ire_zoneid, a failure to
* match is reported at this point. Otherwise, since some IREs
* that are available in the global zone can be used in local
* zones, additional checks need to be performed:
*
* IRE_LOOPBACK
* entries should never be matched in this situation.
* Each zone has its own IRE_LOOPBACK.
*
* IRE_LOCAL
* We allow them for any zoneid. ire_route_recursive
* does additional checks when
* ip_restrict_interzone_loopback is set.
*
* If ill_usesrc_ifindex is set
* Then we check if the zone has a valid source address
* on the usesrc ill.
*
* If ire_ill is set, then check that the zone has an ipif
* on that ill.
*
* Outside of this function (in ire_round_robin) we check
* that any IRE_OFFLINK has a gateway that is reachable from the
* zone when we have multiple choices (ECMP).
*/
if (match_flags & MATCH_IRE_ZONEONLY)
return (B_FALSE);
return (B_FALSE);
goto matchit;
/*
* The normal case of IRE_ONLINK has a matching zoneid.
* Here we handle the case when shared-IP zones have been
* configured with IP addresses on vniN. In that case it
* is ok for traffic from a zone to use IRE_ONLINK routes
* if the ill has a usesrc pointing at vniN
*/
/*
* Note there is no IRE_INTERFACE on vniN thus
* can't do an IRE lookup for a matching route.
*/
if (ifindex == 0)
return (B_FALSE);
/*
* If there is a usable source address in the
* zone, then it's ok to return this IRE_INTERFACE
*/
ip3dbg(("ire_match_args: no usrsrc for zone"
" dst_ill %p\n", (void *)dst_ill));
return (B_FALSE);
}
}
/*
* For example, with
* route add 11.0.0.0 gw1 -ifp bge0
* route add 11.0.0.0 gw2 -ifp bge1
* this code would differentiate based on
* where the sending zone has addresses.
* Only if the zone has an address on bge0 can it use the first
* route. It isn't clear if this behavior is documented
* anywhere.
*/
if (!IPIF_IS_CONDEMNED(tipif) &&
break;
}
return (B_FALSE);
}
}
}
if (match_flags & MATCH_IRE_ILL) {
/*
* If asked to match an ill, we *must* match
* on the ire_ill for ipmp test addresses, or
* any of the ill in the group for data addresses.
* If we don't, we may as well fail.
* However, we need an exception for IRE_LOCALs to ensure
* we loopback packets even sent to test addresses on different
* interfaces in the group.
*/
if ((match_flags & MATCH_IRE_TESTHIDDEN) &&
return (B_FALSE);
} else {
/*
* We know that ill is not NULL, but ire_ill could be
* NULL
*/
return (B_FALSE);
}
}
if (match_flags & MATCH_IRE_SRC_ILL) {
return (B_FALSE);
if (ire_ill->ill_usesrc_ifindex == 0 ||
return (B_FALSE);
}
}
((!(match_flags & MATCH_IRE_GW)) ||
((!(match_flags & MATCH_IRE_DIRECT)) ||
((!(match_flags & MATCH_IRE_SECATTR)) ||
(!is_system_labeled()) ||
/* We found the matched IRE */
return (B_TRUE);
}
return (B_FALSE);
}
/*
* Check if the IRE_LOCAL uses the same ill as another route would use.
* If there is no alternate route, or the alternate is a REJECT or BLACKHOLE,
* then we don't allow this IRE_LOCAL to be used.
* We always return an IRE; will be RTF_REJECT if no route available.
*/
ire_t *
{
/*
* Need to match on everything but local.
* This might result in the creation of a IRE_IF_CLONE for the
* same address as the IRE_LOCAL when restrict_interzone_loopback is
* set. ire_add_*() ensures that the IRE_IF_CLONE are tail inserted
* to make sure the IRE_LOCAL is always found first.
*/
NULL, &generation);
} else {
NULL, &generation);
}
/* Going out the same ILL - ok to send to IRE_LOCAL */
} else {
/* Different ill - ignore IRE_LOCAL */
if (generationp != NULL)
}
return (ire);
}
{
if (irb->irb_ire_cnt == 0)
return (B_FALSE);
if (IRE_IS_CONDEMNED(ire))
continue;
continue;
continue;
continue;
if (is_system_labeled() &&
continue;
return (B_TRUE);
}
return (B_FALSE);
}
/*
* Check if the zoneid (not ALL_ZONES) has an IRE_INTERFACE for the specified
* gateway address. If ill is non-NULL we also match on it.
* The caller must hold a read lock on RADIX_NODE_HEAD if lock_held is set.
*/
{
struct rt_sockaddr rdst;
if (lock_held)
else
/*
* We only use margs for ill, zoneid, and tsl matching in
* ire_find_zoneid
*/
if (!lock_held)
}
/*
* ire_walk routine to delete a fraction of redirect IREs and IRE_IF_CLONE IREs.
* The fraction argument tells us what fraction of the IREs to delete.
* Common for IPv4 and IPv6.
* Used under memory backpressure.
*/
static void
{
/* Pick a random number */
/* Use truncation */
}
}
}
/*
* kmem_cache callback to free up memory.
*
* Free a fraction (ips_ip_ire_reclaim_fraction) of things IP added dynamically
* (RTF_DYNAMIC and IRE_IF_CLONE).
*/
static void
{
/*
* Walk all CONNs that can have a reference on an ire, nce or dce.
* Get them to update any stale references to drop any refholds they
* have.
*/
}
/*
* Called by the memory allocator subsystem directly, when the system
* is running low on memory.
*/
/* ARGSUSED */
void
ip_ire_reclaim(void *args)
{
netstack_t *ns;
/*
* netstack_next() can return a netstack_t with a NULL
* netstack_ip at boot time.
*/
continue;
}
}
}
/*
 * Round *value up, in place, to a power of two: the smallest (1 << i) with
 * 1 <= i <= 30 that is not below *value, so inputs of 0 or 1 become 2.
 *
 * NOTE(review): when *value is larger than (1 << 30) the loop runs to
 * completion with i == 31 and the final store evaluates (1 << 31) on an
 * int literal, which overflows a 32-bit signed int; consider 1U here.
 */
static void
{
int i;
for (i = 1; i < 31; i++) {
if (*value <= (1 << i))
break;
}
*value = (1 << i);
}
/* Global init for all zones */
void
{
/*
* Create kmem_caches. ip_ire_reclaim() and ip_nce_reclaim()
* will give disposable IREs back to system when needed.
* This needs to be done here before anything else, since
* ire_add() expects the cache to be created.
*/
/*
* Have radix code setup kmem caches etc.
*/
rn_init();
}
void
{
int error;
/*
* Make sure that the forwarding table size is a power of 2.
* The IRE*_ADDR_HASH() macros depend on that.
*/
/*
* Allocate/initialize a pair of IRE_NOROUTEs for each of IPv4 and IPv6.
* The ire_reject_v* has RTF_REJECT set, and the ire_blackhole_v* has
* RTF_BLACKHOLE set. We use the latter for transient errors such
* as memory allocation failures and tripping on IRE_IS_CONDEMNED
* entries.
*/
}
/*
 * Global cleanup hook for this file: hands off to the radix code's
 * rn_fini() and does nothing else.
 */
void
ip_ire_g_fini(void)
{
	rn_fini();
}
void
{
int i;
/*
* Delete all IREs - assumes that the ill/ipifs have
* been removed so what remains are just the ftable to handle.
*/
for (i = 0; i < IP6_MASK_TABLE_SIZE; i++) {
int j;
continue;
for (j = 0; j < ipst->ips_ip6_ftable_hash_size; j++) {
}
}
}
#ifdef DEBUG
void
{
if (ire->ire_trace_disable) {
return;
}
} else {
}
}
void
{
if (!ire->ire_trace_disable)
}
static void
{
}
#endif /* DEBUG */
/*
* Find, or create if needed, the nce_t pointer to the neighbor cache
* entry ncec_t for an IPv4 address. The nce_t will be created on the ill_t
* on the next available under-ill (selected by the IPMP rotor) in the
* unicast IPMP case.
*
* If a neighbor-cache entry has to be created (i.e., one does not already
* exist in the nce list) the ncec_lladdr and ncec_state of the neighbor cache
* entry are initialized in nce_add_v4(). The broadcast, multicast, and
* link-layer type determine the contents of {ncec_state, ncec_lladdr} of
* the ncec_t created. The ncec_lladdr is non-null for all link types with
* non-zero ill_phys_addr_length, though the contents may be zero in cases
* where the link-layer type is not known at the time of creation
* (e.g., IRE_IF_RESOLVER links).
*
* All IRE_BROADCAST entries have ncec_state = ND_REACHABLE, and the
* ncec_lladdr has the physical broadcast address of the outgoing interface.
* For unicast ire entries,
* - if the outgoing interface is of type IRE_IF_RESOLVER, a newly created
* ncec_t has all-zero ncec_lladdr contents and will be in the ND_INITIAL
* state.
* - if the outgoing interface is an IRE_IF_NORESOLVER interface, no link
* layer resolution is necessary, so the ncec_t will be in the
* ND_REACHABLE state.
*
* The link layer information needed for broadcast addresses, and for
* packets sent on IRE_IF_NORESOLVER interfaces is a constant mapping that
* never needs re-verification for the lifetime of the ncec_t. These are
* therefore marked NCE_F_NONUD.
*
* The nce returned will be created such that the nce_ill == ill that
* is passed in. Note that the nce itself may not have ncec_ill == ill
* where IPMP links are involved.
*/
static nce_t *
{
int err;
return (NULL);
}
switch (ire_type) {
case IRE_BROADCAST:
break;
case IRE_MULTICAST:
break;
}
} else {
}
/* nce_state will be computed by nce_add_common() */
} else {
}
switch (err) {
case 0:
break;
case EEXIST:
/*
* When subnets change or partially overlap what was once
* a broadcast address could now be a unicast, or vice versa.
*/
NCE_F_BCAST) != 0) {
goto retry;
}
break;
default:
if (need_refrele)
return (NULL);
}
/*
* If the ill was an under-ill of an IPMP group, we need to verify
* that it is still active so that we select an active interface in
* the group. However, since ipmp_ill_is_active ASSERTs for
* IS_UNDER_IPMP(), we first need to verify that the ill is an
* under-ill, and since this is being done in the data path, the
* only way to ascertain this is by holding the ill_g_lock.
*/
/*
* need_refrele implies that the under ill was selected by
* ipmp_ill_hold_xmit_ill() because either the in_ill was an
* ipmp_ill, or we are sending a non-unicast packet on an
* under_ill. However, when we get here, the ill selected by
* ipmp_ill_hold_xmit_ill was pulled out of the active set
* (for unicast) or cast_ill nomination (for !unicast) after
* it was picked as the outgoing ill. We have to pick an
* active interface and/or cast_ill in the group.
*/
return (NULL);
goto retry;
} else {
}
done:
if (need_refrele)
return (nce);
}
nce_t *
{
}
nce_t *
{
}
/*
* The caller should hold irb_lock as a writer if the ire is in a bucket.
* This routine will clear ire_nce_cache, and we make sure that we can never
* set ire_nce_cache after the ire is marked condemned.
*/
void
{
/* Count how many condemned ires for kmem_cache callback */
}
/*
* Increment the generation avoiding the special condemned value
*/
void
{
/*
* Even though the caller has a hold it can't prevent a concurrent
* ire_delete marking the IRE condemned
*/
if (!IRE_IS_CONDEMNED(ire)) {
if (generation == IRE_GENERATION_CONDEMNED)
}
}
/*
* Increment ire_generation on all the IRE_MULTICASTs
* Used when the default multicast interface (as determined by
* ill_lookup_multicast) might have changed.
*
* That includes the zoneid, IFF_ flags, the IPv6 scope of the address, and
* ill unplumb.
*/
void
{
if (isv6)
else
if (ILL_IS_CONDEMNED(ill))
continue;
}
}
/*
* Return a held IRE_NOROUTE with RTF_REJECT set
*/
ire_t *
{
if (isv6)
else
return (ire);
}
/*
* Return a held IRE_NOROUTE with RTF_BLACKHOLE set
*/
ire_t *
{
if (isv6)
else
return (ire);
}
/*
* Return a held IRE_MULTICAST.
*/
ire_t *
{
else
return (ire);
}
/*
* Given an IRE return its nexthop IRE. The nexthop IRE is an IRE_ONLINK
* that is an exact match (i.e., a /32 for IPv4 and /128 for IPv6).
* This can return an RTF_REJECT|RTF_BLACKHOLE.
* The returned IRE is held.
* The assumption is that ip_select_route() has been called and returned the
* IRE (thus ip_select_route would have set up the ire_dep* information.)
* If some IRE is deleted then ire_dep_remove() will have been called and
* we might not find a nexthop IRE, in which case we return NULL.
*/
ire_t *
{
/* Acquire lock to walk ire_dep_parent */
goto done;
}
/*
* If we find an IRE_ONLINK we are done. This includes
* the case of IRE_MULTICAST.
* Note that in order to send packets we need a host-specific
* IRE_IF_ALL first in the ire_dep_parent chain. Normally this
* is done by inserting an IRE_IF_CLONE if the IRE_INTERFACE
* was not host specific.
* However, ip_rts_request doesn't want to send packets
* hence doesn't want to allocate an IRE_IF_CLONE. Yet
* it needs an IRE_IF_ALL to get to the ill. Thus
* we return IRE_IF_ALL that are not host specific here.
*/
goto done;
}
return (NULL);
done:
return (ire);
}
/*
* Find the ill used to send packets. This will be NULL in case
* of a reject or blackhole.
* The returned ill is held; caller needs to do ill_refrele when done.
*/
ill_t *
{
return (NULL);
/* ire_ill can not change for an existing ire */
return (ill);
}
#ifdef DEBUG
static boolean_t
{
&(parent->ire_dep_children));
} else {
&(prev->ire_dep_sib_next));
}
return (B_TRUE);
}
return (B_FALSE);
}
static void
{
}
}
}
#endif /* DEBUG */
/*
* Assumes ire_dep_parent is set. Remove this child from its parent's linkage.
*/
void
{
#ifdef DEBUG
#endif
/*
* Make sure all our children, grandchildren, etc set
* ire_dep_parent_generation to IRE_GENERATION_VERIFY since
* we can no longer guarantee that the children have a current
* ire_nce_cache and ire_nexthop_ill().
*/
/*
* Since the parent is gone we make sure we clear ire_nce_cache.
* We can clear it under ire_lock even if the IRE is used
*/
#ifdef DEBUG
#endif
}
/*
* Insert the child in the linkage of the parent
*/
static void
{
#ifdef DEBUG
#endif
/* No parents => no siblings */
/* Head insertion */
}
#ifdef DEBUG
#endif
}
/*
* Given count worth of ires and generations, build ire_dep_* relationships
* from ires[0] to ires[count-1]. Record generations[i+1] in
* ire_dep_parent_generation for ires[i].
* We graft onto an existing parent chain by making sure that we don't
* touch ire_dep_parent for ires[count-1].
*
* We check for any condemned ire_generation count and return B_FALSE in
* that case so that the caller can tear it apart.
*
* Note that generations[0] is not used. Caller handles that.
*/
{
uint_t i;
if (count == 1) {
/* No work to do */
return (B_TRUE);
}
/*
* Do not remove the linkage for any existing parent chain i.e.,
* ires[count-1] is left alone.
*/
for (i = 0; i < count-1; i++) {
/* Remove existing parent if we need to change it */
ire_dep_remove(ires[i]);
}
for (i = 0; i < count - 1; i++) {
/* Does it need to change? */
return (B_FALSE);
}
}
return (B_TRUE);
}
/*
* Given count worth of ires, unbuild ire_dep_* relationships
* from ires[0] to ires[count-1].
*/
void
{
uint_t i;
if (count == 0) {
/* No work to do */
return;
}
for (i = 0; i < count; i++) {
ire_dep_remove(ires[i]);
}
}
/*
* Both the forwarding and the outbound code paths can trip on
* a condemned NCE, in which case we call this function.
* We have two different behaviors: if the NCE was UNREACHABLE
* it is an indication that something failed. In that case
* we see if we should look for a different IRE (for example,
* delete any matching redirect IRE, or try a different
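* IRE_DEFAULT (ECMP)). We mark the ire as bad so a hopefully
* different IRE will be picked next time we send/forward.
*
* If we are called by the output path then fail_if_better is set
* and we return NULL if there could be a better IRE. This is because the
* output path retries the IRE lookup. (The input/forward path can not retry.)
*
* If the NCE was not unreachable then we pick/allocate a
* new (most likely ND_INITIAL) NCE and proceed with it.
*
* ipha/ip6h are needed for multicast packets; ipha needs to be
* set for IPv4 and ip6h needs to be set for IPv6 packets.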
*/
nce_t *
{
/*
* Did some changes, or ECMP likely to exist.
* Make ip_output look for a different IRE
*/
return (NULL);
}
}
/* The ire_dep_parent chain went bad, or no memory? */
(void) ire_no_good(ire);
return (NULL);
}
} else {
}
return (NULL);
if (nce->nce_is_condemned) {
return (NULL);
}
return (nce);
}
/*
* The caller has found that the ire is bad, either due to a reference to an NCE
* in ND_UNREACHABLE state, or a MULTIRT route whose gateway can't be resolved.
* We update things so a subsequent attempt to send to the destination
* is likely to find a different IRE, or that a new NCE would be created.
*
* Returns B_TRUE if it is likely that a subsequent ire_ftable_lookup would
* find a different route (either due to having deleted a redirect, or there
* being ECMP routes.)
*
* If we have a redirect (RTF_DYNAMIC) we delete it.
* Otherwise we increment ire_badcnt and increment the generation number so
* that a cached ixa_ire will redo the route selection. ire_badcnt is taken
* into account in the route selection when we have multiple choices (multiple
* default routes or ECMP in general).
* Any time ip_select_route finds an ire with a condemned ire_nce_cache
* (e.g., if no equal cost route to the bad one) ip_select_route will make
* sure the NCE is revalidated to avoid getting stuck on a
* NCE_F_CONDEMNED ncec that caused ire_no_good to be called.
*/
{
return (B_TRUE);
}
/* Check if next IRE is a redirect */
} else {
}
return (B_TRUE);
}
}
/*
* No redirect involved. Increment badcnt so that if we have ECMP
* routes we are likely to pick a different one for the next packet.
*
* If the NCE is unreachable and condemned we should drop the reference
* to it so that a new NCE can be created.
*
* Finally we increment the generation number so that any ixa_ire
* cache will be revalidated.
*/
ire->ire_badcnt++;
else
}
/*
* Walk ire_dep_parent chain and validate that ire_dep_parent->ire_generation ==
* ire_dep_parent_generation.
* If they all match we just return ire_generation from the topmost IRE.
* Otherwise we propagate the mismatch by setting all ire_dep_parent_generation
* above the mismatch to IRE_GENERATION_VERIFY and also returning
* IRE_GENERATION_VERIFY.
*/
{
break;
if (ire1->ire_dep_parent_generation !=
goto mismatch;
}
return (generation);
/* Fill from top down to the mismatch with _VERIFY */
}
return (generation);
}
/*
* Used when we need to return an ire with ire_dep_parent, but we
* know the chain is invalid, for instance because we didn't create an
* IRE_IF_CLONE. Using IRE_GENERATION_VERIFY means that next time we'll redo
* the recursive lookup.
*/
void
{
}
}
/* Set _VERIFY ire_dep_parent_generation for all children recursively */
static void
{
/* Depth first */
}
}
static void
{
/* Depth first */
if (!IRE_IS_CONDEMNED(child))
}
}
/*
* Walk all the children of this ire recursively and increment their
* generation number.
*/
static void
{
}
void
{
}
/*
* Get a new ire_nce_cache for this IRE as well as its nexthop.
* Returns zero if it succeeds. Can fail due to lack of memory or when
* the route has become unreachable. Returns ENOMEM and ENETUNREACH in those
* cases.
*
* In the in.mpathd case, the ire will have ire_testhidden
* set; so we should create the ncec for the underlying ill.
*
* Note that the error returned by ire_revalidate_nce() is ignored by most
* callers except ire_handle_condemned_nce(), which handles the ENETUNREACH
* error to mark potentially bad ire's. For all the other callers, an
* error return could indicate a transient condition like ENOMEM, or could
* be the result of an interface that is going down/unplumbing. In the former
* case (transient error), we would leave the old stale ire/ire_nce_cache
* in place, and possibly use incorrect link-layer information to send packets
* but would eventually recover. In the latter case (ill down/replumb),
* ire_revalidate_nce() might return a condemned nce back, but we would then
* recover in the packet output path.
*/
int
{
/*
* For multicast we conceptually have an NCE but we don't store it
* in ire_nce_cache; when ire_to_nce is called we allocate the nce.
*/
return (0);
/* ire_testhidden should only be set on under-interfaces */
/* The route is potentially bad */
(void) ire_no_good(ire);
return (ENETUNREACH);
}
else
} else {
} else {
}
}
/*
* Leave the old stale one in place to avoid a NULL
* ire_nce_cache.
*/
return (ENOMEM);
}
/* Update the nexthop ire */
if (!IRE_IS_CONDEMNED(nexthop)) {
} else {
}
}
if (!IRE_IS_CONDEMNED(ire)) {
} else {
}
return (0);
}
/*
* Get a held nce for a given ire.
* In the common case this is just from ire_nce_cache.
* For IRE_MULTICAST this needs to do an explicit lookup since we do not
* have an IRE_MULTICAST per address.
* Note that this explicitly returns CONDEMNED NCEs. The caller needs those
* so they can check whether the NCE went unreachable (as opposed to was
* condemned for some other reason).
*/
nce_t *
{
return (NULL);
/* ire_testhidden should only be set on under-interfaces */
return (nce);
}
} else {
}
return (nce);
}
return (NULL);
}
nce_t *
{
} else {
}
}
/*
* Given an IRE_INTERFACE (that matches more than one address) create
* and return an IRE_IF_CLONE for the specific address.
* Return the generation number.
* Returns NULL if there is no memory for the IRE.
* Handles both IPv4 and IPv6.
*
* IRE_IF_CLONE entries may only be created and added by calling
* ire_create_if_clone(), and we depend on the fact that ire_add will
* atomically ensure that attempts to add multiple identical IRE_IF_CLONE
* entries will not result in duplicate (i.e., ire_identical_ref > 1)
* CLONE entries, so that a single ire_delete is sufficient to remove the
* CLONE.
*/
ire_t *
{
ire = ire_create(
IRE_IF_CLONE, /* IRE type */
NULL, /* No security attr for IRE_IF_ALL */
} else {
ire = ire_create_v6(
addr, /* dest address */
&ipv6_all_ones, /* mask */
IRE_IF_CLONE, /* IRE type */
NULL, /* No security attr for IRE_IF_ALL */
}
return (NULL);
/* Take the metrics, in particular the mtu, from the IRE_IF */
return (NULL);
if (generationp != NULL)
return (nire);
}
/*
* The argument is an IRE_INTERFACE. Delete all of IRE_IF_CLONE in the
* ire_dep_children (just walk the ire_dep_sib_next since they are all
* immediate children.)
* Since we hold a lock while we remove them we need to defer the actual
* calls to ire_delete() until we have dropped the lock. This makes things
* less efficient since we restart at the top after dropping the lock. But
* we only run when an IRE_INTERFACE is deleted which is infrequent.
*
* Note that ire_dep_children can be any mixture of offlink routes and
* IRE_IF_CLONE entries.
*/
void
{
return;
}
!IRE_IS_CONDEMNED(child)) {
goto restart;
}
}
}
/*
* ire_t entries with ire_unbound set to B_TRUE are bound to an interface
* by selecting the first available interface that has an interface route for
* the ire_gateway. If that interface is subsequently brought down, ill_downi()
* will call ire_rebind() so that the unbound route can be bound to some other
* matching interface thereby preserving the intended reachability information
* from the original unbound route.
*/
void
{
int match_flags = MATCH_IRE_TYPE;
if (isv6) {
} else {
}
/* see comments in ip_rt_add[_v6]() for IPMP */
if (match_flags & MATCH_IRE_TESTHIDDEN)
return;
goto again;
}
if (isv6) {
} else {
}
return;
}