iptun.c revision 5d3b8cb7141cfa596d20cdc5043b8a6df635938d
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* iptun - IP Tunneling Driver
*
* This module is a GLDv3 driver that implements virtual datalinks over IP
* (a.k.a. IP tunneling). The datalinks are managed through a dld ioctl
* interface (see iptun_ctl.c), and registered with GLDv3 using
* mac_register(). It implements the logic for various forms of IP (IPv4 or
* IPv6) encapsulation within IP (IPv4 or IPv6) by interacting with the ip
* module below it. Each virtual IP tunnel datalink has a conn_t associated
* with it representing the "outer" IP connection.
*
* The module implements the following locking semantics:
*
* Lookups and deletions in iptun_hash are synchronized using iptun_hash_lock.
* See comments above iptun_hash_lock for details.
*
* No locks are ever held while calling up to GLDv3. The general architecture
* of GLDv3 requires this, as the mac perimeter (essentially a lock) for a
* given link will be held while making downcalls (iptun_m_*() callbacks).
* Because we need to hold locks while handling downcalls, holding these locks
* while issuing upcalls results in deadlock scenarios. See the block comment
* above iptun_task_cb() for details on how we safely issue upcalls without
* holding any locks.
*
* The contents of each iptun_t is protected by an iptun_mutex which is held
* in iptun_enter() (called by iptun_enter_by_linkid()), and exited in
* iptun_exit().
*
* See comments in iptun_delete() and iptun_free() for details on how the
* iptun_t is deleted safely.
*/
#include <sys/mac_provider.h>
#include <sys/mac_ipv4.h>
#include <sys/mac_ipv6.h>
#include <sys/mac_6to4.h>
#include <inet/ipsec_impl.h>
#include "iptun_impl.h"
/* Do the tunnel type and address family match? */
#define IPTUNQ_DEV "/dev/iptunq"
#define IPTUN_MIN_IPV6_MTU IPV6_MIN_MTU
sizeof (iptun_encaplim_t))
#define IPTUN_MIN_HOPLIMIT 1
#define IPTUN_MAX_HOPLIMIT UINT8_MAX
#define IPTUN_MIN_ENCAPLIMIT 0
#define IPTUN_MAX_ENCAPLIMIT UINT8_MAX
/*
* Initial contents used for an iptun_encaplim_t. The slot initialized to
* IPTUN_DEFAULT_ENCAPLIMIT is only a placeholder and is overwritten with the
* tunnel's actual encapsulation limit later.
* NOTE(review): the meaning of the other positional values depends on the
* layout of iptun_encaplim_t, which is not declared in this file (presumably
* in iptun_impl.h) -- confirm the field order before changing any value.
*/
static iptun_encaplim_t iptun_encaplim_init = {
{ IPPROTO_NONE, 0 },
1,
IPTUN_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */
1,
0
};
/* Table containing per-iptun-type information. */
static iptun_typeinfo_t iptun_type_table[] = {
};
/*
* iptun_hash is an iptun_t lookup table by link ID protected by
* iptun_hash_lock. While the hash table's integrity is maintained via
* internal locking in the mod_hash_*() functions, we need additional locking
* so that an iptun_t cannot be deleted after a hash lookup has returned an
* iptun_t and before iptun_lock has been entered. As such, we use
* iptun_hash_lock when doing lookups and removals from iptun_hash.
*/
static kmutex_t iptun_hash_lock;
/*
* Kinds of deferred work processed by iptun_task_cb().
* IPTUN_TASK_PMTU_UPDATE is handled entirely within the tunnel (it only
* recalculates the MTU); every other value results in a notification being
* passed up to the mac module.
*/
typedef enum {
IPTUN_TASK_PMTU_UPDATE, /* obtain new destination path-MTU */
IPTUN_TASK_MTU_UPDATE, /* tell mac about new tunnel link MTU */
IPTUN_TASK_LADDR_UPDATE, /* tell mac about new local address */
IPTUN_TASK_RADDR_UPDATE, /* tell mac about new remote address */
IPTUN_TASK_LINK_UPDATE, /* tell mac about new link state */
IPTUN_TASK_PDATA_UPDATE /* tell mac about updated plugin data */
} iptun_task_t;
typedef struct iptun_task_data_s {
static int iptun_enter(iptun_t *);
static void iptun_exit(iptun_t *);
static void iptun_input(void *, mblk_t *, void *);
static mac_callbacks_t iptun_m_callbacks;
static int
{
int err = 0;
switch (stat) {
case MAC_STAT_IERRORS:
break;
case MAC_STAT_OERRORS:
break;
case MAC_STAT_RBYTES:
break;
case MAC_STAT_IPACKETS:
break;
case MAC_STAT_OBYTES:
break;
case MAC_STAT_OPACKETS:
break;
case MAC_STAT_NORCVBUF:
break;
case MAC_STAT_NOXMTBUF:
break;
default:
}
return (err);
}
static int
iptun_m_start(void *arg)
{
int err;
}
return (err);
}
static void
iptun_m_stop(void *arg)
{
if (iptun_enter(iptun) == 0) {
}
}
/*
* iptun_m_setpromisc() does nothing and always succeeds. This is because a
* tunnel data-link only ever receives packets that are destined exclusively
* for the local address of the tunnel.
*/
/* ARGSUSED */
static int
{
return (0);
}
/* ARGSUSED */
static int
{
return (ENOTSUP);
}
/*
* iptun_m_unicst() sets the local address.
*/
/* ARGSUSED */
static int
{
int err;
struct sockaddr_storage ss;
struct sockaddr_in *sin;
struct sockaddr_in6 *sin6;
case IPV4_VERSION:
break;
case IPV6_VERSION:
break;
default:
ASSERT(0);
}
}
return (err);
}
static mblk_t *
{
if (!IS_IPTUN_RUNNING(iptun)) {
return (NULL);
}
/*
* Request the destination's path MTU information regularly in case
* path MTU has increased.
*/
if (IPTUN_PMTU_TOO_OLD(iptun))
}
return (NULL);
}
/* ARGSUSED */
static int
{
int err;
/*
* We need to enter this iptun_t since we'll be modifying the outer
* header.
*/
return (err);
switch (pr_num) {
case MAC_PROP_IPTUN_HOPLIMIT:
break;
}
}
break;
break;
}
}
break;
case MAC_PROP_MTU: {
break;
}
}
break;
}
default:
}
return (err);
}
/* ARGSUSED */
static int
{
int err;
return (err);
goto done;
}
if (is_default && is_possible) {
goto done;
}
*perm = MAC_PROP_PERM_RW;
if (is_possible) {
if (pr_valsize < sizeof (mac_propval_range_t)) {
goto done;
}
} else if (pr_valsize < sizeof (uint32_t)) {
goto done;
}
switch (pr_num) {
case MAC_PROP_IPTUN_HOPLIMIT:
if (is_possible) {
} else if (is_default) {
} else {
}
break;
goto done;
}
if (is_possible) {
} else if (is_default) {
} else {
}
break;
case MAC_PROP_MTU: {
if (is_possible) {
} else {
/*
* The MAC module knows the current value and should
* never call us for it. There is also no default
* MTU, as by default, it is a dynamic property.
*/
goto done;
}
break;
}
default:
goto done;
}
if (is_possible)
done:
return (err);
}
/*
* Return the current value of iptun_tunnelcount.
* NOTE(review): iptun_tunnelcount is defined outside the visible source;
* presumably it tracks the number of existing tunnels -- confirm.
*/
iptun_count(void)
{
return (iptun_tunnelcount);
}
/*
* Enter an iptun_t exclusively. This is essentially just a mutex, but we
* don't allow iptun_enter() to succeed on a tunnel if it's in the process of
* being deleted.
*/
static int
{
return (ENOENT);
}
return (0);
}
/*
* Exit the tunnel entered in iptun_enter().
*/
static void
{
}
/*
* Enter the IP tunnel instance by datalink ID.
*/
static int
{
int err;
(mod_hash_val_t *)iptun) == 0)
else
if (err != 0)
return (err);
}
/*
* Handle tasks that were deferred through the iptun_taskq. These fall into
* two categories:
*
* 1. Tasks that were deferred because we didn't want to spend time doing them
* while in the data path. Only IPTUN_TASK_PMTU_UPDATE falls into this
* category.
*
* 2. Tasks that were deferred because they require calling up to the mac
* module, and we can't call up to the mac module while holding locks.
*
* Handling 1 is easy; we just lookup the iptun_t, perform the task, exit the
* tunnel, and we're done.
*
* Handling 2 is tricky to get right without introducing race conditions and
* deadlocks with the mac module, as we cannot issue an upcall while in the
* iptun_t. The reason is that upcalls may try and enter the mac perimeter,
* while iptun callbacks (such as iptun_m_setprop()) called from the mac
* module will already have the perimeter held, and will then try and enter
* the iptun_t. You can see the lock ordering problem with this; this will
* deadlock.
*
* The safe way to do this is to enter the iptun_t in question and copy the
* information we need out of it so that we can exit it and know that the
* information being passed up to the upcalls won't be subject to modification
* by other threads. The problem now is that we need to exit it prior to
* issuing the upcall, but once we do this, a thread could come along and
* delete the iptun_t and thus the mac handle required to issue the upcall.
* To prevent this, we set the IPTUN_UPCALL_PENDING flag prior to exiting the
* iptun_t. This flag is the condition associated with iptun_upcall_cv, which
* iptun_delete() will cv_wait() on. When the upcall completes, we clear
* IPTUN_UPCALL_PENDING and cv_signal() any potentially waiting
* iptun_delete(). We can thus still safely use iptun->iptun_mh after having
* exited the iptun_t.
*/
static void
iptun_task_cb(void *arg)
{
/*
* Note that if the lookup fails, it's because the tunnel was deleted
* between the time the task was dispatched and now. That isn't an
* error.
*/
return;
if (task == IPTUN_TASK_PMTU_UPDATE) {
(void) iptun_update_mtu(iptun, 0);
return;
}
switch (task) {
case IPTUN_TASK_MTU_UPDATE:
break;
case IPTUN_TASK_LADDR_UPDATE:
break;
case IPTUN_TASK_RADDR_UPDATE:
break;
case IPTUN_TASK_LINK_UPDATE:
break;
case IPTUN_TASK_PDATA_UPDATE:
break;
default:
ASSERT(0);
}
switch (task) {
case IPTUN_TASK_MTU_UPDATE:
break;
case IPTUN_TASK_LADDR_UPDATE:
break;
case IPTUN_TASK_RADDR_UPDATE:
break;
case IPTUN_TASK_LINK_UPDATE:
break;
case IPTUN_TASK_PDATA_UPDATE:
break;
}
}
static void
{
return;
}
}
}
/*
* Convert an iptun_addr_t to sockaddr_storage.
*/
static void
{
struct sockaddr_in *sin;
struct sockaddr_in6 *sin6;
switch (iptun_addr->ia_family) {
case AF_INET:
break;
case AF_INET6:
break;
default:
ASSERT(0);
}
}
/*
* General purpose function to set an IP tunnel source or destination address.
*/
static int
const struct sockaddr_storage *ss)
{
return (EINVAL);
case AF_INET: {
return (EADDRNOTAVAIL);
}
break;
}
case AF_INET6: {
return (EADDRNOTAVAIL);
}
break;
}
default:
return (EAFNOSUPPORT);
}
return (0);
}
static int
{
}
static int
{
return (EINVAL);
}
static boolean_t
{
/*
* A tunnel may bind when its source address has been set, and if its
* tunnel type requires one, also its destination address.
*/
}
static int
{
int err;
case IPTUN_TYPE_IPV4:
/*
* When we set a tunnel's destination address, we do not care
* if the destination is reachable. Transient routing issues
* should not inhibit the creation of a tunnel interface, for
* example. For that reason, we pass in B_FALSE for the
* verify_dst argument of ip_proto_bind_connected_v4() (and
* similarly for IPv6 tunnels below).
*/
break;
case IPTUN_TYPE_IPV6:
break;
case IPTUN_TYPE_6TO4:
break;
}
if (err == 0) {
/*
* Now that we're bound with ip below us, this is a good time
* to initialize the destination path MTU and to re-calculate
* the tunnel's link MTU.
*/
(void) iptun_update_mtu(iptun, 0);
if (IS_IPTUN_RUNNING(iptun))
}
return (err);
}
static void
{
}
/*
* Re-generate the template data-link header for a given IP tunnel given the
* tunnel's current parameters.
*/
static void
{
case IPV4_VERSION:
/*
* We only need to use a custom IP header if the administrator
* has supplied a non-default hoplimit.
*/
iptun->iptun_header_size = 0;
break;
}
break;
case IPV6_VERSION: {
/*
* We only need to use a custom IPv6 header if either the
* administrator has supplied a non-default hoplimit, or we
* need to include an encapsulation limit option in the outer
* header.
*/
iptun->iptun_encaplimit == 0) {
iptun->iptun_header_size = 0;
break;
}
if (iptun->iptun_encaplimit == 0) {
} else {
/*
* The mac_ipv6 plugin requires ip6_plen to be in host
* byte order and reflect the extension headers
* present in the template. The actual network byte
* order ip6_plen will be set on a per-packet basis on
* transmit.
*/
}
break;
}
}
if (update_mac)
}
/*
* Insert inbound and outbound IPv4 and IPv6 policy into the given policy
* head.
*/
static boolean_t
{
int f = IPSEC_AF_V4;
return (B_FALSE);
f = IPSEC_AF_V6;
}
/*
* Used to set IPsec policy when policy is set through the IPTUN_CREATE or
* IPTUN_MODIFY ioctls.
*/
static int
{
int rc = 0;
char name[MAXLINKNAMELEN];
/* Can't specify self-encap on a tunnel. */
if (ipsr->ipsr_self_encap_req != 0)
return (EINVAL);
/*
* If it's a "clear-all" entry, unset the security flags and resume
* normal cleartext (or inherit-from-global) policy.
*/
if (clear_all)
goto bail;
goto bail;
goto bail;
}
/* Allocate the actvec now, before holding itp or polhead locks. */
goto bail;
}
/*
* for spdsock operations.
*
* Mutex because we need to write to the polhead AND flags atomically.
* Other threads will acquire the polhead lock as a reader if the
* (unprotected) flag is set.
*/
/* Oops, we lost a race. Let's get out of here. */
goto mutex_bail;
}
if (old_policy) {
if (rc != 0) {
/* inactive has already been cleared. */
goto mutex_bail;
}
} else {
/* Else assume itp->itp_policy is already flushed. */
}
if (clear_all) {
goto recover_bail;
}
/*
* Adjust MTU and make sure the DL side knows what's up.
*/
(void) iptun_update_mtu(iptun, 0);
} else {
}
if (old_policy) {
/* Recover policy in the active polhead. */
}
/* Clear policy in inactive polhead. */
bail:
return (rc);
}
static iptun_typeinfo_t *
{
int i;
break;
}
return (&iptun_type_table[i]);
}
/*
* Set the parameters included in ik on the tunnel iptun. Parameters that can
* only be set at creation time are set in iptun_create().
*/
static int
{
int err = 0;
if (orig_flags & IPTUN_LADDR)
return (err);
}
if (orig_flags & IPTUN_RADDR)
goto done;
}
/*
* Set IPsec policy originating from the ifconfig(1M) command
* line. This is traditionally called "simple" policy because
* the ipsec_req_t (iptun_kparam_secinfo) can only describe a
* everything" (as opposed to the rich policy that can be
* defined with ipsecconf(1M)).
*/
/*
* Can't set security properties for automatic
* tunnels.
*/
goto done;
}
/* If IPsec can be loaded, try and load it now. */
goto done;
}
/*
* ipsec_loader_loadnow() returns while IPsec is
* loaded asynchronously. While a method exists to
* wait for IPsec to load (ipsec_loader_wait()), it
* requires use of a STREAMS queue to do a qwait().
* We're not in STREAMS context here, and so we can't
* use it. This is not a problem in practice because
* in the vast majority of cases, key management and
* global policy will have loaded before any tunnels
* are plumbed, and so IPsec will already have been
* loaded.
*/
goto done;
}
if (err == 0) {
}
}
done:
if (err != 0) {
/* Restore original source and destination. */
(orig_flags & IPTUN_LADDR))
(orig_flags & IPTUN_RADDR))
}
return (err);
}
static int
{
int err;
return (EINVAL);
if (iptun->iptun_header_size != 0) {
}
return (err);
}
static int
{
int err;
return (err);
}
static conn_t *
{
return (NULL);
/*
* For exclusive stacks we set conn_zoneid to GLOBAL_ZONEID as is done
* for all other conn_t's.
*
* Note that there's an important distinction between iptun_zoneid and
* conn_zoneid. The conn_zoneid is set to GLOBAL_ZONEID in non-global
* exclusive stack zones to make the ip module believe that the
* non-global zone is actually a global zone. Therefore, when
* interacting with the ip module, we must always use conn_zoneid.
*/
/* crfree() is done in ipcl_conn_destroy(), called by CONN_DEC_REF() */
return (connp);
}
static void
{
}
static int
{
int err;
/*
* The global queue for this stack is set when iptunq_open() calls
* iptun_set_g_q().
*/
if (err == 0) {
}
return (err);
}
static iptun_t *
iptun_alloc(void)
{
}
return (iptun);
}
static void
{
(mod_hash_val_t *)&iptun) == 0);
}
/*
* After iptun_unregister(), there will be no threads executing a
* downcall from the mac module, including in the tx datapath.
*/
/*
* Remove from the AVL tree, AND release the reference iptun_t
* itself holds on the ITP.
*/
}
/*
* After ipcl_conn_destroy(), there will be no threads executing an
* upcall from ip (i.e., iptun_input()), and it is then safe to free
* the iptun_t.
*/
}
}
int
{
char linkname[MAXLINKNAMELEN];
/* The tunnel type is mandatory */
return (EINVAL);
/*
* Is the linkid that the caller wishes to associate with this new
* tunnel assigned to this zone?
*/
if (zoneid != GLOBAL_ZONEID)
return (EINVAL);
} else if (zoneid == GLOBAL_ZONEID) {
return (EINVAL);
}
/*
* Make sure that we're not trying to create a tunnel that has already
* been created.
*/
goto done;
}
/*
* Before we create any tunnel, we need to ensure that the default
* STREAMS queue (used to satisfy the ip module's requirement for one)
* is created. We only do this once per stack. The stream is closed
* when the stack is destroyed in iptun_stack_fni().
*/
if (err != 0)
goto done;
goto done;
}
goto done;
}
goto done;
goto done;
}
/*
* Find an ITP based on linkname. If we have parms already set via
* the iptun_setparams() call above, it may have created an ITP for
* us. We always try get_tunnel_policy() for DEBUG correctness
* checks, and we may wish to refactor this to only check when
* iptun_itp is NULL.
*/
goto done;
/*
* See if we have the necessary IP addresses assigned to this tunnel
* to try and bind them with ip underneath us. If we're not ready to
* bind yet, then we'll defer the bind operation until the addresses
* are modified.
*/
goto done;
goto done;
if (err != 0)
goto done;
/*
* We hash by link-id as that is the key used by all other iptun
* interfaces (modify, delete, etc.).
*/
} else if (mherr == MH_ERR_NOMEM) {
} else if (mherr == MH_ERR_DUPLICATE) {
} else {
}
done:
if (link_created) {
B_TRUE);
}
}
return (err);
}
int
{
int err;
return (err);
/* One cannot delete a tunnel that belongs to another zone. */
return (EACCES);
}
/*
* We need to exit iptun in order to issue calls up the stack such as
* dls_devnet_destroy(). If we call up while still in iptun, deadlock
* with calls coming down the stack is possible. We prevent other
* threads from entering this iptun after we've exited it by setting
* the IPTUN_DELETE_PENDING flag. This will cause callers of
* iptun_enter() to block waiting on iptun_enter_cv. The assumption
* here is that the functions we're calling while IPTUN_DELETE_PENDING
* is set don't result in an iptun_enter() call, as that would result
* in deadlock.
*/
/* Wait for any pending upcall to the mac module to complete. */
/*
* mac_disable() will fail with EBUSY if there are references
* to the iptun MAC. If there are none, then mac_disable()
* will assure that none can be acquired until the MAC is
* unregistered.
*
* XXX CR 6791335 prevents us from calling mac_disable() prior
* to dls_devnet_destroy(), so we unfortunately need to
* attempt to re-create the devnet node if mac_disable()
* fails.
*/
}
}
/*
* Now that we know the fate of this iptun_t, we need to clear
* IPTUN_DELETE_PENDING, and set IPTUN_CONDEMNED if the iptun_t is
* slated to be freed. Either way, we need to signal the threads
* waiting in iptun_enter() so that they can either fail if
* IPTUN_CONDEMNED is set, or continue if it's not.
*/
if (err == 0)
/*
* Note that there is no danger in calling iptun_free() after having
* dropped the iptun_lock since callers of iptun_enter() at this point
* are doing so from iptun_enter_by_linkid() (mac_disable() got rid of
* threads entering from mac callbacks which call iptun_enter()
* directly) which holds iptun_hash_lock, and iptun_free() grabs this
* lock in order to remove the iptun_t from the hash table.
*/
if (err == 0)
return (err);
}
int
{
int err;
return (err);
/* One cannot modify a tunnel that belongs to another zone. */
goto done;
}
/* The tunnel type cannot be changed */
goto done;
}
goto done;
/*
* If any of the tunnel's addresses has been modified and the tunnel
* has the necessary addresses assigned to it, we need to try to bind
* with ip underneath us. If we're not ready to bind yet, then we'll
* try again when the addresses are modified later.
*/
if (laddr_change || raddr_change) {
if (laddr_change)
if (raddr_change)
goto done;
}
}
if (laddr_change)
if (raddr_change)
done:
return (err);
}
/* Given an IP tunnel's datalink id, fill in its parameters. */
int
{
int err;
/* Is the tunnel link visible from the caller's zone? */
crgetzoneid(credp)))
return (ENOENT);
return (err);
}
}
}
}
}
done:
return (err);
}
int
{
return (EADDRNOTAVAIL);
return (0);
}
void
{
}
void
{
return;
/* IPsec policy means IPsec overhead, which means lower MTU. */
(void) iptun_update_mtu(iptun, 0);
}
}
/*
* Obtain the path MTU to the tunnel destination.
*/
static uint32_t
{
/*
* We only obtain the destination IRE for tunnels that have a remote
* tunnel address.
*/
return (0);
case IPV4_VERSION:
break;
case IPV6_VERSION:
break;
}
}
return (pmtu);
}
/*
* Returns the max of old_ovhd and the overhead associated with pol.
*/
static uint32_t
{
}
return (new_ovhd);
}
static uint32_t
{
int i;
uint32_t ipsec_ovhd = 0;
/*
* Consult global policy, just in case. This will only work
* if we have both source and destination addresses to work
* with.
*/
return (0);
case IPV4_VERSION:
break;
case IPV6_VERSION:
break;
}
/* Check for both IPv4 and IPv6. */
}
}
} else {
/*
* Look through all of the possible IPsec actions for the
* tunnel, and find the largest potential IPsec overhead.
*/
for (i = 0; i < ipr->ipr_nchains; i++) {
}
}
return (ipsec_ovhd);
}
/*
* Calculate and return the maximum possible MTU for the given tunnel.
*/
static uint32_t
{
/*
* Start with the path-MTU to the remote address, which is either
* provided as the new_pmtu argument, or obtained using
* iptun_get_dst_pmtu().
*/
if (new_pmtu != 0) {
}
/*
* We weren't able to obtain the path-MTU of the
* destination. Use the previous value.
*/
} else {
}
} else {
/*
* We have no path-MTU information to go on, use the maximum
* possible value.
*/
}
/*
* Now calculate tunneling overhead and subtract that from the
* path-MTU information obtained above.
*/
if (iptun->iptun_header_size != 0) {
} else {
case IPV4_VERSION:
header_size = sizeof (ipha_t);
if (is_system_labeled())
break;
case IPV6_VERSION:
header_size = sizeof (iptun_ipv6hdrs_t);
break;
}
}
}
/*
* Re-calculate the tunnel's MTU and notify the MAC layer of any change in
* MTU. The new_pmtu argument is the new path MTU to the tunnel destination
* to be used in the tunnel MTU calculation. Passing in 0 for new_pmtu causes
* the path MTU to be dynamically updated using iptun_update_pmtu().
*
* If the calculated tunnel MTU is different than its previous value, then we
* notify the MAC layer above us of this change using mac_maxsdu_update().
*/
static uint32_t
{
/*
* We return the current MTU without updating it if it was pegged to a
* static value using the MAC_PROP_MTU link property.
*/
/* If the MTU isn't fixed, then use the maximum possible value. */
/*
* We only dynamically adjust the tunnel MTU for tunnels with
* destinations because dynamic MTU calculations are based on the
* destination path-MTU.
*/
}
return (newmtu);
}
/*
* Frees a packet or packet chain and bumps stat for each freed packet.
*/
static void
{
}
}
/*
* Allocate and return a new mblk to hold an IP and ICMP header, and chain the
* original packet to its b_cont. Returns NULL on failure.
*/
static mblk_t *
{
/* tack on the offending packet */
}
return (icmperr_mp);
}
/*
* Transmit an ICMP error. mp->b_rptr points at the packet to be included in
* the ICMP error.
*/
static void
{
return;
}
new_ipha->ipha_type_of_service = 0;
new_ipha->ipha_ident = 0;
new_icmp->icmph_checksum = 0;
IP_WPUT);
}
static void
{
return;
}
/* The checksum is calculated in ip_wput_ire_v6(). */
}
static void
{
}
static void
{
icmp.icmph_du_zero = 0;
}
static void
{
if (type == ICMP6_PARAM_PROB)
}
static void
{
icmp6.icmp6_code = 0;
}
/*
* Determines if the packet pointed to by ipha or ip6h is an ICMP error. The
* mp argument is only used to do bounds checking.
*/
static boolean_t
{
return (B_FALSE);
} else {
*nexthdrp != IPPROTO_ICMPV6) {
return (B_FALSE);
}
}
}
/*
* Find inner and outer IP headers from a tunneled packet as setup for calls
* into ipsec_tun_{in,out}bound().
*/
static size_t
{
/*
* Don't bother handling packets that don't have a full IP header in
* the first mblk. For the input path, the ip module ensures that this
* won't happen, and on the output path, the IP tunneling MAC-type
* plugins ensure that this also won't happen.
*/
if (first_mblkl < sizeof (ipha_t))
return (0);
switch (IPH_HDR_VERSION(ipha)) {
case IPV4_VERSION:
break;
case IPV6_VERSION:
break;
default:
return (0);
}
if (first_mblkl < outer_hlen ||
return (0);
/*
* We don't bother doing a pullup here since the outer header will
* just get stripped off soon on input anyway. We just want to ensure
* that the inner* pointer points to a full header.
*/
if (first_mblkl == outer_hlen) {
} else {
}
switch (IPH_HDR_VERSION(ipha)) {
case IPV4_VERSION:
return (0);
break;
case IPV6_VERSION:
return (0);
break;
default:
return (0);
}
return (outer_hlen);
}
/*
* Received ICMP error in response to an X over IPv4 packet that we
* transmitted.
*
* NOTE: "outer" refers to what's inside the ICMP payload. We will get one of
* the following:
*
* [IPv4(0)][ICMPv4][IPv4(1)][IPv4(2)][ULP]
*
* or
*
* [IPv4(0)][ICMPv4][IPv4(1)][IPv6][ULP]
*
* And "outer4" will get set to IPv4(1), and inner[46] will correspond to
* whatever the very-inner packet is (IPv4(2) or IPv6).
*/
static void
{
int outer_hlen;
/*
* Change the db_type to M_DATA because subsequent operations assume
* the ICMP packet is M_DATA again (i.e. calls to msgdsize()).
*/
/*
* Temporarily move b_rptr forward so that iptun_find_headers() can
* find headers in the ICMP packet payload.
*/
/*
* The ip module ensures that ICMP errors contain at least the
* original IP header (otherwise, the error would never have made it
* here).
*/
&inner6);
if (outer_hlen == 0) {
&iptun->iptun_ierrors);
return;
}
/* Only ICMP errors due to tunneled packets should reach here. */
/* ipsec_tun_inbound() always frees ipsec_mp. */
/* Callee did all of the freeing. */
return;
}
/* We should never see a reassembled fragment here. */
/*
* If the original packet being transmitted was itself an ICMP error,
* then drop this packet. We don't want to generate an ICMP error in
* response to an ICMP error.
*/
return;
}
switch (icmph->icmph_type) {
case ICMP_DEST_UNREACHABLE:
switch (icmph->icmph_code) {
case ICMP_FRAGMENTATION_NEEDED: {
/*
* We reconcile this with the fact that the tunnel may
* also have IPsec policy by letting iptun_update_mtu
* take care of it.
*/
newmtu =
data_mp);
} else {
data_mp);
}
return;
}
break;
default:
break;
}
break;
case ICMP_TIME_EXCEEDED:
code = 0;
} /* else we're already set. */
break;
case ICMP_PARAM_PROBLEM:
/*
* This is a problem with the outer header we transmitted.
* Treat this as an output error.
*/
return;
default:
return;
}
else
}
/*
* Return B_TRUE if the IPv6 packet pointed to by ip6h contains a Tunnel
* Encapsulation Limit destination option. If there is one, set encaplim_ptr
* to point to the option value.
*/
static boolean_t
{
} else {
return (B_FALSE);
}
return (B_FALSE);
return (B_TRUE);
}
}
return (B_FALSE);
}
/*
* Received ICMPv6 error in response to an X over IPv6 packet that we
* transmitted.
*
* NOTE: "outer" refers to what's inside the ICMP payload. We will get one of
* the following:
*
* [IPv6(0)][ICMPv6][IPv6(1)][IPv4][ULP]
*
* or
*
* [IPv6(0)][ICMPv6][IPv6(1)][IPv6(2)][ULP]
*
* And "outer6" will get set to IPv6(1), and inner[46] will correspond to
* whatever the very-inner packet is (IPv4 or IPv6(2)).
*/
static void
{
int outer_hlen;
/*
* Change the db_type to M_DATA because subsequent operations assume
* the ICMP packet is M_DATA again (i.e. calls to msgdsize().)
*/
/*
* Temporarily move b_rptr forward so that iptun_find_headers() can
* find IP headers in the ICMP packet payload.
*/
/*
* The ip module ensures that ICMP errors contain at least the
* original IP header (otherwise, the error would never have made it
* here).
*/
&inner6);
if (outer_hlen == 0) {
&iptun->iptun_ierrors);
return;
}
/* Callee did all of the freeing. */
return;
}
/* We should never see a reassembled fragment here. */
/*
* If the original packet being transmitted was itself an ICMP error,
* then drop this packet. We don't want to generate an ICMP error in
* response to an ICMP error.
*/
return;
}
switch (icmp6h->icmp6_type) {
case ICMP6_PARAM_PROB: {
/*
* If the ICMPv6 error points to a valid Tunnel Encapsulation
* Limit option and the limit value is 0, then fall through
* and send a host unreachable message. Otherwise, treat the
* error as an output error, as there must have been a problem
* with a packet we sent.
*/
(icmp6h->icmp6_pptr !=
*encaplim_ptr != 0) {
return;
}
/* FALLTHRU */
}
case ICMP6_TIME_EXCEEDED:
case ICMP6_DST_UNREACH:
break;
case ICMP6_PACKET_TOO_BIG: {
/*
* We reconcile this with the fact that the tunnel may also
* have IPsec policy by letting iptun_update_mtu take care of
* it.
*/
data_mp);
} else {
}
return;
}
default:
return;
}
else
}
static void
{
/*
* Since ICMP error processing necessitates access to bits
* that are within the ICMP error payload (the original packet
* that caused the error), pull everything up into a single
* block for convenience.
*/
&iptun->iptun_norcvbuf);
return;
}
}
case IPV4_VERSION:
/*
* The outer IP header coming up from IP is always ipha_t
* aligned (otherwise, we would have crashed in ip).
*/
break;
case IPV6_VERSION:
break;
}
}
static boolean_t
{
/*
* It's possible that someone sent us an IPv4-in-IPv4 packet with the
* IPv4 address of a 6to4 tunnel as the destination.
*/
return (B_FALSE);
/*
* Make sure that the IPv6 destination is within the site that this
* 6to4 tunnel is routing for. We don't want people bouncing random
* tunneled IPv6 packets through this 6to4 router.
*/
return (B_FALSE);
/*
* Section 9 of RFC 3056 (security considerations) suggests
* that when a packet is from a 6to4 site (i.e., it's not a
* global address being forwarded from a relay router), make
* sure that the packet was tunneled by that site's 6to4
* router.
*/
return (B_FALSE);
} else {
/*
* Only accept packets from a relay router if we've configured
* outbound relay router functionality.
*/
return (B_FALSE);
}
return (B_TRUE);
}
/*
* Input function for everything that comes up from the ip module below us.
* This is called directly from the ip module via connp->conn_recv().
*
* There are two kinds of packets that can arrive here: (1) IP-in-IP tunneled
* packets and (2) ICMP errors containing IP-in-IP packets transmitted by us.
* They have the following structure:
*
* 1) M_DATA
* 2) M_CTL[->M_DATA]
*
* (2) Is an M_CTL optionally followed by M_DATA, where the M_CTL block is the
* start of the actual ICMP packet (it doesn't contain any special control
* information).
*
* Either (1) or (2) can be IPsec-protected, in which case an M_CTL block
* containing an ipsec_in_t will have been prepended to either (1) or (2),
* making a total of four combinations of possible mblk chains:
*
* A) (1)
* B) (2)
* C) M_CTL(ipsec_in_t)->(1)
* D) M_CTL(ipsec_in_t)->(2)
*/
/* ARGSUSED */
static void
{
int outer_hlen;
return;
}
/* Protected ICMP packet. */
return;
}
}
/*
* Request the destination's path MTU information regularly in case
* path MTU has increased.
*/
if (IPTUN_PMTU_TOO_OLD(iptun))
&inner6)) == 0)
goto drop;
/*
* If the system is labeled, we call tsol_check_dest() on the packet
* destination (our local tunnel address) to ensure that the packet as
* labeled should be allowed to be sent to us. We don't need to call
* the more involved tsol_receive_local() since the tunnel link itself
* cannot be assigned to shared-stack non-global zones.
*/
if (is_system_labeled()) {
goto drop;
CONN_MAC_DEFAULT, NULL) != 0)
goto drop;
}
/* Callee did all of the freeing. */
return;
}
goto drop;
/*
* We need to statistically account for each packet individually, so
* we might as well split up any b_next chains here.
*/
do {
return;
drop:
}
/*
* Do 6to4-specific header-processing on output. Return B_TRUE if the packet
* was processed without issue, or B_FALSE if the packet had issues and should
* be dropped.
*
* The checks are made in this order: the IPv6 source address, the IPv4
* source address, and finally the selection of the outer IPv4 destination.
*/
static boolean_t
{
/*
* IPv6 source must be a 6to4 address. This is because a conscious
* decision was made to not allow a Solaris system to be used as a
* relay router (for security reasons) when 6to4 was initially
* integrated. If this decision is ever reversed, the following check
* can be removed.
*/
return (B_FALSE);
/*
* RFC3056 mandates that the IPv4 source MUST be set to the IPv4
* portion of the 6to4 IPv6 source address. In other words, make sure
* that we're tunneling packets from our own 6to4 site.
*/
return (B_FALSE);
/*
* Automatically set the destination of the outer IPv4 header as
* described in RFC3056. There are two possibilities:
*
* a. If the IPv6 destination is a 6to4 address, set the IPv4 address
* to the IPv4 portion of the 6to4 address.
* b. If the IPv6 destination is a native IPv6 address, set the IPv4
* destination to the address of a relay router.
*
* Design Note: b shouldn't be necessary here, and this is a flaw in
* the design of the 6to4relay command. Instead of setting a 6to4
* relay address in this module via an ioctl, the 6to4relay command
* could simply add an IPv6 route for native IPv6 addresses (such as a
* default route) in the forwarding table that uses a 6to4 destination
* as its next hop, and the IPv4 portion of that address could be a
* 6to4 relay address. In order for this to work, IP would have to
* resolve the next hop address, which would necessitate a link-layer
* address resolver for 6to4 links, which doesn't exist today.
*
* In fact, if a resolver existed for 6to4 links, then setting the
* IPv4 destination in the outer header could be done as part of
* link-layer address resolution and fast-path header generation, and
* not here.
*/
/* Case a: the destination is a 6to4 router. */
} else {
/*
* Case b: the destination is a native IPv6 address. If output
* to a relay-router is enabled, use the relay-router's IPv4
* address as the destination.
*/
return (B_FALSE);
}
/*
* If the outer source and destination are equal, this means that the
* 6to4 router somehow forwarded an IPv6 packet destined for its own
* 6to4 site to its 6to4 tunnel interface, which will result in this
* packet infinitely bouncing between ip and iptun.
*/
}
/*
* Process output packets with outer IPv4 headers. Frees mp and bumps stats on
* error.
*
* Returns mp when processing succeeds, or NULL when the packet is rejected.
*/
static mblk_t *
{
/*
* Copy the tos from the inner IPv4 header. We mask off ECN
* bits (bits 6 and 7) because there is currently no
* tunnel-tunnel communication to determine if both sides
* support ECN. We opt for the safe choice: don't copy the
* ECN bits when doing encapsulation.
*/
} else {
/* Rejected: the caller gets no packet back. */
return (NULL);
}
}
/*
* As described in section 3.2.2 of RFC4213, if the packet payload is
* less than or equal to the minimum MTU size, then we need to allow
* IPv4 to fragment the packet. The reason is that even if we end up
* receiving an ICMP frag-needed, the interface above this tunnel
* won't be allowed to drop its MTU as a result, since the packet was
* already smaller than the smallest allowable MTU for that interface.
*/
return (mp);
}
/*
* Insert an encapsulation limit destination option in the packet provided.
* Always consumes the mp argument. Returns a new mblk pointer (newmp) on
* success; note that the early exit below returns NULL instead, so callers
* must check the result.
*/
static mblk_t *
{
return (NULL);
}
/* Copy the payload (starting with the inner IPv6 header). */
/* Now copy the outer IPv6 header. */
/*
* The payload length will be set at the end of
* iptun_out_process_ipv6().
*/
return (newmp);
}
/*
* Process output packets with outer IPv6 headers. Frees mp and bumps stats
* on error.
*
* Returns mp when processing succeeds, or NULL when the packet is rejected.
*/
static mblk_t *
{
/*
* The inner packet is an IPv6 packet which itself contains an
* encapsulation limit option. The limit variable points to
* the value in the embedded option. Process the
* encapsulation limit option as specified in RFC 2473.
*
* If limit is 0, then we've exceeded the limit and we need to
* send back an ICMPv6 parameter problem message.
*
* If limit is > 0, then we decrement it by 1 and make sure
* that the encapsulation limit option in the outer header
* reflects that (adding an option if one isn't already
* there).
*/
if (*limit == 0) {
0, offset);
/* Limit exhausted: nothing is handed back for transmission. */
return (NULL);
}
/*
* The outer header requires an encapsulation limit option.
* If there isn't one already, add one. A configured
* iptun_encaplimit of 0 is what identifies the "no option in
* the outer header yet" case.
*/
if (iptun->iptun_encaplimit == 0) {
return (NULL);
} else {
/*
* There is an existing encapsulation limit option in
* the outer header. If the inner encapsulation limit
* is less than the configured encapsulation limit,
* update the outer encapsulation limit to reflect
* this lesser value.
*/
}
}
return (mp);
}
/*
* The IP tunneling MAC-type plugins have already done most of the header
* processing and validity checks. We are simply responsible for multiplexing
* down to the ip module below us.
*
* Returns nothing; every early exit below is a bare return.
*/
static void
{
int outer_hlen;
return;
}
}
/* An outer header length of zero ends processing here. */
if (outer_hlen == 0) {
return;
}
/* Perform header processing. */
else
return;
/*
* Let's hope the compiler optimizes this with "branch taken".
*/
/* ipsec_tun_outbound() frees mp on error. */
return;
}
/*
* ipsec_tun_outbound() returns a chain of tunneled IP
* fragments linked with b_next (or a single message if the
* tunneled packet wasn't a fragment). Each message in the
* chain is prepended by an IPSEC_OUT M_CTL block with
* instructions for outbound IPsec processing.
*/
}
} else {
/*
* The ip module will potentially apply global policy to the
* packet in its output path if there's no active tunnel
* policy.
*/
}
}
/*
* Note that setting or clearing the queue via iptun_{set,get}_g_q() is
* serialized via iptuns_lock and iptunq_open(), so we must never be in a
* situation where iptun_set_g_q() is called if the queue has already been
* set, or vice versa (hence the ASSERT()s).
*
* NOTE(review): "{set,get}" above is presumably meant to read "{set,clear}",
* given that the sentence is about setting or clearing -- confirm against
* the function names.
*/
void
{
}
/*
* NOTE(review): presumably the clearing counterpart of the routine directly
* above, and subject to the same serialization rules -- confirm.
*/
void
{
}
/*
* Callback table for the downcalls (iptun_m_*() entry points) that GLDv3
* makes into this driver. Members that are not named in the initializer are
* zero-initialized, as for any object with static storage duration.
*/
static mac_callbacks_t iptun_m_callbacks = {
.mc_stop = iptun_m_stop,
.mc_tx = iptun_m_tx,
};