tun.c revision 91785ffff883655a89eb843ed89bcd24d717e320
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Tunnel driver
* This module acts like a driver/DLPI provider as viewed from the top
* and a stream head/TPI user from the bottom.
* Implements the logic for IP (IPv4 or IPv6) encapsulation
* within IP (IPv4 or IPv6).
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/types.h>
#include <sys/stream.h>
#include <sys/dlpi.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#include <sys/tihdr.h>
#include <sys/tiuser.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ethernet.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/netstack.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/vtrace.h>
#include <sys/isa_defs.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/route.h>
#include <sys/sockio.h>
#include <netinet/in.h>
#include <inet/common.h>
#include <inet/mi.h>
#include <inet/mib2.h>
#include <inet/nd.h>
#include <inet/arp.h>
#include <inet/snmpcom.h>
#include <netinet/igmp_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <net/if_dl.h>
#include <inet/ip_if.h>
#include <sys/strsun.h>
#include <inet/ipsec_impl.h>
#include <inet/ipdrop.h>
#include <inet/tun.h>
#include <inet/ipsec_impl.h>
#include <sys/conf.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/stat.h>
#include <inet/ip_ire.h> /* for ire_route_lookup_v6 */
static void tun_cancel_rec_evs(queue_t *, eventid_t *);
static void tun_bufcall_handler(void *);
static boolean_t tun_icmp_message_v4(queue_t *, ipha_t *, icmph_t *, mblk_t *);
static boolean_t tun_icmp_too_big_v4(queue_t *, ipha_t *, uint16_t, mblk_t *);
static boolean_t tun_icmp_message_v6(queue_t *, ip6_t *, icmp6_t *, uint8_t,
mblk_t *);
static boolean_t tun_icmp_too_big_v6(queue_t *, ip6_t *, uint32_t, uint8_t,
mblk_t *);
static void tun_sendokack(queue_t *, mblk_t *, t_uscalar_t);
static void tun_sendsdusize(queue_t *);
static void tun_senderrack(queue_t *, mblk_t *, t_uscalar_t, t_uscalar_t,
t_uscalar_t);
static int tun_fastpath(queue_t *, mblk_t *);
static int tun_ioctl(queue_t *, mblk_t *);
static void tun_timeout_handler(void *);
static int tun_rproc(queue_t *, mblk_t *);
static int tun_wproc_mdata(queue_t *, mblk_t *);
static int tun_wproc(queue_t *, mblk_t *);
static int tun_rdata(queue_t *, mblk_t *, mblk_t *, tun_t *, uint_t);
static int tun_rdata_v4(queue_t *, mblk_t *, mblk_t *, tun_t *);
static int tun_rdata_v6(queue_t *, mblk_t *, mblk_t *, tun_t *);
static int tun_set_sec_simple(tun_t *, ipsec_req_t *);
static void tun_send_ire_req(queue_t *);
static uint32_t tun_update_link_mtu(queue_t *, uint32_t, boolean_t);
static mblk_t *tun_realloc_mblk(queue_t *, mblk_t *, size_t, mblk_t *,
boolean_t);
static void tun_recover(queue_t *, mblk_t *, size_t);
static void tun_rem_ppa_list(tun_t *);
static void tun_rem_tun_byaddr_list(tun_t *);
static void tun_rput_icmp_err_v4(queue_t *, mblk_t *, mblk_t *);
static void icmp_ricmp_err_v4_v4(queue_t *, mblk_t *, mblk_t *);
static void icmp_ricmp_err_v6_v4(queue_t *, mblk_t *, mblk_t *);
static void icmp_ricmp_err_v4_v6(queue_t *, mblk_t *, mblk_t *, icmp6_t *);
static void icmp_ricmp_err_v6_v6(queue_t *, mblk_t *, mblk_t *, icmp6_t *);
static void tun_rput_icmp_err_v6(queue_t *, mblk_t *, mblk_t *);
static int tun_rput_tpi(queue_t *, mblk_t *);
static int tun_send_bind_req(queue_t *);
static void tun_statinit(tun_stats_t *, char *, netstackid_t);
static int tun_stat_kstat_update(kstat_t *, int);
static void tun_wdata_v4(queue_t *, mblk_t *);
static void tun_wdata_v6(queue_t *, mblk_t *);
static char *tun_who(queue_t *, char *);
static int tun_wput_dlpi(queue_t *, mblk_t *);
static int tun_wputnext_v6(queue_t *, mblk_t *);
static int tun_wputnext_v4(queue_t *, mblk_t *);
static boolean_t tun_limit_value_v6(queue_t *, mblk_t *, ip6_t *, int *);
static void tun_freemsg_chain(mblk_t *, uint64_t *);
static void *tun_stack_init(netstackid_t, netstack_t *);
static void tun_stack_fini(netstackid_t, void *);
/* module's defined constants, globals and data structures */
#define IP "ip"
#define IP6 "ip6"
static major_t IP_MAJ;
static major_t IP6_MAJ;
#define TUN_DEBUG
#define TUN_LINK_EXTRA_OFF 32
#define IPV6V4_DEF_TTL 60
#define IPV6V4_DEF_ENCAP 60
#define TUN_WHO_BUF 60
#ifdef TUN_DEBUG
/* levels of debugging verbosity */
#define TUN0DBG 0x00 /* crucial */
#define TUN1DBG 0x01 /* informational */
#define TUN2DBG 0x02 /* verbose */
#define TUN3DBG 0x04 /* very verbose */
/*
* Global variable storing debugging level for all tunnels. By default
* all crucial messages will be printed. Value can be masked to exclusively
* print certain debug levels and not others.
*/
int8_t tun_debug = TUN0DBG;
#define TUN_LEVEL(dbg, lvl) ((dbg & lvl) == lvl)
#define tun0dbg(a) printf a
#define tun1dbg(a) if (TUN_LEVEL(tun_debug, TUN1DBG)) printf a
#define tun2dbg(a) if (TUN_LEVEL(tun_debug, TUN2DBG)) printf a
#define tun3dbg(a) if (TUN_LEVEL(tun_debug, TUN3DBG)) printf a
#else
#define tun0dbg(a) /* */
#define tun1dbg(a) /* */
#define tun2dbg(a) /* */
#define tun3dbg(a) /* */
#endif /* TUN_DEBUG */
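/*
 * Usage note (illustrative sketch only, not part of the original code):
 * the debug macros take a single parenthesized argument list so that they
 * can expand to a plain printf call, which is why every call site in this
 * file uses double parentheses, e.g.
 *
 *	tun1dbg(("tun_open\n"));
 *
 * expands, when TUN_DEBUG is defined, to
 *
 *	if (TUN_LEVEL(tun_debug, TUN1DBG)) printf ("tun_open\n");
 *
 * and to nothing at all when TUN_DEBUG is not defined.
 */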
#define TUN_RECOVER_WAIT (1*hz)
/* canned DL_INFO_ACK - adjusted based on tunnel type */
dl_info_ack_t infoack = {
DL_INFO_ACK, /* dl_primitive */
4196, /* dl_max_sdu */
0, /* dl_min_sdu */
0, /* dl_addr_length */
DL_IPV4, /* dl_mac_type */
0, /* dl_reserved */
DL_UNATTACHED, /* dl_current_state */
0, /* dl_sap_length */
DL_CLDLS, /* dl_service_mode */
0, /* dl_qos_length */
0, /* dl_qos_offset */
0, /* dl_qos_range_length */
0, /* dl_qos_range_offset */
DL_STYLE2, /* dl_provider_style */
0, /* dl_addr_offset */
DL_VERSION_2, /* dl_version */
0, /* dl_brdcst_addr_length */
0, /* dl_brdcst_addr_offset */
0 /* dl_growth */
};
/*
* canned DL_BIND_ACK - IP doesn't use any of this info.
*/
dl_bind_ack_t bindack = {
DL_BIND_ACK, /* dl_primitive */
0, /* dl_sap */
0, /* dl_addr_length */
0, /* dl_addr_offset */
0, /* dl_max_conind */
0 /* dl_xidtest_flg */
};
/*
* Canned IPv6 destination options header containing Tunnel
* Encapsulation Limit option.
*/
static struct tun_encap_limit tun_limit_init_upper_v4 = {
{ IPPROTO_ENCAP, 0 },
IP6OPT_TUNNEL_LIMIT,
1,
IPV6_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */
IP6OPT_PADN,
1,
0
};
static struct tun_encap_limit tun_limit_init_upper_v6 = {
{ IPPROTO_IPV6, 0 },
IP6OPT_TUNNEL_LIMIT,
1,
IPV6_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */
IP6OPT_PADN,
1,
0
};
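/*
 * Layout sketch (illustrative only) of the 8-byte destination options
 * header that the initializers above produce, in member order of
 * struct tun_encap_limit:
 *
 *	byte 0: next header (IPPROTO_ENCAP for the _v4 variant,
 *		IPPROTO_IPV6 for the _v6 variant)
 *	byte 1: extension header length (0, i.e. 8 bytes total)
 *	byte 2: IP6OPT_TUNNEL_LIMIT option type
 *	byte 3: option data length (1)
 *	byte 4: tunnel encapsulation limit value (filled in later)
 *	byte 5: IP6OPT_PADN option type
 *	byte 6: pad data length (1)
 *	byte 7: one byte of zero padding
 */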
static tun_stats_t *tun_add_stat(queue_t *);
static void tun_add_byaddr(tun_t *);
static ipsec_tun_pol_t *itp_get_byaddr_fn(uint32_t *, uint32_t *, int,
netstack_t *);
/* Settable in /etc/system */
static boolean_t tun_do_fastpath = B_TRUE;
/* streams linkages */
static struct module_info info = {
TUN_MODID, /* module id number */
TUN_NAME, /* module name */
1, /* min packet size accepted */
INFPSZ, /* max packet size accepted */
65536, /* hi-water mark */
1024 /* lo-water mark */
};
static struct qinit tunrinit = {
(pfi_t)tun_rput, /* read side put procedure */
(pfi_t)tun_rsrv, /* read side service procedure */
tun_open, /* open procedure */
tun_close, /* close procedure */
NULL, /* for future use */
&info, /* module information structure */
NULL /* module statistics structure */
};
static struct qinit tunwinit = {
(pfi_t)tun_wput, /* write side put procedure */
(pfi_t)tun_wsrv, /* write side service procedure */
NULL,
NULL,
NULL,
&info,
NULL
};
struct streamtab tuninfo = {
&tunrinit, /* read side queue init */
&tunwinit, /* write side queue init */
NULL, /* mux read side init */
NULL /* mux write side init */
};
static struct fmodsw tun_fmodsw = {
TUN_NAME,
&tuninfo,
(D_MP | D_MTQPAIR | D_MTPUTSHARED)
};
static struct modlstrmod modlstrmod = {
&mod_strmodops,
"configured tunneling module",
&tun_fmodsw
};
static struct modlinkage modlinkage = {
MODREV_1,
&modlstrmod,
NULL
};
int
_init(void)
{
int rc;
IP_MAJ = ddi_name_to_major(IP);
IP6_MAJ = ddi_name_to_major(IP6);
/*
* We want to be informed each time a stack is created or
* destroyed in the kernel, so we can maintain the
* set of tun_stack_t's.
*/
netstack_register(NS_TUN, tun_stack_init, NULL, tun_stack_fini);
rc = mod_install(&modlinkage);
if (rc != 0)
netstack_unregister(NS_TUN);
return (rc);
}
int
_fini(void)
{
int error;
error = mod_remove(&modlinkage);
if (error == 0)
netstack_unregister(NS_TUN);
return (error);
}
int
_info(struct modinfo *modinfop)
{
return (mod_info(&modlinkage, modinfop));
}
/*
* this module is meant to be pushed on an instance of IP and
* have an instance of IP pushed on top of it.
*/
/* ARGSUSED */
int
tun_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
tun_t *atp;
mblk_t *hello;
ipsec_info_t *ii;
netstack_t *ns;
zoneid_t zoneid;
if (q->q_ptr != NULL) {
/* re-open of an already open instance */
return (0);
}
if (sflag != MODOPEN) {
return (EINVAL);
}
tun1dbg(("tun_open\n"));
ns = netstack_find_by_cred(credp);
ASSERT(ns != NULL);
/*
* For exclusive stacks we set the zoneid to zero
* to make IP operate as if in the global zone.
*/
if (ns->netstack_stackid != GLOBAL_NETSTACKID)
zoneid = GLOBAL_ZONEID;
else
zoneid = crgetzoneid(credp);
hello = allocb(sizeof (ipsec_info_t), BPRI_HI);
if (hello == NULL) {
netstack_rele(ns);
return (ENOMEM);
}
/* allocate per-instance structure */
atp = kmem_zalloc(sizeof (tun_t), KM_SLEEP);
atp->tun_state = DL_UNATTACHED;
atp->tun_dev = *devp;
atp->tun_zoneid = zoneid;
atp->tun_netstack = ns;
/*
* Based on the lower version of IP, initialize stuff that
* won't change
*/
if (getmajor(*devp) == IP_MAJ) {
ipha_t *ipha;
atp->tun_flags = TUN_L_V4 | TUN_HOP_LIM;
atp->tun_hop_limit = IPV6V4_DEF_TTL;
/*
* The tunnel MTU is recalculated when we know more
* about the tunnel destination.
*/
atp->tun_mtu = IP_MAXPACKET - sizeof (ipha_t);
ipha = &atp->tun_ipha;
ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION;
ipha->ipha_type_of_service = 0;
ipha->ipha_ident = 0; /* to be filled in by IP */
ipha->ipha_fragment_offset_and_flags = htons(IPH_DF);
ipha->ipha_ttl = atp->tun_hop_limit;
ipha->ipha_hdr_checksum = 0; /* to be filled in by IP */
} else if (getmajor(*devp) == IP6_MAJ) {
atp->tun_flags = TUN_L_V6 | TUN_HOP_LIM | TUN_ENCAP_LIM;
atp->tun_hop_limit = IPV6_DEFAULT_HOPS;
atp->tun_encap_lim = IPV6_DEFAULT_ENCAPLIMIT;
atp->tun_mtu = IP_MAXPACKET - sizeof (ip6_t) -
IPV6_TUN_ENCAP_OPT_LEN;
atp->tun_ip6h.ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
atp->tun_ip6h.ip6_hops = IPV6_DEFAULT_HOPS;
} else {
netstack_rele(ns);
kmem_free(atp, sizeof (tun_t));
return (ENXIO);
}
atp->tun_extra_offset = TUN_LINK_EXTRA_OFF;
mutex_init(&atp->tun_lock, NULL, MUTEX_DEFAULT, NULL);
/*
* If this is the automatic tunneling module, atun, verify that the
* lower protocol is IPv4 and set TUN_AUTOMATIC. Since we don't do
* automatic tunneling over IPv6, trying to run over IPv6 is an error,
* so free memory and return an error.
*/
if (q->q_qinfo->qi_minfo->mi_idnum == ATUN_MODID) {
if (atp->tun_flags & TUN_L_V4) {
atp->tun_flags |= TUN_AUTOMATIC;
atp->tun_mtu = ATUN_MTU;
} else {
/* Error. */
netstack_rele(ns);
kmem_free(atp, sizeof (tun_t));
return (ENXIO);
}
} else if (q->q_qinfo->qi_minfo->mi_idnum == TUN6TO4_MODID) {
/*
* Set 6to4 flag if this is the 6to4tun module and make
* the same checks mentioned above.
*/
if (atp->tun_flags & TUN_L_V4) {
atp->tun_flags |= TUN_6TO4;
atp->tun_mtu = ATUN_MTU;
} else {
/* Error. */
netstack_rele(ns);
kmem_free(atp, sizeof (tun_t));
return (ENXIO);
}
}
q->q_ptr = WR(q)->q_ptr = atp;
atp->tun_wq = WR(q);
tun_add_byaddr(atp);
ii = (ipsec_info_t *)hello->b_rptr;
hello->b_wptr = hello->b_rptr + sizeof (*ii);
hello->b_datap->db_type = M_CTL;
ii->ipsec_info_type = TUN_HELLO;
ii->ipsec_info_len = sizeof (*ii);
qprocson(q);
putnext(WR(q), hello);
return (0);
}
/* ARGSUSED */
int
tun_close(queue_t *q, int flag, cred_t *cred_p)
{
tun_t *atp = (tun_t *)q->q_ptr;
netstack_t *ns;
ASSERT(atp != NULL);
ns = atp->tun_netstack;
/* Cancel outstanding qtimeouts() or qbufcalls() */
tun_cancel_rec_evs(q, &atp->tun_events);
qprocsoff(q);
/* NOTE: tun_rem_ppa_list() may unlink tun_itp from its AVL tree. */
if (atp->tun_stats != NULL)
tun_rem_ppa_list(atp);
if (atp->tun_itp != NULL) {
/* In brackets because of ITP_REFRELE's brackets. */
ITP_REFRELE(atp->tun_itp, ns);
}
netstack_rele(ns);
mutex_destroy(&atp->tun_lock);
/* remove tun_t from global list */
tun_rem_tun_byaddr_list(atp);
/* free per-instance struct */
kmem_free(atp, sizeof (tun_t));
q->q_ptr = WR(q)->q_ptr = NULL;
return (0);
}
/*
* Cancel bufcall and timer requests
* Don't need to hold lock; protected by perimeter
*/
static void
tun_cancel_rec_evs(queue_t *q, eventid_t *evs)
{
if (evs->ev_rbufcid != 0) {
qunbufcall(RD(q), evs->ev_rbufcid);
evs->ev_rbufcid = 0;
}
if (evs->ev_wbufcid != 0) {
qunbufcall(WR(q), evs->ev_wbufcid);
evs->ev_wbufcid = 0;
}
if (evs->ev_rtimoutid != 0) {
(void) quntimeout(RD(q), evs->ev_rtimoutid);
evs->ev_rtimoutid = 0;
}
if (evs->ev_wtimoutid != 0) {
(void) quntimeout(WR(q), evs->ev_wtimoutid);
evs->ev_wtimoutid = 0;
}
}
/*
* Called by bufcall() when memory becomes available
* Don't need to hold lock; protected by perimeter
*/
static void
tun_bufcall_handler(void *arg)
{
queue_t *q = arg;
tun_t *atp = (tun_t *)q->q_ptr;
eventid_t *evs;
ASSERT(atp);
evs = &atp->tun_events;
if ((q->q_flag & QREADR) != 0) {
ASSERT(evs->ev_rbufcid);
evs->ev_rbufcid = 0;
} else {
ASSERT(evs->ev_wbufcid);
evs->ev_wbufcid = 0;
}
enableok(q);
qenable(q);
}
/*
* Called by timeout (if we couldn't do a bufcall)
* Don't need to hold lock; protected by perimeter
*/
static void
tun_timeout_handler(void *arg)
{
queue_t *q = arg;
tun_t *atp = (tun_t *)q->q_ptr;
eventid_t *evs;
ASSERT(atp);
evs = &atp->tun_events;
if (q->q_flag & QREADR) {
ASSERT(evs->ev_rtimoutid);
evs->ev_rtimoutid = 0;
} else {
ASSERT(evs->ev_wtimoutid);
evs->ev_wtimoutid = 0;
}
enableok(q);
qenable(q);
}
/*
* This routine is called when a message buffer can not
* be allocated. M_PCPROTO messages are converted to M_PROTO, but
* other than that, the mblk passed in must not be a high
* priority message (putting a high priority message back on
* the queue is a bad idea).
* Side effect: the queue is disabled
* (timeout or bufcall handler will re-enable the queue)
* tun_cancel_rec_evs() must be called in close to cancel all
* outstanding requests.
*/
static void
tun_recover(queue_t *q, mblk_t *mp, size_t size)
{
tun_t *atp = (tun_t *)q->q_ptr;
timeout_id_t tid;
bufcall_id_t bid;
eventid_t *evs = &atp->tun_events;
ASSERT(mp != NULL);
/*
* To avoid re-enabling the queue, change the high priority
* M_PCPROTO message to a M_PROTO before putting it on the queue
*/
if (mp->b_datap->db_type == M_PCPROTO)
mp->b_datap->db_type = M_PROTO;
ASSERT(mp->b_datap->db_type < QPCTL);
(void) putbq(q, mp);
/*
* Make sure there is at most one outstanding request per queue.
*/
if (q->q_flag & QREADR) {
if (evs->ev_rtimoutid || evs->ev_rbufcid)
return;
} else {
if (evs->ev_wtimoutid || evs->ev_wbufcid)
return;
}
noenable(q);
/*
* locking is needed here because this routine may be called
* with two puts() running
*/
mutex_enter(&atp->tun_lock);
if (!(bid = qbufcall(q, size, BPRI_MED, tun_bufcall_handler, q))) {
tid = qtimeout(q, tun_timeout_handler, q, TUN_RECOVER_WAIT);
if (q->q_flag & QREADR)
evs->ev_rtimoutid = tid;
else
evs->ev_wtimoutid = tid;
} else {
if (q->q_flag & QREADR)
evs->ev_rbufcid = bid;
else
evs->ev_wbufcid = bid;
}
mutex_exit(&atp->tun_lock);
}
/*
* tun_realloc_mblk(q, mp, size, orig_mp, copy)
*
* q - pointer to a queue_t, must not be NULL
* mp - pointer to an mblk to copy, can be NULL
* size - Number of bytes being (re)allocated
* orig_mp - pointer to the original mblk_t which will be passed to
* tun_recover if the memory (re)allocation fails. This is done
* so that the message can be rescheduled on the queue.
* orig_mp must be NULL if the original mblk_t is a high priority
* message of type other than M_PCPROTO.
* copy - a boolean to specify whether the contents of mp should be copied
* into the new mblk_t returned by this function.
*
* note: this routine will adjust the b_rptr and b_wptr of the
* mblk. Returns an mblk able to hold the requested size or
* NULL if allocation failed. If copy is true, original
* contents, if any, will be copied to new mblk
*/
static mblk_t *
tun_realloc_mblk(queue_t *q, mblk_t *mp, size_t size, mblk_t *orig_mp,
boolean_t copy)
{
/*
* If we are passed in an mblk, check to make sure that
* it is big enough and we are the only users of the mblk.
* If not, then try to allocate one.
*/
if (mp == NULL || mp->b_datap->db_lim - mp->b_datap->db_base < size ||
mp->b_datap->db_ref > 1) {
size_t asize;
mblk_t *newmp;
/* allocate at least as much as we had -- don't shrink */
if (mp != NULL) {
asize = MAX(size,
mp->b_datap->db_lim - mp->b_datap->db_base);
} else {
asize = size;
}
newmp = allocb(asize, BPRI_HI);
if (newmp == NULL) {
/*
* Reschedule the mblk via bufcall or timeout
* if orig_mp is non-NULL
*/
if (orig_mp != NULL) {
tun_recover(q, orig_mp, asize);
}
tun1dbg(("tun_realloc_mblk: couldn't allocate" \
" dl_ok_ack mblk\n"));
return (NULL);
}
if (mp != NULL) {
if (copy)
bcopy(mp->b_rptr, newmp->b_rptr,
mp->b_wptr - mp->b_rptr);
newmp->b_datap->db_type = mp->b_datap->db_type;
freemsg(mp);
}
mp = newmp;
} else {
if (mp->b_rptr != mp->b_datap->db_base) {
if (copy)
bcopy(mp->b_rptr, mp->b_datap->db_base,
mp->b_wptr - mp->b_rptr);
mp->b_rptr = mp->b_datap->db_base;
}
}
mp->b_wptr = mp->b_rptr + size;
return (mp);
}
/* send a DL_OK_ACK back upstream */
static void
tun_sendokack(queue_t *q, mblk_t *mp, t_uscalar_t prim)
{
dl_ok_ack_t *dlok;
if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_ok_ack_t), mp,
B_FALSE)) == NULL) {
return;
}
dlok = (dl_ok_ack_t *)mp->b_rptr;
dlok->dl_primitive = DL_OK_ACK;
dlok->dl_correct_primitive = prim;
mp->b_datap->db_type = M_PCPROTO;
qreply(q, mp);
}
/*
* Send a DL_NOTIFY_IND message with DL_NOTE_SDU_SIZE up to notify IP of a
* link MTU change.
*/
static void
tun_sendsdusize(queue_t *q)
{
tun_t *atp = (tun_t *)q->q_ptr;
mblk_t *mp = NULL;
dl_notify_ind_t *notify;
if (!(atp->tun_notifications & DL_NOTE_SDU_SIZE))
return;
if ((mp = tun_realloc_mblk(q, NULL, DL_NOTIFY_IND_SIZE, NULL,
B_FALSE)) == NULL) {
return;
}
mp->b_datap->db_type = M_PROTO;
notify = (dl_notify_ind_t *)mp->b_rptr;
notify->dl_primitive = DL_NOTIFY_IND;
notify->dl_notification = DL_NOTE_SDU_SIZE;
notify->dl_data = atp->tun_mtu;
notify->dl_addr_length = 0;
notify->dl_addr_offset = 0;
tun1dbg(("tun_sendsdusize: notifying ip of new mtu: %d", atp->tun_mtu));
/*
* We send this notification to the upper IP instance who is using
* us as a device.
*/
putnext(RD(q), mp);
}
/* send a DL_ERROR_ACK back upstream */
static void
tun_senderrack(queue_t *q, mblk_t *mp, t_uscalar_t prim, t_uscalar_t dl_err,
t_uscalar_t error)
{
dl_error_ack_t *dl_err_ack;
if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_error_ack_t), mp,
B_FALSE)) == NULL) {
return;
}
dl_err_ack = (dl_error_ack_t *)mp->b_rptr;
dl_err_ack->dl_error_primitive = prim;
dl_err_ack->dl_primitive = DL_ERROR_ACK;
dl_err_ack->dl_errno = dl_err;
dl_err_ack->dl_unix_errno = error;
mp->b_datap->db_type = M_PCPROTO;
qreply(q, mp);
}
/*
* Free all messages in an mblk chain and optionally collect
* byte-counter stats. Caller is responsible for per-packet stats.
*/
static void
tun_freemsg_chain(mblk_t *mp, uint64_t *bytecount)
{
mblk_t *mpnext;
while (mp != NULL) {
ASSERT(mp->b_prev == NULL);
mpnext = mp->b_next;
mp->b_next = NULL;
if (bytecount != NULL)
atomic_add_64(bytecount, (int64_t)msgdsize(mp));
freemsg(mp);
mp = mpnext;
}
}
/*
* Send all messages in a chain of mblk chains and optionally collect
* byte-counter stats. Caller is responsible for per-packet stats, and ensuring
* mp is always non-NULL.
*
* This is a macro so we can save stack. Assume the caller function
* has local-variable "nmp" as a placeholder. Define two versions, one with
* byte-counting stats and one without.
*/
#define TUN_PUTMSG_CHAIN_STATS(q, mp, nmp, bytecount) \
(nmp) = NULL; \
ASSERT((mp) != NULL); \
do { \
if ((nmp) != NULL) \
putnext(q, (nmp)); \
ASSERT((mp)->b_prev == NULL); \
(nmp) = (mp); \
(mp) = (mp)->b_next; \
(nmp)->b_next = NULL; \
atomic_add_64(bytecount, (int64_t)msgdsize(nmp)); \
} while ((mp) != NULL); \
\
putnext((q), (nmp)) /* trailing semicolon provided by instantiator. */
#define TUN_PUTMSG_CHAIN(q, mp, nmp) \
(nmp) = NULL; \
ASSERT((mp) != NULL); \
do { \
if ((nmp) != NULL) \
putnext(q, (nmp)); \
ASSERT((mp)->b_prev == NULL); \
(nmp) = (mp); \
(mp) = (mp)->b_next; \
(nmp)->b_next = NULL; \
} while ((mp) != NULL); \
\
putnext((q), (nmp)) /* trailing semicolon provided by instantiator. */
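/*
 * Instantiation sketch (hypothetical call site, for illustration only --
 * the local counter name below is made up):
 *
 *	mblk_t *nmp;		(placeholder the macros expand through)
 *	uint64_t *octets;	(points at a 64-bit byte counter)
 *	...
 *	TUN_PUTMSG_CHAIN_STATS(q, mp, nmp, octets);
 *
 * Each message on mp's b_next chain is unlinked and putnext()ed, and
 * msgdsize() of each is atomically added to *octets; the call site
 * supplies the trailing semicolon, as noted above.
 */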
/*
* Macro that not only checks tun_itp, but also sees if one got loaded
* via ipsecconf(1m)/PF_POLICY behind our backs. Note the sleazy update of
* (tun)->tun_itp_gen so we don't lose races with other possible updates via
* PF_POLICY.
*/
#define tun_policy_present(tun, ns, ipss) \
(((tun)->tun_itp != NULL) || \
(((tun)->tun_itp_gen < ipss->ipsec_tunnel_policy_gen) && \
((tun)->tun_itp_gen = ipss->ipsec_tunnel_policy_gen) && \
(((tun)->tun_itp = get_tunnel_policy((tun)->tun_lifname, ns)) \
!= NULL)))
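/*
 * Typical use (sketch, mirroring the call sites later in this file): pass
 * the tun_t, its netstack, and that netstack's ipsec_stack_t, e.g.
 *
 *	ipsec_stack_t *ipss = ns->netstack_ipsec;
 *
 *	if (tun_policy_present(atp, ns, ipss)) {
 *		(atp->tun_itp is now known to be non-NULL)
 *	}
 *
 * Remember that the macro may assign (tun)->tun_itp and
 * (tun)->tun_itp_gen as a side effect.
 */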
/*
* Search tuns_byaddr_list for occurrence of tun_t with matching
* inner addresses. This function does not take into account
* prefixes. Possibly we could generalize this function in the
* future with V6_MASK_EQ() and pass in an all 1's prefix for IP
* address matches.
* Returns NULL on no match.
* This function is not directly called - it's assigned into itp_get_byaddr().
*/
static ipsec_tun_pol_t *
itp_get_byaddr_fn(uint32_t *lin, uint32_t *fin, int af, netstack_t *ns)
{
tun_t *tun_list;
uint_t index;
in6_addr_t lmapped, fmapped, *laddr, *faddr;
ipsec_stack_t *ipss = ns->netstack_ipsec;
tun_stack_t *tuns = ns->netstack_tun;
if (af == AF_INET) {
laddr = &lmapped;
faddr = &fmapped;
IN6_INADDR_TO_V4MAPPED((struct in_addr *)lin, laddr);
IN6_INADDR_TO_V4MAPPED((struct in_addr *)fin, faddr);
} else {
laddr = (in6_addr_t *)lin;
faddr = (in6_addr_t *)fin;
}
index = TUN_BYADDR_LIST_HASH(*faddr);
/*
* it's ok to grab global lock while holding tun_lock/perimeter
*/
mutex_enter(&tuns->tuns_global_lock);
/*
* walk through list of tun_t looking for a match of
* inner addresses. Addresses are inserted with
* IN6_IPADDR_TO_V4MAPPED(), so v6 matching works for
* all cases.
*/
for (tun_list = tuns->tuns_byaddr_list[index]; tun_list;
tun_list = tun_list->tun_next) {
if (IN6_ARE_ADDR_EQUAL(&tun_list->tun_laddr, laddr) &&
IN6_ARE_ADDR_EQUAL(&tun_list->tun_faddr, faddr)) {
ipsec_tun_pol_t *itp;
if (!tun_policy_present(tun_list, ns, ipss)) {
tun1dbg(("itp_get_byaddr: No IPsec policy on "
"matching tun_t instance %p/%s\n",
(void *)tun_list, tun_list->tun_lifname));
continue;
}
tun1dbg(("itp_get_byaddr: Found matching tun_t %p with "
"IPsec policy\n", (void *)tun_list));
mutex_enter(&tun_list->tun_itp->itp_lock);
itp = tun_list->tun_itp;
mutex_exit(&tuns->tuns_global_lock);
ITP_REFHOLD(itp);
mutex_exit(&itp->itp_lock);
tun1dbg(("itp_get_byaddr: Found itp %p \n",
(void *)itp));
return (itp);
}
}
/* didn't find one, return zilch */
tun1dbg(("itp_get_byaddr: No matching tunnel instances with policy\n"));
mutex_exit(&tuns->tuns_global_lock);
return (NULL);
}
/*
* Search tuns_byaddr_list for a tun_t with the same ppa, same upper and
* lower stream, and same type (6to4 vs automatic vs configured).
* If none is found, insert this tun entry.
*/
static void
tun_add_byaddr(tun_t *atp)
{
tun_t *tun_list;
t_uscalar_t ppa = atp->tun_ppa;
uint_t mask = atp->tun_flags & (TUN_LOWER_MASK | TUN_UPPER_MASK);
uint_t tun_type = (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4));
uint_t index = TUN_BYADDR_LIST_HASH(atp->tun_faddr);
tun_stack_t *tuns = atp->tun_netstack->netstack_tun;
tun1dbg(("tun_add_byaddr: index = %d\n", index));
ASSERT(atp->tun_next == NULL);
/*
* it's ok to grab global lock while holding tun_lock/perimeter
*/
mutex_enter(&tuns->tuns_global_lock);
/*
* walk through list of tun_t looking for a match of
* ppa, same upper and lower stream and same tunnel type
* (automatic or configured).
* There shouldn't be all that many tunnels, so a sequential
* search of the bucket should be fine.
*/
for (tun_list = tuns->tuns_byaddr_list[index]; tun_list;
tun_list = tun_list->tun_next) {
if (tun_list->tun_ppa == ppa &&
((tun_list->tun_flags & (TUN_LOWER_MASK |
TUN_UPPER_MASK)) == mask) &&
((tun_list->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)) ==
tun_type)) {
tun1dbg(("tun_add_byaddr: tun 0x%p Found ppa %d " \
"tun_stats 0x%p\n", (void *)atp, ppa,
(void *)tun_list));
tun1dbg(("tun_add_byaddr: Nothing to do."));
mutex_exit(&tuns->tuns_global_lock);
return;
}
}
/* didn't find one, throw it in the global list */
atp->tun_next = tuns->tuns_byaddr_list[index];
atp->tun_ptpn = &(tuns->tuns_byaddr_list[index]);
if (tuns->tuns_byaddr_list[index] != NULL)
tuns->tuns_byaddr_list[index]->tun_ptpn = &(atp->tun_next);
tuns->tuns_byaddr_list[index] = atp;
mutex_exit(&tuns->tuns_global_lock);
}
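/*
 * Linkage sketch (illustration of the fields manipulated above): each
 * tun_t on a tuns_byaddr_list bucket keeps tun_next pointing at the next
 * entry and tun_ptpn pointing back at whatever pointer currently points
 * at it -- either the bucket head or the previous entry's tun_next.
 * That is what lets tun_rem_tun_byaddr_list() unlink an entry without
 * walking the bucket:
 *
 *	*(atp->tun_ptpn) = atp->tun_next;
 *	if (atp->tun_next != NULL)
 *		atp->tun_next->tun_ptpn = atp->tun_ptpn;
 */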
/*
* Search tuns_ppa_list for occurrence of tun_ppa, same lower stream,
* and same type (6to4 vs automatic vs configured).
* If none is found, insert this tun entry and create a new kstat for
* the entry.
* This is needed so that multiple tunnels with the same interface
* name (e.g. ip.tun0 under IPv4 and ip.tun0 under IPv6) can share the
* same kstats (they share the same tun_stat and kstat).
* Don't need to hold tun_lock if we are coming in as qwriter().
*/
static tun_stats_t *
tun_add_stat(queue_t *q)
{
tun_t *atp = (tun_t *)q->q_ptr;
tun_stats_t *tun_list;
tun_stats_t *tun_stat;
t_uscalar_t ppa = atp->tun_ppa;
uint_t lower = atp->tun_flags & TUN_LOWER_MASK;
uint_t tun_type = (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4));
uint_t index = TUN_LIST_HASH(ppa);
tun_stack_t *tuns = atp->tun_netstack->netstack_tun;
ASSERT(atp->tun_stats == NULL);
ASSERT(atp->tun_kstat_next == NULL);
/*
* it's ok to grab global lock while holding tun_lock/perimeter
*/
mutex_enter(&tuns->tuns_global_lock);
/*
* walk through list of tun_stats looking for a match of
* ppa, same lower stream and same tunnel type (automatic
* or configured).
* There shouldn't be all that many tunnels, so a sequential
* search should be fine.
* XXX - this may change if tunnels ever get created on the fly
*/
for (tun_list = tuns->tuns_ppa_list[index]; tun_list;
tun_list = tun_list->ts_next) {
if (tun_list->ts_ppa == ppa &&
tun_list->ts_lower == lower &&
tun_list->ts_type == tun_type) {
tun1dbg(("tun_add_stat: tun 0x%p Found ppa %d " \
"tun_stats 0x%p\n", (void *)atp, ppa,
(void *)tun_list));
mutex_enter(&tun_list->ts_lock);
mutex_exit(&tuns->tuns_global_lock);
ASSERT(tun_list->ts_refcnt > 0);
tun_list->ts_refcnt++;
ASSERT(atp->tun_kstat_next == NULL);
ASSERT(atp != tun_list->ts_atp);
/*
* add this tunnel instance to head of list
* of tunnels referencing this kstat structure
*/
atp->tun_kstat_next = tun_list->ts_atp;
tun_list->ts_atp = atp;
atp->tun_stats = tun_list;
mutex_exit(&tun_list->ts_lock);
/*
* Check for IPsec tunnel policy pointer, if it hasn't
* been set already. If we call get_tunnel_policy()
* and return NULL, there's none configured.
*/
if (atp->tun_lifname[0] != '\0' &&
atp->tun_itp == NULL) {
atp->tun_itp =
get_tunnel_policy(atp->tun_lifname,
atp->tun_netstack);
}
return (tun_list);
}
}
/* didn't find one, allocate a new one */
tun_stat = kmem_zalloc(sizeof (tun_stats_t), KM_NOSLEEP);
if (tun_stat != NULL) {
mutex_init(&tun_stat->ts_lock, NULL, MUTEX_DEFAULT,
NULL);
tun1dbg(("tun_add_stat: New ppa %d tun_stat 0x%p\n", ppa,
(void *)tun_stat));
tun_stat->ts_refcnt = 1;
tun_stat->ts_lower = lower;
tun_stat->ts_type = tun_type;
tun_stat->ts_ppa = ppa;
tun_stat->ts_next = tuns->tuns_ppa_list[index];
tuns->tuns_ppa_list[index] = tun_stat;
tun_stat->ts_atp = atp;
atp->tun_kstat_next = NULL;
atp->tun_stats = tun_stat;
mutex_exit(&tuns->tuns_global_lock);
tun_statinit(tun_stat, q->q_qinfo->qi_minfo->mi_idname,
atp->tun_netstack->netstack_stackid);
} else {
mutex_exit(&tuns->tuns_global_lock);
}
return (tun_stat);
}
/*
* remove tun from tuns_byaddr_list
* called either holding tun_lock or in perimeter
*/
static void
tun_rem_tun_byaddr_list(tun_t *atp)
{
tun_stack_t *tuns = atp->tun_netstack->netstack_tun;
mutex_enter(&tuns->tuns_global_lock);
/*
* remove tunnel instance from list of tun_t
*/
*(atp->tun_ptpn) = atp->tun_next;
if (atp->tun_next != NULL) {
atp->tun_next->tun_ptpn = atp->tun_ptpn;
atp->tun_next = NULL;
}
atp->tun_ptpn = NULL;
ASSERT(atp->tun_next == NULL);
mutex_exit(&tuns->tuns_global_lock);
}
/*
* remove tun from tuns_ppa_list
* called either holding tun_lock or in perimeter
*/
static void
tun_rem_ppa_list(tun_t *atp)
{
uint_t index = TUN_LIST_HASH(atp->tun_ppa);
tun_stats_t *tun_stat = atp->tun_stats;
tun_stats_t **tun_list;
tun_t **at_list;
tun_stack_t *tuns = atp->tun_netstack->netstack_tun;
if (tun_stat == NULL)
return;
ASSERT(atp->tun_ppa == tun_stat->ts_ppa);
mutex_enter(&tuns->tuns_global_lock);
mutex_enter(&tun_stat->ts_lock);
atp->tun_stats = NULL;
tun_stat->ts_refcnt--;
/*
* If this is the last instance, delete the tun_stat AND unlink the
* ipsec_tun_pol_t from the AVL tree.
*/
if (tun_stat->ts_refcnt == 0) {
kstat_t *tksp;
tun1dbg(("tun_rem_ppa_list: tun 0x%p Last ref ppa %d tun_stat" \
" 0x%p\n", (void *)atp, tun_stat->ts_ppa,
(void *)tun_stat));
if (atp->tun_itp != NULL)
itp_unlink(atp->tun_itp, atp->tun_netstack);
ASSERT(atp->tun_kstat_next == NULL);
for (tun_list = &tuns->tuns_ppa_list[index]; *tun_list;
tun_list = &(*tun_list)->ts_next) {
if (tun_stat == *tun_list) {
*tun_list = tun_stat->ts_next;
tun_stat->ts_next = NULL;
break;
}
}
mutex_exit(&tuns->tuns_global_lock);
tksp = tun_stat->ts_ksp;
tun_stat->ts_ksp = NULL;
mutex_exit(&tun_stat->ts_lock);
kstat_delete_netstack(tksp,
atp->tun_netstack->netstack_stackid);
mutex_destroy(&tun_stat->ts_lock);
kmem_free(tun_stat, sizeof (tun_stats_t));
return;
}
mutex_exit(&tuns->tuns_global_lock);
tun1dbg(("tun_rem_ppa_list: tun 0x%p Removing ref ppa %d tun_stat " \
"0x%p\n", (void *)atp, tun_stat->ts_ppa, (void *)tun_stat));
ASSERT(tun_stat->ts_atp->tun_kstat_next != NULL);
/*
* remove tunnel instance from list of tunnels referencing
* this kstat. List should be short, so we just search
* sequentially
*/
for (at_list = &tun_stat->ts_atp; *at_list;
at_list = &(*at_list)->tun_kstat_next) {
if (atp == *at_list) {
*at_list = atp->tun_kstat_next;
atp->tun_kstat_next = NULL;
break;
}
}
ASSERT(tun_stat->ts_atp != NULL);
ASSERT(atp->tun_kstat_next == NULL);
mutex_exit(&tun_stat->ts_lock);
}
/*
* handle all non-unitdata DLPI requests from above
* called as qwriter()
*/
static void
tun_wput_dlpi_other(queue_t *q, mblk_t *mp)
{
tun_t *atp = (tun_t *)q->q_ptr;
uint_t lvers;
t_uscalar_t prim = *((t_uscalar_t *)mp->b_rptr);
t_uscalar_t dl_err = DL_UNSUPPORTED;
t_uscalar_t dl_errno = 0;
switch (prim) {
case DL_INFO_REQ: {
dl_info_ack_t *dinfo;
tun1dbg(("tun_wput_dlpi_other: got DL_INFO_REQ\n"));
if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_info_ack_t), mp,
B_FALSE)) == NULL) {
return;
}
mp->b_datap->db_type = M_PCPROTO;
/* send DL_INFO_ACK back up */
dinfo = (dl_info_ack_t *)mp->b_rptr;
*dinfo = infoack;
dinfo->dl_current_state = atp->tun_state;
dinfo->dl_max_sdu = atp->tun_mtu;
/* dl_mac_type is set to DL_IPV4 by default. */
if (atp->tun_flags & TUN_L_V6)
dinfo->dl_mac_type = DL_IPV6;
/*
* We set the address length to non-zero so that
* automatic tunnels will not have multicast or
* point to point set.
* Someday IPv6 needs to support multicast over automatic
* tunnels.
* 6to4 tunnels should behave the same as automatic tunnels.
*/
if (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)) {
/*
* set length to size of ip address so that
* ip_newroute will generate dl_unitdata_req for
* us with gateway or dest filled in. (i.e.
* might as well have ip do something useful)
*/
dinfo->dl_addr_length = IPV6_ADDR_LEN;
} else {
dinfo->dl_addr_length = 0;
}
qreply(q, mp);
return;
}
case DL_ATTACH_REQ: {
dl_attach_req_t *dla;
tun1dbg(("tun_wput_dlpi_other: got DL_ATTACH_REQ\n"));
if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_ok_ack_t), mp,
B_TRUE)) == NULL) {
return;
}
dla = (dl_attach_req_t *)mp->b_rptr;
if (atp->tun_state != DL_UNATTACHED) {
dl_err = DL_OUTSTATE;
tun0dbg(("tun_wput_dlpi_other: "
"DL_ATTACH_REQ state not DL_UNATTACHED (0x%x)\n",
atp->tun_state));
break;
}
atp->tun_ppa = dla->dl_ppa;
/*
* get (possibly shared) kstat structure
*/
if (tun_add_stat(q) == NULL) {
ASSERT(atp->tun_stats == NULL);
dl_err = DL_SYSERR;
dl_errno = ENOMEM;
break;
}
atp->tun_state = DL_UNBOUND;
tun_sendokack(q, mp, prim);
return;
}
case DL_DETACH_REQ:
tun1dbg(("tun_wput_dlpi_other: got DL_DETACH_REQ\n"));
if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_ok_ack_t), mp,
B_FALSE)) == NULL) {
return;
}
if (atp->tun_state != DL_UNBOUND) {
dl_err = DL_OUTSTATE;
tun0dbg(("tun_wput_dlpi_other: " \
"DL_DETACH_REQ state not DL_UNBOUND (0x%x)\n",
atp->tun_state));
break;
}
atp->tun_state = DL_UNATTACHED;
/*
* don't need to hold tun_lock
* since this is really a single thread operation
* for this instance
*/
if (atp->tun_stats) {
tun_rem_ppa_list(atp);
tun1dbg(("tun_wput_dlpi_other: deleting kstat"));
}
tun_sendokack(q, mp, prim);
return;
case DL_BIND_REQ: {
dl_bind_req_t *bind_req;
t_uscalar_t dl_sap = 0;
tun1dbg(("tun_wput_dlpi_other: got DL_BIND_REQ\n"));
if (atp->tun_state != DL_UNBOUND) {
dl_err = DL_OUTSTATE;
tun0dbg(("tun_wput_dlpi_other: " \
"DL_BIND_REQ state not DL_UNBOUND (0x%x)\n",
atp->tun_state));
break;
}
atp->tun_state = DL_IDLE;
bind_req = (dl_bind_req_t *)mp->b_rptr;
dl_sap = bind_req->dl_sap;
ASSERT(bind_req->dl_sap == IP_DL_SAP ||
bind_req->dl_sap == IP6_DL_SAP);
lvers = atp->tun_flags & TUN_LOWER_MASK;
if (dl_sap == IP_DL_SAP) {
if ((atp->tun_flags & TUN_U_V6) != 0) {
dl_err = DL_BOUND;
tun0dbg(("tun_wput_dlpi_other: " \
"DL_BIND_REQ upper TUN_U_V6 (0x%x)\n",
atp->tun_flags));
break;
}
if ((atp->tun_flags & TUN_AUTOMATIC) != 0) {
dl_err = DL_SYSERR;
dl_errno = EINVAL;
tun0dbg(("tun_wput_dlpi_other: " \
"DL_BIND_REQ for IPv4 atun (0x%x)\n",
atp->tun_flags));
break;
}
if ((atp->tun_flags & TUN_6TO4) != 0) {
dl_err = DL_SYSERR;
dl_errno = EINVAL;
tun0dbg(("tun_wput_dlpi_other: " \
"DL_BIND_REQ for 6to4 tunnel (0x%x)\n",
atp->tun_flags));
break;
}
atp->tun_flags |= TUN_U_V4;
if (lvers == TUN_L_V4) {
atp->tun_ipha.ipha_protocol = IPPROTO_ENCAP;
} else {
ASSERT(lvers == TUN_L_V6);
/* Adjust headers. */
if (atp->tun_encap_lim >= 0) {
atp->tun_ip6h.ip6_nxt =
IPPROTO_DSTOPTS;
atp->tun_telopt =
tun_limit_init_upper_v4;
atp->tun_telopt.tel_telopt.
ip6ot_encap_limit =
atp->tun_encap_lim;
} else {
atp->tun_ip6h.ip6_nxt = IPPROTO_ENCAP;
}
}
} else if (dl_sap == IP6_DL_SAP) {
if ((atp->tun_flags & TUN_U_V4) != 0) {
dl_err = DL_BOUND;
tun0dbg(("tun_wput_dlpi_other: "
"DL_BIND_REQ upper TUN_U_V4 (0x%x)\n",
atp->tun_flags));
break;
}
atp->tun_flags |= TUN_U_V6;
if (lvers == TUN_L_V4) {
atp->tun_ipha.ipha_protocol = IPPROTO_IPV6;
} else {
ASSERT(lvers == TUN_L_V6);
if (atp->tun_encap_lim >= 0) {
atp->tun_ip6h.ip6_nxt =
IPPROTO_DSTOPTS;
atp->tun_telopt =
tun_limit_init_upper_v6;
atp->tun_telopt.tel_telopt.
ip6ot_encap_limit =
atp->tun_encap_lim;
} else {
atp->tun_ip6h.ip6_nxt = IPPROTO_IPV6;
}
}
} else {
atp->tun_state = DL_UNBOUND;
break;
}
/*
* Send DL_BIND_ACK, which is the same size as the
* request, so we can re-use the mblk.
*/
*(dl_bind_ack_t *)mp->b_rptr = bindack;
((dl_bind_ack_t *)mp->b_rptr)->dl_sap = dl_sap;
mp->b_datap->db_type = M_PCPROTO;
qreply(q, mp);
return;
}
case DL_UNBIND_REQ:
tun1dbg(("tun_wput_dlpi_other: got DL_UNBIND_REQ\n"));
if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_ok_ack_t), mp,
B_FALSE)) == NULL) {
return;
}
if (atp->tun_state != DL_IDLE) {
dl_err = DL_OUTSTATE;
tun0dbg(("tun_wput_dlpi_other: " \
"DL_UNBIND_REQ state not DL_IDLE (0x%x)\n",
atp->tun_state));
break;
}
atp->tun_state = DL_UNBOUND;
/* Send a DL_OK_ACK. */
tun_sendokack(q, mp, prim);
return;
case DL_PHYS_ADDR_REQ: {
dl_phys_addr_ack_t *dpa;
tun1dbg(("tun_wput_dlpi_other: got DL_PHYS_ADDR_REQ\n"));
if ((mp = tun_realloc_mblk(q, mp, sizeof (dl_phys_addr_ack_t),
mp, B_FALSE)) == NULL) {
return;
}
dpa = (dl_phys_addr_ack_t *)mp->b_rptr;
dpa->dl_primitive = DL_PHYS_ADDR_ACK;
/*
* dl_addr_length must match info ack
*/
if (atp->tun_flags & TUN_AUTOMATIC) {
if ((atp->tun_flags & TUN_U_V4) != 0) {
dl_err = DL_SYSERR;
dl_errno = EINVAL;
tun0dbg(("tun_wput_dlpi_other: " \
"DL_PHYS_ADDR_REQ for IPv4 atun\n"));
break;
} else {
dpa->dl_addr_length = IPV6_ADDR_LEN;
}
} else if (atp->tun_flags & TUN_6TO4) {
if ((atp->tun_flags & TUN_U_V4) != 0) {
dl_err = DL_SYSERR;
dl_errno = EINVAL;
tun0dbg(("tun_wput_dlpi_other: " \
"DL_PHYS_ADDR_REQ for 6to4 tunnel\n"));
break;
} else {
dpa->dl_addr_length = IPV6_ADDR_LEN;
}
} else {
dpa->dl_addr_length = 0;
}
dpa->dl_addr_offset = 0;
mp->b_datap->db_type = M_PCPROTO;
qreply(q, mp);
return;
}
case DL_SUBS_BIND_REQ:
case DL_ENABMULTI_REQ:
case DL_DISABMULTI_REQ:
case DL_PROMISCON_REQ:
case DL_PROMISCOFF_REQ:
case DL_AGGR_REQ:
case DL_UNAGGR_REQ:
case DL_UDQOS_REQ:
case DL_CONNECT_REQ:
case DL_TOKEN_REQ:
case DL_DISCONNECT_REQ:
case DL_RESET_REQ:
case DL_DATA_ACK_REQ:
case DL_REPLY_REQ:
case DL_REPLY_UPDATE_REQ:
case DL_XID_REQ:
case DL_TEST_REQ:
case DL_SET_PHYS_ADDR_REQ:
case DL_GET_STATISTICS_REQ:
case DL_CAPABILITY_REQ:
case DL_CONTROL_REQ:
/* unsupported command */
break;
default:
/* unknown command */
tun0dbg(("tun_wput_dlpi_other: unknown DLPI message type: " \
"%d\n", prim));
dl_err = DL_BADPRIM;
}
tun_senderrack(q, mp, prim, dl_err, dl_errno);
}
/*
* handle all DLPI requests from above
*/
static int
tun_wput_dlpi(queue_t *q, mblk_t *mp)
{
tun_t *atp = (tun_t *)q->q_ptr;
mblk_t *mp1;
int error = 0;
t_uscalar_t prim = *((t_uscalar_t *)mp->b_rptr);
switch (prim) {
case DL_UNITDATA_REQ:
if (atp->tun_state != DL_IDLE) {
break;
}
if (!canputnext(q)) {
atomic_add_32(&atp->tun_xmtretry, 1);
(void) putbq(q, mp);
return (ENOMEM); /* to get service proc to stop */
}
/* we don't use any of the data in the DLPI header */
mp1 = mp->b_cont;
freeb(mp);
if (mp1 == NULL) {
break;
}
switch (atp->tun_flags & TUN_UPPER_MASK) {
case TUN_U_V4:
tun_wdata_v4(q, mp1);
break;
case TUN_U_V6:
tun_wdata_v6(q, mp1);
break;
default:
atomic_add_32(&atp->tun_OutErrors, 1);
ASSERT((atp->tun_flags & TUN_UPPER_MASK) != TUN_U_V4 ||
(atp->tun_flags & TUN_UPPER_MASK) != TUN_U_V6);
break;
}
break;
case DL_NOTIFY_REQ: {
dl_notify_req_t *dlip;
if (MBLKL(mp) < DL_NOTIFY_REQ_SIZE) {
tun_senderrack(q, mp, prim, DL_BADPRIM, 0);
break;
}
dlip = (dl_notify_req_t *)mp->b_rptr;
atp->tun_notifications =
dlip->dl_notifications & DL_NOTE_SDU_SIZE;
dlip->dl_notifications &= DL_NOTE_SDU_SIZE;
dlip->dl_primitive = DL_NOTIFY_ACK;
mp->b_wptr = mp->b_rptr + DL_NOTIFY_ACK_SIZE;
qreply(q, mp);
tun_sendsdusize(q);
break;
}
default:
qwriter(q, mp, tun_wput_dlpi_other, PERIM_INNER);
break;
}
return (error);
}
/*
* set the tunnel parameters
* called as qwriter
*/
static void
tun_sparam(queue_t *q, mblk_t *mp)
{
tun_t *atp = (tun_t *)q->q_ptr;
struct iocblk *iocp = (struct iocblk *)(mp->b_rptr);
struct iftun_req *ta;
mblk_t *mp1;
int uerr = 0;
uint_t lvers;
sin_t *sin;
sin6_t *sin6;
size_t size;
boolean_t new;
ipsec_stack_t *ipss = atp->tun_netstack->netstack_ipsec;
/* don't allow changes after dl_bind_req */
if (atp->tun_state == DL_IDLE) {
uerr = EAGAIN;
goto nak;
}
mp1 = mp->b_cont;
if (mp1 == NULL) {
uerr = EPROTO;
goto nak;
}
mp1 = mp1->b_cont;
if (mp1 == NULL) {
uerr = EPROTO;
goto nak;
}
size = mp1->b_wptr - mp1->b_rptr;
if ((size != sizeof (struct iftun_req)) &&
(size != sizeof (struct old_iftun_req))) {
uerr = EPROTO;
goto nak;
}
new = (size == sizeof (struct iftun_req));
if (atp->tun_iocmp) {
uerr = EBUSY;
goto nak;
}
lvers = atp->tun_flags & TUN_LOWER_MASK;
ta = (struct iftun_req *)mp1->b_rptr;
/*
* Check version number for parsing the security settings.
*/
if (ta->ifta_vers != IFTUN_VERSION) {
uerr = EINVAL;
goto nak;
}
/*
* Upper layer will give us a v4/v6 indicator, in case we don't know
* already.
*/
if ((atp->tun_flags & TUN_UPPER_MASK) == 0) {
if (ta->ifta_flags & 0x80000000) {
atp->tun_flags |= TUN_U_V6;
} else {
atp->tun_flags |= TUN_U_V4;
}
}
if (((atp->tun_flags & (TUN_AUTOMATIC | TUN_U_V4)) ==
(TUN_AUTOMATIC | TUN_U_V4)) ||
((atp->tun_flags & (TUN_6TO4 | TUN_U_V4)) ==
(TUN_6TO4 | TUN_U_V4))) {
uerr = EINVAL;
goto nak;
}
if (ta->ifta_flags & IFTUN_SRC) {
switch (ta->ifta_saddr.ss_family) {
case AF_INET:
sin = (sin_t *)&ta->ifta_saddr;
if (lvers != TUN_L_V4) {
uerr = EINVAL;
goto nak;
}
if ((sin->sin_addr.s_addr == INADDR_ANY) ||
(sin->sin_addr.s_addr == 0xffffffff) ||
CLASSD(sin->sin_addr.s_addr)) {
uerr = EADDRNOTAVAIL;
goto nak;
}
atp->tun_ipha.ipha_src = sin->sin_addr.s_addr;
IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr,
&atp->tun_laddr);
break;
case AF_INET6:
sin6 = (sin6_t *)&ta->ifta_saddr;
if (lvers != TUN_L_V6) {
uerr = EINVAL;
goto nak;
}
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) ||
IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
uerr = EADDRNOTAVAIL;
goto nak;
}
atp->tun_ip6h.ip6_src = atp->tun_laddr =
sin6->sin6_addr;
break;
default:
uerr = EAFNOSUPPORT;
goto nak;
}
/*
* If I reach here, then I didn't bail; the src address
* was good.
*/
atp->tun_flags |= TUN_SRC;
}
if (ta->ifta_flags & IFTUN_DST) {
if (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)) {
uerr = EINVAL;
goto nak;
}
if (ta->ifta_saddr.ss_family == AF_INET) {
sin = (sin_t *)&ta->ifta_daddr;
if (lvers != TUN_L_V4) {
uerr = EINVAL;
goto nak;
}
if ((sin->sin_addr.s_addr == 0) ||
(sin->sin_addr.s_addr == 0xffffffff) ||
CLASSD(sin->sin_addr.s_addr)) {
uerr = EADDRNOTAVAIL;
goto nak;
}
atp->tun_ipha.ipha_dst = sin->sin_addr.s_addr;
/* Remove from previous hash bucket */
IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr,
&atp->tun_faddr);
} else if (ta->ifta_saddr.ss_family == AF_INET6) {
sin6 = (sin6_t *)&ta->ifta_daddr;
if (lvers != TUN_L_V6) {
uerr = EINVAL;
goto nak;
}
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) ||
IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
uerr = EADDRNOTAVAIL;
goto nak;
}
/* Remove from previous hash bucket */
atp->tun_ip6h.ip6_dst = atp->tun_faddr =
sin6->sin6_addr;
} else {
uerr = EAFNOSUPPORT;
goto nak;
}
/*
* If I reach here, then I didn't bail; the dst address
* was good.
*/
atp->tun_flags |= TUN_DST;
/* tun_faddr changed, move to proper hash bucket */
tun_rem_tun_byaddr_list(atp);
tun_add_byaddr(atp);
}
if (new && (ta->ifta_flags & IFTUN_HOPLIMIT)) {
/* Check bounds. */
if (ta->ifta_hop_limit < 1) {
uerr = EINVAL;
goto nak;
}
atp->tun_hop_limit = ta->ifta_hop_limit;
/* XXX do we really need this flag */
atp->tun_flags |= TUN_HOP_LIM;
if (lvers == TUN_L_V4) {
atp->tun_ipha.ipha_ttl = atp->tun_hop_limit;
} else {
atp->tun_ip6h.ip6_hops = atp->tun_hop_limit;
}
}
if (new && (ta->ifta_flags & IFTUN_ENCAP)) {
/* Bounds checking. */
if ((ta->ifta_encap_lim > IPV6_MAX_ENCAPLIMIT) ||
(lvers != TUN_L_V6)) {
uerr = EINVAL;
goto nak;
}
atp->tun_encap_lim = ta->ifta_encap_lim;
atp->tun_flags |= TUN_ENCAP_LIM;
if (ta->ifta_encap_lim >= 0) {
atp->tun_telopt.tel_telopt.ip6ot_encap_limit =
ta->ifta_encap_lim;
atp->tun_ip6h.ip6_nxt = IPPROTO_DSTOPTS;
} else {
switch (atp->tun_flags & TUN_UPPER_MASK) {
case TUN_U_V4:
atp->tun_ip6h.ip6_nxt = IPPROTO_ENCAP;
break;
case TUN_U_V6:
atp->tun_ip6h.ip6_nxt = IPPROTO_IPV6;
break;
default:
/* This shouldn't happen! */
ASSERT((atp->tun_flags & TUN_UPPER_MASK) != 0);
break;
}
}
}
/*
* If we passed in IFTUN_COMPLEX_SECURITY, do not do anything. This
* allows us to let dumb ifconfig(1m)-like apps reflect what they see
* without a penalty.
*/
if ((ta->ifta_flags & (IFTUN_SECURITY | IFTUN_COMPLEX_SECURITY)) ==
IFTUN_SECURITY) {
/* Can't set security properties for automatic tunnels. */
if (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)) {
uerr = EINVAL;
goto nak;
}
/*
* The version number checked out, so just cast
* ifta_secinfo to an ipsr.
*/
if (ipsec_loaded(ipss)) {
uerr = tun_set_sec_simple(atp,
(ipsec_req_t *)&ta->ifta_secinfo);
} else {
if (ipsec_failed(ipss)) {
uerr = EPROTONOSUPPORT;
goto nak;
}
/* Otherwise, try again later and load IPsec. */
(void) putq(q, mp);
ipsec_loader_loadnow(ipss);
return;
}
if (uerr != 0)
goto nak;
}
mp->b_datap->db_type = M_IOCACK;
iocp->ioc_error = 0;
/*
* Send a T_BIND_REQ if and only if a tsrc/tdst change was requested
* _AND_ tsrc is turned on _AND_ the tunnel either has tdst turned on
* or is an automatic tunnel.
*/
if ((ta->ifta_flags & (IFTUN_SRC | IFTUN_DST)) != 0 &&
(atp->tun_flags & TUN_SRC) != 0 &&
(atp->tun_flags & (TUN_DST | TUN_AUTOMATIC | TUN_6TO4)) != 0) {
atp->tun_iocmp = mp;
uerr = tun_send_bind_req(q);
if (uerr == 0) {
/* qreply() done by T_BIND_ACK processing */
return;
} else {
atp->tun_iocmp = NULL;
goto nak;
}
}
qreply(q, mp);
return;
nak:
iocp->ioc_error = uerr;
mp->b_datap->db_type = M_IOCNAK;
qreply(q, mp);
}
static boolean_t
tun_thisvers_policy(tun_t *atp)
{
boolean_t rc;
ipsec_policy_head_t *iph;
int uvec = atp->tun_flags & TUN_UPPER_MASK;
if (atp->tun_itp == NULL)
return (B_FALSE);
iph = atp->tun_itp->itp_policy;
rw_enter(&iph->iph_lock, RW_READER);
rc = iph_ipvN(iph, (uvec & TUN_U_V6));
rw_exit(&iph->iph_lock);
return (rc);
}
/*
* Processes SIOCs to set up a tunnel and IOCs to configure the tunnel module.
* M_IOCDATA->M_COPY->DATA or M_IOCTL->DATA
*/
static int
tun_ioctl(queue_t *q, mblk_t *mp)
{
tun_t *atp = (tun_t *)q->q_ptr;
struct iocblk *iocp = (struct iocblk *)(mp->b_rptr);
struct iftun_req *ta;
mblk_t *mp1;
int reterr = 0;
int uerr = 0;
uint_t lvers;
sin_t *sin;
sin6_t *sin6;
size_t size;
boolean_t new;
ipaddr_t *rr_addr;
char buf[INET6_ADDRSTRLEN];
struct lifreq *lifr;
netstack_t *ns = atp->tun_netstack;
ipsec_stack_t *ipss = ns->netstack_ipsec;
tun_stack_t *tuns = ns->netstack_tun;
lvers = atp->tun_flags & TUN_LOWER_MASK;
switch (iocp->ioc_cmd) {
case OSIOCSTUNPARAM:
case SIOCSTUNPARAM:
qwriter(q, mp, tun_sparam, PERIM_INNER);
return (0);
case OSIOCGTUNPARAM:
case SIOCGTUNPARAM:
mp1 = mp->b_cont;
if (mp1 == NULL) {
uerr = EPROTO;
goto nak;
}
mp1 = mp1->b_cont;
if (mp1 == NULL) {
uerr = EPROTO;
goto nak;
}
size = mp1->b_wptr - mp1->b_rptr;
if ((size != sizeof (struct iftun_req)) &&
(size != sizeof (struct old_iftun_req))) {
uerr = EPROTO;
goto nak;
}
new = (size == sizeof (struct iftun_req));
/*
* don't need to hold any locks. Can only be
* changed by qwriter
*/
ta = (struct iftun_req *)mp1->b_rptr;
ta->ifta_flags = 0;
/*
* Unlike tun_sparam(), the version number for security
* parameters is ignored, since we're filling it in!
*/
ta->ifta_vers = IFTUN_VERSION;
/* in case we are pushed under something unsupported */
switch (atp->tun_flags & TUN_UPPER_MASK) {
case TUN_U_V4:
ta->ifta_upper = IFTAP_IPV4;
break;
case TUN_U_V6:
ta->ifta_upper = IFTAP_IPV6;
break;
default:
ta->ifta_upper = 0;
break;
}
/*
* Copy in security information.
*
* If we revise IFTUN_VERSION, this will become revision-
* dependent.
*/
if (tun_policy_present(atp, ns, ipss) &&
tun_thisvers_policy(atp)) {
mutex_enter(&atp->tun_itp->itp_lock);
if (!(atp->tun_itp->itp_flags & ITPF_P_TUNNEL) &&
(atp->tun_policy_index >=
atp->tun_itp->itp_next_policy_index)) {
ipsec_req_t *ipsr;
/*
* Convert 0.0.0.0/0, 0::0/0 tree entry to
* ipsec_req_t.
*/
ipsr = (ipsec_req_t *)ta->ifta_secinfo;
*ipsr = atp->tun_secinfo;
/* Reality check for empty polhead. */
if (ipsr->ipsr_ah_req != 0 ||
ipsr->ipsr_esp_req != 0)
ta->ifta_flags |= IFTUN_SECURITY;
} else {
ta->ifta_flags |=
(IFTUN_COMPLEX_SECURITY | IFTUN_SECURITY);
}
mutex_exit(&atp->tun_itp->itp_lock);
}
if (new && (iocp->ioc_cmd == SIOCGTUNPARAM)) {
/* Copy in hop limit. */
if (atp->tun_flags & TUN_HOP_LIM) {
ta->ifta_flags |= IFTUN_HOPLIMIT;
ta->ifta_hop_limit = atp->tun_hop_limit;
}
/* Copy in encapsulation limit. */
if (atp->tun_flags & TUN_ENCAP_LIM) {
ta->ifta_flags |= IFTUN_ENCAP;
ta->ifta_encap_lim = atp->tun_encap_lim;
}
}
/* lower must be IPv4 or IPv6, otherwise open fails */
if (lvers == TUN_L_V4) {
sin = (sin_t *)&ta->ifta_saddr;
ta->ifta_lower = IFTAP_IPV4;
bzero(sin, sizeof (sin_t));
sin->sin_family = AF_INET;
if (atp->tun_flags & TUN_SRC) {
IN6_V4MAPPED_TO_IPADDR(&atp->tun_laddr,
sin->sin_addr.s_addr);
ta->ifta_flags |= IFTUN_SRC;
} else {
sin->sin_addr.s_addr = 0;
}
sin = (sin_t *)&ta->ifta_daddr;
bzero(sin, sizeof (sin_t));
sin->sin_family = AF_INET;
if (atp->tun_flags & TUN_DST) {
IN6_V4MAPPED_TO_IPADDR(&atp->tun_faddr,
sin->sin_addr.s_addr);
ta->ifta_flags |= IFTUN_DST;
} else {
sin->sin_addr.s_addr = 0;
}
} else {
ASSERT(lvers == TUN_L_V6);
ta->ifta_lower = IFTAP_IPV6;
sin6 = (sin6_t *)&ta->ifta_saddr;
bzero(sin6, sizeof (sin6_t));
sin6->sin6_family = AF_INET6;
if (atp->tun_flags & TUN_SRC) {
sin6->sin6_addr = atp->tun_laddr;
ta->ifta_flags |= IFTUN_SRC;
} else {
V6_SET_ZERO(sin6->sin6_addr);
}
sin6 = (sin6_t *)&ta->ifta_daddr;
bzero(sin6, sizeof (sin6_t));
sin6->sin6_family = AF_INET6;
if (atp->tun_flags & TUN_DST) {
ta->ifta_flags |= IFTUN_DST;
sin6->sin6_addr = atp->tun_faddr;
} else {
V6_SET_ZERO(sin6->sin6_addr);
}
}
break;
case SIOCS6TO4TUNRRADDR: {
struct iocblk *iocp;
/* check to make sure this is not a TRANSPARENT ioctl */
iocp = (struct iocblk *)mp->b_rptr;
if (iocp->ioc_count == TRANSPARENT) {
uerr = EINVAL;
goto nak;
}
/* skip over iocblk to M_DATA */
mp1 = mp->b_cont;
if (mp1 == NULL) {
uerr = EPROTO;
goto nak;
}
size = mp1->b_wptr - mp1->b_rptr;
if (size != (sizeof (ipaddr_t))) {
uerr = EPROTO;
goto nak;
}
rr_addr = (ipaddr_t *)mp1->b_rptr;
/*
* Value read MUST equal either:
* 1) a valid unicast IPv4 Address
* 2) INADDR_ANY
*
* (1) enables 6to4 Relay Router communication support on
* this system and denotes the IPv4 destination address used
* for sending to 6to4 Relay Routers.
* (2) disables 6to4 Relay Router communication support on
* this system.
*
* Any other value results in a NAK.
*/
if ((*rr_addr == INADDR_ANY) || (!CLASSD(*rr_addr))) {
tun1dbg(("tun_ioctl: 6to4 Relay Router = %s\n",
inet_ntop(AF_INET, rr_addr, buf,
sizeof (buf))));
tuns->tuns_relay_rtr_addr_v4 = *rr_addr;
} else {
tun1dbg(("tun_ioctl: Invalid 6to4 Relay Router " \
"address (%s)\n",
inet_ntop(AF_INET, rr_addr, buf,
sizeof (buf))));
uerr = EINVAL;
goto nak;
}
break;
}
case SIOCG6TO4TUNRRADDR:
/* skip over iocblk to M_DATA */
mp1 = mp->b_cont;
if (mp1 == NULL) {
uerr = EPROTO;
goto nak;
}
size = mp1->b_wptr - mp1->b_rptr;
if (size != (sizeof (ipaddr_t))) {
uerr = EPROTO;
goto nak;
}
rr_addr = (ipaddr_t *)mp1->b_rptr;
*rr_addr = tuns->tuns_relay_rtr_addr_v4;
break;
case DL_IOC_HDR_INFO:
uerr = tun_fastpath(q, mp);
if (uerr != 0)
goto nak;
break;
case SIOCSLIFNAME:
/*
* Intercept SIOCSLIFNAME and attach the name to my
* tunnel_instance. For extra paranoia, if my name is not ""
* (as it would be at tun_t initialization), don't change
* anything.
*
* For now, this is the only way to tie tunnel names (as
* used in IPsec Tunnel Policy (ITP) instances) to actual
* tunnel instances. In practice, SIOCSLIFNAME is only
* used by ifconfig(1m) to change the ill name to something
* ifconfig can handle.
*/
mp1 = mp->b_cont;
if (mp1 != NULL) {
lifr = (struct lifreq *)mp1->b_rptr;
if (atp->tun_lifname[0] == '\0') {
(void) strncpy(atp->tun_lifname,
lifr->lifr_name, LIFNAMSIZ);
ASSERT(atp->tun_itp == NULL);
atp->tun_itp =
get_tunnel_policy(atp->tun_lifname,
ns);
/*
* It really doesn't matter if we return
* NULL or not. If we get the itp pointer,
* we're in good shape.
*/
} else {
tun0dbg(("SIOCSLIFNAME: new is %s, old is %s"
" - not changing\n",
lifr->lifr_name, atp->tun_lifname));
}
}
break;
default:
/*
* We are a module that thinks it's a driver, so nak anything we
* don't understand
*/
uerr = EINVAL;
goto nak;
}
mp->b_datap->db_type = M_IOCACK;
iocp->ioc_error = 0;
qreply(q, mp);
return (reterr);
nak:
iocp->ioc_error = uerr;
mp->b_datap->db_type = M_IOCNAK;
qreply(q, mp);
return (reterr);
}
/*
* mp contains the M_IOCTL DL_IOC_HDR_INFO message;
* allocate an mblk for fast path.
* XXX - fix IP so that db_base and rptr can be different
*/
static int
tun_fastpath(queue_t *q, mblk_t *mp)
{
tun_t *atp = (tun_t *)q->q_ptr;
mblk_t *nmp;
int error;
dl_unitdata_req_t *dludp;
int hdrlen;
if (!tun_do_fastpath || atp->tun_state != DL_IDLE)
return (EINVAL);
error = miocpullup(mp, sizeof (dl_unitdata_req_t));
if (error != 0)
return (error);
dludp = (dl_unitdata_req_t *)mp->b_cont->b_rptr;
if (dludp->dl_primitive != DL_UNITDATA_REQ)
return (EINVAL);
switch (atp->tun_flags & TUN_LOWER_MASK) {
case TUN_L_V4:
nmp = allocb(sizeof (ipha_t) + atp->tun_extra_offset, BPRI_HI);
if (nmp == NULL) {
return (ENOMEM);
}
linkb(mp, nmp);
nmp->b_rptr += atp->tun_extra_offset;
nmp->b_wptr = nmp->b_rptr + sizeof (ipha_t);
*(ipha_t *)(nmp->b_rptr) = atp->tun_ipha;
nmp->b_rptr = nmp->b_datap->db_base;
break;
case TUN_L_V6:
hdrlen = sizeof (ip6_t);
if (atp->tun_encap_lim >= 0) {
hdrlen += IPV6_TUN_ENCAP_OPT_LEN;
}
nmp = allocb(hdrlen + atp->tun_extra_offset, BPRI_HI);
if (nmp == NULL) {
return (ENOMEM);
}
linkb(mp, nmp);
nmp->b_rptr += atp->tun_extra_offset;
nmp->b_wptr = nmp->b_rptr + hdrlen;
bcopy(&atp->tun_ip6h, nmp->b_rptr, hdrlen);
nmp->b_rptr = nmp->b_datap->db_base;
break;
default:
return (EPFNOSUPPORT);
}
atp->tun_flags |= TUN_FASTPATH;
return (0);
}
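/*
 * Resulting message layout (sketch, illustrative only): on success the
 * DL_IOC_HDR_INFO M_IOCTL is acked with the original DL_UNITDATA_REQ
 * still in b_cont and the newly allocated header template linkb()ed on
 * the end:
 *
 *	M_IOCTL -> DL_UNITDATA_REQ -> [ tun_extra_offset pad | outer
 *					ipha_t or ip6_t (+ encap-limit
 *					destination options) ]
 *
 * Once TUN_FASTPATH is set, the upper IP instance is expected to prepend
 * copies of that template and send plain M_DATA messages down, which are
 * handled by tun_wproc_mdata() below.
 */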
/*
* write side service procedure
*/
void
tun_wsrv(queue_t *q)
{
mblk_t *mp;
tun_t *atp = (tun_t *)q->q_ptr;
while (mp = getq(q)) {
/* out of memory or canputnext failed */
if (tun_wproc(q, mp) == ENOMEM) {
break;
}
/*
* If we called qwriter, then the only way we
* can tell if we ran out of memory is to check if
* any events have been scheduled
*/
if (atp->tun_events.ev_wtimoutid != 0 &&
atp->tun_events.ev_wbufcid != 0) {
break;
}
}
}
/* write side put procedure */
void
tun_wput(queue_t *q, mblk_t *mp)
{
/* note: q_first is 'protected' by perimeter */
if (q->q_first != NULL) {
(void) putq(q, mp);
} else {
(void) tun_wproc(q, mp);
}
}
/*
* called from write side put or service procedure to process
* messages
*/
static int
tun_wproc(queue_t *q, mblk_t *mp)
{
int error = 0;
switch (mp->b_datap->db_type) {
case M_DATA:
error = tun_wproc_mdata(q, mp);
break;
case M_PROTO:
case M_PCPROTO:
/* it's a DLPI message */
error = tun_wput_dlpi(q, mp);
break;
case M_IOCDATA:
case M_IOCTL:
/* Data to be copied out arrives from ip as M_IOCDATA */
error = tun_ioctl(q, mp);
break;
/* we are a module pretending to be a driver.. turn around flush */
case M_FLUSH:
if (*mp->b_rptr & FLUSHW) {
flushq(q, FLUSHALL);
*mp->b_rptr &= ~FLUSHW;
}
if (*mp->b_rptr & FLUSHR)
flushq(RD(q), FLUSHALL);
qreply(q, mp);
break;
/*
* we are a module pretending to be a driver, so just free messages
* we don't understand
*/
default: {
char buf[TUN_WHO_BUF];
tun0dbg(("tun_wproc: %s got unknown mblk type %d\n",
tun_who(q, buf), mp->b_datap->db_type));
freemsg(mp);
break;
}
}
return (error);
}
/*
* handle fast path M_DATA message
*/
static int
tun_wproc_mdata(queue_t *q, mblk_t *mp)
{
tun_t *atp = (tun_t *)q->q_ptr;
int error = 0;
ASSERT(atp->tun_flags & TUN_FASTPATH);
ASSERT((atp->tun_flags & TUN_L_V6) ?
(mp->b_wptr - mp->b_rptr >= atp->tun_extra_offset +
sizeof (ip6_t)) :
((atp->tun_flags & TUN_L_V4) ?
(mp->b_wptr - mp->b_rptr >= atp->tun_extra_offset +
sizeof (ipha_t)) : 1));
if (!canputnext(q)) {
atomic_add_32(&atp->tun_xmtretry, 1);
(void) putbq(q, mp);
return (ENOMEM); /* get service procedure to stop */
}
if (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)) {
int iph_hdr_length;
/*
* get rid of the fastpath header; let tun_wdata*
* fill in the real thing
*/
iph_hdr_length = IPH_HDR_LENGTH((ipha_t *)(mp->b_rptr +
atp->tun_extra_offset));
if (mp->b_wptr - mp->b_rptr < iph_hdr_length +
atp->tun_extra_offset + sizeof (ip6_t)) {
if (!pullupmsg(mp, iph_hdr_length +
atp->tun_extra_offset + sizeof (ip6_t))) {
tun0dbg(("tun_wproc_mdata: message too " \
"short for IPv6 header\n"));
atomic_add_32(&atp->tun_InErrors, 1);
atomic_add_32(&atp->tun_InDiscard, 1);
freemsg(mp);
return (0);
}
}
mp->b_rptr += atp->tun_extra_offset + iph_hdr_length;
ASSERT((atp->tun_flags & TUN_UPPER_MASK) == TUN_U_V6);
tun_wdata_v6(q, mp);
return (error);
}
switch (atp->tun_flags & TUN_UPPER_MASK) {
case TUN_U_V4:
error = tun_wputnext_v4(q, mp);
break;
case TUN_U_V6:
error = tun_wputnext_v6(q, mp);
break;
default:
atomic_add_32(&atp->tun_OutErrors, 1);
freemsg(mp);
error = EINVAL;
}
return (error);
}
/*
* Because a TUNSPARAM ioctl() is only allowed to set IPsec policy for a
* given upper instance (IPv4-over-IP* or IPv6-over-IP*), we have a special
* AF-specific flusher. This way, setting one upper instance doesn't sabotage
* the other. Don't bother with the hash-chained policy heads - they won't be
* filled in for TUNSPARAM cases.
*/
static void
flush_af(ipsec_policy_head_t *polhead, int ulp_vector, netstack_t *ns)
{
int dir;
int af = (ulp_vector == TUN_U_V4) ? IPSEC_AF_V4 : IPSEC_AF_V6;
ipsec_policy_t *ip, *nip;
ASSERT(RW_WRITE_HELD(&polhead->iph_lock));
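/*
* Walk this AF's non-hashed (match-all) rules in each direction,
* remembering the next entry before unchaining the current one.
*/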
for (dir = 0; dir < IPSEC_NTYPES; dir++) {
for (ip = polhead->iph_root[dir].ipr_nonhash[af]; ip != NULL;
ip = nip) {
nip = ip->ipsp_hash.hash_next;
IPPOL_UNCHAIN(polhead, ip, ns);
}
}
}
/*
* Set and insert the actual simple policies.
*/
static boolean_t
insert_actual_policies(ipsec_tun_pol_t *itp, ipsec_act_t *actp, uint_t nact,
int ulp_vector, netstack_t *ns)
{
ipsec_selkey_t selkey;
ipsec_policy_t *pol;
ipsec_policy_root_t *pr;
ipsec_policy_head_t *polhead = itp->itp_policy;
bzero(&selkey, sizeof (selkey));
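/*
* A zeroed selector key with only the address family marked valid
* acts as a match-all selector, yielding the low-priority
* 0.0.0.0/0 (or ::/0) rules that tun_set_sec_simple() relies on.
*/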
if (ulp_vector & TUN_U_V4) {
selkey.ipsl_valid = IPSL_IPV4;
/* v4 inbound */
pol = ipsec_policy_create(&selkey, actp, nact,
IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index, ns);
if (pol == NULL)
return (B_FALSE);
pr = &polhead->iph_root[IPSEC_TYPE_INBOUND];
HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[IPSEC_AF_V4]);
ipsec_insert_always(&polhead->iph_rulebyid, pol);
/* v4 outbound */
pol = ipsec_policy_create(&selkey, actp, nact,
IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index, ns);
if (pol == NULL)
return (B_FALSE);
pr = &polhead->iph_root[IPSEC_TYPE_OUTBOUND];
HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[IPSEC_AF_V4]);
ipsec_insert_always(&polhead->iph_rulebyid, pol);
}
if (ulp_vector & TUN_U_V6) {
selkey.ipsl_valid = IPSL_IPV6;
/* v6 inbound */
pol = ipsec_policy_create(&selkey, actp, nact,
IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index, ns);
if (pol == NULL)
return (B_FALSE);
pr = &polhead->iph_root[IPSEC_TYPE_INBOUND];
HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[IPSEC_AF_V6]);
ipsec_insert_always(&polhead->iph_rulebyid, pol);
/* v6 outbound */
pol = ipsec_policy_create(&selkey, actp, nact,
IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index, ns);
if (pol == NULL)
return (B_FALSE);
pr = &polhead->iph_root[IPSEC_TYPE_OUTBOUND];
HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[IPSEC_AF_V6]);
ipsec_insert_always(&polhead->iph_rulebyid, pol);
}
return (B_TRUE);
}
/*
* For the old-fashioned tunnel-ioctl method of setting tunnel security
* properties. In the new world, set this to be a low-priority 0.0.0.0/0
* match.
*/
static int
tun_set_sec_simple(tun_t *atp, ipsec_req_t *ipsr)
{
int rc = 0;
uint_t nact;
ipsec_act_t *actp = NULL;
boolean_t clear_all, old_policy = B_FALSE;
ipsec_tun_pol_t *itp;
tun_t *other_tun;
netstack_t *ns = atp->tun_netstack;
ipsec_stack_t *ipss = ns->netstack_ipsec;
tun1dbg(
("tun_set_sec_simple: adjusting tunnel security the old way."));
#define REQ_MASK (IPSEC_PREF_REQUIRED | IPSEC_PREF_NEVER)
/* Can't specify self-encap on a tunnel!!! */
if ((ipsr->ipsr_self_encap_req & REQ_MASK) != 0)
return (EINVAL);
/*
* If it's a "clear-all" entry, unset the security flags and
* resume normal cleartext (or inherit-from-global) policy.
*/
clear_all = ((ipsr->ipsr_ah_req & REQ_MASK) == 0 &&
(ipsr->ipsr_esp_req & REQ_MASK) == 0);
#undef REQ_MASK
mutex_enter(&atp->tun_lock);
if (!tun_policy_present(atp, ns, ipss)) {
if (clear_all) {
bzero(&atp->tun_secinfo, sizeof (ipsec_req_t));
atp->tun_policy_index = 0;
goto bail; /* No need to allocate! */
}
ASSERT(atp->tun_lifname[0] != '\0');
atp->tun_itp = create_tunnel_policy(atp->tun_lifname,
&rc, &atp->tun_itp_gen, ns);
/* NOTE: "rc" set by create_tunnel_policy(). */
if (atp->tun_itp == NULL)
goto bail;
}
itp = atp->tun_itp;
/* Allocate the actvec now, before holding itp or polhead locks. */
ipsec_actvec_from_req(ipsr, &actp, &nact, ns);
if (actp == NULL) {
rc = ENOMEM;
goto bail;
}
/*
* Just write on the active polhead. Save the primary/secondary
* stuff for spdsock operations.
*
* Mutex because we need to write to the polhead AND flags atomically.
* Other threads will acquire the polhead lock as a reader if the
* (unprotected) flag is set.
*/
mutex_enter(&itp->itp_lock);
if (itp->itp_flags & ITPF_P_TUNNEL) {
/*
* Oops, we lost a race. Let's get out of here.
*/
rc = EBUSY;
goto mutex_bail;
}
old_policy = ((itp->itp_flags & ITPF_P_ACTIVE) != 0);
if (old_policy) {
/*
* We have to be more subtle here than we would
* in the spdsock code-paths, due to backward compatibility:
* snapshot the active rules into the inactive polhead so they
* can be restored if the new policy can't be inserted.
*/
ITPF_CLONE(itp->itp_flags);
rc = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive, ns);
if (rc != 0) {
/* inactive has already been cleared. */
itp->itp_flags &= ~ITPF_IFLAGS;
goto mutex_bail;
}
rw_enter(&itp->itp_policy->iph_lock, RW_WRITER);
flush_af(itp->itp_policy, atp->tun_flags & TUN_UPPER_MASK, ns);
} else {
/* Else assume itp->itp_policy is already flushed. */
rw_enter(&itp->itp_policy->iph_lock, RW_WRITER);
}
if (clear_all) {
/* We've already cleared out the polhead. We are now done. */
if (avl_numnodes(&itp->itp_policy->iph_rulebyid) == 0)
itp->itp_flags &= ~ITPF_PFLAGS;
rw_exit(&itp->itp_policy->iph_lock);
bzero(&atp->tun_secinfo, sizeof (ipsec_req_t));
old_policy = B_FALSE; /* Clear out the inactive one too. */
goto recover_bail;
}
if (insert_actual_policies(itp, actp, nact,
atp->tun_flags & TUN_UPPER_MASK, ns)) {
rw_exit(&itp->itp_policy->iph_lock);
/*
* Adjust MTU and make sure the DL side knows what's up.
*/
atp->tun_ipsec_overhead = ipsec_act_ovhd(actp);
itp->itp_flags = ITPF_P_ACTIVE;
/*
* <sigh> There has to be a better way, but for now, send an
* IRE_DB_REQ again. We will resynch from scratch, but have
* the tun_ipsec_overhead taken into account.
*/
tun_send_ire_req(atp->tun_wq);
old_policy = B_FALSE; /* Blank out inactive - we succeeded */
/* Copy ipsec_req_t for subsequent SIOGTUNPARAM ops. */
atp->tun_secinfo = *ipsr;
} else {
rw_exit(&itp->itp_policy->iph_lock);
rc = ENOMEM;
}
recover_bail:
atp->tun_policy_index = itp->itp_next_policy_index;
/* Find the "other guy" (v4/v6) and update his tun_policy_index too. */
if (atp->tun_stats != NULL) {
if (atp->tun_stats->ts_atp == atp) {
other_tun = atp->tun_kstat_next;
ASSERT(other_tun == NULL ||
other_tun->tun_kstat_next == NULL);
} else {
other_tun = atp->tun_stats->ts_atp;
ASSERT(other_tun != NULL);
ASSERT(other_tun->tun_kstat_next == atp);
}
if (other_tun != NULL)
other_tun->tun_policy_index = atp->tun_policy_index;
}
if (old_policy) {
/* Recover policy in the active polhead. */
ipsec_swap_policy(itp->itp_policy, itp->itp_inactive, ns);
ITPF_SWAP(itp->itp_flags);
atp->tun_extra_offset = TUN_LINK_EXTRA_OFF;
}
/* Clear policy in inactive polhead. */
itp->itp_flags &= ~ITPF_IFLAGS;
rw_enter(&itp->itp_inactive->iph_lock, RW_WRITER);
ipsec_polhead_flush(itp->itp_inactive, ns);
rw_exit(&itp->itp_inactive->iph_lock);
mutex_bail:
mutex_exit(&itp->itp_lock);
bail:
if (actp != NULL)
ipsec_actvec_free(actp, nact);
mutex_exit(&atp->tun_lock);
return (rc);
}
/*
* Send an IRE_DB_REQ_TYPE to the lower module to obtain an IRE for the
* tunnel destination. If the tunnel has no destination, then request an
* IRE for the source instead.
*/
static void
tun_send_ire_req(queue_t *q)
{
tun_t *atp = q->q_ptr;
mblk_t *mp;
ire_t *ire;
uint_t lvers = (atp->tun_flags & TUN_LOWER_MASK);
char addrstr[INET6_ADDRSTRLEN];
if ((mp = tun_realloc_mblk(q, NULL, sizeof (ire_t), NULL, B_FALSE)) ==
NULL) {
tun0dbg(("tun_send_ire_req: couldn't allocate mblk\n"));
return;
}
mp->b_datap->db_type = IRE_DB_REQ_TYPE;
ire = (ire_t *)mp->b_rptr;
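/*
* The request carries a template ire_t with only the IP version and
* address filled in; the lower instance replies with an IRE_DB_TYPE
* message, which tun_rproc() uses to refresh the IPsec overhead and
* link MTU.
*/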
if (lvers == TUN_L_V4) {
ire->ire_ipversion = IPV4_VERSION;
/*
* For tunnels without destinations, we request the source
* ire so that we can account for IPsec policy in our MTU
* calculation.
*/
ire->ire_addr = (atp->tun_flags & TUN_DST) ?
atp->tun_ipha.ipha_dst : atp->tun_ipha.ipha_src;
} else {
ASSERT(lvers == TUN_L_V6 && (atp->tun_flags & TUN_DST));
ire->ire_ipversion = IPV6_VERSION;
ire->ire_addr_v6 = atp->tun_ip6h.ip6_dst;
}
tun1dbg(("tun_send_ire_req: requesting ire for %s",
(lvers == TUN_L_V4 ?
inet_ntop(AF_INET, &ire->ire_addr, addrstr, INET6_ADDRSTRLEN) :
inet_ntop(AF_INET6, &ire->ire_addr_v6, addrstr,
INET6_ADDRSTRLEN))));
atp->tun_ire_lastreq = lbolt;
putnext(WR(q), mp);
}
/*
* Given the path MTU to the tunnel destination, calculate the tunnel's link
* MTU. For configured tunnels, we update the tunnel's link MTU and notify
* the upper instance of IP of the change so that the IP interface's MTU
* can be updated. If the tunnel is a 6to4 or automatic tunnel, just
* return the effective MTU of the tunnel without updating it. We don't
* update the link MTU of 6to4 or automatic tunnels because they tunnel to
* multiple destinations, all with potentially differing path MTUs.
*/
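/*
* A worked example (a sketch, assuming no IPsec overhead applies): a
* 1500-byte path MTU over an IPv4 lower layer yields a 1480-byte tunnel
* MTU (1500 - sizeof (ipha_t)); over an IPv6 lower layer it yields
* 1460 bytes (1500 - sizeof (ip6_t)), less IPV6_TUN_ENCAP_OPT_LEN more
* when a positive encapsulation limit is configured.
*/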
static uint32_t
tun_update_link_mtu(queue_t *q, uint32_t pmtu, boolean_t icmp)
{
tun_t *atp = (tun_t *)q->q_ptr;
uint32_t newmtu = pmtu;
boolean_t sendsdusize = B_FALSE;
/*
* If the pmtu provided came from an ICMP error being passed up
* from below, then the pmtu argument has already been adjusted
* by the IPsec overhead.
*/
if (!icmp && atp->tun_itp != NULL &&
(atp->tun_itp->itp_flags & ITPF_P_ACTIVE))
newmtu -= atp->tun_ipsec_overhead;
if (atp->tun_flags & TUN_L_V4) {
newmtu -= sizeof (ipha_t);
if (newmtu < IP_MIN_MTU)
newmtu = IP_MIN_MTU;
} else {
ASSERT(atp->tun_flags & TUN_L_V6);
newmtu -= sizeof (ip6_t);
if (atp->tun_encap_lim > 0)
newmtu -= IPV6_TUN_ENCAP_OPT_LEN;
if (newmtu < IPV6_MIN_MTU)
newmtu = IPV6_MIN_MTU;
}
if (!(atp->tun_flags & (TUN_6TO4 | TUN_AUTOMATIC))) {
if (newmtu != atp->tun_mtu) {
atp->tun_mtu = newmtu;
sendsdusize = B_TRUE;
}
if (sendsdusize)
tun_sendsdusize(q);
}
return (newmtu);
}
/*
* Process TPI message responses coming up the read side
*/
/* ARGSUSED */
int
tun_rput_tpi(queue_t *q, mblk_t *mp)
{
tun_t *atp = (tun_t *)q->q_ptr;
t_uscalar_t prim = *((t_uscalar_t *)mp->b_rptr);
mblk_t *iocmp;
switch (prim) {
case T_BIND_ACK:
tun1dbg(("tun_rput_tpi: got a T_BIND_ACK\n"));
mutex_enter(&atp->tun_lock);
/*
* XXX This first assert may fail if this path gets re-
* executed because of tun_recover() being invoked.
*/
ASSERT((atp->tun_flags & TUN_BIND_SENT) != 0);
ASSERT(atp->tun_iocmp != NULL);
/*
* If we have an IRE in mp->b_cont, use it to help compute
* atp->tun_extra_offset, tun_ipsec_overhead, and the link
* MTU of configured tunnels.
*/
if (mp->b_cont != NULL) {
ire_t *ire;
ire = (ire_t *)mp->b_cont->b_rptr;
/*
* Take advice from lower-layer if it is bigger than
* what we have cached now. We do manage per-tunnel
* policy, but there may be global overhead to account
* for.
*/
atp->tun_ipsec_overhead = max(ire->ire_ipsec_overhead,
atp->tun_ipsec_overhead);
if (atp->tun_flags & TUN_DST) {
atp->tun_extra_offset =
MAX(ire->ire_ll_hdr_length,
TUN_LINK_EXTRA_OFF);
(void) tun_update_link_mtu(q,
ire->ire_max_frag, B_FALSE);
}
}
/*
* Automatic and 6to4 tunnels only require the source to be set;
* configured tunnels require both source and destination.
*/
if ((atp->tun_flags & TUN_SRC) &&
(atp->tun_flags & (TUN_DST | TUN_AUTOMATIC | TUN_6TO4))) {
atp->tun_flags |= TUN_BOUND;
}
atp->tun_flags &= ~TUN_BIND_SENT;
iocmp = atp->tun_iocmp;
/*
* Ack the ioctl
*/
atp->tun_iocmp = NULL;
mutex_exit(&atp->tun_lock);
freemsg(mp);
putnext(q, iocmp);
break;
case T_ERROR_ACK: {
struct T_error_ack *terr = (struct T_error_ack *)mp->b_rptr;
switch (terr->ERROR_prim) {
case T_BIND_REQ: {
struct iftun_req *ta;
mblk_t *mp1;
struct iocblk *iocp;
mutex_enter(&atp->tun_lock);
atp->tun_flags &= ~(TUN_BOUND | TUN_BIND_SENT);
iocmp = atp->tun_iocmp;
atp->tun_iocmp = NULL;
mutex_exit(&atp->tun_lock);
iocp = (struct iocblk *)(iocmp->b_rptr);
mp1 = iocmp->b_cont;
if (mp1 != NULL)
mp1 = mp1->b_cont;
if (mp1 != NULL) {
ta = (struct iftun_req *)mp1->b_rptr;
if (ta->ifta_flags & IFTUN_SRC) {
atp->tun_flags &= ~TUN_SRC;
}
if (ta->ifta_flags & IFTUN_DST) {
atp->tun_flags &= ~TUN_DST;
}
}
switch (terr->TLI_error) {
default:
iocp->ioc_error = EINVAL;
break;
case TSYSERR:
iocp->ioc_error = terr->UNIX_error;
break;
case TBADADDR:
iocp->ioc_error = EADDRNOTAVAIL;
break;
}
putnext(q, iocmp);
freemsg(mp);
return (0);
}
default: {
char buf[TUN_WHO_BUF];
tun0dbg(("tun_rput_tpi: %s got an unkown TPI Error " \
"message: %d\n",
tun_who(q, buf), terr->ERROR_prim));
freemsg(mp);
break;
}
}
break;
}
case T_OK_ACK:
freemsg(mp);
break;
/* act like a stream head and eat all other TPI messages coming up */
default: {
char buf[TUN_WHO_BUF];
tun0dbg(("tun_rput_tpi: %s got an unkown TPI message: %d\n",
tun_who(q, buf), prim));
freemsg(mp);
break;
}
}
return (0);
}
/*
* handle tunnel over IPv6
*/
static int
tun_rdata_v6(queue_t *q, mblk_t *ipsec_mp, mblk_t *data_mp, tun_t *atp)
{
ip6_t *outer_ip6h, *ip6h;
ipha_t *inner_iph;
uint8_t *rptr;
size_t hdrlen;
mblk_t *mp1, *nmp, *orig_mp = data_mp;
uint8_t nexthdr;
boolean_t inner_v4;
in6_addr_t v6src;
in6_addr_t v6dst;
char buf[TUN_WHO_BUF];
char buf1[INET6_ADDRSTRLEN];
char buf2[INET6_ADDRSTRLEN];
int pullup_len;
/* need at least an IPv6 header. */
ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ip6_t));
outer_ip6h = (ip6_t *)data_mp->b_rptr;
/* Handle ip6i_t case. */
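/*
* An ip6i_t is an ip6_t-sized, kernel-internal info header that the
* lower IP instance may prepend (flagged by ip6_nxt == IPPROTO_RAW).
* Step over it, freeing the leading mblk if it held nothing else.
*/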
if (outer_ip6h->ip6_nxt == IPPROTO_RAW) {
/*
* Assume sizeof (ip6i_t) == sizeof(ip6_t), can't
* use ASSERT because of lint warnings.
*/
rptr = (uint8_t *)(outer_ip6h + 1);
data_mp->b_rptr = rptr;
if (rptr == data_mp->b_wptr) {
mp1 = data_mp->b_cont;
freeb(data_mp);
orig_mp = data_mp = mp1;
rptr = data_mp->b_rptr;
if (ipsec_mp != NULL)
ipsec_mp->b_cont = data_mp;
}
ASSERT(data_mp->b_wptr - rptr >= sizeof (ip6_t));
outer_ip6h = (ip6_t *)rptr;
}
hdrlen = ip_hdr_length_v6(data_mp, outer_ip6h);
ASSERT(IPH_HDR_VERSION(outer_ip6h) == IPV6_VERSION);
ASSERT(hdrlen >= sizeof (ip6_t));
ASSERT(hdrlen <= (data_mp->b_wptr - data_mp->b_rptr));
v6src = outer_ip6h->ip6_src;
v6dst = outer_ip6h->ip6_dst;
/*
* If the Next Header field is not IPPROTO_ENCAP or IPPROTO_IPV6, there
* are IPv6 options present that we need to parse in order to figure
* out whether we have an encapsulated IPv4 or IPv6 packet here.
*/
if (outer_ip6h->ip6_nxt != IPPROTO_ENCAP &&
outer_ip6h->ip6_nxt != IPPROTO_IPV6) {
/* Tunnel packet with options!!! */
ip6_pkt_t ipp;
ipp.ipp_fields = 0; /* must be initialized */
(void) ip_find_hdr_v6(data_mp, outer_ip6h, &ipp, NULL);
if (ipp.ipp_dstopts != NULL) {
nexthdr = ipp.ipp_dstopts->ip6d_nxt;
} else if (ipp.ipp_rthdr != NULL) {
nexthdr = ipp.ipp_rthdr->ip6r_nxt;
} else if (ipp.ipp_hopopts != NULL) {
nexthdr = ipp.ipp_hopopts->ip6h_nxt;
} else {
/* Otherwise, pretend it's IP + ESP. */
cmn_err(CE_WARN, "tun IPv6 headers wrong (%d).\n",
outer_ip6h->ip6_nxt);
nexthdr = outer_ip6h->ip6_nxt;
}
} else {
nexthdr = outer_ip6h->ip6_nxt;
}
inner_v4 = (nexthdr == IPPROTO_ENCAP);
/*
* NOTE: The "+ 4" is for the upper-layer protocol information
* (ports) so we can enforce policy.
*/
pullup_len = hdrlen + (inner_v4 ? sizeof (ipha_t) : sizeof (ip6_t)) + 4;
if ((data_mp->b_wptr - data_mp->b_rptr) < pullup_len) {
if (!pullupmsg(data_mp, pullup_len)) {
atomic_add_32(&atp->tun_InErrors, 1);
atomic_add_32(&atp->tun_InDiscard, 1);
goto drop;
}
outer_ip6h = (ip6_t *)data_mp->b_rptr;
}
/* Shave off the outer header(s). */
data_mp->b_rptr += hdrlen;
if (inner_v4) {
/* IPv4 in IPv6 */
inner_iph = (ipha_t *)data_mp->b_rptr;
ASSERT(IPH_HDR_VERSION(inner_iph) == IPV4_VERSION);
ASSERT(IN6_ARE_ADDR_EQUAL(&v6dst, &atp->tun_laddr) &&
IN6_ARE_ADDR_EQUAL(&v6src, &atp->tun_faddr));
if (!ipsec_tun_inbound(ipsec_mp, &data_mp, atp->tun_itp,
inner_iph, NULL, NULL, outer_ip6h, 0,
atp->tun_netstack)) {
data_mp = NULL;
ipsec_mp = NULL;
atomic_add_32(&atp->tun_InErrors, 1);
goto drop;
}
ipsec_mp = NULL;
if (data_mp != orig_mp) {
/* mp has changed, reset appropriate pointers */
/* Outer hdrlen is already shaved off */
ASSERT(data_mp != NULL);
inner_iph = (ipha_t *)data_mp->b_rptr;
}
/*
* Remember - ipsec_tun_inbound() may return a whole chain
* of packets if there was per-port policy on the ITP and
* we got a fragmented packet.
*/
if (CLASSD(inner_iph->ipha_dst)) {
for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next)
atomic_add_64(&atp->tun_HCInMulticastPkts, 1);
} else {
for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next)
atomic_add_64(&atp->tun_HCInUcastPkts, 1);
}
} else {
/* IPv6 in IPv6 */
ip6h = (ip6_t *)data_mp->b_rptr;
ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
ASSERT(IN6_ARE_ADDR_EQUAL(&v6dst, &atp->tun_laddr));
if (!ipsec_tun_inbound(ipsec_mp, &data_mp, atp->tun_itp, NULL,
ip6h, NULL, outer_ip6h, 0, atp->tun_netstack)) {
data_mp = NULL;
ipsec_mp = NULL;
atomic_add_32(&atp->tun_InErrors, 1);
goto drop;
}
ipsec_mp = NULL;
if (data_mp != orig_mp) {
/* mp has changed, reset appropriate pointers */
/* v6src was copied by value earlier and is still valid */
ASSERT(data_mp != NULL);
ip6h = (ip6_t *)data_mp->b_rptr;
}
/*
* Remember - ipsec_tun_inbound() may return a whole chain
* of packets if there was per-port policy on the ITP and
* we got a fragmented packet.
*/
if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next)
atomic_add_64(&atp->tun_HCInMulticastPkts, 1);
} else {
for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next)
atomic_add_64(&atp->tun_HCInUcastPkts, 1);
}
if (!IN6_ARE_ADDR_EQUAL(&v6src, &atp->tun_faddr)) {
/*
* Configured Tunnel packet source should match our
* destination
* Lower IP should ensure that this is true
*/
tun0dbg(("tun_rdata_v6: %s src (%s) != tun_faddr " \
"(%s)\n", tun_who(q, buf),
inet_ntop(AF_INET6, &v6src, buf1,
sizeof (buf1)),
inet_ntop(AF_INET6, &atp->tun_faddr, buf2,
sizeof (buf2))));
for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next)
atomic_add_32(&atp->tun_InErrors, 1);
goto drop;
}
}
TUN_PUTMSG_CHAIN_STATS(q, data_mp, nmp, &atp->tun_HCInOctets);
return (0);
drop:
if (ipsec_mp != NULL)
freeb(ipsec_mp);
tun_freemsg_chain(data_mp, NULL);
return (0);
}
/*
* Handle tunnels over IPv4
* XXX - we don't do any locking here. The worst that can happen is
* that we drop the packet or don't record stats quite right; but what
* is the worst that can happen if the header fields change underneath us?
*/
static int
tun_rdata_v4(queue_t *q, mblk_t *ipsec_mp, mblk_t *data_mp, tun_t *atp)
{
ipha_t *iph, *inner_iph;
ip6_t *ip6h;
size_t hdrlen;
mblk_t *mp1, *nmp, *orig_mp = data_mp;
boolean_t inner_v4;
ipaddr_t v4src;
ipaddr_t v4dst;
in6_addr_t v4mapped_src;
in6_addr_t v4mapped_dst;
char buf1[INET6_ADDRSTRLEN];
char buf2[INET6_ADDRSTRLEN];
char buf[TUN_WHO_BUF];
int pullup_len;
tun_stack_t *tuns = atp->tun_netstack->netstack_tun;
/* need at least an IP header */
ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t));
iph = (ipha_t *)data_mp->b_rptr;
hdrlen = IPH_HDR_LENGTH(iph);
/* check IP version number */
ASSERT(IPH_HDR_VERSION(iph) == IPV4_VERSION);
ASSERT(hdrlen >= sizeof (ipha_t));
ASSERT(hdrlen <= (data_mp->b_wptr - data_mp->b_rptr));
v4src = iph->ipha_src;
v4dst = iph->ipha_dst;
IN6_IPADDR_TO_V4MAPPED(v4src, &v4mapped_src);
IN6_IPADDR_TO_V4MAPPED(v4dst, &v4mapped_dst);
inner_v4 = (iph->ipha_protocol == IPPROTO_ENCAP);
/*
* NOTE: The "+ 4" is for the upper-layer protocol headers
* so we can enforce policy.
*/
pullup_len = hdrlen + (inner_v4 ? sizeof (ipha_t) : sizeof (ip6_t)) + 4;
if ((data_mp->b_wptr - data_mp->b_rptr) < pullup_len) {
if (!pullupmsg(data_mp, pullup_len)) {
atomic_add_32(&atp->tun_InErrors, 1);
atomic_add_32(&atp->tun_InDiscard, 1);
if (ipsec_mp != NULL)
freeb(ipsec_mp);
goto drop;
}
iph = (ipha_t *)data_mp->b_rptr;
}
/* Shave off the IPv4 header. */
data_mp->b_rptr += hdrlen;
if (inner_v4) {
/* IPv4 in IPv4 */
inner_iph = (ipha_t *)data_mp->b_rptr;
ASSERT(IPH_HDR_VERSION(inner_iph) == IPV4_VERSION);
ASSERT(IN6_ARE_ADDR_EQUAL(&v4mapped_dst, &atp->tun_laddr) &&
IN6_ARE_ADDR_EQUAL(&v4mapped_src, &atp->tun_faddr));
/* NOTE: ipsec_tun_inbound() always frees ipsec_mp. */
if (!ipsec_tun_inbound(ipsec_mp, &data_mp, atp->tun_itp,
inner_iph, NULL, iph, NULL, 0, atp->tun_netstack)) {
data_mp = NULL;
atomic_add_32(&atp->tun_InErrors, 1);
goto drop;
}
if (data_mp != orig_mp) {
/* mp has changed, reset appropriate pointers */
/* Outer hdrlen is already shaved off */
ASSERT(data_mp != NULL);
inner_iph = (ipha_t *)data_mp->b_rptr;
}
/*
* Remember - ipsec_tun_inbound() may return a whole chain
* of packets if there was per-port policy on the ITP and
* we got a fragmented packet.
*/
if (CLASSD(inner_iph->ipha_dst)) {
for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next)
atomic_add_64(&atp->tun_HCInMulticastPkts, 1);
} else {
for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next)
atomic_add_64(&atp->tun_HCInUcastPkts, 1);
}
} else {
/* IPv6 in IPv4 */
ip6h = (ip6_t *)data_mp->b_rptr;
ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
/* NOTE: ipsec_tun_inbound() always frees ipsec_mp. */
if (!ipsec_tun_inbound(ipsec_mp, &data_mp, atp->tun_itp, NULL,
ip6h, iph, NULL, 0, atp->tun_netstack)) {
data_mp = NULL;
atomic_add_32(&atp->tun_InErrors, 1);
goto drop;
}
if (data_mp != orig_mp) {
/* mp has changed, reset appropriate pointers */
/*
* v4src and v4dst were copied by value
* earlier and are still valid
*/
ASSERT(data_mp != NULL);
ip6h = (ip6_t *)data_mp->b_rptr;
}
/*
* Remember - ipsec_tun_inbound() may return a whole chain
* of packets if there was per-port policy on the ITP and
* we got a fragmented packet.
*/
ASSERT(IN6_ARE_ADDR_EQUAL(&v4mapped_dst, &atp->tun_laddr));
if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next)
atomic_add_64(&atp->tun_HCInMulticastPkts, 1);
} else {
for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next)
atomic_add_64(&atp->tun_HCInUcastPkts, 1);
}
/* Is this an automatic tunnel ? */
if ((atp->tun_flags & TUN_AUTOMATIC) != 0) {
dl_unitdata_ind_t *dludindp;
/*
* make sure IPv4 destination makes sense
*/
if (v4dst == INADDR_ANY || CLASSD(v4dst)) {
tun0dbg(("tun_rdata_v4: %s tun: invalid IPv4" \
" dest (%s)\n",
tun_who(q, buf),
inet_ntop(AF_INET, &v4dst,
buf1, sizeof (buf1))));
for (nmp = data_mp; nmp != NULL;
nmp = nmp->b_next) {
atomic_add_32(&atp->tun_InErrors, 1);
}
goto drop;
}
/*
* send packet up as DL_UNITDATA_IND so that it won't
* be forwarded
*/
mp1 = allocb(sizeof (dl_unitdata_ind_t), BPRI_HI);
if (mp1 == NULL) {
tun0dbg(("tun_rdata_v4: allocb failed\n"));
atomic_add_32(&atp->tun_InDiscard, 1);
atomic_add_32(&atp->tun_allocbfail, 1);
goto drop;
}
mp1->b_cont = data_mp;
data_mp = mp1;
/*
* create dl_unitdata_ind with group address set so
* we don't forward
*/
data_mp->b_wptr = data_mp->b_rptr +
sizeof (dl_unitdata_ind_t);
data_mp->b_datap->db_type = M_PROTO;
dludindp = (dl_unitdata_ind_t *)data_mp->b_rptr;
dludindp->dl_primitive = DL_UNITDATA_IND;
dludindp->dl_dest_addr_length = 0;
dludindp->dl_dest_addr_offset = 0;
dludindp->dl_src_addr_length = 0;
dludindp->dl_src_addr_offset = 0;
dludindp->dl_group_address = 1;
/* Is this a 6to4 tunnel ? */
} else if ((atp->tun_flags & TUN_6TO4) != 0) {
struct in_addr v4addr;
/*
* Make sure IPv6 destination is a 6to4 address.
* ip_rput_data_v6 will ensure that 6to4 prefix
* of IPv6 destination and the prefix assigned to
* the interface, on which this packet was received,
* match.
*/
if (!IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) {
tun0dbg(("tun_rdata_v4: %s tun: invalid " \
"IPv6 dest (%s)\n", tun_who(q, buf),
inet_ntop(AF_INET6, &ip6h->ip6_dst, buf1,
sizeof (buf1))));
atomic_add_32(&atp->tun_InErrors, 1);
goto drop;
}
/*
* make sure IPv4 destination makes sense
*/
if (v4dst == INADDR_ANY || CLASSD(v4dst)) {
tun0dbg(("tun_rdata_v4: %s tun: invalid " \
"IPv4 dest (%s)\n", tun_who(q, buf),
inet_ntop(AF_INET, &v4dst, buf1,
sizeof (buf1))));
for (nmp = data_mp; nmp != NULL;
nmp = nmp->b_next) {
atomic_add_32(&atp->tun_InErrors, 1);
}
goto drop;
}
/*
* 6to4 router security considerations state that
* the V4ADDR portion of the IPv6 destination
* MUST be equal to the IPv4 destination.
*/
IN6_6TO4_TO_V4ADDR(&ip6h->ip6_dst, &v4addr);
if ((ipaddr_t)v4addr.s_addr != v4dst) {
tun0dbg(("tun_rdata_v4: %s tun: V4ADDR " \
"portion of 6to4 IPv6 dest (%s) does not" \
" equal IPv4 dest (%s)\n", tun_who(q, buf),
inet_ntop(AF_INET, &v4addr,
buf1, sizeof (buf1)),
inet_ntop(AF_INET, &v4dst,
buf2, sizeof (buf2))));
for (nmp = data_mp; nmp != NULL;
nmp = nmp->b_next) {
atomic_add_32(&atp->tun_InErrors, 1);
}
goto drop;
}
/*
* check to see if the source was another 6to4 router
*/
if (IN6_IS_ADDR_6TO4(&ip6h->ip6_src)) {
/*
* 6to4 router security considerations state
* that the V4ADDR portion of the IPv6 source
* MUST be equal to the IPv4 source, when
* the source machine is another 6to4 router
*/
IN6_6TO4_TO_V4ADDR(&ip6h->ip6_src, &v4addr);
if ((ipaddr_t)v4addr.s_addr != v4src) {
tun0dbg(("tun_rdata_v4: %s tun: " \
"V4ADDR portion of 6to4 IPv6 src" \
" (%s) does not equal IPv4 src " \
"(%s)\n",
tun_who(q, buf),
inet_ntop(AF_INET, &v4addr,
buf1, sizeof (buf1)),
inet_ntop(AF_INET, &v4src,
buf2, sizeof (buf2))));
for (nmp = data_mp; nmp != NULL;
nmp = nmp->b_next) {
atomic_add_32(
&atp->tun_InErrors, 1);
}
goto drop;
}
/*
* IPv6 source is, possibly, a "Native"
* (i.e. non-6to4) IPv6 host. IPv4 source is,
* possibly, a 6to4 Relay Router.
*/
} else {
/*
* Check whether tun module support for 6to4
* Relay Routers is enabled or disabled.
* tuns_relay_rtr_addr_v4 will equal INADDR_ANY
* if support is disabled. Otherwise, it will
* equal a valid, routable IPv4 address, denoting
* that the packet will be accepted.
*
* There is no standard trust mechanism for
* 6to4 Relay Routers, thus communication
* support is disabled by default for
* security reasons.
*/
if (tuns->tuns_relay_rtr_addr_v4 ==
INADDR_ANY) {
tun1dbg(("tun_rdata_v4: "
"%s tuns_relay_rtr_addr_v4 = %s, "
"dropping packet from IPv4 src "
"%s\n", tun_who(q, buf),
inet_ntop(AF_INET,
&tuns->tuns_relay_rtr_addr_v4,
buf1, sizeof (buf1)),
inet_ntop(AF_INET, &v4src, buf2,
sizeof (buf2))));
for (nmp = data_mp; nmp != NULL;
nmp = nmp->b_next) {
atomic_add_32(
&atp->tun_InErrors, 1);
}
goto drop;
}
}
/*
* this might happen if we are in the middle of
* re-binding
*/
} else if (!IN6_ARE_ADDR_EQUAL(&v4mapped_src,
&atp->tun_faddr)) {
/*
* Configured Tunnel packet source should match our
* destination
* Lower IP should ensure that this is true
*/
tun0dbg(("tun_rdata_v4: %s src (%s) != tun_faddr " \
"(%s)\n", tun_who(q, buf),
inet_ntop(AF_INET6, &v4mapped_src,
buf1, sizeof (buf1)),
inet_ntop(AF_INET6, &atp->tun_faddr,
buf2, sizeof (buf2))));
/* XXX - should this be per-frag? */
for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next)
atomic_add_32(&atp->tun_InErrors, 1);
goto drop;
}
}
TUN_PUTMSG_CHAIN_STATS(q, data_mp, nmp, &atp->tun_HCInOctets);
return (0);
drop:
tun_freemsg_chain(data_mp, NULL);
return (0);
}
static void
tun_rput_icmp_err_v6(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp)
{
tun_t *atp = (tun_t *)q->q_ptr;
ip6_t *ip6;
icmp6_t *icmph;
int hdr_length;
ip6 = (ip6_t *)mp->b_rptr;
hdr_length = ip_hdr_length_v6(mp, ip6);
icmph = (icmp6_t *)(&mp->b_rptr[hdr_length]);
switch (atp->tun_flags & TUN_UPPER_MASK) {
case TUN_U_V6:
icmp_ricmp_err_v6_v6(q, mp, ipsec_mp, icmph);
break;
case TUN_U_V4:
icmp_ricmp_err_v4_v6(q, mp, ipsec_mp, icmph);
break;
default:
atomic_add_32(&atp->tun_InErrors, 1);
ASSERT(0);
if (ipsec_mp != NULL)
freeb(ipsec_mp);
freemsg(mp);
}
}
/*
* icmp from lower IPv4
* Process ICMP messages from IPv4. Pass them to the appropriate
* lower processing function.
*/
static void
tun_rput_icmp_err_v4(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp)
{
tun_t *atp = (tun_t *)q->q_ptr;
switch (atp->tun_flags & TUN_UPPER_MASK) {
case TUN_U_V6:
icmp_ricmp_err_v6_v4(q, mp, ipsec_mp);
break;
case TUN_U_V4:
icmp_ricmp_err_v4_v4(q, mp, ipsec_mp);
break;
default:
atomic_add_32(&atp->tun_InErrors, 1);
ASSERT(0);
if (ipsec_mp != NULL)
freeb(ipsec_mp);
freemsg(mp);
}
}
/*
* Process ICMP message from IPv4 encapsulating an IPv4 packet.
* If this message contains path mtu information, cut out the
* encapsulation from the icmp data. If there is still useful
* information in the icmp data pass it upstream (packaged correctly for
* the upper layer IP)
*/
static void
icmp_ricmp_err_v4_v4(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp)
{
tun_t *atp = (tun_t *)q->q_ptr;
ipha_t *outer_ipha, *inner_ipha;
int outer_hlen;
int inner_hlen;
int hlen;
icmph_t icmp;
uint8_t type;
uint8_t code;
char buf1[INET_ADDRSTRLEN];
char buf2[INET_ADDRSTRLEN];
icmph_t *icmph;
mblk_t *orig_mp = mp;
/*
* The packet looks like this :
*
* [IPv4(0)][ICMPv4][IPv4(1)][IPv4(2)][ULP]
*
* We want most of this in one piece. But if the ULP is ICMP, we
* need to see whether it is an ICMP error or not. We should not
* send icmp errors in response to icmp errors. "outer_ipha" points
* to IP header (1), "inner_ipha" points to IP header (2). Inbound
* policy lookups for ICMP need to reverse the src/dst of things.
* Fortunately, ipsec_tun_inbound() can determine if this is an ICMP
* message or not.
*
* The caller already pulled up the entire message, or should have!
*/
ASSERT(mp->b_cont == NULL);
hlen = IPH_HDR_LENGTH((ipha_t *)mp->b_rptr);
icmph = (icmph_t *)(&mp->b_rptr[hlen]);
outer_ipha = (ipha_t *)&icmph[1];
outer_hlen = IPH_HDR_LENGTH(outer_ipha);
inner_ipha = (ipha_t *)((uint8_t *)outer_ipha + outer_hlen);
if (((uchar_t *)inner_ipha + sizeof (ipha_t)) > mp->b_wptr) {
atomic_add_32(&atp->tun_InDiscard, 1);
if (ipsec_mp != NULL)
freeb(ipsec_mp);
freemsg(mp);
return;
}
if (inner_ipha->ipha_protocol == IPPROTO_ICMP) {
icmph_t *inner_icmph;
inner_hlen = IPH_HDR_LENGTH(inner_ipha);
inner_icmph = (icmph_t *)((uchar_t *)inner_ipha + inner_hlen);
if (((uchar_t *)inner_icmph + sizeof (icmph_t)) > mp->b_wptr) {
atomic_add_32(&atp->tun_InDiscard, 1);
if (ipsec_mp != NULL)
freeb(ipsec_mp);
freemsg(mp);
return;
}
switch (inner_icmph->icmph_type) {
case ICMP_DEST_UNREACHABLE:
case ICMP_SOURCE_QUENCH:
case ICMP_TIME_EXCEEDED:
case ICMP_PARAM_PROBLEM:
case ICMP_REDIRECT:
atomic_add_32(&atp->tun_InDiscard, 1);
freemsg(mp);
if (ipsec_mp != NULL)
freeb(ipsec_mp);
return;
default:
break;
}
}
type = icmph->icmph_type;
code = icmph->icmph_code;
/*
* NOTE: icmp_inbound() in IP already checked global policy on the
* outermost header. If we got here, IP thought it was okay for
* us to receive it. We now have to use inner policy to see if
* we want to percolate it up (like conn_t's are checked).
*
* Use -outer_hlen to indicate this is an ICMP packet. And
* ipsec_tun_inbound() always frees ipsec_mp.
*/
if (!ipsec_tun_inbound(ipsec_mp, &mp, atp->tun_itp, inner_ipha, NULL,
outer_ipha, NULL, -outer_hlen, atp->tun_netstack)) {
/* Callee did all of the freeing */
return;
}
ASSERT(mp == orig_mp);
/* New packet will contain all of old packet */
mp->b_rptr = (uchar_t *)inner_ipha;
switch (type) {
case ICMP_DEST_UNREACHABLE:
switch (code) {
case ICMP_FRAGMENTATION_NEEDED: {
uint16_t mtu;
mtu = ntohs(icmph->icmph_du_mtu);
if (icmph->icmph_du_zero != 0 && mtu <= IP_MIN_MTU) {
tun0dbg(("icmp_ricmp_err_v4_v4: invalid " \
"icmp mtu\n"));
atomic_add_32(&atp->tun_InErrors, 1);
freemsg(mp);
return;
}
mutex_enter(&atp->tun_lock);
mtu = tun_update_link_mtu(q, mtu, B_TRUE);
mutex_exit(&atp->tun_lock);
if (!tun_icmp_too_big_v4(q, inner_ipha, mtu, mp)) {
atomic_add_32(&atp->tun_InDiscard, 1);
atomic_add_32(&atp->tun_allocbfail, 1);
}
return;
}
case ICMP_PROTOCOL_UNREACHABLE:
/*
* XXX may need way to throttle messages
* XXX should we do this for automatic or
* just configured tunnels ?
*/
(void) strlog(q->q_qinfo->qi_minfo->mi_idnum,
atp->tun_ppa, 1,
SL_ERROR | SL_WARN,
"%s.%s%d: Protocol unreachable. "
"Misconfigured tunnel? source %s"
" destination %s\n",
(atp->tun_flags & TUN_LOWER_MASK) ==
TUN_L_V4 ? "ip" : "ip6",
TUN_NAME, atp->tun_ppa,
inet_ntop(AF_INET, &outer_ipha->ipha_dst,
buf1, sizeof (buf1)),
inet_ntop(AF_INET, &outer_ipha->ipha_src,
buf2, sizeof (buf2)));
/* FALLTHRU */
case ICMP_NET_UNREACHABLE:
case ICMP_HOST_UNREACHABLE:
case ICMP_DEST_NET_UNKNOWN:
case ICMP_DEST_HOST_UNKNOWN:
case ICMP_SRC_HOST_ISOLATED:
case ICMP_SOURCE_ROUTE_FAILED:
case ICMP_DEST_NET_UNREACH_TOS:
case ICMP_DEST_HOST_UNREACH_TOS:
icmp.icmph_type = ICMP_DEST_UNREACHABLE;
/* XXX HOST or NET unreachable? */
icmp.icmph_code = ICMP_NET_UNREACHABLE;
icmp.icmph_rd_gateway = (ipaddr_t)0;
break;
case ICMP_DEST_NET_UNREACH_ADMIN:
case ICMP_DEST_HOST_UNREACH_ADMIN:
icmp.icmph_type = ICMP_DEST_UNREACHABLE;
icmp.icmph_code = ICMP_DEST_NET_UNREACH_ADMIN;
icmp.icmph_rd_gateway = (ipaddr_t)0;
break;
default:
atomic_add_32(&atp->tun_InErrors, 1);
freemsg(mp);
return;
}
break;
case ICMP_TIME_EXCEEDED:
icmp.icmph_type = ICMP_TIME_EXCEEDED;
icmp.icmph_code = code;
icmp.icmph_rd_gateway = (ipaddr_t)0;
break;
case ICMP_PARAM_PROBLEM:
icmp.icmph_type = ICMP_PARAM_PROBLEM;
if (icmph->icmph_pp_ptr < (uchar_t *)inner_ipha - mp->b_rptr) {
tun0dbg(("icmp_ricmp_err_v4_v4: ICMP_PARAM_PROBLEM " \
"too short\n"));
atomic_add_32(&atp->tun_InErrors, 1);
freemsg(mp);
return;
}
icmp.icmph_pp_ptr = htonl(icmph->icmph_pp_ptr -
((uchar_t *)inner_ipha - mp->b_rptr) + sizeof (ipha_t) +
sizeof (icmph_t));
break;
default:
atomic_add_32(&atp->tun_InErrors, 1);
freemsg(mp);
return;
}
if (!tun_icmp_message_v4(q, inner_ipha, &icmp, mp)) {
atomic_add_32(&atp->tun_InDiscard, 1);
atomic_add_32(&atp->tun_allocbfail, 1);
}
}
/*
* Process ICMP message from IPv6 encapsulating an IPv4 packet
* If this message contains path mtu information, cut out the
* encapsulation from the icmp data. If there is still useful
* information in the icmp data pass it upstream (packaged correctly for
* the upper layer IP)
*/
static void
icmp_ricmp_err_v4_v6(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp, icmp6_t *icmph)
{
tun_t *atp = (tun_t *)q->q_ptr;
ip6_t *ip6;
ipha_t *ipha;
int outer_hlen;
icmph_t icmp;
uint8_t type;
size_t offset, newoffset;
uint8_t *hdrp;
ip6_dest_t *destp;
size_t optlen, length;
struct ip6_opt *optp;
boolean_t found = B_FALSE;
ip6_pkt_t pkt;
mblk_t *orig_mp = mp;
ip6 = (ip6_t *)&(icmph[1]);
/*
* The packet looks like this:
*
* [IPv6(0)][ICMPv6][IPv6(1)][IPv4][ULP]
*
* "ip6" points to the IPv6 header labelled (1).
*/
outer_hlen = ip_hdr_length_v6(mp, ip6);
ipha = (ipha_t *)((uint8_t *)ip6 + outer_hlen);
type = icmph->icmp6_type;
/*
* NOTE: icmp_inbound() in IP already checked global policy on the
* outermost header. If we got here, IP thought it was okay for
* us to receive it. We now have to use inner policy to see if
* we want to percolate it up (like conn_t's are checked).
*
* Use -outer_hlen to indicate this is an ICMP packet. And
* ipsec_tun_inbound() always frees ipsec_mp.
*/
if (!ipsec_tun_inbound(ipsec_mp, &mp, atp->tun_itp, ipha, NULL, NULL,
ip6, -outer_hlen, atp->tun_netstack))
/* Callee did all of the freeing */
return;
ASSERT(mp == orig_mp);
/* new packet will contain all of old packet */
mp->b_rptr = (uchar_t *)ipha;
/*
* Fill in "icmp" data structure for passing to tun_icmp_message_v4().
*/
switch (type) {
case ICMP6_PARAM_PROB:
/*
* If the ICMPv6 error points to a valid Tunnel
* Encapsulation Limit option and the limit value is
* 0, then fall through and send a host unreachable
* message. Otherwise, break.
*/
hdrp = (uint8_t *)&ip6[1];
pkt.ipp_fields = 0; /* must be initialized */
(void) ip_find_hdr_v6(mp, ip6, &pkt, NULL);
if ((pkt.ipp_fields & IPPF_DSTOPTS) != 0) {
destp = pkt.ipp_dstopts;
} else if ((pkt.ipp_fields & IPPF_RTDSTOPTS) != 0) {
destp = pkt.ipp_rtdstopts;
} else {
break; /* out of switch */
}
offset = sizeof (ip6_t) + ((uint8_t *)destp - hdrp);
newoffset = offset + 8 * (destp->ip6d_len + 1);
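/*
* ip6d_len counts 8-octet units beyond the first 8 octets of the
* destination options header, so the header spans
* 8 * (ip6d_len + 1) bytes starting at "offset".
*/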
hdrp = (uint8_t *)destp;
if ((offset <= icmph->icmp6_pptr) &&
(icmph->icmp6_pptr < newoffset)) {
/*
* We have a potential match. Parse the header into
* options.
*/
length = (newoffset - offset) - 2;
optp = (struct ip6_opt *)(destp + 1);
offset += 2;
hdrp += 2;
while (length > 0 && found != B_TRUE) {
/*
* hdrp[2] is the tunnel encapsulation limit
* value.
*/
if ((optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) &&
((offset + 2) == icmph->icmp6_pptr) &&
(hdrp[2] == 0)) {
/* Found it. */
found = B_TRUE;
}
optlen = optp->ip6o_len + 2;
length -= optlen;
hdrp += optlen;
offset += optlen;
}
}
if (found != B_TRUE) {
freemsg(mp);
return;
}
/*FALLTHRU*/
case ICMP6_TIME_EXCEEDED:
case ICMP6_DST_UNREACH:
icmp.icmph_type = ICMP_DEST_UNREACHABLE;
icmp.icmph_code = ICMP_HOST_UNREACHABLE;
break;
case ICMP6_PACKET_TOO_BIG: {
uint32_t mtu;
mtu = ntohl(icmph->icmp6_mtu);
mutex_enter(&atp->tun_lock);
mtu = tun_update_link_mtu(q, mtu, B_TRUE);
mutex_exit(&atp->tun_lock);
/*
* RFC 2473 says we should only forward this on to the IPv4
* original source if the IPv4 header has the DF bit set.
*/
if (ipha->ipha_fragment_offset_and_flags & IPH_DF) {
icmp.icmph_type = ICMP_DEST_UNREACHABLE;
icmp.icmph_code = ICMP_FRAGMENTATION_NEEDED;
/*
* NOTE - htons() because ICMP (for IPv4) uses a
* uint16_t here.
*/
icmp.icmph_du_mtu = htons(mtu);
icmp.icmph_du_zero = 0;
}
break;
}
default:
freemsg(mp);
return;
}
if (!tun_icmp_message_v4(q, ipha, &icmp, mp)) {
atomic_add_32(&atp->tun_InDiscard, 1);
atomic_add_32(&atp->tun_allocbfail, 1);
}
}
/*
* Process ICMP message from IPv6 encapsulating an IPv6 packet
* If this message contains path mtu information, cut out the
* encapsulation from the icmp data. If there is still useful
* information in the icmp data pass it upstream (packaged correctly for
* the upper layer IP). Otherwise, drop the message.
*/
static void
icmp_ricmp_err_v6_v6(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp, icmp6_t *icmph)
{
ip6_t *ip6;
ip6_t *inner_ip6;
int outer_hlen;
tun_t *atp = (tun_t *)q->q_ptr;
icmp6_t icmp;
uint8_t type;
size_t offset, newoffset;
uint8_t *hdrp;
ip6_dest_t *destp;
size_t optlen, length;
struct ip6_opt *optp;
boolean_t found = B_FALSE;
ip6_pkt_t pkt;
mblk_t *orig_mp = mp;
/*
* The packet looks like this :
*
* [IPv6(0)][ICMPv6][IPv6(1)][IPv6(2)][ULP]
*
* "ip6" points to the IPv6 header labelled (1), and inner_ip6 points
* to IPv6 header (2).
*/
ip6 = (ip6_t *)&icmph[1];
outer_hlen = ip_hdr_length_v6(mp, ip6);
inner_ip6 = (ip6_t *)((uint8_t *)ip6 + outer_hlen);
type = icmph->icmp6_type;
/*
* NOTE: icmp_inbound() in IP already checked global policy on the
* outermost header. If we got here, IP thought it was okay for
* us to receive it. We now have to use inner policy to see if
* we want to percolate it up (like conn_t's are checked).
*
* Use -outer_hlen to indicate this is an ICMP packet. And
* ipsec_tun_inbound() always frees ipsec_mp.
*/
if (!ipsec_tun_inbound(ipsec_mp, &mp, atp->tun_itp, NULL, inner_ip6,
NULL, ip6, -outer_hlen, atp->tun_netstack))
/* Callee did all of the freeing */
return;
ASSERT(mp == orig_mp);
/* new packet will contain all of old packet */
mp->b_rptr = (uchar_t *)inner_ip6;
/*
* Fill in "icmp" data structure for passing to tun_icmp_message_v6().
*/
switch (type) {
case ICMP6_PARAM_PROB:
/*
* If the ICMPv6 error points to a valid Tunnel
* Encapsulation Limit option and the limit value is
* 0, then fall through and send a host unreachable
* message. Otherwise, break.
*/
hdrp = (uint8_t *)&ip6[1];
pkt.ipp_fields = 0; /* must be initialized */
(void) ip_find_hdr_v6(mp, ip6, &pkt, NULL);
if ((pkt.ipp_fields & IPPF_DSTOPTS) != 0) {
destp = pkt.ipp_dstopts;
} else if ((pkt.ipp_fields & IPPF_RTDSTOPTS) != 0) {
destp = pkt.ipp_rtdstopts;
} else {
break; /* out of switch */
}
offset = sizeof (ip6_t) + ((uint8_t *)destp - hdrp);
newoffset = offset + 8 * (destp->ip6d_len + 1);
hdrp = (uint8_t *)destp;
if ((offset <= icmph->icmp6_pptr) &&
(icmph->icmp6_pptr < newoffset)) {
/*
* We have a potential match. Parse the header into
* options.
*/
length = (newoffset - offset) - 2;
optp = (struct ip6_opt *)(destp + 1);
offset += 2;
hdrp += 2;
while (length > 0 && found != B_TRUE) {
/*
* hdrp[2] is the tunnel encapsulation limit
* value.
*/
if ((optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) &&
((offset + 2) == icmph->icmp6_pptr) &&
(hdrp[2] == 0)) {
/* Found it. */
found = B_TRUE;
}
optlen = optp->ip6o_len + 2;
length -= optlen;
hdrp += optlen;
offset += optlen;
}
}
if (found != B_TRUE) {
freemsg(mp);
return; /* case */
}
/*FALLTHRU*/
case ICMP6_TIME_EXCEEDED:
case ICMP6_DST_UNREACH:
icmp.icmp6_type = ICMP6_DST_UNREACH;
icmp.icmp6_code = ICMP6_DST_UNREACH_ADDR;
break;
case ICMP6_PACKET_TOO_BIG: {
uint32_t mtu;
mtu = ntohl(icmph->icmp6_mtu);
mutex_enter(&atp->tun_lock);
mtu = tun_update_link_mtu(q, mtu, B_TRUE);
mutex_exit(&atp->tun_lock);
/*
* RFC 2473 says we should forward this on to the IPv6 original
* source only if the original packet size is larger than the
* IPv6 minimum link MTU.
*/
if (ip_hdr_length_v6(mp, inner_ip6) > IPV6_MIN_MTU) {
icmp.icmp6_type = ICMP6_PACKET_TOO_BIG;
icmp.icmp6_code = 0;
icmp.icmp6_mtu = htonl(mtu);
}
break;
}
default:
freemsg(mp);
return;
}
if (tun_icmp_message_v6(q, inner_ip6, &icmp, IPV6_DEFAULT_HOPS, mp) ==
B_FALSE) {
atomic_add_32(&atp->tun_InDiscard, 1);
atomic_add_32(&atp->tun_allocbfail, 1);
}
}
/*
* Process ICMP message from IPv4 encapsulating an IPv6 packet
* If this message contains path mtu information, cut out the
* encapsulation from the icmp data. If there is still useful
* information in the icmp data pass it upstream (packaged correctly for
* the upper layer IP)
*/
static void
icmp_ricmp_err_v6_v4(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp)
{
tun_t *atp = (tun_t *)q->q_ptr;
ip6_t *ip6h;
ipha_t *outer_ipha;
int outer_hlen;
int hlen;
icmp6_t icmp6;
uint8_t type;
uint8_t code;
uint8_t hoplim;
char buf1[INET_ADDRSTRLEN];
char buf2[INET_ADDRSTRLEN];
icmph_t *icmph;
uint16_t ip6_hdr_length;
uint8_t *nexthdrp;
mblk_t *orig_mp = mp;
/*
* The case here is pretty easy when compared to IPv4 in IPv4
* encapsulation.
*
* The packet looks like this :
*
* [IPv4(0)][ICMPv4][IPv4(1)][IPv6][ULP]
*
* We want most of this in one piece. But if the ULP is ICMPv6, we
* need to see whether it is an ICMPv6 error or not. We should not
* send icmp errors in response to icmp errors. "outer_ipha" points to
* IP header (1). "ip6h" is obvious. To see whether ULP is ICMPv6 or
* not, we need to call ip_hdr_length_nexthdr_v6 function which
* expects everything to be pulled up. Fortunately, the caller
* should've done all of the pulling up.
*/
ASSERT(mp->b_cont == NULL);
/*
* icmp_inbound has pulled up the message until the
* outer IP header excluding any IP options.
*/
hlen = IPH_HDR_LENGTH((ipha_t *)mp->b_rptr);
icmph = (icmph_t *)(&mp->b_rptr[hlen]);
outer_ipha = (ipha_t *)&icmph[1];
outer_hlen = IPH_HDR_LENGTH(outer_ipha);
ip6h = (ip6_t *)((uint8_t *)outer_ipha + outer_hlen);
if (((uchar_t *)ip6h + sizeof (ip6_t)) > mp->b_wptr) {
atomic_add_32(&atp->tun_InDiscard, 1);
if (ipsec_mp != NULL)
freeb(ipsec_mp);
freemsg(mp);
return;
}
/*
* Do not send ICMPv6 error in reply to ICMPv6 error.
*/
if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &ip6_hdr_length, &nexthdrp)) {
atomic_add_32(&atp->tun_InErrors, 1);
if (ipsec_mp != NULL)
freeb(ipsec_mp);
freemsg(mp);
return;
}
if (*nexthdrp == IPPROTO_ICMPV6) {
icmp6_t *inner_icmp6;
ip6_hdr_length += (hlen + sizeof (icmph_t) + outer_hlen);
inner_icmp6 = (icmp6_t *)(&mp->b_rptr[ip6_hdr_length]);
if ((mp->b_wptr < ((uchar_t *)inner_icmp6 + ICMP6_MINLEN)) ||
(ICMP6_IS_ERROR(inner_icmp6->icmp6_type)) ||
inner_icmp6->icmp6_type == ND_REDIRECT) {
atomic_add_32(&atp->tun_InErrors, 1);
if (ipsec_mp != NULL)
freeb(ipsec_mp);
freemsg(mp);
return;
}
}
type = icmph->icmph_type;
code = icmph->icmph_code;
hoplim = outer_ipha->ipha_ttl;
/*
* NOTE: icmp_inbound() in IP already checked global policy on the
* outermost header. If we got here, IP thought it was okay for
* us to receive it. We now have to use inner policy to see if
* we want to percolate it up (like conn_t's are checked).
*
* Use -outer_hlen to indicate this is an ICMP packet. And
* ipsec_tun_inbound() always frees ipsec_mp.
*/
if (!ipsec_tun_inbound(ipsec_mp, &mp, atp->tun_itp, NULL, ip6h,
outer_ipha, NULL, -outer_hlen, atp->tun_netstack))
/* Callee did all of the freeing */
return;
ASSERT(mp == orig_mp);
/* New packet will contain all of old packet */
mp->b_rptr = (uchar_t *)ip6h;
switch (type) {
case ICMP_DEST_UNREACHABLE:
switch (code) {
case ICMP_FRAGMENTATION_NEEDED: {
uint16_t mtu;
mtu = ntohs(icmph->icmph_du_mtu);
if (icmph->icmph_du_zero != 0 && mtu <= IP_MIN_MTU) {
tun0dbg(("icmp_ricmp_err_v6_v4: invalid " \
"icmp mtu\n"));
atomic_add_32(&atp->tun_InErrors, 1);
freemsg(mp);
return;
}
mutex_enter(&atp->tun_lock);
mtu = tun_update_link_mtu(q, mtu, B_TRUE);
mutex_exit(&atp->tun_lock);
if (!tun_icmp_too_big_v6(q, ip6h, mtu, hoplim, mp)) {
atomic_add_32(&atp->tun_InDiscard, 1);
atomic_add_32(&atp->tun_allocbfail, 1);
}
return;
}
case ICMP_PROTOCOL_UNREACHABLE: {
/*
* XXX may need way to throttle messages
* XXX should we do this for automatic or
* just configured tunnels ?
*/
(void) strlog(q->q_qinfo->qi_minfo->mi_idnum,
atp->tun_ppa, 1,
SL_ERROR | SL_WARN,
"%s.%s%d: Protocol unreachable. "
"Misconfigured tunnel? source %s"
" destination %s\n",
(atp->tun_flags & TUN_LOWER_MASK) ==
TUN_L_V4 ? "ip" : "ip6",
TUN_NAME, atp->tun_ppa,
inet_ntop(AF_INET, &outer_ipha->ipha_dst,
buf1, sizeof (buf1)),
inet_ntop(AF_INET, &outer_ipha->ipha_src,
buf2, sizeof (buf2)));
icmp6.icmp6_type = ICMP6_DST_UNREACH;
icmp6.icmp6_code = ICMP6_DST_UNREACH_ADDR;
icmp6.icmp6_data32[0] = 0;
break;
}
case ICMP_PORT_UNREACHABLE:
icmp6.icmp6_type = ICMP6_DST_UNREACH;
icmp6.icmp6_code = ICMP6_DST_UNREACH_NOPORT;
icmp6.icmp6_data32[0] = 0;
break;
case ICMP_NET_UNREACHABLE:
case ICMP_HOST_UNREACHABLE:
case ICMP_DEST_NET_UNKNOWN:
case ICMP_DEST_HOST_UNKNOWN:
case ICMP_SRC_HOST_ISOLATED:
case ICMP_DEST_NET_UNREACH_TOS:
case ICMP_DEST_HOST_UNREACH_TOS:
icmp6.icmp6_type = ICMP6_DST_UNREACH;
icmp6.icmp6_code = ICMP6_DST_UNREACH_NOROUTE;
icmp6.icmp6_data32[0] = 0;
break;
case ICMP_DEST_NET_UNREACH_ADMIN:
case ICMP_DEST_HOST_UNREACH_ADMIN:
icmp6.icmp6_type = ICMP6_DST_UNREACH;
icmp6.icmp6_code = ICMP6_DST_UNREACH_ADMIN;
icmp6.icmp6_data32[0] = 0;
break;
case ICMP_SOURCE_ROUTE_FAILED:
icmp6.icmp6_type = ICMP6_DST_UNREACH;
icmp6.icmp6_code =
ICMP6_DST_UNREACH_BEYONDSCOPE;
icmp6.icmp6_data32[0] = 0;
break;
default:
atomic_add_32(&atp->tun_InErrors, 1);
freemsg(mp);
return;
}
break;
case ICMP_TIME_EXCEEDED:
icmp6.icmp6_type = ICMP6_TIME_EXCEEDED;
icmp6.icmp6_code = code;
icmp6.icmp6_data32[0] = 0;
break;
case ICMP_PARAM_PROBLEM:
icmp6.icmp6_type = ICMP6_PARAM_PROB;
if (icmph->icmph_pp_ptr < (uchar_t *)ip6h - mp->b_rptr) {
tun0dbg(("icmp_ricmp_err_v6_v4: ICMP_PARAM_PROBLEM " \
"too short\n"));
atomic_add_32(&atp->tun_InErrors, 1);
freemsg(mp);
return;
}
icmp6.icmp6_pptr = htonl(
icmph->icmph_pp_ptr - ((uchar_t *)ip6h - mp->b_rptr)
+ sizeof (ip6_t) + sizeof (icmp6_t));
break;
default:
atomic_add_32(&atp->tun_InErrors, 1);
freemsg(mp);
return;
}
if (!tun_icmp_message_v6(q, ip6h, &icmp6, hoplim, mp)) {
atomic_add_32(&atp->tun_InDiscard, 1);
atomic_add_32(&atp->tun_allocbfail, 1);
}
}
/*
* Rewhack the packet for the upper IP.
*/
static boolean_t
tun_icmp_too_big_v4(queue_t *q, ipha_t *ipha, uint16_t mtu, mblk_t *mp)
{
icmph_t icmp;
tun2dbg(("tun_icmp_too_big_v4: mtu %u src %08x dst %08x len %d\n",
(uint_t)mtu, ipha->ipha_src, ipha->ipha_dst,
ipha->ipha_length));
icmp.icmph_type = ICMP_DEST_UNREACHABLE;
icmp.icmph_code = ICMP_FRAGMENTATION_NEEDED;
ASSERT(mtu >= IP_MIN_MTU);
icmp.icmph_du_zero = 0;
icmp.icmph_du_mtu = htons(mtu);
return (tun_icmp_message_v4(q, ipha, &icmp, mp));
}
/*
* Send an ICMP6_PACKET_TOO_BIG message
*/
static boolean_t
tun_icmp_too_big_v6(queue_t *q, ip6_t *ip6ha, uint32_t mtu, uint8_t hoplim,
mblk_t *mp)
{
icmp6_t icmp6;
icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG;
icmp6.icmp6_code = 0;
ASSERT(mtu >= IPV6_MIN_MTU);
icmp6.icmp6_mtu = htonl(mtu);
return (tun_icmp_message_v6(q, ip6ha, &icmp6, hoplim, mp));
}
/*
* Send an icmp message up an IPv4 stream. Take the data in mp,
* and prepend a new set of IPv4 + ICMP set of headers. Use the ipha and
* icmp pointers to help construct the aforementioned new headers.
*/
static boolean_t
tun_icmp_message_v4(queue_t *q, ipha_t *ipha, icmph_t *icmp, mblk_t *mp)
{
ssize_t plen, nsize;
mblk_t *send_mp;
tun_t *atp = (tun_t *)q->q_ptr;
ipha_t *nipha;
icmph_t *nicmp;
plen = mp->b_wptr - mp->b_rptr;
nsize = sizeof (ipha_t) + sizeof (icmph_t) + plen;
if ((send_mp = allocb(nsize, BPRI_HI)) == NULL) {
atomic_add_32(&atp->tun_InDiscard, 1);
atomic_add_32(&atp->tun_allocbfail, 1);
freemsg(mp);
return (B_FALSE);
}
send_mp->b_wptr = send_mp->b_rptr + nsize;
nipha = (ipha_t *)send_mp->b_rptr;
nicmp = (icmph_t *)(nipha + 1);
nipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION;
nipha->ipha_type_of_service = 0;
nipha->ipha_ident = 0;
nipha->ipha_fragment_offset_and_flags = htons(IPH_DF);
nipha->ipha_ttl = ipha->ipha_ttl;
nipha->ipha_protocol = IPPROTO_ICMP;
nipha->ipha_src = ipha->ipha_dst;
nipha->ipha_dst = ipha->ipha_src;
nipha->ipha_hdr_checksum = 0;
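/* Copy the first mblk's worth of the offending packet in as ICMP payload. */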
bcopy(ipha, &nicmp[1], plen);
if (mp->b_cont != NULL) {
size_t remainder = msgdsize(mp->b_cont);
send_mp->b_cont = mp->b_cont;
plen += remainder;
nsize += remainder;
}
nipha->ipha_length = htons(nsize);
nipha->ipha_hdr_checksum = ip_csum_hdr(nipha);
freeb(mp);
ASSERT(send_mp->b_rptr == send_mp->b_datap->db_base);
*nicmp = *icmp;
nicmp->icmph_checksum = 0;
nicmp->icmph_checksum = IP_CSUM(send_mp, sizeof (ipha_t), 0);
/* let ip know we are an icmp message */
atomic_add_64(&atp->tun_HCInOctets,
(int64_t)(plen + sizeof (icmph_t)));
putnext(q, send_mp);
return (B_TRUE);
}
/*
* Send an icmp message up an IPv6 stream.
*/
static boolean_t
tun_icmp_message_v6(queue_t *q, ip6_t *ip6h, icmp6_t *icmp6, uint8_t hoplim,
mblk_t *mp)
{
tun_t *atp = (tun_t *)q->q_ptr;
mblk_t *send_mp;
ssize_t nsize;
icmp6_t *nicmp6;
ip6_t *nip6h;
uint16_t *up;
uint32_t sum;
ssize_t plen;
plen = mp->b_wptr - mp->b_rptr;
nsize = sizeof (ip6_t) + sizeof (icmp6_t) + plen;
if ((send_mp = allocb(nsize, BPRI_HI)) == NULL) {
atomic_add_32(&atp->tun_InDiscard, 1);
atomic_add_32(&atp->tun_allocbfail, 1);
freemsg(mp);
return (B_FALSE);
}
send_mp->b_wptr = send_mp->b_rptr + nsize;
nip6h = (ip6_t *)send_mp->b_rptr;
nicmp6 = (icmp6_t *)&nip6h[1];
*nicmp6 = *icmp6;
nip6h->ip6_vcf = ip6h->ip6_vcf;
nip6h->ip6_plen = ip6h->ip6_plen;
nip6h->ip6_hops = hoplim;
nip6h->ip6_nxt = IPPROTO_ICMPV6;
nip6h->ip6_src = ip6h->ip6_dst;
nip6h->ip6_dst = ip6h->ip6_src;
/* copy of ipv6 header into icmp6 message */
bcopy(ip6h, &nicmp6[1], plen);
/* add in the rest of the packet if any */
if (mp->b_cont) {
send_mp->b_cont = mp->b_cont;
mp->b_cont = NULL;
plen += msgdsize(send_mp->b_cont);
}
freeb(mp);
nip6h->ip6_plen = htons(plen + sizeof (icmp6_t));
nicmp6->icmp6_cksum = 0;
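/*
* Seed the ICMPv6 checksum with the pseudo-header: next header,
* payload length, and the 16 16-bit words starting at ip6_src, which
* cover both the source and destination addresses (they are adjacent).
*/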
up = (uint16_t *)&nip6h->ip6_src;
sum = htons(IPPROTO_ICMPV6 +
ntohs(nip6h->ip6_plen)) +
up[0] + up[1] + up[2] + up[3] +
up[4] + up[5] + up[6] + up[7] +
up[8] + up[9] + up[10] + up[11] +
up[12] + up[13] + up[14] + up[15];
sum = (sum & 0xffff) + (sum >> 16);
nicmp6->icmp6_cksum = IP_CSUM(send_mp, IPV6_HDR_LEN, sum);
/* let ip know we are an icmp message */
atomic_add_64(&atp->tun_HCInOctets,
(int64_t)(plen + sizeof (icmp6_t)));
send_mp->b_datap->db_type = M_DATA;
putnext(q, send_mp);
return (B_TRUE);
}
/*
* Read side service routine.
*/
void
tun_rsrv(queue_t *q)
{
mblk_t *mp;
tun_t *atp = (tun_t *)q->q_ptr;
while (mp = getq(q)) {
if (tun_rproc(q, mp) == ENOMEM) {
break;
}
/*
* If we called qwriter, then the only way we
* can tell if we ran out of memory is to check if
* any events have been scheduled
*/
if (atp->tun_events.ev_rtimoutid != 0 &&
atp->tun_events.ev_rbufcid != 0) {
break;
}
}
}
/*
* Read side put procedure
*/
void
tun_rput(queue_t *q, mblk_t *mp)
{
/* note: q_first is 'protected' by perimeter */
if (q->q_first != NULL) {
(void) putq(q, mp);
} else {
(void) tun_rproc(q, mp);
}
}
static int
tun_rdata(queue_t *q, mblk_t *ipsec_mp, mblk_t *data_mp, tun_t *atp,
uint_t lvers)
{
char buf[TUN_WHO_BUF];
int error = 0;
ASSERT(ipsec_mp == NULL || ipsec_mp->b_cont == data_mp);
#define MESSAGE ((ipsec_mp == NULL) ? data_mp : ipsec_mp)
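/*
* MESSAGE names the outermost mblk, so the requeue and free calls
* below also cover the IPSEC_IN wrapper when one is present.
*/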
/*
* If it's an IPSEC_IN w/o any security properties, start treating
* it like a cleartext packet.
*/
if (ipsec_mp != NULL && !ipsec_in_is_secure(ipsec_mp)) {
freeb(ipsec_mp);
ipsec_mp = NULL;
}
if (atp->tun_state != DL_IDLE) {
atomic_add_32(&atp->tun_InErrors, 1);
atomic_add_64(&atp->tun_HCInUcastPkts, 1);
freemsg(MESSAGE);
return (error); /* pre-set to 0 */
}
if (!canputnext(q)) {
tun1dbg(("tun_rdata: flow controlled\n"));
ASSERT(data_mp->b_datap->db_type < QPCTL);
atomic_add_32(&atp->tun_nocanput, 1);
(void) putbq(q, MESSAGE);
error = ENOMEM;
goto bail;
}
if (lvers != TUN_L_V4 && lvers != TUN_L_V6) {
tun0dbg(("tun_rproc: %s no lower version\n",
tun_who(q, buf)));
atomic_add_32(&atp->tun_InErrors, 1);
freemsg(MESSAGE);
error = EIO;
goto bail;
}
#undef MESSAGE
error = (lvers == TUN_L_V4) ? tun_rdata_v4(q, ipsec_mp, data_mp, atp) :
tun_rdata_v6(q, ipsec_mp, data_mp, atp);
bail:
if (error) {
/* only record non-flow-control problems */
if (error != EBUSY) {
tun0dbg(("tun_rdata: %s error encountered %d\n",
tun_who(q, buf), error));
}
}
return (error);
}
/*
* Process read side messages
*/
static int
tun_rproc(queue_t *q, mblk_t *mp)
{
tun_t *atp = (tun_t *)q->q_ptr;
uint_t lvers;
int error = 0;
char buf[TUN_WHO_BUF];
ipsec_in_t *ii;
mblk_t *ipsec_mp;
/* no lock needed, won't ever change */
lvers = atp->tun_flags & TUN_LOWER_MASK;
switch (mp->b_datap->db_type) {
case M_DATA:
error = tun_rdata(q, NULL, mp, atp, lvers);
break;
case M_PROTO:
case M_PCPROTO:
/* its a TPI message */
error = tun_rput_tpi(q, mp);
break;
case M_CTL:
/* it's either an IPsec-protected packet... */
ii = (ipsec_in_t *)mp->b_rptr;
if (ii->ipsec_in_type == IPSEC_IN) {
if (mp->b_cont->b_datap->db_type == M_DATA) {
error = tun_rdata(q, mp, mp->b_cont, atp,
lvers);
break; /* Out of switch. */
} else {
ASSERT(mp->b_cont->b_datap->db_type == M_CTL);
/*
* ICMP message protected by IPsec.
* Split out IPSEC_IN and pass it up separately.
*/
ipsec_mp = mp;
mp = mp->b_cont;
}
} else {
ipsec_mp = NULL;
}
/* ... or an ICMP error message from IP */
atomic_add_64(&atp->tun_HCInUcastPkts, 1);
if (!canputnext(q)) {
atomic_add_32(&atp->tun_nocanput, 1);
atomic_add_32(&atp->tun_InDiscard, 1);
if (ipsec_mp != NULL)
freeb(ipsec_mp);
freemsg(mp);
break;
}
/* Pull everything up into mp. */
mp->b_datap->db_type = M_DATA;
if (!pullupmsg(mp, -1)) {
atomic_add_32(&atp->tun_InErrors, 1);
if (ipsec_mp != NULL)
freeb(ipsec_mp);
freemsg(mp);
break;
}
mp->b_datap->db_type = M_CTL;
if (lvers == TUN_L_V4) {
tun_rput_icmp_err_v4(q, mp, ipsec_mp);
} else if (lvers == TUN_L_V6) {
tun_rput_icmp_err_v6(q, mp, ipsec_mp);
} else {
if (ipsec_mp != NULL)
freeb(ipsec_mp);
freemsg(mp);
}
break;
case M_FLUSH:
if (*mp->b_rptr & FLUSHR) {
flushq(q, FLUSHALL);
*mp->b_rptr &= ~FLUSHR;
}
/* we're pretending to be a stream head */
if (*mp->b_rptr & FLUSHW) {
qreply(q, mp);
} else {
freemsg(mp);
}
break;
case IRE_DB_TYPE: {
ire_t *ire;
ip1dbg(("tun_rproc: received IRE_DB_TYPE."));
ire = (ire_t *)mp->b_rptr;
tun1dbg(("tun_rproc: received IRE_DB_TYPE, "
"ipsec_overhead is %d bytes", ire->ire_ipsec_overhead));
mutex_enter(&atp->tun_lock);
/*
* Take advice from lower-layer if it is bigger than what we
* have cached now. We do manage per-tunnel policy, but
* there may be global overhead to account for.
*/
atp->tun_ipsec_overhead = max(ire->ire_ipsec_overhead,
atp->tun_ipsec_overhead);
if (atp->tun_flags & TUN_DST) {
(void) tun_update_link_mtu(q, ire->ire_max_frag,
B_FALSE);
}
mutex_exit(&atp->tun_lock);
freemsg(mp);
break;
}
default:
tun0dbg(("tun_rproc: %s got unknown mblk type %d\n",
tun_who(q, buf), mp->b_datap->db_type));
freemsg(mp);
break;
}
return (error);
}
/*
* Handle Upper IPv4
*/
static void
tun_wdata_v4(queue_t *q, mblk_t *mp)
{
ipha_t *outer_ipha = NULL, *inner_ipha;
ip6_t *ip6 = NULL;
tun_t *atp = (tun_t *)q->q_ptr;
mblk_t *nmp;
size_t hdrlen;
int16_t encap_limit;
ASSERT((mp->b_wptr - mp->b_rptr) >= sizeof (ipha_t));
inner_ipha = (ipha_t *)mp->b_rptr;
/*
* increment mib counters and pass message off to ip
* note: we must always increment packet counters, but
* only increment the byte counter if we actually send the packet
*/
if (CLASSD(inner_ipha->ipha_dst)) {
atomic_add_64(&atp->tun_HCOutMulticastPkts, 1);
} else {
atomic_add_64(&atp->tun_HCOutUcastPkts, 1);
}
if (atp->tun_state != DL_IDLE || !(atp->tun_flags & TUN_BOUND)) {
atomic_add_32(&atp->tun_OutErrors, 1);
freemsg(mp);
return;
}
switch (atp->tun_flags & TUN_LOWER_MASK) {
case TUN_L_V4:
if (inner_ipha->ipha_dst == atp->tun_ipha.ipha_dst) {
/*
* Watch out! There is potential for an infinite loop.
* If IP sent a packet with destination address equal
* to the tunnel's destination address, we'll hit
* an infinite routing loop, where the packet will keep
* going through here.
*
* In the long term, perhaps IP should be somewhat
* intelligent about this. Until then, nip this in
* the bud.
*/
tun0dbg(("tun_wdata: inner dst == tunnel dst.\n"));
atp->tun_OutErrors++;
freemsg(mp);
return;
}
/* room for IPv4 header? */
if ((mp->b_rptr - mp->b_datap->db_base) < sizeof (ipha_t)) {
/* no */
nmp = allocb(sizeof (ipha_t) + atp->tun_extra_offset,
BPRI_HI);
if (nmp == NULL) {
atomic_add_32(&atp->tun_OutDiscard, 1);
atomic_add_32(&atp->tun_allocbfail, 1);
freemsg(mp);
return;
}
nmp->b_cont = mp;
mp = nmp;
mp->b_wptr = mp->b_datap->db_lim;
mp->b_rptr = mp->b_wptr - sizeof (ipha_t);
} else {
/* yes */
mp->b_rptr -= sizeof (ipha_t);
}
outer_ipha = (ipha_t *)mp->b_rptr;
/*
* copy template header into packet IPv4 header
*/
*outer_ipha = atp->tun_ipha;
outer_ipha->ipha_length = htons(ntohs(inner_ipha->ipha_length)
+ sizeof (ipha_t));
/*
* Copy the TOS from the inner header, but mask off the ECN bits
* (the two low-order bits of the TOS byte) because there is
* currently no tunnel-to-tunnel communication to determine whether
* both sides support ECN, so we opt for the safe choice: don't
* copy the ECN bits when doing encapsulation.
*/
outer_ipha->ipha_type_of_service =
(inner_ipha->ipha_type_of_service & ~0x03);
break;
case TUN_L_V6:
/* room for IPv6 header? */
hdrlen = sizeof (ip6_t);
encap_limit = atp->tun_encap_lim;
if (encap_limit >= 0) {
hdrlen += IPV6_TUN_ENCAP_OPT_LEN;
}
if ((mp->b_rptr - mp->b_datap->db_base) < hdrlen) {
/* no */
nmp = allocb(hdrlen + atp->tun_extra_offset,
BPRI_HI);
if (nmp == NULL) {
atomic_add_32(&atp->tun_OutDiscard, 1);
atomic_add_32(&atp->tun_allocbfail, 1);
freemsg(mp);
return;
}
nmp->b_cont = mp;
mp = nmp;
mp->b_wptr = mp->b_datap->db_lim;
mp->b_rptr = mp->b_wptr - hdrlen;
} else {
/* yes */
mp->b_rptr -= hdrlen;
}
ip6 = (ip6_t *)mp->b_rptr;
/*
* copy template header into packet IPv6 header
*/
bcopy(&atp->tun_ip6h, mp->b_rptr, hdrlen);
ip6->ip6_plen = htons(ntohs(inner_ipha->ipha_length) + hdrlen -
sizeof (ip6_t));
break;
default:
/* LINTED */
ASSERT(0 && "not supported");
atomic_add_32(&atp->tun_OutErrors, 1);
freemsg(mp);
return;
}
/*
* Request the destination ire regularly in case Path MTU has
* increased.
*/
if (TUN_IRE_TOO_OLD(atp))
tun_send_ire_req(q);
atomic_add_64(&atp->tun_HCOutOctets, (int64_t)msgdsize(mp));
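/*
 * Note that hdrlen was only set in the TUN_L_V6 case above; presumably
 * ipsec_tun_outbound() only looks at it when an outer IPv6 header
 * (ip6 != NULL) is in use.  A NULL return means IPsec consumed the
 * message; otherwise we may get back a b_next chain of messages to
 * put downstream.
 */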
mp = ipsec_tun_outbound(mp, atp, inner_ipha, NULL, outer_ipha, ip6,
hdrlen, atp->tun_netstack);
if (mp == NULL)
return;
/* send the packet chain down the transport stream to IPv4/IPv6 */
TUN_PUTMSG_CHAIN(q, mp, nmp);
}
/*
* Put M_DATA fastpath for upper IPv4: the outer tunnel header is
* already prepended to the packet; fix it up here and send it down.
* Assumes a canput has already been done on the stream.
*/
static int
tun_wputnext_v4(queue_t *q, mblk_t *mp)
{
tun_t *atp = (tun_t *)q->q_ptr;
ipha_t *inner_ipha, *outer_ipha = NULL;
ip6_t *ip6 = NULL;
uint_t hdrlen;
mblk_t *nmp;
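/*
 * As in tun_wputnext_v6(): fastpath reserves a bit more than we can
 * use, so skip over the extra (hardware) bytes to reach the outer
 * header; IP below us will fill them in.
 */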
mp->b_rptr += atp->tun_extra_offset;
if ((atp->tun_flags & TUN_L_V4) != 0) {
outer_ipha = (ipha_t *)mp->b_rptr;
hdrlen = IPH_HDR_LENGTH(outer_ipha);
if (mp->b_wptr - mp->b_rptr < hdrlen + sizeof (ipha_t)) {
if (!pullupmsg(mp, hdrlen + sizeof (ipha_t))) {
atomic_add_32(&atp->tun_OutErrors, 1);
freemsg(mp);
return (0); /* silently fail */
}
outer_ipha = (ipha_t *)mp->b_rptr;
}
inner_ipha = (ipha_t *)((uint8_t *)outer_ipha + hdrlen);
outer_ipha->ipha_length = htons(ntohs(inner_ipha->ipha_length) +
sizeof (ipha_t));
/*
* Copy the TOS from the inner header, but mask off the ECN bits
* (the two low-order bits of the TOS byte) because there is
* currently no tunnel-to-tunnel communication to determine whether
* both sides support ECN, so we opt for the safe choice: don't
* copy the ECN bits when doing encapsulation.
*/
outer_ipha->ipha_type_of_service =
(inner_ipha->ipha_type_of_service & ~0x03);
if (inner_ipha->ipha_dst == outer_ipha->ipha_dst) {
/*
* Infinite loop check. See the TUN_L_V4 case in
* tun_wdata_v4() for details.
*/
tun0dbg(
("tun_wputnext_v4: inner dst == tunnel dst.\n"));
atp->tun_OutErrors++;
freemsg(mp);
return (EINVAL);
}
} else if ((atp->tun_flags & TUN_L_V6) != 0) {
ip6 = (ip6_t *)mp->b_rptr;
ASSERT(ip6->ip6_nxt == IPPROTO_ENCAP ||
ip6->ip6_nxt == IPPROTO_DSTOPTS);
hdrlen = sizeof (ip6_t);
if (ip6->ip6_nxt == IPPROTO_DSTOPTS) {
/* XXX The code should be more general */
hdrlen += IPV6_TUN_ENCAP_OPT_LEN;
}
if (mp->b_wptr - mp->b_rptr < hdrlen + sizeof (ipha_t)) {
if (!pullupmsg(mp, hdrlen + sizeof (ipha_t))) {
atomic_add_32(&atp->tun_OutErrors, 1);
freemsg(mp);
return (0); /* silently fail */
}
ip6 = (ip6_t *)mp->b_rptr;
}
inner_ipha = (ipha_t *)((uint8_t *)ip6 + hdrlen);
ip6->ip6_plen = htons(ntohs(inner_ipha->ipha_length) +
hdrlen - sizeof (ip6_t));
} else {
/* XXX can't get here yet - force assert */
ASSERT((atp->tun_flags & TUN_L_V4) != 0);
freemsg(mp);
return (EINVAL);
}
/* XXX Do I hit this, given I have this check earlier? */
if (inner_ipha->ipha_dst == atp->tun_ipha.ipha_dst) {
/*
* Watch out! There is potential for an infinite loop.
* If IP sent a packet with destination address equal
* to the tunnel's destination address, we'll hit
* an infinite routing loop, where the packet will keep
* going through here.
*
* In the long term, perhaps IP should be somewhat
* intelligent about this. Until then, nip this in
* the bud.
*/
tun0dbg(("tun_wputnext_v4: inner dst == tunnel dst.\n"));
atp->tun_OutErrors++;
freemsg(mp);
return (EINVAL);
}
/*
* increment mib counters and pass message off to ip
* note: we must always increment packet counters, but
* only increment byte counter if we actually send packet
*/
if (CLASSD(inner_ipha->ipha_dst)) {
atomic_add_64(&atp->tun_HCOutMulticastPkts, 1);
} else {
atomic_add_64(&atp->tun_HCOutUcastPkts, 1);
}
if (!(atp->tun_flags & TUN_BOUND)) {
atomic_add_32(&atp->tun_OutErrors, 1);
freemsg(mp);
return (EINVAL);
}
atomic_add_64(&atp->tun_HCOutOctets, (int64_t)msgsize(mp));
mp = ipsec_tun_outbound(mp, atp, inner_ipha, NULL, outer_ipha, ip6,
hdrlen, atp->tun_netstack);
if (mp == NULL)
return (0);
/*
* Request the destination ire regularly in case Path MTU has
* increased.
*/
if (TUN_IRE_TOO_OLD(atp))
tun_send_ire_req(q);
/* send the packet chain down the transport stream to IPv4/IPv6 */
TUN_PUTMSG_CHAIN(q, mp, nmp);
return (0);
}
/*
* Put M_DATA fastpath for upper IPv6: the outer tunnel header is
* already prepended to the packet; fix it up here and send it down.
* Assumes a canput has already been done on the stream.
*/
static int
tun_wputnext_v6(queue_t *q, mblk_t *mp)
{
tun_t *atp = (tun_t *)q->q_ptr;
ip6_t *ip6h;
ip6_t *outer_ip6 = NULL;
uint_t hdrlen;
struct ip6_opt_tunnel *encap_opt;
int encap_limit = 0;
ipha_t *ipha = NULL;
mblk_t *nmp;
/*
* Fastpath reserves a bit more than we can use; get rid of the
* hardware bits.  IP below us will fill them in.
*/
mp->b_rptr += atp->tun_extra_offset;
if ((atp->tun_flags & TUN_L_V4) != 0) {
ipha = (ipha_t *)mp->b_rptr;
hdrlen = IPH_HDR_LENGTH(ipha);
if (mp->b_wptr - mp->b_rptr < hdrlen + sizeof (ip6_t)) {
if (!pullupmsg(mp, hdrlen + sizeof (ip6_t))) {
atomic_add_32(&atp->tun_OutErrors, 1);
freemsg(mp);
return (0); /* silently fail */
}
ipha = (ipha_t *)mp->b_rptr;
}
ip6h = (ip6_t *)((uint8_t *)ipha + hdrlen);
/*
* If the packet is no larger than the minimum IPv6 MTU, allow
* IPv4 to fragment it (don't set DF).
*/
if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN <= IPV6_MIN_MTU) {
ipha->ipha_fragment_offset_and_flags = 0;
} else {
ipha->ipha_fragment_offset_and_flags = htons(IPH_DF);
}
ipha->ipha_length = htons(ntohs(ip6h->ip6_plen) +
(uint16_t)sizeof (ip6_t) + (uint16_t)sizeof (ipha_t));
} else if ((atp->tun_flags & TUN_L_V6) != 0) {
outer_ip6 = (ip6_t *)mp->b_rptr;
ASSERT(outer_ip6->ip6_nxt == IPPROTO_IPV6 ||
outer_ip6->ip6_nxt == IPPROTO_DSTOPTS);
hdrlen = sizeof (ip6_t);
if (outer_ip6->ip6_nxt == IPPROTO_DSTOPTS)
hdrlen += IPV6_TUN_ENCAP_OPT_LEN;
if (mp->b_wptr - mp->b_rptr <
hdrlen + sizeof (ip6_t) + IPV6_TUN_ENCAP_OPT_LEN) {
if (!pullupmsg(mp, hdrlen + sizeof (ip6_t) +
IPV6_TUN_ENCAP_OPT_LEN)) {
atomic_add_32(&atp->tun_OutErrors, 1);
freemsg(mp);
return (0); /* silently fail */
}
outer_ip6 = (ip6_t *)mp->b_rptr;
}
ip6h = (ip6_t *)((uint8_t *)outer_ip6 + hdrlen);
if (IN6_ARE_ADDR_EQUAL(&outer_ip6->ip6_dst, &ip6h->ip6_dst)) {
/*
* Watch out! There is potential for an infinite loop.
* If IP sent a packet with destination address equal
* to the tunnel's destination address, we'll hit
* an infinite routing loop, where the packet will keep
* going through here.
*
* In the long term, perhaps IP should be somewhat
* intelligent about this. Until then, nip this in
* the bud.
*/
tun0dbg(
("tun_wputnext_v6: inner dst == tunnel dst.\n"));
atp->tun_OutErrors++;
freemsg(mp);
return (EINVAL);
}
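/*
 * If both the outer header template and the inner packet carry
 * destination options, recompute the encapsulation limit (from the
 * configured tun_encap_lim, possibly overridden by the inner packet's
 * TEL option, per RFC 2473) and rewrite the TEL value in the outer
 * header's destination options.
 */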
if ((ip6h->ip6_nxt == IPPROTO_DSTOPTS) &&
(outer_ip6->ip6_nxt == IPPROTO_DSTOPTS)) {
if (tun_limit_value_v6(q, mp, ip6h, &encap_limit)) {
if (encap_limit >= 0) {
encap_opt = (struct ip6_opt_tunnel *)
((char *)outer_ip6 +
sizeof (ip6_t) +
sizeof (struct ip6_dest));
encap_opt->ip6ot_encap_limit =
(uint8_t)encap_limit;
}
} else {
/* mp already freed by tun_limit_value_v6 */
return (0); /* silently fail */
}
}
outer_ip6->ip6_plen = htons(ntohs(ip6h->ip6_plen) + hdrlen);
} else {
/* XXX can't get here yet - force assert */
ASSERT((atp->tun_flags & TUN_L_V4) != 0);
freemsg(mp);
return (EINVAL);
}
/*
* increment mib counters and pass message off to ip
* note: we must always increment packet counters, but
* only increment byte counter if we actually send packet
*/
if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
atomic_add_64(&atp->tun_HCOutMulticastPkts, 1);
} else {
atomic_add_64(&atp->tun_HCOutUcastPkts, 1);
}
if (!(atp->tun_flags & TUN_BOUND)) {
atomic_add_32(&atp->tun_OutErrors, 1);
freemsg(mp);
return (EINVAL);
}
atomic_add_64(&atp->tun_HCOutOctets, (int64_t)msgsize(mp));
/*
* Request the destination ire regularly in case Path MTU has
* increased, but only for configured tunnels.
*/
if ((atp->tun_flags & TUN_DST) && TUN_IRE_TOO_OLD(atp))
tun_send_ire_req(q);
/* send the packet down the transport stream to IPv4/IPv6 */
mp = ipsec_tun_outbound(mp, atp, NULL, ip6h, ipha, outer_ip6, hdrlen,
atp->tun_netstack);
if (mp == NULL)
return (0);
/* send the packet chain down the transport stream to IPv4/IPv6 */
TUN_PUTMSG_CHAIN(q, mp, nmp);
return (0);
}
/*
* Determine whether we need to add a Tunnel Encapsulation Limit option and
* what its value should be. There are two reasons to add a TEL option:
* 1. The tunnel data structure specifies it by a non-negative
* tun_encap_lim member.
* 2. The data being encapsulated is an IPv6 packet that contains a TEL
* option. RFC 2473 says if that value is 0, return an ICMP parameter
* problem error report, else decrement the value and use it for a TEL
* option to be inserted in the encapsulating IPv6 packet.
*
* Return values:
* B_TRUE: Has a limit, use the value in *limitp.
* B_FALSE: Problem with limit, i.e. it was zero.
*/
static boolean_t
tun_limit_value_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, int *limitp)
{
int limit = 0;
ip6_dest_t *destp;
int optlen;
struct ip6_opt *optp;
tun_t *atp = (tun_t *)q->q_ptr;
ip6_pkt_t ipp;
icmp6_t icmp6;
size_t offset;
/*
* If tunnel has a non-negative limit, use it, but allow it to be
* overridden by tunnel encapsulation limit option in original packet
* (mp).
*/
limit = atp->tun_encap_lim;
/* Check mp for tunnel encapsulation limit destination option. */
ipp.ipp_fields = 0; /* must be initialized */
(void) ip_find_hdr_v6(mp, ip6h, &ipp, NULL);
if ((ipp.ipp_fields & IPPF_DSTOPTS) != 0) {
destp = ipp.ipp_dstopts;
optlen = 8 * (destp->ip6d_len + 1) - sizeof (*destp);
optp = (struct ip6_opt *)(destp + 1);
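/*
 * Walk the TLV-encoded options in the destination options header,
 * looking for a tunnel encapsulation limit option.
 */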
while (optlen > 0) {
if (optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) {
/*
* XXX maybe we should send an ICMP parameter
* problem in this case instead.
*/
ASSERT(optp->ip6o_len == 1);
limit = *(uint8_t *)(optp + 1);
/*
* RFC 2473 says that if the limit is 0, we must not
* encapsulate; send an ICMP parameter problem error
* and return B_FALSE.
*/
if (limit == 0) {
mp->b_rptr = (unsigned char *) ip6h;
icmp6.icmp6_type = ICMP6_PARAM_PROB;
icmp6.icmp6_code = 0;
offset = ((unsigned char *)(optp + 1))
- mp->b_rptr;
icmp6.icmp6_pptr = htonl(offset);
(void) tun_icmp_message_v6(q, ip6h,
&icmp6, IPV6_DEFAULT_HOPS, mp);
return (B_FALSE);
}
--limit;
break;
}
optlen -= (optp->ip6o_len + sizeof (*optp));
optp = (struct ip6_opt *)
(((char *)(optp + 1)) + optp->ip6o_len);
}
}
*limitp = limit;
return (B_TRUE);
}
/*
* Handle Upper IPv6 write side data
* Note: all lower tunnels must have a source
* This routine assumes that a canput has already been done on the
* stream.
*/
static void
tun_wdata_v6(queue_t *q, mblk_t *mp)
{
tun_t *atp = (tun_t *)q->q_ptr;
ipha_t *ipha = NULL;
ip6_t *ip6h, *outer_ip6 = NULL;
mblk_t *nmp;
ipaddr_t v4addr;
char buf1[INET6_ADDRSTRLEN];
char buf2[INET6_ADDRSTRLEN];
char buf[TUN_WHO_BUF];
size_t hdrlen;
int encap_limit = 0;
struct ip6_opt_tunnel *encap_opt;
tun_stack_t *tuns = atp->tun_netstack->netstack_tun;
ASSERT((mp->b_wptr - mp->b_rptr) >= sizeof (ip6_t));
ip6h = (ip6_t *)mp->b_rptr;
/*
* increment mib counters and pass message off to ip
* note: we must always increment packet counters, but
* only increment byte counter if we actually send packet
*/
if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
atomic_add_64(&atp->tun_HCOutMulticastPkts, 1);
} else {
atomic_add_64(&atp->tun_HCOutUcastPkts, 1);
}
if (atp->tun_state != DL_IDLE || !(atp->tun_flags & TUN_BOUND)) {
atomic_add_32(&atp->tun_OutErrors, 1);
goto drop;
}
/* check version */
ASSERT((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) ==
IPV6_DEFAULT_VERS_AND_FLOW);
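/*
 * Build the outer header according to the lower stream's IP version,
 * starting from the tunnel's pre-built header template.
 */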
switch (atp->tun_flags & TUN_LOWER_MASK) {
case TUN_L_V4:
/* room for IPv4 header? */
hdrlen = sizeof (ipha_t);
if ((mp->b_rptr - mp->b_datap->db_base) < sizeof (ipha_t)) {
/* no */
nmp = allocb(sizeof (ipha_t) + atp->tun_extra_offset,
BPRI_HI);
if (nmp == NULL) {
atomic_add_32(&atp->tun_OutDiscard, 1);
atomic_add_32(&atp->tun_allocbfail, 1);
goto drop;
}
nmp->b_cont = mp;
mp = nmp;
mp->b_wptr = mp->b_datap->db_lim;
mp->b_rptr = mp->b_wptr - sizeof (ipha_t);
} else {
/* yes */
mp->b_rptr -= sizeof (ipha_t);
}
ipha = (ipha_t *)mp->b_rptr;
/*
* Copy the template header into the packet's IPv4 header.
* For configured tunnels everything should already be in
* the template; automatic tunnels need the destination set
* from the incoming IPv6 packet.
*/
*ipha = atp->tun_ipha;
/* XXX don't support tun_laddr of 0 */
ASSERT(IN6_IS_ADDR_V4MAPPED(&atp->tun_laddr));
/* Is this an automatic tunnel ? */
if ((atp->tun_flags & TUN_AUTOMATIC) != 0) {
/*
* Process packets for automatic tunneling
*/
IN6_V4MAPPED_TO_IPADDR(&atp->tun_laddr,
ipha->ipha_src);
/*
* destination address must be compatible address
* and cannot be multicast
*/
if (!IN6_IS_ADDR_V4COMPAT(&ip6h->ip6_dst)) {
tun0dbg(
("tun_wdata_v6: %s dest is not IPv4: %s\n",
tun_who(q, buf),
inet_ntop(AF_INET6, &ip6h->ip6_dst,
buf1, sizeof (buf1))));
atomic_add_32(&atp->tun_OutErrors, 1);
goto drop;
}
IN6_V4MAPPED_TO_IPADDR(&ip6h->ip6_dst, v4addr);
if (CLASSD(v4addr)) {
tun0dbg(("tun_wdata_v6: %s Multicast dst not" \
" allowed : %s\n", tun_who(q, buf),
inet_ntop(AF_INET6, &ip6h->ip6_src,
buf2, sizeof (buf2))));
atomic_add_32(&atp->tun_OutErrors, 1);
goto drop;
}
ipha->ipha_dst = v4addr;
/* Is this a 6to4 tunnel ? */
} else if ((atp->tun_flags & TUN_6TO4) != 0) {
struct in_addr in_v4addr;
/*
* make sure IPv6 source is a 6to4 address.
*/
if (!IN6_IS_ADDR_6TO4(&ip6h->ip6_src)) {
tun0dbg(("tun_wdata_v6: %s tun: invalid " \
"IPv6 src (%s)\n", tun_who(q, buf),
inet_ntop(AF_INET6, &ip6h->ip6_src,
buf1, sizeof (buf1))));
atomic_add_32(&atp->tun_OutErrors, 1);
goto drop;
}
/*
* As per RFC 3056, the IPv4 source MUST be set to the
* V4ADDR portion of the IPv6 source.
*/
IN6_6TO4_TO_V4ADDR(&ip6h->ip6_src, &in_v4addr);
ipha->ipha_src = (ipaddr_t)in_v4addr.s_addr;
/*
* As per RFC 3056, the IPv4 destination MUST be set to
* either:
* - the V4ADDR portion of the IPv6 destination, if the
* destination is a 6to4 address.
* - the well known 6to4 Relay Router anycast address
* (192.88.99.1, defined in RFC 3068), if IPv6
* destination is a native IPv6 address.
* - a unicast address of a 6to4 relay router set by
* the administrator.
*
* This implementation will drop packets with native
* IPv6 destinations if 6to4 Relay Router communication
* support is disabled. This support is checked
* by examining tuns_relay_rtr_addr_v4; INADDR_ANY
* denotes
* support is disabled; a valid, routable IPv4 addr
* denotes support is enabled. Support is disabled
* by default, because there is no standard trust
* mechanism for communicating with 6to4 Relay Routers.
*/
if (IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) {
/* destination is a 6to4 router */
IN6_6TO4_TO_V4ADDR(&ip6h->ip6_dst,
&in_v4addr);
ipha->ipha_dst = (ipaddr_t)in_v4addr.s_addr;
} else {
/*
* destination is a native IPv6 address
*/
if (tuns->tuns_relay_rtr_addr_v4 ==
INADDR_ANY) {
/*
* 6to4 Relay Router communication
* support is disabled.
*/
tun1dbg(("tun_wdata_v6: "
"%s tuns_relay_rtr_addr_v4 = %s, "
"dropping packet with IPv6 dst "
"%s\n", tun_who(q, buf),
inet_ntop(AF_INET,
&tuns->tuns_relay_rtr_addr_v4,
buf1, sizeof (buf1)),
inet_ntop(AF_INET6, &ip6h->ip6_dst,
buf2, sizeof (buf2))));
atomic_add_32(&atp->tun_OutDiscard, 1);
goto drop;
}
/*
* 6to4 Relay Router communication support
* is enabled. Set IPv4 destination to
* address of configured Relay Router
* (this addr may equal the well-known
* 6to4 Relay Router anycast address,
* defined in RFC 3068)
*/
ipha->ipha_dst = tuns->tuns_relay_rtr_addr_v4;
}
}
/*
* If the IPv6 packet is no larger than the minimum IPv6 MTU,
* allow IPv4 to fragment it (don't set DF).
* This works because if our IPv6 length is less than the
* minimum IPv6 MTU, IPv4 might have to fragment anyway
* and we really can't handle a "message too big" ICMP
* error. If the packet is larger than the minimum IPv6 MTU,
* then a "message too big" ICMP error will cause IPv6
* to shrink its packets.
*/
if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN <= IPV6_MIN_MTU) {
ipha->ipha_fragment_offset_and_flags = 0;
} else {
ipha->ipha_fragment_offset_and_flags = htons(IPH_DF);
}
ipha->ipha_length = htons(ntohs(ip6h->ip6_plen) +
(uint16_t)sizeof (ip6_t) + (uint16_t)sizeof (ipha_t));
tun3dbg(("tun_wdata_v6: %s sending IPv4 packet src %s dest " \
"%s\n", tun_who(q, buf),
inet_ntop(AF_INET, &ipha->ipha_src, buf1, sizeof (buf1)),
inet_ntop(AF_INET, &ipha->ipha_dst,
buf2, sizeof (buf2))));
break;
case TUN_L_V6:
/* room for IPv6 header? */
hdrlen = sizeof (ip6_t);
/*
* Calculate the tunnel encapsulation limit: tun_limit_value_v6()
* returns B_FALSE on error (it has already sent an ICMP parameter
* problem and freed mp, so just return); otherwise a negative
* limit means don't include a TEL option and a non-negative limit
* is the value to insert. Right here, just update the header
* length to take the extra TEL destination option into account.
*/
if (tun_limit_value_v6(q, mp, ip6h, &encap_limit)) {
if (encap_limit >= 0)
hdrlen += IPV6_TUN_ENCAP_OPT_LEN;
} else
return; /* mp freed by tun_limit_value_v6 */
if ((mp->b_rptr - mp->b_datap->db_base) < hdrlen) {
/* no */
nmp = allocb(hdrlen + atp->tun_extra_offset,
BPRI_HI);
if (nmp == NULL) {
atomic_add_32(&atp->tun_OutDiscard, 1);
atomic_add_32(&atp->tun_allocbfail, 1);
freemsg(mp);
return;
}
nmp->b_cont = mp;
mp = nmp;
mp->b_wptr = mp->b_datap->db_lim;
mp->b_rptr = mp->b_wptr - hdrlen;
} else {
/* yes */
mp->b_rptr -= hdrlen;
}
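/*
 * Copy the outer IPv6 header template (plus its tunnel encapsulation
 * limit destination option, if one is being added) and patch in the
 * computed limit value.
 */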
outer_ip6 = (ip6_t *)mp->b_rptr;
bcopy(&atp->tun_ip6h, mp->b_rptr, hdrlen);
if (encap_limit >= 0) {
encap_opt = (struct ip6_opt_tunnel *)
((char *)outer_ip6 + sizeof (ip6_t) +
sizeof (struct ip6_dest));
encap_opt->ip6ot_encap_limit = (uint8_t)encap_limit;
}
/* Is this a 6to4 or automatic tunnel ? */
if ((atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)) != 0) {
atomic_add_32(&atp->tun_OutErrors, 1);
goto drop;
}
outer_ip6->ip6_plen = htons(ntohs(ip6h->ip6_plen) +
hdrlen);
break;
default:
/* LINTED */
ASSERT(0 && "not supported");
atomic_add_32(&atp->tun_OutErrors, 1);
goto drop;
}
atomic_add_64(&atp->tun_HCOutOctets, (int64_t)msgdsize(mp));
/*
* Request the destination ire regularly in case Path MTU has
* increased, but only for configured tunnels.
*/
if ((atp->tun_flags & TUN_DST) && TUN_IRE_TOO_OLD(atp))
tun_send_ire_req(q);
/* send the packet down the transport stream to IP */
mp = ipsec_tun_outbound(mp, atp, NULL, ip6h, ipha, outer_ip6, hdrlen,
atp->tun_netstack);
if (mp == NULL)
return;
/* send the packet chain down the transport stream to IPv4/IPv6 */
TUN_PUTMSG_CHAIN(q, mp, nmp);
return;
drop:
freemsg(mp);
}
/*
* T_BIND to lower stream.
*/
static int
tun_send_bind_req(queue_t *q)
{
tun_t *atp = (tun_t *)q->q_ptr;
mblk_t *mp;
struct T_bind_req *tbr;
int err = 0;
size_t size;
uint_t lvers;
char *cp;
if ((atp->tun_flags & TUN_SRC) == 0) {
return (EINVAL);
}
lvers = atp->tun_flags & TUN_LOWER_MASK;
if (lvers == TUN_L_V4) {
if (atp->tun_flags & TUN_SRC) {
ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(&atp->tun_laddr)));
if (atp->tun_flags & TUN_DST) {
ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(
&atp->tun_faddr)));
size = sizeof (ipa_conn_x_t);
} else {
size = sizeof (sin_t);
}
} else {
return (EINVAL);
}
} else { /* lower is V6 */
if (atp->tun_flags & TUN_SRC) {
ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(&atp->tun_laddr)));
if (atp->tun_flags & TUN_DST) {
ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(
&atp->tun_faddr)));
size = sizeof (ipa6_conn_x_t);
} else {
size = sizeof (sin6_t);
}
} else {
return (EINVAL);
}
}
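/*
 * The bind message is laid out as a T_bind_req followed by the local
 * (and, for configured tunnels, remote) address and a single trailing
 * protocol byte (IPPROTO_ENCAP or IPPROTO_IPV6). An IRE_DB_REQ_TYPE
 * mblk is chained on b_cont so that IP also returns the destination
 * ire along with the bind ack.
 */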
/* allocate an mblk */
if ((mp = tun_realloc_mblk(q, NULL, size + sizeof (struct T_bind_req) +
1, NULL, B_FALSE)) == NULL) {
tun0dbg(("tun_send_bind_req: couldn't allocate mblk\n"));
return (ENOMEM);
}
if ((mp->b_cont = tun_realloc_mblk(q, NULL, sizeof (ire_t), NULL,
B_FALSE)) == NULL) {
tun0dbg(("tun_send_bind_req: couldn't allocate mblk\n"));
freeb(mp);
return (ENOMEM);
}
mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
tbr = (struct T_bind_req *)mp->b_rptr;
tbr->CONIND_number = 0;
tbr->PRIM_type = T_BIND_REQ;
tbr->ADDR_length = size;
tbr->ADDR_offset = sizeof (struct T_bind_req);
cp = (char *)&tbr[1];
if (lvers == TUN_L_V4) {
/*
* Send a T_BIND_REQ down to IP to bind to IPPROTO_IPV6
* or IPPROTO_ENCAP.
*/
/* Source is always required */
ASSERT((atp->tun_flags & TUN_SRC) &&
!IN6_IS_ADDR_UNSPECIFIED(&atp->tun_laddr));
if (!(atp->tun_flags & TUN_DST) ||
IN6_IS_ADDR_UNSPECIFIED(&atp->tun_faddr)) {
sin_t *sin;
sin = (sin_t *)cp;
bzero(sin, sizeof (sin_t));
IN6_V4MAPPED_TO_IPADDR(&atp->tun_laddr,
sin->sin_addr.s_addr);
sin->sin_port = 0;
} else {
/*
* We used to use ipa_conn_t here, but discovered that
* IP insisted that the tunnel destination address be
* reachable, i.e. have a route. This causes problems
* in a number of cases. ipa_conn_x_t was invented to
* allow verifying destination reachability to be
* controlled. We choose not to verify destination
* reachability. All we really want is to register to
* receive packets for the tunnel, and don't care at
* this point whether the tunnel destination is
* reachable.
*/
ipa_conn_x_t *ipa;
if (!IN6_IS_ADDR_V4MAPPED(&atp->tun_faddr)) {
err = EINVAL;
goto error;
}
ipa = (ipa_conn_x_t *)cp;
bzero(ipa, sizeof (ipa_conn_x_t));
IN6_V4MAPPED_TO_IPADDR(&atp->tun_laddr,
ipa->acx_conn.ac_laddr);
IN6_V4MAPPED_TO_IPADDR(&atp->tun_faddr,
ipa->acx_conn.ac_faddr);
ipa->acx_conn.ac_fport = 0;
ipa->acx_conn.ac_lport = 0;
}
if ((atp->tun_flags & TUN_UPPER_MASK) == TUN_U_V6)
*(cp + size) = (uchar_t)IPPROTO_IPV6;
else
*(cp + size) = (uchar_t)IPPROTO_ENCAP;
} else {
ASSERT(lvers == TUN_L_V6);
if (!(atp->tun_flags & TUN_DST) ||
IN6_IS_ADDR_UNSPECIFIED(&atp->tun_faddr)) {
sin6_t *sin6;
sin6 = (sin6_t *)cp;
bzero(sin6, sizeof (sin6_t));
bcopy(&atp->tun_laddr, &sin6->sin6_addr,
sizeof (in6_addr_t));
} else {
ipa6_conn_x_t *ipa;
ipa = (ipa6_conn_x_t *)cp;
bzero(ipa, sizeof (ipa6_conn_x_t));
bcopy(&atp->tun_laddr, &ipa->ac6x_conn.ac6_laddr,
sizeof (in6_addr_t));
bcopy(&atp->tun_faddr, &ipa->ac6x_conn.ac6_faddr,
sizeof (in6_addr_t));
}
if ((atp->tun_flags & TUN_UPPER_MASK) == TUN_U_V6)
*(cp + size) = (uchar_t)IPPROTO_IPV6;
else
*(cp + size) = (uchar_t)IPPROTO_ENCAP;
}
mp->b_datap->db_type = M_PCPROTO;
/*
* Since we're requesting ire information for the destination
* along with this T_BIND_REQ, stamp the tunnel's tun_ire_lastreq
* with the current time.
*/
atp->tun_ire_lastreq = lbolt;
atp->tun_flags |= TUN_BIND_SENT;
putnext(WR(q), mp);
return (0);
error:
freemsg(mp);
return (err);
}
/*
* Update kstats
*/
static int
tun_stat_kstat_update(kstat_t *ksp, int rw)
{
tun_t *tunp;
tun_stats_t *tstats;
struct tunstat *tunsp;
if (ksp == NULL || ksp->ks_data == NULL)
return (EIO);
tstats = (tun_stats_t *)ksp->ks_private;
mutex_enter(&tstats->ts_lock);
tunsp = (struct tunstat *)ksp->ks_data;
/*
* A kstat write pushes the values back into the tunnel instance;
* only allow this when a single tunnel shares the kstat.
*/
if (rw == KSTAT_WRITE) {
if (tstats->ts_refcnt > 1) {
mutex_exit(&tstats->ts_lock);
return (EACCES);
}
tunp = tstats->ts_atp;
/*
* MIB II kstat variables
*/
tunp->tun_nocanput = tunsp->tuns_nocanput.value.ui32;
tunp->tun_xmtretry = tunsp->tuns_xmtretry.value.ui32;
tunp->tun_allocbfail = tunsp->tuns_allocbfail.value.ui32;
tunp->tun_InDiscard = tunsp->tuns_InDiscard.value.ui32;
tunp->tun_InErrors = tunsp->tuns_InErrors.value.ui32;
tunp->tun_OutDiscard = tunsp->tuns_OutDiscard.value.ui32;
tunp->tun_OutErrors = tunsp->tuns_OutErrors.value.ui32;
tunp->tun_HCInOctets = tunsp->tuns_HCInOctets.value.ui64;
tunp->tun_HCInUcastPkts = tunsp->tuns_HCInUcastPkts.value.ui64;
tunp->tun_HCInMulticastPkts =
tunsp->tuns_HCInMulticastPkts.value.ui64;
tunp->tun_HCOutOctets = tunsp->tuns_HCOutOctets.value.ui64;
tunp->tun_HCOutUcastPkts =
tunsp->tuns_HCOutUcastPkts.value.ui64;
tunp->tun_HCOutMulticastPkts =
tunsp->tuns_HCOutMulticastPkts.value.ui64;
mutex_exit(&tstats->ts_lock);
return (0);
}
/*
* Update kstats: first zero them all out, then
* walk through all the tunnels that share this kstat and
* add in their current stats.
*/
tunsp->tuns_nocanput.value.ui32 = 0;
tunsp->tuns_xmtretry.value.ui32 = 0;
tunsp->tuns_allocbfail.value.ui32 = 0;
tunsp->tuns_InDiscard.value.ui32 = 0;
tunsp->tuns_InErrors.value.ui32 = 0;
tunsp->tuns_OutDiscard.value.ui32 = 0;
tunsp->tuns_OutErrors.value.ui32 = 0;
tunsp->tuns_HCInOctets.value.ui64 = 0;
tunsp->tuns_HCInUcastPkts.value.ui64 = 0;
tunsp->tuns_HCInMulticastPkts.value.ui64 = 0;
tunsp->tuns_HCOutOctets.value.ui64 = 0;
tunsp->tuns_HCOutUcastPkts.value.ui64 = 0;
tunsp->tuns_HCOutMulticastPkts.value.ui64 = 0;
for (tunp = tstats->ts_atp; tunp; tunp = tunp->tun_kstat_next) {
tunsp->tuns_nocanput.value.ui32 += tunp->tun_nocanput;
tunsp->tuns_xmtretry.value.ui32 += tunp->tun_xmtretry;
tunsp->tuns_allocbfail.value.ui32 += tunp->tun_allocbfail;
tunsp->tuns_InDiscard.value.ui32 += tunp->tun_InDiscard;
tunsp->tuns_InErrors.value.ui32 += tunp->tun_InErrors;
tunsp->tuns_OutDiscard.value.ui32 += tunp->tun_OutDiscard;
tunsp->tuns_OutErrors.value.ui32 += tunp->tun_OutErrors;
tunsp->tuns_HCInOctets.value.ui64 += tunp->tun_HCInOctets;
tunsp->tuns_HCInUcastPkts.value.ui64 += tunp->tun_HCInUcastPkts;
tunsp->tuns_HCInMulticastPkts.value.ui64 +=
tunp->tun_HCInMulticastPkts;
tunsp->tuns_HCOutOctets.value.ui64 += tunp->tun_HCOutOctets;
tunsp->tuns_HCOutUcastPkts.value.ui64 +=
tunp->tun_HCOutUcastPkts;
tunsp->tuns_HCOutMulticastPkts.value.ui64 +=
tunp->tun_HCOutMulticastPkts;
}
tunsp->tuns_xmtbytes.value.ui32 =
tunsp->tuns_HCOutOctets.value.ui64 & 0xffffffff;
tunsp->tuns_rcvbytes.value.ui32 =
tunsp->tuns_HCInOctets.value.ui64 & 0xffffffff;
tunsp->tuns_opackets.value.ui32 =
tunsp->tuns_HCOutUcastPkts.value.ui64 & 0xffffffff;
tunsp->tuns_ipackets.value.ui32 =
tunsp->tuns_HCInUcastPkts.value.ui64 & 0xffffffff;
tunsp->tuns_multixmt.value.ui32 =
tunsp->tuns_HCOutMulticastPkts.value.ui64 & 0xffffffff;
tunsp->tuns_multircv.value.ui32 =
tunsp->tuns_HCInMulticastPkts.value.ui64 & 0xffffffff;
mutex_exit(&tstats->ts_lock);
return (0);
}
/*
* Initialize kstats
*/
static void
tun_statinit(tun_stats_t *tun_stat, char *modname, netstackid_t stackid)
{
kstat_t *ksp;
struct tunstat *tunsp;
char buf[32];
char *mod_buf;
/*
* create kstat name based on lower ip and ppa
*/
if (tun_stat->ts_lower == TUN_L_V4) {
mod_buf = "ip";
} else {
mod_buf = "ip6";
}
(void) sprintf(buf, "%s.%s%d", mod_buf, modname, tun_stat->ts_ppa);
tun1dbg(("tunstatinit: Creating kstat %s\n", buf));
if ((ksp = kstat_create_netstack(mod_buf, tun_stat->ts_ppa, buf, "net",
KSTAT_TYPE_NAMED, sizeof (struct tunstat) / sizeof (kstat_named_t),
KSTAT_FLAG_PERSISTENT, stackid)) == NULL) {
cmn_err(CE_CONT, "tun: kstat_create failed tun%d",
tun_stat->ts_ppa);
return;
}
tun_stat->ts_ksp = ksp;
tunsp = (struct tunstat *)(ksp->ks_data);
kstat_named_init(&tunsp->tuns_ipackets, "ipackets", KSTAT_DATA_UINT32);
kstat_named_init(&tunsp->tuns_opackets, "opackets", KSTAT_DATA_UINT32);
kstat_named_init(&tunsp->tuns_InErrors, "ierrors", KSTAT_DATA_UINT32);
kstat_named_init(&tunsp->tuns_OutErrors, "oerrors", KSTAT_DATA_UINT32);
kstat_named_init(&tunsp->tuns_xmtbytes, "obytes", KSTAT_DATA_UINT32);
kstat_named_init(&tunsp->tuns_rcvbytes, "rbytes", KSTAT_DATA_UINT32);
kstat_named_init(&tunsp->tuns_multixmt, "multixmt", KSTAT_DATA_UINT32);
kstat_named_init(&tunsp->tuns_multircv, "multircv", KSTAT_DATA_UINT32);
kstat_named_init(&tunsp->tuns_nocanput, "blocked", KSTAT_DATA_UINT32);
kstat_named_init(&tunsp->tuns_xmtretry, "xmtretry", KSTAT_DATA_UINT32);
kstat_named_init(&tunsp->tuns_InDiscard, "norcvbuf", KSTAT_DATA_UINT32);
kstat_named_init(&tunsp->tuns_OutDiscard, "noxmtbuf",
KSTAT_DATA_UINT32);
kstat_named_init(&tunsp->tuns_allocbfail, "allocbfail",
KSTAT_DATA_UINT32);
kstat_named_init(&tunsp->tuns_HCOutUcastPkts, "opackets64",
KSTAT_DATA_UINT64);
kstat_named_init(&tunsp->tuns_HCInUcastPkts, "ipackets64",
KSTAT_DATA_UINT64);
kstat_named_init(&tunsp->tuns_HCOutMulticastPkts, "multixmt64",
KSTAT_DATA_UINT64);
kstat_named_init(&tunsp->tuns_HCInMulticastPkts, "multircv64",
KSTAT_DATA_UINT64);
kstat_named_init(&tunsp->tuns_HCOutOctets, "obytes64",
KSTAT_DATA_UINT64);
kstat_named_init(&tunsp->tuns_HCInOctets, "rbytes64",
KSTAT_DATA_UINT64);
ksp->ks_update = tun_stat_kstat_update;
ksp->ks_private = (void *) tun_stat;
kstat_install(ksp);
}
/*
* Debug routine to print out tunnel name
*/
static char *
tun_who(queue_t *q, char *buf)
{
tun_t *atp = (tun_t *)q->q_ptr;
char ppa_buf[20];
if (buf == NULL)
return ("tun_who: no buf");
if (atp->tun_state != DL_UNATTACHED) {
(void) sprintf(ppa_buf, "%d", atp->tun_ppa);
} else {
(void) sprintf(ppa_buf, "<not attached>");
}
(void) sprintf(buf, "%s.%s%s (%s)",
(atp->tun_flags & TUN_LOWER_MASK) == TUN_L_V4 ? "ip" :
(atp->tun_flags & TUN_LOWER_MASK) == TUN_L_V6 ? "ip6" : "<unknown>",
q->q_qinfo->qi_minfo->mi_idname,
ppa_buf,
(atp->tun_flags & TUN_UPPER_MASK) == TUN_U_V4 ? "inet" :
(atp->tun_flags & TUN_UPPER_MASK) == TUN_U_V6 ? "inet6" :
"<unknown af>");
return (buf);
}
/*
* Initialize the tunnel stack instance.
*/
/*ARGSUSED*/
static void *
tun_stack_init(netstackid_t stackid, netstack_t *ns)
{
tun_stack_t *tuns;
ipsec_stack_t *ipss = ns->netstack_ipsec;
tuns = (tun_stack_t *)kmem_zalloc(sizeof (*tuns), KM_SLEEP);
tuns->tuns_netstack = ns;
mutex_init(&tuns->tuns_global_lock, NULL, MUTEX_DEFAULT, NULL);
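/*
 * Register this module's by-address tunnel policy lookup routine with
 * the IPsec stack; tun_stack_fini() restores the dummy routine.
 */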
rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_WRITER);
ipss->ipsec_itp_get_byaddr = itp_get_byaddr_fn;
rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock);
return (tuns);
}
/*
* Free the tunnel stack instance.
*/
/*ARGSUSED*/
static void
tun_stack_fini(netstackid_t stackid, void *arg)
{
tun_stack_t *tuns = (tun_stack_t *)arg;
ipsec_stack_t *ipss = tuns->tuns_netstack->netstack_ipsec;
int i;
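/*
 * Restore the dummy lookup routine, then verify that no tunnels remain
 * hashed on this stack instance before tearing it down.
 */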
rw_enter(&ipss->ipsec_itp_get_byaddr_rw_lock, RW_WRITER);
ipss->ipsec_itp_get_byaddr = itp_get_byaddr_dummy;
rw_exit(&ipss->ipsec_itp_get_byaddr_rw_lock);
for (i = 0; i < TUN_PPA_SZ; i++) {
ASSERT(tuns->tuns_ppa_list[i] == NULL);
}
for (i = 0; i < TUN_T_SZ; i++) {
ASSERT(tuns->tuns_byaddr_list[i] == NULL);
}
mutex_destroy(&tuns->tuns_global_lock);
kmem_free(tuns, sizeof (*tuns));
}