ip6.c revision 2b24ab6b3865caeede9eeb9db6b83e1d89dcd1ea
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 1990 Mentat Inc.
*/
#include <sys/types.h>
#include <sys/stream.h>
#include <sys/dlpi.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/strsun.h>
#include <sys/strlog.h>
#include <sys/strsubr.h>
#define _SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/sdt.h>
#include <sys/kobj.h>
#include <sys/zone.h>
#include <sys/neti.h>
#include <sys/hook.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/vtrace.h>
#include <sys/isa_defs.h>
#include <sys/atomic.h>
#include <sys/iphada.h>
#include <sys/policy.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/if_dl.h>
#include <sys/sockio.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/sctp.h>
#include <inet/common.h>
#include <inet/mi.h>
#include <inet/optcom.h>
#include <inet/mib2.h>
#include <inet/nd.h>
#include <inet/arp.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <inet/ip6.h>
#include <inet/ip6_asp.h>
#include <inet/tcp.h>
#include <inet/tcp_impl.h>
#include <inet/udp_impl.h>
#include <inet/ipp_common.h>
#include <inet/ip_multi.h>
#include <inet/ip_if.h>
#include <inet/ip_ire.h>
#include <inet/ip_rts.h>
#include <inet/ip_ndp.h>
#include <net/pfkeyv2.h>
#include <inet/ipsec_info.h>
#include <inet/sadb.h>
#include <inet/ipsec_impl.h>
#include <inet/iptun/iptun_impl.h>
#include <inet/sctp_ip.h>
#include <sys/pattr.h>
#include <inet/ipclassifier.h>
#include <inet/ipsecah.h>
#include <inet/rawip_impl.h>
#include <inet/rts_impl.h>
#include <sys/squeue_impl.h>
#include <sys/squeue.h>
#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>
#include <rpc/pmap_prot.h>
/* Temporary; for CR 6451644 work-around */
#include <sys/ethernet.h>
extern int ip_squeue_flag;
/*
* Naming conventions:
* These rules should be judiciously applied
* if there is a need to identify something as IPv6 versus IPv4
* IPv6 funcions will end with _v6 in the ip module.
* IPv6 funcions will end with _ipv6 in the transport modules.
* IPv6 macros:
* Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
* Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
* And then there are ..V4_PART_OF_V6.
* The intent is that macros in the ip module end with _V6.
* IPv6 global variables will start with ipv6_
* IPv6 structures will start with ipv6
* IPv6 defined constants should start with IPV6_
* (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
*/
/*
* ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
* We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
* from IANA. This mechanism will remain in effect until an official
* number is obtained.
*/
uchar_t ip6opt_ls;
const in6_addr_t ipv6_all_ones =
{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
#else /* _BIG_ENDIAN */
const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
#endif /* _BIG_ENDIAN */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
#else /* _BIG_ENDIAN */
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
#endif /* _BIG_ENDIAN */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
#else /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
#endif /* _BIG_ENDIAN */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
#else /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
#endif /* _BIG_ENDIAN */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
#else /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
#endif /* _BIG_ENDIAN */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_solicited_node_mcast =
{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
#else /* _BIG_ENDIAN */
const in6_addr_t ipv6_solicited_node_mcast =
{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
#endif /* _BIG_ENDIAN */
/* Leave room for ip_newroute to tack on the src and target addresses */
#define OK_RESOLVER_MP_V6(mp) \
((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN))
#define IP6_MBLK_OK 0
#define IP6_MBLK_HDR_ERR 1
#define IP6_MBLK_LEN_ERR 2
static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *, ill_t *,
boolean_t, zoneid_t);
static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t,
const in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *);
static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill);
static int ip_bind_connected_v6(conn_t *, mblk_t **, uint8_t, in6_addr_t *,
uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t,
boolean_t, boolean_t, cred_t *);
static boolean_t ip_bind_get_ire_v6(mblk_t **, ire_t *, const in6_addr_t *,
iulp_t *, ip_stack_t *);
static int ip_bind_laddr_v6(conn_t *, mblk_t **, uint8_t,
const in6_addr_t *, uint16_t, boolean_t);
static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t);
static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
ill_t *, uint_t, uint_t, boolean_t, zoneid_t);
static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t,
ill_t *, ill_t *, uint_t, boolean_t, zoneid_t);
static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *,
uint8_t *, uint_t, uint8_t, ip_stack_t *);
static mblk_t *ip_rput_frag_v6(ill_t *, ill_t *, mblk_t *, ip6_t *,
ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *);
static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *);
static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int,
conn_t *, int, int, zoneid_t);
static boolean_t ipif_lookup_testaddr_v6(ill_t *, const in6_addr_t *,
ipif_t **);
/*
* A template for an IPv6 AR_ENTRY_QUERY
*/
static areq_t ipv6_areq_template = {
AR_ENTRY_QUERY, /* cmd */
sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */
sizeof (areq_t), /* name len (filled by ill_arp_alloc) */
ETHERTYPE_IPV6, /* protocol, from arps perspective */
sizeof (areq_t), /* target addr offset */
IPV6_ADDR_LEN, /* target addr_length */
0, /* flags */
sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */
IPV6_ADDR_LEN, /* sender addr length */
6, /* xmit_count */
1000, /* (re)xmit_interval in milliseconds */
4 /* max # of requests to buffer */
/* anything else filled in by the code */
};
/*
* Handle IPv6 ICMP packets sent to us. Consume the mblk passed in.
* The message has already been checksummed and if needed,
* a copy has been made to be sent any interested ICMP client (conn)
* Note that this is different than icmp_inbound() which does the fanout
* to conn's as well as local processing of the ICMP packets.
*
* All error messages are passed to the matching transport stream.
*
* Zones notes:
* The packet is only processed in the context of the specified zone: typically
* only this zone will reply to an echo request. This means that the caller must
* call icmp_inbound_v6() for each relevant zone.
*/
static void
icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill,
uint_t hdr_length, boolean_t mctl_present, uint_t flags, zoneid_t zoneid,
mblk_t *dl_mp)
{
icmp6_t *icmp6;
ip6_t *ip6h;
boolean_t interested;
in6_addr_t origsrc;
mblk_t *first_mp;
ipsec_in_t *ii;
ip_stack_t *ipst = ill->ill_ipst;
ASSERT(ill != NULL);
first_mp = mp;
if (mctl_present) {
mp = first_mp->b_cont;
ASSERT(mp != NULL);
ii = (ipsec_in_t *)first_mp->b_rptr;
ASSERT(ii->ipsec_in_type == IPSEC_IN);
}
ip6h = (ip6_t *)mp->b_rptr;
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) {
if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) {
ip1dbg(("icmp_inbound_v6: pullupmsg failed\n"));
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
freemsg(first_mp);
return;
}
ip6h = (ip6_t *)mp->b_rptr;
}
if (ipst->ips_icmp_accept_clear_messages == 0) {
first_mp = ipsec_check_global_policy(first_mp, NULL,
NULL, ip6h, mctl_present, ipst->ips_netstack);
if (first_mp == NULL)
return;
}
/*
* On a labeled system, we have to check whether the zone itself is
* permitted to receive raw traffic.
*/
if (is_system_labeled()) {
if (zoneid == ALL_ZONES)
zoneid = tsol_packet_to_zoneid(mp);
if (!tsol_can_accept_raw(mp, B_FALSE)) {
ip1dbg(("icmp_inbound_v6: zone %d can't receive raw",
zoneid));
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
freemsg(first_mp);
return;
}
}
icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type,
icmp6->icmp6_code));
interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK);
/* Initiate IPPF processing here */
if (IP6_IN_IPP(flags, ipst)) {
/*
* If the ifindex changes due to SIOCSLIFINDEX
* packet may return to IP on the wrong ill.
*/
ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex);
if (mp == NULL) {
if (mctl_present) {
freeb(first_mp);
}
return;
}
}
switch (icmp6->icmp6_type) {
case ICMP6_DST_UNREACH:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs);
if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs);
break;
case ICMP6_TIME_EXCEEDED:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds);
break;
case ICMP6_PARAM_PROB:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems);
break;
case ICMP6_PACKET_TOO_BIG:
icmp_inbound_too_big_v6(q, first_mp, ill, inill, mctl_present,
zoneid);
return;
case ICMP6_ECHO_REQUEST:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos);
if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
!ipst->ips_ipv6_resp_echo_mcast)
break;
/*
* We must have exclusive use of the mblk to convert it to
* a response.
* If not, we copy it.
*/
if (mp->b_datap->db_ref > 1) {
mblk_t *mp1;
mp1 = copymsg(mp);
freemsg(mp);
if (mp1 == NULL) {
BUMP_MIB(ill->ill_icmp6_mib,
ipv6IfIcmpInErrors);
if (mctl_present)
freeb(first_mp);
return;
}
mp = mp1;
ip6h = (ip6_t *)mp->b_rptr;
icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
if (mctl_present)
first_mp->b_cont = mp;
else
first_mp = mp;
}
/*
* Turn the echo into an echo reply.
* Remove any extension headers (do not reverse a source route)
* and clear the flow id (keep traffic class for now).
*/
if (hdr_length != IPV6_HDR_LEN) {
int i;
for (i = 0; i < IPV6_HDR_LEN; i++)
mp->b_rptr[hdr_length - i - 1] =
mp->b_rptr[IPV6_HDR_LEN - i - 1];
mp->b_rptr += (hdr_length - IPV6_HDR_LEN);
ip6h = (ip6_t *)mp->b_rptr;
ip6h->ip6_nxt = IPPROTO_ICMPV6;
hdr_length = IPV6_HDR_LEN;
}
ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL;
icmp6->icmp6_type = ICMP6_ECHO_REPLY;
ip6h->ip6_plen =
htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN));
origsrc = ip6h->ip6_src;
/*
* Reverse the source and destination addresses.
* If the return address is a multicast, zero out the source
* (ip_wput_v6 will set an address).
*/
if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
ip6h->ip6_src = ipv6_all_zeros;
ip6h->ip6_dst = origsrc;
} else {
ip6h->ip6_src = ip6h->ip6_dst;
ip6h->ip6_dst = origsrc;
}
/* set the hop limit */
ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
/*
* Prepare for checksum by putting icmp length in the icmp
* checksum field. The checksum is calculated in ip_wput_v6.
*/
icmp6->icmp6_cksum = ip6h->ip6_plen;
if (!mctl_present) {
/*
* This packet should go out the same way as it
* came in i.e in clear. To make sure that global
* policy will not be applied to this in ip_wput,
* we attach a IPSEC_IN mp and clear ipsec_in_secure.
*/
ASSERT(first_mp == mp);
first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack);
if (first_mp == NULL) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
freemsg(mp);
return;
}
ii = (ipsec_in_t *)first_mp->b_rptr;
/* This is not a secure packet */
ii->ipsec_in_secure = B_FALSE;
first_mp->b_cont = mp;
}
ii->ipsec_in_zoneid = zoneid;
ASSERT(zoneid != ALL_ZONES);
if (!ipsec_in_to_out(first_mp, NULL, ip6h)) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
return;
}
put(WR(q), first_mp);
return;
case ICMP6_ECHO_REPLY:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies);
break;
case ND_ROUTER_SOLICIT:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits);
break;
case ND_ROUTER_ADVERT:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements);
break;
case ND_NEIGHBOR_SOLICIT:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits);
if (mctl_present)
freeb(first_mp);
/* XXX may wish to pass first_mp up to ndp_input someday. */
ndp_input(inill, mp, dl_mp);
return;
case ND_NEIGHBOR_ADVERT:
BUMP_MIB(ill->ill_icmp6_mib,
ipv6IfIcmpInNeighborAdvertisements);
if (mctl_present)
freeb(first_mp);
/* XXX may wish to pass first_mp up to ndp_input someday. */
ndp_input(inill, mp, dl_mp);
return;
case ND_REDIRECT: {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects);
if (ipst->ips_ipv6_ignore_redirect)
break;
/*
* As there is no upper client to deliver, we don't
* need the first_mp any more.
*/
if (mctl_present)
freeb(first_mp);
if (!pullupmsg(mp, -1)) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
break;
}
icmp_redirect_v6(q, mp, ill);
return;
}
/*
* The next three icmp messages will be handled by MLD.
* Pass all valid MLD packets up to any process(es)
* listening on a raw ICMP socket. MLD messages are
* freed by mld_input function.
*/
case MLD_LISTENER_QUERY:
case MLD_LISTENER_REPORT:
case MLD_LISTENER_REDUCTION:
if (mctl_present)
freeb(first_mp);
mld_input(q, mp, ill);
return;
default:
break;
}
if (interested) {
icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill,
inill, mctl_present, zoneid);
} else {
freemsg(first_mp);
}
}
/*
* Process received IPv6 ICMP Packet too big.
* After updating any IRE it does the fanout to any matching transport streams.
* Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
*/
/* ARGSUSED */
static void
icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill,
boolean_t mctl_present, zoneid_t zoneid)
{
ip6_t *ip6h;
ip6_t *inner_ip6h;
icmp6_t *icmp6;
uint16_t hdr_length;
uint32_t mtu;
ire_t *ire, *first_ire;
mblk_t *first_mp;
ip_stack_t *ipst = ill->ill_ipst;
first_mp = mp;
if (mctl_present)
mp = first_mp->b_cont;
/*
* We must have exclusive use of the mblk to update the MTU
* in the packet.
* If not, we copy it.
*
* If there's an M_CTL present, we know that allocated first_mp
* earlier in this function, so we know first_mp has refcnt of one.
*/
ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1);
if (mp->b_datap->db_ref > 1) {
mblk_t *mp1;
mp1 = copymsg(mp);
freemsg(mp);
if (mp1 == NULL) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
if (mctl_present)
freeb(first_mp);
return;
}
mp = mp1;
if (mctl_present)
first_mp->b_cont = mp;
else
first_mp = mp;
}
ip6h = (ip6_t *)mp->b_rptr;
if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
hdr_length = ip_hdr_length_v6(mp, ip6h);
else
hdr_length = IPV6_HDR_LEN;
icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN);
inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */
if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) {
if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
freemsg(first_mp);
return;
}
ip6h = (ip6_t *)mp->b_rptr;
icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length];
inner_ip6h = (ip6_t *)&icmp6[1];
}
/*
* For link local destinations matching simply on IRE type is not
* sufficient. Same link local addresses for different ILL's is
* possible.
*/
if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) {
first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL,
IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL,
MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst);
if (first_ire == NULL) {
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("icmp_inbound_too_big_v6:"
"no ire for dst %s\n", AF_INET6,
&inner_ip6h->ip6_dst);
}
freemsg(first_mp);
return;
}
mtu = ntohl(icmp6->icmp6_mtu);
rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER);
for (ire = first_ire; ire != NULL &&
IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst);
ire = ire->ire_next) {
mutex_enter(&ire->ire_lock);
if (mtu < IPV6_MIN_MTU) {
ip1dbg(("Received mtu less than IPv6 "
"min mtu %d: %d\n", IPV6_MIN_MTU, mtu));
mtu = IPV6_MIN_MTU;
/*
* If an mtu less than IPv6 min mtu is received,
* we must include a fragment header in
* subsequent packets.
*/
ire->ire_frag_flag |= IPH_FRAG_HDR;
}
ip1dbg(("Received mtu from router: %d\n", mtu));
ire->ire_max_frag = MIN(ire->ire_max_frag, mtu);
if (ire->ire_max_frag == mtu) {
/* Decreased it */
ire->ire_marks |= IRE_MARK_PMTU;
}
/* Record the new max frag size for the ULP. */
if (ire->ire_frag_flag & IPH_FRAG_HDR) {
/*
* If we need a fragment header in every packet
* (above case or multirouting), make sure the
* ULP takes it into account when computing the
* payload size.
*/
icmp6->icmp6_mtu = htonl(ire->ire_max_frag -
sizeof (ip6_frag_t));
} else {
icmp6->icmp6_mtu = htonl(ire->ire_max_frag);
}
mutex_exit(&ire->ire_lock);
}
rw_exit(&first_ire->ire_bucket->irb_lock);
ire_refrele(first_ire);
} else {
irb_t *irb = NULL;
/*
* for non-link local destinations we match only on the IRE type
*/
ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL,
IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE,
ipst);
if (ire == NULL) {
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("icmp_inbound_too_big_v6:"
"no ire for dst %s\n",
AF_INET6, &inner_ip6h->ip6_dst);
}
freemsg(first_mp);
return;
}
irb = ire->ire_bucket;
ire_refrele(ire);
rw_enter(&irb->irb_lock, RW_READER);
for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6,
&inner_ip6h->ip6_dst)) {
mtu = ntohl(icmp6->icmp6_mtu);
mutex_enter(&ire->ire_lock);
if (mtu < IPV6_MIN_MTU) {
ip1dbg(("Received mtu less than IPv6"
"min mtu %d: %d\n",
IPV6_MIN_MTU, mtu));
mtu = IPV6_MIN_MTU;
/*
* If an mtu less than IPv6 min mtu is
* received, we must include a fragment
* header in subsequent packets.
*/
ire->ire_frag_flag |= IPH_FRAG_HDR;
}
ip1dbg(("Received mtu from router: %d\n", mtu));
ire->ire_max_frag = MIN(ire->ire_max_frag, mtu);
if (ire->ire_max_frag == mtu) {
/* Decreased it */
ire->ire_marks |= IRE_MARK_PMTU;
}
/* Record the new max frag size for the ULP. */
if (ire->ire_frag_flag & IPH_FRAG_HDR) {
/*
* If we need a fragment header in
* every packet (above case or
* multirouting), make sure the ULP
* takes it into account when computing
* the payload size.
*/
icmp6->icmp6_mtu =
htonl(ire->ire_max_frag -
sizeof (ip6_frag_t));
} else {
icmp6->icmp6_mtu =
htonl(ire->ire_max_frag);
}
mutex_exit(&ire->ire_lock);
}
}
rw_exit(&irb->irb_lock);
}
icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, inill,
mctl_present, zoneid);
}
/*
* Fanout for ICMPv6 errors containing IP-in-IPv6 packets. Returns B_TRUE if a
* tunnel consumed the message, and B_FALSE otherwise.
*/
static boolean_t
icmp_inbound_iptun_fanout_v6(mblk_t *first_mp, ip6_t *rip6h, ill_t *ill,
ip_stack_t *ipst)
{
conn_t *connp;
if ((connp = ipcl_iptun_classify_v6(&rip6h->ip6_src, &rip6h->ip6_dst,
ipst)) == NULL)
return (B_FALSE);
BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
connp->conn_recv(connp, first_mp, NULL);
CONN_DEC_REF(connp);
return (B_TRUE);
}
/*
* Fanout received ICMPv6 error packets to the transports.
* Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
*/
void
icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h,
icmp6_t *icmp6, ill_t *ill, ill_t *inill, boolean_t mctl_present,
zoneid_t zoneid)
{
uint16_t *up; /* Pointer to ports in ULP header */
uint32_t ports; /* reversed ports for fanout */
ip6_t rip6h; /* With reversed addresses */
uint16_t hdr_length;
uint8_t *nexthdrp;
uint8_t nexthdr;
mblk_t *first_mp;
ipsec_in_t *ii;
tcpha_t *tcpha;
conn_t *connp;
ip_stack_t *ipst = ill->ill_ipst;
first_mp = mp;
if (mctl_present) {
mp = first_mp->b_cont;
ASSERT(mp != NULL);
ii = (ipsec_in_t *)first_mp->b_rptr;
ASSERT(ii->ipsec_in_type == IPSEC_IN);
} else {
ii = NULL;
}
hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h);
ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN);
/*
* Need to pullup everything in order to use
* ip_hdr_length_nexthdr_v6()
*/
if (mp->b_cont != NULL) {
if (!pullupmsg(mp, -1)) {
ip1dbg(("icmp_inbound_error_fanout_v6: "
"pullupmsg failed\n"));
goto drop_pkt;
}
ip6h = (ip6_t *)mp->b_rptr;
icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
}
ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */
if ((uchar_t *)&ip6h[1] > mp->b_wptr)
goto drop_pkt;
if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
goto drop_pkt;
nexthdr = *nexthdrp;
/* Set message type, must be done after pullups */
mp->b_datap->db_type = M_CTL;
/*
* We need a separate IP header with the source and destination
* addresses reversed to do fanout/classification because the ip6h in
* the ICMPv6 error is in the form we sent it out.
*/
rip6h.ip6_src = ip6h->ip6_dst;
rip6h.ip6_dst = ip6h->ip6_src;
rip6h.ip6_nxt = nexthdr;
/* Try to pass the ICMP message to clients who need it */
switch (nexthdr) {
case IPPROTO_UDP: {
/*
* Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
* UDP header to get the port information.
*/
if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
mp->b_wptr) {
break;
}
/* Attempt to find a client stream based on port. */
up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
((uint16_t *)&ports)[0] = up[1];
((uint16_t *)&ports)[1] = up[0];
ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, inill,
IP6_NO_IPPOLICY, mctl_present, zoneid);
return;
}
case IPPROTO_TCP: {
/*
* Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
* the TCP header to get the port information.
*/
if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
mp->b_wptr) {
break;
}
/*
* Attempt to find a client stream based on port.
* Note that we do a reverse lookup since the header is
* in the form we sent it out.
*/
tcpha = (tcpha_t *)((char *)ip6h + hdr_length);
connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha,
TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst);
if (connp == NULL) {
goto drop_pkt;
}
SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, tcp_input, connp,
SQ_FILL, SQTAG_TCP6_INPUT_ICMP_ERR);
return;
}
case IPPROTO_SCTP:
/*
* Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
* the SCTP header to get the port information.
*/
if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
mp->b_wptr) {
break;
}
up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
((uint16_t *)&ports)[0] = up[1];
((uint16_t *)&ports)[1] = up[0];
ip_fanout_sctp(first_mp, inill, (ipha_t *)ip6h, ports, 0,
mctl_present, IP6_NO_IPPOLICY, zoneid);
return;
case IPPROTO_ESP:
case IPPROTO_AH: {
int ipsec_rc;
ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;
/*
* We need a IPSEC_IN in the front to fanout to AH/ESP.
* We will re-use the IPSEC_IN if it is already present as
* AH/ESP will not affect any fields in the IPSEC_IN for
* ICMP errors. If there is no IPSEC_IN, allocate a new
* one and attach it in the front.
*/
if (ii != NULL) {
/*
* ip_fanout_proto_again converts the ICMP errors
* that come back from AH/ESP to M_DATA so that
* if it is non-AH/ESP and we do a pullupmsg in
* this function, it would work. Convert it back
* to M_CTL before we send up as this is a ICMP
* error. This could have been generated locally or
* by some router. Validate the inner IPSEC
* headers.
*
* NOTE : ill_index is used by ip_fanout_proto_again
* to locate the ill.
*/
ASSERT(ill != NULL);
ii->ipsec_in_ill_index =
ill->ill_phyint->phyint_ifindex;
ii->ipsec_in_rill_index =
inill->ill_phyint->phyint_ifindex;
first_mp->b_cont->b_datap->db_type = M_CTL;
} else {
/*
* IPSEC_IN is not present. We attach a ipsec_in
* message and send up to IPSEC for validating
* and removing the IPSEC headers. Clear
* ipsec_in_secure so that when we return
* from IPSEC, we don't mistakenly think that this
* is a secure packet came from the network.
*
* NOTE : ill_index is used by ip_fanout_proto_again
* to locate the ill.
*/
ASSERT(first_mp == mp);
first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack);
ASSERT(ill != NULL);
if (first_mp == NULL) {
freemsg(mp);
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
return;
}
ii = (ipsec_in_t *)first_mp->b_rptr;
/* This is not a secure packet */
ii->ipsec_in_secure = B_FALSE;
first_mp->b_cont = mp;
mp->b_datap->db_type = M_CTL;
ii->ipsec_in_ill_index =
ill->ill_phyint->phyint_ifindex;
ii->ipsec_in_rill_index =
inill->ill_phyint->phyint_ifindex;
}
if (!ipsec_loaded(ipss)) {
ip_proto_not_sup(q, first_mp, 0, zoneid, ipst);
return;
}
if (nexthdr == IPPROTO_ESP)
ipsec_rc = ipsecesp_icmp_error(first_mp);
else
ipsec_rc = ipsecah_icmp_error(first_mp);
if (ipsec_rc == IPSEC_STATUS_FAILED)
return;
ip_fanout_proto_again(first_mp, ill, inill, NULL);
return;
}
case IPPROTO_ENCAP:
case IPPROTO_IPV6:
if ((uint8_t *)ip6h + hdr_length +
(nexthdr == IPPROTO_ENCAP ? sizeof (ipha_t) :
sizeof (ip6_t)) > mp->b_wptr) {
goto drop_pkt;
}
if (nexthdr == IPPROTO_ENCAP ||
!IN6_ARE_ADDR_EQUAL(
&((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src,
&ip6h->ip6_src) ||
!IN6_ARE_ADDR_EQUAL(
&((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst,
&ip6h->ip6_dst)) {
/*
* For tunnels that have used IPsec protection,
* we need to adjust the MTU to take into account
* the IPsec overhead.
*/
if (ii != NULL) {
icmp6->icmp6_mtu = htonl(
ntohl(icmp6->icmp6_mtu) -
ipsec_in_extra_length(first_mp));
}
} else {
/*
* Self-encapsulated case. As in the ipv4 case,
* we need to strip the 2nd IP header. Since mp
* is already pulled-up, we can simply bcopy
* the 3rd header + data over the 2nd header.
*/
uint16_t unused_len;
ip6_t *inner_ip6h = (ip6_t *)
((uchar_t *)ip6h + hdr_length);
/*
* Make sure we don't do recursion more than once.
*/
if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h,
&unused_len, &nexthdrp) ||
*nexthdrp == IPPROTO_IPV6) {
goto drop_pkt;
}
/*
* We are about to modify the packet. Make a copy if
* someone else has a reference to it.
*/
if (DB_REF(mp) > 1) {
mblk_t *mp1;
uint16_t icmp6_offset;
mp1 = copymsg(mp);
if (mp1 == NULL) {
goto drop_pkt;
}
icmp6_offset = (uint16_t)
((uchar_t *)icmp6 - mp->b_rptr);
freemsg(mp);
mp = mp1;
icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset);
ip6h = (ip6_t *)&icmp6[1];
inner_ip6h = (ip6_t *)
((uchar_t *)ip6h + hdr_length);
if (mctl_present)
first_mp->b_cont = mp;
else
first_mp = mp;
}
/*
* Need to set db_type back to M_DATA before
* refeeding mp into this function.
*/
DB_TYPE(mp) = M_DATA;
/*
* Copy the 3rd header + remaining data on top
* of the 2nd header.
*/
bcopy(inner_ip6h, ip6h,
mp->b_wptr - (uchar_t *)inner_ip6h);
/*
* Subtract length of the 2nd header.
*/
mp->b_wptr -= hdr_length;
/*
* Now recurse, and see what I _really_ should be
* doing here.
*/
icmp_inbound_error_fanout_v6(q, first_mp,
(ip6_t *)mp->b_rptr, icmp6, ill, inill,
mctl_present, zoneid);
return;
}
if (icmp_inbound_iptun_fanout_v6(first_mp, &rip6h, ill, ipst))
return;
/*
* No IP tunnel is associated with this error. Perhaps a raw
* socket will want it.
*/
/* FALLTHRU */
default:
ip_fanout_proto_v6(q, first_mp, &rip6h, ill, inill, nexthdr, 0,
IP6_NO_IPPOLICY, mctl_present, zoneid);
return;
}
/* NOTREACHED */
drop_pkt:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
freemsg(first_mp);
}
/*
* Process received IPv6 ICMP Redirect messages.
*/
/* ARGSUSED */
static void
icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill)
{
ip6_t *ip6h;
uint16_t hdr_length;
nd_redirect_t *rd;
ire_t *ire;
ire_t *prev_ire;
ire_t *redir_ire;
in6_addr_t *src, *dst, *gateway;
nd_opt_hdr_t *opt;
nce_t *nce;
int nce_flags = 0;
int err = 0;
boolean_t redirect_to_router = B_FALSE;
int len;
int optlen;
iulp_t ulp_info = { 0 };
ill_t *prev_ire_ill;
ipif_t *ipif;
ip_stack_t *ipst = ill->ill_ipst;
ip6h = (ip6_t *)mp->b_rptr;
if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
hdr_length = ip_hdr_length_v6(mp, ip6h);
else
hdr_length = IPV6_HDR_LEN;
rd = (nd_redirect_t *)&mp->b_rptr[hdr_length];
len = mp->b_wptr - mp->b_rptr - hdr_length;
src = &ip6h->ip6_src;
dst = &rd->nd_rd_dst;
gateway = &rd->nd_rd_target;
/* Verify if it is a valid redirect */
if (!IN6_IS_ADDR_LINKLOCAL(src) ||
(ip6h->ip6_hops != IPV6_MAX_HOPS) ||
(rd->nd_rd_code != 0) ||
(len < sizeof (nd_redirect_t)) ||
(IN6_IS_ADDR_V4MAPPED(dst)) ||
(IN6_IS_ADDR_MULTICAST(dst))) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
freemsg(mp);
return;
}
if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
IN6_ARE_ADDR_EQUAL(gateway, dst))) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
freemsg(mp);
return;
}
if (len > sizeof (nd_redirect_t)) {
if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1],
len - sizeof (nd_redirect_t))) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
freemsg(mp);
return;
}
}
if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) {
redirect_to_router = B_TRUE;
nce_flags |= NCE_F_ISROUTER;
}
/* ipif will be refreleased afterwards */
ipif = ipif_get_next_ipif(NULL, ill);
if (ipif == NULL) {
freemsg(mp);
return;
}
/*
* Verify that the IP source address of the redirect is
* the same as the current first-hop router for the specified
* ICMP destination address.
* Also, Make sure we had a route for the dest in question and
* that route was pointing to the old gateway (the source of the
* redirect packet.)
*/
prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, ALL_ZONES,
NULL, MATCH_IRE_GW | MATCH_IRE_ILL | MATCH_IRE_DEFAULT, ipst);
/*
* Check that
* the redirect was not from ourselves
* old gateway is still directly reachable
*/
if (prev_ire == NULL ||
prev_ire->ire_type == IRE_LOCAL) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
ipif_refrele(ipif);
goto fail_redirect;
}
prev_ire_ill = ire_to_ill(prev_ire);
ASSERT(prev_ire_ill != NULL);
if (prev_ire_ill->ill_flags & ILLF_NONUD)
nce_flags |= NCE_F_NONUD;
/*
* Should we use the old ULP info to create the new gateway? From
* a user's perspective, we should inherit the info so that it
* is a "smooth" transition. If we do not do that, then new
* connections going thru the new gateway will have no route metrics,
* which is counter-intuitive to user. From a network point of
* view, this may or may not make sense even though the new gateway
* is still directly connected to us so the route metrics should not
* change much.
*
* But if the old ire_uinfo is not initialized, we do another
* recursive lookup on the dest using the new gateway. There may
* be a route to that. If so, use it to initialize the redirect
* route.
*/
if (prev_ire->ire_uinfo.iulp_set) {
bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t));
} else if (redirect_to_router) {
/*
* Only do the following if the redirection is really to
* a router.
*/
ire_t *tmp_ire;
ire_t *sire;
tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire,
ALL_ZONES, 0, NULL,
(MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT),
ipst);
if (sire != NULL) {
bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t));
ASSERT(tmp_ire != NULL);
ire_refrele(tmp_ire);
ire_refrele(sire);
} else if (tmp_ire != NULL) {
bcopy(&tmp_ire->ire_uinfo, &ulp_info,
sizeof (iulp_t));
ire_refrele(tmp_ire);
}
}
optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t);
opt = (nd_opt_hdr_t *)&rd[1];
opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR);
if (opt != NULL) {
err = ndp_lookup_then_add_v6(ill,
B_FALSE, /* don't match across illgrp */
(uchar_t *)&opt[1], /* Link layer address */
gateway,
&ipv6_all_ones, /* prefix mask */
&ipv6_all_zeros, /* Mapping mask */
0,
nce_flags,
ND_STALE,
&nce);
switch (err) {
case 0:
NCE_REFRELE(nce);
break;
case EEXIST:
/*
* Check to see if link layer address has changed and
* process the nce_state accordingly.
*/
ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE);
NCE_REFRELE(nce);
break;
default:
ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
err));
ipif_refrele(ipif);
goto fail_redirect;
}
}
if (redirect_to_router) {
/* icmp_redirect_ok_v6() must have already verified this */
ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway));
/*
* Create a Route Association. This will allow us to remember
* a router told us to use the particular gateway.
*/
ire = ire_create_v6(
dst,
&ipv6_all_ones, /* mask */
&prev_ire->ire_src_addr_v6, /* source addr */
gateway, /* gateway addr */
&prev_ire->ire_max_frag, /* max frag */
NULL, /* no src nce */
NULL, /* no rfq */
NULL, /* no stq */
IRE_HOST,
prev_ire->ire_ipif,
NULL,
0,
0,
(RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST),
&ulp_info,
NULL,
NULL,
ipst);
} else {
queue_t *stq;
stq = (ipif->ipif_net_type == IRE_IF_RESOLVER)
? ipif->ipif_rq : ipif->ipif_wq;
/*
* Just create an on link entry, i.e. interface route.
*/
ire = ire_create_v6(
dst, /* gateway == dst */
&ipv6_all_ones, /* mask */
&prev_ire->ire_src_addr_v6, /* source addr */
&ipv6_all_zeros, /* gateway addr */
&prev_ire->ire_max_frag, /* max frag */
NULL, /* no src nce */
NULL, /* ire rfq */
stq, /* ire stq */
ipif->ipif_net_type, /* IF_[NO]RESOLVER */
prev_ire->ire_ipif,
&ipv6_all_ones,
0,
0,
(RTF_DYNAMIC | RTF_HOST),
&ulp_info,
NULL,
NULL,
ipst);
}
/* Release reference from earlier ipif_get_next_ipif() */
ipif_refrele(ipif);
if (ire == NULL)
goto fail_redirect;
if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) {
/* tell routing sockets that we received a redirect */
ip_rts_change_v6(RTM_REDIRECT,
&rd->nd_rd_dst,
&rd->nd_rd_target,
&ipv6_all_ones, 0, &ire->ire_src_addr_v6,
(RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0,
(RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst);
/*
* Delete any existing IRE_HOST type ires for this destination.
* This together with the added IRE has the effect of
* modifying an existing redirect.
*/
redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST,
ire->ire_ipif, NULL, ALL_ZONES, 0, NULL,
(MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), ipst);
ire_refrele(ire); /* Held in ire_add_v6 */
if (redir_ire != NULL) {
if (redir_ire->ire_flags & RTF_DYNAMIC)
ire_delete(redir_ire);
ire_refrele(redir_ire);
}
}
if (prev_ire->ire_type == IRE_CACHE)
ire_delete(prev_ire);
ire_refrele(prev_ire);
prev_ire = NULL;
fail_redirect:
if (prev_ire != NULL)
ire_refrele(prev_ire);
freemsg(mp);
}
static ill_t *
ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst)
{
ill_t *ill;
ASSERT(WR(q) == q);
if (q->q_next != NULL) {
ill = (ill_t *)q->q_ptr;
if (ILL_CAN_LOOKUP(ill))
ill_refhold(ill);
else
ill = NULL;
} else {
ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE,
NULL, NULL, NULL, NULL, NULL, ipst);
}
if (ill == NULL)
ip0dbg(("ip_queue_to_ill_v6: no ill\n"));
return (ill);
}
/*
* Assigns an appropriate source address to the packet.
* If origdst is one of our IP addresses that use it as the source.
* If the queue is an ill queue then select a source from that ill.
* Otherwise pick a source based on a route lookup back to the origsrc.
*
* src is the return parameter. Returns a pointer to src or NULL if failure.
*/
static in6_addr_t *
icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst,
in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst)
{
ill_t *ill;
ire_t *ire;
ipif_t *ipif;
ASSERT(!(wq->q_flag & QREADR));
if (wq->q_next != NULL) {
ill = (ill_t *)wq->q_ptr;
} else {
ill = NULL;
}
ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK),
NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY),
ipst);
if (ire != NULL) {
/* Destined to one of our addresses */
*src = *origdst;
ire_refrele(ire);
return (src);
}
if (ire != NULL) {
ire_refrele(ire);
ire = NULL;
}
if (ill == NULL) {
/* What is the route back to the original source? */
ire = ire_route_lookup_v6(origsrc, 0, 0, 0,
NULL, NULL, zoneid, NULL,
(MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst);
if (ire == NULL) {
BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes);
return (NULL);
}
ASSERT(ire->ire_ipif != NULL);
ill = ire->ire_ipif->ipif_ill;
ire_refrele(ire);
}
ipif = ipif_select_source_v6(ill, origsrc, B_FALSE,
IPV6_PREFER_SRC_DEFAULT, zoneid);
if (ipif != NULL) {
*src = ipif->ipif_v6src_addr;
ipif_refrele(ipif);
return (src);
}
/*
* Unusual case - can't find a usable source address to reach the
* original source. Use what in the route to the source.
*/
ire = ire_route_lookup_v6(origsrc, 0, 0, 0,
NULL, NULL, zoneid, NULL,
(MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst);
if (ire == NULL) {
BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes);
return (NULL);
}
ASSERT(ire != NULL);
*src = ire->ire_src_addr_v6;
ire_refrele(ire);
return (src);
}
/*
* Build and ship an IPv6 ICMP message using the packet data in mp,
* and the ICMP header pointed to by "stuff". (May be called as
* writer.)
* Note: assumes that icmp_pkt_err_ok_v6 has been called to
* verify that an icmp error packet can be sent.
*
* If q is an ill write side queue (which is the case when packets
* arrive from ip_rput) then ip_wput code will ensure that packets to
* link-local destinations are sent out that ill.
*
* If v6src_ptr is set use it as a source. Otherwise select a reasonable
* source address (see above function).
*/
static void
icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len,
const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid,
ip_stack_t *ipst)
{
ip6_t *ip6h;
in6_addr_t v6dst;
size_t len_needed;
size_t msg_len;
mblk_t *mp1;
icmp6_t *icmp6;
ill_t *ill;
in6_addr_t v6src;
mblk_t *ipsec_mp;
ipsec_out_t *io;
ill = ip_queue_to_ill_v6(q, ipst);
if (ill == NULL) {
freemsg(mp);
return;
}
if (mctl_present) {
/*
* If it is :
*
* 1) a IPSEC_OUT, then this is caused by outbound
* datagram originating on this host. IPSEC processing
* may or may not have been done. Refer to comments above
* icmp_inbound_error_fanout for details.
*
* 2) a IPSEC_IN if we are generating a icmp_message
* for an incoming datagram destined for us i.e called
* from ip_fanout_send_icmp.
*/
ipsec_info_t *in;
ipsec_mp = mp;
mp = ipsec_mp->b_cont;
in = (ipsec_info_t *)ipsec_mp->b_rptr;
ip6h = (ip6_t *)mp->b_rptr;
ASSERT(in->ipsec_info_type == IPSEC_OUT ||
in->ipsec_info_type == IPSEC_IN);
if (in->ipsec_info_type == IPSEC_IN) {
/*
* Convert the IPSEC_IN to IPSEC_OUT.
*/
if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
ill_refrele(ill);
return;
}
} else {
ASSERT(in->ipsec_info_type == IPSEC_OUT);
io = (ipsec_out_t *)in;
/*
* Clear out ipsec_out_proc_begin, so we do a fresh
* ire lookup.
*/
io->ipsec_out_proc_begin = B_FALSE;
}
} else {
/*
* This is in clear. The icmp message we are building
* here should go out in clear.
*/
ipsec_in_t *ii;
ASSERT(mp->b_datap->db_type == M_DATA);
ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack);
if (ipsec_mp == NULL) {
freemsg(mp);
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
ill_refrele(ill);
return;
}
ii = (ipsec_in_t *)ipsec_mp->b_rptr;
/* This is not a secure packet */
ii->ipsec_in_secure = B_FALSE;
/*
* For trusted extensions using a shared IP address we can
* send using any zoneid.
*/
if (zoneid == ALL_ZONES)
ii->ipsec_in_zoneid = GLOBAL_ZONEID;
else
ii->ipsec_in_zoneid = zoneid;
ipsec_mp->b_cont = mp;
ip6h = (ip6_t *)mp->b_rptr;
/*
* Convert the IPSEC_IN to IPSEC_OUT.
*/
if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
ill_refrele(ill);
return;
}
}
io = (ipsec_out_t *)ipsec_mp->b_rptr;
if (v6src_ptr != NULL) {
v6src = *v6src_ptr;
} else {
if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst,
&v6src, zoneid, ipst) == NULL) {
freemsg(ipsec_mp);
ill_refrele(ill);
return;
}
}
v6dst = ip6h->ip6_src;
len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len;
msg_len = msgdsize(mp);
if (msg_len > len_needed) {
if (!adjmsg(mp, len_needed - msg_len)) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
freemsg(ipsec_mp);
ill_refrele(ill);
return;
}
msg_len = len_needed;
}
mp1 = allocb_tmpl(IPV6_HDR_LEN + len, mp);
if (mp1 == NULL) {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
freemsg(ipsec_mp);
ill_refrele(ill);
return;
}
ill_refrele(ill);
mp1->b_cont = mp;
mp = mp1;
ASSERT(ipsec_mp->b_datap->db_type == M_CTL &&
io->ipsec_out_type == IPSEC_OUT);
ipsec_mp->b_cont = mp;
/*
* Set ipsec_out_icmp_loopback so we can let the ICMP messages this
* node generates be accepted in peace by all on-host destinations.
* If we do NOT assume that all on-host destinations trust
* self-generated ICMP messages, then rework here, ip.c, and spd.c.
* (Look for ipsec_out_icmp_loopback).
*/
io->ipsec_out_icmp_loopback = B_TRUE;
ip6h = (ip6_t *)mp->b_rptr;
mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len);
ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
ip6h->ip6_nxt = IPPROTO_ICMPV6;
ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
ip6h->ip6_dst = v6dst;
ip6h->ip6_src = v6src;
msg_len += IPV6_HDR_LEN + len;
if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) {
(void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len);
msg_len = IP_MAXPACKET + IPV6_HDR_LEN;
}
ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN));
icmp6 = (icmp6_t *)&ip6h[1];
bcopy(stuff, (char *)icmp6, len);
/*
* Prepare for checksum by putting icmp length in the icmp
* checksum field. The checksum is calculated in ip_wput_v6.
*/
icmp6->icmp6_cksum = ip6h->ip6_plen;
if (icmp6->icmp6_type == ND_REDIRECT) {
ip6h->ip6_hops = IPV6_MAX_HOPS;
}
/* Send to V6 writeside put routine */
put(q, ipsec_mp);
}
/*
* Update the output mib when ICMPv6 packets are sent.
*/
static void
icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6)
{
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs);
switch (icmp6->icmp6_type) {
case ICMP6_DST_UNREACH:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs);
if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs);
break;
case ICMP6_TIME_EXCEEDED:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds);
break;
case ICMP6_PARAM_PROB:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems);
break;
case ICMP6_PACKET_TOO_BIG:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs);
break;
case ICMP6_ECHO_REQUEST:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos);
break;
case ICMP6_ECHO_REPLY:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies);
break;
case ND_ROUTER_SOLICIT:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits);
break;
case ND_ROUTER_ADVERT:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements);
break;
case ND_NEIGHBOR_SOLICIT:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits);
break;
case ND_NEIGHBOR_ADVERT:
BUMP_MIB(ill->ill_icmp6_mib,
ipv6IfIcmpOutNeighborAdvertisements);
break;
case ND_REDIRECT:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects);
break;
case MLD_LISTENER_QUERY:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries);
break;
case MLD_LISTENER_REPORT:
case MLD_V2_LISTENER_REPORT:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses);
break;
case MLD_LISTENER_REDUCTION:
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions);
break;
}
}
/*
* Check if it is ok to send an ICMPv6 error packet in
* response to the IP packet in mp.
* Free the message and return null if no
* ICMP error packet should be sent.
*/
static mblk_t *
icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp,
boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst)
{
ip6_t *ip6h;
if (!mp)
return (NULL);
ip6h = (ip6_t *)mp->b_rptr;
/* Check if source address uniquely identifies the host */
if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) ||
IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) ||
IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
freemsg(mp);
return (NULL);
}
if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN;
icmp6_t *icmp6;
if (mp->b_wptr - mp->b_rptr < len_needed) {
if (!pullupmsg(mp, len_needed)) {
ill_t *ill;
ill = ip_queue_to_ill_v6(q, ipst);
if (ill == NULL) {
BUMP_MIB(&ipst->ips_icmp6_mib,
ipv6IfIcmpInErrors);
} else {
BUMP_MIB(ill->ill_icmp6_mib,
ipv6IfIcmpInErrors);
ill_refrele(ill);
}
freemsg(mp);
return (NULL);
}
ip6h = (ip6_t *)mp->b_rptr;
}
icmp6 = (icmp6_t *)&ip6h[1];
/* Explicitly do not generate errors in response to redirects */
if (ICMP6_IS_ERROR(icmp6->icmp6_type) ||
icmp6->icmp6_type == ND_REDIRECT) {
freemsg(mp);
return (NULL);
}
}
/*
* Check that the destination is not multicast and that the packet
* was not sent on link layer broadcast or multicast. (Exception
* is Packet too big message as per the draft - when mcast_ok is set.)
*/
if (!mcast_ok &&
(llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) {
freemsg(mp);
return (NULL);
}
if (icmp_err_rate_limit(ipst)) {
/*
* Only send ICMP error packets every so often.
* This should be done on a per port/source basis,
* but for now this will suffice.
*/
freemsg(mp);
return (NULL);
}
return (mp);
}
/*
* Generate an ICMPv6 redirect message.
* Include target link layer address option if it exits.
* Always include redirect header.
*/
static void
icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp,
in6_addr_t *dest, ill_t *ill, boolean_t llbcast)
{
nd_redirect_t *rd;
nd_opt_rd_hdr_t *rdh;
uchar_t *buf;
nce_t *nce = NULL;
nd_opt_hdr_t *opt;
int len;
int ll_opt_len = 0;
int max_redir_hdr_data_len;
int pkt_len;
in6_addr_t *srcp;
ip_stack_t *ipst = ill->ill_ipst;
/*
* We are called from ip_rput where we could
* not have attached an IPSEC_IN.
*/
ASSERT(mp->b_datap->db_type == M_DATA);
mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst);
if (mp == NULL)
return;
nce = ndp_lookup_v6(ill, B_TRUE, targetp, B_FALSE);
if (nce != NULL && nce->nce_state != ND_INCOMPLETE) {
ll_opt_len = (sizeof (nd_opt_hdr_t) +
ill->ill_phys_addr_length + 7)/8 * 8;
}
len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len;
ASSERT(len % 4 == 0);
buf = kmem_alloc(len, KM_NOSLEEP);
if (buf == NULL) {
if (nce != NULL)
NCE_REFRELE(nce);
freemsg(mp);
return;
}
rd = (nd_redirect_t *)buf;
rd->nd_rd_type = (uint8_t)ND_REDIRECT;
rd->nd_rd_code = 0;
rd->nd_rd_reserved = 0;
rd->nd_rd_target = *targetp;
rd->nd_rd_dst = *dest;
opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t));
if (nce != NULL && ll_opt_len != 0) {
opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
opt->nd_opt_len = ll_opt_len/8;
bcopy((char *)nce->nce_res_mp->b_rptr +
NCE_LL_ADDR_OFFSET(ill), &opt[1],
ill->ill_phys_addr_length);
}
if (nce != NULL)
NCE_REFRELE(nce);
rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len);
rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER;
/* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */
max_redir_hdr_data_len =
(ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8;
pkt_len = msgdsize(mp);
/* Make sure mp is 8 byte aligned */
if (pkt_len > max_redir_hdr_data_len) {
rdh->nd_opt_rh_len = (max_redir_hdr_data_len +
sizeof (nd_opt_rd_hdr_t))/8;
(void) adjmsg(mp, max_redir_hdr_data_len - pkt_len);
} else {
rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8;
(void) adjmsg(mp, -(pkt_len % 8));
}
rdh->nd_opt_rh_reserved1 = 0;
rdh->nd_opt_rh_reserved2 = 0;
/* ipif_v6src_addr contains the link-local source address */
srcp = &ill->ill_ipif->ipif_v6src_addr;
/* Redirects sent by router, and router is global zone */
icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst);
kmem_free(buf, len);
}
/* Generate an ICMP time exceeded message. (May be called as writer.) */
void
icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code,
boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid,
ip_stack_t *ipst)
{
icmp6_t icmp6;
boolean_t mctl_present;
mblk_t *first_mp;
EXTRACT_PKT_MP(mp, first_mp, mctl_present);
mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst);
if (mp == NULL) {
if (mctl_present)
freeb(first_mp);
return;
}
bzero(&icmp6, sizeof (icmp6_t));
icmp6.icmp6_type = ICMP6_TIME_EXCEEDED;
icmp6.icmp6_code = code;
icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present,
zoneid, ipst);
}
/*
* Generate an ICMP unreachable message.
*/
void
icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code,
boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid,
ip_stack_t *ipst)
{
icmp6_t icmp6;
boolean_t mctl_present;
mblk_t *first_mp;
EXTRACT_PKT_MP(mp, first_mp, mctl_present);
mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst);
if (mp == NULL) {
if (mctl_present)
freeb(first_mp);
return;
}
bzero(&icmp6, sizeof (icmp6_t));
icmp6.icmp6_type = ICMP6_DST_UNREACH;
icmp6.icmp6_code = code;
icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present,
zoneid, ipst);
}
/*
* Generate an ICMP pkt too big message.
*/
static void
icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu,
boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst)
{
icmp6_t icmp6;
mblk_t *first_mp;
boolean_t mctl_present;
EXTRACT_PKT_MP(mp, first_mp, mctl_present);
mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst);
if (mp == NULL) {
if (mctl_present)
freeb(first_mp);
return;
}
bzero(&icmp6, sizeof (icmp6_t));
icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG;
icmp6.icmp6_code = 0;
icmp6.icmp6_mtu = htonl(mtu);
icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present,
zoneid, ipst);
}
/*
* Generate an ICMP parameter problem message. (May be called as writer.)
* 'offset' is the offset from the beginning of the packet in error.
*/
static void
icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code,
uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid,
ip_stack_t *ipst)
{
icmp6_t icmp6;
boolean_t mctl_present;
mblk_t *first_mp;
EXTRACT_PKT_MP(mp, first_mp, mctl_present);
mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst);
if (mp == NULL) {
if (mctl_present)
freeb(first_mp);
return;
}
bzero((char *)&icmp6, sizeof (icmp6_t));
icmp6.icmp6_type = ICMP6_PARAM_PROB;
icmp6.icmp6_code = code;
icmp6.icmp6_pptr = htonl(offset);
icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present,
zoneid, ipst);
}
/*
* This code will need to take into account the possibility of binding
* to a link local address on a multi-homed host, in which case the
* outgoing interface (from the conn) will need to be used when getting
* an ire for the dst. Going through proper outgoing interface and
* choosing the source address corresponding to the outgoing interface
* is necessary when the destination address is a link-local address and
* IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set.
* This can happen when active connection is setup; thus ipp pointer
* is passed here from tcp_connect_*() routines, in non-TCP cases NULL
* pointer is passed as ipp pointer.
*/
mblk_t *
ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp)
{
ssize_t len;
int protocol;
struct T_bind_req *tbr;
sin6_t *sin6;
ipa6_conn_t *ac6;
in6_addr_t *v6srcp;
in6_addr_t *v6dstp;
uint16_t lport;
uint16_t fport;
uchar_t *ucp;
int error = 0;
boolean_t local_bind;
ipa6_conn_x_t *acx6;
boolean_t verify_dst;
ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
cred_t *cr;
/*
* All Solaris components should pass a db_credp
* for this TPI message, hence we ASSERT.
* But in case there is some other M_PROTO that looks
* like a TPI message sent by some other kernel
* component, we check and return an error.
*/
cr = msg_getcred(mp, NULL);
ASSERT(cr != NULL);
if (cr == NULL) {
error = EINVAL;
goto bad_addr;
}
ASSERT(connp->conn_af_isv6);
len = mp->b_wptr - mp->b_rptr;
if (len < (sizeof (*tbr) + 1)) {
(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
"ip_bind_v6: bogus msg, len %ld", len);
goto bad_addr;
}
/* Back up and extract the protocol identifier. */
mp->b_wptr--;
tbr = (struct T_bind_req *)mp->b_rptr;
/* Reset the message type in preparation for shipping it back. */
mp->b_datap->db_type = M_PCPROTO;
protocol = *mp->b_wptr & 0xFF;
connp->conn_ulp = (uint8_t)protocol;
/*
* Check for a zero length address. This is from a protocol that
* wants to register to receive all packets of its type.
*/
if (tbr->ADDR_length == 0) {
if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP ||
protocol == IPPROTO_ESP || protocol == IPPROTO_AH) &&
ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head !=
NULL) {
/*
* TCP, SCTP, AH, and ESP have single protocol fanouts.
* Do not allow others to bind to these.
*/
goto bad_addr;
}
/*
*
* The udp module never sends down a zero-length address,
* and allowing this on a labeled system will break MLP
* functionality.
*/
if (is_system_labeled() && protocol == IPPROTO_UDP)
goto bad_addr;
/* Allow ipsec plumbing */
if (connp->conn_mac_exempt && protocol != IPPROTO_AH &&
protocol != IPPROTO_ESP)
goto bad_addr;
connp->conn_srcv6 = ipv6_all_zeros;
ipcl_proto_insert_v6(connp, protocol);
tbr->PRIM_type = T_BIND_ACK;
return (mp);
}
/* Extract the address pointer from the message. */
ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset,
tbr->ADDR_length);
if (ucp == NULL) {
ip1dbg(("ip_bind_v6: no address\n"));
goto bad_addr;
}
if (!OK_32PTR(ucp)) {
ip1dbg(("ip_bind_v6: unaligned address\n"));
goto bad_addr;
}
switch (tbr->ADDR_length) {
default:
ip1dbg(("ip_bind_v6: bad address length %d\n",
(int)tbr->ADDR_length));
goto bad_addr;
case IPV6_ADDR_LEN:
/* Verification of local address only */
v6srcp = (in6_addr_t *)ucp;
lport = 0;
local_bind = B_TRUE;
break;
case sizeof (sin6_t):
sin6 = (sin6_t *)ucp;
v6srcp = &sin6->sin6_addr;
lport = sin6->sin6_port;
local_bind = B_TRUE;
break;
case sizeof (ipa6_conn_t):
/*
* Verify that both the source and destination addresses
* are valid.
*/
ac6 = (ipa6_conn_t *)ucp;
v6srcp = &ac6->ac6_laddr;
v6dstp = &ac6->ac6_faddr;
fport = ac6->ac6_fport;
/* For raw socket, the local port is not set. */
lport = ac6->ac6_lport != 0 ? ac6->ac6_lport :
connp->conn_lport;
local_bind = B_FALSE;
/* Always verify destination reachability. */
verify_dst = B_TRUE;
break;
case sizeof (ipa6_conn_x_t):
/*
* Verify that the source address is valid.
*/
acx6 = (ipa6_conn_x_t *)ucp;
ac6 = &acx6->ac6x_conn;
v6srcp = &ac6->ac6_laddr;
v6dstp = &ac6->ac6_faddr;
fport = ac6->ac6_fport;
lport = ac6->ac6_lport;
local_bind = B_FALSE;
/*
* Client that passed ipa6_conn_x_t to us specifies whether to
* verify destination reachability.
*/
verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0;
break;
}
if (local_bind) {
error = ip_proto_bind_laddr_v6(connp, &mp->b_cont, protocol,
v6srcp, lport, tbr->ADDR_length != IPV6_ADDR_LEN);
} else {
error = ip_proto_bind_connected_v6(connp, &mp->b_cont, protocol,
v6srcp, lport, v6dstp, ipp, fport, B_TRUE, verify_dst, cr);
}
if (error == 0) {
/* Send it home. */
mp->b_datap->db_type = M_PCPROTO;
tbr->PRIM_type = T_BIND_ACK;
return (mp);
}
bad_addr:
ASSERT(error != EINPROGRESS);
if (error > 0)
mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error);
else
mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0);
return (mp);
}
/*
* Here address is verified to be a valid local address.
* If the IRE_DB_REQ_TYPE mp is present, a multicast
* address is also considered a valid local address.
* In the case of a multicast address, however, the
* upper protocol is expected to reset the src address
* to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that
* no packets are emitted with multicast address as
* source address.
* The addresses valid for bind are:
* (1) - in6addr_any
* (2) - IP address of an UP interface
* (3) - IP address of a DOWN interface
* (4) - a multicast address. In this case
* the conn will only receive packets destined to
* the specified multicast address. Note: the
* application still has to issue an
* IPV6_JOIN_GROUP socket option.
*
* In all the above cases, the bound address must be valid in the current zone.
* When the address is loopback or multicast, there might be many matching IREs
* so bind has to look up based on the zone.
*/
/*
* Verify the local IP address. Does not change the conn_t except
* conn_fully_bound and conn_policy_cached.
*/
static int
ip_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol,
const in6_addr_t *v6src, uint16_t lport, boolean_t fanout_insert)
{
int error = 0;
ire_t *src_ire = NULL;
zoneid_t zoneid;
mblk_t *mp = NULL;
boolean_t ire_requested;
boolean_t ipsec_policy_set;
ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
if (mpp)
mp = *mpp;
ire_requested = (mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE);
ipsec_policy_set = (mp != NULL && DB_TYPE(mp) == IPSEC_POLICY_SET);
/*
* If it was previously connected, conn_fully_bound would have
* been set.
*/
connp->conn_fully_bound = B_FALSE;
zoneid = IPCL_ZONEID(connp);
if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
src_ire = ire_route_lookup_v6(v6src, 0, 0,
0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst);
/*
* If an address other than in6addr_any is requested,
* we verify that it is a valid address for bind
* Note: Following code is in if-else-if form for
* readability compared to a condition check.
*/
ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST));
/* LINTED - statement has no consequent */
if (IRE_IS_LOCAL(src_ire)) {
/*
* (2) Bind to address of local UP interface
*/
} else if (IN6_IS_ADDR_MULTICAST(v6src)) {
ipif_t *multi_ipif = NULL;
ire_t *save_ire;
/*
* (4) bind to multicast address.
* Fake out the IRE returned to upper
* layer to be a broadcast IRE in
* ip_bind_insert_ire_v6().
* Pass other information that matches
* the ipif (e.g. the source address).
* conn_multicast_ill is only used for
* IPv6 packets
*/
mutex_enter(&connp->conn_lock);
if (connp->conn_multicast_ill != NULL) {
(void) ipif_lookup_zoneid(
connp->conn_multicast_ill, zoneid, 0,
&multi_ipif);
} else {
/*
* Look for default like
* ip_wput_v6
*/
multi_ipif = ipif_lookup_group_v6(
&ipv6_unspecified_group, zoneid, ipst);
}
mutex_exit(&connp->conn_lock);
save_ire = src_ire;
src_ire = NULL;
if (multi_ipif == NULL || !ire_requested ||
(src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) {
src_ire = save_ire;
error = EADDRNOTAVAIL;
} else {
ASSERT(src_ire != NULL);
if (save_ire != NULL)
ire_refrele(save_ire);
}
if (multi_ipif != NULL)
ipif_refrele(multi_ipif);
} else {
if (!ip_addr_exists_v6(v6src, zoneid, ipst)) {
/*
* Not a valid address for bind
*/
error = EADDRNOTAVAIL;
}
}
if (error != 0) {
/* Red Alert! Attempting to be a bogon! */
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_bind_laddr_v6: bad src"
" address %s\n", AF_INET6, v6src);
}
goto bad_addr;
}
}
/*
* Allow setting new policies. For example, disconnects come
* down as ipa_t bind. As we would have set conn_policy_cached
* to B_TRUE before, we should set it to B_FALSE, so that policy
* can change after the disconnect.
*/
connp->conn_policy_cached = B_FALSE;
/* If not fanout_insert this was just an address verification */
if (fanout_insert) {
/*
* The addresses have been verified. Time to insert in
* the correct fanout list.
*/
connp->conn_srcv6 = *v6src;
connp->conn_remv6 = ipv6_all_zeros;
connp->conn_lport = lport;
connp->conn_fport = 0;
error = ipcl_bind_insert_v6(connp, protocol, v6src, lport);
}
if (error == 0) {
if (ire_requested) {
if (!ip_bind_get_ire_v6(mpp, src_ire, v6src, NULL,
ipst)) {
error = -1;
goto bad_addr;
}
mp = *mpp;
} else if (ipsec_policy_set) {
if (!ip_bind_ipsec_policy_set(connp, mp)) {
error = -1;
goto bad_addr;
}
}
}
bad_addr:
if (error != 0) {
if (connp->conn_anon_port) {
(void) tsol_mlp_anon(crgetzone(connp->conn_cred),
connp->conn_mlp_type, connp->conn_ulp, ntohs(lport),
B_FALSE);
}
connp->conn_mlp_type = mlptSingle;
}
if (src_ire != NULL)
ire_refrele(src_ire);
if (ipsec_policy_set) {
ASSERT(mp != NULL);
freeb(mp);
/*
* As of now assume that nothing else accompanies
* IPSEC_POLICY_SET.
*/
*mpp = NULL;
}
return (error);
}
int
ip_proto_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol,
const in6_addr_t *v6srcp, uint16_t lport, boolean_t fanout_insert)
{
int error;
boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6;
ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
ASSERT(connp->conn_af_isv6);
connp->conn_ulp = protocol;
if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) {
/* Bind to IPv4 address */
ipaddr_t v4src;
IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src);
error = ip_bind_laddr_v4(connp, mpp, protocol, v4src, lport,
fanout_insert);
if (error != 0)
goto bad_addr;
connp->conn_pkt_isv6 = B_FALSE;
} else {
if (IN6_IS_ADDR_V4MAPPED(v6srcp)) {
error = 0;
goto bad_addr;
}
error = ip_bind_laddr_v6(connp, mpp, protocol, v6srcp,
lport, fanout_insert);
if (error != 0)
goto bad_addr;
connp->conn_pkt_isv6 = B_TRUE;
}
if (orig_pkt_isv6 != connp->conn_pkt_isv6)
ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst);
return (0);
bad_addr:
if (error < 0)
error = -TBADADDR;
return (error);
}
/*
* Verify that both the source and destination addresses
* are valid. If verify_dst, then destination address must also be reachable,
* i.e. have a route. Protocols like TCP want this. Tunnels do not.
* It takes ip6_pkt_t * as one of the arguments to determine correct
* source address when IPV6_PKTINFO or scope_id is set along with a link-local
* destination address. Note that parameter ipp is only useful for TCP connect
* when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all
* non-TCP cases, it is NULL and for all other tcp cases it is not useful.
*
*/
int
ip_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol,
in6_addr_t *v6src, uint16_t lport, const in6_addr_t *v6dst,
ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert,
boolean_t verify_dst, cred_t *cr)
{
ire_t *src_ire;
ire_t *dst_ire;
int error = 0;
ire_t *sire = NULL;
ire_t *md_dst_ire = NULL;
ill_t *md_ill = NULL;
ill_t *dst_ill = NULL;
ipif_t *src_ipif = NULL;
zoneid_t zoneid;
boolean_t ill_held = B_FALSE;
mblk_t *mp = NULL;
boolean_t ire_requested = B_FALSE;
boolean_t ipsec_policy_set = B_FALSE;
ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
ts_label_t *tsl = NULL;
cred_t *effective_cred = NULL;
if (mpp)
mp = *mpp;
if (mp != NULL) {
ire_requested = (DB_TYPE(mp) == IRE_DB_REQ_TYPE);
ipsec_policy_set = (DB_TYPE(mp) == IPSEC_POLICY_SET);
}
src_ire = dst_ire = NULL;
/*
* If we never got a disconnect before, clear it now.
*/
connp->conn_fully_bound = B_FALSE;
zoneid = connp->conn_zoneid;
/*
* Check whether Trusted Solaris policy allows communication with this
* host, and pretend that the destination is unreachable if not.
*
* This is never a problem for TCP, since that transport is known to
* compute the label properly as part of the tcp_rput_other T_BIND_ACK
* handling. If the remote is unreachable, it will be detected at that
* point, so there's no reason to check it here.
*
* Note that for sendto (and other datagram-oriented friends), this
* check is done as part of the data path label computation instead.
* The check here is just to make non-TCP connect() report the right
* error.
*/
if (is_system_labeled() && !IPCL_IS_TCP(connp)) {
if ((error = tsol_check_dest(cr, v6dst, IPV6_VERSION,
connp->conn_mac_exempt, &effective_cred)) != 0) {
if (ip_debug > 2) {
pr_addr_dbg(
"ip_bind_connected: no label for dst %s\n",
AF_INET6, v6dst);
}
goto bad_addr;
}
/*
* tsol_check_dest() may have created a new cred with
* a modified security label. Use that cred if it exists
* for ire lookups.
*/
if (effective_cred == NULL) {
tsl = crgetlabel(cr);
} else {
tsl = crgetlabel(effective_cred);
}
}
if (IN6_IS_ADDR_MULTICAST(v6dst)) {
ipif_t *ipif;
/*
* Use an "emulated" IRE_BROADCAST to tell the transport it
* is a multicast.
* Pass other information that matches
* the ipif (e.g. the source address).
*
* conn_multicast_ill is only used for IPv6 packets
*/
mutex_enter(&connp->conn_lock);
if (connp->conn_multicast_ill != NULL) {
(void) ipif_lookup_zoneid(connp->conn_multicast_ill,
zoneid, 0, &ipif);
} else {
/* Look for default like ip_wput_v6 */
ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst);
}
mutex_exit(&connp->conn_lock);
if (ipif == NULL || ire_requested ||
(dst_ire = ipif_to_ire_v6(ipif)) == NULL) {
if (ipif != NULL)
ipif_refrele(ipif);
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_bind_connected_v6: bad "
"connected multicast %s\n", AF_INET6,
v6dst);
}
error = ENETUNREACH;
goto bad_addr;
}
if (ipif != NULL)
ipif_refrele(ipif);
} else {
dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0,
NULL, &sire, zoneid, tsl,
MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT |
MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR,
ipst);
/*
* We also prevent ire's with src address INADDR_ANY to
* be used, which are created temporarily for
* sending out packets from endpoints that have
* conn_unspec_src set.
*/
if (dst_ire == NULL ||
(dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) {
/*
* When verifying destination reachability, we always
* complain.
*
* When not verifying destination reachability but we
* found an IRE, i.e. the destination is reachable,
* then the other tests still apply and we complain.
*/
if (verify_dst || (dst_ire != NULL)) {
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_bind_connected_v6: bad"
" connected dst %s\n", AF_INET6,
v6dst);
}
if (dst_ire == NULL ||
!(dst_ire->ire_type & IRE_HOST)) {
error = ENETUNREACH;
} else {
error = EHOSTUNREACH;
}
goto bad_addr;
}
}
}
/*
* If the app does a connect(), it means that it will most likely
* send more than 1 packet to the destination. It makes sense
* to clear the temporary flag.
*/
if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE &&
(dst_ire->ire_marks & IRE_MARK_TEMPORARY)) {
irb_t *irb = dst_ire->ire_bucket;
rw_enter(&irb->irb_lock, RW_WRITER);
/*
* We need to recheck for IRE_MARK_TEMPORARY after acquiring
* the lock in order to guarantee irb_tmp_ire_cnt.
*/
if (dst_ire->ire_marks & IRE_MARK_TEMPORARY) {
dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY;
irb->irb_tmp_ire_cnt--;
}
rw_exit(&irb->irb_lock);
}
ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION);
/*
* See if we should notify ULP about MDT; we do this whether or not
* ire_requested is TRUE, in order to handle active connects; MDT
* eligibility tests for passive connects are handled separately
* through tcp_adapt_ire(). We do this before the source address
* selection, because dst_ire may change after a call to
* ipif_select_source_v6(). This is a best-effort check, as the
* packet for this connection may not actually go through
* dst_ire->ire_stq, and the exact IRE can only be known after
* calling ip_newroute_v6(). This is why we further check on the
* IRE during Multidata packet transmission in tcp_multisend().
*/
if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set &&
dst_ire != NULL &&
!(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) &&
(md_ill = ire_to_ill(dst_ire), md_ill != NULL) &&
ILL_MDT_CAPABLE(md_ill)) {
md_dst_ire = dst_ire;
IRE_REFHOLD(md_dst_ire);
}
if (dst_ire != NULL &&
dst_ire->ire_type == IRE_LOCAL &&
dst_ire->ire_zoneid != zoneid &&
dst_ire->ire_zoneid != ALL_ZONES) {
src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL,
zoneid, 0, NULL,
MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT |
MATCH_IRE_RJ_BHOLE, ipst);
if (src_ire == NULL) {
error = EHOSTUNREACH;
goto bad_addr;
} else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
if (!(src_ire->ire_type & IRE_HOST))
error = ENETUNREACH;
else
error = EHOSTUNREACH;
goto bad_addr;
}
if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
src_ipif = src_ire->ire_ipif;
ipif_refhold(src_ipif);
*v6src = src_ipif->ipif_v6lcl_addr;
}
ire_refrele(src_ire);
src_ire = NULL;
} else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) {
if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) {
*v6src = sire->ire_src_addr_v6;
ire_refrele(dst_ire);
dst_ire = sire;
sire = NULL;
} else if (dst_ire->ire_type == IRE_CACHE &&
(dst_ire->ire_flags & RTF_SETSRC)) {
ASSERT(dst_ire->ire_zoneid == zoneid ||
dst_ire->ire_zoneid == ALL_ZONES);
*v6src = dst_ire->ire_src_addr_v6;
} else {
/*
* Pick a source address so that a proper inbound load
* spreading would happen. Use dst_ill specified by the
* app. when socket option or scopeid is set.
*/
int err;
if (ipp != NULL && ipp->ipp_ifindex != 0) {
uint_t if_index;
/*
* Scope id or IPV6_PKTINFO
*/
if_index = ipp->ipp_ifindex;
dst_ill = ill_lookup_on_ifindex(
if_index, B_TRUE, NULL, NULL, NULL, NULL,
ipst);
if (dst_ill == NULL) {
ip1dbg(("ip_bind_connected_v6:"
" bad ifindex %d\n", if_index));
error = EADDRNOTAVAIL;
goto bad_addr;
}
ill_held = B_TRUE;
} else if (connp->conn_outgoing_ill != NULL) {
/*
* For IPV6_BOUND_IF socket option,
* conn_outgoing_ill should be set
* already in TCP or UDP/ICMP.
*/
dst_ill = conn_get_held_ill(connp,
&connp->conn_outgoing_ill, &err);
if (err == ILL_LOOKUP_FAILED) {
ip1dbg(("ip_bind_connected_v6:"
"no ill for bound_if\n"));
error = EADDRNOTAVAIL;
goto bad_addr;
}
ill_held = B_TRUE;
} else if (dst_ire->ire_stq != NULL) {
/* No need to hold ill here */
dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr;
} else {
/* No need to hold ill here */
dst_ill = dst_ire->ire_ipif->ipif_ill;
}
if (ip6_asp_can_lookup(ipst)) {
src_ipif = ipif_select_source_v6(dst_ill,
v6dst, B_FALSE, connp->conn_src_preferences,
zoneid);
ip6_asp_table_refrele(ipst);
if (src_ipif == NULL) {
pr_addr_dbg("ip_bind_connected_v6: "
"no usable source address for "
"connection to %s\n",
AF_INET6, v6dst);
error = EADDRNOTAVAIL;
goto bad_addr;
}
*v6src = src_ipif->ipif_v6lcl_addr;
} else {
error = EADDRNOTAVAIL;
goto bad_addr;
}
}
}
/*
* We do ire_route_lookup_v6() here (and not an interface lookup)
* as we assert that v6src should only come from an
* UP interface for hard binding.
*/
src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL,
NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst);
/* src_ire must be a local|loopback */
if (!IRE_IS_LOCAL(src_ire)) {
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_bind_connected_v6: bad "
"connected src %s\n", AF_INET6, v6src);
}
error = EADDRNOTAVAIL;
goto bad_addr;
}
/*
* If the source address is a loopback address, the
* destination had best be local or multicast.
* The transports that can't handle multicast will reject
* those addresses.
*/
if (src_ire->ire_type == IRE_LOOPBACK &&
!(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) ||
IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) {
ip1dbg(("ip_bind_connected_v6: bad connected loopback\n"));
error = -1;
goto bad_addr;
}
/*
* Allow setting new policies. For example, disconnects come
* down as ipa_t bind. As we would have set conn_policy_cached
* to B_TRUE before, we should set it to B_FALSE, so that policy
* can change after the disconnect.
*/
connp->conn_policy_cached = B_FALSE;
/*
* The addresses have been verified. Initialize the conn
* before calling the policy as they expect the conns
* initialized.
*/
connp->conn_srcv6 = *v6src;
connp->conn_remv6 = *v6dst;
connp->conn_lport = lport;
connp->conn_fport = fport;
ASSERT(!(ipsec_policy_set && ire_requested));
if (ire_requested) {
iulp_t *ulp_info = NULL;
/*
* Note that sire will not be NULL if this is an off-link
* connection and there is not cache for that dest yet.
*
* XXX Because of an existing bug, if there are multiple
* default routes, the IRE returned now may not be the actual
* default route used (default routes are chosen in a
* round robin fashion). So if the metrics for different
* default routes are different, we may return the wrong
* metrics. This will not be a problem if the existing
* bug is fixed.
*/
if (sire != NULL)
ulp_info = &(sire->ire_uinfo);
if (!ip_bind_get_ire_v6(mpp, dst_ire, v6dst, ulp_info,
ipst)) {
error = -1;
goto bad_addr;
}
} else if (ipsec_policy_set) {
if (!ip_bind_ipsec_policy_set(connp, mp)) {
error = -1;
goto bad_addr;
}
}
/*
* Cache IPsec policy in this conn. If we have per-socket policy,
* we'll cache that. If we don't, we'll inherit global policy.
*
* We can't insert until the conn reflects the policy. Note that
* conn_policy_cached is set by ipsec_conn_cache_policy() even for
* connections where we don't have a policy. This is to prevent
* global policy lookups in the inbound path.
*
* If we insert before we set conn_policy_cached,
* CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true
* because global policy cound be non-empty. We normally call
* ipsec_check_policy() for conn_policy_cached connections only if
* conn_in_enforce_policy is set. But in this case,
* conn_policy_cached can get set anytime since we made the
* CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy()
* is called, which will make the above assumption false. Thus, we
* need to insert after we set conn_policy_cached.
*/
if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0)
goto bad_addr;
/* If not fanout_insert this was just an address verification */
if (fanout_insert) {
/*
* The addresses have been verified. Time to insert in
* the correct fanout list.
*/
error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst,
connp->conn_ports,
IPCL_IS_TCP(connp) ? connp->conn_tcp->tcp_bound_if : 0);
}
if (error == 0) {
connp->conn_fully_bound = B_TRUE;
/*
* Our initial checks for MDT have passed; the IRE is not
* LOCAL/LOOPBACK/BROADCAST, and the link layer seems to
* be supporting MDT. Pass the IRE, IPC and ILL into
* ip_mdinfo_return(), which performs further checks
* against them and upon success, returns the MDT info
* mblk which we will attach to the bind acknowledgment.
*/
if (md_dst_ire != NULL) {
mblk_t *mdinfo_mp;
ASSERT(md_ill != NULL);
ASSERT(md_ill->ill_mdt_capab != NULL);
if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp,
md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) {
if (mp == NULL) {
*mpp = mdinfo_mp;
} else {
linkb(mp, mdinfo_mp);
}
}
}
}
bad_addr:
if (ipsec_policy_set) {
ASSERT(mp != NULL);
freeb(mp);
/*
* As of now assume that nothing else accompanies
* IPSEC_POLICY_SET.
*/
*mpp = NULL;
}
refrele_and_quit:
if (src_ire != NULL)
IRE_REFRELE(src_ire);
if (dst_ire != NULL)
IRE_REFRELE(dst_ire);
if (sire != NULL)
IRE_REFRELE(sire);
if (src_ipif != NULL)
ipif_refrele(src_ipif);
if (md_dst_ire != NULL)
IRE_REFRELE(md_dst_ire);
if (ill_held && dst_ill != NULL)
ill_refrele(dst_ill);
if (effective_cred != NULL)
crfree(effective_cred);
return (error);
}
/* ARGSUSED */
int
ip_proto_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol,
in6_addr_t *v6srcp, uint16_t lport, const in6_addr_t *v6dstp,
ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert,
boolean_t verify_dst, cred_t *cr)
{
int error = 0;
boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6;
ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
ASSERT(connp->conn_af_isv6);
connp->conn_ulp = protocol;
/* For raw socket, the local port is not set. */
lport = lport != 0 ? lport : connp->conn_lport;
/*
* Bind to local and remote address. Local might be
* unspecified in which case it will be extracted from
* ire_src_addr_v6
*/
if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) {
/* Connect to IPv4 address */
ipaddr_t v4src;
ipaddr_t v4dst;
/* Is the source unspecified or mapped? */
if (!IN6_IS_ADDR_V4MAPPED(v6srcp) &&
!IN6_IS_ADDR_UNSPECIFIED(v6srcp)) {
ip1dbg(("ip_proto_bind_connected_v6: "
"dst is mapped, but not the src\n"));
goto bad_addr;
}
IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src);
IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst);
/* Always verify destination reachability. */
error = ip_bind_connected_v4(connp, mpp, protocol, &v4src,
lport, v4dst, fport, B_TRUE, B_TRUE, cr);
if (error != 0)
goto bad_addr;
IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp);
connp->conn_pkt_isv6 = B_FALSE;
} else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) {
ip1dbg(("ip_proto_bind_connected_v6: "
"src is mapped, but not the dst\n"));
goto bad_addr;
} else {
error = ip_bind_connected_v6(connp, mpp, protocol, v6srcp,
lport, v6dstp, ipp, fport, B_TRUE, verify_dst, cr);
if (error != 0)
goto bad_addr;
connp->conn_pkt_isv6 = B_TRUE;
}
if (orig_pkt_isv6 != connp->conn_pkt_isv6)
ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst);
/* Send it home. */
return (0);
bad_addr:
if (error == 0)
error = -TBADADDR;
return (error);
}
/*
* Get the ire in *mpp. Returns false if it fails (due to lack of space).
* Makes the IRE be IRE_BROADCAST if dst is a multicast address.
*/
/* ARGSUSED4 */
static boolean_t
ip_bind_get_ire_v6(mblk_t **mpp, ire_t *ire, const in6_addr_t *dst,
iulp_t *ulp_info, ip_stack_t *ipst)
{
mblk_t *mp = *mpp;
ire_t *ret_ire;
ASSERT(mp != NULL);
if (ire != NULL) {
/*
* mp initialized above to IRE_DB_REQ_TYPE
* appended mblk. Its <upper protocol>'s
* job to make sure there is room.
*/
if ((mp->b_datap->db_lim - mp->b_rptr) < sizeof (ire_t))
return (B_FALSE);
mp->b_datap->db_type = IRE_DB_TYPE;
mp->b_wptr = mp->b_rptr + sizeof (ire_t);
bcopy(ire, mp->b_rptr, sizeof (ire_t));
ret_ire = (ire_t *)mp->b_rptr;
if (IN6_IS_ADDR_MULTICAST(dst) ||
IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) {
ret_ire->ire_type = IRE_BROADCAST;
ret_ire->ire_addr_v6 = *dst;
}
if (ulp_info != NULL) {
bcopy(ulp_info, &(ret_ire->ire_uinfo),
sizeof (iulp_t));
}
ret_ire->ire_mp = mp;
} else {
/*
* No IRE was found. Remove IRE mblk.
*/
*mpp = mp->b_cont;
freeb(mp);
}
return (B_TRUE);
}
/*
* Add an ip6i_t header to the front of the mblk.
* Inline if possible else allocate a separate mblk containing only the ip6i_t.
* Returns NULL if allocation fails (and frees original message).
* Used in outgoing path when going through ip_newroute_*v6().
* Used in incoming path to pass ifindex to transports.
*/
mblk_t *
ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst)
{
mblk_t *mp1;
ip6i_t *ip6i;
ip6_t *ip6h;
ip6h = (ip6_t *)mp->b_rptr;
ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t));
if ((uchar_t *)ip6i < mp->b_datap->db_base ||
mp->b_datap->db_ref > 1) {
mp1 = allocb(sizeof (ip6i_t), BPRI_MED);
if (mp1 == NULL) {
freemsg(mp);
return (NULL);
}
mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim;
mp1->b_cont = mp;
mp = mp1;
ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t));
}
mp->b_rptr = (uchar_t *)ip6i;
ip6i->ip6i_vcf = ip6h->ip6_vcf;
ip6i->ip6i_nxt = IPPROTO_RAW;
if (ill != NULL) {
ip6i->ip6i_flags = IP6I_IFINDEX;
/*
* If `ill' is in an IPMP group, make sure we use the IPMP
* interface index so that e.g. IPV6_RECVPKTINFO will get the
* IPMP interface index and not an underlying interface index.
*/
if (IS_UNDER_IPMP(ill))
ip6i->ip6i_ifindex = ipmp_ill_get_ipmp_ifindex(ill);
else
ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex;
} else {
ip6i->ip6i_flags = 0;
}
ip6i->ip6i_nexthop = *dst;
return (mp);
}
/*
* Handle protocols with which IP is less intimate. There
* can be more than one stream bound to a particular
* protocol. When this is the case, normally each one gets a copy
* of any incoming packets.
*
* Zones notes:
* Packets will be distributed to streams in all zones. This is really only
* useful for ICMPv6 as only applications in the global zone can create raw
* sockets for other protocols.
*/
static void
ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill,
ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags,
boolean_t mctl_present, zoneid_t zoneid)
{
queue_t *rq;
mblk_t *mp1, *first_mp1;
in6_addr_t dst = ip6h->ip6_dst;
in6_addr_t src = ip6h->ip6_src;
mblk_t *first_mp = mp;
boolean_t secure, shared_addr;
conn_t *connp, *first_connp, *next_connp;
connf_t *connfp;
ip_stack_t *ipst = inill->ill_ipst;
ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;
if (mctl_present) {
mp = first_mp->b_cont;
secure = ipsec_in_is_secure(first_mp);
ASSERT(mp != NULL);
} else {
secure = B_FALSE;
}
shared_addr = (zoneid == ALL_ZONES);
if (shared_addr) {
/*
* We don't allow multilevel ports for raw IP, so no need to
* check for that here.
*/
zoneid = tsol_packet_to_zoneid(mp);
}
connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr];
mutex_enter(&connfp->connf_lock);
connp = connfp->connf_head;
for (connp = connfp->connf_head; connp != NULL;
connp = connp->conn_next) {
if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags,
zoneid) &&
(!is_system_labeled() ||
tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr,
connp)))
break;
}
if (connp == NULL) {
/*
* No one bound to this port. Is
* there a client that wants all
* unclaimed datagrams?
*/
mutex_exit(&connfp->connf_lock);
if (ip_fanout_send_icmp_v6(q, first_mp, flags,
ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER,
nexthdr_offset, mctl_present, zoneid, ipst)) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos);
}
return;
}
ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_upq != NULL);
CONN_INC_REF(connp);
first_connp = connp;
/*
* XXX: Fix the multiple protocol listeners case. We should not
* be walking the conn->next list here.
*/
connp = connp->conn_next;
for (;;) {
while (connp != NULL) {
if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill,
flags, zoneid) &&
(!is_system_labeled() ||
tsol_receive_local(mp, &dst, IPV6_VERSION,
shared_addr, connp)))
break;
connp = connp->conn_next;
}
/*
* Just copy the data part alone. The mctl part is
* needed just for verifying policy and it is never
* sent up.
*/
if (connp == NULL ||
(((first_mp1 = dupmsg(first_mp)) == NULL) &&
((first_mp1 = ip_copymsg(first_mp)) == NULL))) {
/*
* No more intested clients or memory
* allocation failed
*/
connp = first_connp;
break;
}
ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL);
mp1 = mctl_present ? first_mp1->b_cont : first_mp1;
CONN_INC_REF(connp);
mutex_exit(&connfp->connf_lock);
rq = connp->conn_rq;
/*
* For link-local always add ifindex so that transport can set
* sin6_scope_id. Avoid it for ICMP error fanout.
*/
if ((connp->conn_ip_recvpktinfo ||
IN6_IS_ADDR_LINKLOCAL(&src)) &&
(flags & IP_FF_IPINFO)) {
/* Add header */
mp1 = ip_add_info_v6(mp1, inill, &dst);
}
if (mp1 == NULL) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
} else if (
(IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) ||
(!IPCL_IS_NONSTR(connp) && !canputnext(rq))) {
if (flags & IP_FF_RAWIP) {
BUMP_MIB(ill->ill_ip_mib,
rawipIfStatsInOverflows);
} else {
BUMP_MIB(ill->ill_icmp6_mib,
ipv6IfIcmpInOverflows);
}
freemsg(mp1);
} else {
ASSERT(!IPCL_IS_IPTUN(connp));
if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
secure) {
first_mp1 = ipsec_check_inbound_policy(
first_mp1, connp, NULL, ip6h, mctl_present);
}
if (first_mp1 != NULL) {
if (mctl_present)
freeb(first_mp1);
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsHCInDelivers);
(connp->conn_recv)(connp, mp1, NULL);
}
}
mutex_enter(&connfp->connf_lock);
/* Follow the next pointer before releasing the conn. */
next_connp = connp->conn_next;
CONN_DEC_REF(connp);
connp = next_connp;
}
/* Last one. Send it upstream. */
mutex_exit(&connfp->connf_lock);
/* Initiate IPPF processing */
if (IP6_IN_IPP(flags, ipst)) {
uint_t ifindex;
mutex_enter(&ill->ill_lock);
ifindex = ill->ill_phyint->phyint_ifindex;
mutex_exit(&ill->ill_lock);
ip_process(IPP_LOCAL_IN, &mp, ifindex);
if (mp == NULL) {
CONN_DEC_REF(connp);
if (mctl_present)
freeb(first_mp);
return;
}
}
/*
* For link-local always add ifindex so that transport can set
* sin6_scope_id. Avoid it for ICMP error fanout.
*/
if ((connp->conn_ip_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) &&
(flags & IP_FF_IPINFO)) {
/* Add header */
mp = ip_add_info_v6(mp, inill, &dst);
if (mp == NULL) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
CONN_DEC_REF(connp);
if (mctl_present)
freeb(first_mp);
return;
} else if (mctl_present) {
first_mp->b_cont = mp;
} else {
first_mp = mp;
}
}
rq = connp->conn_rq;
if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) ||
(!IPCL_IS_NONSTR(connp) && !canputnext(rq))) {
if (flags & IP_FF_RAWIP) {
BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows);
} else {
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows);
}
freemsg(first_mp);
} else {
ASSERT(!IPCL_IS_IPTUN(connp));
if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure) {
first_mp = ipsec_check_inbound_policy(first_mp, connp,
NULL, ip6h, mctl_present);
if (first_mp == NULL) {
CONN_DEC_REF(connp);
return;
}
}
BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
(connp->conn_recv)(connp, mp, NULL);
if (mctl_present)
freeb(first_mp);
}
CONN_DEC_REF(connp);
}
/*
* Send an ICMP error after patching up the packet appropriately. Returns
* non-zero if the appropriate MIB should be bumped; zero otherwise.
*/
int
ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags,
uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset,
boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst)
{
ip6_t *ip6h;
mblk_t *first_mp;
boolean_t secure;
unsigned char db_type;
ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;
first_mp = mp;
if (mctl_present) {
mp = mp->b_cont;
secure = ipsec_in_is_secure(first_mp);
ASSERT(mp != NULL);
} else {
/*
* If this is an ICMP error being reported - which goes
* up as M_CTLs, we need to convert them to M_DATA till
* we finish checking with global policy because
* ipsec_check_global_policy() assumes M_DATA as clear
* and M_CTL as secure.
*/
db_type = mp->b_datap->db_type;
mp->b_datap->db_type = M_DATA;
secure = B_FALSE;
}
/*
* We are generating an icmp error for some inbound packet.
* Called from all ip_fanout_(udp, tcp, proto) functions.
* Before we generate an error, check with global policy
* to see whether this is allowed to enter the system. As
* there is no "conn", we are checking with global policy.
*/
ip6h = (ip6_t *)mp->b_rptr;
if (secure || ipss->ipsec_inbound_v6_policy_present) {
first_mp = ipsec_check_global_policy(first_mp, NULL,
NULL, ip6h, mctl_present, ipst->ips_netstack);
if (first_mp == NULL)
return (0);
}
if (!mctl_present)
mp->b_datap->db_type = db_type;
if (flags & IP_FF_SEND_ICMP) {
if (flags & IP_FF_HDR_COMPLETE) {
if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) {
freemsg(first_mp);
return (1);
}
}
switch (icmp_type) {
case ICMP6_DST_UNREACH:
icmp_unreachable_v6(WR(q), first_mp, icmp_code,
B_FALSE, B_FALSE, zoneid, ipst);
break;
case ICMP6_PARAM_PROB:
icmp_param_problem_v6(WR(q), first_mp, icmp_code,
nexthdr_offset, B_FALSE, B_FALSE, zoneid, ipst);
break;
default:
#ifdef DEBUG
panic("ip_fanout_send_icmp_v6: wrong type");
/*NOTREACHED*/
#else
freemsg(first_mp);
break;
#endif
}
} else {
freemsg(first_mp);
return (0);
}
return (1);
}
/*
* Fanout for TCP packets
* The caller puts <fport, lport> in the ports parameter.
*/
static void
ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill,
uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid)
{
mblk_t *first_mp;
boolean_t secure;
conn_t *connp;
tcph_t *tcph;
boolean_t syn_present = B_FALSE;
ip_stack_t *ipst = inill->ill_ipst;
ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;
first_mp = mp;
if (mctl_present) {
mp = first_mp->b_cont;
secure = ipsec_in_is_secure(first_mp);
ASSERT(mp != NULL);
} else {
secure = B_FALSE;
}
connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid, ipst);
if (connp == NULL ||
!conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) {
/*
* No hard-bound match. Send Reset.
*/
dblk_t *dp = mp->b_datap;
uint32_t ill_index;
ASSERT((dp->db_struioflag & STRUIO_IP) == 0);
/* Initiate IPPf processing, if needed. */
if (IPP_ENABLED(IPP_LOCAL_IN, ipst) &&
(flags & IP6_NO_IPPOLICY)) {
ill_index = ill->ill_phyint->phyint_ifindex;
ip_process(IPP_LOCAL_IN, &first_mp, ill_index);
if (first_mp == NULL) {
if (connp != NULL)
CONN_DEC_REF(connp);
return;
}
}
BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
if (connp != NULL) {
ip_xmit_reset_serialize(first_mp, hdr_len, zoneid,
ipst->ips_netstack->netstack_tcp, connp);
CONN_DEC_REF(connp);
} else {
tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid,
ipst->ips_netstack->netstack_tcp, NULL);
}
return;
}
tcph = (tcph_t *)&mp->b_rptr[hdr_len];
if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) {
if (IPCL_IS_TCP(connp)) {
squeue_t *sqp;
/*
* If the queue belongs to a conn, and fused tcp
* loopback is enabled, assign the eager's squeue
* to be that of the active connect's.
*/
if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion &&
CONN_Q(q) && IPCL_IS_TCP(Q_TO_CONN(q)) &&
!CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) &&
!secure &&
!IP6_IN_IPP(flags, ipst)) {
ASSERT(Q_TO_CONN(q)->conn_sqp != NULL);
sqp = Q_TO_CONN(q)->conn_sqp;
} else {
sqp = IP_SQUEUE_GET(lbolt);
}
mp->b_datap->db_struioflag |= STRUIO_EAGER;
DB_CKSUMSTART(mp) = (intptr_t)sqp;
/*
* db_cksumstuff is unused in the incoming
* path; Thus store the ifindex here. It will
* be cleared in tcp_conn_create_v6().
*/
DB_CKSUMSTUFF(mp) =
(intptr_t)ill->ill_phyint->phyint_ifindex;
syn_present = B_TRUE;
}
}
if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) {
uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF;
if ((flags & TH_RST) || (flags & TH_URG)) {
CONN_DEC_REF(connp);
freemsg(first_mp);
return;
}
if (flags & TH_ACK) {
ip_xmit_reset_serialize(first_mp, hdr_len, zoneid,
ipst->ips_netstack->netstack_tcp, connp);
CONN_DEC_REF(connp);
return;
}
CONN_DEC_REF(connp);
freemsg(first_mp);
return;
}
if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) {
first_mp = ipsec_check_inbound_policy(first_mp, connp,
NULL, ip6h, mctl_present);
if (first_mp == NULL) {
CONN_DEC_REF(connp);
return;
}
if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) {
ASSERT(syn_present);
if (mctl_present) {
ASSERT(first_mp != mp);
first_mp->b_datap->db_struioflag |=
STRUIO_POLICY;
} else {
ASSERT(first_mp == mp);
mp->b_datap->db_struioflag &=
~STRUIO_EAGER;
mp->b_datap->db_struioflag |=
STRUIO_POLICY;
}
} else {
/*
* Discard first_mp early since we're dealing with a
* fully-connected conn_t and tcp doesn't do policy in
* this case. Also, if someone is bound to IPPROTO_TCP
* over raw IP, they don't expect to see a M_CTL.
*/
if (mctl_present) {
freeb(first_mp);
mctl_present = B_FALSE;
}
first_mp = mp;
}
}
/* Initiate IPPF processing */
if (IP6_IN_IPP(flags, ipst)) {
uint_t ifindex;
mutex_enter(&ill->ill_lock);
ifindex = ill->ill_phyint->phyint_ifindex;
mutex_exit(&ill->ill_lock);
ip_process(IPP_LOCAL_IN, &mp, ifindex);
if (mp == NULL) {
CONN_DEC_REF(connp);
if (mctl_present) {
freeb(first_mp);
}
return;
} else if (mctl_present) {
/*
* ip_add_info_v6 might return a new mp.
*/
ASSERT(first_mp != mp);
first_mp->b_cont = mp;
} else {
first_mp = mp;
}
}
/*
* For link-local always add ifindex so that TCP can bind to that
* interface. Avoid it for ICMP error fanout.
*/
if (!syn_present && ((connp->conn_ip_recvpktinfo ||
IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) &&
(flags & IP_FF_IPINFO))) {
/* Add header */
mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst);
if (mp == NULL) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
CONN_DEC_REF(connp);
if (mctl_present)
freeb(first_mp);
return;
} else if (mctl_present) {
ASSERT(first_mp != mp);
first_mp->b_cont = mp;
} else {
first_mp = mp;
}
}
BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
if (IPCL_IS_TCP(connp)) {
SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, connp->conn_recv,
connp, ip_squeue_flag, SQTAG_IP6_TCP_INPUT);
} else {
/* SOCK_RAW, IPPROTO_TCP case */
(connp->conn_recv)(connp, first_mp, NULL);
CONN_DEC_REF(connp);
}
}
/*
* Fanout for UDP packets.
* The caller puts <fport, lport> in the ports parameter.
* ire_type must be IRE_BROADCAST for multicast and broadcast packets.
*
* If SO_REUSEADDR is set all multicast and broadcast packets
* will be delivered to all streams bound to the same port.
*
* Zones notes:
* Multicast packets will be distributed to streams in all zones.
*/
static void
ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports,
ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present,
zoneid_t zoneid)
{
uint32_t dstport, srcport;
in6_addr_t dst;
mblk_t *first_mp;
boolean_t secure;
conn_t *connp;
connf_t *connfp;
conn_t *first_conn;
conn_t *next_conn;
mblk_t *mp1, *first_mp1;
in6_addr_t src;
boolean_t shared_addr;
ip_stack_t *ipst = inill->ill_ipst;
ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;
first_mp = mp;
if (mctl_present) {
mp = first_mp->b_cont;
secure = ipsec_in_is_secure(first_mp);
ASSERT(mp != NULL);
} else {
secure = B_FALSE;
}
/* Extract ports in net byte order */
dstport = htons(ntohl(ports) & 0xFFFF);
srcport = htons(ntohl(ports) >> 16);
dst = ip6h->ip6_dst;
src = ip6h->ip6_src;
shared_addr = (zoneid == ALL_ZONES);
if (shared_addr) {
/*
* No need to handle exclusive-stack zones since ALL_ZONES
* only applies to the shared stack.
*/
zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport);
/*
* If no shared MLP is found, tsol_mlp_findzone returns
* ALL_ZONES. In that case, we assume it's SLP, and
* search for the zone based on the packet label.
* That will also return ALL_ZONES on failure, but
* we never allow conn_zoneid to be set to ALL_ZONES.
*/
if (zoneid == ALL_ZONES)
zoneid = tsol_packet_to_zoneid(mp);
}
/* Attempt to find a client stream based on destination port. */
connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)];
mutex_enter(&connfp->connf_lock);
connp = connfp->connf_head;
if (!IN6_IS_ADDR_MULTICAST(&dst)) {
/*
* Not multicast. Send to the one (first) client we find.
*/
while (connp != NULL) {
if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport,
src) && IPCL_ZONE_MATCH(connp, zoneid) &&
conn_wantpacket_v6(connp, ill, ip6h,
flags, zoneid)) {
break;
}
connp = connp->conn_next;
}
if (connp == NULL || connp->conn_upq == NULL)
goto notfound;
if (is_system_labeled() &&
!tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr,
connp))
goto notfound;
/* Found a client */
CONN_INC_REF(connp);
mutex_exit(&connfp->connf_lock);
if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) ||
(!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) {
freemsg(first_mp);
CONN_DEC_REF(connp);
return;
}
if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) {
first_mp = ipsec_check_inbound_policy(first_mp,
connp, NULL, ip6h, mctl_present);
if (first_mp == NULL) {
CONN_DEC_REF(connp);
return;
}
}
/* Initiate IPPF processing */
if (IP6_IN_IPP(flags, ipst)) {
uint_t ifindex;
mutex_enter(&ill->ill_lock);
ifindex = ill->ill_phyint->phyint_ifindex;
mutex_exit(&ill->ill_lock);
ip_process(IPP_LOCAL_IN, &mp, ifindex);
if (mp == NULL) {
CONN_DEC_REF(connp);
if (mctl_present)
freeb(first_mp);
return;
}
}
/*
* For link-local always add ifindex so that
* transport can set sin6_scope_id. Avoid it for
* ICMP error fanout.
*/
if ((connp->conn_ip_recvpktinfo ||
IN6_IS_ADDR_LINKLOCAL(&src)) &&
(flags & IP_FF_IPINFO)) {
/* Add header */
mp = ip_add_info_v6(mp, inill, &dst);
if (mp == NULL) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
CONN_DEC_REF(connp);
if (mctl_present)
freeb(first_mp);
return;
} else if (mctl_present) {
first_mp->b_cont = mp;
} else {
first_mp = mp;
}
}
BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
/* Send it upstream */
(connp->conn_recv)(connp, mp, NULL);
IP6_STAT(ipst, ip6_udp_fannorm);
CONN_DEC_REF(connp);
if (mctl_present)
freeb(first_mp);
return;
}
while (connp != NULL) {
if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) &&
conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) &&
(!is_system_labeled() ||
tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr,
connp)))
break;
connp = connp->conn_next;
}
if (connp == NULL || connp->conn_upq == NULL)
goto notfound;
first_conn = connp;
CONN_INC_REF(connp);
connp = connp->conn_next;
for (;;) {
while (connp != NULL) {
if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport,
src) && conn_wantpacket_v6(connp, ill, ip6h,
flags, zoneid) &&
(!is_system_labeled() ||
tsol_receive_local(mp, &dst, IPV6_VERSION,
shared_addr, connp)))
break;
connp = connp->conn_next;
}
/*
* Just copy the data part alone. The mctl part is
* needed just for verifying policy and it is never
* sent up.
*/
if (connp == NULL ||
(((first_mp1 = dupmsg(first_mp)) == NULL) &&
((first_mp1 = ip_copymsg(first_mp)) == NULL))) {
/*
* No more interested clients or memory
* allocation failed
*/
connp = first_conn;
break;
}
mp1 = mctl_present ? first_mp1->b_cont : first_mp1;
CONN_INC_REF(connp);
mutex_exit(&connfp->connf_lock);
/*
* For link-local always add ifindex so that transport
* can set sin6_scope_id. Avoid it for ICMP error
* fanout.
*/
if ((connp->conn_ip_recvpktinfo ||
IN6_IS_ADDR_LINKLOCAL(&src)) &&
(flags & IP_FF_IPINFO)) {
/* Add header */
mp1 = ip_add_info_v6(mp1, inill, &dst);
}
/* mp1 could have changed */
if (mctl_present)
first_mp1->b_cont = mp1;
else
first_mp1 = mp1;
if (mp1 == NULL) {
if (mctl_present)
freeb(first_mp1);
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
goto next_one;
}
if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) ||
(!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) {
BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
freemsg(first_mp1);
goto next_one;
}
if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) {
first_mp1 = ipsec_check_inbound_policy
(first_mp1, connp, NULL, ip6h,
mctl_present);
}
if (first_mp1 != NULL) {
if (mctl_present)
freeb(first_mp1);
BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
/* Send it upstream */
(connp->conn_recv)(connp, mp1, NULL);
}
next_one:
mutex_enter(&connfp->connf_lock);
/* Follow the next pointer before releasing the conn. */
next_conn = connp->conn_next;
IP6_STAT(ipst, ip6_udp_fanmb);
CONN_DEC_REF(connp);
connp = next_conn;
}
/* Last one. Send it upstream. */
mutex_exit(&connfp->connf_lock);
/* Initiate IPPF processing */
if (IP6_IN_IPP(flags, ipst)) {
uint_t ifindex;
mutex_enter(&ill->ill_lock);
ifindex = ill->ill_phyint->phyint_ifindex;
mutex_exit(&ill->ill_lock);
ip_process(IPP_LOCAL_IN, &mp, ifindex);
if (mp == NULL) {
CONN_DEC_REF(connp);
if (mctl_present) {
freeb(first_mp);
}
return;
}
}
/*
* For link-local always add ifindex so that transport can set
* sin6_scope_id. Avoid it for ICMP error fanout.
*/
if ((connp->conn_ip_recvpktinfo ||
IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IPINFO)) {
/* Add header */
mp = ip_add_info_v6(mp, inill, &dst);
if (mp == NULL) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
CONN_DEC_REF(connp);
if (mctl_present)
freeb(first_mp);
return;
} else if (mctl_present) {
first_mp->b_cont = mp;
} else {
first_mp = mp;
}
}
if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) ||
(!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) {
BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
freemsg(mp);
} else {
if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) {
first_mp = ipsec_check_inbound_policy(first_mp,
connp, NULL, ip6h, mctl_present);
if (first_mp == NULL) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
CONN_DEC_REF(connp);
return;
}
}
BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
/* Send it upstream */
(connp->conn_recv)(connp, mp, NULL);
}
IP6_STAT(ipst, ip6_udp_fanmb);
CONN_DEC_REF(connp);
if (mctl_present)
freeb(first_mp);
return;
notfound:
mutex_exit(&connfp->connf_lock);
/*
* No one bound to this port. Is
* there a client that wants all
* unclaimed datagrams?
*/
if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) {
ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP,
0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present,
zoneid);
} else {
if (ip_fanout_send_icmp_v6(q, first_mp, flags,
ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0,
mctl_present, zoneid, ipst)) {
BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts);
}
}
}
/*
* int ip_find_hdr_v6()
*
* This routine is used by the upper layer protocols and the IP tunnel
* module to:
* - Set extension header pointers to appropriate locations
* - Determine IPv6 header length and return it
* - Return a pointer to the last nexthdr value
*
* The caller must initialize ipp_fields.
*
* NOTE: If multiple extension headers of the same type are present,
* ip_find_hdr_v6() will set the respective extension header pointers
* to the first one that it encounters in the IPv6 header. It also
* skips fragment headers. This routine deals with malformed packets
* of various sorts in which case the returned length is up to the
* malformed part.
*/
int
ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp)
{
uint_t length, ehdrlen;
uint8_t nexthdr;
uint8_t *whereptr, *endptr;
ip6_dest_t *tmpdstopts;
ip6_rthdr_t *tmprthdr;
ip6_hbh_t *tmphopopts;
ip6_frag_t *tmpfraghdr;
length = IPV6_HDR_LEN;
whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
endptr = mp->b_wptr;
nexthdr = ip6h->ip6_nxt;
while (whereptr < endptr) {
/* Is there enough left for len + nexthdr? */
if (whereptr + MIN_EHDR_LEN > endptr)
goto done;
switch (nexthdr) {
case IPPROTO_HOPOPTS:
tmphopopts = (ip6_hbh_t *)whereptr;
ehdrlen = 8 * (tmphopopts->ip6h_len + 1);
if ((uchar_t *)tmphopopts + ehdrlen > endptr)
goto done;
nexthdr = tmphopopts->ip6h_nxt;
/* return only 1st hbh */
if (!(ipp->ipp_fields & IPPF_HOPOPTS)) {
ipp->ipp_fields |= IPPF_HOPOPTS;
ipp->ipp_hopopts = tmphopopts;
ipp->ipp_hopoptslen = ehdrlen;
}
break;
case IPPROTO_DSTOPTS:
tmpdstopts = (ip6_dest_t *)whereptr;
ehdrlen = 8 * (tmpdstopts->ip6d_len + 1);
if ((uchar_t *)tmpdstopts + ehdrlen > endptr)
goto done;
nexthdr = tmpdstopts->ip6d_nxt;
/*
* ipp_dstopts is set to the destination header after a
* routing header.
* Assume it is a post-rthdr destination header
* and adjust when we find an rthdr.
*/
if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
ipp->ipp_fields |= IPPF_DSTOPTS;
ipp->ipp_dstopts = tmpdstopts;
ipp->ipp_dstoptslen = ehdrlen;
}
break;
case IPPROTO_ROUTING:
tmprthdr = (ip6_rthdr_t *)whereptr;
ehdrlen = 8 * (tmprthdr->ip6r_len + 1);
if ((uchar_t *)tmprthdr + ehdrlen > endptr)
goto done;
nexthdr = tmprthdr->ip6r_nxt;
/* return only 1st rthdr */
if (!(ipp->ipp_fields & IPPF_RTHDR)) {
ipp->ipp_fields |= IPPF_RTHDR;
ipp->ipp_rthdr = tmprthdr;
ipp->ipp_rthdrlen = ehdrlen;
}
/*
* Make any destination header we've seen be a
* pre-rthdr destination header.
*/
if (ipp->ipp_fields & IPPF_DSTOPTS) {
ipp->ipp_fields &= ~IPPF_DSTOPTS;
ipp->ipp_fields |= IPPF_RTDSTOPTS;
ipp->ipp_rtdstopts = ipp->ipp_dstopts;
ipp->ipp_dstopts = NULL;
ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen;
ipp->ipp_dstoptslen = 0;
}
break;
case IPPROTO_FRAGMENT:
tmpfraghdr = (ip6_frag_t *)whereptr;
ehdrlen = sizeof (ip6_frag_t);
if ((uchar_t *)tmpfraghdr + ehdrlen > endptr)
goto done;
nexthdr = tmpfraghdr->ip6f_nxt;
if (!(ipp->ipp_fields & IPPF_FRAGHDR)) {
ipp->ipp_fields |= IPPF_FRAGHDR;
ipp->ipp_fraghdr = tmpfraghdr;
ipp->ipp_fraghdrlen = ehdrlen;
}
break;
case IPPROTO_NONE:
default:
goto done;
}
length += ehdrlen;
whereptr += ehdrlen;
}
done:
if (nexthdrp != NULL)
*nexthdrp = nexthdr;
return (length);
}
int
ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid, ip_stack_t *ipst)
{
ire_t *ire;
if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
ire = ire_lookup_local_v6(zoneid, ipst);
if (ire == NULL) {
ip1dbg(("ip_hdr_complete_v6: no source IRE\n"));
return (1);
}
ip6h->ip6_src = ire->ire_addr_v6;
ire_refrele(ire);
}
ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
return (0);
}
/*
* Try to determine where and what are the IPv6 header length and
* pointer to nexthdr value for the upper layer protocol (or an
* unknown next hdr).
*
* Parameters returns a pointer to the nexthdr value;
* Must handle malformed packets of various sorts.
* Function returns failure for malformed cases.
*/
boolean_t
ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr,
uint8_t **nexthdrpp)
{
uint16_t length;
uint_t ehdrlen;
uint8_t *nexthdrp;
uint8_t *whereptr;
uint8_t *endptr;
ip6_dest_t *desthdr;
ip6_rthdr_t *rthdr;
ip6_frag_t *fraghdr;
ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION);
length = IPV6_HDR_LEN;
whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
endptr = mp->b_wptr;
nexthdrp = &ip6h->ip6_nxt;
while (whereptr < endptr) {
/* Is there enough left for len + nexthdr? */
if (whereptr + MIN_EHDR_LEN > endptr)
break;
switch (*nexthdrp) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
/* Assumes the headers are identical for hbh and dst */
desthdr = (ip6_dest_t *)whereptr;
ehdrlen = 8 * (desthdr->ip6d_len + 1);
if ((uchar_t *)desthdr + ehdrlen > endptr)
return (B_FALSE);
nexthdrp = &desthdr->ip6d_nxt;
break;
case IPPROTO_ROUTING:
rthdr = (ip6_rthdr_t *)whereptr;
ehdrlen = 8 * (rthdr->ip6r_len + 1);
if ((uchar_t *)rthdr + ehdrlen > endptr)
return (B_FALSE);
nexthdrp = &rthdr->ip6r_nxt;
break;
case IPPROTO_FRAGMENT:
fraghdr = (ip6_frag_t *)whereptr;
ehdrlen = sizeof (ip6_frag_t);
if ((uchar_t *)&fraghdr[1] > endptr)
return (B_FALSE);
nexthdrp = &fraghdr->ip6f_nxt;
break;
case IPPROTO_NONE:
/* No next header means we're finished */
default:
*hdr_length_ptr = length;
*nexthdrpp = nexthdrp;
return (B_TRUE);
}
length += ehdrlen;
whereptr += ehdrlen;
*hdr_length_ptr = length;
*nexthdrpp = nexthdrp;
}
switch (*nexthdrp) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
case IPPROTO_ROUTING:
case IPPROTO_FRAGMENT:
/*
* If any know extension headers are still to be processed,
* the packet's malformed (or at least all the IP header(s) are
* not in the same mblk - and that should never happen.
*/
return (B_FALSE);
default:
/*
* If we get here, we know that all of the IP headers were in
* the same mblk, even if the ULP header is in the next mblk.
*/
*hdr_length_ptr = length;
*nexthdrpp = nexthdrp;
return (B_TRUE);
}
}
/*
* Return the length of the IPv6 related headers (including extension headers)
* Returns a length even if the packet is malformed.
*/
int
ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h)
{
uint16_t hdr_len;
uint8_t *nexthdrp;
(void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp);
return (hdr_len);
}
/*
* IPv6 -
* ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need
* to send out a packet to a destination address for which we do not have
* specific routing information.
*
* Handle non-multicast packets. If ill is non-NULL the match is done
* for that ill.
*
* When a specific ill is specified (using IPV6_PKTINFO,
* IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match
* on routing entries (ftable and ctable) that have a matching
* ire->ire_ipif->ipif_ill. Thus this can only be used
* for destinations that are on-link for the specific ill
* and that can appear on multiple links. Thus it is useful
* for multicast destinations, link-local destinations, and
* at some point perhaps for site-local destinations (if the
* node sits at a site boundary).
* We create the cache entries in the regular ctable since
* it can not "confuse" things for other destinations.
*
* NOTE : These are the scopes of some of the variables that point at IRE,
* which needs to be followed while making any future modifications
* to avoid memory leaks.
*
* - ire and sire are the entries looked up initially by
* ire_ftable_lookup_v6.
* - ipif_ire is used to hold the interface ire associated with
* the new cache ire. But it's scope is limited, so we always REFRELE
* it before branching out to error paths.
* - save_ire is initialized before ire_create, so that ire returned
* by ire_create will not over-write the ire. We REFRELE save_ire
* before breaking out of the switch.
*
* Thus on failures, we have to REFRELE only ire and sire, if they
* are not NULL.
*/
/* ARGSUSED */
void
ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp,
const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst)
{
in6_addr_t v6gw;
in6_addr_t dst;
ire_t *ire = NULL;
ipif_t *src_ipif = NULL;
ill_t *dst_ill = NULL;
ire_t *sire = NULL;
ire_t *save_ire;
ip6_t *ip6h;
int err = 0;
mblk_t *first_mp;
ipsec_out_t *io;
ushort_t ire_marks = 0;
int match_flags;
ire_t *first_sire = NULL;
mblk_t *copy_mp = NULL;
mblk_t *xmit_mp = NULL;
in6_addr_t save_dst;
uint32_t multirt_flags =
MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP;
boolean_t multirt_is_resolvable;
boolean_t multirt_resolve_next;
boolean_t need_rele = B_FALSE;
boolean_t ip6_asp_table_held = B_FALSE;
tsol_ire_gw_secattr_t *attrp = NULL;
tsol_gcgrp_t *gcgrp = NULL;
tsol_gcgrp_addr_t ga;
ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp));
first_mp = mp;
if (mp->b_datap->db_type == M_CTL) {
mp = mp->b_cont;
io = (ipsec_out_t *)first_mp->b_rptr;
ASSERT(io->ipsec_out_type == IPSEC_OUT);
} else {
io = NULL;
}
ip6h = (ip6_t *)mp->b_rptr;
if (IN6_IS_ADDR_LOOPBACK(v6dstp)) {
ip1dbg(("ip_newroute_v6: dst with loopback addr\n"));
goto icmp_err_ret;
} else if (IN6_IS_ADDR_LOOPBACK(v6srcp)) {
ip1dbg(("ip_newroute_v6: src with loopback addr\n"));
goto icmp_err_ret;
}
/*
* If this IRE is created for forwarding or it is not for
* TCP traffic, mark it as temporary.
*
* Is it sufficient just to check the next header??
*/
if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt))
ire_marks |= IRE_MARK_TEMPORARY;
/*
* Get what we can from ire_ftable_lookup_v6 which will follow an IRE
* chain until it gets the most specific information available.
* For example, we know that there is no IRE_CACHE for this dest,
* but there may be an IRE_OFFSUBNET which specifies a gateway.
* ire_ftable_lookup_v6 will look up the gateway, etc.
*/
if (ill == NULL) {
match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT |
MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR;
ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0,
NULL, &sire, zoneid, 0, msg_getlabel(mp),
match_flags, ipst);
} else {
match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT |
MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL;
match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR;
/*
* Because nce_xmit() calls ip_output_v6() and NCEs are always
* tied to an underlying interface, IS_UNDER_IPMP() may be
* true even when building IREs that will be used for data
* traffic. As such, use the packet's source address to
* determine whether the traffic is test traffic, and set
* MATCH_IRE_MARK_TESTHIDDEN if so.
*/
if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) {
if (ipif_lookup_testaddr_v6(ill, v6srcp, NULL))
match_flags |= MATCH_IRE_MARK_TESTHIDDEN;
}
ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif,
&sire, zoneid, 0, msg_getlabel(mp), match_flags, ipst);
}
ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() "
"returned ire %p, sire %p\n", (void *)ire, (void *)sire));
/*
* We enter a loop that will be run only once in most cases.
* The loop is re-entered in the case where the destination
* can be reached through multiple RTF_MULTIRT-flagged routes.
* The intention is to compute multiple routes to a single
* destination in a single ip_newroute_v6 call.
* The information is contained in sire->ire_flags.
*/
do {
multirt_resolve_next = B_FALSE;
if (dst_ill != NULL) {
ill_refrele(dst_ill);
dst_ill = NULL;
}
if (src_ipif != NULL) {
ipif_refrele(src_ipif);
src_ipif = NULL;
}
if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) {
ip3dbg(("ip_newroute_v6: starting new resolution "
"with first_mp %p, tag %d\n",
(void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp)));
/*
* We check if there are trailing unresolved routes for
* the destination contained in sire.
*/
multirt_is_resolvable = ire_multirt_lookup_v6(&ire,
&sire, multirt_flags, msg_getlabel(mp), ipst);
ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, "
"ire %p, sire %p\n",
multirt_is_resolvable, (void *)ire, (void *)sire));
if (!multirt_is_resolvable) {
/*
* No more multirt routes to resolve; give up
* (all routes resolved or no more resolvable
* routes).
*/
if (ire != NULL) {
ire_refrele(ire);
ire = NULL;
}
} else {
ASSERT(sire != NULL);
ASSERT(ire != NULL);
/*
* We simply use first_sire as a flag that
* indicates if a resolvable multirt route has
* already been found during the preceding
* loops. If it is not the case, we may have
* to send an ICMP error to report that the
* destination is unreachable. We do not
* IRE_REFHOLD first_sire.
*/
if (first_sire == NULL) {
first_sire = sire;
}
}
}
if ((ire == NULL) || (ire == sire)) {
/*
* either ire == NULL (the destination cannot be
* resolved) or ire == sire (the gateway cannot be
* resolved). At this point, there are no more routes
* to resolve for the destination, thus we exit.
*/
if (ip_debug > 3) {
/* ip2dbg */
pr_addr_dbg("ip_newroute_v6: "
"can't resolve %s\n", AF_INET6, v6dstp);
}
ip3dbg(("ip_newroute_v6: "
"ire %p, sire %p, first_sire %p\n",
(void *)ire, (void *)sire, (void *)first_sire));
if (sire != NULL) {
ire_refrele(sire);
sire = NULL;
}
if (first_sire != NULL) {
/*
* At least one multirt route has been found
* in the same ip_newroute() call; there is no
* need to report an ICMP error.
* first_sire was not IRE_REFHOLDed.
*/
MULTIRT_DEBUG_UNTAG(first_mp);
freemsg(first_mp);
return;
}
ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0,
RTA_DST, ipst);
goto icmp_err_ret;
}
ASSERT(ire->ire_ipversion == IPV6_VERSION);
/*
* Verify that the returned IRE does not have either the
* RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is
* either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER.
*/
if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) ||
(ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0)
goto icmp_err_ret;
/*
* Increment the ire_ob_pkt_count field for ire if it is an
* INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and
* increment the same for the parent IRE, sire, if it is some
* sort of prefix IRE (which includes DEFAULT, PREFIX, and HOST)
*/
if ((ire->ire_type & IRE_INTERFACE) != 0) {
UPDATE_OB_PKT_COUNT(ire);
ire->ire_last_used_time = lbolt;
}
if (sire != NULL) {
mutex_enter(&sire->ire_lock);
v6gw = sire->ire_gateway_addr_v6;
mutex_exit(&sire->ire_lock);
ASSERT((sire->ire_type & (IRE_CACHETABLE |
IRE_INTERFACE)) == 0);
UPDATE_OB_PKT_COUNT(sire);
sire->ire_last_used_time = lbolt;
} else {
v6gw = ipv6_all_zeros;
}
/*
* We have a route to reach the destination. Find the
* appropriate ill, then get a source address that matches the
* right scope via ipif_select_source_v6().
*
* If we are here trying to create an IRE_CACHE for an offlink
* destination and have an IRE_CACHE entry for VNI, then use
* ire_stq instead since VNI's queue is a black hole.
*
* Note: While we pick a dst_ill we are really only interested
* in the ill for load spreading. The source ipif is
* determined by source address selection below.
*/
if ((ire->ire_type == IRE_CACHE) &&
IS_VNI(ire->ire_ipif->ipif_ill)) {
dst_ill = ire->ire_stq->q_ptr;
ill_refhold(dst_ill);
} else {
ill_t *ill = ire->ire_ipif->ipif_ill;
if (IS_IPMP(ill)) {
dst_ill =
ipmp_illgrp_hold_next_ill(ill->ill_grp);
} else {
dst_ill = ill;
ill_refhold(dst_ill);
}
}
if (dst_ill == NULL) {
if (ip_debug > 2) {
pr_addr_dbg("ip_newroute_v6 : no dst "
"ill for dst %s\n", AF_INET6, v6dstp);
}
goto icmp_err_ret;
}
if (ill != NULL && dst_ill != ill &&
!IS_IN_SAME_ILLGRP(dst_ill, ill)) {
/*
* We should have found a route matching "ill"
* as we called ire_ftable_lookup_v6 with
* MATCH_IRE_ILL. Rather than asserting when
* there is a mismatch, we just drop the packet.
*/
ip0dbg(("ip_newroute_v6: BOUND_IF failed: "
"dst_ill %s ill %s\n", dst_ill->ill_name,
ill->ill_name));
goto icmp_err_ret;
}
/*
* Pick a source address which matches the scope of the
* destination address.
* For RTF_SETSRC routes, the source address is imposed by the
* parent ire (sire).
*/
ASSERT(src_ipif == NULL);
/*
* Because nce_xmit() calls ip_output_v6() and NCEs are always
* tied to the underlying interface, IS_UNDER_IPMP() may be
* true even when building IREs that will be used for data
* traffic. As such, see if the packet's source address is a
* test address, and if so use that test address's ipif for
* the IRE so that the logic that sets IRE_MARK_TESTHIDDEN in
* ire_add_v6() can work properly.
*/
if (ill != NULL && IS_UNDER_IPMP(ill))
(void) ipif_lookup_testaddr_v6(ill, v6srcp, &src_ipif);
if (src_ipif == NULL && ire->ire_type == IRE_IF_RESOLVER &&
!IN6_IS_ADDR_UNSPECIFIED(&v6gw) &&
ip6_asp_can_lookup(ipst)) {
/*
* The ire cache entry we're adding is for the
* gateway itself. The source address in this case
* is relative to the gateway's address.
*/
ip6_asp_table_held = B_TRUE;
src_ipif = ipif_select_source_v6(dst_ill, &v6gw,
B_TRUE, IPV6_PREFER_SRC_DEFAULT, zoneid);
if (src_ipif != NULL)
ire_marks |= IRE_MARK_USESRC_CHECK;
} else if (src_ipif == NULL) {
if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) {
/*
* Check that the ipif matching the requested
* source address still exists.
*/
src_ipif = ipif_lookup_addr_v6(
&sire->ire_src_addr_v6, NULL, zoneid,
NULL, NULL, NULL, NULL, ipst);
}
if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) {
ip6_asp_table_held = B_TRUE;
src_ipif = ipif_select_source_v6(dst_ill,
v6dstp, B_FALSE,
IPV6_PREFER_SRC_DEFAULT, zoneid);
if (src_ipif != NULL)
ire_marks |= IRE_MARK_USESRC_CHECK;
}
}
if (src_ipif == NULL) {
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_newroute_v6: no src for "
"dst %s\n", AF_INET6, v6dstp);
printf("ip_newroute_v6: interface name %s\n",
dst_ill->ill_name);
}
goto icmp_err_ret;
}
if (ip_debug > 3) {
/* ip2dbg */
pr_addr_dbg("ip_newroute_v6: first hop %s\n",
AF_INET6, &v6gw);
}
ip2dbg(("\tire type %s (%d)\n",
ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type));
/*
* At this point in ip_newroute_v6(), ire is either the
* IRE_CACHE of the next-hop gateway for an off-subnet
* destination or an IRE_INTERFACE type that should be used
* to resolve an on-subnet destination or an on-subnet
* next-hop gateway.
*
* In the IRE_CACHE case, we have the following :
*
* 1) src_ipif - used for getting a source address.
*
* 2) dst_ill - from which we derive ire_stq/ire_rfq. This
* means packets using this IRE_CACHE will go out on dst_ill.
*
* 3) The IRE sire will point to the prefix that is the longest
* matching route for the destination. These prefix types
* include IRE_DEFAULT, IRE_PREFIX, IRE_HOST.
*
* The newly created IRE_CACHE entry for the off-subnet
* destination is tied to both the prefix route and the
* interface route used to resolve the next-hop gateway
* via the ire_phandle and ire_ihandle fields, respectively.
*
* In the IRE_INTERFACE case, we have the following :
*
* 1) src_ipif - used for getting a source address.
*
* 2) dst_ill - from which we derive ire_stq/ire_rfq. This
* means packets using the IRE_CACHE that we will build
* here will go out on dst_ill.
*
* 3) sire may or may not be NULL. But, the IRE_CACHE that is
* to be created will only be tied to the IRE_INTERFACE that
* was derived from the ire_ihandle field.
*
* If sire is non-NULL, it means the destination is off-link
* and we will first create the IRE_CACHE for the gateway.
* Next time through ip_newroute_v6, we will create the
* IRE_CACHE for the final destination as described above.
*/
save_ire = ire;
switch (ire->ire_type) {
case IRE_CACHE: {
ire_t *ipif_ire;
ASSERT(sire != NULL);
if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) {
mutex_enter(&ire->ire_lock);
v6gw = ire->ire_gateway_addr_v6;
mutex_exit(&ire->ire_lock);
}
/*
* We need 3 ire's to create a new cache ire for an
* off-link destination from the cache ire of the
* gateway.
*
* 1. The prefix ire 'sire'
* 2. The cache ire of the gateway 'ire'
* 3. The interface ire 'ipif_ire'
*
* We have (1) and (2). We lookup (3) below.
*
* If there is no interface route to the gateway,
* it is a race condition, where we found the cache
* but the inteface route has been deleted.
*/
ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire);
if (ipif_ire == NULL) {
ip1dbg(("ip_newroute_v6:"
"ire_ihandle_lookup_offlink_v6 failed\n"));
goto icmp_err_ret;
}
/*
* Note: the new ire inherits RTF_SETSRC
* and RTF_MULTIRT to propagate these flags from prefix
* to cache.
*/
/*
* Check cached gateway IRE for any security
* attributes; if found, associate the gateway
* credentials group to the destination IRE.
*/
if ((attrp = save_ire->ire_gw_secattr) != NULL) {
mutex_enter(&attrp->igsa_lock);
if ((gcgrp = attrp->igsa_gcgrp) != NULL)
GCGRP_REFHOLD(gcgrp);
mutex_exit(&attrp->igsa_lock);
}
ire = ire_create_v6(
v6dstp, /* dest address */
&ipv6_all_ones, /* mask */
&src_ipif->ipif_v6src_addr, /* source address */
&v6gw, /* gateway address */
&save_ire->ire_max_frag,
NULL, /* src nce */
dst_ill->ill_rq, /* recv-from queue */
dst_ill->ill_wq, /* send-to queue */
IRE_CACHE,
src_ipif,
&sire->ire_mask_v6, /* Parent mask */
sire->ire_phandle, /* Parent handle */
ipif_ire->ire_ihandle, /* Interface handle */
sire->ire_flags & /* flags if any */
(RTF_SETSRC | RTF_MULTIRT),
&(sire->ire_uinfo),
NULL,
gcgrp,
ipst);
if (ire == NULL) {
if (gcgrp != NULL) {
GCGRP_REFRELE(gcgrp);
gcgrp = NULL;
}
ire_refrele(save_ire);
ire_refrele(ipif_ire);
break;
}
/* reference now held by IRE */
gcgrp = NULL;
ire->ire_marks |= ire_marks;
/*
* Prevent sire and ipif_ire from getting deleted. The
* newly created ire is tied to both of them via the
* phandle and ihandle respectively.
*/
IRB_REFHOLD(sire->ire_bucket);
/* Has it been removed already ? */
if (sire->ire_marks & IRE_MARK_CONDEMNED) {
IRB_REFRELE(sire->ire_bucket);
ire_refrele(ipif_ire);
ire_refrele(save_ire);
break;
}
IRB_REFHOLD(ipif_ire->ire_bucket);
/* Has it been removed already ? */
if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) {
IRB_REFRELE(ipif_ire->ire_bucket);
IRB_REFRELE(sire->ire_bucket);
ire_refrele(ipif_ire);
ire_refrele(save_ire);
break;
}
xmit_mp = first_mp;
if (ire->ire_flags & RTF_MULTIRT) {
copy_mp = copymsg(first_mp);
if (copy_mp != NULL) {
xmit_mp = copy_mp;
MULTIRT_DEBUG_TAG(first_mp);
}
}
ire_add_then_send(q, ire, xmit_mp);
if (ip6_asp_table_held) {
ip6_asp_table_refrele(ipst);
ip6_asp_table_held = B_FALSE;
}
ire_refrele(save_ire);
/* Assert that sire is not deleted yet. */
ASSERT(sire->ire_ptpn != NULL);
IRB_REFRELE(sire->ire_bucket);
/* Assert that ipif_ire is not deleted yet. */
ASSERT(ipif_ire->ire_ptpn != NULL);
IRB_REFRELE(ipif_ire->ire_bucket);
ire_refrele(ipif_ire);
if (copy_mp != NULL) {
/*
* Search for the next unresolved
* multirt route.
*/
copy_mp = NULL;
ipif_ire = NULL;
ire = NULL;
/* re-enter the loop */
multirt_resolve_next = B_TRUE;
continue;
}
ire_refrele(sire);
ill_refrele(dst_ill);
ipif_refrele(src_ipif);
return;
}
case IRE_IF_NORESOLVER:
/*
* We have what we need to build an IRE_CACHE.
*
* handle the Gated case, where we create
* a NORESOLVER route for loopback.
*/
if (dst_ill->ill_net_type != IRE_IF_NORESOLVER)
break;
/*
* TSol note: We are creating the ire cache for the
* destination 'dst'. If 'dst' is offlink, going
* through the first hop 'gw', the security attributes
* of 'dst' must be set to point to the gateway
* credentials of gateway 'gw'. If 'dst' is onlink, it
* is possible that 'dst' is a potential gateway that is
* referenced by some route that has some security
* attributes. Thus in the former case, we need to do a
* gcgrp_lookup of 'gw' while in the latter case we
* need to do gcgrp_lookup of 'dst' itself.
*/
ga.ga_af = AF_INET6;
if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw))
ga.ga_addr = v6gw;
else
ga.ga_addr = *v6dstp;
gcgrp = gcgrp_lookup(&ga, B_FALSE);
/*
* Note: the new ire inherits sire flags RTF_SETSRC
* and RTF_MULTIRT to propagate those rules from prefix
* to cache.
*/
ire = ire_create_v6(
v6dstp, /* dest address */
&ipv6_all_ones, /* mask */
&src_ipif->ipif_v6src_addr, /* source address */
&v6gw, /* gateway address */
&save_ire->ire_max_frag,
NULL, /* no src nce */
dst_ill->ill_rq, /* recv-from queue */
dst_ill->ill_wq, /* send-to queue */
IRE_CACHE,
src_ipif,
&save_ire->ire_mask_v6, /* Parent mask */
(sire != NULL) ? /* Parent handle */
sire->ire_phandle : 0,
save_ire->ire_ihandle, /* Interface handle */
(sire != NULL) ? /* flags if any */
sire->ire_flags &
(RTF_SETSRC | RTF_MULTIRT) : 0,
&(save_ire->ire_uinfo),
NULL,
gcgrp,
ipst);
if (ire == NULL) {
if (gcgrp != NULL) {
GCGRP_REFRELE(gcgrp);
gcgrp = NULL;
}
ire_refrele(save_ire);
break;
}
/* reference now held by IRE */
gcgrp = NULL;
ire->ire_marks |= ire_marks;
if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw))
dst = v6gw;
else
dst = *v6dstp;
err = ndp_noresolver(dst_ill, &dst);
if (err != 0) {
ire_refrele(save_ire);
break;
}
/* Prevent save_ire from getting deleted */
IRB_REFHOLD(save_ire->ire_bucket);
/* Has it been removed already ? */
if (save_ire->ire_marks & IRE_MARK_CONDEMNED) {
IRB_REFRELE(save_ire->ire_bucket);
ire_refrele(save_ire);
break;
}
xmit_mp = first_mp;
/*
* In case of MULTIRT, a copy of the current packet
* to send is made to further re-enter the
* loop and attempt another route resolution
*/
if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) {
copy_mp = copymsg(first_mp);
if (copy_mp != NULL) {
xmit_mp = copy_mp;
MULTIRT_DEBUG_TAG(first_mp);
}
}
ire_add_then_send(q, ire, xmit_mp);
if (ip6_asp_table_held) {
ip6_asp_table_refrele(ipst);
ip6_asp_table_held = B_FALSE;
}
/* Assert that it is not deleted yet. */
ASSERT(save_ire->ire_ptpn != NULL);
IRB_REFRELE(save_ire->ire_bucket);
ire_refrele(save_ire);
if (copy_mp != NULL) {
/*
* If we found a (no)resolver, we ignore any
* trailing top priority IRE_CACHE in
* further loops. This ensures that we do not
* omit any (no)resolver despite the priority
* in this call.
* IRE_CACHE, if any, will be processed
* by another thread entering ip_newroute(),
* (on resolver response, for example).
* We use this to force multiple parallel
* resolution as soon as a packet needs to be
* sent. The result is, after one packet
* emission all reachable routes are generally
* resolved.
* Otherwise, complete resolution of MULTIRT
* routes would require several emissions as
* side effect.
*/
multirt_flags &= ~MULTIRT_CACHEGW;
/*
* Search for the next unresolved multirt
* route.
*/
copy_mp = NULL;
save_ire = NULL;
ire = NULL;
/* re-enter the loop */
multirt_resolve_next = B_TRUE;
continue;
}
/* Don't need sire anymore */
if (sire != NULL)
ire_refrele(sire);
ill_refrele(dst_ill);
ipif_refrele(src_ipif);
return;
case IRE_IF_RESOLVER:
/*
* We can't build an IRE_CACHE yet, but at least we
* found a resolver that can help.
*/
dst = *v6dstp;
/*
* To be at this point in the code with a non-zero gw
* means that dst is reachable through a gateway that
* we have never resolved. By changing dst to the gw
* addr we resolve the gateway first. When
* ire_add_then_send() tries to put the IP dg to dst,
* it will reenter ip_newroute() at which time we will
* find the IRE_CACHE for the gw and create another
* IRE_CACHE above (for dst itself).
*/
if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) {
save_dst = dst;
dst = v6gw;
v6gw = ipv6_all_zeros;
}
if (dst_ill->ill_flags & ILLF_XRESOLV) {
/*
* Ask the external resolver to do its thing.
* Make an mblk chain in the following form:
* ARQ_REQ_MBLK-->IRE_MBLK-->packet
*/
mblk_t *ire_mp;
mblk_t *areq_mp;
areq_t *areq;
in6_addr_t *addrp;
ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n"));
if (ip6_asp_table_held) {
ip6_asp_table_refrele(ipst);
ip6_asp_table_held = B_FALSE;
}
ire = ire_create_mp_v6(
&dst, /* dest address */
&ipv6_all_ones, /* mask */
&src_ipif->ipif_v6src_addr,
/* source address */
&v6gw, /* gateway address */
NULL, /* no src nce */
dst_ill->ill_rq, /* recv-from queue */
dst_ill->ill_wq, /* send-to queue */
IRE_CACHE,
src_ipif,
&save_ire->ire_mask_v6, /* Parent mask */
0,
save_ire->ire_ihandle,
/* Interface handle */
0, /* flags if any */
&(save_ire->ire_uinfo),
NULL,
NULL,
ipst);
ire_refrele(save_ire);
if (ire == NULL) {
ip1dbg(("ip_newroute_v6:"
"ire is NULL\n"));
break;
}
if ((sire != NULL) &&
(sire->ire_flags & RTF_MULTIRT)) {
/*
* processing a copy of the packet to
* send for further resolution loops
*/
copy_mp = copymsg(first_mp);
if (copy_mp != NULL)
MULTIRT_DEBUG_TAG(copy_mp);
}
ire->ire_marks |= ire_marks;
ire_mp = ire->ire_mp;
/*
* Now create or find an nce for this interface.
* The hw addr will need to to be set from
* the reply to the AR_ENTRY_QUERY that
* we're about to send. This will be done in
* ire_add_v6().
*/
err = ndp_resolver(dst_ill, &dst, mp, zoneid);
switch (err) {
case 0:
/*
* New cache entry created.
* Break, then ask the external
* resolver.
*/
break;
case EINPROGRESS:
/*
* Resolution in progress;
* packet has been queued by
* ndp_resolver().
*/
ire_delete(ire);
ire = NULL;
/*
* Check if another multirt
* route must be resolved.
*/
if (copy_mp != NULL) {
/*
* If we found a resolver, we
* ignore any trailing top
* priority IRE_CACHE in
* further loops. The reason is
* the same as for noresolver.
*/
multirt_flags &=
~MULTIRT_CACHEGW;
/*
* Search for the next
* unresolved multirt route.
*/
first_mp = copy_mp;
copy_mp = NULL;
mp = first_mp;
if (mp->b_datap->db_type ==
M_CTL) {
mp = mp->b_cont;
}
ASSERT(sire != NULL);
dst = save_dst;
/*
* re-enter the loop
*/
multirt_resolve_next =
B_TRUE;
continue;
}
if (sire != NULL)
ire_refrele(sire);
ill_refrele(dst_ill);
ipif_refrele(src_ipif);
return;
default:
/*
* Transient error; packet will be
* freed.
*/
ire_delete(ire);
ire = NULL;
break;
}
if (err != 0)
break;
/*
* Now set up the AR_ENTRY_QUERY and send it.
*/
areq_mp = ill_arp_alloc(dst_ill,
(uchar_t *)&ipv6_areq_template,
(caddr_t)&dst);
if (areq_mp == NULL) {
ip1dbg(("ip_newroute_v6:"
"areq_mp is NULL\n"));
freemsg(ire_mp);
break;
}
areq = (areq_t *)areq_mp->b_rptr;
addrp = (in6_addr_t *)((char *)areq +
areq->areq_target_addr_offset);
*addrp = dst;
addrp = (in6_addr_t *)((char *)areq +
areq->areq_sender_addr_offset);
*addrp = src_ipif->ipif_v6src_addr;
/*
* link the chain, then send up to the resolver.
*/
linkb(areq_mp, ire_mp);
linkb(areq_mp, mp);
ip1dbg(("ip_newroute_v6:"
"putnext to resolver\n"));
putnext(dst_ill->ill_rq, areq_mp);
/*
* Check if another multirt route
* must be resolved.
*/
ire = NULL;
if (copy_mp != NULL) {
/*
* If we find a resolver, we ignore any
* trailing top priority IRE_CACHE in
* further loops. The reason is the
* same as for noresolver.
*/
multirt_flags &= ~MULTIRT_CACHEGW;
/*
* Search for the next unresolved
* multirt route.
*/
first_mp = copy_mp;
copy_mp = NULL;
mp = first_mp;
if (mp->b_datap->db_type == M_CTL) {
mp = mp->b_cont;
}
ASSERT(sire != NULL);
dst = save_dst;
/*
* re-enter the loop
*/
multirt_resolve_next = B_TRUE;
continue;
}
if (sire != NULL)
ire_refrele(sire);
ill_refrele(dst_ill);
ipif_refrele(src_ipif);
return;
}
/*
* Non-external resolver case.
*
* TSol note: Please see the note above the
* IRE_IF_NORESOLVER case.
*/
ga.ga_af = AF_INET6;
ga.ga_addr = dst;
gcgrp = gcgrp_lookup(&ga, B_FALSE);
ire = ire_create_v6(
&dst, /* dest address */
&ipv6_all_ones, /* mask */
&src_ipif->ipif_v6src_addr, /* source address */
&v6gw, /* gateway address */
&save_ire->ire_max_frag,
NULL, /* no src nce */
dst_ill->ill_rq, /* recv-from queue */
dst_ill->ill_wq, /* send-to queue */
IRE_CACHE,
src_ipif,
&save_ire->ire_mask_v6, /* Parent mask */
0,
save_ire->ire_ihandle, /* Interface handle */
0, /* flags if any */
&(save_ire->ire_uinfo),
NULL,
gcgrp,
ipst);
if (ire == NULL) {
if (gcgrp != NULL) {
GCGRP_REFRELE(gcgrp);
gcgrp = NULL;
}
ire_refrele(save_ire);
break;
}
/* reference now held by IRE */
gcgrp = NULL;
if ((sire != NULL) &&
(sire->ire_flags & RTF_MULTIRT)) {
copy_mp = copymsg(first_mp);
if (copy_mp != NULL)
MULTIRT_DEBUG_TAG(copy_mp);
}
ire->ire_marks |= ire_marks;
err = ndp_resolver(dst_ill, &dst, first_mp, zoneid);
switch (err) {
case 0:
/* Prevent save_ire from getting deleted */
IRB_REFHOLD(save_ire->ire_bucket);
/* Has it been removed already ? */
if (save_ire->ire_marks & IRE_MARK_CONDEMNED) {
IRB_REFRELE(save_ire->ire_bucket);
ire_refrele(save_ire);
break;
}
/*
* We have a resolved cache entry,
* add in the IRE.
*/
ire_add_then_send(q, ire, first_mp);
if (ip6_asp_table_held) {
ip6_asp_table_refrele(ipst);
ip6_asp_table_held = B_FALSE;
}
/* Assert that it is not deleted yet. */
ASSERT(save_ire->ire_ptpn != NULL);
IRB_REFRELE(save_ire->ire_bucket);
ire_refrele(save_ire);
/*
* Check if another multirt route
* must be resolved.
*/
ire = NULL;
if (copy_mp != NULL) {
/*
* If we find a resolver, we ignore any
* trailing top priority IRE_CACHE in
* further loops. The reason is the
* same as for noresolver.
*/
multirt_flags &= ~MULTIRT_CACHEGW;
/*
* Search for the next unresolved
* multirt route.
*/
first_mp = copy_mp;
copy_mp = NULL;
mp = first_mp;
if (mp->b_datap->db_type == M_CTL) {
mp = mp->b_cont;
}
ASSERT(sire != NULL);
dst = save_dst;
/*
* re-enter the loop
*/
multirt_resolve_next = B_TRUE;
continue;
}
if (sire != NULL)
ire_refrele(sire);
ill_refrele(dst_ill);
ipif_refrele(src_ipif);
return;
case EINPROGRESS:
/*
* mp was consumed - presumably queued.
* No need for ire, presumably resolution is
* in progress, and ire will be added when the
* address is resolved.
*/
if (ip6_asp_table_held) {
ip6_asp_table_refrele(ipst);
ip6_asp_table_held = B_FALSE;
}
ASSERT(ire->ire_nce == NULL);
ire_delete(ire);
ire_refrele(save_ire);
/*
* Check if another multirt route
* must be resolved.
*/
ire = NULL;
if (copy_mp != NULL) {
/*
* If we find a resolver, we ignore any
* trailing top priority IRE_CACHE in
* further loops. The reason is the
* same as for noresolver.
*/
multirt_flags &= ~MULTIRT_CACHEGW;
/*
* Search for the next unresolved
* multirt route.
*/
first_mp = copy_mp;
copy_mp = NULL;
mp = first_mp;
if (mp->b_datap->db_type == M_CTL) {
mp = mp->b_cont;
}
ASSERT(sire != NULL);
dst = save_dst;
/*
* re-enter the loop
*/
multirt_resolve_next = B_TRUE;
continue;
}
if (sire != NULL)
ire_refrele(sire);
ill_refrele(dst_ill);
ipif_refrele(src_ipif);
return;
default:
/* Some transient error */
ASSERT(ire->ire_nce == NULL);
ire_refrele(save_ire);
break;
}
break;
default:
break;
}
if (ip6_asp_table_held) {
ip6_asp_table_refrele(ipst);
ip6_asp_table_held = B_FALSE;
}
} while (multirt_resolve_next);
err_ret:
ip1dbg(("ip_newroute_v6: dropped\n"));
if (src_ipif != NULL)
ipif_refrele(src_ipif);
if (dst_ill != NULL) {
need_rele = B_TRUE;
ill = dst_ill;
}
if (ill != NULL) {
if (mp->b_prev != NULL) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
} else {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
}
if (need_rele)
ill_refrele(ill);
} else {
if (mp->b_prev != NULL) {
BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards);
} else {
BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards);
}
}
/* Did this packet originate externally? */
if (mp->b_prev) {
mp->b_next = NULL;
mp->b_prev = NULL;
}
if (copy_mp != NULL) {
MULTIRT_DEBUG_UNTAG(copy_mp);
freemsg(copy_mp);
}
MULTIRT_DEBUG_UNTAG(first_mp);
freemsg(first_mp);
if (ire != NULL)
ire_refrele(ire);
if (sire != NULL)
ire_refrele(sire);
return;
icmp_err_ret:
if (ip6_asp_table_held)
ip6_asp_table_refrele(ipst);
if (src_ipif != NULL)
ipif_refrele(src_ipif);
if (dst_ill != NULL) {
need_rele = B_TRUE;
ill = dst_ill;
}
ip1dbg(("ip_newroute_v6: no route\n"));
if (sire != NULL)
ire_refrele(sire);
/*
* We need to set sire to NULL to avoid double freeing if we
* ever goto err_ret from below.
*/
sire = NULL;
ip6h = (ip6_t *)mp->b_rptr;
/* Skip ip6i_t header if present */
if (ip6h->ip6_nxt == IPPROTO_RAW) {
/* Make sure the IPv6 header is present */
if ((mp->b_wptr - (uchar_t *)ip6h) <
sizeof (ip6i_t) + IPV6_HDR_LEN) {
if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) {
ip1dbg(("ip_newroute_v6: pullupmsg failed\n"));
goto err_ret;
}
}
mp->b_rptr += sizeof (ip6i_t);
ip6h = (ip6_t *)mp->b_rptr;
}
/* Did this packet originate externally? */
if (mp->b_prev) {
if (ill != NULL) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes);
} else {
BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInNoRoutes);
}
mp->b_next = NULL;
mp->b_prev = NULL;
q = WR(q);
} else {
if (ill != NULL) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes);
} else {
BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes);
}
if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) {
/* Failed */
if (copy_mp != NULL) {
MULTIRT_DEBUG_UNTAG(copy_mp);
freemsg(copy_mp);
}
MULTIRT_DEBUG_UNTAG(first_mp);
freemsg(first_mp);
if (ire != NULL)
ire_refrele(ire);
if (need_rele)
ill_refrele(ill);
return;
}
}
if (need_rele)
ill_refrele(ill);
/*
* At this point we will have ire only if RTF_BLACKHOLE
* or RTF_REJECT flags are set on the IRE. It will not
* generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set.
*/
if (ire != NULL) {
if (ire->ire_flags & RTF_BLACKHOLE) {
ire_refrele(ire);
if (copy_mp != NULL) {
MULTIRT_DEBUG_UNTAG(copy_mp);
freemsg(copy_mp);
}
MULTIRT_DEBUG_UNTAG(first_mp);
freemsg(first_mp);
return;
}
ire_refrele(ire);
}
if (ip_debug > 3) {
/* ip2dbg */
pr_addr_dbg("ip_newroute_v6: no route to %s\n",
AF_INET6, v6dstp);
}
icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE,
B_FALSE, B_FALSE, zoneid, ipst);
}
/*
* ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever
* we need to send out a packet to a destination address for which we do not
* have specific routing information. It is only used for multicast packets.
*
* If unspec_src we allow creating an IRE with source address zero.
* ire_send_v6() will delete it after the packet is sent.
*/
void
ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif,
const in6_addr_t *v6dstp, const in6_addr_t *v6srcp, int unspec_src,
zoneid_t zoneid)
{
ire_t *ire = NULL;
ipif_t *src_ipif = NULL;
int err = 0;
ill_t *dst_ill = NULL;
ire_t *save_ire;
ipsec_out_t *io;
ill_t *ill;
mblk_t *first_mp;
ire_t *fire = NULL;
mblk_t *copy_mp = NULL;
const in6_addr_t *ire_v6srcp;
boolean_t probe = B_FALSE;
boolean_t multirt_resolve_next;
boolean_t ipif_held = B_FALSE;
boolean_t ill_held = B_FALSE;
boolean_t ip6_asp_table_held = B_FALSE;
ip_stack_t *ipst = ipif->ipif_ill->ill_ipst;
/*
* This loop is run only once in most cases.
* We loop to resolve further routes only when the destination
* can be reached through multiple RTF_MULTIRT-flagged ires.
*/
do {
multirt_resolve_next = B_FALSE;
if (dst_ill != NULL) {
ill_refrele(dst_ill);
dst_ill = NULL;
}
if (src_ipif != NULL) {
ipif_refrele(src_ipif);
src_ipif = NULL;
}
ASSERT(ipif != NULL);
ill = ipif->ipif_ill;
ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp));
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n",
AF_INET6, v6dstp);
printf("ip_newroute_ipif_v6: if %s, v6 %d\n",
ill->ill_name, ipif->ipif_isv6);
}
first_mp = mp;
if (mp->b_datap->db_type == M_CTL) {
mp = mp->b_cont;
io = (ipsec_out_t *)first_mp->b_rptr;
ASSERT(io->ipsec_out_type == IPSEC_OUT);
} else {
io = NULL;
}
/*
* If the interface is a pt-pt interface we look for an
* IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the
* local_address and the pt-pt destination address.
* Otherwise we just match the local address.
*/
if (!(ill->ill_flags & ILLF_MULTICAST)) {
goto err_ret;
}
/*
* We check if an IRE_OFFSUBNET for the addr that goes through
* ipif exists. We need it to determine if the RTF_SETSRC and/or
* RTF_MULTIRT flags must be honored.
*/
fire = ipif_lookup_multi_ire_v6(ipif, v6dstp);
ip2dbg(("ip_newroute_ipif_v6: "
"ipif_lookup_multi_ire_v6("
"ipif %p, dst %08x) = fire %p\n",
(void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))),
(void *)fire));
ASSERT(src_ipif == NULL);
/*
* Because nce_xmit() calls ip_output_v6() and NCEs are always
* tied to the underlying interface, IS_UNDER_IPMP() may be
* true even when building IREs that will be used for data
* traffic. As such, see if the packet's source address is a
* test address, and if so use that test address's ipif for
* the IRE so that the logic that sets IRE_MARK_TESTHIDDEN in
* ire_add_v6() can work properly.
*/
if (IS_UNDER_IPMP(ill))
probe = ipif_lookup_testaddr_v6(ill, v6srcp, &src_ipif);
/*
* Determine the outbound (destination) ill for this route.
* If IPMP is not in use, that's the same as our ill. If IPMP
* is in-use and we're on the IPMP interface, or we're on an
* underlying ill but sending data traffic, use a suitable
* destination ill from the group. The latter case covers a
* subtle edge condition with multicast: when we bring up an
* IPv6 data address, we will create an NCE on an underlying
* interface, and send solitications to ff02::1, which would
* take us through here, and cause us to create an IRE for
* ff02::1. To meet our defined semantics for multicast (and
* ensure there aren't unexpected echoes), that IRE needs to
* use the IPMP group's nominated multicast interface.
*
* Note: the source ipif is determined by source address
* selection later.
*/
if (IS_IPMP(ill) || (IS_UNDER_IPMP(ill) && !probe)) {
ill_t *ipmp_ill;
ipmp_illgrp_t *illg;
if (IS_UNDER_IPMP(ill)) {
ipmp_ill = ipmp_ill_hold_ipmp_ill(ill);
} else {
ipmp_ill = ill;
ill_refhold(ipmp_ill); /* for symmetry */
}
if (ipmp_ill == NULL)
goto err_ret;
illg = ipmp_ill->ill_grp;
if (IN6_IS_ADDR_MULTICAST(v6dstp))
dst_ill = ipmp_illgrp_hold_cast_ill(illg);
else
dst_ill = ipmp_illgrp_hold_next_ill(illg);
ill_refrele(ipmp_ill);
} else {
dst_ill = ill;
ill_refhold(dst_ill); /* for symmetry */
}
if (dst_ill == NULL) {
if (ip_debug > 2) {
pr_addr_dbg("ip_newroute_ipif_v6: "
"no dst ill for dst %s\n",
AF_INET6, v6dstp);
}
goto err_ret;
}
/*
* Pick a source address which matches the scope of the
* destination address.
* For RTF_SETSRC routes, the source address is imposed by the
* parent ire (fire).
*/
if (src_ipif == NULL && fire != NULL &&
(fire->ire_flags & RTF_SETSRC)) {
/*
* Check that the ipif matching the requested source
* address still exists.
*/
src_ipif = ipif_lookup_addr_v6(&fire->ire_src_addr_v6,
NULL, zoneid, NULL, NULL, NULL, NULL, ipst);
}
if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) {
ip6_asp_table_held = B_TRUE;
src_ipif = ipif_select_source_v6(dst_ill, v6dstp,
B_FALSE, IPV6_PREFER_SRC_DEFAULT, zoneid);
}
if (src_ipif == NULL) {
if (!unspec_src) {
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_newroute_ipif_v6: "
"no src for dst %s\n",
AF_INET6, v6dstp);
printf(" through interface %s\n",
dst_ill->ill_name);
}
goto err_ret;
}
ire_v6srcp = &ipv6_all_zeros;
src_ipif = ipif;
ipif_refhold(src_ipif);
} else {
ire_v6srcp = &src_ipif->ipif_v6src_addr;
}
ire = ipif_to_ire_v6(ipif);
if (ire == NULL) {
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n",
AF_INET6, &ipif->ipif_v6lcl_addr);
printf("ip_newroute_ipif_v6: "
"if %s\n", dst_ill->ill_name);
}
goto err_ret;
}
if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE))
goto err_ret;
ASSERT(ire->ire_ipversion == IPV6_VERSION);
ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),",
ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type));
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg(" address %s\n",
AF_INET6, &ire->ire_src_addr_v6);
}
save_ire = ire;
ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n",
(void *)ire, (void *)ipif));
if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) {
/*
* an IRE_OFFSUBET was looked up
* on that interface.
* this ire has RTF_MULTIRT flag,
* so the resolution loop
* will be re-entered to resolve
* additional routes on other
* interfaces. For that purpose,
* a copy of the packet is
* made at this point.
*/
fire->ire_last_used_time = lbolt;
copy_mp = copymsg(first_mp);
if (copy_mp) {
MULTIRT_DEBUG_TAG(copy_mp);
}
}
switch (ire->ire_type) {
case IRE_IF_NORESOLVER: {
/*
* We have what we need to build an IRE_CACHE.
*
* handle the Gated case, where we create
* a NORESOLVER route for loopback.
*/
if (dst_ill->ill_net_type != IRE_IF_NORESOLVER)
break;
/*
* The newly created ire will inherit the flags of the
* parent ire, if any.
*/
ire = ire_create_v6(
v6dstp, /* dest address */
&ipv6_all_ones, /* mask */
ire_v6srcp, /* source address */
NULL, /* gateway address */
&save_ire->ire_max_frag,
NULL, /* no src nce */
dst_ill->ill_rq, /* recv-from queue */
dst_ill->ill_wq, /* send-to queue */
IRE_CACHE,
src_ipif,
NULL,
(fire != NULL) ? /* Parent handle */
fire->ire_phandle : 0,
save_ire->ire_ihandle, /* Interface handle */
(fire != NULL) ?
(fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) :
0,
&ire_uinfo_null,
NULL,
NULL,
ipst);
if (ire == NULL) {
ire_refrele(save_ire);
break;
}
err = ndp_noresolver(dst_ill, v6dstp);
if (err != 0) {
ire_refrele(save_ire);
break;
}
/* Prevent save_ire from getting deleted */
IRB_REFHOLD(save_ire->ire_bucket);
/* Has it been removed already ? */
if (save_ire->ire_marks & IRE_MARK_CONDEMNED) {
IRB_REFRELE(save_ire->ire_bucket);
ire_refrele(save_ire);
break;
}
ire_add_then_send(q, ire, first_mp);
if (ip6_asp_table_held) {
ip6_asp_table_refrele(ipst);
ip6_asp_table_held = B_FALSE;
}
/* Assert that it is not deleted yet. */
ASSERT(save_ire->ire_ptpn != NULL);
IRB_REFRELE(save_ire->ire_bucket);
ire_refrele(save_ire);
if (fire != NULL) {
ire_refrele(fire);
fire = NULL;
}
/*
* The resolution loop is re-entered if we
* actually are in a multirouting case.
*/
if (copy_mp != NULL) {
boolean_t need_resolve =
ire_multirt_need_resolve_v6(v6dstp,
msg_getlabel(copy_mp), ipst);
if (!need_resolve) {
MULTIRT_DEBUG_UNTAG(copy_mp);
freemsg(copy_mp);
copy_mp = NULL;
} else {
/*
* ipif_lookup_group_v6() calls
* ire_lookup_multi_v6() that uses
* ire_ftable_lookup_v6() to find
* an IRE_INTERFACE for the group.
* In the multirt case,
* ire_lookup_multi_v6() then invokes
* ire_multirt_lookup_v6() to find
* the next resolvable ire.
* As a result, we obtain a new
* interface, derived from the
* next ire.
*/
if (ipif_held) {
ipif_refrele(ipif);
ipif_held = B_FALSE;
}
ipif = ipif_lookup_group_v6(v6dstp,
zoneid, ipst);
ip2dbg(("ip_newroute_ipif: "
"multirt dst %08x, ipif %p\n",
ntohl(V4_PART_OF_V6((*v6dstp))),
(void *)ipif));
if (ipif != NULL) {
ipif_held = B_TRUE;
mp = copy_mp;
copy_mp = NULL;
multirt_resolve_next =
B_TRUE;
continue;
} else {
freemsg(copy_mp);
}
}
}
ill_refrele(dst_ill);
if (ipif_held) {
ipif_refrele(ipif);
ipif_held = B_FALSE;
}
if (src_ipif != NULL)
ipif_refrele(src_ipif);
return;
}
case IRE_IF_RESOLVER: {
ASSERT(dst_ill->ill_isv6);
/*
* We obtain a partial IRE_CACHE which we will pass
* along with the resolver query. When the response
* comes back it will be there ready for us to add.
*/
/*
* the newly created ire will inherit the flags of the
* parent ire, if any.
*/
ire = ire_create_v6(
v6dstp, /* dest address */
&ipv6_all_ones, /* mask */
ire_v6srcp, /* source address */
NULL, /* gateway address */
&save_ire->ire_max_frag,
NULL, /* src nce */
dst_ill->ill_rq, /* recv-from queue */
dst_ill->ill_wq, /* send-to queue */
IRE_CACHE,
src_ipif,
NULL,
(fire != NULL) ? /* Parent handle */
fire->ire_phandle : 0,
save_ire->ire_ihandle, /* Interface handle */
(fire != NULL) ?
(fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) :
0,
&ire_uinfo_null,
NULL,
NULL,
ipst);
if (ire == NULL) {
ire_refrele(save_ire);
break;
}
/* Resolve and add ire to the ctable */
err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid);
switch (err) {
case 0:
/* Prevent save_ire from getting deleted */
IRB_REFHOLD(save_ire->ire_bucket);
/* Has it been removed already ? */
if (save_ire->ire_marks & IRE_MARK_CONDEMNED) {
IRB_REFRELE(save_ire->ire_bucket);
ire_refrele(save_ire);
break;
}
/*
* We have a resolved cache entry,
* add in the IRE.
*/
ire_add_then_send(q, ire, first_mp);
if (ip6_asp_table_held) {
ip6_asp_table_refrele(ipst);
ip6_asp_table_held = B_FALSE;
}
/* Assert that it is not deleted yet. */
ASSERT(save_ire->ire_ptpn != NULL);
IRB_REFRELE(save_ire->ire_bucket);
ire_refrele(save_ire);
if (fire != NULL) {
ire_refrele(fire);
fire = NULL;
}
/*
* The resolution loop is re-entered if we
* actually are in a multirouting case.
*/
if (copy_mp != NULL) {
boolean_t need_resolve =
ire_multirt_need_resolve_v6(v6dstp,
msg_getlabel(copy_mp), ipst);
if (!need_resolve) {
MULTIRT_DEBUG_UNTAG(copy_mp);
freemsg(copy_mp);
copy_mp = NULL;
} else {
/*
* ipif_lookup_group_v6() calls
* ire_lookup_multi_v6() that
* uses ire_ftable_lookup_v6()
* to find an IRE_INTERFACE for
* the group. In the multirt
* case, ire_lookup_multi_v6()
* then invokes
* ire_multirt_lookup_v6() to
* find the next resolvable ire.
* As a result, we obtain a new
* interface, derived from the
* next ire.
*/
if (ipif_held) {
ipif_refrele(ipif);
ipif_held = B_FALSE;
}
ipif = ipif_lookup_group_v6(
v6dstp, zoneid, ipst);
ip2dbg(("ip_newroute_ipif: "
"multirt dst %08x, "
"ipif %p\n",
ntohl(V4_PART_OF_V6(
(*v6dstp))),
(void *)ipif));
if (ipif != NULL) {
ipif_held = B_TRUE;
mp = copy_mp;
copy_mp = NULL;
multirt_resolve_next =
B_TRUE;
continue;
} else {
freemsg(copy_mp);
}
}
}
ill_refrele(dst_ill);
if (ipif_held) {
ipif_refrele(ipif);
ipif_held = B_FALSE;
}
if (src_ipif != NULL)
ipif_refrele(src_ipif);
return;
case EINPROGRESS:
/*
* mp was consumed - presumably queued.
* No need for ire, presumably resolution is
* in progress, and ire will be added when the
* address is resolved.
*/
if (ip6_asp_table_held) {
ip6_asp_table_refrele(ipst);
ip6_asp_table_held = B_FALSE;
}
ire_delete(ire);
ire_refrele(save_ire);
if (fire != NULL) {
ire_refrele(fire);
fire = NULL;
}
/*
* The resolution loop is re-entered if we
* actually are in a multirouting case.
*/
if (copy_mp != NULL) {
boolean_t need_resolve =
ire_multirt_need_resolve_v6(v6dstp,
msg_getlabel(copy_mp), ipst);
if (!need_resolve) {
MULTIRT_DEBUG_UNTAG(copy_mp);
freemsg(copy_mp);
copy_mp = NULL;
} else {
/*
* ipif_lookup_group_v6() calls
* ire_lookup_multi_v6() that
* uses ire_ftable_lookup_v6()
* to find an IRE_INTERFACE for
* the group. In the multirt
* case, ire_lookup_multi_v6()
* then invokes
* ire_multirt_lookup_v6() to
* find the next resolvable ire.
* As a result, we obtain a new
* interface, derived from the
* next ire.
*/
if (ipif_held) {
ipif_refrele(ipif);
ipif_held = B_FALSE;
}
ipif = ipif_lookup_group_v6(
v6dstp, zoneid, ipst);
ip2dbg(("ip_newroute_ipif: "
"multirt dst %08x, "
"ipif %p\n",
ntohl(V4_PART_OF_V6(
(*v6dstp))),
(void *)ipif));
if (ipif != NULL) {
ipif_held = B_TRUE;
mp = copy_mp;
copy_mp = NULL;
multirt_resolve_next =
B_TRUE;
continue;
} else {
freemsg(copy_mp);
}
}
}
ill_refrele(dst_ill);
if (ipif_held) {
ipif_refrele(ipif);
ipif_held = B_FALSE;
}
if (src_ipif != NULL)
ipif_refrele(src_ipif);
return;
default:
/* Some transient error */
ire_refrele(save_ire);
break;
}
break;
}
default:
break;
}
if (ip6_asp_table_held) {
ip6_asp_table_refrele(ipst);
ip6_asp_table_held = B_FALSE;
}
} while (multirt_resolve_next);
err_ret:
if (ip6_asp_table_held)
ip6_asp_table_refrele(ipst);
if (ire != NULL)
ire_refrele(ire);
if (fire != NULL)
ire_refrele(fire);
if (ipif != NULL && ipif_held)
ipif_refrele(ipif);
if (src_ipif != NULL)
ipif_refrele(src_ipif);
/* Multicast - no point in trying to generate ICMP error */
if (dst_ill != NULL) {
ill = dst_ill;
ill_held = B_TRUE;
}
if (mp->b_prev || mp->b_next) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
} else {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
}
ip1dbg(("ip_newroute_ipif_v6: dropped\n"));
mp->b_next = NULL;
mp->b_prev = NULL;
freemsg(first_mp);
if (ill_held)
ill_refrele(ill);
}
/*
* Parse and process any hop-by-hop or destination options.
*
* Assumes that q is an ill read queue so that ICMP errors for link-local
* destinations are sent out the correct interface.
*
* Returns -1 if there was an error and mp has been consumed.
* Returns 0 if no special action is needed.
* Returns 1 if the packet contained a router alert option for this node
* which is verified to be "interesting/known" for our implementation.
*
* XXX Note: In future as more hbh or dest options are defined,
* it may be better to have different routines for hbh and dest
* options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN
* may have same value in different namespaces. Or is it same namespace ??
* Current code checks for each opt_type (other than pads) if it is in
* the expected nexthdr (hbh or dest)
*/
static int
ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h,
uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_stack_t *ipst)
{
uint8_t opt_type;
uint_t optused;
int ret = 0;
mblk_t *first_mp;
const char *errtype;
zoneid_t zoneid;
ill_t *ill = q->q_ptr;
ipif_t *ipif;
first_mp = mp;
if (mp->b_datap->db_type == M_CTL) {
mp = mp->b_cont;
}
while (optlen != 0) {
opt_type = *optptr;
if (opt_type == IP6OPT_PAD1) {
optused = 1;
} else {
if (optlen < 2)
goto bad_opt;
errtype = "malformed";
if (opt_type == ip6opt_ls) {
optused = 2 + optptr[1];
if (optused > optlen)
goto bad_opt;
} else switch (opt_type) {
case IP6OPT_PADN:
/*
* Note:We don't verify that (N-2) pad octets
* are zero as required by spec. Adhere to
* "be liberal in what you accept..." part of
* implementation philosophy (RFC791,RFC1122)
*/
optused = 2 + optptr[1];
if (optused > optlen)
goto bad_opt;
break;
case IP6OPT_JUMBO:
if (hdr_type != IPPROTO_HOPOPTS)
goto opt_error;
goto opt_error; /* XXX Not implemented! */
case IP6OPT_ROUTER_ALERT: {
struct ip6_opt_router *or;
if (hdr_type != IPPROTO_HOPOPTS)
goto opt_error;
optused = 2 + optptr[1];
if (optused > optlen)
goto bad_opt;
or = (struct ip6_opt_router *)optptr;
/* Check total length and alignment */
if (optused != sizeof (*or) ||
((uintptr_t)or->ip6or_value & 0x1) != 0)
goto opt_error;
/* Check value */
switch (*((uint16_t *)or->ip6or_value)) {
case IP6_ALERT_MLD:
case IP6_ALERT_RSVP:
ret = 1;
}
break;
}
case IP6OPT_HOME_ADDRESS: {
/*
* Minimal support for the home address option
* (which is required by all IPv6 nodes).
* Implement by just swapping the home address
* and source address.
* XXX Note: this has IPsec implications since
* AH needs to take this into account.
* Also, when IPsec is used we need to ensure
* that this is only processed once
* in the received packet (to avoid swapping
* back and forth).
* NOTE:This option processing is considered
* to be unsafe and prone to a denial of
* service attack.
* The current processing is not safe even with
* IPsec secured IP packets. Since the home
* address option processing requirement still
* is in the IETF draft and in the process of
* being redefined for its usage, it has been
* decided to turn off the option by default.
* If this section of code needs to be executed,
* ndd variable ip6_ignore_home_address_opt
* should be set to 0 at the user's own risk.
*/
struct ip6_opt_home_address *oh;
in6_addr_t tmp;
if (ipst->ips_ipv6_ignore_home_address_opt)
goto opt_error;
if (hdr_type != IPPROTO_DSTOPTS)
goto opt_error;
optused = 2 + optptr[1];
if (optused > optlen)
goto bad_opt;
/*
* We did this dest. opt the first time
* around (i.e. before AH processing).
* If we've done AH... stop now.
*/
if (first_mp != mp) {
ipsec_in_t *ii;
ii = (ipsec_in_t *)first_mp->b_rptr;
if (ii->ipsec_in_ah_sa != NULL)
break;
}
oh = (struct ip6_opt_home_address *)optptr;
/* Check total length and alignment */
if (optused < sizeof (*oh) ||
((uintptr_t)oh->ip6oh_addr & 0x7) != 0)
goto opt_error;
/* Swap ip6_src and the home address */
tmp = ip6h->ip6_src;
/* XXX Note: only 8 byte alignment option */
ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr;
*(in6_addr_t *)oh->ip6oh_addr = tmp;
break;
}
case IP6OPT_TUNNEL_LIMIT:
if (hdr_type != IPPROTO_DSTOPTS) {
goto opt_error;
}
optused = 2 + optptr[1];
if (optused > optlen) {
goto bad_opt;
}
if (optused != 3) {
goto opt_error;
}
break;
default:
errtype = "unknown";
/* FALLTHROUGH */
opt_error:
/* Determine which zone should send error */
zoneid = ipif_lookup_addr_zoneid_v6(
&ip6h->ip6_dst, ill, ipst);
switch (IP6OPT_TYPE(opt_type)) {
case IP6OPT_TYPE_SKIP:
optused = 2 + optptr[1];
if (optused > optlen)
goto bad_opt;
ip1dbg(("ip_process_options_v6: %s "
"opt 0x%x skipped\n",
errtype, opt_type));
break;
case IP6OPT_TYPE_DISCARD:
ip1dbg(("ip_process_options_v6: %s "
"opt 0x%x; packet dropped\n",
errtype, opt_type));
freemsg(first_mp);
return (-1);
case IP6OPT_TYPE_ICMP:
if (zoneid == ALL_ZONES) {
freemsg(first_mp);
return (-1);
}
icmp_param_problem_v6(WR(q), first_mp,
ICMP6_PARAMPROB_OPTION,
(uint32_t)(optptr -
(uint8_t *)ip6h),
B_FALSE, B_FALSE, zoneid, ipst);
return (-1);
case IP6OPT_TYPE_FORCEICMP:
/*
* If we don't have a zone and the dst
* addr is multicast, then pick a zone
* based on the inbound interface.
*/
if (zoneid == ALL_ZONES &&
IN6_IS_ADDR_MULTICAST(
&ip6h->ip6_dst)) {
ipif = ipif_select_source_v6(
ill, &ip6h->ip6_src,
B_TRUE,
IPV6_PREFER_SRC_DEFAULT,
ALL_ZONES);
if (ipif != NULL) {
zoneid =
ipif->ipif_zoneid;
ipif_refrele(ipif);
}
}
if (zoneid == ALL_ZONES) {
freemsg(first_mp);
return (-1);
}
icmp_param_problem_v6(WR(q), first_mp,
ICMP6_PARAMPROB_OPTION,
(uint32_t)(optptr -
(uint8_t *)ip6h),
B_FALSE, B_TRUE, zoneid, ipst);
return (-1);
default:
ASSERT(0);
}
}
}
optlen -= optused;
optptr += optused;
}
return (ret);
bad_opt:
/* Determine which zone should send error */
zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst);
if (zoneid == ALL_ZONES) {
freemsg(first_mp);
} else {
icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION,
(uint32_t)(optptr - (uint8_t *)ip6h),
B_FALSE, B_FALSE, zoneid, ipst);
}
return (-1);
}
/*
* Process a routing header that is not yet empty.
* Because of RFC 5095, we now reject all route headers.
*/
static void
ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth,
ill_t *ill, mblk_t *hada_mp)
{
ip_stack_t *ipst = ill->ill_ipst;
ASSERT(rth->ip6r_segleft != 0);
if (!ipst->ips_ipv6_forward_src_routed) {
/* XXX Check for source routed out same interface? */
BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
freemsg(hada_mp);
freemsg(mp);
return;
}
if (hada_mp != NULL) {
freemsg(hada_mp);
freemsg(mp);
return;
}
/* Sent by forwarding path, and router is global zone */
icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER,
(uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), B_FALSE,
B_FALSE, GLOBAL_ZONEID, ipst);
}
/*
* Read side put procedure for IPv6 module.
*/
void
ip_rput_v6(queue_t *q, mblk_t *mp)
{
mblk_t *first_mp;
mblk_t *hada_mp = NULL;
ip6_t *ip6h;
boolean_t ll_multicast = B_FALSE;
boolean_t mctl_present = B_FALSE;
ill_t *ill;
struct iocblk *iocp;
uint_t flags = 0;
mblk_t *dl_mp;
ip_stack_t *ipst;
int check;
ill = (ill_t *)q->q_ptr;
ipst = ill->ill_ipst;
if (ill->ill_state_flags & ILL_CONDEMNED) {
union DL_primitives *dl;
dl = (union DL_primitives *)mp->b_rptr;
/*
* Things are opening or closing - only accept DLPI
* ack messages. If the stream is closing and ip_wsrv
* has completed, ip_close is out of the qwait, but has
* not yet completed qprocsoff. Don't proceed any further
* because the ill has been cleaned up and things hanging
* off the ill have been freed.
*/
if ((mp->b_datap->db_type != M_PCPROTO) ||
(dl->dl_primitive == DL_UNITDATA_IND)) {
inet_freemsg(mp);
return;
}
}
dl_mp = NULL;
switch (mp->b_datap->db_type) {
case M_DATA: {
int hlen;
uchar_t *ucp;
struct ether_header *eh;
dl_unitdata_ind_t *dui;
/*
* This is a work-around for CR 6451644, a bug in Nemo. It
* should be removed when that problem is fixed.
*/
if (ill->ill_mactype == DL_ETHER &&
(hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) &&
(ucp = mp->b_rptr)[-1] == (ETHERTYPE_IPV6 & 0xFF) &&
ucp[-2] == (ETHERTYPE_IPV6 >> 8)) {
if (hlen >= sizeof (struct ether_vlan_header) &&
ucp[-5] == 0 && ucp[-6] == 0x81)
ucp -= sizeof (struct ether_vlan_header);
else
ucp -= sizeof (struct ether_header);
/*
* If it's a group address, then fabricate a
* DL_UNITDATA_IND message.
*/
if ((ll_multicast = (ucp[0] & 1)) != 0 &&
(dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16,
BPRI_HI)) != NULL) {
eh = (struct ether_header *)ucp;
dui = (dl_unitdata_ind_t *)dl_mp->b_rptr;
DB_TYPE(dl_mp) = M_PROTO;
dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16;
dui->dl_primitive = DL_UNITDATA_IND;
dui->dl_dest_addr_length = 8;
dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE;
dui->dl_src_addr_length = 8;
dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE +
8;
dui->dl_group_address = 1;
ucp = (uchar_t *)(dui + 1);
if (ill->ill_sap_length > 0)
ucp += ill->ill_sap_length;
bcopy(&eh->ether_dhost, ucp, 6);
bcopy(&eh->ether_shost, ucp + 8, 6);
ucp = (uchar_t *)(dui + 1);
if (ill->ill_sap_length < 0)
ucp += 8 + ill->ill_sap_length;
bcopy(&eh->ether_type, ucp, 2);
bcopy(&eh->ether_type, ucp + 8, 2);
}
}
break;
}
case M_PROTO:
case M_PCPROTO:
if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive !=
DL_UNITDATA_IND) {
/* Go handle anything other than data elsewhere. */
ip_rput_dlpi(q, mp);
return;
}
ll_multicast = ip_get_dlpi_mbcast(ill, mp);
/* Save the DLPI header. */
dl_mp = mp;
mp = mp->b_cont;
dl_mp->b_cont = NULL;
break;
case M_BREAK:
panic("ip_rput_v6: got an M_BREAK");
/*NOTREACHED*/
case M_IOCACK:
iocp = (struct iocblk *)mp->b_rptr;
switch (iocp->ioc_cmd) {
case DL_IOC_HDR_INFO:
ill = (ill_t *)q->q_ptr;
ill_fastpath_ack(ill, mp);
return;
default:
putnext(q, mp);
return;
}
/* FALLTHRU */
case M_ERROR:
case M_HANGUP:
mutex_enter(&ill->ill_lock);
if (ill->ill_state_flags & ILL_CONDEMNED) {
mutex_exit(&ill->ill_lock);
freemsg(mp);
return;
}
ill_refhold_locked(ill);
mutex_exit(&ill->ill_lock);
qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE);
return;
case M_CTL:
if ((MBLKL(mp) > sizeof (int)) &&
((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) {
ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t));
mctl_present = B_TRUE;
break;
}
putnext(q, mp);
return;
case M_IOCNAK:
iocp = (struct iocblk *)mp->b_rptr;
switch (iocp->ioc_cmd) {
case DL_IOC_HDR_INFO:
ip_rput_other(NULL, q, mp, NULL);
return;
default:
break;
}
/* FALLTHRU */
default:
putnext(q, mp);
return;
}
BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives);
UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets,
(mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp));
/*
* if db_ref > 1 then copymsg and free original. Packet may be
* changed and do not want other entity who has a reference to this
* message to trip over the changes. This is a blind change because
* trying to catch all places that might change packet is too
* difficult (since it may be a module above this one).
*/
if (mp->b_datap->db_ref > 1) {
mblk_t *mp1;
mp1 = copymsg(mp);
freemsg(mp);
if (mp1 == NULL) {
first_mp = NULL;
goto discard;
}
mp = mp1;
}
first_mp = mp;
if (mctl_present) {
hada_mp = first_mp;
mp = first_mp->b_cont;
}
if ((check = ip_check_v6_mblk(mp, ill)) == IP6_MBLK_HDR_ERR) {
freemsg(mp);
return;
}
ip6h = (ip6_t *)mp->b_rptr;
/*
* ip:::receive must see ipv6 packets with a full header,
* and so is placed after the IP6_MBLK_HDR_ERR check.
*/
DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *,
ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h,
int, 0);
if (check != IP6_MBLK_OK) {
freemsg(mp);
return;
}
DTRACE_PROBE4(ip6__physical__in__start,
ill_t *, ill, ill_t *, NULL,
ip6_t *, ip6h, mblk_t *, first_mp);
FW_HOOKS6(ipst->ips_ip6_physical_in_event,
ipst->ips_ipv6firewall_physical_in,
ill, NULL, ip6h, first_mp, mp, ll_multicast, ipst);
DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp);
if (first_mp == NULL)
return;
/*
* Attach any necessary label information to this packet.
*/
if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) {
if (ip6opt_ls != 0)
ip0dbg(("tsol_get_pkt_label v6 failed\n"));
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
goto discard;
}
/* IP observability hook. */
if (ipst->ips_ipobs_enabled) {
zoneid_t dzone;
dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst,
ALL_ZONES);
ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill,
IPV6_VERSION, 0, ipst);
}
if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) ==
IPV6_DEFAULT_VERS_AND_FLOW) {
/*
* It may be a bit too expensive to do this mapped address
* check here, but in the interest of robustness, it seems
* like the correct place.
* TODO: Avoid this check for e.g. connected TCP sockets
*/
if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) {
ip1dbg(("ip_rput_v6: pkt with mapped src addr\n"));
goto discard;
}
if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) {
ip1dbg(("ip_rput_v6: pkt with loopback src"));
goto discard;
} else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) {
ip1dbg(("ip_rput_v6: pkt with loopback dst"));
goto discard;
}
flags |= (ll_multicast ? IP6_IN_LLMCAST : 0);
ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp);
} else {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion);
goto discard;
}
freemsg(dl_mp);
return;
discard:
if (dl_mp != NULL)
freeb(dl_mp);
freemsg(first_mp);
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
}
/*
* Walk through the IPv6 packet in mp and see if there's an AH header
* in it. See if the AH header needs to get done before other headers in
* the packet. (Worker function for ipsec_early_ah_v6().)
*/
#define IPSEC_HDR_DONT_PROCESS 0
#define IPSEC_HDR_PROCESS 1
#define IPSEC_MEMORY_ERROR 2 /* or malformed packet */
static int
ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr)
{
uint_t length;
uint_t ehdrlen;
uint8_t *whereptr;
uint8_t *endptr;
uint8_t *nexthdrp;
ip6_dest_t *desthdr;
ip6_rthdr_t *rthdr;
ip6_t *ip6h;
/*
* For now just pullup everything. In general, the less pullups,
* the better, but there's so much squirrelling through anyway,
* it's just easier this way.
*/
if (!pullupmsg(mp, -1)) {
return (IPSEC_MEMORY_ERROR);
}
ip6h = (ip6_t *)mp->b_rptr;
length = IPV6_HDR_LEN;
whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
endptr = mp->b_wptr;
/*
* We can't just use the argument nexthdr in the place
* of nexthdrp becaue we don't dereference nexthdrp
* till we confirm whether it is a valid address.
*/
nexthdrp = &ip6h->ip6_nxt;
while (whereptr < endptr) {
/* Is there enough left for len + nexthdr? */
if (whereptr + MIN_EHDR_LEN > endptr)
return (IPSEC_MEMORY_ERROR);
switch (*nexthdrp) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
/* Assumes the headers are identical for hbh and dst */
desthdr = (ip6_dest_t *)whereptr;
ehdrlen = 8 * (desthdr->ip6d_len + 1);
if ((uchar_t *)desthdr + ehdrlen > endptr)
return (IPSEC_MEMORY_ERROR);
/*
* Return DONT_PROCESS because the destination
* options header may be for each hop in a
* routing-header, and we only want AH if we're
* finished with routing headers.
*/
if (*nexthdrp == IPPROTO_DSTOPTS)
return (IPSEC_HDR_DONT_PROCESS);
nexthdrp = &desthdr->ip6d_nxt;
break;
case IPPROTO_ROUTING:
rthdr = (ip6_rthdr_t *)whereptr;
/*
* If there's more hops left on the routing header,
* return now with DON'T PROCESS.
*/
if (rthdr->ip6r_segleft > 0)
return (IPSEC_HDR_DONT_PROCESS);
ehdrlen = 8 * (rthdr->ip6r_len + 1);
if ((uchar_t *)rthdr + ehdrlen > endptr)
return (IPSEC_MEMORY_ERROR);
nexthdrp = &rthdr->ip6r_nxt;
break;
case IPPROTO_FRAGMENT:
/* Wait for reassembly */
return (IPSEC_HDR_DONT_PROCESS);
case IPPROTO_AH:
*nexthdr = IPPROTO_AH;
return (IPSEC_HDR_PROCESS);
case IPPROTO_NONE:
/* No next header means we're finished */
default:
return (IPSEC_HDR_DONT_PROCESS);
}
length += ehdrlen;
whereptr += ehdrlen;
}
/*
* Malformed/truncated packet.
*/
return (IPSEC_MEMORY_ERROR);
}
/*
* Path for AH if options are present. If this is the first time we are
* sending a datagram to AH, allocate a IPSEC_IN message and prepend it.
* Otherwise, just fanout. Return value answers the boolean question:
* "Did I consume the mblk you sent me?"
*
* Sometimes AH needs to be done before other IPv6 headers for security
* reasons. This function (and its ipsec_needs_processing_v6() above)
* indicates if that is so, and fans out to the appropriate IPsec protocol
* for the datagram passed in.
*/
static boolean_t
ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present,
ill_t *ill, ill_t *inill, mblk_t *hada_mp, zoneid_t zoneid)
{
mblk_t *mp;
uint8_t nexthdr;
ipsec_in_t *ii = NULL;
ah_t *ah;
ipsec_status_t ipsec_rc;
ip_stack_t *ipst = ill->ill_ipst;
netstack_t *ns = ipst->ips_netstack;
ipsec_stack_t *ipss = ns->netstack_ipsec;
ASSERT((hada_mp == NULL) || (!mctl_present));
switch (ipsec_needs_processing_v6(
(mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) {
case IPSEC_MEMORY_ERROR:
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
freemsg(hada_mp);
freemsg(first_mp);
return (B_TRUE);
case IPSEC_HDR_DONT_PROCESS:
return (B_FALSE);
}
/* Default means send it to AH! */
ASSERT(nexthdr == IPPROTO_AH);
if (!mctl_present) {
mp = first_mp;
first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack);
if (first_mp == NULL) {
ip1dbg(("ipsec_early_ah_v6: IPSEC_IN "
"allocation failure.\n"));
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
freemsg(hada_mp);
freemsg(mp);
return (B_TRUE);
}
/*
* Store the ill_index so that when we come back
* from IPSEC we ride on the same queue.
*/
ii = (ipsec_in_t *)first_mp->b_rptr;
ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex;
ii->ipsec_in_rill_index = inill->ill_phyint->phyint_ifindex;
first_mp->b_cont = mp;
}
/*
* Cache hardware acceleration info.
*/
if (hada_mp != NULL) {
ASSERT(ii != NULL);
IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: "
"caching data attr.\n"));
ii->ipsec_in_accelerated = B_TRUE;
ii->ipsec_in_da = hada_mp;
}
if (!ipsec_loaded(ipss)) {
ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid, ipst);
return (B_TRUE);
}
ah = ipsec_inbound_ah_sa(first_mp, ns);
if (ah == NULL)
return (B_TRUE);
ASSERT(ii->ipsec_in_ah_sa != NULL);
ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL);
ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah);
switch (ipsec_rc) {
case IPSEC_STATUS_SUCCESS:
/* we're done with IPsec processing, send it up */
ip_fanout_proto_again(first_mp, ill, inill, NULL);
break;
case IPSEC_STATUS_FAILED:
BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards);
break;
case IPSEC_STATUS_PENDING:
/* no action needed */
break;
}
return (B_TRUE);
}
static boolean_t
ip_iptun_input_v6(mblk_t *ipsec_mp, mblk_t *data_mp,
size_t hdr_len, uint8_t nexthdr, zoneid_t zoneid, ill_t *ill,
ip_stack_t *ipst)
{
conn_t *connp;
ASSERT(ipsec_mp == NULL || ipsec_mp->b_cont == data_mp);
connp = ipcl_classify_v6(data_mp, nexthdr, hdr_len, zoneid, ipst);
if (connp != NULL) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
connp->conn_recv(connp, ipsec_mp != NULL ? ipsec_mp : data_mp,
NULL);
CONN_DEC_REF(connp);
return (B_TRUE);
}
return (B_FALSE);
}
/*
* Validate the IPv6 mblk for alignment.
*/
int
ip_check_v6_mblk(mblk_t *mp, ill_t *ill)
{
int pkt_len, ip6_len;
ip6_t *ip6h = (ip6_t *)mp->b_rptr;
/* check for alignment and full IPv6 header */
if (!OK_32PTR((uchar_t *)ip6h) ||
(mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) {
if (!pullupmsg(mp, IPV6_HDR_LEN)) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
ip1dbg(("ip_rput_v6: pullupmsg failed\n"));
return (IP6_MBLK_HDR_ERR);
}
ip6h = (ip6_t *)mp->b_rptr;
}
ASSERT(OK_32PTR((uchar_t *)ip6h) &&
(mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN);
if (mp->b_cont == NULL)
pkt_len = mp->b_wptr - mp->b_rptr;
else
pkt_len = msgdsize(mp);
ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
/*
* Check for bogus (too short packet) and packet which
* was padded by the link layer.
*/
if (ip6_len != pkt_len) {
ssize_t diff;
if (ip6_len > pkt_len) {
ip1dbg(("ip_rput_data_v6: packet too short %d %d\n",
ip6_len, pkt_len));
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
return (IP6_MBLK_LEN_ERR);
}
diff = (ssize_t)(pkt_len - ip6_len);
if (!adjmsg(mp, -diff)) {
ip1dbg(("ip_rput_data_v6: adjmsg failed\n"));
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
return (IP6_MBLK_LEN_ERR);
}
/*
* adjmsg may have freed an mblk from the chain, hence
* invalidate any hw checksum here. This will force IP to
* calculate the checksum in sw, but only for this packet.
*/
DB_CKSUMFLAGS(mp) = 0;
}
return (IP6_MBLK_OK);
}
/*
* ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here.
* ip_rput_v6 has already verified alignment, the min length, the version,
* and db_ref = 1.
*
* The ill passed in (the arg named inill) is the ill that the packet
* actually arrived on. We need to remember this when saving the
* input interface index into potential IPV6_PKTINFO data in
* ip_add_info_v6().
*
* This routine doesn't free dl_mp; that's the caller's responsibility on
* return. (Note that the callers are complex enough that there's no tail
* recursion here anyway.)
*/
void
ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h,
uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp)
{
ire_t *ire = NULL;
ill_t *ill = inill;
ill_t *outill;
uint8_t *whereptr;
uint8_t nexthdr;
uint16_t remlen;
uint_t prev_nexthdr_offset;
uint_t used;
size_t old_pkt_len;
size_t pkt_len;
uint16_t ip6_len;
uint_t hdr_len;
boolean_t mctl_present;
mblk_t *first_mp;
mblk_t *first_mp1;
boolean_t no_forward;
ip6_hbh_t *hbhhdr;
boolean_t ll_multicast = (flags & IP6_IN_LLMCAST);
conn_t *connp;
uint32_t ports;
zoneid_t zoneid = GLOBAL_ZONEID;
uint16_t hck_flags, reass_hck_flags;
uint32_t reass_sum;
boolean_t cksum_err;
mblk_t *mp1;
ip_stack_t *ipst = inill->ill_ipst;
EXTRACT_PKT_MP(mp, first_mp, mctl_present);
if (hada_mp != NULL) {
/*
* It's an IPsec accelerated packet.
* Keep a pointer to the data attributes around until
* we allocate the ipsecinfo structure.
*/
IPSECHW_DEBUG(IPSECHW_PKT,
("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n"));
hada_mp->b_cont = NULL;
/*
* Since it is accelerated, it came directly from
* the ill.
*/
ASSERT(mctl_present == B_FALSE);
ASSERT(mp->b_datap->db_type != M_CTL);
}
ip6h = (ip6_t *)mp->b_rptr;
ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
old_pkt_len = pkt_len = ip6_len;
if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum)
hck_flags = DB_CKSUMFLAGS(mp);
else
hck_flags = 0;
/* Clear checksum flags in case we need to forward */
DB_CKSUMFLAGS(mp) = 0;
reass_sum = reass_hck_flags = 0;
nexthdr = ip6h->ip6_nxt;
prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt -
(uchar_t *)ip6h);
whereptr = (uint8_t *)&ip6h[1];
remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */
/* Process hop by hop header options */
if (nexthdr == IPPROTO_HOPOPTS) {
uint_t ehdrlen;
uint8_t *optptr;
if (remlen < MIN_EHDR_LEN)
goto pkt_too_short;
if (mp->b_cont != NULL &&
whereptr + MIN_EHDR_LEN > mp->b_wptr) {
if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
freemsg(hada_mp);
freemsg(first_mp);
return;
}
ip6h = (ip6_t *)mp->b_rptr;
whereptr = (uint8_t *)ip6h + pkt_len - remlen;
}
hbhhdr = (ip6_hbh_t *)whereptr;
nexthdr = hbhhdr->ip6h_nxt;
prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h);
ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
if (remlen < ehdrlen)
goto pkt_too_short;
if (mp->b_cont != NULL &&
whereptr + ehdrlen > mp->b_wptr) {
if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
freemsg(hada_mp);
freemsg(first_mp);
return;
}
ip6h = (ip6_t *)mp->b_rptr;
whereptr = (uint8_t *)ip6h + pkt_len - remlen;
hbhhdr = (ip6_hbh_t *)whereptr;
}
optptr = whereptr + 2;
whereptr += ehdrlen;
remlen -= ehdrlen;
switch (ip_process_options_v6(q, first_mp, ip6h, optptr,
ehdrlen - 2, IPPROTO_HOPOPTS, ipst)) {
case -1:
/*
* Packet has been consumed and any
* needed ICMP messages sent.
*/
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
freemsg(hada_mp);
return;
case 0:
/* no action needed */
break;
case 1:
/* Known router alert */
goto ipv6forus;
}
}
/*
* On incoming v6 multicast packets we will bypass the ire table,
* and assume that the read queue corresponds to the targetted
* interface.
*
* The effect of this is the same as the IPv4 original code, but is
* much cleaner I think. See ip_rput for how that was done.
*/
if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts);
UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len);
/*
* So that we don't end up with dups, only one ill in an IPMP
* group is nominated to receive multicast data traffic.
* However, link-locals on any underlying interfaces will have
* joined their solicited-node multicast addresses and we must
* accept those packets. (We don't attempt to precisely
* filter out duplicate solicited-node multicast packets since
* e.g. an IPMP interface and underlying interface may have
* the same solicited-node multicast address.) Note that we
* won't generally have duplicates because we only issue a
* DL_ENABMULTI_REQ on one interface in a group; the exception
* is when PHYI_MULTI_BCAST is set.
*/
if (IS_UNDER_IPMP(ill) && !ill->ill_nom_cast &&
!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) {
goto drop_pkt;
}
/*
* XXX TODO Give to mrouted to for multicast forwarding.
*/
if (ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, B_FALSE,
ALL_ZONES) == NULL) {
if (ip_debug > 3) {
/* ip2dbg */
pr_addr_dbg("ip_rput_data_v6: got mcast packet"
" which is not for us: %s\n", AF_INET6,
&ip6h->ip6_dst);
}
drop_pkt: BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
freemsg(hada_mp);
freemsg(first_mp);
return;
}
if (ip_debug > 3) {
/* ip2dbg */
pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n",
AF_INET6, &ip6h->ip6_dst);
}
zoneid = GLOBAL_ZONEID;
goto ipv6forus;
}
/*
* Find an ire that matches destination. For link-local addresses
* we have to match the ill.
* TBD for site local addresses.
*/
if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) {
ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL,
IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL,
MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst);
} else {
ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES,
msg_getlabel(mp), ipst);
if (ire != NULL && ire->ire_stq != NULL &&
ire->ire_zoneid != GLOBAL_ZONEID &&
ire->ire_zoneid != ALL_ZONES) {
/*
* Should only use IREs that are visible from the
* global zone for forwarding.
*/
ire_refrele(ire);
ire = ire_cache_lookup_v6(&ip6h->ip6_dst,
GLOBAL_ZONEID, msg_getlabel(mp), ipst);
}
}
if (ire == NULL) {
/*
* No matching IRE found. Mark this packet as having
* originated externally.
*/
if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
if (!(ill->ill_flags & ILLF_ROUTER)) {
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsInAddrErrors);
}
freemsg(hada_mp);
freemsg(first_mp);
return;
}
if (ip6h->ip6_hops <= 1) {
if (hada_mp != NULL)
goto hada_drop;
/* Sent by forwarding path, and router is global zone */
icmp_time_exceeded_v6(WR(q), first_mp,
ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE,
GLOBAL_ZONEID, ipst);
return;
}
/*
* Per RFC 3513 section 2.5.2, we must not forward packets with
* an unspecified source address.
*/
if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
freemsg(hada_mp);
freemsg(first_mp);
return;
}
mp->b_prev = (mblk_t *)(uintptr_t)
ill->ill_phyint->phyint_ifindex;
ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src,
IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL,
GLOBAL_ZONEID, ipst);
return;
}
/* we have a matching IRE */
if (ire->ire_stq != NULL) {
/*
* To be quicker, we may wish not to chase pointers
* (ire->ire_ipif->ipif_ill...) and instead store the
* forwarding policy in the ire. An unfortunate side-
* effect of this would be requiring an ire flush whenever
* the ILLF_ROUTER flag changes. For now, chase pointers
* once and store in the boolean no_forward.
*
* This appears twice to keep it out of the non-forwarding,
* yes-it's-for-us-on-the-right-interface case.
*/
no_forward = ((ill->ill_flags &
ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0);
ASSERT(first_mp == mp);
/*
* This ire has a send-to queue - forward the packet.
*/
if (no_forward || ll_multicast || (hada_mp != NULL)) {
freemsg(hada_mp);
BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
if (no_forward) {
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsInAddrErrors);
}
freemsg(mp);
ire_refrele(ire);
return;
}
/*
* ipIfStatsHCInForwDatagrams should only be increment if there
* will be an attempt to forward the packet, which is why we
* increment after the above condition has been checked.
*/
BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);
if (ip6h->ip6_hops <= 1) {
ip1dbg(("ip_rput_data_v6: hop limit expired.\n"));
/* Sent by forwarding path, and router is global zone */
icmp_time_exceeded_v6(WR(q), mp,
ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE,
GLOBAL_ZONEID, ipst);
ire_refrele(ire);
return;
}
/*
* Per RFC 3513 section 2.5.2, we must not forward packets with
* an unspecified source address.
*/
if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
freemsg(mp);
ire_refrele(ire);
return;
}
if (is_system_labeled()) {
mblk_t *mp1;
if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) {
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsForwProhibits);
freemsg(mp);
ire_refrele(ire);
return;
}
/* Size may have changed */
mp = mp1;
ip6h = (ip6_t *)mp->b_rptr;
pkt_len = msgdsize(mp);
}
if (pkt_len > ire->ire_max_frag) {
int max_frag = ire->ire_max_frag;
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors);
/*
* Handle labeled packet resizing.
*/
if (is_system_labeled()) {
max_frag = tsol_pmtu_adjust(mp, max_frag,
pkt_len - old_pkt_len, AF_INET6);
}
/* Sent by forwarding path, and router is global zone */
icmp_pkt2big_v6(WR(q), mp, max_frag,
ll_multicast, B_TRUE, GLOBAL_ZONEID, ipst);
ire_refrele(ire);
return;
}
/*
* Check to see if we're forwarding the packet to a
* different link from which it came. If so, check the
* source and destination addresses since routers must not
* forward any packets with link-local source or
* destination addresses to other links. Otherwise (if
* we're forwarding onto the same link), conditionally send
* a redirect message.
*/
if (ire->ire_rfq != q &&
!IS_IN_SAME_ILLGRP(ill, (ill_t *)ire->ire_rfq->q_ptr)) {
if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ||
IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) {
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsInAddrErrors);
freemsg(mp);
ire_refrele(ire);
return;
}
/* TBD add site-local check at site boundary? */
} else if (ipst->ips_ipv6_send_redirects) {
in6_addr_t *v6targ;
in6_addr_t gw_addr_v6;
ire_t *src_ire_v6 = NULL;
/*
* Don't send a redirect when forwarding a source
* routed packet.
*/
if (ip_source_routed_v6(ip6h, mp, ipst))
goto forward;
mutex_enter(&ire->ire_lock);
gw_addr_v6 = ire->ire_gateway_addr_v6;
mutex_exit(&ire->ire_lock);
if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) {
v6targ = &gw_addr_v6;
/*
* We won't send redirects to a router
* that doesn't have a link local
* address, but will forward.
*/
if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) {
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsInAddrErrors);
goto forward;
}
} else {
v6targ = &ip6h->ip6_dst;
}
src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src,
NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL,
GLOBAL_ZONEID, 0, NULL,
MATCH_IRE_IPIF | MATCH_IRE_TYPE,
ipst);
if (src_ire_v6 != NULL) {
/*
* The source is directly connected.
*/
mp1 = copymsg(mp);
if (mp1 != NULL) {
icmp_send_redirect_v6(WR(q),
mp1, v6targ, &ip6h->ip6_dst,
ill, B_FALSE);
}
ire_refrele(src_ire_v6);
}
}
forward:
/* Hoplimit verified above */
ip6h->ip6_hops--;
outill = ire->ire_ipif->ipif_ill;
DTRACE_PROBE4(ip6__forwarding__start,
ill_t *, inill, ill_t *, outill,
ip6_t *, ip6h, mblk_t *, mp);
FW_HOOKS6(ipst->ips_ip6_forwarding_event,
ipst->ips_ipv6firewall_forwarding,
inill, outill, ip6h, mp, mp, 0, ipst);
DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp);
if (mp != NULL) {
UPDATE_IB_PKT_COUNT(ire);
ire->ire_last_used_time = lbolt;
BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams);
ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL);
}
IRE_REFRELE(ire);
return;
}
/*
* Need to put on correct queue for reassembly to find it.
* No need to use put() since reassembly has its own locks.
* Note: multicast packets and packets destined to addresses
* assigned to loopback (ire_rfq is NULL) will be reassembled on
* the arriving ill. Unlike the IPv4 case, enabling strict
* destination multihoming will prevent accepting packets
* addressed to an IRE_LOCAL on lo0.
*/
if (ire->ire_rfq != q) {
if ((ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill))
== NULL) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
freemsg(hada_mp);
freemsg(first_mp);
return;
}
if (ire->ire_rfq != NULL) {
q = ire->ire_rfq;
ill = (ill_t *)q->q_ptr;
ASSERT(ill != NULL);
}
}
zoneid = ire->ire_zoneid;
UPDATE_IB_PKT_COUNT(ire);
ire->ire_last_used_time = lbolt;
/* Don't use the ire after this point, we'll NULL it out to be sure. */
ire_refrele(ire);
ire = NULL;
ipv6forus:
/*
* Looks like this packet is for us one way or another.
* This is where we'll process destination headers etc.
*/
for (; ; ) {
switch (nexthdr) {
case IPPROTO_TCP: {
uint16_t *up;
uint32_t sum;
int offset;
hdr_len = pkt_len - remlen;
if (hada_mp != NULL) {
ip0dbg(("tcp hada drop\n"));
goto hada_drop;
}
/* TCP needs all of the TCP header */
if (remlen < TCP_MIN_HEADER_LENGTH)
goto pkt_too_short;
if (mp->b_cont != NULL &&
whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) {
if (!pullupmsg(mp,
hdr_len + TCP_MIN_HEADER_LENGTH)) {
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsInDiscards);
freemsg(first_mp);
return;
}
hck_flags = 0;
ip6h = (ip6_t *)mp->b_rptr;
whereptr = (uint8_t *)ip6h + hdr_len;
}
/*
* Extract the offset field from the TCP header.
*/
offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4;
if (offset != 5) {
if (offset < 5) {
ip1dbg(("ip_rput_data_v6: short "
"TCP data offset"));
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsInDiscards);
freemsg(first_mp);
return;
}
/*
* There must be TCP options.
* Make sure we can grab them.
*/
offset <<= 2;
if (remlen < offset)
goto pkt_too_short;
if (mp->b_cont != NULL &&
whereptr + offset > mp->b_wptr) {
if (!pullupmsg(mp,
hdr_len + offset)) {
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsInDiscards);
freemsg(first_mp);
return;
}
hck_flags = 0;
ip6h = (ip6_t *)mp->b_rptr;
whereptr = (uint8_t *)ip6h + hdr_len;
}
}
up = (uint16_t *)&ip6h->ip6_src;
/*
* TCP checksum calculation. First sum up the
* pseudo-header fields:
* - Source IPv6 address
* - Destination IPv6 address
* - TCP payload length
* - TCP protocol ID
*/
sum = htons(IPPROTO_TCP + remlen) +
up[0] + up[1] + up[2] + up[3] +
up[4] + up[5] + up[6] + up[7] +
up[8] + up[9] + up[10] + up[11] +
up[12] + up[13] + up[14] + up[15];
/* Fold initial sum */
sum = (sum & 0xffff) + (sum >> 16);
mp1 = mp->b_cont;
if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0)
IP6_STAT(ipst, ip6_in_sw_cksum);
IP_CKSUM_RECV(hck_flags, sum, (uchar_t *)
((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)),
(int32_t)(whereptr - (uchar_t *)mp->b_rptr),
mp, mp1, cksum_err);
if (cksum_err) {
BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs);
if (hck_flags & HCK_FULLCKSUM) {
IP6_STAT(ipst,
ip6_tcp_in_full_hw_cksum_err);
} else if (hck_flags & HCK_PARTIALCKSUM) {
IP6_STAT(ipst,
ip6_tcp_in_part_hw_cksum_err);
} else {
IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err);
}
freemsg(first_mp);
return;
}
tcp_fanout:
ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill,
(flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE|
IP_FF_IPINFO), hdr_len, mctl_present, zoneid);
return;
}
case IPPROTO_SCTP:
{
sctp_hdr_t *sctph;
uint32_t calcsum, pktsum;
uint_t hdr_len = pkt_len - remlen;
sctp_stack_t *sctps;
sctps = inill->ill_ipst->ips_netstack->netstack_sctp;
/* SCTP needs all of the SCTP header */
if (remlen < sizeof (*sctph)) {
goto pkt_too_short;
}
if (whereptr + sizeof (*sctph) > mp->b_wptr) {
ASSERT(mp->b_cont != NULL);
if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) {
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsInDiscards);
freemsg(mp);
return;
}
ip6h = (ip6_t *)mp->b_rptr;
whereptr = (uint8_t *)ip6h + hdr_len;
}
sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len);
/* checksum */
pktsum = sctph->sh_chksum;
sctph->sh_chksum = 0;
calcsum = sctp_cksum(mp, hdr_len);
if (calcsum != pktsum) {
BUMP_MIB(&sctps->sctps_mib, sctpChecksumError);
freemsg(mp);
return;
}
sctph->sh_chksum = pktsum;
ports = *(uint32_t *)(mp->b_rptr + hdr_len);
if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst,
ports, zoneid, mp, sctps)) == NULL) {
ip_fanout_sctp_raw(first_mp, ill,
(ipha_t *)ip6h, B_FALSE, ports,
mctl_present,
(flags|IP_FF_SEND_ICMP|IP_FF_IPINFO),
B_TRUE, zoneid);
return;
}
BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill,
B_FALSE, mctl_present);
return;
}
case IPPROTO_UDP: {
uint16_t *up;
uint32_t sum;
hdr_len = pkt_len - remlen;
if (hada_mp != NULL) {
ip0dbg(("udp hada drop\n"));
goto hada_drop;
}
/* Verify that at least the ports are present */
if (remlen < UDPH_SIZE)
goto pkt_too_short;
if (mp->b_cont != NULL &&
whereptr + UDPH_SIZE > mp->b_wptr) {
if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) {
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsInDiscards);
freemsg(first_mp);
return;
}
hck_flags = 0;
ip6h = (ip6_t *)mp->b_rptr;
whereptr = (uint8_t *)ip6h + hdr_len;
}
/*
* Before going through the regular checksum
* calculation, make sure the received checksum
* is non-zero. RFC 2460 says, a 0x0000 checksum
* in a UDP packet (within IPv6 packet) is invalid
* and should be replaced by 0xffff. This makes
* sense as regular checksum calculation will
* pass for both the cases i.e. 0x0000 and 0xffff.
* Removing one of the case makes error detection
* stronger.
*/
if (((udpha_t *)whereptr)->uha_checksum == 0) {
/* 0x0000 checksum is invalid */
ip1dbg(("ip_rput_data_v6: Invalid UDP "
"checksum value 0x0000\n"));
BUMP_MIB(ill->ill_ip_mib,
udpIfStatsInCksumErrs);
freemsg(first_mp);
return;
}
up = (uint16_t *)&ip6h->ip6_src;
/*
* UDP checksum calculation. First sum up the
* pseudo-header fields:
* - Source IPv6 address
* - Destination IPv6 address
* - UDP payload length
* - UDP protocol ID
*/
sum = htons(IPPROTO_UDP + remlen) +
up[0] + up[1] + up[2] + up[3] +
up[4] + up[5] + up[6] + up[7] +
up[8] + up[9] + up[10] + up[11] +
up[12] + up[13] + up[14] + up[15];
/* Fold initial sum */
sum = (sum & 0xffff) + (sum >> 16);
if (reass_hck_flags != 0) {
hck_flags = reass_hck_flags;
IP_CKSUM_RECV_REASS(hck_flags,
(int32_t)(whereptr - (uchar_t *)mp->b_rptr),
sum, reass_sum, cksum_err);
} else {
mp1 = mp->b_cont;
IP_CKSUM_RECV(hck_flags, sum, (uchar_t *)
((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)),
(int32_t)(whereptr - (uchar_t *)mp->b_rptr),
mp, mp1, cksum_err);
}
if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0)
IP6_STAT(ipst, ip6_in_sw_cksum);
if (cksum_err) {
BUMP_MIB(ill->ill_ip_mib,
udpIfStatsInCksumErrs);
if (hck_flags & HCK_FULLCKSUM)
IP6_STAT(ipst,
ip6_udp_in_full_hw_cksum_err);
else if (hck_flags & HCK_PARTIALCKSUM)
IP6_STAT(ipst,
ip6_udp_in_part_hw_cksum_err);
else
IP6_STAT(ipst, ip6_udp_in_sw_cksum_err);
freemsg(first_mp);
return;
}
goto udp_fanout;
}
case IPPROTO_ICMPV6: {
uint16_t *up;
uint32_t sum;
uint_t hdr_len = pkt_len - remlen;
if (hada_mp != NULL) {
ip0dbg(("icmp hada drop\n"));
goto hada_drop;
}
up = (uint16_t *)&ip6h->ip6_src;
sum = htons(IPPROTO_ICMPV6 + remlen) +
up[0] + up[1] + up[2] + up[3] +
up[4] + up[5] + up[6] + up[7] +
up[8] + up[9] + up[10] + up[11] +
up[12] + up[13] + up[14] + up[15];
sum = (sum & 0xffff) + (sum >> 16);
sum = IP_CSUM(mp, hdr_len, sum);
if (sum != 0) {
/* IPv6 ICMP checksum failed */
ip1dbg(("ip_rput_data_v6: ICMPv6 checksum "
"failed %x\n",
sum));
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
BUMP_MIB(ill->ill_icmp6_mib,
ipv6IfIcmpInErrors);
freemsg(first_mp);
return;
}
icmp_fanout:
/* Check variable for testing applications */
if (ipst->ips_ipv6_drop_inbound_icmpv6) {
freemsg(first_mp);
return;
}
/*
* Assume that there is always at least one conn for
* ICMPv6 (in.ndpd) i.e. don't optimize the case
* where there is no conn.
*/
if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
ilm_t *ilm;
ilm_walker_t ilw;
ASSERT(!IS_LOOPBACK(ill));
/*
* In the multicast case, applications may have
* joined the group from different zones, so we
* need to deliver the packet to each of them.
* Loop through the multicast memberships
* structures (ilm) on the receive ill and send
* a copy of the packet up each matching one.
*/
ilm = ilm_walker_start(&ilw, inill);
for (; ilm != NULL;
ilm = ilm_walker_step(&ilw, ilm)) {
if (!IN6_ARE_ADDR_EQUAL(
&ilm->ilm_v6addr, &ip6h->ip6_dst))
continue;
if (!ipif_lookup_zoneid(
ilw.ilw_walk_ill, ilm->ilm_zoneid,
IPIF_UP, NULL))
continue;
first_mp1 = ip_copymsg(first_mp);
if (first_mp1 == NULL)
continue;
icmp_inbound_v6(q, first_mp1,
ilw.ilw_walk_ill, inill,
hdr_len, mctl_present, 0,
ilm->ilm_zoneid, dl_mp);
}
ilm_walker_finish(&ilw);
} else {
first_mp1 = ip_copymsg(first_mp);
if (first_mp1 != NULL)
icmp_inbound_v6(q, first_mp1, ill,
inill, hdr_len, mctl_present, 0,
zoneid, dl_mp);
}
goto proto_fanout;
}
case IPPROTO_ENCAP:
case IPPROTO_IPV6:
if (ip_iptun_input_v6(mctl_present ? first_mp : NULL,
mp, pkt_len - remlen, nexthdr, zoneid, ill, ipst)) {
return;
}
/*
* If there was no IP tunnel data-link bound to
* receive this packet, then we fall through to
* allow potential raw sockets bound to either of
* these protocols to pick it up.
*/
/* FALLTHRU */
proto_fanout:
default: {
/*
* Handle protocols with which IPv6 is less intimate.
*/
uint_t proto_flags = IP_FF_RAWIP|IP_FF_IPINFO;
if (hada_mp != NULL) {
ip0dbg(("default hada drop\n"));
goto hada_drop;
}
/*
* Enable sending ICMP for "Unknown" nexthdr
* case. i.e. where we did not FALLTHRU from
* IPPROTO_ICMPV6 processing case above.
* If we did FALLTHRU, then the packet has already been
* processed for IPPF, don't process it again in
* ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the
* flags
*/
if (nexthdr != IPPROTO_ICMPV6)
proto_flags |= IP_FF_SEND_ICMP;
else
proto_flags |= IP6_NO_IPPOLICY;
ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill,
nexthdr, prev_nexthdr_offset, (flags|proto_flags),
mctl_present, zoneid);
return;
}
case IPPROTO_DSTOPTS: {
uint_t ehdrlen;
uint8_t *optptr;
ip6_dest_t *desthdr;
/* If packet is too short, look no further */
if (remlen < MIN_EHDR_LEN)
goto pkt_too_short;
/* Check if AH is present. */
if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill,
inill, hada_mp, zoneid)) {
return;
}
/*
* Reinitialize pointers, as ipsec_early_ah_v6() does
* complete pullups. We don't have to do more pullups
* as a result.
*/
whereptr = (uint8_t *)((uintptr_t)mp->b_rptr +
(uintptr_t)(whereptr - ((uint8_t *)ip6h)));
ip6h = (ip6_t *)mp->b_rptr;
desthdr = (ip6_dest_t *)whereptr;
nexthdr = desthdr->ip6d_nxt;
prev_nexthdr_offset = (uint_t)(whereptr -
(uint8_t *)ip6h);
ehdrlen = 8 * (desthdr->ip6d_len + 1);
if (remlen < ehdrlen)
goto pkt_too_short;
optptr = whereptr + 2;
/*
* Note: XXX This code does not seem to make
* distinction between Destination Options Header
* being before/after Routing Header which can
* happen if we are at the end of source route.
* This may become significant in future.
* (No real significant Destination Options are
* defined/implemented yet ).
*/
switch (ip_process_options_v6(q, first_mp, ip6h, optptr,
ehdrlen - 2, IPPROTO_DSTOPTS, ipst)) {
case -1:
/*
* Packet has been consumed and any needed
* ICMP errors sent.
*/
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
freemsg(hada_mp);
return;
case 0:
/* No action needed continue */
break;
case 1:
/*
* Unnexpected return value
* (Router alert is a Hop-by-Hop option)
*/
#ifdef DEBUG
panic("ip_rput_data_v6: router "
"alert hbh opt indication in dest opt");
/*NOTREACHED*/
#else
freemsg(hada_mp);
freemsg(first_mp);
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
return;
#endif
}
used = ehdrlen;
break;
}
case IPPROTO_FRAGMENT: {
ip6_frag_t *fraghdr;
size_t no_frag_hdr_len;
if (hada_mp != NULL) {
ip0dbg(("frag hada drop\n"));
goto hada_drop;
}
ASSERT(first_mp == mp);
if (remlen < sizeof (ip6_frag_t))
goto pkt_too_short;
if (mp->b_cont != NULL &&
whereptr + sizeof (ip6_frag_t) > mp->b_wptr) {
if (!pullupmsg(mp,
pkt_len - remlen + sizeof (ip6_frag_t))) {
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsInDiscards);
freemsg(mp);
return;
}
hck_flags = 0;
ip6h = (ip6_t *)mp->b_rptr;
whereptr = (uint8_t *)ip6h + pkt_len - remlen;
}
fraghdr = (ip6_frag_t *)whereptr;
used = (uint_t)sizeof (ip6_frag_t);
BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds);
/*
* Invoke the CGTP (multirouting) filtering module to
* process the incoming packet. Packets identified as
* duplicates must be discarded. Filtering is active
* only if the the ip_cgtp_filter ndd variable is
* non-zero.
*/
if (ipst->ips_ip_cgtp_filter &&
ipst->ips_ip_cgtp_filter_ops != NULL) {
int cgtp_flt_pkt;
netstackid_t stackid;
stackid = ipst->ips_netstack->netstack_stackid;
cgtp_flt_pkt =
ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6(
stackid, inill->ill_phyint->phyint_ifindex,
ip6h, fraghdr);
if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) {
freemsg(mp);
return;
}
}
/* Restore the flags */
DB_CKSUMFLAGS(mp) = hck_flags;
mp = ip_rput_frag_v6(ill, inill, mp, ip6h, fraghdr,
remlen - used, &prev_nexthdr_offset,
&reass_sum, &reass_hck_flags);
if (mp == NULL) {
/* Reassembly is still pending */
return;
}
/* The first mblk are the headers before the frag hdr */
BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs);
first_mp = mp; /* mp has most likely changed! */
no_frag_hdr_len = mp->b_wptr - mp->b_rptr;
ip6h = (ip6_t *)mp->b_rptr;
nexthdr = ((char *)ip6h)[prev_nexthdr_offset];
whereptr = mp->b_rptr + no_frag_hdr_len;
remlen = ntohs(ip6h->ip6_plen) +
(uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len);
pkt_len = msgdsize(mp);
used = 0;
break;
}
case IPPROTO_HOPOPTS: {
if (hada_mp != NULL) {
ip0dbg(("hop hada drop\n"));
goto hada_drop;
}
/*
* Illegal header sequence.
* (Hop-by-hop headers are processed above
* and required to immediately follow IPv6 header)
*/
icmp_param_problem_v6(WR(q), first_mp,
ICMP6_PARAMPROB_NEXTHEADER,
prev_nexthdr_offset,
B_FALSE, B_FALSE, zoneid, ipst);
return;
}
case IPPROTO_ROUTING: {
uint_t ehdrlen;
ip6_rthdr_t *rthdr;
/* If packet is too short, look no further */
if (remlen < MIN_EHDR_LEN)
goto pkt_too_short;
/* Check if AH is present. */
if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill,
inill, hada_mp, zoneid)) {
return;
}
/*
* Reinitialize pointers, as ipsec_early_ah_v6() does
* complete pullups. We don't have to do more pullups
* as a result.
*/
whereptr = (uint8_t *)((uintptr_t)mp->b_rptr +
(uintptr_t)(whereptr - ((uint8_t *)ip6h)));
ip6h = (ip6_t *)mp->b_rptr;
rthdr = (ip6_rthdr_t *)whereptr;
nexthdr = rthdr->ip6r_nxt;
prev_nexthdr_offset = (uint_t)(whereptr -
(uint8_t *)ip6h);
ehdrlen = 8 * (rthdr->ip6r_len + 1);
if (remlen < ehdrlen)
goto pkt_too_short;
if (rthdr->ip6r_segleft != 0) {
/* Not end of source route */
if (ll_multicast) {
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsForwProhibits);
freemsg(hada_mp);
freemsg(mp);
return;
}
ip_process_rthdr(q, mp, ip6h, rthdr, ill,
hada_mp);
return;
}
used = ehdrlen;
break;
}
case IPPROTO_AH:
case IPPROTO_ESP: {
/*
* Fast path for AH/ESP. If this is the first time
* we are sending a datagram to AH/ESP, allocate
* a IPSEC_IN message and prepend it. Otherwise,
* just fanout.
*/
ipsec_in_t *ii;
int ipsec_rc;
ipsec_stack_t *ipss;
ipss = ipst->ips_netstack->netstack_ipsec;
if (!mctl_present) {
ASSERT(first_mp == mp);
first_mp = ipsec_in_alloc(B_FALSE,
ipst->ips_netstack);
if (first_mp == NULL) {
ip1dbg(("ip_rput_data_v6: IPSEC_IN "
"allocation failure.\n"));
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsInDiscards);
freemsg(mp);
return;
}
/*
* Store the ill_index so that when we come back
* from IPSEC we ride on the same queue.
*/
ii = (ipsec_in_t *)first_mp->b_rptr;
ii->ipsec_in_ill_index =
ill->ill_phyint->phyint_ifindex;
ii->ipsec_in_rill_index =
inill->ill_phyint->phyint_ifindex;
first_mp->b_cont = mp;
/*
* Cache hardware acceleration info.
*/
if (hada_mp != NULL) {
IPSECHW_DEBUG(IPSECHW_PKT,
("ip_rput_data_v6: "
"caching data attr.\n"));
ii->ipsec_in_accelerated = B_TRUE;
ii->ipsec_in_da = hada_mp;
hada_mp = NULL;
}
} else {
ii = (ipsec_in_t *)first_mp->b_rptr;
}
if (!ipsec_loaded(ipss)) {
ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP,
zoneid, ipst);
return;
}
/* select inbound SA and have IPsec process the pkt */
if (nexthdr == IPPROTO_ESP) {
esph_t *esph = ipsec_inbound_esp_sa(first_mp,
ipst->ips_netstack);
if (esph == NULL)
return;
ASSERT(ii->ipsec_in_esp_sa != NULL);
ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func !=
NULL);
ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func(
first_mp, esph);
} else {
ah_t *ah = ipsec_inbound_ah_sa(first_mp,
ipst->ips_netstack);
if (ah == NULL)
return;
ASSERT(ii->ipsec_in_ah_sa != NULL);
ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func !=
NULL);
ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(
first_mp, ah);
}
switch (ipsec_rc) {
case IPSEC_STATUS_SUCCESS:
break;
case IPSEC_STATUS_FAILED:
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
/* FALLTHRU */
case IPSEC_STATUS_PENDING:
return;
}
/* we're done with IPsec processing, send it up */
ip_fanout_proto_again(first_mp, ill, inill, NULL);
return;
}
case IPPROTO_NONE:
/* All processing is done. Count as "delivered". */
freemsg(hada_mp);
freemsg(first_mp);
BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
return;
}
whereptr += used;
ASSERT(remlen >= used);
remlen -= used;
}
/* NOTREACHED */
pkt_too_short:
ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n",
ip6_len, pkt_len, remlen));
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
freemsg(hada_mp);
freemsg(first_mp);
return;
udp_fanout:
if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
connp = NULL;
} else {
connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid,
ipst);
if ((connp != NULL) && (connp->conn_upq == NULL)) {
CONN_DEC_REF(connp);
connp = NULL;
}
}
if (connp == NULL) {
uint32_t ports;
ports = *(uint32_t *)(mp->b_rptr + hdr_len +
UDP_PORTS_OFFSET);
IP6_STAT(ipst, ip6_udp_slow_path);
ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill,
(flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present,
zoneid);
return;
}
if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) ||
(!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) {
freemsg(first_mp);
BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
CONN_DEC_REF(connp);
return;
}
/* Initiate IPPF processing */
if (IP6_IN_IPP(flags, ipst)) {
ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex);
if (mp == NULL) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
CONN_DEC_REF(connp);
return;
}
}
if (connp->conn_ip_recvpktinfo ||
IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) {
mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst);
if (mp == NULL) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
CONN_DEC_REF(connp);
return;
}
}
IP6_STAT(ipst, ip6_udp_fast_path);
BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
/* Send it upstream */
(connp->conn_recv)(connp, mp, NULL);
CONN_DEC_REF(connp);
freemsg(hada_mp);
return;
hada_drop:
ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n"));
/* IPsec kstats: bump counter here */
freemsg(hada_mp);
freemsg(first_mp);
}
/*
* Reassemble fragment.
* When it returns a completed message the first mblk will only contain
* the headers prior to the fragment header.
*
* prev_nexthdr_offset is an offset indication of where the nexthdr field is
* of the preceding header. This is needed to patch the previous header's
* nexthdr field when reassembly completes.
*/
static mblk_t *
ip_rput_frag_v6(ill_t *ill, ill_t *inill, mblk_t *mp, ip6_t *ip6h,
ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset,
uint32_t *cksum_val, uint16_t *cksum_flags)
{
uint32_t ident = ntohl(fraghdr->ip6f_ident);
uint16_t offset;
boolean_t more_frags;
uint8_t nexthdr = fraghdr->ip6f_nxt;
in6_addr_t *v6dst_ptr;
in6_addr_t *v6src_ptr;
uint_t end;
uint_t hdr_length;
size_t count;
ipf_t *ipf;
ipf_t **ipfp;
ipfb_t *ipfb;
mblk_t *mp1;
uint8_t ecn_info = 0;
size_t msg_len;
mblk_t *tail_mp;
mblk_t *t_mp;
boolean_t pruned = B_FALSE;
uint32_t sum_val;
uint16_t sum_flags;
ip_stack_t *ipst = ill->ill_ipst;
if (cksum_val != NULL)
*cksum_val = 0;
if (cksum_flags != NULL)
*cksum_flags = 0;
/*
* We utilize hardware computed checksum info only for UDP since
* IP fragmentation is a normal occurence for the protocol. In
* addition, checksum offload support for IP fragments carrying
* UDP payload is commonly implemented across network adapters.
*/
ASSERT(inill != NULL);
if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(inill) &&
(DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) {
mblk_t *mp1 = mp->b_cont;
int32_t len;
/* Record checksum information from the packet */
sum_val = (uint32_t)DB_CKSUM16(mp);
sum_flags = DB_CKSUMFLAGS(mp);
/* fragmented payload offset from beginning of mblk */
offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr);
if ((sum_flags & HCK_PARTIALCKSUM) &&
(mp1 == NULL || mp1->b_cont == NULL) &&
offset >= (uint16_t)DB_CKSUMSTART(mp) &&
((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) {
uint32_t adj;
/*
* Partial checksum has been calculated by hardware
* and attached to the packet; in addition, any
* prepended extraneous data is even byte aligned.
* If any such data exists, we adjust the checksum;
* this would also handle any postpended data.
*/
IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp),
mp, mp1, len, adj);
/* One's complement subtract extraneous checksum */
if (adj >= sum_val)
sum_val = ~(adj - sum_val) & 0xFFFF;
else
sum_val -= adj;
}
} else {
sum_val = 0;
sum_flags = 0;
}
/* Clear hardware checksumming flag */
DB_CKSUMFLAGS(mp) = 0;
/*
* Note: Fragment offset in header is in 8-octet units.
* Clearing least significant 3 bits not only extracts
* it but also gets it in units of octets.
*/
offset = ntohs(fraghdr->ip6f_offlg) & ~7;
more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG);
/*
* Is the more frags flag on and the payload length not a multiple
* of eight?
*/
if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) {
zoneid_t zoneid;
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst);
if (zoneid == ALL_ZONES) {
freemsg(mp);
return (NULL);
}
icmp_param_problem_v6(ill->ill_wq, mp, ICMP6_PARAMPROB_HEADER,
(uint32_t)((char *)&ip6h->ip6_plen -
(char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst);
return (NULL);
}
v6src_ptr = &ip6h->ip6_src;
v6dst_ptr = &ip6h->ip6_dst;
end = remlen;
hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h);
end += offset;
/*
* Would fragment cause reassembled packet to have a payload length
* greater than IP_MAXPACKET - the max payload size?
*/
if (end > IP_MAXPACKET) {
zoneid_t zoneid;
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst);
if (zoneid == ALL_ZONES) {
freemsg(mp);
return (NULL);
}
icmp_param_problem_v6(ill->ill_wq, mp, ICMP6_PARAMPROB_HEADER,
(uint32_t)((char *)&fraghdr->ip6f_offlg -
(char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst);
return (NULL);
}
/*
* This packet just has one fragment. Reassembly not
* needed.
*/
if (!more_frags && offset == 0) {
goto reass_done;
}
/*
* Drop the fragmented as early as possible, if
* we don't have resource(s) to re-assemble.
*/
if (ipst->ips_ip_reass_queue_bytes == 0) {
freemsg(mp);
return (NULL);
}
/* Record the ECN field info. */
ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20);
/*
* If this is not the first fragment, dump the unfragmentable
* portion of the packet.
*/
if (offset)
mp->b_rptr = (uchar_t *)&fraghdr[1];
/*
* Fragmentation reassembly. Each ILL has a hash table for
* queueing packets undergoing reassembly for all IPIFs
* associated with the ILL. The hash is based on the packet
* IP ident field. The ILL frag hash table was allocated
* as a timer block at the time the ILL was created. Whenever
* there is anything on the reassembly queue, the timer will
* be running.
*/
msg_len = MBLKSIZE(mp);
tail_mp = mp;
while (tail_mp->b_cont != NULL) {
tail_mp = tail_mp->b_cont;
msg_len += MBLKSIZE(tail_mp);
}
/*
* If the reassembly list for this ILL will get too big
* prune it.
*/
if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >=
ipst->ips_ip_reass_queue_bytes) {
ill_frag_prune(ill,
(ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 :
(ipst->ips_ip_reass_queue_bytes - msg_len));
pruned = B_TRUE;
}
ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)];
mutex_enter(&ipfb->ipfb_lock);
ipfp = &ipfb->ipfb_ipf;
/* Try to find an existing fragment queue for this packet. */
for (;;) {
ipf = ipfp[0];
if (ipf) {
/*
* It has to match on ident, source address, and
* dest address.
*/
if (ipf->ipf_ident == ident &&
IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) &&
IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) {
/*
* If we have received too many
* duplicate fragments for this packet
* free it.
*/
if (ipf->ipf_num_dups > ip_max_frag_dups) {
ill_frag_free_pkts(ill, ipfb, ipf, 1);
freemsg(mp);
mutex_exit(&ipfb->ipfb_lock);
return (NULL);
}
break;
}
ipfp = &ipf->ipf_hash_next;
continue;
}
/*
* If we pruned the list, do we want to store this new
* fragment?. We apply an optimization here based on the
* fact that most fragments will be received in order.
* So if the offset of this incoming fragment is zero,
* it is the first fragment of a new packet. We will
* keep it. Otherwise drop the fragment, as we have
* probably pruned the packet already (since the
* packet cannot be found).
*/
if (pruned && offset != 0) {
mutex_exit(&ipfb->ipfb_lock);
freemsg(mp);
return (NULL);
}
/* New guy. Allocate a frag message. */
mp1 = allocb(sizeof (*ipf), BPRI_MED);
if (!mp1) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
freemsg(mp);
partial_reass_done:
mutex_exit(&ipfb->ipfb_lock);
return (NULL);
}
if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) {
/*
* Too many fragmented packets in this hash bucket.
* Free the oldest.
*/
ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1);
}
mp1->b_cont = mp;
/* Initialize the fragment header. */
ipf = (ipf_t *)mp1->b_rptr;
ipf->ipf_mp = mp1;
ipf->ipf_ptphn = ipfp;
ipfp[0] = ipf;
ipf->ipf_hash_next = NULL;
ipf->ipf_ident = ident;
ipf->ipf_v6src = *v6src_ptr;
ipf->ipf_v6dst = *v6dst_ptr;
/* Record reassembly start time. */
ipf->ipf_timestamp = gethrestime_sec();
/* Record ipf generation and account for frag header */
ipf->ipf_gen = ill->ill_ipf_gen++;
ipf->ipf_count = MBLKSIZE(mp1);
ipf->ipf_protocol = nexthdr;
ipf->ipf_nf_hdr_len = 0;
ipf->ipf_prev_nexthdr_offset = 0;
ipf->ipf_last_frag_seen = B_FALSE;
ipf->ipf_ecn = ecn_info;
ipf->ipf_num_dups = 0;
ipfb->ipfb_frag_pkts++;
ipf->ipf_checksum = 0;
ipf->ipf_checksum_flags = 0;
/* Store checksum value in fragment header */
if (sum_flags != 0) {
sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
ipf->ipf_checksum = sum_val;
ipf->ipf_checksum_flags = sum_flags;
}
/*
* We handle reassembly two ways. In the easy case,
* where all the fragments show up in order, we do
* minimal bookkeeping, and just clip new pieces on
* the end. If we ever see a hole, then we go off
* to ip_reassemble which has to mark the pieces and
* keep track of the number of holes, etc. Obviously,
* the point of having both mechanisms is so we can
* handle the easy case as efficiently as possible.
*/
if (offset == 0) {
/* Easy case, in-order reassembly so far. */
/* Update the byte count */
ipf->ipf_count += msg_len;
ipf->ipf_tail_mp = tail_mp;
/*
* Keep track of next expected offset in
* ipf_end.
*/
ipf->ipf_end = end;
ipf->ipf_nf_hdr_len = hdr_length;
ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset;
} else {
/* Hard case, hole at the beginning. */
ipf->ipf_tail_mp = NULL;
/*
* ipf_end == 0 means that we have given up
* on easy reassembly.
*/
ipf->ipf_end = 0;
/* Forget checksum offload from now on */
ipf->ipf_checksum_flags = 0;
/*
* ipf_hole_cnt is set by ip_reassemble.
* ipf_count is updated by ip_reassemble.
* No need to check for return value here
* as we don't expect reassembly to complete or
* fail for the first fragment itself.
*/
(void) ip_reassemble(mp, ipf, offset, more_frags, ill,
msg_len);
}
/* Update per ipfb and ill byte counts */
ipfb->ipfb_count += ipf->ipf_count;
ASSERT(ipfb->ipfb_count > 0); /* Wraparound */
atomic_add_32(&ill->ill_frag_count, ipf->ipf_count);
/* If the frag timer wasn't already going, start it. */
mutex_enter(&ill->ill_lock);
ill_frag_timer_start(ill);
mutex_exit(&ill->ill_lock);
goto partial_reass_done;
}
/*
* If the packet's flag has changed (it could be coming up
* from an interface different than the previous, therefore
* possibly different checksum capability), then forget about
* any stored checksum states. Otherwise add the value to
* the existing one stored in the fragment header.
*/
if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) {
sum_val += ipf->ipf_checksum;
sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
ipf->ipf_checksum = sum_val;
} else if (ipf->ipf_checksum_flags != 0) {
/* Forget checksum offload from now on */
ipf->ipf_checksum_flags = 0;
}
/*
* We have a new piece of a datagram which is already being
* reassembled. Update the ECN info if all IP fragments
* are ECN capable. If there is one which is not, clear
* all the info. If there is at least one which has CE
* code point, IP needs to report that up to transport.
*/
if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) {
if (ecn_info == IPH_ECN_CE)
ipf->ipf_ecn = IPH_ECN_CE;
} else {
ipf->ipf_ecn = IPH_ECN_NECT;
}
if (offset && ipf->ipf_end == offset) {
/* The new fragment fits at the end */
ipf->ipf_tail_mp->b_cont = mp;
/* Update the byte count */
ipf->ipf_count += msg_len;
/* Update per ipfb and ill byte counts */
ipfb->ipfb_count += msg_len;
ASSERT(ipfb->ipfb_count > 0); /* Wraparound */
atomic_add_32(&ill->ill_frag_count, msg_len);
if (more_frags) {
/* More to come. */
ipf->ipf_end = end;
ipf->ipf_tail_mp = tail_mp;
goto partial_reass_done;
}
} else {
/*
* Go do the hard cases.
* Call ip_reassemble().
*/
int ret;
if (offset == 0) {
if (ipf->ipf_prev_nexthdr_offset == 0) {
ipf->ipf_nf_hdr_len = hdr_length;
ipf->ipf_prev_nexthdr_offset =
*prev_nexthdr_offset;
}
}
/* Save current byte count */
count = ipf->ipf_count;
ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len);
/* Count of bytes added and subtracted (freeb()ed) */
count = ipf->ipf_count - count;
if (count) {
/* Update per ipfb and ill byte counts */
ipfb->ipfb_count += count;
ASSERT(ipfb->ipfb_count > 0); /* Wraparound */
atomic_add_32(&ill->ill_frag_count, count);
}
if (ret == IP_REASS_PARTIAL) {
goto partial_reass_done;
} else if (ret == IP_REASS_FAILED) {
/* Reassembly failed. Free up all resources */
ill_frag_free_pkts(ill, ipfb, ipf, 1);
for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) {
IP_REASS_SET_START(t_mp, 0);
IP_REASS_SET_END(t_mp, 0);
}
freemsg(mp);
goto partial_reass_done;
}
/* We will reach here iff 'ret' is IP_REASS_COMPLETE */
}
/*
* We have completed reassembly. Unhook the frag header from
* the reassembly list.
*
* Grab the unfragmentable header length next header value out
* of the first fragment
*/
ASSERT(ipf->ipf_nf_hdr_len != 0);
hdr_length = ipf->ipf_nf_hdr_len;
/*
* Before we free the frag header, record the ECN info
* to report back to the transport.
*/
ecn_info = ipf->ipf_ecn;
/*
* Store the nextheader field in the header preceding the fragment
* header
*/
nexthdr = ipf->ipf_protocol;
*prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset;
ipfp = ipf->ipf_ptphn;
/* We need to supply these to caller */
if ((sum_flags = ipf->ipf_checksum_flags) != 0)
sum_val = ipf->ipf_checksum;
else
sum_val = 0;
mp1 = ipf->ipf_mp;
count = ipf->ipf_count;
ipf = ipf->ipf_hash_next;
if (ipf)
ipf->ipf_ptphn = ipfp;
ipfp[0] = ipf;
atomic_add_32(&ill->ill_frag_count, -count);
ASSERT(ipfb->ipfb_count >= count);
ipfb->ipfb_count -= count;
ipfb->ipfb_frag_pkts--;
mutex_exit(&ipfb->ipfb_lock);
/* Ditch the frag header. */
mp = mp1->b_cont;
freeb(mp1);
/*
* Make sure the packet is good by doing some sanity
* check. If bad we can silentely drop the packet.
*/
reass_done:
if (hdr_length < sizeof (ip6_frag_t)) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
ip1dbg(("ip_rput_frag_v6: bad packet\n"));
freemsg(mp);
return (NULL);
}
/*
* Remove the fragment header from the initial header by
* splitting the mblk into the non-fragmentable header and
* everthing after the fragment extension header. This has the
* side effect of putting all the headers that need destination
* processing into the b_cont block-- on return this fact is
* used in order to avoid having to look at the extensions
* already processed.
*
* Note that this code assumes that the unfragmentable portion
* of the header is in the first mblk and increments
* the read pointer past it. If this assumption is broken
* this code fails badly.
*/
if (mp->b_rptr + hdr_length != mp->b_wptr) {
mblk_t *nmp;
if (!(nmp = dupb(mp))) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
ip1dbg(("ip_rput_frag_v6: dupb failed\n"));
freemsg(mp);
return (NULL);
}
nmp->b_cont = mp->b_cont;
mp->b_cont = nmp;
nmp->b_rptr += hdr_length;
}
mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t);
ip6h = (ip6_t *)mp->b_rptr;
((char *)ip6h)[*prev_nexthdr_offset] = nexthdr;
/* Restore original IP length in header. */
ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN));
/* Record the ECN info. */
ip6h->ip6_vcf &= htonl(0xFFCFFFFF);
ip6h->ip6_vcf |= htonl(ecn_info << 20);
/* Reassembly is successful; return checksum information if needed */
if (cksum_val != NULL)
*cksum_val = sum_val;
if (cksum_flags != NULL)
*cksum_flags = sum_flags;
return (mp);
}
/*
* Given an mblk and a ptr, find the destination address in an IPv6 routing
* header.
*/
static in6_addr_t
pluck_out_dst(mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv)
{
ip6_rthdr0_t *rt0;
int segleft, numaddr;
in6_addr_t *ap, rv = oldrv;
rt0 = (ip6_rthdr0_t *)whereptr;
if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) {
DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp,
uint8_t *, whereptr);
return (rv);
}
segleft = rt0->ip6r0_segleft;
numaddr = rt0->ip6r0_len / 2;
if ((rt0->ip6r0_len & 0x1) ||
whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr ||
(segleft > rt0->ip6r0_len / 2)) {
/*
* Corrupt packet. Either the routing header length is odd
* (can't happen) or mismatched compared to the packet, or the
* number of addresses is. Return what we can. This will
* only be a problem on forwarded packets that get squeezed
* through an outbound tunnel enforcing IPsec Tunnel Mode.
*/
DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *,
whereptr);
return (rv);
}
if (segleft != 0) {
ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0));
rv = ap[numaddr - 1];
}
return (rv);
}
/*
* Walk through the options to see if there is a routing header.
* If present get the destination which is the last address of
* the option.
*/
in6_addr_t
ip_get_dst_v6(ip6_t *ip6h, mblk_t *mp, boolean_t *is_fragment)
{
mblk_t *current_mp = mp;
uint8_t nexthdr;
uint8_t *whereptr;
int ehdrlen;
in6_addr_t rv;
whereptr = (uint8_t *)ip6h;
ehdrlen = sizeof (ip6_t);
/* We assume at least the IPv6 base header is within one mblk. */
ASSERT(mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen);
rv = ip6h->ip6_dst;
nexthdr = ip6h->ip6_nxt;
if (is_fragment != NULL)
*is_fragment = B_FALSE;
/*
* We also assume (thanks to ipsec_tun_outbound()'s pullup) that
* no extension headers will be split across mblks.
*/
while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS ||
nexthdr == IPPROTO_ROUTING) {
if (nexthdr == IPPROTO_ROUTING)
rv = pluck_out_dst(current_mp, whereptr, rv);
/*
* All IPv6 extension headers have the next-header in byte
* 0, and the (length - 8) in 8-byte-words.
*/
while (whereptr + ehdrlen >= current_mp->b_wptr) {
ehdrlen -= (current_mp->b_wptr - whereptr);
current_mp = current_mp->b_cont;
if (current_mp == NULL) {
/* Bad packet. Return what we can. */
DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *,
mp, mblk_t *, current_mp, ip6_t *, ip6h);
goto done;
}
whereptr = current_mp->b_rptr;
}
whereptr += ehdrlen;
nexthdr = *whereptr;
ASSERT(whereptr + 1 < current_mp->b_wptr);
ehdrlen = (*(whereptr + 1) + 1) * 8;
}
done:
if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL)
*is_fragment = B_TRUE;
return (rv);
}
/*
* ip_source_routed_v6:
* This function is called by redirect code in ip_rput_data_v6 to
* know whether this packet is source routed through this node i.e
* whether this node (router) is part of the journey. This
* function is called under two cases :
*
* case 1 : Routing header was processed by this node and
* ip_process_rthdr replaced ip6_dst with the next hop
* and we are forwarding the packet to the next hop.
*
* case 2 : Routing header was not processed by this node and we
* are just forwarding the packet.
*
* For case (1) we don't want to send redirects. For case(2) we
* want to send redirects.
*/
static boolean_t
ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst)
{
uint8_t nexthdr;
in6_addr_t *addrptr;
ip6_rthdr0_t *rthdr;
uint8_t numaddr;
ip6_hbh_t *hbhhdr;
uint_t ehdrlen;
uint8_t *byteptr;
ip2dbg(("ip_source_routed_v6\n"));
nexthdr = ip6h->ip6_nxt;
ehdrlen = IPV6_HDR_LEN;
/* if a routing hdr is preceeded by HOPOPT or DSTOPT */
while (nexthdr == IPPROTO_HOPOPTS ||
nexthdr == IPPROTO_DSTOPTS) {
byteptr = (uint8_t *)ip6h + ehdrlen;
/*
* Check if we have already processed
* packets or we are just a forwarding
* router which only pulled up msgs up
* to IPV6HDR and one HBH ext header
*/
if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
ip2dbg(("ip_source_routed_v6: Extension"
" headers not processed\n"));
return (B_FALSE);
}
hbhhdr = (ip6_hbh_t *)byteptr;
nexthdr = hbhhdr->ip6h_nxt;
ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1);
}
switch (nexthdr) {
case IPPROTO_ROUTING:
byteptr = (uint8_t *)ip6h + ehdrlen;
/*
* If for some reason, we haven't pulled up
* the routing hdr data mblk, then we must
* not have processed it at all. So for sure
* we are not part of the source routed journey.
*/
if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
ip2dbg(("ip_source_routed_v6: Routing"
" header not processed\n"));
return (B_FALSE);
}
rthdr = (ip6_rthdr0_t *)byteptr;
/*
* Either we are an intermediate router or the
* last hop before destination and we have
* already processed the routing header.
* If segment_left is greater than or equal to zero,
* then we must be the (numaddr - segleft) entry
* of the routing header. Although ip6r0_segleft
* is a unit8_t variable, we still check for zero
* or greater value, if in case the data type
* is changed someday in future.
*/
if (rthdr->ip6r0_segleft > 0 ||
rthdr->ip6r0_segleft == 0) {
ire_t *ire = NULL;
numaddr = rthdr->ip6r0_len / 2;
addrptr = (in6_addr_t *)((char *)rthdr +
sizeof (*rthdr));
addrptr += (numaddr - (rthdr->ip6r0_segleft + 1));
if (addrptr != NULL) {
ire = ire_ctable_lookup_v6(addrptr, NULL,
IRE_LOCAL, NULL, ALL_ZONES, NULL,
MATCH_IRE_TYPE,
ipst);
if (ire != NULL) {
ire_refrele(ire);
return (B_TRUE);
}
ip1dbg(("ip_source_routed_v6: No ire found\n"));
}
}
/* FALLTHRU */
default:
ip2dbg(("ip_source_routed_v6: Not source routed here\n"));
return (B_FALSE);
}
}
/*
* ip_wput_v6 -- Packets sent down from transport modules show up here.
* Assumes that the following set of headers appear in the first
* mblk:
* ip6i_t (if present) CAN also appear as a separate mblk.
* ip6_t
* Any extension headers
* TCP/UDP/SCTP header (if present)
* The routine can handle an ICMPv6 header that is not in the first mblk.
*
* The order to determine the outgoing interface is as follows:
* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill.
* 2. If q is an ill queue and (link local or multicast destination) then
* use that ill.
* 3. If IPV6_BOUND_IF has been set use that ill.
* 4. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise
* look for the best IRE match for the unspecified group to determine
* the ill.
* 5. For unicast: Just do an IRE lookup for the best match.
*
* arg2 is always a queue_t *.
* When that queue is an ill_t (i.e. q_next != NULL), then arg must be
* the zoneid.
* When that queue is not an ill_t, then arg must be a conn_t pointer.
*/
void
ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller)
{
conn_t *connp = NULL;
queue_t *q = (queue_t *)arg2;
ire_t *ire = NULL;
ire_t *sctp_ire = NULL;
ip6_t *ip6h;
in6_addr_t *v6dstp;
ill_t *ill = NULL;
ipif_t *ipif;
ip6i_t *ip6i;
int cksum_request; /* -1 => normal. */
/* 1 => Skip TCP/UDP/SCTP checksum */
/* Otherwise contains insert offset for checksum */
int unspec_src;
boolean_t do_outrequests; /* Increment OutRequests? */
mib2_ipIfStatsEntry_t *mibptr;
int match_flags = MATCH_IRE_ILL;
mblk_t *first_mp;
boolean_t mctl_present;
ipsec_out_t *io;
boolean_t multirt_need_resolve = B_FALSE;
mblk_t *copy_mp = NULL;
int err = 0;
int ip6i_flags = 0;
zoneid_t zoneid;
ill_t *saved_ill = NULL;
boolean_t conn_lock_held;
boolean_t need_decref = B_FALSE;
ip_stack_t *ipst;
if (q->q_next != NULL) {
ill = (ill_t *)q->q_ptr;
ipst = ill->ill_ipst;
} else {
connp = (conn_t *)arg;
ASSERT(connp != NULL);
ipst = connp->conn_netstack->netstack_ip;
}
/*
* Highest bit in version field is Reachability Confirmation bit
* used by NUD in ip_xmit_v6().
*/
#ifdef _BIG_ENDIAN
#define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7)
#else
#define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7)
#endif
/*
* M_CTL comes from 5 places
*
* 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections
* both V4 and V6 datagrams.
*
* 2) AH/ESP sends down M_CTL after doing their job with both
* V4 and V6 datagrams.
*
* 3) NDP callbacks when nce is resolved and IPSEC_OUT has been
* attached.
*
* 4) Notifications from an external resolver (for XRESOLV ifs)
*
* 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for
* IPsec hardware acceleration support.
*
* We need to handle (1)'s IPv6 case and (3) here. For the
* IPv4 case in (1), and (2), IPSEC processing has already
* started. The code in ip_wput() already knows how to handle
* continuing IPSEC processing (for IPv4 and IPv6). All other
* M_CTLs (including case (4)) are passed on to ip_wput_nondata()
* for handling.
*/
first_mp = mp;
mctl_present = B_FALSE;
io = NULL;
/* Multidata transmit? */
if (DB_TYPE(mp) == M_MULTIDATA) {
/*
* We should never get here, since all Multidata messages
* originating from tcp should have been directed over to
* tcp_multisend() in the first place.
*/
BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards);
freemsg(mp);
return;
} else if (DB_TYPE(mp) == M_CTL) {
uint32_t mctltype = 0;
uint32_t mlen = MBLKL(first_mp);
mp = mp->b_cont;
mctl_present = B_TRUE;
io = (ipsec_out_t *)first_mp->b_rptr;
/*
* Validate this M_CTL message. The only three types of
* M_CTL messages we expect to see in this code path are
* ipsec_out_t or ipsec_in_t structures (allocated as
* ipsec_info_t unions), or ipsec_ctl_t structures.
* The ipsec_out_type and ipsec_in_type overlap in the two
* data structures, and they are either set to IPSEC_OUT
* or IPSEC_IN depending on which data structure it is.
* ipsec_ctl_t is an IPSEC_CTL.
*
* All other M_CTL messages are sent to ip_wput_nondata()
* for handling.
*/
if (mlen >= sizeof (io->ipsec_out_type))
mctltype = io->ipsec_out_type;
if ((mlen == sizeof (ipsec_ctl_t)) &&
(mctltype == IPSEC_CTL)) {
ip_output(arg, first_mp, arg2, caller);
return;
}
if ((mlen < sizeof (ipsec_info_t)) ||
(mctltype != IPSEC_OUT && mctltype != IPSEC_IN) ||
mp == NULL) {
ip_wput_nondata(NULL, q, first_mp, NULL);
return;
}
/* NDP callbacks have q_next non-NULL. That's case #3. */
if (q->q_next == NULL) {
ip6h = (ip6_t *)mp->b_rptr;
/*
* For a freshly-generated TCP dgram that needs IPV6
* processing, don't call ip_wput immediately. We can
* tell this by the ipsec_out_proc_begin. In-progress
* IPSEC_OUT messages have proc_begin set to TRUE,
* and we want to send all IPSEC_IN messages to
* ip_wput() for IPsec processing or finishing.
*/
if (mctltype == IPSEC_IN ||
IPVER(ip6h) != IPV6_VERSION ||
io->ipsec_out_proc_begin) {
mibptr = &ipst->ips_ip6_mib;
goto notv6;
}
}
} else if (DB_TYPE(mp) != M_DATA) {
ip_wput_nondata(NULL, q, mp, NULL);
return;
}
ip6h = (ip6_t *)mp->b_rptr;
if (IPVER(ip6h) != IPV6_VERSION) {
mibptr = &ipst->ips_ip6_mib;
goto notv6;
}
if (is_system_labeled() && DB_TYPE(mp) == M_DATA &&
(connp == NULL || !connp->conn_ulp_labeled)) {
cred_t *cr;
pid_t pid;
if (connp != NULL) {
ASSERT(CONN_CRED(connp) != NULL);
cr = BEST_CRED(mp, connp, &pid);
err = tsol_check_label_v6(cr, &mp,
connp->conn_mac_exempt, ipst, pid);
} else if ((cr = msg_getcred(mp, &pid)) != NULL) {
err = tsol_check_label_v6(cr, &mp, B_FALSE, ipst, pid);
}
if (mctl_present)
first_mp->b_cont = mp;
else
first_mp = mp;
if (err != 0) {
DTRACE_PROBE3(
tsol_ip_log_drop_checklabel_ip6, char *,
"conn(1), failed to check/update mp(2)",
conn_t, connp, mblk_t, mp);
freemsg(first_mp);
return;
}
ip6h = (ip6_t *)mp->b_rptr;
}
if (q->q_next != NULL) {
/*
* We don't know if this ill will be used for IPv6
* until the ILLF_IPV6 flag is set via SIOCSLIFNAME.
* ipif_set_values() sets the ill_isv6 flag to true if
* ILLF_IPV6 is set. If the ill_isv6 flag isn't true,
* just drop the packet.
*/
if (!ill->ill_isv6) {
ip1dbg(("ip_wput_v6: Received an IPv6 packet before "
"ILLF_IPV6 was set\n"));
freemsg(first_mp);
return;
}
/* For uniformity do a refhold */
mutex_enter(&ill->ill_lock);
if (!ILL_CAN_LOOKUP(ill)) {
mutex_exit(&ill->ill_lock);
freemsg(first_mp);
return;
}
ill_refhold_locked(ill);
mutex_exit(&ill->ill_lock);
mibptr = ill->ill_ip_mib;
ASSERT(mibptr != NULL);
unspec_src = 0;
BUMP_MIB(mibptr, ipIfStatsHCOutRequests);
do_outrequests = B_FALSE;
zoneid = (zoneid_t)(uintptr_t)arg;
} else {
ASSERT(connp != NULL);
zoneid = connp->conn_zoneid;
/* is queue flow controlled? */
if ((q->q_first || connp->conn_draining) &&
(caller == IP_WPUT)) {
/*
* 1) TCP sends down M_CTL for detached connections.
* 2) AH/ESP sends down M_CTL.
*
* We don't flow control either of the above. Only
* UDP and others are flow controlled for which we
* can't have a M_CTL.
*/
ASSERT(first_mp == mp);
(void) putq(q, mp);
return;
}
mibptr = &ipst->ips_ip6_mib;
unspec_src = connp->conn_unspec_src;
do_outrequests = B_TRUE;
if (mp->b_flag & MSGHASREF) {
mp->b_flag &= ~MSGHASREF;
ASSERT(connp->conn_ulp == IPPROTO_SCTP);
SCTP_EXTRACT_IPINFO(mp, sctp_ire);
need_decref = B_TRUE;
}
/*
* If there is a policy, try to attach an ipsec_out in
* the front. At the end, first_mp either points to a
* M_DATA message or IPSEC_OUT message linked to a
* M_DATA message. We have to do it now as we might
* lose the "conn" if we go through ip_newroute.
*/
if (!mctl_present &&
(connp->conn_out_enforce_policy ||
connp->conn_latch != NULL)) {
ASSERT(first_mp == mp);
/* XXX Any better way to get the protocol fast ? */
if (((mp = ipsec_attach_ipsec_out(&mp, connp, NULL,
connp->conn_ulp, ipst->ips_netstack)) == NULL)) {
BUMP_MIB(mibptr, ipIfStatsOutDiscards);
if (need_decref)
CONN_DEC_REF(connp);
return;
} else {
ASSERT(mp->b_datap->db_type == M_CTL);
first_mp = mp;
mp = mp->b_cont;
mctl_present = B_TRUE;
io = (ipsec_out_t *)first_mp->b_rptr;
}
}
}
/* check for alignment and full IPv6 header */
if (!OK_32PTR((uchar_t *)ip6h) ||
(mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) {
ip0dbg(("ip_wput_v6: bad alignment or length\n"));
if (do_outrequests)
BUMP_MIB(mibptr, ipIfStatsHCOutRequests);
BUMP_MIB(mibptr, ipIfStatsOutDiscards);
freemsg(first_mp);
if (ill != NULL)
ill_refrele(ill);
if (need_decref)
CONN_DEC_REF(connp);
return;
}
v6dstp = &ip6h->ip6_dst;
cksum_request = -1;
ip6i = NULL;
/*
* Once neighbor discovery has completed, ndp_process() will provide
* locally generated packets for which processing can be reattempted.
* In these cases, connp is NULL and the original zone is part of a
* prepended ipsec_out_t.
*/
if (io != NULL) {
/*
* When coming from icmp_input_v6, the zoneid might not match
* for the loopback case, because inside icmp_input_v6 the
* queue_t is a conn queue from the sending side.
*/
zoneid = io->ipsec_out_zoneid;
ASSERT(zoneid != ALL_ZONES);
}
if (ip6h->ip6_nxt == IPPROTO_RAW) {
/*
* This is an ip6i_t header followed by an ip6_hdr.
* Check which fields are set.
*
* When the packet comes from a transport we should have
* all needed headers in the first mblk. However, when
* going through ip_newroute*_v6 the ip6i might be in
* a separate mblk when we return here. In that case
* we pullup everything to ensure that extension and transport
* headers "stay" in the first mblk.
*/
ip6i = (ip6i_t *)ip6h;
ip6i_flags = ip6i->ip6i_flags;
ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) ||
((mp->b_wptr - (uchar_t *)ip6i) >=
sizeof (ip6i_t) + IPV6_HDR_LEN));
if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) {
if (!pullupmsg(mp, -1)) {
ip1dbg(("ip_wput_v6: pullupmsg failed\n"));
if (do_outrequests) {
BUMP_MIB(mibptr,
ipIfStatsHCOutRequests);
}
BUMP_MIB(mibptr, ipIfStatsOutDiscards);
freemsg(first_mp);
if (ill != NULL)
ill_refrele(ill);
if (need_decref)
CONN_DEC_REF(connp);
return;
}
ip6h = (ip6_t *)mp->b_rptr;
v6dstp = &ip6h->ip6_dst;
ip6i = (ip6i_t *)ip6h;
}
ip6h = (ip6_t *)&ip6i[1];
/*
* Advance rptr past the ip6i_t to get ready for
* transmitting the packet. However, if the packet gets
* passed to ip_newroute*_v6 then rptr is moved back so
* that the ip6i_t header can be inspected when the
* packet comes back here after passing through
* ire_add_then_send.
*/
mp->b_rptr = (uchar_t *)ip6h;
if (ip6i->ip6i_flags & IP6I_IFINDEX) {
ASSERT(ip6i->ip6i_ifindex != 0);
if (ill != NULL)
ill_refrele(ill);
ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1,
NULL, NULL, NULL, NULL, ipst);
if (ill == NULL) {
if (do_outrequests) {
BUMP_MIB(mibptr,
ipIfStatsHCOutRequests);
}
BUMP_MIB(mibptr, ipIfStatsOutDiscards);
ip1dbg(("ip_wput_v6: bad ifindex %d\n",
ip6i->ip6i_ifindex));
if (need_decref)
CONN_DEC_REF(connp);
freemsg(first_mp);
return;
}
mibptr = ill->ill_ip_mib;
/*
* Preserve the index so that when we return from
* IPSEC processing, we know where to send the packet.
*/
if (mctl_present) {
ASSERT(io != NULL);
io->ipsec_out_ill_index = ip6i->ip6i_ifindex;
}
}
if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) {
cred_t *cr = msg_getcred(mp, NULL);
/* rpcmod doesn't send down db_credp for UDP packets */
if (cr == NULL) {
if (connp != NULL)
cr = connp->conn_cred;
else
cr = ill->ill_credp;
}
ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src));
if (secpolicy_net_rawaccess(cr) != 0) {
/*
* Use IPCL_ZONEID to honor SO_ALLZONES.
*/
ire = ire_route_lookup_v6(&ip6h->ip6_src,
0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL,
NULL, connp != NULL ?
IPCL_ZONEID(connp) : zoneid, NULL,
MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst);
if (ire == NULL) {
if (do_outrequests)
BUMP_MIB(mibptr,
ipIfStatsHCOutRequests);
BUMP_MIB(mibptr, ipIfStatsOutDiscards);
ip1dbg(("ip_wput_v6: bad source "
"addr\n"));
freemsg(first_mp);
if (ill != NULL)
ill_refrele(ill);
if (need_decref)
CONN_DEC_REF(connp);
return;
}
ire_refrele(ire);
}
/* No need to verify again when using ip_newroute */
ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC;
}
if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) {
/*
* Make sure they match since ip_newroute*_v6 etc might
* (unknown to them) inspect ip6i_nexthop when
* they think they access ip6_dst.
*/
ip6i->ip6i_nexthop = ip6h->ip6_dst;
}
if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM)
cksum_request = 1;
if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM)
cksum_request = ip6i->ip6i_checksum_off;
if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC)
unspec_src = 1;
if (do_outrequests && ill != NULL) {
BUMP_MIB(mibptr, ipIfStatsHCOutRequests);
do_outrequests = B_FALSE;
}
/*
* Store ip6i_t info that we need after we come back
* from IPSEC processing.
*/
if (mctl_present) {
ASSERT(io != NULL);
io->ipsec_out_unspec_src = unspec_src;
}
}
if (connp != NULL && connp->conn_dontroute)
ip6h->ip6_hops = 1;
if (IN6_IS_ADDR_MULTICAST(v6dstp))
goto ipv6multicast;
/* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. */
if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) {
ASSERT(ill != NULL);
goto send_from_ill;
}
/*
* 2. If q is an ill queue and there's a link-local destination
* then use that ill.
*/
if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp))
goto send_from_ill;
/* 3. If IPV6_BOUND_IF has been set use that ill. */
if (connp != NULL && connp->conn_outgoing_ill != NULL) {
ill_t *conn_outgoing_ill;
conn_outgoing_ill = conn_get_held_ill(connp,
&connp->conn_outgoing_ill, &err);
if (err == ILL_LOOKUP_FAILED) {
if (ill != NULL)
ill_refrele(ill);
if (need_decref)
CONN_DEC_REF(connp);
freemsg(first_mp);
return;
}
if (ill != NULL)
ill_refrele(ill);
ill = conn_outgoing_ill;
mibptr = ill->ill_ip_mib;
goto send_from_ill;
}
/*
* 4. For unicast: Just do an IRE lookup for the best match.
* If we get here for a link-local address it is rather random
* what interface we pick on a multihomed host.
* *If* there is an IRE_CACHE (and the link-local address
* isn't duplicated on multi links) this will find the IRE_CACHE.
* Otherwise it will use one of the matching IRE_INTERFACE routes
* for the link-local prefix. Hence, applications
* *should* be encouraged to specify an outgoing interface when sending
* to a link local address.
*/
if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) &&
!connp->conn_fully_bound)) {
/*
* We cache IRE_CACHEs to avoid lookups. We don't do
* this for the tcp global queue and listen end point
* as it does not really have a real destination to
* talk to.
*/
ire = ire_cache_lookup_v6(v6dstp, zoneid, msg_getlabel(mp),
ipst);
} else {
/*
* IRE_MARK_CONDEMNED is marked in ire_delete. We don't
* grab a lock here to check for CONDEMNED as it is okay
* to send a packet or two with the IRE_CACHE that is going
* away.
*/
mutex_enter(&connp->conn_lock);
ire = sctp_ire != NULL ? sctp_ire : connp->conn_ire_cache;
if (ire != NULL &&
IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) &&
!(ire->ire_marks & IRE_MARK_CONDEMNED)) {
IRE_REFHOLD(ire);
mutex_exit(&connp->conn_lock);
} else {
boolean_t cached = B_FALSE;
connp->conn_ire_cache = NULL;
mutex_exit(&connp->conn_lock);
/* Release the old ire */
if (ire != NULL && sctp_ire == NULL)
IRE_REFRELE_NOTR(ire);
ire = ire_cache_lookup_v6(v6dstp, zoneid,
msg_getlabel(mp), ipst);
if (ire != NULL) {
IRE_REFHOLD_NOTR(ire);
mutex_enter(&connp->conn_lock);
if (CONN_CACHE_IRE(connp) &&
(connp->conn_ire_cache == NULL)) {
rw_enter(&ire->ire_bucket->irb_lock,
RW_READER);
if (!(ire->ire_marks &
IRE_MARK_CONDEMNED)) {
connp->conn_ire_cache = ire;
cached = B_TRUE;
}
rw_exit(&ire->ire_bucket->irb_lock);
}
mutex_exit(&connp->conn_lock);
/*
* We can continue to use the ire but since it
* was not cached, we should drop the extra
* reference.
*/
if (!cached)
IRE_REFRELE_NOTR(ire);
}
}
}
if (ire != NULL) {
if (do_outrequests) {
/* Handle IRE_LOCAL's that might appear here */
if (ire->ire_type == IRE_CACHE) {
mibptr = ((ill_t *)ire->ire_stq->q_ptr)->
ill_ip_mib;
} else {
mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib;
}
BUMP_MIB(mibptr, ipIfStatsHCOutRequests);
}
/*
* Check if the ire has the RTF_MULTIRT flag, inherited
* from an IRE_OFFSUBNET ire entry in ip_newroute().
*/
if (ire->ire_flags & RTF_MULTIRT) {
/*
* Force hop limit of multirouted packets if required.
* The hop limit of such packets is bounded by the
* ip_multirt_ttl ndd variable.
* NDP packets must have a hop limit of 255; don't
* change the hop limit in that case.
*/
if ((ipst->ips_ip_multirt_ttl > 0) &&
(ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) &&
(ip6h->ip6_hops != IPV6_MAX_HOPS)) {
if (ip_debug > 3) {
ip2dbg(("ip_wput_v6: forcing multirt "
"hop limit to %d (was %d) ",
ipst->ips_ip_multirt_ttl,
ip6h->ip6_hops));
pr_addr_dbg("v6dst %s\n", AF_INET6,
&ire->ire_addr_v6);
}
ip6h->ip6_hops = ipst->ips_ip_multirt_ttl;
}
/*
* We look at this point if there are pending
* unresolved routes. ire_multirt_need_resolve_v6()
* checks in O(n) that all IRE_OFFSUBNET ire
* entries for the packet's destination and
* flagged RTF_MULTIRT are currently resolved.
* If some remain unresolved, we do a copy
* of the current message. It will be used
* to initiate additional route resolutions.
*/
multirt_need_resolve =
ire_multirt_need_resolve_v6(&ire->ire_addr_v6,
msg_getlabel(first_mp), ipst);
ip2dbg(("ip_wput_v6: ire %p, "
"multirt_need_resolve %d, first_mp %p\n",
(void *)ire, multirt_need_resolve,
(void *)first_mp));
if (multirt_need_resolve) {
copy_mp = copymsg(first_mp);
if (copy_mp != NULL) {
MULTIRT_DEBUG_TAG(copy_mp);
}
}
}
ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request,
connp, caller, ip6i_flags, zoneid);
if (need_decref) {
CONN_DEC_REF(connp);
connp = NULL;
}
IRE_REFRELE(ire);
/*
* Try to resolve another multiroute if
* ire_multirt_need_resolve_v6() deemed it necessary.
* copy_mp will be consumed (sent or freed) by
* ip_newroute_v6().
*/
if (copy_mp != NULL) {
if (mctl_present) {
ip6h = (ip6_t *)copy_mp->b_cont->b_rptr;
} else {
ip6h = (ip6_t *)copy_mp->b_rptr;
}
ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst,
&ip6h->ip6_src, NULL, zoneid, ipst);
}
if (ill != NULL)
ill_refrele(ill);
return;
}
/*
* No full IRE for this destination. Send it to
* ip_newroute_v6 to see if anything else matches.
* Mark this packet as having originated on this
* machine.
* Update rptr if there was an ip6i_t header.
*/
mp->b_prev = NULL;
mp->b_next = NULL;
if (ip6i != NULL)
mp->b_rptr -= sizeof (ip6i_t);
if (unspec_src) {
if (ip6i == NULL) {
/*
* Add ip6i_t header to carry unspec_src
* until the packet comes back in ip_wput_v6.
*/
mp = ip_add_info_v6(mp, NULL, v6dstp);
if (mp == NULL) {
if (do_outrequests)
BUMP_MIB(mibptr,
ipIfStatsHCOutRequests);
BUMP_MIB(mibptr, ipIfStatsOutDiscards);
if (mctl_present)
freeb(first_mp);
if (ill != NULL)
ill_refrele(ill);
if (need_decref)
CONN_DEC_REF(connp);
return;
}
ip6i = (ip6i_t *)mp->b_rptr;
if (mctl_present) {
ASSERT(first_mp != mp);
first_mp->b_cont = mp;
} else {
first_mp = mp;
}
if ((mp->b_wptr - (uchar_t *)ip6i) ==
sizeof (ip6i_t)) {
/*
* ndp_resolver called from ip_newroute_v6
* expects pulled up message.
*/
if (!pullupmsg(mp, -1)) {
ip1dbg(("ip_wput_v6: pullupmsg"
" failed\n"));
if (do_outrequests) {
BUMP_MIB(mibptr,
ipIfStatsHCOutRequests);
}
BUMP_MIB(mibptr, ipIfStatsOutDiscards);
freemsg(first_mp);
if (ill != NULL)
ill_refrele(ill);
if (need_decref)
CONN_DEC_REF(connp);
return;
}
ip6i = (ip6i_t *)mp->b_rptr;
}
ip6h = (ip6_t *)&ip6i[1];
v6dstp = &ip6h->ip6_dst;
}
ip6i->ip6i_flags |= IP6I_UNSPEC_SRC;
if (mctl_present) {
ASSERT(io != NULL);
io->ipsec_out_unspec_src = unspec_src;
}
}
if (do_outrequests)
BUMP_MIB(mibptr, ipIfStatsHCOutRequests);
if (need_decref)
CONN_DEC_REF(connp);
ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid, ipst);
if (ill != NULL)
ill_refrele(ill);
return;
/*
* Handle multicast packets with or without an conn.
* Assumes that the transports set ip6_hops taking
* IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit)
* into account.
*/
ipv6multicast:
ip2dbg(("ip_wput_v6: multicast\n"));
/*
* Hold the conn_lock till we refhold the ill of interest that is
* pointed to from the conn. Since we cannot do an ill/ipif_refrele
* while holding any locks, postpone the refrele until after the
* conn_lock is dropped.
*/
if (connp != NULL) {
mutex_enter(&connp->conn_lock);
conn_lock_held = B_TRUE;
} else {
conn_lock_held = B_FALSE;
}
if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) {
/* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. */
ASSERT(ill != NULL);
} else if (ill != NULL) {
/*
* 2. If q is an ill queue and (link local or multicast
* destination) then use that ill.
* We don't need the ipif initialization here.
* This useless assert below is just to prevent lint from
* reporting a null body if statement.
*/
ASSERT(ill != NULL);
} else if (connp != NULL) {
/*
* 3. If IPV6_BOUND_IF has been set use that ill.
*
* 4. For multicast: if IPV6_MULTICAST_IF has been set use it.
* Otherwise look for the best IRE match for the unspecified
* group to determine the ill.
*
* conn_multicast_ill is used for only IPv6 packets.
* conn_multicast_ipif is used for only IPv4 packets.
* Thus a PF_INET6 socket send both IPv4 and IPv6
* multicast packets using different IP*_MULTICAST_IF
* interfaces.
*/
if (connp->conn_outgoing_ill != NULL) {
err = ill_check_and_refhold(connp->conn_outgoing_ill);
if (err == ILL_LOOKUP_FAILED) {
ip1dbg(("ip_output_v6: multicast"
" conn_outgoing_ill no ipif\n"));
multicast_discard:
ASSERT(saved_ill == NULL);
if (conn_lock_held)
mutex_exit(&connp->conn_lock);
if (ill != NULL)
ill_refrele(ill);
freemsg(first_mp);
if (do_outrequests)
BUMP_MIB(mibptr, ipIfStatsOutDiscards);
if (need_decref)
CONN_DEC_REF(connp);
return;
}
ill = connp->conn_outgoing_ill;
} else if (connp->conn_multicast_ill != NULL) {
err = ill_check_and_refhold(connp->conn_multicast_ill);
if (err == ILL_LOOKUP_FAILED) {
ip1dbg(("ip_output_v6: multicast"
" conn_multicast_ill no ipif\n"));
goto multicast_discard;
}
ill = connp->conn_multicast_ill;
} else {
mutex_exit(&connp->conn_lock);
conn_lock_held = B_FALSE;
ipif = ipif_lookup_group_v6(v6dstp, zoneid, ipst);
if (ipif == NULL) {
ip1dbg(("ip_output_v6: multicast no ipif\n"));
goto multicast_discard;
}
/*
* We have a ref to this ipif, so we can safely
* access ipif_ill.
*/
ill = ipif->ipif_ill;
mutex_enter(&ill->ill_lock);
if (!ILL_CAN_LOOKUP(ill)) {
mutex_exit(&ill->ill_lock);
ipif_refrele(ipif);
ill = NULL;
ip1dbg(("ip_output_v6: multicast no ipif\n"));
goto multicast_discard;
}
ill_refhold_locked(ill);
mutex_exit(&ill->ill_lock);
ipif_refrele(ipif);
/*
* Save binding until IPV6_MULTICAST_IF
* changes it
*/
mutex_enter(&connp->conn_lock);
connp->conn_multicast_ill = ill;
mutex_exit(&connp->conn_lock);
}
}
if (conn_lock_held)
mutex_exit(&connp->conn_lock);
if (saved_ill != NULL)
ill_refrele(saved_ill);
ASSERT(ill != NULL);
/*
* For multicast loopback interfaces replace the multicast address
* with a unicast address for the ire lookup.
*/
if (IS_LOOPBACK(ill))
v6dstp = &ill->ill_ipif->ipif_v6lcl_addr;
mibptr = ill->ill_ip_mib;
if (do_outrequests) {
BUMP_MIB(mibptr, ipIfStatsHCOutRequests);
do_outrequests = B_FALSE;
}
BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts);
UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets,
ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN);
/*
* As we may lose the conn by the time we reach ip_wput_ire_v6
* we copy conn_multicast_loop and conn_dontroute on to an
* ipsec_out. In case if this datagram goes out secure,
* we need the ill_index also. Copy that also into the
* ipsec_out.
*/
if (mctl_present) {
io = (ipsec_out_t *)first_mp->b_rptr;
ASSERT(first_mp->b_datap->db_type == M_CTL);
ASSERT(io->ipsec_out_type == IPSEC_OUT);
} else {
ASSERT(mp == first_mp);
if ((first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack)) ==
NULL) {
BUMP_MIB(mibptr, ipIfStatsOutDiscards);
freemsg(mp);
if (ill != NULL)
ill_refrele(ill);
if (need_decref)
CONN_DEC_REF(connp);
return;
}
io = (ipsec_out_t *)first_mp->b_rptr;
/* This is not a secure packet */
io->ipsec_out_secure = B_FALSE;
io->ipsec_out_use_global_policy = B_TRUE;
io->ipsec_out_zoneid =
(zoneid != ALL_ZONES ? zoneid : GLOBAL_ZONEID);
first_mp->b_cont = mp;
mctl_present = B_TRUE;
}
io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
io->ipsec_out_unspec_src = unspec_src;
if (connp != NULL)
io->ipsec_out_dontroute = connp->conn_dontroute;
send_from_ill:
ASSERT(ill != NULL);
ASSERT(mibptr == ill->ill_ip_mib);
if (do_outrequests) {
BUMP_MIB(mibptr, ipIfStatsHCOutRequests);
do_outrequests = B_FALSE;
}
/*
* Because nce_xmit() calls ip_output_v6() and NCEs are always tied to
* an underlying interface, IS_UNDER_IPMP() may be true even when
* building IREs that will be used for data traffic. As such, use the
* packet's source address to determine whether the traffic is test
* traffic, and set MATCH_IRE_MARK_TESTHIDDEN if so.
*
* Separately, we also need to mark probe packets so that ND can
* process them specially; see the comments in nce_queue_mp_common().
*/
if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) &&
ipif_lookup_testaddr_v6(ill, &ip6h->ip6_src, NULL)) {
if (ip6i == NULL) {
if ((mp = ip_add_info_v6(mp, NULL, v6dstp)) == NULL) {
if (mctl_present)
freeb(first_mp);
goto discard;
}
if (mctl_present)
first_mp->b_cont = mp;
else
first_mp = mp;
/* ndp_resolver() expects a pulled-up message */
if (MBLKL(mp) == sizeof (ip6i_t) &&
pullupmsg(mp, -1) == 0) {
ip1dbg(("ip_output_v6: pullupmsg failed\n"));
discard: BUMP_MIB(mibptr, ipIfStatsOutDiscards);
ill_refrele(ill);
if (need_decref)
CONN_DEC_REF(connp);
return;
}
ip6i = (ip6i_t *)mp->b_rptr;
ip6h = (ip6_t *)&ip6i[1];
v6dstp = &ip6h->ip6_dst;
mp->b_rptr = (uchar_t *)ip6h; /* rewound below */
}
ip6i->ip6i_flags |= IP6I_IPMP_PROBE;
match_flags |= MATCH_IRE_MARK_TESTHIDDEN;
}
if (io != NULL)
io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
/*
* When a specific ill is specified (using IPV6_PKTINFO,
* IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match
* on routing entries (ftable and ctable) that have a matching
* ire->ire_ipif->ipif_ill. Thus this can only be used
* for destinations that are on-link for the specific ill
* and that can appear on multiple links. Thus it is useful
* for multicast destinations, link-local destinations, and
* at some point perhaps for site-local destinations (if the
* node sits at a site boundary).
* We create the cache entries in the regular ctable since
* it can not "confuse" things for other destinations.
* table.
*
* NOTE : conn_ire_cache is not used for caching ire_ctable_lookups.
* It is used only when ire_cache_lookup is used above.
*/
ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif,
zoneid, msg_getlabel(mp), match_flags, ipst);
if (ire != NULL) {
/*
* Check if the ire has the RTF_MULTIRT flag, inherited
* from an IRE_OFFSUBNET ire entry in ip_newroute().
*/
if (ire->ire_flags & RTF_MULTIRT) {
/*
* Force hop limit of multirouted packets if required.
* The hop limit of such packets is bounded by the
* ip_multirt_ttl ndd variable.
* NDP packets must have a hop limit of 255; don't
* change the hop limit in that case.
*/
if ((ipst->ips_ip_multirt_ttl > 0) &&
(ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) &&
(ip6h->ip6_hops != IPV6_MAX_HOPS)) {
if (ip_debug > 3) {
ip2dbg(("ip_wput_v6: forcing multirt "
"hop limit to %d (was %d) ",
ipst->ips_ip_multirt_ttl,
ip6h->ip6_hops));
pr_addr_dbg("v6dst %s\n", AF_INET6,
&ire->ire_addr_v6);
}
ip6h->ip6_hops = ipst->ips_ip_multirt_ttl;
}
/*
* We look at this point if there are pending
* unresolved routes. ire_multirt_need_resolve_v6()
* checks in O(n) that all IRE_OFFSUBNET ire
* entries for the packet's destination and
* flagged RTF_MULTIRT are currently resolved.
* If some remain unresolved, we make a copy
* of the current message. It will be used
* to initiate additional route resolutions.
*/
multirt_need_resolve =
ire_multirt_need_resolve_v6(&ire->ire_addr_v6,
msg_getlabel(first_mp), ipst);
ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, "
"multirt_need_resolve %d, first_mp %p\n",
(void *)ire, multirt_need_resolve,
(void *)first_mp));
if (multirt_need_resolve) {
copy_mp = copymsg(first_mp);
if (copy_mp != NULL) {
MULTIRT_DEBUG_TAG(copy_mp);
}
}
}
ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n",
ill->ill_name, (void *)ire,
ill->ill_phyint->phyint_ifindex));
ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request,
connp, caller, ip6i_flags, zoneid);
ire_refrele(ire);
if (need_decref) {
CONN_DEC_REF(connp);
connp = NULL;
}
/*
* Try to resolve another multiroute if
* ire_multirt_need_resolve_v6() deemed it necessary.
* copy_mp will be consumed (sent or freed) by
* ip_newroute_[ipif_]v6().
*/
if (copy_mp != NULL) {
if (mctl_present) {
ip6h = (ip6_t *)copy_mp->b_cont->b_rptr;
} else {
ip6h = (ip6_t *)copy_mp->b_rptr;
}
if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
ipif = ipif_lookup_group_v6(&ip6h->ip6_dst,
zoneid, ipst);
if (ipif == NULL) {
ip1dbg(("ip_wput_v6: No ipif for "
"multicast\n"));
MULTIRT_DEBUG_UNTAG(copy_mp);
freemsg(copy_mp);
return;
}
ip_newroute_ipif_v6(q, copy_mp, ipif,
&ip6h->ip6_dst, &ip6h->ip6_src, unspec_src,
zoneid);
ipif_refrele(ipif);
} else {
ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst,
&ip6h->ip6_src, ill, zoneid, ipst);
}
}
ill_refrele(ill);
return;
}
if (need_decref) {
CONN_DEC_REF(connp);
connp = NULL;
}
/* Update rptr if there was an ip6i_t header. */
if (ip6i != NULL)
mp->b_rptr -= sizeof (ip6i_t);
if (unspec_src) {
if (ip6i == NULL) {
/*
* Add ip6i_t header to carry unspec_src
* until the packet comes back in ip_wput_v6.
*/
if (mctl_present) {
first_mp->b_cont =
ip_add_info_v6(mp, NULL, v6dstp);
mp = first_mp->b_cont;
if (mp == NULL)
freeb(first_mp);
} else {
first_mp = mp = ip_add_info_v6(mp, NULL,
v6dstp);
}
if (mp == NULL) {
BUMP_MIB(mibptr, ipIfStatsOutDiscards);
ill_refrele(ill);
return;
}
ip6i = (ip6i_t *)mp->b_rptr;
if ((mp->b_wptr - (uchar_t *)ip6i) ==
sizeof (ip6i_t)) {
/*
* ndp_resolver called from ip_newroute_v6
* expects a pulled up message.
*/
if (!pullupmsg(mp, -1)) {
ip1dbg(("ip_wput_v6: pullupmsg"
" failed\n"));
BUMP_MIB(mibptr, ipIfStatsOutDiscards);
freemsg(first_mp);
return;
}
ip6i = (ip6i_t *)mp->b_rptr;
}
ip6h = (ip6_t *)&ip6i[1];
v6dstp = &ip6h->ip6_dst;
}
ip6i->ip6i_flags |= IP6I_UNSPEC_SRC;
if (mctl_present) {
ASSERT(io != NULL);
io->ipsec_out_unspec_src = unspec_src;
}
}
if (IN6_IS_ADDR_MULTICAST(v6dstp)) {
ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, v6dstp,
&ip6h->ip6_src, unspec_src, zoneid);
} else {
ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill,
zoneid, ipst);
}
ill_refrele(ill);
return;
notv6:
/* FIXME?: assume the caller calls the right version of ip_output? */
if (q->q_next == NULL) {
connp = Q_TO_CONN(q);
/*
* We can change conn_send for all types of conn, even
* though only TCP uses it right now.
* FIXME: sctp could use conn_send but doesn't currently.
*/
ip_setpktversion(connp, B_FALSE, B_TRUE, ipst);
}
BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion);
(void) ip_output(arg, first_mp, arg2, caller);
if (ill != NULL)
ill_refrele(ill);
}
/*
* If this is a conn_t queue, then we pass in the conn. This includes the
* zoneid.
* Otherwise, this is a message for an ill_t queue,
* in which case we use the global zoneid since those are all part of
* the global zone.
*/
void
ip_wput_v6(queue_t *q, mblk_t *mp)
{
if (CONN_Q(q))
ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT);
else
ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT);
}
/*
* NULL send-to queue - packet is to be delivered locally.
*/
void
ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp,
ire_t *ire, int fanout_flags, zoneid_t zoneid)
{
uint32_t ports;
mblk_t *mp = first_mp, *first_mp1;
boolean_t mctl_present;
uint8_t nexthdr;
uint16_t hdr_length;
ipsec_out_t *io;
mib2_ipIfStatsEntry_t *mibptr;
ilm_t *ilm;
uint_t nexthdr_offset;
ip_stack_t *ipst = ill->ill_ipst;
if (DB_TYPE(mp) == M_CTL) {
io = (ipsec_out_t *)mp->b_rptr;
if (!io->ipsec_out_secure) {
mp = mp->b_cont;
freeb(first_mp);
first_mp = mp;
mctl_present = B_FALSE;
} else {
mctl_present = B_TRUE;
mp = first_mp->b_cont;
ipsec_out_to_in(first_mp);
}
} else {
mctl_present = B_FALSE;
}
/*
* Remove reachability confirmation bit from version field
* before passing the packet on to any firewall hooks or
* looping back the packet.
*/
if (ip6h->ip6_vcf & IP_FORWARD_PROG)
ip6h->ip6_vcf &= ~IP_FORWARD_PROG;
DTRACE_PROBE4(ip6__loopback__in__start,
ill_t *, ill, ill_t *, NULL,
ip6_t *, ip6h, mblk_t *, first_mp);
FW_HOOKS6(ipst->ips_ip6_loopback_in_event,
ipst->ips_ipv6firewall_loopback_in,
ill, NULL, ip6h, first_mp, mp, 0, ipst);
DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp);
if (first_mp == NULL)
return;
if (ipst->ips_ipobs_enabled) {
zoneid_t szone, dzone, lookup_zoneid = ALL_ZONES;
zoneid_t stackzoneid = netstackid_to_zoneid(
ipst->ips_netstack->netstack_stackid);
szone = (stackzoneid == GLOBAL_ZONEID) ? zoneid : stackzoneid;
/*
* ::1 is special, as we cannot lookup its zoneid by
* address. For this case, restrict the lookup to the
* source zone.
*/
if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst))
lookup_zoneid = zoneid;
dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst,
lookup_zoneid);
ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill,
IPV6_VERSION, 0, ipst);
}
DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *,
ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h,
int, 1);
nexthdr = ip6h->ip6_nxt;
mibptr = ill->ill_ip_mib;
/* Fastpath */
switch (nexthdr) {
case IPPROTO_TCP:
case IPPROTO_UDP:
case IPPROTO_ICMPV6:
case IPPROTO_SCTP:
hdr_length = IPV6_HDR_LEN;
nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt -
(uchar_t *)ip6h);
break;
default: {
uint8_t *nexthdrp;
if (!ip_hdr_length_nexthdr_v6(mp, ip6h,
&hdr_length, &nexthdrp)) {
/* Malformed packet */
BUMP_MIB(mibptr, ipIfStatsOutDiscards);
freemsg(first_mp);
return;
}
nexthdr = *nexthdrp;
nexthdr_offset = nexthdrp - (uint8_t *)ip6h;
break;
}
}
UPDATE_OB_PKT_COUNT(ire);
ire->ire_last_used_time = lbolt;
switch (nexthdr) {
case IPPROTO_TCP:
if (DB_TYPE(mp) == M_DATA) {
/*
* M_DATA mblk, so init mblk (chain) for
* no struio().
*/
mblk_t *mp1 = mp;
do {
mp1->b_datap->db_struioflag = 0;
} while ((mp1 = mp1->b_cont) != NULL);
}
ports = *(uint32_t *)(mp->b_rptr + hdr_length +
TCP_PORTS_OFFSET);
ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill,
fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE|
IP_FF_IPINFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK,
hdr_length, mctl_present, ire->ire_zoneid);
return;
case IPPROTO_UDP:
ports = *(uint32_t *)(mp->b_rptr + hdr_length +
UDP_PORTS_OFFSET);
ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill,
fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO|
IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid);
return;
case IPPROTO_SCTP:
{
ports = *(uint32_t *)(mp->b_rptr + hdr_length);
ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports,
fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO,
mctl_present, IP6_NO_IPPOLICY, ire->ire_zoneid);
return;
}
case IPPROTO_ICMPV6: {
icmp6_t *icmp6;
/* check for full IPv6+ICMPv6 header */
if ((mp->b_wptr - mp->b_rptr) <
(hdr_length + ICMP6_MINLEN)) {
if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) {
ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg"
" failed\n"));
BUMP_MIB(mibptr, ipIfStatsOutDiscards);
freemsg(first_mp);
return;
}
ip6h = (ip6_t *)mp->b_rptr;
}
icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length);
/* Update output mib stats */
icmp_update_out_mib_v6(ill, icmp6);
/* Check variable for testing applications */
if (ipst->ips_ipv6_drop_inbound_icmpv6) {
freemsg(first_mp);
return;
}
/*
* Assume that there is always at least one conn for
* ICMPv6 (in.ndpd) i.e. don't optimize the case
* where there is no conn.
*/
if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
!IS_LOOPBACK(ill)) {
ilm_walker_t ilw;
/*
* In the multicast case, applications may have
* joined the group from different zones, so we
* need to deliver the packet to each of them.
* Loop through the multicast memberships
* structures (ilm) on the receive ill and send
* a copy of the packet up each matching one.
* However, we don't do this for multicasts sent
* on the loopback interface (PHYI_LOOPBACK flag
* set) as they must stay in the sender's zone.
*/
ilm = ilm_walker_start(&ilw, ill);
for (; ilm != NULL;
ilm = ilm_walker_step(&ilw, ilm)) {
if (!IN6_ARE_ADDR_EQUAL(
&ilm->ilm_v6addr, &ip6h->ip6_dst))
continue;
if ((fanout_flags &
IP_FF_NO_MCAST_LOOP) &&
ilm->ilm_zoneid == ire->ire_zoneid)
continue;
if (!ipif_lookup_zoneid(
ilw.ilw_walk_ill, ilm->ilm_zoneid,
IPIF_UP, NULL))
continue;
first_mp1 = ip_copymsg(first_mp);
if (first_mp1 == NULL)
continue;
icmp_inbound_v6(q, first_mp1,
ilw.ilw_walk_ill, ill, hdr_length,
mctl_present, IP6_NO_IPPOLICY,
ilm->ilm_zoneid, NULL);
}
ilm_walker_finish(&ilw);
} else {
first_mp1 = ip_copymsg(first_mp);
if (first_mp1 != NULL)
icmp_inbound_v6(q, first_mp1, ill, ill,
hdr_length, mctl_present,
IP6_NO_IPPOLICY, ire->ire_zoneid,
NULL);
}
}
/* FALLTHRU */
default: {
/*
* Handle protocols with which IPv6 is less intimate.
*/
fanout_flags |= IP_FF_RAWIP|IP_FF_IPINFO;
/*
* Enable sending ICMP for "Unknown" nexthdr
* case. i.e. where we did not FALLTHRU from
* IPPROTO_ICMPV6 processing case above.
*/
if (nexthdr != IPPROTO_ICMPV6)
fanout_flags |= IP_FF_SEND_ICMP;
/*
* Note: There can be more than one stream bound
* to a particular protocol. When this is the case,
* each one gets a copy of any incoming packets.
*/
ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr,
nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY,
mctl_present, ire->ire_zoneid);
return;
}
}
}
/*
* Send packet using IRE.
* Checksumming is controlled by cksum_request:
* -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else.
* 1 => Skip TCP/UDP/SCTP checksum
* Otherwise => checksum_request contains insert offset for checksum
*
* Assumes that the following set of headers appear in the first
* mblk:
* ip6_t
* Any extension headers
* TCP/UDP/SCTP header (if present)
* The routine can handle an ICMPv6 header that is not in the first mblk.
*
* NOTE : This function does not ire_refrele the ire passed in as the
* argument unlike ip_wput_ire where the REFRELE is done.
* Refer to ip_wput_ire for more on this.
*/
static void
ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src,
int cksum_request, conn_t *connp, int caller, int flags, zoneid_t zoneid)
{
ip6_t *ip6h;
uint8_t nexthdr;
uint16_t hdr_length;
uint_t reachable = 0x0;
ill_t *ill;
mib2_ipIfStatsEntry_t *mibptr;
mblk_t *first_mp;
boolean_t mctl_present;
ipsec_out_t *io;
boolean_t conn_dontroute; /* conn value for multicast */
boolean_t conn_multicast_loop; /* conn value for multicast */
boolean_t multicast_forward; /* Should we forward ? */
int max_frag;
ip_stack_t *ipst = ire->ire_ipst;
ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;
ill = ire_to_ill(ire);
first_mp = mp;
multicast_forward = B_FALSE;
if (mp->b_datap->db_type != M_CTL) {
ip6h = (ip6_t *)first_mp->b_rptr;
} else {
io = (ipsec_out_t *)first_mp->b_rptr;
ASSERT(io->ipsec_out_type == IPSEC_OUT);
/*
* Grab the zone id now because the M_CTL can be discarded by
* ip_wput_ire_parse_ipsec_out() below.
*/
ASSERT(zoneid == io->ipsec_out_zoneid);
ASSERT(zoneid != ALL_ZONES);
ip6h = (ip6_t *)first_mp->b_cont->b_rptr;
/*
* For the multicast case, ipsec_out carries conn_dontroute and
* conn_multicast_loop as conn may not be available here. We
* need this for multicast loopback and forwarding which is done
* later in the code.
*/
if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
conn_dontroute = io->ipsec_out_dontroute;
conn_multicast_loop = io->ipsec_out_multicast_loop;
/*
* If conn_dontroute is not set or conn_multicast_loop
* is set, we need to do forwarding/loopback. For
* datagrams from ip_wput_multicast, conn_dontroute is
* set to B_TRUE and conn_multicast_loop is set to
* B_FALSE so that we neither do forwarding nor
* loopback.
*/
if (!conn_dontroute || conn_multicast_loop)
multicast_forward = B_TRUE;
}
}
/*
* If the sender didn't supply the hop limit and there is a default
* unicast hop limit associated with the output interface, we use
* that if the packet is unicast. Interface specific unicast hop
* limits as set via the SIOCSLIFLNKINFO ioctl.
*/
if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) &&
!(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) {
ip6h->ip6_hops = ill->ill_max_hops;
}
if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid &&
ire->ire_zoneid != ALL_ZONES) {
/*
* When a zone sends a packet to another zone, we try to deliver
* the packet under the same conditions as if the destination
* was a real node on the network. To do so, we look for a
* matching route in the forwarding table.
* RTF_REJECT and RTF_BLACKHOLE are handled just like
* ip_newroute_v6() does.
* Note that IRE_LOCAL are special, since they are used
* when the zoneid doesn't match in some cases. This means that
* we need to handle ipha_src differently since ire_src_addr
* belongs to the receiving zone instead of the sending zone.
* When ip_restrict_interzone_loopback is set, then
* ire_cache_lookup_v6() ensures that IRE_LOCAL are only used
* for loopback between zones when the logical "Ethernet" would
* have looped them back.
*/
ire_t *src_ire;
src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0,
NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE |
MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst);
if (src_ire != NULL &&
!(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) &&
(!ipst->ips_ip_restrict_interzone_loopback ||
ire_local_same_lan(ire, src_ire))) {
if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) &&
!unspec_src) {
ip6h->ip6_src = src_ire->ire_src_addr_v6;
}
ire_refrele(src_ire);
} else {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes);
if (src_ire != NULL) {
if (src_ire->ire_flags & RTF_BLACKHOLE) {
ire_refrele(src_ire);
freemsg(first_mp);
return;
}
ire_refrele(src_ire);
}
if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) {
/* Failed */
freemsg(first_mp);
return;
}
icmp_unreachable_v6(q, first_mp,
ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE,
zoneid, ipst);
return;
}
}
if (mp->b_datap->db_type == M_CTL ||
ipss->ipsec_outbound_v6_policy_present) {
mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire,
connp, unspec_src, zoneid);
if (mp == NULL) {
return;
}
}
first_mp = mp;
if (mp->b_datap->db_type == M_CTL) {
io = (ipsec_out_t *)mp->b_rptr;
ASSERT(io->ipsec_out_type == IPSEC_OUT);
mp = mp->b_cont;
mctl_present = B_TRUE;
} else {
mctl_present = B_FALSE;
}
ip6h = (ip6_t *)mp->b_rptr;
nexthdr = ip6h->ip6_nxt;
mibptr = ill->ill_ip_mib;
if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) {
ipif_t *ipif;
/*
* Select the source address using ipif_select_source_v6.
*/
ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, B_FALSE,
IPV6_PREFER_SRC_DEFAULT, zoneid);
if (ipif == NULL) {
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_wput_ire_v6: no src for "
"dst %s\n", AF_INET6, &ip6h->ip6_dst);
printf("through interface %s\n", ill->ill_name);
}
freemsg(first_mp);
return;
}
ip6h->ip6_src = ipif->ipif_v6src_addr;
ipif_refrele(ipif);
}
if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
if ((connp != NULL && connp->conn_multicast_loop) ||
!IS_LOOPBACK(ill)) {
if (ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, B_FALSE,
ALL_ZONES) != NULL) {
mblk_t *nmp;
int fanout_flags = 0;
if (connp != NULL &&
!connp->conn_multicast_loop) {
fanout_flags |= IP_FF_NO_MCAST_LOOP;
}
ip1dbg(("ip_wput_ire_v6: "
"Loopback multicast\n"));
nmp = ip_copymsg(first_mp);
if (nmp != NULL) {
ip6_t *nip6h;
mblk_t *mp_ip6h;
if (mctl_present) {
nip6h = (ip6_t *)
nmp->b_cont->b_rptr;
mp_ip6h = nmp->b_cont;
} else {
nip6h = (ip6_t *)nmp->b_rptr;
mp_ip6h = nmp;
}
DTRACE_PROBE4(
ip6__loopback__out__start,
ill_t *, NULL,
ill_t *, ill,
ip6_t *, nip6h,
mblk_t *, nmp);
FW_HOOKS6(
ipst->ips_ip6_loopback_out_event,
ipst->ips_ipv6firewall_loopback_out,
NULL, ill, nip6h, nmp, mp_ip6h,
0, ipst);
DTRACE_PROBE1(
ip6__loopback__out__end,
mblk_t *, nmp);
/*
* DTrace this as ip:::send. A blocked
* packet will fire the send probe, but
* not the receive probe.
*/
DTRACE_IP7(send, mblk_t *, nmp,
conn_t *, NULL, void_ip_t *, nip6h,
__dtrace_ipsr_ill_t *, ill,
ipha_t *, NULL, ip6_t *, nip6h,
int, 1);
if (nmp != NULL) {
/*
* Deliver locally and to
* every local zone, except
* the sending zone when
* IPV6_MULTICAST_LOOP is
* disabled.
*/
ip_wput_local_v6(RD(q), ill,
nip6h, nmp, ire,
fanout_flags, zoneid);
}
} else {
BUMP_MIB(mibptr, ipIfStatsOutDiscards);
ip1dbg(("ip_wput_ire_v6: "
"copymsg failed\n"));
}
}
}
if (ip6h->ip6_hops == 0 ||
IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) ||
IS_LOOPBACK(ill)) {
/*
* Local multicast or just loopback on loopback
* interface.
*/
BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts);
UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets,
ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN);
ip1dbg(("ip_wput_ire_v6: local multicast only\n"));
freemsg(first_mp);
return;
}
}
if (ire->ire_stq != NULL) {
uint32_t sum;
uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)->
ill_phyint->phyint_ifindex;
queue_t *dev_q = ire->ire_stq->q_next;
/*
* non-NULL send-to queue - packet is to be sent
* out an interface.
*/
/* Driver is flow-controlling? */
if (!IP_FLOW_CONTROLLED_ULP(nexthdr) &&
DEV_Q_FLOW_BLOCKED(dev_q)) {
/*
* Queue packet if we have an conn to give back
* pressure. We can't queue packets intended for
* hardware acceleration since we've tossed that
* state already. If the packet is being fed back
* from ire_send_v6, we don't know the position in
* the queue to enqueue the packet and we discard
* the packet.
*/
if (ipst->ips_ip_output_queue && connp != NULL &&
!mctl_present && caller != IRE_SEND) {
if (caller == IP_WSRV) {
idl_tx_list_t *idl_txl;
idl_txl = &ipst->ips_idl_tx_list[0];
connp->conn_did_putbq = 1;
(void) putbq(connp->conn_wq, mp);
conn_drain_insert(connp, idl_txl);
/*
* caller == IP_WSRV implies we are
* the service thread, and the
* queue is already noenabled.
* The check for canput and
* the putbq is not atomic.
* So we need to check again.
*/
if (canput(dev_q))
connp->conn_did_putbq = 0;
} else {
(void) putq(connp->conn_wq, mp);
}
return;
}
BUMP_MIB(mibptr, ipIfStatsOutDiscards);
freemsg(first_mp);
return;
}
/*
* Look for reachability confirmations from the transport.
*/
if (ip6h->ip6_vcf & IP_FORWARD_PROG) {
reachable |= IPV6_REACHABILITY_CONFIRMATION;
ip6h->ip6_vcf &= ~IP_FORWARD_PROG;
if (mctl_present)
io->ipsec_out_reachable = B_TRUE;
}
/* Fastpath */
switch (nexthdr) {
case IPPROTO_TCP:
case IPPROTO_UDP:
case IPPROTO_ICMPV6:
case IPPROTO_SCTP:
hdr_length = IPV6_HDR_LEN;
break;
default: {
uint8_t *nexthdrp;
if (!ip_hdr_length_nexthdr_v6(mp, ip6h,
&hdr_length, &nexthdrp)) {
/* Malformed packet */
BUMP_MIB(mibptr, ipIfStatsOutDiscards);
freemsg(first_mp);
return;
}
nexthdr = *nexthdrp;
break;
}
}
if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) {
uint16_t *up;
uint16_t *insp;
/*
* The packet header is processed once for all, even
* in the multirouting case. We disable hardware
* checksum if the packet is multirouted, as it will be
* replicated via several interfaces, and not all of
* them may have this capability.
*/
if (cksum_request == 1 &&
!(ire->ire_flags & RTF_MULTIRT)) {
/* Skip the transport checksum */
goto cksum_done;
}
/*
* Do user-configured raw checksum.
* Compute checksum and insert at offset "cksum_request"
*/
/* check for enough headers for checksum */
cksum_request += hdr_length; /* offset from rptr */
if ((mp->b_wptr - mp->b_rptr) <
(cksum_request + sizeof (int16_t))) {
if (!pullupmsg(mp,
cksum_request + sizeof (int16_t))) {
ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg"
" failed\n"));
BUMP_MIB(mibptr, ipIfStatsOutDiscards);
freemsg(first_mp);
return;
}
ip6h = (ip6_t *)mp->b_rptr;
}
insp = (uint16_t *)((uchar_t *)ip6h + cksum_request);
ASSERT(((uintptr_t)insp & 0x1) == 0);
up = (uint16_t *)&ip6h->ip6_src;
/*
* icmp has placed length and routing
* header adjustment in *insp.
*/
sum = htons(nexthdr) +
up[0] + up[1] + up[2] + up[3] +
up[4] + up[5] + up[6] + up[7] +
up[8] + up[9] + up[10] + up[11] +
up[12] + up[13] + up[14] + up[15];
sum = (sum & 0xffff) + (sum >> 16);
*insp = IP_CSUM(mp, hdr_length, sum);
} else if (nexthdr == IPPROTO_TCP) {
uint16_t *up;
/*
* Check for full IPv6 header + enough TCP header
* to get at the checksum field.
*/
if ((mp->b_wptr - mp->b_rptr) <
(hdr_length + TCP_CHECKSUM_OFFSET +
TCP_CHECKSUM_SIZE)) {
if (!pullupmsg(mp, hdr_length +
TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) {
ip1dbg(("ip_wput_v6: TCP hdr pullupmsg"
" failed\n"));
BUMP_MIB(mibptr, ipIfStatsOutDiscards);
freemsg(first_mp);
return;
}
ip6h = (ip6_t *)mp->b_rptr;
}
up = (uint16_t *)&ip6h->ip6_src;
/*
* Note: The TCP module has stored the length value
* into the tcp checksum field, so we don't
* need to explicitly sum it in here.
*/
sum = up[0] + up[1] + up[2] + up[3] +
up[4] + up[5] + up[6] + up[7] +
up[8] + up[9] + up[10] + up[11] +
up[12] + up[13] + up[14] + up[15];
/* Fold the initial sum */
sum = (sum & 0xffff) + (sum >> 16);
up = (uint16_t *)(((uchar_t *)ip6h) +
hdr_length + TCP_CHECKSUM_OFFSET);
IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP,
hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN,
ire->ire_max_frag, mctl_present, sum);
/* Software checksum? */
if (DB_CKSUMFLAGS(mp) == 0) {
IP6_STAT(ipst, ip6_out_sw_cksum);
IP6_STAT_UPDATE(ipst,
ip6_tcp_out_sw_cksum_bytes,
(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) -
hdr_length);
}
} else if (nexthdr == IPPROTO_UDP) {
uint16_t *up;
/*
* check for full IPv6 header + enough UDP header
* to get at the UDP checksum field
*/
if ((mp->b_wptr - mp->b_rptr) < (hdr_length +
UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) {
if (!pullupmsg(mp, hdr_length +
UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) {
ip1dbg(("ip_wput_v6: UDP hdr pullupmsg"
" failed\n"));
BUMP_MIB(mibptr, ipIfStatsOutDiscards);
freemsg(first_mp);
return;
}
ip6h = (ip6_t *)mp->b_rptr;
}
up = (uint16_t *)&ip6h->ip6_src;
/*
* Note: The UDP module has stored the length value
* into the udp checksum field, so we don't
* need to explicitly sum it in here.
*/
sum = up[0] + up[1] + up[2] + up[3] +
up[4] + up[5] + up[6] + up[7] +
up[8] + up[9] + up[10] + up[11] +
up[12] + up[13] + up[14] + up[15];
/* Fold the initial sum */
sum = (sum & 0xffff) + (sum >> 16);
up = (uint16_t *)(((uchar_t *)ip6h) +
hdr_length + UDP_CHECKSUM_OFFSET);
IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP,
hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN,
ire->ire_max_frag, mctl_present, sum);
/* Software checksum? */
if (DB_CKSUMFLAGS(mp) == 0) {
IP6_STAT(ipst, ip6_out_sw_cksum);
IP6_STAT_UPDATE(ipst,
ip6_udp_out_sw_cksum_bytes,
(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) -
hdr_length);
}
} else if (nexthdr == IPPROTO_ICMPV6) {
uint16_t *up;
icmp6_t *icmp6;
/* check for full IPv6+ICMPv6 header */
if ((mp->b_wptr - mp->b_rptr) <
(hdr_length + ICMP6_MINLEN)) {
if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) {
ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg"
" failed\n"));
BUMP_MIB(mibptr, ipIfStatsOutDiscards);
freemsg(first_mp);
return;
}
ip6h = (ip6_t *)mp->b_rptr;
}
icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length);
up = (uint16_t *)&ip6h->ip6_src;
/*
* icmp has placed length and routing
* header adjustment in icmp6_cksum.
*/
sum = htons(IPPROTO_ICMPV6) +
up[0] + up[1] + up[2] + up[3] +
up[4] + up[5] + up[6] + up[7] +
up[8] + up[9] + up[10] + up[11] +
up[12] + up[13] + up[14] + up[15];
sum = (sum & 0xffff) + (sum >> 16);
icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum);
/* Update output mib stats */
icmp_update_out_mib_v6(ill, icmp6);
} else if (nexthdr == IPPROTO_SCTP) {
sctp_hdr_t *sctph;
if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) {
if (!pullupmsg(mp, hdr_length +
sizeof (*sctph))) {
ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg"
" failed\n"));
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsOutDiscards);
freemsg(mp);
return;
}
ip6h = (ip6_t *)mp->b_rptr;
}
sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length);
sctph->sh_chksum = 0;
sctph->sh_chksum = sctp_cksum(mp, hdr_length);
}
cksum_done:
/*
* We force the insertion of a fragment header using the
* IPH_FRAG_HDR flag in two cases:
* - after reception of an ICMPv6 "packet too big" message
* with a MTU < 1280 (cf. RFC 2460 section 5)
* - for multirouted IPv6 packets, so that the receiver can
* discard duplicates according to their fragment identifier
*
* Two flags modifed from the API can modify this behavior.
* The first is IPV6_USE_MIN_MTU. With this API the user
* can specify how to manage PMTUD for unicast and multicast.
*
* IPV6_DONTFRAG disallows fragmentation.
*/
max_frag = ire->ire_max_frag;
switch (IP6I_USE_MIN_MTU_API(flags)) {
case IPV6_USE_MIN_MTU_DEFAULT:
case IPV6_USE_MIN_MTU_UNICAST:
if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
max_frag = IPV6_MIN_MTU;
}
break;
case IPV6_USE_MIN_MTU_NEVER:
max_frag = IPV6_MIN_MTU;
break;
}
if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag ||
(ire->ire_frag_flag & IPH_FRAG_HDR)) {
if (connp != NULL && (flags & IP6I_DONTFRAG)) {
icmp_pkt2big_v6(ire->ire_stq, first_mp,
max_frag, B_FALSE, B_TRUE, zoneid, ipst);
return;
}
if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN !=
(mp->b_cont ? msgdsize(mp) :
mp->b_wptr - (uchar_t *)ip6h)) {
ip0dbg(("Packet length mismatch: %d, %ld\n",
ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN,
msgdsize(mp)));
freemsg(first_mp);
return;
}
/* Do IPSEC processing first */
if (mctl_present) {
ipsec_out_process(q, first_mp, ire, ill_index);
return;
}
ASSERT(mp->b_prev == NULL);
ip2dbg(("Fragmenting Size = %d, mtu = %d\n",
ntohs(ip6h->ip6_plen) +
IPV6_HDR_LEN, max_frag));
ASSERT(mp == first_mp);
/* Initiate IPPF processing */
if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) {
ip_process(IPP_LOCAL_OUT, &mp, ill_index);
if (mp == NULL) {
return;
}
}
ip_wput_frag_v6(mp, ire, reachable, connp,
caller, max_frag);
return;
}
/* Do IPSEC processing first */
if (mctl_present) {
int extra_len = ipsec_out_extra_length(first_mp);
if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len >
max_frag && connp != NULL &&
(flags & IP6I_DONTFRAG)) {
/*
* IPsec headers will push the packet over the
* MTU limit. Issue an ICMPv6 Packet Too Big
* message for this packet if the upper-layer
* that issued this packet will be able to
* react to the icmp_pkt2big_v6() that we'll
* generate.
*/
icmp_pkt2big_v6(ire->ire_stq, first_mp,
max_frag, B_FALSE, B_TRUE, zoneid, ipst);
return;
}
ipsec_out_process(q, first_mp, ire, ill_index);
return;
}
/*
* XXX multicast: add ip_mforward_v6() here.
* Check conn_dontroute
*/
#ifdef lint
/*
* XXX The only purpose of this statement is to avoid lint
* errors. See the above "XXX multicast". When that gets
* fixed, remove this whole #ifdef lint section.
*/
ip3dbg(("multicast forward is %s.\n",
(multicast_forward ? "TRUE" : "FALSE")));
#endif
UPDATE_OB_PKT_COUNT(ire);
ire->ire_last_used_time = lbolt;
ASSERT(mp == first_mp);
ip_xmit_v6(mp, ire, reachable, connp, caller, NULL);
} else {
/*
* DTrace this as ip:::send. A blocked packet will fire the
* send probe, but not the receive probe.
*/
DTRACE_IP7(send, mblk_t *, first_mp, conn_t *, NULL,
void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *,
NULL, ip6_t *, ip6h, int, 1);
DTRACE_PROBE4(ip6__loopback__out__start,
ill_t *, NULL, ill_t *, ill,
ip6_t *, ip6h, mblk_t *, first_mp);
FW_HOOKS6(ipst->ips_ip6_loopback_out_event,
ipst->ips_ipv6firewall_loopback_out,
NULL, ill, ip6h, first_mp, mp, 0, ipst);
DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp);
if (first_mp != NULL) {
ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0,
zoneid);
}
}
}
/*
* Outbound IPv6 fragmentation routine using MDT.
*/
static void
ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk,
size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset)
{
ip6_t *ip6h = (ip6_t *)mp->b_rptr;
uint_t pkts, wroff, hdr_chunk_len, pbuf_idx;
mblk_t *hdr_mp, *md_mp = NULL;
int i1;
multidata_t *mmd;
unsigned char *hdr_ptr, *pld_ptr;
ip_pdescinfo_t pdi;
uint32_t ident;
size_t len;
uint16_t offset;
queue_t *stq = ire->ire_stq;
ill_t *ill = (ill_t *)stq->q_ptr;
ip_stack_t *ipst = ill->ill_ipst;
ASSERT(DB_TYPE(mp) == M_DATA);
ASSERT(MBLKL(mp) > unfragmentable_len);
/*
* Move read ptr past unfragmentable portion, we don't want this part
* of the data in our fragments.
*/
mp->b_rptr += unfragmentable_len;
/* Calculate how many packets we will send out */
i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp);
pkts = (i1 + max_chunk - 1) / max_chunk;
ASSERT(pkts > 1);
/* Allocate a message block which will hold all the IP Headers. */
wroff = ipst->ips_ip_wroff_extra;
hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t);
i1 = pkts * hdr_chunk_len;
/*
* Create the header buffer, Multidata and destination address
* and SAP attribute that should be associated with it.
*/
if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL ||
((hdr_mp->b_wptr += i1),
(mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) ||
!ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) {
freemsg(mp);
if (md_mp == NULL) {
freemsg(hdr_mp);
} else {
free_mmd: IP6_STAT(ipst, ip6_frag_mdt_discarded);
freemsg(md_mp);
}
IP6_STAT(ipst, ip6_frag_mdt_allocfail);
BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
return;
}
IP6_STAT(ipst, ip6_frag_mdt_allocd);
/*
* Add a payload buffer to the Multidata; this operation must not
* fail, or otherwise our logic in this routine is broken. There
* is no memory allocation done by the routine, so any returned
* failure simply tells us that we've done something wrong.
*
* A failure tells us that either we're adding the same payload
* buffer more than once, or we're trying to add more buffers than
* allowed. None of the above cases should happen, and we panic
* because either there's horrible heap corruption, and/or
* programming mistake.
*/
if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) {
goto pbuf_panic;
}
hdr_ptr = hdr_mp->b_rptr;
pld_ptr = mp->b_rptr;
pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF;
ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1));
/*
* len is the total length of the fragmentable data in this
* datagram. For each fragment sent, we will decrement len
* by the amount of fragmentable data sent in that fragment
* until len reaches zero.
*/
len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN);
offset = 0;
prev_nexthdr_offset += wroff;
while (len != 0) {
size_t mlen;
ip6_t *fip6h;
ip6_frag_t *fraghdr;
int error;
ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr);
mlen = MIN(len, max_chunk);
len -= mlen;
fip6h = (ip6_t *)(hdr_ptr + wroff);
ASSERT(OK_32PTR(fip6h));
bcopy(ip6h, fip6h, unfragmentable_len);
hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT;
fip6h->ip6_plen = htons((uint16_t)(mlen +
unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t)));
fraghdr = (ip6_frag_t *)((unsigned char *)fip6h +
unfragmentable_len);
fraghdr->ip6f_nxt = nexthdr;
fraghdr->ip6f_reserved = 0;
fraghdr->ip6f_offlg = htons(offset) |
((len != 0) ? IP6F_MORE_FRAG : 0);
fraghdr->ip6f_ident = ident;
/*
* Record offset and size of header and data of the next packet
* in the multidata message.
*/
PDESC_HDR_ADD(&pdi, hdr_ptr, wroff,
unfragmentable_len + sizeof (ip6_frag_t), 0);
PDESC_PLD_INIT(&pdi);
i1 = MIN(mp->b_wptr - pld_ptr, mlen);
ASSERT(i1 > 0);
PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1);
if (i1 == mlen) {
pld_ptr += mlen;
} else {
i1 = mlen - i1;
mp = mp->b_cont;
ASSERT(mp != NULL);
ASSERT(MBLKL(mp) >= i1);
/*
* Attach the next payload message block to the
* multidata message.
*/
if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0)
goto pbuf_panic;
PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1);
pld_ptr = mp->b_rptr + i1;
}
if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error,
KM_NOSLEEP)) == NULL) {
/*
* Any failure other than ENOMEM indicates that we
* have passed in invalid pdesc info or parameters
* to mmd_addpdesc, which must not happen.
*
* EINVAL is a result of failure on boundary checks
* against the pdesc info contents. It should not
* happen, and we panic because either there's
* horrible heap corruption, and/or programming
* mistake.
*/
if (error != ENOMEM) {
cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: "
"pdesc logic error detected for "
"mmd %p pinfo %p (%d)\n",
(void *)mmd, (void *)&pdi, error);
/* NOTREACHED */
}
IP6_STAT(ipst, ip6_frag_mdt_addpdescfail);
/* Free unattached payload message blocks as well */
md_mp->b_cont = mp->b_cont;
goto free_mmd;
}
/* Advance fragment offset. */
offset += mlen;
/* Advance to location for next header in the buffer. */
hdr_ptr += hdr_chunk_len;
/* Did we reach the next payload message block? */
if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) {
mp = mp->b_cont;
/*
* Attach the next message block with payload
* data to the multidata message.
*/
if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0)
goto pbuf_panic;
pld_ptr = mp->b_rptr;
}
}
ASSERT(hdr_mp->b_wptr == hdr_ptr);
ASSERT(mp->b_wptr == pld_ptr);
/* Update IP statistics */
UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts);
BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs);
UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts);
/*
* The ipv6 header len is accounted for in unfragmentable_len so
* when calculating the fragmentation overhead just add the frag
* header len.
*/
UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets,
(ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) +
pkts * (unfragmentable_len + sizeof (ip6_frag_t)));
IP6_STAT_UPDATE(ipst, ip6_frag_mdt_pkt_out, pkts);
ire->ire_ob_pkt_count += pkts;
if (ire->ire_ipif != NULL)
atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts);
ire->ire_last_used_time = lbolt;
/* Send it down */
putnext(stq, md_mp);
return;
pbuf_panic:
cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic "
"error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp,
pbuf_idx);
/* NOTREACHED */
}
/*
* IPv6 fragmentation. Essentially the same as IPv4 fragmentation.
* We have not optimized this in terms of number of mblks
* allocated. For instance, for each fragment sent we always allocate a
* mblk to hold the IPv6 header and fragment header.
*
* Assumes that all the extension headers are contained in the first mblk.
*
* The fragment header is inserted after an hop-by-hop options header
* and after [an optional destinations header followed by] a routing header.
*
* NOTE : This function does not ire_refrele the ire passed in as
* the argument.
*/
void
ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp,
int caller, int max_frag)
{
ip6_t *ip6h = (ip6_t *)mp->b_rptr;
ip6_t *fip6h;
mblk_t *hmp;
mblk_t *hmp0;
mblk_t *dmp;
ip6_frag_t *fraghdr;
size_t unfragmentable_len;
size_t len;
size_t mlen;
size_t max_chunk;
uint32_t ident;
uint16_t off_flags;
uint16_t offset = 0;
ill_t *ill;
uint8_t nexthdr;
uint_t prev_nexthdr_offset;
uint8_t *ptr;
ip_stack_t *ipst = ire->ire_ipst;
ASSERT(ire->ire_type == IRE_CACHE);
ill = (ill_t *)ire->ire_stq->q_ptr;
if (max_frag <= 0) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
freemsg(mp);
return;
}
BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds);
/*
* Determine the length of the unfragmentable portion of this
* datagram. This consists of the IPv6 header, a potential
* hop-by-hop options header, a potential pre-routing-header
* destination options header, and a potential routing header.
*/
nexthdr = ip6h->ip6_nxt;
prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
ptr = (uint8_t *)&ip6h[1];
if (nexthdr == IPPROTO_HOPOPTS) {
ip6_hbh_t *hbh_hdr;
uint_t hdr_len;
hbh_hdr = (ip6_hbh_t *)ptr;
hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
nexthdr = hbh_hdr->ip6h_nxt;
prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
- (uint8_t *)ip6h;
ptr += hdr_len;
}
if (nexthdr == IPPROTO_DSTOPTS) {
ip6_dest_t *dest_hdr;
uint_t hdr_len;
dest_hdr = (ip6_dest_t *)ptr;
if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
hdr_len = 8 * (dest_hdr->ip6d_len + 1);
nexthdr = dest_hdr->ip6d_nxt;
prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
- (uint8_t *)ip6h;
ptr += hdr_len;
}
}
if (nexthdr == IPPROTO_ROUTING) {
ip6_rthdr_t *rthdr;
uint_t hdr_len;
rthdr = (ip6_rthdr_t *)ptr;
nexthdr = rthdr->ip6r_nxt;
prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
- (uint8_t *)ip6h;
hdr_len = 8 * (rthdr->ip6r_len + 1);
ptr += hdr_len;
}
unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len -
sizeof (ip6_frag_t)) & ~7;
/* Check if we can use MDT to send out the frags. */
ASSERT(!IRE_IS_LOCAL(ire));
if (ipst->ips_ip_multidata_outbound && reachable == 0 &&
!(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) &&
IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) {
ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len,
nexthdr, prev_nexthdr_offset);
return;
}
/*
* Allocate an mblk with enough room for the link-layer
* header, the unfragmentable part of the datagram, and the
* fragment header. This (or a copy) will be used as the
* first mblk for each fragment we send.
*/
hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) +
ipst->ips_ip_wroff_extra, mp);
if (hmp == NULL) {
BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
freemsg(mp);
return;
}
hmp->b_rptr += ipst->ips_ip_wroff_extra;
hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t);
fip6h = (ip6_t *)hmp->b_rptr;
fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len);
bcopy(ip6h, fip6h, unfragmentable_len);
hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT;
ident = atomic_add_32_nv(&ire->ire_ident, 1);
fraghdr->ip6f_nxt = nexthdr;
fraghdr->ip6f_reserved = 0;
fraghdr->ip6f_offlg = 0;
fraghdr->ip6f_ident = htonl(ident);
/*
* len is the total length of the fragmentable data in this
* datagram. For each fragment sent, we will decrement len
* by the amount of fragmentable data sent in that fragment
* until len reaches zero.
*/
len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN);
/*
* Move read ptr past unfragmentable portion, we don't want this part
* of the data in our fragments.
*/
mp->b_rptr += unfragmentable_len;
while (len != 0) {
mlen = MIN(len, max_chunk);
len -= mlen;
if (len != 0) {
/* Not last */
hmp0 = copyb(hmp);
if (hmp0 == NULL) {
freeb(hmp);
freemsg(mp);
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsOutFragFails);
ip1dbg(("ip_wput_frag_v6: copyb failed\n"));
return;
}
off_flags = IP6F_MORE_FRAG;
} else {
/* Last fragment */
hmp0 = hmp;
hmp = NULL;
off_flags = 0;
}
fip6h = (ip6_t *)(hmp0->b_rptr);
fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len);
fip6h->ip6_plen = htons((uint16_t)(mlen +
unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t)));
/*
* Note: Optimization alert.
* In IPv6 (and IPv4) protocol header, Fragment Offset
* ("offset") is 13 bits wide and in 8-octet units.
* In IPv6 protocol header (unlike IPv4) in a 16 bit field,
* it occupies the most significant 13 bits.
* (least significant 13 bits in IPv4).
* We do not do any shifts here. Not shifting is same effect
* as taking offset value in octet units, dividing by 8 and
* then shifting 3 bits left to line it up in place in proper
* place protocol header.
*/
fraghdr->ip6f_offlg = htons(offset) | off_flags;
if (!(dmp = ip_carve_mp(&mp, mlen))) {
/* mp has already been freed by ip_carve_mp() */
if (hmp != NULL)
freeb(hmp);
freeb(hmp0);
ip1dbg(("ip_carve_mp: failed\n"));
BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
return;
}
hmp0->b_cont = dmp;
/* Get the priority marking, if any */
hmp0->b_band = dmp->b_band;
UPDATE_OB_PKT_COUNT(ire);
ire->ire_last_used_time = lbolt;
ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp,
caller, NULL);
reachable = 0; /* No need to redo state machine in loop */
BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates);
offset += mlen;
}
BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs);
}
/*
* Determine if the ill and multicast aspects of that packets
* "matches" the conn.
*/
boolean_t
conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags,
zoneid_t zoneid)
{
ill_t *bound_ill;
boolean_t wantpacket;
in6_addr_t *v6dst_ptr = &ip6h->ip6_dst;
in6_addr_t *v6src_ptr = &ip6h->ip6_src;
/*
* conn_incoming_ill is set by IPV6_BOUND_IF which limits
* unicast and multicast reception to conn_incoming_ill.
* conn_wantpacket_v6 is called both for unicast and
* multicast.
*/
bound_ill = connp->conn_incoming_ill;
if (bound_ill != NULL) {
if (IS_IPMP(bound_ill)) {
if (bound_ill->ill_grp != ill->ill_grp)
return (B_FALSE);
} else {
if (bound_ill != ill)
return (B_FALSE);
}
}
if (connp->conn_multi_router)
return (B_TRUE);
if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) &&
!IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) {
/*
* Unicast case: we match the conn only if it's in the specified
* zone.
*/
return (IPCL_ZONE_MATCH(connp, zoneid));
}
if ((fanout_flags & IP_FF_NO_MCAST_LOOP) &&
(connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) {
/*
* Loopback case: the sending endpoint has IP_MULTICAST_LOOP
* disabled, therefore we don't dispatch the multicast packet to
* the sending zone.
*/
return (B_FALSE);
}
if (IS_LOOPBACK(ill) && connp->conn_zoneid != zoneid &&
zoneid != ALL_ZONES) {
/*
* Multicast packet on the loopback interface: we only match
* conns who joined the group in the specified zone.
*/
return (B_FALSE);
}
mutex_enter(&connp->conn_lock);
wantpacket =
ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL;
mutex_exit(&connp->conn_lock);
return (wantpacket);
}
/*
* Transmit a packet and update any NUD state based on the flags
* XXX need to "recover" any ip6i_t when doing putq!
*
* NOTE : This function does not ire_refrele the ire passed in as the
* argument.
*/
void
ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp,
int caller, ipsec_out_t *io)
{
mblk_t *mp1;
nce_t *nce = ire->ire_nce;
ill_t *ill;
ill_t *out_ill;
uint64_t delta;
ip6_t *ip6h;
queue_t *stq = ire->ire_stq;
ire_t *ire1 = NULL;
ire_t *save_ire = ire;
boolean_t multirt_send = B_FALSE;
mblk_t *next_mp = NULL;
ip_stack_t *ipst = ire->ire_ipst;
boolean_t fp_prepend = B_FALSE;
uint32_t hlen;
ip6h = (ip6_t *)mp->b_rptr;
ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6));
ASSERT(ire->ire_ipversion == IPV6_VERSION);
ASSERT(nce != NULL);
ASSERT(mp->b_datap->db_type == M_DATA);
ASSERT(stq != NULL);
ill = ire_to_ill(ire);
if (!ill) {
ip0dbg(("ip_xmit_v6: ire_to_ill failed\n"));
freemsg(mp);
return;
}
/* Flow-control check has been done in ip_wput_ire_v6 */
if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT ||
caller == IP_WSRV || canput(stq->q_next)) {
uint32_t ill_index;
/*
* In most cases, the emission loop below is entered only
* once. Only in the case where the ire holds the
* RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT
* flagged ires in the bucket, and send the packet
* through all crossed RTF_MULTIRT routes.
*/
if (ire->ire_flags & RTF_MULTIRT) {
/*
* Multirouting case. The bucket where ire is stored
* probably holds other RTF_MULTIRT flagged ires
* to the destination. In this call to ip_xmit_v6,
* we attempt to send the packet through all
* those ires. Thus, we first ensure that ire is the
* first RTF_MULTIRT ire in the bucket,
* before walking the ire list.
*/
ire_t *first_ire;
irb_t *irb = ire->ire_bucket;
ASSERT(irb != NULL);
multirt_send = B_TRUE;
/* Make sure we do not omit any multiroute ire. */
IRB_REFHOLD(irb);
for (first_ire = irb->irb_ire;
first_ire != NULL;
first_ire = first_ire->ire_next) {
if ((first_ire->ire_flags & RTF_MULTIRT) &&
(IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6,
&ire->ire_addr_v6)) &&
!(first_ire->ire_marks &
(IRE_MARK_CONDEMNED | IRE_MARK_TESTHIDDEN)))
break;
}
if ((first_ire != NULL) && (first_ire != ire)) {
IRE_REFHOLD(first_ire);
/* ire will be released by the caller */
ire = first_ire;
nce = ire->ire_nce;
stq = ire->ire_stq;
ill = ire_to_ill(ire);
}
IRB_REFRELE(irb);
} else if (connp != NULL && IPCL_IS_TCP(connp) &&
connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt &&
ILL_MDT_USABLE(ill)) {
/*
* This tcp connection was marked as MDT-capable, but
* it has been turned off due changes in the interface.
* Now that the interface support is back, turn it on
* by notifying tcp. We don't directly modify tcp_mdt,
* since we leave all the details to the tcp code that
* knows better.
*/
mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab);
if (mdimp == NULL) {
ip0dbg(("ip_xmit_v6: can't re-enable MDT for "
"connp %p (ENOMEM)\n", (void *)connp));
} else {
CONN_INC_REF(connp);
SQUEUE_ENTER_ONE(connp->conn_sqp, mdimp,
tcp_input, connp, SQ_FILL,
SQTAG_TCP_INPUT_MCTL);
}
}
do {
mblk_t *mp_ip6h;
if (multirt_send) {
irb_t *irb;
/*
* We are in a multiple send case, need to get
* the next ire and make a duplicate of the
* packet. ire1 holds here the next ire to
* process in the bucket. If multirouting is
* expected, any non-RTF_MULTIRT ire that has
* the right destination address is ignored.
*/
irb = ire->ire_bucket;
ASSERT(irb != NULL);
IRB_REFHOLD(irb);
for (ire1 = ire->ire_next;
ire1 != NULL;
ire1 = ire1->ire_next) {
if (!(ire1->ire_flags & RTF_MULTIRT))
continue;
if (!IN6_ARE_ADDR_EQUAL(
&ire1->ire_addr_v6,
&ire->ire_addr_v6))
continue;
if (ire1->ire_marks &
IRE_MARK_CONDEMNED)
continue;
/* Got one */
if (ire1 != save_ire) {
IRE_REFHOLD(ire1);
}
break;
}
IRB_REFRELE(irb);
if (ire1 != NULL) {
next_mp = copyb(mp);
if ((next_mp == NULL) ||
((mp->b_cont != NULL) &&
((next_mp->b_cont =
dupmsg(mp->b_cont)) == NULL))) {
freemsg(next_mp);
next_mp = NULL;
ire_refrele(ire1);
ire1 = NULL;
}
}
/* Last multiroute ire; don't loop anymore. */
if (ire1 == NULL) {
multirt_send = B_FALSE;
}
}
ill_index =
((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex;
/* Initiate IPPF processing */
if (IP6_OUT_IPP(flags, ipst)) {
ip_process(IPP_LOCAL_OUT, &mp, ill_index);
if (mp == NULL) {
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsOutDiscards);
if (next_mp != NULL)
freemsg(next_mp);
if (ire != save_ire) {
ire_refrele(ire);
}
return;
}
ip6h = (ip6_t *)mp->b_rptr;
}
mp_ip6h = mp;
/*
* Check for fastpath, we need to hold nce_lock to
* prevent fastpath update from chaining nce_fp_mp.
*/
ASSERT(nce->nce_ipversion != IPV4_VERSION);
mutex_enter(&nce->nce_lock);
if ((mp1 = nce->nce_fp_mp) != NULL) {
uchar_t *rptr;
hlen = MBLKL(mp1);
rptr = mp->b_rptr - hlen;
/*
* make sure there is room for the fastpath
* datalink header
*/
if (rptr < mp->b_datap->db_base) {
mp1 = copyb(mp1);
mutex_exit(&nce->nce_lock);
if (mp1 == NULL) {
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsOutDiscards);
freemsg(mp);
if (next_mp != NULL)
freemsg(next_mp);
if (ire != save_ire) {
ire_refrele(ire);
}
return;
}
mp1->b_cont = mp;
/* Get the priority marking, if any */
mp1->b_band = mp->b_band;
mp = mp1;
} else {
mp->b_rptr = rptr;
/*
* fastpath - pre-pend datalink
* header
*/
bcopy(mp1->b_rptr, rptr, hlen);
mutex_exit(&nce->nce_lock);
fp_prepend = B_TRUE;
}
} else {
/*
* Get the DL_UNITDATA_REQ.
*/
mp1 = nce->nce_res_mp;
if (mp1 == NULL) {
mutex_exit(&nce->nce_lock);
ip1dbg(("ip_xmit_v6: No resolution "
"block ire = %p\n", (void *)ire));
freemsg(mp);
if (next_mp != NULL)
freemsg(next_mp);
if (ire != save_ire) {
ire_refrele(ire);
}
return;
}
/*
* Prepend the DL_UNITDATA_REQ.
*/
mp1 = copyb(mp1);
mutex_exit(&nce->nce_lock);
if (mp1 == NULL) {
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsOutDiscards);
freemsg(mp);
if (next_mp != NULL)
freemsg(next_mp);
if (ire != save_ire) {
ire_refrele(ire);
}
return;
}
mp1->b_cont = mp;
/* Get the priority marking, if any */
mp1->b_band = mp->b_band;
mp = mp1;
}
out_ill = (ill_t *)stq->q_ptr;
DTRACE_PROBE4(ip6__physical__out__start,
ill_t *, NULL, ill_t *, out_ill,
ip6_t *, ip6h, mblk_t *, mp);
FW_HOOKS6(ipst->ips_ip6_physical_out_event,
ipst->ips_ipv6firewall_physical_out,
NULL, out_ill, ip6h, mp, mp_ip6h, 0, ipst);
DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp);
if (mp == NULL) {
if (multirt_send) {
ASSERT(ire1 != NULL);
if (ire != save_ire) {
ire_refrele(ire);
}
/*
* Proceed with the next RTF_MULTIRT
* ire, also set up the send-to queue
* accordingly.
*/
ire = ire1;
ire1 = NULL;
stq = ire->ire_stq;
nce = ire->ire_nce;
ill = ire_to_ill(ire);
mp = next_mp;
next_mp = NULL;
continue;
} else {
ASSERT(next_mp == NULL);
ASSERT(ire1 == NULL);
break;
}
}
if (ipst->ips_ipobs_enabled) {
zoneid_t szone;
szone = ip_get_zoneid_v6(&ip6h->ip6_src,
mp_ip6h, out_ill, ipst, ALL_ZONES);
ipobs_hook(mp_ip6h, IPOBS_HOOK_OUTBOUND, szone,
ALL_ZONES, out_ill, IPV6_VERSION,
fp_prepend ? hlen : 0, ipst);
}
/*
* Update ire and MIB counters; for save_ire, this has
* been done by the caller.
*/
if (ire != save_ire) {
UPDATE_OB_PKT_COUNT(ire);
ire->ire_last_used_time = lbolt;
if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsHCOutMcastPkts);
UPDATE_MIB(ill->ill_ip_mib,
ipIfStatsHCOutMcastOctets,
ntohs(ip6h->ip6_plen) +
IPV6_HDR_LEN);
}
}
/*
* Send it down. XXX Do we want to flow control AH/ESP
* packets that carry TCP payloads? We don't flow
* control TCP packets, but we should also not
* flow-control TCP packets that have been protected.
* We don't have an easy way to find out if an AH/ESP
* packet was originally TCP or not currently.
*/
if (io == NULL) {
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsHCOutTransmits);
UPDATE_MIB(ill->ill_ip_mib,
ipIfStatsHCOutOctets,
ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN);
DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL,
void_ip_t *, ip6h, __dtrace_ipsr_ill_t *,
out_ill, ipha_t *, NULL, ip6_t *, ip6h,
int, 0);
putnext(stq, mp);
} else {
/*
* Safety Pup says: make sure this is
* going to the right interface!
*/
if (io->ipsec_out_capab_ill_index !=
ill_index) {
/* IPsec kstats: bump lose counter */
freemsg(mp1);
} else {
BUMP_MIB(ill->ill_ip_mib,
ipIfStatsHCOutTransmits);
UPDATE_MIB(ill->ill_ip_mib,
ipIfStatsHCOutOctets,
ntohs(ip6h->ip6_plen) +
IPV6_HDR_LEN);
DTRACE_IP7(send, mblk_t *, mp,
conn_t *, NULL, void_ip_t *, ip6h,
__dtrace_ipsr_ill_t *, out_ill,
ipha_t *, NULL, ip6_t *, ip6h, int,
0);
ipsec_hw_putnext(stq, mp);
}
}
if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) {
if (ire != save_ire) {
ire_refrele(ire);
}
if (multirt_send) {
ASSERT(ire1 != NULL);
/*
* Proceed with the next RTF_MULTIRT
* ire, also set up the send-to queue
* accordingly.
*/
ire = ire1;
ire1 = NULL;
stq = ire->ire_stq;
nce = ire->ire_nce;
ill = ire_to_ill(ire);
mp = next_mp;
next_mp = NULL;
continue;
}
ASSERT(next_mp == NULL);
ASSERT(ire1 == NULL);
return;
}
ASSERT(nce->nce_state != ND_INCOMPLETE);
/*
* Check for upper layer advice
*/
if (flags & IPV6_REACHABILITY_CONFIRMATION) {
/*
* It should be o.k. to check the state without
* a lock here, at most we lose an advice.
*/
nce->nce_last = TICK_TO_MSEC(lbolt64);
if (nce->nce_state != ND_REACHABLE) {
mutex_enter(&nce->nce_lock);
nce->nce_state = ND_REACHABLE;
nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
mutex_exit(&nce->nce_lock);
(void) untimeout(nce->nce_timeout_id);
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_xmit_v6: state"
" for %s changed to"
" REACHABLE\n", AF_INET6,
&ire->ire_addr_v6);
}
}
if (ire != save_ire) {
ire_refrele(ire);
}
if (multirt_send) {
ASSERT(ire1 != NULL);
/*
* Proceed with the next RTF_MULTIRT
* ire, also set up the send-to queue
* accordingly.
*/
ire = ire1;
ire1 = NULL;
stq = ire->ire_stq;
nce = ire->ire_nce;
ill = ire_to_ill(ire);
mp = next_mp;
next_mp = NULL;
continue;
}
ASSERT(next_mp == NULL);
ASSERT(ire1 == NULL);
return;
}
delta = TICK_TO_MSEC(lbolt64) - nce->nce_last;
ip1dbg(("ip_xmit_v6: delta = %" PRId64
" ill_reachable_time = %d \n", delta,
ill->ill_reachable_time));
if (delta > (uint64_t)ill->ill_reachable_time) {
nce = ire->ire_nce;
mutex_enter(&nce->nce_lock);
switch (nce->nce_state) {
case ND_REACHABLE:
case ND_STALE:
/*
* ND_REACHABLE is identical to
* ND_STALE in this specific case. If
* reachable time has expired for this
* neighbor (delta is greater than
* reachable time), conceptually, the
* neighbor cache is no longer in
* REACHABLE state, but already in
* STALE state. So the correct
* transition here is to ND_DELAY.
*/
nce->nce_state = ND_DELAY;
mutex_exit(&nce->nce_lock);
NDP_RESTART_TIMER(nce,
ipst->ips_delay_first_probe_time);
if (ip_debug > 3) {
/* ip2dbg */
pr_addr_dbg("ip_xmit_v6: state"
" for %s changed to"
" DELAY\n", AF_INET6,
&ire->ire_addr_v6);
}
break;
case ND_DELAY:
case ND_PROBE:
mutex_exit(&nce->nce_lock);
/* Timers have already started */
break;
case ND_UNREACHABLE:
/*
* ndp timer has detected that this nce
* is unreachable and initiated deleting
* this nce and all its associated IREs.
* This is a race where we found the
* ire before it was deleted and have
* just sent out a packet using this
* unreachable nce.
*/
mutex_exit(&nce->nce_lock);
break;
default:
ASSERT(0);
}
}
if (multirt_send) {
ASSERT(ire1 != NULL);
/*
* Proceed with the next RTF_MULTIRT ire,
* Also set up the send-to queue accordingly.
*/
if (ire != save_ire) {
ire_refrele(ire);
}
ire = ire1;
ire1 = NULL;
stq = ire->ire_stq;
nce = ire->ire_nce;
ill = ire_to_ill(ire);
mp = next_mp;
next_mp = NULL;
}
} while (multirt_send);
/*
* In the multirouting case, release the last ire used for
* emission. save_ire will be released by the caller.
*/
if (ire != save_ire) {
ire_refrele(ire);
}
} else {
/*
* Can't apply backpressure, just discard the packet.
*/
BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
freemsg(mp);
return;
}
}
/*
* pr_addr_dbg function provides the needed buffer space to call
* inet_ntop() function's 3rd argument. This function should be
* used by any kernel routine which wants to save INET6_ADDRSTRLEN
* stack buffer space in it's own stack frame. This function uses
* a buffer from it's own stack and prints the information.
* Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr)
*
* Note: This function can call inet_ntop() once.
*/
void
pr_addr_dbg(char *fmt1, int af, const void *addr)
{
char buf[INET6_ADDRSTRLEN];
if (fmt1 == NULL) {
ip0dbg(("pr_addr_dbg: Wrong arguments\n"));
return;
}
/*
* This does not compare debug level and just prints
* out. Thus it is the responsibility of the caller
* to check the appropriate debug-level before calling
* this function.
*/
if (ip_debug > 0) {
printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf)));
}
}
/*
* Return the length in bytes of the IPv6 headers (base header, ip6i_t
* if needed and extension headers) that will be needed based on the
* ip6_pkt_t structure passed by the caller.
*
* The returned length does not include the length of the upper level
* protocol (ULP) header.
*/
int
ip_total_hdrs_len_v6(ip6_pkt_t *ipp)
{
int len;
len = IPV6_HDR_LEN;
if (ipp->ipp_fields & IPPF_HAS_IP6I)
len += sizeof (ip6i_t);
if (ipp->ipp_fields & IPPF_HOPOPTS) {
ASSERT(ipp->ipp_hopoptslen != 0);
len += ipp->ipp_hopoptslen;
}
if (ipp->ipp_fields & IPPF_RTHDR) {
ASSERT(ipp->ipp_rthdrlen != 0);
len += ipp->ipp_rthdrlen;
}
/*
* En-route destination options
* Only do them if there's a routing header as well
*/
if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) ==
(IPPF_RTDSTOPTS|IPPF_RTHDR)) {
ASSERT(ipp->ipp_rtdstoptslen != 0);
len += ipp->ipp_rtdstoptslen;
}
if (ipp->ipp_fields & IPPF_DSTOPTS) {
ASSERT(ipp->ipp_dstoptslen != 0);
len += ipp->ipp_dstoptslen;
}
return (len);
}
/*
* All-purpose routine to build a header chain of an IPv6 header
* followed by any required extension headers and a proto header,
* preceeded (where necessary) by an ip6i_t private header.
*
* The fields of the IPv6 header that are derived from the ip6_pkt_t
* will be filled in appropriately.
* Thus the caller must fill in the rest of the IPv6 header, such as
* traffic class/flowid, source address (if not set here), hoplimit (if not
* set here) and destination address.
*
* The extension headers and ip6i_t header will all be fully filled in.
*/
void
ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len,
ip6_pkt_t *ipp, uint8_t protocol)
{
uint8_t *nxthdr_ptr;
uint8_t *cp;
ip6i_t *ip6i;
ip6_t *ip6h = (ip6_t *)ext_hdrs;
/*
* If sending private ip6i_t header down (checksum info, nexthop,
* or ifindex), adjust ip header pointer and set ip6i_t header pointer,
* then fill it in. (The checksum info will be filled in by icmp).
*/
if (ipp->ipp_fields & IPPF_HAS_IP6I) {
ip6i = (ip6i_t *)ip6h;
ip6h = (ip6_t *)&ip6i[1];
ip6i->ip6i_flags = 0;
ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
if (ipp->ipp_fields & IPPF_IFINDEX ||
ipp->ipp_fields & IPPF_SCOPE_ID) {
ASSERT(ipp->ipp_ifindex != 0);
ip6i->ip6i_flags |= IP6I_IFINDEX;
ip6i->ip6i_ifindex = ipp->ipp_ifindex;
}
if (ipp->ipp_fields & IPPF_ADDR) {
/*
* Enable per-packet source address verification if
* IPV6_PKTINFO specified the source address.
* ip6_src is set in the transport's _wput function.
*/
ASSERT(!IN6_IS_ADDR_UNSPECIFIED(
&ipp->ipp_addr));
ip6i->ip6i_flags |= IP6I_VERIFY_SRC;
}
if (ipp->ipp_fields & IPPF_UNICAST_HOPS) {
ip6h->ip6_hops = ipp->ipp_unicast_hops;
/*
* We need to set this flag so that IP doesn't
* rewrite the IPv6 header's hoplimit with the
* current default value.
*/
ip6i->ip6i_flags |= IP6I_HOPLIMIT;
}
if (ipp->ipp_fields & IPPF_NEXTHOP) {
ASSERT(!IN6_IS_ADDR_UNSPECIFIED(
&ipp->ipp_nexthop));
ip6i->ip6i_flags |= IP6I_NEXTHOP;
ip6i->ip6i_nexthop = ipp->ipp_nexthop;
}
/*
* tell IP this is an ip6i_t private header
*/
ip6i->ip6i_nxt = IPPROTO_RAW;
}
/* Initialize IPv6 header */
ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
if (ipp->ipp_fields & IPPF_TCLASS) {
ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) |
(ipp->ipp_tclass << 20);
}
if (ipp->ipp_fields & IPPF_ADDR)
ip6h->ip6_src = ipp->ipp_addr;
nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
cp = (uint8_t *)&ip6h[1];
/*
* Here's where we have to start stringing together
* any extension headers in the right order:
* Hop-by-hop, destination, routing, and final destination opts.
*/
if (ipp->ipp_fields & IPPF_HOPOPTS) {
/* Hop-by-hop options */
ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
*nxthdr_ptr = IPPROTO_HOPOPTS;
nxthdr_ptr = &hbh->ip6h_nxt;
bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen);
cp += ipp->ipp_hopoptslen;
}
/*
* En-route destination options
* Only do them if there's a routing header as well
*/
if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) ==
(IPPF_RTDSTOPTS|IPPF_RTHDR)) {
ip6_dest_t *dst = (ip6_dest_t *)cp;
*nxthdr_ptr = IPPROTO_DSTOPTS;
nxthdr_ptr = &dst->ip6d_nxt;
bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen);
cp += ipp->ipp_rtdstoptslen;
}
/*
* Routing header next
*/
if (ipp->ipp_fields & IPPF_RTHDR) {
ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
*nxthdr_ptr = IPPROTO_ROUTING;
nxthdr_ptr = &rt->ip6r_nxt;
bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen);
cp += ipp->ipp_rthdrlen;
}
/*
* Do ultimate destination options
*/
if (ipp->ipp_fields & IPPF_DSTOPTS) {
ip6_dest_t *dest = (ip6_dest_t *)cp;
*nxthdr_ptr = IPPROTO_DSTOPTS;
nxthdr_ptr = &dest->ip6d_nxt;
bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen);
cp += ipp->ipp_dstoptslen;
}
/*
* Now set the last header pointer to the proto passed in
*/
*nxthdr_ptr = protocol;
ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len);
}
/*
* Return a pointer to the routing header extension header
* in the IPv6 header(s) chain passed in.
* If none found, return NULL
* Assumes that all extension headers are in same mblk as the v6 header
*/
ip6_rthdr_t *
ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr)
{
ip6_dest_t *desthdr;
ip6_frag_t *fraghdr;
uint_t hdrlen;
uint8_t nexthdr;
uint8_t *ptr = (uint8_t *)&ip6h[1];
if (ip6h->ip6_nxt == IPPROTO_ROUTING)
return ((ip6_rthdr_t *)ptr);
/*
* The routing header will precede all extension headers
* other than the hop-by-hop and destination options
* extension headers, so if we see anything other than those,
* we're done and didn't find it.
* We could see a destination options header alone but no
* routing header, in which case we'll return NULL as soon as
* we see anything after that.
* Hop-by-hop and destination option headers are identical,
* so we can use either one we want as a template.
*/
nexthdr = ip6h->ip6_nxt;
while (ptr < endptr) {
/* Is there enough left for len + nexthdr? */
if (ptr + MIN_EHDR_LEN > endptr)
return (NULL);
switch (nexthdr) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
/* Assumes the headers are identical for hbh and dst */
desthdr = (ip6_dest_t *)ptr;
hdrlen = 8 * (desthdr->ip6d_len + 1);
nexthdr = desthdr->ip6d_nxt;
break;
case IPPROTO_ROUTING:
return ((ip6_rthdr_t *)ptr);
case IPPROTO_FRAGMENT:
fraghdr = (ip6_frag_t *)ptr;
hdrlen = sizeof (ip6_frag_t);
nexthdr = fraghdr->ip6f_nxt;
break;
default:
return (NULL);
}
ptr += hdrlen;
}
return (NULL);
}
/*
* Called for source-routed packets originating on this node.
* Manipulates the original routing header by moving every entry up
* one slot, placing the first entry in the v6 header's v6_dst field,
* and placing the ultimate destination in the routing header's last
* slot.
*
* Returns the checksum diference between the ultimate destination
* (last hop in the routing header when the packet is sent) and
* the first hop (ip6_dst when the packet is sent)
*/
/* ARGSUSED2 */
uint32_t
ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns)
{
uint_t numaddr;
uint_t i;
in6_addr_t *addrptr;
in6_addr_t tmp;
ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth;
uint32_t cksm;
uint32_t addrsum = 0;
uint16_t *ptr;
/*
* Perform any processing needed for source routing.
* We know that all extension headers will be in the same mblk
* as the IPv6 header.
*/
/*
* If no segments left in header, or the header length field is zero,
* don't move hop addresses around;
* Checksum difference is zero.
*/
if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0))
return (0);
ptr = (uint16_t *)&ip6h->ip6_dst;
cksm = 0;
for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
cksm += ptr[i];
}
cksm = (cksm & 0xFFFF) + (cksm >> 16);
/*
* Here's where the fun begins - we have to
* move all addresses up one spot, take the
* first hop and make it our first ip6_dst,
* and place the ultimate destination in the
* newly-opened last slot.
*/
addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr));
numaddr = rthdr->ip6r0_len / 2;
tmp = *addrptr;
for (i = 0; i < (numaddr - 1); addrptr++, i++) {
*addrptr = addrptr[1];
}
*addrptr = ip6h->ip6_dst;
ip6h->ip6_dst = tmp;
/*
* From the checksummed ultimate destination subtract the checksummed
* current ip6_dst (the first hop address). Return that number.
* (In the v4 case, the second part of this is done in each routine
* that calls ip_massage_options(). We do it all in this one place
* for v6).
*/
ptr = (uint16_t *)&ip6h->ip6_dst;
for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
addrsum += ptr[i];
}
cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF));
if ((int)cksm < 0)
cksm--;
cksm = (cksm & 0xFFFF) + (cksm >> 16);
return (cksm);
}
/*
* Propagate a multicast group membership operation (join/leave) (*fn) on
* all interfaces crossed by the related multirt routes.
* The call is considered successful if the operation succeeds
* on at least one interface.
* The function is called if the destination address in the packet to send
* is multirouted.
*/
int
ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t,
const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *),
ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp,
mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp)
{
ire_t *ire_gw;
irb_t *irb;
int index, error = 0;
opt_restart_t *or;
ip_stack_t *ipst = ire->ire_ipst;
irb = ire->ire_bucket;
ASSERT(irb != NULL);
ASSERT(DB_TYPE(first_mp) == M_CTL);
or = (opt_restart_t *)first_mp->b_rptr;
IRB_REFHOLD(irb);
for (; ire != NULL; ire = ire->ire_next) {
if ((ire->ire_flags & RTF_MULTIRT) == 0)
continue;
if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp))
continue;
ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0,
IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL,
MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst);
/* No resolver exists for the gateway; skip this ire. */
if (ire_gw == NULL)
continue;
index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex;
/*
* A resolver exists: we can get the interface on which we have
* to apply the operation.
*/
error = fn(connp, checkonly, v6grp, index, fmode, v6src,
first_mp);
if (error == 0)
or->or_private = CGTP_MCAST_SUCCESS;
if (ip_debug > 0) {
ulong_t off;
char *ksym;
ksym = kobj_getsymname((uintptr_t)fn, &off);
ip2dbg(("ip_multirt_apply_membership_v6: "
"called %s, multirt group 0x%08x via itf 0x%08x, "
"error %d [success %u]\n",
ksym ? ksym : "?",
ntohl(V4_PART_OF_V6((*v6grp))),
ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)),
error, or->or_private));
}
ire_refrele(ire_gw);
if (error == EINPROGRESS) {
IRB_REFRELE(irb);
return (error);
}
}
IRB_REFRELE(irb);
/*
* Consider the call as successful if we succeeded on at least
* one interface. Otherwise, return the last encountered error.
*/
return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error);
}
void
*ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp)
{
kstat_t *ksp;
ip6_stat_t template = {
{ "ip6_udp_fast_path", KSTAT_DATA_UINT64 },
{ "ip6_udp_slow_path", KSTAT_DATA_UINT64 },
{ "ip6_udp_fannorm", KSTAT_DATA_UINT64 },
{ "ip6_udp_fanmb", KSTAT_DATA_UINT64 },
{ "ip6_out_sw_cksum", KSTAT_DATA_UINT64 },
{ "ip6_in_sw_cksum", KSTAT_DATA_UINT64 },
{ "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 },
{ "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 },
{ "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 },
{ "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 },
{ "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 },
{ "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 },
{ "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 },
{ "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 },
{ "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 },
{ "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 },
{ "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 },
{ "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 },
{ "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 },
};
ksp = kstat_create_netstack("ip", 0, "ip6stat", "net",
KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
KSTAT_FLAG_VIRTUAL, stackid);
if (ksp == NULL)
return (NULL);
bcopy(&template, ip6_statisticsp, sizeof (template));
ksp->ks_data = (void *)ip6_statisticsp;
ksp->ks_private = (void *)(uintptr_t)stackid;
kstat_install(ksp);
return (ksp);
}
void
ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp)
{
if (ksp != NULL) {
ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
kstat_delete_netstack(ksp, stackid);
}
}
/*
* The following two functions set and get the value for the
* IPV6_SRC_PREFERENCES socket option.
*/
int
ip6_set_src_preferences(conn_t *connp, uint32_t prefs)
{
/*
* We only support preferences that are covered by
* IPV6_PREFER_SRC_MASK.
*/
if (prefs & ~IPV6_PREFER_SRC_MASK)
return (EINVAL);
/*
* Look for conflicting preferences or default preferences. If
* both bits of a related pair are clear, the application wants the
* system's default value for that pair. Both bits in a pair can't
* be set.
*/
if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) {
prefs |= IPV6_PREFER_SRC_MIPDEFAULT;
} else if ((prefs & IPV6_PREFER_SRC_MIPMASK) ==
IPV6_PREFER_SRC_MIPMASK) {
return (EINVAL);
}
if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) {
prefs |= IPV6_PREFER_SRC_TMPDEFAULT;
} else if ((prefs & IPV6_PREFER_SRC_TMPMASK) ==
IPV6_PREFER_SRC_TMPMASK) {
return (EINVAL);
}
if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) {
prefs |= IPV6_PREFER_SRC_CGADEFAULT;
} else if ((prefs & IPV6_PREFER_SRC_CGAMASK) ==
IPV6_PREFER_SRC_CGAMASK) {
return (EINVAL);
}
connp->conn_src_preferences = prefs;
return (0);
}
size_t
ip6_get_src_preferences(conn_t *connp, uint32_t *val)
{
*val = connp->conn_src_preferences;
return (sizeof (connp->conn_src_preferences));
}
int
ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti)
{
ire_t *ire;
ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
/*
* Verify the source address and ifindex. Privileged users can use
* any source address. For ancillary data the source address is
* checked in ip_wput_v6.
*/
if (pkti->ipi6_ifindex != 0) {
rw_enter(&ipst->ips_ill_g_lock, RW_READER);
if (!phyint_exists(pkti->ipi6_ifindex, ipst)) {
rw_exit(&ipst->ips_ill_g_lock);
return (ENXIO);
}
rw_exit(&ipst->ips_ill_g_lock);
}
if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
secpolicy_net_rawaccess(cr) != 0) {
ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0,
(IRE_LOCAL|IRE_LOOPBACK), NULL, NULL,
connp->conn_zoneid, NULL, MATCH_IRE_TYPE, ipst);
if (ire != NULL)
ire_refrele(ire);
else
return (ENXIO);
}
return (0);
}
/*
* Get the size of the IP options (including the IP headers size)
* without including the AH header's size. If till_ah is B_FALSE,
* and if AH header is present, dest options beyond AH header will
* also be included in the returned size.
*/
int
ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah)
{
ip6_t *ip6h;
uint8_t nexthdr;
uint8_t *whereptr;
ip6_hbh_t *hbhhdr;
ip6_dest_t *dsthdr;
ip6_rthdr_t *rthdr;
int ehdrlen;
int size;
ah_t *ah;
ip6h = (ip6_t *)mp->b_rptr;
size = IPV6_HDR_LEN;
nexthdr = ip6h->ip6_nxt;
whereptr = (uint8_t *)&ip6h[1];
for (;;) {
/* Assume IP has already stripped it */
ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW);
switch (nexthdr) {
case IPPROTO_HOPOPTS:
hbhhdr = (ip6_hbh_t *)whereptr;
nexthdr = hbhhdr->ip6h_nxt;
ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
break;
case IPPROTO_DSTOPTS:
dsthdr = (ip6_dest_t *)whereptr;
nexthdr = dsthdr->ip6d_nxt;
ehdrlen = 8 * (dsthdr->ip6d_len + 1);
break;
case IPPROTO_ROUTING:
rthdr = (ip6_rthdr_t *)whereptr;
nexthdr = rthdr->ip6r_nxt;
ehdrlen = 8 * (rthdr->ip6r_len + 1);
break;
default :
if (till_ah) {
ASSERT(nexthdr == IPPROTO_AH);
return (size);
}
/*
* If we don't have a AH header to traverse,
* return now. This happens normally for
* outbound datagrams where we have not inserted
* the AH header.
*/
if (nexthdr != IPPROTO_AH) {
return (size);
}
/*
* We don't include the AH header's size
* to be symmetrical with other cases where
* we either don't have a AH header (outbound)
* or peek into the AH header yet (inbound and
* not pulled up yet).
*/
ah = (ah_t *)whereptr;
nexthdr = ah->ah_nexthdr;
ehdrlen = (ah->ah_length << 2) + 8;
if (nexthdr == IPPROTO_DSTOPTS) {
if (whereptr + ehdrlen >= mp->b_wptr) {
/*
* The destination options header
* is not part of the first mblk.
*/
whereptr = mp->b_cont->b_rptr;
} else {
whereptr += ehdrlen;
}
dsthdr = (ip6_dest_t *)whereptr;
ehdrlen = 8 * (dsthdr->ip6d_len + 1);
size += ehdrlen;
}
return (size);
}
whereptr += ehdrlen;
size += ehdrlen;
}
}
/*
* Utility routine that checks if `v6srcp' is a valid address on underlying
* interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif
* associated with `v6srcp' on success. NOTE: if this is not called from
* inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the
* group during or after this lookup.
*/
static boolean_t
ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp)
{
ipif_t *ipif;
ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst);
if (ipif != NULL) {
if (ipifp != NULL)
*ipifp = ipif;
else
ipif_refrele(ipif);
return (B_TRUE);
}
if (ip_debug > 2) {
pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for "
"src %s\n", AF_INET6, v6srcp);
}
return (B_FALSE);
}