ip6.c revision f0c3911fac870fd8926c517f55b39ea4489e5a97
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 1990 Mentat Inc.
*/
#include <sys/sysmacros.h>
#define _SUN_TPI_VERSION 2
#include <sys/isa_defs.h>
#include <net/if_types.h>
#include <inet/tcp_impl.h>
#include <inet/udp_impl.h>
#include <inet/ipp_common.h>
#include <inet/ip_multi.h>
#include <inet/ipsec_info.h>
#include <inet/ipsec_impl.h>
#include <inet/ipclassifier.h>
#include <inet/rawip_impl.h>
#include <inet/rts_impl.h>
#include <sys/squeue_impl.h>
#include <rpc/pmap_prot.h>
/* Temporary; for CR 6451644 work-around */
#include <sys/ethernet.h>
extern int ip_squeue_flag;
/*
* Naming conventions:
* These rules should be judiciously applied
* if there is a need to identify something as IPv6 versus IPv4
* IPv6 functions will end with _v6 in the ip module.
* IPv6 functions will end with _ipv6 in the transport modules.
* IPv6 macros:
* Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
* Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
* And then there are ..V4_PART_OF_V6.
* The intent is that macros in the ip module end with _V6.
* IPv6 global variables will start with ipv6_
* IPv6 structures will start with ipv6
* IPv6 defined constants should start with IPV6_
* (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
*/
/*
* We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
* from IANA. This mechanism will remain in effect until an official
* number is obtained.
*/
/*
 * Constant all-ones and all-zeros IPv6 addresses. Elsewhere in this file
 * they are passed by address as fixed arguments, e.g. &ipv6_all_ones as
 * the mask and &ipv6_all_zeros as the gateway address in ire_create_v6()
 * calls.
 */
const in6_addr_t ipv6_all_ones =
{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };
#ifdef _BIG_ENDIAN
#else /* _BIG_ENDIAN */
#endif /* _BIG_ENDIAN */
#ifdef _BIG_ENDIAN
#else /* _BIG_ENDIAN */
#endif /* _BIG_ENDIAN */
#ifdef _BIG_ENDIAN
#else /* _BIG_ENDIAN */
#endif /* _BIG_ENDIAN */
#ifdef _BIG_ENDIAN
#else /* _BIG_ENDIAN */
#endif /* _BIG_ENDIAN */
#ifdef _BIG_ENDIAN
#else /* _BIG_ENDIAN */
#endif /* _BIG_ENDIAN */
/*
 * Solicited-node multicast address prefix.
 *
 * The two branches hold the same value with the bytes of each 32-bit
 * word swapped, so that (assuming the initializer fills four 32-bit
 * words in order -- TODO confirm against the in6_addr_t declaration)
 * the in-memory byte sequence is identical on either endianness:
 * ff 02 00 00  00 00 00 00  00 00 00 01  ff 00 00 00.
 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_solicited_node_mcast =
{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
#else /* _BIG_ENDIAN */
const in6_addr_t ipv6_solicited_node_mcast =
{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
#endif /* _BIG_ENDIAN */
/* Leave room for ip_newroute to tack on the src and target addresses */
#define OK_RESOLVER_MP_V6(mp) \
#define IP6_MBLK_OK 0
#define IP6_MBLK_HDR_ERR 1
#define IP6_MBLK_LEN_ERR 2
iulp_t *, ip_stack_t *);
ipif_t **);
/*
 * A template for an IPv6 AR_ENTRY_QUERY.
 *
 * The initializer is positional: each value is assigned to the areq_t
 * member at the same position, and the trailing comment on each line
 * names the member it is intended for. Members not listed here are
 * expected to be filled in by the code that uses the template.
 *
 * NOTE(review): a "target addr offset" entry precedes the target address
 * length, but no entry is labeled as a name offset or a sender address
 * offset. Confirm the number and order of entries against the areq_t
 * declaration, since a missing positional entry shifts every later value
 * into the wrong member.
 */
static areq_t ipv6_areq_template = {
AR_ENTRY_QUERY, /* cmd */
sizeof (areq_t), /* name len (filled by ill_arp_alloc) */
ETHERTYPE_IPV6, /* protocol, from arps perspective */
sizeof (areq_t), /* target addr offset */
IPV6_ADDR_LEN, /* target addr_length */
0, /* flags */
IPV6_ADDR_LEN, /* sender addr length */
6, /* xmit_count */
1000, /* (re)xmit_interval in milliseconds */
4 /* max # of requests to buffer */
/* anything else filled in by the code */
};
/*
* Handle IPv6 ICMP packets sent to us. Consume the mblk passed in.
* The message has already been checksummed and, if needed,
* a copy has been made to be sent to any interested ICMP client (conn).
* Note that this is different than icmp_inbound() which does the fanout
* to conn's as well as local processing of the ICMP packets.
*
* All error messages are passed to the matching transport stream.
*
* Zones notes:
* The packet is only processed in the context of the specified zone: typically
* only this zone will reply to an echo request. This means that the caller must
* call icmp_inbound_v6() for each relevant zone.
*/
static void
{
ipsec_in_t *ii;
if (mctl_present) {
}
ip1dbg(("icmp_inbound_v6: pullupmsg failed\n"));
return;
}
}
if (ipst->ips_icmp_accept_clear_messages == 0) {
return;
}
/*
* On a labeled system, we have to check whether the zone itself is
* permitted to receive raw traffic.
*/
if (is_system_labeled()) {
ip1dbg(("icmp_inbound_v6: zone %d can't receive raw",
zoneid));
return;
}
}
icmp6->icmp6_code));
/* Initiate IPPF processing here */
/*
* If the ifindex changes due to SIOCSLIFINDEX
* packet may return to IP on the wrong ill.
*/
if (mctl_present) {
}
return;
}
}
switch (icmp6->icmp6_type) {
case ICMP6_DST_UNREACH:
break;
case ICMP6_TIME_EXCEEDED:
break;
case ICMP6_PARAM_PROB:
break;
case ICMP6_PACKET_TOO_BIG:
zoneid);
return;
case ICMP6_ECHO_REQUEST:
break;
/*
* We must have exclusive use of the mblk to convert it to
* a response.
* If not, we copy it.
*/
if (mctl_present)
return;
}
if (mctl_present)
else
}
/*
* Turn the echo into an echo reply.
* Remove any extension headers (do not reverse a source route)
* and clear the flow id (keep traffic class for now).
*/
if (hdr_length != IPV6_HDR_LEN) {
int i;
for (i = 0; i < IPV6_HDR_LEN; i++)
}
/*
* Reverse the source and destination addresses.
* If the return address is a multicast, zero out the source
* (ip_wput_v6 will set an address).
*/
} else {
}
/* set the hop limit */
/*
* Prepare for checksum by putting icmp length in the icmp
* checksum field. The checksum is calculated in ip_wput_v6.
*/
if (!mctl_present) {
/*
* This packet should go out the same way as it
* came in, i.e. in the clear. To make sure that global
* policy will not be applied to this in ip_wput,
* we attach an IPSEC_IN mp and clear ipsec_in_secure.
*/
return;
}
/* This is not a secure packet */
}
return;
}
return;
case ICMP6_ECHO_REPLY:
break;
case ND_ROUTER_SOLICIT:
break;
case ND_ROUTER_ADVERT:
break;
case ND_NEIGHBOR_SOLICIT:
if (mctl_present)
/* XXX may wish to pass first_mp up to ndp_input someday. */
return;
case ND_NEIGHBOR_ADVERT:
if (mctl_present)
/* XXX may wish to pass first_mp up to ndp_input someday. */
return;
case ND_REDIRECT: {
if (ipst->ips_ipv6_ignore_redirect)
break;
/*
* As there is no upper client to deliver, we don't
* need the first_mp any more.
*/
if (mctl_present)
break;
}
return;
}
/*
* The next three icmp messages will be handled by MLD.
* Pass all valid MLD packets up to any process(es)
* listening on a raw ICMP socket. MLD messages are
* freed by mld_input function.
*/
case MLD_LISTENER_QUERY:
case MLD_LISTENER_REPORT:
case MLD_LISTENER_REDUCTION:
if (mctl_present)
return;
default:
break;
}
if (interested) {
} else {
}
}
/*
* Process received IPv6 ICMP Packet too big.
* After updating any IRE it does the fanout to any matching transport streams.
* Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
*/
/* ARGSUSED */
static void
{
if (mctl_present)
/*
* We must have exclusive use of the mblk to update the MTU
* in the packet.
* If not, we copy it.
*
* If there's an M_CTL present, we know that we allocated first_mp
* earlier in this function, so we know first_mp has a refcnt of one.
*/
if (mctl_present)
return;
}
if (mctl_present)
else
}
else
return;
}
}
/*
* For link-local destinations, matching simply on IRE type is not
* sufficient: the same link-local address can exist on different
* ILLs.
*/
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("icmp_inbound_too_big_v6:"
"no ire for dst %s\n", AF_INET6,
&inner_ip6h->ip6_dst);
}
return;
}
if (mtu < IPV6_MIN_MTU) {
ip1dbg(("Received mtu less than IPv6 "
mtu = IPV6_MIN_MTU;
/*
* If an mtu less than IPv6 min mtu is received,
* we must include a fragment header in
* subsequent packets.
*/
}
/* Decreased it */
}
/* Record the new max frag size for the ULP. */
/*
* If we need a fragment header in every packet
* (above case or multirouting), make sure the
* ULP takes it into account when computing the
* payload size.
*/
sizeof (ip6_frag_t));
} else {
}
}
} else {
/*
* for non-link local destinations we match only on the IRE type
*/
ipst);
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("icmp_inbound_too_big_v6:"
"no ire for dst %s\n",
}
return;
}
&inner_ip6h->ip6_dst)) {
if (mtu < IPV6_MIN_MTU) {
ip1dbg(("Received mtu less than IPv6"
"min mtu %d: %d\n",
IPV6_MIN_MTU, mtu));
mtu = IPV6_MIN_MTU;
/*
* If an mtu less than IPv6 min mtu is
* received, we must include a fragment
* header in subsequent packets.
*/
}
/* Decreased it */
}
/* Record the new max frag size for the ULP. */
/*
* If we need a fragment header in
* every packet (above case or
* multirouting), make sure the ULP
* takes it into account when computing
* the payload size.
*/
sizeof (ip6_frag_t));
} else {
}
}
}
}
}
/*
* Fanout for ICMPv6 errors containing IP-in-IPv6 packets. Returns B_TRUE if a
* tunnel consumed the message, and B_FALSE otherwise.
*/
static boolean_t
{
return (B_FALSE);
return (B_TRUE);
}
/*
* Fanout received ICMPv6 error packets to the transports.
* Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
*/
void
{
ipsec_in_t *ii;
if (mctl_present) {
} else {
}
/*
* Need to pullup everything in order to use
* ip_hdr_length_nexthdr_v6()
*/
ip1dbg(("icmp_inbound_error_fanout_v6: "
"pullupmsg failed\n"));
goto drop_pkt;
}
}
goto drop_pkt;
goto drop_pkt;
/* Set message type, must be done after pullups */
/*
* We need a separate IP header with the source and destination
* addresses reversed to do fanout/classification because the ip6h in
* the ICMPv6 error is in the form we sent it out.
*/
/* Try to pass the ICMP message to clients who need it */
switch (nexthdr) {
case IPPROTO_UDP: {
/*
* Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
* UDP header to get the port information.
*/
break;
}
/* Attempt to find a client stream based on port. */
return;
}
case IPPROTO_TCP: {
/*
* Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
* the TCP header to get the port information.
*/
break;
}
/*
* Attempt to find a client stream based on port.
* Note that we do a reverse lookup since the header is
* in the form we sent it out.
*/
goto drop_pkt;
}
return;
}
case IPPROTO_SCTP:
/*
* Verify we have at least ICMP_MIN_SCTP_HDR_LEN bytes of
* transport header to get the port information.
*/
goto drop_pkt;
}
}
return;
case IPPROTO_ESP:
case IPPROTO_AH: {
int ipsec_rc;
/*
* We will re-use the IPSEC_IN if it is already present, as
* AH/ESP will not affect any fields in the IPSEC_IN for
* ICMP errors. If there is no IPSEC_IN, allocate a new
* one and attach it in the front.
*/
/*
* ip_fanout_proto_again converts the ICMP errors
* that come back from AH/ESP to M_DATA so that
* if it is non-AH/ESP and we do a pullupmsg in
* this function, it would work. Convert it back
* to M_CTL before we send up as this is an ICMP
* error. This could have been generated locally or
* by some router. Validate the inner IPSEC
* headers.
*
* NOTE : ill_index is used by ip_fanout_proto_again
* to locate the ill.
*/
} else {
/*
* IPSEC_IN is not present. We attach an ipsec_in
* message and send up to IPSEC for validating
* and removing the IPSEC headers. Clear
* ipsec_in_secure so that when we return
* from IPSEC, we don't mistakenly think that this
* is a secure packet that came from the network.
*
* NOTE : ill_index is used by ip_fanout_proto_again
* to locate the ill.
*/
return;
}
/* This is not a secure packet */
}
if (!ipsec_loaded(ipss)) {
return;
}
if (nexthdr == IPPROTO_ESP)
else
if (ipsec_rc == IPSEC_STATUS_FAILED)
return;
return;
}
case IPPROTO_ENCAP:
case IPPROTO_IPV6:
goto drop_pkt;
}
if (nexthdr == IPPROTO_ENCAP ||
/*
* For tunnels that have used IPsec protection,
* we need to adjust the MTU to take into account
* the IPsec overhead.
*/
}
} else {
/*
* Self-encapsulated case. As in the ipv4 case,
* we need to strip the 2nd IP header. Since mp
* is already pulled-up, we can simply bcopy
* the 3rd header + data over the 2nd header.
*/
/*
* Make sure we don't do recursion more than once.
*/
&unused_len, &nexthdrp) ||
*nexthdrp == IPPROTO_IPV6) {
goto drop_pkt;
}
/*
* We are about to modify the packet. Make a copy if
* someone else has a reference to it.
*/
goto drop_pkt;
}
icmp6_offset = (uint16_t)
inner_ip6h = (ip6_t *)
if (mctl_present)
else
}
/*
* Need to set db_type back to M_DATA before
* refeeding mp into this function.
*/
/*
* Copy the 3rd header + remaining data on top
* of the 2nd header.
*/
/*
* Subtract length of the 2nd header.
*/
/*
* Now recurse, and see what I _really_ should be
* doing here.
*/
return;
}
return;
/*
* No IP tunnel is associated with this error. Perhaps a raw
* socket will want it.
*/
/* FALLTHRU */
default:
return;
}
/* NOTREACHED */
ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
}
/*
* Process received IPv6 ICMP Redirect messages.
*/
/* ARGSUSED */
static void
{
int nce_flags = 0;
int err = 0;
int len;
int optlen;
else
/* Verify if it is a valid redirect */
if (!IN6_IS_ADDR_LINKLOCAL(src) ||
(rd->nd_rd_code != 0) ||
(len < sizeof (nd_redirect_t)) ||
(IN6_IS_ADDR_V4MAPPED(dst)) ||
(IN6_IS_ADDR_MULTICAST(dst))) {
return;
}
if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
return;
}
if (len > sizeof (nd_redirect_t)) {
len - sizeof (nd_redirect_t))) {
return;
}
}
}
/* ipif will be refreleased afterwards */
return;
}
/*
* Verify that the IP source address of the redirect is
* the same as the current first-hop router for the specified
* ICMP destination address.
* Also, Make sure we had a route for the dest in question and
* that route was pointing to the old gateway (the source of the
* redirect packet.)
*/
/*
* Check that
* the redirect was not from ourselves
* old gateway is still directly reachable
*/
goto fail_redirect;
}
nce_flags |= NCE_F_NONUD;
/*
* Should we use the old ULP info to create the new gateway? From
* a user's perspective, we should inherit the info so that it
* is a "smooth" transition. If we do not do that, then new
* connections going thru the new gateway will have no route metrics,
* which is counter-intuitive to user. From a network point of
* view, this may or may not make sense even though the new gateway
* is still directly connected to us so the route metrics should not
* change much.
*
* But if the old ire_uinfo is not initialized, we do another
* recursive lookup on the dest using the new gateway. There may
* be a route to that. If so, use it to initialize the redirect
* route.
*/
} else if (redirect_to_router) {
/*
* Only do the following if the redirection is really to
* a router.
*/
ipst);
sizeof (iulp_t));
}
}
B_FALSE, /* don't match across illgrp */
&ipv6_all_ones, /* prefix mask */
&ipv6_all_zeros, /* Mapping mask */
0,
&nce);
switch (err) {
case 0:
break;
case EEXIST:
/*
* Check to see if link layer address has changed and
* process the nce_state accordingly.
*/
break;
default:
ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
err));
goto fail_redirect;
}
}
if (redirect_to_router) {
/* icmp_redirect_ok_v6() must have already verified this */
/*
* Create a Route Association. This will allow us to remember
* a router told us to use the particular gateway.
*/
ire = ire_create_v6(
dst,
&ipv6_all_ones, /* mask */
gateway, /* gateway addr */
NULL, /* no src nce */
NULL, /* no rfq */
NULL, /* no stq */
NULL,
0,
0,
&ulp_info,
NULL,
NULL,
ipst);
} else {
/*
* Just create an on link entry, i.e. interface route.
*/
ire = ire_create_v6(
dst, /* gateway == dst */
&ipv6_all_ones, /* mask */
&ipv6_all_zeros, /* gateway addr */
NULL, /* no src nce */
NULL, /* ire rfq */
stq, /* ire stq */
0,
0,
(RTF_DYNAMIC | RTF_HOST),
&ulp_info,
NULL,
NULL,
ipst);
}
/* Release reference from earlier ipif_get_next_ipif() */
goto fail_redirect;
/* tell routing sockets that we received a redirect */
&rd->nd_rd_target,
/*
* Delete any existing IRE_HOST type ires for this destination.
* This together with the added IRE has the effect of
* modifying an existing redirect.
*/
}
}
}
static ill_t *
{
if (ILL_CAN_LOOKUP(ill))
else
} else {
}
ip0dbg(("ip_queue_to_ill_v6: no ill\n"));
return (ill);
}
/*
* Assigns an appropriate source address to the packet.
* If origdst is one of our IP addresses then use it as the source.
* If the queue is an ill queue then select a source from that ill.
* Otherwise pick a source based on a route lookup back to the origsrc.
*
* src is the return parameter. Returns a pointer to src or NULL if failure.
*/
static in6_addr_t *
{
} else {
}
ipst);
/* Destined to one of our addresses */
return (src);
}
}
/* What is the route back to the original source? */
return (NULL);
}
}
return (src);
}
/*
* Unusual case - can't find a usable source address to reach the
* original source. Use what is in the route to the source.
*/
return (NULL);
}
return (src);
}
/*
* Build and ship an IPv6 ICMP message using the packet data in mp,
* and the ICMP header pointed to by "stuff". (May be called as
* writer.)
* Note: assumes that icmp_pkt_err_ok_v6 has been called to
* verify that an icmp error packet can be sent.
*
* If q is an ill write side queue (which is the case when packets
* arrive from ip_rput) then ip_wput code will ensure that packets to
* link-local destinations are sent out that ill.
*
* If v6src_ptr is set use it as a source. Otherwise select a reasonable
* source address (see above function).
*/
static void
{
return;
}
if (mctl_present) {
/*
* If it is :
*
* 1) a IPSEC_OUT, then this is caused by outbound
* datagram originating on this host. IPSEC processing
* may or may not have been done. Refer to comments above
* icmp_inbound_error_fanout for details.
*
* 2) a IPSEC_IN if we are generating a icmp_message
* for an incoming datagram destined for us i.e called
* from ip_fanout_send_icmp.
*/
/*
* Convert the IPSEC_IN to IPSEC_OUT.
*/
return;
}
} else {
/*
* Clear out ipsec_out_proc_begin, so we do a fresh
* ire lookup.
*/
}
} else {
/*
* This is in clear. The icmp message we are building
* here should go out in clear.
*/
ipsec_in_t *ii;
return;
}
/* This is not a secure packet */
/*
* For trusted extensions using a shared IP address we can
* send using any zoneid.
*/
else
/*
* Convert the IPSEC_IN to IPSEC_OUT.
*/
return;
}
}
} else {
return;
}
}
if (msg_len > len_needed) {
return;
}
}
return;
}
/*
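* Set ipsec_out_icmp_loopback so we can let the ICMP messages this
* node generates be accepted in peace by all on-host destinations.
* This relies on the assumption that all on-host destinations trust
* ICMP messages generated by this node; if that assumption is ever
* dropped, this code must be reworked.
* (Look for ipsec_out_icmp_loopback).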
*/
}
/*
* Prepare for checksum by putting icmp length in the icmp
* checksum field. The checksum is calculated in ip_wput_v6.
*/
}
/* Send to V6 writeside put routine */
}
/*
* Update the output mib when ICMPv6 packets are sent.
*/
static void
{
switch (icmp6->icmp6_type) {
case ICMP6_DST_UNREACH:
break;
case ICMP6_TIME_EXCEEDED:
break;
case ICMP6_PARAM_PROB:
break;
case ICMP6_PACKET_TOO_BIG:
break;
case ICMP6_ECHO_REQUEST:
break;
case ICMP6_ECHO_REPLY:
break;
case ND_ROUTER_SOLICIT:
break;
case ND_ROUTER_ADVERT:
break;
case ND_NEIGHBOR_SOLICIT:
break;
case ND_NEIGHBOR_ADVERT:
break;
case ND_REDIRECT:
break;
case MLD_LISTENER_QUERY:
break;
case MLD_LISTENER_REPORT:
case MLD_V2_LISTENER_REPORT:
break;
case MLD_LISTENER_REDUCTION:
break;
}
}
/*
* Check if it is ok to send an ICMPv6 error packet in
* response to the IP packet in mp.
* Free the message and return null if no
* ICMP error packet should be sent.
*/
static mblk_t *
{
if (!mp)
return (NULL);
/* Check if source address uniquely identifies the host */
return (NULL);
}
} else {
}
return (NULL);
}
}
/* Explicitly do not generate errors in response to redirects */
return (NULL);
}
}
/*
* Check that the destination is not multicast and that the packet
* was not sent on link layer broadcast or multicast. (Exception
* is Packet too big message as per the draft - when mcast_ok is set.)
*/
if (!mcast_ok &&
return (NULL);
}
if (icmp_err_rate_limit(ipst)) {
/*
* Only send ICMP error packets every so often.
* This should be done on a per port/source basis,
* but for now this will suffice.
*/
return (NULL);
}
return (mp);
}
/*
* Generate an ICMPv6 redirect message.
* Include target link layer address option if it exists.
* Always include redirect header.
*/
static void
{
int len;
int ll_opt_len = 0;
int pkt_len;
/*
* We are called from ip_rput where we could
* not have attached an IPSEC_IN.
*/
return;
ll_opt_len = (sizeof (nd_opt_hdr_t) +
}
return;
}
rd->nd_rd_code = 0;
rd->nd_rd_reserved = 0;
}
/* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */
/* Make sure mp is 8 byte aligned */
if (pkt_len > max_redir_hdr_data_len) {
sizeof (nd_opt_rd_hdr_t))/8;
} else {
}
rdh->nd_opt_rh_reserved1 = 0;
rdh->nd_opt_rh_reserved2 = 0;
/* ipif_v6src_addr contains the link-local source address */
/* Redirects sent by router, and router is global zone */
}
/* Generate an ICMP time exceeded message. (May be called as writer.) */
void
{
if (mctl_present)
return;
}
}
/*
* Generate an ICMP unreachable message.
*/
void
{
if (mctl_present)
return;
}
}
/*
* Generate an ICMP pkt too big message.
*/
static void
{
if (mctl_present)
return;
}
icmp6.icmp6_code = 0;
}
/*
* Generate an ICMP parameter problem message. (May be called as writer.)
* 'offset' is the offset from the beginning of the packet in error.
*/
static void
{
if (mctl_present)
return;
}
}
/*
* This code will need to take into account the possibility of binding
* to a link local address on a multi-homed host, in which case the
* outgoing interface (from the conn) will need to be used when getting
* an ire for the dst. Going through proper outgoing interface and
* choosing the source address corresponding to the outgoing interface
* is necessary when the destination address is a link-local address and
* IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set.
* This can happen when active connection is setup; thus ipp pointer
* is passed here from tcp_connect_*() routines, in non-TCP cases NULL
* pointer is passed as ipp pointer.
*/
mblk_t *
{
int protocol;
struct T_bind_req *tbr;
int error = 0;
/*
* All Solaris components should pass a db_credp
* for this TPI message, hence we ASSERT.
* But in case there is some other M_PROTO that looks
* like a TPI message sent by some other kernel
* component, we check and return an error.
*/
goto bad_addr;
}
"ip_bind_v6: bogus msg, len %ld", len);
goto bad_addr;
}
/* Back up and extract the protocol identifier. */
/* Reset the message type in preparation for shipping it back. */
/*
* Check for a zero length address. This is from a protocol that
* wants to register to receive all packets of its type.
*/
if (tbr->ADDR_length == 0) {
NULL) {
/*
* TCP, SCTP, AH, and ESP have single protocol fanouts.
* Do not allow others to bind to these.
*/
goto bad_addr;
}
/*
*
* The udp module never sends down a zero-length address,
* and allowing this on a labeled system will break MLP
* functionality.
*/
goto bad_addr;
/* Allow ipsec plumbing */
protocol != IPPROTO_ESP)
goto bad_addr;
return (mp);
}
/* Extract the address pointer from the message. */
tbr->ADDR_length);
ip1dbg(("ip_bind_v6: no address\n"));
goto bad_addr;
}
ip1dbg(("ip_bind_v6: unaligned address\n"));
goto bad_addr;
}
switch (tbr->ADDR_length) {
default:
ip1dbg(("ip_bind_v6: bad address length %d\n",
(int)tbr->ADDR_length));
goto bad_addr;
case IPV6_ADDR_LEN:
/* Verification of local address only */
lport = 0;
local_bind = B_TRUE;
break;
case sizeof (sin6_t):
local_bind = B_TRUE;
break;
case sizeof (ipa6_conn_t):
/*
* Verify that both the source and destination addresses
* are valid.
*/
/* For raw socket, the local port is not set. */
/* Always verify destination reachability. */
verify_dst = B_TRUE;
break;
case sizeof (ipa6_conn_x_t):
/*
* Verify that the source address is valid.
*/
/*
* Client that passed ipa6_conn_x_t to us specifies whether to
* verify destination reachability.
*/
break;
}
if (local_bind) {
} else {
}
if (error == 0) {
/* Send it home. */
return (mp);
}
if (error > 0)
else
return (mp);
}
/*
* Here address is verified to be a valid local address.
* If the IRE_DB_REQ_TYPE mp is present, a multicast
* address is also considered a valid local address.
* In the case of a multicast address, however, the
* upper protocol is expected to reset the src address
* to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that
* no packets are emitted with multicast address as
* source address.
* The addresses valid for bind are:
* (1) - in6addr_any
* (2) - IP address of an UP interface
* (3) - IP address of a DOWN interface
* (4) - a multicast address. In this case
* the conn will only receive packets destined to
* the specified multicast address. Note: the
* application still has to issue an
* IPV6_JOIN_GROUP socket option.
*
* In all the above cases, the bound address must be valid in the current zone.
* When the address is loopback or multicast, there might be many matching IREs
* so bind has to look up based on the zone.
*/
/*
* Verify the local IP address. Does not change the conn_t except
* conn_fully_bound and conn_policy_cached.
*/
static int
{
int error = 0;
if (mpp)
/*
* If it was previously connected, conn_fully_bound would have
* been set.
*/
if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
/*
* If an address other than in6addr_any is requested,
* we verify that it is a valid address for bind
* Note: Following code is in if-else-if form for
* readability compared to a condition check.
*/
/* LINTED - statement has no consequent */
if (IRE_IS_LOCAL(src_ire)) {
/*
* (2) Bind to address of local UP interface
*/
} else if (IN6_IS_ADDR_MULTICAST(v6src)) {
/*
* (4) bind to multicast address.
* Fake out the IRE returned to upper
* layer to be a broadcast IRE in
* ip_bind_insert_ire_v6().
* Pass other information that matches
* the ipif (e.g. the source address).
* conn_multicast_ill is only used for
* IPv6 packets
*/
(void) ipif_lookup_zoneid(
&multi_ipif);
} else {
/*
* Look for default like
* ip_wput_v6
*/
}
} else {
}
if (multi_ipif != NULL)
} else {
/*
* Not a valid address for bind
*/
}
}
if (error != 0) {
/* Red Alert! Attempting to be a bogon! */
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_bind_laddr_v6: bad src"
}
goto bad_addr;
}
}
/*
* Allow setting new policies. For example, disconnects come
* down as ipa_t bind. As we would have set conn_policy_cached
* to B_TRUE before, we should set it to B_FALSE, so that policy
* can change after the disconnect.
*/
/* If not fanout_insert this was just an address verification */
if (fanout_insert) {
/*
* The addresses have been verified. Time to insert in
* the correct fanout list.
*/
connp->conn_fport = 0;
}
if (error == 0) {
if (ire_requested) {
ipst)) {
error = -1;
goto bad_addr;
}
} else if (ipsec_policy_set) {
error = -1;
goto bad_addr;
}
}
}
if (error != 0) {
if (connp->conn_anon_port) {
B_FALSE);
}
}
if (ipsec_policy_set) {
/*
* As of now assume that nothing else accompanies
* IPSEC_POLICY_SET.
*/
}
return (error);
}
int
{
int error;
/* Bind to IPv4 address */
if (error != 0)
goto bad_addr;
} else {
if (IN6_IS_ADDR_V4MAPPED(v6srcp)) {
error = 0;
goto bad_addr;
}
if (error != 0)
goto bad_addr;
}
return (0);
if (error < 0)
return (error);
}
/*
* Verify that both the source and destination addresses
* are valid. If verify_dst, then destination address must also be reachable,
* i.e. have a route. Protocols like TCP want this. Tunnels do not.
* It takes ip6_pkt_t * as one of the arguments to determine correct
* source address when IPV6_PKTINFO or scope_id is set along with a link-local
* destination address. Note that parameter ipp is only useful for TCP connect
* when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all
* non-TCP cases, it is NULL and for all other tcp cases it is not useful.
*
*/
int
{
int error = 0;
if (mpp)
}
/*
* If we never got a disconnect before, clear it now.
*/
/*
* Check whether Trusted Solaris policy allows communication with this
* host, and pretend that the destination is unreachable if not.
*
* This is never a problem for TCP, since that transport is known to
* compute the label properly as part of the tcp_rput_other T_BIND_ACK
* handling. If the remote is unreachable, it will be detected at that
* point, so there's no reason to check it here.
*
* Note that for sendto (and other datagram-oriented friends), this
* check is done as part of the data path label computation instead.
* The check here is just to make non-TCP connect() report the right
* error.
*/
if (ip_debug > 2) {
"ip_bind_connected: no label for dst %s\n",
}
goto bad_addr;
}
/*
* tsol_check_dest() may have created a new cred with
* a modified security label. Use that cred if it exists
* for ire lookups.
*/
if (effective_cred == NULL) {
} else {
}
}
if (IN6_IS_ADDR_MULTICAST(v6dst)) {
/*
* Use an "emulated" IRE_BROADCAST to tell the transport it
* is a multicast.
* Pass other information that matches
* the ipif (e.g. the source address).
*
* conn_multicast_ill is only used for IPv6 packets
*/
} else {
/* Look for default like ip_wput_v6 */
}
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_bind_connected_v6: bad "
"connected multicast %s\n", AF_INET6,
v6dst);
}
error = ENETUNREACH;
goto bad_addr;
}
} else {
ipst);
/*
* We also prevent ire's with src address INADDR_ANY to
* be used, which are created temporarily for
* sending out packets from endpoints that have
* conn_unspec_src set.
*/
/*
* When verifying destination reachability, we always
* complain.
*
* When not verifying destination reachability but we
* found an IRE, i.e. the destination is reachable,
* then the other tests still apply and we complain.
*/
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_bind_connected_v6: bad"
" connected dst %s\n", AF_INET6,
v6dst);
}
error = ENETUNREACH;
} else {
}
goto bad_addr;
}
}
}
/*
* If the app does a connect(), it means that it will most likely
* send more than 1 packet to the destination. It makes sense
* to clear the temporary flag.
*/
/*
* We need to recheck for IRE_MARK_TEMPORARY after acquiring
* the lock in order to guarantee irb_tmp_ire_cnt.
*/
irb->irb_tmp_ire_cnt--;
}
}
/*
* See if we should notify ULP about MDT; we do this whether or not
* ire_requested is TRUE, in order to handle active connects; MDT
* eligibility tests for passive connects are handled separately
* through tcp_adapt_ire(). We do this before the source address
* selection, because dst_ire may change after a call to
* ipif_select_source_v6(). This is a best-effort check, as the
* packet for this connection may not actually go through
* dst_ire->ire_stq, and the exact IRE can only be known after
* calling ip_newroute_v6(). This is why we further check on the
* IRE during Multidata packet transmission in tcp_multisend().
*/
}
goto bad_addr;
error = ENETUNREACH;
else
goto bad_addr;
}
if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
}
} else {
/*
* Pick a source address so that a proper inbound load
* spreading would happen. Use dst_ill specified by the
* app. when socket option or scopeid is set.
*/
int err;
/*
* Scope id or IPV6_PKTINFO
*/
ipst);
ip1dbg(("ip_bind_connected_v6:"
" bad ifindex %d\n", if_index));
goto bad_addr;
}
/*
* For IPV6_BOUND_IF socket option,
* conn_outgoing_ill should be set
*/
if (err == ILL_LOOKUP_FAILED) {
ip1dbg(("ip_bind_connected_v6:"
"no ill for bound_if\n"));
goto bad_addr;
}
/* No need to hold ill here */
} else {
/* No need to hold ill here */
}
if (ip6_asp_can_lookup(ipst)) {
zoneid);
pr_addr_dbg("ip_bind_connected_v6: "
"no usable source address for "
"connection to %s\n",
goto bad_addr;
}
} else {
goto bad_addr;
}
}
}
/*
* We do ire_route_lookup_v6() here (and not an interface lookup)
* as we assert that v6src should only come from an
* UP interface for hard binding.
*/
/* src_ire must be a local|loopback */
if (!IRE_IS_LOCAL(src_ire)) {
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_bind_connected_v6: bad "
}
goto bad_addr;
}
/*
* If the source address is a loopback address, the
* destination had best be local or multicast.
* The transports that can't handle multicast will reject
* those addresses.
*/
ip1dbg(("ip_bind_connected_v6: bad connected loopback\n"));
error = -1;
goto bad_addr;
}
/*
* Allow setting new policies. For example, disconnects come
* down as ipa_t bind. As we would have set conn_policy_cached
* to B_TRUE before, we should set it to B_FALSE, so that policy
* can change after the disconnect.
*/
/*
* The addresses have been verified. Initialize the conn
* before calling the policy as they expect the conns
* initialized.
*/
if (ire_requested) {
/*
* Note that sire will not be NULL if this is an off-link
* connection and there is not cache for that dest yet.
*
* XXX Because of an existing bug, if there are multiple
* default routes, the IRE returned now may not be the actual
* default route used (default routes are chosen in a
* round robin fashion). So if the metrics for different
* default routes are different, we may return the wrong
* metrics. This will not be a problem if the existing
* bug is fixed.
*/
ipst)) {
error = -1;
goto bad_addr;
}
} else if (ipsec_policy_set) {
error = -1;
goto bad_addr;
}
}
/*
* Cache IPsec policy in this conn. If we have per-socket policy,
* we'll cache that. If we don't, we'll inherit global policy.
*
* We can't insert until the conn reflects the policy. Note that
* conn_policy_cached is set by ipsec_conn_cache_policy() even for
* connections where we don't have a policy. This is to prevent
* global policy lookups in the inbound path.
*
* If we insert before we set conn_policy_cached,
* CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true
* because global policy could be non-empty. We normally call
* ipsec_check_policy() for conn_policy_cached connections only if
* conn_in_enforce_policy is set. But in this case,
* conn_policy_cached can get set anytime since we made the
* CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy()
* is called, which will make the above assumption false. Thus, we
* need to insert after we set conn_policy_cached.
*/
goto bad_addr;
/* If not fanout_insert this was just an address verification */
if (fanout_insert) {
/*
* The addresses have been verified. Time to insert in
* the correct fanout list.
*/
}
if (error == 0) {
/*
* Our initial checks for MDT have passed; the IRE is not
* be supporting MDT. Pass the IRE, IPC and ILL into
* ip_mdinfo_return(), which performs further checks
* against them and upon success, returns the MDT info
* mblk which we will attach to the bind acknowledgment.
*/
if (md_dst_ire != NULL) {
} else {
}
}
}
}
if (ipsec_policy_set) {
/*
* As of now assume that nothing else accompanies
* IPSEC_POLICY_SET.
*/
}
if (md_dst_ire != NULL)
if (effective_cred != NULL)
return (error);
}
/* ARGSUSED */
int
{
int error = 0;
/* For raw socket, the local port is not set. */
/*
* Bind to local and remote address. Local might be
* unspecified in which case it will be extracted from
* ire_src_addr_v6
*/
/* Connect to IPv4 address */
/* Is the source unspecified or mapped? */
if (!IN6_IS_ADDR_V4MAPPED(v6srcp) &&
ip1dbg(("ip_proto_bind_connected_v6: "
"dst is mapped, but not the src\n"));
goto bad_addr;
}
/* Always verify destination reachability. */
if (error != 0)
goto bad_addr;
} else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) {
ip1dbg(("ip_proto_bind_connected_v6: "
"src is mapped, but not the dst\n"));
goto bad_addr;
} else {
if (error != 0)
goto bad_addr;
}
/* Send it home. */
return (0);
if (error == 0)
return (error);
}
/*
* Get the ire in *mpp. Returns B_FALSE if it fails (due to lack of space),
* B_TRUE otherwise.
* Makes the IRE be IRE_BROADCAST if dst is a multicast address.
*/
/* ARGSUSED4 */
static boolean_t
{
/*
* mp initialized above to IRE_DB_REQ_TYPE
* appended mblk. It is the <upper protocol>'s
* job to make sure there is room.
*/
return (B_FALSE);
if (IN6_IS_ADDR_MULTICAST(dst) ||
}
sizeof (iulp_t));
}
} else {
/*
* No IRE was found. Remove IRE mblk.
*/
}
return (B_TRUE);
}
/*
* Add an ip6i_t header to the front of the mblk.
* Inline if possible else allocate a separate mblk containing only the ip6i_t.
* Returns NULL if allocation fails (and frees original message).
* Used in outgoing path when going through ip_newroute_*v6().
* Used in incoming path to pass ifindex to transports.
*/
mblk_t *
{
return (NULL);
}
}
/*
* If `ill' is in an IPMP group, make sure we use the IPMP
* interface index so that e.g. IPV6_RECVPKTINFO will get the
* IPMP interface index and not an underlying interface index.
*/
if (IS_UNDER_IPMP(ill))
else
} else {
ip6i->ip6i_flags = 0;
}
return (mp);
}
/*
* Handle protocols with which IP is less intimate. There
* can be more than one stream bound to a particular
* protocol. When this is the case, normally each one gets a copy
* of any incoming packets.
*
* Zones notes:
* Packets will be distributed to streams in all zones. This is really only
* useful for ICMPv6 as only applications in the global zone can create raw
* sockets for other protocols.
*/
static void
{
if (mctl_present) {
} else {
}
if (shared_addr) {
/*
* We don't allow multilevel ports for raw IP, so no need to
* check for that here.
*/
}
zoneid) &&
(!is_system_labeled() ||
connp)))
break;
}
/*
* No one bound to this port. Is
* there a client that wants all
* unclaimed datagrams?
*/
}
return;
}
first_connp = connp;
/*
* XXX: Fix the multiple protocol listeners case. We should not
* be walking the conn->next list here.
*/
for (;;) {
(!is_system_labeled() ||
shared_addr, connp)))
break;
}
/*
* Just copy the data part alone. The mctl part is
* needed just for verifying policy and it is never
* sent up.
*/
/*
* No more interested clients or memory
* allocation failed
*/
connp = first_connp;
break;
}
/*
* For link-local always add ifindex so that transport can set
* sin6_scope_id. Avoid it for ICMP error fanout.
*/
if ((connp->conn_ip_recvpktinfo ||
IN6_IS_ADDR_LINKLOCAL(&src)) &&
(flags & IP_FF_IPINFO)) {
/* Add header */
}
} else if (
if (flags & IP_FF_RAWIP) {
} else {
}
} else {
secure) {
}
if (mctl_present)
}
}
/* Follow the next pointer before releasing the conn. */
connp = next_connp;
}
/* Last one. Send it upstream. */
/* Initiate IPPF processing */
if (mctl_present)
return;
}
}
/*
* For link-local always add ifindex so that transport can set
* sin6_scope_id. Avoid it for ICMP error fanout.
*/
(flags & IP_FF_IPINFO)) {
/* Add header */
if (mctl_present)
return;
} else if (mctl_present) {
} else {
}
}
if (flags & IP_FF_RAWIP) {
} else {
}
} else {
return;
}
}
if (mctl_present)
}
}
/*
* Send an ICMP error after patching up the packet appropriately. Returns
* non-zero if the appropriate MIB should be bumped; zero otherwise.
*/
int
{
unsigned char db_type;
if (mctl_present) {
} else {
/*
* If this is an ICMP error being reported - which goes
* up as M_CTLs, we need to convert them to M_DATA till
* we finish checking with global policy because
* ipsec_check_global_policy() assumes M_DATA as clear
* and M_CTL as secure.
*/
}
/*
* We are generating an icmp error for some inbound packet.
* Called from all ip_fanout_(udp, tcp, proto) functions.
* Before we generate an error, check with global policy
* to see whether this is allowed to enter the system. As
* there is no "conn", we are checking with global policy.
*/
return (0);
}
if (!mctl_present)
if (flags & IP_FF_SEND_ICMP) {
if (flags & IP_FF_HDR_COMPLETE) {
return (1);
}
}
switch (icmp_type) {
case ICMP6_DST_UNREACH:
break;
case ICMP6_PARAM_PROB:
break;
default:
#ifdef DEBUG
panic("ip_fanout_send_icmp_v6: wrong type");
/*NOTREACHED*/
#else
break;
#endif
}
} else {
return (0);
}
return (1);
}
/*
* Fanout for TCP packets
* The caller puts <fport, lport> in the ports parameter.
*/
static void
{
if (mctl_present) {
} else {
}
/*
* No hard-bound match. Send Reset.
*/
/* Initiate IPPF processing, if needed. */
(flags & IP6_NO_IPPOLICY)) {
return;
}
}
} else {
}
return;
}
if (IPCL_IS_TCP(connp)) {
/*
* If the queue belongs to a conn, and fused tcp
* loopback is enabled, assign the eager's squeue
* to be that of the active connect's.
*/
!secure &&
} else {
}
/*
* db_cksumstuff is unused in the incoming
* path; Thus store the ifindex here. It will
* be cleared in tcp_conn_create_v6().
*/
DB_CKSUMSTUFF(mp) =
}
}
return;
}
return;
}
return;
}
return;
}
if (mctl_present) {
} else {
}
} else {
/*
* Discard first_mp early since we're dealing with a
* fully-connected conn_t and tcp doesn't do policy in
* this case. Also, if someone is bound to IPPROTO_TCP
* over raw IP, they don't expect to see a M_CTL.
*/
if (mctl_present) {
}
}
}
/* Initiate IPPF processing */
if (mctl_present) {
}
return;
} else if (mctl_present) {
/*
* ip_add_info_v6 might return a new mp.
*/
} else {
}
}
/*
* For link-local always add ifindex so that TCP can bind to that
* interface. Avoid it for ICMP error fanout.
*/
(flags & IP_FF_IPINFO))) {
/* Add header */
if (mctl_present)
return;
} else if (mctl_present) {
} else {
}
}
if (IPCL_IS_TCP(connp)) {
} else {
/* SOCK_RAW, IPPROTO_TCP case */
}
}
/*
* Fanout for UDP packets.
* The caller puts <fport, lport> in the ports parameter.
* ire_type must be IRE_BROADCAST for multicast and broadcast packets.
*
* If SO_REUSEADDR is set all multicast and broadcast packets
* will be delivered to all streams bound to the same port.
*
* Zones notes:
* Multicast packets will be distributed to streams in all zones.
*/
static void
{
if (mctl_present) {
} else {
}
/* Extract ports in net byte order */
if (shared_addr) {
/*
* No need to handle exclusive-stack zones since ALL_ZONES
* only applies to the shared stack.
*/
/*
* If no shared MLP is found, tsol_mlp_findzone returns
* ALL_ZONES. In that case, we assume it's SLP, and
* search for the zone based on the packet label.
* That will also return ALL_ZONES on failure, but
* we never allow conn_zoneid to be set to ALL_ZONES.
*/
}
/* Attempt to find a client stream based on destination port. */
if (!IN6_IS_ADDR_MULTICAST(&dst)) {
/*
* Not multicast. Send to the one (first) client we find.
*/
break;
}
}
goto notfound;
if (is_system_labeled() &&
connp))
goto notfound;
/* Found a client */
return;
}
return;
}
}
/* Initiate IPPF processing */
if (mctl_present)
return;
}
}
/*
* For link-local always add ifindex so that
* transport can set sin6_scope_id. Avoid it for
* ICMP error fanout.
*/
if ((connp->conn_ip_recvpktinfo ||
IN6_IS_ADDR_LINKLOCAL(&src)) &&
(flags & IP_FF_IPINFO)) {
/* Add header */
if (mctl_present)
return;
} else if (mctl_present) {
} else {
}
}
/* Send it upstream */
if (mctl_present)
return;
}
(!is_system_labeled() ||
connp)))
break;
}
goto notfound;
first_conn = connp;
for (;;) {
(!is_system_labeled() ||
shared_addr, connp)))
break;
}
/*
* Just copy the data part alone. The mctl part is
* needed just for verifying policy and it is never
* sent up.
*/
/*
* No more interested clients or memory
* allocation failed
*/
connp = first_conn;
break;
}
/*
* For link-local always add ifindex so that transport
* can set sin6_scope_id. Avoid it for ICMP error
* fanout.
*/
if ((connp->conn_ip_recvpktinfo ||
IN6_IS_ADDR_LINKLOCAL(&src)) &&
(flags & IP_FF_IPINFO)) {
/* Add header */
}
/* mp1 could have changed */
if (mctl_present)
else
if (mctl_present)
goto next_one;
}
goto next_one;
}
}
if (mctl_present)
/* Send it upstream */
}
/* Follow the next pointer before releasing the conn. */
}
/* Last one. Send it upstream. */
/* Initiate IPPF processing */
if (mctl_present) {
}
return;
}
}
/*
* For link-local always add ifindex so that transport can set
* sin6_scope_id. Avoid it for ICMP error fanout.
*/
if ((connp->conn_ip_recvpktinfo ||
/* Add header */
if (mctl_present)
return;
} else if (mctl_present) {
} else {
}
}
} else {
return;
}
}
/* Send it upstream */
}
if (mctl_present)
return;
/*
* No one bound to this port. Is
* there a client that wants all
* unclaimed datagrams?
*/
zoneid);
} else {
}
}
}
/*
* int ip_find_hdr_v6()
*
* This routine is used by the upper layer protocols and the IP tunnel
* module to:
* - Set extension header pointers to appropriate locations
* - Determine IPv6 header length and return it
* - Return a pointer to the last nexthdr value
*
* The caller must initialize ipp_fields.
*
* NOTE: If multiple extension headers of the same type are present,
* ip_find_hdr_v6() will set the respective extension header pointers
* to the first one that it encounters in the IPv6 header. It also
* skips fragment headers. This routine deals with malformed packets
* of various sorts in which case the returned length is up to the
* malformed part.
*/
int
{
/* Is there enough left for len + nexthdr? */
goto done;
switch (nexthdr) {
case IPPROTO_HOPOPTS:
goto done;
/* return only 1st hbh */
}
break;
case IPPROTO_DSTOPTS:
goto done;
/*
* ipp_dstopts is set to the destination header after a
* routing header.
* Assume it is a post-rthdr destination header
* and adjust when we find an rthdr.
*/
}
break;
case IPPROTO_ROUTING:
goto done;
/* return only 1st rthdr */
}
/*
* Make any destination header we've seen be a
* pre-rthdr destination header.
*/
ipp->ipp_dstoptslen = 0;
}
break;
case IPPROTO_FRAGMENT:
ehdrlen = sizeof (ip6_frag_t);
goto done;
}
break;
case IPPROTO_NONE:
default:
goto done;
}
}
done:
return (length);
}
int
{
ip1dbg(("ip_hdr_complete_v6: no source IRE\n"));
return (1);
}
}
return (0);
}
/*
* Try to determine the IPv6 header length and the location of the
* nexthdr value for the upper layer protocol (or an unknown next hdr).
*
* The parameters return the header length (through hdr_length_ptr) and
* a pointer to the nexthdr value (nexthdrp).
* Must handle malformed packets of various sorts.
* Returns B_TRUE on success and B_FALSE for malformed cases.
*/
{
/* Is there enough left for len + nexthdr? */
break;
switch (*nexthdrp) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
/* Assumes the headers are identical for hbh and dst */
return (B_FALSE);
break;
case IPPROTO_ROUTING:
return (B_FALSE);
break;
case IPPROTO_FRAGMENT:
ehdrlen = sizeof (ip6_frag_t);
return (B_FALSE);
break;
case IPPROTO_NONE:
/* No next header means we're finished */
default:
*hdr_length_ptr = length;
return (B_TRUE);
}
*hdr_length_ptr = length;
}
switch (*nexthdrp) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
case IPPROTO_ROUTING:
case IPPROTO_FRAGMENT:
/*
* If any known extension headers are still to be processed,
* the packet is malformed (or at least all the IP header(s) are
* not in the same mblk - and that should never happen).
*/
return (B_FALSE);
default:
/*
* If we get here, we know that all of the IP headers were in
* the same mblk, even if the ULP header is in the next mblk.
*/
*hdr_length_ptr = length;
return (B_TRUE);
}
}
/*
* Return the length of the IPv6 related headers (including extension headers)
* Returns a length even if the packet is malformed.
*/
int
{
return (hdr_len);
}
/*
* IPv6 -
* ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need
* to send out a packet to a destination address for which we do not have
* specific routing information.
*
* Handle non-multicast packets. If ill is non-NULL the match is done
* for that ill.
*
* When a specific ill is specified (using IPV6_PKTINFO,
* IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match
* on routing entries (ftable and ctable) that have a matching
* ire->ire_ipif->ipif_ill. Thus this can only be used
* for destinations that are on-link for the specific ill
* and that can appear on multiple links. Thus it is useful
* for multicast destinations, link-local destinations, and
* at some point perhaps for site-local destinations (if the
* node sits at a site boundary).
* We create the cache entries in the regular ctable since
* it can not "confuse" things for other destinations.
*
* NOTE : These are the scopes of some of the variables that point at IRE,
* which needs to be followed while making any future modifications
* to avoid memory leaks.
*
* - ire and sire are the entries looked up initially by
* ire_ftable_lookup_v6.
* - ipif_ire is used to hold the interface ire associated with
* the new cache ire. But its scope is limited, so we always REFRELE
* it before branching out to error paths.
* - save_ire is initialized before ire_create, so that ire returned
* by ire_create will not over-write the ire. We REFRELE save_ire
* before breaking out of the switch.
*
* Thus on failures, we have to REFRELE only ire and sire, if they
* are not NULL.
*/
/* ARGSUSED */
void
{
int err = 0;
int match_flags;
} else {
}
if (IN6_IS_ADDR_LOOPBACK(v6dstp)) {
ip1dbg(("ip_newroute_v6: dst with loopback addr\n"));
goto icmp_err_ret;
} else if (IN6_IS_ADDR_LOOPBACK(v6srcp)) {
ip1dbg(("ip_newroute_v6: src with loopback addr\n"));
goto icmp_err_ret;
}
/*
* If this IRE is created for forwarding or it is not for
* TCP traffic, mark it as temporary.
*
* Is it sufficient just to check the next header??
*/
/*
* Get what we can from ire_ftable_lookup_v6 which will follow an IRE
* chain until it gets the most specific information available.
* For example, we know that there is no IRE_CACHE for this dest,
* but there may be an IRE_OFFSUBNET which specifies a gateway.
* ire_ftable_lookup_v6 will look up the gateway, etc.
*/
match_flags, ipst);
} else {
/*
* Because nce_xmit() calls ip_output_v6() and NCEs are always
* tied to an underlying interface, IS_UNDER_IPMP() may be
* true even when building IREs that will be used for data
* traffic. As such, use the packet's source address to
* determine whether the traffic is test traffic, and set
* MATCH_IRE_MARK_TESTHIDDEN if so.
*/
}
}
ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() "
/*
* We enter a loop that will be run only once in most cases.
* The loop is re-entered in the case where the destination
* can be reached through multiple RTF_MULTIRT-flagged routes.
* The intention is to compute multiple routes to a single
* destination in a single ip_newroute_v6 call.
* The information is contained in sire->ire_flags.
*/
do {
}
}
ip3dbg(("ip_newroute_v6: starting new resolution "
"with first_mp %p, tag %d\n",
/*
* We check if there are trailing unresolved routes for
* the destination contained in sire.
*/
ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, "
"ire %p, sire %p\n",
if (!multirt_is_resolvable) {
/*
* No more multirt routes to resolve; give up
* (all routes resolved or no more resolvable
* routes).
*/
}
} else {
/*
* We simply use first_sire as a flag that
* indicates if a resolvable multirt route has
* already been found during the preceding
* loops. If it is not the case, we may have
* to send an ICMP error to report that the
* destination is unreachable. We do not
* IRE_REFHOLD first_sire.
*/
if (first_sire == NULL) {
first_sire = sire;
}
}
}
/*
* either ire == NULL (the destination cannot be
* resolved) or ire == sire (the gateway cannot be
* resolved). At this point, there are no more routes
* to resolve for the destination, thus we exit.
*/
if (ip_debug > 3) {
/* ip2dbg */
pr_addr_dbg("ip_newroute_v6: "
}
ip3dbg(("ip_newroute_v6: "
"ire %p, sire %p, first_sire %p\n",
}
if (first_sire != NULL) {
/*
* At least one multirt route has been found
* in the same ip_newroute() call; there is no
* need to report an ICMP error.
* first_sire was not IRE_REFHOLDed.
*/
return;
}
goto icmp_err_ret;
}
/*
* Verify that the returned IRE does not have either the
* RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is
* either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER.
*/
goto icmp_err_ret;
/*
* Increment the ire_ob_pkt_count field for ire if it is an
* INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and
* increment the same for the parent IRE, sire, if it is some
* sort of prefix IRE (which includes DEFAULT, PREFIX, and HOST)
*/
}
IRE_INTERFACE)) == 0);
} else {
}
/*
* We have a route to reach the destination. Find the
* appropriate ill, then get a source address that matches the
* right scope via ipif_select_source_v6().
*
* If we are here trying to create an IRE_CACHE for an offlink
* destination and have an IRE_CACHE entry for VNI, then use
* ire_stq instead since VNI's queue is a black hole.
*
* Note: While we pick a dst_ill we are really only interested
* in the ill for load spreading. The source ipif is
* determined by source address selection below.
*/
} else {
dst_ill =
} else {
}
}
if (ip_debug > 2) {
pr_addr_dbg("ip_newroute_v6 : no dst "
}
goto icmp_err_ret;
}
/*
* We should have found a route matching "ill"
* as we called ire_ftable_lookup_v6 with
* MATCH_IRE_ILL. Rather than asserting when
* there is a mismatch, we just drop the packet.
*/
ip0dbg(("ip_newroute_v6: BOUND_IF failed: "
goto icmp_err_ret;
}
/*
* Pick a source address which matches the scope of the
* destination address.
* For RTF_SETSRC routes, the source address is imposed by the
* parent ire (sire).
*/
/*
* Because nce_xmit() calls ip_output_v6() and NCEs are always
* tied to the underlying interface, IS_UNDER_IPMP() may be
* true even when building IREs that will be used for data
* traffic. As such, see if the packet's source address is a
* test address, and if so use that test address's ipif for
* the IRE so that the logic that sets IRE_MARK_TESTHIDDEN in
* ire_add_v6() can work properly.
*/
!IN6_IS_ADDR_UNSPECIFIED(&v6gw) &&
/*
* The ire cache entry we're adding is for the
* gateway itself. The source address in this case
* is relative to the gateway's address.
*/
/*
* Check that the ipif matching the requested
* source address still exists.
*/
}
}
}
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_newroute_v6: no src for "
printf("ip_newroute_v6: interface name %s\n",
}
goto icmp_err_ret;
}
if (ip_debug > 3) {
/* ip2dbg */
pr_addr_dbg("ip_newroute_v6: first hop %s\n",
}
ip2dbg(("\tire type %s (%d)\n",
/*
* At this point in ip_newroute_v6(), ire is either the
* IRE_CACHE of the next-hop gateway for an off-subnet
* destination or an IRE_INTERFACE type that should be used
* to resolve an on-subnet destination or an on-subnet
* next-hop gateway.
*
* In the IRE_CACHE case, we have the following :
*
* 1) src_ipif - used for getting a source address.
*
* means packets using this IRE_CACHE will go out on dst_ill.
*
* 3) The IRE sire will point to the prefix that is the longest
* matching route for the destination. These prefix types
* include IRE_DEFAULT, IRE_PREFIX, IRE_HOST.
*
* The newly created IRE_CACHE entry for the off-subnet
* destination is tied to both the prefix route and the
* interface route used to resolve the next-hop gateway
* via the ire_phandle and ire_ihandle fields, respectively.
*
* In the IRE_INTERFACE case, we have the following :
*
* 1) src_ipif - used for getting a source address.
*
* means packets using the IRE_CACHE that we will build
* here will go out on dst_ill.
*
* 3) sire may or may not be NULL. But, the IRE_CACHE that is
* to be created will only be tied to the IRE_INTERFACE that
* was derived from the ire_ihandle field.
*
* If sire is non-NULL, it means the destination is off-link
* and we will first create the IRE_CACHE for the gateway.
* Next time through ip_newroute_v6, we will create the
* IRE_CACHE for the final destination as described above.
*/
case IRE_CACHE: {
if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) {
}
/*
* We need 3 ire's to create a new cache ire for an
* off-link destination from the cache ire of the
* gateway.
*
* 1. The prefix ire 'sire'
* 2. The cache ire of the gateway 'ire'
* 3. The interface ire 'ipif_ire'
*
* We have (1) and (2). We lookup (3) below.
*
* If there is no interface route to the gateway,
* it is a race condition, where we found the cache
* but the interface route has been deleted.
*/
ip1dbg(("ip_newroute_v6:"
"ire_ihandle_lookup_offlink_v6 failed\n"));
goto icmp_err_ret;
}
/*
* Note: the new ire inherits RTF_SETSRC
* and RTF_MULTIRT to propagate these flags from prefix
* to cache.
*/
/*
* Check cached gateway IRE for any security
* attributes; if found, associate the gateway
* credentials group to the destination IRE.
*/
}
ire = ire_create_v6(
v6dstp, /* dest address */
&ipv6_all_ones, /* mask */
&v6gw, /* gateway address */
NULL, /* src nce */
(RTF_SETSRC | RTF_MULTIRT),
NULL,
ipst);
}
break;
}
/* reference now held by IRE */
/*
* Prevent sire and ipif_ire from getting deleted. The
* newly created ire is tied to both of them via the
* phandle and ihandle respectively.
*/
/* Has it been removed already ? */
break;
}
/* Has it been removed already ? */
break;
}
}
}
if (ip6_asp_table_held) {
}
/* Assert that sire is not deleted yet. */
/* Assert that ipif_ire is not deleted yet. */
/*
* Search for the next unresolved
* multirt route.
*/
/* re-enter the loop */
continue;
}
return;
}
case IRE_IF_NORESOLVER:
/*
* We have what we need to build an IRE_CACHE.
*
* handle the Gated case, where we create
* a NORESOLVER route for loopback.
*/
break;
/*
* TSol note: We are creating the ire cache for the
* destination 'dst'. If 'dst' is offlink, going
* through the first hop 'gw', the security attributes
* of 'dst' must be set to point to the gateway
* credentials of gateway 'gw'. If 'dst' is onlink, it
* is possible that 'dst' is a potential gateway that is
* referenced by some route that has some security
* attributes. Thus in the former case, we need to do a
* gcgrp_lookup of 'gw' while in the latter case we
* need to do gcgrp_lookup of 'dst' itself.
*/
if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw))
else
/*
* Note: the new ire inherits sire flags RTF_SETSRC
* and RTF_MULTIRT to propagate those rules from prefix
* to cache.
*/
ire = ire_create_v6(
v6dstp, /* dest address */
&ipv6_all_ones, /* mask */
&v6gw, /* gateway address */
NULL, /* no src nce */
sire->ire_phandle : 0,
(RTF_SETSRC | RTF_MULTIRT) : 0,
NULL,
ipst);
}
break;
}
/* reference now held by IRE */
if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw))
else
if (err != 0) {
break;
}
/* Prevent save_ire from getting deleted */
/* Has it been removed already ? */
break;
}
/*
* In case of MULTIRT, a copy of the current packet
* to send is made to further re-enter the
* loop and attempt another route resolution
*/
}
}
if (ip6_asp_table_held) {
}
/* Assert that it is not deleted yet. */
/*
* If we found a (no)resolver, we ignore any
* trailing top priority IRE_CACHE in
* further loops. This ensures that we do not
* omit any (no)resolver despite the priority
* in this call.
* IRE_CACHE, if any, will be processed
* by another thread entering ip_newroute(),
* (on resolver response, for example).
* We use this to force multiple parallel
* resolution as soon as a packet needs to be
* sent. The result is, after one packet
* emission all reachable routes are generally
* resolved.
* Otherwise, complete resolution of MULTIRT
* routes would require several emissions as
* side effect.
*/
/*
* Search for the next unresolved multirt
* route.
*/
/* re-enter the loop */
continue;
}
/* Don't need sire anymore */
return;
case IRE_IF_RESOLVER:
/*
* We can't build an IRE_CACHE yet, but at least we
* found a resolver that can help.
*/
/*
* To be at this point in the code with a non-zero gw
* means that dst is reachable through a gateway that
* we have never resolved. By changing dst to the gw
* addr we resolve the gateway first. When
* ire_add_then_send() tries to put the IP dg to dst,
* it will reenter ip_newroute() at which time we will
* find the IRE_CACHE for the gw and create another
* IRE_CACHE above (for dst itself).
*/
if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) {
}
/*
* Ask the external resolver to do its thing.
* Make an mblk chain in the following form:
* ARQ_REQ_MBLK-->IRE_MBLK-->packet
*/
ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n"));
if (ip6_asp_table_held) {
}
&dst, /* dest address */
&ipv6_all_ones, /* mask */
/* source address */
&v6gw, /* gateway address */
NULL, /* no src nce */
0,
/* Interface handle */
0, /* flags if any */
NULL,
NULL,
ipst);
ip1dbg(("ip_newroute_v6:"
"ire is NULL\n"));
break;
}
/*
* processing a copy of the packet to
* send for further resolution loops
*/
}
/*
* Now create or find an nce for this interface.
* The hw addr will need to be set from
* the reply to the AR_ENTRY_QUERY that
* we're about to send. This will be done in
* ire_add_v6().
*/
switch (err) {
case 0:
/*
* New cache entry created.
* Break, then ask the external
* resolver.
*/
break;
case EINPROGRESS:
/*
* Resolution in progress;
* packet has been queued by
* ndp_resolver().
*/
/*
* Check if another multirt
* route must be resolved.
*/
/*
* If we found a resolver, we
* ignore any trailing top
* priority IRE_CACHE in
* further loops. The reason is
* the same as for noresolver.
*/
/*
* Search for the next
* unresolved multirt route.
*/
M_CTL) {
}
/*
* re-enter the loop
*/
continue;
}
return;
default:
/*
* Transient error; packet will be
* freed.
*/
break;
}
if (err != 0)
break;
/*
* Now set up the AR_ENTRY_QUERY and send it.
*/
ip1dbg(("ip_newroute_v6:"
"areq_mp is NULL\n"));
break;
}
/*
* link the chain, then send up to the resolver.
*/
ip1dbg(("ip_newroute_v6:"
"putnext to resolver\n"));
/*
* Check if another multirt route
* must be resolved.
*/
/*
* If we find a resolver, we ignore any
* trailing top priority IRE_CACHE in
* further loops. The reason is the
* same as for noresolver.
*/
/*
* Search for the next unresolved
* multirt route.
*/
}
/*
* re-enter the loop
*/
continue;
}
return;
}
/*
* Non-external resolver case.
*
* TSol note: Please see the note above the
* IRE_IF_NORESOLVER case.
*/
ire = ire_create_v6(
&dst, /* dest address */
&ipv6_all_ones, /* mask */
&v6gw, /* gateway address */
NULL, /* no src nce */
0,
0, /* flags if any */
NULL,
ipst);
}
break;
}
/* reference now held by IRE */
}
switch (err) {
case 0:
/* Prevent save_ire from getting deleted */
/* Has it been removed already ? */
break;
}
/*
* We have a resolved cache entry,
* add in the IRE.
*/
if (ip6_asp_table_held) {
}
/* Assert that it is not deleted yet. */
/*
* Check if another multirt route
* must be resolved.
*/
/*
* If we find a resolver, we ignore any
* trailing top priority IRE_CACHE in
* further loops. The reason is the
* same as for noresolver.
*/
/*
* Search for the next unresolved
* multirt route.
*/
}
/*
* re-enter the loop
*/
continue;
}
return;
case EINPROGRESS:
/*
* mp was consumed - presumably queued.
* No need for ire, presumably resolution is
* in progress, and ire will be added when the
* address is resolved.
*/
if (ip6_asp_table_held) {
}
/*
* Check if another multirt route
* must be resolved.
*/
/*
* If we find a resolver, we ignore any
* trailing top priority IRE_CACHE in
* further loops. The reason is the
* same as for noresolver.
*/
/*
* Search for the next unresolved
* multirt route.
*/
}
/*
* re-enter the loop
*/
continue;
}
return;
default:
/* Some transient error */
break;
}
break;
default:
break;
}
if (ip6_asp_table_held) {
}
} while (multirt_resolve_next);
ip1dbg(("ip_newroute_v6: dropped\n"));
}
} else {
}
if (need_rele)
} else {
} else {
}
}
/* Did this packet originate externally? */
}
}
return;
if (ip6_asp_table_held)
}
ip1dbg(("ip_newroute_v6: no route\n"));
/*
* We need to set sire to NULL to avoid double freeing if we
* ever goto err_ret from below.
*/
/* Skip ip6i_t header if present */
/* Make sure the IPv6 header is present */
sizeof (ip6i_t) + IPV6_HDR_LEN) {
ip1dbg(("ip_newroute_v6: pullupmsg failed\n"));
goto err_ret;
}
}
}
/* Did this packet originate externally? */
} else {
}
q = WR(q);
} else {
} else {
}
/* Failed */
}
if (need_rele)
return;
}
}
if (need_rele)
/*
* At this point we will have ire only if RTF_BLACKHOLE
* or RTF_REJECT flags are set on the IRE. It will not
* generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set.
*/
}
return;
}
}
if (ip_debug > 3) {
/* ip2dbg */
pr_addr_dbg("ip_newroute_v6: no route to %s\n",
}
}
/*
* ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever
* we need to send out a packet to a destination address for which we do not
* have specific routing information. It is only used for multicast packets.
*
* If unspec_src we allow creating an IRE with source address zero.
* ire_send_v6() will delete it after the packet is sent.
*/
void
{
int err = 0;
const in6_addr_t *ire_v6srcp;
/*
* This loop is run only once in most cases.
* We loop to resolve further routes only when the destination
* can be reached through multiple RTF_MULTIRT-flagged ires.
*/
do {
}
}
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n",
printf("ip_newroute_ipif_v6: if %s, v6 %d\n",
}
} else {
}
/*
* If the interface is a pt-pt interface we look for an
* IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the
* local_address and the pt-pt destination address.
* Otherwise we just match the local address.
*/
goto err_ret;
}
/*
* We check whether an IRE_OFFSUBNET for the addr goes through this
* ipif, to determine whether its RTF_SETSRC and/or RTF_MULTIRT flags
* must be honored.
*/
ip2dbg(("ip_newroute_ipif_v6: "
"ipif_lookup_multi_ire_v6("
"ipif %p, dst %08x) = fire %p\n",
(void *)fire));
/*
* Because nce_xmit() calls ip_output_v6() and NCEs are always
* tied to the underlying interface, IS_UNDER_IPMP() may be
* true even when building IREs that will be used for data
* traffic. As such, see if the packet's source address is a
* test address, and if so use that test address's ipif for
* the IRE so that the logic that sets IRE_MARK_TESTHIDDEN in
* ire_add_v6() can work properly.
*/
if (IS_UNDER_IPMP(ill))
/*
* Determine the outbound (destination) ill for this route.
* If IPMP is not in use, that's the same as our ill. If IPMP
* is in-use and we're on the IPMP interface, or we're on an
* underlying ill but sending data traffic, use a suitable
* destination ill from the group. The latter case covers a
* subtle edge condition with multicast: when we bring up an
* IPv6 data address, we will create an NCE on an underlying
* interface, and send solicitations to ff02::1, which would
* take us through here, and cause us to create an IRE for
* ff02::1. To meet our defined semantics for multicast (and
* ensure there aren't unexpected echoes), that IRE needs to
* use the IPMP group's nominated multicast interface.
*
* Note: the source ipif is determined by source address
* selection later.
*/
if (IS_UNDER_IPMP(ill)) {
} else {
}
goto err_ret;
if (IN6_IS_ADDR_MULTICAST(v6dstp))
else
} else {
}
if (ip_debug > 2) {
pr_addr_dbg("ip_newroute_ipif_v6: "
"no dst ill for dst %s\n",
}
goto err_ret;
}
/*
* Pick a source address which matches the scope of the
* destination address.
* For RTF_SETSRC routes, the source address is imposed by the
* parent ire (fire).
*/
/*
* Check that the ipif matching the requested source
* address still exists.
*/
}
}
if (!unspec_src) {
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_newroute_ipif_v6: "
"no src for dst %s\n",
printf(" through interface %s\n",
}
goto err_ret;
}
} else {
}
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n",
printf("ip_newroute_ipif_v6: "
}
goto err_ret;
}
goto err_ret;
ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),",
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg(" address %s\n",
}
ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n",
/*
* an IRE_OFFSUBNET was looked up
* on that interface.
* this ire has RTF_MULTIRT flag,
* so the resolution loop
* will be re-entered to resolve
* additional routes on other
* interfaces. For that purpose,
* a copy of the packet is
* made at this point.
*/
if (copy_mp) {
}
}
case IRE_IF_NORESOLVER: {
/*
* We have what we need to build an IRE_CACHE.
*
* handle the Gated case, where we create
* a NORESOLVER route for loopback.
*/
break;
/*
* The newly created ire will inherit the flags of the
* parent ire, if any.
*/
ire = ire_create_v6(
v6dstp, /* dest address */
&ipv6_all_ones, /* mask */
ire_v6srcp, /* source address */
NULL, /* gateway address */
NULL, /* no src nce */
NULL,
fire->ire_phandle : 0,
0,
NULL,
NULL,
ipst);
break;
}
if (err != 0) {
break;
}
/* Prevent save_ire from getting deleted */
/* Has it been removed already ? */
break;
}
if (ip6_asp_table_held) {
}
/* Assert that it is not deleted yet. */
}
/*
* The resolution loop is re-entered if we
* actually are in a multirouting case.
*/
if (!need_resolve) {
} else {
/*
* ipif_lookup_group_v6() calls
* ire_lookup_multi_v6() that uses
* ire_ftable_lookup_v6() to find
* an IRE_INTERFACE for the group.
* In the multirt case,
* ire_lookup_multi_v6() then invokes
* ire_multirt_lookup_v6() to find
* the next resolvable ire.
* As a result, we obtain a new
* interface, derived from the
* next ire.
*/
if (ipif_held) {
}
ip2dbg(("ip_newroute_ipif: "
"multirt dst %08x, ipif %p\n",
(void *)ipif));
continue;
} else {
}
}
}
if (ipif_held) {
}
return;
}
case IRE_IF_RESOLVER: {
/*
* We obtain a partial IRE_CACHE which we will pass
* along with the resolver query. When the response
* comes back it will be there ready for us to add.
*/
/*
* the newly created ire will inherit the flags of the
* parent ire, if any.
*/
ire = ire_create_v6(
v6dstp, /* dest address */
&ipv6_all_ones, /* mask */
ire_v6srcp, /* source address */
NULL, /* gateway address */
NULL, /* src nce */
NULL,
fire->ire_phandle : 0,
0,
NULL,
NULL,
ipst);
break;
}
/* Resolve and add ire to the ctable */
switch (err) {
case 0:
/* Prevent save_ire from getting deleted */
/* Has it been removed already ? */
break;
}
/*
* We have a resolved cache entry,
* add in the IRE.
*/
if (ip6_asp_table_held) {
}
/* Assert that it is not deleted yet. */
}
/*
* The resolution loop is re-entered if we
* actually are in a multirouting case.
*/
if (!need_resolve) {
} else {
/*
* ipif_lookup_group_v6() calls
* ire_lookup_multi_v6() that
* uses ire_ftable_lookup_v6()
* to find an IRE_INTERFACE for
* the group. In the multirt
* case, ire_lookup_multi_v6()
* then invokes
* ire_multirt_lookup_v6() to
* find the next resolvable ire.
* As a result, we obtain a new
* interface, derived from the
* next ire.
*/
if (ipif_held) {
}
ip2dbg(("ip_newroute_ipif: "
"multirt dst %08x, "
"ipif %p\n",
(*v6dstp))),
(void *)ipif));
continue;
} else {
}
}
}
if (ipif_held) {
}
return;
case EINPROGRESS:
/*
* mp was consumed - presumably queued.
* No need for ire, presumably resolution is
* in progress, and ire will be added when the
* address is resolved.
*/
if (ip6_asp_table_held) {
}
}
/*
* The resolution loop is re-entered if we
* actually are in a multirouting case.
*/
if (!need_resolve) {
} else {
/*
* ipif_lookup_group_v6() calls
* ire_lookup_multi_v6() that
* uses ire_ftable_lookup_v6()
* to find an IRE_INTERFACE for
* the group. In the multirt
* case, ire_lookup_multi_v6()
* then invokes
* ire_multirt_lookup_v6() to
* find the next resolvable ire.
* As a result, we obtain a new
* interface, derived from the
* next ire.
*/
if (ipif_held) {
}
ip2dbg(("ip_newroute_ipif: "
"multirt dst %08x, "
"ipif %p\n",
(*v6dstp))),
(void *)ipif));
continue;
} else {
}
}
}
if (ipif_held) {
}
return;
default:
/* Some transient error */
break;
}
break;
}
default:
break;
}
if (ip6_asp_table_held) {
}
} while (multirt_resolve_next);
if (ip6_asp_table_held)
/* Multicast - no point in trying to generate ICMP error */
}
} else {
}
ip1dbg(("ip_newroute_ipif_v6: dropped\n"));
if (ill_held)
}
/*
* Parse and process any hop-by-hop or destination options.
*
* Assumes that q is an ill read queue so that ICMP errors for link-local
* destinations are sent out the correct interface.
*
* Returns -1 if there was an error and mp has been consumed.
* Returns 0 if no special action is needed.
* Returns 1 if the packet contained a router alert option for this node
* which is verified to be "interesting/known" for our implementation.
*
* XXX Note: In future, as more hbh or dest options are defined,
* it may be better to have different routines for hbh and dest
* options, as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN
* may have the same value in different namespaces. (Or is it one
* namespace?) The current code checks, for each opt_type other than
* the pads, that it is in the expected nexthdr (hbh or dest).
*/
static int
{
int ret = 0;
const char *errtype;
}
while (optlen != 0) {
if (opt_type == IP6OPT_PAD1) {
optused = 1;
} else {
if (optlen < 2)
goto bad_opt;
errtype = "malformed";
goto bad_opt;
} else switch (opt_type) {
case IP6OPT_PADN:
/*
* Note: We don't verify that (N-2) pad octets
* are zero as required by spec. Adhere to
* "be liberal in what you accept..." part of
* implementation philosophy (RFC791,RFC1122)
*/
goto bad_opt;
break;
case IP6OPT_JUMBO:
if (hdr_type != IPPROTO_HOPOPTS)
goto opt_error;
goto opt_error; /* XXX Not implemented! */
case IP6OPT_ROUTER_ALERT: {
struct ip6_opt_router *or;
if (hdr_type != IPPROTO_HOPOPTS)
goto opt_error;
goto bad_opt;
/* Check total length and alignment */
goto opt_error;
/* Check value */
case IP6_ALERT_MLD:
case IP6_ALERT_RSVP:
ret = 1;
}
break;
}
case IP6OPT_HOME_ADDRESS: {
/*
* Minimal support for the home address option
* (which is required by all IPv6 nodes).
* Implement by just swapping the home address
* and source address.
* XXX Note: this has IPsec implications since
* AH needs to take this into account.
* Also, when IPsec is used we need to ensure
* that this is only processed once
* in the received packet (to avoid swapping
* back and forth).
* NOTE: This option processing is considered
* to be unsafe and prone to a denial of
* service attack.
* The current processing is not safe even with
* IPsec secured IP packets. Since the home
* address option processing requirement still
* is in the IETF draft and in the process of
* being redefined for its usage, it has been
* decided to turn off the option by default.
* If this section of code needs to be executed,
* ndd variable ip6_ignore_home_address_opt
* should be set to 0 at the user's own risk.
*/
struct ip6_opt_home_address *oh;
goto opt_error;
if (hdr_type != IPPROTO_DSTOPTS)
goto opt_error;
goto bad_opt;
/*
* We did this dest. opt the first time
* around (i.e. before AH processing).
* If we've done AH... stop now.
*/
ipsec_in_t *ii;
break;
}
/* Check total length and alignment */
goto opt_error;
/* Swap ip6_src and the home address */
/* XXX Note: only 8 byte alignment option */
break;
}
case IP6OPT_TUNNEL_LIMIT:
if (hdr_type != IPPROTO_DSTOPTS) {
goto opt_error;
}
goto bad_opt;
}
if (optused != 3) {
goto opt_error;
}
break;
default:
errtype = "unknown";
/* FALLTHROUGH */
/* Determine which zone should send error */
switch (IP6OPT_TYPE(opt_type)) {
case IP6OPT_TYPE_SKIP:
goto bad_opt;
ip1dbg(("ip_process_options_v6: %s "
"opt 0x%x skipped\n",
break;
case IP6OPT_TYPE_DISCARD:
ip1dbg(("ip_process_options_v6: %s "
"opt 0x%x; packet dropped\n",
return (-1);
case IP6OPT_TYPE_ICMP:
return (-1);
}
return (-1);
case IP6OPT_TYPE_FORCEICMP:
/*
* If we don't have a zone and the dst
* addr is multicast, then pick a zone
* based on the inbound interface.
*/
zoneid =
}
}
return (-1);
}
return (-1);
default:
ASSERT(0);
}
}
}
}
return (ret);
/* Determine which zone should send error */
} else {
}
return (-1);
}
/*
* Process a routing header that is not yet empty.
* Because of RFC 5095, we now reject all route headers.
*/
static void
{
if (!ipst->ips_ipv6_forward_src_routed) {
/* XXX Check for source routed out same interface? */
return;
}
return;
}
/* Sent by forwarding path, and router is global zone */
}
/*
* Read side put procedure for IPv6 module.
*/
void
{
int check;
union DL_primitives *dl;
/*
* Things are opening or closing - only accept DLPI
* ack messages. If the stream is closing and ip_wsrv
* has completed, ip_close is out of the qwait, but has
* not yet completed qprocsoff. Don't proceed any further
* because the ill has been cleaned up and things hanging
* off the ill have been freed.
*/
return;
}
}
case M_DATA: {
int hlen;
struct ether_header *eh;
/*
* This is a work-around for CR 6451644, a bug in Nemo. It
* should be removed when that problem is fixed.
*/
if (hlen >= sizeof (struct ether_vlan_header) &&
ucp -= sizeof (struct ether_vlan_header);
else
ucp -= sizeof (struct ether_header);
/*
* If it's a group address, then fabricate a
* DL_UNITDATA_IND message.
*/
8;
if (ill->ill_sap_length > 0)
if (ill->ill_sap_length < 0)
}
}
break;
}
case M_PROTO:
case M_PCPROTO:
/* Go handle anything other than data elsewhere. */
ip_rput_dlpi(q, mp);
return;
}
/* Save the DLPI header. */
break;
case M_BREAK:
panic("ip_rput_v6: got an M_BREAK");
/*NOTREACHED*/
case M_IOCACK:
case DL_IOC_HDR_INFO:
return;
default:
return;
}
/* FALLTHRU */
case M_ERROR:
case M_HANGUP:
return;
}
return;
case M_CTL:
break;
}
return;
case M_IOCNAK:
case DL_IOC_HDR_INFO:
return;
default:
break;
}
/* FALLTHRU */
default:
return;
}
/*
* If db_ref > 1 then copymsg and free the original. The packet may
* be changed, and we do not want another entity that holds a
* reference to this message to trip over the changes. This is a
* blind change because trying to catch all places that might change
* the packet is too difficult (since it may be a module above this
* one).
*/
goto discard;
}
}
if (mctl_present) {
}
return;
}
/*
* ip:::receive must see ipv6 packets with a full header,
* and so is placed after the IP6_MBLK_HDR_ERR check.
*/
int, 0);
if (check != IP6_MBLK_OK) {
return;
}
return;
/*
* Attach any necessary label information to this packet.
*/
if (ip6opt_ls != 0)
ip0dbg(("tsol_get_pkt_label v6 failed\n"));
goto discard;
}
/* IP observability hook. */
}
/*
* It may be a bit too expensive to do this mapped address
* check here, but in the interest of robustness, it seems
* like the correct place.
* TODO: Avoid this check for e.g. connected TCP sockets
*/
ip1dbg(("ip_rput_v6: pkt with mapped src addr\n"));
goto discard;
}
ip1dbg(("ip_rput_v6: pkt with loopback src"));
goto discard;
ip1dbg(("ip_rput_v6: pkt with loopback dst"));
goto discard;
}
} else {
goto discard;
}
return;
}
/*
* Walk through the IPv6 packet in mp and see if there's an AH header
* in it. See if the AH header needs to get done before other headers in
* the packet. (Worker function for ipsec_early_ah_v6().)
*/
#define IPSEC_HDR_DONT_PROCESS 0
#define IPSEC_HDR_PROCESS 1
static int
{
/*
* For now just pullup everything. In general, the less pullups,
* the better, but there's so much squirrelling through anyway,
* it's just easier this way.
*/
return (IPSEC_MEMORY_ERROR);
}
/*
* We can't just use the argument nexthdr in the place
* of nexthdrp because we don't dereference nexthdrp
* till we confirm whether it is a valid address.
*/
/* Is there enough left for len + nexthdr? */
return (IPSEC_MEMORY_ERROR);
switch (*nexthdrp) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
/* Assumes the headers are identical for hbh and dst */
return (IPSEC_MEMORY_ERROR);
/*
* Return DONT_PROCESS because the destination
* options header may be for each hop in a
* routing-header, and we only want AH if we're
* finished with routing headers.
*/
if (*nexthdrp == IPPROTO_DSTOPTS)
return (IPSEC_HDR_DONT_PROCESS);
break;
case IPPROTO_ROUTING:
/*
* If there's more hops left on the routing header,
* return now with DON'T PROCESS.
*/
if (rthdr->ip6r_segleft > 0)
return (IPSEC_HDR_DONT_PROCESS);
return (IPSEC_MEMORY_ERROR);
break;
case IPPROTO_FRAGMENT:
/* Wait for reassembly */
return (IPSEC_HDR_DONT_PROCESS);
case IPPROTO_AH:
*nexthdr = IPPROTO_AH;
return (IPSEC_HDR_PROCESS);
case IPPROTO_NONE:
/* No next header means we're finished */
default:
return (IPSEC_HDR_DONT_PROCESS);
}
}
/*
*/
return (IPSEC_MEMORY_ERROR);
}
/*
* Path for AH if options are present. If this is the first time we are
* sending a datagram to AH, allocate an IPSEC_IN message and prepend it.
* Otherwise, just fanout. Return value answers the boolean question:
* "Did I consume the mblk you sent me?"
*
* Sometimes AH needs to be done before other IPv6 headers for security
* reasons. This function (and its worker, ipsec_needs_processing_v6(),
* above) indicates if that is so, and fans out to the appropriate IPsec
* protocol for the datagram passed in.
*/
static boolean_t
{
switch (ipsec_needs_processing_v6(
case IPSEC_MEMORY_ERROR:
return (B_TRUE);
case IPSEC_HDR_DONT_PROCESS:
return (B_FALSE);
}
/* Default means send it to AH! */
if (!mctl_present) {
ip1dbg(("ipsec_early_ah_v6: IPSEC_IN "
"allocation failure.\n"));
return (B_TRUE);
}
/*
* Store the ill_index so that when we come back
* from IPSEC we ride on the same queue.
*/
}
/*
* Cache hardware acceleration info.
*/
"caching data attr.\n"));
}
if (!ipsec_loaded(ipss)) {
return (B_TRUE);
}
return (B_TRUE);
switch (ipsec_rc) {
case IPSEC_STATUS_SUCCESS:
/* we're done with IPsec processing, send it up */
break;
case IPSEC_STATUS_FAILED:
break;
case IPSEC_STATUS_PENDING:
/* no action needed */
break;
}
return (B_TRUE);
}
static boolean_t
{
NULL);
return (B_TRUE);
}
return (B_FALSE);
}
/*
* Validate the IPv6 mblk for alignment.
*/
int
{
/* check for alignment and full IPv6 header */
ip1dbg(("ip_rput_v6: pullupmsg failed\n"));
return (IP6_MBLK_HDR_ERR);
}
}
else
/*
* Check for bogus (too short packet) and packet which
* was padded by the link layer.
*/
ip1dbg(("ip_rput_data_v6: packet too short %d %d\n",
return (IP6_MBLK_LEN_ERR);
}
ip1dbg(("ip_rput_data_v6: adjmsg failed\n"));
return (IP6_MBLK_LEN_ERR);
}
/*
* adjmsg may have freed an mblk from the chain, hence
* invalidate any hw checksum here. This will force IP to
* calculate the checksum in sw, but only for this packet.
*/
DB_CKSUMFLAGS(mp) = 0;
}
return (IP6_MBLK_OK);
}
/*
* ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here.
* ip_rput_v6 has already verified alignment, the min length, the version,
* and db_ref = 1.
*
* The ill passed in (the arg named inill) is the ill that the packet
* actually arrived on. We need to remember this when saving the
* input interface index into potential IPV6_PKTINFO data in
* ip_add_info_v6().
*
* This routine doesn't free dl_mp; that's the caller's responsibility on
* return. (Note that the callers are complex enough that there's no tail
* recursion here anyway.)
*/
void
{
/*
* It's an IPsec accelerated packet.
* Keep a pointer to the data attributes around until
* we allocate the ipsecinfo structure.
*/
("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n"));
/*
* Since it is accelerated, it came directly from
* the ill.
*/
}
else
hck_flags = 0;
/* Clear checksum flags in case we need to forward */
DB_CKSUMFLAGS(mp) = 0;
reass_sum = reass_hck_flags = 0;
/* Process hop by hop header options */
if (nexthdr == IPPROTO_HOPOPTS) {
if (remlen < MIN_EHDR_LEN)
goto pkt_too_short;
return;
}
}
goto pkt_too_short;
return;
}
}
case -1:
/*
* Packet has been consumed and any
* needed ICMP messages sent.
*/
return;
case 0:
/* no action needed */
break;
case 1:
/* Known router alert */
goto ipv6forus;
}
}
/*
* On incoming v6 multicast packets we will bypass the ire table,
* and assume that the read queue corresponds to the targeted
* interface.
*
* The effect of this is the same as the IPv4 original code, but is
* much cleaner I think. See ip_rput for how that was done.
*/
/*
* So that we don't end up with dups, only one ill in an IPMP
* group is nominated to receive multicast data traffic.
* However, link-locals on any underlying interfaces will have
* joined their solicited-node multicast addresses and we must
* accept those packets. (We don't attempt to precisely
* filter out duplicate solicited-node multicast packets since
* e.g. an IPMP interface and underlying interface may have
* the same solicited-node multicast address.) Note that we
* won't generally have duplicates because we only issue a
* DL_ENABMULTI_REQ on one interface in a group; the exception
* is when PHYI_MULTI_BCAST is set.
*/
goto drop_pkt;
}
/*
* XXX TODO Give to mrouted for multicast forwarding.
*/
if (ip_debug > 3) {
/* ip2dbg */
pr_addr_dbg("ip_rput_data_v6: got mcast packet"
" which is not for us: %s\n", AF_INET6,
}
return;
}
if (ip_debug > 3) {
/* ip2dbg */
pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n",
}
goto ipv6forus;
}
/*
* Find an ire that matches destination. For link-local addresses
* we have to match the ill.
* TBD for site local addresses.
*/
} else {
/*
* Should only use IREs that are visible from the
* global zone for forwarding.
*/
}
}
/*
* No matching IRE found. Mark this packet as having
* originated externally.
*/
}
return;
}
goto hada_drop;
/* Sent by forwarding path, and router is global zone */
return;
}
/*
* Per RFC 3513 section 2.5.2, we must not forward packets with
* an unspecified source address.
*/
return;
}
return;
}
/* we have a matching IRE */
/*
* To be quicker, we may wish not to chase pointers
* (ire->ire_ipif->ipif_ill...) and instead store the
* forwarding policy in the ire. An unfortunate side-
* effect of this would be requiring an ire flush whenever
* the ILLF_ROUTER flag changes. For now, chase pointers
* once and store in the boolean no_forward.
*
* This appears twice to keep it out of the non-forwarding,
* yes-it's-for-us-on-the-right-interface case.
*/
/*
* This ire has a send-to queue - forward the packet.
*/
if (no_forward) {
}
return;
}
/*
* ipIfStatsHCInForwDatagrams should only be incremented if there
* will be an attempt to forward the packet, which is why we
* increment after the above condition has been checked.
*/
ip1dbg(("ip_rput_data_v6: hop limit expired.\n"));
/* Sent by forwarding path, and router is global zone */
return;
}
/*
* Per RFC 3513 section 2.5.2, we must not forward packets with
* an unspecified source address.
*/
return;
}
if (is_system_labeled()) {
return;
}
/* Size may have changed */
}
/*
* Handle labeled packet resizing.
*/
if (is_system_labeled()) {
}
/* Sent by forwarding path, and router is global zone */
return;
}
/*
* Check to see if we're forwarding the packet to a
* different link from which it came. If so, check the
* source and destination addresses since routers must not
* forward any packets with link-local source or
* destination addresses to other links. Otherwise (if
* we're forwarding onto the same link), conditionally send
* a redirect message.
*/
return;
}
/* TBD add site-local check at site boundary? */
} else if (ipst->ips_ipv6_send_redirects) {
/*
* Don't send a redirect when forwarding a source
* routed packet.
*/
goto forward;
if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) {
v6targ = &gw_addr_v6;
/*
* We won't send redirects to a router
* that doesn't have a link local
* address, but will forward.
*/
if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) {
goto forward;
}
} else {
}
GLOBAL_ZONEID, 0, NULL,
ipst);
if (src_ire_v6 != NULL) {
/*
* The source is directly connected.
*/
}
}
}
/* Hoplimit verified above */
}
return;
}
/*
* Need to put on correct queue for reassembly to find it.
* No need to use put() since reassembly has its own locks.
* Note: multicast packets and packets destined to addresses
* assigned to loopback (ire_rfq is NULL) will be reassembled on
* the arriving ill. Unlike the IPv4 case, enabling strict
* destination multihoming will prevent accepting packets
* addressed to an IRE_LOCAL on lo0.
*/
== NULL) {
return;
}
}
}
/* Don't use the ire after this point, we'll NULL it out to be sure. */
/*
* Looks like this packet is for us one way or another.
* This is where we'll process destination headers etc.
*/
for (; ; ) {
switch (nexthdr) {
case IPPROTO_TCP: {
int offset;
ip0dbg(("tcp hada drop\n"));
goto hada_drop;
}
/* TCP needs all of the TCP header */
if (remlen < TCP_MIN_HEADER_LENGTH)
goto pkt_too_short;
hdr_len + TCP_MIN_HEADER_LENGTH)) {
return;
}
hck_flags = 0;
}
/*
* Extract the offset field from the TCP header.
*/
if (offset != 5) {
if (offset < 5) {
ip1dbg(("ip_rput_data_v6: short "
"TCP data offset"));
return;
}
/*
* There must be TCP options.
* Make sure we can grab them.
*/
offset <<= 2;
goto pkt_too_short;
return;
}
hck_flags = 0;
}
}
/*
* TCP checksum calculation. First sum up the
* pseudo-header fields:
* - Source IPv6 address
* - Destination IPv6 address
* - TCP payload length
* - TCP protocol ID
*/
/* Fold initial sum */
if (cksum_err) {
if (hck_flags & HCK_FULLCKSUM) {
} else if (hck_flags & HCK_PARTIALCKSUM) {
} else {
}
return;
}
return;
}
case IPPROTO_SCTP:
{
/* SCTP needs all of the SCTP header */
goto pkt_too_short;
}
return;
}
}
/* checksum */
return;
}
return;
}
return;
}
case IPPROTO_UDP: {
ip0dbg(("udp hada drop\n"));
goto hada_drop;
}
/* Verify that at least the ports are present */
goto pkt_too_short;
return;
}
hck_flags = 0;
}
/*
* Before going through the regular checksum
* calculation, make sure the received checksum
* is non-zero. RFC 2460 says, a 0x0000 checksum
* in a UDP packet (within IPv6 packet) is invalid
* and should be replaced by 0xffff. This makes
* sense as regular checksum calculation will
* pass for both the cases i.e. 0x0000 and 0xffff.
* Removing one of the case makes error detection
* stronger.
*/
/* 0x0000 checksum is invalid */
ip1dbg(("ip_rput_data_v6: Invalid UDP "
"checksum value 0x0000\n"));
return;
}
/*
* UDP checksum calculation. First sum up the
* pseudo-header fields:
* - Source IPv6 address
* - Destination IPv6 address
* - UDP payload length
* - UDP protocol ID
*/
/* Fold initial sum */
if (reass_hck_flags != 0) {
} else {
}
if (cksum_err) {
if (hck_flags & HCK_FULLCKSUM)
else if (hck_flags & HCK_PARTIALCKSUM)
else
return;
}
goto udp_fanout;
}
case IPPROTO_ICMPV6: {
ip0dbg(("icmp hada drop\n"));
goto hada_drop;
}
if (sum != 0) {
/* IPv6 ICMP checksum failed */
ip1dbg(("ip_rput_data_v6: ICMPv6 checksum "
"failed %x\n",
sum));
return;
}
/* Check variable for testing applications */
if (ipst->ips_ipv6_drop_inbound_icmpv6) {
return;
}
/*
* Assume that there is always at least one conn for
* ICMPv6 (in.ndpd) i.e. don't optimize the case
* where there is no conn.
*/
/*
* In the multicast case, applications may have
* joined the group from different zones, so we
* need to deliver the packet to each of them.
* Loop through the multicast memberships
* structures (ilm) on the receive ill and send
* a copy of the packet up each matching one.
*/
if (!IN6_ARE_ADDR_EQUAL(
continue;
if (!ipif_lookup_zoneid(
continue;
continue;
hdr_len, mctl_present, 0,
}
} else {
}
goto proto_fanout;
}
case IPPROTO_ENCAP:
case IPPROTO_IPV6:
return;
}
/*
* If there was no IP tunnel data-link bound to
* receive this packet, then we fall through to
* allow potential raw sockets bound to either of
* these protocols to pick it up.
*/
/* FALLTHRU */
default: {
/*
* Handle protocols with which IPv6 is less intimate.
*/
ip0dbg(("default hada drop\n"));
goto hada_drop;
}
/*
* Enable sending ICMP for "Unknown" nexthdr
* case. i.e. where we did not FALLTHRU from
* IPPROTO_ICMPV6 processing case above.
* If we did FALLTHRU, then the packet has already been
* processed for IPPF, don't process it again in
* ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the
* flags
*/
if (nexthdr != IPPROTO_ICMPV6)
else
return;
}
case IPPROTO_DSTOPTS: {
/* If packet is too short, look no further */
if (remlen < MIN_EHDR_LEN)
goto pkt_too_short;
/* Check if AH is present. */
return;
}
/*
* Reinitialize pointers, as ipsec_early_ah_v6() does
* complete pullups. We don't have to do more pullups
* as a result.
*/
goto pkt_too_short;
/*
* Note: XXX This code does not seem to make a
* distinction between a Destination Options Header
* placed before and one placed after the Routing
* Header, which can happen if we are at the end of
* a source route.
* This may become significant in future.
* (No real significant Destination Options are
* defined/implemented yet).
*/
case -1:
/*
* Packet has been consumed and any needed
* ICMP errors sent.
*/
return;
case 0:
/* No action needed continue */
break;
case 1:
/*
* Unexpected return value
* (Router alert is a Hop-by-Hop option)
*/
#ifdef DEBUG
panic("ip_rput_data_v6: router "
"alert hbh opt indication in dest opt");
/*NOTREACHED*/
#else
return;
#endif
}
break;
}
case IPPROTO_FRAGMENT: {
ip0dbg(("frag hada drop\n"));
goto hada_drop;
}
if (remlen < sizeof (ip6_frag_t))
goto pkt_too_short;
return;
}
hck_flags = 0;
}
/*
* Invoke the CGTP (multirouting) filtering module to
* process the incoming packet. Packets identified as
* duplicates must be discarded. Filtering is active
* only if the ip_cgtp_filter ndd variable is
* non-zero.
*/
if (ipst->ips_ip_cgtp_filter &&
int cgtp_flt_pkt;
if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) {
return;
}
}
/* Restore the flags */
&reass_sum, &reass_hck_flags);
/* Reassembly is still pending */
return;
}
/* The first mblk are the headers before the frag hdr */
used = 0;
break;
}
case IPPROTO_HOPOPTS: {
ip0dbg(("hop hada drop\n"));
goto hada_drop;
}
/*
* Illegal header sequence.
* (Hop-by-hop headers are processed above
* and required to immediately follow IPv6 header)
*/
return;
}
case IPPROTO_ROUTING: {
/* If packet is too short, look no further */
if (remlen < MIN_EHDR_LEN)
goto pkt_too_short;
/* Check if AH is present. */
return;
}
/*
* Reinitialize pointers, as ipsec_early_ah_v6() does
* complete pullups. We don't have to do more pullups
* as a result.
*/
goto pkt_too_short;
if (rthdr->ip6r_segleft != 0) {
/* Not end of source route */
if (ll_multicast) {
return;
}
hada_mp);
return;
}
break;
}
case IPPROTO_AH:
case IPPROTO_ESP: {
/*
* If this is the first time we are sending the datagram
* to AH/ESP, allocate an IPSEC_IN message and prepend it.
* Otherwise, just fanout.
*/
ipsec_in_t *ii;
int ipsec_rc;
if (!mctl_present) {
ipst->ips_netstack);
ip1dbg(("ip_rput_data_v6: IPSEC_IN "
"allocation failure.\n"));
return;
}
/*
* Store the ill_index so that when we come back
* from IPSEC we ride on the same queue.
*/
/*
* Cache hardware acceleration info.
*/
("ip_rput_data_v6: "
"caching data attr.\n"));
}
} else {
}
if (!ipsec_loaded(ipss)) {
return;
}
/* select inbound SA and have IPsec process the pkt */
if (nexthdr == IPPROTO_ESP) {
ipst->ips_netstack);
return;
NULL);
} else {
ipst->ips_netstack);
return;
NULL);
}
switch (ipsec_rc) {
case IPSEC_STATUS_SUCCESS:
break;
case IPSEC_STATUS_FAILED:
/* FALLTHRU */
case IPSEC_STATUS_PENDING:
return;
}
/* we're done with IPsec processing, send it up */
return;
}
case IPPROTO_NONE:
/* All processing is done. Count as "delivered". */
return;
}
}
/* NOTREACHED */
ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n",
return;
} else {
ipst);
}
}
zoneid);
return;
}
return;
}
/* Initiate IPPF processing */
return;
}
}
if (connp->conn_ip_recvpktinfo ||
return;
}
}
/* Send it upstream */
return;
ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n"));
/* IPsec kstats: bump counter here */
}
/*
* Reassemble fragment.
* When it returns a completed message the first mblk will only contain
* the headers prior to the fragment header.
*
* prev_nexthdr_offset is the offset of the nexthdr field within the
* preceding header. This is needed to patch the previous header's
* nexthdr field when reassembly completes.
*
* Checksum information is handed back through cksum_val and cksum_flags.
* cksum_val is written unconditionally; cksum_flags is optional and is
* only written when it is non-NULL.
*
* Every drop path visible in this function returns NULL; the message
* itself is returned only once reassembly has completed.
*/
static mblk_t *
{
/* Start from "no checksum information" for the caller. */
*cksum_val = 0;
if (cksum_flags != NULL)
*cksum_flags = 0;
/*
* We utilize hardware computed checksum info only for UDP since
* IP fragmentation is a normal occurrence for the protocol. In
* addition, checksum offload support for IP fragments carrying
* UDP payload is commonly implemented across network adapters.
*/
/* Record checksum information from the packet */
/* fragmented payload offset from beginning of mblk */
if ((sum_flags & HCK_PARTIALCKSUM) &&
/*
* Partial checksum has been calculated by hardware
* and attached to the packet; in addition, any
* prepended extraneous data is even byte aligned.
* If any such data exists, we adjust the checksum;
* this would also handle any postpended data.
*/
/* One's complement subtract extraneous checksum */
else
}
} else {
sum_val = 0;
sum_flags = 0;
}
/* Clear hardware checksumming flag */
DB_CKSUMFLAGS(mp) = 0;
/*
* Note: Fragment offset in header is in 8-octet units.
* Clearing least significant 3 bits not only extracts
* it but also gets it in units of octets.
*/
/*
* Is the more frags flag on and the payload length not a multiple
* of eight?
*/
return (NULL);
}
return (NULL);
}
/*
* Would fragment cause reassembled packet to have a payload length
* greater than IP_MAXPACKET - the max payload size?
*/
if (end > IP_MAXPACKET) {
return (NULL);
}
return (NULL);
}
/*
* This packet just has one fragment. Reassembly not
* needed.
*/
if (!more_frags && offset == 0) {
goto reass_done;
}
/*
* Drop the fragment as early as possible if
* we don't have the resources to reassemble.
*/
if (ipst->ips_ip_reass_queue_bytes == 0) {
return (NULL);
}
/* Record the ECN field info. */
/*
* If this is not the first fragment, dump the unfragmentable
* portion of the packet.
*/
if (offset)
/*
* Fragmentation reassembly. Each ILL has a hash table for
* queueing packets undergoing reassembly for all IPIFs
* associated with the ILL. The hash is based on the packet
* IP ident field. The ILL frag hash table was allocated
* as a timer block at the time the ILL was created. Whenever
* there is anything on the reassembly queue, the timer will
* be running.
*/
}
/*
* If the reassembly list for this ILL will get too big
* prune it.
*/
}
/* Try to find an existing fragment queue for this packet. */
for (;;) {
if (ipf) {
/*
* It has to match on ident, source address, and
* dest address.
*/
/*
* If we have received too many
* duplicate fragments for this packet
* free it.
*/
return (NULL);
}
break;
}
continue;
}
/*
* If we pruned the list, do we want to store this new
* fragment? We apply an optimization here based on the
* fact that most fragments will be received in order.
* So if the offset of this incoming fragment is zero,
* it is the first fragment of a new packet. We will
* keep it. Otherwise drop the fragment, as we have
* probably pruned the packet already (since the
* packet cannot be found).
*/
return (NULL);
}
/* New guy. Allocate a frag message. */
if (!mp1) {
return (NULL);
}
/*
* Too many fragmented packets in this hash bucket.
* Free the oldest.
*/
}
/* Initialize the fragment header. */
/* Record reassembly start time. */
/* Record ipf generation and account for frag header */
ipf->ipf_nf_hdr_len = 0;
ipf->ipf_prev_nexthdr_offset = 0;
ipf->ipf_num_dups = 0;
ipfb->ipfb_frag_pkts++;
ipf->ipf_checksum = 0;
ipf->ipf_checksum_flags = 0;
/* Store checksum value in fragment header */
if (sum_flags != 0) {
}
/*
* We handle reassembly two ways. In the easy case,
* where all the fragments show up in order, we do
* minimal bookkeeping, and just clip new pieces on
* the end. If we ever see a hole, then we go off
* to ip_reassemble which has to mark the pieces and
* keep track of the number of holes, etc. Obviously,
* the point of having both mechanisms is so we can
* handle the easy case as efficiently as possible.
*/
if (offset == 0) {
/* Easy case, in-order reassembly so far. */
/* Update the byte count */
/*
* Keep track of next expected offset in
* ipf_end.
*/
} else {
/* Hard case, hole at the beginning. */
/*
* ipf_end == 0 means that we have given up
* on easy reassembly.
*/
/* Forget checksum offload from now on */
ipf->ipf_checksum_flags = 0;
/*
* ipf_hole_cnt is set by ip_reassemble.
* ipf_count is updated by ip_reassemble.
* No need to check for return value here
* as we don't expect reassembly to complete or
* fail for the first fragment itself.
*/
msg_len);
}
/* Update per ipfb and ill byte counts */
/* If the frag timer wasn't already going, start it. */
goto partial_reass_done;
}
/*
* If the packet's flag has changed (it could be coming up
* from an interface different than the previous, therefore
* possibly different checksum capability), then forget about
* any stored checksum states. Otherwise add the value to
* the existing one stored in the fragment header.
*/
} else if (ipf->ipf_checksum_flags != 0) {
/* Forget checksum offload from now on */
ipf->ipf_checksum_flags = 0;
}
/*
* We have a new piece of a datagram which is already being
* reassembled. Update the ECN info if all IP fragments
* are ECN capable. If there is one which is not, clear
* all the info. If there is at least one which has CE
* code point, IP needs to report that up to transport.
*/
if (ecn_info == IPH_ECN_CE)
} else {
}
/* The new fragment fits at the end */
/* Update the byte count */
/* Update per ipfb and ill byte counts */
if (more_frags) {
/* More to come. */
goto partial_reass_done;
}
} else {
/*
* Go do the hard cases.
* Call ip_reassemble().
*/
int ret;
if (offset == 0) {
if (ipf->ipf_prev_nexthdr_offset == 0) {
}
}
/* Save current byte count */
/* Count of bytes added and subtracted (freeb()ed) */
if (count) {
/* Update per ipfb and ill byte counts */
}
if (ret == IP_REASS_PARTIAL) {
goto partial_reass_done;
} else if (ret == IP_REASS_FAILED) {
/* Reassembly failed. Free up all resources */
IP_REASS_SET_START(t_mp, 0);
IP_REASS_SET_END(t_mp, 0);
}
goto partial_reass_done;
}
/* We will reach here iff 'ret' is IP_REASS_COMPLETE */
}
/*
* We have completed reassembly. Unhook the frag header from
* the reassembly list.
*
* Grab the unfragmentable header length and the next header value
* out of the first fragment.
*/
/*
* Before we free the frag header, record the ECN info
* to report back to the transport.
*/
/*
* Store the nextheader field in the header preceding the fragment
* header
*/
/* We need to supply these to caller */
else
sum_val = 0;
if (ipf)
ipfb->ipfb_frag_pkts--;
/* Ditch the frag header. */
/*
* Make sure the packet is good by doing some sanity
* checks. If bad we can silently drop the packet.
*/
if (hdr_length < sizeof (ip6_frag_t)) {
ip1dbg(("ip_rput_frag_v6: bad packet\n"));
return (NULL);
}
/*
* Remove the fragment header from the initial header by
* splitting the mblk into the non-fragmentable header and
* everything after the fragment extension header. This has the
* side effect of putting all the headers that need destination
* processing into the b_cont block-- on return this fact is
* used in order to avoid having to look at the extensions
* already processed.
*
* Note that this code assumes that the unfragmentable portion
* of the header is in the first mblk and increments
* the read pointer past it. If this assumption is broken
* this code fails badly.
*/
ip1dbg(("ip_rput_frag_v6: dupb failed\n"));
return (NULL);
}
}
/* Restore original IP length in header. */
/* Record the ECN info. */
/* Reassembly is successful; return checksum information if needed */
if (cksum_flags != NULL)
*cksum_flags = sum_flags;
return (mp);
}
/*
* Given an mblk and a ptr, find the destination address in an IPv6 routing
* header.
*
* The address is returned by value; every exit path visible here,
* including the corrupt-packet path, returns rv.
*/
static in6_addr_t
{
return (rv);
}
/*
* Corrupt packet. Either the routing header length is odd
* (can't happen) or mismatched compared to the packet, or the
* number of addresses is. Return what we can. This will
* only be a problem on forwarded packets that get squeezed
* through an outbound tunnel enforcing IPsec Tunnel Mode.
*/
whereptr);
return (rv);
}
if (segleft != 0) {
}
return (rv);
}
/*
* Walk through the options to see if there is a routing header.
* If present, get the destination, which is the last address of
* the option.
*
* is_fragment is optional: when it is non-NULL it is initialized to
* B_FALSE on entry.
*/
{
int ehdrlen;
/* We assume at least the IPv6 base header is within one mblk. */
if (is_fragment != NULL)
*is_fragment = B_FALSE;
/*
* We also assume (thanks to ipsec_tun_outbound()'s pullup) that
* no extension headers will be split across mblks.
*/
nexthdr == IPPROTO_ROUTING) {
if (nexthdr == IPPROTO_ROUTING)
/*
* All IPv6 extension headers have the next-header in byte
* 0, and the (length - 8) in 8-byte-words.
*/
if (current_mp == NULL) {
/* Bad packet. Return what we can. */
goto done;
}
}
}
done:
*is_fragment = B_TRUE;
return (rv);
}
/*
* ip_source_routed_v6:
* This function is called by the redirect code in ip_rput_data_v6 to
* find out whether this packet is source routed through this node, i.e.
* whether this node (router) is part of the journey. This
* function is called in two cases:
*
* case 1: Routing header was processed by this node and
* ip_process_rthdr replaced ip6_dst with the next hop
* and we are forwarding the packet to the next hop.
*
* case 2: Routing header was not processed by this node and we
* are just forwarding the packet.
*
* For case (1) we don't want to send redirects. For case (2) we
* want to send redirects.
*
* Returns B_TRUE only on the routing-header path below; every other
* visible exit returns B_FALSE.
*/
static boolean_t
{
ip2dbg(("ip_source_routed_v6\n"));
/* Skip over any HOPOPT or DSTOPT headers that precede a routing hdr */
while (nexthdr == IPPROTO_HOPOPTS ||
nexthdr == IPPROTO_DSTOPTS) {
/*
* Check if we have already processed
* packets or we are just a forwarding
* router which only pulled up msgs up
* to IPV6HDR and one HBH ext header
*/
ip2dbg(("ip_source_routed_v6: Extension"
" headers not processed\n"));
return (B_FALSE);
}
}
switch (nexthdr) {
case IPPROTO_ROUTING:
/*
* If for some reason, we haven't pulled up
* the routing hdr data mblk, then we must
* not have processed it at all. So for sure
* we are not part of the source routed journey.
*/
ip2dbg(("ip_source_routed_v6: Routing"
" header not processed\n"));
return (B_FALSE);
}
/*
* Either we are an intermediate router or the
* last hop before destination and we have
* already processed the routing header.
* If segment_left is greater than or equal to zero,
* then we must be the (numaddr - segleft) entry
* of the routing header. Although ip6r0_segleft
* is a uint8_t variable, we still check for a zero
* or greater value in case the data type
* is changed someday in the future.
*/
if (rthdr->ip6r0_segleft > 0 ||
rthdr->ip6r0_segleft == 0) {
sizeof (*rthdr));
ipst);
return (B_TRUE);
}
ip1dbg(("ip_source_routed_v6: No ire found\n"));
}
}
/* FALLTHRU */
default:
ip2dbg(("ip_source_routed_v6: Not source routed here\n"));
return (B_FALSE);
}
}
/*
* ip_wput_v6 -- Packets sent down from transport modules show up here.
* Assumes that the following set of headers appear in the first
* mblk:
* ip6i_t (if present) CAN also appear as a separate mblk.
* ip6_t
* Any extension headers
* The routine can handle an ICMPv6 header that is not in the first mblk.
*
* The order to determine the outgoing interface is as follows:
* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill.
* 2. If q is an ill queue and (link local or multicast destination) then
* use that ill.
* 3. If IPV6_BOUND_IF has been set use that ill.
* 4. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise
* look for the best IRE match for the unspecified group to determine
* the ill.
* 5. For unicast: Just do an IRE lookup for the best match.
*
* arg2 is always a queue_t *.
* When that queue is an ill_t (i.e. q_next != NULL), then arg must be
* the zoneid.
* When that queue is not an ill_t, then arg must be a conn_t pointer.
*/
void
{
int cksum_request; /* -1 => normal. */
/* Otherwise contains insert offset for checksum */
int unspec_src;
int match_flags = MATCH_IRE_ILL;
int err = 0;
int ip6i_flags = 0;
} else {
}
/*
* Highest bit in version field is Reachability Confirmation bit
* used by NUD in ip_xmit_v6().
*/
#ifdef _BIG_ENDIAN
#else
#endif
/*
* M_CTL comes from 5 places
*
* 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections
* both V4 and V6 datagrams.
*
* V4 and V6 datagrams.
*
* 3) NDP callbacks when nce is resolved and IPSEC_OUT has been
* attached.
*
* 4) Notifications from an external resolver (for XRESOLV ifs)
*
* IPsec hardware acceleration support.
*
* We need to handle (1)'s IPv6 case and (3) here. For the
* IPv4 case in (1), and (2), IPSEC processing has already
* started. The code in ip_wput() already knows how to handle
* continuing IPSEC processing (for IPv4 and IPv6). All other
* M_CTLs (including case (4)) are passed on to ip_wput_nondata()
* for handling.
*/
/* Multidata transmit? */
/*
* We should never get here, since all Multidata messages
* originating from tcp should have been directed over to
* tcp_multisend() in the first place.
*/
return;
/*
* Validate this M_CTL message. The only three types of
* M_CTL messages we expect to see in this code path are
* ipsec_out_t or ipsec_in_t structures (allocated as
* ipsec_info_t unions), or ipsec_ctl_t structures.
* The ipsec_out_type and ipsec_in_type overlap in the two
* data structures, and they are either set to IPSEC_OUT
* or IPSEC_IN depending on which data structure it is.
* ipsec_ctl_t is an IPSEC_CTL.
*
* All other M_CTL messages are sent to ip_wput_nondata()
* for handling.
*/
if ((mlen == sizeof (ipsec_ctl_t)) &&
return;
}
if ((mlen < sizeof (ipsec_info_t)) ||
return;
}
/* NDP callbacks have q_next non-NULL. That's case #3. */
/*
* For a freshly-generated TCP dgram that needs IPV6
* processing, don't call ip_wput immediately. We can
* tell this by the ipsec_out_proc_begin. In-progress
* IPSEC_OUT messages have proc_begin set to TRUE,
* and we want to send all IPSEC_IN messages to
* ip_wput() for IPsec processing or finishing.
*/
goto notv6;
}
}
return;
}
goto notv6;
}
}
if (mctl_present)
else
if (err != 0) {
tsol_ip_log_drop_checklabel_ip6, char *,
return;
}
}
/*
* We don't know if this ill will be used for IPv6
* until the ILLF_IPV6 flag is set via SIOCSLIFNAME.
* ipif_set_values() sets the ill_isv6 flag to true if
* ILLF_IPV6 is set. If the ill_isv6 flag isn't true,
* just drop the packet.
*/
ip1dbg(("ip_wput_v6: Received an IPv6 packet before "
"ILLF_IPV6 was set\n"));
return;
}
/* For uniformity do a refhold */
if (!ILL_CAN_LOOKUP(ill)) {
return;
}
unspec_src = 0;
} else {
/* is queue flow controlled? */
/*
* 1) TCP sends down M_CTL for detached connections.
*
* We don't flow control either of the above. Only
* UDP and others are flow controlled for which we
* can't have a M_CTL.
*/
return;
}
}
/*
* If there is a policy, try to attach an ipsec_out in
* the front. At the end, first_mp either points to a
* M_DATA message or IPSEC_OUT message linked to a
* M_DATA message. We have to do it now as we might
* lose the "conn" if we go through ip_newroute.
*/
if (!mctl_present &&
/* XXX Any better way to get the protocol fast ? */
if (need_decref)
return;
} else {
}
}
}
/* check for alignment and full IPv6 header */
ip0dbg(("ip_wput_v6: bad alignment or length\n"));
if (do_outrequests)
if (need_decref)
return;
}
/* Default: normal checksumming (see the cksum_request declaration). */
cksum_request = -1;
/*
* Once neighbor discovery has completed, ndp_process() will provide
* locally generated packets for which processing can be reattempted.
* In these cases, connp is NULL and the original zone is part of a
* prepended ipsec_out_t.
*/
/*
* When coming from icmp_input_v6, the zoneid might not match
* for the loopback case, because inside icmp_input_v6 the
* queue_t is a conn queue from the sending side.
*/
}
/*
* This is an ip6i_t header followed by an ip6_hdr.
* Check which fields are set.
*
* When the packet comes from a transport we should have
* all needed headers in the first mblk. However, when
* going through ip_newroute*_v6 the ip6i might be in
* a separate mblk when we return here. In that case
* we pullup everything to ensure that extension and transport
* headers "stay" in the first mblk.
*/
sizeof (ip6i_t) + IPV6_HDR_LEN));
ip1dbg(("ip_wput_v6: pullupmsg failed\n"));
if (do_outrequests) {
}
if (need_decref)
return;
}
}
/*
* Advance rptr past the ip6i_t to get ready for
* transmitting the packet. However, if the packet gets
* passed to ip_newroute*_v6 then rptr is moved back so
* that the ip6i_t header can be inspected when the
* packet comes back here after passing through
* ire_add_then_send.
*/
if (do_outrequests) {
}
ip1dbg(("ip_wput_v6: bad ifindex %d\n",
ip6i->ip6i_ifindex));
if (need_decref)
return;
}
/*
* Preserve the index so that when we return from
* IPSEC processing, we know where to send the packet.
*/
if (mctl_present) {
}
}
/* rpcmod doesn't send down db_credp for UDP packets */
else
}
if (secpolicy_net_rawaccess(cr) != 0) {
/*
* Use IPCL_ZONEID to honor SO_ALLZONES.
*/
if (do_outrequests)
ip1dbg(("ip_wput_v6: bad source "
"addr\n"));
if (need_decref)
return;
}
}
/* No need to verify again when using ip_newroute */
}
/*
* Make sure they match since ip_newroute*_v6 etc might
* (unknown to them) inspect ip6i_nexthop when
* they think they access ip6_dst.
*/
}
cksum_request = 1;
unspec_src = 1;
}
/*
* Store ip6i_t info that we need after we come back
* from IPSEC processing.
*/
if (mctl_present) {
}
}
if (IN6_IS_ADDR_MULTICAST(v6dstp))
goto ipv6multicast;
/* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. */
goto send_from_ill;
}
/*
* 2. If q is an ill queue and there's a link-local destination
* then use that ill.
*/
goto send_from_ill;
/* 3. If IPV6_BOUND_IF has been set use that ill. */
if (err == ILL_LOOKUP_FAILED) {
if (need_decref)
return;
}
goto send_from_ill;
}
/*
* 5. For unicast: Just do an IRE lookup for the best match.
* If we get here for a link-local address it is rather random
* what interface we pick on a multihomed host.
* *If* there is an IRE_CACHE (and the link-local address
* isn't duplicated on multi links) this will find the IRE_CACHE.
* Otherwise it will use one of the matching IRE_INTERFACE routes
* for the link-local prefix. Hence, applications
* *should* be encouraged to specify an outgoing interface when sending
* to a link local address.
*/
!connp->conn_fully_bound)) {
/*
* We cache IRE_CACHEs to avoid lookups. We don't do
* this for the tcp global queue and listen end point
* as it does not really have a real destination to
* talk to.
*/
ipst);
} else {
/*
* IRE_MARK_CONDEMNED is marked in ire_delete. We don't
* grab a lock here to check for CONDEMNED as it is okay
* to send a packet or two with the IRE_CACHE that is going
* away.
*/
} else {
/* Release the old ire */
if (CONN_CACHE_IRE(connp) &&
}
}
/*
* We can continue to use the ire but since it
* was not cached, we should drop the extra
* reference.
*/
if (!cached)
}
}
}
if (do_outrequests) {
/* Handle IRE_LOCAL's that might appear here */
} else {
}
}
/*
* Check if the ire has the RTF_MULTIRT flag, inherited
* from an IRE_OFFSUBNET ire entry in ip_newroute().
*/
/*
* Force hop limit of multirouted packets if required.
* The hop limit of such packets is bounded by the
* ip_multirt_ttl ndd variable.
* NDP packets must have a hop limit of 255; don't
* change the hop limit in that case.
*/
if ((ipst->ips_ip_multirt_ttl > 0) &&
if (ip_debug > 3) {
ip2dbg(("ip_wput_v6: forcing multirt "
"hop limit to %d (was %d) ",
&ire->ire_addr_v6);
}
}
/*
* We look at this point if there are pending
* unresolved routes. ire_multirt_need_resolve_v6()
* checks in O(n) that all IRE_OFFSUBNET ire
* entries for the packet's destination and
* flagged RTF_MULTIRT are currently resolved.
* If some remain unresolved, we do a copy
* of the current message. It will be used
* to initiate additional route resolutions.
*/
ip2dbg(("ip_wput_v6: ire %p, "
"multirt_need_resolve %d, first_mp %p\n",
(void *)ire, multirt_need_resolve,
(void *)first_mp));
if (multirt_need_resolve) {
}
}
}
if (need_decref) {
}
/*
* Try to resolve another multiroute if
* ire_multirt_need_resolve_v6() deemed it necessary.
* copy_mp will be consumed (sent or freed) by
* ip_newroute_v6().
*/
if (mctl_present) {
} else {
}
}
return;
}
/*
* No full IRE for this destination. Send it to
* ip_newroute_v6 to see if anything else matches.
* Mark this packet as having originated on this
* machine.
* Update rptr if there was an ip6i_t header.
*/
if (unspec_src) {
/*
* Add ip6i_t header to carry unspec_src
* until the packet comes back in ip_wput_v6.
*/
if (do_outrequests)
if (mctl_present)
if (need_decref)
return;
}
if (mctl_present) {
} else {
}
sizeof (ip6i_t)) {
/*
* ndp_resolver called from ip_newroute_v6
* expects pulled up message.
*/
ip1dbg(("ip_wput_v6: pullupmsg"
" failed\n"));
if (do_outrequests) {
}
if (need_decref)
return;
}
}
}
if (mctl_present) {
}
}
if (do_outrequests)
if (need_decref)
return;
/*
* Handle multicast packets with or without a conn.
* Assumes that the transports set ip6_hops taking
* IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit)
* into account.
*/
ip2dbg(("ip_wput_v6: multicast\n"));
/*
* Hold the conn_lock till we refhold the ill of interest that is
* pointed to from the conn. Since we cannot do an ill/ipif_refrele
* while holding any locks, postpone the refrele until after the
* conn_lock is dropped.
*/
} else {
}
/* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. */
/*
* 2. If q is an ill queue and (link local or multicast
* destination) then use that ill.
* We don't need the ipif initialization here.
* This useless assert below is just to prevent lint from
* reporting a null body if statement.
*/
/*
* 3. If IPV6_BOUND_IF has been set use that ill.
*
* 4. For multicast: if IPV6_MULTICAST_IF has been set use it.
* Otherwise look for the best IRE match for the unspecified
* group to determine the ill.
*
* conn_multicast_ill is used for only IPv6 packets.
* conn_multicast_ipif is used for only IPv4 packets.
* Thus a PF_INET6 socket can send both IPv4 and IPv6
* multicast packets using different IP*_MULTICAST_IF
* interfaces.
*/
if (err == ILL_LOOKUP_FAILED) {
ip1dbg(("ip_output_v6: multicast"
" conn_outgoing_ill no ipif\n"));
if (conn_lock_held)
if (do_outrequests)
if (need_decref)
return;
}
if (err == ILL_LOOKUP_FAILED) {
ip1dbg(("ip_output_v6: multicast"
" conn_multicast_ill no ipif\n"));
goto multicast_discard;
}
} else {
ip1dbg(("ip_output_v6: multicast no ipif\n"));
goto multicast_discard;
}
/*
* We have a ref to this ipif, so we can safely
* access ipif_ill.
*/
if (!ILL_CAN_LOOKUP(ill)) {
ip1dbg(("ip_output_v6: multicast no ipif\n"));
goto multicast_discard;
}
/*
* Save binding until IPV6_MULTICAST_IF
* changes it
*/
}
}
if (conn_lock_held)
/*
* For multicast loopback interfaces replace the multicast address
* with a unicast address for the ire lookup.
*/
if (IS_LOOPBACK(ill))
if (do_outrequests) {
}
/*
* As we may lose the conn by the time we reach ip_wput_ire_v6
* we copy conn_multicast_loop and conn_dontroute on to an
* ipsec_out. In case this datagram goes out secure,
* we need the ill_index also. Copy that also into the
* ipsec_out.
*/
if (mctl_present) {
} else {
NULL) {
if (need_decref)
return;
}
/* This is not a secure packet */
}
if (do_outrequests) {
}
/*
* Because nce_xmit() calls ip_output_v6() and NCEs are always tied to
* an underlying interface, IS_UNDER_IPMP() may be true even when
* building IREs that will be used for data traffic. As such, use the
* packet's source address to determine whether the traffic is test
* traffic, and set MATCH_IRE_MARK_TESTHIDDEN if so.
*
* Separately, we also need to mark probe packets so that ND can
* process them specially; see the comments in nce_queue_mp_common().
*/
if (mctl_present)
goto discard;
}
if (mctl_present)
else
/* ndp_resolver() expects a pulled-up message */
ip1dbg(("ip_output_v6: pullupmsg failed\n"));
if (need_decref)
return;
}
}
}
/*
* When a specific ill is specified (using IPV6_PKTINFO,
* IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match
* on routing entries (ftable and ctable) that have a matching
* ire->ire_ipif->ipif_ill. Thus this can only be used
* for destinations that are on-link for the specific ill
* and that can appear on multiple links. Thus it is useful
* for multicast destinations, link-local destinations, and
* at some point perhaps for site-local destinations (if the
* node sits at a site boundary).
* We create the cache entries in the regular ctable since
* it can not "confuse" things for other destinations.
*
* NOTE : conn_ire_cache is not used for caching ire_ctable_lookups.
* It is used only when ire_cache_lookup is used above.
*/
/*
* Check if the ire has the RTF_MULTIRT flag, inherited
* from an IRE_OFFSUBNET ire entry in ip_newroute().
*/
/*
* Force hop limit of multirouted packets if required.
* The hop limit of such packets is bounded by the
* ip_multirt_ttl ndd variable.
* NDP packets must have a hop limit of 255; don't
* change the hop limit in that case.
*/
if ((ipst->ips_ip_multirt_ttl > 0) &&
if (ip_debug > 3) {
ip2dbg(("ip_wput_v6: forcing multirt "
"hop limit to %d (was %d) ",
&ire->ire_addr_v6);
}
}
/*
* We look at this point if there are pending
* unresolved routes. ire_multirt_need_resolve_v6()
* checks in O(n) that all IRE_OFFSUBNET ire
* entries for the packet's destination and
* flagged RTF_MULTIRT are currently resolved.
* If some remain unresolved, we make a copy
* of the current message. It will be used
* to initiate additional route resolutions.
*/
ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, "
"multirt_need_resolve %d, first_mp %p\n",
(void *)ire, multirt_need_resolve,
(void *)first_mp));
if (multirt_need_resolve) {
}
}
}
ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n",
if (need_decref) {
}
/*
* Try to resolve another multiroute if
* ire_multirt_need_resolve_v6() deemed it necessary.
* copy_mp will be consumed (sent or freed) by
* ip_newroute_[ipif_]v6().
*/
if (mctl_present) {
} else {
}
ip1dbg(("ip_wput_v6: No ipif for "
"multicast\n"));
return;
}
zoneid);
} else {
}
}
return;
}
if (need_decref) {
}
/* Update rptr if there was an ip6i_t header. */
if (unspec_src) {
/*
* Add ip6i_t header to carry unspec_src
* until the packet comes back in ip_wput_v6.
*/
if (mctl_present) {
} else {
v6dstp);
}
return;
}
sizeof (ip6i_t)) {
/*
* ndp_resolver called from ip_newroute_v6
* expects a pulled up message.
*/
ip1dbg(("ip_wput_v6: pullupmsg"
" failed\n"));
return;
}
}
}
if (mctl_present) {
}
}
if (IN6_IS_ADDR_MULTICAST(v6dstp)) {
} else {
}
return;
/* FIXME?: assume the caller calls the right version of ip_output? */
/*
* We can change conn_send for all types of conn, even
* though only TCP uses it right now.
* FIXME: sctp could use conn_send but doesn't currently.
*/
}
}
/*
* If this is a conn_t queue, then we pass in the conn, which carries the
* zoneid.
* Otherwise, this is a message for an ill_t queue, in which case we use
* the global zoneid, since those queues are all part of the global zone.
*/
void
{
/* CONN_Q(q) selects between the two cases described above. */
if (CONN_Q(q))
else
}
/*
* NULL send-to queue - packet is to be delivered locally.
*
* Delivery is dispatched on the packet's next-header value: TCP, UDP,
* SCTP and ICMPv6 have dedicated cases below, and everything else is
* handled by the default case.
*/
void
{
if (!io->ipsec_out_secure) {
} else {
}
} else {
}
/*
* Remove reachability confirmation bit from version field
* before passing the packet on to any firewall hooks or
* looping back the packet.
*/
return;
/*
* ::1 is special, as we cannot lookup its zoneid by
* address. For this case, restrict the lookup to the
* source zone.
*/
}
int, 1);
/* Fastpath */
switch (nexthdr) {
case IPPROTO_TCP:
case IPPROTO_UDP:
case IPPROTO_ICMPV6:
case IPPROTO_SCTP:
break;
default: {
&hdr_length, &nexthdrp)) {
/* Malformed packet */
return;
}
break;
}
}
switch (nexthdr) {
case IPPROTO_TCP:
/*
* M_DATA mblk, so init mblk (chain) for
* no struio().
*/
do {
}
return;
case IPPROTO_UDP:
return;
case IPPROTO_SCTP:
{
return;
}
case IPPROTO_ICMPV6: {
/* check for full IPv6+ICMPv6 header */
(hdr_length + ICMP6_MINLEN)) {
ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg"
" failed\n"));
return;
}
}
/* Update output mib stats */
/* Check variable for testing applications */
if (ipst->ips_ipv6_drop_inbound_icmpv6) {
return;
}
/*
* Assume that there is always at least one conn for
* ICMPv6 (in.ndpd) i.e. don't optimize the case
* where there is no conn.
*/
!IS_LOOPBACK(ill)) {
/*
* In the multicast case, applications may have
* joined the group from different zones, so we
* need to deliver the packet to each of them.
* Loop through the multicast membership
* structures (ilm) on the receive ill and send
* a copy of the packet up each matching one.
* However, we don't do this for multicasts sent
* on the loopback interface (PHYI_LOOPBACK flag
* set) as they must stay in the sender's zone.
*/
if (!IN6_ARE_ADDR_EQUAL(
continue;
if ((fanout_flags &
continue;
if (!ipif_lookup_zoneid(
continue;
continue;
}
} else {
NULL);
}
}
/* FALLTHRU */
default: {
/*
* Handle protocols with which IPv6 is less intimate.
*/
/*
* Enable sending ICMP for the "Unknown" nexthdr
* case, i.e. where we did not FALLTHRU from the
* IPPROTO_ICMPV6 processing case above.
*/
if (nexthdr != IPPROTO_ICMPV6)
/*
* Note: There can be more than one stream bound
* to a particular protocol. When this is the case,
* each one gets a copy of any incoming packets.
*/
return;
}
}
}
/*
* Send packet using IRE.
* Checksumming is controlled by cksum_request:
* -1 => normal.
* 1 => skip the transport checksum.
* Otherwise => cksum_request contains insert offset for checksum
*
* Assumes that the following set of headers appear in the first
* mblk:
* ip6_t
* Any extension headers
* The routine can handle an ICMPv6 header that is not in the first mblk.
*
* NOTE : This function does not ire_refrele the ire passed in as the
* argument unlike ip_wput_ire where the REFRELE is done.
* Refer to ip_wput_ire for more on this.
*/
static void
{
int max_frag;
} else {
/*
* Grab the zone id now because the M_CTL can be discarded by
* ip_wput_ire_parse_ipsec_out() below.
*/
/*
* For the multicast case, ipsec_out carries conn_dontroute and
* conn_multicast_loop as conn may not be available here. We
* need this for multicast loopback and forwarding which is done
* later in the code.
*/
/*
* If conn_dontroute is not set or conn_multicast_loop
* is set, we need to do forwarding/loopback. For
* datagrams from ip_wput_multicast, conn_dontroute is
* set to B_TRUE and conn_multicast_loop is set to
* B_FALSE so that we neither do forwarding nor
* loopback.
*/
if (!conn_dontroute || conn_multicast_loop)
}
}
/*
* If the sender didn't supply the hop limit and there is a default
* unicast hop limit associated with the output interface, we use
* that if the packet is unicast. Interface specific unicast hop
* limits as set via the SIOCSLIFLNKINFO ioctl.
*/
}
/*
* When a zone sends a packet to another zone, we try to deliver
* the packet under the same conditions as if the destination
* was a real node on the network. To do so, we look for a
* matching route in the forwarding table.
* RTF_REJECT and RTF_BLACKHOLE are handled just like
* ip_newroute_v6() does.
* Note that IRE_LOCAL are special, since they are used
* when the zoneid doesn't match in some cases. This means that
* we need to handle ipha_src differently since ire_src_addr
* belongs to the receiving zone instead of the sending zone.
* When ip_restrict_interzone_loopback is set, then
* ire_cache_lookup_v6() ensures that IRE_LOCAL are only used
* for loopback between zones when the logical "Ethernet" would
* have looped them back.
*/
!unspec_src) {
}
} else {
return;
}
}
/* Failed */
return;
}
return;
}
}
return;
}
}
} else {
}
/*
* Select the source address using ipif_select_source_v6.
*/
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_wput_ire_v6: no src for "
}
return;
}
}
!IS_LOOPBACK(ill)) {
int fanout_flags = 0;
!connp->conn_multicast_loop) {
}
ip1dbg(("ip_wput_ire_v6: "
"Loopback multicast\n"));
if (mctl_present) {
} else {
}
0, ipst);
/*
* DTrace this as ip:::send. A blocked
* packet will fire the send probe, but
* not the receive probe.
*/
int, 1);
/*
* Deliver locally and to
* every local zone, except
* the sending zone when
* IPV6_MULTICAST_LOOP is
* disabled.
*/
}
} else {
ip1dbg(("ip_wput_ire_v6: "
"copymsg failed\n"));
}
}
}
IS_LOOPBACK(ill)) {
/*
* Local multicast or just loopback on loopback
* interface.
*/
ip1dbg(("ip_wput_ire_v6: local multicast only\n"));
return;
}
}
/*
* non-NULL send-to queue - packet is to be sent
* out an interface.
*/
/* Driver is flow-controlling? */
if (!IP_FLOW_CONTROLLED_ULP(nexthdr) &&
/*
* Queue packet if we have a conn to give back
* pressure. We can't queue packets intended for
* hardware acceleration since we've tossed that
* state already. If the packet is being fed back
* from ire_send_v6, we don't know the position in
* the queue to enqueue the packet and we discard
* the packet.
*/
/*
* caller == IP_WSRV implies we are
* the service thread, and the
* queue is already noenabled.
* The check for canput and
* the putbq is not atomic.
* So we need to check again.
*/
connp->conn_did_putbq = 0;
} else {
}
return;
}
return;
}
/*
* Look for reachability confirmations from the transport.
*/
if (mctl_present)
}
/* Fastpath */
switch (nexthdr) {
case IPPROTO_TCP:
case IPPROTO_UDP:
case IPPROTO_ICMPV6:
case IPPROTO_SCTP:
break;
default: {
&hdr_length, &nexthdrp)) {
/* Malformed packet */
return;
}
break;
}
}
/*
* The packet header is processed once for all, even
* in the multirouting case. We disable hardware
* checksum if the packet is multirouted, as it will be
* replicated via several interfaces, and not all of
* them may have this capability.
*/
if (cksum_request == 1 &&
/* Skip the transport checksum */
goto cksum_done;
}
/*
* Do user-configured raw checksum.
* Compute checksum and insert at offset "cksum_request"
*/
/* check for enough headers for checksum */
(cksum_request + sizeof (int16_t))) {
cksum_request + sizeof (int16_t))) {
ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg"
" failed\n"));
return;
}
}
/*
* icmp has placed length and routing
* header adjustment in *insp.
*/
} else if (nexthdr == IPPROTO_TCP) {
/*
* Check for full IPv6 header + enough TCP header
* to get at the checksum field.
*/
ip1dbg(("ip_wput_v6: TCP hdr pullupmsg"
" failed\n"));
return;
}
}
/*
* Note: The TCP module has stored the length value
* into the tcp checksum field, so we don't
* need to explicitly sum it in here.
*/
/* Fold the initial sum */
/* Software checksum? */
if (DB_CKSUMFLAGS(mp) == 0) {
}
} else if (nexthdr == IPPROTO_UDP) {
/*
* check for full IPv6 header + enough UDP header
* to get at the UDP checksum field
*/
ip1dbg(("ip_wput_v6: UDP hdr pullupmsg"
" failed\n"));
return;
}
}
/*
* Note: The UDP module has stored the length value
* into the udp checksum field, so we don't
* need to explicitly sum it in here.
*/
/* Fold the initial sum */
/* Software checksum? */
if (DB_CKSUMFLAGS(mp) == 0) {
}
} else if (nexthdr == IPPROTO_ICMPV6) {
/* check for full IPv6+ICMPv6 header */
(hdr_length + ICMP6_MINLEN)) {
ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg"
" failed\n"));
return;
}
}
/*
* icmp has placed length and routing
* header adjustment in icmp6_cksum.
*/
/* Update output mib stats */
} else if (nexthdr == IPPROTO_SCTP) {
sizeof (*sctph))) {
ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg"
" failed\n"));
return;
}
}
}
/*
* We force the insertion of a fragment header using the
* IPH_FRAG_HDR flag in two cases:
* - after reception of an ICMPv6 "packet too big" message
* with a MTU < 1280 (cf. RFC 2460 section 5)
* - for multirouted IPv6 packets, so that the receiver can
* discard duplicates according to their fragment identifier
*
* Two flags modified from the API can modify this behavior.
* The first is IPV6_USE_MIN_MTU. With this API the user
* can specify how to manage PMTUD for unicast and multicast.
*
* IPV6_DONTFRAG disallows fragmentation.
*/
switch (IP6I_USE_MIN_MTU_API(flags)) {
case IPV6_USE_MIN_MTU_DEFAULT:
case IPV6_USE_MIN_MTU_UNICAST:
}
break;
case IPV6_USE_MIN_MTU_NEVER:
break;
}
return;
}
ip0dbg(("Packet length mismatch: %d, %ld\n",
return;
}
/* Do IPSEC processing first */
if (mctl_present) {
return;
}
ip2dbg(("Fragmenting Size = %d, mtu = %d\n",
IPV6_HDR_LEN, max_frag));
/* Initiate IPPF processing */
return;
}
}
return;
}
/* Do IPSEC processing first */
if (mctl_present) {
(flags & IP6I_DONTFRAG)) {
/*
* IPsec headers will push the packet over the
* MTU limit. Issue an ICMPv6 Packet Too Big
* message for this packet if the upper-layer
* that issued this packet will be able to
* react to the icmp_pkt2big_v6() that we'll
* generate.
*/
return;
}
return;
}
/*
* XXX multicast: add ip_mforward_v6() here.
* Check conn_dontroute
*/
#ifdef lint
/*
* XXX The only purpose of this statement is to avoid lint
* errors. See the above "XXX multicast". When that gets
* fixed, remove this whole #ifdef lint section.
*/
ip3dbg(("multicast forward is %s.\n",
#endif
} else {
/*
* DTrace this as ip:::send. A blocked packet will fire the
* send probe, but not the receive probe.
*/
zoneid);
}
}
}
/*
* Outbound IPv6 fragmentation routine using MDT.
*/
static void
{
int i1;
uint32_t ident;
/*
* Move read ptr past unfragmentable portion, we don't want this part
* of the data in our fragments.
*/
/* Calculate how many packets we will send out */
/* Allocate a message block which will hold all the IP Headers. */
/*
* Create the header buffer, Multidata and destination address
* and SAP attribute that should be associated with it.
*/
} else {
}
return;
}
/*
* Add a payload buffer to the Multidata; this operation must not
* fail, or otherwise our logic in this routine is broken. There
* is no memory allocation done by the routine, so any returned
* failure simply tells us that we've done something wrong.
*
* A failure tells us that either we're adding the same payload
* buffer more than once, or we're trying to add more buffers than
* allowed. None of the above cases should happen, and we panic
* because it indicates a programming mistake.
*/
goto pbuf_panic;
}
/*
* len is the total length of the fragmentable data in this
* datagram. For each fragment sent, we will decrement len
* by the amount of fragmentable data sent in that fragment
* until len reaches zero.
*/
offset = 0;
while (len != 0) {
int error;
fraghdr->ip6f_reserved = 0;
((len != 0) ? IP6F_MORE_FRAG : 0);
fraghdr->ip6f_ident = ident;
/*
* Record offset and size of header and data of the next packet
* in the multidata message.
*/
unfragmentable_len + sizeof (ip6_frag_t), 0);
} else {
/*
* Attach the next payload message block to the
* multidata message.
*/
goto pbuf_panic;
}
KM_NOSLEEP)) == NULL) {
/*
* Any failure other than ENOMEM indicates that we
* have passed in invalid pdesc info or parameters
* to mmd_addpdesc, which must not happen.
*
* EINVAL is a result of failure on boundary checks
* against the pdesc info contents. It should not
* happen, and we panic because it indicates a programming
* mistake.
*/
"pdesc logic error detected for "
"mmd %p pinfo %p (%d)\n",
/* NOTREACHED */
}
/* Free unattached payload message blocks as well */
goto free_mmd;
}
/* Advance fragment offset. */
/* Advance to location for next header in the buffer. */
hdr_ptr += hdr_chunk_len;
/* Did we reach the next payload message block? */
/*
* Attach the next message block with payload
* data to the multidata message.
*/
goto pbuf_panic;
}
}
/* Update IP statistics */
/*
* The ipv6 header len is accounted for in unfragmentable_len so
* when calculating the fragmentation overhead just add the frag
* header len.
*/
/* Send it down */
return;
pbuf_idx);
/* NOTREACHED */
}
/*
* IPv6 fragmentation. Essentially the same as IPv4 fragmentation.
* We have not optimized this in terms of number of mblks
* allocated. For instance, for each fragment sent we always allocate a
* mblk to hold the IPv6 header and fragment header.
*
* Assumes that all the extension headers are contained in the first mblk.
*
* The fragment header is inserted after a hop-by-hop options header
* and after [an optional destinations header followed by] a routing header.
*
* NOTE : This function does not ire_refrele the ire passed in as
* the argument.
*/
void
{
uint32_t ident;
if (max_frag <= 0) {
return;
}
/*
* Determine the length of the unfragmentable portion of this
* datagram. This consists of the IPv6 header, a potential
* hop-by-hop options header, a potential pre-routing-header
* destination options header, and a potential routing header.
*/
if (nexthdr == IPPROTO_HOPOPTS) {
}
if (nexthdr == IPPROTO_DSTOPTS) {
}
}
if (nexthdr == IPPROTO_ROUTING) {
}
sizeof (ip6_frag_t)) & ~7;
/* Check if we can use MDT to send out the frags. */
return;
}
/*
* Allocate an mblk with enough room for the link-layer
* header, the unfragmentable part of the datagram, and the
* fragment header. This (or a copy) will be used as the
* first mblk for each fragment we send.
*/
return;
}
fraghdr->ip6f_reserved = 0;
fraghdr->ip6f_offlg = 0;
/*
* len is the total length of the fragmentable data in this
* datagram. For each fragment sent, we will decrement len
* by the amount of fragmentable data sent in that fragment
* until len reaches zero.
*/
/*
* Move read ptr past unfragmentable portion, we don't want this part
* of the data in our fragments.
*/
while (len != 0) {
if (len != 0) {
/* Not last */
ip1dbg(("ip_wput_frag_v6: copyb failed\n"));
return;
}
} else {
/* Last fragment */
off_flags = 0;
}
/*
* Note: Optimization alert.
* In IPv6 (and IPv4) protocol header, Fragment Offset
* ("offset") is 13 bits wide and in 8-octet units.
* In IPv6 protocol header (unlike IPv4) in a 16 bit field,
* it occupies the most significant 13 bits.
* (least significant 13 bits in IPv4).
* We do not do any shifts here. Not shifting is same effect
* as taking offset value in octet units, dividing by 8 and
* then shifting 3 bits left to line it up in its proper
* place in the protocol header.
*/
/* mp has already been freed by ip_carve_mp() */
ip1dbg(("ip_carve_mp: failed\n"));
return;
}
/* Get the priority marking, if any */
reachable = 0; /* No need to redo state machine in loop */
}
}
/*
* Determine if the ill and multicast aspects of the packet
* "matches" the conn.
*/
{
/*
* conn_incoming_ill is set by IPV6_BOUND_IF which limits
* unicast and multicast reception to conn_incoming_ill.
* conn_wantpacket_v6 is called both for unicast and
* multicast.
*/
return (B_FALSE);
} else {
return (B_FALSE);
}
}
if (connp->conn_multi_router)
return (B_TRUE);
if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) &&
/*
* Unicast case: we match the conn only if it's in the specified
* zone.
*/
}
if ((fanout_flags & IP_FF_NO_MCAST_LOOP) &&
/*
* Loopback case: the sending endpoint has IP_MULTICAST_LOOP
* disabled, therefore we don't dispatch the multicast packet to
* the sending zone.
*/
return (B_FALSE);
}
/*
* Multicast packet on the loopback interface: we only match
* conns who joined the group in the specified zone.
*/
return (B_FALSE);
}
return (wantpacket);
}
/*
* Transmit a packet and update any NUD state based on the flags
* XXX need to "recover" any ip6i_t when doing putq!
*
* NOTE : This function does not ire_refrele the ire passed in as the
* argument.
*/
void
{
if (!ill) {
ip0dbg(("ip_xmit_v6: ire_to_ill failed\n"));
return;
}
/* Flow-control check has been done in ip_wput_ire_v6 */
/*
* In most cases, the emission loop below is entered only
* once. Only in the case where the ire holds the
* RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT
* flagged ires in the bucket, and send the packet
* through all crossed RTF_MULTIRT routes.
*/
/*
* Multirouting case. The bucket where ire is stored
* probably holds other RTF_MULTIRT flagged ires
* to the destination. In this call to ip_xmit_v6,
* we attempt to send the packet through all
* those ires. Thus, we first ensure that ire is the
* first RTF_MULTIRT ire in the bucket,
* before walking the ire list.
*/
/* Make sure we do not omit any multiroute ire. */
&ire->ire_addr_v6)) &&
break;
}
/* ire will be released by the caller */
}
ILL_MDT_USABLE(ill)) {
/*
* This tcp connection was marked as MDT-capable, but
* it has been turned off due to changes in the interface.
* Now that the interface support is back, turn it on
* by notifying tcp. We don't directly modify tcp_mdt,
* since we leave all the details to the tcp code that
* knows better.
*/
ip0dbg(("ip_xmit_v6: can't re-enable MDT for "
"connp %p (ENOMEM)\n", (void *)connp));
} else {
}
}
do {
if (multirt_send) {
/*
* We are in a multiple send case, need to get
* the next ire and make a duplicate of the
* packet. ire1 holds here the next ire to
* process in the bucket. If multirouting is
* expected, any non-RTF_MULTIRT ire that has
* the right destination address is ignored.
*/
continue;
if (!IN6_ARE_ADDR_EQUAL(
&ire1->ire_addr_v6,
&ire->ire_addr_v6))
continue;
continue;
/* Got one */
}
break;
}
}
}
/* Last multiroute ire; don't loop anymore. */
}
}
/* Initiate IPPF processing */
}
return;
}
}
/*
* Check for fastpath, we need to hold nce_lock to
* prevent fastpath update from chaining nce_fp_mp.
*/
/*
* make sure there is room for the fastpath
* datalink header
*/
}
return;
}
/* Get the priority marking, if any */
} else {
/*
* fastpath - pre-pend datalink
* header
*/
fp_prepend = B_TRUE;
}
} else {
/*
* Get the DL_UNITDATA_REQ.
*/
ip1dbg(("ip_xmit_v6: No resolution "
"block ire = %p\n", (void *)ire));
}
return;
}
/*
* Prepend the DL_UNITDATA_REQ.
*/
}
return;
}
/* Get the priority marking, if any */
}
if (multirt_send) {
}
/*
* Proceed with the next RTF_MULTIRT
* ire, also set up the send-to queue
* accordingly.
*/
continue;
} else {
break;
}
}
/*
* The IP observability hook expects b_rptr to
* be where the IPv6 header starts, so advance
* past the link layer header.
*/
if (fp_prepend)
if (fp_prepend)
}
/*
* Update ire and MIB counters; for save_ire, this has
* been done by the caller.
*/
}
}
/*
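* XXX Should flow control be applied to IPsec-protected
* packets that carry TCP payloads? We don't flow
* control TCP packets, but we should also not
* flow-control TCP packets that have been protected.
* There is currently no easy way to tell whether a protected
* packet was originally TCP or not.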
*/
int, 0);
} else {
/*
* Safety Pup says: make sure this is
* going to the right interface!
*/
if (io->ipsec_out_capab_ill_index !=
ill_index) {
/* IPsec kstats: bump lose counter */
} else {
0);
}
}
}
if (multirt_send) {
/*
* Proceed with the next RTF_MULTIRT
* ire, also set up the send-to queue
* accordingly.
*/
continue;
}
return;
}
/*
* Check for upper layer advice
*/
if (flags & IPV6_REACHABILITY_CONFIRMATION) {
/*
* It should be o.k. to check the state without
* a lock here, at most we lose an advice.
*/
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("ip_xmit_v6: state"
" for %s changed to"
" REACHABLE\n", AF_INET6,
&ire->ire_addr_v6);
}
}
}
if (multirt_send) {
/*
* Proceed with the next RTF_MULTIRT
* ire, also set up the send-to queue
* accordingly.
*/
continue;
}
return;
}
" ill_reachable_time = %d \n", delta,
case ND_REACHABLE:
case ND_STALE:
/*
* ND_REACHABLE is identical to
* ND_STALE in this specific case. If
* reachable time has expired for this
* neighbor (delta is greater than
* reachable time), conceptually, the
* neighbor cache is no longer in
* REACHABLE state, but already in
* STALE state. So the correct
* transition here is to ND_DELAY.
*/
if (ip_debug > 3) {
/* ip2dbg */
pr_addr_dbg("ip_xmit_v6: state"
" for %s changed to"
" DELAY\n", AF_INET6,
&ire->ire_addr_v6);
}
break;
case ND_DELAY:
case ND_PROBE:
/* Timers have already started */
break;
case ND_UNREACHABLE:
/*
* ndp timer has detected that this nce
* is unreachable and initiated deleting
* this nce and all its associated IREs.
* This is a race where we found the
* ire before it was deleted and have
* just sent out a packet using this
* unreachable nce.
*/
break;
default:
ASSERT(0);
}
}
if (multirt_send) {
/*
* Proceed with the next RTF_MULTIRT ire,
* Also set up the send-to queue accordingly.
*/
}
}
} while (multirt_send);
/*
* In the multirouting case, release the last ire used for
* emission. save_ire will be released by the caller.
*/
}
} else {
/*
* Can't apply backpressure, just discard the packet.
*/
return;
}
}
/*
* pr_addr_dbg function provides the needed buffer space to call
* inet_ntop() function's 3rd argument. This function should be
* used by any kernel routine which wants to save INET6_ADDRSTRLEN
* stack buffer space in its own stack frame. This function uses
* a buffer from its own stack and prints the information.
* Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr)
*
* Note: This function can call inet_ntop() once.
*/
void
{
char buf[INET6_ADDRSTRLEN];
ip0dbg(("pr_addr_dbg: Wrong arguments\n"));
return;
}
/*
* This does not compare debug level and just prints
* out. Thus it is the responsibility of the caller
* to check the appropriate debug-level before calling
* this function.
*/
if (ip_debug > 0) {
}
}
/*
* Return the length in bytes of the IPv6 headers (base header, ip6i_t
* if needed and extension headers) that will be needed based on the
* ip6_pkt_t structure passed by the caller.
*
* The returned length does not include the length of the upper level
* protocol (ULP) header.
*/
int
{
int len;
len = IPV6_HDR_LEN;
}
}
/*
* En-route destination options
* Only do them if there's a routing header as well
*/
(IPPF_RTDSTOPTS|IPPF_RTHDR)) {
}
}
return (len);
}
/*
* All-purpose routine to build a header chain of an IPv6 header
* followed by any required extension headers and a proto header,
* preceded (where necessary) by an ip6i_t private header.
*
* The fields of the IPv6 header that are derived from the ip6_pkt_t
* will be filled in appropriately.
* Thus the caller must fill in the rest of the IPv6 header, such as
* the source address and hop limit (if not set here) and destination address.
*
* The extension headers and ip6i_t header will all be fully filled in.
*/
void
{
/*
* If sending private ip6i_t header down (checksum info, nexthop,
* or ifindex), adjust ip header pointer and set ip6i_t header pointer,
* then fill it in. (The checksum info will be filled in by icmp).
*/
ip6i->ip6i_flags = 0;
}
/*
* Enable per-packet source address verification if
* IPV6_PKTINFO specified the source address.
* ip6_src is set in the transport's _wput function.
*/
}
/*
* We need to set this flag so that IP doesn't
* rewrite the IPv6 header's hoplimit with the
* current default value.
*/
}
&ipp->ipp_nexthop));
}
/*
* tell IP this is an ip6i_t private header
*/
}
/* Initialize IPv6 header */
}
/*
* Here's where we have to start stringing together
* any extension headers in the right order:
* Hop-by-hop, destination, routing, and final destination opts.
*/
/* Hop-by-hop options */
}
/*
* En-route destination options
* Only do them if there's a routing header as well
*/
(IPPF_RTDSTOPTS|IPPF_RTHDR)) {
}
/*
* Routing header next
*/
}
/*
* Do ultimate destination options
*/
}
/*
* Now set the last header pointer to the proto passed in
*/
*nxthdr_ptr = protocol;
}
/*
* Return a pointer to the routing header extension header
* in the IPv6 header(s) chain passed in.
* If none found, return NULL
* Assumes that all extension headers are in same mblk as the v6 header
*/
{
return ((ip6_rthdr_t *)ptr);
/*
* The routing header will precede all extension headers
* other than the hop-by-hop and destination options
* extension headers, so if we see anything other than those,
* we're done and didn't find it.
* We could see a destination options header alone but no
* routing header, in which case we'll return NULL as soon as
* we see anything after that.
* Hop-by-hop and destination option headers are identical,
* so we can use either one we want as a template.
*/
/* Is there enough left for len + nexthdr? */
return (NULL);
switch (nexthdr) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
/* Assumes the headers are identical for hbh and dst */
break;
case IPPROTO_ROUTING:
return ((ip6_rthdr_t *)ptr);
case IPPROTO_FRAGMENT:
hdrlen = sizeof (ip6_frag_t);
break;
default:
return (NULL);
}
}
return (NULL);
}
/*
* Called for source-routed packets originating on this node.
* Manipulates the original routing header by moving every entry up
* one slot, placing the first entry in the v6 header's v6_dst field,
* and placing the ultimate destination in the routing header's last
* slot.
*
* Returns the checksum difference between the ultimate destination
* (last hop in the routing header when the packet is sent) and
* the first hop (ip6_dst when the packet is sent)
*/
/* ARGSUSED2 */
{
uint_t i;
/*
* Perform any processing needed for source routing.
* We know that all extension headers will be in the same mblk
* as the IPv6 header.
*/
/*
* If no segments left in header, or the header length field is zero,
* don't move hop addresses around;
* Checksum difference is zero.
*/
return (0);
cksm = 0;
for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
}
/*
* Here's where the fun begins - we have to
* move all addresses up one spot, take the
* first hop and make it our first ip6_dst,
* and place the ultimate destination in the
* newly-opened last slot.
*/
}
/*
* From the checksummed ultimate destination subtract the checksummed
* current ip6_dst (the first hop address). Return that number.
* (In the v4 case, the second part of this is done in each routine
* that calls ip_massage_options(). We do it all in this one place
* for v6).
*/
for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
}
if ((int)cksm < 0)
cksm--;
return (cksm);
}
/*
* Apply a multicast group membership operation on
* all interfaces crossed by the related multirt routes.
* The call is considered successful if the operation succeeds
* on at least one interface.
* The function is called if the destination address in the packet to send
* is multirouted.
*/
int
{
continue;
continue;
/* No resolver exists for the gateway; skip this ire. */
continue;
/*
* A resolver exists: we can get the interface on which we have
* to apply the operation.
*/
first_mp);
if (error == 0)
if (ip_debug > 0) {
char *ksym;
ip2dbg(("ip_multirt_apply_membership_v6: "
"called %s, multirt group 0x%08x via itf 0x%08x, "
"error %d [success %u]\n",
}
if (error == EINPROGRESS) {
return (error);
}
}
/*
* Consider the call as successful if we succeeded on at least
* one interface. Otherwise, return the last encountered error.
*/
}
void
{
ip6_stat_t template = {
{ "ip6_udp_fast_path", KSTAT_DATA_UINT64 },
{ "ip6_udp_slow_path", KSTAT_DATA_UINT64 },
{ "ip6_udp_fannorm", KSTAT_DATA_UINT64 },
{ "ip6_udp_fanmb", KSTAT_DATA_UINT64 },
{ "ip6_out_sw_cksum", KSTAT_DATA_UINT64 },
{ "ip6_in_sw_cksum", KSTAT_DATA_UINT64 },
{ "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 },
{ "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 },
{ "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 },
{ "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 },
{ "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 },
{ "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 },
{ "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 },
{ "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 },
{ "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 },
{ "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 },
{ "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 },
{ "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 },
{ "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 },
};
return (NULL);
return (ksp);
}
void
{
}
}
/*
* The following two functions set and get the value for the
* IPV6_SRC_PREFERENCES socket option.
*/
int
{
/*
* We only support preferences that are covered by
* IPV6_PREFER_SRC_MASK.
*/
if (prefs & ~IPV6_PREFER_SRC_MASK)
return (EINVAL);
/*
* Look for conflicting preferences or default preferences. If
* both bits of a related pair are clear, the application wants the
* system's default value for that pair. Both bits in a pair can't
* be set.
*/
if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) {
} else if ((prefs & IPV6_PREFER_SRC_MIPMASK) ==
return (EINVAL);
}
if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) {
} else if ((prefs & IPV6_PREFER_SRC_TMPMASK) ==
return (EINVAL);
}
if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) {
} else if ((prefs & IPV6_PREFER_SRC_CGAMASK) ==
return (EINVAL);
}
return (0);
}
{
return (sizeof (connp->conn_src_preferences));
}
int
{
/*
* Verify the source address and ifindex. Privileged users can use
* any source address. For ancillary data the source address is
* checked in ip_wput_v6.
*/
if (pkti->ipi6_ifindex != 0) {
return (ENXIO);
}
}
secpolicy_net_rawaccess(cr) != 0) {
else
return (ENXIO);
}
return (0);
}
/*
* Get the size of the IP options (including the IP headers size)
* without including the AH header's size. If till_ah is B_FALSE,
* and if AH header is present, dest options beyond AH header will
* also be included in the returned size.
*/
int
{
int ehdrlen;
int size;
size = IPV6_HDR_LEN;
for (;;) {
/* Assume IP has already stripped it */
switch (nexthdr) {
case IPPROTO_HOPOPTS:
break;
case IPPROTO_DSTOPTS:
break;
case IPPROTO_ROUTING:
break;
default :
if (till_ah) {
return (size);
}
/*
* If we don't have an AH header to traverse,
* return now. This happens normally for
* outbound datagrams where we have not inserted
* the AH header.
*/
if (nexthdr != IPPROTO_AH) {
return (size);
}
/*
* We don't include the AH header's size
* to be symmetrical with other cases where
* we either don't have an AH header (outbound)
* or don't peek into the AH header yet (inbound and
* not pulled up yet).
*/
if (nexthdr == IPPROTO_DSTOPTS) {
/*
* The destination options header
* is not part of the first mblk.
*/
} else {
}
}
return (size);
}
}
}
/*
* Utility routine that checks if `v6srcp' is a valid address on underlying
* interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif
* associated with `v6srcp' on success. NOTE: if this is not called from
* inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the
* group during or after this lookup.
*/
static boolean_t
{
else
return (B_TRUE);
}
if (ip_debug > 2) {
pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for "
}
return (B_FALSE);
}