ip6.c revision 01685f973ffa404db3bc35b99a86c94e268d6587
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 1990 Mentat Inc.
*/
#include <sys/sysmacros.h>
#define _SUN_TPI_VERSION 2
#include <sys/isa_defs.h>
#include <net/if_types.h>
#include <inet/tcp_impl.h>
#include <inet/udp_impl.h>
#include <inet/ipp_common.h>
#include <inet/ip_multi.h>
#include <inet/ipsec_impl.h>
#include <inet/ipclassifier.h>
#include <inet/rawip_impl.h>
#include <inet/rts_impl.h>
#include <sys/squeue_impl.h>
/* Temporary; for CR 6451644 work-around */
#include <sys/ethernet.h>
/*
* Naming conventions:
* These rules should be judiciously applied
* if there is a need to identify something as IPv6 versus IPv4
* IPv6 functions will end with _v6 in the ip module.
* IPv6 functions will end with _ipv6 in the transport modules.
* IPv6 macros:
* Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
* Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
* And then there are ..V4_PART_OF_V6.
* The intent is that macros in the ip module end with _V6.
* IPv6 global variables will start with ipv6_
* IPv6 structures will start with ipv6
* IPv6 defined constants should start with IPV6_
* (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
*/
/*
* We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
* from IANA. This mechanism will remain in effect until an official
* number is obtained.
*/
/*
* IPv6 address constant with every initializer set to all-ones.
* Passed as the mask argument to ire_create_v6() elsewhere in this file
* (together with RTF_HOST in the on-link redirect case).
*/
const in6_addr_t ipv6_all_ones =
{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
/* IPv6 address constant with every initializer zero. */
const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };
#ifdef _BIG_ENDIAN
#else /* _BIG_ENDIAN */
#endif /* _BIG_ENDIAN */
#ifdef _BIG_ENDIAN
#else /* _BIG_ENDIAN */
#endif /* _BIG_ENDIAN */
#ifdef _BIG_ENDIAN
#else /* _BIG_ENDIAN */
#endif /* _BIG_ENDIAN */
#ifdef _BIG_ENDIAN
#else /* _BIG_ENDIAN */
#endif /* _BIG_ENDIAN */
#ifdef _BIG_ENDIAN
#else /* _BIG_ENDIAN */
#endif /* _BIG_ENDIAN */
/*
* Solicited-node multicast prefix constant.
* The little-endian initializer holds the same values as the big-endian one
* with the bytes of each word swapped, so (assuming the four initializers
* fill 32-bit words of in6_addr_t -- TODO confirm against its declaration)
* the in-memory byte sequence ff 02 00 00 / 00 00 00 00 / 00 00 00 01 /
* ff 00 00 00 is the same on both kinds of build.
*/
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_solicited_node_mcast =
{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
#else /* _BIG_ENDIAN */
const in6_addr_t ipv6_solicited_node_mcast =
{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
#endif /* _BIG_ENDIAN */
ip_recv_attr_t *);
ip_recv_attr_t *);
in6_addr_t *, ip_recv_attr_t *);
ip_recv_attr_t *);
/*
* icmp_inbound_v6 deals with ICMP messages that are handled by IP.
* If the ICMP message is consumed by IP, i.e., it should not be delivered
* to any IPPROTO_ICMP raw sockets, then it returns NULL.
* Likewise, if the ICMP error is malformed (too short, etc), then it
* returns NULL. The caller uses this to determine whether or not to send
* to raw sockets.
*
* All error messages are passed to the matching transport stream.
*
* See comment for icmp_inbound_v4() on how IPsec is handled.
*/
mblk_t *
{
int ip_hdr_length; /* Outer header length */
/* Check for Martian packets */
return (NULL);
}
/* Make sure ira_l2src is set for ndp_input */
return (NULL);
}
return (NULL);
}
}
icmp6->icmp6_code));
/*
* We will set "interested" to "true" if we should pass a copy to
* the transport i.e., if it is an error message.
*/
switch (icmp6->icmp6_type) {
case ICMP6_DST_UNREACH:
break;
case ICMP6_TIME_EXCEEDED:
break;
case ICMP6_PARAM_PROB:
break;
case ICMP6_PACKET_TOO_BIG:
break;
case ICMP6_ECHO_REQUEST:
break;
/*
* We must have exclusive use of the mblk to convert it to
* a response.
* If not, we copy it.
*/
ip_drop_input("ipIfStatsInDiscards - copymsg",
return (NULL);
}
}
return (NULL);
case ICMP6_ECHO_REPLY:
break;
case ND_ROUTER_SOLICIT:
break;
case ND_ROUTER_ADVERT:
break;
case ND_NEIGHBOR_SOLICIT:
return (NULL);
case ND_NEIGHBOR_ADVERT:
return (NULL);
case ND_REDIRECT:
if (ipst->ips_ipv6_ignore_redirect)
break;
/* We now allow a RAW socket to receive this. */
interested = B_TRUE;
break;
/*
* The next three icmp messages will be handled by MLD.
* Pass all valid MLD packets up to any process(es)
* listening on a raw ICMP socket.
*/
case MLD_LISTENER_QUERY:
case MLD_LISTENER_REPORT:
case MLD_LISTENER_REDUCTION:
return (mp);
default:
break;
}
/*
* if there isn't one.
*/
/* If there is an ICMP client and we want one too, copy it. */
if (!interested) {
/* Caller will deliver to RAW sockets */
return (mp);
}
}
} else if (!interested) {
/* Neither we nor raw sockets are interested. Drop packet now */
return (NULL);
}
/*
* ICMP error or redirect packet. Make sure we have enough of
* the header and that db_ref == 1 since we might end up modifying
* the packet.
*/
ip_drop_input("ipIfStatsInDiscards - ip_pullup",
return (mp_ret);
}
}
return (mp_ret);
}
}
/*
* In case mp has changed, verify the message before any further
* processes.
*/
return (mp_ret);
}
switch (icmp6->icmp6_type) {
case ND_REDIRECT:
break;
case ICMP6_PACKET_TOO_BIG:
/* Update DCE and adjust MTU in icmp header if needed */
/* FALLTHRU */
default:
break;
}
return (mp_ret);
}
/*
* Send an ICMP echo reply.
* The caller has already updated the payload part of the packet.
* We handle the ICMP checksum, IP source address selection and feed
* the packet into ip_output_simple.
*/
static void
{
/*
* Remove any extension headers (do not reverse a source route)
* and clear the flow id (keep traffic class for now).
*/
if (ip_hdr_length != IPV6_HDR_LEN) {
int i;
for (i = 0; i < IPV6_HDR_LEN; i++) {
}
i -= (ip_hdr_length - IPV6_HDR_LEN);
}
/* Reverse the source and destination addresses. */
/* set the hop limit */
/*
* Prepare for checksum by putting icmp length in the icmp
* checksum field. The checksum is calculated in ip_output
*/
ixas.ixa_ifindex = 0;
/*
* This packet should go out the same way as it
* came in i.e in clear, independent of the IPsec
* policy for transmitting packets.
*/
} else {
/* Note: mp already consumed and ip_drop_packet done */
return;
}
}
/* Was the destination (now source) link-local? Send out same group */
if (IS_UNDER_IPMP(ill))
else
}
/*
* Not one of our addresses (IRE_LOCALs), thus we let
* ip_output_simple pick the source.
*/
}
/* Should we send using dce_pmtu? */
ixa_cleanup(&ixas);
}
/*
* Verify the ICMP message for either an ICMP error or a redirect packet.
* The caller should have fully pulled up the message. If it's a redirect
* packet, only basic checks on IP header will be done; otherwise, verify
* the packet by looking at the included ULP header.
*
* Called before icmp_inbound_error_fanout_v6 is called.
*/
static boolean_t
{
goto truncated;
hdr_length = sizeof (nd_redirect_t);
} else {
goto discard_pkt;
}
goto truncated;
/*
* Stop here for ICMP_REDIRECT.
*/
return (B_TRUE);
/*
* ICMP errors only.
*/
goto discard_pkt;
/* Try to pass the ICMP message to clients who need it */
switch (nexthdr) {
case IPPROTO_UDP:
/*
* Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
* transport header.
*/
goto truncated;
break;
case IPPROTO_TCP: {
/*
* Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
* transport header.
*/
goto truncated;
/*
* With IPMP we need to match across group, which we do
* since we have the upper ill from ira_ill.
*/
goto discard_pkt;
goto discard_pkt;
}
break;
}
case IPPROTO_SCTP:
/*
* Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
* transport header.
*/
goto truncated;
break;
case IPPROTO_ESP:
case IPPROTO_AH:
break;
case IPPROTO_ENCAP:
case IPPROTO_IPV6: {
/* Look for self-encapsulated packets that caused an error */
goto truncated;
break;
}
default:
break;
}
return (B_TRUE);
/* Bogus ICMP error. */
return (B_FALSE);
/* We pulled up everything already. Must be truncated */
return (B_FALSE);
}
/*
* Process received IPv6 ICMP Packet too big.
* The caller is responsible for validating the packet before passing it in
* and also to fanout the ICMP error to any matching transport conns. Assumes
* the message has been fully pulled up.
*
* Before getting here, the caller has called icmp_inbound_verify_v6()
* that should have verified with ULP to prevent undoing the changes we're
* going to make to DCE. For example, TCP might have verified that the packet
* which generated error is in the send window.
*
* In some cases this modifies the MTU in the ICMP header of the packet; the
* caller should pass it to the matching ULP after this returns.
*/
static void
{
int old_max_frag;
/* Caller has already pulled up everything. */
/*
* For link-local destinations, matching simply on the address is not
* sufficient. The same link-local address can exist on different
* ILLs.
*/
if (IN6_IS_ADDR_LINKSCOPE(&final_dst)) {
} else {
}
/* Couldn't add a unique one - ENOMEM */
if (ip_debug > 2) {
/* ip1dbg */
pr_addr_dbg("icmp_inbound_too_big_v6:"
"no dce for dst %s\n", AF_INET6,
&final_dst);
}
return;
}
else
if (mtu < IPV6_MIN_MTU) {
ip1dbg(("Received mtu less than IPv6 "
mtu = IPV6_MIN_MTU;
/*
* If an mtu less than IPv6 min mtu is received,
* we must include a fragment header in
* subsequent packets.
*/
} else {
}
/* Prepare to send the new max frag size for the ULP. */
/*
* If we need a fragment header in every packet
* (above case or multirouting), make sure the
* ULP takes it into account when computing the
* payload size.
*/
} else {
}
/* We now have a PMTU for sure */
/*
* After dropping the lock the new value is visible to everyone.
* Then we bump the generation number so any cached values reinspect
* the dce_t.
*/
}
/*
* Fanout received ICMPv6 error packets to the transports.
* Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
*
* The caller must have called icmp_inbound_verify_v6.
*/
void
{
/* Caller has already pulled up everything. */
goto drop_pkt;
/*
* We need a separate IP header with the source and destination
* addresses reversed to do fanout/classification because the ip6h in
* the ICMPv6 error is in the form we sent it out.
*/
/* Try to pass the ICMP message to clients who need it */
switch (nexthdr) {
case IPPROTO_UDP: {
/* Attempt to find a client stream based on port. */
/* Note that we send error to all matches. */
return;
}
case IPPROTO_TCP: {
/*
* Attempt to find a client stream based on port.
* Note that we do a reverse lookup since the header is
* in the form we sent it out.
*/
/*
* With IPMP we need to match across group, which we do
* since we have the upper ill from ira_ill.
*/
goto drop_pkt;
}
/* Note that mp is NULL */
return;
}
}
if (IPCL_IS_TCP(connp)) {
} else {
/* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
}
return;
}
case IPPROTO_SCTP:
/* Find a SCTP client stream for this packet. */
return;
case IPPROTO_ESP:
case IPPROTO_AH:
if (!ipsec_loaded(ipss)) {
return;
}
if (nexthdr == IPPROTO_ESP)
else
return;
/* Just in case ipsec didn't preserve the NULL b_cont */
goto drop_pkt;
}
/*
* If successful, the mp has been modified to not include
* error handler.
*/
goto drop_pkt;
/* Don't call hdr_length_v6() unless you have to. */
else
/* Verify the modified message before any further processes. */
return;
}
return;
case IPPROTO_IPV6: {
/* Look for self-encapsulated packets that caused an error */
/*
* Self-encapsulated case. As in the ipv4 case,
* we need to strip the 2nd IP header. Since mp
* is already pulled-up, we can simply bcopy
* the 3rd header + data over the 2nd header.
*/
/*
* Make sure we don't do recursion more than once.
*/
&unused_len, &nexthdrp) ||
*nexthdrp == IPPROTO_IPV6) {
goto drop_pkt;
}
/*
* Copy the 3rd header + remaining data on top
* of the 2nd header.
*/
/*
* Subtract length of the 2nd header.
*/
/* Don't call hdr_length_v6() unless you have to. */
else
/*
* Verify the modified message before any further
* processes.
*/
return;
}
/*
* Now recurse, and see what I _really_ should be
* doing here.
*/
return;
}
/* FALLTHRU */
}
case IPPROTO_ENCAP:
return;
}
/*
* No IP tunnel is interested, fallthrough and see
* if a raw socket will want it.
*/
/* FALLTHRU */
default:
return;
}
/* NOTREACHED */
ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
}
/*
* Process received IPv6 ICMP Redirect messages.
* Assumes the caller has verified that the headers are in the pulled up mblk.
* Consumes mp.
*/
/* ARGSUSED */
static void
{
int ncec_flags = 0;
int err = 0;
int len;
int optlen;
/*
* Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill
* and make it be the IPMP upper so as to avoid being confused by a packet
* addressed to a unicast address on a different ill.
*/
if (IS_UNDER_IPMP(rill)) {
ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill",
return;
}
}
/* Verify if it is a valid redirect */
if (!IN6_IS_ADDR_LINKLOCAL(src) ||
(rd->nd_rd_code != 0) ||
(len < sizeof (nd_redirect_t)) ||
(IN6_IS_ADDR_V4MAPPED(dst)) ||
(IN6_IS_ADDR_MULTICAST(dst))) {
goto fail_redirect;
}
if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway",
goto fail_redirect;
}
if (optlen != 0) {
ip_drop_input("ipv6IfIcmpInBadRedirects - options",
goto fail_redirect;
}
}
} else {
}
/*
* Verify that the IP source address of the redirect is
* the same as the current first-hop router for the specified
* ICMP destination address.
* Also, Make sure we had a route for the dest in question and
* that route was pointing to the old gateway (the source of the
* redirect packet.)
* We do longest match and then compare ire_gateway_addr_v6 below.
*/
/*
* Check that
* the redirect was not from ourselves
* old gateway is still directly reachable
*/
goto fail_redirect;
}
switch (err) {
case 0:
break;
case EEXIST:
/*
* Check to see if link layer address has changed and
* process the ncec_state accordingly.
*/
break;
default:
ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
err));
goto fail_redirect;
}
}
if (redirect_to_router) {
/*
* Create a Route Association. This will allow us to remember
* a router told us to use the particular gateway.
*/
ire = ire_create_v6(
dst,
&ipv6_all_ones, /* mask */
gateway, /* gateway addr */
NULL,
ipst);
} else {
/*
* Just create an on link entry, i.e. interface route.
* The gateway field is our link-local on the ill.
*/
break;
}
/* We have no link-local address! */
goto fail_redirect;
}
ire = ire_create_v6(
dst, /* gateway == dst */
&ipv6_all_ones, /* mask */
&gw, /* gateway addr */
(RTF_DYNAMIC | RTF_HOST),
NULL,
ipst);
}
goto fail_redirect;
/* Check if it was a duplicate entry */
}
/* tell routing sockets that we received a redirect */
&rd->nd_rd_target,
&ipv6_all_ones, 0, src,
/*
* Delete any existing IRE_HOST type ires for this destination.
* This together with the added IRE has the effect of
* modifying an existing redirect.
*/
NULL);
}
}
}
/*
* Build and ship an IPv6 ICMP message using the packet data in mp,
* and the ICMP header pointed to by "stuff". (May be called as
* writer.)
* Note: assumes that icmp_pkt_err_ok_v6 has been called to
* verify that an icmp error packet can be sent.
*
* If v6src_ptr is set use it as a source. Otherwise select a reasonable
* source address (see above function).
*/
static void
{
ixas.ixa_ifindex = 0;
/*
* If the source of the original packet was link-local, then
* make sure we send on the same ill (group) as we received it on.
*/
if (IS_UNDER_IPMP(ill))
else
}
/*
* Apply IPsec based on how IPsec was applied to
* the packet that had the error.
*
* If it was an outbound packet that caused the ICMP
* error, then the caller will have setup the IRA
* appropriately.
*/
/* Note: mp already consumed and ip_drop_packet done */
return;
}
} else {
/*
* This is in clear. The icmp message we are building
* here should go out in clear, independent of our policy.
*/
}
/*
* If the caller specified the source we use that.
* Otherwise, if the packet was for one of our unicast addresses, make
* sure we respond with that as the source. Otherwise
* have ip_output_simple pick the source address.
*/
} else {
} else {
}
}
if (msg_len > len_needed) {
return;
}
}
return;
}
/*
* Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this
* node generates be accepted in peace by all on-host destinations.
* If we do NOT assume that all on-host destinations trust
* (Look for IXAF_TRUSTED_ICMP).
*/
}
/*
* Prepare for checksum by putting icmp length in the icmp
* checksum field. The checksum is calculated in ip_output_wire_v6.
*/
}
ixa_cleanup(&ixas);
}
/*
* Update the output mib when ICMPv6 packets are sent.
*/
void
{
switch (icmp6->icmp6_type) {
case ICMP6_DST_UNREACH:
break;
case ICMP6_TIME_EXCEEDED:
break;
case ICMP6_PARAM_PROB:
break;
case ICMP6_PACKET_TOO_BIG:
break;
case ICMP6_ECHO_REQUEST:
break;
case ICMP6_ECHO_REPLY:
break;
case ND_ROUTER_SOLICIT:
break;
case ND_ROUTER_ADVERT:
break;
case ND_NEIGHBOR_SOLICIT:
break;
case ND_NEIGHBOR_ADVERT:
break;
case ND_REDIRECT:
break;
case MLD_LISTENER_QUERY:
break;
case MLD_LISTENER_REPORT:
case MLD_V2_LISTENER_REPORT:
break;
case MLD_LISTENER_REDUCTION:
break;
}
}
/*
* Check if it is ok to send an ICMPv6 error packet in
* response to the IP packet in mp.
* Free the message and return null if no
* ICMP error packet should be sent.
*/
static mblk_t *
{
if (!mp)
return (NULL);
/* We view multicast and broadcast as the same.. */
(IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0;
/* Check if source address uniquely identifies the host */
return (NULL);
}
return (NULL);
}
}
/* Explicitly do not generate errors in response to redirects */
return (NULL);
}
}
/*
* Check that the destination is not multicast and that the packet
* was not sent on link layer broadcast or multicast. (Exception
* is Packet too big message as per the draft - when mcast_ok is set.)
*/
if (!mcast_ok &&
return (NULL);
}
/*
* If this is a labeled system, then check to see if we're allowed to
* send a response to this particular sender. If not, then just drop.
*/
return (NULL);
}
if (icmp_err_rate_limit(ipst)) {
/*
* Only send ICMP error packets every so often.
* but for now this will suffice.
*/
return (NULL);
}
return (mp);
}
/*
* Called when a packet was sent out the same link that it arrived on.
* Check if it is ok to send a redirect and then send it.
*/
void
{
/*
* Don't send a redirect when forwarding a source
* routed packet.
*/
return;
/* Target is directly connected */
} else {
/* Determine the most specific IRE used to send the packets */
return;
/*
* We won't send redirects to a router
* that doesn't have a link local
* address, but will forward.
*/
return;
}
}
if (src_ire_v6 == NULL) {
return;
}
/*
* The source is directly connected.
*/
}
/*
* Generate an ICMPv6 redirect message.
* Include target link layer address option if it exists.
* Always include redirect header.
*/
static void
{
int len;
int ll_opt_len = 0;
int pkt_len;
return;
ip_drop_output("no IPMP ill for sending redirect",
return;
}
} else {
}
ll_opt_len = (sizeof (nd_opt_hdr_t) +
}
if (need_refrele)
return;
}
rd->nd_rd_code = 0;
rd->nd_rd_reserved = 0;
}
/* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */
/* Make sure mp is 8 byte aligned */
if (pkt_len > max_redir_hdr_data_len) {
sizeof (nd_opt_rd_hdr_t))/8;
} else {
}
rdh->nd_opt_rh_reserved1 = 0;
rdh->nd_opt_rh_reserved2 = 0;
/* ipif_v6lcl_addr contains the link-local source address */
/* Redirects sent by router, and router is global zone */
if (need_refrele)
}
/* Generate an ICMP time exceeded message. (May be called as writer.) */
void
{
return;
}
/*
* Generate an ICMP unreachable message.
* When called from ip_output side a minimal ip_recv_attr_t needs to be
* constructed by the caller.
*/
void
{
return;
}
/*
* Generate an ICMP pkt too big message.
* When called from ip_output side a minimal ip_recv_attr_t needs to be
* constructed by the caller.
*/
void
{
return;
icmp6.icmp6_code = 0;
}
/*
* Generate an ICMP parameter problem message. (May be called as writer.)
* 'offset' is the offset from the beginning of the packet in error.
* When called from ip_output side a minimal ip_recv_attr_t needs to be
* constructed by the caller.
*/
static void
{
return;
}
void
{
/* Determine the offset of the bad nexthdr value */
/* Malformed packet */
return;
}
}
/*
* Verify whether or not the IP address is a valid local address.
* Could be a unicast, including one for a down interface.
* If allow_mcbc then a multicast or broadcast address is also
* acceptable.
*
* In the case of a multicast address, however, the
* upper protocol is expected to reset the src address
* to zero when we return IPVL_MCAST so that
* no packets are emitted with multicast address as
* source address.
* The addresses valid for bind are:
* (1) - in6addr_any
* (2) - IP address of an UP interface
* (3) - IP address of a DOWN interface
* (4) - a multicast address. In this case
* the conn will only receive packets destined to
* the specified multicast address. Note: the
* application still has to issue an
* IPV6_JOIN_GROUP socket option.
*
* In all the above cases, the bound address must be valid in the current zone.
* When the address is loopback or multicast, there might be many matching IREs
* so bind has to look up based on the zone.
*/
{
if (scopeid != 0) {
return (IPVL_BAD);
}
/*
* If an address other than in6addr_any is requested,
* we verify that it is a valid address for bind
* Note: Following code is in if-else-if form for
* readability compared to a condition check.
*/
/*
* (2) Bind to address of local UP interface
*/
return (IPVL_UNICAST_UP);
} else if (IN6_IS_ADDR_MULTICAST(v6src)) {
/* (4) bind to multicast address. */
/*
* Note: caller should take IPV6_MULTICAST_IF
* into account when selecting a real source address.
*/
if (allow_mcbc)
return (IPVL_MCAST);
else
return (IPVL_BAD);
} else {
/*
* (3) Bind to address of local DOWN interface?
* (ipif_lookup_addr() looks up all interfaces
* but we do not get here for UP interfaces
* - case (2) above)
*/
return (IPVL_BAD);
/* Not a useful source? */
return (IPVL_BAD);
}
return (IPVL_UNICAST_DOWN);
}
}
/*
* Verify that both the source and destination addresses are valid. If
* IPDF_VERIFY_DST is not set, then the destination address may be unreachable,
* i.e. have no route to it. Protocols like TCP want to verify destination
* reachability, while tunnels do not.
*
* Determine the route, the interface, and (optionally) the source address
* to use to reach a given destination.
* Note that we allow connect to broadcast and multicast addresses when
* IPDF_ALLOW_MCBC is set.
* first_hop and dst_addr are normally the same, but if source routing
* they will differ; in that case the first_hop is what we'll use for the
* routing lookup but the dce and label checks will be done on dst_addr.
*
* If uinfo is set, then we fill in the best available information
* we have for the destination. This is based on (in priority order) any
* metrics and path MTU stored in a dce_t, route metrics, and finally the
* ill_mtu.
*
* Tsol note: If we have a source route then dst_addr != firsthop. But we
* always do the label check on dst_addr.
*
* Assumes that the caller has set ixa_scopeid for link-local communication.
*/
int
{
int error = 0;
/*
* We never send to zero; the ULPs map it to the loopback address.
* We can't allow it since we use zero to mean uninitialized in some
* places.
*/
if (is_system_labeled()) {
if (error != 0)
return (error);
/* Update the label */
}
}
/*
* Select a route; For IPMP interfaces, we would only select
* a "hidden" route (i.e., going through a specific under_ill)
* if ixa_ifindex has been specified.
*/
if (error != 0)
goto bad_addr;
/*
* ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set.
* If IPDF_VERIFY_DST is set, the destination must be reachable.
* Otherwise the destination needn't be reachable.
*
* If we match on a reject or black hole, then we've got a
* local failure. May as well fail out the connect() attempt,
* since it's never going to succeed.
*/
/*
* If we're verifying destination reachability, we always want
* to complain here.
*
* If we're not verifying destination reachability but the
* destination has a route, we still want to fail on the
* temporary address and broadcast address tests.
*
* In both cases we let the code continue so some reasonable
* information is returned to the caller. That enables the
* caller to use (and even cache) the IRE. conn_ip_output will
* use the generation mismatch path to check for the unreachable
* case thereby avoiding any specific check in the main path.
*/
if (flags & IPDF_VERIFY_DST) {
/*
* Set errno but continue to set up ixa_ire to be
* the RTF_REJECT|RTF_BLACKHOLE IRE.
* That allows callers to use ip_output to get an
* ICMP error back.
*/
error = ENETUNREACH;
else
}
}
!(flags & IPDF_ALLOW_MCBC)) {
error = ENETUNREACH;
}
/* Cache things */
#ifdef DEBUG
#endif
/*
* For multicast with multirt we have a flag passed back from
* ire_lookup_multi_ill_v6 since we don't have an IRE for each
* possible multicast address.
* We also need a flag for multicast since we can't check
* whether RTF_MULTIRT is set in ixa_ire for multicast.
*/
if (multirt) {
} else {
}
/* Get an nce to cache. */
/* Allocation failure? */
} else {
}
}
/*
* If the source address is a loopback address, the
* destination had best be local or multicast.
* If we are sending to an IRE_LOCAL using a loopback source then
* it had better be the same zoneid.
*/
if (IN6_IS_ADDR_LOOPBACK(src_addrp)) {
goto bad_addr;
}
goto bad_addr;
}
}
/*
* Does the caller want us to pick a source address?
*/
if (flags & IPDF_SELECT_SRC) {
/*
* We use ire_nexthop_ill to avoid the under ipmp
* interface for source address selection. Note that for ipmp
* probe packets, ixa_ifindex would have been specified, and
* the ip_select_route() invocation would have picked an ire
* with ire_ill pointing at an under interface.
*/
/* If unreachable we have no ill but need some source */
/* Make sure we look for a better source address */
} else {
if (error != 0) {
goto bad_addr;
}
}
/*
* We allow the source address to be down.
* However, we check that we don't use the loopback address
* as a source when sending out on the wire.
*/
if (IN6_IS_ADDR_LOOPBACK(&src_addr) &&
goto bad_addr;
}
}
/*
* Make sure we don't leave an unreachable ixa_nce in place
* since ip_select_route is used when we unplumb i.e., remove
* references on ixa_ire, ixa_nce, and ixa_dce.
*/
}
ifindex = 0;
if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) {
/* If we are creating a DCE we'd better have an ifindex */
else
flags &= ~IPDF_UNIQUE_DCE;
}
if (flags & IPDF_UNIQUE_DCE) {
/* Fallback to the default dce if allocation fails */
} else {
&generation);
}
} else {
}
#ifdef DEBUG
#endif
/*
* Note that IPv6 multicast supports PMTU discovery unlike IPv4
* multicast. But pmtu discovery is only enabled for connected
* sockets in general.
*/
/*
* Set initial value for fragmentation limit. Either conn_ip_output
* or the ULP might update it when there are routing changes.
* Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT.
*/
/* Make sure ixa_fragsize and ixa_pmtu remain identical */
/*
* Extract information useful for some transports.
* First we look for DCE metrics. Then we take what we have in
* the metrics in the route, where the offlink is used if we have
* one.
*/
/* Allow ire_metrics to decrease the path MTU from above */
}
return (error);
/*
* Make sure we don't leave an unreachable ixa_nce in place
* since ip_select_route is used when we unplumb i.e., remove
* references on ixa_ire, ixa_nce, and ixa_dce.
*/
}
return (error);
}
/*
* Handle protocols with which IP is less intimate. There
* can be more than one stream bound to a particular
* protocol. When this is the case, normally each one gets a copy
* of any incoming packets.
*
* Zones notes:
* Packets will be distributed to conns in all zones. This is really only
* useful for ICMPv6 as only applications in the global zone can create raw
* sockets for other protocols.
*/
void
{
/* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
break;
}
/*
* No one bound to this port. Is
* there a client that wants all
* unclaimed datagrams?
*/
return;
}
first_connp = connp;
/*
* XXX: Fix the multiple protocol listeners case. We should not
* be walking the conn->conn_next list here.
*/
for (;;) {
/* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
break;
}
/* No more interested clients */
connp = first_connp;
break;
}
/* Memory allocation failed */
connp = first_connp;
break;
}
ira);
/* Follow the next pointer before releasing the conn. */
connp = next_connp;
}
/* Last one. Send it upstream. */
}
/*
* Called when it is conceptually a ULP that would send the packet
* e.g., port unreachable and nexthdr unknown. Check that the packet
* would have passed the IPsec global policy before sending the error.
*
* Send an ICMP error after patching up the packet appropriately.
* Uses ip_drop_input and bumps the appropriate MIB.
* For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use.
*/
void
{
/*
* We are generating an icmp error for some inbound packet.
* Called from all ip_fanout_(udp, tcp, proto) functions.
* Before we generate an error, check with global policy
* to see whether this is allowed to enter the system. As
* there is no "conn", we are checking with global policy.
*/
return;
}
/* We never send errors for protocols that we do implement */
return;
}
switch (icmp_type) {
case ICMP6_DST_UNREACH:
break;
case ICMP6_PARAM_PROB:
/* Let the system determine the offset for this one */
break;
default:
#ifdef DEBUG
panic("ip_fanout_send_icmp_v6: wrong type");
/*NOTREACHED*/
#else
break;
#endif
}
}
/*
* Fanout for UDP packets that are multicast or ICMP errors.
* (Unicast fanout is handled in ip_input_v6.)
*
* If SO_REUSEADDR is set all multicast packets
* will be delivered to all conns bound to the same port.
*
* Fanout for UDP packets.
* The caller puts <fport, lport> in the ports parameter.
* ire_type must be IRE_BROADCAST for multicast and broadcast packets.
*
* If SO_REUSEADDR is set all multicast and broadcast packets
* will be delivered to all conns bound to the same port.
*
* Zones notes:
* Earlier in ip_input on a system with multiple shared-IP zones we
* duplicate the multicast and broadcast packets and send them up
* with each explicit zoneid that exists on that ill.
* This means that here we can match the zoneid with SO_ALLZONES being special.
*/
void
{
/* Attempt to find a client stream based on destination port. */
break;
}
goto notfound;
if (connp->conn_reuseaddr) {
for (;;) {
break;
}
/* No more interested clients */
connp = first_connp;
break;
}
/* Memory allocation failed */
connp = first_connp;
break;
}
/* Follow the next pointer before releasing the conn. */
connp = next_connp;
}
}
/* Last one. Send it upstream. */
return;
/*
* No one bound to this port. Is
* there a client that wants all
* unclaimed datagrams?
*/
} else {
}
}
/*
* int ip_find_hdr_v6()
*
* This routine is used by the upper layer protocols, iptun, and IPsec:
* - Set extension header pointers to appropriate locations
* - Determine IPv6 header length and return it
* - Return a pointer to the last nexthdr value
*
* The caller must initialize ipp_fields.
* The upper layer protocols normally set label_separate which makes the
* routine put the TX label in ipp_label_v6. If this is not set then
* the hop-by-hop options including the label are placed in ipp_hopopts.
*
* NOTE: If multiple extension headers of the same type are present,
* ip_find_hdr_v6() will set the respective extension header pointers
* to the first one that it encounters in the IPv6 header. It also
* skips fragment headers. This routine deals with malformed packets
* of various sorts in which case the returned length is up to the
* malformed part.
*/
int
{
/* Is there enough left for len + nexthdr? */
goto done;
switch (nexthdr) {
case IPPROTO_HOPOPTS: {
/* We check for any CIPSO */
goto done;
if (!label_separate) {
} else {
/*
* We have dropped packets with bad options in
* ip6_input. No need to check return value
* here.
*/
}
} else {
ipp->ipp_label_len_v6 = 0;
hbh_needed = B_TRUE;
}
/* return only 1st hbh */
}
break;
}
case IPPROTO_DSTOPTS:
goto done;
/*
* ipp_dstopts is set to the destination header after a
* routing header.
* Assume it is a post-rthdr destination header
* and adjust when we find an rthdr.
*/
}
break;
case IPPROTO_ROUTING:
goto done;
/* return only 1st rthdr */
}
/*
* Make any destination header we've seen be a
* pre-rthdr destination header.
*/
ipp->ipp_dstoptslen = 0;
}
break;
case IPPROTO_FRAGMENT:
ehdrlen = sizeof (ip6_frag_t);
goto done;
}
break;
case IPPROTO_NONE:
default:
goto done;
}
}
done:
return (length);
}
/*
* Try to determine where and what are the IPv6 header length and
* pointer to nexthdr value for the upper layer protocol (or an
* unknown next hdr).
*
* The output parameters return the header length and a pointer to the
* nexthdr value.
* Must handle malformed packets of various sorts.
* Function returns failure for malformed cases.
*/
{
/* Is there enough left for len + nexthdr? */
break;
switch (*nexthdrp) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
/* Assumes the headers are identical for hbh and dst */
return (B_FALSE);
break;
case IPPROTO_ROUTING:
return (B_FALSE);
break;
case IPPROTO_FRAGMENT:
ehdrlen = sizeof (ip6_frag_t);
return (B_FALSE);
break;
case IPPROTO_NONE:
/* No next header means we're finished */
default:
*hdr_length_ptr = length;
return (B_TRUE);
}
*hdr_length_ptr = length;
}
switch (*nexthdrp) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
case IPPROTO_ROUTING:
case IPPROTO_FRAGMENT:
/*
* If any known extension headers are still to be processed,
* the packet's malformed (or at least all the IP header(s) are
* not in the same mblk - and that should never happen).
*/
return (B_FALSE);
default:
/*
* If we get here, we know that all of the IP headers were in
* the same mblk, even if the ULP header is in the next mblk.
*/
*hdr_length_ptr = length;
return (B_TRUE);
}
}
/*
* Return the length of the IPv6 related headers (including extension headers)
* Returns a length even if the packet is malformed.
*/
int
{
return (hdr_len);
}
/*
* Parse and process any hop-by-hop or destination options.
*
* Assumes that q is an ill read queue so that ICMP errors for link-local
* destinations are sent out the correct interface.
*
* Returns -1 if there was an error and mp has been consumed.
* Returns 0 if no special action is needed.
* Returns 1 if the packet contained a router alert option for this node
* which is verified to be "interesting/known" for our implementation.
*
* XXX Note: In future as more hbh or dest options are defined,
* it may be better to have different routines for hbh and dest
* options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN
* may have same value in different namespaces. Or is it same namespace ??
* Current code checks for each opt_type (other than pads) if it is in
* the expected nexthdr (hbh or dest)
*/
int
{
int ret = 0;
const char *errtype;
while (optlen != 0) {
if (opt_type == IP6OPT_PAD1) {
optused = 1;
} else {
if (optlen < 2)
goto bad_opt;
errtype = "malformed";
goto bad_opt;
} else switch (opt_type) {
case IP6OPT_PADN:
/*
* Note:We don't verify that (N-2) pad octets
* are zero as required by spec. Adhere to
* "be liberal in what you accept..." part of
* implementation philosophy (RFC791,RFC1122)
*/
goto bad_opt;
break;
case IP6OPT_JUMBO:
if (hdr_type != IPPROTO_HOPOPTS)
goto opt_error;
goto opt_error; /* XXX Not implemented! */
case IP6OPT_ROUTER_ALERT: {
struct ip6_opt_router *or;
if (hdr_type != IPPROTO_HOPOPTS)
goto opt_error;
goto bad_opt;
/* Check total length and alignment */
goto opt_error;
/* Check value */
case IP6_ALERT_MLD:
case IP6_ALERT_RSVP:
ret = 1;
}
break;
}
case IP6OPT_HOME_ADDRESS: {
/*
* Minimal support for the home address option
* (which is required by all IPv6 nodes).
* Implement by just swapping the home address
* and source address.
* XXX Note: this has IPsec implications since
* AH needs to take this into account.
* Also, when IPsec is used we need to ensure
* that this is only processed once
* in the received packet (to avoid swapping
* back and forth).
* NOTE:This option processing is considered
* to be unsafe and prone to a denial of
* service attack.
* The current processing is not safe even with
* IPsec secured IP packets. Since the home
* address option processing requirement still
* is in the IETF draft and in the process of
* being redefined for its usage, it has been
* decided to turn off the option by default.
* If this section of code needs to be executed,
* ndd variable ip6_ignore_home_address_opt
* should be set to 0 at the user's own risk.
*/
struct ip6_opt_home_address *oh;
goto opt_error;
if (hdr_type != IPPROTO_DSTOPTS)
goto opt_error;
goto bad_opt;
/*
* We did this dest. opt the first time
* around (i.e. before AH processing).
* If we've done AH... stop now.
*/
break;
/* Check total length and alignment */
goto opt_error;
/* Swap ip6_src and the home address */
/* XXX Note: only 8 byte alignment option */
break;
}
case IP6OPT_TUNNEL_LIMIT:
if (hdr_type != IPPROTO_DSTOPTS) {
goto opt_error;
}
goto bad_opt;
}
if (optused != 3) {
goto opt_error;
}
break;
default:
errtype = "unknown";
/* FALLTHROUGH */
/* Determine which zone should send error */
switch (IP6OPT_TYPE(opt_type)) {
case IP6OPT_TYPE_SKIP:
goto bad_opt;
ip1dbg(("ip_process_options_v6: %s "
"opt 0x%x skipped\n",
break;
case IP6OPT_TYPE_DISCARD:
ip1dbg(("ip_process_options_v6: %s "
"opt 0x%x; packet dropped\n",
ip_drop_input("ipIfStatsInHdrErrors",
return (-1);
case IP6OPT_TYPE_ICMP:
ip_drop_input("ipIfStatsInHdrErrors",
return (-1);
case IP6OPT_TYPE_FORCEICMP:
ip_drop_input("ipIfStatsInHdrErrors",
return (-1);
default:
ASSERT(0);
}
}
}
}
return (ret);
/* Determine which zone should send error */
return (-1);
}
/*
* Process a routing header that is not yet empty.
* Because of RFC 5095, we now reject all route headers.
*/
void
{
if (!ipst->ips_ipv6_forward_src_routed) {
/* XXX Check for source routed out same interface? */
return;
}
}
/*
* Read side put procedure for IPv6 module.
*/
void
{
union DL_primitives *dl;
/*
* Things are opening or closing - only accept DLPI
* ack messages. If the stream is closing and ip_wsrv
* has completed, ip_close is out of the qwait, but has
* not yet completed qprocsoff. Don't proceed any further
* because the ill has been cleaned up and things hanging
* off the ill have been freed.
*/
return;
}
}
struct mac_header_info_s mhi;
} else {
}
}
/*
* Walk through the IPv6 packet in mp and see if there's an AH header
* in it. See if the AH header needs to get done before other headers in
* the packet. (Worker function for ipsec_early_ah_v6().)
*/
#define IPSEC_HDR_DONT_PROCESS 0
#define IPSEC_HDR_PROCESS 1
static int
{
/*
* For now just pullup everything. In general, the less pullups,
* the better, but there's so much squirrelling through anyway,
* it's just easier this way.
*/
return (IPSEC_MEMORY_ERROR);
}
/*
* We can't just use the argument nexthdr in the place
* of nexthdrp because we don't dereference nexthdrp
* till we confirm whether it is a valid address.
*/
/* Is there enough left for len + nexthdr? */
return (IPSEC_MEMORY_ERROR);
switch (*nexthdrp) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
/* Assumes the headers are identical for hbh and dst */
return (IPSEC_MEMORY_ERROR);
/*
* Return DONT_PROCESS because the destination
* options header may be for each hop in a
* routing-header, and we only want AH if we're
* finished with routing headers.
*/
if (*nexthdrp == IPPROTO_DSTOPTS)
return (IPSEC_HDR_DONT_PROCESS);
break;
case IPPROTO_ROUTING:
/*
* If there's more hops left on the routing header,
* return now with DON'T PROCESS.
*/
if (rthdr->ip6r_segleft > 0)
return (IPSEC_HDR_DONT_PROCESS);
return (IPSEC_MEMORY_ERROR);
break;
case IPPROTO_FRAGMENT:
/* Wait for reassembly */
return (IPSEC_HDR_DONT_PROCESS);
case IPPROTO_AH:
*nexthdr = IPPROTO_AH;
return (IPSEC_HDR_PROCESS);
case IPPROTO_NONE:
/* No next header means we're finished */
default:
return (IPSEC_HDR_DONT_PROCESS);
}
}
/*
*/
return (IPSEC_MEMORY_ERROR);
}
/*
* Path for AH if options are present.
* Returns NULL if the mblk was consumed.
*
* Sometimes AH needs to be done before other IPv6 headers for security
* reasons. This function (and its ipsec_needs_processing_v6() above)
* indicates if that is so, and fans out to the appropriate IPsec protocol
* for the datagram passed in.
*/
mblk_t *
{
case IPSEC_MEMORY_ERROR:
return (NULL);
case IPSEC_HDR_DONT_PROCESS:
return (mp);
}
/* Default means send it to AH! */
if (!ipsec_loaded(ipss)) {
return (NULL);
}
return (NULL);
/*
* Either it failed or is pending. In the former case
* ipIfStatsInDiscards was increased.
*/
return (NULL);
}
/* we're done with IPsec processing, send it up */
return (NULL);
}
/*
* Reassemble fragment.
* When it returns a completed message the first mblk will only contain
* the headers prior to the fragment header, with the nexthdr value updated
* to be the header after the fragment header.
*/
mblk_t *
{
/*
* We utilize hardware computed checksum info only for UDP since
* IP fragmentation is a normal occurrence for the protocol. In
* addition, checksum offload support for IP fragments carrying
* UDP payload is commonly implemented across network adapters.
*/
/* Record checksum information from the packet */
/* fragmented payload offset from beginning of mblk */
if ((sum_flags & HCK_PARTIALCKSUM) &&
/*
* Partial checksum has been calculated by hardware
* and attached to the packet; in addition, any
* prepended extraneous data is even byte aligned.
* If any such data exists, we adjust the checksum;
* this would also handle any postpended data.
*/
/* One's complement subtract extraneous checksum */
else
}
} else {
sum_val = 0;
sum_flags = 0;
}
/* Clear hardware checksumming flag */
DB_CKSUMFLAGS(mp) = 0;
/*
* Determine the offset (from the beginning of the IP header)
* of the nexthdr value which has IPPROTO_FRAGMENT. We use
* this when removing the fragment header from the packet.
* This packet consists of the IPv6 header, a potential
* hop-by-hop options header, a potential pre-routing-header
* destination options header, and a potential routing header.
*/
if (prev_nexthdr == IPPROTO_HOPOPTS) {
}
if (prev_nexthdr == IPPROTO_DSTOPTS) {
}
if (prev_nexthdr == IPPROTO_ROUTING) {
}
if (prev_nexthdr != IPPROTO_FRAGMENT) {
/* Can't handle other headers before the fragment header */
return (NULL);
}
/*
* Note: Fragment offset in header is in 8-octet units.
* Clearing least significant 3 bits not only extracts
* it but also gets it in units of octets.
*/
/*
* Is the more frags flag on and the payload length not a multiple
* of eight?
*/
return (NULL);
}
/*
* Would fragment cause reassembled packet to have a payload length
* greater than IP_MAXPACKET - the max payload size?
*/
if (end > IP_MAXPACKET) {
return (NULL);
}
/*
* This packet just has one fragment. Reassembly not
* needed.
*/
if (!more_frags && offset == 0) {
goto reass_done;
}
/*
* Drop the fragment as early as possible, if
* we don't have resource(s) to re-assemble.
*/
if (ipst->ips_ip_reass_queue_bytes == 0) {
return (NULL);
}
/* Record the ECN field info. */
/*
* If this is not the first fragment, dump the unfragmentable
* portion of the packet.
*/
if (offset)
/*
* Fragmentation reassembly. Each ILL has a hash table for
* queueing packets undergoing reassembly for all IPIFs
* associated with the ILL. The hash is based on the packet
* IP ident field. The ILL frag hash table was allocated
* as a timer block at the time the ILL was created. Whenever
* there is anything on the reassembly queue, the timer will
* be running.
*/
/* Handle vnic loopback of fragments */
msg_len = 0;
else
}
/*
* If the reassembly list for this ILL will get too big
* prune it.
*/
}
/* Try to find an existing fragment queue for this packet. */
for (;;) {
if (ipf) {
/*
* It has to match on ident, source address, and
* dest address.
*/
/*
* If we have received too many
* duplicate fragments for this packet
* free it.
*/
return (NULL);
}
break;
}
continue;
}
/*
* If we pruned the list, do we want to store this new
* fragment? We apply an optimization here based on the
* fact that most fragments will be received in order.
* So if the offset of this incoming fragment is zero,
* it is the first fragment of a new packet. We will
* keep it. Otherwise drop the fragment, as we have
* probably pruned the packet already (since the
* packet cannot be found).
*/
return (NULL);
}
/* New guy. Allocate a frag message. */
if (!mp1) {
return (NULL);
}
/*
* Too many fragmented packets in this hash bucket.
* Free the oldest.
*/
}
/* Initialize the fragment header. */
/* Record reassembly start time. */
/* Record ipf generation and account for frag header */
ipf->ipf_nf_hdr_len = 0;
ipf->ipf_prev_nexthdr_offset = 0;
ipf->ipf_num_dups = 0;
ipfb->ipfb_frag_pkts++;
ipf->ipf_checksum = 0;
ipf->ipf_checksum_flags = 0;
/* Store checksum value in fragment header */
if (sum_flags != 0) {
}
/*
* We handle reassembly two ways. In the easy case,
* where all the fragments show up in order, we do
* minimal bookkeeping, and just clip new pieces on
* the end. If we ever see a hole, then we go off
* to ip_reassemble which has to mark the pieces and
* keep track of the number of holes, etc. Obviously,
* the point of having both mechanisms is so we can
* handle the easy case as efficiently as possible.
*/
if (offset == 0) {
/* Easy case, in-order reassembly so far. */
/* Update the byte count */
/*
* Keep track of next expected offset in
* ipf_end.
*/
} else {
/* Hard case, hole at the beginning. */
/*
* ipf_end == 0 means that we have given up
* on easy reassembly.
*/
/* Forget checksum offload from now on */
ipf->ipf_checksum_flags = 0;
/*
* ipf_hole_cnt is set by ip_reassemble.
* ipf_count is updated by ip_reassemble.
* No need to check for return value here
* as we don't expect reassembly to complete or
* fail for the first fragment itself.
*/
msg_len);
}
/* Update per ipfb and ill byte counts */
/* If the frag timer wasn't already going, start it. */
goto partial_reass_done;
}
/*
* If the packet's flag has changed (it could be coming up
* from an interface different than the previous, therefore
* possibly different checksum capability), then forget about
* any stored checksum states. Otherwise add the value to
* the existing one stored in the fragment header.
*/
} else if (ipf->ipf_checksum_flags != 0) {
/* Forget checksum offload from now on */
ipf->ipf_checksum_flags = 0;
}
/*
* We have a new piece of a datagram which is already being
* reassembled. Update the ECN info if all IP fragments
* are ECN capable. If there is one which is not, clear
* all the info. If there is at least one which has CE
* code point, IP needs to report that up to transport.
*/
if (ecn_info == IPH_ECN_CE)
} else {
}
/* The new fragment fits at the end */
/* Update the byte count */
/* Update per ipfb and ill byte counts */
if (more_frags) {
/* More to come. */
goto partial_reass_done;
}
} else {
/*
* Go do the hard cases.
* Call ip_reassemble().
*/
int ret;
if (offset == 0) {
if (ipf->ipf_prev_nexthdr_offset == 0) {
}
}
/* Save current byte count */
/* Count of bytes added and subtracted (freeb()ed) */
if (count) {
/* Update per ipfb and ill byte counts */
}
if (ret == IP_REASS_PARTIAL) {
goto partial_reass_done;
} else if (ret == IP_REASS_FAILED) {
/* Reassembly failed. Free up all resources */
IP_REASS_SET_START(t_mp, 0);
IP_REASS_SET_END(t_mp, 0);
}
goto partial_reass_done;
}
/* We will reach here iff 'ret' is IP_REASS_COMPLETE */
}
/*
* We have completed reassembly. Unhook the frag header from
* the reassembly list.
*
* Grab the unfragmentable header length next header value out
* of the first fragment
*/
/*
* Before we free the frag header, record the ECN info
* to report back to the transport.
*/
/*
* Store the nextheader field in the header preceding the fragment
* header
*/
/* We need to supply these to caller */
else
sum_val = 0;
if (ipf)
ipfb->ipfb_frag_pkts--;
/* Ditch the frag header. */
/*
* Make sure the packet is good by doing some sanity
* check. If bad we can silently drop the packet.
*/
if (hdr_length < sizeof (ip6_frag_t)) {
ip1dbg(("ip_input_fragment_v6: bad packet\n"));
return (NULL);
}
/*
* Remove the fragment header from the initial header by
* splitting the mblk into the non-fragmentable header and
* everything after the fragment extension header. This has the
* side effect of putting all the headers that need destination
* processing into the b_cont block-- on return this fact is
* used in order to avoid having to look at the extensions
* already processed.
*
* Note that this code assumes that the unfragmentable portion
* of the header is in the first mblk and increments
* the read pointer past it. If this assumption is broken
* this code fails badly.
*/
ip1dbg(("ip_input_fragment_v6: dupb failed\n"));
return (NULL);
}
}
/* Restore original IP length in header. */
/* Record the ECN info. */
/* Update the receive attributes */
/* Reassembly is successful; set checksum information in packet */
return (mp);
}
/*
* Given an mblk and a ptr, find the destination address in an IPv6 routing
* header.
*/
static in6_addr_t
{
return (rv);
}
/*
* Corrupt packet. Either the routing header length is odd
* (can't happen) or mismatched compared to the packet, or the
* number of addresses is. Return what we can. This will
* only be a problem on forwarded packets that get squeezed
* through an outbound tunnel enforcing IPsec Tunnel Mode.
*/
whereptr);
return (rv);
}
if (segleft != 0) {
}
return (rv);
}
/*
* Walk through the options to see if there is a routing header.
* If present get the destination which is the last address of
* the option.
* mp needs to be provided in cases when the extension headers might span
* b_cont; mp is never modified by this function.
*/
{
int ehdrlen;
/* We assume at least the IPv6 base header is within one mblk. */
if (is_fragment != NULL)
*is_fragment = B_FALSE;
/*
* We also assume (thanks to ipsec_tun_outbound()'s pullup) that
* no extension headers will be split across mblks.
*/
nexthdr == IPPROTO_ROUTING) {
if (nexthdr == IPPROTO_ROUTING)
/*
* All IPv6 extension headers have the next-header in byte
* 0, and the (length - 8) in 8-byte-words.
*/
while (current_mp != NULL &&
if (current_mp == NULL) {
/* Bad packet. Return what we can. */
goto done;
}
}
}
done:
*is_fragment = B_TRUE;
return (rv);
}
/*
* ip_source_routed_v6:
* This function is called by redirect code (called from ip_input_v6) to
* know whether this packet is source routed through this node i.e
* whether this node (router) is part of the journey. This
* function is called under two cases :
*
* case 1 : Routing header was processed by this node and
* ip_process_rthdr replaced ip6_dst with the next hop
* and we are forwarding the packet to the next hop.
*
* case 2 : Routing header was not processed by this node and we
* are just forwarding the packet.
*
* For case (1) we don't want to send redirects. For case(2) we
* want to send redirects.
*/
static boolean_t
{
ip2dbg(("ip_source_routed_v6\n"));
/* if a routing hdr is preceded by HOPOPT or DSTOPT */
while (nexthdr == IPPROTO_HOPOPTS ||
nexthdr == IPPROTO_DSTOPTS) {
/*
* Check if we have already processed
* packets or we are just a forwarding
* router which only pulled up msgs up
* to IPV6HDR and one HBH ext header
*/
ip2dbg(("ip_source_routed_v6: Extension"
" headers not processed\n"));
return (B_FALSE);
}
}
switch (nexthdr) {
case IPPROTO_ROUTING:
/*
* If for some reason, we haven't pulled up
* the routing hdr data mblk, then we must
* not have processed it at all. So for sure
* we are not part of the source routed journey.
*/
ip2dbg(("ip_source_routed_v6: Routing"
" header not processed\n"));
return (B_FALSE);
}
/*
* Either we are an intermediate router or the
* last hop before destination and we have
* already processed the routing header.
* If segment_left is greater than or equal to zero,
* then we must be the (numaddr - segleft) entry
* of the routing header. Although ip6r0_segleft
* is a uint8_t variable, we still check for zero
* or greater value, in case the data type
* is changed someday in the future.
*/
if (rthdr->ip6r0_segleft > 0 ||
rthdr->ip6r0_segleft == 0) {
sizeof (*rthdr));
return (B_TRUE);
ip1dbg(("ip_source_routed_v6: Not local\n"));
}
}
/* FALLTHRU */
default:
ip2dbg(("ip_source_routed_v6: Not source routed here\n"));
return (B_FALSE);
}
}
/*
* IPv6 fragmentation. Essentially the same as IPv4 fragmentation.
* We have not optimized this in terms of number of mblks
* allocated. For instance, for each fragment sent we always allocate a
* mblk to hold the IPv6 header and fragment header.
*
* Assumes that all the extension headers are contained in the first mblk
* and that the fragment header has already been added by calling
* ip_fraghdr_add_v6.
*/
int
{
int error = 0;
if (max_frag == 0) {
return (EINVAL);
}
/*
* Caller should have added fraghdr_t to pkt_len, and also
* updated ip6_plen.
*/
/*
* Determine the length of the unfragmentable portion of this
* datagram. This consists of the IPv6 header, a potential
* hop-by-hop options header, a potential pre-routing-header
* destination options header, and a potential routing header.
*/
if (nexthdr == IPPROTO_HOPOPTS) {
}
if (nexthdr == IPPROTO_DSTOPTS) {
}
}
if (nexthdr == IPPROTO_ROUTING) {
}
if (nexthdr != IPPROTO_FRAGMENT) {
return (EINVAL);
}
unfragmentable_len += sizeof (ip6_frag_t);
/*
* Allocate an mblk with enough room for the link-layer
* header and the unfragmentable part of the datagram, which includes
* the fragment header. This (or a copy) will be used as the
* first mblk for each fragment we send.
*/
return (ENOBUFS);
}
/*
* pkt_len is set to the total length of the fragmentable data in this
* datagram. For each fragment sent, we will decrement pkt_len
* by the amount of fragmentable data sent in that fragment
* until pkt_len reaches zero.
*/
/*
* Move read ptr past unfragmentable portion, we don't want this part
* of the data in our fragments.
*/
}
while (pkt_len != 0) {
if (pkt_len != 0) {
/* Not last */
ip_drop_output("FragFails: copyb failed",
ip1dbg(("ip_fragment_v6: copyb failed\n"));
return (ENOBUFS);
}
} else {
/* Last fragment */
off_flags = 0;
}
sizeof (ip6_frag_t));
/*
* Note: Optimization alert.
* In IPv6 (and IPv4) protocol header, Fragment Offset
* ("offset") is 13 bits wide and in 8-octet units.
* In IPv6 protocol header (unlike IPv4) in a 16 bit field,
* it occupies the most significant 13 bits.
* (least significant 13 bits in IPv4).
* We do not do any shifts here. Not shifting has the same effect
* as taking the offset value in octet units, dividing by 8, and
* then shifting 3 bits left to line it up in its proper place
* in the protocol header.
*/
/* mp has already been freed by ip_carve_mp() */
ip_drop_output("FragFails: could not carve mp",
ip1dbg(("ip_carve_mp: failed\n"));
return (ENOBUFS);
}
/* Get the priority marking, if any */
/* No point in sending the other fragments */
ip_drop_output("FragFails: postfragfn failed",
return (error);
}
/* No need to redo state machine in loop */
ixaflags &= ~IXAF_REACH_CONF;
}
return (error);
}
/*
* Add a fragment header to an IPv6 packet.
* Assumes that all the extension headers are contained in the first mblk.
*
* The fragment header is inserted after a hop-by-hop options header
* and after [an optional destination options header followed by] a
* routing header.
*/
mblk_t *
{
/*
* Determine the length of the unfragmentable portion of this
* datagram. This consists of the IPv6 header, a potential
* hop-by-hop options header, a potential pre-routing-header
* destination options header, and a potential routing header.
*/
if (nexthdr == IPPROTO_HOPOPTS) {
}
if (nexthdr == IPPROTO_DSTOPTS) {
}
}
if (nexthdr == IPPROTO_ROUTING) {
}
/*
* Allocate an mblk with enough room for the link-layer
* header, the unfragmentable part of the datagram, and the
* fragment header.
*/
return (NULL);
}
fraghdr->ip6f_reserved = 0;
fraghdr->ip6f_offlg = 0;
/* Get the priority marking, if any */
/*
* Move read ptr past unfragmentable portion, we don't want this part
* of the data in our fragments.
*/
return (hmp);
}
/*
* Determine if the ill and multicast aspects of that packet
* "match" the conn.
*/
{
/*
* conn_incoming_ifindex is set by IPV6_BOUND_IF and as link-local
* scopeid. This is used to limit
* unicast and multicast reception to conn_incoming_ifindex.
* conn_wantpacket_v6 is called both for unicast and
* multicast packets.
*/
/* mpathd can bind to the under IPMP interface, which we allow */
if (!IS_UNDER_IPMP(ill))
return (B_FALSE);
return (B_FALSE);
}
return (B_FALSE);
return (B_TRUE);
if (connp->conn_multi_router)
return (B_TRUE);
return (B_TRUE);
}
/*
* pr_addr_dbg function provides the needed buffer space to call
* inet_ntop() function's 3rd argument. This function should be
* used by any kernel routine which wants to save INET6_ADDRSTRLEN
* stack buffer space in its own stack frame. This function uses
* a buffer from its own stack and prints the information.
* Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr)
*
* Note: This function can call inet_ntop() once.
*/
void
{
char buf[INET6_ADDRSTRLEN];
ip0dbg(("pr_addr_dbg: Wrong arguments\n"));
return;
}
/*
* This only checks that debugging is enabled at all (ip_debug > 0);
* it does not compare against a particular debug level. Thus it is
* the responsibility of the caller to check the appropriate
* debug-level before calling this function.
*/
if (ip_debug > 0) {
}
}
/*
* Return the length in bytes of the IPv6 headers (base header plus
* extension headers) that will be needed based on the
* ip_pkt_t structure passed by the caller.
*
* The returned length does not include the length of the upper level
* protocol (ULP) header.
*/
int
{
int len;
len = IPV6_HDR_LEN;
/*
* If there's a security label here, then we ignore any hop-by-hop
* options the user may try to set.
*/
/*
* Note that ipp_label_len_v6 is just the option - not
* the hopopts extension header. It also needs to be padded
* to a multiple of 8 bytes.
*/
len += hopoptslen;
}
/*
* En-route destination options
* Only do them if there's a routing header as well
*/
}
}
}
return (len);
}
/*
* All-purpose routine to build a header chain of an IPv6 header
* followed by any required extension headers and a proto header.
*
* The caller has to set the source and destination address as well as
* ip6_plen. The caller has to massage any routing header and compensate
* for the ULP pseudo-header checksum due to the source route.
*
* The extension headers will all be fully filled in.
*/
void
{
/* Initialize IPv6 header */
/* Overrides the class part of flowinfo */
ipp->ipp_tclass);
}
else
/*
* Here's where we have to start stringing together
* any extension headers in the right order:
* Hop-by-hop, destination, routing, and final destination opts.
*/
/*
* If there's a security label here, then we ignore any hop-by-hop
* options the user may try to set.
*/
/*
* Hop-by-hop options with the label.
* Note that ipp_label_v6 is just the option - not
* the hopopts extension header. It also needs to be padded
* to a multiple of 8 bytes.
*/
switch (padlen) {
case 0:
break;
case 1:
cp[0] = IP6OPT_PAD1;
break;
default:
cp[0] = IP6OPT_PADN;
break;
}
/* Hop-by-hop options */
}
/*
* En-route destination options
* Only do them if there's a routing header as well
*/
}
/*
* Routing header next
*/
}
/*
* Do ultimate destination options
*/
}
/*
* Now set the last header pointer to the proto passed in
*/
*nxthdr_ptr = protocol;
}
/*
* Return a pointer to the routing header extension header
* in the IPv6 header(s) chain passed in.
* If none found, return NULL
* Assumes that all extension headers are in same mblk as the v6 header
*/
{
return ((ip6_rthdr_t *)ptr);
/*
* The routing header will precede all extension headers
* other than the hop-by-hop and destination options
* extension headers, so if we see anything other than those,
* we're done and didn't find it.
* We could see a destination options header alone but no
* routing header, in which case we'll return NULL as soon as
* we see anything after that.
* Hop-by-hop and destination option headers are identical,
* so we can use either one we want as a template.
*/
/* Is there enough left for len + nexthdr? */
return (NULL);
switch (nexthdr) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
/* Assumes the headers are identical for hbh and dst */
break;
case IPPROTO_ROUTING:
return ((ip6_rthdr_t *)ptr);
case IPPROTO_FRAGMENT:
hdrlen = sizeof (ip6_frag_t);
break;
default:
return (NULL);
}
}
return (NULL);
}
/*
* Called for source-routed packets originating on this node.
* Manipulates the original routing header by moving every entry up
* one slot, placing the first entry in the v6 header's v6_dst field,
* and placing the ultimate destination in the routing header's last
* slot.
*
* Returns the checksum difference between the ultimate destination
* (last hop in the routing header when the packet is sent) and
* the first hop (ip6_dst when the packet is sent)
*/
/* ARGSUSED2 */
{
uint_t i;
/*
* Perform any processing needed for source routing.
* We know that all extension headers will be in the same mblk
* as the IPv6 header.
*/
/*
* If no segments left in header, or the header length field is zero,
* don't move hop addresses around;
* Checksum difference is zero.
*/
return (0);
cksm = 0;
for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
}
/*
* Here's where the fun begins - we have to
* move all addresses up one spot, take the
* first hop and make it our first ip6_dst,
* and place the ultimate destination in the
* newly-opened last slot.
*/
}
/*
* From the checksummed ultimate destination subtract the checksummed
* current ip6_dst (the first hop address). Return that number.
* (In the v4 case, the second part of this is done in each routine
* that calls ip_massage_options(). We do it all in this one place
* for v6).
*/
for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
}
if ((int)cksm < 0)
cksm--;
return (cksm);
}
void
{
ip6_stat_t template = {
{ "ip6_udp_fannorm", KSTAT_DATA_UINT64 },
{ "ip6_udp_fanmb", KSTAT_DATA_UINT64 },
{ "ip6_recv_pullup", KSTAT_DATA_UINT64 },
{ "ip6_db_ref", KSTAT_DATA_UINT64 },
{ "ip6_notaligned", KSTAT_DATA_UINT64 },
{ "ip6_multimblk", KSTAT_DATA_UINT64 },
{ "ipsec_proto_ahesp", KSTAT_DATA_UINT64 },
{ "ip6_out_sw_cksum", KSTAT_DATA_UINT64 },
{ "ip6_out_sw_cksum_bytes", KSTAT_DATA_UINT64 },
{ "ip6_in_sw_cksum", KSTAT_DATA_UINT64 },
{ "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 },
{ "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 },
{ "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 },
{ "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 },
{ "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 },
{ "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 },
};
return (NULL);
return (ksp);
}
void
{
}
}
/*
* The following two functions set and get the value for the
* IPV6_SRC_PREFERENCES socket option.
*/
int
{
/*
* We only support preferences that are covered by
* IPV6_PREFER_SRC_MASK.
*/
if (prefs & ~IPV6_PREFER_SRC_MASK)
return (EINVAL);
/*
* Look for conflicting preferences or default preferences. If
* both bits of a related pair are clear, the application wants the
* system's default value for that pair. Both bits in a pair can't
* be set.
*/
if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) {
} else if ((prefs & IPV6_PREFER_SRC_MIPMASK) ==
return (EINVAL);
}
if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) {
} else if ((prefs & IPV6_PREFER_SRC_TMPMASK) ==
return (EINVAL);
}
if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) {
} else if ((prefs & IPV6_PREFER_SRC_CGAMASK) ==
return (EINVAL);
}
return (0);
}
{
return (sizeof (ixa->ixa_src_preferences));
}
/*
* Get the size of the IP options (including the IP headers size)
* without including the AH header's size. If till_ah is B_FALSE,
* and if AH header is present, dest options beyond AH header will
* also be included in the returned size.
*/
int
{
int ehdrlen;
int size;
size = IPV6_HDR_LEN;
for (;;) {
/* Assume IP has already stripped it */
switch (nexthdr) {
case IPPROTO_HOPOPTS:
break;
case IPPROTO_DSTOPTS:
break;
case IPPROTO_ROUTING:
break;
default :
if (till_ah) {
return (size);
}
/*
* If we don't have an AH header to traverse,
* return now. This happens normally for
* outbound datagrams where we have not inserted
* the AH header.
*/
if (nexthdr != IPPROTO_AH) {
return (size);
}
/*
* We don't include the AH header's size
* to be symmetrical with other cases where
* we either don't have an AH header (outbound)
* or have not peeked into the AH header yet (inbound
* and not pulled up yet).
*/
if (nexthdr == IPPROTO_DSTOPTS) {
/*
* The destination options header
* is not part of the first mblk.
*/
} else {
}
}
return (size);
}
}
}
/*
* Utility routine that checks if `v6srcp' is a valid address on underlying
* interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif
* associated with `v6srcp' on success. NOTE: if this is not called from
* inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the
* group during or after this lookup.
*/
{
else
return (B_TRUE);
}
if (ip_debug > 2) {
pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for "
}
return (B_FALSE);
}