ip6_input.c revision a5407c02d5ed61b29481b9b71f1307d7ebec9e5c
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
/* Copyright (c) 1990 Mentat Inc. */
#include <sys/sysmacros.h>
#define _SUN_TPI_VERSION 2
#include <sys/xti_inet.h>
#include <sys/isa_defs.h>
#include <inet/kstatcom.h>
#include <netinet/igmp_var.h>
#include <inet/tcp_impl.h>
#include <inet/ip_multi.h>
#include <inet/ip_ftable.h>
#include <inet/ip_listutils.h>
#include <netinet/ip_mroute.h>
#include <inet/ipp_common.h>
#include <inet/ipsec_impl.h>
#include <inet/ip_netinfo.h>
#include <sys/squeue_impl.h>
#include <sys/ethernet.h>
#include <net/if_types.h>
#include <ipp/ipp_impl.h>
#include <inet/ipclassifier.h>
#include <inet/udp_impl.h>
#ifdef DEBUG
extern boolean_t skip_sctp_cksum;
#endif
ip_recv_attr_t *);
/*
* Direct read side procedure capable of dealing with chains. GLDv3 based
* drivers call this function directly with mblk chains while STREAMS
* read side procedure ip_rput() calls this for single packet with ip_ring
* set to NULL to process one packet at a time.
*
* The ill will always be valid if this function is called directly from
* the driver.
*
* If ip_input_v6() is called from GLDv3:
*
* - This must be a non-VLAN IP stream.
* - 'mp' is either an untagged or a special priority-tagged packet.
* - Any VLAN tag that was in the MAC header has been stripped.
*
* If the IP header in packet is not 32-bit aligned, every message in the
* chain will be aligned before further operations. This is required on SPARC
* platform.
*/
void
struct mac_header_info_s *mhip)
{
NULL);
}
/*
* ip_accept_tcp_v6() - This function is called by the squeue when it retrieves
* a chain of packets in the poll mode. The packets have gone through the
* data link processing but not IP processing. For performance and latency
* reasons, the squeue wants to process the chain in line instead of feeding
* it back via ip_input path.
*
* We set up the ip_recv_attr_t with IRAF_TARGET_SQP so that ip_fanout_v6
* will pass back any TCP packets matching the target sqp to
* ip_input_common_v6 using ira_target_sqp_mp. Other packets are handled by
* ip_input_v6 and ip_fanout_v6 as normal.
* The TCP packets that match the target squeue are returned to the caller
* as a b_next chain after each packet has been prepended with an mblk
* from ip_recv_attr_to_mblk.
*/
mblk_t *
{
}
/*
* Used by ip_input_v6 and ip_accept_tcp_v6
* The last three arguments are only used by ip_accept_tcp_v6, and mhip is
* only used by ip_input_v6.
*/
mblk_t *
{
/* These ones do not change as we loop over packets */
/* For ECMP and outbound transmit ring selection */
if (target_sqp != NULL)
/*
* We try to have a mhip pointer when possible, but
* it might be NULL in some cases. In those cases we
* have to assume unicast.
*/
switch (mhip->mhi_dsttype) {
case MAC_ADDRTYPE_MULTICAST :
break;
case MAC_ADDRTYPE_BROADCAST :
break;
}
}
/*
* Initialize the one-element route cache.
*
* We do ire caching from one iteration to
* another. In the event the packet chain contains
* all packets from the same dst, this caching saves
* an ire_route_recursive for each of the succeeding
* packets in a packet chain.
*/
/* Loop over b_next */
/*
* if db_ref > 1 then copymsg and free original. Packet
* may be changed and we do not want the other entity
* who has a reference to this message to trip over the
* changes. This is a blind change because trying to
* catch all places that might change the packet is too
* difficult.
*
* This corresponds to the fast path case, where we have
* a chain of M_DATA mblks. We check the db_ref count
* of only the 1st data block in the mblk chain. There
* doesn't seem to be a reason why a device driver would
* send up data with varying db_ref counts in the mblk
* chain. In any case the Fast path is a private
* interface, and our drivers don't do such a thing.
* Given the above assumption, there is no need to walk
* down the entire mblk chain (which could have a
* potential performance problem)
*
* The "(DB_REF(mp) > 1)" check was moved from ip_rput()
* to here because of exclusive ip stacks and vnics.
* Packets transmitted from exclusive stack over vnic
* can have db_ref > 1 and when it gets looped back to
* another vnic in a different zone, you have ip_input()
* getting dblks with db_ref > 1. So if someone
* complains of TCP performance under this scenario,
* take a serious look here on the impact of copymsg().
*/
continue;
}
/*
* IP header ptr not aligned?
* OR IP header not complete in first mblk
*/
continue;
}
/* Protect against a mix of Ethertypes and IP versions */
/* mhip might point into 1st packet in the chain. */
continue;
}
/*
* Check for Martian addrs; we have to explicitly
* test for zero dst since this is also used as
* an indication that the rtc is not used.
*/
/* mhip might point into 1st packet in the chain. */
continue;
}
/*
* Keep L2SRC from a previous packet in chain since mhip
* might point into an earlier packet in the chain.
*/
iras.ira_free_flags = 0;
/*
* We must count all incoming packets, even if they end
* up being dropped later on. Defer counting bytes until
* we have the whole IP header in first mblk.
*/
/*
* Call one of:
* ill_input_full_v6
* ill_input_short_v6
* The former is used in the case of TX. See ill_set_inputfn().
*/
/* Any references to clean up? No hold on ira_ill */
/* Better be called from ip_accept_tcp */
/* Found one packet to accept */
else
acnt++;
}
/* mhip might point into 1st packet in the chain. */
}
/* Any remaining references to the route cache? */
}
/* Better be called from ip_accept_tcp */
return (ahead);
}
return (NULL);
}
/*
* This input function is used when
* - is_system_labeled()
*
* Note that for IPv6 CGTP filtering is handled only when receiving fragment
* headers, and RSVP uses router alert options, thus we don't need anything
* extra for them.
*/
void
{
/*
* Attach any necessary label information to
* this packet
*/
if (is_system_labeled()) {
/*
* This updates ira_cred, ira_tsl and ira_free_flags based
* on the label.
*/
if (ip6opt_ls != 0)
ip0dbg(("tsol_get_pkt_label v6 failed\n"));
return;
}
/* Note that ira_tsl can be NULL here. */
/* tsol_get_pkt_label sometimes does pullupmsg */
}
}
/*
* Check for IPv6 addresses that should not appear on the wire
* as either source or destination.
* If we ever implement Stateless IPv6 Translators (SIIT) we'd have
* to revisit the IPv4-mapped part.
*/
static boolean_t
{
if (IN6_IS_ADDR_V4MAPPED(addr)) {
ip1dbg(("ip_input_v6: pkt with IPv4-mapped addr"));
return (B_TRUE);
}
if (IN6_IS_ADDR_LOOPBACK(addr)) {
ip1dbg(("ip_input_v6: pkt with loopback addr"));
return (B_TRUE);
}
/*
* having :: in the src is ok: it's used for DAD.
*/
ip1dbg(("ip_input_v6: pkt with unspecified addr"));
return (B_TRUE);
}
return (B_FALSE);
}
/*
* Routing lookup for IPv6 link-locals.
* First we look on the inbound interface, then we check for IPMP and
* look on the upper interface.
* We update ira_ruifindex if we find the IRE on the upper interface.
*/
static ire_t *
{
!IS_UNDER_IPMP(ill))
return (ire);
/*
* When we are using IPMP we need to look for an IRE on both the
* under and upper interfaces since there are different
* link-local addresses for the under and upper.
*/
return (ire);
return (ire);
}
/*
* This is the tail-end of the full receive side packet handling.
* It can be used directly when the configuration is simple.
*/
void
{
/*
* v4mapped. All of them start with 64 bits of zero.
*/
ip1dbg(("ip_input_v6: pkt with bad src addr\n"));
return;
}
}
ip1dbg(("ip_input_v6: pkt with bad dst addr\n"));
return;
}
}
/* multiple mblk or too short */
if (len != 0) {
return;
}
int, 0);
/*
* The event for packets being received from a 'physical'
* destination address as being local so that packets can be
* redirected to loopback addresses using ipnat.
*/
if (HOOKS6_INTERESTED_PHYSICAL_IN(ipst)) {
int ll_multicast = 0;
int error;
return;
/* The length could have changed */
/*
* In case the destination changed we override any previous
* change to nexthop.
*/
if (IN6_IS_ADDR_UNSPECIFIED(&nexthop)) {
return;
}
}
/*
* On the inbound path the src zone will be unknown as
* this packet has come from the wire.
*/
}
return;
}
/*
* For IPv6 we update ira_ip_hdr_length and ira_protocol as
* we parse the headers, starting with the hop-by-hop options header.
*/
return;
}
return;
}
}
return;
}
return;
}
}
/*
* Update ira_ip_hdr_length to skip the hop-by-hop header
* once we get to ip_fanout_v6
*/
case -1:
/*
* Packet has been consumed and any
* needed ICMP messages sent.
*/
return;
case 0:
/* no action needed */
break;
case 1:
/*
* Known router alert. Make us handle it as local
* by setting the nexthop to be the all-host multicast
* address, and skip multicast membership filter by
* marking as a router alert.
*/
break;
}
}
/*
* Here we check to see if this machine is set up as an
* L3 load balancer and if the incoming packet is for a VIP
*
* Check the following:
* - there is at least a rule
* - protocol of the packet is supported
*
* We don't load balance IPv6 link-locals.
*/
!IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
int lb_ret;
/* For convenience, we just pull up the mblk. */
ip_drop_input("ipIfStatsInDiscards - pullupmsg",
return;
}
}
if (lb_ret == ILB_DROPPED) {
return;
}
if (lb_ret == ILB_BALANCED) {
/* Set the dst to that of the chosen server */
DB_CKSUMFLAGS(mp) = 0;
}
}
else
/* Can not use route cache with TX since the labels can differ */
if (IN6_IS_ADDR_MULTICAST(&nexthop)) {
} else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
ipst);
} else {
/* Match destination and label */
NULL);
}
/* Update the route cache so we do the ire_refrele */
/* Use the route cache */
} else {
/* Update the route cache */
if (IN6_IS_ADDR_MULTICAST(&nexthop)) {
} else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
ipst);
} else {
}
}
ire->ire_ib_pkt_count++;
/*
* Based on ire_type and ire_flags call one of:
* ire_recv_local_v6 - for IRE_LOCAL
* ire_recv_loopback_v6 - for IRE_LOOPBACK
* ire_recv_multirt_v6 - if RTF_MULTIRT
* ire_recv_noroute_v6 - if RTF_REJECT or RTF_BLACKHOLE
* ire_recv_multicast_v6 - for IRE_MULTICAST
* ire_recv_noaccept_v6 - for ire_noaccept ones
* ire_recv_forward_v6 - for the rest.
*/
}
/*
* ire_recvfn for IREs that need forwarding
*/
void
{
return;
}
return;
}
/*
* Either ire_nce_capable or ire_dep_parent would be set for the IRE
* when it is found by ire_route_recursive, but that some other thread
* could have changed the routes with the effect of clearing
* ire_dep_parent. In that case we'd end up dropping the packet, or
* finding a new nce below.
* Get, allocate, or update the nce.
* We get a refhold on ire_nce_cache as a result of this to avoid races
* where ire_nce_cache is deleted.
*
* This ensures that we don't forward if the interface is down since
* ipif_down removes all the nces.
*/
/* Not yet set up - try to set one up */
(void) ire_revalidate_nce(ire);
/* The ire_dep_parent chain went bad, or no memory */
return;
}
}
if (nce->nce_is_condemned) {
return;
}
}
/*
* Unless we are forwarding, drop the packet.
* Unlike IPv4 we don't allow source routed packets out the same
* interface when we are not a router.
* Note that ill_forward_set() will set the ILLF_ROUTER on
* all the group members when it gets an ipmp-ill or under-ill.
*/
return;
}
ire->ire_ib_pkt_count--;
/*
* Should only use IREs that are visible from the
* global zone for forwarding.
* For IPv6 any source route would have already been
* advanced in ip_fanout_v6
*/
ire->ire_ib_pkt_count++;
return;
}
/*
* ipIfStatsHCInForwDatagrams should only be incremented if there
* will be an attempt to forward the packet, which is why we
* increment after the above condition has been checked.
*/
/* Initiate Read side IPPF processing */
/* ip_process translates an IS_UNDER_IPMP */
/* ip_drop_packet and MIB done */
"during IPPF processing\n"));
return;
}
}
if (HOOKS6_INTERESTED_FORWARDING(ipst)) {
int error;
return;
}
/*
* Even if the destination was changed by the filter we use the
* forwarding decision that was made based on the address
* in ip_input.
*/
/* Might have changed */
}
/* Packet is being forwarded. Turning off hwcksum flag. */
DB_CKSUMFLAGS(mp) = 0;
/*
* Per RFC 3513 section 2.5.2, we must not forward packets with
* an unspecified source address.
* The loopback address check for both src and dst has already
* been checked in ip_input_v6
* In the future one can envision adding RPF checks using number 3.
*/
switch (ipst->ips_src_check) {
case 0:
break;
case 1:
case 2:
return;
}
break;
}
/*
* Check to see if we're forwarding the packet to a
* different link from which it came. If so, check the
* source and destination addresses since routers must not
* forward any packets with link-local source or
* destination addresses to other links. Otherwise (if
* we're forwarding onto the same link), conditionally send
* a redirect message.
*/
return;
}
/* TBD add site-local check at site boundary? */
} else if (ipst->ips_ipv6_send_redirects) {
}
added_tx_len = 0;
if (iraflags & IRAF_SYSTEM_LABELED) {
/*
* CIPSO options as needed.
*/
return;
}
/*
* Size may have changed. Remember amount added in case
* ip_fragment needs to send an ICMP too big.
*/
}
return;
}
/*
* Used for sending out unicast and multicast packets that are
* forwarded.
*/
void
{
ira);
return;
}
/* Initiate Write side IPPF processing before any fragmentation */
/* ip_process translates an IS_UNDER_IPMP */
/* ip_drop_packet and MIB done */
" during IPPF processing\n"));
return;
}
}
if (iraflags & IRAF_SYSTEM_LABELED) {
/*
* Remove any CIPSO option added by
* tsol_ip_forward, and make sure we report
* a path MTU so that there
* is room to add such a CIPSO option for future
* packets.
*/
}
return;
}
if (iraflags & IRAF_LOOPBACK_COPY) {
/*
* IXAF_NO_LOOP_ZONEID is not set hence 6th arg
* is don't care
*/
} else {
}
}
/*
* ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE,
* which is what ire_route_recursive returns when there is no matching ire.
* Send ICMP unreachable unless blackhole.
*/
void
{
/* Would we have forwarded this packet if we had a route? */
return;
}
return;
}
/*
* If we had a route this could have been forwarded. Count as such.
*
* ipIfStatsHCInForwDatagrams should only be incremented if there
* will be an attempt to forward the packet, which is why we
* increment after the above condition has been checked.
*/
ipst);
} else {
ira);
}
}
/*
* ire_recvfn for IRE_LOCALs marked with ire_noaccept. Such IREs are used for
* VRRP when in noaccept mode.
* We silently drop packets except for Neighbor Solicitations and
* Neighbor Advertisements.
*/
void
{
int ip_hdr_length;
return;
}
return;
}
return;
}
}
return;
}
}
/*
* ire_recvfn for IRE_MULTICAST.
*/
void
{
/* Tag for higher-level protocols */
/*
* So that we don't end up with dups, only one ill in an IPMP group is
* nominated to receive multicast traffic.
* If we have no cast_ill we are liberal and accept everything.
*/
if (IS_UNDER_IPMP(ill)) {
/* For an under ill_grp can change under lock */
return;
}
/*
* We switch to the upper ill so that mrouter and hasmembers
* can operate on upper here and in ip_input_multicast.
*/
} else {
}
}
#ifdef notdef
/*
* Check if we are a multicast router - send ip_mforward a copy of
* the packet.
* Due to mroute_decap tunnels we consider forwarding packets even if
* mrouted has not joined the allmulti group on this interface.
*/
if (ipst->ips_ip_g_mrouter) {
int retval;
/*
* Clear the indication that this may have hardware
* checksum as we are not using it for forwarding.
*/
DB_CKSUMFLAGS(mp) = 0;
/*
* ip_mforward helps us make these distinctions: If received
* on tunnel and not IGMP, then drop.
* If IGMP packet, then don't check membership
* If received on a phyint and IGMP or PIM, then
* don't check membership
*/
/* ip_mforward updates mib variables if needed */
switch (retval) {
case 0:
/*
* pkt is okay and arrived on phyint.
*/
break;
case -1:
/* pkt is mal-formed, toss it */
goto done;
case 1:
/*
* pkt is okay and arrived on a tunnel
*
* If we are running a multicast router
* we need to see all mld packets, which
* are marked with router alerts.
*/
goto forus;
goto done;
}
}
#endif /* notdef */
/*
* If this was a router alert we skip the group membership check.
*/
goto forus;
/*
* Check if we have members on this ill. This is not necessary for
* filter before passing to each conn_t.
*/
/*
* Nobody interested
*
* This might just be caused by the fact that
* multiple IP Multicast addresses map to the same
* link layer multicast - no need to increment counter!
*/
goto done;
}
ip2dbg(("ire_recv_multicast_v6: multicast for us\n"));
/*
* After reassembly and IPsec we will need to duplicate the
* multicast packet for all matching zones on the ill.
*/
/* Reassemble on the ill on which the packet arrived */
done:
}
}
/*
* ire_recvfn for IRE_OFFLINK with RTF_MULTIRT.
* Drop packets since we don't forward out multirt routes.
*/
/* ARGSUSED */
void
{
}
/*
* ire_recvfn for IRE_LOOPBACK. This is only used when a FW_HOOK
* has rewritten the packet to have a loopback destination address (We
* filter out packet with a loopback destination from arriving over the wire).
* We don't know what zone to use, thus we always use the GLOBAL_ZONEID.
*/
void
{
/* Switch to the lo0 ill for further processing */
/*
* Update ira_ill to be the ILL on which the IP address
* is hosted.
* No need to hold the ill since we have a hold on the ire
*/
/* Restore */
return;
}
}
/*
* ire_recvfn for IRE_LOCAL.
*/
void
{
/* Make a note for DAD that this address is in use */
/* Only target the IRE_LOCAL with the right zoneid. */
/*
* If the packet arrived on the wrong ill, we check that
* this is ok.
* If it is, then we ensure that we do the reassembly on
* the ill on which the address is hosted. We keep ira_rill as
* the one on which the packet arrived, so that IP_PKTINFO and
* friends can report this.
*/
/* Drop packet */
return;
}
/*
* Update ira_ill to be the ILL on which the IP address
* is hosted. No need to hold the ill since we have a
* hold on the ire. Note that we do the switch even if
* new_ire == ire (for IPMP, ire would be the one corresponding
* to the IPMP ill).
*/
/* ira_ruifindex tracks the upper for ira_rill */
if (IS_UNDER_IPMP(ill))
/* Restore */
return;
}
}
/*
* Common function for packets arriving for the host. Handles
* checksum verification, reassembly checks, etc.
*/
static void
{
/*
* For multicast we need some extra work before
* we call ip_fanout_v6(), since in the case of shared-IP zones
* we need to pretend that a packet arrived for each zoneid.
*/
if (iraflags & IRAF_MULTICAST) {
return;
}
}
/*
* Handle multiple zones which want to receive the same multicast packets
* on this ill by delivering a packet to each of them.
*
* Note that for packets delivered to transports we could instead do this
* as part of the fanout code, but since we need to handle icmp_inbound
* it is simpler to have multicast work the same as IPv4 broadcast.
*
* The ip_fanout matching for multicast matches based on ilm independent of
* zoneid since the zoneid restriction is applied when joining a multicast
* group.
*/
/* ARGSUSED */
static void
{
/* ire_recv_multicast has switched to the upper ill for IPMP */
/*
* If we don't have more than one shared-IP zone, or if
* there are no members in anything but the global zone,
* then just set the zoneid and proceed.
*/
GLOBAL_ZONEID)) {
/* If sender didn't want this zone to receive it, drop */
if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
return;
}
return;
}
/*
* Here we loop over all zoneids that have members in the group
* and deliver a packet to ip_fanout for each zoneid.
*
* First find any members in the lowest numeric zoneid by looking for
* first zoneid larger than -1 (ALL_ZONES).
* We terminate the loop when we receive -1 (ALL_ZONES).
*/
/*
* and doing that at the end.
*/
if (zoneid == GLOBAL_ZONEID)
continue;
/* If sender didn't want this zone to receive it, skip */
if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
continue;
/* Failed to deliver to one zone */
continue;
}
/*
* IPsec might have modified ira_pktlen and ira_ip_hdr_length
* so we restore them for a potential next iteration
*/
}
/* Do the main ire */
/* If sender didn't want this zone to receive it, drop */
if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
} else {
}
}
/*
* Determine the zoneid and IRAF_TX_MAC_EXEMPTABLE if trusted extensions
* is in use. Updates ira_zoneid and ira_flags as a result.
*/
static void
{
/*
* If the packet is unlabeled we might allow read-down
* for MAC_EXEMPT. Below we clear this if it is a multi-level
* port (MLP).
* Note that ira_tsl can be NULL here.
*/
return;
switch (protocol) {
case IPPROTO_TCP:
case IPPROTO_SCTP:
case IPPROTO_UDP:
/* Caller ensures this */
/*
* Only these transports support MLP.
* We know their destination port numbers are in
* the same place in the header.
*/
/*
* No need to handle exclusive-stack zones
* since ALL_ZONES only applies to the shared IP instance.
*/
/*
* If no shared MLP is found, tsol_mlp_findzone returns
* ALL_ZONES. In that case, we assume it's SLP, and
* search for the zone based on the packet label.
*
* If there is such a zone, we prefer to find a
* connection in it. Otherwise, we look for a
* MAC-exempt connection in any zone whose label
* dominates the default label on the packet.
*/
else
break;
default:
/* Handle shared address for other protocols */
break;
}
}
/*
* Increment checksum failure statistics
*/
static void
{
switch (protocol) {
case IPPROTO_TCP:
if (hck_flags & HCK_FULLCKSUM)
else if (hck_flags & HCK_PARTIALCKSUM)
else
break;
case IPPROTO_UDP:
if (hck_flags & HCK_FULLCKSUM)
else if (hck_flags & HCK_PARTIALCKSUM)
else
break;
case IPPROTO_ICMPV6:
break;
default:
ASSERT(0);
break;
}
}
/* Calculate the IPv6 pseudo-header checksum for TCP, UDP, and ICMPV6 */
{
switch (protocol) {
case IPPROTO_TCP:
/* Protocol and length */
/* IP addresses */
break;
case IPPROTO_UDP: {
/* Protocol and length */
/* IP addresses */
break;
}
case IPPROTO_ICMPV6:
/* Protocol and length */
/* IP addresses */
break;
default:
cksum = 0;
break;
}
return (cksum);
}
/*
* Software verification of the ULP checksums.
* Returns B_TRUE if ok.
* Increments statistics if failed.
*/
static boolean_t
{
protocol == IPPROTO_ICMPV6);
if (cksum == 0)
return (B_TRUE);
return (B_FALSE);
}
/*
* Verify the ULP checksums.
* Returns B_TRUE if ok, or if the ULP doesn't have a well-defined checksum
* algorithm.
* Increments statistics if failed.
*/
static boolean_t
{
switch (protocol) {
case IPPROTO_TCP:
case IPPROTO_ICMPV6:
break;
case IPPROTO_UDP: {
/*
* Before going through the regular checksum
* calculation, make sure the received checksum
* is non-zero. RFC 2460 says, a 0x0000 checksum
* in a UDP packet (within IPv6 packet) is invalid
* and should be replaced by 0xffff. This makes
* sense as regular checksum calculation will
* pass for both the cases i.e. 0x0000 and 0xffff.
* Removing one of the case makes error detection
* stronger.
*/
if (udpha->uha_checksum == 0) {
/* 0x0000 checksum is invalid */
return (B_FALSE);
}
break;
}
case IPPROTO_SCTP: {
#ifdef DEBUG
if (skip_sctp_cksum)
return (B_TRUE);
#endif
return (B_TRUE);
/*
* Defer until later whether a bad checksum is ok
* in order to allow RAW sockets to use Adler checksum
* with SCTP.
*/
return (B_TRUE);
}
default:
/* No ULP checksum to verify. */
return (B_TRUE);
}
/*
* Revert to software checksum calculation if the interface
* isn't capable of checksum offload.
* We clear DB_CKSUMFLAGS when going through IPsec in ip_fanout.
* Note: IRAF_NO_HW_CKSUM is not currently used.
*/
!dohwcksum) {
}
/*
* We apply this for all ULP protocols. Does the HW know to
* not set the flags for SCTP and other protocols?
*/
if (hck_flags & HCK_FULLCKSUM_OK) {
/*
* Hardware has already verified the checksum.
*/
return (B_TRUE);
}
if (hck_flags & HCK_FULLCKSUM) {
/*
* Full checksum has been computed by the hardware
* and has been attached. If the driver wants us to
* verify the correctness of the attached value, in
* order to protect against faulty hardware, compare
* it against -0 (0xFFFF) to see if it's valid.
*/
if (cksum == 0xFFFF)
return (B_TRUE);
return (B_FALSE);
}
if ((hck_flags & HCK_PARTIALCKSUM) &&
/*
* Partial checksum has been calculated by hardware
* and attached to the packet; in addition, any
* prepended extraneous data is even byte aligned,
* and there are at most two mblks associated with
* the packet. If any such data exists, we adjust
* the checksum; also take care of any postpended data.
*/
/*
* One's complement subtract extraneous checksum
*/
else
if (!(~cksum & 0xFFFF))
return (B_TRUE);
return (B_FALSE);
}
}
/*
* Handle fanout of received packets.
* Unicast packets that are looped back (from ire_send_local_v6) and packets
* from the wire are differentiated by checking IRAF_VERIFY_ULP_CKSUM.
*
* IPQoS Notes
* Before sending it to the client, invoke IPPF processing. Policy processing
* takes place only if the callout_position, IPP_LOCAL_IN, is enabled.
*/
void
{
int offset;
/*
* We repeat this as we parse over destination options header and
* fragment headers (earlier we've handled any hop-by-hop options
* header.)
* We update ira_protocol and ira_ip_hdr_length as we skip past
* the intermediate headers; they already point past any
* hop-by-hop header.
*/
/*
* Time for IPP once we've done reassembly and IPsec.
* We skip this for loopback packets since we don't do IPQoS
* on loopback.
*/
!(iraflags & IRAF_LOOPBACK) &&
protocol != IPPROTO_FRAGMENT)) {
/*
* Use the interface on which the packet arrived - not where
* the IP address is hosted.
*/
/* ip_process translates an IS_UNDER_IPMP */
/* ip_drop_packet and MIB done */
return;
}
}
/* Determine the minimum required size of the upper-layer header */
/* Need to do this for at least the set of ULPs that TX handles. */
switch (protocol) {
case IPPROTO_TCP:
break;
case IPPROTO_SCTP:
break;
case IPPROTO_UDP:
break;
case IPPROTO_ICMP:
case IPPROTO_ICMPV6:
break;
case IPPROTO_FRAGMENT:
case IPPROTO_DSTOPTS:
case IPPROTO_ROUTING:
break;
default:
break;
}
/* Make sure we have the min ULP header length */
goto pkt_too_short;
ira);
goto discard;
}
/*
* If trusted extensions then determine the zoneid and TX specific
* ira_flags.
*/
if (iraflags & IRAF_SYSTEM_LABELED) {
/* This can update ira->ira_flags and ira->ira_zoneid */
}
/* Verify ULP checksum. Handles TCP, UDP, and SCTP */
if (iraflags & IRAF_VERIFY_ULP_CKSUM) {
/* Bad checksum. Stats are already incremented */
return;
}
/* IRAF_SCTP_CSUM_ERR could have been set */
}
switch (protocol) {
case IPPROTO_TCP:
/* For TCP, discard multicast packets. */
if (iraflags & IRAF_MULTIBROADCAST)
goto discard;
/* First mblk contains IP+TCP headers per above check */
/* TCP options present? */
if (offset != 5) {
if (offset < 5)
goto discard;
/*
* There must be TCP options.
* Make sure we can grab them.
*/
offset <<= 2;
offset += ip_hdr_length;
goto pkt_too_short;
goto discard;
}
}
/*
* Pass up a squeue hint to tcp.
* If ira_sqp is already set (this is loopback) we leave it
* alone.
*/
}
/* Look for AF_INET or AF_INET6 that matches */
/* Send the TH_RST */
return;
}
if (connp->conn_incoming_ifindex != 0 &&
/* Send the TH_RST */
return;
}
(iraflags & IRAF_IPSEC_SECURE)) {
/* Note that mp is NULL */
return;
}
}
/* Found a client; up it goes */
if (!IPCL_IS_TCP(connp)) {
/* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
return;
}
/*
* We do different processing whether called from
* ip_accept_tcp and we match the target, don't match
* the target, and when we are called by ip_input.
*/
if (iraflags & IRAF_TARGET_SQP) {
ip_drop_input("ipIfStatsInDiscards",
} else {
connp);
/*
* Conn ref release when drained from
* the squeue.
*/
}
} else {
}
} else {
}
return;
case IPPROTO_SCTP: {
/* For SCTP, discard multicast packets. */
if (iraflags & IRAF_MULTIBROADCAST)
goto discard;
/*
* Since there is no SCTP h/w cksum support yet, just
* clear the flag.
*/
DB_CKSUMFLAGS(mp) = 0;
/* Length ensured above */
/* get the ports */
if (iraflags & IRAF_SCTP_CSUM_ERR) {
/*
* No potential sctp checksum errors go to the Sun
* sctp stack however they might be Adler-32 summed
* packets a userland stack bound to a raw IP socket
* could reasonably use. Note though that Adler-32 is
* a long deprecated algorithm and customer sctp
* networks should eventually migrate to CRC-32 at
* which time this facility should be removed.
*/
return;
}
/* Check for raw socket or OOTB handling */
return;
}
if (connp->conn_incoming_ifindex != 0 &&
/* Check for raw socket or OOTB handling */
return;
}
/* Found a client; up it goes */
/* sctp_input does a rele of the sctp_t */
return;
}
case IPPROTO_UDP:
/* First mblk contains IP+UDP headers as checked above */
if (iraflags & IRAF_MULTIBROADCAST) {
return;
}
/* Look for AF_INET or AF_INET6 that matches */
connf_head != NULL) {
} else {
}
return;
}
if (connp->conn_incoming_ifindex != 0 &&
goto no_udp_match;
}
return;
}
(iraflags & IRAF_IPSEC_SECURE)) {
/* Note that mp is NULL */
return;
}
}
/* Found a client; up it goes */
return;
default:
break;
}
/*
* Clear hardware checksumming flag as it is currently only
* used by TCP and UDP.
*/
DB_CKSUMFLAGS(mp) = 0;
switch (protocol) {
case IPPROTO_ICMPV6:
/* Check variable for testing applications */
if (ipst->ips_ipv6_drop_inbound_icmpv6) {
return;
}
/*
* We need to accommodate icmp messages coming in clear
* until we get everything secure from the wire. If
* icmp_accept_clear_messages is zero we check with
* the global policy and act accordingly. If it is
* non-zero, we accept the message without any checks.
* But *this does not mean* that this will be delivered
* to RAW socket clients. By accepting we might send
* replies back, change our MTU value etc.,
* policy dispositions.
*/
if (ipst->ips_icmp_accept_clear_messages == 0) {
return;
}
/*
* On a labeled system, we have to check whether the zone
* itself is permitted to receive raw traffic.
*/
return;
}
}
/* No need to pass to RAW sockets */
return;
}
break;
case IPPROTO_DSTOPTS: {
/* We already check for MIN_EHDR_LEN above */
/* Check if AH is present and needs to be processed. */
return;
/*
* Reinitialize pointers, as ipsec_early_ah_v6() does
* complete pullups. We don't have to do more pullups
* as a result.
*/
goto pkt_too_short;
goto discard;
}
goto pkt_too_short;
goto discard;
}
/*
* Update ira_ip_hdr_length to skip the destination header
* when we repeat.
*/
/*
* Note: XXX This code does not seem to make
* distinction between Destination Options Header
* happen if we are at the end of source route.
* This may become significant in future.
* (No real significant Destination Options are
* defined/implemented yet ).
*/
case -1:
/*
* Packet has been consumed and any needed
* ICMP errors sent.
*/
return;
case 0:
/* No action needed continue */
break;
case 1:
/*
* Unexpected return value
* (Router alert is a Hop-by-Hop option)
*/
#ifdef DEBUG
panic("ip_fanout_v6: router "
"alert hbh opt indication in dest opt");
/*NOTREACHED*/
#else
return;
#endif
}
goto repeat;
}
case IPPROTO_FRAGMENT: {
goto pkt_too_short;
goto discard;
}
/*
* Invoke the CGTP (multirouting) filtering module to
* process the incoming packet. Packets identified as
* duplicates must be discarded. Filtering is active
* only if the ip_cgtp_filter ndd variable is
* non-zero.
*/
if (ipst->ips_ip_cgtp_filter &&
int cgtp_flt_pkt;
/*
* CGTP and IPMP are mutually exclusive so
* phyint_ifindex is fine here.
*/
if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) {
return;
}
}
/*
* Update ip_hdr_length to skip the frag header
* ip_input_fragment_v6 will determine the extension header
* prior to the fragment header and update its nexthdr value,
* and also set ira_protocol to the nexthdr that follows the
* completed fragment.
*/
ip_hdr_length += sizeof (ip6_frag_t);
/*
* Make sure we have ira_l2src before we lose the original
* mblk
*/
/* Reassembly is still pending */
return;
}
/*
* The mblk chain has the frag header removed and
* ira_protocol, ira_pktlen, ira_ip_hdr_length as well as the
* IP header has been updated to reflect the result.
*/
goto repeat;
}
case IPPROTO_HOPOPTS:
/*
* Illegal header sequence.
* (Hop-by-hop headers are processed above
* and required to immediately follow IPv6 header)
*/
return;
case IPPROTO_ROUTING: {
/* Check if AH is present and needs to be processed. */
return;
/*
* Reinitialize pointers, as ipsec_early_ah_v6() does
* complete pullups. We don't have to do more pullups
* as a result.
*/
goto pkt_too_short;
goto discard;
}
goto pkt_too_short;
goto discard;
}
if (rthdr->ip6r_segleft != 0) {
/* Not end of source route */
ip_drop_input("ipIfStatsInForwProhibits",
return;
}
return;
}
goto repeat;
}
case IPPROTO_AH:
case IPPROTO_ESP: {
/*
*/
if (!ipsec_loaded(ipss)) {
return;
}
/* select inbound SA and have IPsec process the pkt */
if (protocol == IPPROTO_ESP) {
return;
ira);
} else {
return;
ira);
}
/*
* Either it failed or is pending. In the former case
* ipIfStatsInDiscards was increased.
*/
return;
}
/* we're done with IPsec processing, send it up */
return;
}
case IPPROTO_NONE:
/* All processing is done. Count as "delivered". */
return;
case IPPROTO_ENCAP:
case IPPROTO_IPV6:
/* iptun will verify trusted label */
return;
}
/* FALLTHRU */
default:
/*
* On a labeled system, we have to check whether the zone
* itself is permitted to receive raw traffic.
*/
return;
}
}
break;
}
/*
* The above input functions may have returned the pulled up message.
* So ip6h need to be reinitialized.
*/
/* No user-level listener for these packets */
return;
}
/*
* Handle fanout to raw sockets. There
* can be more than one stream bound to a particular
* protocol. When this is the case, each one gets a copy
* of any incoming packets.
*/
return;
return;
}