/* ip.h revision 7f125a531d922638f1af62a3d69f9ffc00bc2ee7 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 1990 Mentat Inc.
*/
#ifndef _INET_IP_H
#define _INET_IP_H
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/isa_defs.h>
#include <sys/hook_event.h>
#include <sys/hook_impl.h>
#include <inet/ip_stack.h>
#ifdef _KERNEL
#ifdef DEBUG
#define CONN_DEBUG
#endif
#define IP_DEBUG
/*
* The mt-streams(9F) flags for the IP module; put here so that other
* "drivers" that are actually IP (e.g., ICMP, UDP) can use the same set
* of flags.
*/
#define IP_DEVMTFLAGS D_MP
#endif /* _KERNEL */
#define IP_MOD_NAME "ip"
#define IP_DEV_NAME "/dev/ip"
#define IP6_DEV_NAME "/dev/ip6"
#define UDP_MOD_NAME "udp"
#define UDP_DEV_NAME "/dev/udp"
#define UDP6_DEV_NAME "/dev/udp6"
#define TCP_MOD_NAME "tcp"
#define TCP_DEV_NAME "/dev/tcp"
#define TCP6_DEV_NAME "/dev/tcp6"
#define SCTP_MOD_NAME "sctp"
#ifndef _IPADDR_T
#define _IPADDR_T
#endif
/* Number of bits in an address */
#define IP_ABITS 32
#define IPV4_ABITS IP_ABITS
#define IPV6_ABITS 128
#define IP_MAX_HW_LEN 40
#define ILL_FRAG_HASH_TBL_COUNT ((unsigned int)64)
#define IPV4_ADDR_LEN 4
#define IP_ADDR_LEN IPV4_ADDR_LEN
#define IP_ARP_PROTO_TYPE 0x0800
#define IPV4_VERSION 4
#define IP_VERSION IPV4_VERSION
#define IP_SIMPLE_HDR_LENGTH_IN_WORDS 5
#define IP_SIMPLE_HDR_LENGTH 20
#define IP_MAX_HDR_LENGTH 60
/*
 * TODO: this header and its counterpart that duplicates these definitions
 * should be cleaned up to remove all redundant definitions.
 */
#define IP_MAXPACKET 65535
#define IP_SIMPLE_HDR_VERSION \
#define UDPH_SIZE 8
/*
* Constants and type definitions to support IP IOCTL commands
*/
#define IP_IOC_IRE_DELETE 4
#define IP_IOC_IRE_DELETE_NO_REPLY 5
#define IP_IOC_RTS_REQUEST 7
/* Common definitions used by IP IOCTL data structures */
typedef struct ipllcmd_s {
} ipllc_t;
/* IP IRE Delete Command Structure. */
typedef struct ipid_s {
} ipid_t;
#ifdef _KERNEL
/*
* Temporary state for ip options parser.
*/
typedef struct ipoptp_s
{
} ipoptp_t;
/*
* Flag(s) for ipoptp_flags
*/
#define IPOPTP_ERROR 0x00000001
#endif /* _KERNEL */
/* Controls forwarding of IP packets, set via ipadm(1M)/ndd(1M) */
#define IP_FORWARD_NEVER 0
#define IP_FORWARD_ALWAYS 1
#define IPH_HDR_LENGTH(ipha) \
#define IPH_HDR_VERSION(ipha) \
#ifdef _KERNEL
/*
* IP reassembly macros. We hide starting and ending offsets in b_next and
* b_prev of messages on the reassembly queue. The messages are chained using
* b_cont. These macros are used in ip_reassemble() so we don't have to see
* the ugly casts and assignments.
* Note that the offsets are <= 64k i.e. a uint_t is sufficient to represent
* them.
*/
#define IP_REASS_SET_START(mp, u) \
#define IP_REASS_SET_END(mp, u) \
#define IP_REASS_COMPLETE 0x1
#define IP_REASS_PARTIAL 0x2
#define IP_REASS_FAILED 0x4
/*
* Test to determine whether this is a module instance of IP or a
* driver instance of IP.
*/
#define GRAB_CONN_LOCK(q) { \
}
#define RELEASE_CONN_LOCK(q) { \
}
/*
 * Ref counter macros for ioctls. This provides a guard for TCP to stop a
 * stream from closing while an ioctl is outstanding on it. The ioctl could
 * have been queued on e.g. an ipsq. tcp_close will wait
 * until the ioctlref count is zero before proceeding.
* Ideally conn_oper_pending_ill would be used for this purpose. However, in the
* case where an ioctl is aborted or interrupted, it can be cleared prematurely.
* There are also some race possibilities between ip and the stream head which
* can also end up with conn_oper_pending_ill being cleared prematurely. So, to
* avoid these situations, we use a dedicated ref counter for ioctls which is
* used in addition to and in parallel with the normal conn_ref count.
*/
#define CONN_INC_IOCTLREF_LOCKED(connp) { \
(connp)->conn_ioctlref++; \
}
#define CONN_INC_IOCTLREF(connp) { \
}
#define CONN_DEC_IOCTLREF(connp) { \
/* Make sure conn_ioctlref will not underflow. */ \
if ((--(connp)->conn_ioctlref == 0) && \
} \
}
/*
* Complete the pending operation. Usually an ioctl. Can also
* be a bind or option management request that got enqueued
* in an ipsq_t. Called on completion of the operation.
*/
#define CONN_OPER_PENDING_DONE(connp) { \
CONN_DEC_REF(connp); \
}
/*
* Values for squeue switch:
*/
#define IP_SQUEUE_ENTER_NODRAIN 1
#define IP_SQUEUE_ENTER 2
#define IP_SQUEUE_FILL 3
extern int ip_squeue_flag;
/* IP Fragmentation Reassembly Header */
/*
 * NOTE(review): most members of the original structure are not visible in
 * this chunk; only the hash linkage and the hole/end accounting used by
 * the reassembly code remain.
 */
typedef struct ipf_s {
	struct ipf_s *ipf_hash_next;	/* next entry on the same hash chain; */
					/* presumably the ill frag hash table */
					/* (ILL_FRAG_HASH_TBL_COUNT) -- confirm */
	int ipf_hole_cnt;	/* Number of holes (hard-case). */
	int ipf_end;		/* Tail end offset (0 -> hard-case). */
} ipf_t;
/*
* IPv4 Fragments
*/
#define IS_V4_FRAGMENT(ipha_fragment_offset_and_flags) \
#endif /* _KERNEL */
/* ICMP types */
#define ICMP_ECHO_REPLY 0
#define ICMP_DEST_UNREACHABLE 3
#define ICMP_SOURCE_QUENCH 4
#define ICMP_REDIRECT 5
#define ICMP_ECHO_REQUEST 8
#define ICMP_ROUTER_ADVERTISEMENT 9
#define ICMP_ROUTER_SOLICITATION 10
#define ICMP_TIME_EXCEEDED 11
#define ICMP_PARAM_PROBLEM 12
#define ICMP_TIME_STAMP_REQUEST 13
#define ICMP_TIME_STAMP_REPLY 14
#define ICMP_INFO_REQUEST 15
#define ICMP_INFO_REPLY 16
#define ICMP_ADDRESS_MASK_REQUEST 17
#define ICMP_ADDRESS_MASK_REPLY 18
/* Evaluates to true if the ICMP type is an ICMP error */
#define ICMP_IS_ERROR(type) ( \
(type) == ICMP_DEST_UNREACHABLE || \
(type) == ICMP_SOURCE_QUENCH || \
(type) == ICMP_TIME_EXCEEDED || \
(type) == ICMP_PARAM_PROBLEM)
/* ICMP_TIME_EXCEEDED codes */
#define ICMP_TTL_EXCEEDED 0
#define ICMP_REASSEMBLY_TIME_EXCEEDED 1
/* ICMP_DEST_UNREACHABLE codes */
#define ICMP_NET_UNREACHABLE 0
#define ICMP_HOST_UNREACHABLE 1
#define ICMP_PROTOCOL_UNREACHABLE 2
#define ICMP_PORT_UNREACHABLE 3
#define ICMP_FRAGMENTATION_NEEDED 4
#define ICMP_SOURCE_ROUTE_FAILED 5
#define ICMP_DEST_NET_UNKNOWN 6
#define ICMP_DEST_HOST_UNKNOWN 7
#define ICMP_SRC_HOST_ISOLATED 8
#define ICMP_DEST_NET_UNREACH_ADMIN 9
#define ICMP_DEST_HOST_UNREACH_ADMIN 10
#define ICMP_DEST_NET_UNREACH_TOS 11
#define ICMP_DEST_HOST_UNREACH_TOS 12
/* ICMP Header Structure */
typedef struct icmph_s {
union {
} u_echo;
struct { /* Destination unreachable structure */
} u_du;
struct { /* Parameter problem structure */
} u_pp;
struct { /* Redirect structure */
} u_rd;
} icmph_u;
} icmph_t;
#define ICMPH_SIZE 8
/*
* Minimum length of transport layer header included in an ICMP error
* message for it to be considered valid.
*/
#define ICMP_MIN_TP_HDR_LEN 8
/* Aligned IP header */
typedef struct ipha_s {
} ipha_t;
/*
* IP Flags
*
 * Some of these constant names are copied for the DTrace IP provider and
 * the two sets of definitions must be kept in sync.
*/
/* Byte-order specific values */
#ifdef _BIG_ENDIAN
#else
#endif
/* ECN code points for IPv4 TOS byte and IPv6 traffic class octet. */
struct ill_s;
/* IP Mac info structure */
/*
 * Per-media (link type) information, used to reduce the IP stack's link
 * layer dependency; see the block comment that follows this typedef.
 * NOTE(review): additional members of the original structure are not
 * visible in this chunk.
 */
typedef struct ip_m_s {
	int ip_m_type;	/* media/interface type (IFT_*), from <net/if_types.h> */
} ip_m_t;
/*
* The following functions attempt to reduce the link layer dependency
* of the IP stack. The current set of link specific operations are:
* a. map from IPv4 class D (224.0/4) multicast address range or the
* IPv6 multicast address range (ff00::/8) to the link layer multicast
* address.
* b. derive the default IPv6 interface identifier from the interface.
* c. derive the default IPv6 destination interface identifier from
* the interface (point-to-point only).
*/
/* ip_m_v6*intfid return void and are never NULL */
/* Router entry types */
#ifndef _KERNEL
/* Keep so user-level still compiles */
#endif
/* net without any address mapping. */
/* net with resolver. */
/* Keep so user-level still compiles */
#define IRE_OFFLINK IRE_OFFSUBNET
/*
 * Note that we view IRE_NOROUTE as ONLINK since we can "send" to such
 * destinations without going through a gateway.
 */
/* Arguments to ire_flush_cache() */
#define IRE_FLUSH_DELETE 0
#define IRE_FLUSH_ADD 1
#define IRE_FLUSH_GWCHANGE 2
/*
* Flags to ire_route_recursive
*/
#define IRR_NONE 0
/*
* These are kept in a separate field in the conn and the synchronization
* depends on the atomic 32 bit access to that field.
*/
/*
* Flags for dce_flags field. Specifies which information has been set.
* dce_ident is always present, but the other ones are identified by the flags.
*/
#ifdef _KERNEL
/*
*/
#define MAX_FILTER_SIZE 64
/*
 * Multicast source-address list. NOTE(review): the array of addresses
 * itself is not visible in this chunk.
 */
typedef struct slist_s {
	int sl_numsrc;	/* number of source addresses in the list; */
			/* presumably bounded by MAX_FILTER_SIZE -- confirm */
} slist_t;
/*
* Following struct is used to maintain retransmission state for
* a multicast group. One rtx_state_t struct is an in-line field
* of the ilm_t struct; the slist_ts in the rtx_state_t struct are
* alloc'd as needed.
*/
typedef struct rtx_state_s {
int rtx_cnt; /* retrans count */
int rtx_fmode_cnt; /* retrans count for fmode change */
} rtx_state_t;
/*
* Used to construct list of multicast address records that will be
* sent in a single listener report.
*/
typedef struct mrec_s {
} mrec_t;
/* Group membership list per upper conn */
/*
* We record the multicast information from the socket option in
* ilg_ifaddr/ilg_ifindex. This allows rejoining the group in the case when
* the ifaddr (or ifindex) disappears and later reappears, potentially on
* a different ill. The IPv6 multicast socket options and ioctls all specify
* the interface address and others use the index. We record here the method
* that was actually used (and leave the other of ilg_ifaddr or ilg_ifindex)
* at zero so that we can rejoin the way the application intended.
*
* We track the ill on which we will or already have joined an ilm using
* ilg_ill. When we have succeeded joining the ilm and have a refhold on it
* then we set ilg_ilm. Thus intentionally there is a window where ilg_ill is
* set and ilg_ilm is not set. This allows clearing ilg_ill as a signal that
* the ill is being unplumbed and the ilm should be discarded.
*
* ilg records the state of multicast memberships of a socket end point.
* ilm records the state of multicast memberships with the driver and is
* maintained per interface.
*
* The ilg state is protected by conn_ilg_lock.
* The ilg will not be freed until ilg_refcnt drops to zero.
*/
typedef struct ilg_s {
} ilg_t;
/*
* Multicast address list entry for ill.
* ilm_ill is used by IPv4 and IPv6
*
* The ilm state (and other multicast state on the ill) is protected by
* ill_mcast_lock. Operations that change state on both an ilg and ilm
* in addition use ill_mcast_serializer to ensure that we can't have
* interleaving between e.g., add and delete operations for the same conn_t,
* group, and ill. The ill_mcast_serializer is also used to ensure that
* multicast group joins do not occur on an interface that is in the process
* of joining an IPMP group.
*
* The comment below (and for other netstack_t references) refers
* to the fact that we only do netstack_hold in particular cases,
* such as the references from open endpoints (ill_t and conn_t's
* pointers). Internally within IP we rely on IP's ability to cleanup e.g.
* ire_t's when an ill goes away.
*/
/*
 * NOTE(review): most members of the original structure (group address,
 * ilm_ill backpointer, filter state, etc., per the comment above) are not
 * visible in this chunk.
 */
typedef struct ilm_s {
	int ilm_refcnt;		/* reference count; presumably the ilm is */
				/* not freed until it drops to zero -- confirm */
	int ilm_no_ilg_cnt;	/* number of joins w/ no ilg */
} ilm_t;
/*
* Soft reference to an IPsec SA.
*
* On relative terms, conn's can be persistent (living as long as the
* processes which create them), while SA's are ephemeral (dying when
* they hit their time-based or byte-based lifetimes).
*
* We could hold a hard reference to an SA from an ipsec_latch_t,
* but this would cause expired SA's to linger for a potentially
* unbounded time.
*
* Instead, we remember the hash bucket number and bucket generation
* in addition to the pointer. The bucket generation is incremented on
* each deletion.
*/
typedef struct ipsa_ref_s
{
	struct isaf_s *ipsr_bucket;	/* SA hash bucket (see comment above); */
					/* the bucket-generation member noted */
					/* above is not visible in this chunk */
} ipsa_ref_t;
/*
* IPsec "latching" state.
*
* In the presence of IPsec policy, fully-bound conn's bind a connection
* to more than just the 5-tuple, but also a specific IPsec action and
* identity-pair.
* The identity pair is accessed from both the receive and transmit side
* hence it is maintained in the ipsec_latch_t structure. conn_latch and
* ixa_ipsec_latch points to it.
* The policy and actions are stored in conn_latch_in_policy and
* conn_latch_in_action for the inbound side, and in ixa_ipsec_policy and
* ixa_ipsec_action for the transmit side.
*
* As an optimization, we also cache soft references to IPsec SA's in
* ip_xmit_attr_t so that we can fast-path around most of the work needed for
* outbound IPsec SA selection.
*/
typedef struct ipsec_latch_s
{
struct ipsid_s *ipl_local_cid;
struct ipsid_s *ipl_remote_cid;
unsigned int
ipl_ids_latched : 1,
ipl_pad_to_bit_31 : 31;
#define IPLATCH_REFHOLD(ipl) { \
}
#define IPLATCH_REFRELE(ipl) { \
membar_exit(); \
iplatch_free(ipl); \
}
/*
* peer identity structure.
*/
/*
*/
typedef struct ipsec_selector {
/*
* Note that we put v4 addresses in the *first* 32-bit word of the
* in spd.c
*/
/* Values used in IP by IPSEC Code */
#define IPSEC_OUTBOUND B_TRUE
#define IPSEC_INBOUND B_FALSE
/*
* There are two variants in policy failures. The packet may come in
* secure when not needed (IPSEC_POLICY_???_NOT_NEEDED) or it may not
* have the desired level of protection (IPSEC_POLICY_MISMATCH).
*/
#define IPSEC_POLICY_NOT_NEEDED 0
#define IPSEC_POLICY_MISMATCH 1
#define IPSEC_POLICY_AUTH_NOT_NEEDED 2
#define IPSEC_POLICY_ENCR_NOT_NEEDED 3
#define IPSEC_POLICY_SE_NOT_NEEDED 4
/*
* Check with IPSEC inbound policy if
*
* 1) per-socket policy is present - indicated by conn_in_enforce_policy.
* 2) Or if we have not cached policy on the conn and the global policy is
* non-empty.
*/
((connp)->conn_in_enforce_policy || \
(!((connp)->conn_policy_cached) && \
((connp)->conn_in_enforce_policy || \
(!(connp)->conn_policy_cached && \
((connp)->conn_out_enforce_policy || \
(!((connp)->conn_policy_cached) && \
((connp)->conn_out_enforce_policy || \
(!(connp)->conn_policy_cached && \
/*
* Information cached in IRE for upper layer protocol (ULP).
*/
/*
 * NOTE(review): this declaration appears truncated in this chunk -- the
 * bit-field below lacks its base type and its sibling fields, and the
 * "These three" comment refers to members that are no longer visible.
 */
typedef struct iulp_s {
	/* These three are passed out by ip_set_destination */
	iulp_not_used : 25;
} iulp_t;
/*
* The conn drain list structure (idl_t), protected by idl_lock. Each conn_t
* inserted in the list points back at this idl_t using conn_idl, and is
* chained by conn_drain_next and conn_drain_prev, which are also protected by
* idl_lock. When flow control is relieved, either ip_wsrv() (STREAMS) or
* ill_flow_enable() (non-STREAMS) will call conn_drain().
*
* The conn drain list, idl_t, itself is part of tx cookie list structure.
* A tx cookie list points to a blocked Tx ring and contains the list of
* all conn's that are blocked due to the flow-controlled Tx ring (via
* the idl drain list). Note that a link can have multiple Tx rings. The
* drain list will store the conn's blocked due to Tx ring being flow
* controlled.
*/
typedef uintptr_t ip_mac_tx_cookie_t;
typedef struct idl_tx_list_s idl_tx_list_t;
struct idl_tx_list_s {
int txl_drain_index;
};
struct idl_s {
};
/*
* Interface route structure which holds the necessary information to recreate
* routes that are tied to an interface i.e. have ire_ill set.
*
* These routes which were initially created via a routing socket or via the
* SIOCADDRT ioctl may be gateway routes (RTF_GATEWAY being set) or may be
* traditional interface routes. When an ill comes back up after being
* down, this information will be used to recreate the routes. These
* are part of an mblk_t chain that hangs off of the ILL (ill_saved_ire_mp).
*/
typedef struct ifrt_s {
} ifrt_t;
/* Number of IP addresses that can be hosted on a physical interface */
#define MAX_ADDRS_PER_IF 8192
/*
* Number of Source addresses to be considered for source address
* selection. Used by ipif_select_source_v4/v6.
*/
#define MAX_IPIF_SELECT_SOURCE 50
#ifdef IP_DEBUG
/*
* Trace refholds and refreles for debugging.
*/
#define TR_STACK_DEPTH 14
/* One captured stack trace for the refhold/refrele debug facility. */
typedef struct tr_buf_s {
	int tr_depth;	/* number of frames captured; presumably at most */
			/* TR_STACK_DEPTH -- confirm */
} tr_buf_t;
/* Per-thread refhold/refrele trace state (see comment above). */
typedef struct th_trace_s {
	int th_refcnt;	/* presumably the net refhold count traced for */
			/* this thread -- confirm */
#define	TR_BUF_MAX	38
} th_trace_t;
typedef struct th_hash_s {
} th_hash_t;
#endif
/* The following are ipif_state_flags */
/* IP interface structure, one per local address */
typedef struct ipif_s {
int ipif_id; /* Logical unit number */
/*
* The packet count in the ipif contain the sum of the
*/
/* Exclusive bit fields, protected by ipsq_t */
unsigned int
ipif_pad_to_31 : 28;
/*
 * For an IPMP interface, ipif_bound_ill tracks the ill whose hardware
 * information this ipif uses. It is safe to store
 * an ill pointer (rather than an index) because only ills that are
* part of a group will be pointed to, and an ill cannot disappear
* while it's in a group.
*/
struct ill_s *ipif_bound_ill;
} ipif_t;
/*
* The following table lists the protection levels of the various members
* of the ipif_t. The following notation is used.
*
* Write once - Written to only once at the time of bringing up
* the interface and can be safely read after the bringup without any lock.
*
* ipsq - Need to execute in the ipsq to perform the indicated access.
*
* ill_lock - Need to hold this mutex to perform the indicated access.
*
* write access respectively.
*
* down ill - Written to only when the ill is down (i.e all ipifs are down)
* up ill - Read only when the ill is up (i.e. at least 1 ipif is up)
*
* Table of ipif_t members and their protection
*
* ipif_next ipsq + ill_lock + ipsq OR ill_lock OR
* ill_g_lock ill_g_lock
* ipif_ill ipsq + down ipif write once
* ipif_id ipsq + down ipif write once
* ipif_v6lcl_addr ipsq + down ipif up ipif
* ipif_v6subnet ipsq + down ipif up ipif
* ipif_v6net_mask ipsq + down ipif up ipif
*
* ipif_v6brd_addr
* ipif_v6pp_dst_addr
* ipif_flags ill_lock ill_lock
* ipif_ire_type ipsq + down ill up ill
*
* ipif_ib_pkt_count Approx
*
* bit fields ill_lock ill_lock
*
* ipif_allhosts_ilm ipsq ipsq
* ipif_solmulti_ilm ipsq ipsq
*
* ipif_seqid ipsq Write once
*
* ipif_state_flags ill_lock ill_lock
* ipif_refcnt ill_lock ill_lock
* ipif_bound_ill ipsq + ipmp_lock ipsq OR ipmp_lock
* ipif_bound_next ipsq ipsq
* ipif_bound ipsq ipsq
*
* ipif_ire_local ipsq + ips_ill_g_lock ipsq OR ips_ill_g_lock
* ipif_ire_if ipsq + ips_ill_g_lock ipsq OR ips_ill_g_lock
*/
/*
* Return values from ip_laddr_verify_{v4,v6}
*/
#ifdef DEBUG
#else
#define IPIF_TRACE_REF(ipif)
#define ILL_TRACE_REF(ill)
#define IPIF_UNTRACE_REF(ipif)
#define ILL_UNTRACE_REF(ill)
#endif
/* IPv4 compatibility macros */
/* Macros for easy backreferences to the ill. */
/*
* mode value for ip_ioctl_finish for finishing an ioctl
*/
/*
* The IP-MT design revolves around the serialization objects ipsq_t (IPSQ)
* and ipxop_t (exclusive operation or "xop"). Becoming "writer" on an IPSQ
* ensures that no other threads can become "writer" on any IPSQs sharing that
* IPSQ's xop until the writer thread is done.
*
* Each phyint points to one IPSQ that remains fixed over the phyint's life.
* Each IPSQ points to one xop that can change over the IPSQ's life. If a
* phyint is *not* in an IPMP group, then its IPSQ will refer to the IPSQ's
* "own" xop (ipsq_ownxop). If a phyint *is* part of an IPMP group, then its
* IPSQ will refer to the "group" xop, which is shorthand for the xop of the
* IPSQ of the IPMP meta-interface's phyint. Thus, all phyints that are part
* of the same IPMP group will have their IPSQ's point to the group xop, and
* thus becoming "writer" on any phyint in the group will prevent any other
* writer on any other phyint in the group. All IPSQs sharing the same xop
* are chained together through ipsq_next (in the degenerate common case,
* ipsq_next simply refers to itself). Note that the group xop is guaranteed
* to exist at least as long as there are members in the group, since the IPMP
* meta-interface can only be destroyed if the group is empty.
*
* Incoming exclusive operation requests are enqueued on the IPSQ they arrived
* on rather than the xop. This makes switching xop's (as would happen when a
* phyint leaves an IPMP group) simple, because after the phyint leaves the
* group, any operations enqueued on its IPSQ can be safely processed with
* respect to its new xop, and any operations enqueued on the IPSQs of its
* former group can be processed with respect to their existing group xop.
* Even so, switching xops is a subtle dance; see ipsq_dq() for details.
*
 * An IPSQ's "own" xop is embedded within the IPSQ itself since they have
 * identical lifetimes, and because doing so simplifies pointer management.
 * While each phyint and IPSQ point to each other, it is not possible to free
 * the IPSQ when the phyint is freed, since we may still be *inside* the IPSQ
 * when the phyint is being freed. Thus, ipsq_phyint is set to NULL when the
 * phyint is freed, and the IPSQ free is later done in ipsq_exit().
*
* ipsq_t synchronization: read write
*
* ipsq_xopq_mphead ipx_lock ipx_lock
* ipsq_xopq_mptail ipx_lock ipx_lock
* ipsq_xop_switch_mp ipsq_lock ipsq_lock
* ipsq_phyint write once write once
* ipsq_next RW_READER ill_g_lock RW_WRITER ill_g_lock
* ipsq_xop ipsq_lock or ipsq ipsq_lock + ipsq
* ipsq_swxop ipsq ipsq
* ipsq_ownxop see ipxop_t see ipxop_t
* ipsq_ipst write once write once
*
* ipxop_t synchronization: read write
*
* ipx_writer ipx_lock ipx_lock
* ipx_xop_queued ipx_lock ipx_lock
* ipx_mphead ipx_lock ipx_lock
* ipx_mptail ipx_lock ipx_lock
* ipx_ipsq write once write once
* ips_ipsq_queued ipx_lock ipx_lock
* ipx_waitfor ipsq or ipx_lock ipsq + ipx_lock
* ipx_reentry_cnt ipsq or ipx_lock ipsq + ipx_lock
* ipx_current_done ipsq ipsq
* ipx_current_ioctl ipsq ipsq
* ipx_current_ipif ipsq or ipx_lock ipsq + ipx_lock
* ipx_pending_ipif ipsq or ipx_lock ipsq + ipx_lock
* ipx_pending_mp ipsq or ipx_lock ipsq + ipx_lock
* ipx_forced ipsq ipsq
* ipx_depth ipsq ipsq
* ipx_stack ipsq ipsq
*/
/*
 * Exclusive operation ("xop") object; see the IP-MT design comment above.
 * Member locking is listed in the "ipxop_t synchronization" table above.
 * NOTE(review): several members of the original structure are not visible
 * in this chunk.
 */
typedef struct ipxop_s {
	int ipx_waitfor;	/* waiting; values encoded below */
	int ipx_reentry_cnt;	/* presumably the writer's re-entry depth */
				/* (see sync table above) -- confirm */
	int ipx_current_ioctl;	/* current ioctl, or 0 if no ioctl */
#ifdef DEBUG
	int ipx_depth;		/* debugging aid */
#define	IPX_STACK_DEPTH	15
#endif
} ipxop_t;
typedef struct ipsq_s {
} ipsq_t;
/*
* ipx_waitfor values:
*/
/* Encoded in ipx_waitfor (see ipxop_t above). */
enum {
	ILL_DOWN,	/* ill_down() waiting for refcnts to drop */
	IPIF_FREE,	/* ipif_free() waiting for refcnts to drop */
	ILL_FREE	/* ill unplumb waiting for refcnts to drop */
};
/* Operation types for ipsq_try_enter() */
#define CUR_OP 0 /* request writer within current operation */
/*
* Kstats tracked on each IPMP meta-interface. Order here must match
*/
enum {
IPMP_KSTAT_MAX /* keep last */
};
/*
* phyint represents state that is common to both IPv4 and IPv6 interfaces.
* There is a separate ill_t representing IPv4 and IPv6 which has a
* backpointer to the phyint structure for accessing common state.
*/
/*
 * NOTE(review): the other common (shared v4/v6) state members of the
 * original structure are not visible in this chunk.
 */
typedef struct phyint {
	struct ill_s *phyint_illv4;	/* the IPv4 ill, per the comment above */
	struct ill_s *phyint_illv6;	/* the IPv6 ill, per the comment above */
} phyint_t;
#define CACHE_ALIGN_SIZE 64
struct _phyint_list_s_ {
};
typedef union phyint_list_u {
struct _phyint_list_s_ phyint_list_s;
/*
* Fragmentation hash bucket
*/
typedef struct ipfb_s {
} ipfb_t;
/*
* IRE bucket structure. Usually there is an array of such structures,
* each pointing to a linked list of ires. irb_refcnt counts the number
* of walkers of a given hash bucket. Usually the reference count is
* bumped up if the walker wants no IRES to be DELETED while walking the
* list. Bumping up does not PREVENT ADDITION. This allows walking a given
* hash bucket without stumbling up on a free pointer.
*
* irb_t structures in ip_ftable are dynamically allocated and freed.
* In order to identify the irb_t structures that can be safely kmem_free'd
* we need to ensure that
* - the irb_refcnt is quiescent, indicating no other walkers,
* - no other threads or ire's are holding references to the irb,
* i.e., irb_nire == 0,
* - there are no active ire's in the bucket, i.e., irb_ire_cnt == 0
*/
typedef struct irb {
	/* Should be first in this struct */
	/* NOTE(review): the member that comment refers to is not visible here */
	/* Once IPv6 uses radix then IRB_MARK_DYNAMIC will always be set */
	int irb_nire;	/* Num of ftable ire's that ref irb */
} irb_t;
/*
* This is the structure used to store the multicast physical addresses
* that an interface has joined.
* The refcnt keeps track of the number of multicast IP addresses mapping
* to a physical multicast address.
*/
typedef struct multiphysaddr_s {
struct multiphysaddr_s *mpa_next;
char mpa_addr[IP_MAX_HW_LEN];
int mpa_refcnt;
/* Forward declarations */
struct dce_s;
struct ire_s;
struct ncec_s;
struct nce_s;
struct ip_recv_attr_s;
typedef struct ip_recv_attr_s ip_recv_attr_t;
struct ip_xmit_attr_s;
typedef struct ip_xmit_attr_s ip_xmit_attr_t;
struct tsol_ire_gw_secattr_s;
typedef struct tsol_ire_gw_secattr_s tsol_ire_gw_secattr_t;
/*
* This is a structure for a one-element route cache that is passed
* by reference between ip_input and ill_inputfn.
*/
typedef struct {
} rtc_t;
/*
* Note: Temporarily use 64 bits, and will probably go back to 32 bits after
* more cleanup work is done.
*/
/* The ill input function pointer type */
rtc_t *);
/* The ire receive function pointer type */
/* The ire send and postfrag function pointer types */
ip_xmit_attr_t *, uint32_t *);
#define IP_V4_G_HEAD 0
#define IP_V6_G_HEAD 1
#define MAX_G_HEADS 2
/*
* unpadded ill_if structure
*/
struct _ill_if_s_ {
	union ill_if_u *illif_next;	/* next entry on the doubly-linked */
					/* interface list */
	union ill_if_u *illif_prev;	/* previous entry on the list */
	int illif_name_len;	/* name length */
};
/* cache aligned ill_if structure */
typedef union ill_if_u {
struct _ill_if_s_ ill_if_s;
} ill_if_t;
typedef struct ill_walk_context_s {
int ctx_current_list; /* current list being searched */
int ctx_last_list; /* last list to search */
/*
* ill_g_heads structure, one for IPV4 and one for IPV6
*/
struct _ill_g_head_s_ {
};
typedef union ill_g_head_u {
struct _ill_g_head_s_ ill_g_head_s;
} ill_g_head_t;
#define IP_V4_ILL_G_LIST(ipst) \
#define IP_V6_ILL_G_LIST(ipst) \
#define IP_VX_ILL_G_LIST(i, ipst) \
/*
* Capabilities, possible flags for ill_capabilities.
*/
/*
 * Per-ill Hardware Checksumming capabilities.
*/
typedef struct ill_hcksum_capab_s ill_hcksum_capab_t;
/*
* Per-ill Zero-copy capabilities.
*/
typedef struct ill_zerocopy_capab_s ill_zerocopy_capab_t;
/*
 * DLD capabilities.
*/
typedef struct ill_dld_capab_s ill_dld_capab_t;
/*
* Per-ill polling resource map.
*/
typedef struct ill_rx_ring ill_rx_ring_t;
/*
* Per-ill Large Send Offload capabilities.
*/
typedef struct ill_lso_capab_s ill_lso_capab_t;
/* The following are ill_state_flags */
/*
* ILL_DOWN_IN_PROGRESS is set to ensure the following:
* - no packets are sent to the driver after the DL_UNBIND_REQ is sent,
* - no longstanding references will be acquired on objects that are being
* brought down.
*/
#define ILL_DOWN_IN_PROGRESS 0x08
/* Is this an ILL whose source address is used by other ILL's ? */
#define IS_USESRC_ILL(ill) \
(((ill)->ill_usesrc_ifindex == 0) && \
#define IS_USESRC_CLI_ILL(ill) \
(((ill)->ill_usesrc_ifindex != 0) && \
/* Is this an virtual network interface (vni) ILL ? */
/* Is this a loopback ILL? */
#define IS_LOOPBACK(ill) \
/* Is this an IPMP meta-interface ILL? */
/* Is this ILL under an IPMP meta-interface? (aka "in a group?") */
#define IS_UNDER_IPMP(ill) \
/* Is ill1 in the same illgrp as ill2? */
/* Is ill1 on the same LAN as ill2? */
/*
 * IPMP group ILL state structure -- up to two per IPMP group (V4 and V6);
 * it is guaranteed to persist while there are interfaces of that type in
 * the group.
* In general, most fields are accessed outside of the IPSQ (e.g., in the
* datapath), and thus use locks in addition to the IPSQ for protection.
*
* synchronization: read write
*
* ig_if ipsq or ill_g_lock ipsq and ill_g_lock
* ig_actif ipsq or ipmp_lock ipsq and ipmp_lock
* ig_nactif ipsq or ipmp_lock ipsq and ipmp_lock
* ig_next_ill ipsq or ipmp_lock ipsq and ipmp_lock
* ig_ipmp_ill write once write once
* ig_cast_ill ipsq or ipmp_lock ipsq and ipmp_lock
* ig_arpent ipsq ipsq
* ig_mtu ipsq ipsq
*/
typedef struct ipmp_illgrp_s {
/*
* IPMP group state structure -- one per IPMP group. Created when the
* IPMP meta-interface is plumbed; it is guaranteed to persist while there
* are interfaces in it.
*
* ipmp_grp_t synchronization: read write
*
* gr_name ipmp_lock ipmp_lock
* gr_ifname write once write once
* gr_mactype ipmp_lock ipmp_lock
* gr_phyint write once write once
* gr_nif ipmp_lock ipmp_lock
* gr_nactif ipsq ipsq
* gr_v4 ipmp_lock ipmp_lock
* gr_v6 ipmp_lock ipmp_lock
* gr_nv4 ipmp_lock ipmp_lock
* gr_nv6 ipmp_lock ipmp_lock
* gr_pendv4 ipmp_lock ipmp_lock
* gr_pendv6 ipmp_lock ipmp_lock
* gr_linkdownmp ipsq ipsq
* gr_ksp ipmp_lock ipmp_lock
* gr_kstats0 atomic atomic
*/
typedef struct ipmp_grp_s {
} ipmp_grp_t;
/*
* IPMP ARP entry -- one per SIOCS*ARP entry tied to the group. Used to keep
* ARP up-to-date as the active set of interfaces in the group changes.
*/
typedef struct ipmp_arpent_s {
struct arl_s;
/*
* Per-ill capabilities.
*/
/*
 * NOTE(review): the members of these three capability structures are not
 * visible in this chunk; only the (empty-looking) shells remain. Their
 * typedef names are declared earlier in this header.
 */
struct ill_hcksum_capab_s {
};
struct ill_zerocopy_capab_s {
};
struct ill_lso_capab_s {
};
/*
* IP Lower level Structure.
* Instance data structure in ip_open when there is a device below us.
*/
typedef struct ill_s {
int ill_error; /* Error value sent up by device. */
char *ill_name; /* Our name. */
/*
* Physical Point of Attachment num. If DLPI style 1 provider
* then this is derived from the devname.
*/
/* supports broadcast. */
int ill_mcast_type; /* type of router which is querier */
/* on this interface */
int ill_mcast_qi; /* IGMPv3/MLDv2 query interval var */
/*
* All non-NULL cells between 'ill_first_mp_to_free' and
* 'ill_last_mp_to_free' are freed in ill_delete.
*/
#define ill_first_mp_to_free ill_bcast_mp
/* Following bit fields protected by ipsq_t */
ill_needs_attach : 1,
ill_reserved : 1,
ill_isv6 : 1,
ill_dlpi_style_set : 1,
ill_ifname_pending : 1,
ill_logical_down : 1,
ill_dl_up : 1,
ill_up_ipifs : 1,
ill_replumbing : 1,
ill_arl_dlpi_pending : 1,
ill_grp_pending : 1,
ill_pad_to_bit_31 : 17;
/* Following bit fields protected by ill_lock */
/*
* ill_manual_linklocal : system will not change the
* linklocal whenever ill_token changes.
*/
ill_manual_linklocal : 1,
ill_pad_bit_31 : 27;
/*
* Used in SIOCSIFMUXID and SIOCGIFMUXID for 'ifconfig unplumb'.
*/
int ill_muxid; /* muxid returned from plink */
/* Used for IP frag reassembly throttling on a per ILL basis. */
int ill_type; /* From <net/if_types.h> */
/*
* Capabilities related fields.
*/
/*
* The ill_nd_lla* fields handle the link layer address option
* from neighbor discovery. This is used for external IPv6
* address resolution.
*/
/*
* We have 4 phys_addr_req's sent down. This field keeps track
* of which one is pending.
*/
/*
* Used to save errors that occur during plumbing
*/
/*
* Contains the upper read queue pointer of the module immediately
* beneath IP. This field allows IP to validate sub-capability
* acknowledgments coming up from downstream.
*/
void *ill_flownotify_mh; /* Tx flow ctl, mac cb handle */
/* with ire_ill set so they can */
/* survive the ill going down and up. */
struct arl_ill_common_s *ill_common;
/*
* IPMP fields.
*/
/* list of multicast physical addresses joined on this ill */
} ill_t;
/*
* ILL_FREE_OK() means that there are no incoming pointer references
* to the ill.
*/
#define ILL_FREE_OK(ill) \
/*
* quiescence checks. See comments above IPIF_DOWN_OK for details
* on why ires and nces are selectively considered for this macro.
*/
#define ILL_DOWN_OK(ill) \
ill->ill_nce_cnt == 0)
/*
* The following table lists the protection levels of the various members
* of the ill_t. Same notation as that used for ipif_t above is used.
*
* Write Read
*
* ill_ifptr ill_g_lock + s Write once
* ill_rq ipsq Write once
* ill_wq ipsq Write once
*
* ill_error ipsq None
* ill_ipif ill_g_lock + ipsq ill_g_lock OR ipsq
* ill_ipif_up_count ill_lock + ipsq ill_lock OR ipsq
* ill_max_frag ill_lock ill_lock
* ill_current_frag ill_lock ill_lock
*
* ill_name ill_g_lock + ipsq Write once
* ill_name_length ill_g_lock + ipsq Write once
* ill_ndd_name ipsq Write once
* ill_net_type ipsq Write once
* ill_ppa ill_g_lock + ipsq Write once
* ill_sap ipsq + down ill Write once
* ill_sap_length ipsq + down ill Write once
* ill_phys_addr_length ipsq + down ill Write once
*
* ill_bcast_addr_length ipsq ipsq
* ill_mactype ipsq ipsq
* ill_frag_ptr ipsq ipsq
*
* ill_frag_timer_id ill_lock ill_lock
* ill_frag_hash_tbl ipsq up ill
* ill_ilm ill_mcast_lock(WRITER) ill_mcast_lock(READER)
* ill_global_timer ill_mcast_lock(WRITER) ill_mcast_lock(READER)
* ill_mcast_type ill_mcast_lock(WRITER) ill_mcast_lock(READER)
* ill_mcast_v1_time ill_mcast_lock(WRITER) ill_mcast_lock(READER)
* ill_mcast_v2_time ill_mcast_lock(WRITER) ill_mcast_lock(READER)
* ill_mcast_v1_tset ill_mcast_lock(WRITER) ill_mcast_lock(READER)
* ill_mcast_v2_tset ill_mcast_lock(WRITER) ill_mcast_lock(READER)
* ill_mcast_rv ill_mcast_lock(WRITER) ill_mcast_lock(READER)
* ill_mcast_qi ill_mcast_lock(WRITER) ill_mcast_lock(READER)
*
* ill_down_mp ipsq ipsq
* ill_dlpi_deferred ill_lock ill_lock
* ill_dlpi_pending ipsq + ill_lock ipsq or ill_lock or
* absence of ipsq writer.
* ill_phys_addr_mp ipsq + down ill only when ill is up
* ill_mcast_deferred ill_lock ill_lock
* ill_phys_addr ipsq + down ill only when ill is up
* ill_dest_addr_mp ipsq + down ill only when ill is up
* ill_dest_addr ipsq + down ill only when ill is up
*
* ill_state_flags ill_lock ill_lock
* exclusive bit flags ipsq_t ipsq_t
* shared bit flags ill_lock ill_lock
*
* ill_muxid ipsq Not atomic
*
* ill_ipf_gen Not atomic
* ill_frag_count atomics atomics
* ill_type ipsq + down ill only when ill is up
* ill_dlpi_multicast_state ill_lock ill_lock
* ill_dlpi_fastpath_state ill_lock ill_lock
* ill_dlpi_capab_state ipsq ipsq
* ill_max_hops ipsq Not atomic
*
* ill_mtu ill_lock None
*
* ill_user_mtu ipsq + ill_lock ill_lock
* ill_reachable_time ipsq + ill_lock ill_lock
* ill_reachable_retrans_time ipsq + ill_lock ill_lock
* ill_max_buf ipsq + ill_lock ill_lock
*
* Next 2 fields need ill_lock because of the get ioctls. They should not
* report partially updated results without executing in the ipsq.
* ill_token ipsq + ill_lock ill_lock
* ill_token_length ipsq + ill_lock ill_lock
* ill_dest_token ipsq + down ill only when ill is up
* ill_xmit_count ipsq + down ill write once
* ill_ip6_mib ipsq + down ill only when ill is up
* ill_icmp6_mib ipsq + down ill only when ill is up
*
* ill_phyint ipsq, ill_g_lock, ill_lock Any of them
* ill_flags ill_lock ill_lock
* ill_nd_lla_mp ipsq + down ill only when ill is up
* ill_nd_lla ipsq + down ill only when ill is up
* ill_nd_lla_len ipsq + down ill only when ill is up
* ill_phys_addr_pend ipsq + down ill only when ill is up
* ill_ifname_pending_err ipsq ipsq
* ill_avl_byppa ipsq, ill_g_lock write once
*
* ill_fastpath_list ill_lock ill_lock
* ill_refcnt ill_lock ill_lock
* ill_ire_cnt ill_lock ill_lock
* ill_cv ill_lock ill_lock
* ill_ncec_cnt ill_lock ill_lock
* ill_nce_cnt ill_lock ill_lock
* ill_ilm_cnt ill_lock ill_lock
* ill_src_ipif ill_g_lock ill_g_lock
* ill_trace ill_lock ill_lock
* ill_usesrc_grp_next ill_g_usesrc_lock ill_g_usesrc_lock
* ill_dhcpinit atomics atomics
* ill_flownotify_mh write once write once
* ill_capab_pending_cnt ipsq ipsq
* ill_ipallmulti_cnt ill_lock ill_lock
* ill_ipallmulti_ilm ill_lock ill_lock
* ill_saved_ire_mp ill_saved_ire_lock ill_saved_ire_lock
* ill_saved_ire_cnt ill_saved_ire_lock ill_saved_ire_lock
* ill_arl ??? ???
* ill_ire_multicast ipsq + quiescent none
* ill_bound_ipif ipsq ipsq
* ill_actnode ipsq + ipmp_lock ipsq OR ipmp_lock
* ill_grpnode ipsq + ill_g_lock ipsq OR ill_g_lock
* ill_src_ipif ill_g_lock ill_g_lock
* ill_move_ipif ipsq ipsq
* ill_nom_cast ipsq ipsq OR advisory
* ill_refresh_tid ill_lock ill_lock
* ill_grp (for IPMP ill) write once write once
* ill_grp (for underlying ill) ipsq + ill_g_lock ipsq OR ill_g_lock
* ill_grp_pending ill_mcast_serializer ill_mcast_serializer
* ill_mrouter_cnt atomics atomics
* ill_mphysaddr_list ill_lock ill_lock
*
* NOTE: It's OK to make heuristic decisions on an underlying interface
* by using IS_UNDER_IPMP() or comparing ill_grp's raw pointer value.
*/
/*
* For ioctl restart mechanism see ip_reprocess_ioctl()
*/
struct ip_ioctl_cmd_s;
struct ip_ioctl_cmd_s *, void *);
typedef struct ip_ioctl_cmd_s {
int ipi_cmd;
/*
* ipi_cmd_type:
*
* IF_CMD 1 old style ifreq cmd
* LIF_CMD 2 new style lifreq cmd
* ARP_CMD 3 arpreq cmd
* XARP_CMD 4 xarpreq cmd
* MSFILT_CMD 5 multicast source filter cmd
* MISC_CMD 6 misc cmd (not a more specific one above)
*/
#define IPI_DONTCARE 0 /* For ioctl encoded values that don't matter */
/* Flag values in ipi_flags */
/* unused 0x10 */
extern ip_ioctl_cmd_t ip_ndx_ioctl_table[];
extern ip_ioctl_cmd_t ip_misc_ioctl_table[];
extern int ip_ndx_ioctl_count;
extern int ip_misc_ioctl_count;
typedef struct ipmx_s {
ipmx_notused : 31;
} ipmx_t;
/*
* State for detecting if a driver supports certain features.
* Support for DL_ENABMULTI_REQ uses ill_dlpi_multicast_state.
* Support for DLPI M_DATA fastpath uses ill_dlpi_fastpath_state.
*/
#define IDS_UNKNOWN 0 /* No DLPI request sent */
/* Support for DL_CAPABILITY_REQ uses ill_dlpi_capab_state. */
enum {
};
/* Extended NDP Management Structure */
typedef struct ipndp_s {
char *ip_ndp_name;
} ipndp_t;
/* IXA Notification types */
typedef enum {
IXAN_LSO, /* LSO capability change */
IXAN_PMTU, /* PMTU change */
IXAN_ZCOPY /* ZEROCOPY capability change */
typedef uint_t ixa_notify_arg_t;
/*
* Attribute flags that are common to the transmit and receive attributes
*/
/* a copy */
/*
* Transmit side attributes used between the transport protocols and IP as
* well as inside IP. It is also used to cache information in the conn_t i.e.
* replaces conn_ire and the IPsec caching in the conn_t.
*/
struct ip_xmit_attr_s {
/*
* Always initialized independently of ixa_flags settings.
* Used by ip_xmit so we keep them up front for cache locality.
*/
/* Always set for conn_ip_output(); might be stale */
/*
* Since TCP keeps the conn_t around past the process going away
* we need to use the "notr" (e.g, ire_refhold_notr) for ixa_ire,
* ixa_nce, and ixa_dce.
*/
/* Set by ULP if IXAF_VERIFY_PMTU; otherwise set by IP */
int ixa_raw_cksum_offset; /* If IXAF_SET_RAW_CKSUM */
/*
* Cached LSO information.
*/
/*
* The following IPsec fields are only initialized when
* IXAF_IPSEC_SECURE is set. Otherwise they contain garbage.
*/
/* 0: ESP, 1: AH */
/*
* The selectors here are potentially different than the SPD rule's
* selectors, and we need to have both available for IKEv2.
*
* NOTE: "Source" and "Dest" are w.r.t. outbound datagrams. Ports can
* be zero, and the protocol number is needed to make the ports
* significant.
*/
/* Always initialized independently of ixa_flags settings */
#ifdef DEBUG
#endif
/*
* Must be set by ULP if any of IXAF_VERIFY_LSO, IXAF_VERIFY_PMTU,
* or IXAF_VERIFY_ZCOPY is set.
*/
void *ixa_notify_cookie; /* ULP cookie for ixa_notify */
};
/*
* Flags to indicate which transmit attributes are set.
* Split into "xxx_SET" ones which indicate that the "xxx" field is set, and
* single flags.
*/
/* Note the following uses bits 0x10000000 through 0x80000000 */
#define IXAF_IS_IPV4 IAF_IS_IPV4
#define IXAF_TRUSTED_ICMP IAF_TRUSTED_ICMP
#define IXAF_LOOPBACK_COPY IAF_LOOPBACK_COPY
/* Note: use the upper 32 bits */
/*
* The normal flags for sending packets e.g., icmp errors
*/
#define IXAF_BASIC_SIMPLE_V4 \
/*
* Normally these fields do not have a hold, but in some cases they do.
* We use ixa_free_flags to indicate that they have a hold and need to be
* released on cleanup.
*/
/*
* Simplistic way to set the ixa_xmit_hint for locally generated traffic
* and forwarded traffic. The shift amount are based on the size of the
* structs to discard the low order bits which don't have much if any variation
* (coloring in kmem_cache_alloc might provide some variation).
*
* Basing the locally generated hint on the address of the conn_t means that
* the packets from the same socket/connection do not get reordered.
* Basing the hint for forwarded traffic on the ill_ring_t means that
* packets from the same NIC+ring are likely to use the same outbound ring
* hence we get low contention on the ring in the transmitting driver.
*/
/*
* IP set Destination Flags used by function ip_set_destination,
* ip_attr_connect, and conn_connect.
*/
/*
* Receive side attributes used between the transport protocols and IP as
* well as inside IP.
*/
struct ip_recv_attr_s {
/*
* This is a hint for TCP SYN packets.
* Always initialized independently of ira_flags settings
*/
/* For ip_accept_tcp when IRAF_TARGET_SQP is set */
/* Always initialized independently of ira_flags settings */
/*
* ira_rill and ira_ill is set inside IP, but not when conn_recv is
* called; ULPs should use ira_ruifindex instead.
*/
/* Used when IRAF_VERIFIED_SRC is set; this source was ok */
/*
* The following IPsec fields are only initialized when
* IRAF_IPSEC_SECURE is set. Otherwise they contain garbage.
*/
/*
* For IP_RECVSLLA and ip_ndp_conflict/find_solicitation.
* Same size as max for sockaddr_dl
*/
#define IRA_L2SRC_SIZE 244
/*
* Local handle that we use to do lazy setting of ira_l2src.
* We defer setting l2src until needed but we do before any
* ip_input pullupmsg or copymsg.
*/
};
/*
* Flags to indicate which receive attributes are set.
*/
#define IRAF_DHCP_UNICAST 0x00000200
/* from a matching inner packet? */
/* Unused 0x04000000 */
/* Unused 0x08000000 */
/* Below starts with 0x10000000 */
#define IRAF_IS_IPV4 IAF_IS_IPV4
#define IRAF_TRUSTED_ICMP IAF_TRUSTED_ICMP
#define IRAF_LOOPBACK_COPY IAF_LOOPBACK_COPY
/*
* Normally these fields do not have a hold, but in some cases they do.
* We use ira_free_flags to indicate that they have a hold and need to be
* released on cleanup.
*/
/*
* Optional destination cache entry for path MTU information,
* and ULP metrics.
*/
struct dce_s {
struct dcb_s *dce_bucket;
union {
} dce_u;
/* Note that for IPv6+IPMP we use the ifindex for the upper interface */
};
/*
* Values for dce_generation.
*
* If a DCE has DCE_GENERATION_CONDEMNED, the last dce_refrele should delete
* it.
*
* DCE_GENERATION_VERIFY is never stored in dce_generation but it is
* stored in places that cache DCE (such as ixa_dce_generation).
* It is used as a signal that the cache is stale and needs to be reverified.
*/
#define DCE_GENERATION_CONDEMNED 0 /* marked for deletion; last refrele frees */
#define DCE_GENERATION_VERIFY 1 /* cached copy is stale; must reverify */
#define DCE_GENERATION_INITIAL 2 /* first generation for a newly created DCE */
#define DCE_IS_CONDEMNED(dce) \
/*
* Values for ips_src_generation.
*
* SRC_GENERATION_VERIFY is never stored in ips_src_generation but it is
* stored in places that cache IREs (ixa_src_generation). It is used as a
* signal that the cache is stale and needs to be reverified.
*/
#define SRC_GENERATION_VERIFY 0 /* cached value is stale; must reverify */
#define SRC_GENERATION_INITIAL 1 /* first value stored in ips_src_generation */
/*
* The kernel stores security attributes of all gateways in a database made
* up of one or more tsol_gcdb_t elements. Each tsol_gcdb_t contains the
* security-related credentials of the gateway. More than one gateways may
* share entries in the database.
*
* The tsol_gc_t structure represents the gateway to credential association,
* and refers to an entry in the database. One or more tsol_gc_t entities are
* grouped together to form one or more tsol_gcgrp_t, each representing the
* list of security attributes specific to the gateway. A gateway may be
* associated with at most one credentials group.
*/
struct tsol_gcgrp_s;
/*
* Gateway security credential record.
*/
typedef struct tsol_gcdb_s {
} tsol_gcdb_t;
/*
* Gateway to credential association.
*/
typedef struct tsol_gc_s {
} tsol_gc_t;
/*
* Gateway credentials group address.
*/
typedef struct tsol_gcgrp_addr_s {
int ga_af; /* address family */
/*
* Gateway credentials group.
*/
typedef struct tsol_gcgrp_s {
} tsol_gcgrp_t;
extern kmutex_t gcgrp_lock;
/*
 * Drop a reference on a gateway credential (tsol_gc_t), following the
 * same gcgrp_lock protocol as the GCGRP_* macros below.  When the last
 * reference goes away, gc_inactive() takes over ownership of the entry
 * (and of releasing the lock), hence the lock is only dropped here on
 * the non-final release.
 * NOTE(review): the original macro body had a dangling "else" with no
 * statement, which cannot expand to valid C; this reconstruction is
 * modeled on the sibling GCGRP_REFRELE macro -- confirm against the
 * locking contract of gc_inactive().
 */
#define GC_REFRELE(p) { \
	mutex_enter(&gcgrp_lock); \
	ASSERT((p)->gc_refcnt > 0); \
	if (--((p)->gc_refcnt) == 0) \
		gc_inactive(p); \
	else \
		mutex_exit(&gcgrp_lock); \
}
/*
 * Take a reference on a gateway credentials group (tsol_gcgrp_t).
 * The increment is serialized by the global gcgrp_lock; the ASSERT
 * catches reference-count wraparound.
 */
#define GCGRP_REFHOLD(p) { \
mutex_enter(&gcgrp_lock); \
++((p)->gcgrp_refcnt); \
ASSERT((p)->gcgrp_refcnt != 0); \
mutex_exit(&gcgrp_lock); \
}
/*
 * Drop a reference on a gateway credentials group.  The last release
 * calls gcgrp_inactive() with gcgrp_lock held; since mutex_exit() runs
 * unconditionally afterwards, gcgrp_inactive() is presumed to return
 * with the lock still held -- NOTE(review): confirm against its
 * implementation.
 */
#define GCGRP_REFRELE(p) { \
mutex_enter(&gcgrp_lock); \
ASSERT((p)->gcgrp_refcnt > 0); \
if (--((p)->gcgrp_refcnt) == 0) \
gcgrp_inactive(p); \
mutex_exit(&gcgrp_lock); \
}
/*
* IRE gateway security attributes structure, pointed to by tsol_ire_gw_secattr
*/
struct tsol_tnrhc;
struct tsol_ire_gw_secattr_s {
};
void irb_refrele_ftable(irb_t *);
extern struct kmem_cache *rt_entry_cache;
typedef struct ire4 {
} ire4_t;
typedef struct ire6 {
} ire6_t;
typedef union ire_addr {
} ire_addr_u_t;
/*
* Internet Routing Entry
* When we have multiple identical IREs we logically add them by manipulating
* ire_identical_ref and ire_delete first decrements
* that and when it reaches 1 we know it is the last IRE.
* "identical" is defined as being the same for:
* ire_addr, ire_netmask, ire_gateway, ire_ill, ire_zoneid, and ire_type
* For instance, multiple IRE_BROADCASTs for the same subnet number are
* viewed as identical, and so are the IRE_INTERFACEs when there are
* multiple logical interfaces (on the same ill) with the same subnet prefix.
*/
struct ire_s {
/*
* ire_testhidden is TRUE for INTERFACE IREs of IS_UNDER_IPMP(ill)
* interfaces
*/
/*
* Cached information of where to send packets that match this route.
* The ire_dep_* information is used to determine when ire_nce_cache
* needs to be updated.
* ire_nce_cache is the fastpath for the Neighbor Cache Entry
* for IPv6; arp info for IPv4
* Since this is a cache setup and torn down independently of
* applications we need to use nce_ref{rele,hold}_notr for it.
*/
/*
* Quick check whether the ire_type and ire_masklen indicates
* that the IRE can have ire_nce_cache set i.e., whether it is
* IRE_ONLINK and for a single destination.
*/
/*
* Dependency tracking so we can safely cache IRE and NCE pointers
* in offlink and onlink IREs.
* These are locked under the ips_ire_dep_lock rwlock. Write held
* when modifying the linkage.
* ire_dep_parent (Also chain towards IRE for nexthop)
* ire_dep_parent_generation: ire_generation of ire_dep_parent
* ire_dep_children (From parent to first child)
* ire_dep_sib_next (linked list of siblings)
* ire_dep_sib_ptpn (linked list of siblings)
*
* The parent has a ire_refhold on each child, and each child has
* an ire_refhold on its parent.
* Since ire_dep_parent is a cache setup and torn down independently of
* applications we need to use ire_ref{rele,hold}_notr for it.
*/
/* ire_defense* and ire_last_used_time are only used on IRE_LOCALs */
/*
* default and prefix routes that are added without explicitly
* specifying the interface are termed "unbound" routes, and will
* have ire_unbound set to true.
*/
};
/* IPv4 compatibility macros */
/*
* Values for ire_generation.
*
* If an IRE is marked with IRE_IS_CONDEMNED, the last walker of
* the bucket should delete this IRE from this bucket.
*
* IRE_GENERATION_VERIFY is never stored in ire_generation but it is
* stored in places that cache IREs (such as ixa_ire_generation and
* ire_dep_parent_generation). It is used as a signal that the cache is
* stale and needs to be reverified.
*/
#define IRE_GENERATION_CONDEMNED 0 /* marked for deletion by bucket walker */
#define IRE_GENERATION_VERIFY 1 /* cached copy is stale; must reverify */
#define IRE_GENERATION_INITIAL 2 /* first generation for a newly created IRE */
#define IRE_IS_CONDEMNED(ire) \
/* Convenient typedefs for sockaddrs */
typedef struct sockaddr_in sin_t; /* IPv4 socket address */
typedef struct sockaddr_in6 sin6_t; /* IPv6 socket address */
/*
 * Name/value mapping entry; used by tables such as ire_nv_tbl to give
 * printable names to numeric constants (see ip_nv_lookup()).
 */
typedef struct nv_s {
char *nv_name; /* printable name for the associated value */
} nv_t;
#define ILL_FRAG_HASH(s, i) \
/*
* The MAX number of allowed fragmented packets per hash bucket
* calculation is based on the most common mtu size of 1500. This limit
* will work well for other mtu sizes as well.
*/
#define COMMON_IP_MTU 1500 /* most common link MTU, per comment above */
#define MAX_FRAG_MIN 10 /* lower bound used by MAX_FRAG_PKTS() below */
#define MAX_FRAG_PKTS(ipst) \
/*
* Maximum dups allowed per packet.
*/
extern uint_t ip_max_frag_dups;
/*
* Per-packet information for received packets and transmitted.
* Used by the transport protocols when converting between the packet
* and ancillary data and socket options.
*
* Note: This private data structure and related IPPF_* constant
* definitions are exposed to enable compilation of some debugging tools
* a temporary hack and long term alternate interfaces should be defined
* to support the needs of such tools and private definitions moved to
* private headers.
*/
struct ip_pkt_s {
};
extern void ip_pkt_source_route_reverse_v4(ip_pkt_t *);
/* ipp_fields values */
/*
* Data structure which is passed to conn_opt_get/set.
* The conn_t is included even though it can be inferred from queue_t.
* setsockopt and getsockopt use conn_ixa and conn_xmit_ipp. However,
* when handling ancillary data we use separate ixa and ipps.
*/
typedef struct conn_opt_arg_s {
/*
* Flags for what changed.
* If we want to be more efficient in the future we can have more fine
* grained flags e.g., a flag for just IP_TOS changing.
* For now we either call ip_set_destination (for "route changed")
* and/or conn_build_hdr_template/conn_prepend_hdr (for "header changed").
*/
#define COA_HEADER_CHANGED 0x0001
#define COA_ROUTE_CHANGED 0x0002
#define COA_ICMP_BIND_NEEDED 0x0020
#define COA_OOBINLINE_CHANGED 0x0040
#define TCP_PORTS_OFFSET 0
#define UDP_PORTS_OFFSET 0
/*
* ill / ipif lookup functions increment the refcnt on the ill / ipif only
* after calling these macros. This ensures that the refcnt on the ipif or
* ill will eventually drop down to zero.
*/
#define ILL_CAN_LOOKUP(ill) \
#define ILL_IS_CONDEMNED(ill) \
#define IPIF_CAN_LOOKUP(ipif) \
#define IPIF_IS_CONDEMNED(ipif) \
#define IPIF_IS_CHANGING(ipif) \
/* Macros used to assert that this thread is a writer */
/*
* Grab ill locks in the proper order. The order is highest addressed
* ill is locked first.
*/
{ \
} else { \
} \
}
{ \
}
/* Get the other protocol instance ill */
/* ioctl command info: Ioctl properties extracted and stored in here */
typedef struct cmd_info_s
{
} cmd_info_t;
extern struct kmem_cache *ire_cache;
extern ipaddr_t ip_g_all_ones;
extern uint_t ip_loopback_mtuplus;
extern uint_t ip_loopback_mtu_v6plus;
extern vmem_t *ip_minor_arena_sa;
extern vmem_t *ip_minor_arena_la;
/*
* ip_g_forward controls IP forwarding. It takes two values:
* 0: IP_FORWARD_NEVER Don't forward packets ever.
* 1: IP_FORWARD_ALWAYS Forward packets for elsewhere.
*
* RFC1122 says there must be a configuration switch to control forwarding,
* but that the default MUST be to not forward packets ever. Implicit
* control based on configuration of multiple interfaces MUST NOT be
* implemented (Section 3.1). SunOS 4.1 did provide the "automatic" capability
* and, in fact, it was the default. That capability is now provided in the
*/
#define ips_ip_respond_to_address_mask_broadcast \
/* IPv6 configuration knobs */
/* Misc IP configuration knobs */
extern int dohwcksum; /* use h/w cksum if supported by the h/w */
#ifdef ZC_TEST
extern int noswcksum;
#endif
extern char ipif_loopback_name[];
extern nv_t *ire_nv_tbl;
extern struct module_info ip_mod_info;
#define HOOKS4_INTERESTED_PHYSICAL_IN(ipst) \
#define HOOKS6_INTERESTED_PHYSICAL_IN(ipst) \
#define HOOKS4_INTERESTED_PHYSICAL_OUT(ipst) \
#define HOOKS6_INTERESTED_PHYSICAL_OUT(ipst) \
#define HOOKS4_INTERESTED_FORWARDING(ipst) \
#define HOOKS6_INTERESTED_FORWARDING(ipst) \
#define HOOKS4_INTERESTED_LOOPBACK_IN(ipst) \
#define HOOKS6_INTERESTED_LOOPBACK_IN(ipst) \
#define HOOKS4_INTERESTED_LOOPBACK_OUT(ipst) \
#define HOOKS6_INTERESTED_LOOPBACK_OUT(ipst) \
/*
* Hooks macros used inside of ip
* The callers use the above INTERESTED macros first, hence
* the he_interested check is superfluous.
*/
if ((_hook).he_interested) { \
\
\
if (_err != 0) { \
ip2dbg(("%s hook dropped mblk chain %p hdr %p\n",\
} \
} else { \
} \
}
if ((_hook).he_interested) { \
\
\
if (_err != 0) { \
ip2dbg(("%s hook dropped mblk chain %p hdr %p\n",\
} \
} else { \
} \
}
(fp) = 0; \
} else if (IS_UNDER_IPMP(ill)) { \
} else { \
}
/*
* Network byte order macros
*/
#ifdef _BIG_ENDIAN
#define N_IN_CLASSA_NET IN_CLASSA_NET
#define N_IN_CLASSD_NET IN_CLASSD_NET
#else /* _BIG_ENDIAN */
#endif /* _BIG_ENDIAN */
#define IP_LOOPBACK_ADDR(addr) \
extern int ip_debug;
extern uint_t ip_thread_data;
extern krwlock_t ip_thread_rwlock;
extern list_t ip_thread_list;
#ifdef IP_DEBUG
#else
#define ip0dbg(a) /* */
#define ip1dbg(a) /* */
#define ip2dbg(a) /* */
#define ip3dbg(a) /* */
#endif /* IP_DEBUG */
/* Default MAC-layer address string length for mac_colon_addr */
#define MAC_STR_LEN 128
struct mac_header_info_s;
extern void ill_frag_timer(void *);
extern void ill_frag_timer_start(ill_t *);
extern char *ip_dot_addr(ipaddr_t, char *);
ip_recv_attr_t *);
ip_recv_attr_t *);
ip_recv_attr_t *);
extern void ip_ire_g_fini(void);
extern void ip_ire_g_init(void);
extern void ip_ire_fini(ip_stack_t *);
extern void ip_ire_init(ip_stack_t *);
size_t);
struct mac_header_info_s *);
struct mac_header_info_s *);
extern void ill_input_full_v4(mblk_t *, void *, void *,
ip_recv_attr_t *, rtc_t *);
extern void ill_input_short_v4(mblk_t *, void *, void *,
ip_recv_attr_t *, rtc_t *);
extern void ill_input_full_v6(mblk_t *, void *, void *,
ip_recv_attr_t *, rtc_t *);
extern void ill_input_short_v6(mblk_t *, void *, void *,
ip_recv_attr_t *, rtc_t *);
ip_recv_attr_t *, int *);
ip_recv_attr_t *);
ip_recv_attr_t *);
ip_recv_attr_t *);
ip_recv_attr_t *);
ip_recv_attr_t *);
extern int ip_total_hdrs_len_v4(const ip_pkt_t *);
extern void ip_mib2_add_ip_stats(mib2_ipIfStatsEntry_t *,
extern void ip_mib2_add_icmp6_stats(mib2_ipv6IfIcmpEntry_t *,
ip_recv_attr_t *);
ill_t *);
extern void ixa_inactive(ip_xmit_attr_t *);
extern void ixa_refrele(ip_xmit_attr_t *);
extern void ixa_cleanup(ip_xmit_attr_t *);
uchar_t *);
const in6_addr_t *, ip_pkt_t *);
extern void ip_attr_newdst(ip_xmit_attr_t *);
const in6_addr_t *, in6_addr_t *);
extern char *ip_nv_lookup(nv_t *, int);
extern void arp_bringup_done(ill_t *, int);
extern void arp_replumb_done(ill_t *, int);
extern void ipmp_init(ip_stack_t *);
extern void ipmp_destroy(ip_stack_t *);
extern void ipmp_grp_destroy(ipmp_grp_t *);
extern int ipmp_grp_rename(ipmp_grp_t *, const char *);
extern void ipmp_illgrp_destroy(ipmp_illgrp_t *);
extern void ipmp_illgrp_refresh_mtu(ipmp_illgrp_t *);
extern void ipmp_illgrp_refresh_arpent(ipmp_illgrp_t *);
extern int ipmp_illgrp_unlink_grp(ipmp_illgrp_t *);
extern void ipmp_ill_leave_illgrp(ill_t *);
extern void ipmp_ill_refresh_active(ill_t *);
extern void ipmp_phyint_leave_grp(phyint_t *);
extern void ipmp_phyint_refresh_active(phyint_t *);
extern void ipmp_ncec_delete_nce(ncec_t *);
extern void ipmp_ncec_refresh_nce(ncec_t *);
extern int conn_ipsec_length(conn_t *);
const conn_t *, ip_xmit_attr_t *);
extern int ipsec_out_extra_length(ip_xmit_attr_t *);
ip_recv_attr_t *);
extern void ire_cleanup(ire_t *);
extern void ire_inactive(ire_t *);
#ifdef DEBUG
extern void th_trace_unref(const void *);
extern void th_trace_cleanup(const void *, boolean_t);
extern void ire_trace_ref(ire_t *);
extern void ire_untrace_ref(ire_t *);
#endif
extern void ip_quiesce_conn(conn_t *);
uint_t);
uint_t);
extern void conn_ioctl_cleanup(conn_t *);
extern void tnet_init(void);
extern void tnet_fini(void);
/*
* Hook functions to enable cluster networking
* On non-clustered systems these vectors must always be NULL.
*/
void *args);
void *);
/* Hooks for CGTP (multirt routes) filtering module */
#define CGTP_FILTER_REV_1 1
#define CGTP_FILTER_REV_2 2
#define CGTP_FILTER_REV_3 3
#define CGTP_FILTER_REV CGTP_FILTER_REV_3
/* cfo_filter and cfo_filter_v6 hooks return values */
#define CGTP_IP_PKT_NOT_CGTP 0
#define CGTP_IP_PKT_PREMIUM 1
#define CGTP_IP_PKT_DUPLICATE 2
/* Version 3 of the filter interface */
typedef struct cgtp_filter_ops {
int cfo_filter_rev; /* CGTP_FILTER_REV_3 */
int (*cfo_change_state)(netstackid_t, int);
in6_addr_t *, in6_addr_t *);
ip6_frag_t *);
#define CGTP_MCAST_SUCCESS 1
/*
* The separate CGTP module needs this global symbol so that it
* can check the version and determine whether to use the old or the new
* version of the filtering interface.
*/
extern int ip_cgtp_filter_rev;
extern int ip_cgtp_filter_supported(void);
extern int ip_cgtp_filter_unregister(netstackid_t);
extern int ip_cgtp_filter_is_registered(netstackid_t);
/*
* rr_ring_state cycles in the order shown below from RR_FREE through
* RR_FREE_IN_PROG and back to RR_FREE.
*/
typedef enum {
RR_FREE, /* Free slot */
RR_SQUEUE_UNBOUND, /* Ring's squeue is unbound */
RR_SQUEUE_BIND_INPROG, /* Ring's squeue bind in progress */
RR_SQUEUE_BOUND, /* Ring's squeue bound to cpu */
RR_FREE_INPROG /* Ring is being freed */
/*
* we need to duplicate the definitions here because we cannot
*/
typedef boolean_t (*ip_mac_intr_disable_t)(void *);
typedef void (*ip_mac_intr_enable_t)(void *);
typedef void (*ip_flow_enable_t)(void *, ip_mac_tx_cookie_t);
typedef void *(*ip_dld_callb_t)(void *,
ip_flow_enable_t, void *);
typedef int (*ip_capab_func_t)(void *, uint_t,
void *, uint_t);
/*
* POLLING README
* sq_get_pkts() is called to pick packets from softring in poll mode. It
* calls rr_rx to get the chain and process it with rr_ip_accept.
* rr_rx = mac_soft_ring_poll() to pick packets
* rr_ip_accept = ip_accept_tcp() to process packets
*/
/*
* XXX: With protocol, service specific squeues, they will have
* specific acceptor functions.
*/
/*
* rr_intr_enable, rr_intr_disable, rr_rx_handle, rr_rx:
* May be accessed while in the squeue AND after checking that SQS_POLL_CAPAB
* is set.
*
* rr_ring_state: Protected by ill_lock.
*/
/*
 * Per-ill receive ring descriptor used for squeue polling (see the
 * POLLING README above).  Field protection rules are listed in the
 * preceding comment block.
 */
struct ill_rx_ring {
void *rr_intr_handle; /* Handle interrupt funcs */
void *rr_rx_handle; /* Handle for Rx ring */
};
/*
* IP - DLD direct function call capability
* Suffixes, df - dld function, dh - dld handle,
* cf - client (IP) function, ch - client handle
*/
typedef struct ill_dld_direct_s { /* DLD provided driver Tx */
void *idd_tx_dh; /* dld_str_t *dsp */
void *idd_tx_cb_dh; /* mac_client_handle_t *mch */
void *idd_tx_fctl_dh; /* mac_client_handle */
/* IP - DLD polling capability */
typedef struct ill_dld_poll_s {
/* Describes ill->ill_dld_capab */
struct ill_dld_capab_s {
void *idc_capab_dh; /* dld_str_t *dsp */
};
/*
* IP squeues exports
*/
extern boolean_t ip_squeue_fanout;
extern void ip_squeue_init(void (*)(squeue_t *));
extern void *ip_squeue_add_ring(ill_t *, void *);
extern void ip_squeue_clean_all(ill_t *);
struct ip6_mtuinfo *);
ip_stack_t *);
extern void dce_g_init(void);
extern void dce_g_destroy(void);
extern void dce_stack_init(ip_stack_t *);
extern void dce_stack_destroy(ip_stack_t *);
uint_t *);
ip_stack_t *);
ip_stack_t *);
ip_stack_t *);
extern void dce_increment_generation(dce_t *);
extern void dce_refrele(dce_t *);
extern void dce_refhold(dce_t *);
extern void dce_refrele_notr(dce_t *);
extern void dce_refhold_notr(dce_t *);
ip_stack_t *, boolean_t);
extern int ip_laddr_fanout_insert(conn_t *);
/*
* Squeue tags. Tags only need to be unique when the callback function is the
* same to distinguish between different calls, but we use unique tags for
* convenience anyway.
*/
#define SQTAG_IP_INPUT 1
#define SQTAG_TCP_INPUT_ICMP_ERR 2
#define SQTAG_TCP6_INPUT_ICMP_ERR 3
#define SQTAG_IP_TCP_INPUT 4
#define SQTAG_IP6_TCP_INPUT 5
#define SQTAG_IP_TCP_CLOSE 6
#define SQTAG_TCP_OUTPUT 7
#define SQTAG_TCP_TIMER 8
#define SQTAG_TCP_TIMEWAIT 9
#define SQTAG_TCP_ACCEPT_FINISH 10
#define SQTAG_TCP_ACCEPT_FINISH_Q0 11
#define SQTAG_TCP_ACCEPT_PENDING 12
#define SQTAG_TCP_LISTEN_DISCON 13
#define SQTAG_TCP_CONN_REQ_1 14
#define SQTAG_TCP_EAGER_BLOWOFF 15
#define SQTAG_TCP_EAGER_CLEANUP 16
#define SQTAG_TCP_EAGER_CLEANUP_Q0 17
#define SQTAG_TCP_CONN_IND 18
#define SQTAG_TCP_RSRV 19
#define SQTAG_TCP_ABORT_BUCKET 20
#define SQTAG_TCP_REINPUT 21
#define SQTAG_TCP_REINPUT_EAGER 22
#define SQTAG_TCP_INPUT_MCTL 23
#define SQTAG_TCP_RPUTOTHER 24
#define SQTAG_IP_PROTO_AGAIN 25
#define SQTAG_IP_FANOUT_TCP 26
#define SQTAG_IPSQ_CLEAN_RING 27
#define SQTAG_TCP_WPUT_OTHER 28
#define SQTAG_TCP_CONN_REQ_UNBOUND 29
#define SQTAG_TCP_SEND_PENDING 30
#define SQTAG_BIND_RETRY 31
#define SQTAG_UDP_FANOUT 32
#define SQTAG_UDP_INPUT 33
#define SQTAG_UDP_WPUT 34
#define SQTAG_UDP_OUTPUT 35
#define SQTAG_TCP_KSSL_INPUT 36
#define SQTAG_TCP_DROP_Q0 37
#define SQTAG_TCP_CONN_REQ_2 38
#define SQTAG_IP_INPUT_RX_RING 39
#define SQTAG_SQUEUE_CHANGE 40
#define SQTAG_CONNECT_FINISH 41
#define SQTAG_SYNCHRONOUS_OP 42
#define SQTAG_TCP_SHUTDOWN_OUTPUT 43
#define SQTAG_TCP_IXA_CLEANUP 44
#define SQTAG_TCP_SEND_SYNACK 45
#endif /* _KERNEL */
#ifdef __cplusplus
}
#endif
#endif /* _INET_IP_H */