ip.h revision 3efde6d032b3bcd6957e912c3f2a59253f28a9dc
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1990 Mentat Inc. */
#ifndef _INET_IP_H
#define _INET_IP_H
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/isa_defs.h>
#include <sys/hook_event.h>
#include <sys/hook_impl.h>
#include <inet/ip_stack.h>
#ifdef _KERNEL
#include <sys/multidata.h>
/* CONN_DEBUG is switched on only when the kernel is built with DEBUG. */
#ifdef DEBUG
#define CONN_DEBUG
#endif
/*
* IP_DEBUG is defined unconditionally for kernel builds; later in this
* header it guards the refhold/refrele tracing definitions.
*/
#define IP_DEBUG
/*
* The mt-streams(9F) flags for the IP module; put here so that other
* "drivers" that are actually IP (e.g., ICMP, UDP) can use the same set
* of flags.
*/
#define IP_DEVMTFLAGS D_MP
#endif /* _KERNEL */
/*
* Module names and device node paths for IP and the transports.
* The *6_DEV_NAME variants name the IPv6 device nodes. Only a module
* name is provided for SCTP in this view.
*/
#define IP_MOD_NAME "ip"
#define IP_DEV_NAME "/dev/ip"
#define IP6_DEV_NAME "/dev/ip6"
#define UDP_MOD_NAME "udp"
#define UDP_DEV_NAME "/dev/udp"
#define UDP6_DEV_NAME "/dev/udp6"
#define TCP_MOD_NAME "tcp"
#define TCP_DEV_NAME "/dev/tcp"
#define TCP6_DEV_NAME "/dev/tcp6"
#define SCTP_MOD_NAME "sctp"
#ifndef _IPADDR_T
#define _IPADDR_T
#endif
/* Number of bits in an address */
#define IP_ABITS 32
#define IPV6_ABITS 128
/*
* Flag to IP write side to indicate that the application has sent in a
* pre-built IP header. Stored in ipha_ident (which is otherwise zero).
*/
#define IP_HDR_INCLUDED 0xFFFF
/* Number of buckets in the per-ill fragment hash table. */
#define ILL_FRAG_HASH_TBL_COUNT ((unsigned int)64)
/* Length of an IPv4 address, in bytes. */
#define IPV4_ADDR_LEN 4
#define IP_ADDR_LEN IPV4_ADDR_LEN
#define IP_ARP_PROTO_TYPE 0x0800
#define IPV4_VERSION 4
#define IP_VERSION IPV4_VERSION
/* An option-less IPv4 header is 5 32-bit words, i.e. 20 bytes. */
#define IP_SIMPLE_HDR_LENGTH_IN_WORDS 5
#define IP_SIMPLE_HDR_LENGTH 20
#define IP_MAX_HDR_LENGTH 60
/*
* NOTE(review): IP_MAXPACKET is presumably also provided by <netinet/ip.h>;
* the 2 files should be cleaned up to remove all redundant definitions.
*/
#define IP_MAXPACKET 65535
/*
* Combined version/header-length octet of an option-less IPv4 header:
* the version in the high nibble and the header length (in 32-bit words)
* in the low nibble, i.e. 0x45.
*
* The continuation line must carry the macro body. Without it the
* backslash splices the following #define into this macro and UDPH_SIZE
* is never defined.
*/
#define IP_SIMPLE_HDR_VERSION \
	((IP_VERSION << 4) | IP_SIMPLE_HDR_LENGTH_IN_WORDS)
/* Size of a UDP header, in bytes. */
#define UDPH_SIZE 8
/* Leave room for ip_newroute to tack on the src and target addresses */
#define OK_RESOLVER_MP(mp) \
/*
* Constants and type definitions to support IP IOCTL commands
*/
#define IP_IOC_IRE_DELETE 4
#define IP_IOC_IRE_DELETE_NO_REPLY 5
#define IP_IOC_IRE_ADVISE_NO_REPLY 6
#define IP_IOC_RTS_REQUEST 7
/* Common definitions used by IP IOCTL data structures */
typedef struct ipllcmd_s {
} ipllc_t;
/* IP IRE Change Command Structure. */
typedef struct ipic_s {
} ipic_t;
/* IP IRE Delete Command Structure. */
typedef struct ipid_s {
} ipid_t;
#ifdef _KERNEL
/*
* Temporary state for ip options parser.
*/
typedef struct ipoptp_s
{
} ipoptp_t;
/*
* Flag(s) for ipoptp_flags
*/
#define IPOPTP_ERROR 0x00000001
#endif /* _KERNEL */
/* Controls forwarding of IP packets, set via ndd */
#define IP_FORWARD_NEVER 0
#define IP_FORWARD_ALWAYS 1
#define IPH_HDR_LENGTH(ipha) \
#define IPH_HDR_VERSION(ipha) \
#ifdef _KERNEL
/*
* IP reassembly macros. We hide starting and ending offsets in b_next and
* b_prev of messages on the reassembly queue. The messages are chained using
* b_cont. These macros are used in ip_reassemble() so we don't have to see
* the ugly casts and assignments.
* Note that the offsets are <= 64k i.e. a uint_t is sufficient to represent
* them.
*/
#define IP_REASS_SET_START(mp, u) \
#define IP_REASS_SET_END(mp, u) \
#define IP_REASS_COMPLETE 0x1
#define IP_REASS_PARTIAL 0x2
#define IP_REASS_FAILED 0x4
/*
* Test to determine whether this is a module instance of IP or a
* driver instance of IP.
*/
/*
* The following two macros are used by IP to get the appropriate
* wq and rq for a conn. If it is a TCP conn, then we need
* from a conn directly if it knows that the conn is not TCP.
*/
#define CONNP_TO_WQ(connp) \
#define GRAB_CONN_LOCK(q) { \
}
#define RELEASE_CONN_LOCK(q) { \
}
/* "Congestion controlled" protocol */
/*
* Complete the pending operation. Usually an ioctl. Can also
* be a bind or option management request that got enqueued
* in an ipsq_t. Called on completion of the operation.
*/
#define CONN_OPER_PENDING_DONE(connp) { \
CONN_DEC_REF(connp); \
}
/*
* Flags for the various ip_fanout_* routines.
*/
/*
* Following flags are used by IPQoS to determine if policy processing is
* required.
*/
/*
* Default values for IRE_DB_TYPE, IRE_DB_REQ_TYPE and IRE_ARPRESOLVE_TYPE.
* Each default below applies only if the macro has not already been
* defined, so a definition made before this point takes precedence.
*/
#ifndef IRE_DB_TYPE
#define IRE_DB_TYPE M_SIG
#endif
#ifndef IRE_DB_REQ_TYPE
#define IRE_DB_REQ_TYPE M_PCSIG
#endif
#ifndef IRE_ARPRESOLVE_TYPE
#define IRE_ARPRESOLVE_TYPE M_EVENT
#endif
/*
* Values for squeue switch:
*/
#define IP_SQUEUE_ENTER_NODRAIN 1
#define IP_SQUEUE_ENTER 2
/*
* This is part of the interface between Transport provider and
* IP which can be used to set policy information. This is usually
* accompanied with O_T_BIND_REQ/T_BIND_REQ. ip_bind assumes that
* only IPSEC_POLICY_SET is there when it is found in the chain.
* The information contained is a struct ipsec_req_t. On success
* or failure, either the T_BIND_ACK or the T_ERROR_ACK is returned.
* IPSEC_POLICY_SET is never returned.
*/
#define IPSEC_POLICY_SET M_SETOPTS
/* IP Fragmentation Reassembly Header */
typedef struct ipf_s {
/*
* Link to another ipf_t; presumably the next entry on the same
* fragment hash chain -- TODO confirm against the reassembly code.
*/
struct ipf_s *ipf_hash_next;
int ipf_hole_cnt; /* Number of holes (hard-case). */
int ipf_end; /* Tail end offset (0 -> hard-case). */
} ipf_t;
/*
* IPv4 Fragments
*/
#define IS_V4_FRAGMENT(ipha_fragment_offset_and_flags) \
typedef enum {
IB_PKT = 0x01,
OB_PKT = 0x02
} ip_pkt_t;
#define UPDATE_IB_PKT_COUNT(ire)\
{ \
(ire)->ire_ib_pkt_count++; \
/* \
* forwarding packet \
*/ \
else \
} \
}
#define UPDATE_OB_PKT_COUNT(ire)\
{ \
(ire)->ire_ob_pkt_count++;\
} \
}
{ \
switch (ipha->ipha_protocol) { \
case IPPROTO_UDP: \
break; \
default: \
break; \
} \
}
/*
* NCE_EXPIRED is TRUE when we have a non-permanent nce that was
* found to be REACHABLE more than ip_ire_arp_interval ms ago.
* This macro is used to age existing nce_t entries. The
* nce's will get cleaned up in the following circumstances:
* - ip_ire_trash_reclaim will free nce's using ndp_cache_reclaim
* when memory is low,
* - ip_arp_news, when updates are received.
* - if the nce is NCE_EXPIRED(), it will be deleted, so that a new
* arp request will need to be triggered from an ND_INITIAL nce.
*
* Note that the nce state transition follows the pattern:
* ND_INITIAL -> ND_INCOMPLETE -> ND_REACHABLE
* after which the nce is deleted when it has expired.
*
* nce_last is the timestamp that indicates when the nce_res_mp in the
* nce_t was last updated to a valid link-layer address. nce_last gets
* updated:
* - when the nce is created
* - every time we get a sane arp response for the nce.
*/
#endif /* _KERNEL */
/*
* ICMP types.
* Type values 1, 2, 6 and 7 have no name in this list.
*/
#define ICMP_ECHO_REPLY 0
#define ICMP_DEST_UNREACHABLE 3
#define ICMP_SOURCE_QUENCH 4
#define ICMP_REDIRECT 5
#define ICMP_ECHO_REQUEST 8
#define ICMP_ROUTER_ADVERTISEMENT 9
#define ICMP_ROUTER_SOLICITATION 10
#define ICMP_TIME_EXCEEDED 11
#define ICMP_PARAM_PROBLEM 12
#define ICMP_TIME_STAMP_REQUEST 13
#define ICMP_TIME_STAMP_REPLY 14
#define ICMP_INFO_REQUEST 15
#define ICMP_INFO_REPLY 16
#define ICMP_ADDRESS_MASK_REQUEST 17
#define ICMP_ADDRESS_MASK_REPLY 18
/* ICMP_TIME_EXCEEDED (type 11) codes */
#define ICMP_TTL_EXCEEDED 0
#define ICMP_REASSEMBLY_TIME_EXCEEDED 1
/* ICMP_DEST_UNREACHABLE (type 3) codes */
#define ICMP_NET_UNREACHABLE 0
#define ICMP_HOST_UNREACHABLE 1
#define ICMP_PROTOCOL_UNREACHABLE 2
#define ICMP_PORT_UNREACHABLE 3
#define ICMP_FRAGMENTATION_NEEDED 4
#define ICMP_SOURCE_ROUTE_FAILED 5
#define ICMP_DEST_NET_UNKNOWN 6
#define ICMP_DEST_HOST_UNKNOWN 7
#define ICMP_SRC_HOST_ISOLATED 8
#define ICMP_DEST_NET_UNREACH_ADMIN 9
#define ICMP_DEST_HOST_UNREACH_ADMIN 10
#define ICMP_DEST_NET_UNREACH_TOS 11
#define ICMP_DEST_HOST_UNREACH_TOS 12
/* ICMP Header Structure */
typedef struct icmph_s {
union {
} u_echo;
struct { /* Destination unreachable structure */
} u_du;
struct { /* Parameter problem structure */
} u_pp;
struct { /* Redirect structure */
} u_rd;
} icmph_u;
} icmph_t;
#define ICMPH_SIZE 8
/*
* Minimum length of transport layer header included in an ICMP error
* message for it to be considered valid.
*/
#define ICMP_MIN_TP_HDR_LEN 8
/* Aligned IP header */
typedef struct ipha_s {
} ipha_t;
/*
* IP Flags
*
* Some of these constant names are copied for the DTrace IP provider; the
* copies should be kept in sync.
*/
/* ECN code points for IPv4 TOS byte and IPv6 traffic class octet. */
struct ill_s;
in6_addr_t *);
ipaddr_t *);
/* IP Mac info structure */
typedef struct ip_m_s {
int ip_m_type; /* From <net/if_types.h> */
} ip_m_t;
/*
* The following functions attempt to reduce the link layer dependency
* of the IP stack. The current set of link specific operations are:
* a. map from IPv4 class D (224.0/4) multicast address range to the link
* layer multicast address range.
* b. map from IPv6 multicast address range (ff00::/8) to the link
* layer multicast address range.
* c. derive the default IPv6 interface identifier from the interface.
* d. derive the default IPv6 destination interface identifier from
* the interface (point-to-point only).
*/
/* Router entry types */
/* net without any address mapping. */
/* net with resolver. */
/*
* If an IRE is marked with IRE_MARK_CONDEMNED, the last walker of
* the bucket should delete this IRE from this bucket.
*/
#define IRE_MARK_CONDEMNED 0x0001
/*
* An IRE with IRE_MARK_TESTHIDDEN is used by in.mpathd for test traffic. It
* can only be looked up by requesting MATCH_IRE_MARK_TESTHIDDEN.
*/
#define IRE_MARK_TESTHIDDEN 0x0004
/*
* An IRE with IRE_MARK_NOADD is created in ip_newroute_ipif when the outgoing
* interface is specified by e.g. IP_PKTINFO. The IRE is not added to the IRE
* cache table.
*/
/*
* IRE marked with IRE_MARK_TEMPORARY means that this IRE has been used
* either for forwarding a packet or has not been used for sending
* traffic on TCP connections terminated on this system. In both
* cases, this IRE is the first to go when IRE is being cleaned up.
*/
#define IRE_MARK_TEMPORARY 0x0010
/*
* IRE marked with IRE_MARK_USESRC_CHECK means that while adding an IRE with
* this mark, additional atomic checks need to be performed. For example, by
* the time an IRE_CACHE is created, sent up to ARP and then comes back to IP,
* the usesrc grouping could have changed, in which case we want to fail adding
* the IRE_CACHE entry.
*/
#define IRE_MARK_USESRC_CHECK 0x0020
/*
* IRE_MARK_PRIVATE_ADDR is used for IP_NEXTHOP. When IP_NEXTHOP is set, the
* routing table lookup for the destination is bypassed and the packet is
* sent directly to the specified nexthop. The associated IRE_CACHE entries
* should be marked with IRE_MARK_PRIVATE_ADDR flag so that they don't show up
* in regular ire cache lookups.
*/
#define IRE_MARK_PRIVATE_ADDR 0x0040
/*
* When we send an ARP resolution query for the nexthop gateway's ire,
* we use esballoc to create the ire_t in the AR_ENTRY_QUERY mblk
* chain, and mark its ire_marks with IRE_MARK_UNCACHED. This flag
* indicates that information from ARP has not been transferred to a
* permanent IRE_CACHE entry. The flag is reset only when the
* information is successfully transferred to an ire_cache entry (in
* ire_add()). Attempting to free the AR_ENTRY_QUERY mblk chain prior
* to ire_add (e.g., from arp, or from ip`ip_wput_nondata) will
* be cleaned up. The free callback routine (ire_freemblk()) checks
* for IRE_MARK_UNCACHED to see if any resources that are pinned down
* will need to be cleaned up or not.
*/
#define IRE_MARK_UNCACHED 0x0080
/*
* The comment below (and for other netstack_t references) refers
* to the fact that we only do netstack_hold in particular cases,
* such as the references from open streams (ill_t and conn_t's
* pointers). Internally within IP we rely on IP's ability to cleanup e.g.
* ire_t's when an ill goes away.
*/
typedef struct ire_expire_arg_s {
int iea_flush_flag;
/* Flags with ire_expire routine */
/* Arguments to ire_flush_cache() */
#define IRE_FLUSH_DELETE 0
#define IRE_FLUSH_ADD 1
/*
* These are kept in a separate field in the conn and the synchronization
* depends on the atomic 32 bit access to that field.
*/
/* Used to check connection state flags before caching the IRE */
#define CONN_CACHE_IRE(connp) \
/*
* Parameter to ip_output giving the identity of the caller.
* IP_WSRV means the packet was enqueued in the STREAMS queue
* due to flow control and is now being reprocessed in the context of
* the STREAMS service procedure, consequent to flow control relief.
* IRE_SEND means the packet is being reprocessed consequent to an
* ire cache creation and addition and this may or may not be happening
* in the service procedure context. Anything other than the above 2
* cases is identified as IP_WPUT. Most commonly this is the case of
* packets coming down from the application.
*/
#ifdef _KERNEL
/*
*/
#define MAX_FILTER_SIZE 64
typedef struct slist_s {
int sl_numsrc;
} slist_t;
/*
* Following struct is used to maintain retransmission state for
* a multicast group. One rtx_state_t struct is an in-line field
* of the ilm_t struct; the slist_ts in the rtx_state_t struct are
* alloc'd as needed.
*/
typedef struct rtx_state_s {
int rtx_cnt; /* retrans count */
int rtx_fmode_cnt; /* retrans count for fmode change */
} rtx_state_t;
/*
* Used to construct list of multicast address records that will be
* sent in a single listener report.
*/
typedef struct mrec_s {
} mrec_t;
/* Group membership list per upper conn */
/*
* XXX can we make ilg survive an ifconfig unplumb + plumb
*
* ilg_ipif is used by IPv4 as multicast groups are joined using an interface
* address (ipif).
* ilg_ill is used by IPv6 as multicast groups are joined using an interface
* index (phyint->phyint_ifindex).
* ilg_ill is NULL for IPv4 and ilg_ipif is NULL for IPv6.
*
* ilg records the state of multicast memberships of a socket end point.
* ilm records the state of multicast memberships with the driver and is
* maintained per interface.
*
* There is no direct link between a given ilg and ilm. If the
* application has joined a group G with ifindex I, we will have
* an ilg with ilg_v6group and ilg_ill. There will be a corresponding
* ilm with ilm_ill/ilm_v6addr recording the multicast membership.
* To delete the membership:
*
* a) Search for ilg matching on G and I with ilg_v6group
* and ilg_ill. Delete ilg_ill.
* b) Search the corresponding ilm matching on G and I with
* ilm_v6addr and ilm_ill. Delete ilm.
*
* For IPv4 the only difference is that we look using ipifs, not ills.
*/
/*
* The ilg_t and ilm_t members are protected by ipsq. They can be changed only
* by a thread executing in the ipsq; that is, any add or delete of a
* multicast group has to execute in the ipsq.
*/
typedef struct ilg_s {
} ilg_t;
/*
* Multicast address list entry for ill.
* ilm_ipif is used by IPv4 as multicast groups are joined using ipif.
* ilm_ill is used by IPv6 as multicast groups are joined using ill.
* ilm_ill is NULL for IPv4 and ilm_ipif is NULL for IPv6.
*
* The comment below (and for other netstack_t references) refers
* to the fact that we only do netstack_hold in particular cases,
* such as the references from open streams (ill_t and conn_t's
* pointers). Internally within IP we rely on IP's ability to cleanup e.g.
* ire_t's when an ill goes away.
*/
typedef struct ilm_s {
int ilm_refcnt;
int ilm_no_ilg_cnt; /* number of joins w/ no ilg */
} ilm_t;
typedef struct ilm_walker {
} ilm_walker_t;
/*
* Soft reference to an IPsec SA.
*
* On relative terms, conn's can be persistent (living as long as the
* processes which create them), while SA's are ephemeral (dying when
* they hit their time-based or byte-based lifetimes).
*
* We could hold a hard reference to an SA from an ipsec_latch_t,
* but this would cause expired SA's to linger for a potentially
* unbounded time.
*
* Instead, we remember the hash bucket number and bucket generation
* in addition to the pointer. The bucket generation is incremented on
* each deletion.
*/
typedef struct ipsa_ref_s
{
struct isaf_s *ipsr_bucket;
} ipsa_ref_t;
/*
* IPsec "latching" state.
*
* In the presence of IPsec policy, fully-bound conn's bind a connection
* to more than just the 5-tuple, but also a specific IPsec action and
* identity-pair.
*
* As an optimization, we also cache soft references to IPsec SA's
* here so that we can fast-path around most of the work needed for
* outbound IPsec SA selection.
*
* Were it not for TCP's detached connections, this state would be
* in-line in conn_t; instead, this is in a separate structure so it
* can be handed off to TCP when a connection is detached.
*/
typedef struct ipsec_latch_s
{
struct ipsid_s *ipl_local_cid;
struct ipsid_s *ipl_remote_cid;
unsigned int
ipl_ids_latched : 1,
ipl_pad_to_bit_31 : 27;
#define IPLATCH_REFHOLD(ipl) { \
}
membar_exit(); \
}
/*
* peer identity structure.
*/
/*
* The old IP client structure "ipc_t" is gone. All the data is stored in the
* connection structure "conn_t" now. The mapping of old and new fields looks
* like this:
*
* ipc_ulp conn_ulp
* ipc_rq conn_rq
* ipc_wq conn_wq
*
* ipc_laddr conn_src
* ipc_faddr conn_rem
* ipc_v6laddr conn_srcv6
* ipc_v6faddr conn_remv6
*
* ipc_lport conn_lport
* ipc_fport conn_fport
* ipc_ports conn_ports
*
* ipc_policy conn_policy
* ipc_latch conn_latch
*
* ipc_irc_lock conn_lock
* ipc_ire_cache conn_ire_cache
*
* ipc_state_flags conn_state_flags
* ipc_outgoing_ill conn_outgoing_ill
*
* ipc_dontroute conn_dontroute
* ipc_loopback conn_loopback
* ipc_broadcast conn_broadcast
* ipc_reuseaddr conn_reuseaddr
*
* ipc_multicast_loop conn_multicast_loop
* ipc_multi_router conn_multi_router
* ipc_draining conn_draining
*
* ipc_did_putbq conn_did_putbq
* ipc_unspec_src conn_unspec_src
* ipc_policy_cached conn_policy_cached
*
* ipc_in_enforce_policy conn_in_enforce_policy
* ipc_out_enforce_policy conn_out_enforce_policy
* ipc_af_isv6 conn_af_isv6
* ipc_pkt_isv6 conn_pkt_isv6
*
* ipc_ipv6_recvpktinfo conn_ipv6_recvpktinfo
*
* ipc_ipv6_recvhoplimit conn_ipv6_recvhoplimit
* ipc_ipv6_recvhopopts conn_ipv6_recvhopopts
* ipc_ipv6_recvdstopts conn_ipv6_recvdstopts
*
* ipc_ipv6_recvrthdr conn_ipv6_recvrthdr
* ipc_ipv6_recvrtdstopts conn_ipv6_recvrtdstopts
* ipc_fully_bound conn_fully_bound
*
* ipc_recvif conn_recvif
*
* ipc_recvslla conn_recvslla
* ipc_acking_unbind conn_acking_unbind
* ipc_pad_to_bit_31 conn_pad_to_bit_31
*
* ipc_proto conn_proto
* ipc_incoming_ill conn_incoming_ill
* ipc_pending_ill conn_pending_ill
* ipc_unbind_mp conn_unbind_mp
* ipc_ilg conn_ilg
* ipc_ilg_allocated conn_ilg_allocated
* ipc_ilg_inuse conn_ilg_inuse
* ipc_ilg_walker_cnt conn_ilg_walker_cnt
* ipc_refcv conn_refcv
* ipc_multicast_ipif conn_multicast_ipif
* ipc_multicast_ill conn_multicast_ill
* ipc_drain_next conn_drain_next
* ipc_drain_prev conn_drain_prev
* ipc_idl conn_idl
*/
/*
*/
typedef struct ipsec_selector {
/*
* Note that we put v4 addresses in the *first* 32-bit word of the
* in spd.c
*/
/* Values used in IP by IPSEC Code */
#define IPSEC_OUTBOUND B_TRUE
#define IPSEC_INBOUND B_FALSE
/*
* There are two variants in policy failures. The packet may come in
* secure when not needed (IPSEC_POLICY_???_NOT_NEEDED) or it may not
* have the desired level of protection (IPSEC_POLICY_MISMATCH).
*/
#define IPSEC_POLICY_NOT_NEEDED 0
#define IPSEC_POLICY_MISMATCH 1
#define IPSEC_POLICY_AUTH_NOT_NEEDED 2
#define IPSEC_POLICY_ENCR_NOT_NEEDED 3
#define IPSEC_POLICY_SE_NOT_NEEDED 4
/*
* Following macro is used whenever the code does not know whether there
* is an M_CTL present in the front and it needs to examine the actual mp,
* i.e., the IP header. As an M_CTL message could be in the front, this
* extracts the packet into mp and the M_CTL mp into first_mp. If the M_CTL
* mp is not present, both first_mp and mp point to the same message.
*/
(mctl_present) = B_TRUE; \
} else { \
(mctl_present) = B_FALSE; \
}
/*
* Check with IPSEC inbound policy if
*
* 1) per-socket policy is present - indicated by conn_in_enforce_policy.
* 2) Or if we have not cached policy on the conn and the global policy is
* non-empty.
*/
((connp)->conn_in_enforce_policy || \
(!((connp)->conn_policy_cached) && \
((connp)->conn_in_enforce_policy || \
(!(connp)->conn_policy_cached && \
((connp)->conn_out_enforce_policy || \
(!((connp)->conn_policy_cached) && \
((connp)->conn_out_enforce_policy || \
(!(connp)->conn_policy_cached && \
/*
* Information cached in IRE for upper layer protocol (ULP).
*
* Notice that ire_max_frag is not included in the iulp_t structure, which
* it may seem that it should. But ire_max_frag cannot really be cached. It
* is fixed for each interface. For MTU found by PMTUd, we may want to cache
* it. But currently, we do not do that.
*/
typedef struct iulp_s {
iulp_not_used : 28;
} iulp_t;
/* Zero iulp_t. */
extern const iulp_t ire_uinfo_null;
/*
* The conn drain list structure (idl_t).
* The list is protected by idl_lock. Each conn_t inserted in the list
* points back at this idl_t using conn_idl. IP primes the draining of the
* conns queued in these lists, by qenabling the 1st conn of each list. This
* occurs when STREAMS backenables ip_wsrv on the IP module. Each conn instance
* of ip_wsrv successively qenables the next conn in the list.
* idl_lock protects all other members of idl_t and conn_drain_next
* and conn_drain_prev of conn_t. The conn_lock protects IPCF_DRAIN_DISABLED
* flag of the conn_t and conn_idl.
*
* The conn drain list, idl_t, itself is part of tx cookie list structure.
* A tx cookie list points to a blocked Tx ring and contains the list of
* all conn's that are blocked due to the flow-controlled Tx ring (via
* the idl drain list). Note that a link can have multiple Tx rings. The
* drain list will store the conn's blocked due to Tx ring being flow
* controlled.
*/
typedef uintptr_t ip_mac_tx_cookie_t;
typedef struct idl_tx_list_s idl_tx_list_t;
struct idl_tx_list_s {
int txl_drain_index;
};
struct idl_s {
/* drain list again */
idl_unused : 31;
};
/*
* Interface route structure which holds the necessary information to recreate
* routes that are tied to an interface (namely where ire_ipif != NULL).
* These routes which were initially created via a routing socket or via the
* SIOCADDRT ioctl may be gateway routes (RTF_GATEWAY being set) or may be
* traditional interface routes. When an interface comes back up after being
* marked down, this information will be used to recreate the routes. These
* are part of an mblk_t chain that hangs off of the IPIF (ipif_saved_ire_mp).
*/
typedef struct ifrt_s {
} ifrt_t;
/* Number of IP addresses that can be hosted on a physical interface */
#define MAX_ADDRS_PER_IF 8192
/*
* Number of Source addresses to be considered for source address
* selection. Used by ipif_select_source[_v6].
*/
#define MAX_IPIF_SELECT_SOURCE 50
#ifdef IP_DEBUG
/*
* Trace refholds and refreles for debugging.
*/
#define TR_STACK_DEPTH 14
typedef struct tr_buf_s {
int tr_depth;
} tr_buf_t;
typedef struct th_trace_s {
int th_refcnt;
#define TR_BUF_MAX 38
} th_trace_t;
typedef struct th_hash_s {
} th_hash_t;
#endif
/* The following are ipif_state_flags */
/* IP interface structure, one per local address */
typedef struct ipif_s {
int ipif_id; /* Logical unit number */
/* prevent awkward out of mem */
/* condition later */
/* on this interface so that they */
/* can survive ifconfig down. */
/* will be reported on. Used when */
/* handling an igmp timeout. */
/*
* The packet counts in the ipif contain the sum of the
* packet counts in dead IREs that were affiliated with
* this ipif.
*/
/* Exclusive bit fields, protected by ipsq_t */
unsigned int
ipif_pad_to_31 : 26;
/* Number of ire's and ilm's referencing this ipif */
/*
* For an IPMP interface, ipif_bound_ill tracks the ill whose hardware
* an ill pointer (rather than an index) because only ills that are
* part of a group will be pointed to, and an ill cannot disappear
* while it's in a group.
*/
struct ill_s *ipif_bound_ill;
} ipif_t;
/*
* IPIF_FREE_OK() means that there are no incoming references
* to the ipif. Incoming refs would prevent the ipif from being freed.
*/
#define IPIF_FREE_OK(ipif) \
/*
* IPIF_DOWN_OK() determines whether the incoming pointer reference counts
* would permit the ipif to be considered quiescent. In order for
* an ipif or ill to be considered quiescent, the ire and nce references
*
* We do not require the ilm references to go to zero for quiescence
* because the quiescence checks are done to ensure that
* has been marked down, and incoming packets to addresses on a
* quiescent interface are rejected. This implies that all the
* creation of any ires using that source address must be prevented.
* Similarly incoming unicast packets destined to the 'down' address
* will not be accepted once that ire is gone. However incoming
* multicast packets are not destined to the downed address.
* They are only related to the ill in question. Furthermore
* the current API behavior allows applications to join or leave
* multicast groups, i.e., IP_ADD_MEMBERSHIP / LEAVE_MEMBERSHIP, using a
* down address. Therefore the ilm references are not included in
* the _DOWN_OK macros.
*/
/*
* The following table lists the protection levels of the various members
* of the ipif_t. The following notation is used.
*
* Write once - Written to only once at the time of bringing up
* the interface and can be safely read after the bringup without any lock.
*
* ipsq - Need to execute in the ipsq to perform the indicated access.
*
* ill_lock - Need to hold this mutex to perform the indicated access.
*
* write access respectively.
*
* down ill - Written to only when the ill is down (i.e all ipifs are down)
* up ill - Read only when the ill is up (i.e. at least 1 ipif is up)
*
* Table of ipif_t members and their protection
*
* ipif_next ipsq + ill_lock + ipsq OR ill_lock OR
* ill_g_lock ill_g_lock
* ipif_ill ipsq + down ipif write once
* ipif_id ipsq + down ipif write once
* ipif_mtu ipsq
* ipif_v6lcl_addr ipsq + down ipif up ipif
* ipif_v6src_addr ipsq + down ipif up ipif
* ipif_v6subnet ipsq + down ipif up ipif
* ipif_v6net_mask ipsq + down ipif up ipif
*
* ipif_v6brd_addr
* ipif_v6pp_dst_addr
* ipif_flags ill_lock ill_lock
* ipif_metric
* ipif_ire_type ipsq + down ill up ill
*
* ipif_arp_del_mp ipsq ipsq
* ipif_saved_ire_mp ipif_saved_ire_lock ipif_saved_ire_lock
* ipif_igmp_rpt ipsq ipsq
*
* ipif_fo_pkt_count Approx
* ipif_ib_pkt_count Approx
* ipif_ob_pkt_count Approx
*
* bit fields ill_lock ill_lock
*
* ipif_seqid ipsq Write once
*
* ipif_state_flags ill_lock ill_lock
* ipif_refcnt ill_lock ill_lock
* ipif_ire_cnt ill_lock ill_lock
* ipif_ilm_cnt ill_lock ill_lock
* ipif_saved_ire_cnt
*
* ipif_bound_ill ipsq + ipmp_lock ipsq OR ipmp_lock
* ipif_bound_next ipsq ipsq
* ipif_bound ipsq ipsq
*/
/*
* Reference-tracing hooks for ipif_t and ill_t. In non-DEBUG builds all
* four macros expand to nothing, so call sites compile away.
* NOTE(review): the DEBUG branch is empty in this view, which would leave
* these macros undefined in a DEBUG build -- confirm the DEBUG definitions.
*/
#ifdef DEBUG
#else
#define IPIF_TRACE_REF(ipif)
#define ILL_TRACE_REF(ill)
#define IPIF_UNTRACE_REF(ipif)
#define ILL_UNTRACE_REF(ill)
#endif
/* IPv4 compatibility macros */
/* Macros for easy backreferences to the ill. */
/*
* mode value for ip_ioctl_finish for finishing an ioctl
*/
/*
* The IP-MT design revolves around the serialization objects ipsq_t (IPSQ)
* and ipxop_t (exclusive operation or "xop"). Becoming "writer" on an IPSQ
* ensures that no other threads can become "writer" on any IPSQs sharing that
* IPSQ's xop until the writer thread is done.
*
* Each phyint points to one IPSQ that remains fixed over the phyint's life.
* Each IPSQ points to one xop that can change over the IPSQ's life. If a
* phyint is *not* in an IPMP group, then its IPSQ will refer to the IPSQ's
* "own" xop (ipsq_ownxop). If a phyint *is* part of an IPMP group, then its
* IPSQ will refer to the "group" xop, which is shorthand for the xop of the
* IPSQ of the IPMP meta-interface's phyint. Thus, all phyints that are part
* of the same IPMP group will have their IPSQ's point to the group xop, and
* thus becoming "writer" on any phyint in the group will prevent any other
* writer on any other phyint in the group. All IPSQs sharing the same xop
* are chained together through ipsq_next (in the degenerate common case,
* ipsq_next simply refers to itself). Note that the group xop is guaranteed
* to exist at least as long as there are members in the group, since the IPMP
* meta-interface can only be destroyed if the group is empty.
*
* Incoming exclusive operation requests are enqueued on the IPSQ they arrived
* on rather than the xop. This makes switching xop's (as would happen when a
* phyint leaves an IPMP group) simple, because after the phyint leaves the
* group, any operations enqueued on its IPSQ can be safely processed with
* respect to its new xop, and any operations enqueued on the IPSQs of its
* former group can be processed with respect to their existing group xop.
* Even so, switching xops is a subtle dance; see ipsq_dq() for details.
*
* An IPSQ's "own" xop is embedded within the IPSQ itself since they have
* identical lifetimes, and because doing so simplifies pointer management.
* While each phyint and IPSQ point to each other, it is not possible to free
* the IPSQ when the phyint is freed, since we may still be *inside* the IPSQ
* when the phyint is being freed. Thus, ipsq_phyint is set to NULL when the
* phyint is freed, and the IPSQ free is later done in ipsq_exit().
*
* ipsq_t synchronization: read write
*
* ipsq_xopq_mphead ipx_lock ipx_lock
* ipsq_xopq_mptail ipx_lock ipx_lock
* ipsq_xop_switch_mp ipsq_lock ipsq_lock
* ipsq_phyint write once write once
* ipsq_next RW_READER ill_g_lock RW_WRITER ill_g_lock
* ipsq_xop ipsq_lock or ipsq ipsq_lock + ipsq
* ipsq_swxop ipsq ipsq
* ipsq_ownxop see ipxop_t see ipxop_t
* ipsq_ipst write once write once
*
* ipxop_t synchronization: read write
*
* ipx_writer ipx_lock ipx_lock
* ipx_xop_queued ipx_lock ipx_lock
* ipx_mphead ipx_lock ipx_lock
* ipx_mptail ipx_lock ipx_lock
* ipx_ipsq write once write once
* ips_ipsq_queued ipx_lock ipx_lock
* ipx_waitfor ipsq or ipx_lock ipsq + ipx_lock
* ipx_reentry_cnt ipsq or ipx_lock ipsq + ipx_lock
* ipx_current_done ipsq ipsq
* ipx_current_ioctl ipsq ipsq
* ipx_current_ipif ipsq or ipx_lock ipsq + ipx_lock
* ipx_pending_ipif ipsq or ipx_lock ipsq + ipx_lock
* ipx_pending_mp ipsq or ipx_lock ipsq + ipx_lock
* ipx_forced ipsq ipsq
* ipx_depth ipsq ipsq
* ipx_stack ipsq ipsq
*/
typedef struct ipxop_s {
int ipx_waitfor; /* waiting; values encoded below */
int ipx_reentry_cnt;
int ipx_current_ioctl; /* current ioctl, or 0 if no ioctl */
#ifdef DEBUG
int ipx_depth; /* debugging aid */
#define IPX_STACK_DEPTH 15
#endif
} ipxop_t;
typedef struct ipsq_s {
} ipsq_t;
/*
* ipx_waitfor values: what the exclusive operation recorded in
* ipxop_t.ipx_waitfor is currently waiting for.
*
* NOTE(review): the enumerators take their values implicitly from their
* position in the list, so inserting, removing or reordering entries
* changes the encoding stored in ipx_waitfor.
*/
enum {
ILL_DOWN, /* ill_down() waiting for refcnts to drop */
IPIF_FREE, /* ipif_free() waiting for refcnts to drop */
ILL_FREE /* ill unplumb waiting for refcnts to drop */
};
/* Operation types for ipsq_try_enter() */
#define CUR_OP 0 /* request writer within current operation */
/*
* Kstats tracked on each IPMP meta-interface. Order here must match
*/
enum {
IPMP_KSTAT_MAX /* keep last */
};
/*
* phyint represents state that is common to both IPv4 and IPv6 interfaces.
* There is a separate ill_t representing IPv4 and IPv6 which has a
* backpointer to the phyint structure for accessing common state.
*/
/*
* Only the two per-protocol ill pointers are visible in this view.
*/
typedef struct phyint {
struct ill_s *phyint_illv4; /* the IPv4 ill_t tied to this phyint */
struct ill_s *phyint_illv6; /* the IPv6 ill_t tied to this phyint */
} phyint_t;
#define CACHE_ALIGN_SIZE 64
struct _phyint_list_s_ {
};
typedef union phyint_list_u {
struct _phyint_list_s_ phyint_list_s;
/*
* Fragmentation hash bucket
*/
typedef struct ipfb_s {
} ipfb_t;
/*
* IRE bucket structure. Usually there is an array of such structures,
* each pointing to a linked list of ires. irb_refcnt counts the number
* of walkers of a given hash bucket. Usually the reference count is
* bumped up if the walker wants no IRES to be DELETED while walking the
* list. Bumping up does not PREVENT ADDITION. This allows walking a given
* hash bucket without stumbling up on a free pointer.
*
* irb_t structures in ip_ftable are dynamically allocated and freed.
* In order to identify the irb_t structures that can be safely kmem_free'd
* we need to ensure that
* - the irb_refcnt is quiescent, indicating no other walkers,
* - no other threads or ire's are holding references to the irb,
* i.e., irb_nire == 0,
* - there are no active ire's in the bucket, i.e., irb_ire_cnt == 0
*/
typedef struct irb {
/* Should be first in this struct */
/*
 * Bucket mark bits.
 * IRB_MARK_CONDEMNED: presumably the bucket-side mark set when an ire
 * delete is deferred because the bucket is held -- TODO confirm.
 * IRB_MARK_FTABLE: presumably identifies a bucket that belongs to
 * ip_ftable (dynamically allocated) -- TODO confirm.
 */
#define IRB_MARK_CONDEMNED 0x0001
#define IRB_MARK_FTABLE 0x0002
/* Must be 0 (no ire's referencing the irb) before the irb can be freed. */
int irb_nire; /* Num of ftable ire's that ref irb */
} irb_t;
/* The following are return values of ip_xmit_v4() */
typedef enum {
SEND_PASSED = 0, /* sent packet out on wire */
SEND_FAILED, /* sending of packet failed */
LOOKUP_IN_PROGRESS, /* ire cache found, ARP resolution in progress */
LLHDR_RESLV_FAILED /* macaddr resl of onlink dst or nexthop failed */
#define IP_V4_G_HEAD 0
#define IP_V6_G_HEAD 1
#define MAX_G_HEADS 2
/*
* unpadded ill_if structure
*/
struct _ill_if_s_ {
/*
 * Links to the neighbouring ill_if_u entries; presumably these form a
 * doubly linked list -- TODO confirm how the list head is represented.
 */
union ill_if_u *illif_next;
union ill_if_u *illif_prev;
int illif_name_len; /* name length */
};
/* cache aligned ill_if structure */
typedef union ill_if_u {
struct _ill_if_s_ ill_if_s;
} ill_if_t;
typedef struct ill_walk_context_s {
int ctx_current_list; /* current list being searched */
int ctx_last_list; /* last list to search */
/*
* ill_g_heads structure, one for IPV4 and one for IPV6
*/
struct _ill_g_head_s_ {
};
typedef union ill_g_head_u {
struct _ill_g_head_s_ ill_g_head_s;
} ill_g_head_t;
#define IP_V4_ILL_G_LIST(ipst) \
#define IP_V6_ILL_G_LIST(ipst) \
#define IP_VX_ILL_G_LIST(i, ipst) \
/*
* Capabilities, possible flags for ill_capabilities.
*/
/*
* Per-ill Multidata Transmit capabilities.
*/
typedef struct ill_mdt_capab_s ill_mdt_capab_t;
/*
* Per-ill IPsec capabilities.
*/
typedef struct ill_ipsec_capab_s ill_ipsec_capab_t;
/*
* Per-ill Hardware Checksumming capabilities.
*/
typedef struct ill_hcksum_capab_s ill_hcksum_capab_t;
/*
* Per-ill Zero-copy capabilities.
*/
typedef struct ill_zerocopy_capab_s ill_zerocopy_capab_t;
/*
* DLD capabilities.
*/
typedef struct ill_dld_capab_s ill_dld_capab_t;
/*
* Per-ill polling resource map.
*/
typedef struct ill_rx_ring ill_rx_ring_t;
/*
* Per-ill Large Segment Offload capabilities.
*/
typedef struct ill_lso_capab_s ill_lso_capab_t;
/* The following are ill_state_flags */
/* Is this an ILL whose source address is used by other ILL's ? */
#define IS_USESRC_ILL(ill) \
(((ill)->ill_usesrc_ifindex == 0) && \
#define IS_USESRC_CLI_ILL(ill) \
(((ill)->ill_usesrc_ifindex != 0) && \
/* Is this a virtual network interface (vni) ILL ? */
/* Is this a loopback ILL? */
#define IS_LOOPBACK(ill) \
/* Is this an IPMP meta-interface ILL? */
/* Is this ILL under an IPMP meta-interface? (aka "in a group?") */
#define IS_UNDER_IPMP(ill) \
/* Is ill1 in the same illgrp as ill2? */
/* Is ill1 on the same LAN as ill2? */
/*
* IPMP group ILL state structure -- up to two per IPMP group (V4 and V6).
* It is guaranteed to persist while there are interfaces of that type in the
* group.
* In general, most fields are accessed outside of the IPSQ (e.g., in the
* datapath), and thus use locks in addition to the IPSQ for protection.
*
* synchronization: read write
*
* ig_if ipsq or ill_g_lock ipsq and ill_g_lock
* ig_actif ipsq or ipmp_lock ipsq and ipmp_lock
* ig_nactif ipsq or ipmp_lock ipsq and ipmp_lock
* ig_next_ill ipsq or ipmp_lock ipsq and ipmp_lock
* ig_ipmp_ill write once write once
* ig_cast_ill ipsq or ipmp_lock ipsq and ipmp_lock
* ig_arpent ipsq ipsq
* ig_mtu ipsq ipsq
*/
typedef struct ipmp_illgrp_s {
/*
* IPMP group state structure -- one per IPMP group. Created when the
* IPMP meta-interface is plumbed; it is guaranteed to persist while there
* are interfaces in it.
*
* ipmp_grp_t synchronization: read write
*
* gr_name ipmp_lock ipmp_lock
* gr_ifname write once write once
* gr_mactype ipmp_lock ipmp_lock
* gr_phyint write once write once
* gr_nif ipmp_lock ipmp_lock
* gr_nactif ipsq ipsq
* gr_v4 ipmp_lock ipmp_lock
* gr_v6 ipmp_lock ipmp_lock
* gr_nv4 ipmp_lock ipmp_lock
* gr_nv6 ipmp_lock ipmp_lock
* gr_pendv4 ipmp_lock ipmp_lock
* gr_pendv6 ipmp_lock ipmp_lock
* gr_linkdownmp ipsq ipsq
* gr_ksp ipmp_lock ipmp_lock
* gr_kstats0 atomic atomic
*/
typedef struct ipmp_grp_s {
} ipmp_grp_t;
/*
* IPMP ARP entry -- one per SIOCS*ARP entry tied to the group. Used to keep
* ARP up-to-date as the active set of interfaces in the group changes.
*/
typedef struct ipmp_arpent_s {
/*
* IP Lower level Structure.
* Instance data structure in ip_open when there is a device below us.
*/
typedef struct ill_s {
int ill_error; /* Error value sent up by device. */
char *ill_name; /* Our name. */
char *ill_ndd_name; /* Name + ":ip?_forwarding" for NDD. */
/*
* Physical Point of Attachment num. If DLPI style 1 provider
* then this is derived from the devname.
*/
/* supports broadcast. */
int ill_mcast_type; /* type of router which is querier */
/* on this interface */
int ill_mcast_qi; /* IGMPv3/MLDv2 query interval var */
/*
* All non-NULL cells between 'ill_first_mp_to_free' and
* 'ill_last_mp_to_free' are freed in ill_delete.
*/
#define ill_first_mp_to_free ill_bcast_mp
#define ill_last_mp_to_free ill_phys_addr_mp
/* Following bit fields protected by ipsq_t */
ill_needs_attach : 1,
ill_reserved : 1,
ill_isv6 : 1,
ill_dlpi_style_set : 1,
ill_ifname_pending : 1,
ill_join_allmulti : 1,
ill_logical_down : 1,
ill_dl_up : 1,
ill_up_ipifs : 1,
ill_pad_to_bit_31 : 17;
/* Following bit fields protected by ill_lock */
ill_ilm_cleanup_reqd : 1,
ill_arp_closing : 1,
ill_pad_bit_31 : 26;
/*
* Used in SIOCSIFMUXID and SIOCGIFMUXID for 'ifconfig unplumb'.
*/
int ill_arp_muxid; /* muxid returned from plink for arp */
int ill_ip_muxid; /* muxid returned from plink for ip */
/* Used for IP frag reassembly throttling on a per ILL basis. */
int ill_type; /* From <net/if_types.h> */
/*
* Capabilities related fields.
*/
/*
* New fields for IPv6
*/
/*
* Following two mblks are allocated common to all
* the ipifs when the first interface is coming up.
* It is sent up to arp when the last ipif is coming
* down.
*/
/*
* Used for implementing IFF_NOARP. As IFF_NOARP is used
* to turn off for all the logicals, it is here instead
* of the ipif.
*/
/*
* The ill_nd_lla* fields handle the link layer address option
* from neighbor discovery. This is used for external IPv6
* address resolution.
*/
/*
* We now have 3 phys_addr_req's sent down. This field keeps track
* of which one is pending.
*/
/*
* Used to save errors that occur during plumbing
*/
void *ill_fastpath_list; /* both ire and nce hang off this */
/*
* Contains the upper read queue pointer of the module immediately
* beneath IP. This field allows IP to validate sub-capability
* acknowledgments coming up from downstream.
*/
void *ill_flownotify_mh; /* Tx flow ctl, mac cb handle */
/*
* IPMP fields.
*/
} ill_t;
/*
* ILL_FREE_OK() means that there are no incoming pointer references
* to the ill.
*/
#define ILL_FREE_OK(ill) \
(ill)->ill_nce_cnt == 0)
/*
* quiescence checks. See comments above IPIF_DOWN_OK for details
* on why ires and nces are selectively considered for this macro.
*/
/*
* The following table lists the protection levels of the various members
* of the ill_t. Same notation as that used for ipif_t above is used.
*
* Write Read
*
* ill_ifptr ill_g_lock + s Write once
* ill_rq ipsq Write once
* ill_wq ipsq Write once
*
* ill_error ipsq None
* ill_ipif ill_g_lock + ipsq ill_g_lock OR ipsq
* ill_ipif_up_count ill_lock + ipsq ill_lock OR ipsq
* ill_max_frag ipsq Write once
*
* ill_name ill_g_lock + ipsq Write once
* ill_name_length ill_g_lock + ipsq Write once
* ill_ndd_name ipsq Write once
* ill_net_type ipsq Write once
* ill_ppa ill_g_lock + ipsq Write once
* ill_sap ipsq + down ill Write once
* ill_sap_length ipsq + down ill Write once
* ill_phys_addr_length ipsq + down ill Write once
*
* ill_bcast_addr_length ipsq ipsq
* ill_mactype ipsq ipsq
* ill_frag_ptr ipsq ipsq
*
* ill_frag_timer_id ill_lock ill_lock
* ill_frag_hash_tbl ipsq up ill
* ill_ilm ipsq + ill_lock ill_lock
* ill_mcast_type ill_lock ill_lock
* ill_mcast_v1_time ill_lock ill_lock
* ill_mcast_v2_time ill_lock ill_lock
* ill_mcast_v1_tset ill_lock ill_lock
* ill_mcast_v2_tset ill_lock ill_lock
* ill_mcast_rv ill_lock ill_lock
* ill_mcast_qi ill_lock ill_lock
* ill_pending_mp ill_lock ill_lock
*
* ill_bcast_mp ipsq ipsq
* ill_resolver_mp ipsq only when ill is up
* ill_down_mp ipsq ipsq
* ill_dlpi_deferred ill_lock ill_lock
* ill_dlpi_pending ill_lock ill_lock
* ill_phys_addr_mp ipsq + down ill only when ill is up
* ill_phys_addr ipsq + down ill only when ill is up
*
* ill_state_flags ill_lock ill_lock
* exclusive bit flags ipsq_t ipsq_t
* shared bit flags ill_lock ill_lock
*
* ill_arp_muxid ipsq Not atomic
* ill_ip_muxid ipsq Not atomic
*
* ill_ipf_gen Not atomic
* ill_frag_count atomics atomics
* ill_type ipsq + down ill only when ill is up
* ill_dlpi_multicast_state ill_lock ill_lock
* ill_dlpi_fastpath_state ill_lock ill_lock
* ill_dlpi_capab_state ipsq ipsq
* ill_max_hops ipsq Not atomic
*
* ill_max_mtu
*
* ill_user_mtu ipsq + ill_lock ill_lock
* ill_reachable_time ipsq + ill_lock ill_lock
* ill_reachable_retrans_time ipsq + ill_lock ill_lock
* ill_max_buf ipsq + ill_lock ill_lock
*
* Next 2 fields need ill_lock because of the get ioctls. They should not
* report partially updated results without executing in the ipsq.
* ill_token ipsq + ill_lock ill_lock
* ill_token_length ipsq + ill_lock ill_lock
* ill_xmit_count ipsq + down ill write once
* ill_ip6_mib ipsq + down ill only when ill is up
* ill_icmp6_mib ipsq + down ill only when ill is up
* ill_arp_down_mp ipsq ipsq
* ill_arp_del_mapping_mp ipsq ipsq
* ill_arp_on_mp ipsq ipsq
*
* ill_phyint ipsq, ill_g_lock, ill_lock Any of them
* ill_flags ill_lock ill_lock
* ill_nd_lla_mp ipsq + down ill only when ill is up
* ill_nd_lla ipsq + down ill only when ill is up
* ill_nd_lla_len ipsq + down ill only when ill is up
* ill_phys_addr_pend ipsq + down ill only when ill is up
* ill_ifname_pending_err ipsq ipsq
* ill_avl_byppa ipsq, ill_g_lock write once
*
* ill_fastpath_list ill_lock ill_lock
* ill_refcnt ill_lock ill_lock
* ill_ire_cnt ill_lock ill_lock
* ill_cv ill_lock ill_lock
* ill_ilm_walker_cnt ill_lock ill_lock
* ill_nce_cnt ill_lock ill_lock
* ill_ilm_cnt ill_lock ill_lock
* ill_src_ipif ill_g_lock ill_g_lock
* ill_trace ill_lock ill_lock
* ill_usesrc_grp_next ill_g_usesrc_lock ill_g_usesrc_lock
* ill_dhcpinit atomics atomics
* ill_flownotify_mh write once write once
* ill_capab_pending_cnt ipsq ipsq
*
* ill_bound_cnt ipsq ipsq
* ill_bound_ipif ipsq ipsq
* ill_actnode ipsq + ipmp_lock ipsq OR ipmp_lock
* ill_grpnode ipsq + ill_g_lock ipsq OR ill_g_lock
* ill_src_ipif ill_g_lock ill_g_lock
* ill_move_ipif ipsq ipsq
* ill_nom_cast ipsq ipsq OR advisory
* ill_refresh_tid ill_lock ill_lock
* ill_grp (for IPMP ill) write once write once
* ill_grp (for underlying ill) ipsq + ill_g_lock ipsq OR ill_g_lock
*
* NOTE: It's OK to make heuristic decisions on an underlying interface
* by using IS_UNDER_IPMP() or comparing ill_grp's raw pointer value.
*/
/*
* For ioctl restart mechanism see ip_reprocess_ioctl()
*/
struct ip_ioctl_cmd_s;
struct ip_ioctl_cmd_s *, void *);
typedef struct ip_ioctl_cmd_s {
int ipi_cmd;
/*
* ipi_cmd_type:
*
* IF_CMD 1 old style ifreq cmd
* LIF_CMD 2 new style lifreq cmd
* TUN_CMD 3 tunnel related
* ARP_CMD 4 arpreq cmd
* XARP_CMD 5 xarpreq cmd
* MSFILT_CMD 6 multicast source filter cmd
* MISC_CMD 7 misc cmd (not a more specific one above)
*/
#define IPI_DONTCARE 0 /* For ioctl encoded values that don't matter */
/* Flag values in ipi_flags */
/* unused 0x10 */
extern ip_ioctl_cmd_t ip_ndx_ioctl_table[];
extern ip_ioctl_cmd_t ip_misc_ioctl_table[];
extern int ip_ndx_ioctl_count;
extern int ip_misc_ioctl_count;
typedef struct ipmx_s {
ipmx_notused : 31;
} ipmx_t;
/*
* State for detecting if a driver supports certain features.
* Support for DL_ENABMULTI_REQ uses ill_dlpi_multicast_state.
* Support for DLPI M_DATA fastpath uses ill_dlpi_fastpath_state.
*/
#define IDS_UNKNOWN 0 /* No DLPI request sent */
/* Support for DL_CAPABILITY_REQ uses ill_dlpi_capab_state. */
enum {
};
/* Named Dispatch Parameter Management Structure */
typedef struct ipparam_s {
char *ip_param_name;
} ipparam_t;
/* Extended NDP Management Structure */
typedef struct ipndp_s {
char *ip_ndp_name;
} ipndp_t;
/*
* The kernel stores security attributes of all gateways in a database made
* up of one or more tsol_gcdb_t elements. Each tsol_gcdb_t contains the
* security-related credentials of the gateway. More than one gateway may
* share entries in the database.
*
* The tsol_gc_t structure represents the gateway to credential association,
* and refers to an entry in the database. One or more tsol_gc_t entities are
* grouped together to form one or more tsol_gcgrp_t, each representing the
* list of security attributes specific to the gateway. A gateway may be
* associated with at most one credentials group.
*/
struct tsol_gcgrp_s;
/*
* Gateway security credential record.
*/
typedef struct tsol_gcdb_s {
} tsol_gcdb_t;
/*
* Gateway to credential association.
*/
typedef struct tsol_gc_s {
} tsol_gc_t;
/*
* Gateway credentials group address.
*/
typedef struct tsol_gcgrp_addr_s {
int ga_af; /* address family */
/*
* Gateway credentials group.
*/
typedef struct tsol_gcgrp_s {
} tsol_gcgrp_t;
extern kmutex_t gcgrp_lock;
#define GC_REFRELE(p) { \
if (--((p)->gc_refcnt) == 0) \
gc_inactive(p); \
else \
}
/*
 * Take a reference on a gateway credentials group. The count is bumped
 * while holding the global gcgrp_lock; the ASSERT catches the counter
 * wrapping around to zero.
 */
#define GCGRP_REFHOLD(grp) {				\
	mutex_enter(&gcgrp_lock);			\
	(grp)->gcgrp_refcnt += 1;			\
	ASSERT((grp)->gcgrp_refcnt != 0);		\
	mutex_exit(&gcgrp_lock);			\
}
/*
 * Drop a reference on a gateway credentials group under gcgrp_lock.
 * When the last reference goes away, gcgrp_inactive() is called with
 * the lock still held; the lock is released afterwards in either case.
 */
#define GCGRP_REFRELE(grp) {				\
	mutex_enter(&gcgrp_lock);			\
	ASSERT((grp)->gcgrp_refcnt > 0);		\
	if (--((grp)->gcgrp_refcnt) == 0) {		\
		gcgrp_inactive(grp);			\
	}						\
	mutex_exit(&gcgrp_lock);			\
}
/*
* IRE gateway security attributes structure, pointed to by tsol_ire_gw_secattr
*/
struct tsol_tnrhc;
typedef struct tsol_ire_gw_secattr_s {
/*
* count of the IREs and IRBs (ire bucket).
*
* 1) We bump up the reference count of an IRE to make sure that
* it does not get deleted and freed while we are using it.
* Typically all the lookup functions hold the bucket lock,
* and look for the IRE. If it finds an IRE, it bumps up the
* reference count before dropping the lock. Sometimes we *may* want
* to bump up the reference count after we *looked* up i.e without
* holding the bucket lock. So, the IRE_REFHOLD macro does not assert
* on the bucket lock being held. Any thread trying to delete from
* the hash bucket can still do so but cannot free the IRE if
* ire_refcnt is not 0.
*
* 2) We bump up the reference count on the bucket where the IRE resides
* (IRB), when we want to prevent the IREs getting deleted from a given
* hash bucket. This makes life easier for ire_walk type functions which
* want to walk the IRE list, call a function, but need to drop
* the bucket lock to prevent recursive rw_enters. While the
* lock is dropped, the list could be changed by other threads or
* the same thread could end up deleting the ire or the ire pointed by
* ire_next. IRE_REFHOLDing the ire or ire_next is not sufficient as
* a delete will still remove the ire from the bucket while we have
* dropped the lock and hence the ire_next would be NULL. Thus, we
* need a mechanism to prevent deletions from a given bucket.
*
* To prevent deletions, we bump up the reference count on the
* bucket. If the bucket is held, ire_delete just marks IRE_MARK_CONDEMNED
* both on the ire's ire_marks and the bucket's irb_marks. When the
* reference count on the bucket drops to zero, all the CONDEMNED ires
* are deleted. We don't have to bump up the reference count on the
* bucket if we are walking the bucket and never have to drop the bucket
* lock. Note that IRB_REFHOLD does not prevent addition of new ires
* in the list. It is okay because addition of new ires will not cause
* ire_next to point to freed memory. We do IRB_REFHOLD only when
* all of the 3 conditions are true :
*
* 1) The code needs to walk the IRE bucket from start to end.
* 2) It may have to drop the bucket lock sometimes while doing (1)
* 3) It does not want any ires to be deleted meanwhile.
*/
/*
* Bump up the reference count on the IRE. We cannot assert that the
* bucket lock is being held as it is legal to bump up the reference
* count after the first lookup has returned the IRE without
* holding the lock. Currently ip_wput does this for caching IRE_CACHEs.
*/
#ifdef DEBUG
#else
#define IRE_UNTRACE_REF(ire)
#define IRE_TRACE_REF(ire)
#endif
#define IRE_REFHOLD_NOTR(ire) { \
}
/*
 * Take a reference on `ire' and record it for reference tracing:
 * first IRE_REFHOLD_NOTR(ire) (the "no trace" hold), then
 * IRE_TRACE_REF(ire). IRE_TRACE_REF expands to nothing in the
 * non-DEBUG branch, so the trace step costs nothing there.
 */
#define IRE_REFHOLD(ire) { \
IRE_REFHOLD_NOTR(ire); \
IRE_TRACE_REF(ire); \
}
/*
 * Variant of IRE_REFHOLD that bumps ire_refcnt with a plain increment
 * after recording the trace. Presumably the caller already holds
 * whatever lock protects ire_refcnt -- TODO confirm.
 */
#define IRE_REFHOLD_LOCKED(ire) {			\
	IRE_TRACE_REF(ire);				\
	++(ire)->ire_refcnt;				\
}
/*
* Decrement the reference count on the IRE.
* In architectures e.g sun4u, where atomic_add_32_nv is just
* a cas, we need to maintain the right memory barrier semantics
* as that of mutex_exit i.e all the loads and stores should complete
* before the cas is executed. membar_exit() does that here.
*
* NOTE : This macro is used only in places where we want performance.
* To avoid bloating the code, we use the function "ire_refrele"
* which essentially calls the macro.
*/
#define IRE_REFRELE_NOTR(ire) { \
membar_exit(); \
ire_inactive(ire); \
}
#define IRE_REFRELE(ire) { \
IRE_UNTRACE_REF(ire); \
} \
IRE_REFRELE_NOTR(ire); \
}
/*
* Bump up the reference count on the hash bucket - IRB to
* prevent ires from being deleted in this bucket.
*/
/*
 * Add one walker reference to hash bucket `irb'.
 * NOTE(review): as written here the body takes no lock and is the same
 * as IRB_REFHOLD_LOCKED -- confirm that callers serialize access to
 * irb_refcnt.
 */
#define IRB_REFHOLD(irb) {				\
	(irb)->irb_refcnt += 1;				\
}
/*
 * Add one walker reference to hash bucket `irb' with a plain increment.
 * Presumably for callers that already hold the bucket lock -- TODO
 * confirm.
 */
#define IRB_REFHOLD_LOCKED(irb) {			\
	(irb)->irb_refcnt += 1;				\
}
void irb_refrele_ftable(irb_t *);
/*
* Note: when IRB_MARK_FTABLE (i.e., IRE_CACHETABLE entry), the irb_t
* is statically allocated, so that when the irb_refcnt goes to 0,
* we simply clean up the ire list and continue.
*/
#define IRB_REFRELE(irb) { \
irb_refrele_ftable((irb)); \
} else { \
if (--(irb)->irb_refcnt == 0 && \
\
ire_cleanup(ire_list); \
} else { \
} \
} \
}
extern struct kmem_cache *rt_entry_cache;
/*
* Lock the fast path mp for access, since the fp_mp can be deleted
* due to a DL_NOTE_FASTPATH_FLUSH in the case of IRE_BROADCAST
*/
#define LOCK_IRE_FP_MP(ire) { \
}
#define UNLOCK_IRE_FP_MP(ire) { \
}
typedef struct ire4 {
} ire4_t;
typedef struct ire6 {
} ire6_t;
typedef union ire_addr {
} ire_addr_u_t;
/* Internet Routing Entry */
typedef struct ire_s {
union {
} imf_u;
/*
* Neighbor Cache Entry for IPv6; arp info for IPv4
*/
/*
* Protects ire_uinfo, ire_max_frag, and ire_frag_flag.
*/
/*
* ire's that are embedded inside mblk_t and sent to the external
* resolver use the ire_stq_ifindex to track the ifindex of the
* ire_stq, so that the ill (if it exists) can be correctly recovered
* for cleanup in the esbfree routine when arp failure occurs.
* Similarly, the ire_stackid is used to recover the ip_stack_t.
*/
} ire_t;
/* IPv4 compatibility macros */
/* Convenient typedefs for sockaddrs */
typedef struct sockaddr_in sin_t;
typedef struct sockaddr_in6 sin6_t;
/* Address structure used for internal bind with IP */
typedef struct ipa_conn_s {
} ipa_conn_t;
typedef struct ipa6_conn_s {
} ipa6_conn_t;
/*
* Using ipa_conn_x_t or ipa6_conn_x_t allows us to modify the behavior of IP's
* bind handler.
*/
typedef struct ipa_conn_extended_s {
} ipa_conn_x_t;
typedef struct ipa6_conn_extended_s {
/* flag values for ipa_conn_x_t and ipa6_conn_x_t. */
/*
 * Entry of a name/value table (see ire_nv_tbl and ip_nv_lookup()).
 * Only the name member is visible here.
 */
typedef struct nv_s {
/* Name string; presumably what ip_nv_lookup() returns -- TODO confirm. */
char *nv_name;
} nv_t;
#define ILL_FRAG_HASH(s, i) \
/*
* The MAX number of allowed fragmented packets per hash bucket
* calculation is based on the most common mtu size of 1500. This limit
* will work well for other mtu sizes as well.
*/
#define COMMON_IP_MTU 1500
#define MAX_FRAG_MIN 10
#define MAX_FRAG_PKTS(ipst) \
/*
* Maximum dups allowed per packet.
*/
extern uint_t ip_max_frag_dups;
/*
* Per-packet information for received packets and transmitted.
* Used by the transport protocols when converting between the packet
* and ancillary data and socket options.
*
* Note: This private data structure and related IPPF_* constant
* definitions are exposed to enable compilation of some debugging tools.
* This is a temporary hack; long term, alternate interfaces should be
* defined to support the needs of such tools and private definitions
* moved to private headers.
*/
struct ip6_pkt_s {
/*
 * Path MTU information for the packet. Presumably only meaningful
 * when IPPF_PATHMTU is set in the ipp_fields mask -- TODO confirm.
 */
struct ip6_mtuinfo *ipp_pathmtu;
};
/*
* This struct is used by ULP_opt_set() functions to return value of IPv4
* ancillary options. Currently this is only used by udp and icmp and only
* IP_PKTINFO option is supported.
*/
typedef struct ip4_pkt_s {
} ip4_pkt_t;
/*
* Used by ULP's to pass options info to ip_output
* currently only IP_PKTINFO is supported.
*/
typedef struct ip_opt_info_s {
/*
* value for ip_opt_flags
*/
#define IP_VERIFY_SRC 0x1
/*
* This structure is used to convey information from IP and the ULP.
* Currently used for the IP_RECVSLLA, IP_RECVIF and IP_RECVPKTINFO options.
* The type of information field is set to IN_PKTINFO (i.e inbound pkt info)
*/
typedef struct ip_pktinfo {
} ip_pktinfo_t;
/*
* flags to tell UDP what IP is sending; in_pkt_flags
*/
/*
* Inbound interface index + matched address.
* Used only by IPV4.
*/
#define IPF_RECVADDR 0x04
/* ipp_fields values */
#define IPPF_HOPLIMIT 0x0020
#define IPPF_HOPOPTS 0x0040
#define IPPF_RTHDR 0x0080
#define IPPF_RTDSTOPTS 0x0100
#define IPPF_DSTOPTS 0x0200
#define IPPF_NEXTHOP 0x0400
#define IPPF_PATHMTU 0x0800
#define IPPF_TCLASS 0x1000
#define IPPF_DONTFRAG 0x2000
#define IPPF_USE_MIN_MTU 0x04000
#define IPPF_MULTICAST_HOPS 0x08000
#define IPPF_UNICAST_HOPS 0x10000
#define IPPF_FRAGHDR 0x20000
#define IPPF_HAS_IP6I \
#define TCP_PORTS_OFFSET 0
#define UDP_PORTS_OFFSET 0
/*
* ill / ipif lookup functions increment the refcnt on the ill / ipif only
* after calling these macros. This ensures that the refcnt on the ipif or
* ill will eventually drop down to zero.
*/
#define ILL_CAN_LOOKUP(ill) \
#define ILL_CAN_WAIT(ill, q) \
#define ILL_CAN_LOOKUP_WALKER(ill) \
#define IPIF_CAN_LOOKUP(ipif) \
/*
* If the parameter 'q' is NULL, the caller is not interested in wait and
* restart of the operation if the ILL or IPIF cannot be looked up when it is
* marked as 'CHANGING'. Typically a thread that tries to send out data will
* end up passing NULLs as the last 4 parameters to ill_lookup_on_ifindex and
* in this case 'q' is NULL
*/
#define IPIF_CAN_WAIT(ipif, q) \
#define IPIF_CAN_LOOKUP_WALKER(ipif) \
#define ILL_UNMARK_CHANGING(ill) \
/* Macros used to assert that this thread is a writer */
/*
* Grab ill locks in the proper order. The order is highest addressed
* ill is locked first.
*/
{ \
} else { \
} \
}
{ \
}
/* Get the other protocol instance ill */
/* ioctl command info: Ioctl properties extracted and stored in here */
typedef struct cmd_info_s
{
} cmd_info_t;
/*
* List of AH and ESP IPsec acceleration capable ills
*/
typedef struct ipsec_capab_ill_s {
struct ipsec_capab_ill_s *next;
extern struct kmem_cache *ire_cache;
extern ipaddr_t ip_g_all_ones;
extern vmem_t *ip_minor_arena_sa;
extern vmem_t *ip_minor_arena_la;
/*
* ip_g_forward controls IP forwarding. It takes two values:
* 0: IP_FORWARD_NEVER Don't forward packets ever.
* 1: IP_FORWARD_ALWAYS Forward packets for elsewhere.
*
* RFC1122 says there must be a configuration switch to control forwarding,
* but that the default MUST be to not forward packets ever. Implicit
* control based on configuration of multiple interfaces MUST NOT be
* implemented (Section 3.1). SunOS 4.1 did provide the "automatic" capability
* and, in fact, it was the default. That capability is now provided in the
*/
/* IPv6 configuration knobs */
/* Misc IP configuration knobs */
extern int dohwcksum; /* use h/w cksum if supported by the h/w */
#ifdef ZC_TEST
extern int noswcksum;
#endif
extern char ipif_loopback_name[];
extern nv_t *ire_nv_tbl;
extern struct module_info ip_mod_info;
#define HOOKS4_INTERESTED_PHYSICAL_IN(ipst) \
#define HOOKS6_INTERESTED_PHYSICAL_IN(ipst) \
#define HOOKS4_INTERESTED_PHYSICAL_OUT(ipst) \
#define HOOKS6_INTERESTED_PHYSICAL_OUT(ipst) \
#define HOOKS4_INTERESTED_FORWARDING(ipst) \
#define HOOKS6_INTERESTED_FORWARDING(ipst) \
#define HOOKS4_INTERESTED_LOOPBACK_IN(ipst) \
#define HOOKS6_INTERESTED_LOOPBACK_IN(ipst) \
#define HOOKS4_INTERESTED_LOOPBACK_OUT(ipst) \
#define HOOKS6_INTERESTED_LOOPBACK_OUT(ipst) \
/*
* Hooks macros used inside of ip
*/
\
if ((_hook).he_interested) { \
\
\
ip2dbg(("%s hook dropped mblk chain %p hdr %p\n",\
} else { \
} \
}
\
if ((_hook).he_interested) { \
\
\
ip2dbg(("%s hook dropped mblk chain %p hdr %p\n",\
} else { \
} \
}
(fp) = 0; \
} else if (IS_UNDER_IPMP(ill)) { \
} else { \
}
/*
* Network byte order macros
*/
#ifdef _BIG_ENDIAN
#define N_IN_CLASSA_NET IN_CLASSA_NET
#define N_IN_CLASSD_NET IN_CLASSD_NET
#else /* _BIG_ENDIAN */
#endif /* _BIG_ENDIAN */
#define IP_LOOPBACK_ADDR(addr) \
#ifdef DEBUG
/* IPsec HW acceleration debugging support */
/*
 * IPSECHW_CALL(f, r, x): if any bit of `f' is set in ipsechw_debug,
 * call `r' with the parenthesized argument list `x' and discard the
 * result. The expansion is a bare `if' statement, so do not use it as
 * the unbraced body of an if that has an else (dangling-else hazard).
 * NOTE(review): no IPSECHW_DEBUG definition is visible in this branch,
 * although the non-DEBUG branch defines one -- confirm.
 */
#define IPSECHW_CALL(f, r, x) if (ipsechw_debug & (f)) { (void) r x; }
extern uint32_t ipsechw_debug;
#else
/* Non-DEBUG builds: both macros expand to an empty block. */
#define IPSECHW_DEBUG(f, x) {}
#define IPSECHW_CALL(f, r, x) {}
#endif
extern int ip_debug;
extern uint_t ip_thread_data;
extern krwlock_t ip_thread_rwlock;
extern list_t ip_thread_list;
#ifdef IP_DEBUG
#else
#define ip0dbg(a) /* */
#define ip1dbg(a) /* */
#define ip2dbg(a) /* */
#define ip3dbg(a) /* */
#endif /* IP_DEBUG */
/* Default MAC-layer address string length for mac_colon_addr */
#define MAC_STR_LEN 128
struct ipsec_out_s;
struct mac_header_info_s;
extern void ill_frag_timer(void *);
extern void ill_frag_timer_start(ill_t *);
extern char *ip_dot_addr(ipaddr_t, char *);
ip_stack_t *);
ip_stack_t *);
cred_t *);
ip_stack_t *);
extern void ip_ire_g_fini(void);
extern void ip_ire_g_init(void);
extern void ip_ire_fini(ip_stack_t *);
extern void ip_ire_init(ip_stack_t *);
size_t);
int, int, mblk_t *);
struct mac_header_info_s *);
extern void ip_mib2_add_ip_stats(mib2_ipIfStatsEntry_t *,
extern void ip_mib2_add_icmp6_stats(mib2_ipv6IfIcmpEntry_t *,
uint32_t);
extern void ip_trash_ire_reclaim(void *);
extern void ip_trash_timer_expire(void *);
extern void ip_output_options(void *, mblk_t *, void *, int,
ip_opt_info_t *);
zoneid_t);
int, zoneid_t);
extern char *ip_nv_lookup(nv_t *, int);
ip_stack_t *);
extern void ipmp_init(ip_stack_t *);
extern void ipmp_destroy(ip_stack_t *);
extern void ipmp_grp_destroy(ipmp_grp_t *);
extern int ipmp_grp_rename(ipmp_grp_t *, const char *);
extern void ipmp_illgrp_destroy(ipmp_illgrp_t *);
extern void ipmp_illgrp_refresh_mtu(ipmp_illgrp_t *);
extern void ipmp_illgrp_refresh_arpent(ipmp_illgrp_t *);
extern int ipmp_illgrp_unlink_grp(ipmp_illgrp_t *);
extern void ipmp_ill_leave_illgrp(ill_t *);
extern void ipmp_ill_refresh_active(ill_t *);
extern void ipmp_phyint_leave_grp(phyint_t *);
extern void ipmp_phyint_refresh_active(phyint_t *);
extern int conn_ipsec_length(conn_t *);
ire_t *);
extern int ipsec_out_extra_length(mblk_t *);
extern int ipsec_in_extra_length(mblk_t *);
extern void ipsec_out_to_in(mblk_t *);
extern void ire_cleanup(ire_t *);
extern void ire_inactive(ire_t *);
#ifdef DEBUG
extern void th_trace_unref(const void *);
extern void th_trace_cleanup(const void *, boolean_t);
extern void ire_trace_ref(ire_t *);
extern void ire_untrace_ref(ire_t *);
#endif
extern void ip_quiesce_conn(conn_t *);
uint_t);
uint_t);
extern void conn_ioctl_cleanup(conn_t *);
struct tcp_stack;
conn_t *);
struct multidata_s;
struct pdesc_s;
ill_lso_capab_t *);
const mblk_t *);
uint_t);
extern void tnet_init(void);
extern void tnet_fini(void);
/* Hooks for CGTP (multirt routes) filtering module */
#define CGTP_FILTER_REV_1 1
#define CGTP_FILTER_REV_2 2
#define CGTP_FILTER_REV_3 3
#define CGTP_FILTER_REV CGTP_FILTER_REV_3
/* cfo_filter and cfo_filter_v6 hooks return values */
#define CGTP_IP_PKT_NOT_CGTP 0
#define CGTP_IP_PKT_PREMIUM 1
#define CGTP_IP_PKT_DUPLICATE 2
/* Version 3 of the filter interface */
typedef struct cgtp_filter_ops {
int cfo_filter_rev; /* CGTP_FILTER_REV_3 */
int (*cfo_change_state)(netstackid_t, int);
in6_addr_t *, in6_addr_t *);
ip6_frag_t *);
#define CGTP_MCAST_SUCCESS 1
/*
* The separate CGTP module needs this global symbol so that it
* can check the version and determine whether to use the old or the new
* version of the filtering interface.
*/
extern int ip_cgtp_filter_rev;
extern int ip_cgtp_filter_supported(void);
extern int ip_cgtp_filter_unregister(netstackid_t);
extern int ip_cgtp_filter_is_registered(netstackid_t);
/* Flags for ire_multirt_lookup() */
#define MULTIRT_USESTAMP 0x0001
#define MULTIRT_SETSTAMP 0x0002
#define MULTIRT_CACHEGW 0x0004
/* Debug stuff for multirt route resolution. */
/* Our "don't send, rather drop" flag. */
#define MULTIRT_DEBUG_FLAG 0x8000
#define MULTIRT_TRACE(x) ip2dbg(x)
#define MULTIRT_DEBUG_TAG(mblk) \
do { \
MULTIRT_TRACE(("%s[%d]: tagging mblk %p, tag was %d\n", \
} while (0)
#define MULTIRT_DEBUG_UNTAG(mblk) \
do { \
MULTIRT_TRACE(("%s[%d]: untagging mblk %p, tag was %d\n", \
} while (0)
#define MULTIRT_DEBUG_TAGGED(mblk) \
#else
#endif
/*
* IP observability hook support
*/
/*
* ipobs_hooktype_t describes the hook types supported
* by the ip module. IPOBS_HOOK_LOCAL refers to packets
* which are looped back internally within the ip module.
*/
typedef enum ipobs_hook_type {
typedef void ipobs_cbfunc_t(mblk_t *);
typedef struct ipobs_cb {
} ipobs_cb_t;
/*
* This structure holds the data passed back from the ip module to
* observability consumers.
*
* ihd_mp Pointer to the IP packet.
* ihd_zsrc Source zoneid; set to ALL_ZONES when unknown.
* ihd_zdst Destination zoneid; set to ALL_ZONES when unknown.
* ihd_htype IPobs hook type, see above for the defined types.
* ihd_ipver IP version of the packet.
* ihd_ifindex Interface index associated with the packet.
* For local packets, this is the index of the interface
* associated with the local destination address.
* ihd_grifindex IPMP group interface index (zero unless ihd_ifindex
* is an IPMP underlying interface).
* ihd_stack Netstack the packet is from.
*/
typedef struct ipobs_hook_data {
/*
* Per-ILL Multidata Transmit capabilities.
*/
struct ill_mdt_capab_s {
};
struct ill_hcksum_capab_s {
};
struct ill_zerocopy_capab_s {
};
struct ill_lso_capab_s {
};
/*
* rr_ring_state cycles in the order shown below from RR_FREE through
* RR_FREE_IN_PROG and back to RR_FREE.
*/
typedef enum {
RR_FREE, /* Free slot */
RR_SQUEUE_UNBOUND, /* Ring's squeue is unbound */
RR_SQUEUE_BIND_INPROG, /* Ring's squeue bind in progress */
RR_SQUEUE_BOUND, /* Ring's squeue bound to cpu */
RR_FREE_INPROG /* Ring is being freed */
/*
* We need to duplicate the definitions here because we cannot
* include mac/dls header files here.
*/
typedef void (*ip_mac_intr_disable_t)(void *);
typedef void (*ip_mac_intr_enable_t)(void *);
typedef void (*ip_flow_enable_t)(void *, ip_mac_tx_cookie_t);
typedef void *(*ip_dld_callb_t)(void *,
ip_flow_enable_t, void *);
typedef int (*ip_capab_func_t)(void *, uint_t,
void *, uint_t);
/*
* POLLING README
* sq_get_pkts() is called to pick packets from softring in poll mode. It
* calls rr_rx to get the chain and process it with rr_ip_accept.
* rr_rx = mac_soft_ring_poll() to pick packets
* rr_ip_accept = ip_accept_tcp() to process packets
*/
/*
* XXX: With protocol, service specific squeues, they will have
* specific acceptor functions.
*/
/*
* rr_intr_enable, rr_intr_disable, rr_rx_handle, rr_rx:
* May be accessed while in the squeue AND after checking that SQS_POLL_CAPAB
* is set.
*
* rr_ring_state: Protected by ill_lock.
*/
struct ill_rx_ring {
void *rr_intr_handle; /* Handle interrupt funcs */
void *rr_rx_handle; /* Handle for Rx ring */
};
/*
* IP - DLD direct function call capability
* Suffixes, df - dld function, dh - dld handle,
* cf - client (IP) function, ch - client handle
*/
typedef struct ill_dld_direct_s { /* DLD provided driver Tx */
void *idd_tx_dh; /* dld_str_t *dsp */
void *idd_tx_cb_dh; /* mac_client_handle_t *mch */
void *idd_tx_fctl_dh; /* mac_client_handle */
/* IP - DLD polling capability */
typedef struct ill_dld_poll_s {
/* Describes ill->ill_dld_capab */
struct ill_dld_capab_s {
void *idc_capab_dh; /* dld_str_t *dsp */
};
/*
* IP squeues exports
*/
extern boolean_t ip_squeue_fanout;
extern void ip_squeue_init(void (*)(squeue_t *));
extern void *ip_squeue_add_ring(ill_t *, void *);
extern void ip_squeue_clean_all(ill_t *);
struct ip6_mtuinfo *, netstack_t *);
uint32_t, ip_stack_t *);
/*
* Squeue tags. Tags only need to be unique when the callback function is the
* same to distinguish between different calls, but we use unique tags for
* convenience anyway.
*/
#define SQTAG_IP_INPUT 1
#define SQTAG_TCP_INPUT_ICMP_ERR 2
#define SQTAG_TCP6_INPUT_ICMP_ERR 3
#define SQTAG_IP_TCP_INPUT 4
#define SQTAG_IP6_TCP_INPUT 5
#define SQTAG_IP_TCP_CLOSE 6
#define SQTAG_TCP_OUTPUT 7
#define SQTAG_TCP_TIMER 8
#define SQTAG_TCP_TIMEWAIT 9
#define SQTAG_TCP_ACCEPT_FINISH 10
#define SQTAG_TCP_ACCEPT_FINISH_Q0 11
#define SQTAG_TCP_ACCEPT_PENDING 12
#define SQTAG_TCP_LISTEN_DISCON 13
#define SQTAG_TCP_CONN_REQ_1 14
#define SQTAG_TCP_EAGER_BLOWOFF 15
#define SQTAG_TCP_EAGER_CLEANUP 16
#define SQTAG_TCP_EAGER_CLEANUP_Q0 17
#define SQTAG_TCP_CONN_IND 18
#define SQTAG_TCP_RSRV 19
#define SQTAG_TCP_ABORT_BUCKET 20
#define SQTAG_TCP_REINPUT 21
#define SQTAG_TCP_REINPUT_EAGER 22
#define SQTAG_TCP_INPUT_MCTL 23
#define SQTAG_TCP_RPUTOTHER 24
#define SQTAG_IP_PROTO_AGAIN 25
#define SQTAG_IP_FANOUT_TCP 26
#define SQTAG_IPSQ_CLEAN_RING 27
#define SQTAG_TCP_WPUT_OTHER 28
#define SQTAG_TCP_CONN_REQ_UNBOUND 29
#define SQTAG_TCP_SEND_PENDING 30
#define SQTAG_BIND_RETRY 31
#define SQTAG_UDP_FANOUT 32
#define SQTAG_UDP_INPUT 33
#define SQTAG_UDP_WPUT 34
#define SQTAG_UDP_OUTPUT 35
#define SQTAG_TCP_KSSL_INPUT 36
#define SQTAG_TCP_DROP_Q0 37
#define SQTAG_TCP_CONN_REQ_2 38
#define SQTAG_IP_INPUT_RX_RING 39
#define SQTAG_SQUEUE_CHANGE 40
#define SQTAG_CONNECT_FINISH 41
#define SQTAG_SYNCHRONOUS_OP 42
#define SQTAG_TCP_SHUTDOWN_OUTPUT 43
#define SQTAG_XMIT_EARLY_RESET 44
#define NOT_OVER_IP(ip_wq) \
IP_MOD_NAME) != 0 || \
#endif /* _KERNEL */
#ifdef __cplusplus
}
#endif
#endif /* _INET_IP_H */