in_pcb.h revision 199767f8919635c4928607450d9e0abb932109ce
/*-
* Copyright (c) 1982, 1986, 1990, 1993
* The Regents of the University of California.
* Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
* Portions of this software were developed by Robert N. M. Watson under
* contract to Juniper Networks, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)in_pcb.h 8.1 (Berkeley) 6/10/93
* $FreeBSD$
*/
#ifndef _NETINET_IN_PCB_H_
#define _NETINET_IN_PCB_H_
#ifdef _KERNEL
#endif
struct inpcbpolicy;
/*
* struct inpcb is the common protocol control block structure used in most
* IP transport protocols.
*
* Pointers to local and foreign host table entries, local and foreign socket
* numbers, and pointers up (to a socket structure) and down (to a
* protocol-specific control block) are stored here.
*/
/*
* PCB with AF_INET6 null bind'ed laddr can receive AF_INET input packet.
* So, AF_INET6 null laddr is also used as AF_INET null laddr, by utilizing
* the following structure.
*/
struct in_addr_4in6 {
struct in_addr ia46_addr4;
};
/*
* NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553. in_conninfo has
* some extra padding to accomplish this.
* NOTE 2: tcp_syncache.c uses first 5 32-bit words, which identify fport,
* lport, faddr to generate hash, so these fields shouldn't be moved.
*/
struct in_endpoints {
/* protocol dependent part, local and foreign addr */
union {
/* foreign host table entry */
struct in_addr_4in6 ie46_foreign;
struct in6_addr ie6_foreign;
union {
/* local host table entry */
struct in_addr_4in6 ie46_local;
};
/*
* XXX The defines for inc_* are hacks and should be changed to direct
* references.
*/
struct in_conninfo {
/* protocol dependent part */
struct in_endpoints inc_ie;
};
/*
* Flags for inc_flags.
*/
#define INC_ISIPV6 0x01
struct icmp6_filter;
/*-
* struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 and
* IPv6 sockets. In the case of TCP and UDP, further per-connection state is
* hung off of inp_ppcb most of the time. Almost all fields of struct inpcb
* are static after creation or protected by a per-inpcb rwlock, inp_lock. A
* few fields are protected by multiple locks as indicated in the locking notes
* below. For these fields, all of the listed locks must be write-locked for
* any modifications. However, these fields can be safely read while any one of
* the listed locks are read-locked. This model can permit greater concurrency
* for read operations. For example, connections can be looked up while only
* holding a read lock on the global pcblist lock. This is important for
* performance when attempting to find the connection for a packet given its IP
* and port tuple.
*
* One noteworthy exception is that the global pcbinfo lock follows a different
* set of rules in relation to the inp_list field. Rather than being
* write-locked for modifications and read-locked for list iterations, it must
* be read-locked during modifications and write-locked during list iterations.
* This ensures that the relatively rare global list iterations safely walk a
* stable snapshot of connections while allowing more common list modifications
* to safely grab the pcblist lock just while adding or removing a connection
* from the global list.
*
* Key:
* (c) - Constant after initialization
* (g) - Protected by the pcbgroup lock
* (i) - Protected by the inpcb lock
* (p) - Protected by the pcbinfo lock for the inpcb
* (l) - Protected by the pcblist lock for the inpcb
* (h) - Protected by the pcbhash lock for the inpcb
* (s) - Protected by another subsystem's locks
* (x) - Undefined locking
*
* A few other notes:
*
* When a read lock is held, stability of the field is guaranteed; to write
* to a field, a write lock must generally be held.
*
* netinet/netinet6-layer code should not assume that the inp_socket pointer
* is safe to dereference without inp_lock being held, even for protocols
* other than TCP (where the inpcb persists during TIMEWAIT even after the
* socket has been freed), or there may be close(2)-related races.
*
* The inp_vflag field is overloaded, and would otherwise ideally be (c).
*
* TODO: Currently only the TCP stack is leveraging the global pcbinfo lock
* read-lock usage during modification, this model can be applied to other
* protocols (especially SCTP).
*/
struct inpcb {
/* (p[w]) for list iteration */
void *inp_ppcb; /* (i) pointer to per-protocol pcb */
int inp_flags2; /* (i) generic IP/datagram flags #2*/
* general use */
/* Local and foreign ports, local and foreign addr. */
/* MAC and IPSEC policy information. */
/* Protocol-dependent part; options. */
struct {
} inp_depend4;
struct {
/* (i) IP options */
struct mbuf *inp6_options;
/* (i) IP6 options for outgoing packets */
struct ip6_pktopts *inp6_outputopts;
/* (i) IP multicast options */
struct ip6_moptions *inp6_moptions;
/* (i) ICMPv6 code type filter */
struct icmp6_filter *inp6_icmp6filt;
/* (i) IPV6_CHECKSUM setsockopt */
int inp6_cksum;
short inp6_hops;
} inp_depend6;
};
#define in6p_flowinfo inp_flow
/*
* The range of the generation count, as used in this implementation, is 9e19.
* We would have to create 300 billion connections per second for this number
* to roll over in a year. This seems sufficiently unlikely that we simply
* don't concern ourselves with that possibility.
*/
/*
* Interface exported to userland by various protocols which use inpcbs. Hack
* alert -- only define if struct xsocket is in scope.
*/
#ifdef _SYS_SOCKETVAR_H_
struct xinpcb {
};
struct xinpgen {
};
#endif /* _SYS_SOCKETVAR_H_ */
struct inpcbport {
struct inpcbhead phd_pcblist;
};
/*-
* Global data structure for each high-level protocol (UDP, TCP, ...) in both
* IPv4 and IPv6. Holds inpcb lists and information for managing them.
*
* Each pcbinfo is protected by three locks: ipi_lock, ipi_hash_lock and
* ipi_list_lock:
* - ipi_lock covering the global pcb list stability during loop iteration,
* - ipi_hash_lock covering the hashed lookup tables,
* - ipi_list_lock covering mutable global fields (such as the global
* pcb list)
*
* The lock order is:
*
* ipi_lock (before)
* inpcb locks (before)
* ipi_list locks (before)
* {ipi_hash_lock, pcbgroup locks}
*
* Locking key:
*
* (c) Constant or nearly constant after initialisation
* (g) Locked by ipi_lock
* (l) Locked by ipi_list_lock
* (h) Read using either ipi_hash_lock or inpcb lock; write requires both
* (p) Protected by one or more pcbgroup locks
* (x) Synchronisation properties poorly defined
*/
struct inpcbinfo {
/*
* Global lock protecting full inpcb list traversal
*/
/*
* Global list of inpcbs on the protocol.
*/
/*
* Generation count -- incremented each time a connection is allocated
* or freed.
*/
/*
* Fields associated with port lookup and allocation.
*/
/*
* UMA zone from which inpcbs are allocated for this protocol.
*/
/*
* Connection groups associated with this protocol. These fields are
* constant, but pcbgroup structures themselves are protected by
* per-pcbgroup locks.
*/
/*
* Global lock protecting non-pcbgroup hash lookup tables.
*/
struct rwlock ipi_hash_lock;
/*
* Global hash of inpcbs, hashed by local and foreign addresses and
* port numbers.
*/
/*
* Global hash of inpcbs, hashed by only local port number.
*/
/*
* List of wildcard inpcbs for use with pcbgroups. In the past, was
* per-pcbgroup but is now global. All pcbgroup locks must be held
* to modify the list, so any is sufficient to read it.
*/
/*
* Pointer to network stack instance
*/
/*
* general use 2
*/
void *ipi_pspare[2];
/*
* Global lock protecting global inpcb list, inpcb count, etc.
*/
struct rwlock ipi_list_lock;
};
#ifdef _KERNEL
/*
* affinity. Each connection belongs to exactly one connection group.
*/
struct inpcbgroup {
/*
* Per-connection group hash of inpcbs, hashed by local and foreign
* addresses and port numbers.
*/
/*
* Notional affinity of this pcbgroup.
*/
/*
* Per-connection group lock, not to be confused with ipi_lock.
* Protects the hash table hung off the group, but also the global
* wildcard list in inpcbinfo.
*/
#define INP_LOCK_INIT(inp, d, t) \
/*
* more specifically, they were added for the benefit of TOE drivers. The
* macros are reserved for use by the stack.
*/
void inp_wunlock(struct inpcb *);
void inp_runlock(struct inpcb *);
#ifdef INVARIANTS
void inp_lock_assert(struct inpcb *);
void inp_unlock_assert(struct inpcb *);
#else
static __inline void
{
}
static __inline void
{
}
#endif
struct socket *
struct tcpcb *
#endif /* _KERNEL */
#define INP_INFO_LOCK_INIT(ipi, d) \
#define INP_LIST_LOCK_INIT(ipi, d) \
#define INP_LIST_LOCK_ASSERT(ipi) \
#define INP_LIST_RLOCK_ASSERT(ipi) \
#define INP_LIST_WLOCK_ASSERT(ipi) \
#define INP_LIST_UNLOCK_ASSERT(ipi) \
#define INP_HASH_LOCK_INIT(ipi, d) \
/*
* Flags for inp_vflags -- historically version flags only
*/
#define INP_IPV4 0x1
#define INP_IPV6 0x2
/*
* Flags for inp_flags.
*/
/* 0x000200 unused: was INP_FAITH */
/*
* Flags for inp_flags2.
*/
/*
* Flags passed to in_pcblookup*() functions.
*/
/*
* Constants for pcbinfo.ipi_hashfields.
*/
#define IPI_HASHFIELDS_NONE 0
#define IPI_HASHFIELDS_2TUPLE 1
#define IPI_HASHFIELDS_4TUPLE 2
#ifdef _KERNEL
VNET_DECLARE(int, ipport_reservedhigh);
VNET_DECLARE(int, ipport_reservedlow);
VNET_DECLARE(int, ipport_lowfirstauto);
VNET_DECLARE(int, ipport_lowlastauto);
VNET_DECLARE(int, ipport_firstauto);
VNET_DECLARE(int, ipport_lastauto);
VNET_DECLARE(int, ipport_hifirstauto);
VNET_DECLARE(int, ipport_hilastauto);
VNET_DECLARE(int, ipport_randomized);
VNET_DECLARE(int, ipport_randomcps);
VNET_DECLARE(int, ipport_randomtime);
VNET_DECLARE(int, ipport_stoprandom);
VNET_DECLARE(int, ipport_tcpallocs);
void in_pcbinfo_destroy(struct inpcbinfo *);
struct inpcbgroup *
struct inpcbgroup *
in_pcbgroup_byinpcb(struct inpcb *);
struct inpcbgroup *
void in_pcbgroup_destroy(struct inpcbinfo *);
int in_pcbgroup_enabled(struct inpcbinfo *);
void in_pcbgroup_remove(struct inpcb *);
void in_pcbgroup_update(struct inpcb *);
struct ucred *, int);
struct mbuf *);
struct ucred *);
void in_pcbdetach(struct inpcb *);
void in_pcbdisconnect(struct inpcb *);
void in_pcbdrop(struct inpcb *);
void in_pcbfree(struct inpcb *);
int in_pcbinshash(struct inpcb *);
int in_pcbinshash_nopcbgroup(struct inpcb *);
struct ucred *);
struct inpcb *
in_pcblookup_local(struct inpcbinfo *,
struct inpcb *
struct inpcb *
void in_pcbrehash(struct inpcb *);
int in_pcbrele(struct inpcb *);
int in_pcbrele_rlocked(struct inpcb *);
int in_pcbrele_wlocked(struct inpcb *);
struct sockaddr *
#endif /* _KERNEL */
#endif /* !_NETINET_IN_PCB_H_ */