sctp_hash.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/socket.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsun.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/ipclassifier.h>
#include <inet/ipsec_impl.h>
#include <inet/ipp_common.h>
#include <inet/sctp_ip.h>
#include "sctp_impl.h"
#include "sctp_addr.h"
/*
 * SCTP bind hash list - all sctp_t with state >= BOUND.
 * The per-bucket locks are initialized in sctp_hash_init().
 */
sctp_tf_t sctp_bind_fanout[SCTP_BIND_FANOUT_SIZE];
/*
 * SCTP listen hash list - all sctp_t with state == LISTEN.
 * The per-bucket locks are initialized in sctp_hash_init().
 */
sctp_tf_t sctp_listen_fanout[SCTP_LISTEN_FANOUT_SIZE];
/* Default association hash size. The size must be a power of 2. */
#define SCTP_CONN_HASH_SIZE 8192
/*
 * Association (connection) hash table.  Allocated by sctp_hash_init() with
 * sctp_conn_hash_size buckets and released by sctp_hash_destroy().
 */
sctp_tf_t *sctp_conn_fanout;
/*
 * Number of buckets in sctp_conn_fanout.  sctp_hash_init() rounds a value
 * that is not a power of two up to one and never lets the size drop below
 * SCTP_CONN_HASH_SIZE.
 */
uint_t sctp_conn_hash_size = SCTP_CONN_HASH_SIZE;
/*
 * Set up the SCTP fanout tables.
 *
 * Normalizes sctp_conn_hash_size (power of two, at least
 * SCTP_CONN_HASH_SIZE), allocates the zero-filled association table with
 * that many buckets and initializes the lock of every bucket in the
 * association, listen and bind tables.  The allocation uses KM_SLEEP.
 */
void
sctp_hash_init()
{
	int i;

	if (sctp_conn_hash_size & (sctp_conn_hash_size - 1)) {
		/*
		 * Not a power of two. Round up to nearest power of two.
		 *
		 * The shift is done on an unsigned constant: when the
		 * configured size is above 2^30 the loop ends with i == 31,
		 * and shifting a signed 1 by 31 bits is undefined.  With
		 * 1U the result is a well defined 2^31, which is also what
		 * sizes above 2^31 end up as.
		 */
		for (i = 0; i < 31; i++) {
			if (sctp_conn_hash_size < (1U << i))
				break;
		}
		sctp_conn_hash_size = 1U << i;
	}
	if (sctp_conn_hash_size < SCTP_CONN_HASH_SIZE) {
		/* Too small; fall back to the default and say so. */
		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE;
		cmn_err(CE_CONT, "using sctp_conn_hash_size = %u\n",
		    sctp_conn_hash_size);
	}
	sctp_conn_fanout =
	    (sctp_tf_t *)kmem_zalloc(sctp_conn_hash_size *
	    sizeof (sctp_tf_t), KM_SLEEP);
	for (i = 0; i < sctp_conn_hash_size; i++) {
		mutex_init(&sctp_conn_fanout[i].tf_lock, NULL,
		    MUTEX_DEFAULT, NULL);
	}
	for (i = 0; i < A_CNT(sctp_listen_fanout); i++) {
		mutex_init(&sctp_listen_fanout[i].tf_lock, NULL,
		    MUTEX_DEFAULT, NULL);
	}
	for (i = 0; i < A_CNT(sctp_bind_fanout); i++) {
		mutex_init(&sctp_bind_fanout[i].tf_lock, NULL,
		    MUTEX_DEFAULT, NULL);
	}
}
/*
 * Undo sctp_hash_init(): destroy the lock of every fanout bucket and give
 * the association table back to the kernel allocator.
 */
void
sctp_hash_destroy()
{
	int bucket;

	/* Association table: locks first, then the memory itself. */
	bucket = 0;
	while (bucket < sctp_conn_hash_size) {
		mutex_destroy(&sctp_conn_fanout[bucket].tf_lock);
		bucket++;
	}
	kmem_free(sctp_conn_fanout, sctp_conn_hash_size * sizeof (sctp_tf_t));

	/* The listen and bind tables are static arrays; only locks to drop. */
	for (bucket = 0; bucket < A_CNT(sctp_listen_fanout); bucket++)
		mutex_destroy(&sctp_listen_fanout[bucket].tf_lock);

	for (bucket = 0; bucket < A_CNT(sctp_bind_fanout); bucket++)
		mutex_destroy(&sctp_bind_fanout[bucket].tf_lock);
}
/*
 * Walk the SCTP global list and refrele the ire for this ipif.
 *
 * For every sctp_t that is not condemned, the IRE cached in its conn_t
 * (conn_ire_cache) and the IRE cached in each of its peer addresses
 * (sctp_faddr_t.ire) is dropped when it belongs to `ipif'.  A NULL ipif
 * matches every cached IRE.
 */
void
sctp_ire_cache_flush(ipif_t *ipif)
{
	sctp_t *sctp;
	sctp_t *sctp_prev = NULL;
	sctp_faddr_t *fp;
	conn_t *connp;
	ire_t *ire;

	/*
	 * Fetch the list head only after sctp_g_lock has been taken; every
	 * other step of the walk (list_next) is done under that lock, and
	 * sampling the head outside of it races with list updates.
	 */
	mutex_enter(&sctp_g_lock);
	sctp = gsctp;
	while (sctp != NULL) {
		mutex_enter(&sctp->sctp_reflock);
		if (sctp->sctp_condemned) {
			/* Skip it; sctp_g_lock stays held. */
			mutex_exit(&sctp->sctp_reflock);
			sctp = list_next(&sctp_g_list, sctp);
			continue;
		}
		/*
		 * Take a reference before dropping the global lock; it is
		 * released only after the walk has moved past this sctp_t.
		 */
		sctp->sctp_refcnt++;
		mutex_exit(&sctp->sctp_reflock);
		mutex_exit(&sctp_g_lock);
		if (sctp_prev != NULL)
			SCTP_REFRELE(sctp_prev);

		/*
		 * NOTE(review): RUN_SCTP/WAKE_SCTP presumably serialize
		 * access to the sctp_t while its faddr list is modified.
		 */
		RUN_SCTP(sctp);
		connp = sctp->sctp_connp;
		mutex_enter(&connp->conn_lock);
		ire = connp->conn_ire_cache;
		if (ire != NULL &&
		    (ipif == NULL || ire->ire_ipif == ipif)) {
			connp->conn_ire_cache = NULL;
			mutex_exit(&connp->conn_lock);
			IRE_REFRELE_NOTR(ire);
		} else {
			mutex_exit(&connp->conn_lock);
		}
		/* check for ires cached in faddr */
		for (fp = sctp->sctp_faddrs; fp != NULL;
		    fp = fp->next) {
			ire = fp->ire;
			if (ire != NULL && (ipif == NULL ||
			    ire->ire_ipif == ipif)) {
				fp->ire = NULL;
				IRE_REFRELE_NOTR(ire);
			}
		}
		WAKE_SCTP(sctp);
		sctp_prev = sctp;
		mutex_enter(&sctp_g_lock);
		sctp = list_next(&sctp_g_list, sctp);
	}
	mutex_exit(&sctp_g_lock);
	/* Drop the reference still held on the last sctp_t visited. */
	if (sctp_prev != NULL)
		SCTP_REFRELE(sctp_prev);
}
/*
 * Look for an association in the connection fanout.
 *
 * An sctp_t in the bucket selected by `ports' matches when its ports are
 * equal to `ports', it lives in `zoneid' (or zoneid is ALL_ZONES), `faddr'
 * is one of its peer addresses, and the local side matches: by `laddr'
 * when ipif_seqid is 0, otherwise by ipif_seqid.
 *
 * Returns the match with a reference held (SCTP_REFHOLD), or NULL.
 */
sctp_t *
sctp_conn_match(in6_addr_t *faddr, in6_addr_t *laddr, uint32_t ports,
    uint_t ipif_seqid, zoneid_t zoneid)
{
	sctp_tf_t *bucket = &(sctp_conn_fanout[SCTP_CONN_HASH(ports)]);
	sctp_t *sctp;
	sctp_faddr_t *peer;
	int local_ok;

	mutex_enter(&bucket->tf_lock);
	for (sctp = bucket->tf_sctp; sctp != NULL;
	    sctp = sctp->sctp_conn_hash_next) {
		if (ports != sctp->sctp_ports)
			continue;
		if (zoneid != ALL_ZONES && zoneid != sctp->sctp_zoneid)
			continue;

		/* the remote address has to be one of the peer's addresses */
		peer = sctp->sctp_faddrs;
		while (peer != NULL) {
			if (IN6_ARE_ADDR_EQUAL(faddr, &peer->faddr))
				break;
			peer = peer->next;
		}
		if (peer == NULL)
			continue;

		/* finally the local side, by address or by ipif */
		if (ipif_seqid == 0)
			local_ok = (sctp_saddr_lookup(sctp, laddr) != NULL);
		else
			local_ok = (sctp_ipif_lookup(sctp, ipif_seqid) != NULL);

		if (local_ok) {
			SCTP_REFHOLD(sctp);
			break;
		}
		/* not this one; try the next in the chain */
	}
	mutex_exit(&bucket->tf_lock);

	/* NULL here when the whole chain was walked without a match */
	return (sctp);
}
/*
 * Look for a listener in the listen fanout.
 *
 * The local port is taken from the second 16-bit half of `ports' and is
 * compared as stored; its host-order value selects the bucket.  A
 * listener matches when its port and zone agree and its local side
 * matches: by `laddr' when ipif_seqid is 0, otherwise by ipif_seqid.
 *
 * Returns the match with a reference held (SCTP_REFHOLD), or NULL.
 */
static sctp_t *
listen_match(in6_addr_t *laddr, uint32_t ports, uint_t ipif_seqid,
    zoneid_t zoneid)
{
	uint16_t lport = ((uint16_t *)&ports)[1];
	sctp_tf_t *bucket;
	sctp_t *sctp;
	int local_ok;

	bucket = &(sctp_listen_fanout[SCTP_LISTEN_HASH(ntohs(lport))]);
	mutex_enter(&bucket->tf_lock);
	for (sctp = bucket->tf_sctp; sctp != NULL;
	    sctp = sctp->sctp_listen_hash_next) {
		if (lport != sctp->sctp_lport)
			continue;
		if (zoneid != ALL_ZONES && zoneid != sctp->sctp_zoneid)
			continue;

		if (ipif_seqid == 0)
			local_ok = (sctp_saddr_lookup(sctp, laddr) != NULL);
		else
			local_ok = (sctp_ipif_lookup(sctp, ipif_seqid) != NULL);

		if (local_ok) {
			SCTP_REFHOLD(sctp);
			break;
		}
		/* not this one; try the next in the chain */
	}
	mutex_exit(&bucket->tf_lock);

	/* NULL here when the whole chain was walked without a match */
	return (sctp);
}
/*
 * Find the conn_t an inbound packet belongs to: first an established
 * association (sctp_conn_match), then a listener (listen_match).
 *
 * Returns NULL when neither lookup succeeds.  On success the underlying
 * sctp_t carries the reference taken by the lookup, which the caller has
 * to release.
 */
conn_t *
sctp_find_conn(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
    uint_t ipif_seqid, zoneid_t zoneid)
{
	sctp_t *sctp;

	sctp = sctp_conn_match(src, dst, ports, ipif_seqid, zoneid);
	if (sctp != NULL)
		return (sctp->sctp_connp);

	/* Not in conn fanout; check listen fanout */
	sctp = listen_match(dst, ports, ipif_seqid, zoneid);
	if (sctp != NULL)
		return (sctp->sctp_connp);

	return (NULL);
}
/*
 * Fanout for SCTP packets
 * The caller puts <fport, lport> in the ports parameter.
 *
 * mp is the head of the message: an M_CTL followed by the packet when
 * mctl_present is set, otherwise the packet itself.  ipha points at the
 * IP header, which is re-read as an ip6_t when its version field says
 * IPv6.
 *
 * The message is consumed on every path: it is handed to the raw fanout
 * when no SCTP endpoint matches, dropped, queued on the endpoint's
 * receive queue, or processed directly through sctp_input_data().
 */
/* ARGSUSED */
void
ip_fanout_sctp(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha,
    uint32_t ports, uint_t flags, boolean_t mctl_present, boolean_t ip_policy,
    uint_t ipif_seqid, zoneid_t zoneid)
{
	sctp_t *sctp;
	boolean_t isv4;
	conn_t *connp;
	mblk_t *first_mp;
	ip6_t *ip6h;
	in6_addr_t map_src, map_dst;
	in6_addr_t *src, *dst;

	/* first_mp is the head of the message, mp the packet itself. */
	first_mp = mp;
	if (mctl_present) {
		mp = first_mp->b_cont;
		ASSERT(mp != NULL);
	}
	/* Assume IP provides aligned packets - otherwise toss */
	if (!OK_32PTR(mp->b_rptr)) {
		BUMP_MIB(&ip_mib, ipInDiscards);
		freemsg(first_mp);
		return;
	}
	/* The lookups work on IPv6 addresses; map IPv4 ones. */
	if (IPH_HDR_VERSION(ipha) == IPV6_VERSION) {
		ip6h = (ip6_t *)ipha;
		src = &ip6h->ip6_src;
		dst = &ip6h->ip6_dst;
		isv4 = B_FALSE;
	} else {
		ip6h = NULL;
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src);
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst);
		src = &map_src;
		dst = &map_dst;
		isv4 = B_TRUE;
	}
	if ((connp = sctp_find_conn(src, dst, ports, ipif_seqid, zoneid)) ==
	    NULL) {
		/*
		 * No SCTP endpoint; try the raw fanout.  Since
		 * mctl_present is passed along, hand over the head of the
		 * message (the M_CTL when there is one) and not the inner
		 * packet; otherwise the M_CTL would be out of the callee's
		 * reach and never freed.
		 */
		ip_fanout_sctp_raw(first_mp, recv_ill, ipha, isv4,
		    ports, mctl_present, flags, ip_policy,
		    ipif_seqid, zoneid);
		return;
	}
	/* sctp_find_conn() left a reference on the sctp_t; drop it on exit. */
	sctp = CONN2SCTP(connp);
	/* Found a client; up it goes */
	BUMP_MIB(&ip_mib, ipInDelivers);
	/*
	 * We check some fields in conn_t without holding a lock.
	 * This should be fine.
	 */
	if (CONN_INBOUND_POLICY_PRESENT(connp) || mctl_present) {
		first_mp = ipsec_check_inbound_policy(first_mp, connp,
		    ipha, NULL, mctl_present);
		if (first_mp == NULL) {
			SCTP_REFRELE(sctp);
			return;
		}
	}
	/* Initiate IPPF processing for fastpath */
	if (IPP_ENABLED(IPP_LOCAL_IN)) {
		ip_process(IPP_LOCAL_IN, &mp,
		    recv_ill->ill_phyint->phyint_ifindex);
		if (mp == NULL) {
			/* Packet is gone; only the M_CTL is left to free. */
			SCTP_REFRELE(sctp);
			if (mctl_present)
				freeb(first_mp);
			return;
		} else if (mctl_present) {
			/*
			 * ip_process might return a new mp.
			 */
			ASSERT(first_mp != mp);
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}
	/* Prepend ancillary receive information if the endpoint wants it. */
	if (connp->conn_recvif || connp->conn_recvslla ||
	    connp->conn_ipv6_recvpktinfo) {
		int in_flags = 0;

		if (connp->conn_recvif || connp->conn_ipv6_recvpktinfo) {
			in_flags = IPF_RECVIF;
		}
		if (connp->conn_recvslla) {
			in_flags |= IPF_RECVSLLA;
		}
		if (isv4) {
			mp = ip_add_info(mp, recv_ill, in_flags);
		} else {
			mp = ip_add_info_v6(mp, recv_ill, &ip6h->ip6_dst);
		}
		if (mp == NULL) {
			/* Packet is gone; only the M_CTL is left to free. */
			SCTP_REFRELE(sctp);
			if (mctl_present)
				freeb(first_mp);
			return;
		} else if (mctl_present) {
			/*
			 * ip_add_info might return a new mp.
			 */
			ASSERT(first_mp != mp);
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}
	mutex_enter(&sctp->sctp_lock);
	if (sctp->sctp_running) {
		/*
		 * The sctp_t is busy; queue the packet on its receive
		 * queue.  The M_CTL, if any, is remembered in b_prev.
		 */
		if (mctl_present)
			mp->b_prev = first_mp;
		if (!sctp_add_recvq(sctp, mp, B_FALSE)) {
			BUMP_MIB(&ip_mib, ipInDiscards);
			freemsg(first_mp);
		}
		mutex_exit(&sctp->sctp_lock);
	} else {
		/* Claim the sctp_t for this thread. */
		sctp->sctp_running = B_TRUE;
		mutex_exit(&sctp->sctp_lock);
		mutex_enter(&sctp->sctp_recvq_lock);
		if (sctp->sctp_recvq != NULL) {
			/*
			 * Packets are already queued; append this one
			 * instead of processing it directly.
			 */
			if (mctl_present)
				mp->b_prev = first_mp;
			if (!sctp_add_recvq(sctp, mp, B_TRUE)) {
				BUMP_MIB(&ip_mib, ipInDiscards);
				freemsg(first_mp);
			}
			mutex_exit(&sctp->sctp_recvq_lock);
			WAKE_SCTP(sctp);
		} else {
			/* Nothing queued; process the packet right here. */
			mutex_exit(&sctp->sctp_recvq_lock);
			sctp_input_data(sctp, mp, (mctl_present ? first_mp :
			    NULL));
			WAKE_SCTP(sctp);
			sctp_process_sendq(sctp);
		}
	}
	SCTP_REFRELE(sctp);
}
/*
 * Unlink sctp from the connection fanout bucket it is hashed on.
 * Does nothing when the sctp_t is not on any bucket (sctp_conn_tfp is
 * NULL).  The bucket lock is taken and released here.
 */
void
sctp_conn_hash_remove(sctp_t *sctp)
{
	sctp_tf_t *bucket = sctp->sctp_conn_tfp;
	sctp_t *succ;
	sctp_t *pred;

	if (bucket == NULL)
		return;

	mutex_enter(&bucket->tf_lock);
	ASSERT(bucket->tf_sctp);
	succ = sctp->sctp_conn_hash_next;
	if (bucket->tf_sctp != sctp) {
		/* Somewhere behind the head: splice around ourselves. */
		pred = sctp->sctp_conn_hash_prev;
		ASSERT(pred);
		ASSERT(pred->sctp_conn_hash_next == sctp);
		pred->sctp_conn_hash_next = succ;
		if (succ != NULL) {
			ASSERT(succ->sctp_conn_hash_prev == sctp);
			succ->sctp_conn_hash_prev = pred;
		}
	} else {
		/* We are the head: the successor becomes the new head. */
		bucket->tf_sctp = succ;
		if (succ != NULL) {
			ASSERT(succ->sctp_conn_hash_prev == sctp);
			succ->sctp_conn_hash_prev = NULL;
		}
	}
	/* Mark the sctp_t as no longer hashed. */
	sctp->sctp_conn_tfp = NULL;
	sctp->sctp_conn_hash_prev = NULL;
	sctp->sctp_conn_hash_next = NULL;
	mutex_exit(&bucket->tf_lock);
}
/*
 * Put sctp at the head of connection fanout bucket tf.
 *
 * An sctp_t that is already hashed is first taken off its current bucket
 * via sctp_conn_hash_remove(), which acquires that bucket's lock itself.
 * When caller_holds_lock is non-zero the caller must own tf->tf_lock;
 * otherwise the lock is taken and released here.
 */
void
sctp_conn_hash_insert(sctp_tf_t *tf, sctp_t *sctp, int caller_holds_lock)
{
	sctp_t *old_head;

	if (sctp->sctp_conn_tfp != NULL)
		sctp_conn_hash_remove(sctp);

	if (caller_holds_lock) {
		ASSERT(MUTEX_HELD(&tf->tf_lock));
	} else {
		mutex_enter(&tf->tf_lock);
	}

	/* Link in front of whatever is the current head. */
	old_head = tf->tf_sctp;
	if (old_head != NULL)
		old_head->sctp_conn_hash_prev = sctp;
	sctp->sctp_conn_hash_next = old_head;
	sctp->sctp_conn_hash_prev = NULL;
	sctp->sctp_conn_tfp = tf;
	tf->tf_sctp = sctp;

	if (!caller_holds_lock)
		mutex_exit(&tf->tf_lock);
}
/*
 * Unlink sctp from the listen fanout bucket it is hashed on.
 * Does nothing when the sctp_t is not on any bucket (sctp_listen_tfp is
 * NULL).  The bucket lock is taken and released here.
 */
void
sctp_listen_hash_remove(sctp_t *sctp)
{
	sctp_tf_t *bucket = sctp->sctp_listen_tfp;
	sctp_t *succ;
	sctp_t *pred;

	if (bucket == NULL)
		return;

	mutex_enter(&bucket->tf_lock);
	ASSERT(bucket->tf_sctp);
	succ = sctp->sctp_listen_hash_next;
	if (bucket->tf_sctp != sctp) {
		/* Somewhere behind the head: splice around ourselves. */
		pred = sctp->sctp_listen_hash_prev;
		ASSERT(pred);
		ASSERT(pred->sctp_listen_hash_next == sctp);
		pred->sctp_listen_hash_next = succ;
		if (succ != NULL) {
			ASSERT(succ->sctp_listen_hash_prev == sctp);
			succ->sctp_listen_hash_prev = pred;
		}
	} else {
		/* We are the head: the successor becomes the new head. */
		bucket->tf_sctp = succ;
		if (succ != NULL) {
			ASSERT(succ->sctp_listen_hash_prev == sctp);
			succ->sctp_listen_hash_prev = NULL;
		}
	}
	/* Mark the sctp_t as no longer hashed. */
	sctp->sctp_listen_tfp = NULL;
	sctp->sctp_listen_hash_prev = NULL;
	sctp->sctp_listen_hash_next = NULL;
	mutex_exit(&bucket->tf_lock);
}
/*
 * Put sctp at the head of listen fanout bucket tf.
 * An sctp_t that is already hashed is first taken off its current bucket.
 * The bucket lock is taken and released here.
 */
void
sctp_listen_hash_insert(sctp_tf_t *tf, sctp_t *sctp)
{
	sctp_t *old_head;

	if (sctp->sctp_listen_tfp != NULL)
		sctp_listen_hash_remove(sctp);

	mutex_enter(&tf->tf_lock);
	/* Link in front of whatever is the current head. */
	old_head = tf->tf_sctp;
	if (old_head != NULL)
		old_head->sctp_listen_hash_prev = sctp;
	sctp->sctp_listen_hash_next = old_head;
	sctp->sctp_listen_hash_prev = NULL;
	sctp->sctp_listen_tfp = tf;
	tf->tf_sctp = sctp;
	mutex_exit(&tf->tf_lock);
}
/*
 * Hash list insertion routine for sctp_t structures.
 *
 * The sctp_t is always linked in at the head of bucket tbf; no ordering
 * by bound address is applied here.  The chain is singly linked through
 * sctp_bind_hash, with sctp_ptpbhn pointing back at whichever pointer
 * refers to the entry (the bucket head or the predecessor's link).
 *
 * An sctp_t that is already hashed is removed first; that is only legal
 * when the caller does not hold the bucket lock.  When caller_holds_lock
 * is non-zero the caller must own tbf->tf_lock; otherwise the lock is
 * taken and released here.
 */
void
sctp_bind_hash_insert(sctp_tf_t *tbf, sctp_t *sctp, int caller_holds_lock)
{
	sctp_t **headp;
	sctp_t *old_head;

	if (sctp->sctp_ptpbhn != NULL) {
		ASSERT(!caller_holds_lock);
		sctp_bind_hash_remove(sctp);
	}

	headp = &tbf->tf_sctp;
	if (caller_holds_lock) {
		ASSERT(MUTEX_HELD(&tbf->tf_lock));
	} else {
		mutex_enter(&tbf->tf_lock);
	}

	old_head = *headp;
	if (old_head != NULL) {
		/* The old head is now referenced through our link. */
		old_head->sctp_ptpbhn = &sctp->sctp_bind_hash;
	}
	sctp->sctp_bind_hash = old_head;
	sctp->sctp_ptpbhn = headp;
	*headp = sctp;

	/* For sctp_*_hash_remove */
	sctp->sctp_bind_lockp = &tbf->tf_lock;

	if (!caller_holds_lock)
		mutex_exit(&tbf->tf_lock);
}
/*
 * Hash list removal routine for sctp_t structures.
 *
 * Takes sctp off the bind fanout chain it is linked on, using the bucket
 * lock remembered in sctp_bind_lockp.  Does nothing when the sctp_t is
 * not hashed (sctp_ptpbhn is NULL).
 */
void
sctp_bind_hash_remove(sctp_t *sctp)
{
	kmutex_t *bucket_lock = sctp->sctp_bind_lockp;
	sctp_t **backp;
	sctp_t *succ;

	if (sctp->sctp_ptpbhn == NULL)
		return;

	ASSERT(bucket_lock != NULL);
	mutex_enter(bucket_lock);
	/* Look again now that the lock is held. */
	backp = sctp->sctp_ptpbhn;
	if (backp != NULL) {
		succ = sctp->sctp_bind_hash;
		if (succ != NULL) {
			succ->sctp_ptpbhn = backp;
			sctp->sctp_bind_hash = NULL;
		}
		/* Whoever pointed at us now points at our successor. */
		*backp = succ;
		sctp->sctp_ptpbhn = NULL;
	}
	mutex_exit(bucket_lock);
	sctp->sctp_bind_lockp = NULL;
}
/*
 * Similar to but more general than ip_sctp's conn_match().
 *
 * Walks the connection chain of bucket tf looking for an sctp_t whose
 * ports equal *ports, whose state is at least min_state, which has faddr
 * among its peer addresses, and for which sctp_compare_saddrs(sctp1, it)
 * yields SCTP_ADDR_SUBSET or less, i.e. the address set of sctp1 is a
 * complete subset of (or equal to) the candidate's address set.
 *
 * Caller must hold tf->tf_lock.
 *
 * Returns the match with a reference held (SCTP_REFHOLD), or NULL when
 * there is none. Caller must do a SCTP_REFRELE on a non-NULL result.
 */
sctp_t *
sctp_lookup(sctp_t *sctp1, in6_addr_t *faddr, sctp_tf_t *tf, uint32_t *ports,
    int min_state)
{
	sctp_t *cand;
	sctp_faddr_t *peer;

	ASSERT(MUTEX_HELD(&tf->tf_lock));

	for (cand = tf->tf_sctp; cand != NULL;
	    cand = cand->sctp_conn_hash_next) {
		if (*ports != cand->sctp_ports || cand->sctp_state < min_state)
			continue;

		/* faddr has to be one of the candidate's peer addresses */
		peer = cand->sctp_faddrs;
		while (peer != NULL) {
			if (IN6_ARE_ADDR_EQUAL(faddr, &peer->faddr))
				break;
			peer = peer->next;
		}
		if (peer == NULL)
			continue;

		/* local address sets must be in a subset relation */
		if (sctp_compare_saddrs(sctp1, cand) <= SCTP_ADDR_SUBSET) {
			SCTP_REFHOLD(cand);
			return (cand);
		}
		/* no match; continue searching */
	}
	return (NULL);
}
/*
 * Decide whether a raw SCTP conn_t should receive an IPv4 packet.
 *
 * A fully bound conn is compared with IPCL_CONN_MATCH against the
 * packet's source, destination and both ports.  Any other conn is
 * compared with IPCL_BIND_MATCH against the destination address and the
 * port formed from the low 16 bits of the host-order value of `ports'.
 */
boolean_t
ip_fanout_sctp_raw_match(conn_t *connp, uint32_t ports, ipha_t *ipha)
{
	uint16_t lport;

	if (!connp->conn_fully_bound) {
		lport = htons(ntohl(ports) & 0xFFFF);
		return (IPCL_BIND_MATCH(connp, IPPROTO_SCTP, ipha->ipha_dst,
		    lport));
	}

	return (IPCL_CONN_MATCH(connp, IPPROTO_SCTP, ipha->ipha_src,
	    ipha->ipha_dst, ports));
}
/*
 * Decide whether a raw SCTP conn_t should receive a packet, IPv6 flavor.
 *
 * A fully bound conn is compared with IPCL_CONN_MATCH_V6 against the
 * packet's addresses and ports, unless for_v4 is set.  In every other
 * case IPCL_BIND_MATCH_V6 is used with the port formed from the low 16
 * bits of the host-order value of `ports'; the address compared is the
 * packet's destination, or the all-zeros address when for_v4 is set
 * (ip6h is not dereferenced then).
 */
boolean_t
ip_fanout_sctp_raw_match_v6(conn_t *connp, uint32_t ports, ip6_t *ip6h,
    boolean_t for_v4)
{
	uint16_t lport;
	in6_addr_t v6dst;

	if (for_v4 || !connp->conn_fully_bound) {
		lport = htons(ntohl(ports) & 0xFFFF);
		if (!for_v4)
			v6dst = ip6h->ip6_dst;
		else
			v6dst = ipv6_all_zeros;
		return (IPCL_BIND_MATCH_V6(connp, IPPROTO_SCTP, v6dst, lport));
	}

	return (IPCL_CONN_MATCH_V6(connp, IPPROTO_SCTP, ip6h->ip6_src,
	    ip6h->ip6_dst, ports));
}