tcp_bind.c revision 7256a34efe9df75b638b9e812912ef7c5c68e208
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/types.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#define _SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/suntpi.h>
#include <sys/xti_inet.h>
#include <sys/policy.h>
#include <sys/squeue_impl.h>
#include <sys/squeue.h>
#include <sys/tsol/tnet.h>
#include <rpc/pmap_prot.h>
#include <inet/common.h>
#include <inet/ip.h>
#include <inet/tcp.h>
#include <inet/tcp_impl.h>
#include <inet/proto_set.h>
#include <inet/ipsec_impl.h>
/* Settable in /etc/system */
/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
static uint32_t tcp_random_anon_port = 1;
static int tcp_bind_select_lport(tcp_t *, in_port_t *, boolean_t,
cred_t *cr);
static in_port_t tcp_get_next_priv_port(const tcp_t *);
/*
* Hash list insertion routine for tcp_t structures. Each hash bucket
* contains a list of tcp_t entries, and each entry is bound to a unique
* port. If there are multiple tcp_t's that are bound to the same port, then
* one of them will be linked into the hash bucket list, and the rest will
* hang off of that one entry. For each port, entries bound to a specific IP
 * address will be inserted before those bound to INADDR_ANY.
*/
void
tcp_bind_hash_insert(tf_t *tbf, tcp_t *tcp, int caller_holds_lock)
{
tcp_t **tcpp;
tcp_t *tcpnext;
tcp_t *tcphash;
conn_t *connp = tcp->tcp_connp;
conn_t *connext;
if (tcp->tcp_ptpbhn != NULL) {
ASSERT(!caller_holds_lock);
tcp_bind_hash_remove(tcp);
}
tcpp = &tbf->tf_tcp;
if (!caller_holds_lock) {
mutex_enter(&tbf->tf_lock);
} else {
ASSERT(MUTEX_HELD(&tbf->tf_lock));
}
tcphash = tcpp[0];
tcpnext = NULL;
if (tcphash != NULL) {
/* Look for an entry using the same port */
while ((tcphash = tcpp[0]) != NULL &&
connp->conn_lport != tcphash->tcp_connp->conn_lport)
tcpp = &(tcphash->tcp_bind_hash);
		/* The port was not found; just add to the end */
if (tcphash == NULL)
goto insert;
/*
* OK, there already exists an entry bound to the
* same port.
*
		 * If the new tcp is bound to INADDR_ANY and the first
		 * one in the list is not bound to INADDR_ANY, we skip
		 * all entries until we find the first one bound to
		 * INADDR_ANY.
* This makes sure that applications binding to a
* specific address get preference over those binding to
* INADDR_ANY.
*/
tcpnext = tcphash;
connext = tcpnext->tcp_connp;
tcphash = NULL;
if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) &&
!V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
while ((tcpnext = tcpp[0]) != NULL) {
connext = tcpnext->tcp_connp;
if (!V6_OR_V4_INADDR_ANY(
connext->conn_bound_addr_v6))
tcpp = &(tcpnext->tcp_bind_hash_port);
else
break;
}
if (tcpnext != NULL) {
tcpnext->tcp_ptpbhn = &tcp->tcp_bind_hash_port;
tcphash = tcpnext->tcp_bind_hash;
if (tcphash != NULL) {
tcphash->tcp_ptpbhn =
&(tcp->tcp_bind_hash);
tcpnext->tcp_bind_hash = NULL;
}
}
} else {
tcpnext->tcp_ptpbhn = &tcp->tcp_bind_hash_port;
tcphash = tcpnext->tcp_bind_hash;
if (tcphash != NULL) {
tcphash->tcp_ptpbhn =
&(tcp->tcp_bind_hash);
tcpnext->tcp_bind_hash = NULL;
}
}
}
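	/*
	 * Link the new entry in at the position found above: tcpnext is
	 * the same-port list that will hang off this tcp_t, tcphash is
	 * the rest of the bucket chain, and tcpp is the predecessor link
	 * that will point at this tcp_t.
	 */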
insert:
tcp->tcp_bind_hash_port = tcpnext;
tcp->tcp_bind_hash = tcphash;
tcp->tcp_ptpbhn = tcpp;
tcpp[0] = tcp;
if (!caller_holds_lock)
mutex_exit(&tbf->tf_lock);
}
/*
* Hash list removal routine for tcp_t structures.
*/
void
tcp_bind_hash_remove(tcp_t *tcp)
{
tcp_t *tcpnext;
kmutex_t *lockp;
tcp_stack_t *tcps = tcp->tcp_tcps;
conn_t *connp = tcp->tcp_connp;
if (tcp->tcp_ptpbhn == NULL)
return;
/*
* Extract the lock pointer in case there are concurrent
* hash_remove's for this instance.
*/
ASSERT(connp->conn_lport != 0);
lockp = &tcps->tcps_bind_fanout[TCP_BIND_HASH(
connp->conn_lport)].tf_lock;
ASSERT(lockp != NULL);
mutex_enter(lockp);
if (tcp->tcp_ptpbhn) {
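		/*
		 * If another tcp_t is bound to the same port (hanging off
		 * this one via tcp_bind_hash_port), promote it to take
		 * this entry's place; otherwise just unlink this entry.
		 */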
tcpnext = tcp->tcp_bind_hash_port;
if (tcpnext != NULL) {
tcp->tcp_bind_hash_port = NULL;
tcpnext->tcp_ptpbhn = tcp->tcp_ptpbhn;
tcpnext->tcp_bind_hash = tcp->tcp_bind_hash;
if (tcpnext->tcp_bind_hash != NULL) {
tcpnext->tcp_bind_hash->tcp_ptpbhn =
&(tcpnext->tcp_bind_hash);
tcp->tcp_bind_hash = NULL;
}
} else if ((tcpnext = tcp->tcp_bind_hash) != NULL) {
tcpnext->tcp_ptpbhn = tcp->tcp_ptpbhn;
tcp->tcp_bind_hash = NULL;
}
*tcp->tcp_ptpbhn = tcpnext;
tcp->tcp_ptpbhn = NULL;
}
mutex_exit(lockp);
}
/*
* Don't let port fall into the privileged range.
 * Since the extra privileged ports can be arbitrary, we also
 * ensure that we exclude those from consideration.
 * tcp_g_epriv_ports is not sorted, so we loop over it until a
 * pass makes no changes.
*
* Note: No locks are held when inspecting tcp_g_*epriv_ports
* but instead the code relies on:
* - the fact that the address of the array and its size never changes
* - the atomic assignment of the elements of the array
*
* Returns 0 if there are no more ports available.
*
* TS note: skip multilevel ports.
*/
in_port_t
tcp_update_next_port(in_port_t port, const tcp_t *tcp, boolean_t random)
{
int i, bump;
boolean_t restart = B_FALSE;
tcp_stack_t *tcps = tcp->tcp_tcps;
if (random && tcp_random_anon_port != 0) {
(void) random_get_pseudo_bytes((uint8_t *)&port,
sizeof (in_port_t));
/*
* Unless changed by a sys admin, the smallest anon port
		 * is 32768 and the largest anon port is 65535.  There is
		 * roughly a 50% chance that the random port falls below
		 * the smallest anon port.  When that happens,
* add port % (anon port range) to the smallest anon
* port to get the random port. It should fall into the
* valid anon port range.
*/
if ((port < tcps->tcps_smallest_anon_port) ||
(port > tcps->tcps_largest_anon_port)) {
if (tcps->tcps_smallest_anon_port ==
tcps->tcps_largest_anon_port) {
bump = 0;
} else {
bump = port % (tcps->tcps_largest_anon_port -
tcps->tcps_smallest_anon_port);
}
port = tcps->tcps_smallest_anon_port + bump;
}
}
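	/*
	 * Clamp the candidate into the anonymous, non-privileged range;
	 * wrap around once if we run off the top, and skip any extra
	 * privileged ports.
	 */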
retry:
if (port < tcps->tcps_smallest_anon_port)
port = (in_port_t)tcps->tcps_smallest_anon_port;
if (port > tcps->tcps_largest_anon_port) {
if (restart)
return (0);
restart = B_TRUE;
port = (in_port_t)tcps->tcps_smallest_anon_port;
}
if (port < tcps->tcps_smallest_nonpriv_port)
port = (in_port_t)tcps->tcps_smallest_nonpriv_port;
for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) {
if (port == tcps->tcps_g_epriv_ports[i]) {
port++;
/*
			 * Make sure the port is still in the
			 * valid range.
*/
goto retry;
}
}
if (is_system_labeled() &&
(i = tsol_next_port(crgetzone(tcp->tcp_connp->conn_cred), port,
IPPROTO_TCP, B_TRUE)) != 0) {
port = i;
goto retry;
}
return (port);
}
/*
* Return the next anonymous port in the privileged port range for
* bind checking. It starts at IPPORT_RESERVED - 1 and goes
* downwards. This is the same behavior as documented in the userland
* library call rresvport(3N).
*
* TS note: skip multilevel ports.
*/
static in_port_t
tcp_get_next_priv_port(const tcp_t *tcp)
{
static in_port_t next_priv_port = IPPORT_RESERVED - 1;
in_port_t nextport;
boolean_t restart = B_FALSE;
tcp_stack_t *tcps = tcp->tcp_tcps;
retry:
if (next_priv_port < tcps->tcps_min_anonpriv_port ||
next_priv_port >= IPPORT_RESERVED) {
next_priv_port = IPPORT_RESERVED - 1;
if (restart)
return (0);
restart = B_TRUE;
}
if (is_system_labeled() &&
(nextport = tsol_next_port(crgetzone(tcp->tcp_connp->conn_cred),
next_priv_port, IPPROTO_TCP, B_FALSE)) != 0) {
next_priv_port = nextport;
goto retry;
}
return (next_priv_port--);
}
static int
tcp_bind_select_lport(tcp_t *tcp, in_port_t *requested_port_ptr,
boolean_t bind_to_req_port_only, cred_t *cr)
{
in_port_t mlp_port;
mlp_type_t addrtype, mlptype;
boolean_t user_specified;
in_port_t allocated_port;
in_port_t requested_port = *requested_port_ptr;
conn_t *connp = tcp->tcp_connp;
zone_t *zone;
tcp_stack_t *tcps = tcp->tcp_tcps;
in6_addr_t v6addr = connp->conn_laddr_v6;
/*
* XXX It's up to the caller to specify bind_to_req_port_only or not.
*/
ASSERT(cr != NULL);
/*
	 * Get a valid port (within the anonymous range and not a
	 * privileged one) to use if the user has not given a port.
	 * If multiple threads are here, they may all start with
	 * the same initial port.  But, it should be fine as long as
* tcp_bindi will ensure that no two threads will be assigned
* the same port.
*
* NOTE: XXX If a privileged process asks for an anonymous port, we
* still check for ports only in the range > tcp_smallest_non_priv_port,
* unless TCP_ANONPRIVBIND option is set.
*/
mlptype = mlptSingle;
mlp_port = requested_port;
if (requested_port == 0) {
requested_port = connp->conn_anon_priv_bind ?
tcp_get_next_priv_port(tcp) :
tcp_update_next_port(tcps->tcps_next_port_to_try,
tcp, B_TRUE);
if (requested_port == 0) {
return (-TNOADDR);
}
user_specified = B_FALSE;
/*
* If the user went through one of the RPC interfaces to create
* this socket and RPC is MLP in this zone, then give him an
* anonymous MLP.
*/
if (connp->conn_anon_mlp && is_system_labeled()) {
zone = crgetzone(cr);
addrtype = tsol_mlp_addr_type(
connp->conn_allzones ? ALL_ZONES : zone->zone_id,
IPV6_VERSION, &v6addr,
tcps->tcps_netstack->netstack_ip);
if (addrtype == mlptSingle) {
return (-TNOADDR);
}
mlptype = tsol_mlp_port_type(zone, IPPROTO_TCP,
PMAPPORT, addrtype);
mlp_port = PMAPPORT;
}
} else {
int i;
boolean_t priv = B_FALSE;
/*
* If the requested_port is in the well-known privileged range,
* verify that the stream was opened by a privileged user.
* Note: No locks are held when inspecting tcp_g_*epriv_ports
* but instead the code relies on:
* - the fact that the address of the array and its size never
* changes
* - the atomic assignment of the elements of the array
*/
if (requested_port < tcps->tcps_smallest_nonpriv_port) {
priv = B_TRUE;
} else {
for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) {
if (requested_port ==
tcps->tcps_g_epriv_ports[i]) {
priv = B_TRUE;
break;
}
}
}
if (priv) {
if (secpolicy_net_privaddr(cr, requested_port,
IPPROTO_TCP) != 0) {
if (connp->conn_debug) {
(void) strlog(TCP_MOD_ID, 0, 1,
SL_ERROR|SL_TRACE,
"tcp_bind: no priv for port %d",
requested_port);
}
return (-TACCES);
}
}
user_specified = B_TRUE;
connp = tcp->tcp_connp;
if (is_system_labeled()) {
zone = crgetzone(cr);
addrtype = tsol_mlp_addr_type(
connp->conn_allzones ? ALL_ZONES : zone->zone_id,
IPV6_VERSION, &v6addr,
tcps->tcps_netstack->netstack_ip);
if (addrtype == mlptSingle) {
return (-TNOADDR);
}
mlptype = tsol_mlp_port_type(zone, IPPROTO_TCP,
requested_port, addrtype);
}
}
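	/*
	 * The port is a multilevel port; verify that the caller has the
	 * privilege to bind to an MLP and, for an anonymous bind, reserve
	 * the anonymous MLP before proceeding.
	 */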
if (mlptype != mlptSingle) {
if (secpolicy_net_bindmlp(cr) != 0) {
if (connp->conn_debug) {
(void) strlog(TCP_MOD_ID, 0, 1,
SL_ERROR|SL_TRACE,
"tcp_bind: no priv for multilevel port %d",
requested_port);
}
return (-TACCES);
}
/*
* If we're specifically binding a shared IP address and the
* port is MLP on shared addresses, then check to see if this
* zone actually owns the MLP. Reject if not.
*/
if (mlptype == mlptShared && addrtype == mlptShared) {
/*
* No need to handle exclusive-stack zones since
* ALL_ZONES only applies to the shared stack.
*/
zoneid_t mlpzone;
mlpzone = tsol_mlp_findzone(IPPROTO_TCP,
htons(mlp_port));
if (connp->conn_zoneid != mlpzone) {
if (connp->conn_debug) {
(void) strlog(TCP_MOD_ID, 0, 1,
SL_ERROR|SL_TRACE,
"tcp_bind: attempt to bind port "
"%d on shared addr in zone %d "
"(should be %d)",
mlp_port, connp->conn_zoneid,
mlpzone);
}
return (-TACCES);
}
}
if (!user_specified) {
int err;
err = tsol_mlp_anon(zone, mlptype, connp->conn_proto,
requested_port, B_TRUE);
if (err != 0) {
if (connp->conn_debug) {
(void) strlog(TCP_MOD_ID, 0, 1,
SL_ERROR|SL_TRACE,
"tcp_bind: cannot establish anon "
"MLP for port %d",
requested_port);
}
return (err);
}
connp->conn_anon_port = B_TRUE;
}
connp->conn_mlp_type = mlptype;
}
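	/*
	 * Try to claim the requested port (or, failing that, an anonymous
	 * one) and insert this endpoint into the bind hash table.
	 */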
allocated_port = tcp_bindi(tcp, requested_port, &v6addr,
connp->conn_reuseaddr, B_FALSE, bind_to_req_port_only,
user_specified);
if (allocated_port == 0) {
connp->conn_mlp_type = mlptSingle;
if (connp->conn_anon_port) {
connp->conn_anon_port = B_FALSE;
(void) tsol_mlp_anon(zone, mlptype, connp->conn_proto,
requested_port, B_FALSE);
}
if (bind_to_req_port_only) {
if (connp->conn_debug) {
(void) strlog(TCP_MOD_ID, 0, 1,
SL_ERROR|SL_TRACE,
"tcp_bind: requested addr busy");
}
return (-TADDRBUSY);
} else {
/* If we are out of ports, fail the bind. */
if (connp->conn_debug) {
(void) strlog(TCP_MOD_ID, 0, 1,
SL_ERROR|SL_TRACE,
"tcp_bind: out of ports?");
}
return (-TNOADDR);
}
}
/* Pass the allocated port back */
*requested_port_ptr = allocated_port;
return (0);
}
/*
* Check the address and check/pick a local port number.
*/
int
tcp_bind_check(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
boolean_t bind_to_req_port_only)
{
tcp_t *tcp = connp->conn_tcp;
sin_t *sin;
sin6_t *sin6;
in_port_t requested_port;
ipaddr_t v4addr;
in6_addr_t v6addr;
ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */
zoneid_t zoneid = IPCL_ZONEID(connp);
ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
uint_t scopeid = 0;
int error = 0;
ip_xmit_attr_t *ixa = connp->conn_ixa;
ASSERT((uintptr_t)len <= (uintptr_t)INT_MAX);
if (tcp->tcp_state == TCPS_BOUND) {
return (0);
} else if (tcp->tcp_state > TCPS_BOUND) {
if (connp->conn_debug) {
(void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
"tcp_bind: bad state, %d", tcp->tcp_state);
}
return (-TOUTSTATE);
}
ASSERT(sa != NULL && len != 0);
if (!OK_32PTR((char *)sa)) {
if (connp->conn_debug) {
(void) strlog(TCP_MOD_ID, 0, 1,
SL_ERROR|SL_TRACE,
"tcp_bind: bad address parameter, "
"address %p, len %d",
(void *)sa, len);
}
return (-TPROTO);
}
error = proto_verify_ip_addr(connp->conn_family, sa, len);
if (error != 0) {
return (error);
}
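	/*
	 * The sockaddr length determines the address family; validate the
	 * requested local address for that family.
	 */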
switch (len) {
case sizeof (sin_t): /* Complete IPv4 address */
sin = (sin_t *)sa;
requested_port = ntohs(sin->sin_port);
v4addr = sin->sin_addr.s_addr;
IN6_IPADDR_TO_V4MAPPED(v4addr, &v6addr);
if (v4addr != INADDR_ANY) {
laddr_type = ip_laddr_verify_v4(v4addr, zoneid, ipst,
B_FALSE);
}
break;
case sizeof (sin6_t): /* Complete IPv6 address */
sin6 = (sin6_t *)sa;
v6addr = sin6->sin6_addr;
requested_port = ntohs(sin6->sin6_port);
if (IN6_IS_ADDR_V4MAPPED(&v6addr)) {
if (connp->conn_ipv6_v6only)
return (EADDRNOTAVAIL);
IN6_V4MAPPED_TO_IPADDR(&v6addr, v4addr);
if (v4addr != INADDR_ANY) {
laddr_type = ip_laddr_verify_v4(v4addr,
zoneid, ipst, B_FALSE);
}
} else {
if (!IN6_IS_ADDR_UNSPECIFIED(&v6addr)) {
if (IN6_IS_ADDR_LINKSCOPE(&v6addr))
scopeid = sin6->sin6_scope_id;
laddr_type = ip_laddr_verify_v6(&v6addr,
zoneid, ipst, B_FALSE, scopeid);
}
}
break;
default:
if (connp->conn_debug) {
(void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
"tcp_bind: bad address length, %d", len);
}
return (EAFNOSUPPORT);
/* return (-TBADADDR); */
}
/* Is the local address a valid unicast address? */
if (laddr_type == IPVL_BAD)
return (EADDRNOTAVAIL);
connp->conn_bound_addr_v6 = v6addr;
if (scopeid != 0) {
ixa->ixa_flags |= IXAF_SCOPEID_SET;
ixa->ixa_scopeid = scopeid;
connp->conn_incoming_ifindex = scopeid;
} else {
ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
connp->conn_incoming_ifindex = connp->conn_bound_if;
}
connp->conn_laddr_v6 = v6addr;
connp->conn_saddr_v6 = v6addr;
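	/*
	 * A requested port of zero always means "pick any port", so only
	 * honor bind_to_req_port_only when a specific port was requested.
	 */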
bind_to_req_port_only = requested_port != 0 && bind_to_req_port_only;
error = tcp_bind_select_lport(tcp, &requested_port,
bind_to_req_port_only, cr);
if (error != 0) {
connp->conn_laddr_v6 = ipv6_all_zeros;
connp->conn_saddr_v6 = ipv6_all_zeros;
connp->conn_bound_addr_v6 = ipv6_all_zeros;
}
return (error);
}
/*
* If the "bind_to_req_port_only" parameter is set, if the requested port
* number is available, return it, If not return 0
*
* If "bind_to_req_port_only" parameter is not set and
* If the requested port number is available, return it. If not, return
* the first anonymous port we happen across. If no anonymous ports are
* available, return 0. addr is the requested local address, if any.
*
* In either case, when succeeding update the tcp_t to record the port number
* and insert it in the bind hash table.
*
* Note that TCP over IPv4 and IPv6 sockets can use the same port number
* without setting SO_REUSEADDR. This is needed so that they
* can be viewed as two independent transport protocols.
*/
in_port_t
tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr,
int reuseaddr, boolean_t quick_connect,
boolean_t bind_to_req_port_only, boolean_t user_specified)
{
/* number of times we have run around the loop */
int count = 0;
/* maximum number of times to run around the loop */
int loopmax;
conn_t *connp = tcp->tcp_connp;
tcp_stack_t *tcps = tcp->tcp_tcps;
/*
	 * The lookup for a free port is done in a loop, and "loopmax"
	 * bounds how long we spin in the loop.
*/
if (bind_to_req_port_only) {
/*
* If the requested port is busy, don't bother to look
* for a new one. Setting loop maximum count to 1 has
* that effect.
*/
loopmax = 1;
} else {
/*
* If the requested port is busy, look for a free one
* in the anonymous port range.
		 * Set loopmax appropriately so that we do not search
		 * forever in case all of the anonymous ports are in use.
*/
if (connp->conn_anon_priv_bind) {
/*
* loopmax =
* (IPPORT_RESERVED-1) - tcp_min_anonpriv_port + 1
*/
loopmax = IPPORT_RESERVED -
tcps->tcps_min_anonpriv_port;
} else {
loopmax = (tcps->tcps_largest_anon_port -
tcps->tcps_smallest_anon_port + 1);
}
}
do {
uint16_t lport;
tf_t *tbf;
tcp_t *ltcp;
conn_t *lconnp;
lport = htons(port);
/*
* Ensure that the tcp_t is not currently in the bind hash.
* Hold the lock on the hash bucket to ensure that
* the duplicate check plus the insertion is an atomic
* operation.
*
		 * This function does an inline lookup on the bind hash
		 * list.  Make sure that we access only members of tcp_t
		 * and that we don't look at tcp_tcp, since we are not
		 * doing a CONN_INC_REF.
*/
tcp_bind_hash_remove(tcp);
tbf = &tcps->tcps_bind_fanout[TCP_BIND_HASH(lport)];
mutex_enter(&tbf->tf_lock);
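		/*
		 * Find the first entry in the bucket chain bound to this
		 * port; any other entries bound to the same port hang off
		 * it via tcp_bind_hash_port.
		 */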
for (ltcp = tbf->tf_tcp; ltcp != NULL;
ltcp = ltcp->tcp_bind_hash) {
if (lport == ltcp->tcp_connp->conn_lport)
break;
}
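		/*
		 * Walk every entry bound to this port and check whether it
		 * conflicts with the requested binding.
		 */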
for (; ltcp != NULL; ltcp = ltcp->tcp_bind_hash_port) {
boolean_t not_socket;
boolean_t exclbind;
lconnp = ltcp->tcp_connp;
/*
* On a labeled system, we must treat bindings to ports
* on shared IP addresses by sockets with MAC exemption
* privilege as being in all zones, as there's
* otherwise no way to identify the right receiver.
*/
if (!IPCL_BIND_ZONE_MATCH(lconnp, connp))
continue;
/*
* If TCP_EXCLBIND is set for either the bound or
* binding endpoint, the semantics of bind
* is changed according to the following.
*
* spec = specified address (v4 or v6)
* unspec = unspecified address (v4 or v6)
* A = specified addresses are different for endpoints
*
* bound bind to allowed
* -------------------------------------
* unspec unspec no
* unspec spec no
* spec unspec no
* spec spec yes if A
*
* For labeled systems, SO_MAC_EXEMPT behaves the same
* as TCP_EXCLBIND, except that zoneid is ignored.
*
* Note:
*
* 1. Because of TLI semantics, an endpoint can go
			 * back from, say, TCPS_ESTABLISHED to TCPS_LISTEN or
* TCPS_BOUND, depending on whether it is originally
* a listener or not. That is why we need to check
* for states greater than or equal to TCPS_BOUND
* here.
*
* 2. Ideally, we should only check for state equals
* to TCPS_LISTEN. And the following check should be
* added.
*
* if (ltcp->tcp_state == TCPS_LISTEN ||
* !reuseaddr || !lconnp->conn_reuseaddr) {
* ...
* }
*
* The semantics will be changed to this. If the
* endpoint on the list is in state not equal to
* TCPS_LISTEN and both endpoints have SO_REUSEADDR
* set, let the bind succeed.
*
* Because of (1), we cannot do that for TLI
* endpoints. But we can do that for socket endpoints.
* If in future, we can change this going back
* semantics, we can use the above check for TLI also.
*/
not_socket = !(TCP_IS_SOCKET(ltcp) &&
TCP_IS_SOCKET(tcp));
exclbind = lconnp->conn_exclbind ||
connp->conn_exclbind;
if ((lconnp->conn_mac_mode != CONN_MAC_DEFAULT) ||
(connp->conn_mac_mode != CONN_MAC_DEFAULT) ||
(exclbind && (not_socket ||
ltcp->tcp_state <= TCPS_ESTABLISHED))) {
if (V6_OR_V4_INADDR_ANY(
lconnp->conn_bound_addr_v6) ||
V6_OR_V4_INADDR_ANY(*laddr) ||
IN6_ARE_ADDR_EQUAL(laddr,
&lconnp->conn_bound_addr_v6)) {
break;
}
continue;
}
/*
* Check ipversion to allow IPv4 and IPv6 sockets to
* have disjoint port number spaces, if *_EXCLBIND
* is not set and only if the application binds to a
* specific port. We use the same autoassigned port
* number space for IPv4 and IPv6 sockets.
*/
if (connp->conn_ipversion != lconnp->conn_ipversion &&
bind_to_req_port_only)
continue;
/*
* Ideally, we should make sure that the source
* address, remote address, and remote port in the
* four tuple for this tcp-connection is unique.
* However, trying to find out the local source
* address would require too much code duplication
			 * with IP, since IP needs to have that code
* to support userland TCP implementations.
*/
if (quick_connect &&
(ltcp->tcp_state > TCPS_LISTEN) &&
((connp->conn_fport != lconnp->conn_fport) ||
!IN6_ARE_ADDR_EQUAL(&connp->conn_faddr_v6,
&lconnp->conn_faddr_v6)))
continue;
if (!reuseaddr) {
/*
				 * No socket option SO_REUSEADDR.
				 * If the existing entry is bound to
				 * a non-wildcard IP address and the
				 * requesting stream is bound to a
				 * distinct non-wildcard IP address,
				 * keep going.
*/
if (!V6_OR_V4_INADDR_ANY(*laddr) &&
!V6_OR_V4_INADDR_ANY(
lconnp->conn_bound_addr_v6) &&
!IN6_ARE_ADDR_EQUAL(laddr,
&lconnp->conn_bound_addr_v6))
continue;
if (ltcp->tcp_state >= TCPS_BOUND) {
/*
* This port is being used and
* its state is >= TCPS_BOUND,
* so we can't bind to it.
*/
break;
}
} else {
/*
* socket option SO_REUSEADDR is set on the
* binding tcp_t.
*
				 * If the two streams are bound to the
				 * same IP address, or both addr and
				 * the bound source are wildcards
				 * (INADDR_ANY), we want to stop
				 * searching.
* We have found a match of IP source
* address and source port, which is
* refused regardless of the
* SO_REUSEADDR setting, so we break.
*/
if (IN6_ARE_ADDR_EQUAL(laddr,
&lconnp->conn_bound_addr_v6) &&
(ltcp->tcp_state == TCPS_LISTEN ||
ltcp->tcp_state == TCPS_BOUND))
break;
}
}
if (ltcp != NULL) {
/* The port number is busy */
mutex_exit(&tbf->tf_lock);
} else {
/*
* This port is ours. Insert in fanout and mark as
* bound to prevent others from getting the port
* number.
*/
tcp->tcp_state = TCPS_BOUND;
DTRACE_TCP6(state__change, void, NULL,
ip_xmit_attr_t *, connp->conn_ixa,
void, NULL, tcp_t *, tcp, void, NULL,
int32_t, TCPS_IDLE);
connp->conn_lport = htons(port);
ASSERT(&tcps->tcps_bind_fanout[TCP_BIND_HASH(
connp->conn_lport)] == tbf);
tcp_bind_hash_insert(tbf, tcp, 1);
mutex_exit(&tbf->tf_lock);
/*
* We don't want tcp_next_port_to_try to "inherit"
* a port number supplied by the user in a bind.
*/
if (user_specified)
return (port);
/*
* This is the only place where tcp_next_port_to_try
* is updated. After the update, it may or may not
* be in the valid range.
*/
if (!connp->conn_anon_priv_bind)
tcps->tcps_next_port_to_try = port + 1;
return (port);
}
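		/* The port is busy; pick the next candidate to try. */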
if (connp->conn_anon_priv_bind) {
port = tcp_get_next_priv_port(tcp);
} else {
if (count == 0 && user_specified) {
/*
* We may have to return an anonymous port. So
* get one to start with.
*/
port =
tcp_update_next_port(
tcps->tcps_next_port_to_try,
tcp, B_TRUE);
user_specified = B_FALSE;
} else {
port = tcp_update_next_port(port + 1, tcp,
B_FALSE);
}
}
if (port == 0)
break;
/*
* Don't let this loop run forever in the case where
* all of the anonymous ports are in use.
*/
} while (++count < loopmax);
return (0);
}