tcp_bind.c revision 7256a34efe9df75b638b9e812912ef7c5c68e208
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
*/
#define _SUN_TPI_VERSION 2
#include <sys/xti_inet.h>
#include <sys/squeue_impl.h>
#include <rpc/pmap_prot.h>
#include <inet/tcp_impl.h>
#include <inet/proto_set.h>
#include <inet/ipsec_impl.h>
/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
/*
* Hash list insertion routine for tcp_t structures. Each hash bucket
* contains a list of tcp_t entries, and each entry is bound to a unique
* port. If there are multiple tcp_t's that are bound to the same port, then
* one of them will be linked into the hash bucket list, and the rest will
* hang off of that one entry. For each port, entries bound to a specific IP
* address will be inserted before those bound to INADDR_ANY.
*/
void
{
}
if (!caller_holds_lock) {
} else {
}
/* Look for an entry using the same port */
/* The port was not found, just add to the end */
goto insert;
/*
* OK, there already exists an entry bound to the
* same port.
*
* If the new tcp is bound to the INADDR_ANY address
* and the first one in the list is not bound to
* INADDR_ANY, we skip all entries until we find the
* first one bound to INADDR_ANY.
* This makes sure that applications binding to a
* specific address get preference over those binding to
* INADDR_ANY.
*/
if (!V6_OR_V4_INADDR_ANY(
else
break;
}
&(tcp->tcp_bind_hash);
}
}
} else {
&(tcp->tcp_bind_hash);
}
}
}
if (!caller_holds_lock)
}
/*
* Hash list removal routine for tcp_t structures.
*/
void
{
return;
/*
* Extract the lock pointer in case there are concurrent
* hash_remove's for this instance.
*/
if (tcp->tcp_ptpbhn) {
&(tcpnext->tcp_bind_hash);
}
}
}
}
/*
* Don't let port fall into the privileged range.
* Since the extra privileged ports can be arbitrary we also
* ensure that we exclude those from consideration.
* tcp_g_epriv_ports is not sorted thus we loop over it until
* there are no changes.
*
* Note: No locks are held when inspecting tcp_g_*epriv_ports
* but instead the code relies on:
* - the fact that the address of the array and its size never changes
* - the atomic assignment of the elements of the array
*
* Returns 0 if there are no more ports available.
*
* TS note: skip multilevel ports.
*/
{
int i, bump;
if (random && tcp_random_anon_port != 0) {
sizeof (in_port_t));
/*
* Unless changed by a sys admin, the smallest anon port
* is 32768 and the largest anon port is 65535. It is
* very likely (50%) for the random port to be smaller
* than the smallest anon port. When that happens,
* add port % (anon port range) to the smallest anon
* port to get the random port. It should fall into the
* valid anon port range.
*/
if (tcps->tcps_smallest_anon_port ==
bump = 0;
} else {
}
}
}
if (restart)
return (0);
}
for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) {
port++;
/*
* Check whether the port is in the
* valid range.
*/
goto retry;
}
}
if (is_system_labeled() &&
IPPROTO_TCP, B_TRUE)) != 0) {
port = i;
goto retry;
}
return (port);
}
/*
* Return the next anonymous port in the privileged port range for
* bind checking. It starts at IPPORT_RESERVED - 1 and goes
* downwards. This is the same behavior as documented in the userland
* library call rresvport(3N).
*
* TS note: skip multilevel ports.
*/
static in_port_t
{
if (restart)
return (0);
}
if (is_system_labeled() &&
goto retry;
}
return (next_priv_port--);
}
static int
{
/*
* XXX It's up to the caller to specify bind_to_req_port_only or not.
*/
/*
* Get a valid port (within the anonymous range and should not
* be a privileged one) to use if the user has not given a port.
* If multiple threads are here, they may all start
* with the same initial port. But that should be fine as long as
* tcp_bindi ensures that no two threads are assigned
* the same port.
*
* NOTE: XXX If a privileged process asks for an anonymous port, we
* still check for ports only in the range > tcp_smallest_non_priv_port,
* unless TCP_ANONPRIVBIND option is set.
*/
if (requested_port == 0) {
if (requested_port == 0) {
return (-TNOADDR);
}
/*
* If the user went through one of the RPC interfaces to create
* this socket and RPC is MLP in this zone, then give him an
* anonymous MLP.
*/
if (addrtype == mlptSingle) {
return (-TNOADDR);
}
}
} else {
int i;
/*
* If the requested_port is in the well-known privileged range,
* verify that the stream was opened by a privileged user.
* Note: No locks are held when inspecting tcp_g_*epriv_ports
* but instead the code relies on:
* - the fact that the address of the array and its size never
* changes
* - the atomic assignment of the elements of the array
*/
} else {
for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) {
if (requested_port ==
tcps->tcps_g_epriv_ports[i]) {
break;
}
}
}
if (priv) {
IPPROTO_TCP) != 0) {
if (connp->conn_debug) {
"tcp_bind: no priv for port %d",
}
return (-TACCES);
}
}
if (is_system_labeled()) {
if (addrtype == mlptSingle) {
return (-TNOADDR);
}
}
}
if (mlptype != mlptSingle) {
if (secpolicy_net_bindmlp(cr) != 0) {
if (connp->conn_debug) {
"tcp_bind: no priv for multilevel port %d",
}
return (-TACCES);
}
/*
* If we're specifically binding a shared IP address and the
* port is MLP on shared addresses, then check to see if this
* zone actually owns the MLP. Reject if not.
*/
/*
* No need to handle exclusive-stack zones since
* ALL_ZONES only applies to the shared stack.
*/
if (connp->conn_debug) {
"tcp_bind: attempt to bind port "
"%d on shared addr in zone %d "
"(should be %d)",
mlpzone);
}
return (-TACCES);
}
}
if (!user_specified) {
int err;
if (err != 0) {
if (connp->conn_debug) {
"tcp_bind: cannot establish anon "
"MLP for port %d",
}
return (err);
}
}
}
if (allocated_port == 0) {
if (connp->conn_anon_port) {
}
if (bind_to_req_port_only) {
if (connp->conn_debug) {
"tcp_bind: requested addr busy");
}
return (-TADDRBUSY);
} else {
/* If we are out of ports, fail the bind. */
if (connp->conn_debug) {
"tcp_bind: out of ports?");
}
return (-TNOADDR);
}
}
/* Pass the allocated port back */
return (0);
}
/*
*/
int
{
int error = 0;
return (0);
if (connp->conn_debug) {
}
return (-TOUTSTATE);
}
if (connp->conn_debug) {
"tcp_bind: bad address parameter, "
"address %p, len %d",
}
return (-TPROTO);
}
if (error != 0) {
return (error);
}
switch (len) {
case sizeof (sin_t): /* Complete IPv4 address */
if (v4addr != INADDR_ANY) {
B_FALSE);
}
break;
case sizeof (sin6_t): /* Complete IPv6 address */
if (IN6_IS_ADDR_V4MAPPED(&v6addr)) {
if (connp->conn_ipv6_v6only)
return (EADDRNOTAVAIL);
if (v4addr != INADDR_ANY) {
}
} else {
if (!IN6_IS_ADDR_UNSPECIFIED(&v6addr)) {
if (IN6_IS_ADDR_LINKSCOPE(&v6addr))
}
}
break;
default:
if (connp->conn_debug) {
"tcp_bind: bad address length, %d", len);
}
return (EAFNOSUPPORT);
/* return (-TBADADDR); */
}
/* Is the local address a valid unicast address? */
if (laddr_type == IPVL_BAD)
return (EADDRNOTAVAIL);
if (scopeid != 0) {
} else {
}
if (error != 0) {
}
return (error);
}
/*
* If the "bind_to_req_port_only" parameter is set and the requested port
* number is available, return it. If not, return 0.
*
* If the "bind_to_req_port_only" parameter is not set and
* the requested port number is available, return it. If not, return
* the first anonymous port we happen across. If no anonymous ports are
* available, return 0. addr is the requested local address, if any.
*
* In either case, when succeeding update the tcp_t to record the port number
* and insert it in the bind hash table.
*
* Note that TCP over IPv4 and IPv6 sockets can use the same port number
* without setting SO_REUSEADDR. This is needed so that they
* can be viewed as two independent transport protocols.
*/
{
/* number of times we have run around the loop */
int count = 0;
/* maximum number of times to run around the loop */
int loopmax;
/*
* Lookup for free addresses is done in a loop and "loopmax"
* influences how long we spin in the loop
*/
if (bind_to_req_port_only) {
/*
* If the requested port is busy, don't bother to look
* for a new one. Setting loop maximum count to 1 has
* that effect.
*/
loopmax = 1;
} else {
/*
* If the requested port is busy, look for a free one
* in the anonymous port range.
* Set loopmax appropriately so that one does not look
* forever in the case all of the anonymous ports are in use.
*/
if (connp->conn_anon_priv_bind) {
/*
* loopmax =
* (IPPORT_RESERVED-1) - tcp_min_anonpriv_port + 1
*/
} else {
}
}
do {
/*
* Ensure that the tcp_t is not currently in the bind hash.
* Hold the lock on the hash bucket to ensure that
* the duplicate check plus the insertion is an atomic
* operation.
*
* This function does an inline lookup on the bind hash list.
* Make sure that we access only members of tcp_t
* and that we don't look at tcp_tcp, since we are not
* doing a CONN_INC_REF.
*/
break;
}
/*
* On a labeled system, we must treat bindings to ports
* on shared IP addresses by sockets with MAC exemption
* privilege as being in all zones, as there's
* otherwise no way to identify the right receiver.
*/
continue;
/*
* If TCP_EXCLBIND is set for either the bound or
* binding endpoint, the semantics of bind
* is changed according to the following.
*
* spec = specified address (v4 or v6)
* unspec = unspecified address (v4 or v6)
* A = specified addresses are different for endpoints
*
* bound bind to allowed
* -------------------------------------
* unspec unspec no
* unspec spec no
* spec unspec no
* spec spec yes if A
*
* For labeled systems, SO_MAC_EXEMPT behaves the same
* as TCP_EXCLBIND, except that zoneid is ignored.
*
* Note:
*
* 1. Because of TLI semantics, an endpoint can go
* back from, say, TCPS_ESTABLISHED to TCPS_LISTEN or
* TCPS_BOUND, depending on whether it is originally
* a listener or not. That is why we need to check
* for states greater than or equal to TCPS_BOUND
* here.
*
* 2. Ideally, we should only check for state equal
* to TCPS_LISTEN. And the following check should be
* added.
*
* if (ltcp->tcp_state == TCPS_LISTEN ||
* !reuseaddr || !lconnp->conn_reuseaddr) {
* ...
* }
*
* The semantics will be changed to this. If the
* endpoint on the list is in state not equal to
* TCPS_LISTEN and both endpoints have SO_REUSEADDR
* set, let the bind succeed.
*
* Because of (1), we cannot do that for TLI
* endpoints. But we can do that for socket endpoints.
* If in future, we can change this going back
* semantics, we can use the above check for TLI also.
*/
TCP_IS_SOCKET(tcp));
(exclbind && (not_socket ||
if (V6_OR_V4_INADDR_ANY(
lconnp->conn_bound_addr_v6) ||
V6_OR_V4_INADDR_ANY(*laddr) ||
&lconnp->conn_bound_addr_v6)) {
break;
}
continue;
}
/*
* Check ipversion to allow IPv4 and IPv6 sockets to
* have disjoint port number spaces, if *_EXCLBIND
* is not set and only if the application binds to a
* specific port. We use the same autoassigned port
* number space for IPv4 and IPv6 sockets.
*/
continue;
/*
* Ideally, we should make sure that the source
* address, remote address, and remote port in the
* four tuple for this tcp-connection is unique.
* However, trying to find out the local source
* address would require too much code duplication
* with IP, since IP needs to have that code
* to support userland TCP implementations.
*/
if (quick_connect &&
&lconnp->conn_faddr_v6)))
continue;
if (!reuseaddr) {
/*
* No socket option SO_REUSEADDR.
* If the existing port is bound to
* a non-wildcard IP address
* and the requesting stream is
* bound to a different
* IP address
* (also non-wildcard), keep
* going.
if (!V6_OR_V4_INADDR_ANY(*laddr) &&
lconnp->conn_bound_addr_v6) &&
continue;
/*
* This port is being used and
* its state is >= TCPS_BOUND,
* so we can't bind to it.
*/
break;
}
} else {
/*
* socket option SO_REUSEADDR is set on the
* binding tcp_t.
*
* If two streams are bound to the
* same IP address, or both addr
* and bound source are wildcards
* (INADDR_ANY), we want to stop
* searching.
* We have found a match of IP source
* address and source port, which is
* refused regardless of the
* SO_REUSEADDR setting, so we break.
*/
if (IN6_ARE_ADDR_EQUAL(laddr,
&lconnp->conn_bound_addr_v6) &&
break;
}
}
/* The port number is busy */
} else {
/*
* This port is ours. Insert in fanout and mark as
* bound to prevent others from getting the port
* number.
*/
/*
* We don't want tcp_next_port_to_try to "inherit"
* a port number supplied by the user in a bind.
*/
if (user_specified)
return (port);
/*
* This is the only place where tcp_next_port_to_try
* is updated. After the update, it may or may not
* be in the valid range.
*/
if (!connp->conn_anon_priv_bind)
return (port);
}
if (connp->conn_anon_priv_bind) {
} else {
if (count == 0 && user_specified) {
/*
* We may have to return an anonymous port. So
* get one to start with.
*/
port =
} else {
B_FALSE);
}
}
if (port == 0)
break;
/*
* Don't let this loop run forever in the case where
* all of the anonymous ports are in use.
*/
return (0);
}