af_rds.c revision c0dd49bdd68c0d758a67d56f07826f3b45cfc664
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
/*
* Copyright (c) 2006 Oracle. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <sys/socketvar.h>
#include <sys/sysmacros.h>
#include <net/if_types.h>
/*
* External objects referenced by this file; these lines only declare them,
* they do not allocate storage.
*/
extern ddi_taskq_t *rdsv3_taskq;
extern struct rdma_cm_id *rdsv3_rdma_listen_id;
/*
* this is just used for stats gathering :/
* (the socket creation path further down also tests it against 1)
*/
static unsigned long rdsv3_sock_count;
/*
* This is called as the final descriptor referencing this socket is closed.
* We have to unbind the socket so that another socket can be bound to the
* address it was using.
*
* We have to be careful about racing with the incoming path. sock_orphan()
* sets SOCK_DEAD and we use that as an indicator to the rx path that new
* messages shouldn't be queued.
*
* Every visible path returns 0.
*
* NOTE(review): the line carrying the function name and parameter list, and
* most of the body, are not visible in this view - confirm against the
* complete file before relying on this description.
*/
/* ARGSUSED */
static int
{
struct rdsv3_sock *rs;
goto out;
/*
* Note - rdsv3_clear_recv_queue grabs rs_recv_lock, so
* that ensures the recv path has completed messing
* with the socket.
*/
out:
return (0);
}
/*
* Wakeup helper for the receive path; returns nothing.
*
* NOTE(review): the function name, parameter list and the statements that
* perform the wakeup are not visible in this view.
*/
void
{
/* wake up anyone waiting in recvmsg */
}
/*
* Careful not to race with rdsv3_release -> sock_orphan which clears sk_sleep.
* _bh() isn't OK here, we're called from interrupt handlers. It's probably OK
* to wake the waitqueue after sk_sleep is clear as we hold a sock ref, but
* this seems more conservative.
* NB - normally, one would use sk_callback_lock for this, but we can
* get here from interrupts, whereas the network code grabs sk_callback_lock
* with _lock_bh only - so relying on sk_callback_lock introduces livelocks.
*
* NOTE(review): the function name, parameter list and body are not visible
* in this view, so the locking described above cannot be checked here.
*/
void
{
}
/*
* Returns 0 on the only visible path.
*
* NOTE(review): the function name and parameter list are not visible in
* this view. rs->rs_bound_port is the last argument of a call whose
* beginning is also not visible.
*/
/*ARGSUSED*/
static int
{
rs->rs_bound_port);
return (0);
}
/*
* RDS' poll is without a doubt the least intuitive part of the interface,
* as POLLIN and POLLOUT do not behave entirely as you would expect from
* a network protocol.
*
* POLLIN is asserted if
* - there is data on the receive queue.
* - to signal that a previously congested destination may have become
* uncongested
* - A notification has been queued to the socket (this can be a congestion
* update, or a RDMA completion).
*
* POLLOUT is asserted if there is room on the send queue. This does not mean
* however, that the next sendmsg() call will succeed. If the application tries
* to send to a congested destination, the system call may still fail (and
* return ENOBUFS).
*
* Returns the accumulated event mask.
*
* NOTE(review): the function name, parameter list and the statements that
* set bits in the mask are not visible in this view.
*/
/* ARGSUSED */
static short
{
/* Event bits to report; starts empty and is returned at the end. */
unsigned short mask = 0;
#if 0
#endif
/* Congestion reporting differs depending on rs_cong_monitor. */
if (!rs->rs_cong_monitor) {
/*
* When a congestion map was updated, we signal POLLIN for
* "historical" reasons. Applications can also poll for
* WRBAND instead.
*/
} else {
/* Monitoring enabled: the action depends on rs_cong_notify. */
if (rs->rs_cong_notify)
}
#if 0
#endif
return (mask);
}
/*
* Socket ioctl handler (the debug messages below identify it as
* "rdsv3_ioctl"). Dispatches on cmd across the interface-query ioctls
* listed in the switch; any other command sets rval to EOPNOTSUPP.
* Returns rval.
*
* NOTE(review): the function name, parameter list, several local
* declarations (rval, rc, len, ...) and many statements are not visible in
* this view. The comments below describe only the visible lines.
*/
/* ARGSUSED */
static int
{
int numifs;
int bufsize;
void *buf;
/* Only ipv4 for now */
CRED());
/* Bail out early if the preceding (not visible) call failed. */
if (rval != 0) {
rval);
return (rval);
}
switch (cmd) {
/* Interface-count queries (lif and legacy forms). */
case SIOCGLIFNUM :
case SIOCGIFNUM :
if (rval != 0) break;
if (cmd == SIOCGLIFNUM) {
sizeof (int), 0);
} else {
len = 0;
len++;
}
}
sizeof (int), 0);
}
break;
/* Interface configuration list, lifconf form. */
case SIOCGLIFCONF :
!= 0) {
break;
}
if (rval != 0) {
RDSV3_DPRINTF2("rdsv3_ioctl",
"rdsv3_do_ip_ioctl failed: %d", rval);
break;
}
bufsize, 0) != 0) {
RDSV3_DPRINTF2("rdsv3_ioctl",
"copyout of records failed");
}
}
0) != 0) {
RDSV3_DPRINTF2("rdsv3_ioctl",
"copyout of lifconf failed");
}
break;
/* Interface configuration list, ifconf form (current and old ioctl). */
case SIOCGIFCONF :
case O_SIOCGIFCONF :
!= 0) {
break;
}
RDSV3_DPRINTF2("rdsv3_ioctl",
"O_SIOCGIFCONF: ifc_len: %d, req: %p",
if (rval != 0) {
RDSV3_DPRINTF2("rdsv3_ioctl",
"rdsv3_do_ip_ioctl_old failed: %d", rval);
break;
}
bufsize, 0) != 0) {
RDSV3_DPRINTF2("rdsv3_ioctl",
"copyout of records failed");
}
}
0) != 0) {
RDSV3_DPRINTF2("rdsv3_ioctl",
"copyout of ifconf failed");
}
break;
/* Per-interface queries that exchange a struct lifreq. */
case SIOCGLIFFLAGS :
case SIOCSLIFFLAGS :
case SIOCGLIFMTU :
case SIOCGLIFNETMASK :
case SIOCGLIFINDEX :
!= 0) {
break;
}
if (rc != 0) {
RDSV3_DPRINTF2("rdsv3_ioctl",
"ksocket_ioctl failed: %d, name: %s cmd: 0x%x",
break;
}
sizeof (struct lifreq), 0);
break;
/* Per-interface queries that exchange a struct ifreq. */
case SIOCGIFFLAGS :
case SIOCSIFFLAGS :
case SIOCGIFMTU :
case SIOCGIFNETMASK :
case SIOCGIFINDEX :
!= 0) {
break;
}
if (rc != 0) {
RDSV3_DPRINTF2("rdsv3_ioctl",
"ksocket_ioctl failed: %d, name: %s cmd: 0x%x",
break;
}
sizeof (struct ifreq), 0);
break;
/* Anything else is not supported on this socket type. */
default:
rval = EOPNOTSUPP;
}
return (rval);
}
/*
* Validates its inputs and returns a negative errno on failure:
* -ENOTCONN if the socket has no bound address (rs_bound_addr == 0),
* -EINVAL if len is smaller than a struct sockaddr_in,
* -EFAULT if the call whose tail is visible below returns non-zero.
* Returns 0 otherwise.
*
* NOTE(review): the function name, parameter list and the action taken on
* the address are not visible in this view. The failing call is presumably
* a copy-in of the caller's sockaddr_in - confirm.
*/
static int
{
struct sockaddr_in sin;
/* racing with another thread binding seems ok here */
if (rs->rs_bound_addr == 0)
return (-ENOTCONN); /* XXX not a great errno */
if (len < sizeof (struct sockaddr_in))
return (-EINVAL);
0) != 0) {
return (-EFAULT);
}
return (0);
}
/*
* Returns -EINVAL when optlen is smaller than an int, 0 otherwise.
*
* NOTE(review): the function name, parameter list and the statement that
* stores the option value are not visible in this view.
*/
static int
{
if (optlen < sizeof (int))
return (-EINVAL);
return (0);
}
/*
* Applies a congestion-monitor setting and returns ret.
*
* When ret is 0 and rs_cong_monitor ends up off, the socket's congestion
* state (rs_cong_mask, rs_cong_notify) is cleared.
*
* NOTE(review): the function name, parameter list, the assignment to ret
* and the "monitor enabled" branch body are not visible in this view.
*/
static int
{
int ret;
if (ret == 0) {
if (rs->rs_cong_monitor) {
} else {
/* Monitoring is off: drop any pending congestion state. */
rs->rs_cong_mask = 0;
rs->rs_cong_notify = 0;
}
}
return (ret);
}
/*
* Socket-option setter; dispatches on optname and returns ret.
*
* Recognised options: RDSV3_CANCEL_SENT_TO, RDSV3_GET_MR, RDSV3_FREE_MR,
* RDSV3_RECVERR, RDSV3_CONG_MONITOR, SO_SNDBUF and SO_RCVBUF.
* With "#if 1" in effect, an unrecognised option leaves ret untouched
* instead of returning -ENOPROTOOPT.
*
* NOTE(review): the function name, parameter list and the per-option
* handler calls are not visible in this view.
*/
/*ARGSUSED*/
static int
{
int ret = 0;
switch (optname) {
case RDSV3_CANCEL_SENT_TO:
break;
case RDSV3_GET_MR:
break;
case RDSV3_FREE_MR:
break;
case RDSV3_RECVERR:
break;
case RDSV3_CONG_MONITOR:
break;
/* The buffer-size options return directly rather than via "out". */
case SO_SNDBUF:
return (ret);
case SO_RCVBUF:
return (ret);
default:
/* The active branch ignores unknown options; the disabled one rejects them. */
#if 1
break;
#else
ret = -ENOPROTOOPT;
#endif
}
out:
return (ret);
}
/* XXX */
/*
* Socket-option getter (the debug message below identifies it as
* "rdsv3_getsockopt"); dispatches on optname.
*
* SO_SNDBUF / SO_RCVBUF: act only when *optlen is non-zero, return ret.
* RDSV3_RECVERR: returns -EINVAL if *optlen is smaller than an int;
* otherwise sets *optlen to sizeof (int) and returns 0.
* default: options in [RDSV3_INFO_FIRST, RDSV3_INFO_LAST] appear to be
* handed to another routine (only the tail of that call is
* visible); anything else yields -ENOPROTOOPT.
*
* NOTE(review): the function name, parameter list and the statements that
* write the option values are not visible in this view.
*/
/*ARGSUSED*/
static int
{
int ret = 0;
switch (optname) {
case SO_SNDBUF:
if (*optlen != 0) {
}
return (ret);
case SO_RCVBUF:
if (*optlen != 0) {
}
return (ret);
case RDSV3_RECVERR:
rs->rs_recverr);
/* The caller's buffer must be able to hold an int. */
if (*optlen < sizeof (int))
return (-EINVAL);
else {
*optlen = sizeof (int);
}
return (0);
default:
if ((optname >= RDSV3_INFO_FIRST) &&
(optname <= RDSV3_INFO_LAST)) {
optlen));
}
RDSV3_DPRINTF2("rdsv3_getsockopt",
ret = -ENOPROTOOPT;
}
return (ret);
}
/*
* Validates a destination address and returns ret (0 on success, a
* negative errno otherwise).
*
* Visible checks: addr_len must equal sizeof (struct sockaddr_in); two
* further checks, whose conditions are not visible, produce -EAFNOSUPPORT
* and -EDESTADDRREQ.
*
* NOTE(review): the return type, function name, parameter list, the
* conditions of the last two checks and the success-path statements are
* not visible in this view.
*/
/*ARGSUSED*/
{
int ret = 0;
if (addr_len != sizeof (struct sockaddr_in)) {
goto out;
}
ret = -EAFNOSUPPORT;
goto out;
}
ret = -EDESTADDRREQ;
goto out;
}
out:
return (ret);
}
/*
* Returns 0 unconditionally in the visible code.
*
* NOTE(review): the function name and parameter list are not visible in
* this view.
*/
/*ARGSUSED*/
static int
{
return (0);
}
/*
* NOTE(review): only the return type (void) and an empty body are visible
* in this view; the function name, parameter list and any statements are
* not shown, so nothing further can be documented here.
*/
/*ARGSUSED*/
void
{
}
/*
* Converts a negative return code into a positive errno for the caller:
* when ret is negative, -ret is returned; otherwise 0.
*
* NOTE(review): the function name, parameter list and the call that
* assigns ret are not visible in this view.
*/
/* ARGSUSED */
int
{
int ret;
if (ret < 0) {
return (-ret);
}
return (0);
}
/*
* Converts a negative return code into a positive errno for the caller:
* when ret is negative, -ret is returned; otherwise 0. A non-negative ret
* is not passed back.
*
* NOTE(review): the function name, parameter list and the call that
* assigns ret are not visible in this view.
*/
/* ARGSUSED */
int
{
int ret;
if (ret < 0) {
return (-ret);
}
return (0);
}
/*
* Returns -ENOTCONN when the socket has no connected peer address
* (rs_conn_addr is zero), 0 otherwise.
*
* NOTE(review): the function name, parameter list and the statements that
* fill in the peer address are not visible in this view.
*/
/*ARGSUSED*/
int
{
/* racy, don't care */
if (!rs->rs_conn_addr)
return (-ENOTCONN);
return (0);
}
/*
* NOTE(review): only the return type (void) and an empty body are visible
* in this view; the function name, parameter list and any statements are
* not shown.
*/
void
{
}
/*
* Downcall table for RDSv3 sockets. The normal build uses designated
* initializers to bind the bind, poll and ioctl entry points; when
* __lock_lint is defined a table of NULL entries is used instead.
*
* NOTE(review): only three designated entries and four NULL entries are
* visible in this view; the full table may contain more.
*/
#ifndef __lock_lint
static struct sock_downcalls_s rdsv3_sock_downcalls = {
.sd_bind = rdsv3_bind,
.sd_poll = rdsv3_poll,
.sd_ioctl = rdsv3_ioctl,
};
#else
static struct sock_downcalls_s rdsv3_sock_downcalls = {
NULL,
NULL,
NULL,
NULL,
};
#endif
/*
* Socket creation path: obtains a socket from rdsv3_sk_alloc(), has a path
* that returns NULL, does extra work when rdsv3_sock_count is 1, and
* clears *errorp before finishing.
*
* NOTE(review): the return type, function name, parameter list, the
* condition guarding the NULL return, the body of the count check and the
* final return statement are not visible in this view.
*/
{
struct rdsv3_sock *rs;
sk = rdsv3_sk_alloc();
return (NULL);
if (rdsv3_sock_count == 1) {
}
*errorp = 0;
}
/*
* NOTE(review): only the return type (void) and an empty body are visible
* in this view; the function name, parameter list and any statements are
* not shown.
*/
void
{
}
/*
* NOTE(review): only the return type (void) and an empty body are visible
* in this view; the function name, parameter list and any statements are
* not shown.
*/
void
{
}
/*
* Info callback dealing in struct rdsv3_info_message records.
*
* len is converted from a byte count to a record count, and total is
* incremented once per counted item.
*
* NOTE(review): the function name, parameter list, the enclosing loops and
* the statements that copy records out are not visible in this view.
*/
static void
{
struct rdsv3_sock *rs;
struct rdsv3_incoming *inc;
unsigned int total = 0;
/* From here on len counts records rather than bytes. */
len /= sizeof (struct rdsv3_info_message);
/* XXX too lazy to maintain counts.. */
total++;
}
}
}
/*
* Info callback dealing in struct rdsv3_info_socket records.
*
* len is converted from a byte count to a record count; bytes is set to
* the size of a single record.
*
* NOTE(review): the function name, parameter list, the condition guarding
* "goto out", the enclosing loop and the statements under the "out" label
* are not visible in this view.
*/
static void
{
struct rdsv3_info_socket sinfo;
struct rdsv3_sock *rs;
unsigned long bytes;
/* From here on len counts records rather than bytes. */
len /= sizeof (struct rdsv3_info_socket);
goto out;
bytes = sizeof (struct rdsv3_info_socket);
}
out:
}
/*
* Module teardown. Takes no arguments and returns nothing.
*
* Visible structure: a step guarded by rdsv3_rdma_dwp, a loop that runs
* until rdsv3_rdma_listen_id is NULL, then a second step guarded by
* rdsv3_rdma_dwp.
*
* NOTE(review): the bodies of both guards and of the loop are not visible
* in this view, so what clears rdsv3_rdma_listen_id cannot be checked here.
*/
void
rdsv3_exit(void)
{
if (rdsv3_rdma_dwp) {
}
/* Loops for as long as the RDMA listen id is still set. */
while (rdsv3_rdma_listen_id != NULL) {
#ifndef __lock_lint
#endif
}
if (rdsv3_rdma_dwp) {
}
}
/*
* Module initialisation. Brings subsystems up in order - connections,
* threads, sysctl, stats - and stops at the first one that returns
* non-zero. Returns ret: 0 on success, otherwise the failing step's code.
*
* NOTE(review): the function name and parameter list are not visible in
* this view. The goto targets out_conn, out_threads and out_sysctl are
* referenced but their labels and cleanup code are not visible either -
* confirm against the complete file.
*/
/*ARGSUSED*/
int
{
int ret;
ret = rdsv3_conn_init();
if (ret)
goto out;
ret = rdsv3_threads_init();
if (ret)
goto out_conn;
ret = rdsv3_sysctl_init();
if (ret)
goto out_threads;
ret = rdsv3_stats_init();
if (ret)
goto out_sysctl;
/* rdsv3_rdma_init needs to be called with a little delay */
goto out;
out:
return (ret);
}