idm_so.c revision a6d42e7d71324c5193c3b94d57d96ba2925d52e1
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/priv.h>
#include <sys/cpuvar.h>
#include <sys/socket.h>
#include <sys/strsubr.h>
#include <sys/sysmacros.h>
#include <sys/sdt.h>
#include <netinet/tcp.h>
#include <inet/tcp.h>
#include <sys/socketvar.h>
#include <sys/pathname.h>
#include <sys/fs/snode.h>
#include <sys/fs/dv_node.h>
#include <sys/vnode.h>
#include <netinet/in.h>
#include <net/if.h>
#include <sys/sockio.h>
#include <sys/idm/idm.h>
#include <sys/idm/idm_so.h>
#include <sys/idm/idm_text.h>
/*
* in6addr_any is currently all zeroes, but use the macro in case this
* ever changes.
*/
const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
static idm_status_t idm_so_conn_create_common(idm_conn_t *ic,
struct sonode *new_so);
static void idm_so_conn_destroy_common(idm_conn_t *ic);
static void idm_so_conn_connect_common(idm_conn_t *ic);
static void idm_set_ini_preconnect_options(idm_so_conn_t *sc);
static void idm_set_ini_postconnect_options(idm_so_conn_t *sc);
static void idm_set_tgt_connect_options(struct sonode *sonode);
static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);
static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
static idm_status_t idm_so_send_buf_region(idm_task_t *idt, uint8_t opcode,
idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);
static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
uint32_t ro, uint32_t dlength);
static idm_status_t idm_so_handle_digest(idm_conn_t *it,
nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);
/*
* Transport ops prototypes
*/
static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
static idm_status_t idm_so_notice_key_values(idm_conn_t *it,
nvlist_t *negotiated_nvl);
static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
idm_transport_caps_t *caps);
static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
static void idm_so_buf_free(idm_buf_t *idb);
static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
static void idm_so_buf_teardown(idm_buf_t *idb);
static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
static void idm_so_tgt_svc_destroy(idm_svc_t *is);
static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
static void idm_so_tgt_svc_offline(idm_svc_t *is);
static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
static void idm_so_conn_disconnect(idm_conn_t *ic);
static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
static void idm_so_ini_conn_destroy(idm_conn_t *ic);
static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
/*
* IDM Native Sockets transport operations
*/
static
idm_transport_ops_t idm_so_transport_ops = {
idm_so_tx, /* it_tx_pdu */
idm_so_buf_tx_to_ini, /* it_buf_tx_to_ini */
idm_so_buf_rx_from_ini, /* it_buf_rx_from_ini */
idm_so_rx_datain, /* it_rx_datain */
idm_so_rx_rtt, /* it_rx_rtt */
idm_so_rx_dataout, /* it_rx_dataout */
NULL, /* it_alloc_conn_rsrc */
NULL, /* it_free_conn_rsrc */
NULL, /* it_tgt_enable_datamover */
NULL, /* it_ini_enable_datamover */
NULL, /* it_conn_terminate */
idm_so_free_task_rsrc, /* it_free_task_rsrc */
idm_so_negotiate_key_values, /* it_negotiate_key_values */
idm_so_notice_key_values, /* it_notice_key_values */
idm_so_conn_is_capable, /* it_conn_is_capable */
idm_so_buf_alloc, /* it_buf_alloc */
idm_so_buf_free, /* it_buf_free */
idm_so_buf_setup, /* it_buf_setup */
idm_so_buf_teardown, /* it_buf_teardown */
idm_so_tgt_svc_create, /* it_tgt_svc_create */
idm_so_tgt_svc_destroy, /* it_tgt_svc_destroy */
idm_so_tgt_svc_online, /* it_tgt_svc_online */
idm_so_tgt_svc_offline, /* it_tgt_svc_offline */
idm_so_tgt_conn_destroy, /* it_tgt_conn_destroy */
idm_so_tgt_conn_connect, /* it_tgt_conn_connect */
idm_so_conn_disconnect, /* it_tgt_conn_disconnect */
idm_so_ini_conn_create, /* it_ini_conn_create */
idm_so_ini_conn_destroy, /* it_ini_conn_destroy */
idm_so_ini_conn_connect, /* it_ini_conn_connect */
idm_so_conn_disconnect /* it_ini_conn_disconnect */
};
/*
* idm_so_init()
* Sockets transport initialization
*/
void
idm_so_init(idm_transport_t *it)
{
	/* Cache for IDM Data and R2T Transmit PDUs */
idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
&idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
	/* Cache for IDM Receive PDUs */
idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
&idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
/* Set the sockets transport ops */
it->it_ops = &idm_so_transport_ops;
}
/*
* idm_so_fini()
* Sockets transport teardown
*/
void
idm_so_fini(void)
{
kmem_cache_destroy(idm.idm_sotx_pdu_cache);
kmem_cache_destroy(idm.idm_sorx_pdu_cache);
}
struct sonode *
idm_socreate(int domain, int type, int protocol)
{
vnode_t *dvp;
vnode_t *vp;
struct snode *csp;
int err;
major_t maj;
if ((vp = solookup(domain, type, protocol, NULL, &err)) == NULL) {
/*
* solookup calls sogetvp if the vp is not found in the cache.
* Since the call to sogetvp is hardwired to use USERSPACE
* and declared static we'll do the work here instead.
*/
err = lookupname(type == SOCK_STREAM ? "/dev/tcp" : "/dev/udp",
UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
if (err != 0)
return (NULL);
/* Check that it is the correct vnode */
if (vp->v_type != VCHR) {
VN_RELE(vp);
return (NULL);
}
csp = VTOS(VTOS(vp)->s_commonvp);
if (!(csp->s_flag & SDIPSET)) {
char *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
err = ddi_dev_pathname(vp->v_rdev, S_IFCHR,
pathname);
if (err == 0) {
err = devfs_lookupname(pathname, NULLVPP,
&dvp);
}
VN_RELE(vp);
kmem_free(pathname, MAXPATHLEN);
if (err != 0) {
return (NULL);
}
vp = dvp;
}
maj = getmajor(vp->v_rdev);
if (!STREAMSTAB(maj)) {
VN_RELE(vp);
return (NULL);
}
}
return (socreate(vp, domain, type, protocol, SOV_DEFAULT, NULL, &err));
}
/*
* idm_soshutdown will disconnect the socket and prevent subsequent PDU
* reception and transmission. The sonode still exists but its state
* gets modified to indicate it is no longer connected. Calls to
 * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used to
 * regain control of a thread stuck in idm_sorecv.
*/
void
idm_soshutdown(struct sonode *so)
{
(void) soshutdown(so, SHUT_RDWR);
}
/*
* idm_sodestroy releases all resources associated with a socket previously
* created with idm_socreate. The socket must be shutdown using
* idm_soshutdown before the socket is destroyed with idm_sodestroy,
* otherwise undefined behavior will result.
*/
void
idm_sodestroy(struct sonode *so)
{
vnode_t *vp = SOTOV(so);
(void) VOP_CLOSE(vp, 0, 1, 0, kcred, NULL);
VN_RELE(vp);
}
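/*
 * Illustrative lifecycle sketch (hypothetical caller; see the error paths
 * in idm_so_tgt_svc_online() and idm_so_svc_port_watcher() for real usage):
 *
 *	struct sonode *so;
 *
 *	if ((so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL)
 *		return (IDM_STATUS_FAIL);
 *	...
 *	idm_soshutdown(so);	(wakes any thread blocked in idm_sorecv)
 *	idm_sodestroy(so);	(must always follow idm_soshutdown)
 */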
/*
* IP address filter functions to flag addresses that should not
* go out to initiators through discovery.
*/
static boolean_t
idm_v4_addr_okay(struct in_addr *in_addr)
{
in_addr_t addr = ntohl(in_addr->s_addr);
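	/*
	 * Reject INADDR_NONE, multicast addresses, anything on network
	 * zero, and anything on the 127/8 loopback network.
	 */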
if ((INADDR_NONE == addr) ||
(IN_MULTICAST(addr)) ||
((addr >> IN_CLASSA_NSHIFT) == 0) ||
((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
return (B_FALSE);
}
return (B_TRUE);
}
static boolean_t
idm_v6_addr_okay(struct in6_addr *addr6)
{
if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
(IN6_IS_ADDR_LOOPBACK(addr6)) ||
(IN6_IS_ADDR_MULTICAST(addr6)) ||
(IN6_IS_ADDR_V4MAPPED(addr6)) ||
(IN6_IS_ADDR_V4COMPAT(addr6)) ||
(IN6_IS_ADDR_LINKLOCAL(addr6))) {
return (B_FALSE);
}
return (B_TRUE);
}
/*
 * idm_get_ipaddr will retrieve the list of IP addresses the host is
 * configured with by sending a sequence of kernel ioctls down to IP STREAMS.
*/
int
idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
{
struct sonode *so4, *so6;
vnode_t *vp, *vp4, *vp6;
struct lifnum lifn;
struct lifconf lifc;
struct lifreq *lp;
int rval;
int numifs;
int bufsize;
void *buf;
int i, j, n, rc;
struct sockaddr_storage ss;
struct sockaddr_in *sin;
struct sockaddr_in6 *sin6;
idm_addr_t *ip;
idm_addr_list_t *ipaddr;
int size_ipaddr;
*ipaddr_p = NULL;
size_ipaddr = 0;
buf = NULL;
/* create an ipv4 and ipv6 UDP socket */
if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
return (0);
if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
idm_sodestroy(so6);
return (0);
}
/* setup the vp's for each socket type */
vp6 = SOTOV(so6);
vp4 = SOTOV(so4);
/* use vp6 for ioctls with unspecified families by default */
vp = vp6;
retry_count:
/* snapshot the current number of interfaces */
lifn.lifn_family = PF_UNSPEC;
lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
lifn.lifn_count = 0;
if (VOP_IOCTL(vp, SIOCGLIFNUM, (intptr_t)&lifn, FKIOCTL, kcred,
&rval, NULL) != 0) {
goto cleanup;
}
numifs = lifn.lifn_count;
if (numifs <= 0) {
goto cleanup;
}
/* allocate extra room in case more interfaces appear */
numifs += 10;
/* get the interface names and ip addresses */
bufsize = numifs * sizeof (struct lifreq);
buf = kmem_alloc(bufsize, KM_SLEEP);
lifc.lifc_family = AF_UNSPEC;
lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
lifc.lifc_len = bufsize;
lifc.lifc_buf = buf;
rc = VOP_IOCTL(vp, SIOCGLIFCONF, (intptr_t)&lifc, FKIOCTL, kcred,
&rval, NULL);
if (rc != 0) {
goto cleanup;
}
/* if our extra room is used up, try again */
if (bufsize <= lifc.lifc_len) {
kmem_free(buf, bufsize);
buf = NULL;
goto retry_count;
}
/* calc actual number of ifconfs */
n = lifc.lifc_len / sizeof (struct lifreq);
/* get ip address */
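	/*
	 * idm_addr_list_t already provides storage for one idm_addr_t
	 * (al_addrs[0]), hence the (n - 1) additional entries below.
	 */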
if (n > 0) {
size_ipaddr = sizeof (idm_addr_list_t) +
(n - 1) * sizeof (idm_addr_t);
ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
} else {
goto cleanup;
}
/*
* Examine the array of interfaces and filter uninteresting ones
*/
for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
/*
* Copy the address as the SIOCGLIFFLAGS ioctl is destructive
*/
ss = lp->lifr_addr;
/*
* fetch the flags using the socket of the correct family
*/
switch (ss.ss_family) {
case AF_INET:
vp = vp4;
break;
case AF_INET6:
vp = vp6;
break;
default:
continue;
}
rc = VOP_IOCTL(vp, SIOCGLIFFLAGS, (intptr_t)lp, FKIOCTL, kcred,
&rval, NULL);
if (rc == 0) {
/*
* If we got the flags, skip uninteresting
* interfaces based on flags
*/
if ((lp->lifr_flags & IFF_UP) != IFF_UP)
continue;
if (lp->lifr_flags &
(IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
continue;
}
/* save ip address */
ip = &ipaddr->al_addrs[j];
switch (ss.ss_family) {
case AF_INET:
sin = (struct sockaddr_in *)&ss;
if (!idm_v4_addr_okay(&sin->sin_addr))
continue;
ip->a_addr.i_addr.in4 = sin->sin_addr;
ip->a_addr.i_insize = sizeof (struct in_addr);
break;
case AF_INET6:
sin6 = (struct sockaddr_in6 *)&ss;
if (!idm_v6_addr_okay(&sin6->sin6_addr))
continue;
ip->a_addr.i_addr.in6 = sin6->sin6_addr;
ip->a_addr.i_insize = sizeof (struct in6_addr);
break;
default:
continue;
}
j++;
}
if (j == 0) {
/* no valid ifaddr */
kmem_free(ipaddr, size_ipaddr);
size_ipaddr = 0;
ipaddr = NULL;
} else {
ipaddr->al_out_cnt = j;
}
cleanup:
idm_sodestroy(so6);
idm_sodestroy(so4);
if (buf != NULL)
kmem_free(buf, bufsize);
*ipaddr_p = ipaddr;
return (size_ipaddr);
}
int
idm_sorecv(struct sonode *so, void *msg, size_t len)
{
iovec_t iov;
ASSERT(so != NULL);
ASSERT(len != 0);
/*
* Fill in iovec and receive data
*/
iov.iov_base = msg;
iov.iov_len = len;
return (idm_iov_sorecv(so, &iov, 1, len));
}
/*
 * idm_sosendto - Sends buffered data on a non-connected socket.
*
* This function puts the data provided on the wire by calling sosendmsg.
* It will return only when all the data has been sent or if an error
* occurs.
*
* Returns 0 for success, the socket errno value if sosendmsg fails, and
* -1 if sosendmsg returns success but uio_resid != 0
*/
int
idm_sosendto(struct sonode *so, void *buff, size_t len,
struct sockaddr *name, socklen_t namelen)
{
struct msghdr msg;
struct uio uio;
struct iovec iov[1];
int error;
iov[0].iov_base = buff;
iov[0].iov_len = len;
/* Initialization of the message header. */
bzero(&msg, sizeof (msg));
msg.msg_iov = iov;
msg.msg_iovlen = 1;
/* Initialization of the uio structure. */
uio.uio_iov = iov;
uio.uio_iovcnt = 1;
uio.uio_segflg = UIO_SYSSPACE;
uio.uio_resid = len;
msg.msg_name = name;
msg.msg_namelen = namelen;
if ((error = sosendmsg(so, &msg, &uio)) == 0) {
/* Data sent */
if (uio.uio_resid == 0) {
/* All data sent. Success. */
return (0);
} else {
/* Not all data was sent. Failure */
return (-1);
}
}
/* Send failed */
return (error);
}
/*
* idm_iov_sosend - Sends an iovec on a connection.
*
* This function puts the data provided on the wire by calling sosendmsg.
* It will return only when all the data has been sent or if an error
* occurs.
*
* Returns 0 for success, the socket errno value if sosendmsg fails, and
* -1 if sosendmsg returns success but uio_resid != 0
*/
int
idm_iov_sosend(struct sonode *so, iovec_t *iop, int iovlen, size_t total_len)
{
struct msghdr msg;
struct uio uio;
int error;
ASSERT(iop != NULL);
/* Initialization of the message header. */
bzero(&msg, sizeof (msg));
msg.msg_iov = iop;
msg.msg_iovlen = iovlen;
/* Initialization of the uio structure. */
bzero(&uio, sizeof (uio));
uio.uio_iov = iop;
uio.uio_iovcnt = iovlen;
uio.uio_segflg = UIO_SYSSPACE;
uio.uio_resid = total_len;
if ((error = sosendmsg(so, &msg, &uio)) == 0) {
/* Data sent */
if (uio.uio_resid == 0) {
/* All data sent. Success. */
return (0);
} else {
/* Not all data was sent. Failure */
return (-1);
}
}
/* Send failed */
return (error);
}
/*
* idm_iov_sorecv - Receives an iovec from a connection
*
* This function gets the data asked for from the socket. It will return
* only when all the requested data has been retrieved or if an error
* occurs.
*
* Returns 0 for success, the socket errno value if sorecvmsg fails, and
* -1 if sorecvmsg returns success but uio_resid != 0
*/
int
idm_iov_sorecv(struct sonode *so, iovec_t *iop, int iovlen, size_t total_len)
{
struct msghdr msg;
struct uio uio;
int error;
ASSERT(iop != NULL);
/* Initialization of the message header. */
bzero(&msg, sizeof (msg));
msg.msg_iov = iop;
msg.msg_flags = MSG_WAITALL;
msg.msg_iovlen = iovlen;
/* Initialization of the uio structure. */
bzero(&uio, sizeof (uio));
uio.uio_iov = iop;
uio.uio_iovcnt = iovlen;
uio.uio_segflg = UIO_SYSSPACE;
uio.uio_resid = total_len;
if ((error = sorecvmsg(so, &msg, &uio)) == 0) {
/* Received data */
if (uio.uio_resid == 0) {
/* All requested data received. Success */
return (0);
} else {
/*
* Not all data was received. The connection has
* probably failed.
*/
return (-1);
}
}
/* Receive failed */
return (error);
}
static void
idm_set_ini_preconnect_options(idm_so_conn_t *sc)
{
int conn_abort = 10000;
int conn_notify = 2000;
int abort = 30000;
/* Pre-connect socket options */
(void) sosetsockopt(sc->ic_so, IPPROTO_TCP, TCP_CONN_NOTIFY_THRESHOLD,
(char *)&conn_notify, sizeof (int));
(void) sosetsockopt(sc->ic_so, IPPROTO_TCP, TCP_CONN_ABORT_THRESHOLD,
(char *)&conn_abort, sizeof (int));
(void) sosetsockopt(sc->ic_so, IPPROTO_TCP, TCP_ABORT_THRESHOLD,
(char *)&abort, sizeof (int));
}
static void
idm_set_ini_postconnect_options(idm_so_conn_t *sc)
{
int32_t rcvbuf = IDM_RCVBUF_SIZE;
int32_t sndbuf = IDM_SNDBUF_SIZE;
const int on = 1;
/* Set postconnect options */
(void) sosetsockopt(sc->ic_so, IPPROTO_TCP, TCP_NODELAY,
(char *)&on, sizeof (int));
(void) sosetsockopt(sc->ic_so, SOL_SOCKET, SO_RCVBUF,
(char *)&rcvbuf, sizeof (int));
(void) sosetsockopt(sc->ic_so, SOL_SOCKET, SO_SNDBUF,
(char *)&sndbuf, sizeof (int));
}
static void
idm_set_tgt_connect_options(struct sonode *sonode)
{
int32_t rcvbuf = IDM_RCVBUF_SIZE;
int32_t sndbuf = IDM_SNDBUF_SIZE;
const int on = 1;
/* Set connect options */
(void) sosetsockopt(sonode, SOL_SOCKET, SO_RCVBUF,
(char *)&rcvbuf, sizeof (int));
(void) sosetsockopt(sonode, SOL_SOCKET, SO_SNDBUF,
(char *)&sndbuf, sizeof (int));
(void) sosetsockopt(sonode, IPPROTO_TCP, TCP_NODELAY,
(char *)&on, sizeof (on));
}
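/*
 * n2h24() converts a 24-bit big-endian field (e.g. the iSCSI dlength
 * field) into a host-order uint32_t.  For example, the byte sequence
 * { 0x01, 0x02, 0x03 } yields 0x010203.
 */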
static uint32_t
n2h24(const uchar_t *ptr)
{
return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]);
}
static idm_status_t
idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
{
iscsi_hdr_t *bhs;
uint32_t hdr_digest_crc;
uint32_t crc_calculated;
void *new_hdr;
int ahslen = 0;
int total_len = 0;
int iovlen = 0;
struct iovec iov[2];
idm_so_conn_t *so_conn;
int rc;
so_conn = ic->ic_transport_private;
/*
* Read BHS
*/
bhs = pdu->isp_hdr;
rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
if (rc != IDM_STATUS_SUCCESS) {
return (IDM_STATUS_FAIL);
}
/*
 * Check the actual AHS length (hlength is in units of 4-byte words)
 * against the amount of space available in the buffer
*/
pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
(bhs->hlength * sizeof (uint32_t));
pdu->isp_datalen = n2h24(bhs->dlength);
if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
/* Allocate a new header segment and change the callback */
new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
pdu->isp_hdr = new_hdr;
pdu->isp_flags |= IDM_PDU_ADDL_HDR;
/*
* This callback will restore the expected values after
* the RX PDU has been processed.
*/
pdu->isp_callback = idm_sorx_addl_pdu_cb;
}
/*
* Setup receipt of additional header and header digest (if enabled).
*/
if (bhs->hlength > 0) {
iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
iov[iovlen].iov_len = ahslen;
total_len += iov[iovlen].iov_len;
iovlen++;
}
if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
iov[iovlen].iov_len = sizeof (hdr_digest_crc);
total_len += iov[iovlen].iov_len;
iovlen++;
}
if ((iovlen != 0) &&
(idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
total_len) != 0)) {
return (IDM_STATUS_FAIL);
}
/*
* Validate header digest if enabled
*/
if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
crc_calculated = idm_crc32c(pdu->isp_hdr,
sizeof (iscsi_hdr_t) + ahslen);
if (crc_calculated != hdr_digest_crc) {
/* Invalid Header Digest */
return (IDM_STATUS_HEADER_DIGEST);
}
}
	return (IDM_STATUS_SUCCESS);
}
/*
* idm_so_ini_conn_create()
* Allocate the sockets transport connection resources.
*/
static idm_status_t
idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
{
struct sonode *so;
idm_so_conn_t *so_conn;
idm_status_t idmrc;
so = idm_socreate(cr->cr_domain, cr->cr_type,
cr->cr_protocol);
if (so == NULL) {
return (IDM_STATUS_FAIL);
}
/* Bind the socket if configured to do so */
if (cr->cr_bound) {
if (sobind(so, &cr->cr_bound_addr.sin,
SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), 0, 0) != 0) {
idm_sodestroy(so);
return (IDM_STATUS_FAIL);
}
}
idmrc = idm_so_conn_create_common(ic, so);
if (idmrc != IDM_STATUS_SUCCESS) {
idm_soshutdown(so);
idm_sodestroy(so);
return (IDM_STATUS_FAIL);
}
so_conn = ic->ic_transport_private;
/* Set up socket options */
idm_set_ini_preconnect_options(so_conn);
return (IDM_STATUS_SUCCESS);
}
/*
* idm_so_ini_conn_destroy()
* Tear down the sockets transport connection resources.
*/
static void
idm_so_ini_conn_destroy(idm_conn_t *ic)
{
idm_so_conn_destroy_common(ic);
}
/*
* idm_so_ini_conn_connect()
* Establish the connection referred to by the handle previously allocated via
* idm_so_ini_conn_create().
*/
static idm_status_t
idm_so_ini_conn_connect(idm_conn_t *ic)
{
idm_so_conn_t *so_conn;
so_conn = ic->ic_transport_private;
if (soconnect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
(SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), 0, 0) != 0) {
idm_soshutdown(so_conn->ic_so);
return (IDM_STATUS_FAIL);
}
idm_so_conn_connect_common(ic);
idm_set_ini_postconnect_options(so_conn);
return (IDM_STATUS_SUCCESS);
}
idm_status_t
idm_so_tgt_conn_create(idm_conn_t *ic, struct sonode *new_so)
{
idm_status_t idmrc;
idmrc = idm_so_conn_create_common(ic, new_so);
return (idmrc);
}
static void
idm_so_tgt_conn_destroy(idm_conn_t *ic)
{
idm_so_conn_destroy_common(ic);
}
/*
* idm_so_tgt_conn_connect()
* Establish the connection in ic, passed from idm_tgt_conn_finish(), which
* is invoked from the SM as a result of an inbound connection request.
*/
static idm_status_t
idm_so_tgt_conn_connect(idm_conn_t *ic)
{
idm_so_conn_connect_common(ic);
return (IDM_STATUS_SUCCESS);
}
static idm_status_t
idm_so_conn_create_common(idm_conn_t *ic, struct sonode *new_so)
{
idm_so_conn_t *so_conn;
so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
so_conn->ic_so = new_so;
ic->ic_transport_private = so_conn;
ic->ic_transport_hdrlen = 0;
/* Set the scoreboarding flag on this connection */
ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
/*
* Initialize tx thread mutex and list
*/
mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
offsetof(idm_pdu_t, idm_tx_link));
return (IDM_STATUS_SUCCESS);
}
static void
idm_so_conn_destroy_common(idm_conn_t *ic)
{
idm_so_conn_t *so_conn = ic->ic_transport_private;
ic->ic_transport_private = NULL;
idm_sodestroy(so_conn->ic_so);
list_destroy(&so_conn->ic_tx_list);
mutex_destroy(&so_conn->ic_tx_mutex);
cv_destroy(&so_conn->ic_tx_cv);
kmem_free(so_conn, sizeof (idm_so_conn_t));
}
static void
idm_so_conn_connect_common(idm_conn_t *ic)
{
idm_so_conn_t *so_conn;
so_conn = ic->ic_transport_private;
SOP_GETSOCKNAME(so_conn->ic_so);
/* Set the local and remote addresses in the idm conn handle */
mutex_enter(&so_conn->ic_so->so_lock);
bcopy(so_conn->ic_so->so_laddr_sa, &ic->ic_laddr,
so_conn->ic_so->so_laddr_len);
bcopy(so_conn->ic_so->so_faddr_sa, &ic->ic_raddr,
so_conn->ic_so->so_faddr_len);
mutex_exit(&so_conn->ic_so->so_lock);
mutex_enter(&ic->ic_mutex);
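	/*
	 * Start the TX and RX threads for this connection and wait for
	 * both to signal (via ic_cv) that they are running.
	 */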
so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
&p0, TS_RUN, minclsyspri);
so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
&p0, TS_RUN, minclsyspri);
while (!so_conn->ic_rx_thread_running || !so_conn->ic_tx_thread_running)
cv_wait(&ic->ic_cv, &ic->ic_mutex);
mutex_exit(&ic->ic_mutex);
}
/*
* idm_so_conn_disconnect()
 * Shutdown the socket connection and stop the TX and RX threads
*/
static void
idm_so_conn_disconnect(idm_conn_t *ic)
{
idm_so_conn_t *so_conn;
so_conn = ic->ic_transport_private;
mutex_enter(&ic->ic_mutex);
so_conn->ic_rx_thread_running = B_FALSE;
so_conn->ic_tx_thread_running = B_FALSE;
	/* We need to wake up the TX thread */
mutex_enter(&so_conn->ic_tx_mutex);
cv_signal(&so_conn->ic_tx_cv);
mutex_exit(&so_conn->ic_tx_mutex);
mutex_exit(&ic->ic_mutex);
	/* This should wake up the RX thread if it is sleeping */
idm_soshutdown(so_conn->ic_so);
thread_join(so_conn->ic_tx_thread_did);
thread_join(so_conn->ic_rx_thread_did);
}
/*
* idm_so_tgt_svc_create()
* Establish a service on an IP address and port. idm_svc_req_t contains
* the service parameters.
*/
/*ARGSUSED*/
static idm_status_t
idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
{
idm_so_svc_t *so_svc;
so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
/* Set the new sockets service in svc handle */
is->is_so_svc = (void *)so_svc;
return (IDM_STATUS_SUCCESS);
}
/*
* idm_so_tgt_svc_destroy()
* Teardown sockets resources allocated in idm_so_tgt_svc_create()
*/
static void
idm_so_tgt_svc_destroy(idm_svc_t *is)
{
/* the socket will have been torn down; free the service */
kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
}
/*
* idm_so_tgt_svc_online()
* Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
*/
static idm_status_t
idm_so_tgt_svc_online(idm_svc_t *is)
{
idm_so_svc_t *so_svc;
idm_svc_req_t *sr = &is->is_svc_req;
struct sockaddr_in6 sin6_ip;
const uint32_t on = 1;
const uint32_t off = 0;
mutex_enter(&is->is_mutex);
so_svc = (idm_so_svc_t *)is->is_so_svc;
/*
* Try creating an IPv6 socket first
*/
if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
mutex_exit(&is->is_mutex);
return (IDM_STATUS_FAIL);
} else {
bzero(&sin6_ip, sizeof (sin6_ip));
sin6_ip.sin6_family = AF_INET6;
sin6_ip.sin6_port = htons(sr->sr_port);
sin6_ip.sin6_addr = in6addr_any;
(void) sosetsockopt(so_svc->is_so, SOL_SOCKET, SO_REUSEADDR,
(char *)&on, sizeof (on));
/*
* Turn off SO_MAC_EXEMPT so future sobinds succeed
*/
(void) sosetsockopt(so_svc->is_so, SOL_SOCKET, SO_MAC_EXEMPT,
(char *)&off, sizeof (off));
if (sobind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
sizeof (sin6_ip), 0, 0) != 0) {
mutex_exit(&is->is_mutex);
idm_sodestroy(so_svc->is_so);
return (IDM_STATUS_FAIL);
}
}
idm_set_tgt_connect_options(so_svc->is_so);
if (solisten(so_svc->is_so, 5) != 0) {
mutex_exit(&is->is_mutex);
idm_soshutdown(so_svc->is_so);
idm_sodestroy(so_svc->is_so);
return (IDM_STATUS_FAIL);
}
/* Launch a watch thread */
so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
is, 0, &p0, TS_RUN, minclsyspri);
if (so_svc->is_thread == NULL) {
/* Failure to launch; teardown the socket */
mutex_exit(&is->is_mutex);
idm_soshutdown(so_svc->is_so);
idm_sodestroy(so_svc->is_so);
return (IDM_STATUS_FAIL);
}
/* Wait for the port watcher thread to start */
while (!so_svc->is_thread_running)
cv_wait(&is->is_cv, &is->is_mutex);
mutex_exit(&is->is_mutex);
return (IDM_STATUS_SUCCESS);
}
/*
* idm_so_tgt_svc_offline
*
* Stop listening on the IP address and port identified by idm_svc_t.
*/
static void
idm_so_tgt_svc_offline(idm_svc_t *is)
{
idm_so_svc_t *so_svc;
mutex_enter(&is->is_mutex);
so_svc = (idm_so_svc_t *)is->is_so_svc;
so_svc->is_thread_running = B_FALSE;
mutex_exit(&is->is_mutex);
/*
* When called from the kernel, soaccept blocks and cannot be woken
* up via the sockfs API. soclose does not work like you would
* hope. When the Volo project is available we can switch to that
* API which should address this issue. For now, we will poke at
* the socket to wake it up.
*/
mutex_enter(&so_svc->is_so->so_lock);
so_svc->is_so->so_error = EINTR;
cv_signal(&so_svc->is_so->so_connind_cv);
mutex_exit(&so_svc->is_so->so_lock);
/*
* Now we expect the port watcher thread to terminate
*/
thread_join(so_svc->is_thread_did);
/*
* Teardown socket
*/
idm_sodestroy(so_svc->is_so);
}
/*
* Watch thread for target service connection establishment.
*/
void
idm_so_svc_port_watcher(void *arg)
{
idm_svc_t *svc = arg;
struct sonode *new_so;
idm_conn_t *ic;
idm_status_t idmrc;
idm_so_svc_t *so_svc;
int rc;
const uint32_t off = 0;
mutex_enter(&svc->is_mutex);
so_svc = svc->is_so_svc;
so_svc->is_thread_running = B_TRUE;
so_svc->is_thread_did = so_svc->is_thread->t_did;
cv_signal(&svc->is_cv);
IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
svc->is_svc_req.sr_port);
while (so_svc->is_thread_running) {
mutex_exit(&svc->is_mutex);
if ((rc = soaccept(so_svc->is_so, 0, &new_so)) != 0) {
mutex_enter(&svc->is_mutex);
if (rc == ECONNABORTED)
continue;
/* Connection problem */
break;
}
/*
* Turn off SO_MAC_EXEMPT so future sobinds succeed
*/
(void) sosetsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
(char *)&off, sizeof (off));
idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
&ic);
if (idmrc != IDM_STATUS_SUCCESS) {
/* Drop connection */
idm_soshutdown(new_so);
idm_sodestroy(new_so);
mutex_enter(&svc->is_mutex);
continue;
}
idmrc = idm_so_tgt_conn_create(ic, new_so);
if (idmrc != IDM_STATUS_SUCCESS) {
idm_svc_conn_destroy(ic);
idm_soshutdown(new_so);
idm_sodestroy(new_so);
mutex_enter(&svc->is_mutex);
continue;
}
/*
* Kick the state machine. At CS_S3_XPT_UP the state machine
* will notify the client (target) about the new connection.
*/
idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);
mutex_enter(&svc->is_mutex);
}
so_svc->is_thread_running = B_FALSE;
mutex_exit(&svc->is_mutex);
IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
svc->is_svc_req.sr_port);
thread_exit();
}
/*
* idm_so_free_task_rsrc() stops any ongoing processing of the task and
* frees resources associated with the task.
*
* It's not clear that this should return idm_status_t. What do we do
* if it fails?
*/
static idm_status_t
idm_so_free_task_rsrc(idm_task_t *idt)
{
idm_buf_t *idb;
/*
* If this is a target connection, call idm_buf_rx_from_ini_done for
* any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
*
* In addition, remove any buffers associated with this task from
* the ic_tx_list. We'll do this by walking the idt_inbufv list, but
* items don't actually get removed from that list (and completion
* routines called) until idm_task_cleanup.
*/
mutex_enter(&idt->idt_mutex);
for (idb = list_head(&idt->idt_outbufv); idb != NULL;
idb = list_next(&idt->idt_outbufv, idb)) {
if (idb->idb_in_transport) {
/*
* idm_buf_rx_from_ini_done releases idt->idt_mutex
*/
idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
mutex_enter(&idt->idt_mutex);
}
}
for (idb = list_head(&idt->idt_inbufv); idb != NULL;
idb = list_next(&idt->idt_inbufv, idb)) {
/*
* We want to remove these items from the tx_list as well,
* but knowing it's in the idt_inbufv list is not a guarantee
* that it's in the tx_list. If it's on the tx list then
* let idm_sotx_thread() clean it up.
*/
if (idb->idb_in_transport && !idb->idb_tx_thread) {
/*
* idm_buf_tx_to_ini_done releases idt->idt_mutex
*/
idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
mutex_enter(&idt->idt_mutex);
}
}
mutex_exit(&idt->idt_mutex);
return (IDM_STATUS_SUCCESS);
}
/*
* idm_so_negotiate_key_values() validates the key values for this connection
*/
/* ARGSUSED */
static kv_status_t
idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
{
/* All parameters are negotiated at the iscsit level */
return (KV_HANDLED);
}
/*
* idm_so_notice_key_values() activates the negotiated key values for
* this connection.
*/
static idm_status_t
idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
{
char *nvp_name;
nvpair_t *nvp;
nvpair_t *next_nvp;
int nvrc;
idm_status_t idm_status;
const idm_kv_xlate_t *ikvx;
for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
nvp != NULL; nvp = next_nvp) {
next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
nvp_name = nvpair_name(nvp);
ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
switch (ikvx->ik_key_id) {
case KI_HEADER_DIGEST:
case KI_DATA_DIGEST:
idm_status = idm_so_handle_digest(it, nvp, ikvx);
ASSERT(idm_status == 0);
/* Remove processed item from negotiated_nvl list */
nvrc = nvlist_remove_all(
negotiated_nvl, ikvx->ik_key_name);
ASSERT(nvrc == 0);
break;
default:
break;
}
}
return (IDM_STATUS_SUCCESS);
}
static idm_status_t
idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
const idm_kv_xlate_t *ikvx)
{
int nvrc;
char *digest_choice_string;
nvrc = nvpair_value_string(digest_choice,
&digest_choice_string);
ASSERT(nvrc == 0);
if (strcasecmp(digest_choice_string, "crc32c") == 0) {
switch (ikvx->ik_key_id) {
case KI_HEADER_DIGEST:
it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
break;
case KI_DATA_DIGEST:
it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
break;
default:
ASSERT(0);
break;
}
} else if (strcasecmp(digest_choice_string, "none") == 0) {
switch (ikvx->ik_key_id) {
case KI_HEADER_DIGEST:
it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
break;
case KI_DATA_DIGEST:
it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
break;
default:
ASSERT(0);
break;
}
} else {
ASSERT(0);
}
return (IDM_STATUS_SUCCESS);
}
/*
 * idm_so_conn_is_capable() verifies that the requested connection can be
 * handled by the sockets transport.
*/
/* ARGSUSED */
static boolean_t
idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
{
return (B_TRUE);
}
/*
* idm_so_rx_datain() validates the Data Sequence number of the PDU. The
* idm_sorecv_scsidata() function invoked earlier actually reads the data
* off the socket into the appropriate buffers.
*/
static void
idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
{
iscsi_data_hdr_t *bhs;
idm_task_t *idt;
idm_buf_t *idb;
uint32_t datasn;
size_t offset;
iscsi_hdr_t *ihp = (iscsi_hdr_t *)pdu->isp_hdr;
iscsi_data_rsp_hdr_t *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
ASSERT(ic != NULL);
ASSERT(pdu != NULL);
bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
datasn = ntohl(bhs->datasn);
offset = ntohl(bhs->offset);
ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA_RSP);
/*
* Look up the task corresponding to the initiator task tag
* to get the buffers affiliated with the task.
*/
idt = idm_task_find(ic, bhs->itt, bhs->ttt);
if (idt == NULL) {
IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
idm_pdu_rx_protocol_error(ic, pdu);
return;
}
idb = pdu->isp_sorx_buf;
if (idb == NULL) {
IDM_CONN_LOG(CE_WARN,
"idm_so_rx_datain: failed to find buffer");
idm_task_rele(idt);
idm_pdu_rx_protocol_error(ic, pdu);
return;
}
/*
* DataSN values should be sequential and should not have any gaps or
* repetitions. Check the DataSN with the one stored in the task.
*/
if (datasn == idt->idt_exp_datasn) {
idt->idt_exp_datasn++; /* keep track of DataSN received */
} else {
IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
idm_task_rele(idt);
idm_pdu_rx_protocol_error(ic, pdu);
return;
}
idm_task_rele(idt);
/*
 * PDUs within a sequence should arrive with continuously increasing
 * buffer offsets
*/
if (offset != idb->idb_exp_offset) {
IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
idm_pdu_rx_protocol_error(ic, pdu);
return;
}
/* Expected next relative buffer offset */
idb->idb_exp_offset += n2h24(bhs->dlength);
/*
* For now call scsi_rsp which will process the data rsp
* Revisit, need to provide an explicit client entry point for
* phase collapse completions.
*/
if (((ihp->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_SCSI_DATA_RSP) &&
(idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
(*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
}
idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
}
/*
* The idm_so_rx_dataout() function is used by the iSCSI target to read
* data from the Data-Out PDU sent by the iSCSI initiator.
*
* This function gets the Initiator Task Tag from the PDU BHS and looks up the
* task to get the buffers associated with the PDU. A PDU might span buffers.
* The data is then read into the respective buffer.
*/
static void
idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
{
iscsi_data_hdr_t *bhs;
idm_task_t *idt;
idm_buf_t *idb;
size_t offset;
ASSERT(ic != NULL);
ASSERT(pdu != NULL);
bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
offset = ntohl(bhs->offset);
ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA);
/*
* Look up the task corresponding to the initiator task tag
* to get the buffers affiliated with the task.
*/
idt = idm_task_find(ic, bhs->itt, bhs->ttt);
if (idt == NULL) {
IDM_CONN_LOG(CE_WARN,
"idm_so_rx_dataout: failed to find task");
idm_pdu_rx_protocol_error(ic, pdu);
return;
}
idb = pdu->isp_sorx_buf;
if (idb == NULL) {
IDM_CONN_LOG(CE_WARN,
"idm_so_rx_dataout: failed to find buffer");
idm_task_rele(idt);
idm_pdu_rx_protocol_error(ic, pdu);
return;
}
/* Keep track of data transferred - check data offsets */
if (offset != idb->idb_exp_offset) {
IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
"%ld, %d", offset, idb->idb_exp_offset);
idm_task_rele(idt);
idm_pdu_rx_protocol_error(ic, pdu);
return;
}
/* Expected next relative offset */
idb->idb_exp_offset += ntoh24(bhs->dlength);
/*
* Call the buffer callback when the transfer is complete
*
* The connection state machine should only abort tasks after
* shutting down the connection so we are assured that there
* won't be a simultaneous attempt to abort this task at the
* same time as we are processing this PDU (due to a connection
* state change).
*/
if (bhs->flags & ISCSI_FLAG_FINAL) {
/*
* We only want to call idm_buf_rx_from_ini_done once
* per transfer. It's possible that this task has
* already been aborted in which case
* idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
* for each buffer with idb_in_transport==B_TRUE. To
* close this window and ensure that this doesn't happen,
* we'll clear idb->idb_in_transport now while holding
* the task mutex. This is only really an issue for
* SCSI task abort -- if tasks were being aborted because
* of a connection state change the state machine would
* have already stopped the receive thread.
*/
mutex_enter(&idt->idt_mutex);
/*
* Release the task hold here (obtained in idm_task_find)
* because the task may complete synchronously during
* idm_buf_rx_from_ini_done. Since we still have an active
* buffer we know there is at least one additional hold on idt.
*/
idm_task_rele(idt);
/*
* idm_buf_rx_from_ini_done releases idt->idt_mutex
*/
idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
return;
}
idm_task_rele(idt);
idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
}
/*
* The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
* the R2T PDU sent by the iSCSI target indicating that it is ready to
* accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
* and looks up the task in the task tree using the itt to get the output
* buffers associated the task. The R2T PDU contains the offset of the
* requested data and the data length. This function then constructs a
* sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
* PDU is associated with the R2T by the Target Transfer Tag (ttt).
*/
static void
idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
{
idm_task_t *idt;
idm_buf_t *idb;
iscsi_rtt_hdr_t *rtt_hdr;
uint32_t data_offset;
ASSERT(ic != NULL);
ASSERT(pdu != NULL);
rtt_hdr = (iscsi_rtt_hdr_t *)pdu->isp_hdr;
data_offset = ntohl(rtt_hdr->data_offset);
idt = idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
if (idt == NULL) {
IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
idm_pdu_rx_protocol_error(ic, pdu);
return;
}
/* Find the buffer bound to the task by the iSCSI initiator */
mutex_enter(&idt->idt_mutex);
idb = idm_buf_find(&idt->idt_outbufv, data_offset);
idt->idt_r2t_ttt = rtt_hdr->ttt;
/* reset to zero */
idt->idt_exp_datasn = 0;
if (idb == NULL) {
mutex_exit(&idt->idt_mutex);
idm_task_rele(idt);
IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
idm_pdu_rx_protocol_error(ic, pdu);
return;
}
(void) idm_so_send_buf_region(idt, ISCSI_OP_SCSI_DATA, idb,
data_offset, ntohl(rtt_hdr->data_length));
mutex_exit(&idt->idt_mutex);
idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
idm_task_rele(idt);
}
idm_status_t
idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
{
uint8_t pad[ISCSI_PAD_WORD_LEN];
int pad_len;
uint32_t data_digest_crc;
uint32_t crc_calculated;
int total_len;
idm_so_conn_t *so_conn;
so_conn = ic->ic_transport_private;
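	/*
	 * iSCSI data segments are padded out to a 4-byte boundary.
	 * Compute the number of pad bytes that follow the data on the
	 * wire; e.g. isp_datalen of 5 gives pad_len 3, and any multiple
	 * of 4 gives pad_len 0.
	 */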
pad_len = ((ISCSI_PAD_WORD_LEN -
(pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
(ISCSI_PAD_WORD_LEN - 1));
ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
total_len = pdu->isp_datalen;
if (pad_len) {
pdu->isp_iov[pdu->isp_iovlen].iov_base = (char *)&pad;
pdu->isp_iov[pdu->isp_iovlen].iov_len = pad_len;
total_len += pad_len;
pdu->isp_iovlen++;
}
/* setup data digest */
if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
pdu->isp_iov[pdu->isp_iovlen].iov_base =
(char *)&data_digest_crc;
pdu->isp_iov[pdu->isp_iovlen].iov_len =
sizeof (data_digest_crc);
total_len += sizeof (data_digest_crc);
pdu->isp_iovlen++;
}
if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
pdu->isp_iovlen, total_len) != 0) {
return (IDM_STATUS_IO);
}
if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
crc_calculated = idm_crc32c(pdu->isp_data,
pdu->isp_datalen);
if (pad_len) {
crc_calculated = idm_crc32c_continued((char *)&pad,
pad_len, crc_calculated);
}
if (crc_calculated != data_digest_crc) {
IDM_CONN_LOG(CE_WARN,
"idm_sorecvdata: "
"CRC error: actual 0x%x, calc 0x%x",
data_digest_crc, crc_calculated);
/* Invalid Data Digest */
return (IDM_STATUS_DATA_DIGEST);
}
}
return (IDM_STATUS_SUCCESS);
}
/*
* idm_sorecv_scsidata() is used to receive scsi data from the socket. The
* Data-type PDU header must be read into the idm_pdu_t structure prior to
* calling this function.
*/
idm_status_t
idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
{
iscsi_data_hdr_t *bhs;
idm_task_t *task;
uint32_t offset;
uint8_t opcode;
uint32_t dlength;
list_t *buflst;
uint32_t xfer_bytes;
idm_status_t status;
ASSERT(ic != NULL);
ASSERT(pdu != NULL);
bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
offset = ntohl(bhs->offset);
opcode = bhs->opcode;
dlength = n2h24(bhs->dlength);
ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
(opcode == ISCSI_OP_SCSI_DATA));
/*
* Successful lookup implicitly gets a "hold" on the task. This
* hold must be released before leaving this function. At one
* point we were caching this task context and retaining the hold
* but it turned out to be very difficult to release the hold properly.
* The task can be aborted and the connection shutdown between this
* call and the subsequent expected call to idm_so_rx_datain/
* idm_so_rx_dataout (in which case those functions are not called).
* Releasing the hold in the PDU callback doesn't work well either
* because the whole task may be completed by then at which point
* it is too late to release the hold -- for better or worse this
* code doesn't wait on the refcnts during normal operation.
* idm_task_find() is very fast and it is not a huge burden if we
* have to do it twice.
*/
task = idm_task_find(ic, bhs->itt, bhs->ttt);
if (task == NULL) {
IDM_CONN_LOG(CE_WARN,
"idm_sorecv_scsidata: could not find task");
return (IDM_STATUS_FAIL);
}
mutex_enter(&task->idt_mutex);
buflst = (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
&task->idt_inbufv : &task->idt_outbufv;
pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
mutex_exit(&task->idt_mutex);
if (pdu->isp_sorx_buf == NULL) {
idm_task_rele(task);
IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
"buffer for offset %x opcode=%x",
offset, opcode);
return (IDM_STATUS_FAIL);
}
xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
ASSERT(xfer_bytes != 0);
if (xfer_bytes != dlength) {
idm_task_rele(task);
/*
* Buffer overflow, connection error. The PDU data is still
* sitting in the socket so we can't use the connection
* again until that data is drained.
*/
return (IDM_STATUS_FAIL);
}
status = idm_sorecvdata(ic, pdu);
idm_task_rele(task);
return (status);
}
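/*
 * idm_fill_iov() adds an iovec entry mapping the portion of buffer 'idb'
 * that corresponds to transfer offset 'ro', limited to 'dlength' bytes or
 * the space remaining in the buffer, whichever is smaller.  Returns the
 * number of bytes mapped into the iovec.
 */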
static uint32_t
idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
{
uint32_t buf_ro = ro - idb->idb_bufoffset;
uint32_t xfer_len = min(dlength, idb->idb_buflen - buf_ro);
ASSERT(ro >= idb->idb_bufoffset);
pdu->isp_iov[pdu->isp_iovlen].iov_base =
(caddr_t)idb->idb_buf + buf_ro;
pdu->isp_iov[pdu->isp_iovlen].iov_len = xfer_len;
pdu->isp_iovlen++;
return (xfer_len);
}
int
idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
{
pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
ASSERT(pdu->isp_data != NULL);
pdu->isp_databuflen = pdu->isp_datalen;
pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
pdu->isp_iov[0].iov_len = pdu->isp_datalen;
pdu->isp_iovlen = 1;
/*
* Since we are associating a new data buffer with this received
* PDU we need to set a specific callback to free the data
* after the PDU is processed.
*/
pdu->isp_flags |= IDM_PDU_ADDL_DATA;
pdu->isp_callback = idm_sorx_addl_pdu_cb;
return (idm_sorecvdata(ic, pdu));
}
void
idm_sorx_thread(void *arg)
{
boolean_t conn_failure = B_FALSE;
idm_conn_t *ic = (idm_conn_t *)arg;
idm_so_conn_t *so_conn;
idm_pdu_t *pdu;
idm_status_t rc;
idm_conn_hold(ic);
mutex_enter(&ic->ic_mutex);
so_conn = ic->ic_transport_private;
so_conn->ic_rx_thread_running = B_TRUE;
so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
cv_signal(&ic->ic_cv);
while (so_conn->ic_rx_thread_running) {
mutex_exit(&ic->ic_mutex);
/*
 * Get a PDU with the default header size (large enough for the
 * BHS plus any anticipated AHS).  A PDU from the cache will have
 * all values, including the callback, set correctly for sockets RX.
*/
pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
pdu->isp_ic = ic;
pdu->isp_flags = 0;
pdu->isp_transport_hdrlen = 0;
if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
/*
* Call idm_pdu_complete so that we call the callback
* and ensure any memory allocated in idm_sorecvhdr
* gets freed up.
*/
idm_pdu_complete(pdu, IDM_STATUS_FAIL);
/*
* If ic_rx_thread_running is still set then
* this is some kind of connection problem
* on the socket. In this case we want to
* generate an event. Otherwise some other
* thread closed the socket due to another
* issue in which case we don't need to
* generate an event.
*/
mutex_enter(&ic->ic_mutex);
if (so_conn->ic_rx_thread_running) {
conn_failure = B_TRUE;
so_conn->ic_rx_thread_running = B_FALSE;
}
continue;
}
/*
* Header has been read and validated. Now we need
* to read the PDU data payload (if present). SCSI data
* need to be transferred from the socket directly into
* the associated transfer buffer for the SCSI task.
*/
if (pdu->isp_datalen != 0) {
if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
(IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
rc = idm_sorecv_scsidata(ic, pdu);
/*
* All SCSI errors are fatal to the
* connection right now since we have no
* place to put the data. What we need
* is some kind of sink to dispose of unwanted
* SCSI data. For example an invalid task tag
* should not kill the connection (although
* we may want to drop the connection).
*/
} else {
/*
 * Not a data PDU, so allocate a buffer for the
* data segment and read the remaining data.
*/
rc = idm_sorecv_nonscsidata(ic, pdu);
}
if (rc != 0) {
/*
* Call idm_pdu_complete so that we call the
* callback and ensure any memory allocated
* in idm_sorecvhdr gets freed up.
*/
idm_pdu_complete(pdu, IDM_STATUS_FAIL);
/*
* If ic_rx_thread_running is still set then
* this is some kind of connection problem
* on the socket. In this case we want to
* generate an event. Otherwise some other
* thread closed the socket due to another
* issue in which case we don't need to
* generate an event.
*/
mutex_enter(&ic->ic_mutex);
if (so_conn->ic_rx_thread_running) {
conn_failure = B_TRUE;
so_conn->ic_rx_thread_running = B_FALSE;
}
continue;
}
}
/*
* Process RX PDU
*/
idm_pdu_rx(ic, pdu);
mutex_enter(&ic->ic_mutex);
}
mutex_exit(&ic->ic_mutex);
/*
* If we dropped out of the RX processing loop because of
* a socket problem or other connection failure (including
* digest errors) then we need to generate a state machine
* event to shut the connection down.
* If the state machine is already in, for example, INIT_ERROR, this
* event will get dropped, and the TX thread will never be notified
* to shut down. To be safe, we'll just notify it here.
*/
if (conn_failure) {
if (so_conn->ic_tx_thread_running) {
so_conn->ic_tx_thread_running = B_FALSE;
mutex_enter(&so_conn->ic_tx_mutex);
cv_signal(&so_conn->ic_tx_cv);
mutex_exit(&so_conn->ic_tx_mutex);
}
idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
}
idm_conn_rele(ic);
thread_exit();
}
/*
* idm_so_tx
*
* This is the implementation of idm_transport_ops_t's it_tx_pdu entry
* point. By definition, it is supposed to be fast. So, simply queue
* the entry and return. The real work is done by idm_i_so_tx() via
* idm_sotx_thread().
*/
static void
idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
{
idm_so_conn_t *so_conn = ic->ic_transport_private;
ASSERT(pdu->isp_ic == ic);
mutex_enter(&so_conn->ic_tx_mutex);
if (!so_conn->ic_tx_thread_running) {
mutex_exit(&so_conn->ic_tx_mutex);
idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
return;
}
list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
cv_signal(&so_conn->ic_tx_cv);
mutex_exit(&so_conn->ic_tx_mutex);
}
static idm_status_t
idm_i_so_tx(idm_pdu_t *pdu)
{
idm_conn_t *ic = pdu->isp_ic;
idm_status_t status = IDM_STATUS_SUCCESS;
uint8_t pad[ISCSI_PAD_WORD_LEN];
int pad_len;
uint32_t hdr_digest_crc;
uint32_t data_digest_crc = 0;
int total_len = 0;
int iovlen = 0;
struct iovec iov[6];
idm_so_conn_t *so_conn;
so_conn = ic->ic_transport_private;
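	/*
	 * The PDU goes out in a single sosendmsg call using up to five
	 * iovec entries: BHS (plus any AHS), optional header digest,
	 * optional data segment, optional pad, and optional data digest.
	 */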
/* Setup BHS */
iov[iovlen].iov_base = (caddr_t)pdu->isp_hdr;
iov[iovlen].iov_len = pdu->isp_hdrlen;
total_len += iov[iovlen].iov_len;
iovlen++;
/* Setup header digest */
if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
(ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
iov[iovlen].iov_len = sizeof (hdr_digest_crc);
total_len += iov[iovlen].iov_len;
iovlen++;
}
/* Setup the data */
if (pdu->isp_datalen) {
idm_task_t *idt;
idm_buf_t *idb;
iscsi_data_hdr_t *ihp;
ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
/* Write of immediate data */
if (ic->ic_ffp &&
(ihp->opcode == ISCSI_OP_SCSI_CMD ||
ihp->opcode == ISCSI_OP_SCSI_DATA)) {
idt = idm_task_find(ic, ihp->itt, ihp->ttt);
if (idt) {
mutex_enter(&idt->idt_mutex);
idb = idm_buf_find(&idt->idt_outbufv, 0);
mutex_exit(&idt->idt_mutex);
idb->idb_xfer_len += pdu->isp_datalen;
}
}
iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
iov[iovlen].iov_len = pdu->isp_datalen;
total_len += iov[iovlen].iov_len;
iovlen++;
}
/* Setup the data pad if necessary */
pad_len = ((ISCSI_PAD_WORD_LEN -
(pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
(ISCSI_PAD_WORD_LEN - 1));
if (pad_len) {
bzero(pad, sizeof (pad));
iov[iovlen].iov_base = (void *)&pad;
iov[iovlen].iov_len = pad_len;
total_len += iov[iovlen].iov_len;
iovlen++;
}
/*
* Setup the data digest if enabled. Data-digest is not sent
* for login-phase PDUs.
*/
if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
(pdu->isp_datalen || pad_len)) {
/*
* RFC3720/10.2.3: A zero-length Data Segment also
* implies a zero-length data digest.
*/
if (pdu->isp_datalen) {
data_digest_crc = idm_crc32c(pdu->isp_data,
pdu->isp_datalen);
}
if (pad_len) {
data_digest_crc = idm_crc32c_continued(&pad,
pad_len, data_digest_crc);
}
iov[iovlen].iov_base = (caddr_t)&data_digest_crc;
iov[iovlen].iov_len = sizeof (data_digest_crc);
total_len += iov[iovlen].iov_len;
iovlen++;
}
/* Transmit the PDU */
if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
total_len) != 0) {
/* Set error status */
IDM_CONN_LOG(CE_WARN,
"idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
"data: %p", (void *) so_conn->ic_so, (void *) ic,
(void *) pdu->isp_data);
status = IDM_STATUS_IO;
}
/*
* Success does not mean that the PDU actually reached the
* remote node since it could get dropped along the way.
*/
idm_pdu_complete(pdu, status);
return (status);
}
/*
 * The idm_so_buf_tx_to_ini() function is used by the target iSCSI layer to transmit the
* Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
* the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
* A target can invoke this function multiple times for a single read command
* (identified by the same ITT) to split the input into several sequences.
*
* DataSN starts with 0 for the first data PDU of an input command and advances
* by 1 for each subsequent data PDU. Each sequence will have its own F bit,
* which is set to 1 for the last data PDU of a sequence.
*
* Scope for Prototype build:
 * The data PDUs within a sequence will be sent in order of increasing buffer
 * offset, i.e. the initiator and target must have negotiated
 * "DataPDUInOrder" to "Yes".  The order between sequences is not enforced.
*
* Caller holds idt->idt_mutex
*/
static idm_status_t
idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
{
idm_so_conn_t *so_conn = idb->idb_ic->ic_transport_private;
idm_pdu_t tmppdu;
ASSERT(mutex_owned(&idt->idt_mutex));
/*
* Put the idm_buf_t on the tx queue. It will be transmitted by
* idm_sotx_thread.
*/
mutex_enter(&so_conn->ic_tx_mutex);
if (!so_conn->ic_tx_thread_running) {
mutex_exit(&so_conn->ic_tx_mutex);
/*
 * Don't release idt->idt_mutex since we're supposed to hold
 * it when calling idm_buf_tx_to_ini_done
*/
idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
return (IDM_STATUS_FAIL);
}
/*
* Build a template for the data PDU headers we will use so that
 * the SN values will stay consistent with other PDUs we are
* transmitting like R2T and SCSI status.
*/
bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
ISCSI_OP_SCSI_DATA_RSP);
idb->idb_tx_thread = B_TRUE;
list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
cv_signal(&so_conn->ic_tx_cv);
mutex_exit(&so_conn->ic_tx_mutex);
mutex_exit(&idt->idt_mutex);
/*
* Returning success here indicates the transfer was successfully
* dispatched -- it does not mean that the transfer completed
* successfully.
*/
return (IDM_STATUS_SUCCESS);
}
/*
 * The idm_so_buf_rx_from_ini() function is used by the target iSCSI layer to specify the
* data blocks it is ready to receive from the initiator in response to a WRITE
* SCSI command. The target iSCSI layer passes the information about the desired
* data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
* offset and datalen are passed via the 'idb' argument.
*
* Scope for Prototype build:
* R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
* negotiated the "InitialR2T" to "Yes".
*
* Caller holds idt->idt_mutex
*/
static idm_status_t
idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
{
idm_pdu_t *pdu;
iscsi_rtt_hdr_t *rtt;
ASSERT(mutex_owned(&idt->idt_mutex));
pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
pdu->isp_ic = idt->idt_ic;
bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
/* iSCSI layer fills the TTT, ITT, StatSN, ExpCmdSN, MaxCmdSN */
(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
/* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
rtt->opcode = ISCSI_OP_RTT_RSP;
rtt->flags = ISCSI_FLAG_FINAL;
rtt->data_offset = htonl(idb->idb_bufoffset);
rtt->data_length = htonl(idb->idb_xfer_len);
rtt->rttsn = htonl(idt->idt_exp_rttsn++);
/* Keep track of buffer offsets */
idb->idb_exp_offset = idb->idb_bufoffset;
mutex_exit(&idt->idt_mutex);
/*
* Transmit the PDU. Call the internal routine directly as there
* is already implicit ordering of the PDU.
*/
(void) idm_i_so_tx(pdu);
return (IDM_STATUS_SUCCESS);
}
static idm_status_t
idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
{
idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
if (idb->idb_buf == NULL) {
IDM_CONN_LOG(CE_NOTE,
"idm_so_buf_alloc: failed buffer allocation");
return (IDM_STATUS_FAIL);
}
return (IDM_STATUS_SUCCESS);
}
/* ARGSUSED */
static idm_status_t
idm_so_buf_setup(idm_buf_t *idb)
{
/* nothing to do here */
return (IDM_STATUS_SUCCESS);
}
/* ARGSUSED */
static void
idm_so_buf_teardown(idm_buf_t *idb)
{
/* nothing to do here */
}
static void
idm_so_buf_free(idm_buf_t *idb)
{
kmem_free(idb->idb_buf, idb->idb_buflen);
}
idm_status_t
idm_so_send_buf_region(idm_task_t *idt, uint8_t opcode, idm_buf_t *idb,
uint32_t buf_region_offset, uint32_t buf_region_length)
{
idm_conn_t *ic;
uint32_t max_dataseglen;
size_t remainder, chunk;
uint32_t data_offset = buf_region_offset;
iscsi_data_hdr_t *bhs;
idm_pdu_t *pdu;
ASSERT(mutex_owned(&idt->idt_mutex));
ic = idt->idt_ic;
max_dataseglen = 8192; /* Need value from login negotiation */
remainder = buf_region_length;
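	/*
	 * Walk the buffer region, sending one Data PDU of at most
	 * max_dataseglen bytes per iteration.  The final PDU of the
	 * sequence carries the F bit.
	 */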
while (remainder) {
if (idt->idt_state != TASK_ACTIVE) {
ASSERT((idt->idt_state != TASK_IDLE) &&
(idt->idt_state != TASK_COMPLETE));
return (IDM_STATUS_ABORTED);
}
/* check to see if we need to chunk the data */
if (remainder > max_dataseglen) {
chunk = max_dataseglen;
} else {
chunk = remainder;
}
/* Data PDU headers will always be sizeof (iscsi_hdr_t) */
pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
pdu->isp_ic = ic;
/*
 * For the target we've already built a header template
 * to use during the transfer.  Use this template so that
 * the SN values stay consistent with any unrelated PDUs
* being transmitted.
*/
if (opcode == ISCSI_OP_SCSI_DATA_RSP) {
bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
sizeof (iscsi_hdr_t));
} else {
/*
* OK for now, but we should remove this bzero and
* make sure the build_hdr function is initializing the
* header properly
*/
bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
/*
* setup iscsi data hdr
* callback to the iSCSI layer to fill in the BHS
* CmdSN, StatSN, ExpCmdSN, MaxCmdSN, TTT, ITT and
* opcode
*/
(*ic->ic_conn_ops.icb_build_hdr)(idt, pdu, opcode);
}
/*
* Set DataSN, data offset, and flags in BHS
* For the prototype build, A = 0, S = 0, U = 0
*/
bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
bhs->datasn = htonl(idt->idt_exp_datasn++);
hton24(bhs->dlength, chunk);
bhs->offset = htonl(idb->idb_bufoffset + data_offset);
if (chunk == remainder) {
bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
}
/* setup data */
pdu->isp_data = (uint8_t *)idb->idb_buf + data_offset;
pdu->isp_datalen = (uint_t)chunk;
remainder -= chunk;
data_offset += chunk;
/*
* Now that we're done working with idt_exp_datasn,
* idt->idt_state and idb->idb_bufoffset we can release
* the task lock -- don't want to hold it across the
* call to idm_i_so_tx since we could block.
*/
mutex_exit(&idt->idt_mutex);
/*
* Transmit the PDU. Call the internal routine directly
* as there is already implicit ordering.
*/
(void) idm_i_so_tx(pdu);
mutex_enter(&idt->idt_mutex);
}
return (IDM_STATUS_SUCCESS);
}
/*
* TX PDU cache
*/
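/*
 * Each TX cache object is allocated with sizeof (idm_pdu_t) +
 * sizeof (iscsi_hdr_t) bytes (see idm_so_init()), so the constructor can
 * point isp_hdr at the storage immediately following the PDU structure.
 */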
/* ARGSUSED */
int
idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
{
idm_pdu_t *pdu = hdl;
bzero(pdu, sizeof (idm_pdu_t));
pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
pdu->isp_callback = idm_sotx_cache_pdu_cb;
pdu->isp_magic = IDM_PDU_MAGIC;
bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
return (0);
}
/* ARGSUSED */
void
idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
{
/* reset values between use */
pdu->isp_datalen = 0;
kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
}
/*
* RX PDU cache
*/
/* ARGSUSED */
int
idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
{
idm_pdu_t *pdu = hdl;
bzero(pdu, sizeof (idm_pdu_t));
pdu->isp_magic = IDM_PDU_MAGIC;
pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
pdu->isp_callback = idm_sorx_cache_pdu_cb;
return (0);
}
/* ARGSUSED */
static void
idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
{
pdu->isp_iovlen = 0;
pdu->isp_sorx_buf = 0;
kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
}
static void
idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
{
/*
* We had to modify our cached RX PDU with a longer header buffer
* and/or a longer data buffer. Release the new buffers and fix
* the fields back to what we would expect for a cached RX PDU.
*/
if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
}
if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
kmem_free(pdu->isp_data, pdu->isp_datalen);
}
pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
pdu->isp_data = NULL;
pdu->isp_datalen = 0;
pdu->isp_sorx_buf = 0;
pdu->isp_callback = idm_sorx_cache_pdu_cb;
idm_sorx_cache_pdu_cb(pdu, status);
}
/*
* This thread is only active when I/O is queued for transmit
* because the socket is busy.
*/
void
idm_sotx_thread(void *arg)
{
idm_conn_t *ic = arg;
idm_tx_obj_t *object, *next;
idm_so_conn_t *so_conn;
idm_status_t status = IDM_STATUS_SUCCESS;
idm_conn_hold(ic);
mutex_enter(&ic->ic_mutex);
so_conn = ic->ic_transport_private;
so_conn->ic_tx_thread_running = B_TRUE;
so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
cv_signal(&ic->ic_cv);
mutex_exit(&ic->ic_mutex);
mutex_enter(&so_conn->ic_tx_mutex);
while (so_conn->ic_tx_thread_running) {
while (list_is_empty(&so_conn->ic_tx_list)) {
DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);
if (!so_conn->ic_tx_thread_running) {
goto tx_bail;
}
}
object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
list_remove(&so_conn->ic_tx_list, object);
mutex_exit(&so_conn->ic_tx_mutex);
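		/*
		 * Two object types may be queued on the TX list,
		 * distinguished by their leading magic number: complete
		 * PDUs queued by idm_so_tx() and data buffers queued by
		 * idm_so_buf_tx_to_ini().
		 */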
switch (object->idm_tx_obj_magic) {
case IDM_PDU_MAGIC:
DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
idm_pdu_t *, (idm_pdu_t *)object);
status = idm_i_so_tx((idm_pdu_t *)object);
break;
case IDM_BUF_MAGIC: {
idm_buf_t *idb = (idm_buf_t *)object;
idm_task_t *idt = idb->idb_task_binding;
DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
idm_buf_t *, idb);
mutex_enter(&idt->idt_mutex);
status = idm_so_send_buf_region(idt,
ISCSI_OP_SCSI_DATA_RSP, idb, 0, idb->idb_xfer_len);
/*
* TX thread owns the buffer so we expect it to
* be "in transport"
*/
ASSERT(idb->idb_in_transport);
/*
* idm_buf_tx_to_ini_done releases idt->idt_mutex
*/
idm_buf_tx_to_ini_done(idt, idb, status);
break;
}
default:
IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
"(0x%08x)", object->idm_tx_obj_magic);
status = IDM_STATUS_FAIL;
}
mutex_enter(&so_conn->ic_tx_mutex);
if (status != IDM_STATUS_SUCCESS) {
so_conn->ic_tx_thread_running = B_FALSE;
idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
}
}
/*
* Before we leave, we need to abort every item remaining in the
* TX list.
*/
tx_bail:
object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
while (object != NULL) {
next = list_next(&so_conn->ic_tx_list, object);
list_remove(&so_conn->ic_tx_list, object);
switch (object->idm_tx_obj_magic) {
case IDM_PDU_MAGIC:
idm_pdu_complete((idm_pdu_t *)object,
IDM_STATUS_ABORTED);
break;
case IDM_BUF_MAGIC: {
idm_buf_t *idb = (idm_buf_t *)object;
idm_task_t *idt = idb->idb_task_binding;
mutex_exit(&so_conn->ic_tx_mutex);
mutex_enter(&idt->idt_mutex);
/*
* TX thread owns the buffer so we expect it to
* be "in transport"
*/
ASSERT(idb->idb_in_transport);
/*
* idm_buf_tx_to_ini_done releases idt->idt_mutex
*/
idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
mutex_enter(&so_conn->ic_tx_mutex);
break;
}
default:
IDM_CONN_LOG(CE_WARN,
"idm_sotx_thread: Unexpected magic "
"(0x%08x)", object->idm_tx_obj_magic);
}
object = next;
}
mutex_exit(&so_conn->ic_tx_mutex);
idm_conn_rele(ic);
thread_exit();
/*NOTREACHED*/
}