iser_cq.c revision 30e7468f8f41aa30ada067b2c1d5d284046514da
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/sunddi.h>
#include <sys/sdt.h>
#include <sys/ib/ibtl/ibti.h>
#include <sys/ib/ibtl/ibtl_types.h>
#include <sys/ib/clients/iser/iser.h>
/*
* iser_cq.c
* Routines for completion queue handlers for iSER.
*/
static void iser_msg_handle(iser_chan_t *chan, iser_msg_t *msg);
int iser_iscsihdr_handle(iser_chan_t *chan, iser_msg_t *msg);
static int iser_ib_poll_send_completions(ibt_cq_hdl_t cq_hdl,
iser_chan_t *iser_chan);
static int iser_ib_poll_recv_completions(ibt_cq_hdl_t cq_hdl,
iser_chan_t *iser_chan);
void
iser_ib_sendcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
{
iser_chan_t *iser_chan;
ibt_status_t status;
iser_chan = (iser_chan_t *)arg;
/* Poll completions until the CQ is empty */
do {
status = iser_ib_poll_send_completions(cq_hdl, iser_chan);
} while (status != IBT_CQ_EMPTY);
/* We've emptied the CQ, rearm it before we're done here */
status = ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
if (status != IBT_SUCCESS) {
/* Unexpected error */
ISER_LOG(CE_NOTE, "iser_ib_sendcq_handler: "
"ibt_enable_cq_notify error (%d)", status);
return;
}
/* Now, check for more completions after the rearm */
do {
status = iser_ib_poll_send_completions(cq_hdl, iser_chan);
} while (status != IBT_CQ_EMPTY);
}
static int
iser_ib_poll_send_completions(ibt_cq_hdl_t cq_hdl, iser_chan_t *iser_chan)
{
ibt_wc_t wc[ISER_IB_SCQ_POLL_MAX];
ibt_wrid_t wrid;
idm_buf_t *idb = NULL;
idm_task_t *idt = NULL;
iser_wr_t *wr = NULL;
int i;
uint_t npoll = 0;
ibt_status_t status;
iser_conn_t *iser_conn;
idm_status_t idm_status;
iser_conn = iser_chan->ic_conn;
/*
* Poll ISER_IB_SCQ_POLL_MAX completions from the CQ.
*/
status = ibt_poll_cq(cq_hdl, wc, ISER_IB_SCQ_POLL_MAX, &npoll);
if (status != IBT_SUCCESS) {
if (status != IBT_CQ_EMPTY) {
/* Unexpected error */
ISER_LOG(CE_NOTE, "iser_ib_sendcq_handler: ibt_poll_cq "
"error (%d)", status);
}
/* CQ is empty. Either way, move along... */
return (status);
}
/*
* Handle each of the completions we've polled
*/
for (i = 0; i < npoll; i++) {
DTRACE_PROBE3(iser__send__cqe, iser_chan_t *, iser_chan,
ibt_wc_t *, &wc[i], ibt_wc_status_t, wc[i].wc_status);
/* Grab the wrid of the completion */
wrid = wc[i].wc_id;
/* Decrement this channel's SQ posted count */
mutex_enter(&iser_chan->ic_sq_post_lock);
iser_chan->ic_sq_post_count--;
mutex_exit(&iser_chan->ic_sq_post_lock);
/* Pull in the wr handle */
wr = (iser_wr_t *)(uintptr_t)wrid;
ASSERT(wr != NULL);
/* Set an idm_status for return to IDM */
idm_status = (wc[i].wc_status == IBT_WC_SUCCESS) ?
IDM_STATUS_SUCCESS : IDM_STATUS_FAIL;
/*
* A non-success status here indicates the QP went
* into an error state while this WR was being
* processed. This can also happen when the
* channel is closed on the remote end. Clean up
* the resources, then push CE_TRANSPORT_FAIL
* into IDM.
*/
if (wc[i].wc_status != IBT_WC_SUCCESS) {
/*
* Free the resources attached to this
* completion.
*/
if (wr->iw_msg != NULL) {
/* Free iser_msg handle */
iser_msg_free(wr->iw_msg);
}
if (wr->iw_pdu != NULL) {
/* Complete the PDU */
idm_pdu_complete(wr->iw_pdu, idm_status);
}
if (wr->iw_buf != NULL) {
/* Invoke buffer callback */
idb = wr->iw_buf;
#ifdef DEBUG
bcopy(&wc[i],
&((iser_buf_t *)idb->idb_buf_private)->
buf_wc, sizeof (ibt_wc_t));
#endif
idt = idb->idb_task_binding;
mutex_enter(&idt->idt_mutex);
if (wr->iw_type == ISER_WR_RDMAW) {
idm_buf_tx_to_ini_done(idt, idb,
IDM_STATUS_FAIL);
} else { /* ISER_WR_RDMAR */
idm_buf_rx_from_ini_done(idt, idb,
IDM_STATUS_FAIL);
}
}
/* Free the iser wr handle */
iser_wr_free(wr);
/*
* Tell IDM that the channel has gone down,
* unless he already knows.
*/
mutex_enter(&iser_conn->ic_lock);
switch (iser_conn->ic_stage) {
case ISER_CONN_STAGE_IC_DISCONNECTED:
case ISER_CONN_STAGE_IC_FREED:
case ISER_CONN_STAGE_CLOSING:
case ISER_CONN_STAGE_CLOSED:
break;
default:
idm_conn_event(iser_conn->ic_idmc,
CE_TRANSPORT_FAIL, idm_status);
iser_conn->ic_stage = ISER_CONN_STAGE_CLOSING;
}
mutex_exit(&iser_conn->ic_lock);
/* Move onto the next completion */
continue;
}
/*
* For a success status, just invoke the PDU or
* buffer completion. We use our WR handle's
* "iw_type" here so that we can properly process
* because the CQE's opcode is invalid if the status
* is failed.
*/
switch (wr->iw_type) {
case ISER_WR_SEND:
/* Free the msg handle */
ASSERT(wr->iw_msg != NULL);
iser_msg_free(wr->iw_msg);
if (wr->iw_pdu == NULL) {
/* This is a hello exchange message */
mutex_enter(&iser_conn->ic_lock);
if (iser_conn->ic_stage ==
ISER_CONN_STAGE_HELLOREPLY_SENT) {
/*
* We're on the target side,
* and have just successfully
* sent the HelloReply msg.
*/
iser_conn->ic_stage =
ISER_CONN_STAGE_LOGGED_IN;
}
mutex_exit(&iser_conn->ic_lock);
} else {
/* This is a normal control message */
idm_pdu_complete(wr->iw_pdu, idm_status);
}
/* Free the wr handle */
iser_wr_free(wr);
break;
case ISER_WR_RDMAW:
case ISER_WR_RDMAR:
/*
* Invoke the appropriate callback;
* the buffer will be freed there.
*/
idb = wr->iw_buf;
#ifdef DEBUG
bcopy(&wc[i],
&((iser_buf_t *)idb->idb_buf_private)->buf_wc,
sizeof (ibt_wc_t));
#endif
idt = idb->idb_task_binding;
mutex_enter(&idt->idt_mutex);
if (wr->iw_type == ISER_WR_RDMAW) {
idm_buf_tx_to_ini_done(idt, idb, idm_status);
} else {
idm_buf_rx_from_ini_done(idt, idb, idm_status);
}
/* Free the wr handle */
iser_wr_free(wr);
break;
default:
ASSERT(0);
break;
}
}
return (status);
}
void
iser_ib_recvcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
{
iser_chan_t *iser_chan;
ibt_status_t status;
iser_chan = (iser_chan_t *)arg;
/* Poll completions until the CQ is empty */
do {
status = iser_ib_poll_recv_completions(cq_hdl, iser_chan);
} while (status != IBT_CQ_EMPTY);
/* We've emptied the CQ, rearm it before we're done here */
status = ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
if (status != IBT_SUCCESS) {
/* Unexpected error */
ISER_LOG(CE_NOTE, "iser_ib_recvcq_handler: "
"ibt_enable_cq_notify error (%d)", status);
return;
}
/* Now, check for more completions after the rearm */
do {
status = iser_ib_poll_recv_completions(cq_hdl, iser_chan);
} while (status != IBT_CQ_EMPTY);
}
static int
iser_ib_poll_recv_completions(ibt_cq_hdl_t cq_hdl, iser_chan_t *iser_chan)
{
ibt_wc_t wc;
iser_msg_t *msg;
iser_qp_t *iser_qp;
int status;
iser_qp = &(iser_chan->ic_qp);
bzero(&wc, sizeof (ibt_wc_t));
status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
if (status == IBT_CQ_EMPTY) {
/* CQ is empty, return */
return (status);
}
if (status != IBT_SUCCESS) {
/* Unexpected error */
ISER_LOG(CE_NOTE, "iser_ib_poll_recv_completions: "
"ibt_poll_cq error (%d)", status);
mutex_enter(&iser_qp->qp_lock);
iser_qp->rq_level--;
mutex_exit(&iser_qp->qp_lock);
/* Free the msg handle (if we got it back) */
if ((msg = (iser_msg_t *)(uintptr_t)wc.wc_id) != NULL) {
iser_msg_free(msg);
}
return (status);
}
/* Retrieve the iSER msg handle */
msg = (iser_msg_t *)(uintptr_t)wc.wc_id;
ASSERT(msg != NULL);
/*
* Decrement the posted level in the RQ, then check
* to see if we need to fill the RQ back up (or if
* we are already on the taskq).
*/
mutex_enter(&iser_qp->qp_lock);
iser_qp->rq_level--;
if ((iser_qp->rq_taskqpending == B_FALSE) &&
(iser_qp->rq_level <= iser_qp->rq_lwm)) {
/* Set the pending flag and fire off a post_recv */
iser_qp->rq_taskqpending = B_TRUE;
mutex_exit(&iser_qp->qp_lock);
status = ddi_taskq_dispatch(iser_taskq, iser_ib_post_recv,
(void *)iser_chan->ic_chanhdl, DDI_NOSLEEP);
if (status != DDI_SUCCESS) {
ISER_LOG(CE_NOTE, "iser_ib_poll_recv_completions: "
"task dispatch failed");
/* Failure to launch, unset the pending flag */
mutex_enter(&iser_qp->qp_lock);
iser_qp->rq_taskqpending = B_FALSE;
mutex_exit(&iser_qp->qp_lock);
}
} else {
mutex_exit(&iser_qp->qp_lock);
}
DTRACE_PROBE3(iser__recv__cqe, iser_chan_t *, iser_chan,
ibt_wc_t *, &wc, ibt_wc_status_t, wc.wc_status);
if (wc.wc_status != IBT_WC_SUCCESS) {
/*
* Tell IDM that the channel has gone down,
* unless he already knows.
*/
mutex_enter(&iser_chan->ic_conn->ic_lock);
switch (iser_chan->ic_conn->ic_stage) {
case ISER_CONN_STAGE_IC_DISCONNECTED:
case ISER_CONN_STAGE_IC_FREED:
case ISER_CONN_STAGE_CLOSING:
case ISER_CONN_STAGE_CLOSED:
break;
default:
idm_conn_event(iser_chan->ic_conn->ic_idmc,
CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);
iser_chan->ic_conn->ic_stage =
ISER_CONN_STAGE_CLOSING;
}
mutex_exit(&iser_chan->ic_conn->ic_lock);
iser_msg_free(msg);
return (DDI_SUCCESS);
} else {
/*
* We have an iSER message in, let's handle it.
* We will free the iser_msg_t later in this path,
* depending upon the action required.
*/
iser_msg_handle(iser_chan, msg);
return (DDI_SUCCESS);
}
}
static void
iser_msg_handle(iser_chan_t *chan, iser_msg_t *msg)
{
int opcode;
iser_ctrl_hdr_t *hdr = NULL;
iser_conn_t *iser_conn = chan->ic_conn;
int status;
hdr = (iser_ctrl_hdr_t *)(uintptr_t)msg->msg_ds.ds_va;
ASSERT(hdr != NULL);
opcode = hdr->opcode;
if (opcode == ISER_OPCODE_CTRL_TYPE_PDU) {
/*
* Handle an iSCSI Control PDU iSER message.
* Note we'll free the msg handle in the PDU callback.
*/
status = iser_iscsihdr_handle(chan, msg);
if (status != DDI_SUCCESS) {
/*
* We are unable to handle this message, and
* have no way to recover from this. Fail the
* transport.
*/
ISER_LOG(CE_NOTE, "iser_msg_handle: failed "
"iser_iscsihdr_handle");
iser_msg_free(msg);
idm_conn_event(iser_conn->ic_idmc,
CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);
}
} else if (opcode == ISER_OPCODE_HELLO_MSG) { /* at the target */
/*
* We are currently not supporting Hello Exchange,
* since OFED iSER does not. May be revisited.
*/
ASSERT(opcode != ISER_OPCODE_HELLO_MSG);
if (iser_conn->ic_type != ISER_CONN_TYPE_TGT) {
idm_conn_event(iser_conn->ic_idmc,
CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);
}
iser_hello_hdr_t *hello_hdr = (iser_hello_hdr_t *)hdr;
ISER_LOG(CE_NOTE, "received Hello message: opcode[%d], "
"maxver[%d], minver[%d], iser_ird[%d], msg (0x%p)",
hello_hdr->opcode, hello_hdr->maxver, hello_hdr->minver,
ntohs(hello_hdr->iser_ird), (void *)msg);
mutex_enter(&iser_conn->ic_lock);
if (iser_conn->ic_stage != ISER_CONN_STAGE_HELLO_WAIT) {
/* target is not expected to receive a Hello */
idm_conn_event(iser_conn->ic_idmc,
CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);
}
iser_conn->ic_stage = ISER_CONN_STAGE_HELLOREPLY_SENT;
mutex_exit(&iser_conn->ic_lock);
/* Prepare and send a HelloReply message */
status = iser_xfer_helloreply_msg(chan);
if (status != ISER_STATUS_SUCCESS) {
mutex_enter(&iser_conn->ic_lock);
iser_conn->ic_stage =
ISER_CONN_STAGE_HELLOREPLY_SENT_FAIL;
mutex_exit(&iser_conn->ic_lock);
idm_conn_event(iser_conn->ic_idmc,
CE_TRANSPORT_FAIL, status);
}
/* Free this msg handle */
iser_msg_free(msg);
} else if (opcode == ISER_OPCODE_HELLOREPLY_MSG) { /* at initiator */
/*
* We are currently not supporting Hello Exchange,
* since OFED iSER does not. May be revisited.
*/
ASSERT(opcode != ISER_OPCODE_HELLOREPLY_MSG);
if (iser_conn->ic_type != ISER_CONN_TYPE_INI) {
idm_conn_event(iser_conn->ic_idmc,
CE_TRANSPORT_FAIL, status);
}
iser_helloreply_hdr_t *hello_hdr = (iser_helloreply_hdr_t *)hdr;
ISER_LOG(CE_NOTE, "received Hello Reply message: opcode[%d], "
"maxver[%d], curver[%d], iser_ord[%d], msg (0x%p)",
hello_hdr->opcode, hello_hdr->maxver, hello_hdr->curver,
ntohs(hello_hdr->iser_ord), (void *)msg);
/* Free this msg handle */
iser_msg_free(msg);
/*
* Signal the receipt of HelloReply to the waiting thread
* so that the initiator can proceed to the Full Feature
* Phase.
*/
mutex_enter(&iser_conn->ic_lock);
iser_conn->ic_stage = ISER_CONN_STAGE_HELLOREPLY_RCV;
cv_signal(&iser_conn->ic_stage_cv);
mutex_exit(&iser_conn->ic_lock);
} else {
/* Protocol error: free the msg handle and fail the session */
ISER_LOG(CE_NOTE, "iser_msg_handle: unsupported opcode (0x%x): "
"terminating session on IDM handle (0x%p)", opcode,
(void *) iser_conn->ic_idmc);
iser_msg_free(msg);
idm_conn_event(iser_conn->ic_idmc, CE_TRANSPORT_FAIL,
IDM_STATUS_FAIL);
}
}
#define IDM_PDU_OPCODE(PDU) \
((PDU)->isp_hdr->opcode & ISCSI_OPCODE_MASK)
/* network to host translation for 24b integers */
static uint32_t
n2h24(uchar_t *ptr)
{
return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]);
}
/* ARGSUSED */
static void
iser_rx_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
{
/* Free the iser msg handle and the PDU handle */
iser_msg_free((iser_msg_t *)pdu->isp_transport_private);
idm_pdu_free(pdu);
}
int
iser_iscsihdr_handle(iser_chan_t *chan, iser_msg_t *msg)
{
idm_pdu_t *pdu;
uint8_t *iser_hdrp;
uint8_t *iscsi_hdrp;
iscsi_hdr_t *bhs;
pdu = idm_pdu_alloc_nosleep(sizeof (iscsi_hdr_t), 0);
pdu->isp_ic = chan->ic_conn->ic_idmc;
ASSERT(pdu->isp_ic != NULL);
/* Set the iser_msg handle into the transport-private field */
pdu->isp_transport_private = (void *)msg;
/* Set up a pointer in the pdu handle to the iSER header */
iser_hdrp = (uint8_t *)(uintptr_t)msg->msg_ds.ds_va;
if (iser_hdrp == NULL) {
ISER_LOG(CE_NOTE, "iser_iscsihdr_handle: iser_hdrp is NULL");
idm_pdu_free(pdu);
return (ISER_STATUS_FAIL);
}
pdu->isp_transport_hdr = (void *)iser_hdrp;
pdu->isp_transport_hdrlen = ISER_HEADER_LENGTH;
/*
* Set up a pointer to the iSCSI header, which is directly
* after the iSER header in the message.
*/
iscsi_hdrp = ((uint8_t *)(uintptr_t)msg->msg_ds.ds_va) +
ISER_HEADER_LENGTH;
if (iscsi_hdrp == NULL) {
ISER_LOG(CE_NOTE, "iser_iscsihdr_handle: iscsi_hdrp is NULL");
idm_pdu_free(pdu);
return (ISER_STATUS_FAIL);
}
pdu->isp_hdr = (iscsi_hdr_t *)(uintptr_t)iscsi_hdrp;
/* Fill in the BHS */
bhs = pdu->isp_hdr;
pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
(bhs->hlength * sizeof (uint32_t));
pdu->isp_datalen = n2h24(bhs->dlength);
pdu->isp_callback = iser_rx_pdu_cb;
/*
* If datalen > 0, then non-scsi data may be present. Allocate
* space in the PDU handle and set a pointer to the data.
*/
if (pdu->isp_datalen) {
pdu->isp_data = ((uint8_t *)(uintptr_t)pdu->isp_hdr) +
pdu->isp_hdrlen;
}
/* Process RX PDU */
idm_pdu_rx(pdu->isp_ic, pdu);
return (DDI_SUCCESS);
}