ib_recv.c revision 5e12ddada2833f3aa285210603ce9aaeb8be35cc
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo/*
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo/*
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * This file contains code imported from the OFED rds source file ib_recv.c
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Oracle elects to have and use the contents of ib_recv.c under and governed
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * by the OpenIB.org BSD license (see below for full license text). However,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * the following notice accompanied the original version of this file:
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo/*
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Copyright (c) 2006 Oracle. All rights reserved.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo *
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * This software is available to you under a choice of one of two
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * licenses. You may choose to be licensed under the terms of the GNU
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * General Public License (GPL) Version 2, available from the file
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * COPYING in the main directory of this source tree, or the
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * OpenIB.org BSD license below:
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo *
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Redistribution and use in source and binary forms, with or
20ae46ebaff1237662e05edf9db61538aa85d448ha * without modification, are permitted provided that the following
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * conditions are met:
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo *
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * - Redistributions of source code must retain the above
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * copyright notice, this list of conditions and the following
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * disclaimer.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo *
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * - Redistributions in binary form must reproduce the above
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * copyright notice, this list of conditions and the following
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * disclaimer in the documentation and/or other materials
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * provided with the distribution.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo *
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * SOFTWARE.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo *
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo#include <sys/types.h>
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo#include <sys/kmem.h>
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo#include <sys/cpuvar.h>
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo#include <sys/rds.h>
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
a10abbb48301520aaa9158a9d71fac18fc269159lm#include <sys/ib/clients/rdsv3/rdsv3.h>
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo#include <sys/ib/clients/rdsv3/ib.h>
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo#include <sys/ib/clients/rdsv3/rdsv3_debug.h>
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppostatic struct kmem_cache *rdsv3_ib_incoming_slab;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppostatic atomic_t rdsv3_ib_allocation = ATOMIC_INIT(0);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppovoid
d66f83158d97c12b2a78b9363a07d9d365762606jbrdsv3_ib_recv_init_ring(struct rdsv3_ib_connection *ic)
0705ae3a16fa797df2a0885aadbf7b05cec9bbf2raghuram{
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_ib_recv_work *recv;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_header *hdrp;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo uint32_t i;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
20ae46ebaff1237662e05edf9db61538aa85d448ha RDSV3_DPRINTF4("rdsv3_ib_recv_init_ring", "ic: %p", ic);
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha hdrp = ic->i_recv_hdrs;
20ae46ebaff1237662e05edf9db61538aa85d448ha for (i = 0, recv = ic->i_recvs; i < ic->i_recv_ring.w_nr; i++, recv++) {
20ae46ebaff1237662e05edf9db61538aa85d448ha recv->r_ibinc = NULL;
20ae46ebaff1237662e05edf9db61538aa85d448ha recv->r_frag = NULL;
20ae46ebaff1237662e05edf9db61538aa85d448ha
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* initialize the hdr sgl permanently */
20ae46ebaff1237662e05edf9db61538aa85d448ha recv->r_sge[0].ds_va = (ib_vaddr_t)(uintptr_t)hdrp++;
20ae46ebaff1237662e05edf9db61538aa85d448ha recv->r_sge[0].ds_len = sizeof (struct rdsv3_header);
20ae46ebaff1237662e05edf9db61538aa85d448ha recv->r_sge[0].ds_key = ic->i_mr->lkey;
20ae46ebaff1237662e05edf9db61538aa85d448ha }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo}
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppostatic void
a8ea4ede2107d9ad3895b91946b9f33a83c5f7eenarayanrdsv3_ib_recv_clear_one(struct rdsv3_ib_connection *ic,
0d0c8d4ba091e011eca697b381d5f1a44939cdeanarayan struct rdsv3_ib_recv_work *recv)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo{
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_recv_clear_one", "ic: %p, recv: %p",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ic, recv);
0705ae3a16fa797df2a0885aadbf7b05cec9bbf2raghuram
0705ae3a16fa797df2a0885aadbf7b05cec9bbf2raghuram if (recv->r_ibinc) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_inc_put(&recv->r_ibinc->ii_inc);
20ae46ebaff1237662e05edf9db61538aa85d448ha recv->r_ibinc = NULL;
20ae46ebaff1237662e05edf9db61538aa85d448ha }
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha if (recv->r_frag) {
20ae46ebaff1237662e05edf9db61538aa85d448ha kmem_cache_free(ic->rds_ibdev->ib_frag_slab, recv->r_frag);
20ae46ebaff1237662e05edf9db61538aa85d448ha recv->r_frag = NULL;
20ae46ebaff1237662e05edf9db61538aa85d448ha }
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha RDSV3_DPRINTF4("rdsv3_ib_recv_clear_one", "Return: ic: %p, recv: %p",
20ae46ebaff1237662e05edf9db61538aa85d448ha ic, recv);
20ae46ebaff1237662e05edf9db61538aa85d448ha}
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448havoid
20ae46ebaff1237662e05edf9db61538aa85d448hardsv3_ib_recv_clear_ring(struct rdsv3_ib_connection *ic)
20ae46ebaff1237662e05edf9db61538aa85d448ha{
20ae46ebaff1237662e05edf9db61538aa85d448ha uint32_t i;
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha RDSV3_DPRINTF4("rdsv3_ib_recv_clear_ring", "ic: %p", ic);
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha for (i = 0; i < ic->i_recv_ring.w_nr; i++)
20ae46ebaff1237662e05edf9db61538aa85d448ha rdsv3_ib_recv_clear_one(ic, &ic->i_recvs[i]);
20ae46ebaff1237662e05edf9db61538aa85d448ha}
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448haextern int atomic_add_unless(atomic_t *, uint_t, ulong_t);
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448hastatic int
20ae46ebaff1237662e05edf9db61538aa85d448hardsv3_ib_recv_refill_one(struct rdsv3_connection *conn,
20ae46ebaff1237662e05edf9db61538aa85d448ha struct rdsv3_ib_recv_work *recv)
20ae46ebaff1237662e05edf9db61538aa85d448ha{
20ae46ebaff1237662e05edf9db61538aa85d448ha struct rdsv3_ib_connection *ic = conn->c_transport_data;
20ae46ebaff1237662e05edf9db61538aa85d448ha ibt_mi_hdl_t mi_hdl;
20ae46ebaff1237662e05edf9db61538aa85d448ha ibt_iov_attr_t iov_attr;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ibt_iov_t iov_arr[1];
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF5("rdsv3_ib_recv_refill_one", "conn: %p, recv: %p",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo conn, recv);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (!recv->r_ibinc) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (!atomic_add_unless(&rdsv3_ib_allocation, 1,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ic->i_max_recv_alloc)) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_ib_stats_inc(s_ib_rx_alloc_limit);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo goto out;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo recv->r_ibinc = kmem_cache_alloc(rdsv3_ib_incoming_slab,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo KM_NOSLEEP);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (recv->r_ibinc == NULL) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo atomic_add_32(&rdsv3_ib_allocation, -1);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo goto out;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_inc_init(&recv->r_ibinc->ii_inc, conn, conn->c_faddr);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo recv->r_ibinc->ii_ibdev = ic->rds_ibdev;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo recv->r_ibinc->ii_pool = ic->rds_ibdev->inc_pool;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (!recv->r_frag) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo recv->r_frag = kmem_cache_alloc(ic->rds_ibdev->ib_frag_slab,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo KM_NOSLEEP);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (!recv->r_frag)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo goto out;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* Data sge, structure copy */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo recv->r_sge[1] = recv->r_frag->f_sge;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF5("rdsv3_ib_recv_refill_one", "Return: conn: %p, recv: %p",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo conn, recv);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo return (0);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppoout:
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (recv->r_ibinc) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo kmem_cache_free(rdsv3_ib_incoming_slab, recv->r_ibinc);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo atomic_add_32(&rdsv3_ib_allocation, -1);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo recv->r_ibinc = NULL;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo return (-ENOMEM);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo}
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo/*
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * This tries to allocate and post unused work requests after making sure that
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * they have all the allocations they need to queue received fragments into
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * sockets. The i_recv_mutex is held here so that ring_alloc and _unalloc
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * pairs don't go unmatched.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo *
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * -1 is returned if posting fails due to temporary resource exhaustion.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppoint
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppordsv3_ib_recv_refill(struct rdsv3_connection *conn, int prefill)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo{
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_ib_connection *ic = conn->c_transport_data;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_ib_recv_work *recv;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo unsigned int posted = 0;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo int ret = 0, avail;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo uint32_t pos, i;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_recv_refill", "conn: %p, prefill: %d",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo conn, prefill);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (prefill || rdsv3_conn_up(conn)) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo uint_t w_nr = ic->i_recv_ring.w_nr;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo avail = rdsv3_ib_ring_alloc(&ic->i_recv_ring, w_nr, &pos);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if ((avail <= 0) || (pos >= w_nr)) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF2("rdsv3_ib_recv_refill",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "Argh - ring alloc returned pos=%u, avail: %d",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo pos, avail);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo return (-EINVAL);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* populate the WRs */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo for (i = 0; i < avail; i++) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo recv = &ic->i_recvs[pos];
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ret = rdsv3_ib_recv_refill_one(conn, recv);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (ret) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_ib_ring_unalloc(&ic->i_recv_ring,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo avail - i);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo break;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ic->i_recv_wrs[i].wr_id = (ibt_wrid_t)pos;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ic->i_recv_wrs[i].wr_nds = RDSV3_IB_RECV_SGE;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ic->i_recv_wrs[i].wr_sgl = &recv->r_sge[0];
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo pos = (pos + 1) % w_nr;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (i) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* post the WRs at one shot */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ret = ibt_post_recv(ib_get_ibt_channel_hdl(ic->i_cm_id),
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo &ic->i_recv_wrs[0], i, &posted);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF3("rdsv3_ib_recv_refill",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "attempted: %d posted: %d WRs ret %d",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo i, posted, ret);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (ret) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF2("rdsv3_ib_recv_refill",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "disconnecting and reconnecting\n",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo NIPQUAD(conn->c_faddr), ret);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_ib_ring_unalloc(&ic->i_recv_ring,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo i - posted);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_conn_drop(conn);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* We're doing flow control - update the window. */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (ic->i_flowctl && posted)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_ib_advertise_credits(conn, posted);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_recv_refill", "Return: conn: %p, posted: %d",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo conn, posted);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo return (ret);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo}
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo/*
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * delayed freed incoming's
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppostruct rdsv3_inc_pool {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo list_t f_list; /* list of freed incoming */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo kmutex_t f_lock; /* lock of fmr pool */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo int32_t f_listcnt;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo};
193974072f41a843678abf5f61979c748687e66bSherry Moore
193974072f41a843678abf5f61979c748687e66bSherry Moorevoid
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppordsv3_ib_destroy_inc_pool(struct rdsv3_ib_device *rds_ibdev)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo{
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_inc_pool *pool = rds_ibdev->inc_pool;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (pool) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo list_destroy(&pool->f_list);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo kmem_free((void *) pool, sizeof (*pool));
193974072f41a843678abf5f61979c748687e66bSherry Moore }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo}
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppoint
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppordsv3_ib_create_inc_pool(struct rdsv3_ib_device *rds_ibdev)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo{
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_inc_pool *pool;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo pool = (struct rdsv3_inc_pool *)kmem_zalloc(sizeof (*pool), KM_NOSLEEP);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (pool == NULL) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo return (-ENOMEM);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
d66f83158d97c12b2a78b9363a07d9d365762606jb list_create(&pool->f_list, sizeof (struct rdsv3_ib_incoming),
d66f83158d97c12b2a78b9363a07d9d365762606jb offsetof(struct rdsv3_ib_incoming, ii_obj));
d66f83158d97c12b2a78b9363a07d9d365762606jb mutex_init(&pool->f_lock, NULL, MUTEX_DRIVER, NULL);
d66f83158d97c12b2a78b9363a07d9d365762606jb rds_ibdev->inc_pool = pool;
d66f83158d97c12b2a78b9363a07d9d365762606jb return (0);
d66f83158d97c12b2a78b9363a07d9d365762606jb}
d66f83158d97c12b2a78b9363a07d9d365762606jb
d66f83158d97c12b2a78b9363a07d9d365762606jbstatic void
d66f83158d97c12b2a78b9363a07d9d365762606jbrdsv3_ib_inc_drop(struct rdsv3_ib_incoming *ibinc)
d66f83158d97c12b2a78b9363a07d9d365762606jb{
d66f83158d97c12b2a78b9363a07d9d365762606jb struct rdsv3_page_frag *frag;
d66f83158d97c12b2a78b9363a07d9d365762606jb struct rdsv3_page_frag *pos;
d66f83158d97c12b2a78b9363a07d9d365762606jb
d66f83158d97c12b2a78b9363a07d9d365762606jb RDSV3_FOR_EACH_LIST_NODE_SAFE(frag, pos, &ibinc->ii_frags, f_item) {
d66f83158d97c12b2a78b9363a07d9d365762606jb list_remove_node(&frag->f_item);
d66f83158d97c12b2a78b9363a07d9d365762606jb kmem_cache_free(ibinc->ii_ibdev->ib_frag_slab, frag);
d66f83158d97c12b2a78b9363a07d9d365762606jb }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ASSERT(list_is_empty(&ibinc->ii_frags));
0705ae3a16fa797df2a0885aadbf7b05cec9bbf2raghuram kmem_cache_free(rdsv3_ib_incoming_slab, ibinc);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo atomic_dec_uint(&rdsv3_ib_allocation);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo}
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppovoid
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppordsv3_ib_drain_inclist(void *data)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo{
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_inc_pool *pool = (struct rdsv3_inc_pool *)data;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_ib_incoming *ibinc;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo list_t *listp = &pool->f_list;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo kmutex_t *lockp = &pool->f_lock;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo int i = 0;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo for (;;) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo mutex_enter(lockp);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ibinc = (struct rdsv3_ib_incoming *)list_remove_head(listp);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (ibinc)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo pool->f_listcnt--;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo mutex_exit(lockp);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (!ibinc)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo break;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo i++;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_ib_inc_drop(ibinc);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo}
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppovoid
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppordsv3_ib_inc_free(struct rdsv3_incoming *inc)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo{
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_ib_incoming *ibinc;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_af_thr_t *af_thr;
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha RDSV3_DPRINTF4("rdsv3_ib_inc_free", "inc: %p", inc);
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha ibinc = container_of(inc, struct rdsv3_ib_incoming, ii_inc);
20ae46ebaff1237662e05edf9db61538aa85d448ha /* save af_thr in a local as ib_inc might be freed at mutex_exit */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo af_thr = ibinc->ii_ibdev->inc_soft_cq;
20ae46ebaff1237662e05edf9db61538aa85d448ha
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo mutex_enter(&ibinc->ii_pool->f_lock);
20ae46ebaff1237662e05edf9db61538aa85d448ha list_insert_tail(&ibinc->ii_pool->f_list, ibinc);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ibinc->ii_pool->f_listcnt++;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo mutex_exit(&ibinc->ii_pool->f_lock);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_af_thr_fire(af_thr);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo}
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppoint
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppordsv3_ib_inc_copy_to_user(struct rdsv3_incoming *inc, uio_t *uiop,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo size_t size)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo{
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_ib_incoming *ibinc;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_page_frag *frag;
20ae46ebaff1237662e05edf9db61538aa85d448ha unsigned long to_copy;
20ae46ebaff1237662e05edf9db61538aa85d448ha unsigned long frag_off = 0;
20ae46ebaff1237662e05edf9db61538aa85d448ha int copied = 0;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo int ret;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo uint32_t len;
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha ibinc = container_of(inc, struct rdsv3_ib_incoming, ii_inc);
20ae46ebaff1237662e05edf9db61538aa85d448ha frag = list_head(&ibinc->ii_frags);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo len = ntohl(inc->i_hdr.h_len);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_inc_copy_to_user", "inc: %p, size: %d len: %d",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo inc, size, len);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo while (copied < size && copied < len) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (frag_off == RDSV3_FRAG_SIZE) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo frag = list_next(&ibinc->ii_frags, frag);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo frag_off = 0;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
20ae46ebaff1237662e05edf9db61538aa85d448ha to_copy = min(len - copied, RDSV3_FRAG_SIZE - frag_off);
20ae46ebaff1237662e05edf9db61538aa85d448ha to_copy = min(size - copied, to_copy);
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha RDSV3_DPRINTF5("rdsv3_ib_inc_copy_to_user",
20ae46ebaff1237662e05edf9db61538aa85d448ha "%lu bytes to user %p from frag [%p, %u] + %lu",
20ae46ebaff1237662e05edf9db61538aa85d448ha to_copy, uiop,
20ae46ebaff1237662e05edf9db61538aa85d448ha frag->f_page, frag->f_offset, frag_off);
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha ret = uiomove((caddr_t)(frag->f_page +
20ae46ebaff1237662e05edf9db61538aa85d448ha frag->f_offset + frag_off),
20ae46ebaff1237662e05edf9db61538aa85d448ha to_copy, UIO_READ, uiop);
20ae46ebaff1237662e05edf9db61538aa85d448ha if (ret) {
20ae46ebaff1237662e05edf9db61538aa85d448ha RDSV3_DPRINTF2("rdsv3_ib_inc_copy_to_user",
20ae46ebaff1237662e05edf9db61538aa85d448ha "uiomove (%d) returned: %d", to_copy, ret);
20ae46ebaff1237662e05edf9db61538aa85d448ha break;
20ae46ebaff1237662e05edf9db61538aa85d448ha }
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha frag_off += to_copy;
20ae46ebaff1237662e05edf9db61538aa85d448ha copied += to_copy;
20ae46ebaff1237662e05edf9db61538aa85d448ha }
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha RDSV3_DPRINTF4("rdsv3_ib_inc_copy_to_user",
20ae46ebaff1237662e05edf9db61538aa85d448ha "Return: inc: %p, copied: %d", inc, copied);
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha return (copied);
20ae46ebaff1237662e05edf9db61538aa85d448ha}
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha/* ic starts out kmem_zalloc()ed */
20ae46ebaff1237662e05edf9db61538aa85d448havoid
20ae46ebaff1237662e05edf9db61538aa85d448hardsv3_ib_recv_init_ack(struct rdsv3_ib_connection *ic)
20ae46ebaff1237662e05edf9db61538aa85d448ha{
20ae46ebaff1237662e05edf9db61538aa85d448ha ibt_send_wr_t *wr = &ic->i_ack_wr;
20ae46ebaff1237662e05edf9db61538aa85d448ha ibt_wr_ds_t *sge = &ic->i_ack_sge;
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha RDSV3_DPRINTF4("rdsv3_ib_recv_init_ack", "ic: %p", ic);
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha sge->ds_va = ic->i_ack_dma;
20ae46ebaff1237662e05edf9db61538aa85d448ha sge->ds_len = sizeof (struct rdsv3_header);
20ae46ebaff1237662e05edf9db61538aa85d448ha sge->ds_key = ic->i_mr->lkey;
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha wr->wr_sgl = sge;
20ae46ebaff1237662e05edf9db61538aa85d448ha wr->wr_nds = 1;
20ae46ebaff1237662e05edf9db61538aa85d448ha wr->wr_opcode = IBT_WRC_SEND;
20ae46ebaff1237662e05edf9db61538aa85d448ha wr->wr_id = RDSV3_IB_ACK_WR_ID;
20ae46ebaff1237662e05edf9db61538aa85d448ha wr->wr_flags = IBT_WR_SEND_SIGNAL | IBT_WR_SEND_SOLICIT;
20ae46ebaff1237662e05edf9db61538aa85d448ha}
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha/*
20ae46ebaff1237662e05edf9db61538aa85d448ha * You'd think that with reliable IB connections you wouldn't need to ack
20ae46ebaff1237662e05edf9db61538aa85d448ha * messages that have been received. The problem is that IB hardware generates
20ae46ebaff1237662e05edf9db61538aa85d448ha * an ack message before it has DMAed the message into memory. This creates a
20ae46ebaff1237662e05edf9db61538aa85d448ha * potential message loss if the HCA is disabled for any reason between when it
20ae46ebaff1237662e05edf9db61538aa85d448ha * sends the ack and before the message is DMAed and processed. This is only a
20ae46ebaff1237662e05edf9db61538aa85d448ha * potential issue if another HCA is available for fail-over.
20ae46ebaff1237662e05edf9db61538aa85d448ha *
20ae46ebaff1237662e05edf9db61538aa85d448ha * When the remote host receives our ack they'll free the sent message from
20ae46ebaff1237662e05edf9db61538aa85d448ha * their send queue. To decrease the latency of this we always send an ack
20ae46ebaff1237662e05edf9db61538aa85d448ha * immediately after we've received messages.
20ae46ebaff1237662e05edf9db61538aa85d448ha *
20ae46ebaff1237662e05edf9db61538aa85d448ha * For simplicity, we only have one ack in flight at a time. This puts
20ae46ebaff1237662e05edf9db61538aa85d448ha * pressure on senders to have deep enough send queues to absorb the latency of
20ae46ebaff1237662e05edf9db61538aa85d448ha * a single ack frame being in flight. This might not be good enough.
20ae46ebaff1237662e05edf9db61538aa85d448ha *
 * This is implemented by having a long-lived send_wr and sge which point to a
20ae46ebaff1237662e05edf9db61538aa85d448ha * statically allocated ack frame. This ack wr does not fall under the ring
20ae46ebaff1237662e05edf9db61538aa85d448ha * accounting that the tx and rx wrs do. The QP attribute specifically makes
20ae46ebaff1237662e05edf9db61538aa85d448ha * room for it beyond the ring size. Send completion notices its special
20ae46ebaff1237662e05edf9db61538aa85d448ha * wr_id and avoids working with the ring in that case.
20ae46ebaff1237662e05edf9db61538aa85d448ha */
20ae46ebaff1237662e05edf9db61538aa85d448havoid
20ae46ebaff1237662e05edf9db61538aa85d448hardsv3_ib_set_ack(struct rdsv3_ib_connection *ic, uint64_t seq,
20ae46ebaff1237662e05edf9db61538aa85d448ha int ack_required)
20ae46ebaff1237662e05edf9db61538aa85d448ha{
20ae46ebaff1237662e05edf9db61538aa85d448ha RDSV3_DPRINTF4("rdsv3_ib_set_ack", "ic: %p, seq: %lld ack: %d",
20ae46ebaff1237662e05edf9db61538aa85d448ha ic, seq, ack_required);
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha mutex_enter(&ic->i_ack_lock);
20ae46ebaff1237662e05edf9db61538aa85d448ha ic->i_ack_next = seq;
20ae46ebaff1237662e05edf9db61538aa85d448ha if (ack_required)
20ae46ebaff1237662e05edf9db61538aa85d448ha set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
20ae46ebaff1237662e05edf9db61538aa85d448ha mutex_exit(&ic->i_ack_lock);
20ae46ebaff1237662e05edf9db61538aa85d448ha}
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448hastatic uint64_t
20ae46ebaff1237662e05edf9db61538aa85d448hardsv3_ib_get_ack(struct rdsv3_ib_connection *ic)
20ae46ebaff1237662e05edf9db61538aa85d448ha{
20ae46ebaff1237662e05edf9db61538aa85d448ha uint64_t seq;
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha RDSV3_DPRINTF4("rdsv3_ib_get_ack", "ic: %p", ic);
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha mutex_enter(&ic->i_ack_lock);
20ae46ebaff1237662e05edf9db61538aa85d448ha seq = ic->i_ack_next;
20ae46ebaff1237662e05edf9db61538aa85d448ha mutex_exit(&ic->i_ack_lock);
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha return (seq);
20ae46ebaff1237662e05edf9db61538aa85d448ha}
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448hastatic void
20ae46ebaff1237662e05edf9db61538aa85d448hardsv3_ib_send_ack(struct rdsv3_ib_connection *ic, unsigned int adv_credits)
20ae46ebaff1237662e05edf9db61538aa85d448ha{
20ae46ebaff1237662e05edf9db61538aa85d448ha struct rdsv3_header *hdr = ic->i_ack;
20ae46ebaff1237662e05edf9db61538aa85d448ha uint64_t seq;
20ae46ebaff1237662e05edf9db61538aa85d448ha int ret;
20ae46ebaff1237662e05edf9db61538aa85d448ha
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_send_ack", "ic: %p adv_credits: %d",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ic, adv_credits);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo seq = rdsv3_ib_get_ack(ic);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_send_ack", "send_ack: ic %p ack %llu",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ic, (unsigned long long) seq);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_message_populate_header(hdr, 0, 0, 0);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo hdr->h_ack = htonll(seq);
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff hdr->h_credit = adv_credits;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_message_make_checksum(hdr);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ic->i_ack_queued = jiffies;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
cb112a141f667f84bf442a77589d1705a2336dbelm ret = ibt_post_send(RDSV3_QP2CHANHDL(ic->i_cm_id->qp), &ic->i_ack_wr, 1,
cb112a141f667f84bf442a77589d1705a2336dbelm NULL);
cb112a141f667f84bf442a77589d1705a2336dbelm if (ret) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /*
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Failed to send. Release the WR, and
0705ae3a16fa797df2a0885aadbf7b05cec9bbf2raghuram * force another ACK.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_ib_stats_inc(s_ib_ack_send_failure);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF2("rdsv3_ib_send_ack", "sending ack failed\n");
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_conn_drop(ic->conn);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo } else {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_ib_stats_inc(s_ib_ack_sent);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_send_ack", "Return: ic: %p adv_credits: %d",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ic, adv_credits);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo}
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo/*
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * There are 3 ways of getting acknowledgements to the peer:
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * 1. We call rdsv3_ib_attempt_ack from the recv completion handler
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff * to send an ACK-only frame.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * However, there can be only one such frame in the send queue
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * at any time, so we may have to postpone it.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * 2. When another (data) packet is transmitted while there's
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * an ACK in the queue, we piggyback the ACK sequence number
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * on the data packet.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * 3. If the ACK WR is done sending, we get called from the
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * send queue completion handler, and check whether there's
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * another ACK pending (postponed because the WR was on the
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * queue). If so, we transmit it.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo *
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * We maintain 2 variables:
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * - i_ack_flags, which keeps track of whether the ACK WR
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * is currently in the send queue or not (IB_ACK_IN_FLIGHT)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * - i_ack_next, which is the last sequence number we received
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo *
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Potentially, send queue and receive queue handlers can run concurrently.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * It would be nice to not have to use a spinlock to synchronize things,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * but the one problem that rules this out is that 64bit updates are
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * not atomic on all platforms. Things would be a lot simpler if
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * we had atomic64 or maybe cmpxchg64 everywhere.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo *
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Reconnecting complicates this picture just slightly. When we
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * reconnect, we may be seeing duplicate packets. The peer
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * is retransmitting them, because it hasn't seen an ACK for
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * them. It is important that we ACK these.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo *
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * ACK mitigation adds a header flag "ACK_REQUIRED"; any packet with
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * this flag set *MUST* be acknowledged immediately.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo/*
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * When we get here, we're called from the recv queue handler.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Check whether we ought to transmit an ACK.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppovoid
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppordsv3_ib_attempt_ack(struct rdsv3_ib_connection *ic)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo{
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo unsigned int adv_credits;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_attempt_ack", "ic: %p", ic);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (!test_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo return;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (test_and_set_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags)) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_ib_stats_inc(s_ib_ack_send_delayed);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo return;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* Can we get a send credit? */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (!rdsv3_ib_send_grab_credits(ic, 1, &adv_credits, 0)) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_ib_stats_inc(s_ib_tx_throttle);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
0705ae3a16fa797df2a0885aadbf7b05cec9bbf2raghuram return;
0705ae3a16fa797df2a0885aadbf7b05cec9bbf2raghuram }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_ib_send_ack(ic, adv_credits);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
cb112a141f667f84bf442a77589d1705a2336dbelm RDSV3_DPRINTF4("rdsv3_ib_attempt_ack", "Return: ic: %p", ic);
cb112a141f667f84bf442a77589d1705a2336dbelm}
cb112a141f667f84bf442a77589d1705a2336dbelm
cb112a141f667f84bf442a77589d1705a2336dbelm/*
cb112a141f667f84bf442a77589d1705a2336dbelm * We get here from the send completion handler, when the
cb112a141f667f84bf442a77589d1705a2336dbelm * adapter tells us the ACK frame was sent.
cb112a141f667f84bf442a77589d1705a2336dbelm */
cb112a141f667f84bf442a77589d1705a2336dbelmvoid
cb112a141f667f84bf442a77589d1705a2336dbelmrdsv3_ib_ack_send_complete(struct rdsv3_ib_connection *ic)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo{
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff RDSV3_DPRINTF4("rdsv3_ib_ack_send_complete", "ic: %p", ic);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_ib_attempt_ack(ic);
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff}
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff/*
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff * This is called by the regular xmit code when it wants to piggyback
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff * an ACK on an outgoing frame.
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff */
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoffuint64_t
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoffrdsv3_ib_piggyb_ack(struct rdsv3_ib_connection *ic)
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff{
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_piggyb_ack", "ic: %p", ic);
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff if (test_and_clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags)) {
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff rdsv3_ib_stats_inc(s_ib_ack_send_piggybacked);
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff }
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff return (rdsv3_ib_get_ack(ic));
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff}
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff/*
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff * It's kind of lame that we're copying from the posted receive pages into
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff * long-lived bitmaps. We could have posted the bitmaps and rdma written into
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff * them. But receiving new congestion bitmaps should be a *rare* event, so
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff * hopefully we won't need to invest that complexity in making it more
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff * efficient. By copying we can share a simpler core with TCP which has to
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff * copy.
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff */
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoffstatic void
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoffrdsv3_ib_cong_recv(struct rdsv3_connection *conn,
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff struct rdsv3_ib_incoming *ibinc)
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff{
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff struct rdsv3_cong_map *map;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo unsigned int map_off;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo unsigned int map_page;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_page_frag *frag;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo unsigned long frag_off;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo unsigned long to_copy;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo unsigned long copied;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo uint64_t uncongested = 0;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo caddr_t addr;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_cong_recv", "conn: %p, ibinc: %p",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo conn, ibinc);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* catch completely corrupt packets */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (ntohl(ibinc->ii_inc.i_hdr.h_len) != RDSV3_CONG_MAP_BYTES)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo return;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo map = conn->c_fcong;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo map_page = 0;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo map_off = 0;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo frag = list_head(&ibinc->ii_frags);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo frag_off = 0;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo copied = 0;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo while (copied < RDSV3_CONG_MAP_BYTES) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo uint64_t *src, *dst;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo unsigned int k;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo to_copy = min(RDSV3_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ASSERT(!(to_copy & 7)); /* Must be 64bit aligned. */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo addr = frag->f_page + frag->f_offset;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo src = (uint64_t *)(addr + frag_off);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo dst = (uint64_t *)(map->m_page_addrs[map_page] + map_off);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_cong_recv",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "src: %p dst: %p copied: %d", src, dst, copied);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo for (k = 0; k < to_copy; k += 8) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /*
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Record ports that became uncongested, ie
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * bits that changed from 0 to 1.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo uncongested |= ~(*src) & *dst;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo *dst++ = *src++;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo copied += to_copy;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_cong_recv",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "src: %p dst: %p copied: %d", src, dst, copied);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo map_off += to_copy;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (map_off == PAGE_SIZE) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo map_off = 0;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo map_page++;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo frag_off += to_copy;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (frag_off == RDSV3_FRAG_SIZE) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo frag = list_next(&ibinc->ii_frags, frag);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo frag_off = 0;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
928da554c8c4cc86ee5c49c1f3e8706d88eee546lm
928da554c8c4cc86ee5c49c1f3e8706d88eee546lm#if 0
928da554c8c4cc86ee5c49c1f3e8706d88eee546lmXXX
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* the congestion map is in little endian order */
a10abbb48301520aaa9158a9d71fac18fc269159lm uncongested = le64_to_cpu(uncongested);
a10abbb48301520aaa9158a9d71fac18fc269159lm#endif
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
a10abbb48301520aaa9158a9d71fac18fc269159lm rdsv3_cong_map_updated(map, uncongested);
a10abbb48301520aaa9158a9d71fac18fc269159lm
a10abbb48301520aaa9158a9d71fac18fc269159lm RDSV3_DPRINTF4("rdsv3_ib_cong_recv", "Return: conn: %p, ibinc: %p",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo conn, ibinc);
a10abbb48301520aaa9158a9d71fac18fc269159lm}
a10abbb48301520aaa9158a9d71fac18fc269159lm
a10abbb48301520aaa9158a9d71fac18fc269159lmstatic void
a10abbb48301520aaa9158a9d71fac18fc269159lmrdsv3_ib_process_recv(struct rdsv3_connection *conn,
a10abbb48301520aaa9158a9d71fac18fc269159lm struct rdsv3_ib_recv_work *recv, uint32_t data_len,
a10abbb48301520aaa9158a9d71fac18fc269159lm struct rdsv3_ib_ack_state *state)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo{
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_ib_connection *ic = conn->c_transport_data;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_ib_incoming *ibinc = ic->i_ibinc;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_header *ihdr, *hdr;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* XXX shut down the connection if port 0,0 are seen? */
20ae46ebaff1237662e05edf9db61538aa85d448ha
20ae46ebaff1237662e05edf9db61538aa85d448ha RDSV3_DPRINTF5("rdsv3_ib_process_recv",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "ic %p ibinc %p recv %p byte len %u", ic, ibinc, recv, data_len);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (data_len < sizeof (struct rdsv3_header)) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF2("rdsv3_ib_process_recv",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "incoming message from %u.%u.%u.%u didn't include a "
a10abbb48301520aaa9158a9d71fac18fc269159lm "header, disconnecting and reconnecting",
a10abbb48301520aaa9158a9d71fac18fc269159lm NIPQUAD(conn->c_faddr));
a10abbb48301520aaa9158a9d71fac18fc269159lm rdsv3_conn_drop(conn);
a10abbb48301520aaa9158a9d71fac18fc269159lm return;
a10abbb48301520aaa9158a9d71fac18fc269159lm }
a10abbb48301520aaa9158a9d71fac18fc269159lm data_len -= sizeof (struct rdsv3_header);
a10abbb48301520aaa9158a9d71fac18fc269159lm
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* Validate the checksum. */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (!rdsv3_message_verify_checksum(ihdr)) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF2("rdsv3_ib_process_recv", "incoming message "
0705ae3a16fa797df2a0885aadbf7b05cec9bbf2raghuram "from %u.%u.%u.%u has corrupted header - "
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "forcing a reconnect",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo NIPQUAD(conn->c_faddr));
0705ae3a16fa797df2a0885aadbf7b05cec9bbf2raghuram rdsv3_conn_drop(conn);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_stats_inc(s_recv_drop_bad_checksum);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo return;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* Process the ACK sequence which comes with every packet */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo state->ack_recv = ntohll(ihdr->h_ack);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo state->ack_recv_valid = 1;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* Process the credits update if there was one */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (ihdr->h_credit)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_ib_send_add_credits(conn, ihdr->h_credit);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (ihdr->h_sport == 0 && ihdr->h_dport == 0 && data_len == 0) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /*
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * This is an ACK-only packet. The fact that it gets
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * special treatment here is that historically, ACKs
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * were rather special beasts.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_ib_stats_inc(s_ib_ack_received);
20ae46ebaff1237662e05edf9db61538aa85d448ha return;
20ae46ebaff1237662e05edf9db61538aa85d448ha }
20ae46ebaff1237662e05edf9db61538aa85d448ha
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /*
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * If we don't already have an inc on the connection then this
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * fragment has a header and starts a message.. copy its header
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * into the inc and save the inc so we can hang upcoming fragments
b0fc0e77220f1fa4c933fd58a4e1dedcd650b0f1govinda * off its list.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (!ibinc) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ibinc = recv->r_ibinc;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo recv->r_ibinc = NULL;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ic->i_ibinc = ibinc;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo hdr = &ibinc->ii_inc.i_hdr;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo (void) memcpy(hdr, ihdr, sizeof (*hdr));
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ic->i_recv_data_rem = ntohl(hdr->h_len);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF5("rdsv3_ib_process_recv",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "ic %p ibinc %p rem %u flag 0x%x", ic, ibinc,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ic->i_recv_data_rem, hdr->h_flags);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo } else {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo hdr = &ibinc->ii_inc.i_hdr;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /*
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * We can't just use memcmp here; fragments of a
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * single message may carry different ACKs
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (hdr->h_sequence != ihdr->h_sequence ||
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo hdr->h_len != ihdr->h_len ||
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo hdr->h_sport != ihdr->h_sport ||
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo hdr->h_dport != ihdr->h_dport) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF2("rdsv3_ib_process_recv",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "fragment header mismatch; forcing reconnect");
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_conn_drop(conn);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo return;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo list_insert_tail(&ibinc->ii_frags, recv->r_frag);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo recv->r_frag = NULL;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (ic->i_recv_data_rem > RDSV3_FRAG_SIZE)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ic->i_recv_data_rem -= RDSV3_FRAG_SIZE;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo else {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ic->i_recv_data_rem = 0;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ic->i_ibinc = NULL;
cb112a141f667f84bf442a77589d1705a2336dbelm
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (ibinc->ii_inc.i_hdr.h_flags == RDSV3_FLAG_CONG_BITMAP)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_ib_cong_recv(conn, ibinc);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo else {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_recv_incoming(conn, conn->c_faddr, conn->c_laddr,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo &ibinc->ii_inc, KM_NOSLEEP);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo state->ack_next = ntohll(hdr->h_sequence);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo state->ack_next_valid = 1;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /*
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Evaluate the ACK_REQUIRED flag *after* we received
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * the complete frame, and after bumping the next_rx
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * sequence.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (hdr->h_flags & RDSV3_FLAG_ACK_REQUIRED) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_stats_inc(s_recv_ack_required);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo state->ack_required = 1;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_inc_put(&ibinc->ii_inc);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_process_recv",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "Return: conn: %p recv: %p len: %d state: %p",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo conn, recv, data_len, state);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo}
b0fc0e77220f1fa4c933fd58a4e1dedcd650b0f1govinda
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppovoid
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppordsv3_ib_recv_cqe_handler(struct rdsv3_ib_connection *ic, ibt_wc_t *wc,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_ib_ack_state *state)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo{
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_connection *conn = ic->conn;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_ib_recv_work *recv;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_ib_work_ring *recv_ringp = &ic->i_recv_ring;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_recv_cqe_handler",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "rwc wc_id 0x%llx status %u byte_len %u imm_data %u\n",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo (unsigned long long)wc->wc_id, wc->wc_status,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo wc->wc_bytes_xfer, ntohl(wc->wc_immed_data));
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_ib_stats_inc(s_ib_rx_cq_event);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo recv = &ic->i_recvs[rdsv3_ib_ring_oldest(recv_ringp)];
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /*
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Also process recvs in connecting state because it is possible
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * to get a recv completion _before_ the rdmacm ESTABLISHED
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * event is processed.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (rdsv3_conn_up(conn) || rdsv3_conn_connecting(conn)) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* We expect errors as the qp is drained during shutdown */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (wc->wc_status == IBT_WC_SUCCESS) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_ib_process_recv(conn, recv,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo wc->wc_bytes_xfer, state);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo } else {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF2("rdsv3_ib_recv_cqe_handler",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "recv completion on "
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "%u.%u.%u.%u had status %u, "
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "disconnecting and reconnecting\n",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo NIPQUAD(conn->c_faddr),
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo wc->wc_status);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_conn_drop(conn);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_ib_ring_free(recv_ringp, 1);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /*
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * If we ever end up with a really empty receive ring, we're
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * in deep trouble, as the sender will definitely see RNR
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * timeouts.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (rdsv3_ib_ring_empty(recv_ringp))
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_ib_stats_inc(s_ib_rx_ring_empty);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (rdsv3_ib_ring_low(recv_ringp)) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_af_thr_fire(ic->i_refill_rq);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo }
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo}
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppoint
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppordsv3_ib_recv(struct rdsv3_connection *conn)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo{
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_ib_connection *ic = conn->c_transport_data;
a8ea4ede2107d9ad3895b91946b9f33a83c5f7eenarayan int ret = 0;
a8ea4ede2107d9ad3895b91946b9f33a83c5f7eenarayan
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_recv", "conn %p\n", conn);
a8ea4ede2107d9ad3895b91946b9f33a83c5f7eenarayan
a8ea4ede2107d9ad3895b91946b9f33a83c5f7eenarayan if (rdsv3_conn_up(conn))
a8ea4ede2107d9ad3895b91946b9f33a83c5f7eenarayan rdsv3_ib_attempt_ack(ic);
a8ea4ede2107d9ad3895b91946b9f33a83c5f7eenarayan
a8ea4ede2107d9ad3895b91946b9f33a83c5f7eenarayan RDSV3_DPRINTF4("rdsv3_ib_recv", "Return: conn: %p", conn);
a8ea4ede2107d9ad3895b91946b9f33a83c5f7eenarayan
a8ea4ede2107d9ad3895b91946b9f33a83c5f7eenarayan return (ret);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo}
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppoextern int rdsv3_ib_inc_constructor(void *buf, void *arg, int kmflags);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppoextern void rdsv3_ib_inc_destructor(void *buf, void *arg);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
d10e4ef2fabf16c3237c6d6592496df3eac6a1efnarayanint
d10e4ef2fabf16c3237c6d6592496df3eac6a1efnarayanrdsv3_ib_recv_init(void)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo{
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_recv_init", "Enter");
b0fc0e77220f1fa4c933fd58a4e1dedcd650b0f1govinda
b0fc0e77220f1fa4c933fd58a4e1dedcd650b0f1govinda rdsv3_ib_incoming_slab = kmem_cache_create("rdsv3_ib_incoming",
20ae46ebaff1237662e05edf9db61538aa85d448ha sizeof (struct rdsv3_ib_incoming), 0, rdsv3_ib_inc_constructor,
20ae46ebaff1237662e05edf9db61538aa85d448ha rdsv3_ib_inc_destructor, NULL, NULL, NULL, 0);
b0fc0e77220f1fa4c933fd58a4e1dedcd650b0f1govinda if (!rdsv3_ib_incoming_slab) {
b0fc0e77220f1fa4c933fd58a4e1dedcd650b0f1govinda RDSV3_DPRINTF2("rdsv3_ib_recv_init", "kmem_cache_create "
b0fc0e77220f1fa4c933fd58a4e1dedcd650b0f1govinda "failed");
b0fc0e77220f1fa4c933fd58a4e1dedcd650b0f1govinda return (-ENOMEM);
20ae46ebaff1237662e05edf9db61538aa85d448ha }
20ae46ebaff1237662e05edf9db61538aa85d448ha
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_recv_init", "Return");
b0fc0e77220f1fa4c933fd58a4e1dedcd650b0f1govinda return (0);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo}
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppovoid
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppordsv3_ib_recv_exit(void)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo{
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_recv_exit", "Enter");
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo kmem_cache_destroy(rdsv3_ib_incoming_slab);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_recv_exit", "Return");
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo}
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo