ib_recv.c revision 5e12ddada2833f3aa285210603ce9aaeb8be35cc
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * This file contains code imported from the OFED rds source file ib_recv.c
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Oracle elects to have and use the contents of ib_recv.c under and governed
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * by the OpenIB.org BSD license (see below for full license text). However,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * the following notice accompanied the original version of this file:
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Copyright (c) 2006 Oracle. All rights reserved.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * This software is available to you under a choice of one of two
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * licenses. You may choose to be licensed under the terms of the GNU
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * General Public License (GPL) Version 2, available from the file
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * COPYING in the main directory of this source tree, or the
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * OpenIB.org BSD license below:
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Redistribution and use in source and binary forms, with or
20ae46ebaff1237662e05edf9db61538aa85d448ha * without modification, are permitted provided that the following
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * conditions are met:
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * - Redistributions of source code must retain the above
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * copyright notice, this list of conditions and the following
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * disclaimer.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * - Redistributions in binary form must reproduce the above
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * copyright notice, this list of conditions and the following
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * disclaimer in the documentation and/or other materials
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * provided with the distribution.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * SOFTWARE.
20ae46ebaff1237662e05edf9db61538aa85d448ha for (i = 0, recv = ic->i_recvs; i < ic->i_recv_ring.w_nr; i++, recv++) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* initialize the hdr sgl permanently */
a8ea4ede2107d9ad3895b91946b9f33a83c5f7eenarayanrdsv3_ib_recv_clear_one(struct rdsv3_ib_connection *ic,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_recv_clear_one", "ic: %p, recv: %p",
20ae46ebaff1237662e05edf9db61538aa85d448ha kmem_cache_free(ic->rds_ibdev->ib_frag_slab, recv->r_frag);
20ae46ebaff1237662e05edf9db61538aa85d448ha RDSV3_DPRINTF4("rdsv3_ib_recv_clear_one", "Return: ic: %p, recv: %p",
20ae46ebaff1237662e05edf9db61538aa85d448ha RDSV3_DPRINTF4("rdsv3_ib_recv_clear_ring", "ic: %p", ic);
20ae46ebaff1237662e05edf9db61538aa85d448haextern int atomic_add_unless(atomic_t *, uint_t, ulong_t);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF5("rdsv3_ib_recv_refill_one", "conn: %p, recv: %p",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo recv->r_ibinc = kmem_cache_alloc(rdsv3_ib_incoming_slab,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_inc_init(&recv->r_ibinc->ii_inc, conn, conn->c_faddr);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo recv->r_frag = kmem_cache_alloc(ic->rds_ibdev->ib_frag_slab,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* Data sge, structure copy */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF5("rdsv3_ib_recv_refill_one", "Return: conn: %p, recv: %p",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo return (0);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo kmem_cache_free(rdsv3_ib_incoming_slab, recv->r_ibinc);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * This tries to allocate and post unused work requests after making sure that
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * they have all the allocations they need to queue received fragments into
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * sockets. The i_recv_mutex is held here so that ring_alloc and _unalloc
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * pairs don't go unmatched.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * -1 is returned if posting fails due to temporary resource exhaustion.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppordsv3_ib_recv_refill(struct rdsv3_connection *conn, int prefill)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_ib_connection *ic = conn->c_transport_data;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo unsigned int posted = 0;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_recv_refill", "conn: %p, prefill: %d",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo avail = rdsv3_ib_ring_alloc(&ic->i_recv_ring, w_nr, &pos);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "Argh - ring alloc returned pos=%u, avail: %d",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* populate the WRs */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo for (i = 0; i < avail; i++) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* post the WRs at one shot */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ret = ibt_post_recv(ib_get_ibt_channel_hdl(ic->i_cm_id),
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "attempted: %d posted: %d WRs ret %d",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "disconnecting and reconnecting\n",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* We're doing flow control - update the window. */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_recv_refill", "Return: conn: %p, posted: %d",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * delayed freed incoming's
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppordsv3_ib_destroy_inc_pool(struct rdsv3_ib_device *rds_ibdev)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppordsv3_ib_create_inc_pool(struct rdsv3_ib_device *rds_ibdev)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo pool = (struct rdsv3_inc_pool *)kmem_zalloc(sizeof (*pool), KM_NOSLEEP);
d66f83158d97c12b2a78b9363a07d9d365762606jb list_create(&pool->f_list, sizeof (struct rdsv3_ib_incoming),
d66f83158d97c12b2a78b9363a07d9d365762606jb return (0);
d66f83158d97c12b2a78b9363a07d9d365762606jbstatic void
d66f83158d97c12b2a78b9363a07d9d365762606jb RDSV3_FOR_EACH_LIST_NODE_SAFE(frag, pos, &ibinc->ii_frags, f_item) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_inc_pool *pool = (struct rdsv3_inc_pool *)data;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo ibinc = (struct rdsv3_ib_incoming *)list_remove_head(listp);
20ae46ebaff1237662e05edf9db61538aa85d448ha ibinc = container_of(inc, struct rdsv3_ib_incoming, ii_inc);
20ae46ebaff1237662e05edf9db61538aa85d448ha /* save af_thr in a local as ib_inc might be freed at mutex_exit */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppordsv3_ib_inc_copy_to_user(struct rdsv3_incoming *inc, uio_t *uiop,
20ae46ebaff1237662e05edf9db61538aa85d448ha unsigned long to_copy;
20ae46ebaff1237662e05edf9db61538aa85d448ha unsigned long frag_off = 0;
20ae46ebaff1237662e05edf9db61538aa85d448ha ibinc = container_of(inc, struct rdsv3_ib_incoming, ii_inc);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_inc_copy_to_user", "inc: %p, size: %d len: %d",
20ae46ebaff1237662e05edf9db61538aa85d448ha "%lu bytes to user %p from frag [%p, %u] + %lu",
20ae46ebaff1237662e05edf9db61538aa85d448ha/* ic starts out kmem_zalloc()ed */
20ae46ebaff1237662e05edf9db61538aa85d448ha * You'd think that with reliable IB connections you wouldn't need to ack
20ae46ebaff1237662e05edf9db61538aa85d448ha * messages that have been received. The problem is that IB hardware generates
20ae46ebaff1237662e05edf9db61538aa85d448ha * an ack message before it has DMAed the message into memory. This creates a
20ae46ebaff1237662e05edf9db61538aa85d448ha * potential message loss if the HCA is disabled for any reason between when it
20ae46ebaff1237662e05edf9db61538aa85d448ha * sends the ack and before the message is DMAed and processed. This is only a
20ae46ebaff1237662e05edf9db61538aa85d448ha * potential issue if another HCA is available for fail-over.
20ae46ebaff1237662e05edf9db61538aa85d448ha * When the remote host receives our ack they'll free the sent message from
20ae46ebaff1237662e05edf9db61538aa85d448ha * their send queue. To decrease the latency of this we always send an ack
20ae46ebaff1237662e05edf9db61538aa85d448ha * immediately after we've received messages.
20ae46ebaff1237662e05edf9db61538aa85d448ha * For simplicity, we only have one ack in flight at a time. This puts
20ae46ebaff1237662e05edf9db61538aa85d448ha * pressure on senders to have deep enough send queues to absorb the latency of
20ae46ebaff1237662e05edf9db61538aa85d448ha * a single ack frame being in flight. This might not be good enough.
20ae46ebaff1237662e05edf9db61538aa85d448ha * This is implemented by having a long-lived send_wr and sge which point to a
20ae46ebaff1237662e05edf9db61538aa85d448ha * statically allocated ack frame. This ack wr does not fall under the ring
20ae46ebaff1237662e05edf9db61538aa85d448ha * accounting that the tx and rx wrs do. The QP attribute specifically makes
20ae46ebaff1237662e05edf9db61538aa85d448ha * room for it beyond the ring size. Send completion notices its special
20ae46ebaff1237662e05edf9db61538aa85d448ha * wr_id and avoids working with the ring in that case.
20ae46ebaff1237662e05edf9db61538aa85d448hardsv3_ib_set_ack(struct rdsv3_ib_connection *ic, uint64_t seq,
20ae46ebaff1237662e05edf9db61538aa85d448ha RDSV3_DPRINTF4("rdsv3_ib_set_ack", "ic: %p, seq: %lld ack: %d",
20ae46ebaff1237662e05edf9db61538aa85d448hastatic void
20ae46ebaff1237662e05edf9db61538aa85d448hardsv3_ib_send_ack(struct rdsv3_ib_connection *ic, unsigned int adv_credits)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_send_ack", "ic: %p adv_credits: %d",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_send_ack", "send_ack: ic %p ack %llu",
cb112a141f667f84bf442a77589d1705a2336dbelm ret = ibt_post_send(RDSV3_QP2CHANHDL(ic->i_cm_id->qp), &ic->i_ack_wr, 1,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Failed to send. Release the WR, and
0705ae3a16fa797df2a0885aadbf7b05cec9bbf2raghuram * force another ACK.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF2("rdsv3_ib_send_ack", "sending ack failed\n");
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_send_ack", "Return: ic: %p adv_credits: %d",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * There are 3 ways of getting acknowledgements to the peer:
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * 1. We call rdsv3_ib_attempt_ack from the recv completion handler
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff * to send an ACK-only frame.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * However, there can be only one such frame in the send queue
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * at any time, so we may have to postpone it.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * 2. When another (data) packet is transmitted while there's
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * an ACK in the queue, we piggyback the ACK sequence number
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * on the data packet.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * 3. If the ACK WR is done sending, we get called from the
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * send queue completion handler, and check whether there's
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * another ACK pending (postponed because the WR was on the
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * queue). If so, we transmit it.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * We maintain 2 variables:
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * - i_ack_flags, which keeps track of whether the ACK WR
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * is currently in the send queue or not (IB_ACK_IN_FLIGHT)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * - i_ack_next, which is the last sequence number we received
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Potentially, send queue and receive queue handlers can run concurrently.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * It would be nice to not have to use a spinlock to synchronize things,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * but the one problem that rules this out is that 64bit updates are
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * not atomic on all platforms. Things would be a lot simpler if
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * we had atomic64 or maybe cmpxchg64 everywhere.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Reconnecting complicates this picture just slightly. When we
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * reconnect, we may be seeing duplicate packets. The peer
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * is retransmitting them, because it hasn't seen an ACK for
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * them. It is important that we ACK these.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * ACK mitigation adds a header flag "ACK_REQUIRED"; any packet with
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * this flag set *MUST* be acknowledged immediately.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * When we get here, we're called from the recv queue handler.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Check whether we ought to transmit an ACK.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo unsigned int adv_credits;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (test_and_set_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags)) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* Can we get a send credit? */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (!rdsv3_ib_send_grab_credits(ic, 1, &adv_credits, 0)) {
cb112a141f667f84bf442a77589d1705a2336dbelm RDSV3_DPRINTF4("rdsv3_ib_attempt_ack", "Return: ic: %p", ic);
cb112a141f667f84bf442a77589d1705a2336dbelm * We get here from the send completion handler, when the
cb112a141f667f84bf442a77589d1705a2336dbelm * adapter tells us the ACK frame was sent.
cb112a141f667f84bf442a77589d1705a2336dbelmrdsv3_ib_ack_send_complete(struct rdsv3_ib_connection *ic)
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff RDSV3_DPRINTF4("rdsv3_ib_ack_send_complete", "ic: %p", ic);
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff * This is called by the regular xmit code when it wants to piggyback
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff * an ACK on an outgoing frame.
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoffrdsv3_ib_piggyb_ack(struct rdsv3_ib_connection *ic)
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff if (test_and_clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags)) {
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff rdsv3_ib_stats_inc(s_ib_ack_send_piggybacked);
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff * It's kind of lame that we're copying from the posted receive pages into
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff * long-lived bitmaps. We could have posted the bitmaps and rdma written into
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff * them. But receiving new congestion bitmaps should be a *rare* event, so
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff * hopefully we won't need to invest that complexity in making it more
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoff * efficient. By copying we can share a simpler core with TCP which has to
0de66c941512d3ab0d15397815d8c9c552bcbf8dMichael Bergknoffrdsv3_ib_cong_recv(struct rdsv3_connection *conn,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo unsigned int map_off;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo unsigned int map_page;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo unsigned long frag_off;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo unsigned long to_copy;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo unsigned long copied;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF4("rdsv3_ib_cong_recv", "conn: %p, ibinc: %p",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* catch completely corrupt packets */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (ntohl(ibinc->ii_inc.i_hdr.h_len) != RDSV3_CONG_MAP_BYTES)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo unsigned int k;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo to_copy = min(RDSV3_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo dst = (uint64_t *)(map->m_page_addrs[map_page] + map_off);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Record ports that became uncongested, i.e.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * bits that changed from 0 to 1.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* the congestion map is in little endian order */
a10abbb48301520aaa9158a9d71fac18fc269159lm RDSV3_DPRINTF4("rdsv3_ib_cong_recv", "Return: conn: %p, ibinc: %p",
a10abbb48301520aaa9158a9d71fac18fc269159lmstatic void
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_ib_connection *ic = conn->c_transport_data;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* XXX shut down the connection if port 0,0 are seen? */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "ic %p ibinc %p recv %p byte len %u", ic, ibinc, recv, data_len);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "incoming message from %u.%u.%u.%u didn't include a "
a10abbb48301520aaa9158a9d71fac18fc269159lm "header, disconnecting and reconnecting",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* Validate the checksum. */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo RDSV3_DPRINTF2("rdsv3_ib_process_recv", "incoming message "
0705ae3a16fa797df2a0885aadbf7b05cec9bbf2raghuram "from %u.%u.%u.%u has corrupted header - "
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "forcing a reconnect",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* Process the ACK sequence which comes with every packet */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* Process the credits update if there was one */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (ihdr->h_sport == 0 && ihdr->h_dport == 0 && data_len == 0) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * This is an ACK-only packet. The reason it gets
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * special treatment here is that historically, ACKs
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * were rather special beasts.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * If we don't already have an inc on the connection then this
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * fragment has a header and starts a message; copy its header
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * into the inc and save the inc so we can hang upcoming fragments
b0fc0e77220f1fa4c933fd58a4e1dedcd650b0f1govinda * off its list.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * We can't just use memcmp here; fragments of a
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * single message may carry different ACKs
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "fragment header mismatch; forcing reconnect");
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (ibinc->ii_inc.i_hdr.h_flags == RDSV3_FLAG_CONG_BITMAP)
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo rdsv3_recv_incoming(conn, conn->c_faddr, conn->c_laddr,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Evaluate the ACK_REQUIRED flag *after* we received
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * the complete frame, and after bumping the next_rx
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * sequence.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "Return: conn: %p recv: %p len: %d state: %p",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppordsv3_ib_recv_cqe_handler(struct rdsv3_ib_connection *ic, ibt_wc_t *wc,
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_ib_work_ring *recv_ringp = &ic->i_recv_ring;
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "rwc wc_id 0x%llx status %u byte_len %u imm_data %u\n",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo recv = &ic->i_recvs[rdsv3_ib_ring_oldest(recv_ringp)];
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * Also process recvs in connecting state because it is possible
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * to get a recv completion _before_ the rdmacm ESTABLISHED
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * event is processed.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo if (rdsv3_conn_up(conn) || rdsv3_conn_connecting(conn)) {
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo /* We expect errors as the qp is drained during shutdown */
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "recv completion on "
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "%u.%u.%u.%u had status %u, "
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo "disconnecting and reconnecting\n",
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * If we ever end up with a really empty receive ring, we're
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * in deep trouble, as the sender will definitely see RNR
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo * timeouts.
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppo struct rdsv3_ib_connection *ic = conn->c_transport_data;
a8ea4ede2107d9ad3895b91946b9f33a83c5f7eenarayan RDSV3_DPRINTF4("rdsv3_ib_recv", "Return: conn: %p", conn);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppoextern int rdsv3_ib_inc_constructor(void *buf, void *arg, int kmflags);
1ae0874509b6811fdde1dfd46f0d93fd09867a3fheppoextern void rdsv3_ib_inc_destructor(void *buf, void *arg);
b0fc0e77220f1fa4c933fd58a4e1dedcd650b0f1govinda rdsv3_ib_incoming_slab = kmem_cache_create("rdsv3_ib_incoming",
20ae46ebaff1237662e05edf9db61538aa85d448ha sizeof (struct rdsv3_ib_incoming), 0, rdsv3_ib_inc_constructor,
b0fc0e77220f1fa4c933fd58a4e1dedcd650b0f1govinda RDSV3_DPRINTF2("rdsv3_ib_recv_init", "kmem_cache_create "
b0fc0e77220f1fa4c933fd58a4e1dedcd650b0f1govinda return (0);