/*
* Copyright (c) 2008-2016 Solarflare Communications Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* The views and conclusions contained in the software and documentation are
* those of the authors and should not be interpreted as representing official
* policies, either expressed or implied, of the FreeBSD Project.
*/
#include <sys/sysmacros.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/ethernet.h>
#include "sfxge.h"
#include "efx.h"
/* RXQ flush response timeout (in microseconds) */
#define SFXGE_RX_QFLUSH_USEC (2000000) /* value assumed; matches the 2 second wait below */
/* RXQ flush tries in the case of failure */
#define SFXGE_RX_QFLUSH_TRIES (100) /* value assumed */
/* RXQ default packet buffer preallocation (number of packet buffers) */
#define SFXGE_RX_QPREALLOC (0)
/* Receive packet DMA attributes */
static ddi_device_acc_attr_t sfxge_rx_packet_devacc = { /* identifier assumed */
DDI_DEVICE_ATTR_V0, /* devacc_attr_version */
DDI_NEVERSWAP_ACC, /* devacc_attr_endian_flags */
DDI_STRICTORDER_ACC /* devacc_attr_dataorder */
};
static ddi_dma_attr_t sfxge_rx_packet_dma_attr = { /* identifier assumed */
DMA_ATTR_V0, /* dma_attr_version */
0, /* dma_attr_addr_lo */
0xffffffffffffffffull, /* dma_attr_addr_hi */
0xffffffffffffffffull, /* dma_attr_count_max */
SFXGE_CPU_CACHE_SIZE, /* dma_attr_align */
0xffffffff, /* dma_attr_burstsizes */
1, /* dma_attr_minxfer */
0xffffffffffffffffull, /* dma_attr_maxxfer */
0xffffffffffffffffull, /* dma_attr_seg */
1, /* dma_attr_sgllen */
1, /* dma_attr_granular */
0 /* dma_attr_flags */
};
/* Receive queue DMA attributes */
static ddi_device_acc_attr_t sfxge_rx_qdesc_devacc = { /* identifier assumed */
DDI_DEVICE_ATTR_V0, /* devacc_attr_version */
DDI_NEVERSWAP_ACC, /* devacc_attr_endian_flags */
DDI_STRICTORDER_ACC /* devacc_attr_dataorder */
};
static ddi_dma_attr_t sfxge_rx_qdesc_dma_attr = { /* identifier assumed */
DMA_ATTR_V0, /* dma_attr_version */
0, /* dma_attr_addr_lo */
0xffffffffffffffffull, /* dma_attr_addr_hi */
0xffffffffffffffffull, /* dma_attr_count_max */
EFX_BUF_SIZE, /* dma_attr_align */
0xffffffff, /* dma_attr_burstsizes */
1, /* dma_attr_minxfer */
0xffffffffffffffffull, /* dma_attr_maxxfer */
0xffffffffffffffffull, /* dma_attr_seg */
1, /* dma_attr_sgllen */
1, /* dma_attr_granular */
0 /* dma_attr_flags */
};
/* Forward declaration */
static int
{
int err;
/* Allocate a DMA handle */
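/*
 * Illustrative sketch of the missing call, not the original code: a handle
 * is typically allocated against the packet DMA attributes defined above.
 * The identifiers sp->s_dip, srpp->srp_dma_handle and
 * sfxge_rx_packet_dma_attr are assumptions for illustration.
 */
err = ddi_dma_alloc_handle(sp->s_dip, &sfxge_rx_packet_dma_attr,
    DDI_DMA_DONTWAIT, NULL, &(srpp->srp_dma_handle));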
if (err != DDI_SUCCESS)
goto fail1;
return (0);
return (-1);
}
static void
{
/* Free the DMA handle */
}
static int
{
int nprealloc;
unsigned int id;
int rc;
/* Compile-time structure layout checks */
goto fail1;
/* Allocate some buffer table entries */
goto fail2;
/* Allocate the context array */
goto fail3;
}
/* Allocate the flow table */
goto fail4;
}
/* Initialize the free packet pool */
goto fail5;
}
}
/* Preallocate some packets on the free packet pool */
return (0);
/* Free the flow table */
/* Free the context array */
sp->s_rxq_size);
/* Free the buffer table entries */
/* Remove dma setup */
return (-1);
}
static void
{
unsigned int id;
/* Tear down the free packet pool */
}
/* Free the flow table */
/* Free the context array */
sp->s_rxq_size);
/* Free the buffer table entries */
/* Tear down dma setup */
}
/* Note: This function takes ownership of *srpp. */
static inline void
{
unsigned int id;
putp->srfpl_count++;
}
static unsigned int
{
unsigned int start;
unsigned int id;
mblk_t *p;
unsigned int count;
unsigned int loaned;
/* We want to access the put list for the current CPU last */
do {
/* Acquire the put list */
p = putp->srfpl_putp;
putp->srfpl_count = 0;
if (p == NULL)
continue;
/* Add the list to the head of the get list */
/* Adjust the counters */
#if 0
/* NOTE: this probe is disabled because it is expensive!! */
unsigned int, count);
#endif
/* Return the number of packets yet to appear in the put list */
return (loaned);
}
static void
{
/* Swizzle put list to get list */
(void) sfxge_rx_qfpp_swizzle(srp);
/* Free the remainder */
srfppp->srfpp_count--;
/*
* ASSERT3P(freep->free_func, ==, sfxge_rx_qpacket_free);
* is implied by srpp test below
*/
/*LINTED*/
}
}
/*
 * This is an estimate of all memory consumed per RX packet.
 * It can be inaccurate, but sp->s_rx_pkt_mem_alloc mustn't drift.
 */
static uint64_t
{
sizeof (sfxge_rx_packet_t));
}
static void
{
srpp->srp_mblksize = 0;
/* Unbind the DMA memory from the DMA handle */
/* Free the DMA memory */
if (sp->s_rx_pkt_mem_max)
}
static void
{
/*
 * WARNING: "man -s 9f esballoc" states that the free routine:
 * => runs synchronously from the thread calling freeb()
 * => must not sleep, or access data structures that could be freed
 */
/* Check whether we want to recycle the receive packets */
if (srpp->srp_recycle) {
/*
* Allocate a matching mblk_t before the current one is
* freed.
*/
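/*
 * Illustrative sketch, not the original code: tie a fresh mblk to the
 * same driver buffer with desballoc(9F) so the packet can be returned to
 * the free pool without sleeping. The mp local and the srp_base,
 * srp_mblksize and srp_free field names are assumptions; error handling
 * is omitted.
 */
mp = desballoc((unsigned char *)srpp->srp_base, srpp->srp_mblksize,
    BPRI_HI, &(srpp->srp_free));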
/* NORMAL recycled case */
return;
}
}
}
static sfxge_rx_packet_t *
{
unsigned int ncookies;
int err;
int rc;
if (sp->s_rx_pkt_mem_max &&
return (NULL);
}
/* Allocate a new packet */
goto fail1;
}
/* Allocate some DMA memory */
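/*
 * Illustrative sketch, not the original code: DMA-able memory large enough
 * for the receive buffer is allocated against the handle and the packet
 * access attributes above. The identifiers srp_dma_handle, srp_acc_handle,
 * base and maxlen are assumptions for illustration.
 */
err = ddi_dma_mem_alloc(srpp->srp_dma_handle, sp->s_rx_buffer_size,
    &sfxge_rx_packet_devacc, DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
    NULL, &base, &maxlen, &(srpp->srp_acc_handle));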
switch (err) {
case DDI_SUCCESS:
break;
case DDI_FAILURE:
goto fail2;
default:
goto fail2;
}
/* Adjust the buffer to align the start of the DMA area correctly */
/* Bind the DMA memory to the DMA handle */
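/*
 * Illustrative sketch, not the original code: bind the buffer for device
 * writes, requesting a single cookie to satisfy the dma_attr_sgllen of 1
 * above. The identifiers base, dmac and srp_dma_handle are assumptions.
 */
err = ddi_dma_addr_bind_handle(srpp->srp_dma_handle, NULL,
    base, sp->s_rx_buffer_size, DDI_DMA_READ | DDI_DMA_STREAMING,
    DDI_DMA_DONTWAIT, NULL, &dmac, &ncookies);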
switch (err) {
case DDI_DMA_MAPPED:
break;
case DDI_DMA_INUSE:
goto fail3;
case DDI_DMA_NORESOURCES:
goto fail3;
case DDI_DMA_NOMAPPING:
goto fail3;
case DDI_DMA_TOOBIG:
goto fail3;
default:
goto fail3;
}
/*
* Allocate a STREAMS block: We use size 1 so that the allocator will
* use the first (and smallest) dblk cache.
*/
goto fail4;
}
if (sp->s_rx_pkt_mem_max) {
}
return (srpp);
srpp->srp_mblksize = 0;
/* Unbind the DMA memory from the DMA handle */
/* Free the DMA memory */
return (NULL);
}
/* Try to refill the RX descriptor ring from the associated free pkt pool */
static void
{
int ntodo;
unsigned int count;
unsigned int batch;
unsigned int rxfill;
unsigned int mblksize;
return;
if (ntodo == 0)
goto out;
(void) sfxge_rx_qfpp_swizzle(srp);
batch = 0;
while (ntodo-- > 0) {
unsigned int id;
break;
/*LINTED*/
/* The MTU may have changed since the packet was allocated */
--count;
continue;
}
if (batch == SFXGE_REFILL_BATCH) {
batch = 0;
}
--count;
}
if (batch != 0) {
}
out:
}
/* Preallocate packets and put them in the free packet pool */
static void
{
while (nprealloc-- > 0) {
break;
}
}
/* Try to refill the RX descriptor ring by allocating new packets */
static void
{
unsigned int batch;
unsigned int rxfill;
unsigned int mblksize;
int ntodo;
return;
if (ntodo == 0)
return;
batch = 0;
while (ntodo-- > 0) {
unsigned int id;
break;
if (batch == SFXGE_REFILL_BATCH) {
batch = 0;
}
}
if (batch != 0) {
}
}
void
{
mblk_t *p;
int count;
goto done;
/* Make sure the queue is full */
/* The refill may have emptied the pool */
goto done;
/* Don't trim below the pool's low water mark */
goto done;
/* Trim to the larger of srfppp->srfpp_min and srfppp->srfpp_lowat */
else
/* Walk the get list */
while (--count >= 0) {
p = *pp;
}
p = *pp;
/* Truncate the get list */
/* Free the remainder */
while (p != NULL) {
srfppp->srfpp_count--;
/*LINTED*/
freeb(p);
p = next;
}
done:
}
static void
{
/*
* man timeout(9f) states that this code should adhere to the
* same requirements as a softirq handler - DO NOT BLOCK
*/
/*
* Post an event to the event queue to cause the free packet pool to be
* trimmed if it is oversize.
*/
#if defined(DEBUG)
#else
/*
* Bug22691 WORKAROUND:
* This handler has been observed in the field to be invoked for a
* queue in the INITIALIZED state, which should never happen.
* Until the mechanism for this is properly understood, add defensive
* checks.
*/
"RXQ[%d] bad state in sfxge_rx_qpoll %d %d %p",
return;
}
#endif
}
static void
{
/* Schedule a poll */
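/*
 * Illustrative sketch, not the original code: arm the polling timer with
 * timeout(9F). The sr_tid field and the SFXGE_RX_QPOLL_USEC interval name
 * are assumptions for illustration.
 */
srp->sr_tid = timeout(sfxge_rx_qpoll, srp,
    drv_usectohz(SFXGE_RX_QPOLL_USEC));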
}
static void
{
/*
* Cancel the qpoll timer. Care is needed as this function
* can race with sfxge_rx_qpoll() for timeout id updates.
*
* Do not hold locks used by any timeout(9f) handlers across
* calls to untimeout(9f) as this will deadlock.
*/
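/*
 * Illustrative sketch, not the original code: capture and clear the
 * timeout id under the queue lock, then cancel it with the lock dropped.
 * The sr_lock and sr_tid field names are assumptions.
 */
mutex_enter(&(srp->sr_lock));
tid = srp->sr_tid;
srp->sr_tid = 0;
mutex_exit(&(srp->sr_lock));
if (tid != 0)
(void) untimeout(tid);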
tid = 0;
}
}
static int
{
int rc;
if (rw != KSTAT_READ) {
goto fail1;
}
goto done;
/* NB pointer post-increment below */
done:
return (0);
return (rc);
}
static int
{
int rc;
/* Create the set */
SFXGE_RX_NSTATS, 0)) == NULL) {
goto fail1;
}
/* Initialise the named stats */
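/*
 * Illustrative sketch, not the original code: knp walks the kstat_named_t
 * array in ks_data, with one kstat_named_init() per statistic. The ksp
 * local and the statistic name shown are assumptions, not the driver's
 * actual kstat names.
 */
knp = ksp->ks_data;
kstat_named_init(knp, "rx_packets", KSTAT_DATA_UINT64);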
knp++;
knp++;
knp++;
knp++;
knp++;
knp++;
knp++;
return (0);
return (rc);
}
static int
{
int rc;
goto fail1;
}
goto fail2;
return (0);
return (rc);
}
static int
{
unsigned int level;
int rc;
/* Zero the memory */
/* Program the buffer table */
goto fail1;
/* Create the receive queue */
!= 0)
goto fail2;
/* Enable the receive queue */
/* Set the water marks */
/* Try to fill the queue from the pool */
/*
 * If there were insufficient buffers in the pool to reach at least
 * a batch, then allocate some.
 */
if (level < SFXGE_RX_BATCH)
return (0);
/* Clear entries from the buffer table */
return (rc);
}
static void
{
return;
sizeof (struct ether_vlan_header) :
sizeof (struct ether_header)) +
}
static boolean_t
{
unsigned int shift;
/*
* If the time between this segment and the last is greater than RTO
* then consider this a new flow.
*/
goto fail1;
}
goto fail2;
}
/* Update the in-order segment count and sequence number */
/* Don't merge across pure ACK, URG, SYN or RST segments */
goto fail3;
/*
* If the in-order segment count has not yet reached the slow-start
* threshold then we cannot coalesce.
*/
goto fail4;
/* Scale up the packet size from 4k (the maximum being 64k) */
/* First packet in this flow */
/*
* If the flow is not already in the list of occupied flows then
* add it.
*/
}
} else {
/* Later packet in this flow - skip TCP header */
}
/*
* Try to align coalesced segments on push boundaries, unless they
* are too frequent.
*/
return (B_TRUE);
return (B_FALSE);
}
void
{
now = ddi_get_lbolt();
/* Start with the last flow to be appended to */
/*LINTED*/
/* If the packet is not TCP then we cannot coalesce it */
goto reject;
/*
* If the packet is not fully checksummed then we cannot
* coalesce it.
*/
goto reject;
/* Parse the TCP header */
goto reject;
} else {
ether_tci = 0;
}
/*
* Make sure any minimum length padding is stripped
* before we try to add the packet to a flow.
*/
/*
* If there is no current flow, or the segment does not match
* the current flow then we must attempt to look up the
* correct flow in the table.
*/
goto lookup;
goto lookup;
goto lookup;
goto lookup;
add:
/* Try to append the packet to the flow */
goto reject;
continue;
/*
* If there is a prefix area then read the hash from that,
* otherwise calculate it.
*/
if (sp->s_rx_prefix_size != 0) {
} else {
hash);
}
/*
* If the flow we have found does not match the hash then
* it may be an unused flow, or it may be stale.
*/
goto reject;
}
goto reject;
/* Start a new flow */
goto add;
}
/*
* If the flow we have found does match the hash then it could
* still be an alias.
*/
goto reject;
goto reject;
goto reject;
goto add;
}
}
void
{
unsigned int completed;
unsigned int level;
unsigned int id;
int rc;
unsigned int prefetch;
}
/* Discard packets, e.g. when called from sfxge_rx_qstop() */
goto discard;
goto discard;
/* Make the data visible to the kernel */
/* Read the length from the pseudo header if required */
}
/* Set up the packet length */
/* Calculate the maximum packet size */
sizeof (struct ether_vlan_header) :
sizeof (struct ether_header);
goto discard;
/* Check for loopback packets */
/*LINTED*/
if (etherhp->ether_type ==
srp->sr_loopback++;
goto discard;
}
}
/* Set up the checksum information */
flags = 0;
}
}
DB_CKSUMSTART(mp) = 0;
DB_CKSUMSTUFF(mp) = 0;
DB_CKSUMEND(mp) = 0;
DB_CKSUM16(mp) = 0;
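/*
 * Illustrative sketch, not the original code: hand the accumulated
 * hardware checksum flags to the stack via the dblk (see <sys/pattr.h>
 * for the HCK_* flag values).
 */
DB_CKSUMFLAGS(mp) = flags;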
/* Add the packet to the tail of the chain */
srfppp->srfpp_loaned++;
continue;
/* Return the packet to the pool */
srfppp->srfpp_loaned++;
}
/* Attempt to coalesce any TCP packets */
/*
* If there are any pending flows and this is the end of the
* poll then they must be completed.
*/
do {
}
/* If there are any packets then pass them up the stack */
if (level == 0) {
/* Try to refill ASAP */
}
/*
* If the RXQ is still empty, discard and recycle the
* current entry to ensure that the ring always
* contains at least one descriptor. This ensures that
* the next hardware RX will trigger an event
* (possibly delayed by interrupt moderation) and so give the
* driver another opportunity to refill the ring.
*
* Note this drops a complete LRO fragment from the
* start of the batch.
*
* Note also that copymsgchain() does not help with
* resource starvation here, unless we are short of DMA
* mappings.
*/
if (level == 0) {
if (nmp)
/* as level == 0, the swizzle and rxpost below will run */
} else {
}
}
/* Top up the queue if necessary */
}
}
void
{
/*
* Flush successful: wakeup sfxge_rx_qstop() if flush is pending.
*
* A delayed flush event received after RxQ stop has timed out
* will be ignored, as then the flush state will not be PENDING
* (see SFCbug22989).
*/
if (flush_pending)
}
void
{
/*
* Flush failed: wakeup sfxge_rx_qstop() if flush is pending.
*
* A delayed flush event received after RxQ stop has timed out
* will be ignored, as then the flush state will not be PENDING
* (see SFCbug22989).
*/
if (flush_pending)
}
static void
{
int rc;
/* Further packets are discarded by sfxge_rx_qcomplete() */
/*
* Flag indicates possible hardware failure.
* Attempt flush but do not wait for it to complete.
*/
}
/* Wait up to 2 seconds for queue flushing to complete */
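/*
 * Illustrative sketch, not the original code: the deadline handed to
 * cv_timedwait(9F) below is an absolute lbolt value; the 2 second figure
 * comes from the comment above.
 */
timeout = ddi_get_lbolt() + drv_usectohz(2000000);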
else
break;
}
timeout) < 0) {
/* Timeout waiting for successful or failed flush */
break;
}
}
/* Destroy the receive queue */
/* Clear entries from the buffer table */
/*
* Free any unused RX packets which had descriptors on the RXQ.
* Packets will be discarded as state != STARTED.
*/
srp->sr_pending = 0;
srp->sr_completed = 0;
srp->sr_loopback = 0;
}
static void
{
}
static void
{
/* Empty the pool */
}
static int
{
unsigned int index;
unsigned int entry;
unsigned int *freq;
int rc;
if (rw != KSTAT_READ) {
goto fail1;
}
KM_NOSLEEP)) == NULL) {
goto fail2;
}
}
knp++;
}
return (0);
return (rc);
}
static int
{
unsigned int index;
int rc;
/* Create the set */
goto fail1;
}
/* Initialise the named stats */
knp++;
}
return (0);
return (rc);
}
static void
{
/* Destroy the set */
}
unsigned int
{
int rx_scale;
/* Zero and all negative values map to the number of logical CPUs */
if (rx_scale <= 0)
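/*
 * Illustrative sketch, not the original code: fall back to the number of
 * online logical CPUs (the kernel-maintained ncpus_online global).
 */
rx_scale = ncpus_online;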
return (rx_scale);
}
static int
{
int rc;
/* Create tables for CPU, core, cache and chip counts */
/* We need at least one event queue */
/* Set up the kstats */
goto fail1;
return (0);
return (rc);
}
void
{
unsigned int count;
unsigned int *tbl;
unsigned int *rating;
unsigned int entry;
int rc;
goto fail1;
}
KM_NOSLEEP)) == NULL) {
goto fail2;
}
KM_NOSLEEP)) == NULL) {
goto fail3;
}
/*
* Subtract any current CPU, core, cache and chip usage from the
* global contention tables.
*/
}
/* Choose as many event queues as we need */
unsigned int index;
unsigned int choice;
unsigned int choice_rating;
/*
* Rate each event queue on its global level of CPU
* contention.
*/
}
/* Choose the queue with the lowest CPU contention */
choice = 0;
choice_rating = rating[0];
}
}
/* Add our choice to the condensed RSS table */
/* Add information to the global contention tables */
}
/* Build the expanded RSS table */
count = 0;
unsigned int index;
}
/* Program the expanded RSS table into the hardware */
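/*
 * Illustrative sketch, not the original code: the efx common code loads
 * the RSS indirection table via efx_rx_scale_tbl_set(); the s_enp field
 * and the SFXGE_RX_SCALE_MAX table length used here are assumptions.
 */
rc = efx_rx_scale_tbl_set(sp->s_enp, tbl, SFXGE_RX_SCALE_MAX);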
return;
}
static int
{
int rc;
/* Clear down the RSS table */
goto fail1;
/* sfxge_t->s_state_lock held */
return (0);
return (rc);
}
int
{
int rc;
goto fail1;
}
return (0);
return (rc);
}
int
{
int rc;
goto fail1;
}
goto fail2;
}
dispatch = 0;
if (dispatch)
/* no locks held */
return (0);
return (rc);
}
static void
{
/*
* Subtract any current CPU, core, cache and chip usage from the
* global contention tables.
*/
}
/* Clear down the RSS table */
}
static void
{
/* Tear down the kstats */
/* Destroy tables */
}
int
{
int index;
int rc;
goto fail1;
}
goto fail2;
NULL, 0);
/* Initialize the receive queue(s) */
goto fail3;
}
return (0);
/* Tear down the receive queue(s) */
while (--index >= 0)
return (rc);
}
int
{
int index;
int rc;
/* Calculate the receive packet buffer size and alignment */
/* Packet buffer allocations are cache line aligned */
/* Ensure IP headers are 32-bit aligned */
/*
* Place the start of the buffer a prefix length minus 2
* before the start of a cache line. This ensures that the
* last two bytes of the prefix (which is where the LFSR hash
* is located) are in the same cache line as the headers, and
* the IP header is 32-bit aligned.
*/
} else {
sp->s_rx_prefix_size = 0;
/*
* Place the start of the buffer 2 bytes after a cache line
* boundary so that the headers fit into the cache line and
* the IP header is 32-bit aligned.
*/
}
/* Align end of packet buffer for RX DMA end padding */
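/*
 * Illustrative sketch, not the original code: round the buffer size up to
 * the cache line size so the device's end-of-packet DMA padding cannot
 * overrun the allocation; the use of SFXGE_CPU_CACHE_SIZE here is an
 * assumption based on the DMA alignment attribute above.
 */
sp->s_rx_buffer_size = P2ROUNDUP(sp->s_rx_buffer_size,
    SFXGE_CPU_CACHE_SIZE);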
/* Initialize the receive module */
goto fail1;
goto fail2;
/* Start the receive queue(s) */
goto fail3;
}
/* It is sufficient to have Rx scale initialized */
if (rc != 0)
goto fail4;
return (0);
/* Stop the receive queue(s) */
while (--index >= 0)
/* Tear down the receive module */
return (rc);
}
void
{
}
int
{
int rc;
switch (mode) {
case SFXGE_RX_COALESCE_OFF:
break;
default:
goto fail1;
}
return (0);
return (rc);
}
void
{
int index;
/* Stop the receive queue(s) */
while (--index >= 0) {
/* TBD: Flush RXQs in parallel; HW has limit + may need retry */
}
/* Tear down the receive module */
sp->s_rx_buffer_align = 0;
sp->s_rx_prefix_size = 0;
sp->s_rx_buffer_size = 0;
}
unsigned int
{
int index;
unsigned int loaned;
loaned = 0;
}
return (loaned);
}
void
{
int index;
/* Tear down the receive queue(s) */
while (--index >= 0)
}