/*
* Copyright (c) 2008-2016 Solarflare Communications Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* The views and conclusions contained in the software and documentation are
* those of the authors and should not be interpreted as representing official
* policies, either expressed or implied, of the FreeBSD Project.
*/
#include <sys/sysmacros.h>
#include <sys/ethernet.h>
#include "sfxge.h"
#include "efx.h"
/* TXQ flush response timeout (in microseconds) */
/* See sfxge.conf.private for descriptions */
/* Transmit buffer DMA attributes */
static ddi_device_acc_attr_t sfxge_tx_buffer_devacc = {
	DDI_DEVICE_ATTR_V0,	/* devacc_attr_version */
DDI_NEVERSWAP_ACC, /* devacc_attr_endian_flags */
DDI_STRICTORDER_ACC /* devacc_attr_dataorder */
};
static ddi_dma_attr_t sfxge_tx_buffer_dma_attr = {
	DMA_ATTR_V0,	/* dma_attr_version */
0, /* dma_attr_addr_lo */
0xffffffffffffffffull, /* dma_attr_addr_hi */
0xffffffffffffffffull, /* dma_attr_count_max */
SFXGE_TX_BUFFER_SIZE, /* dma_attr_align */
0xffffffff, /* dma_attr_burstsizes */
1, /* dma_attr_minxfer */
0xffffffffffffffffull, /* dma_attr_maxxfer */
0xffffffffffffffffull, /* dma_attr_seg */
1, /* dma_attr_sgllen */
1, /* dma_attr_granular */
0 /* dma_attr_flags */
};
/* Transmit mapping DMA attributes */
static ddi_dma_attr_t sfxge_tx_mapping_dma_attr = {
	DMA_ATTR_V0,	/* dma_attr_version */
0, /* dma_attr_addr_lo */
0xffffffffffffffffull, /* dma_attr_addr_hi */
0xffffffffffffffffull, /* dma_attr_count_max */
1, /* dma_attr_align */
0xffffffff, /* dma_attr_burstsizes */
1, /* dma_attr_minxfer */
0xffffffffffffffffull, /* dma_attr_maxxfer */
0xffffffffffffffffull, /* dma_attr_seg */
0x7fffffff, /* dma_attr_sgllen */
1, /* dma_attr_granular */
0 /* dma_attr_flags */
};
/* Transmit queue DMA attributes */
static ddi_device_acc_attr_t sfxge_txq_devacc = {
	DDI_DEVICE_ATTR_V0,	/* devacc_attr_version */
DDI_NEVERSWAP_ACC, /* devacc_attr_endian_flags */
DDI_STRICTORDER_ACC /* devacc_attr_dataorder */
};
static ddi_dma_attr_t sfxge_txq_dma_attr = {
	DMA_ATTR_V0,	/* dma_attr_version */
0, /* dma_attr_addr_lo */
0xffffffffffffffffull, /* dma_attr_addr_hi */
0xffffffffffffffffull, /* dma_attr_count_max */
EFX_BUF_SIZE, /* dma_attr_align */
0xffffffff, /* dma_attr_burstsizes */
1, /* dma_attr_minxfer */
0xffffffffffffffffull, /* dma_attr_maxxfer */
0xffffffffffffffffull, /* dma_attr_seg */
1, /* dma_attr_sgllen */
1, /* dma_attr_granular */
0 /* dma_attr_flags */
};
/*
 * An sfxge_tx_qdpl_swizzle() can happen when the DPL get list is one packet
 * under the limit, and it must then move all packets from the DPL put list to
 * the get list. Hence this is the real maximum length of the TX DPL get list.
*/
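/*
 * Illustrative sketch only (not part of the original driver): assuming the
 * DPL carries separate get and put list packet limits (hypothetical field
 * names std_get_pkt_limit and std_put_pkt_limit), the real maximum get list
 * length described above would be their sum.
 */
#define	SFXGE_TX_DPL_GET_LIMIT_SKETCH(_stdp) \
	((_stdp)->std_get_pkt_limit + (_stdp)->std_put_pkt_limit)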
static int
{
}
static int
{
return (0);
}
static void
{
}
static int
{
int rc;
goto fail1;
return (0);
return (-1);
}
static void
{
}
static int
{
int rc;
/* Allocate DMA handle */
if (rc != DDI_SUCCESS)
goto fail1;
return (0);
return (-1);
}
static void
{
/* Free the DMA handle */
}
static int
{
int rc;
/* Compile-time structure layout checks */
goto fail1;
/* Allocate some buffer table entries */
goto fail2;
/* Allocate the descriptor array */
goto fail3;
}
/* Allocate the context arrays */
goto fail4;
}
goto fail5;
}
goto fail6;
}
/* Initialize the deferred packet list */
return (0);
sizeof (sfxge_tx_mapping_t *) * SFXGE_TX_NDESCS);
/* Free the descriptor array */
/* Free the buffer table entries */
/* Tear down DMA setup */
return (-1);
}
static void
{
stp->st_unblock = 0;
/* Tear down the deferred packet list */
/* Free the context arrays */
sizeof (sfxge_tx_mapping_t *) * SFXGE_TX_NDESCS);
/* Free the descriptor array */
/* Free the buffer table entries */
/* Tear down dma setup */
}
static void
{
}
static sfxge_tx_packet_t *
{
return (stpp);
}
static inline int
{
/* Add to the start of the list */
return (0);
}
return (ENOSPC);
}
static inline sfxge_tx_packet_t *
{
return (NULL);
}
/* Remove item from the head of the list */
}
return (stpp);
}
static void
{
}
}
static inline void
{
}
static inline sfxge_tx_buffer_t *
{
return (NULL);
}
}
return (stbp);
}
static void
{
}
}
static inline void
{
}
static inline sfxge_tx_mapping_t *
{
return (NULL);
}
}
return (stmp);
}
static void
{
}
}
static void
{
}
static int
{
unsigned int ncookies;
unsigned int n;
int rc;
/* Bind the STREAMS block to the mapping */
if (rc != DDI_DMA_MAPPED)
goto fail1;
/*
* Construct an array of addresses and an initial
* offset.
*/
n = 0;
for (;;) {
ASSERT3U(n, <, SFXGE_TX_MAPPING_NADDR);
if (--ncookies == 0)
break;
}
}
ASSERT3U(n, <=, SFXGE_TX_MAPPING_NADDR);
return (0);
return (-1);
}
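/*
 * Illustrative sketch of the DDI binding pattern used above (hypothetical
 * standalone helper, not the driver's own code): bind a kernel virtual
 * address range for DMA and walk the returned cookie chain, recording the
 * device address of each DMA window.
 */
static int
sfxge_sketch_dma_bind(ddi_dma_handle_t handle, caddr_t base, size_t len,
    uint64_t *addrp, unsigned int naddr, unsigned int *countp)
{
	ddi_dma_cookie_t dmac;
	unsigned int ncookies;
	unsigned int n;
	int rc;

	rc = ddi_dma_addr_bind_handle(handle, NULL, base, len,
	    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, NULL,
	    &dmac, &ncookies);
	if (rc != DDI_DMA_MAPPED)
		return (-1);

	/* Record each cookie's device address, up to the caller's limit */
	for (n = 0; n < ncookies && n < naddr; n++) {
		addrp[n] = dmac.dmac_laddress;
		if (n + 1 < ncookies)
			ddi_dma_nextcookie(handle, &dmac);
	}

	*countp = n;
	return (0);
}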
static void
{
unsigned int reaped;
unsigned int id;
/* Free all the mappings */
do {
}
/* Free all the buffers */
do {
}
}
}
static void
{
unsigned int id;
/* Clear the completion information */
/* Free any mappings that were used */
}
/* Free any buffers that were used */
}
/* Clear the fragment list */
}
/* Push descriptors to the TX ring, setting blocked if there is no space */
static void
{
unsigned int id;
unsigned int level;
unsigned int available;
int rc;
goto fail1;
}
/* Post the fragment list */
goto fail2;
/*
* If the list took more than a single descriptor then we need to
 * move the completion information so it is referenced by the last
* descriptor.
*/
}
/* Clear the list */
return;
/*
* If there would be enough space after we've reaped any completed
* mappings and buffers, and we gain sufficient queue space by doing
* so, then reap now and try posting again.
*/
goto again;
}
/* Set the unblock level */
} else {
}
/*
* Avoid a race with completion interrupt handling that could leave the
* queue blocked.
*
* NOTE: The use of st_pending rather than st_completed is intentional
* as st_pending is updated per-event rather than per-batch and
* therefore avoids needless deferring.
*/
goto again;
}
}
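/*
 * Illustrative sketch of the race avoidance described above (hypothetical
 * helper, not the driver's own code): after marking the queue blocked, the
 * poster re-checks the fill level using the per-event pending count.  If
 * completions have already drained the queue to the unblock level, the
 * caller should retry posting rather than wait for an interrupt that has
 * already come and gone.
 */
static boolean_t
sfxge_sketch_retry_after_block(unsigned int added, unsigned int pending,
    unsigned int unblock_level)
{
	/* Descriptors still outstanding on the ring */
	unsigned int level = added - pending;

	return ((level <= unblock_level) ? B_TRUE : B_FALSE);
}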
static int
{
int rc;
if (rw != KSTAT_READ) {
goto fail1;
}
goto done;
knp++;
knp++;
knp++;
done:
return (0);
return (rc);
}
static int
{
unsigned int id;
int rc;
/* Create the set */
goto fail1;
}
/* Initialise the named stats */
knp++;
}
knp++;
knp++;
knp++;
return (0);
return (rc);
}
static void
{
/* Destroy the set */
}
static int
unsigned int evq)
{
int rc;
goto fail1;
}
/* Initialize the statistics */
goto fail2;
DDI_PROP_DONTPASS, "tx_dpl_get_pkt_limit",
DDI_PROP_DONTPASS, "tx_dpl_put_pkt_limit",
/* Allocate a per-EVQ label for events from this TXQ */
goto fail2;
/* Attach the TXQ to the driver */
sp->s_tx_qcount++;
return (0);
return (rc);
}
static int
{
unsigned int evq;
unsigned int flags;
unsigned int desc_index;
int rc;
/* Zero the memory */
/* Program the buffer table */
EFX_TXQ_NBUFS(SFXGE_TX_NDESCS))) != 0)
goto fail1;
case SFXGE_TXQ_NON_CKSUM:
flags = 0;
break;
case SFXGE_TXQ_IP_CKSUM:
break;
break;
default:
flags = 0;
break;
}
/* Create the transmit queue */
goto fail2;
/* Initialise queue descriptor indexes */
/* Enable the transmit queue */
return (0);
/* Clear entries from the buffer table */
return (rc);
}
static inline int
{
int rc;
while (mapping_size != 0 && *limitp != 0) {
goto fail1;
}
mapping_off += page_size;
}
return (0);
return (rc);
}
static inline int
{
int rc;
goto fail1;
}
return (0);
return (rc);
}
static inline boolean_t
{
return (eop);
}
static int
{
int rc;
while (size != 0) {
if (copy)
goto copy;
/*
* Check whether we have already mapped this data block for
* DMA.
*/
/*
* If we are part way through copying a data block then
* there's no point in trying to map it for DMA.
*/
if (off != 0)
goto copy;
/*
* If the data block is too short then the cost of
* mapping it for DMA would outweigh the cost of
* copying it.
*/
goto copy;
/* Try to grab a transmit mapping from the pool */
/*
* The pool was empty so allocate a new
* mapping.
*/
KM_NOSLEEP)) == NULL)
goto copy;
}
/* Add the DMA mapping to the list */
/* Try to bind the data block to the mapping */
goto copy;
}
/*
* If we have a partially filled buffer then we must add it to
* the fragment list before adding the mapping.
*/
if (rc != 0)
goto fail1;
}
/* Add the mapping to the fragment list */
if (rc != 0)
goto fail2;
/*
 * If the data block has been exhausted then skip over the
* control block and advance to the next data block.
*/
off = 0;
}
continue;
copy:
/* Try to grab a buffer from the pool */
/*
* The pool was empty so allocate a new
* buffer.
*/
KM_NOSLEEP)) == NULL) {
goto fail3;
}
}
/* Add it to the list */
}
/* Copy as much of the data block as we can into the buffer */
/*
* If we have reached the end of the packet, or the buffer is
* full, then add the buffer to the fragment list.
*/
if (rc != 0)
goto fail4;
}
/*
 * If the data block has been exhausted then advance to the next
* one.
*/
off = 0;
}
}
return (0);
return (rc);
}
static int
{
unsigned int id;
int rc;
goto fail1;
}
sizeof (struct ether_vlan_header) :
sizeof (struct ether_header);
goto fail2;
}
/* The payload offset is equivalent to the size of the headers */
/*
* If the initial data block only contains the headers then advance
* to the next one.
*/
goto fail3;
}
off = 0;
/* Check IP and TCP headers are suitable for LSO */
goto fail4;
}
goto fail4;
}
/*
 * Get the base IP id. The stack leaves enough of a gap in the id
 * space for us to increment it for each segment we send out.
*/
/* Get the base TCP sequence number and flags */
/* Adjust the header for interim segments */
goto fail5;
}
while (lss != 0) {
/* Adjust the header for this segment */
ip_id++;
/* If this is the final segment then do some extra adjustment */
if (eol) {
}
/* Try to grab a buffer from the pool */
/*
* The pool was empty so allocate a new
* buffer.
*/
KM_NOSLEEP)) == NULL) {
goto fail6;
}
}
/* Add it to the list */
}
/* Copy in the headers */
/* Add the buffer to the fragment list */
if (rc != 0)
goto fail7;
/* Add the payload to the fragment list */
goto fail8;
}
/*
* If no part of the packet has been mapped for DMA then we can free
* it now, otherwise it can only be freed on completion.
*/
else
return (0);
/* Restore the header */
sizeof (struct ether_vlan_header) :
sizeof (struct ether_header)) +
return (rc);
}
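/*
 * Illustrative sketch of the per-segment LSO header adjustment described
 * above (hypothetical helper and header layout, not the driver's own code).
 * Each segment reuses the original headers with an incremented IP id and an
 * advanced TCP sequence number; only the final segment keeps the FIN/PSH
 * flags of the original packet.
 */
typedef struct sfxge_sketch_lso_hdr {
	uint16_t	slh_ip_id;	/* IPv4 identification (host order) */
	uint32_t	slh_tcp_seq;	/* TCP sequence number (host order) */
	uint8_t		slh_tcp_flags;	/* TCP flags byte */
} sfxge_sketch_lso_hdr_t;

static void
sfxge_sketch_lso_adjust(sfxge_sketch_lso_hdr_t *hdr, uint16_t base_id,
    uint32_t base_seq, uint8_t base_flags, uint32_t mss,
    unsigned int segment, boolean_t final)
{
	hdr->slh_ip_id = base_id + segment;
	hdr->slh_tcp_seq = base_seq + segment * mss;

	/* Interim segments must not carry FIN (0x01) or PSH (0x08) */
	hdr->slh_tcp_flags = final ?
	    base_flags : (base_flags & ~(uint8_t)0x09);
}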
static int
{
unsigned int id;
int rc;
off = 0;
if (copy)
goto copy;
/*
* If we are part way through copying a data block then there's
* no point in trying to map it for DMA.
*/
if (off != 0)
goto copy;
/*
* If the data block is too short then the cost of mapping it
* for DMA would outweigh the cost of copying it.
*
* TX copy break
*/
goto copy;
/* Try to grab a transmit mapping from the pool */
/*
* The pool was empty so allocate a new
* mapping.
*/
KM_NOSLEEP)) == NULL)
goto copy;
}
/* Add the DMA mapping to the list */
/* Try to bind the data block to the mapping */
goto copy;
/*
* If we have a partially filled buffer then we must add it to
* the fragment list before adding the mapping.
*/
if (rc != 0)
goto fail1;
}
/* Add the mapping to the fragment list */
if (rc != 0)
goto fail2;
/* Advance to the next data block */
off = 0;
continue;
copy:
/* Try to grab a buffer from the pool */
/*
* The pool was empty so allocate a new
* buffer.
*/
KM_NOSLEEP)) == NULL) {
goto fail3;
}
}
/* Add it to the list */
}
/* Copy as much of the data block as we can into the buffer */
/*
* If we have reached the end of the packet, or the buffer is
* full, then add the buffer to the fragment list.
*/
if (rc != 0)
goto fail4;
}
/*
 * If the data block has been exhausted then advance to the next
* one.
*/
off = 0;
}
}
/*
* If no part of the packet has been mapped for DMA then we can free
* it now, otherwise it can only be freed on completion.
*/
else
return (0);
return (rc);
}
static void
{
unsigned int count;
/*
 * In-flight TX packets are guaranteed to cause more TX completions,
 * hence more swizzles must happen.
*/
return;
/* Acquire the put list - replacing with an empty list */
return;
/* Reverse the list */
p = NULL;
count = 0;
do {
p = stpp;
count++;
/* Add it to the tail of the get list */
}
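/*
 * Illustrative sketch of the swizzle steps above (hypothetical node type and
 * helper, not the driver's own code): atomically detach the put list by
 * swapping in an empty list, then reverse it in place so that it can be
 * appended to the tail of the get list in arrival order.
 */
typedef struct sfxge_sketch_node {
	struct sfxge_sketch_node *ssn_next;
} sfxge_sketch_node_t;

static sfxge_sketch_node_t *
sfxge_sketch_detach_and_reverse(sfxge_sketch_node_t **putp)
{
	sfxge_sketch_node_t *list;
	sfxge_sketch_node_t *reversed = NULL;

	/* Atomically replace the put list with an empty list */
	list = atomic_swap_ptr(putp, NULL);

	/* Reverse the detached list into arrival order */
	while (list != NULL) {
		sfxge_sketch_node_t *next = list->ssn_next;

		list->ssn_next = reversed;
		reversed = list;
		list = next;
	}

	return (reversed);
}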
/*
 * If the TXQ is locked, add the TX DPL put list and this packet to the TX DPL
 * get list.
 * If the TXQ is unlocked, atomically add this packet to the TX DPL put list.
 *
 * The only possible error is ENOSPC (used for TX backpressure), returned when
 * the TX DPL put or get list becomes full. In both cases there must be future
 * TX completions (as represented by the packets on the DPL get lists).
 *
 * This ensures that in the future mac_tx_update() will be called from
 * sfxge_tx_qcomplete().
*/
static inline int
{
if (locked) {
stdp->get_full_count++;
return (ENOSPC);
}
/* Reverse the put list onto the get list */
/* Add to the tail of the get list */
} else {
/* Add to the head of the put list, keeping a list length */
do {
stpp->stp_dpl_put_len = 0;
stdp->put_full_count++;
return (ENOSPC);
}
}
return (0);
}
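/*
 * Illustrative sketch of the unlocked put list insertion described above
 * (hypothetical helper reusing the sketch node type defined earlier, not the
 * driver's own code): prepend to the head of the put list with a
 * compare-and-swap loop so that concurrent senders never take the TXQ lock.
 */
static void
sfxge_sketch_put_list_add(sfxge_sketch_node_t **putp,
    sfxge_sketch_node_t *stpp)
{
	sfxge_sketch_node_t *old;

	do {
		old = *putp;
		stpp->ssn_next = old;
	} while (atomic_cas_ptr(putp, old, stpp) != old);
}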
/* Take all packets from the DPL get list and try to send them to the HW */
static void
{
unsigned int count;
while (count != 0) {
int rc;
/* Split stpp off */
goto reject;
/* Fragment the packet */
} else {
}
switch (rc) {
case 0:
break;
case ENOSPC:
if (!copy)
goto copy;
/*FALLTHRU*/
default:
goto reject;
}
/* Free the packet structure */
stpp->stp_dpl_put_len = 0;
}
--count;
/* Post the packet */
goto defer;
}
continue;
copy:
/* Abort the current fragment list */
/* Try copying the packet to flatten it */
goto again;
/* Abort the current fragment list */
/* Discard the packet */
/* Free the packet structure */
stpp->stp_dpl_put_len = 0;
}
--count;
continue;
break;
}
if (count == 0) {
/* New empty get list */
} else {
/* Shorten the list by moving the head */
}
}
/* Swizzle the deferred packet list and try to push it to the HW */
static inline void
{
do {
if (SFXGE_TX_QDPL_PUT_PENDING(stp))
if (!SFXGE_TX_QDPL_PUT_PENDING(stp))
break;
}
static void
{
unsigned int count;
/* Swizzle put list to the get list */
while (count != 0) {
/* Discard the packet */
/* Free the packet structure */
stpp->stp_dpl_put_len = 0;
--count;
}
/* Empty list */
}
void
{
}
static void
{
return;
}
unsigned int level;
}
}
/* lock has been dropped */
}
void
{
unsigned int completed;
unsigned int id;
/* Unbind all the mappings */
do {
/*
* Now that the packet is no longer mapped for DMA it
* can be freed.
*/
}
}
/* Check whether we need to unblock the queue */
unsigned int level;
}
}
void
{
case SFXGE_TXQ_INITIALIZED:
/* Ignore flush event after TxQ destroyed */
break;
case SFXGE_TXQ_FLUSH_PENDING:
break;
case SFXGE_TXQ_FLUSH_FAILED:
/* MC may have rebooted before handling the flush request */
break;
case SFXGE_TXQ_STARTED:
/*
 * The MC initiated the flush on MC reboot or because of a bad Tx
 * descriptor.
*/
break;
case SFXGE_TXQ_FLUSH_DONE:
/* Ignore unexpected extra flush event */
break;
default:
}
if (flush_pending == B_FALSE) {
/* Flush was not pending */
return;
}
sp->s_tx_flush_pending--;
if (sp->s_tx_flush_pending <= 0) {
/* All queues flushed: wakeup sfxge_tx_stop() */
}
}
static void
{
int rc;
/* Prepare to flush and stop the queue */
/* Flush the transmit queue */
/* Already flushed; the flush may have been initiated by the MC */
} else if (rc != 0) {
/* Unexpected error */
} else if (wait_for_flush) {
sp->s_tx_flush_pending++;
} else {
/* Assume the flush is done */
}
}
}
static void
{
goto done;
/* All queues should have been flushed */
SFXGE_CMN_ERR "txq[%d] stop with flush_pending=%d",
}
}
/* Destroy the transmit queue */
/* Clear entries from the buffer table */
/*
 * Ensure the deferred packet list is cleared.
 * This can race with sfxge_tx_packet_add() adding to the put list.
*/
stp->st_pending = 0;
stp->st_completed = 0;
done:
}
static void
{
/* Detach the TXQ from the driver */
sp->s_tx_qcount--;
/* Free the EVQ label for events from this TXQ */
/* Tear down the statistics */
/* Ensure the deferred packet list is empty */
/* Clear the free buffer pool */
/* Clear the free mapping pool */
/* Clear the free packet pool */
}
int
{
int index;
int rc;
NULL, 0);
/* Initialize the transmit queues. */
/* Ensure minimum queue counts required by sfxge_tx_packet_add(). */
txq = 0;
goto fail1;
}
goto fail2;
}
txq++;
}
}
return (0);
while (--index >= 0) {
}
return (rc);
}
int
{
int index;
int rc;
/* Initialize the transmit module */
goto fail1;
goto fail2;
}
return (0);
return (rc);
}
/*
 * Add a packet to the TX Deferred Packet List and, if the TX queue lock
 * can be acquired, call sfxge_tx_qdpl_service() to fragment and push it
 * to the H/W transmit descriptor ring.
 *
 * If ENOSPC is returned then the DPL is full or the packet create failed, but
 * the mblk isn't freed so that the caller can return this mblk from mc_tx() to
 * back-pressure the OS stack.
 *
 * For all other errors the mblk is freed.
*/
int
{
unsigned int txq;
int index;
int rc = 0;
/*
*
 * NOTE: This access to the state is NOT protected by the state lock. The
 * deferred packet lists are cleaned up in (possibly repeated) calls to
* sfxge_can_destroy().
*/
goto fail1;
}
off = 0;
size = 0;
mss = 0;
/* Check whether we need the header pointers for LSO segmentation */
/* LSO segmentation relies on hardware checksum offload */
goto fail1;
}
if (pkt_type != SFXGE_PACKET_TYPE_IPV4_TCP ||
off == 0) {
goto fail2;
}
}
/* Choose the appropriate transmit queue */
/*
* If we have not already parsed the headers
* for LSO segmentation then we need to do it
* now so we can calculate the hash.
*/
}
/*
 * Calculate the IPv4 4-tuple hash; the layer 4
 * ports are zero for other IPv4 protocols.
*/
} else {
/*
 * Other traffic always goes to the
 * queue in the zero-th entry of
* the RSS table.
*/
}
} else {
/*
* It does not matter what the hash is
* because all the RSS table entries will be
* the same.
*/
}
/*
* Find the event queue corresponding to the hash in
* the RSS table.
*/
} else {
index = 0;
}
index = 0;
} else {
/*
* No hardware checksum offload requested.
*/
}
/*
 * Calculate the IPv4 4-tuple hash; the layer 4
 * ports are zero for other IPv4 protocols.
*/
}
}
/*
 * The RSS table (indexed by hash) gives the RXQ index
* (mapped 1:1 with EVQs). Find the TXQ that results in
* using the same EVQ as for the RX data path.
*/
>, index);
} else {
index = 0;
}
}
/* Try to grab the lock */
if (locked) {
/* Try to grab a packet from the pool */
} else {
}
/*
* Either the pool was empty or we don't have the lock so
* allocate a new packet.
*/
goto fail3;
}
}
stpp->stp_dpl_put_len = 0;
if (rc != 0) {
/* ENOSPC can happen when the DPL get or put list is full */
/*
 * Note: if this is the unlocked DPL put list full case there is
 * no need to worry about a race with locked
 * sfxge_tx_qdpl_swizzle() as we know that the TX DPL put list
 * was full and would have been swizzled to the TX DPL get
 * list; hence guaranteeing future TX completions and calls
 * to mac_tx_update() via sfxge_tx_qcomplete().
*/
goto fail4;
}
/* Try to grab the lock again */
if (!locked)
if (locked) {
/* Try to service the list */
/* lock has been dropped */
}
return (0);
if (locked)
return (rc);
}
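/*
 * Illustrative usage sketch (hypothetical wrapper; assumes a signature of
 * int sfxge_tx_packet_add(sfxge_t *, mblk_t *), which is not shown above):
 * an mc_tx()-style caller keeps the mblk on ENOSPC so that the OS stack is
 * back-pressured, and treats any other return as a consumed packet.
 */
static mblk_t *
sfxge_sketch_tx_one(sfxge_t *sp, mblk_t *mp)
{
	int rc = sfxge_tx_packet_add(sp, mp);

	if (rc == ENOSPC)
		return (mp);	/* not freed: hand back to retry later */

	return (NULL);		/* queued, or freed on error */
}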
void
{
int index;
/* Flush all the queues */
} else {
/*
* Flag indicates possible hardware failure.
* Attempt flush but do not wait for it to complete.
*/
}
/* Prepare queues to stop and flush the hardware ring */
while (--index >= 0) {
}
if (wait_for_flush == B_FALSE)
goto flush_done;
/* Wait up to 2 seconds for queue flushing to complete */
while (sp->s_tx_flush_pending > 0) {
timeout) < 0) {
/* Timeout waiting for queues to flush */
SFXGE_CMN_ERR "tx qflush timeout");
break;
}
}
sp->s_tx_flush_pending = 0;
/* Stop all the queues */
while (--index >= 0) {
}
/* Tear down the transmit module */
}
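/*
 * Illustrative sketch of the bounded flush wait used above (hypothetical
 * helper and parameters, not the driver's own code): wait on a condition
 * variable until a pending count drains or an absolute deadline passes.
 */
static void
sfxge_sketch_flush_wait(kmutex_t *lockp, kcondvar_t *cvp,
    volatile int *pendingp, clock_t wait_usec)
{
	clock_t deadline = ddi_get_lbolt() + drv_usectohz(wait_usec);

	ASSERT(MUTEX_HELD(lockp));

	while (*pendingp > 0) {
		/* cv_timedwait() returns -1 once the deadline has passed */
		if (cv_timedwait(cvp, lockp, deadline) < 0)
			break;
	}
}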
void
{
int index;
while (--index >= 0) {
}
}