igb_tx.c revision cf8dcc9bbabedca41ecfee13dec8172104e99968
/*
* CDDL HEADER START
*
* Copyright(c) 2007-2009 Intel Corporation. All rights reserved.
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at:
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When using or redistributing this file, you may do so under the
* License only. No other modification of this header is permitted.
*
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include "igb_sw.h"
uint32_t);
size_t);
static void igb_fill_tx_context(struct e1000_adv_tx_context_desc *,
tx_context_t *, uint32_t);
#ifndef IGB_DEBUG
#pragma inline(igb_save_desc)
#pragma inline(igb_get_tx_context)
#pragma inline(igb_check_tx_context)
#pragma inline(igb_fill_tx_context)
#endif
mblk_t *
{
return (NULL);
}
}
/*
* igb_tx - Main transmit processing
*
 * Called from igb_m_tx with an mblk ready to transmit. This
 * routine sets up the transmit descriptors and sends data to
 * the wire.
*
* One mblk can consist of several fragments, each fragment
* will be processed with different methods based on the size.
* For the fragments with size less than the bcopy threshold,
* they will be processed by using bcopy; otherwise, they will
* be processed by using DMA binding.
*
* To process the mblk, a tx control block is got from the
* free list. One tx control block contains one tx buffer, which
* is used to copy mblk fragments' data; and one tx DMA handle,
* which is used to bind a mblk fragment with DMA resource.
*
* Several small mblk fragments can be copied into one tx control
* block's buffer, and then the buffer will be transmitted with
* one tx descriptor.
*
* A large fragment only binds with one tx control block's DMA
* handle, and it can span several tx descriptors for transmitting.
*
* So to transmit a packet (mblk), several tx control blocks can
* be used. After the processing, those tx control blocks will
* be put to the work list.
*/
static boolean_t
{
int desc_num;
/* Get the mblk size */
mbsize = 0;
}
if (igb->tx_hcksum_enable) {
ctx = &tx_context;
/*
* Retrieve offloading context information from the mblk
* context descriptor.
*/
return (B_TRUE);
}
return (B_TRUE);
}
} else {
return (B_TRUE);
}
}
/*
* Check and recycle tx descriptors.
* The recycle threshold here should be selected carefully
*/
/*
* After the recycling, if the tbd_free is less than the
* overload_threshold, assert overload, return B_FALSE;
* and we need to re-schedule the tx again.
*/
return (B_FALSE);
}
/*
* The software should guarantee LSO packet header(MAC+IP+TCP)
* to be within one descriptor - this is required by h/w.
 * Here we will reallocate and refill the header if
 * the headers (MAC+IP+TCP) are physically non-contiguous.
*/
current_mp = mp;
previous_mp = NULL;
}
/*
* If len is larger than copy_thresh, we do not
* need to do anything since igb's tx copy mechanism
* will ensure that the headers will be handled
* in one descriptor.
*/
if (len > copy_thresh) {
/*
* If the header and the payload are in
* different mblks, we simply force the
* header to be copied into a
* new-allocated buffer.
*/
hdr_frag_len = hdr_len -
/*
* There are two cases we will reallocate
* a mblk for the last header fragment.
* 1. the header is in multiple mblks and
* the last fragment shares the same mblk
* with the payload
* 2. the header is in a single mblk shared
* with the payload but the header crosses
* a page.
*/
if ((current_mp != mp) ||
/*
* reallocate the mblk for the last
* header fragment, expect it to be
* copied into pre-allocated
* page-aligned buffer
*/
if (!new_mp) {
return (B_FALSE);
}
/*
* Insert the new mblk
*/
if (previous_mp)
else
}
}
if (copy_thresh < hdr_len)
}
}
/*
* The pending_list is a linked list that is used to save
* the tx control blocks that have packet data processed
* but have not put the data to the tx descriptor ring.
* It is used to reduce the lock contention of the tx_lock.
*/
desc_num = 0;
desc_total = 0;
current_mp = mp;
/*
* Decide which method to use for the first fragment
*/
/*
* If the mblk includes several contiguous small fragments,
* they may be copied into one buffer. This flag is used to
* indicate whether there are pending fragments that need to
* be copied to the current tx buffer.
*
* If this flag is B_TRUE, it indicates that a new tx control
* block is needed to process the next fragment using either
* copy or DMA binding.
*
* Otherwise, it indicates that the next fragment will be
* copied to the current tx buffer that is maintained by the
* current tx control block. No new tx control block is needed.
*/
while (current_mp) {
/*
* When the current fragment is an empty fragment, if
* the next fragment will still be copied to the current
* tx buffer, we cannot skip this fragment here. Because
* the copy processing is pending for completion. We have
* to process this empty fragment in the tx_copy routine.
*
* If the copy processing is completed or a DMA binding
* processing is just completed, we can just skip this
* empty fragment.
*/
if ((current_len == 0) && (copy_done)) {
continue;
}
if (copy_done) {
/*
* Get a new tx control block from the free list
*/
goto tx_failure;
}
/*
* Push the tx control block to the pending list
* to avoid using lock too early
*/
}
if (current_flag == USE_COPY) {
/*
* Check whether to use bcopy or DMA binding to process
* the next fragment, and if using bcopy, whether we
* need to continue copying the next fragment into the
* current tx buffer.
*/
if (eop) {
/*
* This is the last fragment of the packet, so
* the copy processing will be completed with
* this fragment.
*/
/*
* If the next fragment is too large to be
* copied to the current tx buffer, we need
* to complete the current copy processing.
*/
} else if (next_len > copy_thresh) {
			/*
			 * The next fragment needs to be processed with
			 * DMA binding. So the copy processing will be
			 * completed with the current fragment.
			 */
} else {
/*
* Continue to copy the next fragment to the
* current tx buffer.
*/
}
} else {
/*
* Check whether to use bcopy or DMA binding to process
* the next fragment.
*/
}
if (desc_num > 0)
desc_total += desc_num;
else if (desc_num < 0)
goto tx_failure;
}
/*
* Attach the mblk to the last tx control block
*/
/*
* Before fill the tx descriptor ring with the data, we need to
* ensure there are adequate free descriptors for transmit
* (including one context descriptor).
*/
}
/*
* If the number of free tx descriptors is not enough for transmit
* then return failure.
*
* Note: we must put this check under the mutex protection to
* ensure the correctness when multiple threads access it in
* parallel.
*/
goto tx_failure;
}
return (B_TRUE);
/*
* Discard the mblk and free the used resources
*/
while (tcb) {
tcb = (tx_control_block_t *)
}
/*
* Return the tx control blocks in the pending list to the free list.
*/
	/* Transmit failed, do not drop the mblk, reschedule the transmit */
return (B_FALSE);
}
/*
* igb_tx_copy
*
* Copy the mblk fragment to the pre-allocated tx buffer
*/
static int
{
/*
* Copy the packet data of the mblk fragment into the
* pre-allocated tx buffer, which is maintained by the
* tx control block.
*
* Several mblk fragments can be copied into one tx buffer.
* The destination address of the current copied fragment in
* the tx buffer is next to the end of the previous copied
* fragment.
*/
if (len > 0) {
}
desc_num = 0;
/*
* If it is the last fragment copied to the current tx buffer,
* in other words, if there's no remaining fragment or the remaining
* fragment requires a new tx control block to process, we need to
* complete the current copy processing by syncing up the current
* DMA buffer and saving the descriptor data.
*/
if (copy_done) {
/*
* Sync the DMA buffer of the packet data
*/
/*
* Save the address and length to the private data structure
* of the tx control block, which will be used to fill the
* tx descriptor ring after all the fragments are processed.
*/
desc_num++;
}
return (desc_num);
}
/*
* igb_tx_bind
*
* Bind the mblk fragment with DMA
*/
static int
{
int status, i;
int desc_num;
/*
* Use DMA binding to process the mblk fragment
*/
0, &dma_cookie, &ncookies);
if (status != DDI_DMA_MAPPED) {
return (-1);
}
/*
* Each fragment can span several cookies. One cookie will have
* one tx descriptor to transmit.
*/
desc_num = 0;
for (i = ncookies; i > 0; i--) {
/*
* Save the address and length to the private data structure
* of the tx control block, which will be used to fill the
* tx descriptor ring after all the fragments are processed.
*/
desc_num++;
if (i > 1)
}
return (desc_num);
}
/*
* igb_get_tx_context
*
* Get the tx context information from the mblk
*/
static int
{
unsigned char *pos;
if (flags == 0)
return (TX_CXT_SUCCESS);
/*
* LSO relies on tx h/w checksum, so here the packet will be
* dropped if the h/w checksum flags are not set.
*/
"checksum flags are not set for LSO");
return (TX_CXT_E_LSO_CSUM);
}
}
etype = 0;
mac_hdr_len = 0;
l4_proto = 0;
/*
	 * First, get the position of the ether_type/ether_tpid.
	 * Here we don't assume the ether (VLAN) header is fully included
	 * in one mblk fragment, so we go through the fragments to parse
	 * the ether type.
*/
}
if (etype == ETHERTYPE_VLAN) {
/*
* Get the position of the ether_type in VLAN header
*/
}
mac_hdr_len = sizeof (struct ether_vlan_header);
} else {
mac_hdr_len = sizeof (struct ether_header);
}
/*
* Here we assume the IP(V6) header is fully included in one
* mblk fragment.
*/
switch (etype) {
case ETHERTYPE_IP:
}
ipha_length))) = 0;
/*
			 * To utilize igb LSO, here we need to fill
			 * the tcp checksum field of the packet with the
			 * following pseudo-header checksum:
			 * (ip_source_addr, ip_destination_addr, l4_proto)
			 * and also need to fill the ip header checksum
			 * with zero.
*/
}
break;
case ETHERTYPE_IPV6:
}
break;
default:
/* Unrecoverable error */
"tx hcksum flag set");
return (TX_CXT_E_ETHER_TYPE);
}
}
} else {
/*
* l4 header length is only required for LSO
*/
l4_hdr_len = 0;
}
return (TX_CXT_SUCCESS);
}
/*
* igb_check_tx_context
*
* Check if a new context descriptor is needed
*/
static boolean_t
{
return (B_FALSE);
/*
* Compare the context data retrieved from the mblk and the
* stored context data of the last context descriptor. The data
* need to be checked are:
* hcksum_flags
* l4_proto
* mss (only check for LSO)
* l4_hdr_len (only check for LSO)
* ip_hdr_len
* mac_hdr_len
* Either one of the above data is changed, a new context descriptor
* will be needed.
*/
if (ctx->hcksum_flags != 0) {
return (B_TRUE);
}
}
return (B_FALSE);
}
/*
 * igb_fill_tx_context
 *
 * Fill the context descriptor with hardware checksum information
*/
static void
{
/*
* Fill the context descriptor with the checksum
* context information we've got
*/
case IPPROTO_TCP:
break;
case IPPROTO_UDP:
/*
* We don't have to explicitly set:
* ctx_tbd->type_tucmd_mlhl |=
* E1000_ADVTXD_TUCMD_L4T_UDP;
* Because E1000_ADVTXD_TUCMD_L4T_UDP == 0b
*/
break;
default:
/* Unrecoverable error */
break;
}
}
ctx_tbd->seqnum_seed = 0;
ctx_tbd->mss_l4len_idx |=
}
}
/*
* igb_tx_fill_ring
*
* Fill the tx descriptor ring with the data
*/
static int
{
int i;
desc_num = 0;
hcksum_flags = 0;
/*
* Get the index of the first tx descriptor that will be filled,
* and the index of the first work list item that will be attached
* with the first used tx control block in the pending list.
* Note: the two indexes are the same.
*/
/*
* Check if a new context descriptor is needed for this packet
*/
if (load_context) {
first_tcb = (tx_control_block_t *)
/*
* Fill the context descriptor with the
* hardware checksum offload informations.
*/
(struct e1000_adv_tx_context_desc *)tbd,
desc_num++;
/*
* Store the checksum context data if
* a new context descriptor is added
*/
}
}
/*
* Fill tx data descriptors with the data saved in the pending list.
* The tx control blocks in the pending list are added to the work list
* at the same time.
*
* The work list is strictly 1:1 corresponding to the descriptor ring.
* One item of the work list corresponds to one tx descriptor. Because
* one tx control block can span multiple tx descriptors, the tx
* control block will be added to the first work list item that
* corresponds to the first tx descriptor generated from that tx
* control block.
*/
desc_num++;
}
/*
* Count the checksum context descriptor for
* the first tx control block.
*/
}
/*
* Add the tx control block to the work list
*/
}
/*
* The Insert Ethernet CRC (IFCS) bit and the checksum fields are only
* valid in the first descriptor of the packet.
* 82576 also requires the payload length setting even without LSO
*/
} else {
}
}
/* Set hardware checksum bits */
if (hcksum_flags != 0) {
if (hcksum_flags & HCK_IPV4_HDRCKSUM)
E1000_TXD_POPTS_IXSM << 8;
if (hcksum_flags & HCK_PARTIALCKSUM)
E1000_TXD_POPTS_TXSM << 8;
}
/*
* The last descriptor of packet needs End Of Packet (EOP),
* and Report Status (RS) bits set
*/
/*
* Sync the DMA buffer of the tx descriptor ring
*/
/*
* Update the number of the free tx descriptors.
* The mutual exclusion between the transmission and the recycling
* (for the tx descriptor ring and the work list) is implemented
* with the atomic operation on the number of the free tx descriptors.
*
* Note: we should always decrement the counter tbd_free before
* advancing the hardware TDT pointer to avoid the race condition -
* before the counter tbd_free is decremented, the transmit of the
* tx descriptors has done and the counter tbd_free is increased by
* the tx recycling.
*/
ASSERT(i >= 0);
/*
* Advance the hardware TDT pointer of the tx descriptor ring
*/
}
return (desc_num);
}
/*
* igb_save_desc
*
* will be filled into the tx descriptor ring later.
*/
static void
{
}
/*
* igb_tx_recycle_legacy
*
* Recycle the tx descriptors and tx control blocks.
*
* The work list is traversed to check if the corresponding
* tx descriptors have been transmitted. If so, the resources
* bound to the tx control blocks will be freed, and those
* tx control blocks will be returned to the free list.
*/
{
int desc_num;
/*
* The mutex_tryenter() is used to avoid unnecessary
* lock contention.
*/
return (0);
tx_ring->recycle_fail = 0;
tx_ring->stall_watchdog = 0;
return (0);
}
/*
* Sync the DMA buffer of the tx descriptor ring
*/
if (igb_check_dma_handle(
return (0);
}
desc_num = 0;
/*
* Get the last tx descriptor of the tx control block.
* If the last tx descriptor is done, it is done with
* all the tx descriptors of the tx control block.
* Then the tx control block and all the corresponding
* tx descriptors can be recycled.
*/
/*
* Check if the Descriptor Done bit is set
*/
if (desc_done) {
/*
* Strip off the tx control block from the work list,
* and add it to the pending list.
*/
/*
* Count the total number of the tx descriptors recycled
*/
/*
* Advance the index of the tx descriptor ring
*/
}
}
/*
* If no tx descriptors are recycled, no need to do more processing
*/
if (desc_num == 0) {
tx_ring->recycle_fail++;
return (0);
}
tx_ring->recycle_fail = 0;
tx_ring->stall_watchdog = 0;
/*
* Update the head index of the tx descriptor ring
*/
/*
* Update the number of the free tx descriptors with atomic operations
*/
/*
* Free the resources used by the tx control blocks
* in the pending list
*/
/*
* Release the resources occupied by the tx control block
*/
tcb = (tx_control_block_t *)
}
/*
* Add the tx control blocks in the pending list to the free list.
*/
return (desc_num);
}
/*
* igb_tx_recycle_head_wb
*
* Check the head write-back, and recycle all the transmitted
* tx descriptors and tx control blocks.
*/
{
int desc_num;
/*
* The mutex_tryenter() is used to avoid unnecessary
* lock contention.
*/
return (0);
tx_ring->recycle_fail = 0;
tx_ring->stall_watchdog = 0;
return (0);
}
/*
* Sync the DMA buffer of the tx descriptor ring
*
* Note: For head write-back mode, the tx descriptors will not
* be written back, but the head write-back value is stored at
* the last extra tbd at the end of the DMA area, we still need
* to sync the head write-back value for kernel.
*
* DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);
*/
sizeof (uint32_t),
if (igb_check_dma_handle(
return (0);
}
desc_num = 0;
/*
* Get the value of head write-back
*/
/*
* The current tx control block is not
* completely transmitted, stop recycling
*/
break;
}
/*
* Strip off the tx control block from the work list,
* and add it to the pending list.
*/
/*
* Advance the index of the tx descriptor ring
*/
/*
* Count the total number of the tx descriptors recycled
*/
}
/*
* If no tx descriptors are recycled, no need to do more processing
*/
if (desc_num == 0) {
tx_ring->recycle_fail++;
return (0);
}
tx_ring->recycle_fail = 0;
tx_ring->stall_watchdog = 0;
/*
* Update the head index of the tx descriptor ring
*/
/*
* Update the number of the free tx descriptors with atomic operations
*/
/*
* Free the resources used by the tx control blocks
* in the pending list
*/
while (tcb) {
/*
* Release the resources occupied by the tx control block
*/
tcb = (tx_control_block_t *)
}
/*
* Add the tx control blocks in the pending list to the free list.
*/
return (desc_num);
}
/*
* igb_free_tcb - free up the tx control block
*
* Free the resources of the tx control block, including
* unbind the previously bound DMA handle, and reset other
* control fields.
*/
void
{
case USE_COPY:
/*
* Reset the buffer length that is used for copy
*/
break;
case USE_DMA:
/*
* Release the DMA resource that is used for
* DMA binding.
*/
break;
default:
break;
}
/*
* Free the mblk
*/
}
}
/*
* igb_get_free_list - Get a free tx control block from the free list
*
* The atomic operation on the number of the available tx control block
* in the free list is used to keep this routine mutual exclusive with
* the routine igb_put_check_list.
*/
static tx_control_block_t *
{
/*
* Check and update the number of the free tx control block
* in the free list.
*/
return (NULL);
return (tcb);
}
/*
* igb_put_free_list
*
* Put a list of used tx control blocks back to the free list
*
* A mutex is used here to ensure the serialization. The mutual exclusion
* between igb_get_free_list and igb_put_free_list is implemented with
* the atomic operation on the counter tcb_free.
*/
void
{
int tcb_num;
tcb_num = 0;
tcb_num++;
}
/*
* Update the number of the free tx control block
* in the free list. This operation must be placed
* under the protection of the lock.
*/
}