bge_send.c revision 0dc2366f7b9f9f36e10909b1e95edbf2a261c2ac
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include "bge_impl.h"
/*
* The transmit-side code uses an allocation process which is similar
* to some theme park roller-coaster rides, where riders sit in cars
* that can go individually, but work better in a train.
*
* 1) RESERVE a place - this doesn't refer to any specific car or
* seat, just that you will get a ride. The attempt to RESERVE a
* place can fail if all spaces in all cars are already committed.
*
* 2) Prepare yourself; this may take an arbitrary (but not unbounded)
* time, and you can back out at this stage, in which case you must
* give up (RENOUNCE) your place.
*
* 3) CLAIM your space - a specific car (the next sequentially
* numbered one) is allocated at this stage, and is guaranteed
* to be part of the next train to depart. Once you've done
* this, you can't back out, nor wait for any external event
* or resource.
*
* 4) Occupy your car - when all CLAIMED cars are OCCUPIED, they
* all depart together as a single train!
*
* 5) At the end of the ride, you climb out of the car and RENOUNCE
* your right to it, so that it can be recycled for another rider.
*
* For each rider, these have to occur in this order, but the riders
* don't have to stay in the same order at each stage. In particular,
* they may overtake each other between RESERVING a place and CLAIMING
* it, or between CLAIMING and OCCUPYING a space.
*
* Once a car is CLAIMED, the train currently being assembled can't go
* without that car (this guarantees that the cars in a single train
* make up a consecutively-numbered set). Therefore, when any train
* leaves, we know there can't be any riders in transit between CLAIMING
* and OCCUPYING their cars. There can be some who have RESERVED but
* not yet CLAIMED their places. That's OK, though, because they'll go
* into the next train.
*/
#define BGE_DBG BGE_DBG_SEND /* debug flag for this code */
/*
* ========== Send-side recycle routines ==========
*/
/*
* Recycle all the completed buffers in the specified send ring up to
* (but not including) the consumer index in the status block.
*
* This function must advance (srp->tc_next) AND adjust (srp->tx_free)
* to account for the packets it has recycled.
*
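* Beyond that, this version hands the recycled tx buffers back to the
* ring's buffer push queue, refreshes the watchdog count and, when
* transmit work is still pending, triggers the drain soft interrupt.
* There is only one method for sending messages (by copying), and that
* method doesn't need any other special per-buffer action for recycling.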
*/
static void bge_recycle_ring(bge_t *bgep, send_ring_t *srp);
#pragma inline(bge_recycle_ring)

/*
 * Reclaim every send slot of (srp) from (srp->tc_next) up to, but not
 * including, the consumer index that (srp->cons_index_p) points at.
 *
 * The tx buffers attached to those slots are chained together and put
 * back on the ring's buffer push queue; (srp->tc_next) and
 * (srp->tx_free) are advanced to match.  The caller must hold
 * (srp->tc_lock).
 */
static void
bge_recycle_ring(bge_t *bgep, send_ring_t *srp)
{
	bge_queue_item_t *first;
	bge_queue_item_t *last;
	bge_queue_item_t *item;
	bge_queue_t *pushq;
	sw_sbd_t *sbd;
	uint64_t idx;
	uint64_t nfreed;

	ASSERT(mutex_owned(srp->tc_lock));

	/*
	 * We're about to release one or more places; the free count
	 * can never exceed the size of the ring.
	 */
	ASSERT(srp->tx_free <= srp->desc.nslots);

	/*
	 * Detach the buffer from each completed slot and string the
	 * buffers together in slot order.
	 */
	first = NULL;
	last = NULL;
	nfreed = 0;
	idx = srp->tc_next;
	while (idx != *srp->cons_index_p) {
		sbd = &srp->sw_sbds[idx];
		ASSERT(sbd->pbuf != NULL);
		item = sbd->pbuf;
		sbd->pbuf = NULL;

		if (first == NULL)
			first = item;
		else
			last->next = item;
		last = item;

		nfreed++;
		idx = NEXT(idx, srp->desc.nslots);
	}

	/* Nothing completed since the last pass. */
	if (nfreed == 0)
		return;

	/*
	 * Publish the new recycle index, then give the places back.
	 * We must not free more places than the ring has.
	 */
	srp->tc_next = idx;
	ASSERT(srp->tx_free + nfreed <= srp->desc.nslots);
	bge_atomic_renounce(&srp->tx_free, nfreed);

	/*
	 * Reset the watchdog count: 0 when every place is now free,
	 * 1 when some are still outstanding.  The read of tx_free is
	 * not synchronised, so the answer may be stale, but only in a
	 * harmless way: if another thread claims a place after we
	 * decide that all are free, the watchdog restarts on the next
	 * send() call.
	 */
	if (srp->tx_free == srp->desc.nslots)
		bgep->watchdog = 0;
	else
		bgep->watchdog = 1;

	/*
	 * Prepend the recycled chain to the buffer push queue.
	 */
	pushq = srp->txbuf_push_queue;
	mutex_enter(pushq->lock);
	last->next = pushq->head;
	pushq->head = first;
	pushq->count += nfreed;
	mutex_exit(pushq->lock);

	/*
	 * Swap the push and pop queues when the pop queue has dropped
	 * below the low-water mark and holds fewer buffers than the
	 * push queue does.
	 */
	if ((srp->txbuf_pop_queue->count < srp->tx_buffers_low) &&
	    (srp->txbuf_pop_queue->count < pushq->count)) {
		srp->txbuf_push_queue = srp->txbuf_pop_queue;
		srp->txbuf_pop_queue = pushq;
	}

	/*
	 * Kick the drain soft interrupt if packets are still waiting
	 * for descriptors or a reschedule has been requested.
	 */
	if (srp->tx_flow != 0 || bgep->tx_resched_needed)
		ddi_trigger_softintr(bgep->drain_id);
}
/*
* Recycle all returned slots in all rings.
*
* To give priority to low-numbered rings, whenever we have recycled any
* slots in any ring except 0, we restart scanning again from ring 0.
* Thus, for example, if rings 0, 3, and 10 are carrying traffic, the
* pattern of recycles might go 0, 3, 10, 3, 0, 10, 0:
*
* 0 found some - recycle them
* 1..2 none found
* 3 found some - recycle them and restart scan
* 0..9 none found
* 10 found some - recycle them and restart scan
* 0..2 none found
* 3 found some more - recycle them and restart scan
* 0 found some more - recycle them
* 0..9 none found
* 10 found some more - recycle them and restart scan
* 0 found some more - recycle them
* 1..15 none found
*
* The routine returns only when a complete scan has been performed
* without finding any slots to recycle.
*
* Note: the number of send rings is read at run time from
* (bgep->chipid.tx_rings). When only one send ring is being used,
* the restart test can never succeed and the do-loop body runs once.
*/
/*
 * bge_recycle() - recycle completed send slots on all send rings
 *
 *	bgep	per-instance driver state; supplies the send rings and
 *		the ring count (bgep->chipid.tx_rings)
 *	bsp	status block; in this routine it is referenced only by
 *		the ASSERT that cross-checks each ring's cons_index_p
 *
 * A ring is skipped when its consumer index equals its tc_next, or
 * when its tc_lock cannot be taken without blocking.
 */
void bge_recycle(bge_t *bgep, bge_status_t *bsp);
#pragma no_inline(bge_recycle)
void
bge_recycle(bge_t *bgep, bge_status_t *bsp)
{
send_ring_t *srp;
uint64_t ring;
uint64_t tx_rings = bgep->chipid.tx_rings;
/*
 * Every (re)scan begins at ring 0.
 */
restart:
ring = 0;
srp = &bgep->send[ring];
do {
/*
 * For each ring, (srp->cons_index_p) points to the
 * proper index within the status block (which has
 * already been sync'd by the caller).
 */
ASSERT(srp->cons_index_p == SEND_INDEX_P(bsp, ring));
/*
 * Note: inside a do-while, "continue" jumps to the loop
 * condition, so (srp) and (ring) are still advanced.
 */
if (*srp->cons_index_p == srp->tc_next)
continue; /* no slots to recycle */
if (mutex_tryenter(srp->tc_lock) == 0)
continue; /* already in process */
bge_recycle_ring(bgep, srp);
mutex_exit(srp->tc_lock);
/*
 * Restart from ring 0, if we're not on ring 0 already.
 * The H/W selects send BDs purely by priority, always
 * taking available BDs from the higher-priority ring
 * first, so the driver stays consistent with the H/W
 * by giving lower-numbered rings the higher priority.
 */
if (tx_rings > 1 && ring > 0)
goto restart;
/*
 * Loop over all rings (if there *are* multiple rings)
 */
} while (++srp, ++ring < tx_rings);
}
/*
* ========== Send-side transmit routines ==========
*/
/*
 * Byte offsets of the checksum field within the TCP and UDP headers.
 */
#define	TCP_CKSUM_OFFSET	16
#define	UDP_CKSUM_OFFSET	6

/*
 * bge_pseudo_cksum() - seed the TCP/UDP checksum field of a frame
 *
 *	buf	start of the frame (Ethernet header first, no VLAN tag),
 *		followed by an IPv4 header and a TCP or UDP header
 *
 * The pseudo-header sum (TCP/UDP length, protocol, source address and
 * destination address) is folded to 16 bits and written, without being
 * complemented, into the transport header's checksum field in network
 * byte order.
 *
 * The packet is taken to be IPv4 without any check, and every protocol
 * other than IPPROTO_TCP is handled with the UDP checksum offset.
 */
static void
bge_pseudo_cksum(uint8_t *buf)
{
	uint32_t cksum;
	uint16_t iphl;
	uint16_t proto;

	/*
	 * Point it to the ip header.
	 */
	buf += sizeof (struct ether_header);

	/*
	 * Calculate the pseudo-header checksum: the transport length
	 * (IP total length less the IP header length), the protocol
	 * number, and the four 16-bit halves of the two addresses.
	 */
	iphl = 4 * (buf[0] & 0xF);
	cksum = (((uint16_t)buf[2]) << 8) + buf[3] - iphl;
	proto = buf[9];
	cksum += proto;
	cksum += (((uint16_t)buf[12]) << 8) + buf[13];
	cksum += (((uint16_t)buf[14]) << 8) + buf[15];
	cksum += (((uint16_t)buf[16]) << 8) + buf[17];
	cksum += (((uint16_t)buf[18]) << 8) + buf[19];

	/*
	 * Fold the carries back in; two rounds are enough to bring
	 * the sum of these few 16-bit terms down to 16 bits.
	 */
	cksum = (cksum >> 16) + (cksum & 0xFFFF);
	cksum = (cksum >> 16) + (cksum & 0xFFFF);

	/*
	 * Point it to the checksum field of the TCP/UDP header.
	 */
	buf += iphl + ((proto == IPPROTO_TCP) ?
	    TCP_CKSUM_OFFSET : UDP_CKSUM_OFFSET);

	/*
	 * Store the result one byte at a time, most significant byte
	 * first.  This yields the same bytes as storing htons(cksum)
	 * through a uint16_t pointer, but does not depend on (buf)
	 * being 2-byte aligned and avoids the type-punned access.
	 */
	buf[0] = (uint8_t)(cksum >> 8);
	buf[1] = (uint8_t)cksum;
}
/*
 * bge_get_txbuf() - obtain one s/w tx buffer for (srp)
 *
 * The pop queue is tried first and then the push queue.  When both are
 * empty, and the ring has not yet reached (srp->tx_array_max) buffer
 * arrays, bge_alloc_txbuf_array() is called under (srp->tx_lock) and
 * its result is returned as is.
 *
 * Returns the buffer item, or NULL when none could be obtained.
 */
static bge_queue_item_t *
bge_get_txbuf(bge_t *bgep, send_ring_t *srp)
{
	bge_queue_t *queue;
	bge_queue_item_t *item;

	/*
	 * Start with the pop queue; if it has run dry, drop its lock
	 * and switch over to the push queue.
	 */
	queue = srp->txbuf_pop_queue;
	mutex_enter(queue->lock);
	if (queue->count == 0) {
		mutex_exit(queue->lock);
		queue = srp->txbuf_push_queue;
		mutex_enter(queue->lock);
	}

	/*
	 * Whichever queue we ended up holding is empty: both are
	 * exhausted, so try to allocate more tx buffers.
	 */
	if (queue->count == 0) {
		mutex_exit(queue->lock);
		if (srp->tx_array >= srp->tx_array_max)
			return (NULL);
		mutex_enter(srp->tx_lock);
		item = bge_alloc_txbuf_array(bgep, srp);
		mutex_exit(srp->tx_lock);
		return (item);
	}

	/*
	 * Unlink the item at the head of the queue.
	 */
	item = queue->head;
	queue->head = (bge_queue_item_t *)item->next;
	queue->count--;
	mutex_exit(queue->lock);

	item->next = NULL;
	return (item);
}
static void bge_send_fill_txbd(send_ring_t *srp, send_pkt_t *pktp);
#pragma inline(bge_send_fill_txbd)

/*
 * bge_send_fill_txbd() - claim the next send BD and describe a packet
 *
 *	srp	send ring; the caller must hold (srp->tx_lock)
 *	pktp	packet info whose (txbuf_item) holds the copied frame
 *
 * The BD at (srp->tx_next) is claimed, the tx buffer is attached to the
 * matching s/w descriptor so that it can be recycled later, and the h/w
 * descriptor is filled in with the buffer address, length, VLAN tag and
 * checksum-offload flags.  (srp->tx_next) is advanced and
 * (pktp->txbuf_item) is cleared.
 */
static void
bge_send_fill_txbd(send_ring_t *srp, send_pkt_t *pktp)
{
	bge_sbd_t *hw_sbd_p;
	sw_sbd_t *ssbdp;
	bge_queue_item_t *txbuf_item;
	sw_txbuf_t *txbuf;
	uint64_t slot;
	off_t data_off;

	ASSERT(mutex_owned(srp->tx_lock));

	/*
	 * Go straight to claiming our already-reserved places
	 * on the train!
	 */
	ASSERT(pktp->txbuf_item != NULL);
	txbuf_item = pktp->txbuf_item;
	txbuf = txbuf_item->item;
	slot = srp->tx_next;
	ssbdp = &srp->sw_sbds[slot];
	hw_sbd_p = DMA_VPTR(ssbdp->desc);
	hw_sbd_p->flags = 0;

	/*
	 * Flush the frame data to the device.  For a packet carrying a
	 * VLAN TCI, the descriptor below points VLAN_TAGSZ bytes into
	 * the buffer and (copy_len) counts from there, so the sync has
	 * to start at that same offset; syncing from offset 0 would
	 * leave the last VLAN_TAGSZ bytes of the frame uncovered.
	 */
	ASSERT(txbuf->copy_len != 0);
	data_off = (pktp->vlan_tci != 0) ? VLAN_TAGSZ : 0;
	(void) ddi_dma_sync(txbuf->buf.dma_hdl, data_off,
	    txbuf->copy_len, DDI_DMA_SYNC_FORDEV);

	/*
	 * Remember the buffer against the slot and move on to the
	 * next slot.
	 */
	ASSERT(ssbdp->pbuf == NULL);
	ssbdp->pbuf = txbuf_item;
	srp->tx_next = NEXT(slot, srp->desc.nslots);
	pktp->txbuf_item = NULL;

	/*
	 * Setting hardware send buffer descriptor
	 */
	hw_sbd_p->host_buf_addr = txbuf->buf.cookie.dmac_laddress;
	hw_sbd_p->len = txbuf->copy_len;
	if (pktp->vlan_tci != 0) {
		hw_sbd_p->vlan_tci = pktp->vlan_tci;
		hw_sbd_p->host_buf_addr += VLAN_TAGSZ;
		hw_sbd_p->flags |= SBD_FLAG_VLAN_TAG;
	}
	if (pktp->pflags & HCK_IPV4_HDRCKSUM)
		hw_sbd_p->flags |= SBD_FLAG_IP_CKSUM;
	if (pktp->pflags & HCK_FULLCKSUM)
		hw_sbd_p->flags |= SBD_FLAG_TCP_UDP_CKSUM;

	/* Each packet occupies exactly one BD, so this BD also ends it. */
	hw_sbd_p->flags |= SBD_FLAG_PACKET_END;
}
/*
* Send a message by copying it into a preallocated (and premapped) buffer
*/
static void bge_send_copy(bge_t *bgep, sw_txbuf_t *txbuf, mblk_t *mp);
#pragma inline(bge_send_copy)

/*
 * bge_send_copy() - flatten a message into a tx buffer
 *
 *	bgep	driver state; supplies the buffer size for the ASSERT
 *	txbuf	destination buffer; (copy_len) is reset and then set
 *		to the total number of bytes copied
 *	mp	first block of the message; the b_cont chain is walked
 *
 * Zero-length blocks are skipped.  The only guard against overrunning
 * the buffer is the ASSERT, so the caller must not pass a message
 * larger than (bgep->chipid.snd_buff_size).
 */
static void
bge_send_copy(bge_t *bgep, sw_txbuf_t *txbuf, mblk_t *mp)
{
	mblk_t *blk;
	uint32_t len;
	char *dst;

	txbuf->copy_len = 0;
	dst = DMA_VPTR(txbuf->buf);

	blk = mp;
	while (blk != NULL) {
		len = MBLKL(blk);
		if (len != 0) {
			ASSERT(txbuf->copy_len + len <=
			    bgep->chipid.snd_buff_size);
			bcopy(blk->b_rptr, dst, len);
			dst += len;
			txbuf->copy_len += len;
		}
		blk = blk->b_cont;
	}
}
/*
* Fill the Tx buffer descriptors and trigger the h/w transmission
*/
/*
 * bge_send_serial() - move ready packets of (srp) onto the h/w ring
 *
 *	bgep	driver state (drain limit, status block, watchdog)
 *	srp	send ring to service
 *
 * Packets are taken in order starting at (srp->txfill_next).  Each one
 * gets one send BD; the batch is then sync'd for the device and the
 * new producer index is written to the ring's mailbox register.
 */
static void
bge_send_serial(bge_t *bgep, send_ring_t *srp)
{
send_pkt_t *pktp;
uint64_t txfill_next;
uint32_t count;
uint32_t tx_next;
sw_sbd_t *ssbdp;
bge_status_t *bsp;
/*
 * Try to hold the tx lock:
 * If we are in an interrupt context, use mutex_enter() to
 * ensure quick response for tx in interrupt context;
 * Otherwise, use mutex_tryenter() to serialize this h/w tx
 * BD filling and transmission triggering task.
 */
if (servicing_interrupt() != 0)
mutex_enter(srp->tx_lock);
else if (mutex_tryenter(srp->tx_lock) == 0)
return; /* already in process */
bsp = DMA_VPTR(bgep->status_block);
txfill_next = srp->txfill_next;
/*
 * Each pass handles at most (bgep->param_drain_max) packets.
 * (tx_next) and (ssbdp) record where this pass starts on the
 * BD ring, for the descriptor sync after the loop.
 */
start_tx:
tx_next = srp->tx_next;
ssbdp = &srp->sw_sbds[tx_next];
for (count = 0; count < bgep->param_drain_max; ++count) {
pktp = &srp->pktp[txfill_next];
/*
 * Stop at the first packet that is not ready yet; count
 * the event only when this pass has achieved nothing.
 */
if (!pktp->tx_ready) {
if (count == 0)
srp->tx_block++;
break;
}
/*
 * If there are not enough BDs: try to recycle more
 */
if (srp->tx_free <= 1)
bge_recycle(bgep, bsp);
/*
 * Reserve the required BDs: 1 is enough
 */
if (!bge_atomic_reserve(&srp->tx_free, 1)) {
srp->tx_nobd++;
break;
}
/*
 * Filling the tx BD
 */
bge_send_fill_txbd(srp, pktp);
txfill_next = NEXT(txfill_next, BGE_SEND_BUF_MAX);
pktp->tx_ready = B_FALSE;
}
/*
 * Trigger h/w to start transmission.
 */
if (count != 0) {
bge_atomic_sub64(&srp->tx_flow, count);
/*
 * If the batch wraps past the end of the BD ring, sync
 * the part up to the end of the ring first, then carry
 * on with the remainder from slot 0.
 */
if (tx_next + count > srp->desc.nslots) {
(void) ddi_dma_sync(ssbdp->desc.dma_hdl, 0,
(srp->desc.nslots - tx_next) * sizeof (bge_sbd_t),
DDI_DMA_SYNC_FORDEV);
count -= srp->desc.nslots - tx_next;
ssbdp = &srp->sw_sbds[0];
}
(void) ddi_dma_sync(ssbdp->desc.dma_hdl, 0,
count*sizeof (bge_sbd_t), DDI_DMA_SYNC_FORDEV);
/*
 * Hand the new producer index to the chip, record how
 * far the packet array has been consumed, and bump the
 * watchdog count.
 */
bge_mbx_put(bgep, srp->chip_mbx_reg, srp->tx_next);
srp->txfill_next = txfill_next;
bgep->watchdog++;
/*
 * Go round again while packets are still queued and
 * more than one BD is free.
 */
if (srp->tx_flow != 0 && srp->tx_free > 1)
goto start_tx;
}
mutex_exit(srp->tx_lock);
}
/*
 * bge_ring_tx() - transmit one packet on a send ring
 *
 *	arg	the send ring (send_ring_t *)
 *	mp	a single message; (mp->b_next) must be NULL
 *
 * The message is copied into a s/w tx buffer, any VLAN tag is removed
 * from the copy and remembered for the BD, the checksum-offload flags
 * are collected, and the packet is queued for bge_send_serial().
 *
 * Returns NULL once the packet has been accepted (the message is freed
 * here), or (mp) unchanged when no tx buffer was available.
 */
mblk_t *
bge_ring_tx(void *arg, mblk_t *mp)
{
	send_ring_t *srp = arg;
	bge_t *bgep = srp->bgep;
	struct ether_vlan_header *evhp;
	bge_queue_item_t *item;
	sw_txbuf_t *swbuf;
	send_pkt_t *pkt;
	uint64_t pkt_idx;
	uint32_t hck_flags;
	uint16_t tci;
	char *frame;

	ASSERT(mp->b_next == NULL);

	/*
	 * A s/w tx buffer comes first.  Without one, ask for a
	 * reschedule, give the ring a push, and hand the message
	 * back to the caller.
	 */
	item = bge_get_txbuf(bgep, srp);
	if (item == NULL) {
		srp->tx_nobuf++;
		bgep->tx_resched_needed = B_TRUE;
		bge_send_serial(bgep, srp);
		return (mp);
	}

	/*
	 * Gather every fragment of the message into the buffer.
	 */
	swbuf = item->item;
	bge_send_copy(bgep, swbuf, mp);
	ASSERT(swbuf->copy_len >= sizeof (struct ether_header));

	/*
	 * For a VLAN-tagged frame, slide the two MAC addresses up over
	 * the tag, so that the frame now starts VLAN_TAGSZ bytes into
	 * the buffer, and keep the TCI for the BD.
	 *
	 * NOTE(review): a tagged frame whose TCI is zero is shifted
	 * here as well, yet bge_send_fill_txbd() treats a zero
	 * (vlan_tci) as "untagged" - confirm that such frames cannot
	 * reach this path.
	 */
	frame = DMA_VPTR(swbuf->buf);
	evhp = (void *)frame;
	tci = 0;
	if (evhp->ether_tpid == htons(ETHERTYPE_VLAN)) {
		tci = ntohs(evhp->ether_tci);
		frame = memmove(frame + VLAN_TAGSZ, frame, 2 * ETHERADDRL);
		swbuf->copy_len -= VLAN_TAGSZ;
	}

	/*
	 * Fetch the checksum offload flags, and seed the pseudo-header
	 * checksum for chips flagged CHIP_FLAG_PARTIAL_CSUM.
	 */
	mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &hck_flags);
	if ((hck_flags & HCK_FULLCKSUM) != 0 &&
	    (bgep->chipid.flags & CHIP_FLAG_PARTIAL_CSUM) != 0)
		bge_pseudo_cksum((uint8_t *)frame);

	/*
	 * The buffer is ready: take the next packet-info slot and fill
	 * it in.  (tx_ready) is set last, after (tx_flow) has been
	 * incremented.
	 */
	pkt_idx = bge_atomic_next(&srp->txpkt_next, BGE_SEND_BUF_MAX);
	pkt = &srp->pktp[pkt_idx];
	ASSERT(pkt->txbuf_item == NULL);
	pkt->txbuf_item = item;
	pkt->vlan_tci = tci;
	pkt->pflags = hck_flags;
	atomic_inc_64(&srp->tx_flow);
	ASSERT(pkt->tx_ready == B_FALSE);
	pkt->tx_ready = B_TRUE;

	/*
	 * Fill the h/w BDs and start the transmission.
	 */
	bge_send_serial(bgep, srp);

	/*
	 * NOTE(review): only the length of the first block is added
	 * here, not that of the whole b_cont chain - confirm that
	 * this is what (pushed_bytes) is meant to count.
	 */
	srp->pushed_bytes += MBLKL(mp);

	/*
	 * The contents have been copied, so the message can go.
	 */
	freemsg(mp);
	return (NULL);
}
/*
 * bge_send() - transmit one packet on send ring 0
 *
 * Returns whatever bge_ring_tx() returns.
 */
static mblk_t *
bge_send(bge_t *bgep, mblk_t *mp)
{
	return (bge_ring_tx(&bgep->send[0], mp));
}
/*
 * bge_send_drain() - drain routine for send ring 0
 *
 *	arg	the driver state (bge_t *)
 *
 * Pushes any queued packets to the h/w.  Then, if a reschedule was
 * requested, the ring's (tx_flow) is below (tx_buffers_low) and the
 * MAC is started, calls mac_tx_update() and counts the event.
 *
 * Always returns DDI_INTR_CLAIMED.
 */
uint_t
bge_send_drain(caddr_t arg)
{
	bge_t *bgep = (void *)arg;
	send_ring_t *srp;

	BGE_TRACE(("bge_send_drain($%p)", (void *)bgep));

	/* Only ring 0 is serviced here. */
	srp = &bgep->send[0];
	bge_send_serial(bgep, srp);

	/*
	 * No update is due unless a reschedule is pending, the ring
	 * has drained far enough, and the MAC is up.
	 */
	if (!bgep->tx_resched_needed ||
	    (srp->tx_flow >= srp->tx_buffers_low) ||
	    (bgep->bge_mac_state != BGE_MAC_STARTED))
		return (DDI_INTR_CLAIMED);

	mac_tx_update(bgep->mh);
	bgep->tx_resched_needed = B_FALSE;
	bgep->tx_resched++;

	return (DDI_INTR_CLAIMED);
}
/*
* bge_m_tx() - send a chain of packets
*/
/*
 * bge_m_tx() - transmit a b_next-linked chain of packets
 *
 *	arg	the driver state (bge_t *)
 *	mp	first packet of the chain; must not be NULL
 *
 * Packets are sent one at a time, in order, while (bgep->errlock) is
 * held as a reader.  If the chip is not running, the whole chain is
 * freed instead.
 *
 * Returns NULL when every packet was consumed; otherwise the first
 * packet that could not be sent, relinked to the rest of the chain.
 */
mblk_t *
bge_m_tx(void *arg, mblk_t *mp)
{
	bge_t *bgep = arg;		/* private device info */
	mblk_t *rest;
	mblk_t *unsent;

	BGE_TRACE(("bge_m_tx($%p, $%p)", arg, (void *)mp));

	ASSERT(mp != NULL);
	ASSERT(bgep->bge_mac_state == BGE_MAC_STARTED);

	rw_enter(bgep->errlock, RW_READER);

	if (bgep->bge_chip_state != BGE_CHIP_RUNNING) {
		BGE_DEBUG(("bge_m_tx: chip not running"));
		freemsgchain(mp);
		mp = NULL;
	}

	for (; mp != NULL; mp = rest) {
		/* Detach the head packet before handing it over. */
		rest = mp->b_next;
		mp->b_next = NULL;

		unsent = bge_send(bgep, mp);
		if (unsent != NULL) {
			/* Refused: put the chain back together. */
			unsent->b_next = rest;
			mp = unsent;
			break;
		}
	}

	rw_exit(bgep->errlock);
	return (mp);
}