bge_send.c revision 00d0963faf2e861a4aef6b1bf28f99a5b2b20755
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include "sys/bge_impl2.h"
/*
* The transmit-side code uses an allocation process which is similar
* to some theme park roller-coaster rides, where riders sit in cars
* that can go individually, but work better in a train.
*
* 1) RESERVE a place - this doesn't refer to any specific car or
* seat, just that you will get a ride. The attempt to RESERVE a
* place can fail if all spaces in all cars are already committed.
*
* 2) Prepare yourself; this may take an arbitrary (but not unbounded)
* time, and you can back out at this stage, in which case you must
* give up (RENOUNCE) your place.
*
* 3) CLAIM your space - a specific car (the next sequentially
* numbered one) is allocated at this stage, and is guaranteed
* to be part of the next train to depart. Once you've done
* this, you can't back out, nor wait for any external event
* or resource.
*
* 4) Occupy your car - when all CLAIMED cars are OCCUPIED, they
* all depart together as a single train!
*
* 5) At the end of the ride, you climb out of the car and RENOUNCE
* your right to it, so that it can be recycled for another rider.
*
* For each rider, these have to occur in this order, but the riders
* don't have to stay in the same order at each stage. In particular,
* they may overtake each other between RESERVING a place and CLAIMING
* it, or between CLAIMING and OCCUPYING a space.
*
* Once a car is CLAIMED, the train currently being assembled can't go
* without that car (this guarantees that the cars in a single train
* make up a consecutively-numbered set). Therefore, when any train
* leaves, we know there can't be any riders in transit between CLAIMING
* and OCCUPYING their cars. There can be some who have RESERVED but
* not yet CLAIMED their places. That's OK, though, because they'll go
* into the next train.
*/
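/*
 * For illustration only: a sketch of how the stages above map onto
 * this file's send-ring counters and helpers (the routines below are
 * the real implementation):
 *
 *	if (!bge_atomic_reserve(&srp->tx_free, 1))	... RESERVE
 *		return (B_FALSE);			    (may fail)
 *	(prepare the message; to back out now, call
 *	bge_atomic_renounce(&srp->tx_free, 1))		... RENOUNCE
 *	slot = bge_send_claim(bgep, srp);		... CLAIM
 *	(fill in srp->sw_sbds[slot] and its h/w
 *	descriptor; the last thread to decrement
 *	srp->tx_flow prods the chip)			... OCCUPY
 *	(later, bge_recycle_ring() advances tc_next
 *	and renounces the completed slots)		... RENOUNCE
 */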
#define BGE_DBG BGE_DBG_SEND /* debug flag for this code */
/*
* ========== Send-side recycle routines ==========
*/
/*
* Recycle all the completed buffers in the specified send ring up to
* (but not including) the consumer index in the status block.
*
* This function must advance (srp->tc_next) AND adjust (srp->tx_free)
* to account for the packets it has recycled.
*
* This is a trivial version that just does that and nothing more, but
* it suffices while there's only one method for sending messages (by
* copying) and that method doesn't need any special per-buffer action
* for recycling.
*/
static void bge_recycle_ring(bge_t *bgep, send_ring_t *srp);
#pragma inline(bge_recycle_ring)
static void
bge_recycle_ring(bge_t *bgep, send_ring_t *srp)
{
uint64_t slot;
uint64_t n;
ASSERT(mutex_owned(srp->tc_lock));
slot = *srp->cons_index_p; /* volatile */
n = slot - srp->tc_next;
if (slot < srp->tc_next)
n += srp->desc.nslots;
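	/*
	 * Note: the subtraction is performed in unsigned (modulo 2^64)
	 * arithmetic, so the wraparound case works out correctly; e.g.
	 * with nslots == 512, tc_next == 510 and a new consumer index
	 * of 4, n wraps to 2^64-506, and adding 512 yields the 6 slots
	 * actually completed.
	 */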
/*
* We're about to release one or more places :-)
* These ASSERTions check that our invariants still hold:
* there must always be at least one free place
* at this point, there must be at least one place NOT free
* we're not about to free more places than were claimed!
*/
	ASSERT(srp->tx_free > 0);
	ASSERT(srp->tx_free < srp->desc.nslots);
	ASSERT(srp->tx_free + n <= srp->desc.nslots);
srp->tc_next = slot;
bge_atomic_renounce(&srp->tx_free, n);
/*
* Reset the watchdog count: to 0 if all buffers are
* now free, or to 1 if some are still outstanding.
	 * Note: non-synchronised access here means we may get
* the "wrong" answer, but only in a harmless fashion
* (i.e. we deactivate the watchdog because all buffers
* are apparently free, even though another thread may
* have claimed one before we leave here; in this case
* the watchdog will restart on the next send() call).
*/
bgep->watchdog = srp->tx_free == srp->desc.nslots ? 0 : 1;
}
/*
* Recycle all returned slots in all rings.
*
* To give priority to low-numbered rings, whenever we have recycled any
* slots in any ring except 0, we restart scanning again from ring 0.
* Thus, for example, if rings 0, 3, and 10 are carrying traffic, the
* pattern of recycles might go 0, 3, 10, 3, 0, 10, 0:
*
* 0 found some - recycle them
* 1..2 none found
* 3 found some - recycle them and restart scan
* 0..9 none found
* 10 found some - recycle them and restart scan
* 0..2 none found
* 3 found some more - recycle them and restart scan
* 0 found some more - recycle them
 * 1..9 none found
* 10 found some more - recycle them and restart scan
* 0 found some more - recycle them
* 1..15 none found
*
* The routine returns only when a complete scan has been performed
* without finding any slots to recycle.
*
 * Note: when only one send ring is in use (tx_rings == 1), the
 * restart test never fires and the outer do-loop reduces to a
 * single pass over the one ring.
*/
void bge_recycle(bge_t *bgep, bge_status_t *bsp);
#pragma no_inline(bge_recycle)
void
bge_recycle(bge_t *bgep, bge_status_t *bsp)
{
send_ring_t *srp;
uint64_t ring;
uint64_t tx_rings = bgep->chipid.tx_rings;
restart:
ring = 0;
srp = &bgep->send[ring];
do {
/*
* For each ring, (srp->cons_index_p) points to the
* proper index within the status block (which has
* already been sync'd by the caller).
*/
ASSERT(srp->cons_index_p == SEND_INDEX_P(bsp, ring));
if (*srp->cons_index_p == srp->tc_next)
continue; /* no slots to recycle */
mutex_enter(srp->tc_lock);
bge_recycle_ring(bgep, srp);
mutex_exit(srp->tc_lock);
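		/*
		 * We freed some slots; if the transmit path had
		 * stalled for lack of them, kick the reschedule
		 * softint so the MAC layer can resume sending.
		 */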
if (bgep->resched_needed && !bgep->resched_running) {
bgep->resched_running = B_TRUE;
ddi_trigger_softintr(bgep->resched_id);
}
		/*
		 * Restart from ring 0, if we're not on ring 0 already.
		 * The hardware selects send BDs strictly by priority,
		 * always draining available BDs on the higher-priority
		 * (lower-numbered) rings first, so the driver keeps in
		 * step with it by giving lower-numbered rings priority
		 * when recycling as well.
		 */
if (tx_rings > 1 && ring > 0)
goto restart;
/*
* Loop over all rings (if there *are* multiple rings)
*/
} while (++srp, ++ring < tx_rings);
}
/*
* ========== Send-side transmit routines ==========
*/
/*
* CLAIM an already-reserved place on the next train
*
* This is the point of no return!
*/
static uint64_t bge_send_claim(bge_t *bgep, send_ring_t *srp);
#pragma inline(bge_send_claim)
static uint64_t
bge_send_claim(bge_t *bgep, send_ring_t *srp)
{
uint64_t slot;
mutex_enter(srp->tx_lock);
atomic_add_64(&srp->tx_flow, 1);
slot = bge_atomic_claim(&srp->tx_next, srp->desc.nslots);
mutex_exit(srp->tx_lock);
/*
* Bump the watchdog counter, thus guaranteeing that it's
	 * nonzero (watchdog activated). Note that non-synchronised
	 * access here means we may race with the recycle code
	 * above, but the outcome will be harmless. At worst, the
	 * counter may not get reset on a partial recycle; but the
	 * large trigger threshold makes false positives unlikely.
*/
bgep->watchdog += 1;
return (slot);
}
/*
* Send a message by copying it into a preallocated (and premapped) buffer
*/
static enum send_status bge_send_copy(bge_t *bgep, mblk_t *mp,
send_ring_t *srp, uint16_t tci);
#pragma inline(bge_send_copy)
static enum send_status
bge_send_copy(bge_t *bgep, mblk_t *mp, send_ring_t *srp, uint16_t tci)
{
bge_sbd_t *hw_sbd_p;
sw_sbd_t *ssbdp;
mblk_t *bp;
char *txb;
uint64_t slot;
size_t totlen;
size_t mblen;
uint32_t pflags;
BGE_TRACE(("bge_send_copy($%p, $%p, $%p, 0x%x)",
	    (void *)bgep, (void *)mp, (void *)srp, tci));
/*
* IMPORTANT:
* Up to the point where it claims a place, a send_msg()
* routine can indicate failure by returning SEND_FAIL.
* Once it's claimed a place, it mustn't fail.
*
* In this version, there's no setup to be done here, and there's
* nothing that can fail, so we can go straight to claiming our
* already-reserved place on the train.
*
* This is the point of no return!
*/
slot = bge_send_claim(bgep, srp);
ssbdp = &srp->sw_sbds[slot];
/*
* Copy the data into a pre-mapped buffer, which avoids the
* overhead (and complication) of mapping/unmapping STREAMS
* buffers and keeping hold of them until the DMA has completed.
*
* Because all buffers are the same size, and larger than the
* longest single valid message, we don't have to bother about
* splitting the message across multiple buffers either.
*/
txb = DMA_VPTR(ssbdp->pbuf);
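	/*
	 * Walk the mblk chain, appending each fragment to the transmit
	 * buffer. The running-total check means fragments beyond the
	 * chip's maximum frame size are silently not copied; the
	 * ASSERTs below catch such oversize chains in DEBUG builds.
	 */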
for (totlen = 0, bp = mp; bp != NULL; bp = bp->b_cont) {
mblen = bp->b_wptr - bp->b_rptr;
if ((totlen += mblen) <= bgep->chipid.ethmax_size) {
bcopy(bp->b_rptr, txb, mblen);
txb += mblen;
}
}
/*
	 * We've reached the end of the chain, and we should have
	 * collected no more than ethmax_size bytes into our buffer.
*/
ASSERT(bp == NULL);
ASSERT(totlen <= bgep->chipid.ethmax_size);
DMA_SYNC(ssbdp->pbuf, DDI_DMA_SYNC_FORDEV);
/*
* Update the hardware send buffer descriptor; then we're done.
* The return status indicates that the message can be freed
* right away, as we've already copied the contents ...
*/
hw_sbd_p = DMA_VPTR(ssbdp->desc);
hw_sbd_p->host_buf_addr = ssbdp->pbuf.cookie.dmac_laddress;
hw_sbd_p->len = totlen;
hw_sbd_p->flags = SBD_FLAG_PACKET_END;
if (tci != 0) {
hw_sbd_p->vlan_tci = tci;
hw_sbd_p->flags |= SBD_FLAG_VLAN_TAG;
}
hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags);
if (pflags & HCK_IPV4_HDRCKSUM)
hw_sbd_p->flags |= SBD_FLAG_IP_CKSUM;
if (pflags & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))
hw_sbd_p->flags |= SBD_FLAG_TCP_UDP_CKSUM;
return (SEND_FREE);
}
static boolean_t
bge_send(bge_t *bgep, mblk_t *mp)
{
send_ring_t *srp;
enum send_status status;
struct ether_vlan_header *ehp;
boolean_t need_strip = B_FALSE;
bge_status_t *bsp;
uint16_t tci;
uint_t ring = 0;
ASSERT(mp->b_next == NULL);
/*
* Determine if the packet is VLAN tagged.
*/
ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
ehp = (struct ether_vlan_header *)mp->b_rptr;
if (ehp->ether_tpid == htons(VLAN_TPID)) {
if (MBLKL(mp) < sizeof (struct ether_vlan_header)) {
uint32_t pflags;
/*
* Need to preserve checksum flags across pullup.
*/
hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL,
NULL, &pflags);
if (!pullupmsg(mp,
sizeof (struct ether_vlan_header))) {
BGE_DEBUG(("bge_send: pullup failure"));
bgep->resched_needed = B_TRUE;
return (B_FALSE);
}
(void) hcksum_assoc(mp, NULL, NULL, NULL, NULL, NULL,
NULL, pflags, KM_NOSLEEP);
}
ehp = (struct ether_vlan_header *)mp->b_rptr;
need_strip = B_TRUE;
}
	/*
	 * Try to reserve a place in the chosen ring. We mustn't fall
	 * back to the next higher-numbered (lower-priority) ring if
	 * none is available; otherwise, packets of the same priority
	 * could starve each other of transmit slots.
	 */
srp = &bgep->send[ring];
if (!bge_atomic_reserve(&srp->tx_free, 1)) {
BGE_DEBUG(("bge_send: no free slots"));
bgep->resched_needed = B_TRUE;
return (B_FALSE);
}
	/*
	 * Now that we know there is space to transmit the packet,
	 * strip any VLAN tag that is present.
	 */
if (need_strip) {
tci = ntohs(ehp->ether_tci);
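		/*
		 * Overwrite the 4-byte 802.1Q tag by sliding the two
		 * 6-byte MAC addresses forward VLAN_TAGSZ bytes, then
		 * advance b_rptr past the vacated space.
		 */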
(void) memmove(mp->b_rptr + VLAN_TAGSZ, mp->b_rptr,
2 * ETHERADDRL);
mp->b_rptr += VLAN_TAGSZ;
} else {
tci = 0;
}
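	/*
	 * Running low on free slots; opportunistically recycle any
	 * completed buffers before claiming ours, to reduce the risk
	 * of stalling subsequent senders.
	 */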
if (srp->tx_free <= 16) {
bsp = DMA_VPTR(bgep->status_block);
bge_recycle(bgep, bsp);
}
/*
* We've reserved a place :-)
* These ASSERTions check that our invariants still hold:
* there must still be at least one free place
* there must be at least one place NOT free (ours!)
*/
	ASSERT(srp->tx_free > 0);
	ASSERT(srp->tx_free < srp->desc.nslots);
if ((status = bge_send_copy(bgep, mp, srp, tci)) == SEND_FAIL) {
/*
* The send routine failed :( So we have to renounce
* our reservation before returning the error.
*/
bge_atomic_renounce(&srp->tx_free, 1);
bgep->resched_needed = B_TRUE;
return (B_FALSE);
}
/*
* The send routine succeeded; it will have updated the
* h/w ring descriptor, and the <tx_next> and <tx_flow>
* counters.
*
* Because there can be multiple concurrent threads in
* transit through this code, we only want to prod the
* hardware once the last one is departing ...
*/
mutex_enter(srp->tx_lock);
if (--srp->tx_flow == 0) {
DMA_SYNC(srp->desc, DDI_DMA_SYNC_FORDEV);
bge_mbx_put(bgep, srp->chip_mbx_reg, srp->tx_next);
if (bge_check_acc_handle(bgep, bgep->io_handle) != DDI_FM_OK)
bgep->bge_chip_state = BGE_CHIP_ERROR;
}
mutex_exit(srp->tx_lock);
if (status == SEND_FREE)
freemsg(mp);
return (B_TRUE);
}
uint_t
bge_reschedule(caddr_t arg)
{
bge_t *bgep;
bgep = (bge_t *)arg;
BGE_TRACE(("bge_reschedule($%p)", (void *)bgep));
if (bgep->bge_mac_state == BGE_MAC_STARTED && bgep->resched_needed) {
mac_tx_update(bgep->macp);
bgep->resched_needed = B_FALSE;
bgep->resched_running = B_FALSE;
}
return (DDI_INTR_CLAIMED);
}
/*
* bge_m_tx() - send a chain of packets
*/
mblk_t *
bge_m_tx(void *arg, mblk_t *mp)
{
bge_t *bgep = arg; /* private device info */
mblk_t *next;
BGE_TRACE(("bge_m_tx($%p, $%p)", arg, (void *)mp));
ASSERT(mp != NULL);
ASSERT(bgep->bge_mac_state == BGE_MAC_STARTED);
if (bgep->bge_chip_state != BGE_CHIP_RUNNING) {
BGE_DEBUG(("bge_m_tx: chip not running"));
return (mp);
}
rw_enter(bgep->errlock, RW_READER);
while (mp != NULL) {
next = mp->b_next;
mp->b_next = NULL;
if (!bge_send(bgep, mp)) {
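			/*
			 * No transmit resources: relink the unsent
			 * remainder of the chain and hand it back, so
			 * the MAC layer will retry after the
			 * bge_reschedule() softint fires.
			 */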
mp->b_next = next;
break;
}
mp = next;
}
rw_exit(bgep->errlock);
return (mp);
}