/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/sysmacros.h>
#include <sys/machsystm.h>
#include <sys/ethernet.h>
#include <sys/mach_descrip.h>
#include <sys/vio_mailbox.h>
#include <sys/vio_common.h>
#include <sys/vnet_common.h>
#include <sys/vnet_mailbox.h>
#include <sys/vio_util.h>
#include <sys/vnet_gen.h>
/*
* This file contains the implementation of TxDring data transfer mode of VIO
* Protocol in vnet. The functions in this file are invoked from vnet_gen.c
* after TxDring mode is negotiated with the peer during attribute phase of
* handshake. This file contains functions that setup the transmit and receive
* descriptor rings, and associated resources in TxDring mode. It also contains
* the transmit and receive data processing functions that are invoked in
* TxDring mode.
*/
/* Functions exported to vnet_gen.c */
void vgen_ldc_msg_worker(void *arg);
/* Internal functions */
/* Functions imported from vnet_gen.c */
extern void vgen_destroy_rxpools(void *arg);
/* Tunables */
extern int vgen_rxpool_cleanup_delay;
extern boolean_t vnet_jumbo_rxpools;
extern uint32_t vnet_num_descriptors;
extern uint32_t vgen_chain_len;
extern uint32_t vgen_ldcwr_retries;
extern uint32_t vgen_recv_delay;
extern uint32_t vgen_recv_retries;
extern uint32_t vgen_rbufsz1;
extern uint32_t vgen_rbufsz2;
extern uint32_t vgen_rbufsz3;
extern uint32_t vgen_rbufsz4;
extern uint32_t vgen_nrbufs1;
extern uint32_t vgen_nrbufs2;
extern uint32_t vgen_nrbufs3;
extern uint32_t vgen_nrbufs4;
#ifdef DEBUG
extern int vnet_dbglevel;
extern int vgen_inject_err_flag;
#endif
/*
* Allocate transmit resources for the channel. The resources consist of a
* transmit descriptor ring and an associated transmit buffer area.
*/
/*
 * NOTE(review): many source lines are elided in this view of the file --
 * the function's name/parameter line and most statements are missing.
 * Based on the comment above, this is presumably the TxDring transmit
 * setup routine (vgen_create_tx_dring?) operating on a vgen_ldc_t (ldcp).
 * Code below is left byte-identical; only comments were added.
 * Returns VGEN_SUCCESS, or VGEN_FAILURE/DDI_FAILURE on error; the fail:
 * path presumably tears down partially allocated resources.
 */
int
{
int i;
int rv;
int ci;
/* sizes of one exported (public) descriptor and one private descriptor */
txdsize = sizeof (vnet_public_desc_t);
tbufsize = sizeof (vgen_private_desc_t);
/* allocate transmit buffer ring */
return (DDI_FAILURE);
}
/* create transmit descriptor ring */
&ldcp->tx_dring_handle);
if (rv != 0) {
goto fail;
}
/* get the addr of descriptor ring */
if (rv != 0) {
goto fail;
}
/*
* In order to ensure that the number of ldc cookies per descriptor is
* limited to be within the default MAX_COOKIES (2), we take the steps
* outlined below:
*
* Align the entire data buffer area to 8K and carve out per descriptor
* data buffers starting from this 8K aligned base address.
*
* We round up the mtu specified to be a multiple of 2K or 4K.
* For sizes up to 12K we round up the size to the next 2K.
* For sizes > 12K we round up to the next 4K (otherwise sizes such as
* 14K could end up needing 3 cookies, with the buffer spread across
* 3 8K pages: 8K+6K, 2K+8K+2K, 6K+8K, ...).
*/
} else {
}
/* allocate extra 8K bytes for alignment */
/* align the starting address of the data area to 8K */
/*
* for each private descriptor, allocate a ldc mem_handle which is
* required to map the data during transmit, set the flags
* to free (available for use by transmit routine).
*/
if (rv) {
goto fail;
}
/*
* bind ldc memhandle to the corresponding transmit buffer.
*/
if (rv != 0) {
goto fail;
}
/*
* successful in binding the handle to tx data buffer.
* set datap in the private descr to this buffer.
*/
if ((ncookies == 0) ||
(ncookies > MAX_COOKIES)) {
goto fail;
}
if (rv != 0) {
goto fail;
}
}
}
/*
* The descriptors and the associated buffers are all ready;
* now bind descriptor ring to the channel.
*/
if (rv != 0) {
"rv(%x)\n", rv);
goto fail;
}
/* reset tbuf walking pointers */
/* initialize tx seqnum and index */
ldcp->resched_peer_txi = 0;
return (VGEN_SUCCESS);
fail:
/* presumably a teardown routine is called here (line elided) */
return (VGEN_FAILURE);
}
/*
* Free transmit resources for the channel.
*/
/*
 * NOTE(review): the signature line and most statements are elided in this
 * view.  Per the comment above, this frees the channel's transmit
 * resources in the reverse order of allocation: dring unbind, tbuf ldc
 * mem handle unbind/free, tx data buffer area, the descriptor ring, and
 * finally the transmit buffer ring.  Code left byte-identical.
 */
void
{
int i;
/* We first unbind the descriptor ring */
if (ldcp->tx_dring_ncookies != 0) {
ldcp->tx_dring_ncookies = 0;
}
/* Unbind transmit buffers */
/* for each tbuf (priv_desc), free ldc mem_handle */
}
}
}
}
/* Free tx data buffer area */
ldcp->tx_data_sz = 0;
}
/* Free transmit descriptor ring */
if (ldcp->tx_dring_handle != 0) {
ldcp->tx_dring_handle = 0;
}
/* Free transmit buffer ring */
}
}
/*
* Map the transmit descriptor ring exported
* by the peer, as our receive descriptor ring.
*/
/*
 * NOTE(review): signature and many statements elided in this view.
 * Per the comment above, this maps the peer's exported transmit dring as
 * our receive dring: sanity-check the advertised geometry, ldc-map the
 * remote ring, record its address/size, then best-effort allocate rx
 * mblk pools.  Returns VGEN_SUCCESS/VGEN_FAILURE; on failure any mapped
 * dring handle is released.  Code left byte-identical; comments only.
 */
int
{
int rv;
/*
* Sanity check.
*/
if (num_desc < VGEN_NUM_DESCRIPTORS_MIN ||
desc_size < sizeof (vnet_public_desc_t)) {
goto fail;
}
/* Map the remote dring */
if (rv != 0) {
goto fail;
}
/*
* Successfully mapped, now try to get info about the mapped dring
*/
if (rv != 0) {
goto fail;
}
/*
* Save ring address, number of descriptors.
*/
/* Initialize rx dring indexes and seqnum */
/* Save peer's dring_info values */
sizeof (ldc_mem_cookie_t));
/* Set dring_ident for the peer */
/* Return the dring_ident in ack msg */
/* alloc rx mblk pools */
if (rv != 0) {
/*
* We do not return failure if receive mblk pools can't
* be allocated; instead allocb(9F) will be used to
* dynamically allocate buffers during receive.
*/
"vnet%d: failed to allocate rx mblk "
"pools for channel(0x%lx)\n",
}
return (VGEN_SUCCESS);
fail:
if (ldcp->rx_dring_handle != 0) {
ldcp->rx_dring_handle = 0;
}
return (VGEN_FAILURE);
}
/*
* Unmap the receive descriptor ring.
*/
/*
 * NOTE(review): signature and many statements elided in this view.
 * Per the comment above, this unmaps the peer's descriptor ring:
 * destroys (or schedules destruction of) the rx mblk pools, unmaps the
 * dring handle, and clears the cached rx ring state.  Code left
 * byte-identical; comments only.
 */
void
{
/* Destroy receive mblk pools */
/*
* If we can't destroy the rx pool for this channel,
* dispatch a task to retry and clean up. Note that we
* don't need to wait for the task to complete. If the
* vnet device itself gets detached, it will wait for
* the task to complete implicitly in
* ddi_taskq_destroy().
*/
}
/* Unmap peer's dring */
if (ldcp->rx_dring_handle != 0) {
ldcp->rx_dring_handle = 0;
}
/* clobber rx ring members */
}
/* Allocate receive resources */
/*
 * NOTE(review): signature and many statements elided in this view.
 * Allocates the receive mblk pools for the channel.  Pool sizes come
 * from the vgen_rbufszN/vgen_nrbufsN tunables when set (vgen_rbufsz1
 * acting as the presence hint), otherwise they are derived from the
 * (2K-rounded) mtu when vnet_jumbo_rxpools is enabled, else standard
 * default pools are used.  Code left byte-identical; comments only.
 */
static int
{
int status;
/*
* We round up the mtu specified to be a multiple of 2K.
* We then create rx pools based on the rounded up size.
*/
/*
* If pool sizes are specified, use them. Note that the presence of
* the first tunable will be used as a hint.
*/
if (vgen_rbufsz1 != 0) {
sz1 = vgen_rbufsz1;
sz2 = vgen_rbufsz2;
sz3 = vgen_rbufsz3;
sz4 = vgen_rbufsz4;
if (sz4 == 0) { /* need 3 pools */
} else {
}
return (status);
}
/*
* Pool sizes are not specified. We select the pool sizes based on the
* mtu if vnet_jumbo_rxpools is enabled.
*/
/*
* Receive buffer pool allocation based on mtu is disabled.
* Use the default mechanism of standard size pool allocation.
*/
return (status);
}
switch (data_sz) {
case VNET_4K:
break;
default: /* data_sz: 4K+ to 16K */
break;
}
return (status);
}
/*
* This function transmits normal data frames (non-priority) over the channel.
* It queues the frame into the transmit descriptor ring and sends a
* VIO_DRING_DATA message if needed, to wake up the peer to (re)start
* processing.
*/
/*
 * NOTE(review): signature and many statements elided in this view.
 * Per the comment above, this is the normal-data transmit path: it
 * reserves a tx descriptor (reclaiming if the ring is full), copies the
 * frame into the pre-allocated tx buffer, publishes the public
 * descriptor, updates stats, and sends a VIO_DRING_DATA message if the
 * peer needs to be restarted (resched_peer).  Returns VGEN_TX_SUCCESS
 * or VGEN_TX_NORESOURCES; an ECONNRESET from the send presumably
 * triggers channel-reset handling.  Code left byte-identical.
 */
int
{
int rv = 0;
ldcp->ldc_status);
goto dringsend_exit;
}
/* drop the packet if ldc is not up or handshake is not done */
goto dringsend_exit;
}
goto dringsend_exit;
}
/*
* allocate a descriptor
*/
/* Try reclaiming now */
statsp->tx_no_desc++;
return (VGEN_TX_NORESOURCES);
}
}
/* update next available tbuf in the ring and update tx index */
/* Mark the buffer busy before releasing the lock */
/* copy data into pre-allocated transmit buffer */
}
/* initialize the corresponding public descriptor (txd) */
/*
* If the flags not set to BUSY, it implies that the clobber
* was done while we were copying the data. In such case,
* discard the packet and return.
*/
goto dringsend_exit;
}
/* update stats */
if (is_bcast)
else if (is_mcast)
/* send dring datamsg to the peer */
if (ldcp->resched_peer) {
if (rv != 0) {
/* error: drop the packet */
"failed sending dringdata msg "
} else {
}
}
}
if (rv == ECONNRESET) {
}
return (VGEN_TX_SUCCESS);
}
/*
 * NOTE(review): signature and many statements elided in this view.
 * Returns a chain of received mblks (mp), picking packets off the poll
 * queue until a bytes_to_pickup limit is reached; any high-priority
 * packets are prepended to the chain at the end.  Presumably this is the
 * polling-mode receive entry point invoked by the mac layer -- confirm
 * against the full source.  Code left byte-identical; comments only.
 */
mblk_t *
{
/* get the size of this packet */
/* if adding this pkt, exceeds the size limit, we are done. */
break;
}
/* we have room for this packet */
/* increment the # of packets being sent up */
count++;
/* track the last processed pkt */
/* get the next pkt */
}
if (count != 0) {
/*
* picked up some packets; save the head of pkts to be sent up.
*/
/* move the pollq_headp to skip over the pkts being sent up */
/* picked up all pending pkts in the queue; reset tail also */
}
/* terminate the tail of pkts to be sent up */
}
/*
* We prepend any high priority packets to the chain of packets; note
* that if we are already at the bytes_to_pickup limit, we might
* slightly exceed that in such cases. That should be ok, as these pkts
* are expected to be small in size and arrive at an interval in the
* order of a few seconds.
*/
}
return (mp);
}
/*
 * Handle a received descriptor-ring data message, dispatching on its
 * VIO subtype (INFO/ACK/NACK).
 * NOTE(review): the original descriptive text of this comment is missing
 * in this view of the file; reconstructed from the function body below.
 */
/*
 * NOTE(review): signature and the per-case handler calls are elided in
 * this view.  Dispatches a received dring data message on its VIO
 * subtype: INFO (peer has data for us -- note the cblock is dropped and
 * reacquired around receive processing to reduce lock contention),
 * ACK, or NACK.  Code left byte-identical; comments only.
 */
int
{
int rv = 0;
switch (tagp->vio_subtype) {
case VIO_SUBTYPE_INFO:
/*
* To reduce the locking contention, release the
* cblock here and re-acquire it once we are done
* receiving packets.
*/
break;
case VIO_SUBTYPE_ACK:
break;
case VIO_SUBTYPE_NACK:
break;
}
return (rv);
}
/*
 * NOTE(review): signature and many statements elided in this view.
 * Handles a VIO_SUBTYPE_INFO dring data message: validates the start/end
 * descriptor indexes and the dring_ident, optionally detects lost
 * packets (VGEN_HANDLE_LOST_PKTS) and NACKs the lost range, then
 * processes the ready descriptors.  Invalid messages are silently
 * dropped.  Code left byte-identical; comments only.
 */
static int
{
int rv = 0;
#ifdef VGEN_HANDLE_LOST_PKTS
int n;
#endif
/*
* received a data msg, which contains the start and end
* indices of the descriptors within the rx ring holding data,
* the seq_num of data packet corresponding to the start index,
* and the dring_ident.
* We can now read the contents of each of these descriptors
* and gather data from it.
*/
/* validate rx start and end indexes */
/* drop the message if invalid index */
return (rv);
}
/* validate dring_ident */
/* invalid dring_ident, drop the msg */
return (rv);
}
#ifdef DEBUG
/* drop this msg to simulate lost pkts for debugging */
return (rv);
}
#endif
#ifdef VGEN_HANDLE_LOST_PKTS
/* receive start index doesn't match expected index */
/* calculate the number of pkts lost */
} else {
}
statsp->rx_lost_pkts += n;
/* indicate the range of lost descriptors */
/* dring ident is left unchanged */
if (rv != VGEN_SUCCESS) {
"vgen_sendmsg failed, stype:NACK\n");
return (rv);
}
/*
* and set the new expected value of next_rxi
* and continue(below) to process from the new
* start index.
*/
}
#endif /* VGEN_HANDLE_LOST_PKTS */
/* Now receive messages */
return (rv);
}
/*
 * NOTE(review): signature, the error_ret label, and many statements are
 * elided in this view.  This is the main receive loop: starting from the
 * given index it acquires descriptors until one is not ready (with a
 * bounded retry: vgen_recv_retries x vgen_recv_delay) or the whole ring
 * has been processed.  For each ready descriptor it allocates an mblk
 * (from the channel's free pool, falling back to allocb(9F)), copies the
 * data via ldc_mem_copy, marks the descriptor done, sends acks when the
 * sender requested them, and chains the packets -- sending the chain up
 * every vgen_chain_len packets.  On exit a STOPPED ack is sent and any
 * remaining chained packets are delivered.  Code left byte-identical.
 */
static int
{
int count = 0;
int rv = 0;
/*
* start processing the descriptors from the specified
* start index, up to the index a descriptor is not ready
* to be processed or we process the entire descriptor ring
* and wrap around upto the start index.
*/
/* need to set the start index of descriptors to be ack'd */
/* index upto which we have ack'd */
do {
if (rv != 0) {
" rv(%d)\n", rv);
return (rv);
}
/*
* Before waiting and retry here, send up
* the packets that are received already
*/
count = 0;
}
/*
* descriptor is not ready.
* retry descriptor acquire, stop processing
* after max # retries.
*/
if (retries == vgen_recv_retries)
break;
retries++;
goto vgen_recv_retry;
}
retries = 0;
if (set_ack_start) {
/*
* initialize the start index of the range
* of descriptors to be ack'd.
*/
}
} else {
/*
* Try to allocate an mblk from the free pool
* of recv mblks for the channel.
* If this fails, use allocb().
*/
BPRI_MED);
} else {
/*
* Data buffer returned by allocb(9F)
* is 8byte aligned. We allocate extra
* 8 bytes to ensure size is multiple
* of 8 bytes for ldc_mem_copy().
*/
} else {
}
}
}
/*
* rxd_err or allocb() failure,
* drop this packet, get next.
*/
if (rxd_err) {
} else {
statsp->rx_allocb_fail++;
}
/* set descriptor done bit */
if (rv != 0) {
"vnet_dring_entry_set_dstate err rv(%d)\n",
rv);
return (rv);
}
if (ack_needed) {
/*
* sender needs ack for this packet,
* ack pkts upto this index.
*/
if (rv != VGEN_SUCCESS) {
goto error_ret;
}
/* need to set new ack start index */
}
goto vgen_next_rxi;
}
/* if ldc_mem_copy() failed */
if (rv) {
goto error_ret;
}
if (rv != 0) {
"vnet_dring_entry_set_dstate err rv(%d)\n", rv);
goto error_ret;
}
if (ack_needed) {
/*
* sender needs ack for this packet,
* ack pkts upto this index.
*/
if (rv != VGEN_SUCCESS) {
goto error_ret;
}
/* need to set new ack start index */
}
"ldc_mem_copy nread(%lx), nbytes(%lx)\n",
goto vgen_next_rxi;
}
/* point to the actual end of data */
}
/* update stats */
if (IS_BROADCAST(ehp))
else if (IS_MULTICAST(ehp))
/* build a chain of received packets */
/* first pkt */
} else {
}
if (count++ > vgen_chain_len) {
count = 0;
}
/* update end index of range of descrs to be ack'd */
/* update the next index to be processed */
/*
* processed the entire descriptor ring upto
* the index at which we started.
*/
break;
}
} while (1);
/*
* send an ack message to peer indicating that we have stopped
* processing descriptors.
*/
if (set_ack_start) {
/*
* We have ack'd upto some index and we have not
* processed any descriptors beyond that index.
* Use the last ack'd index as both the start and
* end of range of descrs being ack'd.
* Note: This results in acking the last index twice
* and should be harmless.
*/
}
if (rv != VGEN_SUCCESS) {
goto error_ret;
}
/* save new recv index of next dring msg */
/* send up packets received so far */
}
return (rv);
}
/*
 * NOTE(review): signature and many statements elided in this view.
 * Handles a VIO_SUBTYPE_ACK dring data message: validates the acked
 * index range and dring_ident, reclaims completed tx descriptors, and --
 * if the peer reported it has stopped processing -- checks for further
 * ready tx descriptors and either restarts the peer with a dring data
 * message or sets resched_peer so the transmit path does it later.
 * Code left byte-identical; comments only.
 */
static int
{
int rv = 0;
/*
* received an ack corresponding to a specific descriptor for
* which we had set the ACK bit in the descriptor (during
* transmit). This enables us to reclaim descriptors.
*/
/* validate start and end indexes in the tx ack msg */
/* drop the message if invalid index */
return (rv);
}
/* validate dring_ident */
/* invalid dring_ident, drop the msg */
return (rv);
}
/* reclaim descriptors that are done */
/*
* receiver continued processing descriptors after
* sending us the ack.
*/
return (rv);
}
/* receiver stopped processing descriptors */
/*
* determine if there are any pending tx descriptors
* ready to be processed by the receiver(peer) and if so,
* send a message to the peer to restart receiving.
*/
/*
* using the end index of the descriptor range for which
* we received the ack, check if the next descriptor is
* ready.
*/
} else {
/*
* descr next to the end of ack'd descr range is not
* ready.
* starting from the current reclaim index, check
* if any descriptor is ready.
*/
}
}
if (ready_txd) {
/*
* we have tx descriptor(s) ready to be
* processed by the receiver.
* send a message to the peer with the start index
* of ready descriptors.
*/
if (rv != VGEN_SUCCESS) {
return (rv);
}
} else {
/*
* no ready tx descriptors. set the flag to send a
* message to peer when tx descriptors are ready in
* transmit routine.
*/
}
return (rv);
}
/*
 * NOTE(review): signature and many statements elided in this view.
 * Handles a VIO_SUBTYPE_NACK dring data message (peer reporting lost
 * packets): validates the index range and dring_ident, then simply marks
 * the nack'd descriptors done so they can be reclaimed; the data itself
 * is not retransmitted.  Code left byte-identical; comments only.
 */
static int
{
int rv = 0;
/*
* peer sent a NACK msg to indicate lost packets.
* The start and end correspond to the range of descriptors
* for which the peer didn't receive a dring data msg and so
* didn't receive the corresponding data.
*/
/* validate start and end indexes in the tx nack msg */
/* drop the message if invalid index */
return (rv);
}
/* validate dring_ident */
/* invalid dring_ident, drop the msg */
return (rv);
}
/* no busy descriptors, bogus nack ? */
return (rv);
}
/* we just mark the descrs as done so they can be reclaimed */
}
return (rv);
}
/*
* Send received packets up the stack.
*/
/*
 * NOTE(review): signature and many statements elided in this view.
 * Delivers a chain of received packets: in polling mode the chain is
 * appended to the poll queue and the function returns; in interrupt mode
 * any packets pending on the poll queue are prepended first and the
 * combined chain is sent up the stack.  Code left byte-identical.
 */
static void
{
} else {
}
/*
* If we are in polling mode, simply queue
* the packets onto the poll queue and return.
*/
} else {
}
return;
}
/*
* Prepend any pending mblks in the poll queue, now that we
* are in interrupt mode, before sending up the chain of pkts.
*/
}
} else {
}
/* Send up the packets */
} else {
}
}
/*
 * NOTE(review): the name/parameter line of this function is missing in
 * this view and its body appears empty (or elided) -- confirm against
 * the complete source file.
 */
static void
{
}
/*
* transmit reclaim function. starting from the current reclaim index
* look for descriptors marked DONE and reclaim the descriptor.
*/
/*
 * NOTE(review): signature and most statements elided in this view.
 * Per the comment above, this walks the tx ring from the current reclaim
 * index, reclaiming descriptors marked DONE, and notifies the mac layer
 * to restart transmissions when appropriate.  Code left byte-identical.
 */
static void
{
int count = 0;
count++;
}
/*
* Check if mac layer should be notified to restart transmissions
*/
}
}
/*
* Send descriptor ring data message to the peer over ldc.
*/
/*
 * NOTE(review): signature and message-construction statements elided in
 * this view.  Builds and sends a VIO_DRING_DATA message to the peer;
 * under DEBUG an error-injection path can short-circuit the send.
 * Returns VGEN_SUCCESS or the underlying send error.  Code left
 * byte-identical; comments only.
 */
static int
{
int rv;
#ifdef DEBUG
/* presumably simulates a dropped dring data msg under error injection */
return (VGEN_SUCCESS);
}
#endif
if (rv != VGEN_SUCCESS) {
return (rv);
}
return (VGEN_SUCCESS);
}
/*
* Send dring data ack message.
*/
/*
 * NOTE(review): signature and message-construction statements elided in
 * this view.  Sends a dring data ack message to the peer; the
 * VIO_DP_STOPPED case presumably records that we reported stopping
 * descriptor processing.  Code left byte-identical; comments only.
 */
static int
{
int rv = 0;
if (rv != VGEN_SUCCESS) {
}
if (pstate == VIO_DP_STOPPED) {
}
return (rv);
}
/*
* Wrapper routine to send the given message over ldc using ldc_write().
*/
/*
 * NOTE(review): signature and many statements elided in this view.
 * Wrapper around ldc_write(): optionally acquires the channel lock
 * (caller_holds_lock), retries EWOULDBLOCK up to vgen_ldcwr_retries
 * times, and bumps next_txseq on success.  Returns VGEN_SUCCESS,
 * VGEN_FAILURE, or the ldc_write error code.  Code left byte-identical.
 */
int
{
int rv;
return (VGEN_FAILURE);
if (!caller_holds_lock) {
}
}
}
do {
/* bounded retry on flow-control pushback from ldc_write() */
if (retries++ >= vgen_ldcwr_retries)
break;
} while (rv == EWOULDBLOCK);
ldcp->next_txseq++;
}
if (!caller_holds_lock) {
}
if (rv != 0) {
return (rv);
}
return (VGEN_FAILURE);
}
return (VGEN_SUCCESS);
}
/*
 * NOTE(review): signature and the comparison statements are elided in
 * this view.  Validates the sequence number of a received data message
 * against the expected next_rxseq; returns EINVAL on mismatch (the
 * message is to be dropped), otherwise increments next_rxseq and
 * returns 0.  Code left byte-identical; comments only.
 */
int
{
} else {
return (EINVAL);
}
/* seqnums don't match */
"next_rxseq(0x%lx) != seq_num(0x%lx)\n",
return (EINVAL);
}
ldcp->next_rxseq++;
return (0);
}
/*
* vgen_ldc_msg_worker -- A per LDC worker thread. This thread is woken up by
* the LDC interrupt handler to process LDC packets and receive data.
*/
/*
 * NOTE(review): the parameter line and several statements are elided in
 * this view.  Per the comment above, this is the per-LDC worker thread
 * body (declared at the top of the file): it loops waiting on
 * msg_thr_flags for VGEN_WTHR_DATARCVD or VGEN_WTHR_STOP, processes
 * received LDC packets, and exits on a stop request or a channel reset.
 * On exit it updates the run status, wakes the stopper, and calls
 * thread_exit().  Code left byte-identical; comments only.
 */
void
{
int rv;
"vnet_rcv_thread");
/*
* Wait until the data is received or a stop
* request is received.
*/
while (!(ldcp->msg_thr_flags &
(VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
}
/*
* First process the stop request.
*/
break;
}
if (rv != 0) {
/*
* Channel has been reset. The thread should now exit.
* The thread may be recreated if TxDring is negotiated
* on this channel after the channel comes back up
* again.
*/
break;
}
}
/*
* Update the run status and wakeup the thread that
* has sent the stop request.
*/
thread_exit();
}
/* vgen_stop_msg_thread -- Co-ordinate with receive thread to stop it */
/*
 * NOTE(review): the parameter line and several statements are elided in
 * this view.  Per the comment above, this signals the worker thread to
 * stop (by setting the stop flag), waits until it reports stopped, and
 * then presumably joins/reaps the thread via its id (tid).
 * Code left byte-identical; comments only.
 */
void
{
/*
* Send a stop request by setting the stop flag and
* wait until the receive thread stops.
*/
}
if (tid != 0) {
}
}