xnf.c revision da14cebe459d3275048785f25bd869cb09b5307f
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
*
* Copyright (c) 2004 Christian Limpach.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. This section intentionally left blank.
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Section 3 of the above license was updated in response to bug 6379571.
*/
/*
* xnf.c - Nemo-based network driver for domU
*/
#include <sys/sysmacros.h>
#include <sys/ethernet.h>
#include <sys/mac_provider.h>
#include <sys/mac_ether.h>
#include <sys/bootinfo.h>
#include <sys/mach_mmu.h>
#ifdef XPV_HVM_DRIVER
#include <sys/xpv_support.h>
#include <sys/hypervisor.h>
#else
#include <sys/hypervisor.h>
#include <sys/evtchn_impl.h>
#include <sys/balloon_impl.h>
#endif
/*
* Declarations and Module Linkage
*/
/*
 * XNF_DEBUG compiles in the debug tracing code found throughout this
 * file; xnfdebug is the bitmask that code tests against the
 * XNF_DEBUG_* flags to decide what to trace.
 *
 * NOTE(review): XNF_DEBUG is defined unconditionally here -- confirm
 * whether it was meant to be limited to DEBUG builds.
 */
#define XNF_DEBUG
#ifdef XNF_DEBUG
int xnfdebug = 0;
#endif
/*
* On a 32 bit PAE system physical and machine addresses are larger
* than 32 bits. ddi_btop() on such systems takes an unsigned long
* argument, and so addresses above 4G are truncated before ddi_btop()
* gets to see them. To avoid this, code the shift operation here.
*/
/* Default value for hypervisor-based copy operations */
/*
* Should pages used for transmit be readonly for the peer?
*/
/*
* Packets under this size are bcopied instead of using desballoc.
* Choose a value > XNF_FRAMESIZE (1514) to force the receive path to
* always copy.
*/
unsigned int xnf_rx_bcopy_thresh = 64;
/*
 * Upper bound on the fragment count the send path will accept for a
 * single packet (presumably the number of mblks in the message --
 * confirm).  A packet exceeding it takes the pullup path.  The Xen
 * virtual net device does not currently support multiple packet
 * frags, hence 1.
 */
unsigned int xnf_max_tx_frags = 1;
/* Required system entry points */
/* Required driver entry points for Nemo */
static int xnf_start(void *);
static void xnf_stop(void *);
static int xnf_set_mac_addr(void *, const uint8_t *);
static int xnf_set_promiscuous(void *, boolean_t);
/* Driver private functions */
static int xnf_alloc_dma_resources(xnf_t *);
static void xnf_release_dma_resources(xnf_t *);
static void xnf_rcv_complete(struct xnf_buffer_desc *);
static void xnf_release_mblks(xnf_t *);
static void xnf_free_buffer(struct xnf_buffer_desc *);
static void xnf_free_tx_buffer(struct xnf_buffer_desc *);
void xnf_send_driver_status(int, int);
static int xnf_clean_tx_ring(xnf_t *);
void *, void *);
/*
* XXPV dme: remove MC_IOCTL?
*/
static mac_callbacks_t xnf_callbacks = {
};
#define GRANT_INVALID_REF 0
/*
 * DMA attributes for network ring buffer.
 *
 * No restriction is placed on the address range; the memory must be
 * aligned to MMU_PAGESIZE and must map as a single segment.
 *
 * NOTE(review): every field currently has the same value as in
 * tx_buffer_dma_attr and rx_buffer_dma_attr.
 */
static ddi_dma_attr_t ringbuf_dma_attr = {
DMA_ATTR_V0, /* version of this structure */
0, /* lowest usable address */
0xffffffffffffffffULL, /* highest usable address */
0x7fffffff, /* maximum DMAable byte count */
MMU_PAGESIZE, /* alignment in bytes */
0x7ff, /* bitmap of burst sizes */
1, /* minimum transfer */
0xffffffffU, /* maximum transfer */
0xffffffffffffffffULL, /* maximum segment length */
1, /* maximum number of segments */
1, /* granularity */
0, /* flags (reserved) */
};
/*
 * DMA attributes for transmit data.
 *
 * No restriction is placed on the address range; the memory must be
 * aligned to MMU_PAGESIZE and must map as a single segment.
 *
 * NOTE(review): every field currently has the same value as in
 * ringbuf_dma_attr and rx_buffer_dma_attr.
 */
static ddi_dma_attr_t tx_buffer_dma_attr = {
DMA_ATTR_V0, /* version of this structure */
0, /* lowest usable address */
0xffffffffffffffffULL, /* highest usable address */
0x7fffffff, /* maximum DMAable byte count */
MMU_PAGESIZE, /* alignment in bytes */
0x7ff, /* bitmap of burst sizes */
1, /* minimum transfer */
0xffffffffU, /* maximum transfer */
0xffffffffffffffffULL, /* maximum segment length */
1, /* maximum number of segments */
1, /* granularity */
0, /* flags (reserved) */
};
/*
 * DMA attributes for a receive buffer.
 *
 * No restriction is placed on the address range; the memory must be
 * aligned to MMU_PAGESIZE and must map as a single segment.
 *
 * NOTE(review): every field currently has the same value as in
 * ringbuf_dma_attr and tx_buffer_dma_attr.
 */
static ddi_dma_attr_t rx_buffer_dma_attr = {
DMA_ATTR_V0, /* version of this structure */
0, /* lowest usable address */
0xffffffffffffffffULL, /* highest usable address */
0x7fffffff, /* maximum DMAable byte count */
MMU_PAGESIZE, /* alignment in bytes */
0x7ff, /* bitmap of burst sizes */
1, /* minimum transfer */
0xffffffffU, /* maximum transfer */
0xffffffffffffffffULL, /* maximum segment length */
1, /* maximum number of segments */
1, /* granularity */
0, /* flags (reserved) */
};
/* DMA access attributes for registers and descriptors */
static ddi_device_acc_attr_t accattr = {
DDI_STRUCTURE_LE_ACC, /* This is a little-endian device */
};
/* DMA access attributes for data: NOT to be byte swapped. */
static ddi_device_acc_attr_t data_accattr = {
};
int xnf_diagnose = 0; /* Patchable global for diagnostic purposes */
static struct modldrv xnf_modldrv = {
"Virtual Ethernet driver",
};
static struct modlinkage modlinkage = {
};
/*
 * _init(9E) -- loadable module entry point.
 *
 * Installs the module with mod_install() and hands that call's status
 * back to the caller: DDI_SUCCESS when the install worked, otherwise
 * the error mod_install() reported.
 */
int
_init(void)
{
	int r;

	r = mod_install(&modlinkage);

	/*
	 * Report the status on every path.  Returning only on failure
	 * would let the success path fall off the end of a non-void
	 * function, leaving the caller with an indeterminate value.
	 */
	return (r);
}
/*
 * _fini(9E) -- module unload entry point.
 *
 * Every unload request is refused by reporting EBUSY.
 */
int
_fini(void)
{
	/* XXPV dme: should be removable */
	const int status = EBUSY;

	return (status);
}
int
{
}
static int
{
RING_IDX i;
struct xenbus_device *xsd;
if (err <= 0) {
goto out;
}
if (err <= 0) {
goto out;
}
/*
* Cleanup the TX ring. We just clean up any valid tx_pktinfo structs
* and reset the ring. Note that this can lose packets after a resume,
* but we expect to stagger on.
*/
continue;
}
panic("tx grant still in use by backend domain");
}
}
xnfp->xnf_tx_pkt_id_list = 0;
/* LINTED: constant in conditional context */
/*
* Rebuild the RX ring. We have to rebuild the RX ring because some of
* our pages are currently flipped out/granted so we can't just free
* the RX buffers. Reclaim any unprocessed recv buffers, they won't be
* useable anyway since the mfn's they refer to are no longer valid.
* Grant the backend domain access to each hung rx buffer.
*/
volatile netif_rx_request_t *rxrp;
if (xnfp->xnf_rx_hvcopy) {
mfn, 0);
} else {
oeid, 0);
}
}
}
/*
* Reset the ring pointers to initial state.
* Hang buffers for any empty ring slots.
*/
/* LINTED: constant in conditional context */
for (i = 0; i < NET_RX_RING_SIZE; i++) {
continue;
break;
}
/* LINTED: constant in conditional context */
return (0);
out:
return (err);
}
/* Called when the upper layers free a message we passed upstream */
static void
{
}
/*
* Connect driver to back end, called to set up communication with
* back end driver both initially and on resume after restore/migrate.
*/
void
{
const char *message;
struct xenbus_device *xsd;
char *xsname;
int err;
if (err != 0) {
return;
}
if (err != 0) {
return;
}
if (err != 0) {
message = "writing tx ring-ref";
goto abort_transaction;
}
if (err != 0) {
message = "writing rx ring-ref";
goto abort_transaction;
}
xnfp->xnf_evtchn);
if (err != 0) {
message = "writing event-channel";
goto abort_transaction;
}
if (err != 0) {
message = "writing feature-rx-notify";
goto abort_transaction;
}
if (!xnfp->xnf_tx_pages_readonly) {
"%d", 1);
if (err != 0) {
message = "writing feature-tx-writable";
goto abort_transaction;
}
}
if (err != 0) {
message = "writing feature-no-csum-offload";
goto abort_transaction;
}
if (err != 0) {
message = "writing request-rx-copy";
goto abort_transaction;
}
if (err != 0) {
message = "writing frontend XenbusStateConnected";
goto abort_transaction;
}
if (err != 0) {
goto again;
}
return;
}
/*
* Read config info from xenstore
*/
void
{
int err, be_no_cksum_offload;
"%s", (char *)&mac[0]);
if (err != 0) {
/*
* bad: we're supposed to be set up with a proper mac
* addr. at this point
*/
return;
}
return;
}
/*
* If we fail to read the store we assume that the key is
* absent, implying an older domain at the far end. Older
* domains always support checksum offload.
*/
if (err != 0)
be_no_cksum_offload = 0;
/*
* If the far end cannot do checksum offload or we do not wish
* to do it, disable it.
*/
}
/*
* attach(9E) -- Attach a device to the system
*
* Called once for each board successfully probed.
*/
static int
{
int err;
#ifdef XNF_DEBUG
if (xnfdebug & XNF_DEBUG_DDI)
(void *)devinfo);
#endif
switch (cmd) {
case DDI_RESUME:
(void) xvdi_resume(devinfo);
(void) xvdi_alloc_evtchn(devinfo);
#ifdef XPV_HVM_DRIVER
xnfp);
#else
#endif
/*
* Our MAC address may have changed if we're resuming:
* - on a different host
* - on the same one and got a different MAC address
* because we didn't specify one of our own.
* so it's useful to claim that it changed in order that
* IP send out a gratuitous ARP.
*/
return (DDI_SUCCESS);
case DDI_ATTACH:
break;
default:
return (DDI_FAILURE);
}
/*
* Allocate gld_mac_info_t and xnf_instance structures
*/
return (DDI_FAILURE);
#ifdef XPV_HVM_DRIVER
/*
* Report our version to dom0.
*/
if (!xnfp->xnf_rx_hvcopy) {
"supports 'feature-rx-copy'");
goto failure;
}
#endif
/*
* Get the iblock cookie with which to initialize the mutexes.
*/
!= DDI_SUCCESS)
goto failure;
/*
* Driver locking strategy: the txlock protects all paths
* through the driver, except the interrupt thread.
* If the interrupt thread needs to do something which could
* affect the operation of any other part of the driver,
* it needs to acquire the txlock mutex.
*/
&xnfp->xnf_gref_tx_head) < 0) {
goto failure_1;
}
&xnfp->xnf_gref_rx_head) < 0) {
goto failure_1;
}
"driver data structures",
goto failure_1;
}
/* set driver private pointer now */
!= DDI_SUCCESS)
goto failure_1;
if (!xnf_kstat_init(xnfp))
goto failure_2;
/*
* Allocate an event channel, add the interrupt handler and
* bind it to the event channel.
*/
(void) xvdi_alloc_evtchn(devinfo);
#ifdef XPV_HVM_DRIVER
#else
#endif
if (err != 0)
goto failure_3;
#ifdef XPV_HVM_DRIVER
/*
* In the HVM case, this driver essentially replaces a driver for
* a 'real' PCI NIC. Without the "model" property set to
* "Ethernet controller", like the PCI code does, netbooting does
* not work correctly, as strplumb_get_netdev_path() will not find
* this interface.
*/
"Ethernet controller");
#endif
/*
* connect to the backend
*/
return (DDI_SUCCESS);
#ifdef XPV_HVM_DRIVER
#else
#endif
return (DDI_FAILURE);
}
/* detach(9E) -- Detach a device from the system */
static int
{
int i;
#ifdef XNF_DEBUG
if (xnfdebug & XNF_DEBUG_DDI)
#endif
switch (cmd) {
case DDI_SUSPEND:
#ifdef XPV_HVM_DRIVER
#else
#endif
/* claim link to be down after disconnect */
return (DDI_SUCCESS);
case DDI_DETACH:
break;
default:
return (DDI_FAILURE);
}
if (xnfp->xnf_connected)
return (DDI_FAILURE);
/* Wait for receive buffers to be returned; give up after 5 seconds */
i = 50;
while (xnfp->xnf_rx_bufs_outstanding > 0) {
if (--i == 0) {
"xnf%d: never reclaimed all the "
"receive buffers. Still have %d "
"buffers outstanding.",
return (DDI_FAILURE);
}
}
return (DDI_FAILURE);
/* Stop the receiver */
/* Remove the interrupt */
#ifdef XPV_HVM_DRIVER
#else
#endif
/* Release any pending xmit mblks */
/* Release all DMA resources */
return (DDI_SUCCESS);
}
/*
* xnf_set_mac_addr() -- set the physical network address on the board.
*/
/*ARGSUSED*/
static int
{
#ifdef XNF_DEBUG
if (xnfdebug & XNF_DEBUG_TRACE)
printf("xnf%d: set_mac_addr(0x%p): "
"%02x:%02x:%02x:%02x:%02x:%02x\n",
#endif
/*
* We can't set our macaddr.
*
* XXPV dme: Why not?
*/
return (ENOTSUP);
}
/*
* xnf_set_multicast() -- set (enable) or disable a multicast address.
*
* Program the hardware to enable/disable the multicast address
* in "mcast". Enable if "add" is true, disable if false.
*/
/*ARGSUSED*/
static int
{
#ifdef XNF_DEBUG
if (xnfdebug & XNF_DEBUG_TRACE)
printf("xnf%d set_multicast(0x%p): "
"%02x:%02x:%02x:%02x:%02x:%02x\n",
#endif
/*
* XXPV dme: Ideally we'd relay the address to the backend for
* enabling. The protocol doesn't support that (interesting
* extension), so we simply succeed and hope that the relevant
* packets are going to arrive.
*
* need to keep a list of those in use and re-add on resume.
*/
return (0);
}
/*
* xnf_set_promiscuous() -- set or reset promiscuous mode on the board
*
*/
/*ARGSUSED*/
static int
{
#ifdef XNF_DEBUG
if (xnfdebug & XNF_DEBUG_TRACE)
printf("xnf%d set_promiscuous(0x%p, %x)\n",
#endif
/*
* We can't really do this, but we pretend that we can in
* order that snoop will work.
*/
return (0);
}
/*
* Clean buffers that we have responses for from the transmit ring.
*/
static int
{
struct tx_pktinfo *reap;
int id;
loop:
/*
* index of next transmission ack
*/
/*
* Clean tx packets from ring that we have responses for
*/
/*
* Return id to free list
*/
if (gnttab_query_foreign_access(ref) != 0)
panic("tx grant still in use "
"by backend domain");
(void) gnttab_end_foreign_access_ref(ref,
ref);
}
membar_enter();
}
/* LINTED: constant in conditional context */
if (work_to_do)
goto loop;
}
/*
* If we need to pull up data from either a packet that crosses a page
* boundary or consisting of multiple mblks, do it here. We allocate
* a page aligned buffer and copy the data into it. The header for the
* allocated buffer is returned. (which is also allocated here)
*/
static struct xnf_buffer_desc *
{
struct xnf_buffer_desc *bdesc;
int len;
/*
* get a xmit buffer from the xmit buffer pool
*/
return (bdesc);
/*
* Copy the data into the buffer
*/
}
return (bdesc);
}
void
{
struct ether_header *ehp;
struct ether_vlan_header *evhp;
} else {
}
/* Packet should have been pulled up by the caller. */
return;
}
switch (ipha->ipha_protocol) {
case IPPROTO_TCP:
break;
case IPPROTO_UDP:
break;
default:
return;
}
}
/*
* xnf_send_one() -- send a packet
*
* Called when a packet is ready to be transmitted. A pointer to an
* M_DATA message that contains the packet is passed to this routine.
* At least the complete LLC header is contained in the message's
* first message block, and the remainder of the packet is contained
* within additional M_DATA message blocks linked to the first
* message block.
*
*/
static boolean_t
{
struct xnf_buffer_desc *xmitbuf;
struct tx_pktinfo *txp_info;
int tx_ring_freespace, page_oops;
volatile netif_tx_request_t *txrp;
unsigned long mfn;
#ifdef XNF_DEBUG
if (xnfdebug & XNF_DEBUG_SEND)
printf("xnf%d send(0x%p, 0x%p)\n",
#endif
ASSERT(tx_ring_freespace >= 0);
/*
* If there are no xmit ring slots available, return.
*/
if (tx_ring_freespace == 0) {
return (B_FALSE); /* Send should be retried */
}
/* Count the number of mblks in message and compute packet size */
/* Make sure packet isn't too large */
if (pktlen > XNF_FRAMESIZE) {
return (B_TRUE);
}
/*
* Test if we cross a page boundary with our buffer
*/
page_oops = (i == 1) &&
/*
* XXPV - unfortunately, the Xen virtual net device currently
* doesn't support multiple packet frags, so this will always
* end up doing the pullup if we got more than one packet.
*/
if (i > xnf_max_tx_frags || page_oops) {
if (page_oops)
/* could not allocate resources? */
#ifdef XNF_DEBUG
#endif
return (B_FALSE); /* Retry send */
}
} else {
}
/* set up data descriptor */
/*
* Get packet id from free list
*/
/* Prepare for DMA mapping of tx buffer(s) */
if (rc != DDI_DMA_MAPPED) {
/*
* Return id to free list
*/
if (rc == DDI_DMA_NORESOURCES) {
return (B_FALSE); /* Retry later */
}
#ifdef XNF_DEBUG
#endif
return (B_FALSE);
}
if (pflags != 0) {
/*
* If the local protocol stack requests checksum
* offload we set the 'checksum blank' flag,
* indicating to the peer that we need the checksum
* calculated for us.
*
* We _don't_ set the validated flag, because we haven't
* validated that the data and the checksum match.
*/
}
return (B_TRUE); /* successful transmit attempt */
}
mblk_t *
{
/*
* Transmission attempts should be impossible without having
* previously called xnf_start().
*/
/*
* Wait for getting connected to the backend
*/
while (!xnfp->xnf_connected) {
}
break;
}
}
if (sent_something) {
/* LINTED: constant in conditional context */
notify);
if (notify)
}
return (mp);
}
/*
* xnf_intr() -- ring interrupt service routine
*/
static uint_t
{
/* spurious intr */
if (!xnfp->xnf_connected) {
return (DDI_INTR_UNCLAIMED);
}
#ifdef XNF_DEBUG
if (xnfdebug & XNF_DEBUG_INT)
printf("xnf%d intr(0x%p)\n",
#endif
if (xnfp->xnf_rx_hvcopy)
else
}
/*
* Clean tx ring and try to start any blocked xmit streams if
* there is now some space.
*/
if (xnf_clean_tx_ring(xnfp) > 0) {
}
if (sched)
return (DDI_INTR_CLAIMED);
}
/*
* xnf_start() -- start the board receiving and enable interrupts.
*/
static int
{
#ifdef XNF_DEBUG
if (xnfdebug & XNF_DEBUG_TRACE)
printf("xnf%d start(0x%p)\n",
#endif
/* Accept packets from above. */
return (0);
}
/* xnf_stop() - disable hardware */
static void
{
#ifdef XNF_DEBUG
if (xnfdebug & XNF_DEBUG_TRACE)
printf("xnf%d stop(0x%p)\n",
#endif
}
/*
* Driver private functions follow
*/
/*
* Hang buffer on rx ring
*/
static void
{
volatile netif_rx_request_t *reqp;
if (xnfp->xnf_rx_hvcopy) {
} else {
}
}
}
static mblk_t *
{
struct xnf_buffer_desc *bdesc;
/*
* in loop over unconsumed responses, we do:
* 1. get a response
* 2. take corresponding buffer off recv. ring
* 3. indicate this by setting slot to NULL
* 4. create a new message and
* 5. copy data in, adjust ptr
*
* outside loop:
* 7. make sure no more data has arrived; kick HV
*/
loop:
/* 1. */
/*
* 2.
* Take buffer off of receive ring
*/
/* 3 */
if (!xnfp->xnf_running) {
xnfp->xnf_stat_drop++;
/*
* re-hang the buffer
*/
xnfp->xnf_stat_errrx++;
xnfp->xnf_stat_runt++;
xnfp->xnf_stat_norxbuf++;
/*
* re-hang the buffer
*/
} else {
struct xnf_buffer_desc *new_bdesc;
if (ref == GRANT_INVALID_REF) {
"from dom %d", ref,
goto luckless;
}
/*
* Release ref which we'll be re-claiming in
* rx_buffer_hang().
*/
(void) gnttab_end_foreign_access_ref(ref, 0);
ref);
/*
* XXPV for the initial implementation of HVcopy,
* create a new msg and copy in the data
*/
/* 4. */
/*
* Couldn't get buffer to copy to,
* drop this data, and re-hang
* the buffer on the ring.
*/
xnfp->xnf_stat_norxbuf++;
} else {
/* 5. */
len);
}
/* Re-hang old or hang new buffer. */
}
if (mp) {
if (hwcsum) {
/*
* See comments in xnf_process_recv().
*/
NULL, 0, 0, 0, 0,
0);
}
} else {
}
}
}
/* 7. */
/*
* Has more data come in since we started?
*/
/* LINTED: constant in conditional context */
if (work_to_do)
goto loop;
/*
* Indicate to the backend that we have re-filled the receive
* ring.
*/
/* LINTED: constant in conditional context */
if (notify)
return (head);
}
/* Process all queued received packets */
static mblk_t *
{
volatile netif_rx_response_t *rxpkt;
struct xnf_buffer_desc *bdesc;
long cnt;
loop:
/*
* Take buffer off of receive ring
*/
if (!xnfp->xnf_running) {
xnfp->xnf_stat_drop++;
/*
* re-hang the buffer
*/
xnfp->xnf_stat_errrx++;
xnfp->xnf_stat_runt++;
xnfp->xnf_stat_norxbuf++;
/*
* re-hang the buffer
*/
} else {
struct xnf_buffer_desc *new_bdesc;
unsigned long mfn;
if (ref == GRANT_INVALID_REF) {
"from dom %d", ref,
goto luckless;
}
ref);
if (len <= xnf_rx_bcopy_thresh) {
/*
* For small buffers, just copy the data
* and send the copy upstream.
*/
} else {
/*
* We send a pointer to this data upstream;
* we need a new buffer to replace this one.
*/
} else {
}
}
/*
* Don't have a new ring buffer; bcopy the data
* from the buffer, and preserve the
* original buffer
*/
/*
* Couldn't get buffer to copy to,
* drop this data, and re-hang
* the buffer on the ring.
*/
xnfp->xnf_stat_norxbuf++;
} else {
len);
}
/*
* Give the buffer page back to xen
*/
&pfn);
if (cnt != 1) {
"page back to the hypervisor\n");
}
} else {
/*
* Couldn't get mblk to pass recv data
* up with, free the old ring buffer
*/
xnfp->xnf_stat_norxbuf++;
goto luckless;
}
0, 0, DDI_DMA_SYNC_FORCPU);
}
if (mp)
/* re-hang old or hang new buffer */
}
if (mp) {
if (hwcsum) {
/*
* If the peer says that the data has
* been validated then we declare that
* the full checksum has been
* verified.
*
* We don't look at the "checksum
* blank" flag, and hence could have a
* packet here that we are asserting
* is good with a blank checksum.
*
* The hardware checksum offload
* specification says that we must
* provide the actual checksum as well
* as an assertion that it is valid,
* but the protocol stack doesn't
* actually use it and some other
* drivers don't bother, so we don't.
* If it was necessary we could grovel
* in the packet to find it.
*/
NULL, 0, 0, 0, 0,
0);
}
} else {
}
}
}
/*
* Has more data come in since we started?
*/
/* LINTED: constant in conditional context */
if (work_to_do)
goto loop;
/*
* Indicate to the backend that we have re-filled the receive
* ring.
*/
/* LINTED: constant in conditional context */
if (notify)
return (head);
}
/* Called when the upper layers free a message we passed upstream */
static void
{
long cnt;
/* One less outstanding receive buffer */
/*
* Return buffer to the free list, unless the free list is getting
* too large. XXPV - this threshold may need tuning.
*/
/*
* Unmap the page, and hand the machine page back
* to xen so it can be re-used as a backend net buffer.
*/
if (cnt != 1) {
"hypervisor\n");
}
} else {
/*
* We can return everything here since we have a free buffer
* that we have not given the backing page for back to xen.
*/
}
}
/*
* xnf_alloc_dma_resources() -- initialize the drivers structures
*/
static int
{
int i;
struct xnf_buffer_desc *bdesc;
int rc;
/*
* The code below allocates all the DMA data structures that
* need to be released when the driver is detached.
*
* First allocate handles for mapping (virtual address) pointers to
* transmit data buffers to physical addresses
*/
return (DDI_FAILURE);
}
/*
* Allocate page for the transmit descriptor ring.
*/
goto alloc_error;
goto alloc_error;
}
if (rc == DDI_DMA_NORESOURCES)
goto alloc_error;
else
goto error;
}
/* LINTED: constant in conditional context */
/* LINTED: constant in conditional context */
/*
* Allocate page for the receive descriptor ring.
*/
goto alloc_error;
goto alloc_error;
}
if (rc == DDI_DMA_NORESOURCES)
goto alloc_error;
else
goto error;
}
/* LINTED: constant in conditional context */
/* LINTED: constant in conditional context */
/*
* Preallocate receive buffers for each receive descriptor.
*/
/* Set up the "free list" of receive buffer descriptors */
goto alloc_error;
}
return (DDI_SUCCESS);
return (DDI_FAILURE);
}
/*
* Release all DMA resources in the opposite order from acquisition
* Should not be called until all outstanding esballoc buffers
* have been returned.
*/
static void
{
int i;
/*
* Free receive buffers which are currently associated with
* descriptors
*/
struct xnf_buffer_desc *bp;
continue;
}
/* Free the receive ring buffer */
}
/* Free the transmit ring buffer */
}
/*
* Free handles for mapping (virtual address) pointers to
* transmit data buffers to physical addresses
*/
}
}
}
static void
{
int i;
continue;
(void) ddi_dma_unbind_handle(
}
}
/*
* Remove a xmit buffer descriptor from the head of the free list and return
* a pointer to it. If no buffers on list, attempt to allocate a new one.
* Called with the tx_buf_mutex held.
*/
static struct xnf_buffer_desc *
{
struct xnf_buffer_desc *bdesc;
} else {
}
return (bdesc);
}
/*
* Remove a buffer descriptor from the head of the free list and return
* a pointer to it. If no buffers on list, attempt to allocate a new one.
* Called with the rx_buf_mutex held.
*/
static struct xnf_buffer_desc *
{
struct xnf_buffer_desc *bdesc;
} else {
}
return (bdesc);
}
/*
* Free a xmit buffer back to the xmit free list
*/
static void
{
}
/*
* Put a buffer descriptor onto the head of the free list.
* for page-flip:
* We can't really free these buffers back to the kernel
* since we have given away their backing page to be used
* by the back end net driver.
* for hvcopy:
* release all the memory
*/
static void
{
if (xnfp->xnf_rx_hvcopy) {
goto out;
} else {
}
out:
}
/*
* Allocate a DMA-able xmit buffer, including a structure to
* keep track of the buffer. Called with tx_buf_mutex held.
*/
static struct xnf_buffer_desc *
{
struct xnf_buffer_desc *bdesc;
return (NULL);
/* allocate a DMA access handle for transmit buffer */
goto failure;
/* Allocate DMA-able memory for transmit buffer */
goto failure_1;
return (bdesc);
return (NULL);
}
/*
* Allocate a DMA-able receive buffer, including a structure to
* keep track of the buffer. Called with rx_buf_mutex held.
*/
static struct xnf_buffer_desc *
{
struct xnf_buffer_desc *bdesc;
long cnt;
return (NULL);
return (NULL);
/* allocate a DMA access handle for receive buffer */
goto failure;
/* Allocate DMA-able memory for receive buffer */
goto failure_1;
/* bind to virtual address of buffer to get physical address */
goto failure_2;
if (xnfp->xnf_rx_hvcopy) {
} else {
}
if (!xnfp->xnf_rx_hvcopy) {
/*
* Unmap the page, and hand the machine page back
* to xen so it can be used as a backend net buffer.
*/
if (cnt != 1) {
"hypervisor\n");
}
}
return (bdesc);
return (NULL);
}
/*
* Statistics.
*/
/*
 * Names of the driver-private ("aux") kstats.
 *
 * The order is significant: the kstat update routine assigns its
 * values positionally and must follow the order of the names here.
 */
static char *xnf_aux_statistics[] = {
"tx_cksum_deferred",
"rx_cksum_no_need",
"interrupts",
"unclaimed_interrupts",
"tx_pullup",
"tx_pagebndry",
"tx_attempt",
"rx_no_ringbuf",
"hvcopy_packet_processed",
};
static int
{
if (flag != KSTAT_READ)
return (EACCES);
/*
* Assignment order must match that of the names in
* xnf_aux_statistics.
*/
return (0);
}
static boolean_t
{
int nstat = sizeof (xnf_aux_statistics) /
sizeof (xnf_aux_statistics[0]);
char **cp = xnf_aux_statistics;
/*
* Create and initialise kstats.
*/
return (B_FALSE);
while (nstat > 0) {
knp++;
cp++;
nstat--;
}
return (B_TRUE);
}
static int
{
#define mac_stat(q, r) \
case (MAC_STAT_##q): \
break
#define ether_stat(q, r) \
case (ETHER_STAT_##q): \
break
switch (stat) {
/* always claim to be in full duplex mode */
case ETHER_STAT_LINK_DUPLEX:
*val = LINK_DUPLEX_FULL;
break;
/* always claim to be at 1Gb/s link speed */
case MAC_STAT_IFSPEED:
*val = 1000000000ull;
break;
default:
return (ENOTSUP);
}
return (0);
}
/*ARGSUSED*/
static void
{
}
static boolean_t
{
switch (cap) {
case MAC_CAPAB_HCKSUM: {
/*
* Whilst the flag used to communicate with the IO
* domain is called "NETTXF_csum_blank", the checksum
* in the packet must contain the pseudo-header
* checksum and not zero.
*
* To help out the IO domain, we might use
* HCKSUM_INET_PARTIAL. Unfortunately our stack will
* then use checksum offload for IPv6 packets, which
* the IO domain can't handle.
*
* As a result, we declare ourselves capable of
* HCKSUM_INET_FULL_V4. This means that we receive
* IPv4 packets from the stack with a blank checksum
* field and must insert the pseudo-header checksum
* before passing the packet to the IO domain.
*/
if (xnfp->xnf_cksum_offload)
else
*capab = 0;
break;
}
default:
return (B_FALSE);
}
return (B_TRUE);
}
/*ARGSUSED*/
static void
{
switch (new_state) {
case XenbusStateConnected:
/*
* wake up threads that wanted to send data to the backend
* but were blocked because the backend was not ready
*/
/*
* kick backend in case it missed any tx request
* in the TX ring buffer
*/
/*
* there may already be rx data queued in the RX ring,
* sent by the backend after it got connected but before
* we saw its state change here, so we call our intr
* handling routine to process it, if any
*/
/* mark as link up after get connected */
break;
default:
break;
}
}
/*
* Check whether backend is capable of and willing to talk
* to us via hypervisor copy, as opposed to page flip.
*/
static boolean_t
{
int be_rx_copy;
int err;
/*
* If we fail to read the store we assume that the key is
* absent, implying an older domain at the far end. Older
* domains cannot do HV copy (we assume ..).
*/
if (err != 0)
be_rx_copy = 0;
}