/*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
/*
* Copyright 2013 Nexenta Inc. All rights reserved.
* Copyright (c) 2014, 2016 by Delphix. All rights reserved.
*/
/* Based on the NetBSD virtio driver by Minoura Makoto. */
/*
* Copyright (c) 2010 Minoura Makoto.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/ethernet.h>
#include <sys/sysmacros.h>
#include <sys/mac_provider.h>
#include <sys/mac_ether.h>
#include "virtiovar.h"
#include "virtioreg.h"
/* Configuration registers */
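/*
 * Device-specific configuration space layout, per the virtio specification:
 * the MAC address occupies the first six bytes, followed by a 16-bit status
 * word (names below follow the conventional VIRTIO_NET_CONFIG_* spelling).
 */
#define VIRTIO_NET_CONFIG_MAC 0 /* 8 bit x 6 */
#define VIRTIO_NET_CONFIG_STATUS 6 /* 16 bit */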
/* Feature bits */
#define VIRTIO_NET_FEATURE_BITS \
"\020" \
"\1CSUM" \
"\2GUEST_CSUM" \
"\6MAC" \
"\7GSO" \
"\10GUEST_TSO4" \
"\11GUEST_TSO6" \
"\12GUEST_ECN" \
"\13GUEST_UFO" \
"\14HOST_TSO4" \
"\15HOST_TSO6" \
"\16HOST_ECN" \
"\17HOST_UFO" \
"\20MRG_RXBUF" \
"\21STATUS" \
"\22CTRL_VQ" \
"\23CTRL_RX" \
"\24CTRL_VLAN" \
"\25CTRL_RX_EXTRA"
/* Status */
#define VIRTIO_NET_S_LINK_UP 1
#pragma pack(1)
/* Packet header structure */
struct virtio_net_hdr {
	uint8_t flags;
	uint8_t gso_type;
	uint16_t hdr_len;
	uint16_t gso_size;
	uint16_t csum_start;
	uint16_t csum_offset;
};
#pragma pack()
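/*
 * Values for the flags and gso_type fields above, as given by the virtio
 * specification.
 */
#define VIRTIO_NET_HDR_F_NEEDS_CSUM 0x01 /* flags */
#define VIRTIO_NET_HDR_GSO_NONE 0 /* gso_type */
#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* gso_type */
#define VIRTIO_NET_HDR_GSO_UDP 3 /* gso_type */
#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* gso_type */
#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* gso_type, OR-ed in */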
/* Control virtqueue */
#pragma pack(1)
struct virtio_net_ctrl_cmd {
	uint8_t class;
	uint8_t command;
};
#pragma pack()
#define VIRTIO_NET_CTRL_RX 0
#define VIRTIO_NET_CTRL_RX_PROMISC 0
#define VIRTIO_NET_CTRL_MAC_TABLE_SET 0
#define VIRTIO_NET_CTRL_VLAN_ADD 0
#pragma pack(1)
struct virtio_net_ctrl_status {
	uint8_t ack;
};

struct virtio_net_ctrl_rx {
	uint8_t onoff;
};

struct virtio_net_ctrl_mac_tbl {
	uint32_t nentries;
	uint8_t macs[][ETHERADDRL];
};

struct virtio_net_ctrl_vlan {
	uint16_t id;
};
#pragma pack()
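/*
 * A control-virtqueue request is submitted as a single descriptor chain:
 * a device-readable virtio_net_ctrl_cmd, a device-readable command-specific
 * payload, and a device-writable virtio_net_ctrl_status byte the device uses
 * to acknowledge the command. A minimal sketch, using the structures above,
 * for enabling promiscuous mode:
 *
 *	struct virtio_net_ctrl_cmd cmd =
 *	    { .class = VIRTIO_NET_CTRL_RX, .command = VIRTIO_NET_CTRL_RX_PROMISC };
 *	struct virtio_net_ctrl_rx onoff = { .onoff = 1 };
 *	struct virtio_net_ctrl_status status = { .ack = 0 };
 */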
static int vioif_quiesce(dev_info_t *);
nulldev, /* identify */
nulldev, /* probe */
vioif_attach, /* attach */
vioif_detach, /* detach */
nodev, /* reset */
NULL, /* cb_ops */
D_MP, /* bus_ops */
NULL, /* power */
vioif_quiesce /* quiesce */);
/* Standard Module linkage initialization for a Streams driver */
extern struct mod_ops mod_driverops;
&mod_driverops, /* Type of module. This one is a driver */
vioif_ident, /* short description */
&vioif_ops /* driver specific ops */
};
{
(void *)&modldrv,
NULL,
},
};
DDI_NEVERSWAP_ACC, /* virtio is always native byte order */
};
/*
* A mapping represents a binding for a single buffer that is contiguous in the
* virtual address space.
*/
struct vioif_buf_mapping {
unsigned int vbm_ncookies;
};
/*
* Rx buffers can be loaned upstream, so the code has
* to allocate them dynamically.
*/
struct vioif_rx_buf {
};
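/*
 * The loan itself relies on desballoc(9F): the receive buffer is wrapped in
 * an mblk whose free routine hands the vioif_rx_buf back to the driver
 * instead of freeing the memory. A minimal sketch, with rb_frtn and
 * vioif_rx_free as assumed names for illustration:
 *
 *	buf->rb_frtn.free_func = vioif_rx_free;
 *	buf->rb_frtn.free_arg = (caddr_t)buf;
 *	mp = desballoc((uchar_t *)payload_va, payload_len, 0, &buf->rb_frtn);
 */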
/*
 * Tx buffers have two mapping types. One, "inline", is pre-allocated and is
 * used to hold the virtio_net_hdr. Small packets also get copied there, as
 * it's faster than mapping them. Bigger packets get mapped using the
 * "external" mapping array. An array is used because a packet may consist of
 * multiple fragments, so each fragment gets bound to an entry. According to
 * my observations, the number of fragments does not exceed 2, but just in
 * case, a bigger array of up to VIOIF_INDIRECT_MAX - 1 entries is allocated.
 * To save resources, the DMA handles are allocated lazily in the tx path.
 */
struct vioif_tx_buf {
/* inline buffer */
/* External buffers */
unsigned int tb_external_num;
};
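/*
 * The copy-vs-map decision described above reduces to a threshold check in
 * the tx path; a minimal sketch (sc_txcopy_thresh is the tunable kept in the
 * softc below, the rest is illustrative):
 *
 *	if (msg_size < sc->sc_txcopy_thresh)
 *		copy the whole mblk chain into the inline buffer;
 *	else
 *		bind each fragment through buf->tb_external_mapping[];
 */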
struct vioif_softc {
/* Feature bits. */
int sc_mtu;
/*
* For rx buffers, we keep a pointer array, because the buffers
* can be loaned upstream, and we have to repopulate the array with
* new members.
*/
/*
* For tx, we just allocate an array of buffers. The packet can
 * either be copied into the inline buffer, or the external mapping
 * can be used to map the packet.
*/
/*
* We "loan" rx buffers upstream and reuse them after they are
* freed. This lets us avoid allocations in the hot path.
*/
	/* Copying small packets turns out to be faster than mapping them. */
unsigned long sc_rxcopy_thresh;
unsigned long sc_txcopy_thresh;
	/* Various statistics are kept here. */
};
/* MTU + the ethernet header. */
/*
* Yeah, we spend 8M per device. Turns out, there is no point
* being smart and using merged rx buffers (VIRTIO_NET_F_MRG_RXBUF),
* because vhost does not support them, and we expect to be used with
 * vhost in production environments.
*/
/* The buffer keeps both the packet data and the virtio_net_header. */
/*
* We win a bit on header alignment, but the host wins a lot
* more on moving aligned buffers. Might need more thought.
*/
#define VIOIF_IP_ALIGN 0
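/*
 * A nonzero value (typically 2) would shift the 14-byte Ethernet header so
 * that the IP header starts on a 4-byte boundary; it is left at 0 here for
 * the reason given above.
 */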
/* Maximum number of indirect descriptors, somewhat arbitrary. */
/*
 * We pre-allocate a reasonably large buffer to copy small packets
 * into. Bigger packets are mapped directly; packets with multiple
 * cookies are mapped as indirect buffers.
*/
/* Native queue size for all queues */
#define VIOIF_RX_QLEN 0
#define VIOIF_TX_QLEN 0
#define VIOIF_CTRL_QLEN 0
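/*
 * A length of 0 is assumed to mean "use the queue size the device
 * advertises" when the queues are allocated in attach below, which is what
 * the "native queue size" comment above refers to.
 */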
0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
static char vioif_txcopy_thresh[] =
"vioif_txcopy_thresh";
static char vioif_rxcopy_thresh[] =
"vioif_rxcopy_thresh";
static char *vioif_priv_props[] = {
	vioif_txcopy_thresh,
	vioif_rxcopy_thresh,
	NULL
};
/* Add up to ddi? */
static ddi_dma_cookie_t *
{
return (dmah_impl->dmai_cookie);
}
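/*
 * This peeks at the implementation-private dmai_cookie pointer, i.e. the
 * cookie that ddi_dma_nextcookie(9F) would presumably hand out next; the
 * "Add up to ddi?" note above asks whether such an accessor belongs in the
 * DDI itself.
 */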
static void
{
}
static link_state_t
{
return (LINK_STATE_UP);
} else {
return (LINK_STATE_DOWN);
}
}
return (LINK_STATE_UP);
}
DMA_ATTR_V0, /* Version number */
0, /* low address */
0xFFFFFFFFFFFFFFFF, /* high address */
0xFFFFFFFF, /* counter register max */
1, /* page alignment */
1, /* burst sizes: 1 - 32 */
1, /* minimum transfer size */
0xFFFFFFFF, /* max transfer size */
0xFFFFFFFFFFFFFFF, /* address register max */
1, /* scatter-gather capacity */
1, /* device operates on bytes */
0, /* attr flag: set to 0 */
};
DMA_ATTR_V0, /* Version number */
0, /* low address */
0xFFFFFFFFFFFFFFFF, /* high address */
0xFFFFFFFF, /* counter register max */
1, /* page alignment */
1, /* burst sizes: 1 - 32 */
1, /* minimum transfer size */
0xFFFFFFFF, /* max transfer size */
0xFFFFFFFFFFFFFFF, /* address register max */
	/* One entry is used for the virtio_net_hdr on the tx path */
	VIOIF_INDIRECT_MAX - 1, /* scatter-gather capacity */
1, /* device operates on bytes */
0, /* attr flag: set to 0 */
};
};
static void
{
}
static int
{
"Can't allocate dma handle for rx buffer");
goto exit_handle;
}
VIOIF_RX_SIZE + sizeof (struct virtio_net_hdr),
"Can't allocate rx buffer");
goto exit_alloc;
}
goto exit_bind;
}
return (0);
return (ENOMEM);
}
static void
{
}
static void
{
int i;
int j;
/* Tear down the internal mapping. */
/* We should not see any in-flight buffers at this point. */
	/* Free all the DMA handles we allocated lazily. */
/* Free the external mapping array. */
}
if (buf)
}
}
static int
{
unsigned int nsegments;
KM_SLEEP);
"Failed to allocate the tx buffers array");
goto exit_txalloc;
}
/*
* We don't allocate the rx vioif_bufs, just the pointers, as
* rx vioif_bufs can be loaned upstream, and we don't know the
* total number we need.
*/
KM_SLEEP);
"Failed to allocate the rx buffers pointer array");
goto exit_rxalloc;
}
for (i = 0; i < txqsize; i++) {
/* Allocate and bind an inline mapping. */
"Can't allocate dma handle for tx buffer %d", i);
goto exit_tx;
}
"Can't allocate tx buffer %d", i);
goto exit_tx;
}
"Can't bind tx buffer %d", i);
goto exit_tx;
}
/* We asked for a single segment */
/*
* We allow up to VIOIF_INDIRECT_MAX - 1 external mappings.
	 * In reality, I don't expect more than 2-3 to be used, but who
* knows.
*/
KM_SLEEP);
/*
	 * The external mapping's DMA handles are allocated lazily,
	 * as we don't expect most of them to be used.
*/
}
return (0);
for (i = 0; i < txqsize; i++) {
(void) ddi_dma_unbind_handle(
if (buf->tb_external_mapping)
sizeof (struct vioif_tx_buf) *
VIOIF_INDIRECT_MAX - 1);
}
return (ENOMEM);
}
/* ARGSUSED */
int
{
return (DDI_SUCCESS);
}
/* ARGSUSED */
int
{
return (DDI_SUCCESS);
}
/* ARGSUSED */
int
{
return (DDI_FAILURE);
}
static uint_t
{
for (;;) {
if (!ve) {
/*
* Out of free descriptors - ring already full.
* It would be better to update sc_norxdescavail
* but MAC does not ask for this info, hence we
* update sc_norecvbuf.
*/
sc->sc_norecvbuf++;
break;
}
if (!buf) {
/* First run, allocate the buffer. */
}
/* Still nothing? Bye. */
if (!buf) {
"Can't allocate rx buffer");
sc->sc_norecvbuf++;
break;
}
/*
* For an unknown reason, the virtio_net_hdr must be placed
* as a separate virtio queue entry.
*/
sizeof (struct virtio_net_hdr), B_FALSE);
/* Add the rest of the first cookie. */
sizeof (struct virtio_net_hdr),
sizeof (struct virtio_net_hdr), B_FALSE);
/*
* If the buffer consists of a single cookie (unlikely for a
	 * 64k buffer), we are done. Otherwise, add the rest of the
* cookies using indirect entries.
*/
}
num_added++;
}
return (num_added);
}
static uint_t
{
if (num_added > 0)
return (num_added);
}
static uint_t
{
if (len < sizeof (struct virtio_net_hdr)) {
sc->sc_ierrors++;
continue;
}
len -= sizeof (struct virtio_net_hdr);
/*
* We copy small packets that happen to fit into a single
* cookie and reuse the buffers. For bigger ones, we loan
* the buffers upstream.
*/
if (!mp) {
sc->sc_norecvbuf++;
sc->sc_ierrors++;
break;
}
} else {
sizeof (struct virtio_net_hdr) +
if (!mp) {
sc->sc_norecvbuf++;
sc->sc_ierrors++;
break;
}
/*
* Buffer loaned, we will have to allocate a new one
* for this slot.
*/
}
/*
* virtio-net does not tell us if this packet is multicast
* or broadcast, so we have to check it.
*/
sc->sc_multircv++;
else
sc->sc_brdcstrcv++;
}
sc->sc_ipackets++;
} else {
}
}
}
return (num_processed);
}
static uint_t
{
/* We don't chain descriptors for tx, so don't expect any. */
if (mp) {
for (int i = 0; i < buf->tb_external_num; i++)
(void) ddi_dma_unbind_handle(
}
/* External mapping used, mp was not freed in vioif_send() */
if (mp)
}
sc->sc_tx_stopped = 0;
}
return (num_reclaimed);
}
/* sc will be used to update stat counters. */
/* ARGSUSED */
static inline void
{
/* Frees mp */
sizeof (struct virtio_net_hdr));
}
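/*
 * A minimal sketch of the inline-copy path handled above: the packet is
 * copied right behind the virtio_net_hdr that occupies the start of the
 * inline buffer, presumably with mcopymsg(9F), which copies an mblk chain
 * and frees it (hence the "Frees mp" note). The inline_va name is
 * illustrative:
 *
 *	mcopymsg(mp, inline_va + sizeof (struct virtio_net_hdr));
 */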
static inline int
int i)
{
if (ret != DDI_SUCCESS) {
"Can't allocate dma handle for external tx buffer");
}
}
return (ret);
}
static inline int
{
int i, j;
buf->tb_external_num = 0;
i = 0;
while (nmp) {
unsigned int ncookies;
/*
* For some reason, the network stack can
* actually send us zero-length fragments.
*/
if (len == 0) {
continue;
}
if (ret != DDI_SUCCESS) {
sc->sc_notxbuf++;
sc->sc_oerrors++;
goto exit_lazy_alloc;
}
if (ret != DDI_SUCCESS) {
sc->sc_oerrors++;
"TX: Failed to bind external handle");
goto exit_bind;
}
/* Check if we still fit into the indirect table. */
"TX: Indirect descriptor table limit reached."
" It took %d fragments.", i);
sc->sc_notxbuf++;
sc->sc_oerrors++;
ret = DDI_FAILURE;
goto exit_limit;
}
i++;
}
buf->tb_external_num = i;
/* Save the mp to free it when the packet is sent. */
return (DDI_SUCCESS);
for (j = 0; j < i; j++) {
(void) ddi_dma_unbind_handle(
}
return (ret);
}
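/*
 * The per-fragment bind above follows the usual DDI pattern; a minimal
 * sketch with illustrative handle and cookie names (the handle itself is
 * allocated lazily by the helper above):
 *
 *	ret = ddi_dma_addr_bind_handle(dmah, NULL, (caddr_t)nmp->b_rptr, len,
 *	    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, NULL,
 *	    &dmac, &ncookies);
 */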
static boolean_t
{
int ret;
if (sc->sc_tx_tso4) {
}
if (!ve) {
sc->sc_notxbuf++;
/* Out of free descriptors - try later. */
return (B_FALSE);
}
/* Use the inline buffer of the first entry for the virtio_net_hdr. */
sizeof (struct virtio_net_hdr));
NULL, &csum_flags);
if (csum_flags & HCK_PARTIALCKSUM) {
int eth_hsize;
/* Did we ask for it? */
/* We only asked for partial csum packets. */
eth_hsize = sizeof (struct ether_vlan_header);
} else {
eth_hsize = sizeof (struct ether_header);
}
}
/* setup LSO fields if required */
if (lso_required) {
}
sizeof (struct virtio_net_hdr), B_TRUE);
	/* Meanwhile, update the statistics. */
sc->sc_multixmt++;
else
sc->sc_brdcstxmt++;
}
/*
* We copy small packets into the inline buffer. The bigger ones
* get mapped using the mapped buffer.
*/
} else {
		/* Statistics get updated by vioif_tx_external() on failure. */
if (ret != DDI_SUCCESS)
goto exit_tx_external;
}
sc->sc_opackets++;
return (B_TRUE);
/*
* vioif_tx_external can fail when the buffer does not fit into the
* indirect descriptor table. Free the mp. I don't expect this ever
* to happen.
*/
return (B_TRUE);
}
mblk_t *
{
break;
}
}
return (mp);
}
int
{
/*
* Don't start interrupts on sc_tx_vq. We use VIRTIO_F_NOTIFY_ON_EMPTY,
* so the device will send a transmit interrupt when the queue is empty
* and we can reclaim it in one sweep.
*/
/*
* Clear any data that arrived early on the receive queue and populate
* it with free buffers that the device can use moving forward.
*/
}
return (DDI_SUCCESS);
}
void
{
}
/* ARGSUSED */
static int
{
switch (stat) {
case MAC_STAT_IERRORS:
break;
case MAC_STAT_OERRORS:
break;
case MAC_STAT_MULTIRCV:
break;
case MAC_STAT_BRDCSTRCV:
break;
case MAC_STAT_MULTIXMT:
break;
case MAC_STAT_BRDCSTXMT:
break;
case MAC_STAT_IPACKETS:
break;
case MAC_STAT_RBYTES:
break;
case MAC_STAT_OPACKETS:
break;
case MAC_STAT_OBYTES:
break;
case MAC_STAT_NORCVBUF:
break;
case MAC_STAT_NOXMTBUF:
break;
case MAC_STAT_IFSPEED:
/* always 1 Gbit */
*val = 1000000000ULL;
break;
case ETHER_STAT_LINK_DUPLEX:
/* virtual device, always full-duplex */
*val = LINK_DUPLEX_FULL;
break;
default:
return (ENOTSUP);
}
return (DDI_SUCCESS);
}
static int
{
long result;
return (EINVAL);
return (EINVAL);
}
return (EINVAL);
return (EINVAL);
}
return (0);
}
static int
{
int err;
switch (pr_num) {
case MAC_PROP_MTU:
return (EINVAL);
}
if (err) {
return (err);
}
break;
case MAC_PROP_PRIVATE:
pr_valsize, pr_val);
if (err)
return (err);
break;
default:
return (ENOTSUP);
}
return (0);
}
static int
{
int value;
err = 0;
goto done;
}
err = 0;
goto done;
}
done:
if (err == 0) {
}
return (err);
}
static int
{
switch (pr_num) {
case MAC_PROP_PRIVATE:
pr_valsize, pr_val);
break;
default:
break;
}
return (err);
}
static void
{
int value;
switch (pr_num) {
case MAC_PROP_MTU:
break;
case MAC_PROP_PRIVATE:
vioif_rxcopy_thresh) == 0) {
} else {
return;
}
break;
default:
break;
}
}
static boolean_t
{
switch (cap) {
case MAC_CAPAB_HCKSUM:
if (sc->sc_tx_csum) {
return (B_TRUE);
}
return (B_FALSE);
case MAC_CAPAB_LSO:
if (sc->sc_tx_tso4) {
return (B_TRUE);
}
return (B_FALSE);
default:
break;
}
return (B_FALSE);
}
.mc_getstat = vioif_stat,
.mc_start = vioif_start,
.mc_stop = vioif_stop,
/* Optional callbacks */
};
static void
{
/* LINTED E_PTRDIFF_OVERFLOW */
/* LINTED E_PTRDIFF_OVERFLOW */
*bufp = '\0';
/* Using '!' to only CE_NOTE this to the system log. */
}
/*
* Find out which features are supported by the device and
* choose which ones we wish to use.
*/
static int
{
"Host does not support RING_INDIRECT_DESC, bye.");
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
static int
{
}
static void
{
int i;
for (i = 0; i < ETHERADDRL; i++) {
}
}
/* Get the mac address out of the hardware, or make up one. */
static void
{
int i;
for (i = 0; i < ETHERADDRL; i++) {
VIRTIO_NET_CONFIG_MAC + i);
}
} else {
/* Get a few random bytes */
/* Make sure it's a unicast MAC */
/* Set the "locally administered" bit */
"Generated a random MAC address: %s",
}
}
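/*
 * A sketch of the random-address branch above: random_get_pseudo_bytes(9F)
 * supplies the entropy and the first octet is then fixed up. The sc_mac
 * field name is assumed for illustration:
 *
 *	(void) random_get_pseudo_bytes(sc->sc_mac, ETHERADDRL);
 *	sc->sc_mac[0] &= ~0x01;		clear the multicast bit
 *	sc->sc_mac[0] |= 0x02;		set the locally administered bit
 */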
/*
* Virtqueue interrupt handlers
*/
/* ARGSUSED */
{
struct vioif_softc, sc_virtio);
/*
* The return values of these functions are not needed but they make
* debugging interrupts simpler because you can use them to detect when
* stuff was processed and repopulated in this handler.
*/
(void) vioif_process_rx(sc);
return (DDI_INTR_CLAIMED);
}
/* ARGSUSED */
{
struct vioif_softc, sc_virtio);
/*
* The return value of this function is not needed but makes debugging
* interrupts simpler because you can use it to detect if anything was
* reclaimed in this handler.
*/
(void) vioif_reclaim_used_tx(sc);
return (DDI_INTR_CLAIMED);
}
static int
{
int ret;
{ vioif_rx_handler },
{ vioif_tx_handler },
{ NULL }
};
return (ret);
}
static void
{
sc->sc_rx_csum = 0;
}
/*
* We don't seem to have a way to ask the system
		 * not to send us LSO packets with the Explicit
		 * Congestion Notification bit set, so we require
* the device to support it in order to do
* LSO.
*/
"TSO4 supported, but not ECN. "
"Not using LSO.");
sc->sc_tx_tso4 = 0;
} else {
}
}
}
}
static int
{
switch (cmd) {
case DDI_ATTACH:
break;
case DDI_RESUME:
case DDI_PM_RESUME:
goto exit;
default:
goto exit;
}
/* Duplicate for less typing */
/*
* Initialize interrupt kstat.
*/
KSTAT_TYPE_INTR, 1, 0);
goto exit_intrstat;
}
/* map BAR 0 */
if (ret != DDI_SUCCESS) {
goto exit_map;
}
if (ret)
goto exit_features;
sizeof (struct vioif_rx_buf), 0, vioif_rx_construct,
goto exit_cache;
}
if (ret) {
"Failed to allocate interrupt(s)!");
goto exit_ints;
}
/*
* Register layout determined, can now access the
* device-specific bits
*/
goto exit_alloc1;
goto exit_alloc2;
VIOIF_CTRL_QLEN, 0, "ctrl");
if (!sc->sc_ctrl_vq) {
goto exit_alloc3;
}
}
	/* Set some reasonably small default values. */
if (vioif_alloc_mems(sc))
goto exit_alloc_mems;
goto exit_macalloc;
}
/* Pre-fill the rx ring. */
if (ret != 0) {
"mac_register() failed, ret=%d", ret);
goto exit_register;
}
if (ret) {
goto exit_enable_ints;
}
return (DDI_SUCCESS);
if (sc->sc_ctrl_vq)
exit:
return (DDI_FAILURE);
}
static int
{
return (DDI_FAILURE);
switch (cmd) {
case DDI_DETACH:
break;
case DDI_PM_SUSPEND:
return (DDI_FAILURE);
default:
return (DDI_FAILURE);
}
" not detaching.");
return (DDI_FAILURE);
}
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
static int
{
return (DDI_FAILURE);
return (DDI_SUCCESS);
}
int
_init(void)
{
int ret = 0;
if (ret != DDI_SUCCESS) {
return (ret);
}
return (0);
}
int
_fini(void)
{
int ret;
if (ret == DDI_SUCCESS) {
}
return (ret);
}
int
{
}