myri10ge.c revision 91e187871f25f5f4cebb89ca6783e31ac6e8a540
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright 2007-2009 Myricom, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef lint
static const char __idstring[] =
"@(#)$Id: myri10ge.c,v 1.186 2009-06-29 13:47:22 gallatin Exp $";
#endif
#define MXGEFW_NDIS
#include "myri10ge_var.h"
#include "rss_eth_z8e.h"
#include "rss_ethp_z8e.h"
#include "mcp_gen_header.h"
#define MYRI10GE_MAX_ETHER_MTU 9014
#define MYRI10GE_ETH_STOPPED 0
#define MYRI10GE_ETH_STOPPING 1
#define MYRI10GE_ETH_STARTING 2
#define MYRI10GE_ETH_RUNNING 3
#define MYRI10GE_ETH_OPEN_FAILED 4
#define MYRI10GE_ETH_SUSPENDED_RUNNING 5
static int myri10ge_small_bytes = 510;
static int myri10ge_intr_coal_delay = 125;
static int myri10ge_flow_control = 1;
static int myri10ge_nvidia_ecrc_enable = 1;
#endif
static int myri10ge_mtu_override = 0;
static int myri10ge_tx_copylen = 512;
static int myri10ge_deassert_wait = 1;
static int myri10ge_verbose = 0;
static int myri10ge_watchdog_reset = 0;
static int myri10ge_use_msix = 1;
static int myri10ge_max_slices = -1;
static int myri10ge_use_msi = 1;
int myri10ge_force_firmware = 0;
static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int myri10ge_tx_hash = 1;
static int myri10ge_lro = 0;
static int myri10ge_lro_cnt = 8;
int myri10ge_lro_max_aggr = 2;
static int myri10ge_lso_copy = 0;
int myri10ge_tx_handles_initial = 128;
static kmutex_t myri10ge_param_lock;
static void* myri10ge_db_lastfree;
"Myricom 10G driver (10GbE)",
};
static struct modlinkage modlinkage = {
};
static ddi_dma_attr_t myri10ge_misc_dma_attr = {
DMA_ATTR_V0, /* version number. */
(uint64_t)0, /* low address */
1, /* granularity */
0 /* attribute flags */
};
/*
* The Myri10GE NIC has the following constraints on receive buffers:
* 1) Buffers which cross a 4KB boundary must be aligned to 4KB
* 2) Buffers which are not aligned to 4KB must not cross a 4KB boundary
*/
static ddi_dma_attr_t myri10ge_rx_jumbo_dma_attr = {
DMA_ATTR_V0, /* version number. */
(uint64_t)0, /* low address */
UINT64_MAX, /* maximum segment size */
1, /* granularity */
0 /* attribute flags */
};
static ddi_dma_attr_t myri10ge_rx_std_dma_attr = {
DMA_ATTR_V0, /* version number. */
(uint64_t)0, /* low address */
#else
#endif
UINT64_MAX, /* maximum segment size */
#else
#endif
1, /* granularity */
0 /* attribute flags */
};
static ddi_dma_attr_t myri10ge_tx_dma_attr = {
DMA_ATTR_V0, /* version number. */
(uint64_t)0, /* low address */
UINT64_MAX, /* maximum segment size */
1, /* granularity */
0 /* attribute flags */
};
#define WC 0
#else
#define WC 1
#endif
/*
* Device access attributes for the driver's register mappings.
* Accesses are never byte-swapped (DDI_NEVERSWAP_ACC). When WC is
* non-zero the data-order field is DDI_MERGING_OK_ACC.
*/
struct ddi_device_acc_attr myri10ge_dev_access_attr = {
DDI_DEVICE_ATTR_V0, /* version */
DDI_NEVERSWAP_ACC, /* endian flags */
#if WC
DDI_MERGING_OK_ACC /* data order */
#else
#endif
};
static void myri10ge_watchdog(void *arg);
#ifdef MYRICOM_PRIV
#else
#endif
int myri10ge_bigbufs_initial = 1024;
int myri10ge_bigbufs_max = 4096;
{
int err;
if (err != DDI_SUCCESS) {
if (warn)
"myri10ge: ddi_dma_alloc_handle failed\n");
goto abort_with_nothing;
}
&dma->acc_handle);
if (err != DDI_SUCCESS) {
if (warn)
"myri10ge: ddi_dma_mem_alloc failed\n");
goto abort_with_handle;
}
if (err != DDI_SUCCESS) {
if (warn)
"myri10ge: ddi_dma_addr_bind_handle failed\n");
goto abort_with_mem;
}
if (count != 1) {
if (warn)
"myri10ge: got too many dma segments ");
goto abort_with_bind;
}
return (kaddr);
if (warn) {
"args: ddi_device_acc_attr=%p alloc_flags=0x%x\n",
(void*) accattr, alloc_flags);
bind_flags, (void*) dma);
}
return (NULL);
}
void
{
}
static inline void
{
size_t i;
for (i = (size / 4); i; i--) {
to32++;
from32++;
}
}
#if defined(_LP64)
static inline void
{
size_t i;
for (i = (size / 8); i; i--) {
to64++;
from64++;
}
}
#endif
/*
* This routine copies memory from the host to the NIC.
* The "size" argument must always be a multiple of the copy
* word size (4 bytes, or 8 bytes on _LP64), and the source and
* destination must be naturally aligned.
*/
static inline void
{
#if !defined(_LP64)
#else
#endif
}
/*
* Due to various bugs in Solaris (especially bug 6186772, which
* affects mblk chains with more than two elements), and the design
* bug where hardware checksums are ignored on mblk chains with more
* than 2 elements, we need to allocate a private pool of physically
* contiguous receive buffers.
*/
static void
{
}
static void
{
"%s: BUG! myri10ge_jpool_fini called on non-empty pool\n",
}
}
/*
* copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
* at most 32 bytes at a time, so as to avoid involving the software
* pio handler in the nic. We re-write the first segment's low
* DMA address to mark it valid only after we write the entire chunk
* in a burst
*/
static inline void
{
mb();
mb();
mb();
}
static void
{
int i;
/* find tail */
j = j->next;
jtail = j;
}
/*
* iterate over all per-CPU caches, and add contents into
* jpool
*/
for (i = 0; i < MYRI10GE_MAX_CPUS; i++) {
/* take per-CPU free list */
continue;
/* append to pool */
} else {
}
j = jfree;
j = j->next;
jtail = j;
}
}
/*
* Transfers buffers from the free pool to the nic
* Must be called holding the jpool mutex.
*/
static inline void
{
struct myri10ge_jpool_entry *j;
if (j == NULL) {
if (j == NULL) {
break;
}
}
/* copy 4 descriptors (32-bytes) to the mcp at a time */
}
}
}
/*
* Transfer buffers from the nic to the free pool.
* Should be called holding the jpool mutex
*/
static inline void
{
struct myri10ge_jpool_entry *j;
int i;
if (j == NULL)
continue;
}
}
/*
* Free routine which is called when the mblk allocated via
* esballoc() is freed. Here we return the jumbo buffer
* to the free pool, and possibly pass some jumbo buffers
* to the nic
*/
static void
myri10ge_jfree_rtn(void *arg)
{
struct myri10ge_jpool_stuff *jpool;
/* prepend buffer locklessly to per-CPU freelist */
do {
}
/*
* Destroy one jumbo-buffer pool entry and release every DMA resource
* it owns. The teardown order matters:
*   1) unbind the DMA handle from the buffer,
*   2) free the DMA memory via its access handle,
*   3) free the DMA handle itself,
*   4) free the entry.
* The DMA handle must be released before the entry is freed; once
* kmem_free() runs nothing references the handle any more and it
* would otherwise be leaked. "j" must not be used after this call.
*/
static void
myri10ge_remove_jbuf(struct myri10ge_jpool_entry *j)
{
(void) ddi_dma_unbind_handle(j->dma_handle);
ddi_dma_mem_free(&j->acc_handle);
ddi_dma_free_handle(&j->dma_handle);
kmem_free(j, sizeof (*j));
}
/*
* Allocates one physically contiguous descriptor
* and adds it to the jumbo buffer pool.
*/
static int
{
struct myri10ge_jpool_entry *j;
int err;
if (myri10ge_mtu < 2048)
else
j = (struct myri10ge_jpool_entry *)
kmem_alloc(sizeof (*j), KM_SLEEP);
if (err != DDI_SUCCESS)
goto abort_with_j;
if (err != DDI_SUCCESS)
goto abort_with_handle;
if (err != DDI_SUCCESS)
goto abort_with_mem;
/*
* Make certain std MTU buffers do not cross a 4KB boundary:
*
* Setting dma_attr_align=4096 will do this, but the system
* will only allocate 1 RX buffer per 4KB page, rather than 2.
* Setting dma_attr_granular=4096 *seems* to work around this,
* but I'm paranoid about future systems no longer honoring
* this, so fall back to the safe, but memory wasting way if a
* buffer crosses a 4KB boundary.
*/
if (rx_dma_attr == &myri10ge_rx_std_dma_attr &&
printf("std buffer crossed a 4KB boundary!\n");
goto again;
}
}
return (0);
ddi_dma_mem_free(&j->acc_handle);
kmem_free(j, sizeof (*j));
/*
* If an allocation failed, perhaps it failed because it could
* not satisfy the granularity requirement. Disable that, and
* try again.
*/
if (rx_dma_attr == &myri10ge_rx_std_dma_attr &&
"!alloc failed, reverting to gran=1\n");
goto again;
}
return (err);
}
static int
{
int i;
struct myri10ge_jpool_entry *j;
i = 0;
while (j != NULL) {
i++;
j = j->next;
}
return (i);
}
static int
{
int allocated = 0;
int err;
int needed;
/*
* if total is set, user wants "num" jbufs in the pool,
* otherwise the user wants "num" additional jbufs
* added to the pool
*/
} else {
}
while (needed > 0) {
needed--;
if (err == 0) {
allocated++;
}
}
return (allocated);
}
static void
{
struct myri10ge_jpool_entry *j;
}
}
static void
{
struct myri10ge_jpool_entry *j = NULL;
unsigned int alloc_size;
/* Allocate a jumbo frame and carve it into small frames */
if (len < alloc_size) {
/* remove jumbo from freelist */
/* place it onto small list */
ss->small_jpool = j;
len = myri10ge_mtu;
}
len -= alloc_size;
ptr += alloc_size;
dma_low += alloc_size;
}
}
/*
* Return the jumbo bufs we carved up for small to the jumbo pool
*/
static void
{
struct myri10ge_jpool_entry *j = NULL;
j = ss->small_jpool;
}
ss->jbufs_for_smalls = 0;
}
static int
{
struct myri10ge_tx_dma_handle *handle;
int err;
&handle->h);
if (err) {
static int limit = 0;
if (limit == 0)
limit++;
return (err);
}
return (DDI_SUCCESS);
}
static void
{
struct myri10ge_tx_dma_handle *handle;
ddi_dma_free_handle(&handle->h);
}
if (MYRI10GE_SLICE_STAT(tx_handles_alloced) != 0) {
}
}
static void
struct myri10ge_tx_dma_handle_head *list)
{
}
static void
struct myri10ge_tx_dma_handle *handle)
{
struct myri10ge_tx_dma_handle_head list;
return;
}
}
static int
struct myri10ge_tx_dma_handle **ret)
{
struct myri10ge_tx_dma_handle *handle;
int err, i;
for (i = 0; i < count; i++) {
if (err != DDI_SUCCESS) {
goto abort_with_handles;
}
}
}
return (DDI_SUCCESS);
return (err);
}
/*
* Frees DMA resources associated with the send ring
*/
static void
{
struct myri10ge_tx_dma_handle_head handles;
int idx;
}
}
}
/*
* Allocates DMA handles associated with the send ring
*/
static inline int
{
struct myri10ge_tx_dma_handle *handles;
int h;
"%s: Failed to allocate tx copyblock storage\n",
return (DDI_FAILURE);
}
/* allocate the TX copyblocks */
4096, &myri10ge_rx_jumbo_dma_attr,
goto abort_with_copyblocks;
}
}
/* pre-allocate transmit handles */
&handles);
return (DDI_SUCCESS);
while (h > 0) {
h--;
}
return (DDI_FAILURE);
}
/*
* The eeprom strings on the lanaiX have the format
* SN=x\0
* MAC=x:x:x:x:x:x\0
* PT:ddd mmm xx xx:xx:xx xx\0
* PV:ddd mmm xx xx:xx:xx xx\0
*/
static int
{
(((c) >= 'A' && (c) <= 'F') ? (10 + (c) - 'A') : \
(((c) >= 'a' && (c) <= 'f') ? (10 + (c) - 'a') : -1)))
ptr += 4;
if (myri10ge_verbose)
ptr);
for (i = 0; i < 6; i++) {
goto abort;
hv = 0;
} else {
}
ptr++;
}
}
ptr += 3;
}
ptr += 3;
}
}
return (0);
return (ENXIO);
}
/*
* Determine the register set containing the PCI resource we
* want to map: the memory-mappable part of the interface. We do
* this by scanning the DDI "reg" property of the interface,
* which is an array of mx_ddi_reg_set structures.
*/
static int
unsigned long *funcno)
{
#define MX_DDI_REG_SET_32_BIT_MEMORY_SPACE 2
#define MX_DDI_REG_SET_64_BIT_MEMORY_SPACE 3
#ifdef MYRI10GE_REGSET_VERBOSE
char *address_space_name[] = { "Configuration Space",
"I/O Space",
"32-bit Memory Space",
"64-bit Memory Space"
};
#endif
printf("Could not determine register set.\n");
return (ENXIO);
}
#ifdef MYRI10GE_REGSET_VERBOSE
#endif
if (!nelementsp) {
printf("Didn't find any \"reg\" properties.\n");
return (ENODEV);
}
/* Scan for the register number. */
#ifdef MYRI10GE_REGSET_VERBOSE
printf("*** Scanning for register number.\n");
#endif
for (i = 0; i < nelementsp / 5; i++) {
#ifdef MYRI10GE_REGSET_VERBOSE
printf("Examining register set %d:\n", i);
PCI_ADDR_LOW(rs));
PCI_SPAN_LOW(rs));
#endif
/* We are looking for a memory property. */
*reg_set = i;
#ifdef MYRI10GE_REGSET_VERBOSE
printf("%s uses register set %d.\n",
#endif
#ifdef MYRI10GE_REGSET_VERBOSE
#endif
break;
}
}
/* If no match, fail. */
if (i >= nelementsp / 5) {
return (EIO);
}
return (0);
}
static int
{
void *inflate_buffer;
unsigned hdr_offset, i;
*limit = 0; /* -Wuninitialized */
status = 0;
if (!inflate_buffer) {
"%s: Could not allocate buffer to inflate mcp\n",
return (ENOMEM);
}
goto abort;
}
goto abort;
}
/* save firmware version for kstat */
if (myri10ge_verbose)
/* Copy the inflated firmware to NIC SRAM. */
for (i = 0; i < *limit; i += 256) {
(char *)inflate_buffer + i,
mb();
mb();
}
return (status);
}
int
{
volatile char *cmd_addr =
int sleep_total = 0;
/* ensure buf is aligned to 8 bytes */
mb();
/* wait up to 20ms */
mb();
return (0);
== MXGEFW_CMD_UNKNOWN) {
return (ENOSYS);
return (E2BIG);
} else {
"%s: command %d failed, result = %d\n",
return (ENXIO);
}
}
drv_usecwait(1000);
}
return (EAGAIN);
}
/*
* Enable or disable periodic RDMAs from the host to make certain
* chipsets resend dropped PCIe messages
*/
static void
{
char buf_bytes[72];
volatile char *submit;
int i;
/* clear confirmation addr */
*confirm = 0;
mb();
/*
* send an rdma command to the PCIe engine, and wait for the
* response in the confirmation address. The firmware should
* write a -1 there to indicate it is alive and well
*/
mb();
drv_usecwait(1000);
mb();
i = 0;
drv_usecwait(1000);
i++;
}
if (*confirm != 0xffffffff) {
}
}
static int
{
volatile char *submit;
char buf_bytes[72];
int status, i;
if (status) {
return (status);
}
/* clear confirmation addr */
*confirm = 0;
mb();
/*
* send a reload command to the bootstrap MCP, and wait for the
* response in the confirmation address. The firmware should
* write a -1 there to indicate it is alive and well
*/
/*
* FIX: All newest firmware should un-protect the bottom of
* the sram before handoff. However, the very first interfaces
* do not. Therefore the handoff copy must skip the first 8 bytes
*/
mb();
drv_usecwait(1000);
mb();
i = 0;
drv_usecwait(1000);
i++;
}
if (*confirm != 0xffffffff) {
return (ENXIO);
}
if (status != 0) {
return (ENXIO);
}
return (0);
}
static int
{
int status;
return (status);
}
static int
{
int status;
if (pause)
&cmd);
else
&cmd);
if (status) {
return (ENXIO);
}
return (0);
}
static void
{
int status;
if (promisc)
else
if (status) {
}
}
static int
{
int status;
void *dmabench;
struct myri10ge_dma_stuff dmabench_dma;
char *test = " ";
/*
* Run a small DMA test.
* The magic multipliers to the length tell the firmware
* to do DMA read, write, or read+write tests. The
* results are returned in cmd.data0. The upper 16
* bits of the return is the number of transfers completed.
* The lower 16 bits is the time in 0.5us ticks that the
* transfers took to complete
*/
return (ENOMEM);
}
if (status != 0) {
test = "read";
goto abort;
}
if (status != 0) {
test = "write";
goto abort;
}
if (status != 0) {
goto abort;
}
test);
return (status);
}
static int
{
struct myri10ge_nic_stat *ethstat;
struct myri10ge_slice_state *ss;
int i, status;
/* send a reset command to the card to see if it is alive */
if (status != 0) {
return (ENXIO);
}
/* Now exchange information about interrupts */
/*
* Even though we already know how many slices are supported
* via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES
* has magic side effects, and must be called after a reset.
* It must be called prior to calling any RSS related cmds,
* including assigning an interrupt queue for anything but
* slice 0. It must also be called *after*
* MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
* the firmware to compute offsets.
*/
/* ask the maximum number of slices it supports */
&cmd);
if (status != 0) {
"%s: failed to get number of slices\n",
return (status);
}
/*
* MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
* to setting up the interrupt queue DMA
*/
&cmd);
if (status != 0) {
"%s: failed to set number of slices\n",
return (status);
}
}
for (i = 0; i < mgp->num_slices; i++) {
&cmd);
};
for (i = 0; i < mgp->num_slices; i++) {
}
}
if (status != 0) {
return (status);
}
for (i = 0; i < mgp->num_slices; i++) {
}
mgp->watchdog_rx_pause = 0;
}
return (status);
}
static int
{
int i, b, s, t, j;
int status;
uint32_t k[8];
&cmd);
if (status != 0) {
return (EIO);
}
KM_SLEEP);
t = 0;
for (b = 0; b < 12; b++) {
for (s = 0; s < 8; s++) {
/* Bits: b*8+s, ..., b*8+s+31 */
k[s] = 0;
for (j = 0; j < 32; j++) {
int bit = b*8+s+j;
k[s] |= bit << (31 - j);
}
}
for (i = 0; i <= 0xff; i++) {
tmp = 0;
}
}
return (0);
}
static inline struct myri10ge_slice_state *
{
/*
* Note hashing order is reversed from how it is done
* in the NIC, so as to generate the same hash value
* for the connection to try to keep connections CPU local
*/
/* hash on TCP port, if required */
if ((myri10ge_rss_hash & MXGEFW_RSS_HASH_TYPE_TCP_IPV4) &&
}
}
static inline struct myri10ge_slice_state *
{
}
/*
* Use the second byte of the *destination* address for
* MXGEFW_RSS_HASH_TYPE_SRC_PORT, so as to match NIC's hashing
*/
}
static inline struct myri10ge_slice_state *
{
unsigned int slice = 0;
struct ether_header *eh;
struct ether_vlan_header *vh;
if (myri10ge_tx_hash == 0) {
}
/*
* ensure it is a TCP or UDP over IPv4 packet, and that the
* headers are in the 1st mblk. Otherwise, punt
*/
ehl += 4;
}
switch (myri10ge_rss_hash) {
/* fallthru */
/* fallthru */
/* fallthru */
default:
break;
}
}
static int
{
int tx_ring_size, rx_ring_size;
if (status != 0)
return (status);
/* get the lanai pointers to the send and receive rings */
64 * slice;
} else {
}
if (status != 0) {
return (status);
}
goto abort;
/* allocate the host info rings */
goto abort_with_rx_big_shadow;
goto abort_with_tx_info;
goto abort_with_rx_small_info;
"%s: Could not allocate enough receive buffers (%d/%d)\n",
goto abort_with_jumbos;
}
if (allocated < rx_ring_entries)
else
/*
* invalidate the big receive ring in case we do not
* allocate sufficient jumbos to fill it
*/
mb();
}
mb();
}
if (status != 0)
goto abort_with_small_jbufs;
}
if (status) {
goto abort_with_tx;
}
return (0);
if (allocated != 0) {
}
return (status);
}
static void
{
/* ignore slices that have not been fully setup */
return;
/* Free the TX copy buffers */
/* stop passing returned buffers to firmware */
/* Release the free jumbo frame pool */
}
static int
{
/* Allocate DMA resources and receive buffers */
if (status != 0) {
return (DDI_FAILURE);
}
&cmd);
if (status != 0) {
"%s: failed to set number of slices\n",
goto abort_with_nothing;
}
/* setup the indirection table */
&cmd);
if (status != 0) {
}
/* just enable an identity mapping */
for (i = 0; i < mgp->num_slices; i++)
if (myri10ge_rss_hash & MYRI10GE_TOEPLITZ_HASH) {
if (status != 0) {
goto abort_with_nothing;
}
}
&cmd);
if (status != 0) {
goto abort_with_toeplitz;
}
}
for (i = 0; i < mgp->num_slices; i++) {
if (status != 0)
goto abort_with_slices;
}
/*
* Tell the MCP how many buffers he has, and to
* bring the ethernet interface up
*
* Firmware needs the big buff size as a power of 2. Lie and
* tell him the buffer is larger, because we only use 1
*/
big_pow2++;
/* now give firmware buffers sizes, and MTU */
status |=
if (status) {
goto abort_with_slices;
}
if (status) {
} else {
}
if (status) {
goto abort_with_slices;
}
return (DDI_SUCCESS);
for (i = 0; i < mgp->num_slices; i++)
}
return (DDI_FAILURE);
}
static void
{
int status, old_down_cnt;
int wait_time = 10;
int i, polling;
mb();
if (status) {
}
wait_time--;
if (wait_time == 0)
break;
}
for (i = 0; i < mgp->num_slices; i++) {
/*
* take and release the rx lock to ensure
* that no interrupt thread is blocked
* elsewhere in the stack, preventing
* completion
*/
printf("%s: slice %d rx irq idle\n",
/* verify that the poll handler is inactive */
if (polling) {
printf("%s: slice %d is polling\n",
goto again;
}
}
}
}
for (i = 0; i < mgp->num_slices; i++)
}
}
static int
myri10ge_m_start(void *arg)
{
int status;
return (DDI_FAILURE);
}
if (status != DDI_SUCCESS)
return (status);
/* start the watchdog timer */
mgp->timer_ticks);
return (DDI_SUCCESS);
}
static void
myri10ge_m_stop(void *arg)
{
/* if the device is not running, give up */
return;
}
}
static inline void
{
struct ether_header *eh;
s->brdcstrcv++;
else
s->multircv++;
}
/*
* fix checksum by subtracting 4 bytes after what the
* firmware thought was the end of the ether hdr
*/
hdrlen += VLAN_TAGSZ;
}
else
return;
else
return;
/*
* IPv6 headers do not contain a checksum, and hence
* do not checksum to zero, so they don't "fall out"
* of the partial checksum calculation like IPv4
* headers do. We need to fix the partial checksum by
* subtracting the checksum of the IPv6 header.
*/
} else {
return;
}
/* padded frame, so hw csum may be invalid */
return;
}
csum, HCK_PARTIALCKSUM, 0);
}
static mblk_t *
{
int idx;
/* allocate a new buffer to pass up the stack */
goto abort;
}
}
return (mp);
}
static mblk_t *
{
struct myri10ge_jpool_stuff *jpool;
struct myri10ge_jpool_entry *j;
int idx, num_owned_by_mcp;
if (j == NULL) {
return (NULL);
}
/*
* Check to see if we are low on rx buffers.
* Note that we must leave at least 8 free so there are
* enough to free in a single 64-byte write.
*/
/* if we are still low, then we have to copy */
if (num_owned_by_mcp < 16) {
/* allocate a new buffer to pass up the stack */
goto abort;
}
/* push buffer back to NIC */
goto set_len;
}
}
/* loan our buffer to the stack */
goto abort;
}
return (mp);
return (NULL);
}
/*
* Free all transmit buffers up until the specified index
*/
static inline void
{
struct myri10ge_tx_dma_handle_head handles;
int idx;
int limit = 0;
/*
* mblk & DMA handle attached only to first slot
* per buffer in the packet
*/
}
}
/*
* If we stalled the queue, wake it, but wait until
* at least 1/2 of our slots are free.
*/
}
/* limit potential for livelock */
break;
}
/*
* Nic has sent all pending requests, allow him
* to stop polling this queue
*/
mb();
}
}
}
static void
{
}
/*ARGSUSED*/
void
{
}
static inline void
{
break;
}
length &= (~MXGEFW_RSS_HASH_MASK);
/* limit potential for livelock */
break;
if (length <= myri10ge_small_bytes)
else
if (!myri10ge_lro ||
}
}
}
}
static void
{
struct myri10ge_mblk_list mbl;
return;
&ss->rx_polling);
}
static mblk_t *
{
struct myri10ge_mblk_list mbl;
if (bytes == 0)
return (NULL);
if (ss->rx_polling)
else
}
/*ARGSUSED*/
static uint_t
{
struct myri10ge_slice_state *ss =
(struct myri10ge_slice_state *)(void *)arg0;
/* make sure the DMA has finished */
return (DDI_INTR_UNCLAIMED);
}
/* low bit indicates receives are present */
if (valid & 1)
/* lower legacy IRQ */
*mgp->irq_deassert = 0;
if (!myri10ge_deassert_wait)
/* don't wait for conf. that irq is low */
mb();
} else {
/* no need to wait for conf. that irq is low */
}
do {
/* check for transmit completes and receives */
if (stats->stats_updated) {
mgp->link_state = 0;
}
if (mgp->link_state) {
if (myri10ge_verbose)
} else {
if (myri10ge_verbose)
}
}
if (mgp->rdma_tags_available !=
}
}
mb();
/* check to see if we have rx token to pass back */
if (valid & 0x1) {
if (ss->rx_polling) {
} else {
}
}
return (DDI_INTR_CLAIMED);
}
/*
* Add or remove a multicast address. This is called with our
* macinfo's lock held by GLD, so we do not need to worry about
* our own locking here.
*/
static int
{
int status, join_leave;
if (add)
else
if (status == 0)
return (0);
return (status);
}
static int
{
return (0);
}
/*
* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
* backwards one at a time and handle ring wraps
*/
static inline void
{
int idx, starting_slot;
while (cnt > 1) {
cnt--;
mb();
}
}
/*
* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
* at most 32 bytes at a time, so as to avoid involving the software
* pio handler in the nic. We re-write the first segment's flags
* to mark them valid only after writing the entire chain
*/
static inline void
int cnt)
{
int idx, i;
mb();
mb(); /* force write every 32 bytes */
srcp += 2;
dstp += 2;
}
} else {
/*
* submit all but the first request, and ensure
* that it is submitted below
*/
i = 0;
}
if (i < cnt) {
/* submit the first request */
mb(); /* barrier before setting valid flag */
}
/* re-write the last 32-bits with the valid flags */
src_ints += 3;
dst_ints += 3;
mb();
/* notify NIC to poll this tx ring */
mb();
}
}
/* ARGSUSED */
static inline void
{
}
static int
{
int ok;
mss = 0;
if (!ok) {
printf("pullupmsg failed");
return (DDI_FAILURE);
}
NULL, tx_offload_flags, 0);
if (tx_offload_flags & HW_LSO)
return (DDI_SUCCESS);
}
static inline void
{
else
}
}
static int
{
struct myri10ge_tx_copybuf *cp;
if (avail <= 1) {
return (EBUSY);
}
}
/* ensure runts are padded to 60 bytes */
if (len < 60) {
len = 60;
}
return (DDI_SUCCESS);
}
static void
struct myri10ge_tx_buffer_state *tx_info,
int count)
{
int i, idx;
idx = 0; /* gcc -Wuninitialized */
/* store unmapping and bp info for tx irq handler */
for (i = 0; i < count; i++) {
}
/* submit the frame to the nic */
}
static void
{
int seglen;
while (off > 0) {
break;
}
while (len > 0) {
off = 0;
}
}
static int
{
struct ether_header eh_copy;
struct ether_header *eh;
int eth_hdr_len, seglen;
eth_hdr_len = sizeof (*eh);
if (seglen < eth_hdr_len) {
} else {
}
eth_hdr_len += 4;
}
return (eth_hdr_len);
}
static int
{
char buf[128];
} else {
}
}
/*
* NIC expects ip_sum to be zero. Recent changes to
* OpenSolaris leave the correct ip checksum there, rather
* than the required zero, so we need to zero it. Otherwise,
* the NIC will produce bad checksums when sending LSO packets.
*/
/* ip points into mblk, so just zero it */
} else {
/*
* ip points into a copy, so walk the chain
* to find the ip_csum, then zero it
*/
}
sum_off++;
}
}
}
}
static int
{
struct myri10ge_tx_copybuf *cp;
int rdma_count;
resid = tx_boundary;
count = 1;
/* check to see if the slots are really there */
return (EBUSY);
}
/* copy */
count = 0;
if (mss) {
+ pkt_size;
}
if (len) {
hdr_size_tmp -= len;
if (hdr_size_tmp)
continue;
tx_req++;
resid = tx_boundary;
}
}
while (mblen) {
if (mss_resid == 0) {
tx_req++;
resid = tx_boundary;
}
}
}
}
count = 0;
rdma_count = 0;
while (pkt_size_tmp) {
if (len == 0) {
printf("len=0! pkt_size_tmp=%d, pkt_size=%d\n",
}
while (pkt_size_tmp > 0) {
tx_req++;
}
printf("dropped\n");
goto done;
}
pkt_size_tmp -= len;
while (len) {
while (len) {
int cum_len_next;
/* payload */
int next_is_first, chop;
next_is_first = (cum_len_next == 0);
flags_next |= next_is_first *
} else if (likely(cum_len_next >= 0)) {
/* header ends */
int small;
rdma_count = -1;
cum_len_next = 0;
(small * MXGEFW_FLAGS_SMALL);
}
if (cksum_offset > seglen)
cksum_offset -= seglen;
else
cksum_offset = 0;
req++;
flags = flags_next;
count++;
rdma_count++;
}
}
tx_req++;
}
do {
req--;
done:
return (DDI_SUCCESS);
}
/*
* Try to send the chain of buffers described by the mp. We must not
* encapsulate more than eth->tx.req - eth->tx.done, or
* MXGEFW_MAX_SEND_DESC, whichever is more.
*/
static int
{
int pkt_size;
int lso_copy = myri10ge_lso_copy;
try_pullup = 1;
/* Setup checksum offloading, if needed */
if (tx_offload_flags & HW_LSO) {
if ((tx_offload_flags & HCK_PARTIALCKSUM) == 0) {
return (DDI_SUCCESS);
}
} else {
mss = 0;
}
cksum_offset = 0;
pseudo_hdr_offset = 0;
/* leave an extra slot keep the ring from wrapping */
/*
* If we have > MXGEFW_MAX_SEND_DESC, then any over-length
* message will need to be pulled up in order to fit.
* Otherwise, we are low on transmit descriptors, it is
* probably better to stall and try again rather than pullup a
* message to fit.
*/
goto stall;
}
/* find out how long the frame is and how many segments it is */
count = 0;
odd_flag = 0;
pkt_size = 0;
if (mblen == 0) {
/*
* we can't simply skip over 0-length mblks
* because the hardware can't deal with them,
* and we could leak them.
*/
goto pullup;
}
/*
* There's no advantage to copying most gesballoc
* attached blocks, so disable lso copy in that case
*/
lso_copy = 0;
}
}
count++;
}
/* Try to pull up excessively long chains */
count = 1;
} else {
if (count < MYRI10GE_MAX_SEND_DESC_TSO) {
/*
* just let the h/w send it; it will be
* inefficient, but is better than dropping
*/
} else {
/* drop it */
return (0);
}
}
}
cum_len = 0;
if (tx_offload_flags & HCK_PARTIALCKSUM) {
}
lso_hdr_size = 0; /* -Wunitinialized */
if (mss) { /* LSO */
/* this removes any CKSUM flag from before */
/*
* parse the headers and set cum_len to a negative
* value to reflect the offset of the TCP payload
*/
cum_len = -lso_hdr_size;
return (err);
}
/*
* for TSO, pseudo_hdr_offset holds mss. The firmware
* figures out where to put the checksum by parsing
* the header.
*/
} else if (pkt_size <= MXGEFW_SEND_SMALL_SIZE) {
if (pkt_size < myri10ge_tx_copylen) {
return (err);
}
cum_len = 0;
}
/* pull one DMA handle for each bp from our freelist */
if (err != DDI_SUCCESS) {
err = DDI_FAILURE;
goto stall;
}
count = 0;
rdma_count = 0;
try_pullup = 0;
goto abort_with_handles;
}
/* reserve the slot */
for (; ; ) {
while (len) {
int cum_len_next;
if (mss) {
rdma_count + 1;
/* payload */
int next_is_first, chop;
cum_len_next % mss;
(cum_len_next == 0);
flags_next |= next_is_first *
rdma_count |=
-(chop | next_is_first);
rdma_count +=
chop & !next_is_first;
} else if (likely(cum_len_next >= 0)) {
/* header ends */
int small;
rdma_count = -1;
cum_len_next = 0;
| (small *
}
}
if (cksum_offset > seglen)
cksum_offset -= seglen;
else
cksum_offset = 0;
count++;
rdma_count++;
/* make sure all the segments will fit */
/* may try a pullup */
if (try_pullup)
try_pullup = 2;
goto abort_with_handles;
}
req++;
flags = flags_next;
}
ncookies--;
if (ncookies == 0)
break;
}
}
if (mss) {
do {
req--;
}
/* calculate tx stats */
if (mss) {
int payload;
+ pkt_size;
} else {
}
/* check to see if the slots are really there */
err = 0;
goto late_stall;
}
return (DDI_SUCCESS);
try_pullup = 0;
/* unbind and free handles from previous mblks */
for (i = 0; i < count; i++) {
tx_info[i].m = 0;
if (bp) {
(void) ddi_dma_unbind_handle(dma_handle->h);
}
}
if (try_pullup) {
/* drop */
return (0);
}
try_pullup = 0;
goto again;
}
if (err != 0) {
} else {
}
}
return (err);
}
static mblk_t *
{
int err = 0;
#if defined(__i386)
/*
* We need about 2.5KB of scratch space to handle transmits.
* i86pc has only 8KB of kernel stack space, so we malloc the
* scratch space there rather than keeping it on the stack.
*/
struct myri10ge_tx_buffer_state *tx_info;
+ 8;
#else
+ 8];
#endif
/* ensure req_list entries are aligned to 8 bytes */
req_list = (struct mcp_kreq_ether_send *)
#if defined(__i386)
#endif
if (err)
return (mp);
else
return (NULL);
}
static int
{
int err;
return (EINVAL);
if (mgp->macaddr_cnt) {
return (ENOSPC);
}
if (!err)
mgp->macaddr_cnt++;
if (err)
return (err);
return (0);
}
/*ARGSUSED*/
static int
{
mgp->macaddr_cnt--;
return (0);
}
/*ARGSUSED*/
static void
{
if (rtype != MAC_RING_TYPE_RX)
return;
}
static int
{
struct myri10ge_slice_state *ss;
return (0);
}
static int
{
struct myri10ge_slice_state *ss;
return (0);
}
static int
{
struct myri10ge_slice_state *ss;
}
return (0);
}
/*ARGSUSED*/
static void
{
struct myri10ge_slice_state *ss;
switch (rtype) {
case MAC_RING_TYPE_RX:
break;
case MAC_RING_TYPE_TX:
break;
default:
break;
}
}
static void
{
return;
}
static void
{
return;
}
static void
{
return;
}
static int
{
struct myri10ge_nic_stat *ethstat;
struct myri10ge_priv *mgp;
if (rw == KSTAT_WRITE)
return (EACCES);
else
return (0);
}
static int
{
struct myri10ge_slice_stat *ethstat;
struct myri10ge_slice_state *ss;
if (rw == KSTAT_WRITE)
return (EACCES);
return (0);
}
static int
{
struct myri10ge_info *info;
struct myri10ge_priv *mgp;
if (rw == KSTAT_WRITE)
return (EACCES);
return (0);
}
/*
* Template of named kstat entries describing the driver and adapter.
* Every entry is a string-valued statistic (KSTAT_DATA_STRING):
* driver and firmware versions, firmware name, interrupt type,
* product code and serial number.
*/
static struct myri10ge_info myri10ge_info_template = {
{ "driver_version", KSTAT_DATA_STRING },
{ "firmware_version", KSTAT_DATA_STRING },
{ "firmware_name", KSTAT_DATA_STRING },
{ "interrupt_type", KSTAT_DATA_STRING },
{ "product_code", KSTAT_DATA_STRING },
{ "serial_number", KSTAT_DATA_STRING },
};
static kmutex_t myri10ge_info_template_lock;
static int
{
sizeof (myri10ge_info_template) /
sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
return (DDI_FAILURE);
}
if (MYRI10GE_VERSION_STR != NULL)
return (DDI_SUCCESS);
}
static int
{
struct myri10ge_nic_stat *ethstat;
sizeof (*ethstat) / sizeof (kstat_named_t), 0);
return (DDI_FAILURE);
}
"dma_read_bw_MBs", KSTAT_DATA_ULONG);
"dma_write_bw_MBs", KSTAT_DATA_ULONG);
"dma_read_write_bw_MBs", KSTAT_DATA_ULONG);
"dma_force_physical", KSTAT_DATA_ULONG);
"lanes", KSTAT_DATA_ULONG);
"dropped_bad_crc32", KSTAT_DATA_ULONG);
"dropped_bad_phy", KSTAT_DATA_ULONG);
"dropped_link_error_or_filtered", KSTAT_DATA_ULONG);
"dropped_link_overflow", KSTAT_DATA_ULONG);
"dropped_multicast_filtered", KSTAT_DATA_ULONG);
"dropped_no_big_buffer", KSTAT_DATA_ULONG);
"dropped_no_small_buffer", KSTAT_DATA_ULONG);
"dropped_overrun", KSTAT_DATA_ULONG);
"dropped_pause", KSTAT_DATA_ULONG);
"dropped_runt", KSTAT_DATA_ULONG);
"dropped_unicast_filtered", KSTAT_DATA_ULONG);
return (DDI_SUCCESS);
}
static int
{
struct myri10ge_slice_stat *ethstat;
int instance;
/*
* fake an instance so that the same slice numbers from
* different instances do not collide
*/
sizeof (*ethstat) / sizeof (kstat_named_t), 0);
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
static void
{
int retval = DDI_FAILURE;
int dontcare;
return;
if (parent_dip == NULL) {
return;
}
return;
}
if (myri10ge_verbose) {
if (myri10ge_verbose)
}
if (vendor_id != 0x10de)
return;
return;
}
for (cfg_pa = 0xf0000000UL;
cfg_pa -= 0x10000000UL) {
/* find the config space address for the nvidia bridge */
/* map it into the kernel */
if (myri10ge_verbose)
printf("%s: Enabling ECRC on upstream "
"Nvidia bridge (0x%x:0x%x) "
*ptr32 |= 0x40;
}
}
}
#else
/*ARGSUSED*/
static void
{
}
#endif /* i386 */
/*
* The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
* when the PCI-E Completion packets are aligned on an 8-byte
* boundary. Some PCI-E chip sets always align Completion packets; on
* the ones that do not, the alignment can be enforced by enabling
* ECRC generation (if supported).
*
* When PCI-E Completion packets are not aligned, it is actually more
* efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
*
* If the driver can neither enable ECRC nor verify that it has
* already been enabled, then it must use a firmware image which works
* around unaligned completion packets (ethp_z8e.dat), and it should
* also ensure that it never gives the device a Read-DMA which is
* larger than 2KB by setting the tx.boundary to 2KB. If ECRC is
* enabled, then the driver should use the aligned (eth_z8e.dat)
* firmware image, and set tx.boundary to 4KB.
*/
static int
{
int status;
/*
* Verify the max read request size was set to 4KB
* before trying the test with 4KB.
*/
if (mgp->max_read_request_4k == 0)
/*
* load the optimized firmware which assumes aligned PCIe
* completions in order to see if it works on this host.
*/
if (status != 0) {
return (status);
}
/*
* Enable ECRC if possible
*/
/*
* Run a DMA test which watches for unaligned completions and
* aborts on the first one seen.
*/
if (status == 0)
return (0); /* keep the aligned firmware */
return (status);
}
static int
{
int aligned;
aligned = 0;
if (myri10ge_force_firmware == 1) {
if (myri10ge_verbose)
printf("%s: Assuming aligned completions (forced)\n",
aligned = 1;
goto done;
}
if (myri10ge_force_firmware == 2) {
if (myri10ge_verbose)
printf("%s: Assuming unaligned completions (forced)\n",
aligned = 0;
goto done;
}
/* If the width is less than 8, we may use the aligned firmware */
aligned = 1;
goto done;
}
if (0 == myri10ge_firmware_probe(mgp))
return (0); /* keep optimized firmware */
done:
if (aligned) {
} else {
}
return (myri10ge_load_firmware(mgp));
}
static int
{
if (rc != DDI_SUCCESS) {
rc);
return (DDI_FAILURE);
}
if (!myri10ge_use_msi)
if (!myri10ge_use_msix)
if (intr_types & DDI_INTR_TYPE_MSIX) {
} else if (intr_types & DDI_INTR_TYPE_MSI) {
} else {
}
/* Get number of interrupts */
return (DDI_FAILURE);
}
/* Get number of available interrupts */
return (DDI_FAILURE);
}
"!%s: nintrs() returned %d, navail returned %d",
}
return (DDI_FAILURE);
/* Allocate memory for MSI interrupts */
return (DDI_FAILURE);
}
}
/*
* Get priority for first irq, assume remaining are all the same
*/
!= DDI_SUCCESS) {
/* Free already allocated intr */
for (y = 0; y < actual; y++) {
}
return (DDI_FAILURE);
}
if (!add_handler)
return (DDI_SUCCESS);
/* Call ddi_intr_add_handler() */
for (x = 0; x < actual; x++) {
/* Free already allocated intr */
for (y = 0; y < actual; y++) {
}
return (DDI_FAILURE);
}
}
/* Call ddi_intr_block_enable() for MSI */
} else {
/* Call ddi_intr_enable() for MSI non block enable */
}
}
return (DDI_SUCCESS);
}
static void
{
int x, err;
/* Disable all interrupts */
if (handler_installed) {
/* Call ddi_intr_block_disable() */
} else {
}
}
}
if (handler_installed) {
/* Call ddi_intr_remove_handler() */
if (err != DDI_SUCCESS) {
"%s: ddi_intr_remove_handler for"
x, err);
}
}
if (err != DDI_SUCCESS) {
"%s: ddi_intr_free for vec %d returned %d\n",
}
}
}
static void
{
struct myri10ge_dma_stuff dma;
void *addr;
int err;
/* test #1, sufficient for older sparc systems */
if (err == DDI_DMA_BADATTR)
goto fail;
/* test #2, required on Olympus where the bind is what fails */
goto fail;
return;
fail:
if (myri10ge_verbose)
printf("myri10ge%d: DDI_DMA_FORCE_PHYSICAL failed, "
}
static void
{
"myri10ge_flow_control", myri10ge_flow_control);
"myri10ge_intr_coal_delay", myri10ge_intr_coal_delay);
"myri10ge_nvidia_ecrc_enable", 1);
#endif
"myri10ge_use_msi", myri10ge_use_msi);
"myri10ge_deassert_wait", myri10ge_deassert_wait);
"myri10ge_verbose", myri10ge_verbose);
"myri10ge_tx_copylen", myri10ge_tx_copylen);
if (myri10ge_tx_copylen < 60) {
"myri10ge_tx_copylen must be >= 60 bytes\n");
myri10ge_tx_copylen = 60;
}
"myri10ge_mtu_override", myri10ge_mtu_override);
else if (myri10ge_mtu_override != 0) {
"myri10ge_mtu_override must be between 1500 and "
"9000 bytes\n");
}
"myri10ge_bigbufs_initial", myri10ge_bigbufs_initial);
"myri10ge_bigbufs_max", myri10ge_bigbufs_max);
"myri10ge_watchdog_reset", myri10ge_watchdog_reset);
if (myri10ge_bigbufs_initial < 128) {
"myri10ge_bigbufs_initial be at least 128\n");
myri10ge_bigbufs_initial = 128;
}
if (myri10ge_bigbufs_max < 128) {
"myri10ge_bigbufs_max be at least 128\n");
myri10ge_bigbufs_max = 128;
}
"myri10ge_bigbufs_max must be >= "
"myri10ge_bigbufs_initial\n");
}
"myri10ge_force_firmware", myri10ge_force_firmware);
"myri10ge_max_slices", myri10ge_max_slices);
"myri10ge_use_msix", myri10ge_use_msix);
"myri10ge_rss_hash", myri10ge_rss_hash);
}
"myri10ge_lro", myri10ge_lro);
"myri10ge_lro_cnt", myri10ge_lro_cnt);
"myri10ge_lro_max_aggr", myri10ge_lro_max_aggr);
"myri10ge_tx_hash", myri10ge_tx_hash);
"myri10ge_use_lso", myri10ge_use_lso);
"myri10ge_lso_copy", myri10ge_lso_copy);
"myri10ge_tx_handles_initial", myri10ge_tx_handles_initial);
"myri10ge_small_bytes", myri10ge_small_bytes);
myri10ge_small_bytes += 128;
}
}
#ifndef PCI_EXP_LNKSTA
#define PCI_EXP_LNKSTA 18
#endif
static int
{
/* check to see if we have capabilities */
if (!(status & PCI_STAT_CAP)) {
return (ENXIO);
}
/* Walk the capabilities list, looking for a PCI Express cap */
while (ptr != PCI_CAP_NEXT_PTR_NULL) {
break;
}
if (ptr < 64) {
return (ENXIO);
}
return (0);
}
static int
{
int err;
if (err != 0) {
return (ENXIO);
}
/* set max read req to 4096 */
if ((val & (PCIE_DEVCTL_MAX_READ_REQ_4096)) !=
return (EINVAL);
}
return (0);
}
static int
{
int err;
if (err != 0) {
return (ENXIO);
}
/* read link width */
return (0);
}
static int
{
int err;
if ((cmd & PCI_COMM_ME) == 0) {
/*
* Bus master DMA disabled?  Check to see if the card
* rebooted due to a parity error.  For now, just report
* it.
*/
/* enter read32 mode */
/* read REBOOT_STATUS (0xfffffff0) */
return (0);
}
if (!myri10ge_watchdog_reset) {
return (1);
}
if (err == DDI_FAILURE) {
return (0);
}
return (1);
}
static inline int
{
return (1);
return (0);
}
/*
* Periodic watchdog callback.
*
* arg is an opaque pointer; NOTE(review): presumably the per-device
* struct myri10ge_priv -- confirm against the site that arms the timer.
*
* From what is visible here, the routine:
*  - gives up without rearming when the interface is not running
*    (see the "not rearming watchdog" message);
*  - looks for stalled slices and reports either the stalled slice or
*    a flow-control condition;
*  - returns early when nic_ok has been cleared;
*  - otherwise walks all mgp->num_slices slices and, when add_rx is
*    non-zero, feeds additional receive buffers to the firmware.
* The trailing mgp->timer_ticks argument suggests the timer is rearmed
* on the way out -- TODO confirm.
*/
static void
myri10ge_watchdog(void *arg)
{
struct myri10ge_priv *mgp;
struct myri10ge_slice_state *ss;
int nic_ok = 1;		/* treated as healthy unless cleared below */
int slices_stalled, rx_pause, i;
int add_rx;		/* per-slice receive-buffer count computed below */
"%s not running, not rearming watchdog (%d)\n",
return;
}
/*
* make sure nic is stalled before we reset the nic, so as to
* ensure we don't rip the transmit data structures out from
* under a pending transmit
*/
if (slices_stalled)
break;
}
if (slices_stalled) {
"%s slice %d stalled:(%d, %d, %d, %d, %d %d %d\n)",
} else {
"%s Flow controlled, check link partner\n",
}
}
/* nothing further to do on this pass once the NIC was found unhealthy */
if (!nic_ok) {
return;
}
/* per-slice receive buffer top-up */
for (i = 0; i < mgp->num_slices; i++) {
add_rx =
ss->jbufs_for_smalls));
if (add_rx != 0) {
/* now feed them to the firmware */
}
}
}
mgp->timer_ticks);
}
/*ARGSUSED*/
static int
{
return (0);
}
/*ARGSUSED*/
static int
{
char *end;
return (EINVAL);
return (0);
}
/*ARGSUSED*/
static int
{
return (0);
}
/*ARGSUSED*/
static int
{
char *end;
int err = 0;
return (EINVAL);
if (new_value != 0)
new_value = 1;
return (err);
}
/*ARGSUSED*/
static int
{
return (0);
}
/*ARGSUSED*/
static int
{
char *end;
return (EINVAL);
return (0);
}
static void
{
(caddr_t)&myri10ge_lro);
}
static void
{
}
static void
{
ok = 0;
err = 0;
switch (cmd) {
case ND_GET:
case ND_SET:
break;
default:
break;
}
if (!ok)
else
if (!err)
else
}
/*
* File-scope pointer to a struct myri10ge_priv.
* NOTE(review): presumably the head of a list of driver instances
* that the lookup routine below walks -- confirm; the list
* manipulation code is not visible in this excerpt.
*/
static struct myri10ge_priv *mgp_list;
struct myri10ge_priv *
{
struct myri10ge_priv *mgp;
break;
}
}
return (mgp);
}
void
{
}
static boolean_t
{
switch (cap) {
case MAC_CAPAB_HCKSUM:
break;
case MAC_CAPAB_RINGS:
case MAC_RING_TYPE_RX:
break;
case MAC_RING_TYPE_TX:
break;
default:
return (B_FALSE);
}
break;
case MAC_CAPAB_LSO:
if (!myri10ge_use_lso)
return (B_FALSE);
return (B_FALSE);
break;
default:
return (B_FALSE);
}
return (B_TRUE);
}
static int
{
struct myri10ge_rx_ring_stats *rstat;
struct myri10ge_tx_ring_stats *tstat;
struct myri10ge_slice_state *ss;
int i;
switch (stat) {
case MAC_STAT_IFSPEED:
break;
case MAC_STAT_MULTIRCV:
for (i = 0; i < mgp->num_slices; i++) {
}
break;
case MAC_STAT_BRDCSTRCV:
for (i = 0; i < mgp->num_slices; i++) {
}
break;
case MAC_STAT_MULTIXMT:
for (i = 0; i < mgp->num_slices; i++) {
}
break;
case MAC_STAT_BRDCSTXMT:
for (i = 0; i < mgp->num_slices; i++) {
}
break;
case MAC_STAT_NORCVBUF:
for (i = 0; i < mgp->num_slices; i++) {
}
break;
case MAC_STAT_IERRORS:
break;
case MAC_STAT_OERRORS:
for (i = 0; i < mgp->num_slices; i++) {
}
break;
case MAC_STAT_RBYTES:
for (i = 0; i < mgp->num_slices; i++) {
}
break;
case MAC_STAT_IPACKETS:
for (i = 0; i < mgp->num_slices; i++) {
}
break;
case MAC_STAT_OBYTES:
for (i = 0; i < mgp->num_slices; i++) {
}
break;
case MAC_STAT_OPACKETS:
for (i = 0; i < mgp->num_slices; i++) {
}
break;
break;
#ifdef SOLARIS_S11
break;
#endif
case ETHER_STAT_LINK_PAUSE:
break;
case ETHER_STAT_LINK_AUTONEG:
*val = 1;
break;
case ETHER_STAT_LINK_DUPLEX:
*val = LINK_DUPLEX_FULL;
break;
default:
return (ENOTSUP);
}
return (0);
}
/*
* MAC framework callback table for this driver.
*
* The first initializer is a flag mask naming MC_IOCTL and
* MC_GETCAPAB; NOTE(review): presumably this advertises which optional
* callbacks are implemented -- confirm against mac_callbacks_t.
* Only two NULL slots are visible after the mask in this excerpt; the
* remaining entries cannot be described from here.
*/
static mac_callbacks_t myri10ge_m_callbacks = {
(MC_IOCTL | MC_GETCAPAB),
NULL,
NULL,
};
static int
{
int status;
/* hit the board with a reset to ensure it is alive */
if (status != 0) {
return (ENXIO);
}
if (myri10ge_use_msix == 0)
return (0);
/* tell it the size of the interrupt queues */
if (status != 0) {
return (ENXIO);
}
/* ask the maximum number of slices it supports */
&cmd);
if (status != 0)
return (0);
/*
* if the admin did not specify a limit to how many
* slices we should use, cap it automatically to the
* number of CPUs currently online
*/
if (myri10ge_max_slices == -1)
/*
* Now try to allocate as many MSI-X vectors as we have
* slices. We give up on MSI-X if we can only get a single
* vector.
*/
/* make sure it is a power of two */
mgp->num_slices--;
return (0);
if (status == 0) {
myri10ge_rem_intrs(mgp, 0);
if (myri10ge_verbose)
printf("Got %d slices!\n",
mgp->num_slices);
return (0);
}
} else {
}
}
if (myri10ge_verbose)
return (0);
}
static void
{
}
}
static void
{
int idx;
continue;
}
}
static void
{
struct myri10ge_slice_state *ss;
int i;
return;
for (i = 0; i < mgp->num_slices; i++) {
continue;
continue;
}
}
static int
{
struct myri10ge_slice_state *ss;
int i;
return (ENOMEM);
for (i = 0; i < mgp->num_slices; i++) {
/* allocate the per-slice firmware stats */
goto abort;
/* allocate rx done ring */
goto abort;
}
(void) myri10ge_slice_stat_init(ss);
}
return (0);
return (ENOMEM);
}
static int
{
int err;
if (err != 0) {
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
static int
{
int err;
if (err != 0) {
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
static int
{
int i;
int err = DDI_SUCCESS;
/* Save the non-extended PCI config space 32-bits at a time */
for (i = 0; i < 16; i++)
/* now save MSI interrupt state, if needed */
return (err);
}
static int
{
int i;
int err = DDI_SUCCESS;
/* Restore the non-extended PCI config space 32-bits at a time */
for (i = 15; i >= 0; i--)
/* now restore MSI interrupt state, if needed */
if (mgp->max_read_request_4k)
(void) myri10ge_set_max_readreq(handle);
return (err);
}
static int
{
int status;
return (DDI_FAILURE);
}
return (DDI_FAILURE);
}
}
return (status);
}
static int
{
int status = DDI_SUCCESS;
return (DDI_FAILURE);
}
return (DDI_FAILURE);
}
if (status == DDI_SUCCESS &&
}
if (status != DDI_SUCCESS)
return (status);
/* start the watchdog timer */
mgp->timer_ticks);
return (DDI_SUCCESS);
}
static int
{
struct myri10ge_priv *mgp;
if (cmd == DDI_RESUME) {
return (myri10ge_resume(dip));
}
if (cmd != DDI_ATTACH)
return (DDI_FAILURE);
return (DDI_FAILURE);
/* enable bus master and io space access */
if (status != 0) {
link_width = 0;
}
if (status != 0)
goto abort_with_cfg_hdl;
goto abort_with_cfg_hdl;
/*
* XXXX Hack: mac_register_t grows in newer kernels. To be
* able to write newer fields, such as m_margin, without
* writing outside allocated memory, we allocate our own macp
* and pass that to mac_register()
*/
if ((mgp = (struct myri10ge_priv *)
goto abort_with_macinfo;
}
/* setup device name for log messages */
/* allocate command page */
goto abort_with_mgp;
&dev_number, &func_number);
if (myri10ge_verbose)
if (status != DDI_SUCCESS) {
printf("%s: reg_set = %d, span = %d, status = %d",
goto abort_with_mgp;
}
ss_offset = hdr_offset +
if (status) {
goto abort_with_mapped;
}
if (status != 0) {
goto abort_with_mapped;
}
if (status != 0) {
goto abort_with_dummy_rdma;
}
if (status != 0) {
goto abort_with_dummy_rdma;
}
/* add the interrupt handler */
if (status != 0) {
goto abort_with_slices;
}
/* now that we have an iblock_cookie, init the mutexes */
if (status != DDI_SUCCESS)
goto abort_with_interrupts;
if (status != DDI_SUCCESS)
goto abort_with_stats;
/*
* Initialize GLD state
*/
#ifdef SOLARIS_S11
#endif
if (status != 0) {
goto abort_with_info;
}
if (myri10ge_verbose)
return (DDI_SUCCESS);
myri10ge_dummy_rdma(mgp, 0);
return (DDI_FAILURE);
}
static int
{
int status, i, jbufs_alloced;
if (cmd == DDI_SUSPEND) {
return (status);
}
if (cmd != DDI_DETACH) {
return (DDI_FAILURE);
}
/* Get the driver private (gld_mac_info_t) structure */
jbufs_alloced = 0;
for (i = 0; i < mgp->num_slices; i++) {
}
if (jbufs_alloced != 0) {
return (DDI_FAILURE);
}
return (DDI_FAILURE);
}
if (status != DDI_SUCCESS)
return (status);
myri10ge_dummy_rdma(mgp, 0);
} else {
}
return (DDI_SUCCESS);
}
/*
* Helper for quiesce entry point: Interrupt threads are not being
* scheduled, so we must poll for the confirmation DMA to arrive in
* the firmware stats block for slice 0. We're essentially running
* the guts of the interrupt handler, and just cherry picking the
* confirmation that the NIC is quiesced (stats->link_down)
*/
static int
{
int valid;
int found_down = 0;
/* check for a pending IRQ */
return (0);
/*
* Make sure to tell the NIC to lower a legacy IRQ, else
* it may have corrupt state after restarting
*/
/* lower legacy IRQ */
*mgp->irq_deassert = 0;
mb();
/* wait for irq conf DMA */
;
}
found_down = 1;
if (valid & 0x1)
return (found_down);
}
static int
{
struct myri10ge_priv *mgp;
return (DDI_FAILURE);
/* if the device was unplumbed, it is guaranteed to be quiescent */
return (DDI_SUCCESS);
/* send a down CMD to quiesce NIC */
if (status) {
return (DDI_FAILURE);
}
for (i = 0; i < 20; i++) {
if (down)
break;
mb();
}
if (down)
return (DDI_SUCCESS);
return (DDI_FAILURE);
}
/*
* Distinguish between allocb'ed blocks, and gesballoc'ed attached
* storage.
*
* Takes no arguments and returns nothing.
* NOTE(review): only an early-return path of the body is visible in
* this excerpt, so how the distinction is determined and where the
* result is recorded cannot be confirmed from here.
*/
static void
myri10ge_find_lastfree(void)
{
return;
}
}
/*
* Loadable-module load entry point.
*
* Logs a version banner when myri10ge_verbose is set, then installs
* the module with mod_install(&modlinkage).
*
* Returns the mod_install() result unchanged: 0 on success, non-zero
* on failure.  The failure branch is empty in this excerpt;
* NOTE(review): confirm whether cleanup is expected there.
*/
int
_init(void)
{
int i;		/* mod_install() status, returned to the caller */
if (myri10ge_verbose)
"Myricom 10G driver (10GbE) version %s loading\n",
if ((i = mod_install(&modlinkage)) != 0) {
}
return (i);
}
/*
* Loadable-module unload entry point.
*
* Asks the framework to remove the module with
* mod_remove(&modlinkage).  A non-zero result is passed straight back
* to the caller; otherwise 0 is returned.
*/
int
_fini(void)
{
int i;		/* mod_remove() status */
i = mod_remove(&modlinkage);
if (i != 0) {
/* removal refused or failed: report it unchanged */
return (i);
}
return (0);
}
int
{
}
/*
* This file uses MyriGE driver indentation.
*
* Local Variables:
* c-file-style:"sun"
* tab-width:8
* End:
*/