/* vsw.c revision 023505bcce788e9ac958a334707e49cddbf18d1d */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/sysmacros.h>
#include <sys/ethernet.h>
#include <sys/machsystm.h>
#include <sys/mac_ether.h>
#include <sys/mach_descrip.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/vio_util.h>
/*
* Function prototypes.
*/
static int vsw_get_physaddr(vsw_t *);
static int vsw_setup_switching(vsw_t *);
static int vsw_setup_layer2(vsw_t *);
static int vsw_setup_layer3(vsw_t *);
/* MAC Ring table functions. */
static vsw_queue_t *vsw_queue_create();
/* MAC layer routines */
static int vsw_get_hw_maddr(vsw_t *);
static int vsw_unset_hw_addr(vsw_t *, int);
static void vsw_reconfig_hw(vsw_t *);
static int vsw_prog_if(vsw_t *);
static int vsw_prog_ports(vsw_t *);
static int vsw_mac_register(vsw_t *);
static int vsw_mac_unregister(vsw_t *);
static void vsw_m_stop(void *arg);
static int vsw_m_start(void *arg);
/* MDEG routines */
static void vsw_marker_task(void *);
/* Interrupt routines */
/* Handshake routines */
static void vsw_ldc_reinit(vsw_ldc_t *);
static void vsw_conn_task(void *);
static void vsw_next_milestone(vsw_ldc_t *);
static int vsw_supported_version(vio_ver_msg_t *);
/* Data processing routines */
static void vsw_process_pkt(void *);
static void vsw_process_ctrl_pkt(void *);
static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *);
static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *);
static void vsw_process_data_raw_pkt(vsw_ldc_t *, void *);
static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *);
vsw_port_t *port);
vsw_port_t *port);
/* Packet creation routines */
static void vsw_send_ver(void *);
static void vsw_send_attr(vsw_ldc_t *);
static void vsw_send_dring_info(vsw_ldc_t *);
static void vsw_send_rdx(vsw_ldc_t *);
/* Forwarding database (FDB) routines */
static void vsw_del_mcst_port(vsw_port_t *);
static void vsw_del_mcst_vsw(vsw_t *);
/* Dring routines */
static void vsw_create_privring(vsw_ldc_t *);
int *);
static int vsw_check_dring_info(vio_dring_reg_msg_t *);
/* Misc support routines */
static int vsw_free_ring(dring_info_t *);
/* Debugging routines */
static void dump_flags(uint64_t);
static void display_state(void);
static void display_lane(lane_t *);
static void display_ring(dring_info_t *);
/*
 * Tunable delay, in microseconds.  NOTE(review): the code that consumes
 * this value is not visible in this portion of the file -- confirm its
 * use before changing the default.
 */
int vsw_desc_delay = 0; /* delay in us */
/*
 * MAC layer callback vector for the vsw device.
 * NOTE(review): this initializer looks truncated relative to the usual
 * mac_callbacks_t layout (flags word followed by entry points such as
 * vsw_m_stat/vsw_m_start/vsw_m_stop, which are prototyped above) --
 * confirm against the original source.
 */
static mac_callbacks_t vsw_m_callbacks = {
0,
NULL,
NULL,
};
/*
 * Character/block device entry points.  The vsw device exposes no real
 * cb entry points, so every routine slot is nulldev/nodev.
 */
static struct cb_ops vsw_cb_ops = {
nulldev, /* cb_open */
nulldev, /* cb_close */
nodev, /* cb_strategy */
nodev, /* cb_print */
nodev, /* cb_dump */
nodev, /* cb_read */
nodev, /* cb_write */
nodev, /* cb_ioctl */
nodev, /* cb_devmap */
nodev, /* cb_mmap */
nodev, /* cb_segmap */
nochpoll, /* cb_chpoll */
ddi_prop_op, /* cb_prop_op */
NULL, /* cb_stream */
D_MP, /* cb_flag */
CB_REV, /* rev */
nodev, /* int (*cb_aread)() */
nodev /* int (*cb_awrite)() */
};
DEVO_REV, /* devo_rev */
0, /* devo_refcnt */
vsw_getinfo, /* devo_getinfo */
nulldev, /* devo_identify */
nulldev, /* devo_probe */
vsw_attach, /* devo_attach */
vsw_detach, /* devo_detach */
nodev, /* devo_reset */
&vsw_cb_ops, /* devo_cb_ops */
ddi_power /* devo_power */
};
extern struct mod_ops mod_driverops;
/*
 * Loadable-module linkage for the driver: description string (with SCCS
 * keyword expansion) and the driver's dev_ops.
 * NOTE(review): a struct modldrv conventionally begins with
 * &mod_driverops (declared extern just above); that field appears to be
 * missing from this initializer -- confirm against the original source.
 */
static struct modldrv vswmodldrv = {
"sun4v Virtual Switch %I%",
&vsw_ops,
};
#define LDC_ENTER_LOCK(ldcp) \
#define LDC_EXIT_LOCK(ldcp) \
/*
 * Driver soft state ptr -- opaque handle from which per-instance state
 * is allocated/looked up by instance number.
 */
static void *vsw_state;
/*
* Linked list of "vsw_t" structures - one per instance.
*/
/*
* Property names
*/
/*
 * Machine-description (MD) / device-tree property names consulted by
 * the driver (e.g. "vsw-phys-dev" names the physical device the virtual
 * switch should use -- see the vsw-phys-dev discussion later in the
 * file).
 */
static char vdev_propname[] = "virtual-device";
static char vsw_propname[] = "virtual-network-switch";
static char physdev_propname[] = "vsw-phys-dev";
static char smode_propname[] = "vsw-switch-mode";
static char macaddr_propname[] = "local-mac-address";
static char remaddr_propname[] = "remote-mac-address";
static char ldcids_propname[] = "ldc-ids";
static char chan_propname[] = "channel-endpoint";
static char id_propname[] = "id";
static char reg_propname[] = "reg";
/* supported versions */
/*
* Matching criteria passed to the MDEG to register interest
* in changes to 'virtual-device-port' nodes identified by their
* 'id' property.
*/
static md_prop_match_t vport_prop_match[] = {
{ MDET_PROP_VAL, "id" },	/* match port nodes on their numeric 'id' */
{ MDET_LIST_END, NULL }		/* list terminator */
};
/*
* Matching criteria passed to the MDEG to register interest
* in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
* by their 'name' and 'cfg-handle' properties.
*/
static md_prop_match_t vdev_prop_match[] = {
{ MDET_PROP_STR, "name" },	/* string-valued 'name' property */
{ MDET_PROP_VAL, "cfg-handle" },	/* numeric 'cfg-handle' property */
{ MDET_LIST_END, NULL }		/* list terminator */
};
/*
* Specification of an MD node passed to the MDEG to filter any
* 'vport' nodes that do not belong to the specified node. This
* template is copied for each vsw instance and filled in with
* the appropriate 'cfg-handle' value before being passed to the MDEG.
*/
/*
 * NOTE(review): this initializer is empty here, yet the comment above
 * says the template is copied per instance and filled in with a
 * 'cfg-handle' value; the property-spec entries appear to be missing
 * from this copy of the file -- confirm against the original source.
 */
static mdeg_prop_spec_t vsw_prop_template[] = {
};
/*
* selection that is done a vsw driver attach time.
*/
/*
 * Number of MAC receive rings -- presumably used to size the MAC ring
 * table when vsw_multi_ring_enable is set (see the ring-table code
 * below); TODO(review) confirm against the full source.
 */
int vsw_mac_rx_rings = VSW_MAC_RX_RINGS;
/*
* Print debug messages - set to 0x1f to enable all msgs
* or 0x0 to turn all off.
*/
int vswdbg = 0x0;	/* debug-message bitmask; bit meanings listed below */
/*
* debug levels:
* 0x02: Internal function messages
* 0x04: Verbose internal messages
* 0x08: Warning messages
* 0x10: Error messages
*/
static void
{
char buf[512];
else
}
/*
* For the moment the state dump routines have their own
* private flag.
*/
#define DUMP_STATE 0
#if DUMP_STATE
{ \
}
#define DUMP_TAG_PTR(tag) \
{ \
}
#define DISPLAY_STATE() display_state()
#else
#define DUMP_TAG_PTR(tag)
#define DUMP_FLAGS(state)
#define DISPLAY_STATE()
#endif /* DUMP_STATE */
#ifdef DEBUG
#define D1 \
if (vswdbg & 0x01) \
#define D2 \
if (vswdbg & 0x02) \
#define D3 \
if (vswdbg & 0x04) \
#define DWARN \
if (vswdbg & 0x08) \
#define DERR \
if (vswdbg & 0x10) \
#else
#endif /* DEBUG */
static struct modlinkage modlinkage = {
};
int
_init(void)
{
int status;
if (status != 0) {
return (status);
}
if (status != 0) {
}
return (status);
}
int
_fini(void)
{
int status;
if (status != 0)
return (status);
rw_destroy(&vsw_rw);
return (status);
}
int
{
}
static int
{
int instance;
char hashname[MAXNAMELEN];
char qname[TASKQ_NAMELEN];
enum { PROG_init = 0x00,
PROG_if_lock = 0x01,
PROG_fdb = 0x02,
PROG_mfdb = 0x04,
PROG_report_dev = 0x08,
PROG_plist = 0x10,
PROG_taskq = 0x20}
switch (cmd) {
case DDI_ATTACH:
break;
case DDI_RESUME:
/* nothing to do for this non-device */
return (DDI_SUCCESS);
case DDI_PM_RESUME:
default:
return (DDI_FAILURE);
}
return (DDI_FAILURE);
}
goto vsw_attach_fail;
}
progress |= PROG_if_lock;
/* setup the unicast forwarding database */
mod_hash_null_valdtor, sizeof (void *));
/* setup the multicast forwarding database */
mod_hash_null_valdtor, sizeof (void *));
/*
* create lock protecting list of multicast addresses
* which could come via m_multicst() entry point when plumbed.
*/
/* setup the port list */
progress |= PROG_plist;
/*
* Create the taskq which will process all the VIO
* control messages.
*/
TASKQ_DEFAULTPRI, 0)) == NULL) {
goto vsw_attach_fail;
}
progress |= PROG_taskq;
/* prevent auto-detaching */
}
/*
* Now we have everything setup, register an interest in
* specific MD nodes.
*
* The callback is invoked in 2 cases, firstly if upon mdeg
* registration there are existing nodes which match our specified
* criteria, and secondly if the MD is changed (and again, there
* are nodes which we are interested in present within it. Note
* that our callback will be invoked even if our specified nodes
* have not actually changed).
*
* Until the callback is invoked we cannot switch any pkts as
* we don't know basic information such as what mode we are
* operating in. However we expect the callback to be invoked
* immediately upon registration as this driver should only
* be attaching if there are vsw nodes in the MD.
*/
if (vsw_mdeg_register(vswp))
goto vsw_attach_fail;
return (DDI_SUCCESS);
if (progress & PROG_taskq)
if (progress & PROG_plist)
if (progress & PROG_report_dev) {
}
}
}
if (progress & PROG_if_lock) {
}
return (DDI_FAILURE);
}
static int
{
int instance;
return (DDI_FAILURE);
}
switch (cmd) {
case DDI_DETACH:
break;
case DDI_SUSPEND:
case DDI_PM_SUSPEND:
default:
return (DDI_FAILURE);
}
if (vsw_mac_unregister(vswp) != 0) {
return (DDI_FAILURE);
}
}
/* remove mac layer callback */
}
if (vsw_detach_ports(vswp) != 0) {
return (DDI_FAILURE);
}
/*
* Now that the ports have been deleted, stop and close
* the physical device.
*/
if (vswp->mresources)
}
/*
* Destroy any free pools that may still exist.
*/
if (vio_destroy_mblks(poolp) != 0) {
return (DDI_FAILURE);
}
}
/*
* Remove this instance from any entries it may be on in
* the hash table by using the list of addresses maintained
* in the vsw_t structure.
*/
/*
* By now any pending tasks have finished and the underlying
* ldc's have been destroyed, so its safe to delete the control
* message taskq.
*/
/*
* At this stage all the data pointers in the hash table
* should be NULL, as all the ports have been removed and will
* have deleted themselves from the port lists which the data
* pointers point to. Hence we can destroy the table using the
* default destructors.
*/
break;
}
}
return (DDI_SUCCESS);
}
static int
{
int instance;
switch (infocmd) {
case DDI_INFO_DEVT2DEVINFO:
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
case DDI_INFO_DEVT2INSTANCE:
return (DDI_SUCCESS);
default:
return (DDI_FAILURE);
}
}
/*
* Get the value of the "vsw-phys-dev" property in the specified
* node. This property is the name of the physical device that
* the virtual switch will use to talk to the outside world.
*
* Note it is valid for this property to be NULL (but the property
* itself must exist). Callers of this routine should verify that
* the value returned is what they expected (i.e. either NULL or non NULL).
*
* On success returns value of the property in region pointed to by
* the 'name' argument, and with return value of 0. Otherwise returns 1.
*/
static int
{
int len = 0;
char *dev;
return (1);
return (1);
} else {
}
#ifdef DEBUG
/*
* As a temporary measure to aid testing we check to see if there
* is a vsw.conf file present. If there is we use the value of the
* vsw_physname property in the file as the name of the physical
* device, overriding the value from the MD.
*
* There may be multiple devices listed, but for the moment
* we just use the first one.
*/
return (1);
} else {
}
}
#endif
return (0);
}
/*
* Read the 'vsw-switch-mode' property from the specified MD node.
*
* Returns 0 on success and the number of modes found in 'found',
* otherwise returns 1.
*/
static int
{
int len = 0;
int smode_num = 0;
/*
* Get the switch-mode property. The modes are listed in
* decreasing order of preference, i.e. preferred mode is
* first item in list.
*/
len = 0;
smode_num = 0;
/*
* Unable to get switch-mode property from MD, nothing
* more we can do.
*/
*found = 0;
return (1);
}
/*
* Modes of operation:
* 'switched' - layer 2 switching, underlying HW in
* programmed mode.
* 'promiscuous' - layer 2 switching, underlying HW in
* promiscuous mode.
* 'routed' - layer 3 (i.e. IP) routing, underlying HW
* in non-promiscuous mode.
*/
} else {
"setting to default switched mode",
}
}
return (0);
}
/*
* Get the mac address of the physical device.
*
* Returns 0 on success, 1 on failure.
*/
static int
{
return (1);
return (1);
}
return (0);
}
/*
* Check to see if the card supports the setting of multiple unicst
* addresses.
*
* Returns 0 if card supports the programming of multiple unicast addresses,
* otherwise returns 1.
*/
static int
{
return (1);
}
return (1);
}
return (0);
}
/*
* Setup the required switching mode.
*
* Returns 0 on success, 1 on failure.
*/
static int
{
int i, rv = 1;
/* select best switching mode */
case VSW_LAYER2:
case VSW_LAYER2_PROMISC:
break;
case VSW_LAYER3:
break;
default:
rv = 1;
break;
}
if (rv == 0)
break;
}
if (rv == 1) {
return (rv);
}
return (0);
}
/*
* Setup for layer 2 switching.
*
* Returns 0 on success, 1 on failure.
*/
static int
{
/*
* Attempt to link into the MAC layer so we can get
* and send packets out over the physical adapter.
*/
if (vsw_mac_attach(vswp) != 0) {
/*
* Registration with the MAC layer has failed,
* so return 1 so that can fall back to next
* prefered switching method.
*/
return (1);
}
/*
* Verify that underlying device can support multiple
* unicast mac addresses.
*/
if (vsw_get_hw_maddr(vswp) != 0) {
return (1);
}
}
} else {
/*
* No physical device name found in MD which is
* required for layer 2.
*/
return (1);
}
return (0);
}
static int
{
return (0);
}
/*
* Link into the MAC layer to gain access to the services provided by
* the underlying physical device driver (which should also have
* registered with the MAC layer).
*
* Only when in layer 2 mode.
*/
static int
{
goto mac_fail_exit;
}
goto mac_fail_exit;
}
if (vsw_multi_ring_enable) {
/*
* Initialize the ring table.
*/
/*
* Register our rx callback function.
*/
vsw_rx_queue_cb, (void *)vswp);
/*
* Register our mac resource callback.
*/
/*
* Get the ring resources available to us from
* the mac below us.
*/
} else {
/*
* Just register our rx callback function
*/
}
/* Get the MAC tx fn */
/* start the interface */
goto mac_fail_exit;
}
return (0);
return (1);
}
static void
{
if (vsw_multi_ring_enable) {
}
if (vswp->mresources)
}
}
/*
* Depending on the mode specified, the capabilites and capacity
* of the underlying device setup the physical device.
*
* If in layer 3 mode, then do nothing.
*
* If in layer 2 programmed mode attempt to program the unicast address
* associated with the port into the physical device. If this is not
* possible due to resource exhaustion or simply because the device does
* not support multiple unicast addresses then if required fallback onto
* putting the card into promisc mode.
*
* If in promisc mode then simply set the card into promisc mode.
*
* Returns 0 success, 1 on failure.
*/
static int
{
int err;
return (0);
}
/*
* Attempt to program the unicast address into the HW.
*/
if (type == VSW_VNETPORT) {
} else {
/*
* Don't program if the interface is not UP. This
* is possible if the address has just been changed
* in the MD node, but the interface has not yet been
* plumbed.
*/
return (0);
}
}
if (err != 0) {
/*
* Mark that attempt should be made to re-config sometime
* in future if a port is deleted.
*/
/*
* Only 1 mode specified, nothing more to do.
*/
return (err);
/*
* If promiscuous was next mode specified try to
* set the card into that mode.
*/
== VSW_LAYER2_PROMISC)) {
}
return (err);
}
if (type == VSW_VNETPORT) {
} else {
}
"of device %s",
return (0);
}
/*
* If in layer 3 mode do nothing.
*
* If in layer 2 switched mode remove the address from the physical
* device.
*
* If in layer 2 promiscuous mode disable promisc mode.
*
* Returns 0 on success.
*/
static int
{
int rv;
return (0);
switch (type) {
case VSW_VNETPORT:
}
break;
case VSW_LOCALDEV:
}
break;
default:
/* should never happen */
ASSERT(0);
return (1);
}
return (rv);
}
/*
* Attempt to program a unicast address into HW.
*
* Returns 0 on success, 1 on failure.
*/
static int
{
void *mah;
int rv;
return (1);
if (rv == 0)
return (0);
/*
* It's okay for the add to fail because we have exhausted
* all the resources in the hardware device. Any other error
* we want to flag.
*/
"address %x:%x:%x:%x:%x:%x into HW "
}
return (1);
}
/*
* Remove a unicast mac address which has previously been programmed
* into HW.
*
* Returns 0 on success, 1 on failure.
*/
static int
{
void *mah;
int rv;
return (1);
if (rv != 0) {
"from slot %d in device %s (err %d)",
return (1);
}
return (0);
}
/*
* Set network card into promisc mode.
*
* Returns 0 on success, 1 on failure.
*/
static int
{
return (1);
}
if (vswp->promisc_cnt++ == 0) {
vswp->promisc_cnt--;
return (1);
}
}
if (type == VSW_VNETPORT) {
} else {
}
return (0);
}
/*
* Turn off promiscuous mode on network card.
*
* Returns 0 on success, 1 on failure.
*/
static int
{
return (1);
}
if (--vswp->promisc_cnt == 0) {
vswp->promisc_cnt++;
return (1);
}
/*
* We are exiting promisc mode either because we were
* only in promisc mode because we had failed over from
* switched mode due to HW resource issues, or the user
* wanted the card in promisc mode for all the ports and
* the last port is now being deleted. Tweak the message
* accordingly.
*/
} else {
}
}
if (type == VSW_VNETPORT) {
} else {
}
return (0);
}
/*
* Determine whether or not we are operating in our prefered
* mode and if not whether the physical resources now allow us
* to operate in it.
*
* If a port is being removed should only be invoked after port has been
* removed from the port list.
*/
static void
{
int s_idx;
return;
}
/*
* If we are in layer 2 (i.e. switched) or would like to be
* in layer 2 then check if any ports or the vswitch itself
* need to be programmed into the HW.
*
* This can happen in two cases - switched was specified as
* the prefered mode of operation but we exhausted the HW
* resources and so failed over to the next specifed mode,
* or switched was the only mode specified so after HW
* resources were exhausted there was nothing more we
* could do.
*/
else
return;
}
/*
* First, attempt to set the vswitch mac address into HW,
* if required.
*/
if (vsw_prog_if(vswp)) {
return;
}
/*
* Next, attempt to set any ports which have not yet been
* programmed into HW.
*/
if (vsw_prog_ports(vswp)) {
return;
}
/*
* By now we know that have programmed all desired ports etc
* into HW, so safe to mark reconfiguration as complete.
*/
}
/*
* Check to see if vsw itself is plumbed, and if so whether or not
* its mac address should be written into HW.
*
* Returns 0 if could set address, or didn't have to set it.
* Returns 1 if failed to set address.
*/
static int
{
return (1);
}
/*
* If previously when plumbed had had to place
* interface into promisc mode, now reverse that.
*
* Note that interface will only actually be set into
* programmed into HW.
*/
}
return (0);
}
/*
* Scan the port list for any ports which have not yet been set
* into HW. For those found attempt to program their mac addresses
* into the physical device.
*
* Returns 0 if able to program all required ports (can be 0) into HW.
* Returns 1 if failed to set at least one mac address.
*/
static int
{
vsw_port_t *tp;
int rv = 0;
rv = 1;
break;
}
/*
* If when this port had first attached we had
* had to place the interface into promisc mode,
* then now reverse that.
*
* Note that the interface will not actually
* change to non-promisc mode until all ports
* have been programmed.
*/
(void) vsw_unset_hw_promisc(vswp,
tp, VSW_VNETPORT);
}
}
return (rv);
}
static void
{
}
static void
{
int i;
vswp->mac_ring_tbl =
KM_SLEEP);
for (i = 0; i < vswp->mac_ring_tbl_sz; i++)
}
static void
{
int i;
for (i = 0; i < vswp->mac_ring_tbl_sz; i++) {
/*
* Destroy the queue.
*/
/*
* Re-initialize the structure.
*/
}
}
vswp->mac_ring_tbl_sz = 0;
}
/*
* Handle resource add callbacks from the driver below.
*/
static mac_resource_handle_t
{
int i;
/*
* Check to make sure we have the correct resource type.
*/
return (NULL);
/*
* Find a open entry in the ring table.
*/
for (i = 0; i < vswp->mac_ring_tbl_sz; i++) {
/*
* Check for an empty slot, if found, then setup queue
* and thread.
*/
/*
* Create the queue for this ring.
*/
vqp = vsw_queue_create();
/*
* Initialize the ring data structure.
*/
/*
* Create the worker thread.
*/
}
/*
* Make sure thread get's running state for
* this ring.
*/
}
/*
* If the thread is not running, cleanup.
*/
ringp);
}
}
return ((mac_resource_handle_t)ringp);
}
}
/*
* No slots in the ring table available.
*/
return (NULL);
}
static void
{
}
}
static vsw_queue_t *
{
return (vqp);
}
static void
{
}
static void
{
/*
* Set the state to running, since the thread is now active.
*/
/*
* Wait for work to do or the state has changed
* to not running.
*/
}
/*
* Process packets that we received from the interface.
*/
/* switch the chain of packets received */
}
}
/*
* We are drained and signal we are done.
*/
/*
* Exit lock and drain the remaining packets.
*/
/*
* Exit the thread
*/
thread_exit();
}
/*
* static void
* vsw_rx_queue_cb() - Receive callback routine when
* vsw_multi_ring_enable is non-zero. Queue the packets
* to a packet queue for a worker thread to process.
*/
static void
{
/*
* Find the last element in the mblk chain.
*/
do {
/* Get the queue for the packets */
/*
* Grab the lock such we can queue the packets.
*/
goto vsw_rx_queue_cb_exit;
}
/*
* Add the mblk chain to the queue. If there
* is some mblks in the queue, then add the new
* chain to the end.
*/
else
/*
* Signal the worker thread that there is work to
* do.
*/
/*
* Let go of the lock and exit.
*/
}
/*
* receive callback routine. Invoked by MAC layer when there
* are pkts being passed up from physical device.
*
* PERF: It may be more efficient when the card is in promisc
* mode to check the dest address of the pkts here (against
* the FDB) rather than checking later. Needs to be investigated.
*/
static void
{
/* switch the chain of packets received */
}
/*
* Send a message out over the physical device via the MAC layer.
*
* Returns any mblks that it was unable to transmit.
*/
static mblk_t *
{
const mac_txinfo_t *mtp;
return (mp);
} else {
for (;;) {
break;
}
break;
}
}
return (mp);
}
/*
* Register with the MAC layer as a network device, so we
* can be plumbed if necessary.
*/
static int
{
int rv;
return (EINVAL);
if (rv == 0)
return (rv);
}
static int
{
int rv = 0;
if (rv != 0) {
"framework", __func__);
return (rv);
}
/* mark i/f as down and unregistered */
}
return (rv);
}
static int
{
return (EINVAL);
}
/* return stats from underlying device */
return (0);
}
static void
vsw_m_stop(void *arg)
{
if (vswp->recfg_reqd)
}
/*
 * m_start entry point (see the MAC layer routine prototypes above).
 *
 * NOTE(review): as it appears here this simply reports success without
 * touching 'arg'; the body may have been truncated in this copy of the
 * file -- confirm against the original source before relying on this.
 */
static int
vsw_m_start(void *arg)
{
return (0);
}
/*
* Change the local interface address.
*
* Note: we don't support this entry point. The local
* mac address of the switch can only be changed via its
* MD node properties.
*/
static int
{
return (DDI_FAILURE);
}
static int
{
int i, ret = 0;
/*
* Convert address into form that can be used
* as hash table key.
*/
for (i = 0; i < ETHERADDRL; i++) {
}
if (add) {
/*
* Update the list of multicast addresses
* contained within the vsw_t structure to
* include this new one.
*/
return (1);
}
/*
* Call into the underlying driver to program the
* address into HW.
*/
if (ret != 0) {
"add multicast address",
goto vsw_remove_addr;
}
}
} else {
}
return (ret);
}
/*
* Remove the address from the hash table..
*/
/*
* ..and then from the list maintained in the
* vsw_t structure.
*/
}
return (0);
}
static int
{
if (on)
else
return (0);
}
static mblk_t *
{
return (NULL);
}
/*
* Register for machine description (MD) updates.
*
* Returns 0 on success, 1 on failure.
*/
static int
{
/*
* In each 'virtual-device' node in the MD there is a
* 'cfg-handle' property which is the MD's concept of
* an instance number (this may be completely different from
* the device drivers instance #). OBP reads that value and
* stores it in the 'reg' property of the appropriate node in
* the device tree. So we use the 'reg' value when registering
* with the mdeg framework, to ensure we get events for the
* correct nodes.
*/
if (inst == -1) {
return (1);
}
/*
* Allocate and initialize a per-instance copy
* of the global property spec array that will
* uniquely identify this vsw instance.
*/
templatesz = sizeof (vsw_prop_template);
/* initialize the complete prop spec structure */
/*
* Register an interest in 'virtual-device' nodes with a
* 'name' property of 'virtual-network-switch'
*/
if (rv != MDEG_SUCCESS) {
goto mdeg_reg_fail;
}
/*
* Register an interest in 'vsw-port' nodes.
*/
(void *)vswp, &mdeg_port_hdl);
if (rv != MDEG_SUCCESS) {
(void) mdeg_unregister(mdeg_hdl);
goto mdeg_reg_fail;
}
/* save off data that will be needed later */
return (0);
return (1);
}
static void
{
sizeof (vsw_prop_template));
}
sizeof (mdeg_node_spec_t));
}
}
/*
* Mdeg callback invoked for the vsw node itself.
*/
static int
{
int idx;
return (MDEG_FAILURE);
/*
* Expect 'added' to be non-zero if virtual-network-switch
* nodes exist in the MD when the driver attaches.
*/
continue;
}
continue;
}
}
/*
* A non-zero 'match' value indicates that the MD has been
* updated and that a virtual-network-switch node is present
* which may or may not have been updated. It is up to the clients
* to examine their own nodes and determine if they have changed.
*/
continue;
}
continue;
}
}
return (MDEG_SUCCESS);
}
/*
* Mdeg callback invoked for changes to the vsw-port nodes
* under the vsw node.
*/
static int
{
int idx;
return (MDEG_FAILURE);
/* process added ports */
}
}
/* process removed ports */
continue;
}
}
}
/*
* Currently no support for updating already active ports.
* So, ignore the match_curr and match_priv arrays for now.
*/
return (MDEG_SUCCESS);
}
/*
* Read the initial start-of-day values from the specified MD node.
*/
static void
{
int i;
/*
* Note it is valid for the physname property to
* be NULL so check actual name length to determine
* if we have a actual device name.
*/
} else {
return;
}
/* mac address for vswitch device itself */
/*
* Fallback to using the mac address of the physical
* device.
*/
if (vsw_get_physaddr(vswp) == 0) {
} else {
}
} else {
for (i = ETHERADDRL - 1; i >= 0; i--) {
macaddr >>= 8;
}
}
for (i = 0; i < NUM_SMODES; i++)
} else {
}
/*
* Unable to setup any switching mode, nothing more
* we can do.
*/
if (vsw_setup_switching(vswp))
return;
if (vsw_mac_register(vswp) != 0) {
/*
* Treat this as a non-fatal error as we may be
* able to operate in some other mode.
*/
}
}
}
/*
* Check to see if the relevant properties in the specified node have
* changed, and if so take the appropriate action.
*
* If any of the properties are missing or invalid we don't take
* any action, as this function should only be invoked when modifications
* have been made to what we assume is a working configuration, which
* we leave active.
*
* Note it is legal for this routine to be invoked even if none of the
* properties in the port node within the MD have actually changed.
*/
static void
{
int i, smode_num = 0;
enum {MD_init = 0x1,
MD_physname = 0x2,
MD_macaddr = 0x4,
/*
* Check if name of physical device in MD has changed.
*/
/*
* if its non NULL. It is valid for the device name to
* have changed from a non NULL to a NULL value, i.e.
* the vsw is being changed to 'routed' mode.
*/
&ddi_instance) != DDI_SUCCESS)) {
goto fail_reconf;
}
updated |= MD_physname;
} else {
}
} else {
goto fail_reconf;
}
/*
* Check if MAC address has changed.
*/
goto fail_reconf;
} else {
for (i = ETHERADDRL - 1; i >= 0; i--) {
!= (macaddr & 0xFF)) {
__func__, i,
(macaddr & 0xFF));
updated |= MD_macaddr;
break;
}
macaddr >>= 8;
}
}
/*
* Check if switching modes have changed.
*/
goto fail_reconf;
} else {
}
for (i = 0; i < smode_num; i++) {
break;
}
}
}
/*
* Now make any changes which are needed...
*/
/*
* Disconnect all ports from the current card
*/
/* Remove address if was programmed into HW. */
goto fail_update;
}
}
/*
* Stop, detach the old device..
*/
/*
* Update phys name.
*/
if (updated & MD_physname) {
}
/*
* Update array with the new switch mode values.
*/
for (i = 0; i < smode_num; i++)
}
/*
* ..and attach, start the new device.
*/
if (vsw_setup_switching(vswp))
goto fail_update;
/*
* Connect ports to new card.
*/
goto fail_update;
}
}
}
if (updated & MD_macaddr) {
for (i = ETHERADDRL - 1; i >= 0; i--) {
macaddr >>= 8;
}
/*
* Remove old address from HW (if programmed) and set
* new address.
*/
/*
* Notify the MAC layer of the changed address.
*/
}
return;
return;
}
/*
* Add a new port to the system.
*
* Returns 0 on success, 1 on failure.
*/
int
{
int i, addrsz;
int listsz = 0;
struct ether_addr ea;
return (1);
}
/*
* Find the channel endpoint node(s) (which should be under this
* port node) which contain the channel id(s).
*/
return (1);
}
/* allocate enough space for node list */
if (nchan <= 0) {
return (1);
}
/* use property from first node found */
return (1);
}
/* don't need list any more */
/* read mac-address property */
return (1);
}
if (addrsz < ETHERADDRL) {
return (1);
}
for (i = ETHERADDRL - 1; i >= 0; i--) {
macaddr >>= 8;
}
return (1);
}
/* just successfuly created the port, so it should exist */
return (0);
}
/*
* Attach the specified port.
*
* Returns 0 on success, 1 on failure.
*/
static int
struct ether_addr *macaddr)
{
int i;
/* port already exists? */
return (1);
}
}
if (nids > VSW_PORT_MAX_LDCS) {
}
for (i = 0; i < nids; i++) {
return (1);
}
}
/* link it into the list of ports for this vsw instance */
/*
* Initialise the port and any ldc's under it.
*/
(void) vsw_init_ldcs(port);
return (0);
}
/*
* Detach the specified port.
*
* Returns 0 on success, 1 on failure.
*/
static int
{
return (1);
}
return (1);
}
/* Remove any multicast addresses.. */
/*
* No longer need to hold writer lock on port list now
* that we have unlinked the target port from the list.
*/
/* Remove address if was programmed into HW. */
if (vswp->recfg_reqd)
if (vsw_port_delete(port)) {
return (1);
}
return (0);
}
/*
* Detach all active ports.
*
* Returns 0 on success, 1 on failure.
*/
static int
{
" from port list", __func__,
port->p_instance);
return (1);
}
/* Remove address if was programmed into HW. */
/* Remove any multicast addresses.. */
/*
* No longer need to hold the lock on the port list
* now that we have unlinked the target port from the
* list.
*/
if (vsw_port_delete(port)) {
return (1);
}
}
return (0);
}
/*
* Delete the specified port.
*
* Returns 0 on success, 1 on failure.
*/
static int
{
(void) vsw_uninit_ldcs(port);
/*
* Wait for any pending ctrl msg tasks which reference this
* port to finish.
*/
if (vsw_drain_port_taskq(port))
return (1);
/*
* Wait for port reference count to hit zero.
*/
/*
* Wait for any active callbacks to finish
*/
if (vsw_drain_ldcs(port))
return (1);
return (1);
}
}
return (0);
}
/*
* Attach a logical domain channel (ldc) under a specified port.
*
* Returns 0 on success, 1 on failure.
*/
static int
{
int status = DDI_FAILURE;
int rv;
PROG_callback = 0x2}
return (1);
}
/* allocate pool of receive mblks */
if (rv) {
return (1);
}
progress |= PROG_mblks;
/* required for handshake with peer */
ldcp->peer_session = 0;
ldcp->session_status = 0;
/* only set for outbound lane, inbound set by peer */
if (status != 0) {
goto ldc_attach_fail;
}
if (status != 0) {
goto ldc_attach_fail;
}
goto ldc_attach_fail;
}
/* link it into the list of channels for this port */
return (0);
if (progress & PROG_callback) {
}
/*
* Something odd has happened, as the destroy
* will only fail if some mblks have been allocated
* from the pool already (which shouldn't happen)
* and have not been returned.
*
* Add the pool pointer to a list maintained in
* the device instance. Another attempt will be made
* to free the pool when the device itself detaches.
*/
"failed and cannot destroy associated mblk "
}
}
return (1);
}
/*
* Detach a logical domain channel (ldc) belonging to a
* particular port.
*
* Returns 0 on success, 1 on failure.
*/
static int
{
int rv;
break;
}
}
/* specified ldc id not found */
return (1);
}
/*
* Before we can close the channel we must release any mapped
* resources (e.g. drings).
*/
/*
* If the close fails we are in serious trouble, as won't
* be able to delete the parent port.
*/
return (1);
}
/*
* Most likely some mblks are still in use and
* have not been returned to the pool. Add the pool
* to the list maintained in the device instance.
* Another attempt will be made to destroy the pool
* when the device detaches.
*/
}
}
/* unlink it from the list */
return (0);
}
/*
* Open and attempt to bring up the channel. Note that channel
* can only be brought up if peer has also opened channel.
*
* Returns 0 if can open and bring up channel, otherwise
* returns 1.
*/
static int
{
ldc_status_t istatus = 0;
int rv;
/* don't start at 0 in case clients don't like that */
if (rv != 0) {
return (1);
}
return (1);
return (1);
}
if (rv != 0) {
/*
* Not a fatal error for ldc_up() to fail, as peer
* end point may simply not be ready yet.
*/
return (1);
}
/*
* ldc_up() call is non-blocking so need to explicitly
* check channel status to see if in fact the channel
* is UP.
*/
return (1);
}
return (0);
}
return (0);
}
/* disable callbacks on the channel */
static int
{
int rv;
if (rv != 0) {
return (1);
}
return (0);
}
static int
{
(void) vsw_ldc_init(ldcp);
}
return (0);
}
static int
{
(void) vsw_ldc_uninit(ldcp);
}
return (0);
}
/*
* Wait until the callback(s) associated with the ldcs under the specified
* port have completed.
*
* Prior to this function being invoked each channel under this port
* should have been quiesced via ldc_set_cb_mode(DISABLE).
*
* A short explanation of what we are doing below.
*
* The simplest approach would be to have a reference counter in
* the ldc structure which is increment/decremented by the callbacks as
* they use the channel. The drain function could then simply disable any
* further callbacks and do a cv_wait for the ref to hit zero. Unfortunately
* there is a tiny window here - before the callback is able to get the lock
* on the channel it is interrupted and this function gets to execute. It
* sees that the ref count is zero and believes its free to delete the
* associated data structures.
*
* We get around this by taking advantage of the fact that before the ldc
* framework invokes a callback it sets a flag to indicate that there is a
* callback active (or about to become active). If when we attempt to
* unregister a callback when this active flag is set then the unregister
* will fail with EWOULDBLOCK.
*
* If the unregister fails we do a cv_timedwait. We will either be signaled
* by the callback as it is exiting (note we have to wait a short period to
* allow the callback to return fully to the ldc framework and it to clear
* the active flag), or by the timer expiring. In either case we again attempt
* the unregister. We repeat this until we can successfully unregister the
* callback.
*
* The reason we use a cv_timedwait rather than a simple cv_wait is to catch
* the case where the callback has finished but the ldc framework has not yet
* cleared the active flag. In this case we would never get a cv_signal.
*/
static int
{
/*
* If we can unregister the channel callback then we
* know that there is no callback either running or
* scheduled to run for this channel so move on to next
* channel in the list.
*/
/* prompt active callbacks to quit */
continue;
} else {
/*
* If we end up here we know that either 1) a callback
* is currently executing, 2) is about to start (i.e.
* the ldc framework has set the active flag but
* has not actually invoked the callback yet, or 3)
* has finished and has returned to the ldc framework
* but the ldc framework has not yet cleared the
* active bit.
*
* Wait for it to finish.
*/
== EWOULDBLOCK)
}
}
return (0);
}
/*
* Wait until all tasks which reference this port have completed.
*
* Prior to this function being invoked each channel under this port
* should have been quiesced via ldc_set_cb_mode(DISABLE).
*/
static int
{
/*
* Mark the port as in the process of being detached, and
* dispatch a marker task to the queue so we know when all
* relevant tasks have completed.
*/
__func__);
return (1);
}
/*
* Wait for the marker task to finish.
*/
return (0);
}
/*
 * Taskq marker used by vsw_drain_port_taskq(): because the taskq runs
 * entries in dispatch order, this task executing means every task queued
 * before it (all of which may reference the port) has completed.
 *
 * NOTE(review): the body of this function appears to have been truncated
 * in this copy -- the original presumably marks the port state as safe
 * to detach and signals the waiting detach thread; confirm against the
 * upstream vsw.c before relying on this file.
 */
static void
vsw_marker_task(void *arg)
{
/*
 * No further tasks should be dispatched which reference
 * this port so ok to mark it as safe to detach.
 */
}
static vsw_port_t *
{
return (port);
}
}
return (NULL);
}
/*
* Search for and remove the specified port from the port
* list. Returns 0 if able to locate and remove port, otherwise
* returns 1.
*/
static int
{
return (1);
} else {
}
break;
} else {
}
}
return (0);
}
/*
* Interrupt handler for ldc messages.
*/
static uint_t
{
return (LDC_SUCCESS);
}
if (event & LDC_EVT_UP) {
/*
* Channel has come up.
*/
}
if (event & LDC_EVT_READ) {
/*
* Data available for reading.
*/
goto vsw_cb_exit;
}
}
/*
* Catch either LDC_EVT_WRITE which we don't support or any
* unknown event.
*/
| LDC_EVT_DOWN | LDC_EVT_READ)) {
}
/*
* Let the drain function know we are finishing if it
* is waiting.
*/
return (LDC_SUCCESS);
}
/*
* Reinitialise data structures associated with the channel.
*/
static void
{
/*
* Remove parent port from any multicast groups
* it may have registered with. Client must resend
* multicast add command after handshake completes.
*/
ldcp->peer_session = 0;
ldcp->session_status = 0;
}
/*
* Process a connection event.
*
* Note - care must be taken to ensure that this function is
* not called with the dlistrw lock held.
*/
static void
{
/*
* Check if either a reset or restart event is pending
* or in progress. If so just return.
*
* A VSW_CONN_RESET event originates either with a LDC_RESET_EVT
* being received by the callback handler, or a ECONNRESET error
* code being returned from a ldc_read() or ldc_write() call.
*
* A VSW_CONN_RESTART event occurs when some error checking code
* decides that there is a problem with data from the channel,
* and that the handshake should be restarted.
*/
return;
/*
* If it is an LDC_UP event we first check the recorded
* state of the channel. If this is UP then we know that
* the channel moving to the UP state has already been dealt
* with and don't need to dispatch a new task.
*
* The reason for this check is that when we do a ldc_up(),
* depending on the state of the peer, we may or may not get
* a LDC_UP event. As we can't depend on getting a LDC_UP evt
* every time we do ldc_up() we explicitly check the channel
* status to see has it come up (ldc_up() is asynch and will
* complete at some undefined time), and take the appropriate
* action.
*
* The flip side of this is that we may get a LDC_UP event
* when we have already seen that the channel is up and have
* dealt with that.
*/
if (evt == VSW_CONN_UP) {
(ldcp->reset_active != 0)) {
return;
}
}
/*
* The transaction group id allows us to identify and discard
* any tasks which are still pending on the taskq and refer
* to the handshake session we are about to restart or reset.
* These stale messages no longer have any real meaning.
*/
goto err_exit;
}
DDI_NOSLEEP) != DDI_SUCCESS) {
goto err_exit;
}
return;
/*
* Has most likely failed due to memory shortage. Clear the flag so
* that future requests will at least be attempted and will hopefully
* succeed.
*/
ldcp->reset_active = 0;
}
/*
* Deal with events relating to a connection. Invoked from a taskq.
*/
/*
 * Deal with events relating to a connection. Invoked from a taskq
 * (dispatched by the connection-event code above).
 *
 * NOTE(review): this function body is corrupted -- multiple source lines
 * are missing (the argument unpacking, the ldc_down()/re-init calls, the
 * status checks the fragments below refer to) and the braces do not
 * balance as shown. Do not edit in place; recover the full body from the
 * upstream vsw.c. The comments retained below describe the intended flow.
 */
static void
vsw_conn_task(void *arg)
{
/* can safely free now have copied out data */
return;
}
/*
 * If we wish to restart the handshake on this channel, then if
 * the channel is UP we bring it DOWN to flush the underlying
 * ldc queue.
 */
/*
 * re-init all the associated data structures.
 */
/*
 * Bring the channel back up (note it does no harm to
 * do this even if the channel is already UP, Just
 * becomes effectively a no-op).
 */
/*
 * Check if channel is now UP. This will only happen if
 * peer has also done a ldc_up().
 */
return;
}
/* channel UP so restart handshake by sending version info */
if (curr_status == LDC_UP) {
" handshake attempts (%d) on channel %ld",
return;
}
DDI_NOSLEEP) != DDI_SUCCESS) {
/*
 * Don't count as valid restart attempt if couldn't
 * send version msg.
 */
}
}
/*
 * Mark that the process is complete by clearing the flag.
 *
 * Note is it possible that the taskq dispatch above may have failed,
 * most likely due to memory shortage. We still clear the flag so
 * future attempts will at least be attempted and will hopefully
 * succeed.
 */
ldcp->reset_active = 0;
}
/*
* returns 0 if legal for event signified by flag to have
* occurred at the time it did. Otherwise returns 1.
*/
int
{
else
switch (flag) {
case VSW_VER_INFO_RECV:
if (phase > VSW_MILESTONE0) {
return (1);
}
break;
case VSW_VER_ACK_RECV:
case VSW_VER_NACK_RECV:
if (!(state & VSW_VER_INFO_SENT)) {
" or VER_NACK when in state %d\n",
return (1);
} else
state &= ~VSW_VER_INFO_SENT;
break;
case VSW_ATTR_INFO_RECV:
return (1);
}
break;
case VSW_ATTR_ACK_RECV:
case VSW_ATTR_NACK_RECV:
if (!(state & VSW_ATTR_INFO_SENT)) {
" or ATTR_NACK when in state %d\n",
return (1);
} else
state &= ~VSW_ATTR_INFO_SENT;
break;
case VSW_DRING_INFO_RECV:
if (phase < VSW_MILESTONE1) {
return (1);
}
break;
case VSW_DRING_ACK_RECV:
case VSW_DRING_NACK_RECV:
if (!(state & VSW_DRING_INFO_SENT)) {
" or DRING_NACK when in state %d\n",
return (1);
} else
state &= ~VSW_DRING_INFO_SENT;
break;
case VSW_RDX_INFO_RECV:
if (phase < VSW_MILESTONE3) {
return (1);
}
break;
case VSW_RDX_ACK_RECV:
case VSW_RDX_NACK_RECV:
if (!(state & VSW_RDX_INFO_SENT)) {
" or RDX_NACK when in state %d\n",
return (1);
} else
state &= ~VSW_RDX_INFO_SENT;
break;
case VSW_MCST_INFO_RECV:
if (phase < VSW_MILESTONE3) {
return (1);
}
break;
default:
return (1);
}
else
return (0);
}
void
{
case VSW_MILESTONE0:
/*
* If we haven't started to handshake with our peer,
* start to do so now.
*/
}
/*
* Only way to pass this milestone is to have successfully
* negotiated version info.
*/
/*
* Next milestone is passed when attribute
* information has been successfully exchanged.
*/
}
break;
case VSW_MILESTONE1:
/*
* Only way to pass this milestone is to have successfully
* negotiated attribute information.
*/
/*
* If the peer device has said it wishes to
* use descriptor rings then we send it our ring
* info, otherwise we just set up a private ring
* which we use an internal buffer
*/
}
break;
case VSW_MILESTONE2:
/*
* If peer has indicated in its attribute message that
* it wishes to use descriptor rings then the only way
* to pass this milestone is for us to have received
* valid dring info.
*
* If peer is not using descriptor rings then just fall
* through.
*/
break;
break;
case VSW_MILESTONE3:
/*
* Pass this milestone when all parameters have been
* successfully exchanged and RDX sent in both directions.
*
* Mark outbound lane as available to transmit data.
*/
} else {
}
break;
case VSW_MILESTONE4:
break;
default:
}
}
/*
* Check if major version is supported.
*
* Returns 0 if finds supported major number, and if necessary
* adjusts the minor field.
*
* to the next lowest supported values, or to zero if no other values are
* possible. (NOTE: a line of this comment appears to be missing above --
* it presumably described adjusting ver_major/ver_minor on mismatch.)
*/
static int
{
int i;
for (i = 0; i < VSW_NUM_VER; i++) {
/*
* Matching or lower major version found. Update
* minor number if necessary.
*/
" from %d to %d", __func__,
vsw_versions[i].ver_minor);
}
return (0);
}
" from %d to %d", __func__,
vsw_versions[i].ver_minor);
}
return (1);
}
}
/* No match was possible, zero out fields */
return (1);
}
/*
* Main routine for processing messages received over LDC.
*/
/*
 * Main routine for processing messages received over LDC: drain the
 * channel, tagging each message by its VIO type and handing it to the
 * matching ctrl/data/err sub-handler.
 *
 * NOTE(review): several lines are missing from this copy -- the ldc_read()
 * call and the declarations of `msglen` and `tag` used below, plus the
 * per-case dispatch calls inside the switch. Recover the full body from
 * the upstream vsw.c; as it stands this will not compile.
 */
static void
vsw_process_pkt(void *arg)
{
int rv = 0;
/*
 * If channel is up read messages until channel is empty.
 */
do {
if (rv != 0) {
}
/* channel has been reset; stop reading */
if (rv == ECONNRESET) {
break;
}
/* zero-length read means the channel queue is empty */
if (msglen == 0) {
break;
}
/*
 * Figure out what sort of packet we have gotten by
 * examining the msg tag, and then switch it appropriately.
 */
switch (tag.vio_msgtype) {
case VIO_TYPE_CTRL:
break;
case VIO_TYPE_DATA:
break;
case VIO_TYPE_ERR:
break;
default:
break;
}
} while (msglen);
}
/*
* Dispatch a task to process a VIO control message.
*/
static void
{
/*
* We need to handle RDX ACK messages in-band as once they
* are exchanged it is possible that we will get an
* immediate (legitimate) data packet.
*/
return;
"(ostate 0x%llx : hphase %d)", __func__,
return;
}
" msg", __func__);
return;
}
/*
* Dispatch task to processing taskq if port is not in
* the process of being detached.
*/
!= DDI_SUCCESS)) {
__func__);
return;
}
} else {
}
}
/*
* Process a VIO ctrl message. Invoked from taskq.
*/
/*
 * Process a VIO ctrl message. Invoked from taskq.
 *
 * Validates the message against the current handshake session (stale
 * transaction-group and session-id checks) and then dispatches on the
 * vio_subtype_env to the appropriate version/attr/dring/mcast/RDX handler.
 *
 * NOTE(review): this body is truncated -- the argument unpacking, the
 * conditional heads for the two checks below (only a string fragment of
 * the first error message remains), the declaration of `env`, and the
 * per-case handler calls are all missing. Braces do not balance as shown;
 * recover the full body from the upstream vsw.c.
 */
static void
vsw_process_ctrl_pkt(void *arg)
{
/* stale pkt check */
" earlier (%ld) handshake session", __func__,
return;
}
/* session id check */
return;
}
}
/*
 * Switch on vio_subtype envelope, then let lower routines
 * decide if its an INFO, ACK or NACK packet.
 */
switch (env) {
case VIO_VER_INFO:
break;
case VIO_DRING_REG:
break;
case VIO_DRING_UNREG:
break;
case VIO_ATTR_INFO:
break;
case VNET_MCAST_INFO:
break;
case VIO_RDX:
break;
default:
}
}
/*
* Version negotiation. We can end up here either because our peer
* has responded to a handshake message we have sent it, or our peer
* has initiated a handshake with us. If it's the former it can only
* be ACK or NACK; if it's the latter it can only be INFO.
*
* If its an ACK we move to the next stage of the handshake, namely
* attribute exchange. If its a NACK we see if we can specify another
* version, if we can't we stop.
*
* If it is an INFO we reset all params associated with communication
* in that direction over this channel (remember connection is
* essentially 2 independent simplex channels).
*/
void
{
/*
* cast it into the correct structure.
*/
case VIO_SUBTYPE_INFO:
/*
* Record the session id, which we will use from now
* until we see another VER_INFO msg. Even then the
* session id in most cases will be unchanged, execpt
* if channel was reset.
*/
}
/* Legal message at this time ? */
return;
/*
* First check the device class. Currently only expect
* to be talking to a network device. In the future may
* also talk to another switch.
*/
sizeof (vio_ver_msg_t), B_TRUE);
return;
} else {
}
/*
* Now check the version.
*/
if (vsw_supported_version(ver_pkt) == 0) {
/*
* Support this major version and possibly
* adjusted minor version.
*/
/* Store accepted values */
} else {
/*
* pairing we support (if don't suuport any more
* versions then they will be set to zero.
*/
/* Store updated values */
}
sizeof (vio_ver_msg_t), B_TRUE);
break;
case VIO_SUBTYPE_ACK:
return;
/* Store updated values */
break;
case VIO_SUBTYPE_NACK:
return;
/*
* If our peer sent us a NACK with the ver fields set to
* zero then there is nothing more we can do. Otherwise see
* if we support either the version suggested, or a lesser
* one.
*/
"further.", __func__);
return;
}
/*
* Check to see if we support this major version or
* to zero.
*/
(void) vsw_supported_version(ver_pkt);
/* Nothing more we can do */
__func__);
} else {
/* found a supported major version */
sizeof (vio_ver_msg_t), B_TRUE);
}
break;
default:
}
}
/*
* Process an attribute packet. We can end up here either because our
* peer has sent us an attribute INFO message, or has responded to one
* we sent it.
*
* If its an ACK we then move to the next stage of the handshake which
* is to send our descriptor ring info to our peer. If its a NACK then
* there is nothing more we can (currently) do.
*
* If we get a valid/acceptable INFO packet (and we have already negotiated
* a version) we ACK back and set channel state to ATTR_RECV, otherwise we
* NACK back and reset channel state to INACTIV.
*
* FUTURE: in time we will probably negotiate over attributes, but for
* the moment unacceptable attributes are regarded as a fatal error.
*
*/
void
{
int i;
/*
* cast it into the correct structure.
*/
case VIO_SUBTYPE_INFO:
return;
/*
* If the attributes are unacceptable then we NACK back.
*/
sizeof (vnet_attr_msg_t), B_TRUE);
return;
}
/*
* Otherwise store attributes for this lane and update
* lane state.
*/
for (i = ETHERADDRL - 1; i >= 0; i--) {
macaddr >>= 8;
}
/* setup device specifc xmit routines */
}
sizeof (vnet_attr_msg_t), B_TRUE);
break;
case VIO_SUBTYPE_ACK:
return;
break;
case VIO_SUBTYPE_NACK:
return;
break;
default:
}
}
/*
* Process a dring info packet. We can end up here either because our
* peer has sent us a dring INFO message, or has responded to one we
* sent it.
*
* If we get a valid/acceptable INFO packet (and we have already negotiated
* a version) we ACK back and update the lane state, otherwise we NACK back.
*
* FUTURE: nothing to stop client from sending us info on multiple dring's
* but for the moment we will just use the first one we are given.
*
*/
void
{
int dring_found = 0;
/*
* cast it into the correct structure.
*/
case VIO_SUBTYPE_INFO:
return;
/*
* If the dring params are unacceptable then we NACK back.
*/
if (vsw_check_dring_info(dring_pkt)) {
sizeof (vio_dring_reg_msg_t), B_TRUE);
return;
}
/*
* Otherwise, attempt to map in the dring using the
* cookie. If that succeeds we send back a unique dring
* identifier that the sending side will use in future
* to refer to this descriptor ring.
*/
/*
* Note: should only get one cookie. Enforced in
* the ldc layer.
*/
sizeof (ldc_mem_cookie_t));
sizeof (vio_dring_reg_msg_t), B_TRUE);
return;
}
sizeof (vio_dring_reg_msg_t), B_TRUE);
return;
} else {
/* store the address of the pub part of ring */
}
/* no private section as we are importing */
/*
* Using simple mono increasing int for ident at
* the moment.
*/
ldcp->next_ident++;
/*
* Link it onto the end of the list of drings
* for this lane.
*/
} else {
}
/* acknowledge it */
sizeof (vio_dring_reg_msg_t), B_TRUE);
break;
case VIO_SUBTYPE_ACK:
return;
/*
* Peer is acknowledging our dring info and will have
* sent us a dring identifier which we will use to
* refer to this ring w.r.t. our peer.
*/
/*
* Find the ring this ident should be associated
* with.
*/
dring_found = 1;
dring_found = 1;
break;
}
}
if (dring_found == 0) {
__func__);
return;
}
} else {
"allocated", __func__);
return;
}
/* store ident */
break;
case VIO_SUBTYPE_NACK:
return;
break;
default:
}
}
/*
* Process a request from peer to unregister a dring.
*
* For the moment we just restart the handshake if our
* peer endpoint attempts to unregister a dring.
*/
void
{
/*
* cast it into the correct structure.
*/
case VIO_SUBTYPE_INFO:
break;
case VIO_SUBTYPE_ACK:
break;
case VIO_SUBTYPE_NACK:
break;
default:
}
}
sizeof (vnet_mcast_msg_t), B_TRUE);
/*
* Process a multicast request from a vnet.
*
* Vnet's specify a multicast address that they are interested in. This
* address is used as a key into the hash table which forms the multicast
* forwarding database (mFDB).
*
* The table keys are the multicast addresses, while the table entries
* are pointers to lists of ports which wish to receive packets for the
* specified multicast address.
*
* When a multicast packet is being switched we use the address as a key
* into the hash table, and then walk the appropriate port list forwarding
* the pkt to each port in turn.
*
* If a vnet is no longer interested in a particular multicast grouping
* we simply find the correct location in the hash table and then delete
* the relevant port from the port list.
*
* To deal with the case whereby a port is being deleted without first
* removing itself from the lists in the hash table, we maintain a list
* of multicast addresses the port has registered an interest in, within
* the port structure itself. We then simply walk that list of addresses
* using them as keys into the hash table and remove the port from the
* appropriate lists.
*/
static void
{
int i;
/*
* cast it into the correct structure.
*/
case VIO_SUBTYPE_INFO:
/*
* Check if in correct state to receive a multicast
* message (i.e. handshake complete). If not reset
* the handshake.
*/
return;
/*
* Before attempting to add or remove address check
* that they are valid multicast addresses.
* If not, then NACK back.
*/
__func__);
return;
}
}
/*
* NACK back.
*/
return;
}
sizeof (vnet_mcast_msg_t), B_TRUE);
break;
case VIO_SUBTYPE_ACK:
/*
* We shouldn't ever get a multicast ACK message as
* at the moment we never request multicast addresses
* to be set on some other device. This may change in
* the future if we have cascading switches.
*/
return;
/* Do nothing */
break;
case VIO_SUBTYPE_NACK:
/*
* We shouldn't get a multicast NACK packet for the
* same reasons as we shouldn't get a ACK packet.
*/
return;
/* Do nothing */
break;
default:
}
}
static void
{
/*
* cast it into the correct structure.
*/
case VIO_SUBTYPE_INFO:
return;
sizeof (vio_rdx_msg_t), B_TRUE);
break;
case VIO_SUBTYPE_ACK:
/*
* Should be handled in-band by callback handler.
*/
break;
case VIO_SUBTYPE_NACK:
return;
break;
default:
}
}
static void
{
/* session id check */
return;
}
}
/*
* It is an error for us to be getting data packets
* before the handshake has completed.
*/
return;
}
/*
* Switch on vio_subtype envelope, then let lower routines
* decide if its an INFO, ACK or NACK packet.
*/
if (env == VIO_DRING_DATA) {
} else if (env == VIO_PKT_DATA) {
} else if (env == VIO_DESC_DATA) {
} else {
}
}
sizeof (vio_dring_msg_t), B_TRUE);
static void
{
int read_attempts = 0;
/*
* cast it into the correct structure.
*/
/*
* Switch on the vio_subtype. If its INFO then we need to
* process the data. If its an ACK we need to make sure
* and if its a NACK then we maybe attempt a retry.
*/
case VIO_SUBTYPE_INFO:
return;
}
if (end == -1) {
num = -1;
} else if (end >= 0) {
/* basic sanity check */
"ring length %lld", __func__,
return;
}
} else {
return;
}
"descriptor at pos %d: err %d",
return;
}
/*
* When given a bounded range of descriptors
* to process, its an error to hit a descriptor
* which is not ready. In the non-bounded case
* (end_idx == -1) this simply indicates we have
* reached the end of the current active range.
*/
/* unbound - no error */
if (end == -1) {
if (read_attempts == vsw_read_attempts)
break;
goto vsw_recheck_desc;
}
/* bounded - error - so NACK back */
return;
}
/*
* If we ACK'd the previous descriptor then now
* record the new range start position for later
* ACK's.
*/
if (prev_desc_ack) {
range_start = pos;
}
/*
* Data is padded to align on 8 byte boundary,
* datalen is actual data length, i.e. minus that
* padding.
*/
/*
* Does peer wish us to ACK when we have finished
* with this descriptor ?
*/
ack_needed = B_TRUE;
" 0x%llx : dstate 0x%lx : datalen 0x%lx",
/*
* Mark that we are starting to process descriptor.
*/
/*
* No free receive buffers available, so
* fallback onto allocb(9F). Make sure that
* we get a data buffer which is a multiple
* of 8 as this is required by ldc_mem_copy.
*/
BPRI_MED);
}
/*
* Ensure that we ask ldc for an aligned
* number of bytes.
*/
if (nbytes & 0x7) {
}
if (rv != 0) {
"data from %d cookies in desc %d"
break;
} else {
" using %d cookies", __func__,
}
/* adjust the read pointer to skip over the padding */
/* point to the actual end of data */
/* build a chain of received packets */
/* first pkt */
chain = 1;
} else {
chain++;
}
/* mark we are finished with this descriptor */
/*
* Send an ACK back to peer if requested.
*/
if (ack_needed) {
sizeof (vio_dring_msg_t),
B_FALSE);
/*
* Check if ACK was successfully sent. If not
* we break and deal with that below.
*/
if (msg_rv != 0)
break;
range_start = pos;
}
/* next descriptor */
cnt++;
/*
* Break out of loop here and stop processing to
* allow some other network device (or disk) to
* get access to the cpu.
*/
if (chain > vsw_chain_len) {
break;
}
}
/*
* If when we attempted to send the ACK we found that the
* channel had been reset then now handle this. We deal with
* it here as we cannot reset the channel while holding the
* continuously in the above loop, as a channel reset should
* be a rare event.
*/
if (msg_rv == ECONNRESET) {
break;
}
/* send the chain of packets to be switched */
}
/*
* We are now finished so ACK back with the state
* set to STOPPING so our peer knows we are finished
*/
/*
* We have not processed any more descriptors beyond
* the last one we ACK'd.
*/
if (prev_desc_ack)
sizeof (vio_dring_msg_t), B_TRUE);
break;
case VIO_SUBTYPE_ACK:
/*
* Verify that the relevant descriptors are all
* marked as DONE
*/
return;
}
j = num = 0;
/* calculate # descriptors taking into a/c wrap around */
/*
* If the last descriptor in a range has the ACK
* bit set then we will get two messages from our
* peer relating to it. The normal ACK msg and then
* a subsequent STOP msg. The first message will have
* resulted in the descriptor being reclaimed and
* its state set to FREE so when we encounter a non
* DONE descriptor we need to check to see if its
* because we have just reclaimed it.
*/
/* clear all the fields */
"0x%llx : priv state 0x%llx", i,
} else {
if (dring_pkt->dring_process_state !=
" 0x%llx not DONE (0x%lx)\n",
return;
}
}
}
/*
* If our peer is stopping processing descriptors then
* we check to make sure it has processed all the descriptors
* we have updated. If not then we send it a new message
* to prompt it to restart.
*/
/*
* Check next descriptor in public section of ring.
* If its marked as READY then we need to prompt our
* peer to start processing the ring again.
*/
/*
* Hold the restart lock across all of this to
* make sure that its not possible for us to
* decide that a msg needs to be sent in the future
* but the sending code having already checked is
* about to exit.
*/
sizeof (vio_dring_msg_t), B_FALSE);
} else {
}
}
/* only do channel reset after dropping dlistrw lock */
if (msg_rv == ECONNRESET)
break;
case VIO_SUBTYPE_NACK:
/*
* Something is badly wrong if we are getting NACK's
* for our data pkts. So reset the channel.
*/
break;
default:
}
}
/*
* VIO_PKT_DATA (a.k.a raw data mode )
*
* Note - currently not supported. Do nothing.
*/
static void
{
}
/*
* Process an in-band descriptor message (most likely from
* OBP).
*/
static void
{
int i, rv;
int j = 0;
case VIO_SUBTYPE_INFO:
return;
/*
* Data is padded to align on a 8 byte boundary,
* nbytes is actual data length, i.e. minus that
* padding.
*/
/*
* allocb(9F) returns an aligned data block. We
* need to ensure that we ask ldc for an aligned
* number of bytes also.
*/
if (nbytes & 0x7) {
}
return;
}
if (rv != 0) {
"%d cookie(s)", __func__,
return;
}
ncookies);
/*
* Upper layer is expecting the IP header in the packet to
* be 4-bytes aligned, but the OBP is sending packets that
* are not aligned. So, copy the data to another message
* such that the alignment requirement is met.
*/
return;
}
/* point to the actual end of data */
/*
* We ACK back every in-band descriptor message we process
*/
sizeof (vnet_ibnd_desc_t), B_TRUE);
/* send the packet to be switched */
break;
case VIO_SUBTYPE_ACK:
/* Verify the ACK is valid */
if (idx >= VSW_RING_NUM_EL) {
return;
}
return;
}
/*
* If the descriptor we are being ACK'ed for is not the
* one we expected, then pkts were lost somwhere, either
* when we tried to send a msg, or a previous ACK msg from
* our peer. In either case we now reclaim the descriptors
* in the range from the last ACK we received up to the
* current ACK.
*/
}
/*
* When we sent the in-band message to our peer we
* marked the copy in our private ring as READY. We now
* check that the descriptor we are being ACK'ed for is in
* fact READY, i.e. it is one we have shared with our peer.
*
* If its not we flag an error, but still reset the descr
* back to FREE.
*/
"READY (0x%lx)", __func__,
"datalen %ld", __func__,
}
/* release resources associated with sent msg */
}
/* update to next expected value */
break;
case VIO_SUBTYPE_NACK:
/*
* We should only get a NACK if our peer doesn't like
* something about a message we have sent it. If this
* happens we just release the resources associated with
* the message. (We are relying on higher layers to decide
* whether or not to resend.
*/
/* limit check */
if (idx >= VSW_RING_NUM_EL) {
return;
}
return;
}
/* move to correct location in ring */
/* release resources associated with sent msg */
break;
default:
}
}
static void
{
/*
* Error vio_subtypes have yet to be defined. So for
* the moment we can't do anything.
*/
}
/*
* Switch the given ethernet frame when operating in layer 2 mode.
*
* vswp: pointer to the vsw instance
* mp: pointer to chain of ethernet frame(s) to be switched
* caller: identifies the source of this frame as:
* 1. VSW_VNETPORT - a vsw port (connected to a vnet).
* 2. VSW_PHYSDEV - the physical ethernet device
* 3. VSW_LOCALDEV - vsw configured as a virtual interface
* arg: argument provided by the caller.
* 1. for VNETPORT - pointer to the corresponding vsw_port_t.
* 2. for PHYSDEV - NULL
* 3. for LOCALDEV - pointer to this vsw_t (self)
*/
void
{
struct ether_header *ehp;
/*
* PERF: rather than breaking up the chain here, scan it
* to find all mblks heading to same destination and then
* pass that sub-chain to the lower transmit functions.
*/
/* process the chain of packets */
while (bp) {
/*
* If destination is VSW_LOCALDEV (vsw as an eth
* interface) and if the device is up & running,
* send the packet up the stack on this host.
* If the virtual interface is down, drop the packet.
*/
if (caller != VSW_LOCALDEV) {
} else {
/* Interface down, drop pkt */
}
} else {
}
continue;
}
if (port) {
/*
* Mark the port as in-use.
*/
/*
* If plumbed and in promisc mode then copy msg
* and send up the stack.
*/
if (nmp)
} else {
}
/*
* If the destination is in FDB, the packet
* should be forwarded to the correponding
* vsw_port (connected to a vnet device -
* VSW_VNETPORT)
*/
/*
* Decrement use count in port and check if
* should wake delete thread.
*/
} else {
/*
* Destination not in FDB.
*
* If the destination is broadcast or
* multicast forward the packet to all
* (VNETPORTs, PHYSDEV, LOCALDEV),
* except the caller.
*/
if (IS_BROADCAST(ehp)) {
} else if (IS_MULTICAST(ehp)) {
} else {
/*
* If the destination is unicast, and came
* from either a logical network device or
* the switch itself when it is plumbed, then
* send it out on the physical device and also
* up the stack if the logical interface is
* in promiscious mode.
*
* NOTE: The assumption here is that if we
* cannot find the destination in our fdb, its
* a unicast address, and came from either a
* vnet or down the stack (when plumbed) it
* must be destinded for an ethernet device
* outside our ldoms.
*/
if (caller == VSW_VNETPORT) {
if (nmp)
} else {
}
!= NULL) {
"phys dev", __func__);
}
} else if (caller == VSW_PHYSDEV) {
/*
* Pkt seen because card in promisc
* mode. Send up stack if plumbed in
* promisc mode, else drop it.
*/
} else {
}
} else if (caller == VSW_LOCALDEV) {
/*
* Pkt came down the stack, send out
* over physical device.
*/
!= NULL) {
"phys dev", __func__);
}
}
}
}
}
}
/*
* Switch ethernet frame when in layer 3 mode (i.e. using IP
* layer to do the routing).
*
* There is a large amount of overlap between this function and
* vsw_switch_l2_frame. At some stage we need to revisit and refactor
* both these functions.
*/
void
{
struct ether_header *ehp;
/*
* In layer 3 mode should only ever be switching packets
* between IP layer and vnet devices. So make sure thats
* who is invoking us.
*/
return;
}
/* process the chain of packets */
while (bp) {
if (port) {
/*
* Mark port as in-use.
*/
/*
* Finished with port so decrement ref count and
* check if should wake delete thread.
*/
} else {
/*
* Destination not in FDB
*
* If the destination is broadcast or
* multicast forward the packet to all
* (VNETPORTs, PHYSDEV, LOCALDEV),
* except the caller.
*/
if (IS_BROADCAST(ehp)) {
} else if (IS_MULTICAST(ehp)) {
} else {
/*
* Unicast pkt from vnet that we don't have
* an FDB entry for, so must be destinded for
* the outside world. Attempt to send up to the
* IP layer to allow it to deal with it.
*/
if (caller == VSW_VNETPORT) {
__func__);
} else {
/* Interface down, drop pkt */
__func__);
}
}
}
}
}
}
/*
* Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV),
* except the caller (port on which frame arrived).
*/
static int
{
int skip_port = 0;
/*
* Broadcast message from inside ldoms so send to outside
* world if in either of layer 2 modes.
*/
if (nmp) {
"consisting of %ld bytes of data for"
}
}
}
if (caller == VSW_VNETPORT)
skip_port = 1;
/*
* Broadcast message from other vnet (layer 2 or 3) or outside
* world (layer 2 only), send up stack if plumbed.
*/
if (nmp)
} else {
}
}
/* send it to all VNETPORTs */
/*
* Caution ! - don't reorder these two checks as arg
* will be NULL if the caller is PHYSDEV. skip_port is
* only set if caller is VNETPORT.
*/
continue;
else {
if (nmp) {
} else {
}
}
}
return (0);
}
/*
* Forward pkts to any devices or interfaces which have registered
* an interest in them (i.e. multicast groups).
*/
static int
{
/*
* Convert address to hash table key
*/
/*
* If pkt came from either a vnet or down the stack (if we are
* plumbed) and we are in layer 2 mode, then we send the pkt out
* over the physical adapter, and then check to see if any other
* vnets are interested in it.
*/
if (nmp) {
"consisting of %ld bytes of "
"data for physical device",
}
}
}
(mod_hash_val_t *)&entp) != 0) {
} else {
/*
* Send to list of devices associated with this address...
*/
/* dont send to ourselves */
if ((caller == VSW_VNETPORT) &&
" : port %d", __func__,
port->p_instance);
continue;
} else if ((caller == VSW_LOCALDEV) &&
__func__);
continue;
}
" addr 0x%llx", __func__,
if (nmp)
} else {
if (nmp)
" for addr 0x%llx", __func__,
key);
}
}
}
}
/*
* If the pkt came from either a vnet or from physical device,
* and if we havent already sent the pkt up the stack then we
* and in promisc mode).
*/
if ((check_if) &&
if (nmp)
} else {
}
}
return (0);
}
/* transmit the packet over the given port */
static int
{
int status = 0;
/*
* Note for now, we have a single channel.
*/
return (1);
}
/*
* Send the message out using the appropriate
* transmit function which will free mblock when it
* is finished with it.
*/
else {
}
return (status);
}
/*
* Send packet out via descriptor ring to a logical device.
*/
static int
{
int idx;
int status = LDC_TX_SUCCESS;
/* TODO: make test a macro */
return (LDC_TX_FAILURE);
}
/*
* Note - using first ring only, this may change
* in the future.
*/
return (LDC_TX_FAILURE);
}
return (LDC_TX_FAILURE);
}
/*
* Find a free descriptor
*
* Note: for the moment we are assuming that we will only
* have one dring going from the switch to each of its
* peers. This may change in the future.
*/
/* nothing more we can do */
goto vsw_dringsend_free_exit;
} else {
}
/* copy data into the descriptor */
bufp += VNET_IPALIGN;
bufp += n;
}
/*
* Determine whether or not we need to send a message to our
* peer prompting them to read our newly updated descriptor(s).
*/
if (dp->restart_reqd) {
/*
* Send a vio_dring_msg to peer to prompt them to read
* the updated descriptor ring.
*/
/* Note - for now using first ring */
/*
* If last_ack_recv is -1 then we know we've not
* received any ack's yet, so this must be the first
* msg sent, so set the start to the begining of the ring.
*/
} else {
}
sizeof (vio_dring_msg_t), B_TRUE);
/* free the message block */
return (status);
} else {
}
/* free the message block */
return (status);
}
/*
* Send an in-band descriptor message over ldc.
*/
static int
{
int idx, i;
int status = LDC_TX_SUCCESS;
static int warn_msg = 1;
return (LDC_TX_FAILURE);
}
/*
* only expect single dring to exist, which we use
* as an internal buffer, rather than a transfer channel.
*/
return (LDC_TX_FAILURE);
}
return (LDC_TX_FAILURE);
}
/*
* Find a free descriptor in our buffer ring
*/
if (warn_msg) {
warn_msg = 0;
}
/* nothing more we can do */
goto vsw_descrsend_free_exit;
} else {
warn_msg = 1;
}
/* copy data into the descriptor */
bufp += n;
}
/* create and send the in-band descp msg */
/*
* Copy the mem cookies describing the data from the
* private region of the descriptor ring into the inband
* descriptor.
*/
sizeof (ldc_mem_cookie_t));
}
sizeof (vnet_ibnd_desc_t), B_TRUE);
/* free the allocated message blocks */
return (status);
}
static void
vsw_send_ver(void *arg)
{
}
static void
{
/*
* Subtype is set to INFO by default
*/
/* payload copied from default settings for lane */
}
/*
* Create dring info msg (which also results in the creation of
* a dring).
*/
static vio_dring_reg_msg_t *
{
/*
* If we can't create a dring, obviously no point sending
* a message.
*/
return (NULL);
/* payload */
mp->dring_ident = 0;
return (mp);
}
static void
{
return;
}
sizeof (vio_dring_reg_msg_t), B_TRUE);
}
static void
{
}
/*
* Generic routine to send message out over ldc channel.
*
* It is possible that when we attempt to write over the ldc channel
* that we get notified that it has been reset. Depending on the value
* of the handle_reset flag we either handle that event here or simply
* notify the caller that the channel was reset.
*/
static int
{
int rv;
do {
}
/*
* If channel has been reset we either handle it here or
* simply report back that it has been reset and let caller
* decide what to do.
*/
if (rv == ECONNRESET) {
/*
* N.B - must never be holding the dlistrw lock when
* we do a reset of the channel.
*/
if (handle_reset) {
}
}
return (rv);
}
/*
* Add an entry into FDB, for the given mac address and port_id.
* Returns 0 on success, 1 on failure.
*
* Lock protecting FDB must be held by calling process.
*/
static int
{
/*
* Note: duplicate keys will be rejected by mod_hash.
*/
(mod_hash_val_t)port) != 0) {
return (1);
}
return (0);
}
/*
* Remove an entry from FDB.
* Returns 0 on success, 1 on failure.
*/
static int
{
return (0);
}
/*
* Search fdb for a given mac address.
* Returns pointer to the entry if found, else returns NULL.
*/
static vsw_port_t *
{
(mod_hash_val_t *)&port) != 0) {
return (NULL);
}
return (port);
}
/*
* Add or remove multicast address(es).
*
* Returns 0 on success, 1 on failure.
*/
static int
{
int i;
return (1);
}
/*
* Convert address into form that can be used
* as hash table key.
*/
/*
*/
/*
* Update the list of multicast
* addresses contained within the
* port structure to include this new
* one.
*/
__func__);
return (1);
}
/*
* Program the address into HW. If the addr
* has already been programmed then the MAC
* just increments a ref counter (which is
* used when the address is being deleted)
*/
"add multicast address",
return (1);
}
} else {
"address 0x%llx for port %ld",
return (1);
}
} else {
/*
* Delete an entry from the multicast hash
* table and update the address list
* appropriately.
*/
port->p_instance);
/*
* Remove the address from HW. The address
* will actually only be removed once the ref
* count within the MAC layer has dropped to
* zero. I.e. we can safely call this fn even
* if other ports are interested in this
* address.
*/
"remove multicast address",
return (1);
}
} else {
"addr 0x%llx for port %ld",
return (1);
}
}
}
return (0);
}
/*
* Add a new multicast entry.
*
* Search hash table based on address. If match found then
* update associated val (which is chain of ports), otherwise
*/
static int
{
int dup = 0;
int rv = 0;
if (devtype == VSW_VNETPORT) {
/*
* Being invoked from a vnet.
*/
} else {
/*
* We are being invoked via the m_multicst mac entry
* point.
*/
}
(mod_hash_val_t *)&ment) != 0) {
/* address not currently in table */
(mod_hash_val_t)ment) != 0) {
rv = 1;
} else {
}
} else {
/*
* Address in table. Check to see if specified port
* is already associated with the address. If not add
* it now.
*/
if (devtype == VSW_VNETPORT) {
"found for portid %ld and key "
"0x%llx", __func__,
addr);
} else {
"for key 0x%llx",
}
rv = 1;
dup = 1;
break;
}
}
/*
* Port not on list so add it to end now.
*/
if (0 == dup) {
}
}
return (rv);
}
/*
* Remove a multicast entry from the hashtable.
*
* Search hash table based on address. If match found, scan
* list of ports associated with address. If specified port
* found remove it from list.
*/
static int
{
if (devtype == VSW_VNETPORT) {
addr);
} else {
}
(mod_hash_val_t *)&ment) != 0) {
return (1);
}
if (devtype == VSW_VNETPORT) {
} else {
}
/*
* head of list, if no other element is in
* list then destroy this entry, otherwise
* just replace it with updated value.
*/
} else {
}
} else {
/*
* Not head of list, no need to do
* replacement, just adjust list pointers.
*/
}
break;
}
}
return (0);
}
/*
* Port is being deleted, but has registered an interest in one
* or more multicast groups. Using the list of addresses maintained
* within the port structure find the appropriate entry in the hash
* table and remove this port from the list of interested ports.
*/
static void
{
}
}
/*
* This vsw instance is detaching, but has registered an interest in one
* or more multicast groups. Using the list of addresses maintained
* within the vsw structure find the appropriate entry in the hash
* table and remove this instance from the list of interested ports.
*/
static void
{
}
}
/*
* Remove the specified address from the list of address maintained
* in this port node.
*/
static void
{
if (devtype == VSW_VNETPORT) {
} else {
}
/* match found */
/* list head */
if (devtype == VSW_VNETPORT)
else
} else {
}
break;
} else {
}
}
if (devtype == VSW_VNETPORT)
else
}
/*
* Creates a descriptor ring (dring) and links it into the
* link of outbound drings for this channel.
*
* Returns NULL if creation failed.
*/
static dring_info_t *
{
int i;
/* create public section of ring */
goto create_fail_exit;
}
/*
* Get the base address of the public section of the ring.
*/
goto dring_fail_exit;
} else {
}
/*
* create private portion of ring
*/
goto dring_fail_exit;
}
/* haven't used any descriptors yet */
/* bind dring to the channel */
goto dring_fail_exit;
}
/*
* Only ever create rings for outgoing lane. Link it onto
* end of list.
*/
} else {
}
return (dp);
for (i = 0; i < VSW_RING_NUM_EL; i++) {
(void) ldc_mem_free_handle(
priv_addr++;
}
(sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
}
return (NULL);
}
/*
* Create a ring consisting of just a private portion and link
* it into the list of rings for the outbound lane.
*
* These type of rings are used primarily for temporary data
* storage (i.e. as data buffers).
*/
void
{
/* no public section */
(sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
return;
}
/* haven't used any descriptors yet */
/*
* Only ever create rings for outgoing lane. Link it onto
* end of list.
*/
} else {
}
}
/*
* Setup the descriptors in the dring. Returns 0 on success, 1 on
* failure.
*/
int
{
static char *name = "vsw_setup_ring";
/* public section may be null but private should never be */
/*
* Allocate the region of memory which will be used to hold
* the data the descriptors will refer to.
*/
/*
* Initialise some of the private and public (if they exist)
* descriptor fields.
*/
for (i = 0; i < VSW_RING_NUM_EL; i++) {
goto setup_ring_cleanup;
}
if (rv != 0) {
goto setup_ring_cleanup;
}
"invalid num of cookies (%d) for size 0x%llx",
goto setup_ring_cleanup;
} else {
for (j = 1; j < ncookies; j++) {
if (rv != 0) {
goto setup_ring_cleanup;
}
"size 0x%llx", name, j,
}
}
/* link pub and private sides */
sizeof (ldc_mem_cookie_t));
}
pub_addr++;
}
/*
* move to next element in the dring and the next
* position in the data buffer.
*/
priv_addr++;
}
return (0);
for (j = 0; j < i; j++) {
priv_addr++;
}
return (1);
}
/*
* Searches the private section of a ring for a free descriptor,
* starting at the location of the last free descriptor found
* previously.
*
* Returns 0 if free descriptor is available, and updates state
* of private descriptor to VIO_DESC_READY, otherwise returns 1.
*
* FUTURE: might need to return contiguous range of descriptors
* as dring info msg assumes all will be contiguous.
*/
static int
{
int num = VSW_RING_NUM_EL;
int ret = 1;
ret = 0;
}
/* ring full */
if (ret == 1) {
}
return (ret);
}
/*
* Map from a dring identifier to the ring itself. Returns
* pointer to ring or NULL if no match found.
*
* Should be called with dlistrw rwlock held as reader.
*/
static dring_info_t *
{
return (NULL);
} else {
if (dp->ident == ident)
return (dp);
if (dp->ident == ident)
break;
}
}
return (dp);
}
/*
* Set the default lane attributes. These are copied into
* the attr msg we send to our peer. If they are not acceptable
* then (currently) the handshake ends.
*/
static void
{
}
/*
* Verify that the attributes are acceptable.
*
* FUTURE: If some attributes are not acceptable, change them
* our desired values.
*/
static int
{
int ret = 0;
/*
* Note we currently only support in-band descriptors
* and descriptor rings, not packet based transfer (VIO_PKT_MODE)
*/
ret = 1;
}
/* Only support MAC addresses at moment. */
ret = 1;
}
/*
* MAC address supplied by device should match that stored
* in the vsw-port OBP node. Need to decide what to do if they
* don't match, for the moment just warn but don't fail.
*/
"0x%llx doesn't match node address 0x%llx\n",
}
/*
* Ack freq only makes sense in pkt mode, in shared
* mode the ring descriptors say whether or not to
* send back an ACK.
*/
" in SHM mode\n");
ret = 1;
}
/*
* Note: for the moment we only support ETHER
* frames. This may change in the future.
*/
ret = 1;
}
return (ret);
}
/*
* Returns 1 if there is a problem, 0 otherwise.
*/
static int
{
int ret = 0;
if ((pkt->num_descriptors == 0) ||
(pkt->descriptor_size == 0) ||
ret = 1;
}
return (ret);
}
/*
* Returns 1 if two memory cookies match. Otherwise returns 0.
*/
static int
{
return (0);
} else {
return (1);
}
}
/*
* Returns 1 if ring described in reg message matches that
* described by dring_info structure. Otherwise returns 0.
*/
static int
{
return (0);
} else {
return (1);
}
}
static caddr_t
{
a[0], a[1], a[2], a[3], a[4], a[5]);
return (ebuf);
}
/*
* Reset and free all the resources associated with
* the channel.
*/
static void
{
int rv = 0;
} else {
}
}
} else {
/*
* unbind, destroy exported dring, free dring struct
*/
}
if (rv == 0) {
}
}
}
/*
* Free ring and all associated resources.
*
* Should be called with dlistrw rwlock held as writer.
*/
static int
{
int i, rv = 1;
/*
* First unbind and free the memory handles
* stored in each descriptor within the ring.
*/
for (i = 0; i < VSW_RING_NUM_EL; i++) {
paddr = (vsw_private_desc_t *)
if (rv != 0) {
"unbinding handle for "
"ring 0x%llx at pos %d",
dp, i);
return (rv);
}
}
if (rv != 0) {
"handle for ring "
"0x%llx at pos %d",
dp, i);
return (rv);
}
}
}
* VSW_RING_NUM_EL));
}
/*
* Now unbind and destroy the ring itself.
*/
}
}
}
return (0);
}
/*
* Debugging routines
*/
static void
display_state(void)
{
"status %d : phase %u\n",
"psession %lu\n",
ldcp->peer_session);
}
}
}
}
static void
{
}
}
static void
{
uint64_t i;
uint64_t priv_count = 0;
for (i = 0; i < VSW_RING_NUM_EL; i++) {
pub_count++;
}
priv_count++;
}
}
i, priv_count, pub_count);
}
static void
{
int i;
typedef struct flag_name {
int flag_val;
char *flag_name;
} flag_name_t;
flag_name_t flags[] = {
VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT",
VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV",
VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV",
VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT",
VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV",
VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT",
VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT",
VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV",
VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT",
VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV",
VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT",
VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV",
VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT",
VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV",
VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT",
VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV",
VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT",
VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV",
VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT",
VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV",
VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT",
VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV",
VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT",
VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV",
VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT",
VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV",
VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT",
VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV",
VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT",
VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV",
VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"};
for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) {
}
}