vnet_gen.c revision 195ce4e5cab5a850f4079df860f51c5659a1298b
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/sysmacros.h>
#include <sys/ethernet.h>
#include <sys/mac_provider.h>
#include <sys/mac_ether.h>
#include <sys/mach_descrip.h>
#include <sys/vio_mailbox.h>
#include <sys/vio_common.h>
#include <sys/vnet_common.h>
#include <sys/vnet_mailbox.h>
#include <sys/vio_util.h>
#include <sys/vnet_gen.h>
/*
* Implementation of the mac functionality for vnet using the
* generic(default) transport layer of sun4v Logical Domain Channels(LDC).
*/
/*
* Function prototypes.
*/
/* vgen proxy entry points */
int vgen_uninit(void *arg);
static int vgen_start(void *arg);
/* vgen internal functions */
int port_num);
static void vgen_ldc_watchdog(void *arg);
/* vgen handshake functions */
static void vgen_hwatchdog(void *arg);
static void vgen_ldc_rcv_worker(void *arg);
/* VLAN routines */
/* externs */
/*
* The handshake process consists of 5 phases defined below, with VH_PHASE0
* being the pre-handshake phase and VH_DONE is the phase to indicate
* successful completion of all phases.
* Each phase may have one to several handshake states which are required
* to complete successfully to move to the next phase.
* Refer to the functions vgen_handshake() and vgen_handshake_done() for
* more details.
*/
/* handshake phases */
/* handshake states */
/*
 * Handshake state flags. Each handshake exchange (version, attribute,
 * descriptor ring, RDX) tracks four events as individual bits:
 * our info message sent, peer's ack received, peer's info message
 * received, and our ack sent. See the phase description above.
 */
enum {
/* version exchange */
VER_INFO_SENT = 0x1,
VER_ACK_RCVD = 0x2,
VER_INFO_RCVD = 0x4,
VER_ACK_SENT = 0x8,
/* attribute exchange */
ATTR_INFO_SENT = 0x10,
ATTR_ACK_RCVD = 0x20,
ATTR_INFO_RCVD = 0x40,
ATTR_ACK_SENT = 0x80,
/* descriptor ring registration */
DRING_INFO_SENT = 0x100,
DRING_ACK_RCVD = 0x200,
DRING_INFO_RCVD = 0x400,
DRING_ACK_SENT = 0x800,
/* RDX exchange */
RDX_INFO_SENT = 0x1000,
RDX_ACK_RCVD = 0x2000,
RDX_INFO_RCVD = 0x4000,
RDX_ACK_SENT = 0x8000,
};
#define LDC_UNLOCK(ldcp) \
static struct ether_addr etherbroadcastaddr = {
0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
/*
*/
#define IS_BROADCAST(ehp) \
#define IS_MULTICAST(ehp) \
/*
* Property names
*/
/* device/port identity properties */
static char macaddr_propname[] = "mac-address";
static char rmacaddr_propname[] = "remote-mac-address";
static char channel_propname[] = "channel-endpoint";
static char reg_propname[] = "reg";
static char port_propname[] = "port";
static char swport_propname[] = "switch-port";
static char id_propname[] = "id";
static char vdev_propname[] = "virtual-device";
static char vnet_propname[] = "network";
/* priority-frame support property (see vgen_read_pri_eth_types()) */
static char pri_types_propname[] = "priority-ether-types";
/* VLAN properties of the local vnet device node */
static char vgen_pvid_propname[] = "port-vlan-id";
static char vgen_vid_propname[] = "vlan-id";
static char vgen_dvid_propname[] = "default-vlan-id";
/* VLAN properties of a remote peer's port node */
static char port_pvid_propname[] = "remote-port-vlan-id";
static char port_vid_propname[] = "remote-vlan-id";
static char vgen_mtu_propname[] = "mtu";
/* versions supported - in decreasing order */
/* Tunables */
/*
* max # of packets accumulated prior to sending them up. It is best
* to keep this at 60% of the number of receive buffers.
*/
/*
* Internal tunables for receive buffer pools, that is, the size and number of
* mblks for each pool. At least 3 sizes must be specified if these are used.
* The sizes must be specified in increasing order. Non-zero value of the first
* size will be used as a hint to use these values instead of the algorithm
* that determines the sizes based on MTU.
*/
uint32_t vgen_rbufsz1 = 0;	/* 0 => derive pool sizes from MTU (see above) */
uint32_t vgen_rbufsz2 = 0;
uint32_t vgen_rbufsz3 = 0;
uint32_t vgen_rbufsz4 = 0;	/* optional 4th pool; 0 => only 3 pools used */
/*
* In the absence of "priority-ether-types" property in MD, the following
* internal tunable can be set to specify a single priority ethertype.
*/
/*
* Number of transmit priority buffers that are preallocated per device.
* This number is chosen to be a small value to throttle transmission
* of priority packets. Note: Must be a power of 2 for vio_create_mblks().
*/
#ifdef DEBUG
/* flags to simulate error conditions for debugging */
int vgen_trigger_txtimeout = 0;	/* non-zero: simulate a transmit timeout */
int vgen_trigger_rxlost = 0;	/* non-zero: simulate lost receive packets */
#endif
/*
* Matching criteria passed to the MDEG to register interest
* in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
* by their 'name' and 'cfg-handle' properties.
*/
/*
 * Match 'virtual-device' (vnet) MD nodes on their 'name' (string) and
 * 'cfg-handle' (value) properties; the list is MDET_LIST_END terminated.
 */
static md_prop_match_t vdev_prop_match[] = {
{ MDET_PROP_STR, "name" },
{ MDET_PROP_VAL, "cfg-handle" },
{ MDET_LIST_END, NULL }
};
/* MD update matching structure */
/* match port MD nodes on their 'id' value property */
static md_prop_match_t vport_prop_match[] = {
{ MDET_PROP_VAL, "id" },
{ MDET_LIST_END, NULL }
};
/* template for matching a particular vnet instance */
/* template for matching a particular vnet instance */
static mdeg_prop_spec_t vgen_prop_template[] = {
/*
 * NOTE(review): initializer appears empty in this copy; the template
 * normally lists the property specs identifying one vnet instance --
 * confirm against the original source.
 */
};
/*
 * MAC layer callback vector for this driver.
 * NOTE(review): only three initializers are visible here; mac_callbacks_t
 * defines more members (getstat, start, stop, tx, etc.) -- this
 * initializer looks truncated, confirm against the original source.
 */
static mac_callbacks_t vgen_m_callbacks = {
0,
NULL,
NULL,
};
/* externs */
extern pri_t maxclsyspri;
extern uint32_t vnet_ntxds;
extern uint32_t vnet_ldcwd_interval;
extern uint32_t vnet_ldcwd_txtimeout;
extern uint32_t vnet_ldc_mtu;
extern uint32_t vnet_nrbufs;
extern uint32_t vnet_ethermtu;
extern uint16_t vnet_default_vlan_id;
extern boolean_t vnet_jumbo_rxpools;
#ifdef DEBUG
extern int vnet_dbglevel;
/* -1 for all LDCs info, or ldc_id for a specific LDC info */
int vgendbg_ldcid = -1;
/* simulate handshake error conditions for debug */
#define HDBG_VERSION 0x1	/* fault the version exchange */
#define HDBG_TIMEOUT 0x2	/* simulate a handshake timeout */
#define HDBG_BAD_SID 0x4	/* use a bad session id */
#define HDBG_OUT_STATE 0x8	/* send an out-of-state message */
#endif
/*
* vgen_init() is called by an instance of vnet driver to initialize the
* corresponding generic proxy transport layer. The arguments passed by vnet
* are - an opaque pointer to the vnet instance, pointers to dev_info_t and
* the mac address of the vnet device, and a pointer to vgen_t is passed
* back as a handle to vnet.
*/
int
{
int instance;
int rv;
return (DDI_FAILURE);
/* allocate multicast table */
sizeof (struct ether_addr), KM_SLEEP);
if (rv != 0) {
goto vgen_init_fail;
}
/* register with MD event generator */
if (rv != DDI_SUCCESS) {
goto vgen_init_fail;
}
return (DDI_SUCCESS);
sizeof (struct ether_addr));
if (VGEN_PRI_ETH_DEFINED(vgenp)) {
}
return (DDI_FAILURE);
}
/*
* Called by vnet to undo the initializations done by vgen_init().
* The handle provided by generic transport during vgen_init() is the argument.
*/
int
vgen_uninit(void *arg)
{
return (DDI_FAILURE);
}
/* unregister with MD event generator */
/* detach all ports from the device */
/*
* free any pending rx mblk pools,
* that couldn't be freed previously during channel detach.
*/
if (vio_destroy_mblks(rp)) {
return (DDI_FAILURE);
}
}
/* free multicast table */
/* free pri_types table */
if (VGEN_PRI_ETH_DEFINED(vgenp)) {
}
return (DDI_SUCCESS);
}
int
vgen_start(void *arg)
{
return (DDI_SUCCESS);
}
void
{
}
/* vgen transmit function */
static mblk_t *
{
int i;
int status = VGEN_FAILURE;
/*
* Retry so that we avoid reporting a failure
* to the upper layer. Returning a failure may cause the
* upper layer to go into single threaded mode thereby
* number of connections.
*/
for (i = 0; i < vgen_tx_retries; ) {
if (status == VGEN_SUCCESS) {
break;
}
if (++i < vgen_tx_retries)
}
if (status != VGEN_SUCCESS) {
/* failure */
return (mp);
}
/* success */
return (NULL);
}
/*
* that are being transmitted over the port. It first verifies the vlan
* membership of the destination(port) and drops the packet if the
* destination doesn't belong to the given vlan.
*
* Arguments:
* portp: port over which the frames should be transmitted
* mp: frame to be transmitted
* is_tagged:
* B_TRUE: indicates frame header contains the vlan tag already.
* B_FALSE: indicates frame is untagged.
* vid: vlan in which the frame should be transmitted.
*
* Returns:
* Failure: NULL
*/
static mblk_t *
{
int rv;
/*
* If the packet is going to a vnet:
* Check if the destination vnet is in the same vlan.
* Check the frame header if tag or untag is needed.
*
* We do not check the above conditions if the packet is going to vsw:
* vsw must be present implicitly in all the vlans that a vnet device
* is configured into; even if vsw itself is not assigned to those
* vlans as an interface. For instance, the packet might be destined
* to another vnet(indirectly through vsw) or to an external host
* which is in the same vlan as this vnet and vsw itself may not be
* present in that vlan. Similarly packets going to vsw must be
* always tagged(unless in the default-vlan) if not already tagged,
* as we do not know the final destination. This is needed because
* vsw must always invoke its switching function only after tagging
* the packet; otherwise after switching function determines the
* destination we cannot figure out if the destination belongs to the
* the same vlan that the frame originated from and if it needs tag/
* untag. Note that vsw will tag the packet itself when it receives
* it over the channel from a client if needed. However, that is
* needed only in the case of vlan unaware clients such as obp or
* earlier versions of vnet.
*
*/
/*
* Packet going to a vnet. Check if the destination vnet is in
* needed.
*/
/* drop the packet */
return (NULL);
}
/* is the destination tagged or untagged in this vlan? */
(dst_tagged = B_TRUE);
if (is_tagged == dst_tagged) {
return (mp);
}
/* frame is tagged; destination needs untagged */
return (mp);
}
/* (is_tagged == B_FALSE): fallthru to tag tx packet: */
}
/*
* Packet going to a vnet needs tagging.
* OR
* If the packet is going to vsw, then it must be tagged in all cases:
*/
}
return (mp);
}
/* transmit packets over the given port */
static int
{
int status;
int rv = VGEN_SUCCESS;
struct ether_header *ehp;
if (portp->use_vsw_port) {
dec_refcnt = B_TRUE;
}
return (VGEN_FAILURE);
}
/*
* Determine the vlan id that the frame belongs to.
*/
/* Frames in default vlan must be untagged */
/*
* If the destination is a vnet-port verify it belongs to the
* default vlan; otherwise drop the packet. We do not need
* this check for vsw-port, as it should implicitly belong to
* this vlan; see comments in vgen_vlan_frame_fixtag().
*/
goto portsend_ret;
}
} else { /* frame not in default-vlan */
goto portsend_ret;
}
}
/*
* NOTE: for now, we will assume we have a single channel.
*/
rv = VGEN_FAILURE;
goto portsend_ret;
}
if (status != VGEN_TX_SUCCESS) {
rv = VGEN_FAILURE;
}
if (dec_refcnt == B_TRUE) {
}
return (rv);
}
/*
*/
static int
{
int status;
struct ether_header *ehp;
int i;
for (i = 0; i < num_types; i++) {
/* priority frame, use pri tx function */
return (VGEN_SUCCESS);
}
}
return (status);
}
/*
* This functions handles ldc channel reset while in the context
* of transmit routines: vgen_ldcsend_pkt() or vgen_ldcsend_dring().
*/
static void
{
} else {
}
}
}
}
/*
* This function transmits the frame in the payload of a raw data
* (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
* send special frames with high priorities, without going through
* the normal data path which uses descriptor ring mechanism.
*/
static void
{
int rv;
/* drop the packet if ldc is not up or handshake is not done */
ldcp->ldc_status);
goto send_pkt_exit;
}
goto send_pkt_exit;
}
/* frame size bigger than available payload len of raw data msg ? */
goto send_pkt_exit;
}
/* alloc space for a raw data message */
goto send_pkt_exit;
}
/* copy frame into the payload of raw data message */
}
/* setup the raw data msg */
/* send the msg over ldc */
if (rv != VGEN_SUCCESS) {
if (rv == ECONNRESET) {
}
goto send_pkt_exit;
}
/* update stats */
}
/*
* This function transmits normal (non-priority) data frames over
* the channel. It queues the frame into the transmit descriptor ring
* and sends a VIO_DRING_DATA message if needed, to wake up the
* peer to (re)start processing.
*/
static int
{
struct ether_header *ehp;
int rv = 0;
ldcp->ldc_status);
/* retry ldc_up() if needed */
goto send_dring_exit;
}
/* drop the packet if ldc is not up or handshake is not done */
goto send_dring_exit;
}
goto send_dring_exit;
}
/*
* allocate a descriptor
*/
/* Try reclaiming now */
statsp->tx_no_desc++;
return (VGEN_TX_NORESOURCES);
}
}
/* update next available tbuf in the ring and update tx index */
/* Mark the buffer busy before releasing the lock */
/* copy data into pre-allocated transmit buffer */
}
/* initialize the corresponding public descriptor (txd) */
/*
* If the flags not set to BUSY, it implies that the clobber
* was done while we were copying the data. In such case,
* discard the packet and return.
*/
goto send_dring_exit;
}
/* update stats */
if (is_bcast)
else if (is_mcast)
/* send dring datamsg to the peer */
if (ldcp->resched_peer) {
if (rv != 0) {
/* error: drop the packet */
"failed: rv(%d) len(%d)\n",
} else {
}
}
}
if (rv == ECONNRESET) {
}
return (VGEN_TX_SUCCESS);
}
int
{
struct ether_addr *addrp;
int rv = DDI_FAILURE;
uint32_t i;
return (DDI_SUCCESS);
}
return (rv);
}
goto vgen_mcast_exit;
/*
* the multicast address. Otherwise, we just update this
* mcast address in our table and the table will be sync'd
* with vsw when handshake completes.
*/
B_FALSE) != VGEN_SUCCESS) {
goto vgen_mcast_exit;
}
}
if (add) {
/* expand multicast table if necessary */
struct ether_addr *newtab;
sizeof (struct ether_addr), KM_NOSLEEP);
goto vgen_mcast_exit;
sizeof (struct ether_addr));
}
/* add address to the table */
} else {
/* delete address from the table */
/*
* If there's more than one address in this
* table, delete the unwanted one by moving
* the last one in the list over top of it;
* otherwise, just remove it.
*/
}
break;
}
}
}
rv = DDI_SUCCESS;
return (rv);
}
/* set or clear promiscuous mode on the device */
static int
{
return (DDI_SUCCESS);
}
/* set the unicast mac address of the device */
static int
{
return (DDI_SUCCESS);
}
/* get device statistics */
int
{
return (0);
}
static void
{
}
/* vgen internal functions */
/* detach all ports from the device */
static void
{
}
}
/*
* detach the given port.
*/
static void
{
int port_num;
/*
* If this port is connected to the vswitch, then
* potentially there could be ports that may be using
* this port to transmit packets. To address this do
* the following:
* - First set vgenp->vsw_portp to NULL, so that
* its not used after that.
* - Then wait for the refcnt to go down to 0.
* - Now we can safely detach this port.
*/
while (vgenp->vsw_port_refcnt > 0) {
}
}
}
/* remove it from port list */
/* detach channels from this port */
}
}
}
/* add a port to port list */
static void
{
} else {
}
}
/* remove a port from port list */
static void
{
return;
} else {
;
}
}
}
/* lookup a port in the list based on port_num */
static vgen_port_t *
{
break;
}
}
return (portp);
}
static void
{
}
}
static void
{
/* Add the port to the specified vlans */
/* Bring up the channels of this port */
}
static void
{
}
}
static void
{
/* remove the port from vlans it has been assigned to */
}
/*
* Scan the machine description for this instance of vnet
* and read its properties. Called only from vgen_init().
* Returns: 0 on success, 1 on failure.
*/
static int
{
char *name;
int rv = 1;
int num_nodes = 0;
int num_devs = 0;
int listsz = 0;
int i;
return (rv);
}
/* search for all "virtual_device" nodes */
if (num_devs <= 0) {
goto vgen_readmd_exit;
}
/*
* Now loop through the list of virtual-devices looking for
* devices with name "network" and for each such device compare
* its instance with what we have from the 'reg' property to
* find the right node in MD and then read all its properties.
*/
for (i = 0; i < num_devs; i++) {
goto vgen_readmd_exit;
}
/* is this a "network" device? */
continue;
goto vgen_readmd_exit;
}
/* is this the required instance of vnet? */
continue;
/*
* Read the mtu. Note that we set the mtu of vnet device within
* this routine itself, after validating the range.
*/
}
sizeof (struct ether_header) + VLAN_TAGSZ;
/* read priority ether types */
/* read vlan id properties of this vnet instance */
&vnetp->default_vlan_id);
rv = 0;
break;
}
(void) md_fini_handle(mdp);
return (rv);
}
/*
* Read vlan id properties of the given MD node.
* Arguments:
* arg: device argument(vnet device or a port)
* type: type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
* mdp: machine description
* node: md node cookie
*
* Returns:
* pvidp: port-vlan-id of the node
* vidspp: list of vlan-ids of the node
* nvidsp: # of vlan-ids in the list
* default_idp: default-vlan-id of the node(if node is vnet device)
*/
static void
{
char *pvid_propname;
char *vid_propname;
int rv;
int i;
int size;
int inst;
if (type == VGEN_LOCAL) {
} else {
return;
}
if (rv != 0) {
} else {
inst, *default_idp);
}
}
if (rv != 0) {
} else {
}
&size);
if (rv != 0) {
size = 0;
} else {
}
if (nvids != 0) {
for (i = 0; i < nvids; i++) {
}
}
}
/*
* Create a vlan id hash table for the given port.
*/
static void
{
char hashname[MAXNAMELEN];
}
/*
* Destroy the vlan id hash table in the given port.
*/
static void
{
portp->vlan_nchains = 0;
}
}
/*
* Add a port to the vlans specified in its port properties.
*/
static void
{
int rv;
int i;
}
}
/*
* Remove a port from the vlans it has been assigned to.
*/
static void
{
int rv;
int i;
(mod_hash_val_t *)&vp);
(mod_hash_val_t *)&vp);
}
}
/*
* Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
* then the vlan-id is available in the tag; otherwise, its vlan id is
* implicitly obtained from the port-vlan-id of the vnet device.
* The vlan id determined is returned in vidp.
* Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
*/
static boolean_t
{
struct ether_vlan_header *evhp;
/* If it's a tagged frame, get the vlan id from vlan header */
return (B_TRUE);
}
/* Untagged frame, vlan-id is the pvid of vnet device */
return (B_FALSE);
}
/*
* Find the given vlan id in the hash table.
* Return: B_TRUE if the id is found; B_FALSE if not found.
*/
static boolean_t
{
int rv;
if (rv != 0)
return (B_FALSE);
return (B_TRUE);
}
/*
* This function reads "priority-ether-types" property from md. This property
* is used to enable support for priority frames. Applications which need
* a vnet or vsw within ldoms, should configure this property by providing
* the ether type(s) for which the priority facility is needed.
* Normal data frames are delivered over a ldc channel using the descriptor
* ring mechanism which is constrained by factors such as descriptor ring size,
* the rate at which the ring is processed at the peer ldc end point, etc.
* as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
* descriptor ring path and enables a more reliable and timely delivery of
* frames to the peer.
*/
static void
{
int rv;
int size;
int i;
if (rv != 0) {
/*
* Property may not exist if we are running pre-ldoms1.1 f/w.
* Check if 'vgen_pri_eth_type' has been set in that case.
*/
if (vgen_pri_eth_type != 0) {
size = sizeof (vgen_pri_eth_type);
} else {
"prop(%s) not found", pri_types_propname);
size = 0;
}
}
if (size == 0) {
vgenp->pri_num_types = 0;
return;
}
/*
* we have some priority-ether-types defined;
* allocate a table of these types and also
* allocate a pool of mblks to transmit these
* priority packets.
*/
}
&vgenp->pri_tx_vmp);
}
static void
{
int rv;
char *mtu_propname;
if (rv != 0) {
*mtu = vnet_ethermtu;
} else {
}
}
/* register with MD event generator */
static int
{
int rv;
templatesz = sizeof (vgen_prop_template);
return (DDI_FAILURE);
}
return (DDI_FAILURE);
}
/*
* NOTE: The instance here refers to the value of "reg" property and
* not the dev_info instance (ddi_get_instance()) of vnet.
*/
/* save parentp in vgen_t */
/*
* Register an interest in 'virtual-device' nodes with a
* 'name' property of 'network'
*/
if (rv != MDEG_SUCCESS) {
goto mdeg_reg_fail;
}
/* Register an interest in 'port' nodes */
&port_hdl);
if (rv != MDEG_SUCCESS) {
goto mdeg_reg_fail;
}
/* save mdeg handle in vgen_t */
return (DDI_SUCCESS);
(void) mdeg_unregister(dev_hdl);
}
return (DDI_FAILURE);
}
/* unregister with MD event generator */
static void
{
}
/* mdeg callback function for the port node */
static int
{
int idx;
int vsw_idx = -1;
return (MDEG_FAILURE);
}
}
/*
* find vsw_port and add it first, because other ports need
* this when adding fdb entry (see vgen_port_init()).
*/
if (val == 0) {
/*
* This port is connected to the
* vsw on service domain.
*/
if (vgen_add_port(vgenp,
DDI_SUCCESS) {
"not initialize virtual "
"switch port.",
return (MDEG_FAILURE);
}
break;
}
}
}
if (vsw_idx == -1) {
return (MDEG_FAILURE);
}
}
continue;
/* If this port can't be added just skip it. */
}
}
return (MDEG_SUCCESS);
}
/* mdeg callback function for the vnet node */
static int
{
return (MDEG_FAILURE);
}
/*
* We get an initial callback for this node as 'added' after
* registering with mdeg. Note that we would have already gathered
* information about this vnet node by walking MD earlier during attach
* (in vgen_read_mdprops()). So, there is a window where the properties
* of this node might have changed when we get this initial 'added'
* callback. We handle this as if an update occurred and invoke the same
* function which handles updates to the properties of this vnet-node
* if any. A non-zero 'match' value indicates that the MD has been
* updated and that a 'network' node is present which may or may not
* have been updated. It is up to the clients to examine their own
* nodes and determine if they have changed.
*/
goto vgen_mdeg_cb_err;
}
goto vgen_mdeg_cb_err;
}
} else {
goto vgen_mdeg_cb_err;
}
/* Validate name and instance */
goto vgen_mdeg_cb_err;
}
/* is this a virtual-network device? */
goto vgen_mdeg_cb_err;
}
goto vgen_mdeg_cb_err;
}
/* is this the right instance of vnet? */
goto vgen_mdeg_cb_err;
}
return (MDEG_SUCCESS);
return (MDEG_FAILURE);
}
/*
* Check to see if the relevant properties in the specified node have
* changed, and if so take the appropriate action.
*/
static void
{
enum { MD_init = 0x1,
MD_vlans = 0x2,
int rv;
/* Read the vlan ids */
/* Determine if there are any vlan id updates */
}
/* Read mtu */
} else {
" as the specified value:%d is invalid\n",
}
}
/* Now process the updated props */
/* save the new vlan ids */
}
if (nvids != 0) {
}
/* reset vlan-unaware peers (ver < 1.3) and restart handshake */
} else {
if (nvids != 0) {
}
}
if (rv == 0) {
sizeof (struct ether_header) + VLAN_TAGSZ;
}
}
}
/* add a new port to the device */
static int
{
int rv;
if (rv != DDI_SUCCESS) {
return (DDI_FAILURE);
}
if (rv != DDI_SUCCESS) {
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
/* read properties of the port from its md node */
static int
{
int num_ldcs;
int i;
int addrsz;
int num_nodes = 0;
int listsz = 0;
struct ether_addr ea;
/* read "id" property to get the port number */
return (DDI_FAILURE);
}
/*
* Find the channel endpoint node(s) under this port node.
*/
return (DDI_FAILURE);
}
/* allocate space for node list */
return (DDI_FAILURE);
if (num_ldcs <= 0) {
return (DDI_FAILURE);
}
return (DDI_FAILURE);
}
for (i = 0; i < num_ldcs; i++) {
/* read channel ids */
return (DDI_FAILURE);
}
}
&addrsz)) {
return (DDI_FAILURE);
}
if (addrsz < ETHERADDRL) {
return (DDI_FAILURE);
}
for (i = ETHERADDRL - 1; i >= 0; i--) {
macaddr >>= 8;
}
if (val == 0) {
(void) atomic_swap_32(
&vgenp->vsw_port_refcnt, 0);
/* This port is connected to the vsw */
}
}
}
/* now update all properties into the port */
/* read vlan id properties of this port node */
return (DDI_SUCCESS);
}
/* remove a port from the device */
static int
{
/* read "id" property to get the port number */
return (DDI_FAILURE);
}
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
/* attach a port to the device based on mdeg data */
static int
{
int i;
int rv;
for (i = 0; i < num_ldcs; i++) {
return (DDI_FAILURE);
}
}
/* create vlan id hash table */
/* This port is connected to the switch port */
} else {
}
return (DDI_FAILURE);
}
if (rv == 0) {
/* link it into the list of ports */
} else {
portp);
}
return (DDI_SUCCESS);
}
/* detach a port from the device based on mdeg data */
static void
{
/* stop the port if needed */
}
}
static int
{
/*
* For now, we get port updates only if vlan ids changed.
* We read the port num and do some sanity check.
*/
return (DDI_FAILURE);
}
return (DDI_FAILURE);
}
return (DDI_FAILURE);
return (DDI_FAILURE);
}
/* Read the vlan ids */
/* Determine if there are any vlan id updates */
}
if (updated_vlans == B_FALSE) {
return (DDI_FAILURE);
}
/* remove the port from vlans it has been assigned to */
/* save the new vlan ids */
}
if (nvids != 0) {
}
/* add port to the new vlans */
/* reset the port if it is vlan unaware (ver < 1.3) */
return (DDI_SUCCESS);
}
static uint64_t
{
val = 0;
}
return (val);
}
/* allocate receive resources */
static int
{
int status;
/*
* We round up the mtu specified to be a multiple of 2K.
* We then create rx pools based on the rounded up size.
*/
/*
* If pool sizes are specified, use them. Note that the presence of
* the first tunable will be used as a hint.
*/
if (vgen_rbufsz1 != 0) {
sz1 = vgen_rbufsz1;
sz2 = vgen_rbufsz2;
sz3 = vgen_rbufsz3;
sz4 = vgen_rbufsz4;
if (sz4 == 0) { /* need 3 pools */
} else {
}
return (status);
}
/*
* Pool sizes are not specified. We select the pool sizes based on the
* mtu if vnet_jumbo_rxpools is enabled.
*/
/*
* Receive buffer pool allocation based on mtu is disabled.
* Use the default mechanism of standard size pool allocation.
*/
return (status);
}
switch (data_sz) {
case VNET_4K:
break;
default: /* data_sz: 4K+ to 16K */
break;
}
return (status);
}
/* attach the channel corresponding to the given ldc_id to the port */
static int
{
int status;
char kname[MAXNAMELEN];
int instance;
AST_create_rxmblks = 0x20,
goto ldc_attach_failed;
}
if (status != 0) {
goto ldc_attach_failed;
}
if (vgen_rcv_thread_enabled) {
ldcp->rcv_thr_flags = 0;
goto ldc_attach_failed;
}
}
if (status != 0) {
status);
goto ldc_attach_failed;
}
/*
* allocate a message for ldc_read()s, big enough to hold ctrl and
* data msgs, including raw data msgs used to recv priority frames.
*/
/* allocate transmit resources */
if (status != 0) {
goto ldc_attach_failed;
}
/* allocate receive resources */
if (status != 0) {
goto ldc_attach_failed;
}
/* Setup kstats for the channel */
goto ldc_attach_failed;
}
/* initialize vgen_versions supported */
/* link it into the list of channels for this port */
return (DDI_SUCCESS);
if (attach_state & AST_ldc_reg_cb) {
}
if (attach_state & AST_create_rcv_thread) {
}
}
if (attach_state & AST_create_rxmblks) {
}
if (attach_state & AST_alloc_tx_ring) {
}
if (attach_state & AST_ldc_init) {
}
if (attach_state & AST_mutex_init) {
}
if (attach_state & AST_ldc_alloc) {
}
return (DDI_FAILURE);
}
/* detach a channel from the port */
static void
{
break;
}
}
/* invalid ldcp? */
return;
}
}
/* First stop the receive thread */
}
/*
* if we cannot reclaim all mblks, put this
* on the list of pools(vgenp->rmp) to be reclaimed when the
* device gets detached (see vgen_uninit()).
*/
/* free transmit resources */
/* unlink it from the list */
}
}
/*
* This function allocates transmit resources for the channel.
* The resources consist of a transmit descriptor ring and an associated
* transmit buffer ring.
*/
static int
{
void *tbufp;
int status;
txdsize = sizeof (vnet_public_desc_t);
tbufsize = sizeof (vgen_private_desc_t);
/* allocate transmit buffer ring */
return (DDI_FAILURE);
}
/* create transmit descriptor ring */
&ldcp->tx_dhandle);
if (status) {
return (DDI_FAILURE);
}
/* get the addr of descriptor ring */
if (status) {
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
/* Free transmit resources for the channel */
static void
{
int tbufsize = sizeof (vgen_private_desc_t);
/* free transmit descriptor ring */
/* free transmit buffer ring */
}
static void
{
(void) vgen_ldc_init(ldcp);
}
}
static void
{
}
}
static int
{
int rv;
if (rv != 0) {
goto ldcinit_failed;
}
goto ldcinit_failed;
}
if (rv != 0) {
goto ldcinit_failed;
}
if (rv != 0) {
goto ldcinit_failed;
}
do {
}
if (retries++ >= vgen_ldcup_retries)
break;
} while (rv == EWOULDBLOCK);
}
/* initialize transmit watchdog timeout */
/* if channel is already UP - start handshake */
/*
* As the channel is up, use this port from now on.
*/
(void) atomic_swap_32(
}
/* Initialize local session id */
/* clear peer session id */
/* Initiate Handshake process with peer ldc endpoint */
} else {
}
return (DDI_SUCCESS);
if (init_state & ST_cb_enable) {
}
if (init_state & ST_init_tbufs) {
}
if (init_state & ST_ldc_open) {
}
return (DDI_FAILURE);
}
static void
{
int rv;
return;
}
/* disable further callbacks */
if (rv != 0) {
}
}
/*
* clear handshake done bit and wait for pending tx and cb to finish.
* release locks before untimeout(9F) is invoked to cancel timeouts.
*/
/* cancel handshake watchdog timeout */
}
if (ldcp->cancel_htid) {
ldcp->cancel_htid = 0;
}
/* cancel transmit watchdog timeout */
}
drv_usecwait(1000);
/*
* Note that callbacks have been disabled already(above). The
* drain function takes care of the condition when an already
* executing callback signals the worker to start processing or
* the worker has already been signalled and is in the middle of
* processing.
*/
}
/* acquire locks again; any pending transmits and callbacks are done */
/* close the channel - retry on EAGAIN */
if (++retries > vgen_ldccl_retries) {
break;
}
}
if (rv != 0) {
"!vnet%d: Error(%d) closing the channel(0x%lx)\n",
}
}
/* Initialize the transmit buffer ring for the channel */
static int
{
int i;
int rv;
int ci;
/*
* In order to ensure that the number of ldc cookies per descriptor is
* limited to be within the default MAX_COOKIES (2), we take the steps
* outlined below:
*
* Align the entire data buffer area to 8K and carve out per descriptor
* data buffers starting from this 8K aligned base address.
*
* We round up the mtu specified to be a multiple of 2K or 4K.
* For sizes up to 12K we round up the size to the next 2K.
* For sizes > 12K we round up to the next 4K (otherwise sizes such as
* 14K could end up needing 3 cookies, with the buffer spread across
* 3 8K pages: 8K+6K, 2K+8K+2K, 6K+8K, ...).
*/
} else {
}
/* allocate extra 8K bytes for alignment */
/* align the starting address of the data area to 8K */
/*
* for each private descriptor, allocate a ldc mem_handle which is
* required to map the data during transmit, set the flags
* to free (available for use by transmit routine).
*/
if (rv) {
goto init_tbufs_failed;
}
/*
* bind ldc memhandle to the corresponding transmit buffer.
*/
if (rv != 0) {
goto init_tbufs_failed;
}
/*
* successful in binding the handle to tx data buffer.
* set datap in the private descr to this buffer.
*/
if ((ncookies == 0) ||
(ncookies > MAX_COOKIES)) {
goto init_tbufs_failed;
}
if (rv != 0) {
goto init_tbufs_failed;
}
}
}
/* reset tbuf walking pointers */
/* initialize tx seqnum and index */
ldcp->resched_peer_txi = 0;
return (DDI_SUCCESS);
return (DDI_FAILURE);
}
/* Uninitialize transmit buffer ring for the channel */
static void
{
int i;
/* for each tbuf (priv_desc), free ldc mem_handle */
}
}
}
/* prealloc'd tx data buffer */
ldcp->tx_data_sz = 0;
}
}
/* clobber tx descriptor ring */
static void
{
int i;
#ifdef DEBUG
int ndone = 0;
#endif
#ifdef DEBUG
ndone++;
#endif
}
}
/* reset tbuf walking pointers */
/* reset tx seqnum and index */
ldcp->resched_peer_txi = 0;
}
/* clobber receive descriptor ring */
static void
{
ldcp->rx_dhandle = 0;
}
/* initialize receive descriptor ring */
static int
{
int rv;
if (rv != 0) {
return (DDI_FAILURE);
}
/*
* successfully mapped, now try to
* get info about the mapped dring
*/
if (rv != 0) {
return (DDI_FAILURE);
}
/*
* save ring address, number of descriptors.
*/
return (DDI_SUCCESS);
}
/* get channel statistics */
/*
 * Returns the value of the requested MAC/Ether statistic for this channel.
 * NOTE(review): the declarator and the per-case fetch statements (which
 * presumably read the channel's stats structure) are elided in this chunk;
 * only comments are added below.
 */
static uint64_t
{
val = 0;
switch (stat) {
case MAC_STAT_MULTIRCV:
break;
case MAC_STAT_BRDCSTRCV:
break;
case MAC_STAT_MULTIXMT:
break;
case MAC_STAT_BRDCSTXMT:
break;
case MAC_STAT_NORCVBUF:
break;
case MAC_STAT_IERRORS:
break;
case MAC_STAT_NOXMTBUF:
break;
case MAC_STAT_OERRORS:
break;
case MAC_STAT_COLLISIONS:
break;
case MAC_STAT_RBYTES:
break;
case MAC_STAT_IPACKETS:
break;
case MAC_STAT_OBYTES:
break;
case MAC_STAT_OPACKETS:
break;
/* stats not relevant to ldc, return 0 */
/* the following cases deliberately fall through to default (val = 0) */
case MAC_STAT_IFSPEED:
case ETHER_STAT_ALIGN_ERRORS:
case ETHER_STAT_FCS_ERRORS:
case ETHER_STAT_DEFER_XMTS:
case ETHER_STAT_EX_COLLISIONS:
case ETHER_STAT_MACXMT_ERRORS:
case ETHER_STAT_XCVR_ADDR:
case ETHER_STAT_XCVR_ID:
case ETHER_STAT_XCVR_INUSE:
case ETHER_STAT_CAP_1000FDX:
case ETHER_STAT_CAP_1000HDX:
case ETHER_STAT_CAP_100FDX:
case ETHER_STAT_CAP_100HDX:
case ETHER_STAT_CAP_10FDX:
case ETHER_STAT_CAP_10HDX:
case ETHER_STAT_CAP_ASMPAUSE:
case ETHER_STAT_CAP_PAUSE:
case ETHER_STAT_CAP_AUTONEG:
case ETHER_STAT_ADV_CAP_10FDX:
case ETHER_STAT_ADV_CAP_10HDX:
case ETHER_STAT_ADV_CAP_PAUSE:
case ETHER_STAT_LP_CAP_100FDX:
case ETHER_STAT_LP_CAP_100HDX:
case ETHER_STAT_LP_CAP_10FDX:
case ETHER_STAT_LP_CAP_10HDX:
case ETHER_STAT_LP_CAP_PAUSE:
case ETHER_STAT_LINK_ASMPAUSE:
case ETHER_STAT_LINK_PAUSE:
case ETHER_STAT_LINK_AUTONEG:
case ETHER_STAT_LINK_DUPLEX:
default:
val = 0;
break;
}
return (val);
}
/*
* LDC channel is UP, start handshake process with peer.
*/
static void
{
/*
* As the channel is up, use this port from now on.
*/
}
/* Initialize local session id */
/* clear peer session id */
}
/* Initiate Handshake process with peer ldc endpoint */
}
/*
* LDC channel is Reset, terminate connection with peer and try to
* bring the channel up again.
*/
static void
{
int rv;
/*
* As the channel is down, use the switch port until
* the channel becomes ready to be used.
*/
}
/* Post a reset message */
}
}
/* try to bring the channel up */
if (rv != 0) {
}
} else {
}
/* if channel is already UP - restart handshake */
}
}
/* Interrupt handler for the channel */
/*
 * NOTE(review): the declarator and many statements are elided in this
 * chunk; only comments are added below.  Dispatches LDC events (UP,
 * reset-type, READ) and cancels a pending handshake timer on exit.
 */
static uint_t
{
timeout_id_t cancel_htid = 0;
ldcp->ldc_status);
return (LDC_SUCCESS);
}
/*
 * cache cancel_htid before the events specific
 * code may overwrite it. Do not clear ldcp->cancel_htid
 * as it is also used to indicate the timer to quit immediately.
 */
/*
 * NOTE: not using switch() as event could be triggered by
 * a state change and a read request. Also the ordering of the
 * check for the event types is deliberate.
 */
if (event & LDC_EVT_UP) {
/* status couldn't be determined */
ret = LDC_FAILURE;
goto ldc_cb_ret;
}
" but ldc status is not UP(0x%x)\n",
ldcp->ldc_status);
/* spurious interrupt, return success */
goto ldc_cb_ret;
}
}
/* status couldn't be determined */
ret = LDC_FAILURE;
goto ldc_cb_ret;
}
/*
 * but print a debug warning message.
 */
if (event & LDC_EVT_READ) {
/* drop the READ bit so it is not processed below -- TODO confirm context */
event &= ~LDC_EVT_READ;
}
}
if (event & LDC_EVT_READ) {
/*
 * If the receive thread is enabled, then
 * wakeup the receive thread to process the
 * LDC messages.
 */
}
} else {
}
}
/*
 * Check to see if the status of cancel_htid has
 * changed. If another timer needs to be cancelled,
 * then let the next callback to clear it.
 */
if (cancel_htid == 0) {
}
if (cancel_htid) {
/*
 * Cancel handshake timer.
 * untimeout(9F) will not return until the pending callback is
 * cancelled or has run. No problems will result from calling
 * untimeout if the handler has already completed.
 * If the timeout handler did run, then it would just
 * return as cancel_htid is set.
 */
(void) untimeout(cancel_htid);
/* clear it only if its the same as the one we cancelled */
ldcp->cancel_htid = 0;
}
}
return (ret);
}
static void
{
int rv;
/*
* If the receive thread is enabled, then the cblock
* need to be acquired here. If not, the vgen_ldc_cb()
* calls this function with cblock held already.
*/
} else {
}
do {
if (rv != 0) {
if (rv == ECONNRESET)
goto vgen_evtread_error;
break;
}
if (msglen == 0) {
break;
}
/*
* check sid only after we have received peer's sid
* in the version negotiate msg.
*/
#ifdef DEBUG
if (vgen_hdbg & HDBG_BAD_SID) {
/* simulate bad sid condition */
vgen_hdbg &= ~(HDBG_BAD_SID);
}
#endif
if (rv != VGEN_SUCCESS) {
/*
* If sid mismatch is detected,
* reset the channel.
*/
goto vgen_evtread_error;
}
}
switch (tagp->vio_msgtype) {
case VIO_TYPE_CTRL:
break;
case VIO_TYPE_DATA:
break;
case VIO_TYPE_ERR:
break;
default:
tagp->vio_msgtype);
break;
}
/*
* If an error is encountered, stop processing and
* handle the error.
*/
if (rv != 0) {
goto vgen_evtread_error;
}
} while (msglen);
/* check once more before exiting */
goto vgen_evt_read;
}
if (rv == ECONNRESET) {
} else {
}
} else if (rv) {
}
/*
* If the receive thread is enabled, then cancel the
* handshake timeout here.
*/
if (cancel_htid) {
/*
* Cancel handshake timer. untimeout(9F) will
* not return until the pending callback is cancelled
* or has run. No problems will result from calling
* untimeout if the handler has already completed.
* If the timeout handler did run, then it would just
* return as cancel_htid is set.
*/
(void) untimeout(cancel_htid);
/*
* clear it only if its the same as the one we
* cancelled
*/
ldcp->cancel_htid = 0;
}
}
}
}
/* vgen handshake functions */
/* change the hphase for the channel to the next phase */
static vgen_ldc_t *
{
} else {
}
return (ldcp);
}
/*
 * wrapper routine to send the given message over ldc using ldc_write().
 */
/*
 * NOTE(review): the declarator and several statements are elided in this
 * chunk; only comments are added below.  Locking is conditional on the
 * caller_holds_lock flag; the write is retried on EWOULDBLOCK.
 */
static int
{
int rv;
return (VGEN_FAILURE);
if (!caller_holds_lock) {
}
}
}
/*
 * Retry the (elided) ldc_write() while it returns EWOULDBLOCK,
 * bounded by vgen_ldcwr_retries attempts.
 */
do {
if (retries++ >= vgen_ldcwr_retries)
break;
} while (rv == EWOULDBLOCK);
/* advance the tx sequence number (on success, presumably) -- TODO confirm */
ldcp->next_txseq++;
}
if (!caller_holds_lock) {
}
if (rv != 0) {
/* ldc_write() failed; propagate its errno-style return code */
return (rv);
}
/* write did not complete as expected (condition elided); report failure */
return (VGEN_FAILURE);
}
return (VGEN_SUCCESS);
}
/* send version negotiate message to the peer over ldc */
static int
{
int rv;
/* get version msg payload from ldcp->local */
if (rv != VGEN_SUCCESS) {
return (rv);
}
return (VGEN_SUCCESS);
}
/* send attr info message to the peer over ldc */
static int
{
int rv;
/* get attr msg payload from ldcp->local */
if (rv != VGEN_SUCCESS) {
return (rv);
}
return (VGEN_SUCCESS);
}
/* send descriptor ring register message to the peer over ldc */
static int
{
int rv;
/* get dring info msg payload from ldcp->local */
sizeof (ldc_mem_cookie_t));
/*
* dring_ident is set to 0. After mapping the dring, peer sets this
* value and sends it in the ack, which is saved in
* vgen_handle_dring_reg().
*/
msg.dring_ident = 0;
if (rv != VGEN_SUCCESS) {
return (rv);
}
return (VGEN_SUCCESS);
}
static int
{
int rv;
if (rv != VGEN_SUCCESS) {
return (rv);
}
return (VGEN_SUCCESS);
}
/* send descriptor ring data message to the peer over ldc */
static int
{
int rv;
if (rv != VGEN_SUCCESS) {
return (rv);
}
return (VGEN_SUCCESS);
}
/* send multicast addr info message to vsw */
/*
 * NOTE(review): the declarator and several statements are elided in this
 * chunk; only comments are added below.  Sends the multicast address
 * table in batches of n addresses per message until all mccount entries
 * have been sent.
 */
static int
{
struct ether_addr *mca;
int rv;
int i;
uint32_t n;
i = 0;
do {
/* size of this batch's payload, n addresses */
size = n * sizeof (struct ether_addr);
B_FALSE);
if (rv != VGEN_SUCCESS) {
return (rv);
}
/* advance past the batch just sent */
mccount -= n;
i += n;
} while (mccount);
return (VGEN_SUCCESS);
}
/* Initiate Phase 2 of handshake */
static int
{
int rv;
#ifdef DEBUG
if (vgen_hdbg & HDBG_OUT_STATE) {
/* simulate out of state condition */
vgen_hdbg &= ~(HDBG_OUT_STATE);
return (rv);
}
if (vgen_hdbg & HDBG_TIMEOUT) {
/* simulate timeout condition */
vgen_hdbg &= ~(HDBG_TIMEOUT);
return (VGEN_SUCCESS);
}
#endif
if (rv != VGEN_SUCCESS) {
return (rv);
}
/* Bind descriptor ring to the channel */
if (ldcp->num_txdcookies == 0) {
if (rv != 0) {
"rv(%x)\n", rv);
return (rv);
}
}
/* update local dring_info params */
sizeof (ldc_mem_cookie_t));
if (rv != VGEN_SUCCESS) {
return (rv);
}
return (VGEN_SUCCESS);
}
/*
* Set vnet-protocol-version dependent functions based on version.
*/
static void
{
/*
* If the version negotiated with peer is >= 1.4(Jumbo Frame
* Support), set the mtu in our attributes to max_frame_size.
*/
/*
* If the version negotiated with peer is == 1.3 (Vlan Tag
* Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
*/
} else {
/*
* Pre-1.3 peers expect max frame size of ETHERMAX.
* We can negotiate that size with those peers provided the
* following conditions are true:
* - Only pvid is defined for our peer and there are no vids.
* - pvids are equal.
* untagged frames of max size ETHERMAX.
*/
}
}
/* Versions >= 1.2 */
if (VGEN_PRI_ETH_DEFINED(vgenp)) {
/*
* enable priority routines and pkt mode only if
* at least one pri-eth-type is specified in MD.
*/
/* set xfer mode for vgen_send_attr_info() */
} else {
/* no priority eth types defined in MD */
/* set xfer mode for vgen_send_attr_info() */
}
} else {
/* Versions prior to 1.2 */
}
}
/*
* Reset vnet-protocol-version dependent functions to pre-v1.2.
*/
static void
{
/* set xfer mode for vgen_send_attr_info() */
}
static void
{
/*
* NOTE: for now, we will assume we have a single channel.
*/
return;
}
/*
* If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
* the connection. See comments in vgen_set_vnet_proto_ops().
*/
}
}
static void
{
}
}
/*
* This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
* This can happen after a channel comes up (status: LDC_UP) or
* when handshake gets terminated due to various conditions.
*/
static void
{
int rv;
/* reset hstate and hphase */
/*
* Save the id of pending handshake timer in cancel_htid.
* This will be checked in vgen_ldc_cb() and the handshake timer will
* be cancelled after releasing cblock.
*/
}
}
/* Unbind tx descriptor ring from the channel */
if (ldcp->num_txdcookies) {
if (rv != 0) {
}
ldcp->num_txdcookies = 0;
}
/* Unmap peer's dring */
}
/*
* clear local handshake params and initialize.
*/
/* set version to the highest version supported */
/* set attr_info params */
/*
* Note: dring is created, but not bound yet.
* local dring_info params will be updated when we bind the dring in
* vgen_handshake_phase2().
* dring_ident is set to 0. After mapping the dring, peer sets this
* value and sends it in the ack, which is saved in
* vgen_handle_dring_reg().
*/
/* clear peer_hparams */
/* reset the channel if required */
if (ldcp->need_ldc_reset) {
/* clear sids */
/* try to bring the channel up */
if (rv != 0) {
}
} else {
}
}
}
/* wrapper function for vgen_reset_hphase */
static void
{
}
/*
* Initiate handshake with the peer by sending various messages
* based on the handshake-phase that the channel is currently in.
*/
static void
{
int rv = 0;
switch (hphase) {
case VH_PHASE1:
/*
* start timer, for entire handshake process, turn this timer
* off if all phases of handshake complete successfully and
* hphase goes to VH_DONE(below) or
* vgen_reset_hphase() gets called or
* channel is reset due to errors or
* vgen_ldc_uninit() is invoked(vgen_stop).
*/
/* Phase 1 involves negotiating the version */
break;
case VH_PHASE2:
break;
case VH_PHASE3:
break;
case VH_DONE:
/*
* Save the id of pending handshake timer in cancel_htid.
* This will be checked in vgen_ldc_cb() and the handshake
* timer will be cancelled after releasing cblock.
*/
}
/*
* If this channel(port) is connected to vsw,
* need to sync multicast table with vsw.
*/
if (rv != VGEN_SUCCESS)
break;
}
/*
* Check if mac layer should be notified to restart
* transmissions. This can happen if the channel got
* reset and vgen_clobber_tbufs() is called, while
* need_resched is set.
*/
if (ldcp->need_resched) {
}
break;
default:
break;
}
if (rv == ECONNRESET) {
} else {
}
} else if (rv) {
}
}
/*
* Check if the current handshake phase has completed successfully and
* return the status.
*/
static int
{
int status = 0;
switch (hphase) {
case VH_PHASE1:
/*
* Phase1 is done, if version negotiation
* completed successfully.
*/
break;
case VH_PHASE2:
/*
* Phase 2 is done, if attr info and dring info
* have been exchanged successfully.
*/
break;
case VH_PHASE3:
/* Phase 3 is done, if rdx msg has been exchanged */
break;
default:
break;
}
if (status == 0) {
return (VGEN_FAILURE);
}
return (VGEN_SUCCESS);
}
/* retry handshake on failure */
static void
{
/* reset handshake phase */
/* handshake retry is specified and the channel is UP */
}
}
}
/*
* to a version info msg that we sent.
*/
static int
{
int ack = 0;
int failed = 0;
int idx;
int rv = 0;
switch (tagp->vio_subtype) {
case VIO_SUBTYPE_INFO:
/* Cache sid of peer if this is the first time */
}
/*
* If we are not already in VH_PHASE1, reset to
* pre-handshake state, and initiate handshake
* to the peer too.
*/
}
/* save peer's requested values */
/* unsupported dev_class, send NACK */
/* send reply msg back to peer */
if (rv != VGEN_SUCCESS) {
return (rv);
}
return (VGEN_FAILURE);
}
idx = 0;
for (;;) {
/* nack with next lower version */
break;
}
/* major version match - ACK version */
ack = 1;
/*
* lower minor version to the one this endpt
* supports, if necessary
*/
}
break;
}
idx++;
if (idx == VGEN_NUM_VER) {
/* no version match - send NACK */
failed = 1;
break;
}
}
/* send reply msg back to peer */
B_FALSE);
if (rv != VGEN_SUCCESS) {
return (rv);
}
if (ack) {
}
if (failed) {
return (VGEN_FAILURE);
}
/* VER_ACK_SENT and VER_ACK_RCVD */
/* local and peer versions match? */
/* move to the next phase */
}
break;
case VIO_SUBTYPE_ACK:
/* This should not happen. */
return (VGEN_FAILURE);
}
/* SUCCESS - we have agreed on a version */
/* VER_ACK_SENT and VER_ACK_RCVD */
/* local and peer versions match? */
/* move to the next phase */
}
break;
case VIO_SUBTYPE_NACK:
/* This should not happen. */
return (VGEN_FAILURE);
}
/* check if version in NACK is zero */
/*
* Version Negotiation has failed.
*/
return (VGEN_FAILURE);
}
idx = 0;
for (;;) {
/* select next lower version */
break;
}
/* major version match */
break;
}
idx++;
if (idx == VGEN_NUM_VER) {
/*
* no version match.
* Version Negotiation has failed.
*/
"Version Negotiation Failed\n");
return (VGEN_FAILURE);
}
}
if (rv != VGEN_SUCCESS) {
return (rv);
}
break;
}
return (VGEN_SUCCESS);
}
/* Check if the attributes are supported */
/*
 * Validates the attribute values received from the peer; returns
 * VGEN_SUCCESS if acceptable, VGEN_FAILURE otherwise.
 * NOTE(review): the declarator and the actual comparisons are elided in
 * this chunk; only comments are added below.
 */
static int
{
return (VGEN_FAILURE);
}
/* versions < 1.4, mtu must match */
return (VGEN_FAILURE);
}
} else {
/* Ver >= 1.4, validate mtu of the peer is at least ETHERMAX */
return (VGEN_FAILURE);
}
}
return (VGEN_SUCCESS);
}
/*
* to an attr info msg that we sent.
*/
static int
{
int ack = 1;
int rv = 0;
" Invalid Phase(%u)\n",
return (VGEN_FAILURE);
}
switch (tagp->vio_subtype) {
case VIO_SUBTYPE_INFO:
/* save peer's values */
if (rv == VGEN_FAILURE) {
/* unsupported attr, send NACK */
ack = 0;
} else {
/*
* Versions >= 1.4:
* The mtu is negotiated down to the
* minimum of our mtu and peer's mtu.
*/
/*
* If we have received an ack for the attr info
* that we sent, then check if the mtu computed
* above matches the mtu that the peer had ack'd
* (saved in local hparams). If they don't
* match, we fail the handshake.
*/
/* send NACK */
ack = 0;
}
} else {
/*
* Save the mtu computed above in our
* attr parameters, so it gets sent in
* the attr info from us to the peer.
*/
}
/* save the MIN mtu in the msg to be replied */
}
}
if (ack) {
} else {
}
/* send reply msg back to peer */
B_FALSE);
if (rv != VGEN_SUCCESS) {
return (rv);
}
if (ack) {
} else {
/* failed */
return (VGEN_FAILURE);
}
}
break;
case VIO_SUBTYPE_ACK:
/*
* Versions >= 1.4:
* The ack msg sent by the peer contains the minimum of
* our mtu (that we had sent in our attr info) and the
* peer's mtu.
*
* If we have sent an ack for the attr info msg from
* the peer, check if the mtu that was computed then
* (saved in local hparams) matches the mtu that the
* peer has ack'd. If they don't match, we fail the
* handshake.
*/
return (VGEN_FAILURE);
}
} else {
/*
* If the mtu ack'd by the peer is > our mtu
* fail handshake. Otherwise, save the mtu, so
* we can validate it when we receive attr info
* from our peer.
*/
return (VGEN_FAILURE);
}
}
}
}
}
break;
case VIO_SUBTYPE_NACK:
return (VGEN_FAILURE);
}
return (VGEN_SUCCESS);
}
/* Check if the dring info msg is ok */
/*
 * Sanity-checks the descriptor ring register message; the descriptor size
 * is compared against sizeof (vnet_public_desc_t) (other checks elided).
 */
static int
{
/* check if msg contents are ok */
sizeof (vnet_public_desc_t))) {
return (VGEN_FAILURE);
}
return (VGEN_SUCCESS);
}
/*
* the peer to a dring register msg that we sent.
*/
static int
{
int ack = 0;
int rv = 0;
/* dring_info can be rcvd in any of the phases after Phase1 */
"Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
return (VGEN_FAILURE);
}
switch (tagp->vio_subtype) {
case VIO_SUBTYPE_INFO:
/*
* verified dring info msg to be ok,
* now try to map the remote dring.
*/
if (rv == DDI_SUCCESS) {
/* now we can ack the peer */
ack = 1;
}
}
if (ack == 0) {
/* failed, send NACK */
} else {
/* save peer's dring_info values */
sizeof (dcookie));
/* set dring_ident for the peer */
/* return the dring_ident in ack msg */
msg->dring_ident =
}
}
/* send reply msg back to peer */
B_FALSE);
if (rv != VGEN_SUCCESS) {
return (rv);
}
if (ack) {
} else {
return (VGEN_FAILURE);
}
}
break;
case VIO_SUBTYPE_ACK:
/* local dring is now ready */
/* save dring_ident acked by peer */
}
}
break;
case VIO_SUBTYPE_NACK:
return (VGEN_FAILURE);
}
return (VGEN_SUCCESS);
}
/*
* from the peer to a rdx info msg that we sent.
*/
static int
{
int rv = 0;
"Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
return (VGEN_FAILURE);
}
switch (tagp->vio_subtype) {
case VIO_SUBTYPE_INFO:
/* send reply msg back to peer */
B_FALSE);
if (rv != VGEN_SUCCESS) {
return (rv);
}
}
break;
case VIO_SUBTYPE_ACK:
}
break;
case VIO_SUBTYPE_NACK:
return (VGEN_FAILURE);
}
return (VGEN_SUCCESS);
}
static int
{
struct ether_addr *addrp;
int count;
int i;
switch (tagp->vio_subtype) {
case VIO_SUBTYPE_INFO:
/* vnet shouldn't recv set mcast msg, only vsw handles it */
break;
case VIO_SUBTYPE_ACK:
break;
case VIO_SUBTYPE_NACK:
/* multicast remove request failed */
break;
}
/* multicast add request failed */
/* delete address from the table */
}
break;
}
}
}
break;
}
return (VGEN_SUCCESS);
}
/* handler for control messages received from the peer ldc end-point */
static int
{
int rv = 0;
switch (tagp->vio_subtype_env) {
case VIO_VER_INFO:
break;
case VIO_ATTR_INFO:
break;
case VIO_DRING_REG:
break;
case VIO_RDX:
break;
case VNET_MCAST_INFO:
break;
case VIO_DDS_INFO:
break;
}
return (rv);
}
/* handler for data messages received from the peer ldc end-point */
static int
{
int rv = 0;
return (rv);
if (rv != 0) {
return (rv);
}
}
switch (tagp->vio_subtype_env) {
case VIO_DRING_DATA:
break;
case VIO_PKT_DATA:
break;
default:
break;
}
return (rv);
}
/*
* dummy pkt data handler function for vnet protocol version 1.0
*/
static void
{
}
/*
* This function handles raw pkt data messages received over the channel.
* Currently, only priority-eth-type frames are received through this mechanism.
* In this case, the frame(data) is present within the message itself which
* is copied into an mblk before sending it up the stack.
*/
static void
{
goto exit;
}
"unable to process priority frame\n");
goto exit;
}
}
/* copy the frame from the payload of raw data msg into the mblk */
/* update stats */
/* send up; call vrx_cb() as cblock is already released */
exit:
}
static int
{
int rv = 0;
if (rv != VGEN_SUCCESS) {
}
return (rv);
}
static int
{
int rv = 0;
switch (tagp->vio_subtype) {
case VIO_SUBTYPE_INFO:
/*
* To reduce the locking contention, release the
* cblock here and re-acquire it once we are done
* receiving packets.
*/
break;
case VIO_SUBTYPE_ACK:
break;
case VIO_SUBTYPE_NACK:
break;
}
return (rv);
}
static int
{
int rv = 0;
#ifdef VGEN_HANDLE_LOST_PKTS
int n;
#endif
/*
* received a data msg, which contains the start and end
* indices of the descriptors within the rx ring holding data,
* the seq_num of data packet corresponding to the start index,
* and the dring_ident.
* We can now read the contents of each of these descriptors
* and gather data from it.
*/
/* validate rx start and end indeces */
/* drop the message if invalid index */
return (rv);
}
/* validate dring_ident */
/* invalid dring_ident, drop the msg */
return (rv);
}
#ifdef DEBUG
if (vgen_trigger_rxlost) {
/* drop this msg to simulate lost pkts for debugging */
vgen_trigger_rxlost = 0;
return (rv);
}
#endif
#ifdef VGEN_HANDLE_LOST_PKTS
/* receive start index doesn't match expected index */
/* calculate the number of pkts lost */
} else {
}
statsp->rx_lost_pkts += n;
/* indicate the range of lost descriptors */
/* dring ident is left unchanged */
if (rv != VGEN_SUCCESS) {
"vgen_sendmsg failed, stype:NACK\n");
return (rv);
}
/*
* and set the new expected value of next_rxi
* and continue(below) to process from the new
* start index.
*/
}
#endif /* VGEN_HANDLE_LOST_PKTS */
/* Now receive messages */
return (rv);
}
static int
{
int count = 0;
int rv = 0;
struct ether_header *ehp;
/*
* start processing the descriptors from the specified
* start index, up to the index a descriptor is not ready
* to be processed or we process the entire descriptor ring
* and wrap around upto the start index.
*/
/* need to set the start index of descriptors to be ack'd */
/* index upto which we have ack'd */
do {
if (rv != 0) {
" rv(%d)\n", rv);
return (rv);
}
/*
* Before waiting and retry here, send up
* the packets that are received already
*/
count = 0;
}
/*
* descriptor is not ready.
* retry descriptor acquire, stop processing
* after max # retries.
*/
if (retries == vgen_recv_retries)
break;
retries++;
goto vgen_recv_retry;
}
retries = 0;
if (set_ack_start) {
/*
* initialize the start index of the range
* of descriptors to be ack'd.
*/
}
} else {
/*
* Try to allocate an mblk from the free pool
* of recv mblks for the channel.
* If this fails, use allocb().
*/
BPRI_MED);
} else {
/*
* Data buffer returned by allocb(9F)
* is 8byte aligned. We allocate extra
* 8 bytes to ensure size is multiple
* of 8 bytes for ldc_mem_copy().
*/
}
}
}
/*
* rxd_err or allocb() failure,
* drop this packet, get next.
*/
if (rxd_err) {
} else {
statsp->rx_allocb_fail++;
}
/* set descriptor done bit */
if (rv != 0) {
"vnet_dring_entry_set_dstate err rv(%d)\n",
rv);
return (rv);
}
if (ack_needed) {
/*
* sender needs ack for this packet,
* ack pkts upto this index.
*/
if (rv != VGEN_SUCCESS) {
goto error_ret;
}
/* need to set new ack start index */
}
goto vgen_next_rxi;
}
/* if ldc_mem_copy() failed */
if (rv) {
goto error_ret;
}
if (rv != 0) {
"vnet_dring_entry_set_dstate err rv(%d)\n", rv);
goto error_ret;
}
if (ack_needed) {
/*
* sender needs ack for this packet,
* ack pkts upto this index.
*/
if (rv != VGEN_SUCCESS) {
goto error_ret;
}
/* need to set new ack start index */
}
"ldc_mem_copy nread(%lx), nbytes(%lx)\n",
goto vgen_next_rxi;
}
/* point to the actual end of data */
/* update stats */
if (IS_BROADCAST(ehp))
else if (IS_MULTICAST(ehp))
/* build a chain of received packets */
/* first pkt */
} else {
}
if (count++ > vgen_chain_len) {
count = 0;
}
/* update end index of range of descrs to be ack'd */
/* update the next index to be processed */
/*
* processed the entire descriptor ring upto
* the index at which we started.
*/
break;
}
} while (1);
/*
* send an ack message to peer indicating that we have stopped
* processing descriptors.
*/
if (set_ack_start) {
/*
* We have ack'd upto some index and we have not
* processed any descriptors beyond that index.
* Use the last ack'd index as both the start and
* end of range of descrs being ack'd.
* Note: This results in acking the last index twice
* and should be harmless.
*/
}
if (rv != VGEN_SUCCESS) {
goto error_ret;
}
/* save new recv index of next dring msg */
/* send up packets received so far */
}
return (rv);
}
static int
{
int rv = 0;
/*
* received an ack corresponding to a specific descriptor for
* which we had set the ACK bit in the descriptor (during
* transmit). This enables us to reclaim descriptors.
*/
/* validate start and end indeces in the tx ack msg */
/* drop the message if invalid index */
return (rv);
}
/* validate dring_ident */
/* invalid dring_ident, drop the msg */
return (rv);
}
/* reclaim descriptors that are done */
/*
* receiver continued processing descriptors after
* sending us the ack.
*/
return (rv);
}
/* receiver stopped processing descriptors */
/*
* determine if there are any pending tx descriptors
* ready to be processed by the receiver(peer) and if so,
* send a message to the peer to restart receiving.
*/
/*
* using the end index of the descriptor range for which
* we received the ack, check if the next descriptor is
* ready.
*/
} else {
/*
* descr next to the end of ack'd descr range is not
* ready.
* starting from the current reclaim index, check
* if any descriptor is ready.
*/
}
}
if (ready_txd) {
/*
* we have tx descriptor(s) ready to be
* processed by the receiver.
* send a message to the peer with the start index
* of ready descriptors.
*/
if (rv != VGEN_SUCCESS) {
return (rv);
}
} else {
/*
* no ready tx descriptors. set the flag to send a
* message to peer when tx descriptors are ready in
* transmit routine.
*/
}
return (rv);
}
static int
{
int rv = 0;
/*
* peer sent a NACK msg to indicate lost packets.
* The start and end correspond to the range of descriptors
* for which the peer didn't receive a dring data msg and so
* didn't receive the corresponding data.
*/
/* validate start and end indeces in the tx nack msg */
/* drop the message if invalid index */
return (rv);
}
/* validate dring_ident */
/* invalid dring_ident, drop the msg */
return (rv);
}
/* no busy descriptors, bogus nack ? */
return (rv);
}
/* we just mark the descrs as done so they can be reclaimed */
}
return (rv);
}
static void
{
}
/*
* transmit reclaim function. starting from the current reclaim index
* look for descriptors marked DONE and reclaim the descriptor and the
* corresponding buffers (tbuf).
*/
static void
{
int count = 0;
#ifdef DEBUG
return;
#endif
count++;
}
/*
* Check if mac layer should be notified to restart transmissions
*/
}
}
/* return the number of pending transmits for the channel */
/*
 * NOTE(review): the declarator and the distance computations are elided;
 * the two branches presumably handle the wrapped and non-wrapped cases of
 * the circular tbuf ring -- TODO confirm.
 */
static int
{
int n;
} else {
/* cur_tbufp > next_tbufp */
}
return (n);
}
/* determine if the transmit descriptor ring is full */
/*
 * NOTE(review): declarator and the fullness comparison are elided;
 * returns VGEN_SUCCESS when the (elided) condition holds, else
 * VGEN_FAILURE.
 */
static int
{
return (VGEN_SUCCESS);
}
return (VGEN_FAILURE);
}
/* determine if timeout condition has occurred */
/*
 * NOTE(review): declarator and part of the compound condition are elided;
 * the check involves the vnet_ldcwd_txtimeout tunable.
 */
static int
{
(vnet_ldcwd_txtimeout) &&
return (VGEN_SUCCESS);
} else {
return (VGEN_FAILURE);
}
}
/* transmit watchdog timeout handler */
/*
 * Periodic transmit watchdog for a channel.
 * NOTE(review): most statements are elided in this chunk.  A DEBUG hook
 * (vgen_trigger_txtimeout) can force the timeout path, and a pending
 * reschedule request (need_resched) is serviced here as well.
 */
static void
vgen_ldc_watchdog(void *arg)
{
int rv;
if (rv == VGEN_SUCCESS) {
#ifdef DEBUG
if (vgen_trigger_txtimeout) {
/* tx timeout triggered for debugging */
}
#endif
if (ldcp->need_resched) {
/* notify mac layer to restart transmits (call elided) */
}
}
}
/* handler for error messages received from the peer ldc end-point */
static void
{
}
/*
 * NOTE(review): declarator elided.  Validates the sequence number of a
 * received data message against the expected next_rxseq and advances it
 * on success; returns EINVAL on mismatch, 0 on success.
 */
static int
{
} else {
return (EINVAL);
}
/* seqnums don't match */
"next_rxseq(0x%lx) != seq_num(0x%lx)\n",
return (EINVAL);
}
/* message accepted; expect the next sequence number from here on */
ldcp->next_rxseq++;
return (0);
}
/* Check if the session id in the received message is valid */
/*
 * Returns VGEN_FAILURE when the sid comparison (elided) fails,
 * VGEN_SUCCESS otherwise.
 */
static int
{
return (VGEN_FAILURE);
}
else
return (VGEN_SUCCESS);
}
/*
 * Format a 6-byte ethernet address into ebuf as a colon-separated hex
 * string and return the buffer.  NOTE(review): declarator and the
 * snprintf-style call are partially elided.
 */
static caddr_t
{
"%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]);
return (ebuf);
}
/* Handshake watchdog timeout handler */
/*
 * Fires if the handshake does not complete within the allotted time.
 * If cancel_htid is set, the timer is being cancelled concurrently (see
 * vgen_ldc_cb()), so just clear the id and return without taking any
 * recovery action.
 */
static void
vgen_hwatchdog(void *arg)
{
"handshake timeout ldc(%lx) phase(%x) state(%x)\n",
if (ldcp->cancel_htid) {
ldcp->cancel_htid = 0;
return;
}
}
static void
{
char ea[6];
"\tver_major: %d, ver_minor: %d, dev_class: %d\n",
"\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
"\tldc_addr: 0x%lx, ldc_size: %ld\n",
}
static void
{
"\tldc_id: 0x%lx, ldc_status: 0x%x\n",
"\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
"\thphase: 0x%x, hstate: 0x%x\n",
}
/*
* Send received packets up the stack.
*/
static void
{
} else {
}
} else {
}
}
/*
 * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data.
 * This thread is woken up by the LDC interrupt handler to process
 * LDC packets and receive data.
 */
/*
 * NOTE(review): many statements are elided in this chunk; only comments
 * are added below.  The thread loops waiting on rcv_thr_flags for either
 * VGEN_WTHR_DATARCVD or VGEN_WTHR_STOP, honors stop requests, and exits
 * via thread_exit().
 */
static void
vgen_ldc_rcv_worker(void *arg)
{
"vnet_rcv_thread");
/*
 * Wait until the data is received or a stop
 * request is received.
 */
while (!(ldcp->rcv_thr_flags &
(VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
}
/*
 * First process the stop request.
 */
break;
}
}
/*
 * Update the run status and wakeup the thread that
 * has sent the stop request.
 */
thread_exit();
}
/* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */
static void
{
/*
* Send a stop request by setting the stop flag and
* wait until the receive thread stops.
*/
}
}
}
/*
* Wait for the channel rx-queue to be drained by allowing the receive
* worker thread to read all messages from the rx-queue of the channel.
* Assumption: further callbacks are disabled at this time.
*/
static void
{
/*
* If there is data in ldc rx queue, wait until the rx
* worker thread runs and drains all msgs in the queue.
*/
/*
* We need to check both bits - DATARCVD and PROCESSING, to be cleared.
* If DATARCVD is set, that means the callback has signalled the worker
* thread, but the worker hasn't started processing yet. If PROCESSING
* is set, that means the thread is awake and processing. Note that the
* DATARCVD state can only be seen once, as the assumption is that
* further callbacks have been disabled at this point.
*/
while (ldcp->rcv_thr_flags &
/*
* Note that the only way we return is due to a timeout;
* we set the new time to wait, before we go back and
* check the condition. The other(unlikely) possibility
* is a premature wakeup(see cv_timedwait(9F)) in which
* case we just continue to use the same time to wait.
*/
}
}
}
/*
* vgen_dds_rx -- post DDS messages to vnet.
*/
static int
{
return (EBADMSG);
}
return (0);
}
/*
* vgen_dds_tx -- an interface called by vnet to send DDS messages.
*/
int
{
goto vgen_dsend_exit;
}
if (rv != VGEN_SUCCESS) {
} else {
rv = 0;
}
return (rv);
}
#if DEBUG
/*
* Print debug messages - set to 0xf to enable all msgs
*/
static void
{
char buf[256];
}
}
}
}
#endif