/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/machsystm.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/debug.h>
#include <sys/ethernet.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsun.h>
#include <sys/note.h>
#include <sys/mac_provider.h>
#include <sys/mac_ether.h>
#include <sys/ldc.h>
#include <sys/mach_descrip.h>
#include <sys/mdeg.h>
#include <net/if.h>
#include <sys/vnet.h>
#include <sys/vio_mailbox.h>
#include <sys/vio_common.h>
#include <sys/vnet_common.h>
#include <sys/vnet_mailbox.h>
#include <sys/vio_util.h>
#include <sys/vnet_gen.h>
#include <sys/atomic.h>
#include <sys/callb.h>
#include <sys/sdt.h>
#include <sys/intr.h>
#include <sys/pattr.h>
#include <sys/vlan.h>
/*
* Implementation of the mac provider functionality for vnet using the
* generic(default) transport layer of sun4v Logical Domain Channels(LDC).
*/
/* Entry Points */
int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
const uint8_t *macaddr, void **vgenhdl);
int vgen_init_mdeg(void *arg);
void vgen_uninit(void *arg);
int vgen_dds_tx(void *arg, void *dmsg);
int vgen_enable_intr(void *arg);
int vgen_disable_intr(void *arg);
mblk_t *vgen_rx_poll(void *arg, int bytes_to_pickup);
static int vgen_start(void *arg);
static void vgen_stop(void *arg);
static mblk_t *vgen_tx(void *arg, mblk_t *mp);
static int vgen_multicst(void *arg, boolean_t add,
const uint8_t *mca);
static int vgen_promisc(void *arg, boolean_t on);
static int vgen_unicst(void *arg, const uint8_t *mca);
static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
static void vgen_ioctl(void *arg, queue_t *q, mblk_t *mp);
#ifdef VNET_IOC_DEBUG
static int vgen_force_link_state(vgen_port_t *portp, int link_state);
#endif
/* Port/LDC Configuration */
static int vgen_read_mdprops(vgen_t *vgenp);
static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
mde_cookie_t node);
static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
uint32_t *mtu);
static void vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
boolean_t *pls);
static void vgen_detach_ports(vgen_t *vgenp);
static void vgen_port_detach(vgen_port_t *portp);
static void vgen_port_list_insert(vgen_port_t *portp);
static void vgen_port_list_remove(vgen_port_t *portp);
static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
int port_num);
static int vgen_mdeg_reg(vgen_t *vgenp);
static void vgen_mdeg_unreg(vgen_t *vgenp);
static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
mde_cookie_t mdex);
static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
static int vgen_port_attach(vgen_port_t *portp);
static void vgen_port_detach_mdeg(vgen_port_t *portp);
static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
static uint64_t vgen_port_stat(vgen_port_t *portp, uint_t stat);
static void vgen_port_reset(vgen_port_t *portp);
static void vgen_reset_vsw_port(vgen_t *vgenp);
static int vgen_ldc_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
static void vgen_ldc_up(vgen_ldc_t *ldcp);
static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
static void vgen_ldc_detach(vgen_ldc_t *ldcp);
static void vgen_port_init(vgen_port_t *portp);
static void vgen_port_uninit(vgen_port_t *portp);
static int vgen_ldc_init(vgen_ldc_t *ldcp);
static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
static uint64_t vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
/* I/O Processing */
static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
static int vgen_ldcsend(void *arg, mblk_t *mp);
static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
static void vgen_tx_watchdog(void *arg);
/* Dring Configuration */
static int vgen_create_dring(vgen_ldc_t *ldcp);
static void vgen_destroy_dring(vgen_ldc_t *ldcp);
static int vgen_map_dring(vgen_ldc_t *ldcp, void *pkt);
static void vgen_unmap_dring(vgen_ldc_t *ldcp);
static int vgen_mapin_avail(vgen_ldc_t *ldcp);
/* VIO Message Processing */
static int vgen_handshake(vgen_ldc_t *ldcp);
static int vgen_handshake_done(vgen_ldc_t *ldcp);
static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
static int vgen_handshake_phase3(vgen_ldc_t *ldcp);
static void vgen_setup_handshake_params(vgen_ldc_t *ldcp);
static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
static int vgen_send_attr_info(vgen_ldc_t *ldcp);
static int vgen_send_rx_dring_reg(vgen_ldc_t *ldcp);
static int vgen_send_tx_dring_reg(vgen_ldc_t *ldcp);
static void vgen_init_dring_reg_msg(vgen_ldc_t *ldcp, vio_dring_reg_msg_t *msg,
uint8_t option);
static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
static int vgen_send_dringdata(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
vio_msg_tag_t *tagp);
static int vgen_handle_attr_msg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg);
static int vgen_handle_attr_ack(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg);
static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_handle_dring_reg_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_handle_dring_reg_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
uint32_t msglen);
static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static void vgen_handle_evt_up(vgen_ldc_t *ldcp);
static int vgen_process_reset(vgen_ldc_t *ldcp, int flags);
static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static void vgen_hwatchdog(void *arg);
static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
static void vgen_link_update(vgen_t *vgenp, link_state_t link_state);
/* VLANs */
static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
uint16_t *nvidsp, uint16_t *default_idp);
static void vgen_vlan_create_hash(vgen_port_t *portp);
static void vgen_vlan_destroy_hash(vgen_port_t *portp);
static void vgen_vlan_add_ids(vgen_port_t *portp);
static void vgen_vlan_remove_ids(vgen_port_t *portp);
static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
uint16_t *vidp);
static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
boolean_t is_tagged, uint16_t vid);
static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
/* Exported functions */
int vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller);
int vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
void vgen_destroy_rxpools(void *arg);
/* Externs */
extern void vnet_dds_rx(void *arg, void *dmsg);
extern void vnet_dds_cleanup_hio(vnet_t *vnetp);
extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
extern void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
extern int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen,
boolean_t caller_holds_lock);
extern void vgen_stop_msg_thread(vgen_ldc_t *ldcp);
extern int vgen_create_tx_dring(vgen_ldc_t *ldcp);
extern void vgen_destroy_tx_dring(vgen_ldc_t *ldcp);
extern int vgen_map_rx_dring(vgen_ldc_t *ldcp, void *pkt);
extern void vgen_unmap_rx_dring(vgen_ldc_t *ldcp);
extern int vgen_create_rx_dring(vgen_ldc_t *ldcp);
extern void vgen_destroy_rx_dring(vgen_ldc_t *ldcp);
extern int vgen_map_tx_dring(vgen_ldc_t *ldcp, void *pkt);
extern void vgen_unmap_tx_dring(vgen_ldc_t *ldcp);
extern int vgen_map_data(vgen_ldc_t *ldcp, void *pkt);
extern int vgen_handle_dringdata_shm(void *arg1, void *arg2);
extern int vgen_handle_dringdata(void *arg1, void *arg2);
extern int vgen_dringsend_shm(void *arg, mblk_t *mp);
extern int vgen_dringsend(void *arg, mblk_t *mp);
extern void vgen_ldc_msg_worker(void *arg);
extern int vgen_send_dringack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
uint32_t start, int32_t end, uint8_t pstate);
extern mblk_t *vgen_poll_rcv_shm(vgen_ldc_t *ldcp, int bytes_to_pickup);
extern mblk_t *vgen_poll_rcv(vgen_ldc_t *ldcp, int bytes_to_pickup);
extern int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
#define VGEN_PRI_ETH_DEFINED(vgenp) ((vgenp)->pri_num_types != 0)
/*
* Acquire all five per-channel mutexes of 'ldcp', always in the order
* cblock, rxlock, wrlock, txlock, tclock.
* NOTE: the expansion is a sequence of statements (it is not wrapped in
* do { } while (0)), so it must not be used as the unbraced body of an
* if/else or loop.
*/
#define LDC_LOCK(ldcp) \
mutex_enter(&((ldcp)->cblock));\
mutex_enter(&((ldcp)->rxlock));\
mutex_enter(&((ldcp)->wrlock));\
mutex_enter(&((ldcp)->txlock));\
mutex_enter(&((ldcp)->tclock));
/*
* Release all five per-channel mutexes of 'ldcp' in the order
* tclock, txlock, wrlock, rxlock, cblock, i.e. the reverse of the order in
* which LDC_LOCK() acquires them.
* NOTE: the expansion is a sequence of statements (it is not wrapped in
* do { } while (0)), so it must not be used as the unbraced body of an
* if/else or loop.
*/
#define LDC_UNLOCK(ldcp) \
mutex_exit(&((ldcp)->tclock));\
mutex_exit(&((ldcp)->txlock));\
mutex_exit(&((ldcp)->wrlock));\
mutex_exit(&((ldcp)->rxlock));\
mutex_exit(&((ldcp)->cblock));
#define VGEN_VER_EQ(ldcp, major, minor) \
((ldcp)->local_hparams.ver_major == (major) && \
(ldcp)->local_hparams.ver_minor == (minor))
#define VGEN_VER_LT(ldcp, major, minor) \
(((ldcp)->local_hparams.ver_major < (major)) || \
((ldcp)->local_hparams.ver_major == (major) && \
(ldcp)->local_hparams.ver_minor < (minor)))
#define VGEN_VER_GTEQ(ldcp, major, minor) \
(((ldcp)->local_hparams.ver_major > (major)) || \
((ldcp)->local_hparams.ver_major == (major) && \
(ldcp)->local_hparams.ver_minor >= (minor)))
/*
* Property names
*/
static char macaddr_propname[] = "mac-address";
static char rmacaddr_propname[] = "remote-mac-address";
static char channel_propname[] = "channel-endpoint";
static char reg_propname[] = "reg";
static char port_propname[] = "port";
static char swport_propname[] = "switch-port";
static char id_propname[] = "id";
static char vdev_propname[] = "virtual-device";
static char vnet_propname[] = "network";
static char pri_types_propname[] = "priority-ether-types";
static char vgen_pvid_propname[] = "port-vlan-id";
static char vgen_vid_propname[] = "vlan-id";
static char vgen_dvid_propname[] = "default-vlan-id";
static char port_pvid_propname[] = "remote-port-vlan-id";
static char port_vid_propname[] = "remote-vlan-id";
static char vgen_mtu_propname[] = "mtu";
static char vgen_linkprop_propname[] = "linkprop";
/*
* VIO Protocol Version Info:
*
* The version specified below represents the version of protocol currently
* supported in the driver. It means the driver can negotiate with peers with
* versions <= this version. Here is a summary of the feature(s) that are
* supported at each version of the protocol:
*
* 1.0 Basic VIO protocol.
* 1.1 vDisk protocol update (no virtual network update).
* 1.2 Support for priority frames (priority-ether-types).
* 1.3 VLAN and HybridIO support.
* 1.4 Jumbo Frame support.
* 1.5 Link State Notification support with optional support
* for Physical Link information.
* 1.6 Support for RxDringData mode.
*/
static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 6} };
/* Tunables */
uint32_t vgen_hwd_interval = 5; /* handshake watchdog freq in sec */
uint32_t vgen_ldcwr_retries = 10; /* max # of ldc_write() retries */
uint32_t vgen_ldcup_retries = 5; /* max # of ldc_up() retries */
uint32_t vgen_ldccl_retries = 5; /* max # of ldc_close() retries */
uint32_t vgen_tx_delay = 0x30; /* delay when tx descr not available */
uint32_t vgen_ldc_mtu = VGEN_LDC_MTU; /* ldc mtu */
uint32_t vgen_txwd_interval = VGEN_TXWD_INTERVAL; /* watchdog freq in msec */
uint32_t vgen_txwd_timeout = VGEN_TXWD_TIMEOUT; /* tx timeout in msec */
/*
* Max # of channel resets allowed during handshake.
*/
uint32_t vgen_ldc_max_resets = 5;
/*
* See comments in vsw.c for details on the dring modes supported.
* In RxDringData mode, # of buffers is determined by multiplying the # of
* descriptors with the factor below. Note that the factor must be > 1; i.e,
* the # of buffers must always be > # of descriptors. This is needed because,
* while the shared memory buffers are sent up the stack on the receiver, the
* sender needs additional buffers that can be used for further transmits.
* See vgen_create_rx_dring() for details.
*/
uint32_t vgen_nrbufs_factor = 2;
/*
* Retry delay used while destroying rx mblk pools. Used in both Dring modes.
*/
int vgen_rxpool_cleanup_delay = 100000; /* 100ms */
/*
* Delay when rx descr not ready; used in TxDring mode only.
*/
uint32_t vgen_recv_delay = 1;
/*
* Retry when rx descr not ready; used in TxDring mode only.
*/
uint32_t vgen_recv_retries = 10;
/*
* Max # of packets accumulated prior to sending them up. It is best
* to keep this at 60% of the number of receive buffers. Used in TxDring mode
* by the msg worker thread. Used in RxDringData mode while in interrupt mode
* (not used in polled mode).
*/
uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
/*
* Internal tunables for receive buffer pools, that is, the size and number of
* mblks for each pool. At least 3 sizes must be specified if these are used.
* The sizes must be specified in increasing order. Non-zero value of the first
* size will be used as a hint to use these values instead of the algorithm
* that determines the sizes based on MTU. Used in TxDring mode only.
*/
uint32_t vgen_rbufsz1 = 0;
uint32_t vgen_rbufsz2 = 0;
uint32_t vgen_rbufsz3 = 0;
uint32_t vgen_rbufsz4 = 0;
uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
uint32_t vgen_nrbufs4 = VGEN_NRBUFS;
/*
* In the absence of "priority-ether-types" property in MD, the following
* internal tunable can be set to specify a single priority ethertype.
*/
uint64_t vgen_pri_eth_type = 0;
/*
* Number of transmit priority buffers that are preallocated per device.
* This number is chosen to be a small value to throttle transmission
* of priority packets. Note: Must be a power of 2 for vio_create_mblks().
*/
uint32_t vgen_pri_tx_nmblks = 64;
uint32_t vgen_vlan_nchains = 4; /* # of chains in vlan id hash table */
/*
* Matching criteria passed to the MDEG to register interest
* in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
* by their 'name' and 'cfg-handle' properties.
*/
static md_prop_match_t vdev_prop_match[] = {
{ MDET_PROP_STR, "name" },
{ MDET_PROP_VAL, "cfg-handle" },
{ MDET_LIST_END, NULL }
};
static mdeg_node_match_t vdev_match = { "virtual-device",
vdev_prop_match };
/* MD update matching structure */
static md_prop_match_t vport_prop_match[] = {
{ MDET_PROP_VAL, "id" },
{ MDET_LIST_END, NULL }
};
static mdeg_node_match_t vport_match = { "virtual-device-port",
vport_prop_match };
/* Template for matching a particular vnet instance */
static mdeg_prop_spec_t vgen_prop_template[] = {
{ MDET_PROP_STR, "name", "network" },
{ MDET_PROP_VAL, "cfg-handle", NULL },
{ MDET_LIST_END, NULL, NULL }
};
#define VGEN_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val)
static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
#ifdef VNET_IOC_DEBUG
#define VGEN_M_CALLBACK_FLAGS (MC_IOCTL)
#else
#define VGEN_M_CALLBACK_FLAGS (0)
#endif
static mac_callbacks_t vgen_m_callbacks = {
VGEN_M_CALLBACK_FLAGS,
vgen_stat,
vgen_start,
vgen_stop,
vgen_promisc,
vgen_multicst,
vgen_unicst,
vgen_tx,
NULL,
vgen_ioctl,
NULL,
NULL
};
/* Externs */
extern pri_t maxclsyspri;
extern proc_t p0;
extern uint32_t vnet_ethermtu;
extern uint16_t vnet_default_vlan_id;
extern uint32_t vnet_num_descriptors;
#ifdef DEBUG
#define DEBUG_PRINTF vgen_debug_printf
extern int vnet_dbglevel;
void vgen_debug_printf(const char *fname, vgen_t *vgenp,
vgen_ldc_t *ldcp, const char *fmt, ...);
/* -1 for all LDCs info, or ldc_id for a specific LDC info */
int vgendbg_ldcid = -1;
/* Flags to simulate error conditions for debugging */
int vgen_inject_err_flag = 0;
/*
 * Debug aid: decide whether the error condition 'error' should be simulated
 * on the given channel.
 *
 * Returns B_TRUE only when vgendbg_ldcid equals the channel's ldc_id and
 * at least one bit of 'error' is set in vgen_inject_err_flag; B_FALSE
 * otherwise.
 */
boolean_t
vgen_inject_error(vgen_ldc_t *ldcp, int error)
{
	/* injection is restricted to the channel selected by vgendbg_ldcid */
	if (vgendbg_ldcid != ldcp->ldc_id) {
		return (B_FALSE);
	}

	return ((vgen_inject_err_flag & error) ? B_TRUE : B_FALSE);
}
#endif
/*
 * vgen_init() is called by an instance of vnet driver to initialize the
 * corresponding generic transport layer. This layer uses Logical Domain
 * Channels (LDCs) to communicate with the virtual switch in the service domain
 * and also with peer vnets in other guest domains in the system.
 *
 * Arguments:
 *	vnetp:   an opaque pointer to the vnet instance
 *	regprop: value saved in the vgen instance as 'regprop'
 *	         (presumably the device's "reg" property -- confirm with caller)
 *	vnetdip: dip of the vnet device
 *	macaddr: mac address of the vnet device (ETHERADDRL bytes are copied)
 *	vgenhdl: output; on success receives the handle to the new vgen
 *	         instance (vgen_t). Left untouched on failure.
 *
 * Returns:
 *	DDI_SUCCESS: the instance was created and *vgenhdl is valid.
 *	DDI_FAILURE: a required argument was NULL, the rx pool taskq could not
 *	             be created, or vgen_read_mdprops() failed. Everything
 *	             allocated here has been released.
 */
int
vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
    const uint8_t *macaddr, void **vgenhdl)
{
	vgen_t	*vgenp;
	int	instance;
	int	rv;
	char	qname[TASKQ_NAMELEN];

	/*
	 * macaddr is read by bcopy() and vgenhdl is written on success, so
	 * they need the same validation as vnetp and vnetdip.
	 */
	if ((vnetp == NULL) || (vnetdip == NULL) ||
	    (macaddr == NULL) || (vgenhdl == NULL))
		return (DDI_FAILURE);

	instance = ddi_get_instance(vnetdip);

	DBG1(NULL, NULL, "vnet(%d): enter\n", instance);

	vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);

	vgenp->vnetp = vnetp;
	vgenp->instance = instance;
	vgenp->regprop = regprop;
	vgenp->vnetdip = vnetdip;
	bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
	vgenp->phys_link_state = LINK_STATE_UNKNOWN;

	/* allocate multicast table */
	vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
	    sizeof (struct ether_addr), KM_SLEEP);
	vgenp->mccount = 0;
	vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;

	mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
	rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);

	/* single-threaded taskq, named after the instance */
	(void) snprintf(qname, TASKQ_NAMELEN, "rxpool_taskq%d",
	    instance);
	if ((vgenp->rxp_taskq = ddi_taskq_create(vnetdip, qname, 1,
	    TASKQ_DEFAULTPRI, 0)) == NULL) {
		cmn_err(CE_WARN, "!vnet%d: Unable to create rx pool task queue",
		    instance);
		goto vgen_init_fail;
	}

	rv = vgen_read_mdprops(vgenp);
	if (rv != 0) {
		goto vgen_init_fail;
	}
	*vgenhdl = (void *)vgenp;

	DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
	return (DDI_SUCCESS);

vgen_init_fail:
	rw_destroy(&vgenp->vgenports.rwlock);
	mutex_destroy(&vgenp->lock);
	/* the table still has its initial size on this path */
	kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
	    sizeof (struct ether_addr));
	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
		kmem_free(vgenp->pri_types,
		    sizeof (uint16_t) * vgenp->pri_num_types);
		(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
	}
	/* NULL here means ddi_taskq_create() itself was the failure */
	if (vgenp->rxp_taskq != NULL) {
		ddi_taskq_destroy(vgenp->rxp_taskq);
		vgenp->rxp_taskq = NULL;
	}
	KMEM_FREE(vgenp);
	return (DDI_FAILURE);
}
int
vgen_init_mdeg(void *arg)
{
vgen_t *vgenp = (vgen_t *)arg;
/* register with MD event generator */
return (vgen_mdeg_reg(vgenp));
}
/*
* Called by vnet to undo the initializations done by vgen_init().
* The handle provided by generic transport during vgen_init() is the argument.
* A NULL handle is ignored. On return the vgen instance has been freed and
* the handle must not be used again.
*/
void
vgen_uninit(void *arg)
{
vgen_t *vgenp = (vgen_t *)arg;
if (vgenp == NULL) {
return;
}
DBG1(vgenp, NULL, "enter\n");
/* Unregister with MD event generator (done before taking vgenp->lock) */
vgen_mdeg_unreg(vgenp);
mutex_enter(&vgenp->lock);
/*
* Detach all ports from the device; note that the device should have
* been unplumbed by this time (See vnet_unattach() for the sequence)
* and thus vgen_stop() has already been invoked on all the ports.
*/
vgen_detach_ports(vgenp);
/*
* We now destroy the taskq used to clean up rx mblk pools that
* couldn't be destroyed when the ports/channels were detached.
* We implicitly wait for those tasks to complete in
* ddi_taskq_destroy().
*/
if (vgenp->rxp_taskq != NULL) {
ddi_taskq_destroy(vgenp->rxp_taskq);
vgenp->rxp_taskq = NULL;
}
/* Free multicast table; mcsize is its current capacity in entries */
kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
/* Free pri_types table and the priority tx mblk pool, if configured */
if (VGEN_PRI_ETH_DEFINED(vgenp)) {
kmem_free(vgenp->pri_types,
sizeof (uint16_t) * vgenp->pri_num_types);
(void) vio_destroy_mblks(vgenp->pri_tx_vmp);
}
/* drop the lock before destroying it */
mutex_exit(&vgenp->lock);
rw_destroy(&vgenp->vgenports.rwlock);
mutex_destroy(&vgenp->lock);
DBG1(vgenp, NULL, "exit\n");
KMEM_FREE(vgenp);
}
/*
 * Enable transmit/receive for the device.
 *
 * 'arg' is the port (vgen_port_t). With the port lock held, the port is
 * initialized and flagged VGEN_STARTED. Always returns DDI_SUCCESS.
 */
int
vgen_start(void *arg)
{
	vgen_port_t	*port = (vgen_port_t *)arg;
	vgen_t		*vgen = port->vgenp;

	DBG1(vgen, NULL, "enter\n");

	mutex_enter(&port->lock);
	vgen_port_init(port);
	port->flags |= VGEN_STARTED;
	mutex_exit(&port->lock);

	DBG1(vgen, NULL, "exit\n");

	return (DDI_SUCCESS);
}
/*
 * Stop transmit/receive on the device.
 *
 * 'arg' is the port (vgen_port_t). With the port lock held, a port that is
 * flagged VGEN_STARTED is uninitialized and the flag is cleared; a port that
 * was never started is left alone.
 */
void
vgen_stop(void *arg)
{
	vgen_port_t	*port = (vgen_port_t *)arg;
	vgen_t		*vgen = port->vgenp;

	DBG1(vgen, NULL, "enter\n");

	mutex_enter(&port->lock);
	if ((port->flags & VGEN_STARTED) != 0) {
		vgen_port_uninit(port);
		port->flags &= ~(VGEN_STARTED);
	}
	mutex_exit(&port->lock);

	DBG1(vgen, NULL, "exit\n");
}
/*
 * vgen transmit function.
 *
 * 'arg' is the port (vgen_port_t) and 'mp' the frame to send.
 * Returns NULL when vgen_portsend() reports VGEN_SUCCESS; otherwise hands
 * the same 'mp' back to the caller.
 */
static mblk_t *
vgen_tx(void *arg, mblk_t *mp)
{
	vgen_port_t	*port = (vgen_port_t *)arg;

	return ((vgen_portsend(port, mp) == VGEN_SUCCESS) ? NULL : mp);
}
/*
 * Tag or untag a frame, as required, before it is transmitted over a port.
 * For a vnet port the vlan membership of the destination is verified first
 * and the frame is dropped if the destination is not in the given vlan.
 *
 * Arguments:
 *	portp:     port over which the frame should be transmitted
 *	mp:        frame to be transmitted
 *	is_tagged: B_TRUE if the frame header already contains the vlan tag,
 *	           B_FALSE if the frame is untagged
 *	vid:       vlan in which the frame should be transmitted
 *
 * Returns:
 *	Success: frame (mblk_t *) after doing the necessary tag/untag.
 *	Failure: NULL (the frame has been freed when the membership check
 *	         fails).
 */
static mblk_t *
vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
    uint16_t vid)
{
	vgen_t		*vgenp = portp->vgenp;
	boolean_t	peer_wants_tag;

	/*
	 * Membership and tag/untag checks apply only when the packet is
	 * going to a vnet. They are skipped for vsw: vsw must be present
	 * implicitly in all the vlans that a vnet device is configured into,
	 * even if vsw itself is not assigned to those vlans as an interface.
	 * For instance, the packet might be destined to another vnet
	 * (indirectly through vsw) or to an external host which is in the
	 * same vlan as this vnet, while vsw itself is not in that vlan.
	 *
	 * Similarly, packets going to vsw must always be tagged (unless in
	 * the default-vlan) if not already tagged, as the final destination
	 * is not known here. vsw must invoke its switching function only
	 * after the packet is tagged; otherwise, once the switching function
	 * has determined the destination, it cannot tell whether that
	 * destination belongs to the same vlan that the frame originated
	 * from or whether it needs tag/untag. Note that vsw will tag the
	 * packet itself when it receives it over the channel from a client
	 * if needed; however, that is needed only for vlan unaware clients
	 * such as obp or earlier versions of vnet.
	 */
	if (portp != vgenp->vsw_portp) {
		/* destination vnet must be a member of this vlan */
		if (vgen_vlan_lookup(portp->vlan_hashp, vid) == B_FALSE) {
			/* drop the packet */
			freemsg(mp);
			return (NULL);
		}

		/* the destination is untagged only in its port vlan (pvid) */
		peer_wants_tag = (vid == portp->pvid) ? B_FALSE : B_TRUE;

		if (is_tagged == peer_wants_tag) {
			/* frame is already in the form the peer expects */
			return (mp);
		}

		if (is_tagged == B_TRUE) {
			/* frame is tagged; destination needs untagged */
			return (vnet_vlan_remove_tag(mp));
		}

		/* untagged frame, tagged destination: tag it below */
	}

	/*
	 * Either a packet going to a vnet that needs tagging, or a packet
	 * going to vsw, which must be tagged in all cases: unknown unicast,
	 * broadcast/multicast or to the vsw interface.
	 */
	if (is_tagged == B_FALSE) {
		mp = vnet_vlan_insert_tag(mp, vid);
	}

	return (mp);
}
/*
 * Transmit a frame over the given port.
 *
 * If the port is marked use_vsw_port, the frame is redirected to the vsw
 * port; vsw_port_refcnt is held for the duration of the send so that the
 * vsw port is not detached underneath us. The vlan of the frame is then
 * determined and the frame is tagged/untagged or dropped as required before
 * it is handed to the channel's tx routine.
 *
 * Arguments:
 *	portp: port over which the frame should be transmitted
 *	mp:    frame to be transmitted
 *
 * Returns:
 *	VGEN_SUCCESS: the frame was transmitted, or was dropped (and freed)
 *	              because of vlan membership.
 *	VGEN_FAILURE: portp is NULL, or the channel's tx routine did not
 *	              return VGEN_TX_SUCCESS.
 */
static int
vgen_portsend(vgen_port_t *portp, mblk_t *mp)
{
	vgen_ldc_t		*ldcp;
	int			status;
	int			rv = VGEN_SUCCESS;
	vgen_t			*vgenp;
	vnet_t			*vnetp;
	boolean_t		is_tagged;
	boolean_t		dec_refcnt = B_FALSE;
	uint16_t		vlan_id;
	struct ether_header	*ehp;

	/* validate portp before anything dereferences it */
	if (portp == NULL) {
		return (VGEN_FAILURE);
	}

	vgenp = portp->vgenp;
	vnetp = vgenp->vnetp;

	if (portp->use_vsw_port) {
		(void) atomic_inc_32(&vgenp->vsw_port_refcnt);
		portp = vgenp->vsw_portp;
		ASSERT(portp != NULL);
		dec_refcnt = B_TRUE;
	}

	/*
	 * Determine the vlan id that the frame belongs to.
	 */
	ehp = (struct ether_header *)mp->b_rptr;
	is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);

	if (vlan_id == vnetp->default_vlan_id) {

		/* Frames in default vlan must be untagged */
		ASSERT(is_tagged == B_FALSE);

		/*
		 * If the destination is a vnet-port verify it belongs to the
		 * default vlan; otherwise drop the packet. We do not need
		 * this check for vsw-port, as it should implicitly belong to
		 * this vlan; see comments in vgen_vlan_frame_fixtag().
		 */
		if (portp != vgenp->vsw_portp &&
		    portp->pvid != vnetp->default_vlan_id) {
			freemsg(mp);
			goto portsend_ret;
		}

	} else {	/* frame not in default-vlan */

		/* NULL means the frame was dropped by the vlan check */
		mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
		if (mp == NULL) {
			goto portsend_ret;
		}

	}

	ldcp = portp->ldcp;
	status = ldcp->tx(ldcp, mp);

	if (status != VGEN_TX_SUCCESS) {
		rv = VGEN_FAILURE;
	}

portsend_ret:
	/* release the hold taken when redirecting to the vsw port */
	if (dec_refcnt == B_TRUE) {
		(void) atomic_dec_32(&vgenp->vsw_port_refcnt);
	}
	return (rv);
}
/*
 * Wrapper function to transmit normal and/or priority frames over the channel.
 *
 * A frame whose ether_type matches one of the configured priority types is
 * sent through vgen_ldcsend_pkt() and VGEN_SUCCESS is returned. Any other
 * frame goes to the channel's tx_dringdata routine, whose status is
 * returned; if no such routine is set, the frame is freed and VGEN_SUCCESS
 * is returned.
 */
static int
vgen_ldcsend(void *arg, mblk_t *mp)
{
	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
	struct ether_header	*hdr;
	uint16_t		*pri_type;
	uint16_t		*pri_end;

	ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));

	pri_type = vgenp->pri_types;
	pri_end = pri_type + vgenp->pri_num_types;
	hdr = (struct ether_header *)mp->b_rptr;

	for (; pri_type < pri_end; pri_type++) {
		if (*pri_type == hdr->ether_type) {
			/* priority frame, use pri tx function */
			vgen_ldcsend_pkt(ldcp, mp);
			return (VGEN_SUCCESS);
		}
	}

	if (ldcp->tx_dringdata != NULL) {
		return (ldcp->tx_dringdata(ldcp, mp));
	}

	/* no dring transmit routine: drop the frame */
	freemsg(mp);
	return (VGEN_SUCCESS);
}
/*
 * This function transmits the frame in the payload of a raw data
 * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
 * send special frames with high priorities, without going through
 * the normal data path which uses descriptor ring mechanism.
 *
 * Arguments:
 *	arg: the channel (vgen_ldc_t) to send on
 *	mp:  the frame; it is always freed before returning, whether or not
 *	     the send succeeded.
 *
 * The frame is dropped (and tx_pri_fail incremented) if the channel is not
 * up, the handshake is not done, the frame does not fit in a raw data
 * message, no priority tx buffer is available, or the send fails. Frames
 * shorter than ETHERMIN are padded to ETHERMIN with zero bytes.
 */
static void
vgen_ldcsend_pkt(void *arg, mblk_t *mp)
{
	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
	vio_raw_data_msg_t	*pkt;
	mblk_t			*bp;
	mblk_t			*nmp = NULL;
	vio_mblk_t		*vmp;
	caddr_t			dst;
	uint32_t		mblksz;
	uint32_t		size;
	uint32_t		copied = 0;
	uint32_t		nbytes;
	int			rv;
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
	vgen_stats_t		*statsp = &ldcp->stats;

	/* drop the packet if ldc is not up or handshake is not done */
	if (ldcp->ldc_status != LDC_UP) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
		    ldcp->ldc_status);
		goto send_pkt_exit;
	}

	if (ldcp->hphase != VH_DONE) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
		    ldcp->hphase);
		goto send_pkt_exit;
	}

	size = msgsize(mp);

	/* frame size bigger than available payload len of raw data msg ? */
	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
		goto send_pkt_exit;
	}

	/* short frames are sent padded up to the ethernet minimum */
	if (size < ETHERMIN)
		size = ETHERMIN;

	/* alloc space for a raw data message */
	vmp = vio_allocb(vgenp->pri_tx_vmp);
	if (vmp == NULL) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vgenp, ldcp, "vio_allocb failed\n");
		goto send_pkt_exit;
	} else {
		nmp = vmp->mp;
	}
	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;

	/* copy frame into the payload of raw data message */
	dst = (caddr_t)pkt->data;
	for (bp = mp; bp != NULL; bp = bp->b_cont) {
		mblksz = MBLKL(bp);
		bcopy(bp->b_rptr, dst, mblksz);
		dst += mblksz;
		copied += mblksz;
	}

	/*
	 * Zero the padding of a short frame. The buffer comes from a
	 * preallocated pool, so without this the pad bytes would carry
	 * whatever an earlier message left in the buffer.
	 */
	if (copied < size) {
		bzero(dst, size - copied);
	}

	vmp->state = VIO_MBLK_HAS_DATA;
	/* setup the raw data msg */
	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
	pkt->tag.vio_sid = ldcp->local_sid;
	nbytes = VIO_PKT_DATA_HDRSIZE + size;

	/* send the msg over ldc */
	rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
	if (rv != VGEN_SUCCESS) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vgenp, ldcp, "Error sending priority frame\n");
		if (rv == ECONNRESET) {
			(void) vgen_handle_evt_reset(ldcp, VGEN_OTHER);
		}
		goto send_pkt_exit;
	}

	/* update stats */
	(void) atomic_inc_64(&statsp->tx_pri_packets);
	(void) atomic_add_64(&statsp->tx_pri_bytes, size);

send_pkt_exit:
	/* release the raw data message buffer, if one was obtained */
	if (nmp != NULL)
		freemsg(nmp);
	/* the original frame is consumed on every path */
	freemsg(mp);
}
/*
* Enable/disable a multicast address.
* Note that the cblock of the ldc channel connected to the vsw is used for
* synchronization of the mctab.
*
* Arguments:
* arg: the port (vgen_port_t) on which the request is made
* add: B_TRUE to add the address, B_FALSE to remove it
* mca: the multicast address (ETHERADDRL bytes)
*
* Returns:
* DDI_SUCCESS: the port is not the vsw port (nothing is done), or the
* local table was updated. Removing an address that is not
* in the table is also reported as success.
* DDI_FAILURE: the port has no channel, the message to vsw could not be
* sent, or the table could not be grown.
*/
int
vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
{
vgen_t *vgenp;
vnet_mcast_msg_t mcastmsg;
vio_msg_tag_t *tagp;
vgen_port_t *portp;
vgen_ldc_t *ldcp;
struct ether_addr *addrp;
int rv = DDI_FAILURE;
uint32_t i;
portp = (vgen_port_t *)arg;
vgenp = portp->vgenp;
/* multicast addresses are only programmed through the vsw port */
if (portp->is_vsw_port != B_TRUE) {
return (DDI_SUCCESS);
}
addrp = (struct ether_addr *)mca;
tagp = &mcastmsg.tag;
bzero(&mcastmsg, sizeof (mcastmsg));
ldcp = portp->ldcp;
if (ldcp == NULL) {
return (DDI_FAILURE);
}
mutex_enter(&ldcp->cblock);
if (ldcp->hphase == VH_DONE) {
/*
* If handshake is done, send a msg to vsw to add/remove
* the multicast address. Otherwise, we just update this
* mcast address in our table and the table will be sync'd
* with vsw when handshake completes.
*/
tagp->vio_msgtype = VIO_TYPE_CTRL;
tagp->vio_subtype = VIO_SUBTYPE_INFO;
tagp->vio_subtype_env = VNET_MCAST_INFO;
tagp->vio_sid = ldcp->local_sid;
bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
mcastmsg.set = add;
mcastmsg.count = 1;
if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
B_FALSE) != VGEN_SUCCESS) {
DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
rv = DDI_FAILURE;
goto vgen_mcast_exit;
}
}
if (add) {
/* expand multicast table if necessary; capacity is doubled */
if (vgenp->mccount >= vgenp->mcsize) {
struct ether_addr *newtab;
uint32_t newsize;
newsize = vgenp->mcsize * 2;
newtab = kmem_zalloc(newsize *
sizeof (struct ether_addr), KM_NOSLEEP);
/*
* rv is still DDI_FAILURE here.
* NOTE(review): if the handshake was done, the add message
* has already been sent to vsw at this point while the
* local table is left unchanged -- confirm this is intended.
*/
if (newtab == NULL)
goto vgen_mcast_exit;
bcopy(vgenp->mctab, newtab, vgenp->mcsize *
sizeof (struct ether_addr));
kmem_free(vgenp->mctab,
vgenp->mcsize * sizeof (struct ether_addr));
vgenp->mctab = newtab;
vgenp->mcsize = newsize;
}
/* add address to the table */
vgenp->mctab[vgenp->mccount++] = *addrp;
} else {
/* delete address from the table */
for (i = 0; i < vgenp->mccount; i++) {
if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
/*
* If there's more than one address in this
* table, delete the unwanted one by moving
* the last one in the list over top of it;
* otherwise, just remove it.
*/
if (vgenp->mccount > 1) {
vgenp->mctab[i] =
vgenp->mctab[vgenp->mccount-1];
}
vgenp->mccount--;
break;
}
}
}
rv = DDI_SUCCESS;
vgen_mcast_exit:
mutex_exit(&ldcp->cblock);
return (rv);
}
/*
* Set or clear promiscuous mode on the device.
* This implementation is a no-op: both arguments are ignored and
* DDI_SUCCESS is always returned.
*/
static int
vgen_promisc(void *arg, boolean_t on)
{
_NOTE(ARGUNUSED(arg, on))
return (DDI_SUCCESS);
}
/* set the unicast mac address of the device */
static int
vgen_unicst(void *arg, const uint8_t *mca)
{
_NOTE(ARGUNUSED(arg, mca))
return (DDI_SUCCESS);
}
/*
 * Get a device statistic.
 * Arguments:
 *	arg:	the port (vgen_port_t *) whose statistic is wanted
 *	stat:	statistic identifier, passed through to vgen_port_stat()
 *	val:	where the statistic value is stored
 * Returns: always 0.
 */
int
vgen_stat(void *arg, uint_t stat, uint64_t *val)
{
	*val = vgen_port_stat((vgen_port_t *)arg, stat);

	return (0);
}
/* vgen internal functions */
/*
 * Detach every port of the device.
 * The port list is held as writer for the whole teardown.
 */
static void
vgen_detach_ports(vgen_t *vgenp)
{
	vgen_portlist_t	*listp = &vgenp->vgenports;

	WRITE_ENTER(&listp->rwlock);

	/*
	 * vgen_port_detach() unlinks the port from the list, so keep
	 * detaching whatever is at the head until the list is empty.
	 */
	for (;;) {
		vgen_port_t	*firstp = listp->headp;

		if (firstp == NULL)
			break;
		vgen_port_detach(firstp);
	}

	RW_EXIT(&listp->rwlock);
}
/*
 * Detach the given port and free it.
 *
 * Unregisters the port's vio network resource (if registered), destroys
 * its vlan hash, unlinks it from the port list, detaches its channel and
 * releases the channel-id and vlan-id arrays before freeing the port
 * structure itself. portp must not be referenced after this returns.
 */
static void
vgen_port_detach(vgen_port_t *portp)
{
	vgen_t		*vgenp;
	int		port_num;

	vgenp = portp->vgenp;
	/* saved up front: portp is freed before the exit trace below */
	port_num = portp->port_num;

	DBG1(vgenp, NULL, "port(%d):enter\n", port_num);

	/*
	 * If this port is connected to the vswitch, then
	 * potentially there could be ports that may be using
	 * this port to transmit packets. To address this do
	 * the following:
	 *	- First set vgenp->vsw_portp to NULL, so that
	 *	  its not used after that.
	 *	- Then wait for the refcnt to go down to 0.
	 *	- Now we can safely detach this port.
	 */
	if (vgenp->vsw_portp == portp) {
		vgenp->vsw_portp = NULL;
		while (vgenp->vsw_port_refcnt > 0) {
			delay(drv_usectohz(vgen_tx_delay));
		}
		(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
	}

	if (portp->vhp != NULL) {
		vio_net_resource_unreg(portp->vhp);
		portp->vhp = NULL;
	}

	vgen_vlan_destroy_hash(portp);

	/* remove it from port list */
	vgen_port_list_remove(portp);

	/* detach channels from this port */
	vgen_ldc_detach(portp->ldcp);

	if (portp->num_ldcs != 0) {
		kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
		portp->num_ldcs = 0;
	}

	/*
	 * Release the vlan id list that vgen_vlan_read_ids() allocated for
	 * this port; nothing else frees it once the port is gone.
	 */
	if (portp->nvids != 0) {
		kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
		portp->vids = NULL;
		portp->nvids = 0;
	}

	mutex_destroy(&portp->lock);
	KMEM_FREE(portp);

	DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
}
/*
 * Append a port to the tail of its device's port list.
 * No locking is done here.
 */
static void
vgen_port_list_insert(vgen_port_t *portp)
{
	vgen_portlist_t	*listp = &portp->vgenp->vgenports;

	if (listp->headp != NULL) {
		/* non-empty list: chain after the current tail */
		listp->tailp->nextp = portp;
	} else {
		/* empty list: the new port is also the head */
		listp->headp = portp;
	}

	listp->tailp = portp;
	portp->nextp = NULL;
}
/*
 * Unlink a port from its device's port list.
 * Does nothing when the list is empty. No locking is done here.
 */
static void
vgen_port_list_remove(vgen_port_t *portp)
{
	vgen_portlist_t	*listp = &portp->vgenp->vgenports;
	vgen_port_t	*curp;

	if (listp->headp == NULL)
		return;

	/* removing the head: advance it, and fix the tail if it was alone */
	if (listp->headp == portp) {
		listp->headp = portp->nextp;
		if (listp->tailp == portp)
			listp->tailp = listp->headp;
		return;
	}

	/* walk to the element just before portp (or to the last element) */
	curp = listp->headp;
	while (curp->nextp != NULL && curp->nextp != portp)
		curp = curp->nextp;

	if (curp->nextp == portp)
		curp->nextp = portp->nextp;

	if (listp->tailp == portp)
		listp->tailp = curp;
}
/*
 * Find the port with the given port_num in the list.
 * Returns the matching port, or NULL if there is none.
 */
static vgen_port_t *
vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
{
	vgen_port_t	*curp = plistp->headp;

	while (curp != NULL && curp->port_num != port_num)
		curp = curp->nextp;

	return (curp);
}
/*
 * Start a port: register it in its vlans, then initialize its channel.
 * The result of the channel initialization is ignored.
 */
static void
vgen_port_init(vgen_port_t *portp)
{
	/* the port joins the vlans listed in its properties first */
	vgen_vlan_add_ids(portp);

	/* then the channel under the port is brought up */
	(void) vgen_ldc_init(portp->ldcp);
}
/*
 * Stop a port: uninitialize its channel, then take the port out of the
 * vlans it was added to.
 */
static void
vgen_port_uninit(vgen_port_t *portp)
{
	/* the channel goes down first */
	vgen_ldc_uninit(portp->ldcp);

	/* then the port leaves its vlans */
	vgen_vlan_remove_ids(portp);
}
/*
 * Scan the machine description for this instance of vnet
 * and read its properties. Called only from vgen_init().
 * Returns: 0 on success, 1 on failure.
 */
static int
vgen_read_mdprops(vgen_t *vgenp)
{
	vnet_t		*vnetp = vgenp->vnetp;
	md_t		*mdp;
	mde_cookie_t	*nodes;
	mde_cookie_t	root;
	uint64_t	cfgh;
	char		*name;
	int		nnodes;
	int		ndevs;
	int		nodes_sz;
	int		idx;
	int		rv = 1;

	mdp = md_get_handle();
	if (mdp == NULL)
		return (rv);

	nnodes = md_node_count(mdp);
	ASSERT(nnodes > 0);

	nodes_sz = nnodes * sizeof (mde_cookie_t);
	nodes = (mde_cookie_t *)kmem_zalloc(nodes_sz, KM_SLEEP);

	root = md_root_node(mdp);

	/* collect all "virtual_device" nodes reachable from the root */
	ndevs = md_scan_dag(mdp, root,
	    md_find_name(mdp, vdev_propname),
	    md_find_name(mdp, "fwd"), nodes);

	/*
	 * Walk the virtual-devices looking for the "network" device whose
	 * cfg-handle equals our 'reg' property, and read its properties.
	 * A non-positive count skips the loop and leaves rv at failure;
	 * a failed property lookup abandons the scan the same way.
	 */
	for (idx = 0; idx < ndevs; idx++) {
		if (md_get_prop_str(mdp, nodes[idx], "name", &name) != 0)
			break;

		/* only "network" devices are of interest */
		if (strcmp(name, vnet_propname) != 0)
			continue;

		if (md_get_prop_val(mdp, nodes[idx], "cfg-handle",
		    &cfgh) != 0)
			break;

		/* not our instance of vnet */
		if (vgenp->regprop != cfgh)
			continue;

		/*
		 * 'linkprop' tells whether this vnet device should get
		 * physical link updates from vswitch.
		 */
		vgen_linkprop_read(vgenp, mdp, nodes[idx],
		    &vnetp->pls_update);

		/*
		 * The mtu of the vnet device is set right here, falling
		 * back to ETHERMTU when the value read is out of range.
		 */
		vgen_mtu_read(vgenp, mdp, nodes[idx], &vnetp->mtu);
		if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) {
			vnetp->mtu = ETHERMTU;
		}
		vgenp->max_frame_size = vnetp->mtu +
		    sizeof (struct ether_header) + VLAN_TAGSZ;

		/* priority ether types (uses max_frame_size set above) */
		vgen_read_pri_eth_types(vgenp, mdp, nodes[idx]);

		/* vlan id properties of this vnet instance */
		vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, nodes[idx],
		    &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
		    &vnetp->default_vlan_id);

		rv = 0;
		break;
	}

	kmem_free(nodes, nodes_sz);
	(void) md_fini_handle(mdp);
	return (rv);
}
/*
 * Read vlan id properties of the given MD node.
 * Arguments:
 *	arg:	device argument(vnet device or a port)
 *	type:	type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port);
 *		any other value makes the function return without
 *		touching the outputs
 *	mdp:	machine description
 *	node:	md node cookie
 *
 * Returns:
 *	pvidp:		port-vlan-id of the node
 *	vidspp:		list of vlan-ids of the node; allocated here and
 *			written only when the list is not empty
 *	nvidsp:		# of vlan-ids in the list
 *	default_idp:	default-vlan-id of the node; written only for
 *			VGEN_LOCAL and when the pointer is not NULL
 */
static void
vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
    uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
    uint16_t *default_idp)
{
	vgen_t		*vgenp;
	char		*pvid_name;
	char		*vid_name;
	uint64_t	*raw;
	uint64_t	propval;
	uint32_t	alloc_sz;
	uint_t		count;
	uint_t		idx;
	int		nbytes;
	int		inst;

	/* pick the property names and instance number for this caller */
	if (type == VGEN_PEER) {
		vgen_port_t	*portp = (vgen_port_t *)arg;

		vgenp = portp->vgenp;
		pvid_name = port_pvid_propname;
		vid_name = port_vid_propname;
		inst = portp->port_num;
	} else if (type == VGEN_LOCAL) {
		vgenp = (vgen_t *)arg;
		pvid_name = vgen_pvid_propname;
		vid_name = vgen_vid_propname;
		inst = vgenp->vnetp->instance;
	} else {
		return;
	}

	/* default vlan id: only meaningful for the vnet device itself */
	if (type == VGEN_LOCAL && default_idp != NULL) {
		if (md_get_prop_val(mdp, node, vgen_dvid_propname,
		    &propval) == 0) {
			*default_idp = propval & 0xFFF;
			DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
			    inst, *default_idp);
		} else {
			DWARN(vgenp, NULL, "prop(%s) not found",
			    vgen_dvid_propname);
			*default_idp = vnet_default_vlan_id;
		}
	}

	/* port vlan id, with the global default as fallback */
	if (md_get_prop_val(mdp, node, pvid_name, &propval) == 0) {
		*pvidp = propval & 0xFFF;
		DBG2(vgenp, NULL, "%s(%d): (%d)\n",
		    pvid_name, inst, *pvidp);
	} else {
		DWARN(vgenp, NULL, "prop(%s) not found", pvid_name);
		*pvidp = vnet_default_vlan_id;
	}

	/* the vlan id list is stored as an array of 64-bit values */
	if (md_get_prop_data(mdp, node, vid_name, (uint8_t **)&raw,
	    &nbytes) == 0) {
		nbytes /= sizeof (uint64_t);
	} else {
		DBG2(vgenp, NULL, "prop(%s) not found", vid_name);
		nbytes = 0;
	}
	count = nbytes;

	if (count != 0) {
		uint16_t	*out;

		DBG2(vgenp, NULL, "%s(%d): ", vid_name, inst);
		alloc_sz = sizeof (uint16_t) * count;
		out = kmem_zalloc(alloc_sz, KM_SLEEP);
		*vidspp = out;
		for (idx = 0; idx < count; idx++) {
			out[idx] = raw[idx] & 0xFFFF;
			DBG2(vgenp, NULL, " %d ", out[idx]);
		}
		DBG2(vgenp, NULL, "\n");
	}

	*nvidsp = count;
}
/*
 * Create the vlan id hash table of the given port.
 * The table is named after the port number and sized from
 * vgen_vlan_nchains.
 */
static void
vgen_vlan_create_hash(vgen_port_t *portp)
{
	char	name[MAXNAMELEN];

	(void) snprintf(name, sizeof (name), "port%d-vlan-hash",
	    portp->port_num);

	portp->vlan_nchains = vgen_vlan_nchains;
	portp->vlan_hashp = mod_hash_create_idhash(name,
	    portp->vlan_nchains, mod_hash_null_valdtor);
}
/*
 * Destroy the vlan id hash table of the given port.
 * Nothing happens when the port has no table.
 */
static void
vgen_vlan_destroy_hash(vgen_port_t *portp)
{
	if (portp->vlan_hashp == NULL)
		return;

	mod_hash_destroy_hash(portp->vlan_hashp);
	portp->vlan_hashp = NULL;
	portp->vlan_nchains = 0;
}
/*
 * Add a port to the vlans specified in its port properties:
 * its port-vlan-id first, then every id in its vlan id list.
 */
static void
vgen_vlan_add_ids(vgen_port_t *portp)
{
	mod_hash_t	*hashp = portp->vlan_hashp;
	int		rv;
	int		idx = 0;

	rv = mod_hash_insert(hashp,
	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
	    (mod_hash_val_t)B_TRUE);
	ASSERT(rv == 0);

	while (idx < portp->nvids) {
		rv = mod_hash_insert(hashp,
		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[idx]),
		    (mod_hash_val_t)B_TRUE);
		ASSERT(rv == 0);
		idx++;
	}
}
/*
 * Remove a port from the vlans it has been assigned to:
 * its port-vlan-id first, then every id in its vlan id list.
 */
static void
vgen_vlan_remove_ids(vgen_port_t *portp)
{
	mod_hash_t	*hashp = portp->vlan_hashp;
	mod_hash_val_t	removed;
	int		rv;
	int		idx = 0;

	rv = mod_hash_remove(hashp,
	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
	    (mod_hash_val_t *)&removed);
	ASSERT(rv == 0);

	while (idx < portp->nvids) {
		rv = mod_hash_remove(hashp,
		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[idx]),
		    (mod_hash_val_t *)&removed);
		ASSERT(rv == 0);
		idx++;
	}
}
/*
 * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
 * then the vlan-id is available in the tag; otherwise, its vlan id is
 * implicitly obtained from the port-vlan-id of the vnet device.
 * The vlan id determined is returned in vidp.
 * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
 */
static boolean_t
vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
{
	struct ether_vlan_header	*evhp;

	/*
	 * If it's a tagged frame, get the vlan id from vlan header.
	 * The ether type is converted to host order before it is compared
	 * with the host-order constant, the same way ether_tci is
	 * converted below.
	 */
	if (ntohs(ehp->ether_type) == ETHERTYPE_VLAN) {
		evhp = (struct ether_vlan_header *)ehp;
		*vidp = VLAN_ID(ntohs(evhp->ether_tci));
		return (B_TRUE);
	}

	/* Untagged frame, vlan-id is the pvid of vnet device */
	*vidp = vnetp->pvid;
	return (B_FALSE);
}
/*
 * Look for the given vlan id in the hash table.
 * Return: B_TRUE if the id is present; B_FALSE otherwise.
 */
static boolean_t
vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
{
	mod_hash_val_t	found;

	if (mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid),
	    (mod_hash_val_t *)&found) == 0)
		return (B_TRUE);

	return (B_FALSE);
}
/*
 * This function reads "priority-ether-types" property from md. This property
 * is used to enable support for priority frames. Applications which need
 * guaranteed and timely delivery of certain high priority frames to/from
 * a vnet or vsw within ldoms, should configure this property by providing
 * the ether type(s) for which the priority facility is needed.
 * Normal data frames are delivered over a ldc channel using the descriptor
 * ring mechanism which is constrained by factors such as descriptor ring size,
 * the rate at which the ring is processed at the peer ldc end point, etc.
 * The priority mechanism provides an Out-Of-Band path to send/receive frames
 * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
 * descriptor ring path and enables a more reliable and timely delivery of
 * frames to the peer.
 *
 * On return vgenp->pri_num_types holds the number of types found; when it
 * is non-zero the table vgenp->pri_types and the tx mblk pool
 * vgenp->pri_tx_vmp have been set up as well.
 */
static void
vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
{
	uint64_t	*raw;
	size_t		pool_blksz;
	int		nbytes;
	int		ntypes;
	int		idx;

	if (md_get_prop_data(mdp, node, pri_types_propname,
	    (uint8_t **)&raw, &nbytes) != 0) {
		/*
		 * Property may not exist if we are running pre-ldoms1.1 f/w.
		 * Fall back to 'vgen_pri_eth_type' if that has been set.
		 */
		if (vgen_pri_eth_type == 0) {
			DBG2(vgenp, NULL,
			    "prop(%s) not found", pri_types_propname);
			nbytes = 0;
		} else {
			nbytes = sizeof (vgen_pri_eth_type);
			raw = &vgen_pri_eth_type;
		}
	}

	if (nbytes == 0) {
		vgenp->pri_num_types = 0;
		return;
	}

	/*
	 * Some priority-ether-types are defined: build the table of
	 * types (each one stored in the MD as a 64-bit value) and create
	 * the pool of mblks used to transmit priority packets.
	 */
	ntypes = nbytes / sizeof (uint64_t);
	vgenp->pri_num_types = ntypes;
	vgenp->pri_types = kmem_zalloc(ntypes * sizeof (uint16_t), KM_SLEEP);
	for (idx = 0; idx < ntypes; idx++) {
		vgenp->pri_types[idx] = raw[idx] & 0xFFFF;
	}

	/* block size is rounded up to a multiple of 8 bytes */
	pool_blksz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
	(void) vio_create_mblks(vgen_pri_tx_nmblks, pool_blksz, NULL,
	    &vgenp->pri_tx_vmp);
}
/*
 * Read the mtu property of the given MD node into *mtu.
 * When the property is missing, *mtu is set to vnet_ethermtu; otherwise
 * the low 16 bits of the property value are used. No range check is
 * done here.
 */
static void
vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
{
	uint64_t	propval;

	if (md_get_prop_val(mdp, node, vgen_mtu_propname, &propval) == 0) {
		*mtu = propval & 0xFFFF;
		DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_mtu_propname,
		    vgenp->instance, *mtu);
		return;
	}

	DWARN(vgenp, NULL, "prop(%s) not found", vgen_mtu_propname);
	*mtu = vnet_ethermtu;
}
/*
 * Read the 'linkprop' property of the given MD node into *pls.
 * *pls is B_TRUE when bit 0 of the property value is set, and B_FALSE
 * when it is clear or the property is missing.
 */
static void
vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
    boolean_t *pls)
{
	uint64_t	propval;

	if (md_get_prop_val(mdp, node, vgen_linkprop_propname,
	    &propval) != 0) {
		DWARN(vgenp, NULL, "prop(%s) not found",
		    vgen_linkprop_propname);
		*pls = B_FALSE;
		return;
	}

	if (propval & 0x1) {
		*pls = B_TRUE;
	} else {
		*pls = B_FALSE;
	}
	DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_linkprop_propname,
	    vgenp->instance, *pls);
}
/*
 * Register with the MD event generator.
 * Two registrations are made against the same parent spec: one for the
 * vnet node itself (vgen_mdeg_cb) and one for its port nodes
 * (vgen_mdeg_port_cb). On success the handles and the parent spec are
 * saved in vgenp.
 * Returns: DDI_SUCCESS, or DDI_FAILURE with everything undone.
 */
static int
vgen_mdeg_reg(vgen_t *vgenp)
{
	mdeg_prop_spec_t	*specp;
	mdeg_node_spec_t	*nodep;
	mdeg_handle_t		dev_hdl = NULL;
	mdeg_handle_t		port_hdl = NULL;
	uint_t			specsz = sizeof (vgen_prop_template);

	specp = kmem_zalloc(specsz, KM_NOSLEEP);
	if (specp == NULL)
		return (DDI_FAILURE);

	nodep = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
	if (nodep == NULL) {
		kmem_free(specp, specsz);
		return (DDI_FAILURE);
	}

	bcopy(vgen_prop_template, specp, specsz);

	/*
	 * NOTE: The instance here refers to the value of "reg" property and
	 * not the dev_info instance (ddi_get_instance()) of vnet.
	 */
	VGEN_SET_MDEG_PROP_INST(specp, vgenp->regprop);

	nodep->namep = "virtual-device";
	nodep->specp = specp;

	/* the parent spec is kept in vgen_t */
	vgenp->mdeg_parentp = nodep;

	/*
	 * Interest in 'virtual-device' nodes with a
	 * 'name' property of 'network'
	 */
	if (mdeg_register(nodep, &vdev_match, vgen_mdeg_cb, vgenp,
	    &dev_hdl) != MDEG_SUCCESS) {
		DERR(vgenp, NULL, "mdeg_register failed\n");
		goto mdeg_reg_fail;
	}

	/* Interest in 'port' nodes */
	if (mdeg_register(nodep, &vport_match, vgen_mdeg_port_cb, vgenp,
	    &port_hdl) != MDEG_SUCCESS) {
		DERR(vgenp, NULL, "mdeg_register failed\n");
		goto mdeg_reg_fail;
	}

	/* both registrations are in place: remember the handles */
	vgenp->mdeg_dev_hdl = dev_hdl;
	vgenp->mdeg_port_hdl = port_hdl;

	return (DDI_SUCCESS);

mdeg_reg_fail:
	/* only the device registration can be outstanding at this point */
	if (dev_hdl != NULL) {
		(void) mdeg_unregister(dev_hdl);
	}
	KMEM_FREE(nodep);
	kmem_free(specp, specsz);
	vgenp->mdeg_parentp = NULL;
	return (DDI_FAILURE);
}
/*
 * Unregister from the MD event generator.
 * Drops the device and port registrations that are present and frees
 * the parent spec saved by vgen_mdeg_reg(), clearing each field in vgenp.
 */
static void
vgen_mdeg_unreg(vgen_t *vgenp)
{
	mdeg_node_spec_t	*nodep;

	if (vgenp->mdeg_dev_hdl != NULL) {
		(void) mdeg_unregister(vgenp->mdeg_dev_hdl);
		vgenp->mdeg_dev_hdl = NULL;
	}

	if (vgenp->mdeg_port_hdl != NULL) {
		(void) mdeg_unregister(vgenp->mdeg_port_hdl);
		vgenp->mdeg_port_hdl = NULL;
	}

	nodep = vgenp->mdeg_parentp;
	if (nodep == NULL)
		return;

	/* the prop spec was sized from the template when it was allocated */
	kmem_free(nodep->specp, sizeof (vgen_prop_template));
	KMEM_FREE(nodep);
	vgenp->mdeg_parentp = NULL;
}
/*
 * mdeg callback function for the port node.
 * Under vgenp->lock: removes the ports reported as removed, adds the
 * ports reported as added (the vswitch port first when none is attached
 * yet) and applies updates to the matched ports.
 * Returns: MDEG_SUCCESS; MDEG_FAILURE on NULL arguments, or when no
 * vswitch port is attached and one can neither be found nor added.
 */
static int
vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
{
	vgen_t		*vgenp = (vgen_t *)cb_argp;
	uint64_t	swport;
	int		vsw_idx = -1;
	int		i;

	if (resp == NULL || vgenp == NULL)
		return (MDEG_FAILURE);

	DBG1(vgenp, NULL, "enter\n");

	mutex_enter(&vgenp->lock);

	DBG1(vgenp, NULL, "ports: removed(%x), "
	    "added(%x), updated(%x)\n", resp->removed.nelem,
	    resp->added.nelem, resp->match_curr.nelem);

	for (i = 0; i < resp->removed.nelem; i++) {
		(void) vgen_remove_port(vgenp, resp->removed.mdp,
		    resp->removed.mdep[i]);
	}

	if (vgenp->vsw_portp == NULL) {
		/*
		 * The vsw_port has to be added first, because other ports
		 * need it when adding fdb entry (see vgen_port_init()).
		 * It is the first added port whose switch-port property
		 * reads as 0, i.e. the one connected to the vsw on the
		 * service domain.
		 */
		for (i = 0; i < resp->added.nelem; i++) {
			if (md_get_prop_val(resp->added.mdp,
			    resp->added.mdep[i], swport_propname,
			    &swport) == 0 && swport == 0) {
				vsw_idx = i;
				break;
			}
		}

		if (vsw_idx == -1) {
			DWARN(vgenp, NULL, "can't find vsw_port\n");
			mutex_exit(&vgenp->lock);
			return (MDEG_FAILURE);
		}

		if (vgen_add_port(vgenp, resp->added.mdp,
		    resp->added.mdep[vsw_idx]) != DDI_SUCCESS) {
			cmn_err(CE_NOTE, "vnet%d Could "
			    "not initialize virtual "
			    "switch port.",
			    vgenp->instance);
			mutex_exit(&vgenp->lock);
			return (MDEG_FAILURE);
		}
	}

	for (i = 0; i < resp->added.nelem; i++) {
		/* the vsw_port, if any, was handled above */
		if (i == vsw_idx)
			continue;

		/* A port that can't be added is simply skipped. */
		(void) vgen_add_port(vgenp, resp->added.mdp,
		    resp->added.mdep[i]);
	}

	for (i = 0; i < resp->match_curr.nelem; i++) {
		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
		    resp->match_curr.mdep[i],
		    resp->match_prev.mdp,
		    resp->match_prev.mdep[i]);
	}

	mutex_exit(&vgenp->lock);
	DBG1(vgenp, NULL, "exit\n");
	return (MDEG_SUCCESS);
}
/*
 * mdeg callback function for the vnet node.
 * Validates that exactly one node was added or matched, that it is a
 * 'network' node and that its cfg-handle is ours, then hands it to
 * vgen_update_md_prop() with vgenp->lock held.
 * Returns: MDEG_SUCCESS if the node was processed; MDEG_FAILURE otherwise.
 */
static int
vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
{
	vgen_t		*vgenp;
	vnet_t		*vnetp;
	md_t		*mdp;
	mde_cookie_t	node;
	uint64_t	inst;
	char		*node_name = NULL;

	if ((resp == NULL) || (cb_argp == NULL)) {
		return (MDEG_FAILURE);
	}

	vgenp = (vgen_t *)cb_argp;
	vnetp = vgenp->vnetp;

	DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d"
	    " : prev matched %d", resp->added.nelem, resp->removed.nelem,
	    resp->match_curr.nelem, resp->match_prev.nelem);

	mutex_enter(&vgenp->lock);

	/*
	 * We get an initial callback for this node as 'added' after
	 * registering with mdeg. Note that we would have already gathered
	 * information about this vnet node by walking MD earlier during attach
	 * (in vgen_read_mdprops()). So, there is a window where the properties
	 * of this node might have changed when we get this initial 'added'
	 * callback. We handle this as if an update occurred and invoke the same
	 * function which handles updates to the properties of this vnet-node
	 * if any. A non-zero 'match' value indicates that the MD has been
	 * updated and that a 'network' node is present which may or may not
	 * have been updated. It is up to the clients to examine their own
	 * nodes and determine if they have changed.
	 */
	if (resp->added.nelem != 0) {

		if (resp->added.nelem != 1) {
			cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
			    "invalid: %d\n", vnetp->instance,
			    resp->added.nelem);
			goto vgen_mdeg_cb_err;
		}

		mdp = resp->added.mdp;
		node = resp->added.mdep[0];

	} else if (resp->match_curr.nelem != 0) {

		if (resp->match_curr.nelem != 1) {
			cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
			    "invalid: %d\n", vnetp->instance,
			    resp->match_curr.nelem);
			goto vgen_mdeg_cb_err;
		}

		mdp = resp->match_curr.mdp;
		node = resp->match_curr.mdep[0];

	} else {
		/* neither an added nor a matched node: nothing to process */
		goto vgen_mdeg_cb_err;
	}

	/* Validate name and instance */
	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
		DERR(vgenp, NULL, "unable to get node name\n");
		goto vgen_mdeg_cb_err;
	}

	/* is this a virtual-network device? */
	if (strcmp(node_name, vnet_propname) != 0) {
		/* exactly one conversion for the one argument supplied */
		DERR(vgenp, NULL, "Invalid node name: %s\n", node_name);
		goto vgen_mdeg_cb_err;
	}

	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
		DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
		goto vgen_mdeg_cb_err;
	}

	/* is this the right instance of vnet? */
	if (inst != vgenp->regprop) {
		DERR(vgenp, NULL, "Invalid cfg-handle: %lx\n", inst);
		goto vgen_mdeg_cb_err;
	}

	vgen_update_md_prop(vgenp, mdp, node);

	mutex_exit(&vgenp->lock);
	return (MDEG_SUCCESS);

vgen_mdeg_cb_err:
	mutex_exit(&vgenp->lock);
	return (MDEG_FAILURE);
}
/*
 * Compare the vlan ids, mtu and 'linkprop' of the specified node with
 * the values currently held by the vnet device and act on every one
 * that differs. vgenp->lock is released and re-acquired around the
 * vsw-port reset done for a 'linkprop' change.
 */
static void
vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
{
	vnet_t		*vnetp = vgenp->vnetp;
	uint16_t	new_pvid;
	uint16_t	*new_vids;
	uint16_t	new_nvids;
	uint32_t	new_mtu;
	boolean_t	new_pls;
	boolean_t	vlans_changed = B_FALSE;
	boolean_t	mtu_changed = B_FALSE;
	boolean_t	pls_changed = B_FALSE;

	/* vlan ids: pvid, count, or list contents may differ */
	vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &new_pvid,
	    &new_vids, &new_nvids, NULL);

	if (new_pvid != vnetp->pvid || new_nvids != vnetp->nvids) {
		vlans_changed = B_TRUE;
	} else if (new_nvids != 0 &&
	    bcmp(new_vids, vnetp->vids, sizeof (uint16_t) * new_nvids) != 0) {
		/* same count on both sides, so only contents are compared */
		vlans_changed = B_TRUE;
	}

	/* mtu: a different value is accepted only when it is in range */
	vgen_mtu_read(vgenp, mdp, mdex, &new_mtu);
	if (new_mtu != vnetp->mtu) {
		if (new_mtu < ETHERMTU || new_mtu > VNET_MAX_MTU) {
			cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update"
			    " as the specified value:%d is invalid\n",
			    vnetp->instance, new_mtu);
		} else {
			mtu_changed = B_TRUE;
		}
	}

	/* 'linkprop' property */
	vgen_linkprop_read(vgenp, mdp, mdex, &new_pls);
	if (new_pls != vnetp->pls_update) {
		pls_changed = B_TRUE;
	}

	/* Everything has been read; now apply what changed. */

	if (vlans_changed == B_TRUE) {
		/* replace the stored vlan ids with the new ones */
		vnetp->pvid = new_pvid;
		if (vnetp->nvids != 0) {
			kmem_free(vnetp->vids,
			    sizeof (uint16_t) * vnetp->nvids);
			vnetp->nvids = 0;
		}
		if (new_nvids != 0) {
			vnetp->nvids = new_nvids;
			vnetp->vids = new_vids;
		}

		/* reset vlan-unaware peers (ver < 1.3) and restart handshake */
		vgen_reset_vlan_unaware_ports(vgenp);
	} else if (new_nvids != 0) {
		/* the list just read is not kept */
		kmem_free(new_vids, sizeof (uint16_t) * new_nvids);
	}

	if (mtu_changed == B_TRUE) {
		DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n",
		    vnetp->mtu, new_mtu);

		/* the frame size follows the mtu only if the update took */
		if (vnet_mtu_update(vnetp, new_mtu) == 0) {
			vgenp->max_frame_size = new_mtu +
			    sizeof (struct ether_header) + VLAN_TAGSZ;
		}
	}

	if (pls_changed == B_TRUE) {
		/* enable/disable physical link state updates */
		vnetp->pls_update = new_pls;

		mutex_exit(&vgenp->lock);

		/* reset vsw-port to re-negotiate with the updated prop. */
		vgen_reset_vsw_port(vgenp);

		mutex_enter(&vgenp->lock);
	}
}
/*
 * Add a new port to the device: allocate it, fill it in from its MD
 * node and attach it.
 * Returns: DDI_SUCCESS or DDI_FAILURE.
 */
static int
vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
{
	vgen_port_t	*newp;

	newp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);

	if (vgen_port_read_props(newp, vgenp, mdp, mdex) != DDI_SUCCESS) {
		KMEM_FREE(newp);
		return (DDI_FAILURE);
	}

	/*
	 * The port is not freed here when the attach fails:
	 * vgen_port_attach() detaches the port itself on its failure paths.
	 */
	if (vgen_port_attach(newp) != DDI_SUCCESS)
		return (DDI_FAILURE);

	return (DDI_SUCCESS);
}
/*
 * Read the properties of a port from its md node into portp.
 * Arguments:
 *	portp:	port to fill in
 *	vgenp:	device the port belongs to
 *	mdp:	machine description
 *	mdex:	md node cookie of the port
 *
 * Reads the port number, the channel ids found under the node, the
 * remote mac address, whether this is the vswitch port, and the vlan
 * ids. portp is written only after all mandatory properties have been
 * read, so on failure the caller just frees portp.
 * Returns: DDI_SUCCESS or DDI_FAILURE.
 */
static int
vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
    mde_cookie_t mdex)
{
	uint64_t		port_num;
	uint64_t		*ldc_ids;
	uint64_t		macaddr;
	uint64_t		val;
	int			num_ldcs;
	int			i;
	int			addrsz;
	int			num_nodes = 0;
	int			listsz = 0;
	mde_cookie_t		*listp = NULL;
	uint8_t			*addrp;
	struct ether_addr	ea;

	/* read "id" property to get the port number */
	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
		return (DDI_FAILURE);
	}

	/*
	 * Find the channel endpoint node(s) under this port node.
	 */
	if ((num_nodes = md_node_count(mdp)) <= 0) {
		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
		    num_nodes);
		return (DDI_FAILURE);
	}

	/* allocate space for node list */
	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = kmem_zalloc(listsz, KM_NOSLEEP);
	if (listp == NULL)
		return (DDI_FAILURE);

	num_ldcs = md_scan_dag(mdp, mdex,
	    md_find_name(mdp, channel_propname),
	    md_find_name(mdp, "fwd"), listp);

	if (num_ldcs <= 0) {
		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
		kmem_free(listp, listsz);
		return (DDI_FAILURE);
	}

	if (num_ldcs > 1) {
		/* port_num is 64-bit; narrow it to match the %d conversion */
		DWARN(vgenp, NULL, "Port %d: Number of channels %d > 1\n",
		    (int)port_num, num_ldcs);
	}

	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
	if (ldc_ids == NULL) {
		kmem_free(listp, listsz);
		return (DDI_FAILURE);
	}

	for (i = 0; i < num_ldcs; i++) {
		/* read channel ids */
		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
			DWARN(vgenp, NULL, "prop(%s) not found\n",
			    id_propname);
			kmem_free(listp, listsz);
			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
			return (DDI_FAILURE);
		}
		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
	}

	/* the node list is only needed to read the channel ids */
	kmem_free(listp, listsz);

	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
	    &addrsz)) {
		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
		return (DDI_FAILURE);
	}

	if (addrsz < ETHERADDRL) {
		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
		return (DDI_FAILURE);
	}

	macaddr = *((uint64_t *)addrp);

	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);

	/* unpack the low 48 bits, last octet from the least significant byte */
	for (i = ETHERADDRL - 1; i >= 0; i--) {
		ea.ether_addr_octet[i] = macaddr & 0xFF;
		macaddr >>= 8;
	}

	/* is_vsw_port is left untouched when the property is absent */
	if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
		if (val == 0) {
			/* This port is connected to the vswitch */
			portp->is_vsw_port = B_TRUE;
		} else {
			portp->is_vsw_port = B_FALSE;
		}
	}

	/* now update all properties into the port */
	portp->vgenp = vgenp;
	portp->port_num = port_num;
	ether_copy(&ea, &portp->macaddr);

	/* the channel id array is handed over to the port as it is */
	portp->ldc_ids = ldc_ids;
	portp->num_ldcs = num_ldcs;

	/* read vlan id properties of this port node */
	vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
	    &portp->vids, &portp->nvids, NULL);

	return (DDI_SUCCESS);
}
/*
 * Remove from the device the port described by the given md node.
 * The port is looked up by its "id" property and detached while the
 * port list is held as writer.
 * Returns: DDI_SUCCESS; DDI_FAILURE if the id cannot be read or no
 * such port exists.
 */
static int
vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
{
	vgen_portlist_t	*listp = &vgenp->vgenports;
	vgen_port_t	*victim;
	uint64_t	id;
	int		rv = DDI_FAILURE;

	/* the "id" property carries the port number */
	if (md_get_prop_val(mdp, mdex, id_propname, &id) != 0) {
		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
		return (DDI_FAILURE);
	}

	WRITE_ENTER(&listp->rwlock);

	victim = vgen_port_lookup(listp, (int)id);
	if (victim != NULL) {
		vgen_port_detach_mdeg(victim);
		rv = DDI_SUCCESS;
	} else {
		DWARN(vgenp, NULL, "can't find port(%lx)\n", id);
	}

	RW_EXIT(&listp->rwlock);

	return (rv);
}
/*
 * Attach a port to the device based on mdeg data.
 *
 * Attaches the port's channel, creates its vlan hash, registers the port
 * as a vio network resource and, once registered, links it into the
 * device's port list (recording it as the vswitch port if it is one).
 *
 * Returns: DDI_SUCCESS if the port is attached. On DDI_FAILURE the port
 * has already been detached and freed by vgen_port_detach(), so the
 * caller must not reference or free portp.
 */
static int
vgen_port_attach(vgen_port_t *portp)
{
	vgen_portlist_t		*plistp;
	vgen_t			*vgenp;
	uint64_t		*ldcids;
	mac_register_t		*macp;
	vio_net_res_type_t	type;
	int			rv;

	ASSERT(portp != NULL);
	vgenp = portp->vgenp;
	ldcids = portp->ldc_ids;

	DBG2(vgenp, NULL, "port_num(%d), ldcid(%lx)\n",
	    portp->port_num, ldcids[0]);

	mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);

	/*
	 * attach the channel under the port using its channel id;
	 * note that we only support one channel per port for now.
	 */
	if (vgen_ldc_attach(portp, ldcids[0]) == DDI_FAILURE) {
		vgen_port_detach(portp);
		return (DDI_FAILURE);
	}

	/* create vlan id hash table */
	vgen_vlan_create_hash(portp);

	if (portp->is_vsw_port == B_TRUE) {
		/* This port is connected to the switch port */
		(void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
		type = VIO_NET_RES_LDC_SERVICE;
	} else {
		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
		type = VIO_NET_RES_LDC_GUEST;
	}

	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
		vgen_port_detach(portp);
		return (DDI_FAILURE);
	}
	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = portp;
	macp->m_dip = vgenp->vnetdip;
	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
	macp->m_callbacks = &vgen_m_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = ETHERMTU;

	mutex_enter(&portp->lock);
	rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
	    portp->macaddr, &portp->vhp, &portp->vcb);
	mutex_exit(&portp->lock);
	mac_free(macp);

	if (rv != 0) {
		DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
		    portp);
		/*
		 * vgen_port_detach() frees portp, so it must not be touched
		 * past this point; report the failure to the caller.
		 */
		vgen_port_detach(portp);
		return (DDI_FAILURE);
	}

	/* link it into the list of ports */
	plistp = &(vgenp->vgenports);
	WRITE_ENTER(&plistp->rwlock);
	vgen_port_list_insert(portp);
	RW_EXIT(&plistp->rwlock);

	if (portp->is_vsw_port == B_TRUE) {
		/* We now have the vswitch port attached */
		vgenp->vsw_portp = portp;
		(void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
	}

	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
	return (DDI_SUCCESS);
}
/*
 * Detach a port from the device based on mdeg data.
 * The port is stopped first if it is marked VGEN_STARTED, then detached.
 * portp is freed by vgen_port_detach() and must not be used by the
 * caller afterwards.
 */
static void
vgen_port_detach_mdeg(vgen_port_t *portp)
{
	vgen_t	*vgenp = portp->vgenp;
	/* captured now: portp no longer exists when the exit trace runs */
	int	port_num = portp->port_num;

	DBG1(vgenp, NULL, "enter: port_num(%d)\n", port_num);

	mutex_enter(&portp->lock);

	/* stop the port if needed */
	if (portp->flags & VGEN_STARTED) {
		vgen_port_uninit(portp);
		portp->flags &= ~(VGEN_STARTED);
	}

	mutex_exit(&portp->lock);
	vgen_port_detach(portp);

	DBG1(vgenp, NULL, "exit: port_num(%d)\n", port_num);
}
/*
 * Apply an MD update to an existing port.
 * Arguments:
 *	vgenp:		device the port belongs to
 *	curr_mdp/curr_mdex:	current MD and node of the port
 *	prev_mdp/prev_mdex:	previous MD and node of the port
 *
 * Only vlan id changes are handled: the port is taken out of its old
 * vlans, given the new ids, added to the new vlans and reset if it is
 * vlan unaware.
 * Returns: DDI_SUCCESS if the vlan ids were updated; DDI_FAILURE if a
 * property is missing, the port numbers disagree, the port is unknown,
 * or the vlan ids did not change.
 */
static int
vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
    md_t *prev_mdp, mde_cookie_t prev_mdex)
{
	uint64_t	cport_num;
	uint64_t	pport_num;
	vgen_portlist_t	*plistp;
	vgen_port_t	*portp;
	boolean_t	updated_vlans = B_FALSE;
	uint16_t	pvid;
	uint16_t	*vids;
	uint16_t	nvids;

	/*
	 * For now, we get port updates only if vlan ids changed.
	 * We read the port num and do some sanity check.
	 */
	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
		return (DDI_FAILURE);
	}

	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
		return (DDI_FAILURE);
	}
	if (cport_num != pport_num)
		return (DDI_FAILURE);

	plistp = &(vgenp->vgenports);

	READ_ENTER(&plistp->rwlock);

	portp = vgen_port_lookup(plistp, (int)cport_num);
	if (portp == NULL) {
		DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
		RW_EXIT(&plistp->rwlock);
		return (DDI_FAILURE);
	}

	/* Read the vlan ids; vids is allocated only when nvids != 0 */
	vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
	    &nvids, NULL);

	/* Determine if there are any vlan id updates */
	if ((pvid != portp->pvid) ||		/* pvid changed? */
	    (nvids != portp->nvids) ||		/* # of vids changed? */
	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
	    bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
		updated_vlans = B_TRUE;
	}

	if (updated_vlans == B_FALSE) {
		/* the list just read is not kept, so release it here */
		if (nvids != 0) {
			kmem_free(vids, sizeof (uint16_t) * nvids);
		}
		RW_EXIT(&plistp->rwlock);
		return (DDI_FAILURE);
	}

	/* remove the port from vlans it has been assigned to */
	vgen_vlan_remove_ids(portp);

	/* save the new vlan ids */
	portp->pvid = pvid;
	if (portp->nvids != 0) {
		kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
		portp->nvids = 0;
	}
	if (nvids != 0) {
		/* the port takes over the list that was just read */
		portp->vids = vids;
		portp->nvids = nvids;
	}

	/* add port to the new vlans */
	vgen_vlan_add_ids(portp);

	/* reset the port if it is vlan unaware (ver < 1.3) */
	vgen_vlan_unaware_port_reset(portp);

	RW_EXIT(&plistp->rwlock);

	return (DDI_SUCCESS);
}
/*
 * Return the value of statistic 'stat' for the port, as reported by
 * vgen_ldc_stat() for the port's channel.
 */
static uint64_t
vgen_port_stat(vgen_port_t *portp, uint_t stat)
{
	uint64_t	value;

	value = vgen_ldc_stat(portp->ldcp, stat);

	return (value);
}
/*
 * Attach the channel corresponding to the given ldc_id to the port.
 *
 * Allocates the per-channel state, initializes its locks and condition
 * variable, initializes the LDC channel, registers the channel callback,
 * allocates the receive message buffer, sets up kstats and finally links
 * the channel to the port.
 *
 * Arguments:
 *	portp:	port to which the channel is attached.
 *	ldc_id:	id of the LDC channel.
 *
 * Returns DDI_SUCCESS once the channel is linked to the port. On any
 * failure, every setup step recorded in attach_state is undone and
 * DDI_FAILURE is returned.
 */
static int
vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
{
	vgen_t		*vgenp;
	vgen_ldc_t	*ldcp;
	ldc_attr_t	attr;
	int		status;
	ldc_status_t	istatus;
	char		kname[MAXNAMELEN];
	int		instance;
	/* bit flags recording which setup steps have completed */
	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
		AST_ldc_reg_cb = 0x8 } attach_state;

	attach_state = AST_init;
	vgenp = portp->vgenp;

	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
	if (ldcp == NULL) {
		goto ldc_attach_failed;
	}
	ldcp->ldc_id = ldc_id;
	ldcp->portp = portp;

	attach_state |= AST_ldc_alloc;

	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->pollq_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->msg_thr_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ldcp->msg_thr_cv, NULL, CV_DRIVER, NULL);

	attach_state |= AST_mutex_init;

	attr.devclass = LDC_DEV_NT;
	attr.instance = vgenp->instance;
	attr.mode = LDC_MODE_UNRELIABLE;
	attr.mtu = vgen_ldc_mtu;
	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
	if (status != 0) {
		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
		goto ldc_attach_failed;
	}
	attach_state |= AST_ldc_init;

	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
	if (status != 0) {
		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
		    status);
		goto ldc_attach_failed;
	}

	/*
	 * allocate a message for ldc_read()s, big enough to hold ctrl and
	 * data msgs, including raw data msgs used to recv priority frames.
	 */
	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);

	/*
	 * AST_ldc_reg_cb covers both the registered callback and the message
	 * buffer; the failure path releases them together.
	 */
	attach_state |= AST_ldc_reg_cb;

	(void) ldc_status(ldcp->ldc_handle, &istatus);
	ASSERT(istatus == LDC_INIT);
	ldcp->ldc_status = istatus;

	/* Setup kstats for the channel */
	instance = vgenp->instance;
	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
	if (ldcp->ksp == NULL) {
		goto ldc_attach_failed;
	}

	/* initialize vgen_versions supported */
	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
	vgen_reset_vnet_proto_ops(ldcp);

	/* Link this channel to the port */
	portp->ldcp = ldcp;

	ldcp->link_state = LINK_STATE_UNKNOWN;
#ifdef	VNET_IOC_DEBUG
	ldcp->link_down_forced = B_FALSE;
#endif
	ldcp->flags |= CHANNEL_ATTACHED;
	return (DDI_SUCCESS);

ldc_attach_failed:
	/* undo the completed steps in the reverse order of setup */
	if (attach_state & AST_ldc_reg_cb) {
		(void) ldc_unreg_callback(ldcp->ldc_handle);
		kmem_free(ldcp->ldcmsg, ldcp->msglen);
	}

	if (attach_state & AST_ldc_init) {
		(void) ldc_fini(ldcp->ldc_handle);
	}
	if (attach_state & AST_mutex_init) {
		mutex_destroy(&ldcp->tclock);
		mutex_destroy(&ldcp->txlock);
		mutex_destroy(&ldcp->cblock);
		mutex_destroy(&ldcp->wrlock);
		mutex_destroy(&ldcp->rxlock);
		mutex_destroy(&ldcp->pollq_lock);
		/*
		 * msg_thr_lock and msg_thr_cv are initialized in the same
		 * step as the locks above, so they must be destroyed here
		 * too (as vgen_ldc_detach() does).
		 */
		mutex_destroy(&ldcp->msg_thr_lock);
		cv_destroy(&ldcp->msg_thr_cv);
	}
	if (attach_state & AST_ldc_alloc) {
		KMEM_FREE(ldcp);
	}
	return (DDI_FAILURE);
}
/*
 * Detach a channel from the port.
 *
 * If the channel is marked CHANNEL_ATTACHED, the flag is cleared, the LDC
 * callback is unregistered, the LDC handle is finalized, the message buffer
 * and kstats are released, all locks and the condition variable are
 * destroyed and the channel structure itself is freed. Otherwise nothing is
 * torn down.
 */
static void
vgen_ldc_detach(vgen_ldc_t *ldcp)
{
	vgen_t	*vgenp;

	ASSERT(ldcp != NULL);

	vgenp = ldcp->portp->vgenp;

	/* only warn; the teardown below proceeds regardless of the status */
	if (ldcp->ldc_status != LDC_INIT) {
		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
	}

	if ((ldcp->flags & CHANNEL_ATTACHED) == 0) {
		return;
	}
	ldcp->flags &= ~(CHANNEL_ATTACHED);

	/* release the LDC resources */
	(void) ldc_unreg_callback(ldcp->ldc_handle);
	(void) ldc_fini(ldcp->ldc_handle);

	/* release the message buffer and the kstats */
	kmem_free(ldcp->ldcmsg, ldcp->msglen);
	vgen_destroy_kstats(ldcp->ksp);
	ldcp->ksp = NULL;

	/* destroy the synchronization objects */
	mutex_destroy(&ldcp->tclock);
	mutex_destroy(&ldcp->txlock);
	mutex_destroy(&ldcp->cblock);
	mutex_destroy(&ldcp->wrlock);
	mutex_destroy(&ldcp->rxlock);
	mutex_destroy(&ldcp->pollq_lock);
	mutex_destroy(&ldcp->msg_thr_lock);
	cv_destroy(&ldcp->msg_thr_cv);

	KMEM_FREE(ldcp);
}
/*
 * Enable transmit/receive on the channel.
 *
 * Opens the LDC channel, enables its callbacks, attempts to bring the
 * channel up and (re)initializes the handshake parameters. If the channel
 * is already UP after that, the first handshake phase is started right here.
 *
 * Returns DDI_SUCCESS once the channel has been marked CHANNEL_STARTED
 * (even if the handshake started here fails; that case is handed to
 * vgen_process_reset()). Returns DDI_FAILURE if the channel could not be
 * opened, is in an unexpected state, or callbacks could not be enabled; in
 * that case the steps recorded in init_state are undone.
 */
static int
vgen_ldc_init(vgen_ldc_t *ldcp)
{
vgen_t *vgenp = LDC_TO_VGEN(ldcp);
ldc_status_t istatus;
int rv;
/* bit flags recording which setup steps must be undone on failure */
enum { ST_init = 0x0, ST_ldc_open = 0x1,
ST_cb_enable = 0x2} init_state;
int flag = 0;
init_state = ST_init;
DBG1(vgenp, ldcp, "enter\n");
/*
 * NOTE(review): LDC_LOCK presumably acquires cblock, rxlock, wrlock,
 * txlock and tclock, since those are the locks released individually
 * further below -- confirm against the macro definition.
 */
LDC_LOCK(ldcp);
rv = ldc_open(ldcp->ldc_handle);
if (rv != 0) {
DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
goto ldcinit_failed;
}
init_state |= ST_ldc_open;
(void) ldc_status(ldcp->ldc_handle, &istatus);
if (istatus != LDC_OPEN && istatus != LDC_READY) {
DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
goto ldcinit_failed;
}
ldcp->ldc_status = istatus;
rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
if (rv != 0) {
DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
goto ldcinit_failed;
}
init_state |= ST_cb_enable;
vgen_ldc_up(ldcp);
/* re-read the status, as vgen_ldc_up() may have changed it */
(void) ldc_status(ldcp->ldc_handle, &istatus);
if (istatus == LDC_UP) {
DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
}
ldcp->ldc_status = istatus;
/* start from a clean handshake state */
ldcp->hphase = VH_PHASE0;
ldcp->hstate = 0;
ldcp->flags |= CHANNEL_STARTED;
vgen_setup_handshake_params(ldcp);
/* if channel is already UP - start handshake */
if (istatus == LDC_UP) {
/*
 * NOTE(review): this declaration shadows the function-scope vgenp;
 * both are initialized from the same LDC_TO_VGEN(ldcp) expression.
 */
vgen_t *vgenp = LDC_TO_VGEN(ldcp);
if (ldcp->portp != vgenp->vsw_portp) {
/*
* As the channel is up, use this port from now on.
*/
(void) atomic_swap_32(
&ldcp->portp->use_vsw_port, B_FALSE);
}
/* Initialize local session id */
ldcp->local_sid = ddi_get_lbolt();
/* clear peer session id */
ldcp->peer_sid = 0;
/*
 * Drop every lock except cblock before starting the handshake;
 * cblock stays held across vgen_handshake() and is released
 * right after it returns.
 */
mutex_exit(&ldcp->tclock);
mutex_exit(&ldcp->txlock);
mutex_exit(&ldcp->wrlock);
mutex_exit(&ldcp->rxlock);
rv = vgen_handshake(vh_nextphase(ldcp));
mutex_exit(&ldcp->cblock);
if (rv != 0) {
/*
 * Handshake failed: ECONNRESET means the channel is already
 * reset; any other error asks for an explicit channel reset.
 * The reset runs with no channel locks held.
 */
flag = (rv == ECONNRESET) ? VGEN_FLAG_EVT_RESET :
VGEN_FLAG_NEED_LDCRESET;
(void) vgen_process_reset(ldcp, flag);
}
} else {
LDC_UNLOCK(ldcp);
}
/* note: the success path returns without logging "exit" */
return (DDI_SUCCESS);
ldcinit_failed:
/* undo the completed steps in reverse order, then drop the locks */
if (init_state & ST_cb_enable) {
(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
}
if (init_state & ST_ldc_open) {
(void) ldc_close(ldcp->ldc_handle);
}
LDC_UNLOCK(ldcp);
DBG1(vgenp, ldcp, "exit\n");
return (DDI_FAILURE);
}
/*
 * Stop transmit/receive on the channel.
 *
 * Nothing is done (other than a warning) if the channel was never started.
 * Otherwise this waits until it can claim the reset_in_progress flag and
 * then processes a reset with VGEN_FLAG_UNINIT.
 */
static void
vgen_ldc_uninit(vgen_ldc_t *ldcp)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	boolean_t	started;

	DBG1(vgenp, ldcp, "enter\n");

	/* sample the started flag while holding the channel locks */
	LDC_LOCK(ldcp);
	started = ((ldcp->flags & CHANNEL_STARTED) != 0) ? B_TRUE : B_FALSE;
	LDC_UNLOCK(ldcp);

	if (started == B_FALSE) {
		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
		return;
	}

	/* poll until the flag flips from 0 to 1 on our behalf */
	for (;;) {
		if (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) == 0) {
			break;
		}
		delay(drv_usectohz(VGEN_LDC_UNINIT_DELAY));
	}

	(void) vgen_process_reset(ldcp, VGEN_FLAG_UNINIT);

	DBG1(vgenp, ldcp, "exit\n");
}
/*
 * Create the descriptor ring that will be exported to the peer for mapping.
 * A receive dring is created in RxDringData mode, a transmit dring
 * otherwise. Returns the result of the mode specific create routine.
 */
static int
vgen_create_dring(vgen_ldc_t *ldcp)
{
	if (ldcp->local_hparams.dring_mode == VIO_RX_DRING_DATA) {
		return (vgen_create_rx_dring(ldcp));
	}

	return (vgen_create_tx_dring(ldcp));
}
/*
 * Destroy the descriptor ring that was exported to the peer: the receive
 * dring in RxDringData mode, the transmit dring otherwise.
 */
static void
vgen_destroy_dring(vgen_ldc_t *ldcp)
{
	if (ldcp->local_hparams.dring_mode != VIO_RX_DRING_DATA) {
		vgen_destroy_tx_dring(ldcp);
		return;
	}

	vgen_destroy_rx_dring(ldcp);
}
/*
 * Map the descriptor ring exported by the peer. Returns the result of the
 * mode specific map routine.
 */
static int
vgen_map_dring(vgen_ldc_t *ldcp, void *pkt)
{
	if (ldcp->local_hparams.dring_mode == VIO_RX_DRING_DATA) {
		/*
		 * In RxDringData mode, dring that we map in
		 * becomes our transmit descriptor ring.
		 */
		return (vgen_map_tx_dring(ldcp, pkt));
	}

	/*
	 * In TxDring mode, dring that we map in
	 * becomes our receive descriptor ring.
	 */
	return (vgen_map_rx_dring(ldcp, pkt));
}
/*
 * Unmap the descriptor ring exported by the peer: the mapped transmit dring
 * in RxDringData mode, the mapped receive dring otherwise.
 */
static void
vgen_unmap_dring(vgen_ldc_t *ldcp)
{
	if (ldcp->local_hparams.dring_mode != VIO_RX_DRING_DATA) {
		vgen_unmap_rx_dring(ldcp);
		return;
	}

	vgen_unmap_tx_dring(ldcp);
}
/*
 * Destroy every mblk pool on the list passed in 'arg' (a vio_mblk_pool_t
 * list linked through nextp). For each pool, vio_destroy_mblks() is retried
 * after a delay until it returns 0.
 */
void
vgen_destroy_rxpools(void *arg)
{
	vio_mblk_pool_t	*cur;
	vio_mblk_pool_t	*next;

	for (cur = (vio_mblk_pool_t *)arg; cur != NULL; cur = next) {
		/* pick up the link before the pool is destroyed */
		next = cur->nextp;

		for (;;) {
			if (vio_destroy_mblks(cur) == 0) {
				break;
			}
			delay(drv_usectohz(vgen_rxpool_cleanup_delay));
		}
	}
}
/*
 * Get channel statistics.
 *
 * Returns the counter kept in the channel's stats for the requested MAC
 * statistic. Statistics that are not tracked per channel yield 0.
 */
static uint64_t
vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
{
	vgen_stats_t	*statsp = &ldcp->stats;

	switch (stat) {

	case MAC_STAT_MULTIRCV:
		return (statsp->multircv);

	case MAC_STAT_BRDCSTRCV:
		return (statsp->brdcstrcv);

	case MAC_STAT_MULTIXMT:
		return (statsp->multixmt);

	case MAC_STAT_BRDCSTXMT:
		return (statsp->brdcstxmt);

	case MAC_STAT_NORCVBUF:
		return (statsp->norcvbuf);

	case MAC_STAT_IERRORS:
		return (statsp->ierrors);

	case MAC_STAT_NOXMTBUF:
		return (statsp->noxmtbuf);

	case MAC_STAT_OERRORS:
		return (statsp->oerrors);

	case MAC_STAT_COLLISIONS:
		/* no counter is kept for collisions */
		return (0);

	case MAC_STAT_RBYTES:
		return (statsp->rbytes);

	case MAC_STAT_IPACKETS:
		return (statsp->ipackets);

	case MAC_STAT_OBYTES:
		return (statsp->obytes);

	case MAC_STAT_OPACKETS:
		return (statsp->opackets);

	/* stats not relevant to ldc, return 0 */
	case MAC_STAT_IFSPEED:
	case ETHER_STAT_ALIGN_ERRORS:
	case ETHER_STAT_FCS_ERRORS:
	case ETHER_STAT_FIRST_COLLISIONS:
	case ETHER_STAT_MULTI_COLLISIONS:
	case ETHER_STAT_DEFER_XMTS:
	case ETHER_STAT_TX_LATE_COLLISIONS:
	case ETHER_STAT_EX_COLLISIONS:
	case ETHER_STAT_MACXMT_ERRORS:
	case ETHER_STAT_CARRIER_ERRORS:
	case ETHER_STAT_TOOLONG_ERRORS:
	case ETHER_STAT_XCVR_ADDR:
	case ETHER_STAT_XCVR_ID:
	case ETHER_STAT_XCVR_INUSE:
	case ETHER_STAT_CAP_1000FDX:
	case ETHER_STAT_CAP_1000HDX:
	case ETHER_STAT_CAP_100FDX:
	case ETHER_STAT_CAP_100HDX:
	case ETHER_STAT_CAP_10FDX:
	case ETHER_STAT_CAP_10HDX:
	case ETHER_STAT_CAP_ASMPAUSE:
	case ETHER_STAT_CAP_PAUSE:
	case ETHER_STAT_CAP_AUTONEG:
	case ETHER_STAT_ADV_CAP_1000FDX:
	case ETHER_STAT_ADV_CAP_1000HDX:
	case ETHER_STAT_ADV_CAP_100FDX:
	case ETHER_STAT_ADV_CAP_100HDX:
	case ETHER_STAT_ADV_CAP_10FDX:
	case ETHER_STAT_ADV_CAP_10HDX:
	case ETHER_STAT_ADV_CAP_ASMPAUSE:
	case ETHER_STAT_ADV_CAP_PAUSE:
	case ETHER_STAT_ADV_CAP_AUTONEG:
	case ETHER_STAT_LP_CAP_1000FDX:
	case ETHER_STAT_LP_CAP_1000HDX:
	case ETHER_STAT_LP_CAP_100FDX:
	case ETHER_STAT_LP_CAP_100HDX:
	case ETHER_STAT_LP_CAP_10FDX:
	case ETHER_STAT_LP_CAP_10HDX:
	case ETHER_STAT_LP_CAP_ASMPAUSE:
	case ETHER_STAT_LP_CAP_PAUSE:
	case ETHER_STAT_LP_CAP_AUTONEG:
	case ETHER_STAT_LINK_ASMPAUSE:
	case ETHER_STAT_LINK_PAUSE:
	case ETHER_STAT_LINK_AUTONEG:
	case ETHER_STAT_LINK_DUPLEX:
	default:
		return (0);
	}
}
/*
 * The LDC channel is UP: set up fresh session ids and start the handshake
 * process with the peer. Must be called with cblock held.
 */
static void
vgen_handle_evt_up(vgen_ldc_t *ldcp)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vgen_port_t	*portp;

	DBG1(vgenp, ldcp, "enter\n");

	ASSERT(MUTEX_HELD(&ldcp->cblock));

	portp = ldcp->portp;
	if (portp != vgenp->vsw_portp) {
		/*
		 * As the channel is up, use this port from now on.
		 */
		(void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
	}

	/* clear peer session id */
	ldcp->peer_sid = 0;

	/* Initialize local session id */
	ldcp->local_sid = ddi_get_lbolt();

	/* Initiate Handshake process with peer ldc endpoint */
	(void) vgen_handshake(vh_nextphase(ldcp));

	DBG1(vgenp, ldcp, "exit\n");
}
/*
 * The LDC channel is Reset: terminate the connection with the peer and try
 * to bring the channel up again.
 *
 * When called from the LDC callback or the message thread, cblock must be
 * held on entry; it is dropped around the reset processing and re-acquired
 * before returning.
 *
 * Returns EBUSY if another thread has already claimed the reset, otherwise
 * 0 after the reset has been processed.
 */
int
vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller)
{
	boolean_t	cb_held;

	cb_held = (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) ?
	    B_TRUE : B_FALSE;

	if (cb_held == B_TRUE) {
		ASSERT(MUTEX_HELD(&ldcp->cblock));
	}

	/* Set the flag to indicate reset is in progress */
	if (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
		/* another thread is already in the process of resetting */
		return (EBUSY);
	}

	if (cb_held == B_TRUE) {
		mutex_exit(&ldcp->cblock);
		(void) vgen_process_reset(ldcp, VGEN_FLAG_EVT_RESET);
		mutex_enter(&ldcp->cblock);
	} else {
		(void) vgen_process_reset(ldcp, VGEN_FLAG_EVT_RESET);
	}

	return (0);
}
/*
 * Interrupt handler (LDC callback) for the channel.
 *
 * Arguments:
 *	event:	bit mask of LDC_EVT_* events being delivered.
 *	arg:	the vgen_ldc_t of the channel, as registered in
 *		vgen_ldc_attach().
 *
 * Events are handled in the order UP, RESET/DOWN, READ. cblock is held for
 * the duration of the callback, except where it is dropped briefly to wake
 * up the message thread.
 *
 * Returns LDC_SUCCESS, or LDC_FAILURE if the channel status could not be
 * determined.
 */
static uint_t
vgen_ldc_cb(uint64_t event, caddr_t arg)
{
/* NOTE(review): 'event' is in fact used below, so this note looks stale */
_NOTE(ARGUNUSED(event))
vgen_ldc_t *ldcp;
vgen_t *vgenp;
ldc_status_t istatus;
vgen_stats_t *statsp;
uint_t ret = LDC_SUCCESS;
ldcp = (vgen_ldc_t *)arg;
vgenp = LDC_TO_VGEN(ldcp);
statsp = &ldcp->stats;
DBG1(vgenp, ldcp, "enter\n");
mutex_enter(&ldcp->cblock);
statsp->callbacks++;
/* ignore callbacks on a channel that is not initialized/opened yet */
if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
ldcp->ldc_status);
mutex_exit(&ldcp->cblock);
return (LDC_SUCCESS);
}
/*
* NOTE: not using switch() as event could be triggered by
* a state change and a read request. Also the ordering of the
* check for the event types is deliberate.
*/
if (event & LDC_EVT_UP) {
if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
DWARN(vgenp, ldcp, "ldc_status err\n");
/* status couldn't be determined */
ret = LDC_FAILURE;
goto ldc_cb_ret;
}
ldcp->ldc_status = istatus;
if (ldcp->ldc_status != LDC_UP) {
DWARN(vgenp, ldcp, "LDC_EVT_UP received "
" but ldc status is not UP(0x%x)\n",
ldcp->ldc_status);
/* spurious interrupt, return success */
goto ldc_cb_ret;
}
DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
event, ldcp->ldc_status);
vgen_handle_evt_up(ldcp);
/* UP is not expected to arrive together with RESET/DOWN */
ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
}
/* Handle RESET/DOWN before READ event */
if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
DWARN(vgenp, ldcp, "ldc_status error\n");
/* status couldn't be determined */
ret = LDC_FAILURE;
goto ldc_cb_ret;
}
ldcp->ldc_status = istatus;
DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
event, ldcp->ldc_status);
/* cblock is dropped and re-acquired inside this call */
(void) vgen_handle_evt_reset(ldcp, VGEN_LDC_CB);
/*
* As the channel is down/reset, ignore READ event
* but print a debug warning message.
*/
if (event & LDC_EVT_READ) {
DWARN(vgenp, ldcp,
"LDC_EVT_READ set along with RESET/DOWN\n");
event &= ~LDC_EVT_READ;
}
}
if (event & LDC_EVT_READ) {
DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
event, ldcp->ldc_status);
ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
if (ldcp->msg_thread != NULL) {
/*
* If the receive thread is enabled, then
* wakeup the receive thread to process the
* LDC messages.
*/
/*
 * cblock is released before msg_thr_lock is taken and
 * re-acquired afterwards, so the two are never held together
 * here.
 */
mutex_exit(&ldcp->cblock);
mutex_enter(&ldcp->msg_thr_lock);
/* signal only if a wakeup is not already pending */
if (!(ldcp->msg_thr_flags & VGEN_WTHR_DATARCVD)) {
ldcp->msg_thr_flags |= VGEN_WTHR_DATARCVD;
cv_signal(&ldcp->msg_thr_cv);
}
mutex_exit(&ldcp->msg_thr_lock);
mutex_enter(&ldcp->cblock);
} else {
/* no message thread: process the messages in this context */
(void) vgen_handle_evt_read(ldcp, VGEN_LDC_CB);
}
}
ldc_cb_ret:
mutex_exit(&ldcp->cblock);
DBG1(vgenp, ldcp, "exit\n");
return (ret);
}
/*
 * Read and process the messages that are pending on the channel.
 *
 * Arguments:
 *	ldcp:	the channel to read from.
 *	caller:	VGEN_LDC_CB if called from the LDC callback (cblock must
 *		already be held), or VGEN_MSG_THR if called from the message
 *		thread (cblock is acquired here and released before return).
 *
 * Messages are read into ldcp->ldcmsg one at a time and dispatched on their
 * VIO message type until no more data is available or an error occurs. On
 * error the channel is reset.
 *
 * Returns EINVAL for any other caller; otherwise 0 or the error that
 * stopped processing.
 */
int
vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller)
{
int rv;
uint64_t *ldcmsg;
size_t msglen;
vgen_t *vgenp = LDC_TO_VGEN(ldcp);
vio_msg_tag_t *tagp;
ldc_status_t istatus;
boolean_t has_data;
DBG1(vgenp, ldcp, "enter\n");
if (caller == VGEN_LDC_CB) {
ASSERT(MUTEX_HELD(&ldcp->cblock));
} else if (caller == VGEN_MSG_THR) {
mutex_enter(&ldcp->cblock);
} else {
return (EINVAL);
}
ldcmsg = ldcp->ldcmsg;
vgen_evtread:
do {
/* msglen is in/out: buffer size going in, bytes read coming out */
msglen = ldcp->msglen;
rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
if (rv != 0) {
/* NOTE(review): msglen is a size_t but is printed with %d */
DWARN(vgenp, ldcp, "ldc_read() failed "
"rv(%d) len(%d)\n", rv, msglen);
if (rv == ECONNRESET)
goto vgen_evtread_error;
/*
 * Any other read error just ends this pass; rv is then
 * overwritten by the ldc_chkq() result below.
 */
break;
}
if (msglen == 0) {
DBG2(vgenp, ldcp, "ldc_read NODATA");
break;
}
DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
tagp = (vio_msg_tag_t *)ldcmsg;
if (ldcp->peer_sid) {
/*
* check sid only after we have received peer's sid
* in the version negotiate msg.
*/
#ifdef DEBUG
if (vgen_inject_error(ldcp, VGEN_ERR_HSID)) {
/* simulate bad sid condition */
tagp->vio_sid = 0;
vgen_inject_err_flag &= ~(VGEN_ERR_HSID);
}
#endif
rv = vgen_check_sid(ldcp, tagp);
if (rv != VGEN_SUCCESS) {
/*
* If sid mismatch is detected,
* reset the channel.
*/
DWARN(vgenp, ldcp, "vgen_check_sid() failed\n");
goto vgen_evtread_error;
}
}
/* dispatch on the message type; only CTRL and DATA update rv */
switch (tagp->vio_msgtype) {
case VIO_TYPE_CTRL:
rv = vgen_handle_ctrlmsg(ldcp, tagp);
if (rv != 0) {
DWARN(vgenp, ldcp, "vgen_handle_ctrlmsg()"
" failed rv(%d)\n", rv);
}
break;
case VIO_TYPE_DATA:
rv = vgen_handle_datamsg(ldcp, tagp, msglen);
if (rv != 0) {
DWARN(vgenp, ldcp, "vgen_handle_datamsg()"
" failed rv(%d)\n", rv);
}
break;
case VIO_TYPE_ERR:
vgen_handle_errmsg(ldcp, tagp);
break;
default:
DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
tagp->vio_msgtype);
break;
}
/*
* If an error is encountered, stop processing and
* handle the error.
*/
if (rv != 0) {
goto vgen_evtread_error;
}
} while (msglen);
/* check once more before exiting */
rv = ldc_chkq(ldcp->ldc_handle, &has_data);
if ((rv == 0) && (has_data == B_TRUE)) {
DTRACE_PROBE1(vgen_chkq, vgen_ldc_t *, ldcp);
goto vgen_evtread;
}
/* reached both on error (rv != 0) and on normal completion (rv == 0) */
vgen_evtread_error:
if (rv != 0) {
/*
* We handle the error and then return the error value. If we
* are running in the context of the msg worker, the error
* tells the worker thread to exit, as the channel would have
* been reset.
*/
if (rv == ECONNRESET) {
/* refresh the cached status before handling the reset */
if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
DWARN(vgenp, ldcp, "ldc_status err\n");
} else {
ldcp->ldc_status = istatus;
}
(void) vgen_handle_evt_reset(ldcp, caller);
} else {
DWARN(vgenp, ldcp, "Calling vgen_ldc_reset()...\n");
(void) vgen_ldc_reset(ldcp, caller);
}
}
/* the message thread took cblock at entry; the callback keeps holding it */
if (caller == VGEN_MSG_THR) {
mutex_exit(&ldcp->cblock);
}
DBG1(vgenp, ldcp, "exit\n");
return (rv);
}
/* vgen handshake functions */

/*
 * Advance the handshake phase of the channel to the next phase; the phase
 * that follows VH_PHASE4 is VH_DONE. Returns the channel itself so that the
 * call can be nested as the argument of vgen_handshake().
 */
static vgen_ldc_t *
vh_nextphase(vgen_ldc_t *ldcp)
{
	if (ldcp->hphase != VH_PHASE4) {
		ldcp->hphase++;
		return (ldcp);
	}

	ldcp->hphase = VH_DONE;
	return (ldcp);
}
/*
 * Send a version negotiate message to the peer over ldc, carrying the
 * version and device class from the local handshake parameters.
 * On success VER_INFO_SENT is recorded in hstate and VGEN_SUCCESS is
 * returned; otherwise the vgen_sendmsg() error is returned.
 */
static int
vgen_send_version_negotiate(vgen_ldc_t *ldcp)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vgen_hparams_t	*lp = &ldcp->local_hparams;
	vio_ver_msg_t	vermsg;
	int		rv;

	bzero(&vermsg, sizeof (vermsg));

	/* message tag */
	vermsg.tag.vio_msgtype = VIO_TYPE_CTRL;
	vermsg.tag.vio_subtype = VIO_SUBTYPE_INFO;
	vermsg.tag.vio_subtype_env = VIO_VER_INFO;
	vermsg.tag.vio_sid = ldcp->local_sid;

	/* get version msg payload from ldcp->local */
	vermsg.ver_major = lp->ver_major;
	vermsg.ver_minor = lp->ver_minor;
	vermsg.dev_class = lp->dev_class;

	rv = vgen_sendmsg(ldcp, (caddr_t)&vermsg.tag, sizeof (vermsg),
	    B_FALSE);
	if (rv != VGEN_SUCCESS) {
		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
		return (rv);
	}

	ldcp->hstate |= VER_INFO_SENT;
	DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
	    vermsg.ver_major, vermsg.ver_minor);

	return (VGEN_SUCCESS);
}
/*
 * Send an attribute info message to the peer over ldc, carrying the
 * attributes from the local handshake parameters.
 * On success ATTR_INFO_SENT is recorded in hstate and VGEN_SUCCESS is
 * returned; otherwise the vgen_sendmsg() error is returned.
 */
static int
vgen_send_attr_info(vgen_ldc_t *ldcp)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vgen_hparams_t	*lp = &ldcp->local_hparams;
	vnet_attr_msg_t	attrmsg;
	int		rv;

	bzero(&attrmsg, sizeof (attrmsg));

	/* message tag */
	attrmsg.tag.vio_msgtype = VIO_TYPE_CTRL;
	attrmsg.tag.vio_subtype = VIO_SUBTYPE_INFO;
	attrmsg.tag.vio_subtype_env = VIO_ATTR_INFO;
	attrmsg.tag.vio_sid = ldcp->local_sid;

	/* get attr msg payload from ldcp->local */
	attrmsg.mtu = lp->mtu;
	attrmsg.addr = lp->addr;
	attrmsg.addr_type = lp->addr_type;
	attrmsg.xfer_mode = lp->xfer_mode;
	attrmsg.ack_freq = lp->ack_freq;
	attrmsg.physlink_update = lp->physlink_update;
	/* the negotiated dring mode travels in the 'options' field */
	attrmsg.options = lp->dring_mode;

	rv = vgen_sendmsg(ldcp, (caddr_t)&attrmsg.tag, sizeof (attrmsg),
	    B_FALSE);
	if (rv != VGEN_SUCCESS) {
		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
		return (rv);
	}

	ldcp->hstate |= ATTR_INFO_SENT;
	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");

	return (VGEN_SUCCESS);
}
/*
 * Send descriptor ring register message to the peer over ldc.
 * Invoked in RxDringData mode.
 *
 * The message is the common dring register message followed by an extended
 * part that describes the receive data area (size and cookies). On success
 * DRING_INFO_SENT is recorded in hstate. Returns the vgen_sendmsg() result.
 */
static int
vgen_send_rx_dring_reg(vgen_ldc_t *ldcp)
{
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
	vio_dring_reg_msg_t	*msg;
	vio_dring_reg_ext_msg_t	*emsg;
	uint8_t			*extp;
	uint_t			msgsize;
	int			rv;

	msgsize = VNET_DRING_REG_EXT_MSG_SIZE(ldcp->rx_data_ncookies);
	msg = kmem_zalloc(msgsize, KM_SLEEP);

	/* Initialize the common part of dring reg msg */
	vgen_init_dring_reg_msg(ldcp, msg, VIO_RX_DRING_DATA);

	/* skip over dring cookies at the tail of common section */
	extp = (uint8_t *)msg->cookie;
	ASSERT(msg->ncookies == 1);
	extp += (msg->ncookies * sizeof (ldc_mem_cookie_t));

	/* Now setup the extended part, specific to RxDringData mode */
	emsg = (vio_dring_reg_ext_msg_t *)extp;

	/* copy data_ncookies in the msg */
	emsg->data_ncookies = ldcp->rx_data_ncookies;

	/* copy data area size in the msg */
	emsg->data_area_size = ldcp->rx_data_sz;

	/* copy data area cookies in the msg */
	bcopy(ldcp->rx_data_cookie, (ldc_mem_cookie_t *)emsg->data_cookie,
	    sizeof (ldc_mem_cookie_t) * ldcp->rx_data_ncookies);

	rv = vgen_sendmsg(ldcp, (caddr_t)msg, msgsize, B_FALSE);
	if (rv == VGEN_SUCCESS) {
		ldcp->hstate |= DRING_INFO_SENT;
		DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
	} else {
		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
	}

	/* the message buffer is no longer needed either way */
	kmem_free(msg, msgsize);

	return (rv);
}
/*
 * Send descriptor ring register message to the peer over ldc.
 * Invoked in TxDring mode.
 *
 * On success DRING_INFO_SENT is recorded in hstate. Returns the
 * vgen_sendmsg() result.
 */
static int
vgen_send_tx_dring_reg(vgen_ldc_t *ldcp)
{
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
	vio_dring_reg_msg_t	regmsg;
	int			rv;

	bzero(&regmsg, sizeof (regmsg));

	/*
	 * Initialize only the common part of dring reg msg in TxDring mode.
	 */
	vgen_init_dring_reg_msg(ldcp, &regmsg, VIO_TX_DRING);

	rv = vgen_sendmsg(ldcp, (caddr_t)&regmsg, sizeof (regmsg), B_FALSE);
	if (rv == VGEN_SUCCESS) {
		ldcp->hstate |= DRING_INFO_SENT;
		DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
	} else {
		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
	}

	return (rv);
}
/*
 * Send an RDX message to the peer over ldc.
 * On success RDX_INFO_SENT is recorded in hstate. Returns the
 * vgen_sendmsg() result.
 */
static int
vgen_send_rdx_info(vgen_ldc_t *ldcp)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vio_rdx_msg_t	rdxmsg;
	int		rv;

	bzero(&rdxmsg, sizeof (rdxmsg));

	/* the message consists of the tag only */
	rdxmsg.tag.vio_msgtype = VIO_TYPE_CTRL;
	rdxmsg.tag.vio_subtype = VIO_SUBTYPE_INFO;
	rdxmsg.tag.vio_subtype_env = VIO_RDX;
	rdxmsg.tag.vio_sid = ldcp->local_sid;

	rv = vgen_sendmsg(ldcp, (caddr_t)&rdxmsg.tag, sizeof (rdxmsg),
	    B_FALSE);
	if (rv == VGEN_SUCCESS) {
		ldcp->hstate |= RDX_INFO_SENT;
		DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
	} else {
		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
	}

	return (rv);
}
/*
 * Send multicast addr info message(s) to vsw.
 *
 * The multicast table (vgenp->mctab, vgenp->mccount entries) is sent in
 * messages of at most VNET_NUM_MCAST addresses each, all with 'set' enabled.
 * At least one message is always sent, even when the table is empty.
 *
 * Returns VGEN_SUCCESS once all messages are sent, or the vgen_sendmsg()
 * error of the first message that fails.
 */
static int
vgen_send_mcast_info(vgen_ldc_t *ldcp)
{
	vnet_mcast_msg_t	mcastmsg;
	vnet_mcast_msg_t	*msgp;
	vio_msg_tag_t		*tagp;
	vgen_t			*vgenp;
	struct ether_addr	*mca;
	int			rv;
	int			i;
	uint32_t		size;
	uint32_t		mccount;
	uint32_t		n;

	msgp = &mcastmsg;
	tagp = &msgp->tag;
	vgenp = LDC_TO_VGEN(ldcp);

	mccount = vgenp->mccount;
	i = 0;

	do {
		/*
		 * The whole structure is sent to the peer, so clear it
		 * first; otherwise unused address slots and any padding
		 * would carry uninitialized stack contents.
		 */
		bzero(msgp, sizeof (*msgp));

		tagp->vio_msgtype = VIO_TYPE_CTRL;
		tagp->vio_subtype = VIO_SUBTYPE_INFO;
		tagp->vio_subtype_env = VNET_MCAST_INFO;
		tagp->vio_sid = ldcp->local_sid;

		/* number of addresses that go into this message */
		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
		size = n * sizeof (struct ether_addr);

		mca = &(vgenp->mctab[i]);
		bcopy(mca, (msgp->mca), size);
		msgp->set = B_TRUE;
		msgp->count = n;

		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
		    B_FALSE);
		if (rv != VGEN_SUCCESS) {
			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
			return (rv);
		}

		mccount -= n;
		i += n;

	} while (mccount);

	return (VGEN_SUCCESS);
}
/*
 * vgen_dds_rx -- post DDS messages to vnet.
 *
 * Only messages of class DDS_VNET_NIU are passed on; for any other class
 * the message is dropped and EBADMSG is returned. Returns 0 otherwise.
 */
static int
vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vio_dds_msg_t	*ddsmsg = (vio_dds_msg_t *)tagp;

	if (ddsmsg->dds_class == DDS_VNET_NIU) {
		vnet_dds_rx(vgenp->vnetp, ddsmsg);
		return (0);
	}

	DWARN(vgenp, ldcp, "Unknown DDS class, dropping");
	return (EBADMSG);
}
/*
* vgen_dds_tx -- an interface called by vnet to send DDS messages.
*/
int
vgen_dds_tx(void *arg, void *msg)
{
vgen_t *vgenp = arg;
vio_dds_msg_t *dmsg = msg;
vgen_portlist_t *plistp = &vgenp->vgenports;
vgen_ldc_t *ldcp;
int rv = EIO;
READ_ENTER(&plistp->rwlock);
ldcp = vgenp->vsw_portp->ldcp;
if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) {
goto vgen_dsend_exit;
}
dmsg->tag.vio_sid = ldcp->local_sid;
rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE);
if (rv != VGEN_SUCCESS) {
rv = EIO;
} else {
rv = 0;
}
vgen_dsend_exit:
RW_EXIT(&plistp->rwlock);
return (rv);
}
/*
 * Initiate Phase 2 of handshake: send our attribute info to the peer.
 * Returns the result of sending the message.
 *
 * In DEBUG kernels, error injection can replace the attribute message with
 * an out-of-state RDX message, or suppress sending altogether.
 */
static int
vgen_handshake_phase2(vgen_ldc_t *ldcp)
{
#ifdef DEBUG
	if (vgen_inject_error(ldcp, VGEN_ERR_HSTATE)) {
		/* simulate out of state condition */
		vgen_inject_err_flag &= ~(VGEN_ERR_HSTATE);
		return (vgen_send_rdx_info(ldcp));
	}

	if (vgen_inject_error(ldcp, VGEN_ERR_HTIMEOUT)) {
		/* simulate timeout condition */
		vgen_inject_err_flag &= ~(VGEN_ERR_HTIMEOUT);
		return (VGEN_SUCCESS);
	}
#endif

	return (vgen_send_attr_info(ldcp));
}
/*
 * Initiate Phase 3 of handshake: set up the data path for the dring mode
 * negotiated in the attr phase, create our descriptor ring and send the
 * dring register message to the peer.
 *
 * Returns the error of vgen_create_dring() if the ring could not be
 * created, otherwise the result of sending the register message.
 */
static int
vgen_handshake_phase3(vgen_ldc_t *ldcp)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vgen_hparams_t	*lp = &ldcp->local_hparams;
	int		rv;

	/* dring mode has been negotiated in attr phase; save in stats */
	ldcp->stats.dring_mode = lp->dring_mode;

	if (lp->dring_mode != VIO_RX_DRING_DATA) {
		/* TxDring mode: create the message worker thread */
		ldcp->msg_thread = thread_create(NULL,
		    2 * DEFAULTSTKSZ, vgen_ldc_msg_worker, ldcp, 0,
		    &p0, TS_RUN, maxclsyspri);
	} else {
		/* RxDringData mode */
		ldcp->rx_dringdata = vgen_handle_dringdata_shm;
		ldcp->tx_dringdata = vgen_dringsend_shm;
		if (!VGEN_PRI_ETH_DEFINED(vgenp)) {
			/*
			 * If priority frames are not in use, we don't need a
			 * separate wrapper function for 'tx', so we set it to
			 * 'tx_dringdata'. If priority frames are configured,
			 * we leave the 'tx' pointer as is (initialized in
			 * vgen_set_vnet_proto_ops()).
			 */
			ldcp->tx = ldcp->tx_dringdata;
		}
	}

	rv = vgen_create_dring(ldcp);
	if (rv != VGEN_SUCCESS) {
		return (rv);
	}

	/* update local dring_info params */
	if (lp->dring_mode != VIO_RX_DRING_DATA) {
		/* we export our transmit dring */
		bcopy(&(ldcp->tx_dring_cookie), &(lp->dring_cookie),
		    sizeof (ldc_mem_cookie_t));
		lp->dring_ncookies = ldcp->tx_dring_ncookies;
		lp->num_desc = ldcp->num_txds;
		lp->desc_size = sizeof (vnet_public_desc_t);
		rv = vgen_send_tx_dring_reg(ldcp);
	} else {
		/* we export our receive dring */
		bcopy(&(ldcp->rx_dring_cookie), &(lp->dring_cookie),
		    sizeof (ldc_mem_cookie_t));
		lp->dring_ncookies = ldcp->rx_dring_ncookies;
		lp->num_desc = ldcp->num_rxds;
		lp->desc_size = sizeof (vnet_rx_dringdata_desc_t);
		rv = vgen_send_rx_dring_reg(ldcp);
	}

	return (rv);
}
/*
 * Set vnet-protocol-version dependent functions and the local handshake
 * attributes (dring mode, physlink update, mtu, xfer mode) based on the
 * version in the local handshake parameters.
 */
static void
vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vgen_port_t	*portp = ldcp->portp;
	vnet_t		*vnetp = vgenp->vnetp;
	vgen_hparams_t	*lp = &ldcp->local_hparams;

	/*
	 * Setup the appropriate dring data processing routine and any
	 * associated thread based on the version.
	 *
	 * In versions < 1.6, we only support TxDring mode. In this mode, the
	 * msg worker thread processes all types of VIO msgs (ctrl and data).
	 *
	 * In versions >= 1.6, we also support RxDringData mode. In this mode,
	 * all msgs including dring data messages are handled directly by the
	 * callback (intr) thread. The dring data msgs (msgtype: VIO_TYPE_DATA,
	 * subtype: VIO_SUBTYPE_INFO, subtype_env: VIO_DRING_DATA) can also be
	 * disabled while the polling thread is active, in which case the
	 * polling thread processes the rcv descriptor ring.
	 *
	 * However, for versions >= 1.6, we can force to only use TxDring mode.
	 * This could happen if RxDringData mode has been disabled (see
	 * below) on this guest or on the peer guest. This info is determined
	 * as part of attr exchange phase of handshake. Hence, we setup these
	 * pointers for v1.6 after attr msg phase completes during handshake.
	 */

	/*
	 * Set data dring mode for vgen_send_attr_info(): both modes are
	 * offered only for Ver >= 1.6 with mapin available; in every other
	 * case (including Ver <= 1.5) only TxDring mode is offered.
	 */
	if ((VGEN_VER_GTEQ(ldcp, 1, 6)) &&
	    (vgen_mapin_avail(ldcp) == B_TRUE)) {
		lp->dring_mode = (VIO_RX_DRING_DATA | VIO_TX_DRING);
	} else {
		lp->dring_mode = VIO_TX_DRING;
	}

	if (VGEN_VER_GTEQ(ldcp, 1, 5)) {
		/*
		 * If the version negotiated with vswitch is >= 1.5 (link
		 * status update support), set the required bits in our
		 * attributes if this vnet device has been configured to get
		 * physical link state updates.
		 */
		lp->physlink_update =
		    (portp == vgenp->vsw_portp && vnetp->pls_update == B_TRUE) ?
		    PHYSLINK_UPDATE_STATE : PHYSLINK_UPDATE_NONE;
	}

	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
		/*
		 * If the version negotiated with peer is >= 1.4(Jumbo Frame
		 * Support), set the mtu in our attributes to max_frame_size.
		 */
		lp->mtu = vgenp->max_frame_size;
	} else if (VGEN_VER_EQ(ldcp, 1, 3)) {
		/*
		 * If the version negotiated with peer is == 1.3 (Vlan Tag
		 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
		 */
		lp->mtu = ETHERMAX + VLAN_TAGSZ;
	} else if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
		/*
		 * Pre-1.3 peers expect max frame size of ETHERMAX.
		 * We can negotiate that size with those peers provided the
		 * following conditions are true:
		 * - Only pvid is defined for our peer and there are no vids.
		 * - pvids are equal.
		 * If the above conditions are true, then we can send/recv only
		 * untagged frames of max size ETHERMAX.
		 */
		lp->mtu = ETHERMAX;
	}

	if (!(VGEN_VER_GTEQ(ldcp, 1, 2))) {
		/* Versions prior to 1.2 */
		vgen_reset_vnet_proto_ops(ldcp);
		return;
	}

	/*
	 * Versions >= 1.2:
	 * Starting v1.2 we support priority frames; so set the
	 * dring processing routines and xfer modes based on the
	 * version. Note that the dring routines could be changed after
	 * attribute handshake phase for versions >= 1.6 (See
	 * vgen_handshake_phase3())
	 */
	ldcp->tx_dringdata = vgen_dringsend;
	ldcp->rx_dringdata = vgen_handle_dringdata;

	if (VGEN_PRI_ETH_DEFINED(vgenp)) {
		/*
		 * Enable priority routines and pkt mode only if
		 * at least one pri-eth-type is specified in MD.
		 */
		ldcp->tx = vgen_ldcsend;
		ldcp->rx_pktdata = vgen_handle_pkt_data;

		/* set xfer mode for vgen_send_attr_info() */
		lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
	} else {
		/* No priority eth types defined in MD */
		ldcp->tx = ldcp->tx_dringdata;
		ldcp->rx_pktdata = vgen_handle_pkt_data_nop;

		/* Set xfer mode for vgen_send_attr_info() */
		lp->xfer_mode = VIO_DRING_MODE_V1_2;
	}
}
/*
 * Reset vnet-protocol-version dependent functions to pre-v1.2: plain dring
 * send/receive routines, no packet data handling, and the v1.0 dring
 * transfer mode.
 */
static void
vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
{
	/* transmit: dring send, also used directly as the 'tx' routine */
	ldcp->tx_dringdata = vgen_dringsend;
	ldcp->tx = ldcp->tx_dringdata;

	/* receive: dring data only, packet data is a no-op */
	ldcp->rx_dringdata = vgen_handle_dringdata;
	ldcp->rx_pktdata = vgen_handle_pkt_data_nop;

	/* set xfer mode for vgen_send_attr_info() */
	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
}
/*
 * Reset the channel of the port if its handshake is complete, the version
 * in use is < 1.3 (vlan unaware) and the port's vlan configuration is no
 * longer one that such a peer can handle.
 */
static void
vgen_vlan_unaware_port_reset(vgen_port_t *portp)
{
	vgen_ldc_t	*ldcp = portp->ldcp;
	vnet_t		*vnetp = portp->vgenp->vnetp;
	boolean_t	reset_needed;

	/*
	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
	 * the connection. See comments in vgen_set_vnet_proto_ops().
	 * The check is made under cblock; the reset itself runs without it.
	 */
	mutex_enter(&ldcp->cblock);
	reset_needed = (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
	    (portp->nvids != 0 || portp->pvid != vnetp->pvid)) ?
	    B_TRUE : B_FALSE;
	mutex_exit(&ldcp->cblock);

	if (reset_needed == B_FALSE) {
		return;
	}

	(void) vgen_ldc_reset(ldcp, VGEN_OTHER);
}
/*
 * Reset the channel attached to the given port. The result of the reset is
 * ignored.
 */
static void
vgen_port_reset(vgen_port_t *portp)
{
	vgen_ldc_t	*chan = portp->ldcp;

	(void) vgen_ldc_reset(chan, VGEN_OTHER);
}
/*
 * Walk the port list (holding its lock as reader) and reset the channel of
 * every port that qualifies; see vgen_vlan_unaware_port_reset().
 */
static void
vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
{
	vgen_portlist_t	*plistp = &(vgenp->vgenports);
	vgen_port_t	*portp;

	READ_ENTER(&plistp->rwlock);

	portp = plistp->headp;
	while (portp != NULL) {
		vgen_vlan_unaware_port_reset(portp);
		portp = portp->nextp;
	}

	RW_EXIT(&plistp->rwlock);
}
/*
 * Reset the vswitch port of this vgen instance, if there is one.
 */
static void
vgen_reset_vsw_port(vgen_t *vgenp)
{
	vgen_port_t	*vsw_port = vgenp->vsw_portp;

	if (vsw_port == NULL) {
		return;
	}

	vgen_port_reset(vsw_port);
}
/*
 * (Re)initialize the handshake parameters of the channel: the local
 * parameters are set to their starting values (highest supported version,
 * default attributes, pre-v1.2 protocol ops) and the peer parameters are
 * cleared.
 */
static void
vgen_setup_handshake_params(vgen_ldc_t *ldcp)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vgen_hparams_t	*lp = &ldcp->local_hparams;

	/*
	 * clear local handshake params and initialize.
	 */
	bzero(lp, sizeof (ldcp->local_hparams));

	/* set version to the highest version supported */
	lp->ver_major = ldcp->vgen_versions[0].ver_major;
	lp->ver_minor = ldcp->vgen_versions[0].ver_minor;
	lp->dev_class = VDEV_NETWORK;

	/* set attr_info params */
	lp->mtu = vgenp->max_frame_size;
	lp->addr = vnet_macaddr_strtoul(vgenp->macaddr);
	lp->addr_type = ADDR_TYPE_MAC;
	lp->xfer_mode = VIO_DRING_MODE_V1_0;
	lp->ack_freq = 0;	/* don't need acks */
	lp->physlink_update = PHYSLINK_UPDATE_NONE;

	/* reset protocol version specific function pointers */
	vgen_reset_vnet_proto_ops(ldcp);

	/* no dring has been registered yet */
	lp->dring_ident = 0;
	lp->dring_ready = B_FALSE;

	/* clear peer_hparams */
	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
	ldcp->peer_hparams.dring_ready = B_FALSE;
}
/*
* Process Channel Reset. We tear down the resources (timers, threads,
* descriptor rings etc) associated with the channel and reinitialize the
* channel based on the flags.
*
* Arguments:
* ldcp: The channel being processed.
*
* flags:
* VGEN_FLAG_EVT_RESET:
* An ECONNRESET error occurred while doing ldc operations such as
* ldc_read() or ldc_write(); the channel is already reset and it
* needs to be handled.
* VGEN_FLAG_NEED_LDCRESET:
* Some other errors occured and the error handling code needs to
* explicitly reset the channel and restart handshake with the
* peer. The error could be either in ldc operations or other
* parts of the code such as timeouts or mdeg events etc.
* VGEN_FLAG_UNINIT:
* The channel is being torn down; no need to bring up the channel
* after resetting.
*/
static int
vgen_process_reset(vgen_ldc_t *ldcp, int flags)
{
vgen_t *vgenp = LDC_TO_VGEN(ldcp);
vgen_port_t *portp = ldcp->portp;
vgen_hparams_t *lp = &ldcp->local_hparams;
boolean_t is_vsw_port = B_FALSE;
boolean_t link_update = B_FALSE;
ldc_status_t istatus;
int rv;
uint_t retries = 0;
timeout_id_t htid = 0;
timeout_id_t wd_tid = 0;
if (portp == vgenp->vsw_portp) { /* vswitch port ? */
is_vsw_port = B_TRUE;
}
/*
* Report that the channel is being reset; it ensures that any HybridIO
* configuration is torn down before we reset the channel if it is not
* already reset (flags == VGEN_FLAG_NEED_LDCRESET).
*/
if (is_vsw_port == B_TRUE) {
vio_net_report_err_t rep_err = portp->vcb.vio_net_report_err;
rep_err(portp->vhp, VIO_NET_RES_DOWN);
}
again:
mutex_enter(&ldcp->cblock);
/* Clear hstate and hphase */
ldcp->hstate = 0;
ldcp->hphase = VH_PHASE0;
if (flags == VGEN_FLAG_NEED_LDCRESET || flags == VGEN_FLAG_UNINIT) {
DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
(void) ldc_down(ldcp->ldc_handle);
(void) ldc_status(ldcp->ldc_handle, &istatus);
DWARN(vgenp, ldcp, "Reset Done, ldc_status(%d)\n", istatus);
ldcp->ldc_status = istatus;
if (flags == VGEN_FLAG_UNINIT) {
/* disable further callbacks */
rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
if (rv != 0) {
DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
}
}
} else {
/* flags == VGEN_FLAG_EVT_RESET */
DWARN(vgenp, ldcp, "ldc status(%d)\n", ldcp->ldc_status);
}
/*
* As the connection is now reset, mark the channel
* link_state as 'down' and notify the stack if needed.
*/
if (ldcp->link_state != LINK_STATE_DOWN) {
ldcp->link_state = LINK_STATE_DOWN;
if (is_vsw_port == B_TRUE) { /* vswitch port ? */
/*
* As the channel link is down, mark physical link also
* as down. After the channel comes back up and
* handshake completes, we will get an update on the
* physlink state from vswitch (if this device has been
* configured to get phys link updates).
*/
vgenp->phys_link_state = LINK_STATE_DOWN;
link_update = B_TRUE;
}
}
if (ldcp->htid != 0) {
htid = ldcp->htid;
ldcp->htid = 0;
}
if (ldcp->wd_tid != 0) {
wd_tid = ldcp->wd_tid;
ldcp->wd_tid = 0;
}
mutex_exit(&ldcp->cblock);
/* Update link state to the stack */
if (link_update == B_TRUE) {
vgen_link_update(vgenp, ldcp->link_state);
}
/*
* As the channel is being reset, redirect traffic to the peer through
* vswitch, until the channel becomes ready to be used again.
*/
if (is_vsw_port == B_FALSE && vgenp->vsw_portp != NULL) {
(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
}
/* Cancel handshake watchdog timeout */
if (htid) {
(void) untimeout(htid);
}
/* Cancel transmit watchdog timeout */
if (wd_tid) {
(void) untimeout(wd_tid);
}
/* Stop the msg worker thread */
if (lp->dring_mode == VIO_TX_DRING && curthread != ldcp->msg_thread) {
vgen_stop_msg_thread(ldcp);
}
/* Grab all locks while we tear down tx/rx resources */
LDC_LOCK(ldcp);
/* Destroy the local dring which is exported to the peer */
vgen_destroy_dring(ldcp);
/* Unmap the remote dring which is imported from the peer */
vgen_unmap_dring(ldcp);
/*
* Bring up the channel and restart handshake
* only if the channel is not being torn down.
*/
if (flags != VGEN_FLAG_UNINIT) {
/* Setup handshake parameters to restart a new handshake */
vgen_setup_handshake_params(ldcp);
/* Bring the channel up */
vgen_ldc_up(ldcp);
if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
DWARN(vgenp, ldcp, "ldc_status err\n");
} else {
ldcp->ldc_status = istatus;
}
/* If the channel is UP, start handshake */
if (ldcp->ldc_status == LDC_UP) {
if (is_vsw_port == B_FALSE) {
/*
* Channel is up; use this port from now on.
*/
(void) atomic_swap_32(&portp->use_vsw_port,
B_FALSE);
}
/* Initialize local session id */
ldcp->local_sid = ddi_get_lbolt();
/* clear peer session id */
ldcp->peer_sid = 0;
/*
* Initiate Handshake process with peer ldc endpoint by
* sending version info vio message. If that fails we
* go back to the top of this function to process the
* error again. Note that we can be in this loop for
* 'vgen_ldc_max_resets' times, after which the channel
* is not brought up.
*/
mutex_exit(&ldcp->tclock);
mutex_exit(&ldcp->txlock);
mutex_exit(&ldcp->wrlock);
mutex_exit(&ldcp->rxlock);
rv = vgen_handshake(vh_nextphase(ldcp));
mutex_exit(&ldcp->cblock);
if (rv != 0) {
if (rv == ECONNRESET) {
flags = VGEN_FLAG_EVT_RESET;
} else {
flags = VGEN_FLAG_NEED_LDCRESET;
}
/*
* We still hold 'reset_in_progress'; so we can
* just loop back to the top to restart error
* processing.
*/
goto again;
}
} else {
LDC_UNLOCK(ldcp);
}
} else { /* flags == VGEN_FLAG_UNINIT */
/* Close the channel - retry on EAGAIN */
while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
if (++retries > vgen_ldccl_retries) {
break;
}
drv_usecwait(VGEN_LDC_CLOSE_DELAY);
}
if (rv != 0) {
cmn_err(CE_NOTE,
"!vnet%d: Error(%d) closing the channel(0x%lx)\n",
vgenp->instance, rv, ldcp->ldc_id);
}
ldcp->ldc_reset_count = 0;
ldcp->ldc_status = LDC_INIT;
ldcp->flags &= ~(CHANNEL_STARTED);
LDC_UNLOCK(ldcp);
}
/* Done processing channel reset; clear the atomic flag */
ldcp->reset_in_progress = 0;
return (0);
}
/*
* Initiate handshake with the peer by sending various messages
* based on the handshake-phase that the channel is currently in.
*
* The VH_DONE case drops and re-acquires ldcp->cblock around calls that
* leave this function's control (link update, untimeout), so the caller is
* expected to hold cblock on entry.
*
* Returns the status of the send routine invoked for the current phase;
* rv stays 0 for phases that send nothing or are not listed in the switch.
*/
static int
vgen_handshake(vgen_ldc_t *ldcp)
{
uint32_t hphase = ldcp->hphase;
vgen_t *vgenp = LDC_TO_VGEN(ldcp);
int rv = 0;
timeout_id_t htid;
switch (hphase) {
case VH_PHASE1:
/*
* start timer, for entire handshake process, turn this timer
* off if all phases of handshake complete successfully and
* hphase goes to VH_DONE(below) or channel is reset due to
* errors or vgen_ldc_uninit() is invoked(vgen_stop).
*/
ASSERT(ldcp->htid == 0);
ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
drv_usectohz(vgen_hwd_interval * MICROSEC));
/* Phase 1 involves negotiating the version */
rv = vgen_send_version_negotiate(ldcp);
break;
case VH_PHASE2:
rv = vgen_handshake_phase2(ldcp);
break;
case VH_PHASE3:
rv = vgen_handshake_phase3(ldcp);
break;
case VH_PHASE4:
rv = vgen_send_rdx_info(ldcp);
break;
case VH_DONE:
/* handshake succeeded; start counting channel resets afresh */
ldcp->ldc_reset_count = 0;
DBG1(vgenp, ldcp, "Handshake Done\n");
/*
* The channel is up and handshake is done successfully. Now we
* can mark the channel link_state as 'up'. We also notify the
* stack if the channel is connected to vswitch.
*/
ldcp->link_state = LINK_STATE_UP;
if (ldcp->portp == vgenp->vsw_portp) {
/*
* If this channel(port) is connected to vsw,
* need to sync multicast table with vsw.
*
* On failure we leave the switch immediately: the handshake
* timer is not cancelled and the tx watchdog is not started
* here; the error is returned to the caller.
*/
rv = vgen_send_mcast_info(ldcp);
if (rv != VGEN_SUCCESS)
break;
if (vgenp->pls_negotiated == B_FALSE) {
/*
* We haven't negotiated with vswitch to get
* physical link state updates. We can update
* the stack at this point as the
* channel to vswitch is up and the handshake
* is done successfully.
*
* If we have negotiated to get physical link
* state updates, then we won't notify the
* stack here; we do that as soon as
* vswitch sends us the initial phys link state
* (see vgen_handle_physlink_info()).
*/
mutex_exit(&ldcp->cblock);
vgen_link_update(vgenp, ldcp->link_state);
mutex_enter(&ldcp->cblock);
}
}
/*
* Handshake is complete; cancel the handshake watchdog. The id is
* cleared under cblock and the lock is dropped around untimeout()
* (NOTE(review): presumably because the watchdog handler may itself
* need cblock - confirm against vgen_hwatchdog()).
*/
if (ldcp->htid != 0) {
htid = ldcp->htid;
ldcp->htid = 0;
mutex_exit(&ldcp->cblock);
(void) untimeout(htid);
mutex_enter(&ldcp->cblock);
}
/*
* Check if mac layer should be notified to restart
* transmissions. This can happen if the channel got
* reset and while tx_blocked is set.
*/
mutex_enter(&ldcp->tclock);
if (ldcp->tx_blocked) {
vio_net_tx_update_t vtx_update =
ldcp->portp->vcb.vio_net_tx_update;
ldcp->tx_blocked = B_FALSE;
vtx_update(ldcp->portp->vhp);
}
mutex_exit(&ldcp->tclock);
/* start transmit watchdog timer */
ldcp->wd_tid = timeout(vgen_tx_watchdog, (caddr_t)ldcp,
drv_usectohz(vgen_txwd_interval * 1000));
break;
default:
/* no action defined for any other phase value */
break;
}
return (rv);
}
/*
 * Tell whether the handshake phase the channel is currently in has been
 * completed, i.e. whether every hstate bit required by that phase is set.
 *
 * Returns VGEN_SUCCESS when the phase is complete. Returns VGEN_FAILURE when
 * it is not, or when the channel is in a phase for which no completion
 * criterion is defined here.
 */
static int
vgen_handshake_done(vgen_ldc_t *ldcp)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	uint32_t	phase = ldcp->hphase;
	uint32_t	required;

	/* Select the set of hstate bits that marks the phase as complete. */
	switch (phase) {
	case VH_PHASE1:
		/* version negotiation */
		required = VER_NEGOTIATED;
		break;
	case VH_PHASE2:
		/* attribute info exchange */
		required = ATTR_INFO_EXCHANGED;
		break;
	case VH_PHASE3:
		/* descriptor ring registration exchange */
		required = DRING_INFO_EXCHANGED;
		break;
	case VH_PHASE4:
		/* rdx message exchange */
		required = RDX_EXCHANGED;
		break;
	default:
		return (VGEN_FAILURE);
	}

	if ((ldcp->hstate & required) != required) {
		return (VGEN_FAILURE);
	}

	DBG2(vgenp, ldcp, "PHASE(%d)\n", phase);
	return (VGEN_SUCCESS);
}
/*
* Link State Update Notes:
* The link state of the channel connected to vswitch is reported as the link
* state of the vnet device, by default. If the channel is down or reset, then
* the link state is marked 'down'. If the channel is 'up' *and* handshake
* between the vnet and vswitch is successful, then the link state is marked
* 'up'. If physical network link state is desired, then the vnet device must
* be configured to get physical link updates and the 'linkprop' property
* in the virtual-device MD node indicates this. As part of attribute exchange
* the vnet device negotiates with the vswitch to obtain physical link state
* updates. If it successfully negotiates, vswitch sends an initial physlink
* msg once the handshake is done and further whenever the physical link state
* changes. Currently we don't have mac layer interfaces to report two distinct
* link states - virtual and physical. Thus, if the vnet has been configured to
* get physical link updates, then the link status will be reported as 'up'
* only when both the virtual and physical links are up.
*/
/*
* Report the given link state for this vgen instance by passing it, along
* with the instance's vnetp handle, to vnet_link_update().
*/
static void
vgen_link_update(vgen_t *vgenp, link_state_t link_state)
{
vnet_link_update(vgenp->vnetp, link_state);
}
/*
* Handle a version info msg from the peer or an ACK/NACK from the peer
* to a version info msg that we sent.
*
* The received message (tagp) is modified in place and sent back as the
* reply when the subtype is INFO.
*
* Returns:
*	VGEN_SUCCESS	message processed (also for unrecognized subtypes,
*			which fall through the switch untouched).
*	VGEN_FAILURE	negotiation failed or the message arrived in the
*			wrong handshake phase.
*	EINVAL		a version INFO arrived while not in VH_PHASE1.
*	other		error returned by vgen_sendmsg(),
*			vgen_send_version_negotiate() or vgen_handshake().
*/
static int
vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
vgen_t *vgenp;
vio_ver_msg_t *vermsg = (vio_ver_msg_t *)tagp;
int ack = 0;
int failed = 0;
int idx;
vgen_ver_t *versions = ldcp->vgen_versions;
int rv = 0;
vgenp = LDC_TO_VGEN(ldcp);
DBG1(vgenp, ldcp, "enter\n");
switch (tagp->vio_subtype) {
case VIO_SUBTYPE_INFO:
/* Cache sid of peer if this is the first time */
if (ldcp->peer_sid == 0) {
DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
tagp->vio_sid);
ldcp->peer_sid = tagp->vio_sid;
}
if (ldcp->hphase != VH_PHASE1) {
/*
* If we are not already in VH_PHASE1, reset to
* pre-handshake state, and initiate handshake
* to the peer too. This is signalled to the caller
* by returning EINVAL.
*/
return (EINVAL);
}
ldcp->hstate |= VER_INFO_RCVD;
/* save peer's requested values */
ldcp->peer_hparams.ver_major = vermsg->ver_major;
ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
ldcp->peer_hparams.dev_class = vermsg->dev_class;
if ((vermsg->dev_class != VDEV_NETWORK) &&
(vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
/* unsupported dev_class, send NACK */
DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
tagp->vio_subtype = VIO_SUBTYPE_NACK;
tagp->vio_sid = ldcp->local_sid;
/* send reply msg back to peer */
rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
sizeof (*vermsg), B_FALSE);
if (rv != VGEN_SUCCESS) {
return (rv);
}
return (VGEN_FAILURE);
}
DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
vermsg->ver_major, vermsg->ver_minor);
/*
* Walk our version table from index 0 looking for the first entry
* whose major number is not above the peer's request. The entry
* comparisons assume the table is ordered highest version first
* (index 0 is used as "the highest version supported" when the
* handshake parameters are set up).
*/
idx = 0;
for (;;) {
if (vermsg->ver_major > versions[idx].ver_major) {
/* nack with next lower version */
tagp->vio_subtype = VIO_SUBTYPE_NACK;
vermsg->ver_major = versions[idx].ver_major;
vermsg->ver_minor = versions[idx].ver_minor;
break;
}
if (vermsg->ver_major == versions[idx].ver_major) {
/* major version match - ACK version */
tagp->vio_subtype = VIO_SUBTYPE_ACK;
ack = 1;
/*
* lower minor version to the one this endpt
* supports, if necessary
*/
if (vermsg->ver_minor >
versions[idx].ver_minor) {
vermsg->ver_minor =
versions[idx].ver_minor;
ldcp->peer_hparams.ver_minor =
versions[idx].ver_minor;
}
break;
}
idx++;
if (idx == VGEN_NUM_VER) {
/* no version match - send NACK */
tagp->vio_subtype = VIO_SUBTYPE_NACK;
vermsg->ver_major = 0;
vermsg->ver_minor = 0;
failed = 1;
break;
}
}
tagp->vio_sid = ldcp->local_sid;
/* send reply msg back to peer */
rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
B_FALSE);
if (rv != VGEN_SUCCESS) {
return (rv);
}
if (ack) {
ldcp->hstate |= VER_ACK_SENT;
DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
vermsg->ver_major, vermsg->ver_minor);
}
if (failed) {
DWARN(vgenp, ldcp, "Negotiation Failed\n");
return (VGEN_FAILURE);
}
if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
/* VER_ACK_SENT and VER_ACK_RCVD */
/* local and peer versions match? */
ASSERT((ldcp->local_hparams.ver_major ==
ldcp->peer_hparams.ver_major) &&
(ldcp->local_hparams.ver_minor ==
ldcp->peer_hparams.ver_minor));
vgen_set_vnet_proto_ops(ldcp);
/* move to the next phase */
rv = vgen_handshake(vh_nextphase(ldcp));
if (rv != 0) {
return (rv);
}
}
break;
case VIO_SUBTYPE_ACK:
if (ldcp->hphase != VH_PHASE1) {
/* This should not happen. */
DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
return (VGEN_FAILURE);
}
/* SUCCESS - we have agreed on a version */
ldcp->local_hparams.ver_major = vermsg->ver_major;
ldcp->local_hparams.ver_minor = vermsg->ver_minor;
ldcp->hstate |= VER_ACK_RCVD;
DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
vermsg->ver_major, vermsg->ver_minor);
if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
/* VER_ACK_SENT and VER_ACK_RCVD */
/* local and peer versions match? */
ASSERT((ldcp->local_hparams.ver_major ==
ldcp->peer_hparams.ver_major) &&
(ldcp->local_hparams.ver_minor ==
ldcp->peer_hparams.ver_minor));
vgen_set_vnet_proto_ops(ldcp);
/* move to the next phase */
rv = vgen_handshake(vh_nextphase(ldcp));
if (rv != 0) {
return (rv);
}
}
break;
case VIO_SUBTYPE_NACK:
if (ldcp->hphase != VH_PHASE1) {
/* This should not happen. */
DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
"Phase(%u)\n", ldcp->hphase);
return (VGEN_FAILURE);
}
DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
vermsg->ver_major, vermsg->ver_minor);
/* check if version in NACK is zero */
if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
/*
* Version Negotiation has failed.
*/
DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
return (VGEN_FAILURE);
}
/*
* The peer proposed a different version in its NACK; pick the entry
* of our table to offer next. In both branches below the minor
* number offered is the one from our table entry, not the one in
* the NACK.
*/
idx = 0;
for (;;) {
if (vermsg->ver_major > versions[idx].ver_major) {
/* select next lower version */
ldcp->local_hparams.ver_major =
versions[idx].ver_major;
ldcp->local_hparams.ver_minor =
versions[idx].ver_minor;
break;
}
if (vermsg->ver_major == versions[idx].ver_major) {
/* major version match */
ldcp->local_hparams.ver_major =
versions[idx].ver_major;
ldcp->local_hparams.ver_minor =
versions[idx].ver_minor;
break;
}
idx++;
if (idx == VGEN_NUM_VER) {
/*
* no version match.
* Version Negotiation has failed.
*/
DWARN(vgenp, ldcp,
"Version Negotiation Failed\n");
return (VGEN_FAILURE);
}
}
/* retry the negotiation with the version selected above */
rv = vgen_send_version_negotiate(ldcp);
if (rv != VGEN_SUCCESS) {
return (rv);
}
break;
}
DBG1(vgenp, ldcp, "exit\n");
return (VGEN_SUCCESS);
}
/*
 * Validate an attribute info message received from the peer and negotiate
 * the dring mode and MTU to be used on the channel.
 *
 * The peer's values are recorded in ldcp->peer_hparams. On success the
 * negotiated dring mode (versions >= 1.6) and MTU (versions >= 1.4) are
 * written back into 'msg' so that the caller can send it as the reply.
 *
 * Returns VGEN_SUCCESS if the attributes are acceptable, VGEN_FAILURE
 * otherwise (the caller replies with a NACK in that case).
 */
static int
vgen_handle_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vgen_hparams_t	*local = &ldcp->local_hparams;
	vgen_hparams_t	*peer = &ldcp->peer_hparams;
	uint32_t	agreed_mtu;
	uint8_t		agreed_mode;

	ldcp->hstate |= ATTR_INFO_RCVD;

	/* Remember what the peer asked for. */
	peer->mtu = msg->mtu;
	peer->addr = msg->addr;
	peer->addr_type = msg->addr_type;
	peer->xfer_mode = msg->xfer_mode;
	peer->ack_freq = msg->ack_freq;
	peer->dring_mode = msg->options;

	/*
	 * Address type, ack frequency and transfer mode: each one must be
	 * acceptable, checked in this order.
	 */
	if (msg->addr_type != ADDR_TYPE_MAC) {
		return (VGEN_FAILURE);
	}
	if (msg->ack_freq > 64) {
		return (VGEN_FAILURE);
	}
	if (msg->xfer_mode != local->xfer_mode) {
		return (VGEN_FAILURE);
	}

	/*
	 * Dring mode (versions >= 1.6 only).
	 *
	 * Even in v1.6 mode, RxDringData may be disabled on either side, in
	 * which case we fall back to the pre-v1.6 TxDring mode. Both
	 * directions of the attribute exchange must agree on the result.
	 */
	if (VGEN_VER_GTEQ(ldcp, 1, 6)) {
		if ((msg->options & VIO_RX_DRING_DATA) != 0 &&
		    vgen_mapin_avail(ldcp) == B_TRUE) {
			/* Both ends can do RxDringData. */
			agreed_mode = VIO_RX_DRING_DATA;
		} else if ((msg->options & VIO_TX_DRING) != 0) {
			/* Peer can do TxDring. */
			agreed_mode = VIO_TX_DRING;
		} else {
			/*
			 * Only VIO_TX_DRING and VIO_RX_DRING_DATA are
			 * supported; VIO_RX_DRING is not.
			 */
			return (VGEN_FAILURE);
		}

		if ((ldcp->hstate & ATTR_ACK_RCVD) == 0) {
			/*
			 * Our own attr info has not been ack'd yet; record
			 * the mode so that it is what we send to the peer.
			 */
			local->dring_mode = agreed_mode;
		} else if (msg->options != local->dring_mode) {
			/*
			 * The peer already ack'd a mode (saved in the local
			 * hparams); its info must carry exactly that mode.
			 */
			return (VGEN_FAILURE);
		}

		/* The reply carries the negotiated mode. */
		msg->options = agreed_mode;
	}

	/*
	 * MTU.
	 */
	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
		/*
		 * Versions >= 1.4: the peer's mtu must be at least ETHERMAX
		 * and the result is the smaller of the two sides' values.
		 */
		if (msg->mtu < ETHERMAX) {
			return (VGEN_FAILURE);
		}
		agreed_mtu = MIN(msg->mtu, vgenp->max_frame_size);

		if ((ldcp->hstate & ATTR_ACK_RCVD) == 0) {
			/* Not ack'd yet: this is the mtu we will advertise. */
			local->mtu = agreed_mtu;
		} else if (agreed_mtu != local->mtu) {
			/* Must match the mtu the peer already ack'd. */
			return (VGEN_FAILURE);
		}

		/* The reply carries the negotiated mtu. */
		msg->mtu = agreed_mtu;
	} else {
		/* Versions < 1.4: no negotiation, the mtu must match. */
		if (msg->mtu != local->mtu) {
			return (VGEN_FAILURE);
		}
	}

	return (VGEN_SUCCESS);
}
/*
 * Validate the peer's ACK to the attribute info message that we sent and
 * record the values it agreed to (dring mode, physical link state updates
 * and MTU, depending on the negotiated protocol version).
 *
 * Returns VGEN_SUCCESS if the ack'd values are acceptable, VGEN_FAILURE
 * otherwise.
 */
static int
vgen_handle_attr_ack(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vgen_hparams_t	*local = &ldcp->local_hparams;
	/* Have we already ack'd the peer's own attr info? */
	boolean_t	ack_sent =
	    ((ldcp->hstate & ATTR_ACK_SENT) != 0) ? B_TRUE : B_FALSE;

	/*
	 * Dring mode (versions >= 1.6): the ack carries the mode negotiated
	 * between the capabilities we advertised and the peer's own.
	 */
	if (VGEN_VER_GTEQ(ldcp, 1, 6)) {
		if (ack_sent == B_TRUE) {
			/*
			 * A mode was already settled when we ack'd the peer's
			 * info (saved in the local hparams); the peer's ack
			 * must name the same one.
			 */
			if (local->dring_mode != msg->options) {
				return (VGEN_FAILURE);
			}
		} else {
			/* The ack'd mode must be one that we offered. */
			if ((msg->options & local->dring_mode) == 0) {
				return (VGEN_FAILURE);
			}
			/* Exactly one mode may be ack'd, not both. */
			if ((msg->options &
			    (VIO_TX_DRING|VIO_RX_DRING_DATA)) ==
			    (VIO_TX_DRING|VIO_RX_DRING_DATA)) {
				return (VGEN_FAILURE);
			}
			/*
			 * Remember it, so the peer's attr info can be checked
			 * against it when it arrives.
			 */
			local->dring_mode = msg->options;
		}
	}

	/*
	 * Physical link updates (versions >= 1.5, vswitch channel only):
	 * they are considered negotiated only if we asked for state updates
	 * and the vswitch ack'd that request.
	 */
	if (VGEN_VER_GTEQ(ldcp, 1, 5) &&
	    ldcp->portp == vgenp->vsw_portp) {
		vgenp->pls_negotiated =
		    (((local->physlink_update & PHYSLINK_UPDATE_STATE_MASK) ==
		    PHYSLINK_UPDATE_STATE) &&
		    ((msg->physlink_update & PHYSLINK_UPDATE_STATE_MASK) ==
		    PHYSLINK_UPDATE_STATE_ACK)) ? B_TRUE : B_FALSE;
	}

	/*
	 * MTU (versions >= 1.4): the ack carries the minimum of the mtu we
	 * advertised and the peer's mtu.
	 */
	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
		if (ack_sent == B_TRUE) {
			/*
			 * The mtu was already computed when we ack'd the
			 * peer's info; the peer's ack must agree with it.
			 */
			if (local->mtu != msg->mtu) {
				return (VGEN_FAILURE);
			}
		} else {
			/* The peer may lower our mtu but never raise it. */
			if (msg->mtu > local->mtu) {
				return (VGEN_FAILURE);
			}
			/*
			 * Save it, so the peer's attr info can be validated
			 * against it later.
			 */
			local->mtu = msg->mtu;
		}
	}

	return (VGEN_SUCCESS);
}
/*
 * Handle an attribute info msg from the peer or an ACK/NACK from the peer
 * to an attr info msg that we sent. Only valid in VH_PHASE2.
 *
 * For an INFO message, 'tagp' is turned into the ACK/NACK reply in place
 * and sent back. When the phase completes, the handshake is advanced.
 *
 * Returns VGEN_SUCCESS, VGEN_FAILURE (wrong phase or NACK received), or the
 * error returned by vgen_sendmsg()/vgen_handshake().
 *
 * NOTE(review): when we send a NACK, or when validation of the peer's ACK
 * fails, this function still returns VGEN_SUCCESS and merely does not
 * advance the handshake. The dring register handler returns VGEN_FAILURE
 * in the equivalent situations - confirm that the difference is intended.
 */
static int
vgen_handle_attr_msg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vnet_attr_msg_t	*attrmsg = (vnet_attr_msg_t *)tagp;
	boolean_t	try_advance = B_FALSE;
	int		status = 0;

	DBG1(vgenp, ldcp, "enter\n");

	if (ldcp->hphase != VH_PHASE2) {
		DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
		    " Invalid Phase(%u)\n",
		    tagp->vio_subtype, ldcp->hphase);
		return (VGEN_FAILURE);
	}

	switch (tagp->vio_subtype) {
	case VIO_SUBTYPE_NACK:
		DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
		return (VGEN_FAILURE);

	case VIO_SUBTYPE_INFO:
		/* Validate the peer's attributes and build the reply. */
		if (vgen_handle_attr_info(ldcp, attrmsg) == VGEN_SUCCESS) {
			tagp->vio_subtype = VIO_SUBTYPE_ACK;
		} else {
			tagp->vio_subtype = VIO_SUBTYPE_NACK;
		}
		tagp->vio_sid = ldcp->local_sid;

		/* send reply msg back to peer */
		status = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*attrmsg),
		    B_FALSE);
		if (status != VGEN_SUCCESS) {
			return (status);
		}

		if (tagp->vio_subtype == VIO_SUBTYPE_NACK) {
			DWARN(vgenp, ldcp, "ATTR_NACK_SENT");
		} else {
			ldcp->hstate |= ATTR_ACK_SENT;
			DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
			try_advance = B_TRUE;
		}
		break;

	case VIO_SUBTYPE_ACK:
		if (vgen_handle_attr_ack(ldcp, attrmsg) != VGEN_FAILURE) {
			ldcp->hstate |= ATTR_ACK_RCVD;
			DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
			try_advance = B_TRUE;
		}
		break;

	default:
		break;
	}

	/* Move to the next phase once both directions are done. */
	if (try_advance == B_TRUE &&
	    vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
		status = vgen_handshake(vh_nextphase(ldcp));
		if (status != 0) {
			return (status);
		}
	}

	DBG1(vgenp, ldcp, "exit\n");
	return (VGEN_SUCCESS);
}
/*
 * Process a descriptor ring register (INFO) message from the peer: check
 * the dring mode it carries against the negotiated one, then map the
 * peer's descriptor ring and, in RxDringData mode, its data buffers.
 *
 * Returns VGEN_SUCCESS, VGEN_FAILURE on a dring mode mismatch, or the error
 * returned by vgen_map_dring()/vgen_map_data().
 */
static int
vgen_handle_dring_reg_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
	vgen_hparams_t		*local = &ldcp->local_hparams;
	vio_dring_reg_msg_t	*regmsg = (vio_dring_reg_msg_t *)tagp;
	int			status;

	DBG2(vgenp, ldcp, "DRING_INFO_RCVD");
	ldcp->hstate |= DRING_INFO_RCVD;

	/*
	 * The earlier version of Solaris vnet driver doesn't set the
	 * option (VIO_TX_DRING in its case) correctly in its dring reg
	 * message; hence the mode is verified only for versions >= v1.6.
	 */
	if (VGEN_VER_GTEQ(ldcp, 1, 6) &&
	    (local->dring_mode != regmsg->options)) {
		DWARN(vgenp, ldcp,
		    "Rcvd dring reg option (%d), negotiated mode (%d)\n",
		    regmsg->options, local->dring_mode);
		return (VGEN_FAILURE);
	}

	/* Map the descriptor ring exported by the peer. */
	status = vgen_map_dring(ldcp, (void *)tagp);
	if (status != VGEN_SUCCESS) {
		return (status);
	}

	/* RxDringData mode also needs the peer's data buffers mapped. */
	if (local->dring_mode == VIO_RX_DRING_DATA) {
		status = vgen_map_data(ldcp, (void *)tagp);
		if (status != VGEN_SUCCESS) {
			/* undo the ring mapping done above */
			vgen_unmap_dring(ldcp);
			return (status);
		}
	}

	/* The peer's ring is usable from now on. */
	if (ldcp->peer_hparams.dring_ready == B_FALSE) {
		ldcp->peer_hparams.dring_ready = B_TRUE;
	}

	return (VGEN_SUCCESS);
}
/*
 * Process the peer's ACK to the descriptor ring register message that we
 * sent. The first ack marks the local ring as ready and records the
 * dring_ident chosen by the peer; later acks leave those values alone.
 *
 * Always returns VGEN_SUCCESS.
 */
static int
vgen_handle_dring_reg_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vgen_hparams_t	*local = &ldcp->local_hparams;

	DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
	ldcp->hstate |= DRING_ACK_RCVD;

	if (!local->dring_ready) {
		/* save dring_ident acked by peer */
		local->dring_ident =
		    ((vio_dring_reg_msg_t *)tagp)->dring_ident;

		/* local dring is now ready */
		local->dring_ready = B_TRUE;
	}

	return (VGEN_SUCCESS);
}
/*
 * Handle a descriptor ring register msg from the peer or an ACK/NACK from
 * the peer to a dring register msg that we sent. Accepted in any phase from
 * VH_PHASE2 onwards.
 *
 * For an INFO message, 'tagp' is turned into the ACK/NACK reply in place
 * and sent back. When the phase completes, the handshake is advanced.
 *
 * Returns VGEN_SUCCESS, VGEN_FAILURE (wrong phase, NACK sent or received,
 * ack processing failed), or the error returned by
 * vgen_sendmsg()/vgen_handshake().
 */
static int
vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vgen_hparams_t	*local = &ldcp->local_hparams;
	boolean_t	try_advance = B_FALSE;
	int		reply_len;
	int		status = 0;

	DBG1(vgenp, ldcp, "enter\n");

	/* dring_info can be rcvd in any of the phases after Phase1 */
	if (ldcp->hphase < VH_PHASE2) {
		DWARN(vgenp, ldcp,
		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
		    tagp->vio_subtype, ldcp->hphase);
		return (VGEN_FAILURE);
	}

	switch (tagp->vio_subtype) {
	case VIO_SUBTYPE_NACK:
		DWARN(vgenp, ldcp, "DRING_NACK_RCVD");
		return (VGEN_FAILURE);

	case VIO_SUBTYPE_INFO:
		/* Process the registration and build the reply. */
		if (vgen_handle_dring_reg_info(ldcp, tagp) == VGEN_SUCCESS) {
			tagp->vio_subtype = VIO_SUBTYPE_ACK;
		} else {
			tagp->vio_subtype = VIO_SUBTYPE_NACK;
		}
		tagp->vio_sid = ldcp->local_sid;

		/* RxDringData mode replies with the extended message. */
		if (local->dring_mode == VIO_RX_DRING_DATA) {
			reply_len = VNET_DRING_REG_EXT_MSG_SIZE(
			    ldcp->tx_data_ncookies);
		} else {
			reply_len = sizeof (vio_dring_reg_msg_t);
		}

		/* send reply msg back to peer */
		status = vgen_sendmsg(ldcp, (caddr_t)tagp, reply_len,
		    B_FALSE);
		if (status != VGEN_SUCCESS) {
			return (status);
		}

		if (tagp->vio_subtype == VIO_SUBTYPE_NACK) {
			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
			return (VGEN_FAILURE);
		}

		ldcp->hstate |= DRING_ACK_SENT;
		DBG2(vgenp, ldcp, "DRING_ACK_SENT");
		try_advance = B_TRUE;
		break;

	case VIO_SUBTYPE_ACK:
		status = vgen_handle_dring_reg_ack(ldcp, tagp);
		if (status == VGEN_FAILURE) {
			return (status);
		}
		try_advance = B_TRUE;
		break;

	default:
		break;
	}

	/* Move to the next phase once both directions are done. */
	if (try_advance == B_TRUE &&
	    vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
		status = vgen_handshake(vh_nextphase(ldcp));
		if (status != 0) {
			return (status);
		}
	}

	DBG1(vgenp, ldcp, "exit\n");
	return (VGEN_SUCCESS);
}
/*
 * Handle a rdx info msg from the peer or an ACK/NACK
 * from the peer to a rdx info msg that we sent. Only valid in VH_PHASE4.
 *
 * For an INFO message, 'tagp' is turned into the ACK reply in place and
 * sent back. When the phase completes, the handshake is advanced.
 *
 * Returns VGEN_SUCCESS, VGEN_FAILURE (wrong phase or NACK received), or the
 * error returned by vgen_sendmsg()/vgen_handshake().
 */
static int
vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	boolean_t	try_advance = B_FALSE;
	int		status = 0;

	DBG1(vgenp, ldcp, "enter\n");

	if (ldcp->hphase != VH_PHASE4) {
		DWARN(vgenp, ldcp,
		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
		    tagp->vio_subtype, ldcp->hphase);
		return (VGEN_FAILURE);
	}

	switch (tagp->vio_subtype) {
	case VIO_SUBTYPE_NACK:
		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
		return (VGEN_FAILURE);

	case VIO_SUBTYPE_INFO:
		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
		ldcp->hstate |= RDX_INFO_RCVD;

		/* An rdx info is always acknowledged. */
		tagp->vio_subtype = VIO_SUBTYPE_ACK;
		tagp->vio_sid = ldcp->local_sid;

		/* send reply msg back to peer */
		status = vgen_sendmsg(ldcp, (caddr_t)tagp,
		    sizeof (vio_rdx_msg_t), B_FALSE);
		if (status != VGEN_SUCCESS) {
			return (status);
		}

		ldcp->hstate |= RDX_ACK_SENT;
		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
		try_advance = B_TRUE;
		break;

	case VIO_SUBTYPE_ACK:
		ldcp->hstate |= RDX_ACK_RCVD;
		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
		try_advance = B_TRUE;
		break;

	default:
		break;
	}

	/* Move to the next phase once both directions are done. */
	if (try_advance == B_TRUE &&
	    vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
		status = vgen_handshake(vh_nextphase(ldcp));
		if (status != 0) {
			return (status);
		}
	}

	DBG1(vgenp, ldcp, "exit\n");
	return (VGEN_SUCCESS);
}
/*
 * Handle ACK/NACK from vsw to a set multicast msg that we sent.
 *
 * An ACK needs no action. A NACK to an 'add' request means that vsw did not
 * install the addresses, so every address listed in the message is removed
 * from our own multicast table (vgenp->mctab) again. A NACK to a 'remove'
 * request is ignored.
 *
 * The address count comes from the peer and is therefore clamped to the
 * capacity of the message's mca[] array before it is used as a loop bound;
 * without that, a malformed NACK would make us read past the array.
 *
 * Always returns VGEN_SUCCESS.
 */
static int
vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
	vnet_mcast_msg_t	*msgp = (vnet_mcast_msg_t *)tagp;
	struct ether_addr	*addrp;
	int			max_addrs;
	int			naddrs;
	int			count;
	int			i;

	DBG1(vgenp, ldcp, "enter\n");
	switch (tagp->vio_subtype) {

	case VIO_SUBTYPE_INFO:

		/* vnet shouldn't recv set mcast msg, only vsw handles it */
		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
		break;

	case VIO_SUBTYPE_ACK:

		/* success adding/removing multicast addr */
		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
		break;

	case VIO_SUBTYPE_NACK:

		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
		if (!(msgp->set)) {
			/* multicast remove request failed */
			break;
		}

		/*
		 * Never trust the peer-supplied count beyond what the
		 * message can actually hold. (mca[] is assumed to be a
		 * fixed-size array member of vnet_mcast_msg_t, whose layout
		 * is declared outside this file - confirm.)
		 */
		max_addrs = (int)(sizeof (msgp->mca) / sizeof (msgp->mca[0]));
		naddrs = msgp->count;
		if (naddrs > max_addrs) {
			DWARN(vgenp, ldcp,
			    "SET_MCAST_NACK count(%d) exceeds max(%d)\n",
			    naddrs, max_addrs);
			naddrs = max_addrs;
		}

		/* multicast add request failed */
		for (count = 0; count < naddrs; count++) {
			addrp = &(msgp->mca[count]);

			/* delete address from the table */
			for (i = 0; i < vgenp->mccount; i++) {
				if (ether_cmp(addrp,
				    &(vgenp->mctab[i])) == 0) {
					/*
					 * Fill the hole with the last entry;
					 * the table is not kept ordered.
					 */
					if (vgenp->mccount > 1) {
						int t = vgenp->mccount - 1;
						vgenp->mctab[i] =
						    vgenp->mctab[t];
					}
					vgenp->mccount--;
					break;
				}
			}
		}
		break;

	default:
		break;
	}
	DBG1(vgenp, ldcp, "exit\n");

	return (VGEN_SUCCESS);
}
/*
 * Physical link information message from the peer. Only vswitch should send
 * us this message, and only if the vnet device has been configured to get
 * physical link state updates and that was negotiated with the vswitch
 * during the attribute exchange phase of the handshake. Messages that do
 * not meet both conditions are silently dropped.
 *
 * An INFO message updates vgenp->phys_link_state, notifies the stack if the
 * state changed (cblock is dropped around that call) and is acknowledged
 * by sending 'tagp' back as an ACK.
 *
 * Returns VGEN_SUCCESS, or the error returned by vgen_sendmsg().
 */
static int
vgen_handle_physlink_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
	vnet_physlink_msg_t	*linkmsg = (vnet_physlink_msg_t *)tagp;
	link_state_t		new_state;
	int			status;

	/*
	 * Drop the message without processing it unless it comes from the
	 * vswitch and physlink updates have been negotiated with it.
	 */
	if (ldcp->portp != vgenp->vsw_portp ||
	    vgenp->pls_negotiated == B_FALSE) {
		return (VGEN_SUCCESS);
	}

	switch (tagp->vio_subtype) {
	case VIO_SUBTYPE_ACK:
		/* vnet shouldn't recv physlink acks */
		DWARN(vgenp, ldcp, "rcvd PHYSLINK_ACK \n");
		break;

	case VIO_SUBTYPE_NACK:
		/* vnet shouldn't recv physlink nacks */
		DWARN(vgenp, ldcp, "rcvd PHYSLINK_NACK \n");
		break;

	case VIO_SUBTYPE_INFO:
		new_state = ((linkmsg->physlink_info &
		    VNET_PHYSLINK_STATE_MASK) == VNET_PHYSLINK_STATE_UP) ?
		    LINK_STATE_UP : LINK_STATE_DOWN;

		if (vgenp->phys_link_state != new_state) {
			vgenp->phys_link_state = new_state;

			/* Update the stack with cblock released. */
			mutex_exit(&ldcp->cblock);
			vgen_link_update(vgenp, new_state);
			mutex_enter(&ldcp->cblock);
		}

		/* send reply msg back to peer */
		tagp->vio_subtype = VIO_SUBTYPE_ACK;
		tagp->vio_sid = ldcp->local_sid;
		status = vgen_sendmsg(ldcp, (caddr_t)tagp,
		    sizeof (vnet_physlink_msg_t), B_FALSE);
		if (status != VGEN_SUCCESS) {
			return (status);
		}
		break;

	default:
		break;
	}

	DBG1(vgenp, ldcp, "exit\n");
	return (VGEN_SUCCESS);
}
/*
 * Handler for control messages received from the peer ldc end-point.
 * Dispatches on the message's subtype envelope to the matching handler and
 * returns that handler's status; unrecognized envelopes are ignored and
 * yield 0.
 */
static int
vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
	int	status = 0;

	DBG1(vgenp, ldcp, "enter\n");

	if (tagp->vio_subtype_env == VIO_VER_INFO) {
		status = vgen_handle_version_negotiate(ldcp, tagp);
	} else if (tagp->vio_subtype_env == VIO_ATTR_INFO) {
		status = vgen_handle_attr_msg(ldcp, tagp);
	} else if (tagp->vio_subtype_env == VIO_DRING_REG) {
		status = vgen_handle_dring_reg(ldcp, tagp);
	} else if (tagp->vio_subtype_env == VIO_RDX) {
		status = vgen_handle_rdx_info(ldcp, tagp);
	} else if (tagp->vio_subtype_env == VNET_MCAST_INFO) {
		status = vgen_handle_mcast_info(ldcp, tagp);
	} else if (tagp->vio_subtype_env == VIO_DDS_INFO) {
		/*
		 * If we are in the process of resetting the vswitch channel,
		 * drop the dds message. A new handshake will be initiated
		 * when the channel comes back up after the reset and dds
		 * negotiation can then continue.
		 */
		if (ldcp->reset_in_progress != 1) {
			status = vgen_dds_rx(ldcp, tagp);
		}
	} else if (tagp->vio_subtype_env == VNET_PHYSLINK_INFO) {
		status = vgen_handle_physlink_info(ldcp, tagp);
	}

	DBG1(vgenp, ldcp, "exit rv(%d)\n", status);
	return (status);
}
/*
* handler for error messages received from the peer ldc end-point
*
* Currently a stub: the body contains no statements, so error messages are
* ignored. Both arguments are annotated as intentionally unused.
*/
static void
vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
_NOTE(ARGUNUSED(ldcp, tagp))
}
/*
 * This function handles raw pkt data messages received over the channel.
 * Currently, only priority-eth-type frames are received through this mechanism.
 * In this case, the frame(data) is present within the message itself which
 * is copied into an mblk before sending it up the stack.
 *
 * arg1   - the channel (vgen_ldc_t *) the message arrived on.
 * arg2   - the raw data message (vio_raw_data_msg_t *).
 * msglen - total length of the message, including the raw data header.
 *
 * The caller must hold ldcp->cblock. The lock is dropped while the frame is
 * processed and is re-acquired before returning, on every return path.
 *
 * Frames whose payload length is outside [ETHERMIN, mtu], or for which no
 * mblk can be allocated, are dropped and counted in rx_pri_fail.
 */
void
vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
{
	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg1;
	vio_raw_data_msg_t	*pkt = (vio_raw_data_msg_t *)arg2;
	uint32_t		size;
	mblk_t			*mp;
	vio_mblk_t		*vmp;
	vio_net_rx_cb_t		vrx_cb = NULL;
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
	vgen_stats_t		*statsp = &ldcp->stats;
	vgen_hparams_t		*lp = &ldcp->local_hparams;
	uint_t			dring_mode = lp->dring_mode;

	ASSERT(MUTEX_HELD(&ldcp->cblock));

	mutex_exit(&ldcp->cblock);

	/*
	 * size is unsigned: a msglen shorter than the header wraps around to
	 * a very large value and is rejected by the upper bound check below.
	 */
	size = msglen - VIO_PKT_DATA_HDRSIZE;
	if (size < ETHERMIN || size > lp->mtu) {
		(void) atomic_inc_32(&statsp->rx_pri_fail);
		mutex_enter(&ldcp->cblock);
		return;
	}

	/* try the channel's mblk pools first; fall back to allocb() */
	vmp = vio_multipool_allocb(&ldcp->vmp, size);
	if (vmp == NULL) {
		mp = allocb(size, BPRI_MED);
		if (mp == NULL) {
			(void) atomic_inc_32(&statsp->rx_pri_fail);
			DWARN(vgenp, ldcp, "allocb failure, "
			    "unable to process priority frame\n");
			mutex_enter(&ldcp->cblock);
			return;
		}
	} else {
		mp = vmp->mp;
	}

	/* copy the frame from the payload of raw data msg into the mblk */
	bcopy(pkt->data, mp->b_rptr, size);
	mp->b_wptr = mp->b_rptr + size;

	if (vmp != NULL) {
		vmp->state = VIO_MBLK_HAS_DATA;
	}

	/* update stats */
	(void) atomic_inc_64(&statsp->rx_pri_packets);
	(void) atomic_add_64(&statsp->rx_pri_bytes, size);

	/*
	 * If polling is currently enabled, add the packet to the priority
	 * packets list and return. It will be picked up by the polling thread.
	 * The polling state and the list are protected by rxlock in
	 * RxDringData mode and by pollq_lock otherwise.
	 */
	if (dring_mode == VIO_RX_DRING_DATA) {
		mutex_enter(&ldcp->rxlock);
	} else {
		mutex_enter(&ldcp->pollq_lock);
	}

	if (ldcp->polling_on == B_TRUE) {
		if (ldcp->rx_pri_tail != NULL) {
			ldcp->rx_pri_tail->b_next = mp;
		} else {
			ldcp->rx_pri_head = mp;
		}
		/*
		 * Always advance the tail to the frame just queued. Without
		 * this, the next frame would overwrite the b_next link of the
		 * old tail and the frame queued here would be lost.
		 */
		ldcp->rx_pri_tail = mp;
	} else {
		vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
	}

	if (dring_mode == VIO_RX_DRING_DATA) {
		mutex_exit(&ldcp->rxlock);
	} else {
		mutex_exit(&ldcp->pollq_lock);
	}

	/* not polling: deliver the frame to the upper layer right away */
	if (vrx_cb != NULL) {
		vrx_cb(ldcp->portp->vhp, mp);
	}

	mutex_enter(&ldcp->cblock);
}
/*
 * Dummy pkt data handler function for vnet protocol version 1.0.
 * It takes the same arguments as vgen_handle_pkt_data() but ignores all of
 * them and does nothing.
 */
static void
vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
{
_NOTE(ARGUNUSED(arg1, arg2, msglen))
}
/*
 * Handler for data messages received from the peer ldc end-point.
 *
 * Data messages are ignored (0 is returned) until the handshake has reached
 * VH_DONE. In TxDring mode the sequence number of INFO messages is verified
 * first; a failure there is returned to the caller as is. The message is then
 * passed to the channel's dring data or pkt data receive routine, depending
 * on its subtype envelope. Only the dring data routine supplies a return
 * value; any other envelope leaves the result at 0.
 */
static int
vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
{
	vgen_hparams_t	*lp = &ldcp->local_hparams;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	int		status = 0;

	DBG1(vgenp, ldcp, "enter\n");

	/* no data traffic is accepted before the handshake is complete */
	if (ldcp->hphase != VH_DONE) {
		return (0);
	}

	/*
	 * We check the data msg seqnum. This is needed only in TxDring mode.
	 */
	if ((lp->dring_mode == VIO_TX_DRING) &&
	    (tagp->vio_subtype == VIO_SUBTYPE_INFO)) {
		status = vgen_check_datamsg_seq(ldcp, tagp);
		if (status != 0) {
			return (status);
		}
	}

	switch (tagp->vio_subtype_env) {
	case VIO_PKT_DATA:
		ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
		break;

	case VIO_DRING_DATA:
		status = ldcp->rx_dringdata((void *)ldcp, (void *)tagp);
		break;

	default:
		break;
	}

	DBG1(vgenp, ldcp, "exit rv(%d)\n", status);
	return (status);
}
/*
 * Reset the channel on behalf of the given caller.
 *
 * Callers identified as VGEN_LDC_CB or VGEN_MSG_THR must hold ldcp->cblock;
 * for them the lock is dropped around vgen_process_reset() and re-acquired
 * before returning. Other callers are not expected to hold it.
 *
 * Returns EBUSY if another thread has already marked a reset in progress
 * (the lock state is left untouched in that case); otherwise the result of
 * vgen_process_reset().
 */
static int
vgen_ldc_reset(vgen_ldc_t *ldcp, vgen_caller_t caller)
{
	int	status;
	int	cb_held = (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR);

	if (cb_held) {
		ASSERT(MUTEX_HELD(&ldcp->cblock));
	}

	/*
	 * Claim the reset by flipping the flag from 0 to 1. If the flag was
	 * already set, some other thread is in the process of resetting.
	 */
	if (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
		return (EBUSY);
	}

	if (cb_held) {
		mutex_exit(&ldcp->cblock);
	}

	status = vgen_process_reset(ldcp, VGEN_FLAG_NEED_LDCRESET);

	if (cb_held) {
		mutex_enter(&ldcp->cblock);
	}

	return (status);
}
/*
 * Bring the channel up, retrying while ldc_up() reports EWOULDBLOCK.
 *
 * The caller must hold ldcp->cblock. Every call that gets past the reset
 * limit check counts as one reset/handshake attempt. After each EWOULDBLOCK
 * result the function waits VGEN_LDC_UP_DELAY and tries again, until
 * vgen_ldcup_retries is exceeded. A final non-zero result is only logged.
 */
static void
vgen_ldc_up(vgen_ldc_t *ldcp)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	uint32_t	attempts = 0;
	int		err;

	ASSERT(MUTEX_HELD(&ldcp->cblock));

	/*
	 * If the channel has been reset max # of times, without successfully
	 * completing handshake, stop and do not bring the channel up.
	 */
	if (ldcp->ldc_reset_count == vgen_ldc_max_resets) {
		cmn_err(CE_WARN, "!vnet%d: exceeded number of permitted"
		    " handshake attempts (%d) on channel %ld",
		    vgenp->instance, vgen_ldc_max_resets, ldcp->ldc_id);
		return;
	}
	ldcp->ldc_reset_count++;

	for (;;) {
		err = ldc_up(ldcp->ldc_handle);
		if (err == EWOULDBLOCK) {
			drv_usecwait(VGEN_LDC_UP_DELAY);
		}

		/* stop once out of retries or on any definitive result */
		if ((attempts++ >= vgen_ldcup_retries) ||
		    (err != EWOULDBLOCK)) {
			break;
		}
	}

	if (err != 0) {
		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", err);
	}
}
/*
 * Turn off polled receive mode on the port's channel.
 *
 * arg is the port (vgen_port_t *). The polling flag is cleared under rxlock
 * in RxDringData mode and under pollq_lock otherwise. In RxDringData mode a
 * dring ack with state VIO_DP_STOPPED is also sent to the peer; the result of
 * that send is ignored.
 *
 * Always returns 0.
 */
int
vgen_enable_intr(void *arg)
{
	vgen_port_t	*portp = (vgen_port_t *)arg;
	vgen_ldc_t	*ldcp = portp->ldcp;
	vgen_hparams_t	*lp = &ldcp->local_hparams;
	uint32_t	end_ix;
	/*
	 * Start from an all-zero message so that any field not explicitly
	 * filled in before transmission does not carry stale stack contents
	 * to the peer.
	 */
	vio_dring_msg_t	msg = { 0 };

	if (lp->dring_mode == VIO_RX_DRING_DATA) {
		mutex_enter(&ldcp->rxlock);

		ldcp->polling_on = B_FALSE;
		/*
		 * We send a stopped message to peer (sender) as we are turning
		 * off polled mode. This effectively restarts data interrupts
		 * by allowing the peer to send further dring data msgs to us.
		 */
		end_ix = ldcp->next_rxi;
		DECR_RXI(end_ix, ldcp);
		msg.dring_ident = ldcp->peer_hparams.dring_ident;
		(void) vgen_send_dringack_shm(ldcp, (vio_msg_tag_t *)&msg,
		    VNET_START_IDX_UNSPEC, end_ix, VIO_DP_STOPPED);

		mutex_exit(&ldcp->rxlock);
	} else {
		mutex_enter(&ldcp->pollq_lock);
		ldcp->polling_on = B_FALSE;
		mutex_exit(&ldcp->pollq_lock);
	}

	return (0);
}
/*
 * Turn on polled receive mode on the port's channel.
 *
 * arg is the port (vgen_port_t *). The polling flag is set under rxlock in
 * RxDringData mode and under pollq_lock otherwise.
 *
 * Always returns 0.
 */
int
vgen_disable_intr(void *arg)
{
	vgen_port_t	*portp = (vgen_port_t *)arg;
	vgen_ldc_t	*ldcp = portp->ldcp;
	vgen_hparams_t	*lp = &ldcp->local_hparams;

	if (lp->dring_mode != VIO_RX_DRING_DATA) {
		mutex_enter(&ldcp->pollq_lock);
		ldcp->polling_on = B_TRUE;
		mutex_exit(&ldcp->pollq_lock);
		return (0);
	}

	mutex_enter(&ldcp->rxlock);
	ldcp->polling_on = B_TRUE;
	mutex_exit(&ldcp->rxlock);

	return (0);
}
/*
 * Poll the port's channel for received packets.
 *
 * arg is the port (vgen_port_t *); bytes_to_pickup is handed unchanged to the
 * mode specific poll routine. Returns whatever that routine returns.
 */
mblk_t *
vgen_rx_poll(void *arg, int bytes_to_pickup)
{
	vgen_port_t	*portp = (vgen_port_t *)arg;
	vgen_ldc_t	*ldcp = portp->ldcp;
	vgen_hparams_t	*lp = &ldcp->local_hparams;

	/* RxDringData mode has its own (shared memory) poll routine */
	if (lp->dring_mode == VIO_RX_DRING_DATA) {
		return (vgen_poll_rcv_shm(ldcp, bytes_to_pickup));
	}

	return (vgen_poll_rcv(ldcp, bytes_to_pickup));
}
/*
 * Transmit watchdog timeout handler.
 *
 * arg is the channel (vgen_ldc_t *). If the watchdog is enabled
 * (vgen_txwd_timeout is non-zero) and transmit has been blocked for longer
 * than vgen_txwd_timeout milliseconds, the channel is reset and the timer is
 * not restarted. Otherwise the timer is re-armed for another
 * vgen_txwd_interval milliseconds, unless it has been cancelled (wd_tid is 0)
 * in the meantime.
 */
static void
vgen_tx_watchdog(void *arg)
{
	vgen_ldc_t	*ldcp = (vgen_ldc_t *)arg;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	boolean_t	blocked = ldcp->tx_blocked;
	clock_t		blocked_lbolt = ldcp->tx_blocked_lbolt;
	int		err;

	if (!vgen_txwd_timeout ||
	    (blocked != B_TRUE) ||
	    ((ddi_get_lbolt() - blocked_lbolt) <=
	    drv_usectohz(vgen_txwd_timeout * 1000))) {
		/* No transmit timeout: restart the timer */
		mutex_enter(&ldcp->cblock);
		if (ldcp->wd_tid != 0) {
			ldcp->wd_tid = timeout(vgen_tx_watchdog,
			    (caddr_t)ldcp,
			    drv_usectohz(vgen_txwd_interval * 1000));
		}
		/* a zero wd_tid means: cancelled by vgen_process_reset() */
		mutex_exit(&ldcp->cblock);
		return;
	}

	/*
	 * Something is wrong; the peer is not picking up the packets
	 * in the transmit dring. We now go ahead and reset the channel
	 * to break out of this condition.
	 */
	DWARN(vgenp, ldcp, "transmit timeout lbolt(%lx), "
	    "tx_blocked_lbolt(%lx)\n",
	    ddi_get_lbolt(), blocked_lbolt);

#ifdef DEBUG
	if (vgen_inject_error(ldcp, VGEN_ERR_TXTIMEOUT)) {
		/* tx timeout triggered for debugging */
		vgen_inject_err_flag &= ~(VGEN_ERR_TXTIMEOUT);
	}
#endif

	/*
	 * Clear tid before invoking vgen_ldc_reset(). Otherwise,
	 * it will result in a deadlock when vgen_process_reset() tries
	 * to untimeout() on seeing a non-zero tid, but it is being
	 * invoked by the timer itself in this case.
	 */
	mutex_enter(&ldcp->cblock);
	if (ldcp->wd_tid == 0) {
		/* Cancelled by vgen_process_reset() */
		mutex_exit(&ldcp->cblock);
		return;
	}
	ldcp->wd_tid = 0;
	mutex_exit(&ldcp->cblock);

	/*
	 * Now reset the channel.
	 */
	err = vgen_ldc_reset(ldcp, VGEN_OTHER);

	/*
	 * If the reset succeeded and we are in tx flow controlled state,
	 * clear it now and enable transmit in the upper layer.
	 */
	if ((err == 0) && ldcp->tx_blocked) {
		vio_net_tx_update_t vtx_update =
		    ldcp->portp->vcb.vio_net_tx_update;

		ldcp->tx_blocked = B_FALSE;
		vtx_update(ldcp->portp->vhp);
	}

	/*
	 * Channel has been reset by us or some other thread is already
	 * in the process of resetting. In either case, we return
	 * without restarting the timer. When handshake completes and
	 * the channel is ready for data transmit/receive we start a
	 * new watchdog timer.
	 */
}
/*
 * Handshake watchdog timeout handler.
 *
 * arg is the channel (vgen_ldc_t *). Unless the timer was cancelled in the
 * meantime (htid is 0), the timeout id is cleared under cblock and the
 * channel is reset; the result of the reset is ignored.
 */
static void
vgen_hwatchdog(void *arg)
{
	vgen_ldc_t	*ldcp = (vgen_ldc_t *)arg;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	int		cancelled;

	DWARN(vgenp, ldcp, "handshake timeout phase(%x) state(%x)\n",
	    ldcp->hphase, ldcp->hstate);

	mutex_enter(&ldcp->cblock);
	cancelled = (ldcp->htid == 0);
	if (!cancelled) {
		ldcp->htid = 0;
	}
	mutex_exit(&ldcp->cblock);

	if (cancelled) {
		/* Cancelled by vgen_process_reset() */
		return;
	}

	/*
	 * Something is wrong; handshake with the peer seems to be hung. We now
	 * go ahead and reset the channel to break out of this condition.
	 */
	(void) vgen_ldc_reset(ldcp, VGEN_OTHER);
}
/*
 * Check if the session id in the received message is valid, i.e. it matches
 * the peer session id recorded for the channel.
 *
 * Returns VGEN_SUCCESS on a match; otherwise logs the mismatch and returns
 * VGEN_FAILURE.
 */
static int
vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);

	if (tagp->vio_sid == ldcp->peer_sid) {
		return (VGEN_SUCCESS);
	}

	DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
	    ldcp->peer_sid, tagp->vio_sid);
	return (VGEN_FAILURE);
}
/*
 * Initialize the common part of dring registration
 * message; used in both TxDring and RxDringData modes.
 *
 * The tag is set up as a VIO_DRING_REG control INFO message carrying the
 * local session id, and the dring parameters are taken from the channel's
 * local handshake parameters. 'option' is stored in the message's options
 * field unchanged.
 */
static void
vgen_init_dring_reg_msg(vgen_ldc_t *ldcp, vio_dring_reg_msg_t *msg,
    uint8_t option)
{
	/* message tag */
	msg->tag.vio_msgtype = VIO_TYPE_CTRL;
	msg->tag.vio_subtype = VIO_SUBTYPE_INFO;
	msg->tag.vio_subtype_env = VIO_DRING_REG;
	msg->tag.vio_sid = ldcp->local_sid;

	/*
	 * dring_ident is set to 0. After mapping the dring, peer sets this
	 * value and sends it in the ack, which is saved in
	 * vgen_handle_dring_reg().
	 */
	msg->dring_ident = 0;

	/* get dring info msg payload from ldcp->local */
	msg->num_descriptors = ldcp->local_hparams.num_desc;
	msg->descriptor_size = ldcp->local_hparams.desc_size;
	msg->options = option;
	msg->ncookies = ldcp->local_hparams.dring_ncookies;
	bcopy(&(ldcp->local_hparams.dring_cookie), &(msg->cookie[0]),
	    sizeof (ldc_mem_cookie_t));
}
/*
 * Check whether the channel's maximum direct map size, as reported by
 * ldc_info(), is large enough for VGEN_RXDRING_NRBUFS receive data buffers
 * sized for the device's max frame size.
 *
 * Returns B_TRUE if it is; B_FALSE if it is not or if ldc_info() fails.
 */
static int
vgen_mapin_avail(vgen_ldc_t *ldcp)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	ldc_info_t	info;
	uint64_t	bufsz;
	uint64_t	needed;

	if (ldc_info(ldcp->ldc_handle, &info) != 0) {
		return (B_FALSE);
	}

	bufsz = RXDRING_DBLK_SZ(vgenp->max_frame_size);
	needed = (VGEN_RXDRING_NRBUFS * bufsz);

	return ((info.direct_map_size_max >= needed) ? B_TRUE : B_FALSE);
}
#if DEBUG
/*
 * Print debug messages - set to 0xf to enable all msgs
 *
 * Builds a single line of the form "vnet<inst>:ldc(<id>):<fname>: <message>"
 * and prints it with cmn_err(CE_CONT). The vnet prefix is included only when
 * vgenp and its vnetp are non-NULL, the ldc prefix only when ldcp is non-NULL.
 *
 * When a channel is given, the message is printed only if vgendbg_ldcid is -1
 * or equals that channel's id.
 *
 * All formatting is bounded by the size of the local buffer; a message that
 * does not fit is truncated rather than written past the end of the buffer.
 */
void
vgen_debug_printf(const char *fname, vgen_t *vgenp,
    vgen_ldc_t *ldcp, const char *fmt, ...)
{
	char	buf[256];
	char	*bufp = buf;
	va_list	ap;

	/* filtered out: skip the formatting work altogether */
	if ((ldcp != NULL) && (vgendbg_ldcid != -1) &&
	    (vgendbg_ldcid != ldcp->ldc_id)) {
		return;
	}

	/*
	 * bufp always points at the terminating NUL written so far, so
	 * sizeof (buf) - (bufp - buf) is the space that is left, including
	 * room for the terminator, and is never zero.
	 */
	buf[0] = '\0';

	if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
		(void) snprintf(bufp, sizeof (buf) - (size_t)(bufp - buf),
		    "vnet%d:", ((vnet_t *)(vgenp->vnetp))->instance);
		bufp += strlen(bufp);
	}
	if (ldcp != NULL) {
		(void) snprintf(bufp, sizeof (buf) - (size_t)(bufp - buf),
		    "ldc(%ld):", ldcp->ldc_id);
		bufp += strlen(bufp);
	}
	(void) snprintf(bufp, sizeof (buf) - (size_t)(bufp - buf),
	    "%s: ", fname);
	bufp += strlen(bufp);

	va_start(ap, fmt);
	(void) vsnprintf(bufp, sizeof (buf) - (size_t)(bufp - buf), fmt, ap);
	va_end(ap);

	cmn_err(CE_CONT, "%s\n", buf);
}
#endif
#ifdef VNET_IOC_DEBUG
/*
 * Debug ioctl handler (VNET_IOC_DEBUG builds).
 *
 * arg is the port (vgen_port_t *), mp the ioctl message and q the queue the
 * reply is sent on. VNET_FORCE_LINK_DOWN and VNET_FORCE_LINK_UP are passed to
 * vgen_force_link_state() with the port lock held and are acked if it
 * returns 0. Everything else, including a NULL port, is nak'ed with EINVAL.
 */
static void
vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
	struct iocblk	*iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
	vgen_port_t	*portp = (vgen_port_t *)arg;
	int		ok = 0;

	iocp->ioc_error = 0;

	if (portp != NULL) {
		mutex_enter(&portp->lock);

		switch (iocp->ioc_cmd) {
		case VNET_FORCE_LINK_DOWN:
		case VNET_FORCE_LINK_UP:
			ok = (vgen_force_link_state(portp,
			    iocp->ioc_cmd) == 0);
			break;

		default:
			/* unknown command: leave it to be nak'ed below */
			break;
		}

		mutex_exit(&portp->lock);
	}

	if (ok) {
		/* OK, reply with an ACK */
		miocack(q, mp, 0, 0);
	} else {
		/* Error, reply with a NAK and EINVAL error */
		miocnak(q, mp, 0, EINVAL);
	}
}
/*
 * Force the port's channel down or up (debug ioctl support).
 *
 * VNET_FORCE_LINK_DOWN takes the channel down and marks the link as forced
 * down. VNET_FORCE_LINK_UP brings the channel up, clears the forced-down
 * mark, refreshes the cached channel status and, if the channel is UP,
 * runs the channel-up event handling. Any other cmd is ignored.
 *
 * The work is done with ldcp->cblock held. Always returns 0.
 */
static int
vgen_force_link_state(vgen_port_t *portp, int cmd)
{
	vgen_ldc_t	*ldcp = portp->ldcp;
	vgen_t		*vgenp = portp->vgenp;
	ldc_status_t	istatus;

	mutex_enter(&ldcp->cblock);

	if (cmd == VNET_FORCE_LINK_DOWN) {
		(void) ldc_down(ldcp->ldc_handle);
		ldcp->link_down_forced = B_TRUE;
	} else if (cmd == VNET_FORCE_LINK_UP) {
		vgen_ldc_up(ldcp);
		ldcp->link_down_forced = B_FALSE;

		/* on failure the previously cached status is kept */
		if (ldc_status(ldcp->ldc_handle, &istatus) == 0) {
			ldcp->ldc_status = istatus;
		} else {
			DWARN(vgenp, ldcp, "ldc_status err\n");
		}

		/* if channel is already UP - restart handshake */
		if (ldcp->ldc_status == LDC_UP) {
			vgen_handle_evt_up(ldcp);
		}
	}

	mutex_exit(&ldcp->cblock);

	return (0);
}
#else
/*
 * ioctl handler for builds without VNET_IOC_DEBUG: no ioctls are supported.
 * Every request is nak'ed, with EINVAL if no port was supplied and with
 * ENOTSUP otherwise.
 */
static void
vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
	vgen_port_t	*portp = (vgen_port_t *)arg;
	int		err = (portp == NULL) ? EINVAL : ENOTSUP;

	miocnak(q, mp, 0, err);
}
#endif