/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
/*
* sun4v LDC Link Layer
*/
#include <sys/machsystm.h>
#include <sys/machcpuvar.h>
#include <vm/hat_sfmmu.h>
#include <sys/vm_machparam.h>
#include <vm/seg_kmem.h>
#include <sys/hypervisor_api.h>
#include <sys/ldc_impl.h>
/* Core internal functions */
int i_ldc_h2v_error(int h_error);
static void i_ldc_debug_enter(void);
/* Interrupt handling functions */
/* Read method functions */
/* Write method functions */
/* Pkt processing internal functions */
/* Imported functions */
/* LDC Version */
/* number of supported versions */
/* Invalid value for the ldc_chan_t rx_ack_head field */
/* Module State Pointer */
&mod_miscops, /* This is a misc module */
"sun4v LDC module", /* Name of the module */
};
&md,
};
};
/*
* The no. of MTU size messages that can be stored in
* the LDC Tx queue. The number of Tx queue entries is
* then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
*/
/*
* The minimum queue length. This is the size of the smallest
* LDC queue. If the computed value is less than this default,
* the queue length is rounded up to 'ldc_queue_entries'.
*/
/*
* The length of the reliable-mode data queue in terms of the LDC
* receive queue length. i.e., the number of times larger than the
* LDC receive queue that the data queue should be. The HV receive
* queue is required to be a power of 2 and this implementation
* assumes the data queue will also be a power of 2. By making the
* multiplier a power of 2, we ensure the data queue will be a
* power of 2. We use a multiplier because the receive queue is
* sized to be sane relative to the MTU and the same is needed for
* the data queue.
*/
/*
* LDC retry count and delay - when the HV returns EWOULDBLOCK
* the operation is retried 'ldc_max_retries' times with a
* wait of 'ldc_delay' usecs between each retry.
*/
/*
* Channels which have a devclass satisfying the following
* will be reset when entering the prom or kmdb.
*
* LDC_DEVCLASS_PROM_RESET(devclass) != 0
*
* By default, only block device service channels are reset.
*/
/*
* delay between each retry of channel unregistration in
* ldc_close(), to wait for pending interrupts to complete.
*/
/*
* Reserved mapin space for descriptor rings.
*/
/*
* Maximum direct map space allowed per channel.
*/
#ifdef DEBUG
/*
* Print debug messages
*
* set ldcdbg to 0x7 for enabling all msgs
* 0x4 - Warnings
* 0x2 - All debug messages
* 0x1 - Minimal debug messages
*
* set ldcdbgchan to the channel number you want to debug
* setting it to -1 prints debug messages for all channels
* NOTE: ldcdbgchan has no effect on error messages
*/
void
{
/*
* Channel filter for debug output. Execution continues past this
* check (i.e. the function does not return early) if any of the
* following holds:
*   - caller wants to print it anyway   (id == DBG_ALL_LDCS)
*   - debug channel is set to all LDCs  (ldcdbgchan == DBG_ALL_LDCS)
*   - debug channel is the caller specified channel
*                                        (ldcdbgchan == id)
*/
if ((id != DBG_ALL_LDCS) &&
(ldcdbgchan != DBG_ALL_LDCS) &&
(ldcdbgchan != id)) {
return;
}
/*
* NOTE(review): no statements follow the filter in this view, and the
* function declarator (name and parameter list) is not visible either;
* confirm against the complete file.
*/
}
/*
 * Test-and-clear check of the requested bit(s) in ldc_inject_err_flag.
 * NOTE(review): the function declarator (name and parameter list) is not
 * visible in this view; 'error' is presumably a parameter - confirm.
 */
static boolean_t
{
/*
* NOTE(review): this return is unconditional as written, so every
* statement below it is unreachable and the function always reports
* B_FALSE. A guarding condition appears to be missing here; confirm
* against the complete file.
*/
return (B_FALSE);
/* nothing to inject if the requested bit(s) are not set in the flag */
if ((ldc_inject_err_flag & error) == 0)
return (B_FALSE);
/* clear the injection state, so a set bit is reported only once */
ldc_inject_err_flag &= ~error;
return (B_TRUE);
}
#define D1 \
if (ldcdbg & 0x01) \
#define D2 \
if (ldcdbg & 0x02) \
#define DWARN \
if (ldcdbg & 0x04) \
{ \
int i; \
for (i = 0; i < 64; i++, src++) \
}
{ \
} else { \
} \
}
#else
#define D1
#define D2
#define DWARN
#endif
/*
* dtrace SDT probes to ease tracing of the rx data queue and HV queue
* lengths. Just pass the head, tail, and entries values so that the
* length can be calculated in a dtrace script when the probe is enabled.
*/
/* A dtrace SDT probe to ease tracing of data queue copy operations */
/* The amount of contiguous space at the tail of the queue */
#define ZERO_PKT(p) \
int
_init(void)
{
int status;
if (status != 0) {
" group: 0x%lx major: %ld minor: %ld errno: %d",
return (-1);
}
/* Initialize shared memory HV API version checking */
/* allocate soft state structure */
/* Link the module into the system */
if (status != 0) {
return (status);
}
/* Initialize the LDC state structure */
/* Create a cache for memory handles */
return (-1);
}
/* Create cache for memory segment structures */
return (-1);
}
ldcssp->channel_count = 0;
ldcssp->channels_open = 0;
/* Register debug_enter callback */
return (0);
}
int
{
/* Report status of the dynamically loadable driver module */
}
int
_fini(void)
{
/* Unlink the driver module from the system */
if (status) {
return (EIO);
}
/* Unregister debug_enter callback */
/* Free descriptor rings */
(void) ldc_mem_dring_unbind(
}
(void) ldc_mem_dring_unmap(
}
}
dringp = tmp_dringp;
}
/* close and finalize channels */
}
/* Destroy kmem caches */
/*
* We have successfully "removed" the driver.
* Destroying soft states
*/
(void) hsvc_unregister(&ldc_hsvc);
return (status);
}
/* -------------------------------------------------------------------------- */
/*
* LDC Link Layer Internal Functions
*/
/*
* Translate HV Errors to sun4v error codes
*/
int
{
switch (h_error) {
case H_EOK:
return (0);
case H_ENORADDR:
return (EFAULT);
case H_EBADPGSZ:
case H_EINVAL:
return (EINVAL);
case H_EWOULDBLOCK:
return (EWOULDBLOCK);
case H_ENOACCESS:
case H_ENOMAP:
return (EACCES);
case H_EIO:
case H_ECPUERROR:
return (EIO);
case H_ENOTSUPPORTED:
return (ENOTSUP);
case H_ETOOMANY:
return (ENOSPC);
case H_ECHANNEL:
return (ECHRNG);
default:
break;
}
return (EIO);
}
/*
* Reconfigure the transmit queue
*/
static int
{
int rv;
if (rv) {
return (EIO);
}
if (rv) {
return (EIO);
}
ldcp->link_state);
return (0);
}
/*
* Reconfigure the receive queue
*/
static int
{
int rv;
&(ldcp->link_state));
if (rv) {
"i_ldc_rxq_reconf: (0x%lx) cannot get state",
return (EIO);
}
ldcp->rx_q_entries);
if (rv) {
"i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
return (EIO);
}
}
return (0);
}
/*
* Drain the contents of the receive queue
*/
static void
{
int rv;
int retries = 0;
&(ldcp->link_state));
if (rv) {
return;
}
/* If the queue is already empty just return success. */
return;
/*
* We are draining the queue in order to close the channel.
* Call hv_ldc_rx_set_qhead directly instead of i_ldc_set_rx_head
* because we do not need to reset the channel if the set
* qhead fails.
*/
return;
return;
}
}
/*
* Reset LDC state structure and its contents
*/
static void
{
ldcp->last_ack_rcd = 0;
ldcp->last_msg_rcd = 0;
ldcp->stream_remains = 0;
ldcp->rx_dq_head = 0;
ldcp->rx_dq_tail = 0;
} else {
}
}
}
/*
* Reset a LDC channel
*/
void
{
/* reconfig Tx and Rx queues */
(void) i_ldc_txq_reconf(ldcp);
/* Clear Tx and Rx interrupts */
/* Reset channel state */
/* Mark channel in reset */
}
/*
* Walk the channel list and reset channels if they are of the right
* devclass and their Rx queues have been configured. No locks are
* taken because the function is only invoked by the kernel just before
* entering the prom or debugger when the system is single-threaded.
*/
static void
i_ldc_debug_enter(void)
{
ldcp->rx_q_entries);
}
}
}
/*
* Clear pending interrupts
*/
static void
{
switch (itype) {
case CNEX_TX_INTR:
/* check Tx interrupt */
if (ldcp->tx_intr_state)
else
return;
break;
case CNEX_RX_INTR:
/* check Rx interrupt */
if (ldcp->rx_intr_state)
else
return;
break;
}
"i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
}
/*
* Set the receive queue head
* Resets connection and returns an error if it fails.
*/
static int
{
int rv;
int retries;
return (0);
if (rv != H_EWOULDBLOCK)
break;
/* wait for ldc_delay usecs */
}
return (ECONNRESET);
}
/*
* Returns the tx_head to be used for transfer
*/
static void
{
/* get current Tx head */
/*
* Reliable mode will use the ACKd head instead of the regular tx_head.
* The ACKd head is advanced, where needed, up to the current
* location of tx_head.
*/
break;
}
/* advance ACKd head */
ldcp->tx_ackd_head =
}
}
}
/*
* Returns the tx_tail to be used for transfer
* Re-reads the TX queue ptrs if and only if the
* cached head and tail are equal (queue is full)
*/
static int
{
int rv;
/* Read the head and tail ptrs from HV */
if (rv) {
"i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
return (EIO);
}
return (ECONNRESET);
}
/* increment the tail */
if (new_tail == current_head) {
"i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
return (EWOULDBLOCK);
}
return (0);
}
/*
* Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
* and retry ldc_max_retries times before returning an error.
* Returns 0, EWOULDBLOCK or EIO
*/
static int
{
int retries;
retval = 0;
break;
}
if (rv != H_EWOULDBLOCK) {
break;
}
/* wait for ldc_delay usecs */
}
return (retval);
}
/*
* Copy a data packet from the HV receive queue to the data queue.
* Caller must ensure that the data queue is not already full.
*
* The *head argument represents the current head pointer for the HV
* receive queue. After copying a packet from the HV receive queue,
* the *head pointer will be updated. This allows the caller to update
* the head pointer in HV using the returned *head value.
*/
void
{
dq_size) >= LDC_PACKET_SIZE);
/* Update rx head */
/* Update dq tail */
}
/*
* Update the Rx data queue head pointer
*/
static int
{
return (0);
}
/*
* Get the Rx data queue head and tail pointers
*/
static uint64_t
{
return (0);
}
/*
* Wrapper for the Rx HV queue set head function, giving the
* data queue and HV queue set head functions the same type.
*/
static uint64_t
{
link_state)));
}
/*
* LDC receive interrupt handler
* triggered for channel with data pending to read
* i.e. Rx queue content changes
*/
static uint_t
{
/* Get the channel for which interrupt was received */
return (DDI_INTR_UNCLAIMED);
}
ldcp->link_state);
/* Lock channel */
/* Mark the interrupt as being actively handled */
/*
* If there are no data packets on the queue, clear
* the interrupt. Otherwise, the ldc_read will clear
* interrupts after draining the queue. To indicate the
* interrupt has not yet been cleared, it is marked
* as pending.
*/
if ((event & LDC_EVT_READ) == 0) {
} else {
}
}
/* if callbacks are disabled, do not notify */
if (rv) {
"i_ldc_rx_hdlr: (0x%llx) callback failure",
}
}
/*
* Here, ENOSPC indicates the secondary data
* queue is full and the Rx queue is non-empty.
* Much like how reliable and raw modes are
* handled above, since the Rx queue is non-
* empty, we mark the interrupt as pending to
* indicate it has not yet been cleared.
*/
} else {
/*
* We have processed all CTRL packets and
* copied all DATA packets to the secondary
* queue. Clear the interrupt.
*/
}
}
return (DDI_INTR_CLAIMED);
}
/*
* Wrapper for the Rx HV queue processing function to be used when
* checking the Rx HV queue for data packets. Unlike the interrupt
* handler code flow, the Rx interrupt is not cleared here and
* callbacks are not made.
*/
static uint_t
{
}
/*
* Send a LDC message
*/
static int
{
int rv;
/* Obtain Tx lock */
/* get the current tail for the message */
if (rv) {
"i_ldc_send_pkt: (0x%llx) error sending pkt, "
"type=0x%x,subtype=0x%x,ctrl=0x%x\n",
return (rv);
}
/* Initialize the packet */
curr_seqid++;
}
}
/* initiate the send by calling into HV and set the new tail */
if (rv) {
"i_ldc_send_pkt:(0x%llx) error sending pkt, "
"type=0x%x,stype=0x%x,ctrl=0x%x\n",
return (EIO);
}
return (0);
}
/*
* Checks if packet was received in right order
* in the case of a reliable link.
* Returns 0 if in order, else EIO
*/
static int
{
/* No seqid checking for RAW mode */
return (0);
/* No seqid checking for version, RTS, RTR message */
return (0);
"i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
return (EIO);
}
#ifdef DEBUG
if (LDC_INJECT_PKTLOSS(ldcp)) {
return (EIO);
}
#endif
return (0);
}
/*
* Process an incoming version ctrl message
*/
static int
{
/* get the received version */
/* Obtain Tx lock */
case LDC_INFO:
(void) i_ldc_txq_reconf(ldcp);
return (EAGAIN);
}
/* get the current tail and pkt for the response */
if (rv != 0) {
"i_ldc_process_VER: (0x%llx) err sending "
return (ECONNRESET);
}
/* initialize the packet */
for (;;) {
/* major version match - ACK version */
/*
* lower minor version to the one this endpt
* supports, if necessary
*/
break;
}
" lower idx=%d, v%u.%u\n", idx,
/* nack with next lower version */
sizeof (ldc_versions[idx]));
break;
}
/* next major version */
idx++;
if (idx == LDC_NUM_VERS) {
/* no version match - send NACK */
break;
}
}
/* initiate the send by calling into HV and set the new tail */
if (rv == 0) {
/* Save the ACK'd version */
"(0x%llx) Sent ACK, "
"Agreed on version v%u.%u\n",
}
} else {
"i_ldc_process_VER: (0x%llx) error sending "
return (ECONNRESET);
}
break;
case LDC_ACK:
/* mismatched version - reset connection */
"i_ldc_process_VER: (0x%llx) recvd"
return (ECONNRESET);
}
} else {
/* SUCCESS - we have agreed on a version */
}
/* initiate RTS-RTR-RDX handshake */
if (rv) {
"i_ldc_process_VER: (0x%llx) cannot send RTS\n",
return (ECONNRESET);
}
/* initiate the send by calling into HV and set the new tail */
if (rv) {
"i_ldc_process_VER: (0x%llx) no listener\n",
return (ECONNRESET);
}
break;
case LDC_NACK:
/* check if version in NACK is zero */
/* version handshake failure */
"i_ldc_process_VER: (0x%llx) no version match\n",
return (ECONNRESET);
}
/* get the current tail and pkt for the response */
if (rv != 0) {
"i_ldc_process_VER: (0x%lx) err sending "
return (ECONNRESET);
}
/* initialize the packet */
/* check ver in NACK msg has a match */
for (;;) {
/*
* major version match - resubmit request,
* lowering the minor version to the one this
* endpt supports, if necessary
*/
break;
}
" lower idx=%d, v%u.%u\n", idx,
/* send next lower version */
sizeof (ldc_versions[idx]));
break;
}
/* next version */
idx++;
if (idx == LDC_NUM_VERS) {
/* no version match - terminate */
return (ECONNRESET);
}
}
/* initiate the send by calling into HV and set the new tail */
if (rv == 0) {
} else {
"i_ldc_process_VER: (0x%lx) error sending version"
return (ECONNRESET);
}
break;
}
return (rv);
}
/*
* Process an incoming RTS ctrl message
*/
static int
{
int rv = 0;
case LDC_NACK:
"i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
/* Reset the channel -- as we cannot continue */
rv = ECONNRESET;
break;
case LDC_INFO:
/* check mode */
"i_ldc_process_RTS: (0x%lx) mode mismatch\n",
/*
* send NACK in response to MODE message
* get the current tail for the response
*/
if (rv) {
/* if cannot send NACK - reset channel */
rv = ECONNRESET;
break;
}
}
break;
default:
rv = ECONNRESET;
break;
}
/*
* If either the connection was reset (when rv != 0) or
* a NACK was sent, we return. In the case of a NACK
* we don't want to consume the packet that came in but
* not record that we received the RTS
*/
return (rv);
/* record RTS received */
/* store initial SEQID info */
/* Obtain Tx lock */
/* get the current tail for the response */
if (rv != 0) {
"i_ldc_process_RTS: (0x%lx) err sending RTR\n",
return (ECONNRESET);
}
/* initialize the packet */
/* initiate the send by calling into HV and set the new tail */
if (rv == 0) {
} else {
"i_ldc_process_RTS: (0x%lx) error sending RTR\n",
return (ECONNRESET);
}
return (0);
}
/*
* Process an incoming RTR ctrl message
*/
static int
{
int rv = 0;
case LDC_NACK:
/* RTR NACK received */
"i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
/* Reset the channel -- as we cannot continue */
rv = ECONNRESET;
break;
case LDC_INFO:
/* check mode */
"i_ldc_process_RTR: (0x%llx) mode mismatch, "
"expecting 0x%x, got 0x%x\n",
/*
* send NACK in response to MODE message
* get the current tail for the response
*/
if (rv) {
/* if cannot send NACK - reset channel */
rv = ECONNRESET;
break;
}
}
break;
default:
/* Reset the channel -- as we cannot continue */
rv = ECONNRESET;
break;
}
/*
* If either the connection was reset (when rv != 0) or
* a NACK was sent, we return. In the case of a NACK
* we don't want to consume the packet that came in but
* not record that we received the RTR
*/
return (rv);
if (rv) {
"i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
return (ECONNRESET);
}
return (0);
}
/*
* Process an incoming RDX ctrl message
*/
static int
{
int rv = 0;
case LDC_NACK:
/* RDX NACK received */
"i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
/* Reset the channel -- as we cannot continue */
rv = ECONNRESET;
break;
case LDC_INFO:
/*
* if channel is UP and a RDX received after data transmission
* has commenced it is an error
*/
"i_ldc_process_RDX: (0x%llx) unexpected RDX"
return (ECONNRESET);
}
break;
default:
/* Reset the channel -- as we cannot continue */
rv = ECONNRESET;
break;
}
return (rv);
}
/*
* Process an incoming ACK for a data packet
*/
static int
{
int rv;
/* Obtain Tx lock */
/*
* Read the current Tx head and tail
*/
if (rv != 0) {
"i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
/* Reset the channel -- as we cannot continue */
return (ECONNRESET);
}
/*
* loop from where the previous ACK location was to the
* current head location. This is how far the HV has
* actually sent pkts. Pkts between head and tail are
* yet to be sent by HV.
*/
for (;;) {
"i_ldc_process_data_ACK: (0x%llx) found packet\n",
break;
}
/* could not find packet */
"i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
/* Reset the channel -- as we cannot continue */
return (ECONNRESET);
}
}
return (0);
}
/*
* Process incoming control message
* Return 0 - session can continue
* EAGAIN - reprocess packet - state was changed
* ECONNRESET - channel was reset
*/
static int
{
int rv = 0;
case TS_OPEN:
case TS_READY:
case LDC_VER:
/* process version message */
break;
default:
"i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
break;
}
break;
case TS_VREADY:
case LDC_VER:
/* process version message */
break;
case LDC_RTS:
/* process RTS message */
break;
case LDC_RTR:
/* process RTR message */
break;
case LDC_RDX:
/* process RDX message */
break;
default:
"i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
break;
}
break;
case TS_UP:
case LDC_VER:
"i_ldc_ctrlmsg: (0x%llx) unexpected VER "
/* peer is redoing version negotiation */
(void) i_ldc_txq_reconf(ldcp);
break;
case LDC_RDX:
/* process RDX message */
break;
default:
"i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
break;
}
}
return (rv);
}
/*
* Register channel with the channel nexus
*/
static int
{
int rv = 0;
"i_ldc_register_channel: cnex has not registered\n");
return (EAGAIN);
}
if (rv) {
"i_ldc_register_channel: cannot register channel\n");
return (rv);
}
if (rv) {
"i_ldc_register_channel: cannot add Tx interrupt\n");
return (rv);
}
if (rv) {
"i_ldc_register_channel: cannot add Rx interrupt\n");
return (rv);
}
return (0);
}
/*
* Unregister a channel with the channel nexus
*/
static int
{
int rv = 0;
"i_ldc_unregister_channel: cnex has not registered\n");
return (EAGAIN);
}
/* Remove the Rx interrupt */
if (rv) {
"i_ldc_unregister_channel: err removing "
"Rx intr\n");
return (rv);
}
/*
* If interrupts are pending and handler has
* finished running, clear interrupt and try
* again
*/
return (rv);
if (rv) {
"err removing Rx interrupt\n");
return (rv);
}
}
/* Remove the Tx interrupt */
if (rv) {
"i_ldc_unregister_channel: err removing Tx intr\n");
return (rv);
}
/* Unregister the channel */
if (rv) {
"i_ldc_unregister_channel: cannot unreg channel\n");
return (rv);
}
}
return (0);
}
/*
* LDC transmit interrupt handler
* triggered for channel down/reset events
* and Tx queue content changes
*/
static uint_t
{
int rv;
/* Get the channel for which interrupt was received */
/* Lock channel */
/* Obtain Tx lock */
/* mark interrupt as pending */
/* save current link state */
&ldcp->link_state);
if (rv) {
"i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
return (DDI_INTR_CLAIMED);
}
/*
* reset the channel state if the channel went down
* (other side unconfigured queue) or channel was reset
* (other side reconfigured its queue)
*/
}
}
}
/* if callbacks are disabled, do not notify */
if (!ldcp->cb_enabled)
if (notify_client) {
if (rv) {
}
}
return (DDI_INTR_CLAIMED);
}
/*
* Process the Rx HV queue.
*
* Returns 0 if data packets were found and no errors were encountered,
* otherwise returns an error. In either case, the *notify argument is
* set to indicate whether or not the client callback function should
* be invoked. The *event argument is set to contain the callback event.
*
* Depending on the channel mode, packets are handled differently:
*
* RAW MODE
* For raw mode channels, when a data packet is encountered,
* processing stops and all packets are left on the queue to be removed
* and processed by the ldc_read code path.
*
* UNRELIABLE MODE
* For unreliable mode, when a data packet is encountered, processing
* stops, and all packets are left on the queue to be removed and
* processed by the ldc_read code path. Control packets are processed
* inline if they are encountered before any data packets.
*
* RELIABLE MODE
* For reliable mode channels, all packets on the receive queue
* are processed: data packets are copied to the data queue and
* control packets are processed inline. Packets are only left on
* the receive queue when the data queue is full.
*/
static uint_t
{
int rv;
*notify_client = B_FALSE;
*notify_event = 0;
/*
* Read packet(s) from the queue
*/
for (;;) {
&ldcp->link_state);
if (rv) {
"i_ldc_rx_process_hvq: (0x%lx) cannot read "
return (EIO);
}
/*
* reset the channel state if the channel went down
* (other side unconfigured queue) or channel was reset
* (other side reconfigured its queue)
*/
switch (ldcp->link_state) {
case LDC_CHANNEL_DOWN:
*notify_client = B_TRUE;
goto loop_exit;
case LDC_CHANNEL_UP:
*notify_client = B_TRUE;
}
break;
case LDC_CHANNEL_RESET:
default:
#ifdef DEBUG
#endif
*notify_client = B_TRUE;
break;
}
}
#ifdef DEBUG
if (LDC_INJECT_RESET(ldcp))
goto force_reset;
if (LDC_INJECT_DRNGCLEAR(ldcp))
#endif
if (trace_length) {
}
break;
}
/* get the message */
/* if channel is in RAW mode or data pkt, notify and return */
*notify_client = B_TRUE;
*notify_event |= LDC_EVT_READ;
break;
}
/* discard packet if channel is not up */
/* move the head one position */
break;
continue;
} else {
/* process only RELIABLE mode data packets */
*notify_client = B_TRUE;
*notify_event |= LDC_EVT_READ;
break;
}
/* don't process packet if queue full */
break;
}
}
}
/* Check the sequence ID for the message received */
if (rv != 0) {
/* Reset last_msg_rcd to start of message */
if (first_fragment != 0) {
first_fragment = 0;
}
/*
* Send a NACK due to seqid mismatch
*/
if (rv) {
/* if cannot send NACK - reset channel */
*notify_client = B_TRUE;
break;
}
/* purge receive queue */
break;
}
/* record the message ID */
/* process control messages */
/* save current internal state */
/* re-process pkt - state was adjusted */
continue;
}
if (rv == ECONNRESET) {
*notify_client = B_TRUE;
break;
}
/*
* control message processing was successful
* channel transitioned to ready for communication
*/
(tstate & ~TS_IN_RESET) !=
*notify_client = B_TRUE;
}
}
/* process data NACKs */
*notify_client = B_TRUE;
break;
}
/* process data ACKs */
*notify_client = B_TRUE;
break;
}
}
/*
* Copy the data packet to the data queue. Note
* that the copy routine updates the rx_head pointer.
*/
*notify_client = B_TRUE;
*notify_event |= LDC_EVT_READ;
} else {
}
/* move the head one position */
*notify_client = B_TRUE;
break;
}
} /* for */
/* ACK data packets */
if ((*notify_event &
int ack_rv;
"i_ldc_rx_process_hvq: (0x%lx) cannot "
*notify_client = B_TRUE;
goto skip_ackpeek;
}
}
/*
* If we have no more space on the data queue, make sure
* there are no ACKs on the rx queue waiting to be processed.
*/
*notify_client = B_TRUE;
}
return (rv);
} else {
}
}
/* Return, indicating whether or not data packets were found */
return (0);
return (ENOMSG);
}
/*
* Process any ACK packets on the HV receive queue.
*
* This function is only used by RELIABLE mode channels when the
* secondary data queue fills up and there are packets remaining on
* the HV receive queue.
*/
int
{
int rv = 0;
break;
}
ldcp->rx_ack_head =
}
return (rv);
}
/* -------------------------------------------------------------------------- */
/*
* LDC API functions
*/
/*
* Initialize the channel. Allocate internal structure and memory for
* the TX/RX queues, and initialize the channel locks.
*/
int
{
return (EINVAL);
}
return (EINVAL);
}
/* check if channel is valid */
if (rv == H_ECHANNEL) {
return (EINVAL);
}
/* check if the channel has already been initialized */
id);
return (EADDRINUSE);
}
}
*handle = 0;
/* Allocate an ldcp structure */
/*
* Initialize the channel and Tx lock
*
* The channel 'lock' protects the entire channel and
* should be acquired before initializing, resetting,
* destroying or reading from a channel.
*
* The 'tx_lock' should be acquired prior to transmitting
* data over the channel. The lock should also be acquired
* prior to channel reconfiguration (in order to prevent
* concurrent writes).
*
* ORDERING: When both locks are being acquired, to prevent
* deadlocks, the channel lock should be always acquired prior
* to the tx_lock.
*/
/* Initialize the channel */
/* Read attributes */
"ldc_init: (0x%llx) channel attributes, class=0x%x, "
"instance=0x%llx, mode=%d, mtu=%d\n",
ldcp->last_ack_rcd = 0;
ldcp->last_msg_rcd = 0;
/* Initialize payload size depending on whether channel is reliable */
case LDC_MODE_RAW:
break;
case LDC_MODE_UNRELIABLE:
break;
case LDC_MODE_RELIABLE:
ldcp->stream_remains = 0;
ldcp->stream_offset = 0;
break;
default:
goto cleanup_on_exit;
}
/*
* qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
* value is smaller than default length of ldc_queue_entries,
* qlen is set to ldc_queue_entries. Ensure that computed
* length is a power-of-two value.
*/
while (qlen) {
}
}
ldcp->rx_q_entries =
/* Create a transmit queue */
"ldc_init: (0x%lx) TX queue allocation failed\n",
goto cleanup_on_exit;
}
/* Create a receive queue */
"ldc_init: (0x%lx) RX queue allocation failed\n",
goto cleanup_on_exit;
}
/* Setup a separate read data queue */
/* Make sure the data queue multiplier is a power of 2 */
if (!ISP2(ldc_rxdq_multiplier)) {
}
KM_SLEEP);
"ldc_init: (0x%lx) RX data queue "
goto cleanup_on_exit;
}
} else {
}
/* Init descriptor ring and memory handle list lock */
/* mark status as INITialized */
/* Add to channel list */
ldcssp->channel_count++;
/* set the handle */
return (0);
if (ldcp)
return (exit_val);
}
/*
* Finalizes the LDC connection. It will return EBUSY if the
* channel is open. A ldc_close() has to be done prior to
* finalizing the channel. It frees the TX/RX queues associated
* with the channel
*/
int
{
return (EINVAL);
}
return (EBUSY);
}
/* Remove from the channel list */
} else {
break;
}
}
return (EINVAL);
}
}
ldcssp->channel_count--;
/* Free the map table for this channel */
else
}
/* Destroy descriptor ring and memory handle list lock */
/* Free the stream buffer for RELIABLE_MODE */
/* Free the RX queue */
/* Free the RX data queue */
}
/* Free the TX queue */
/* Destroy mutex */
/* free channel structure */
return (0);
}
/*
* Open the LDC channel for use. It registers the TX/RX queues
* with the Hypervisor. It also specifies the interrupt number
* and target CPU for this channel
*/
int
{
int rv;
return (EINVAL);
}
return (EFAULT);
}
return (EFAULT);
}
/*
* Unregister/Register the tx queue with the hypervisor
*/
if (rv) {
"ldc_open: (0x%lx) channel tx queue unconf failed\n",
return (EIO);
}
if (rv) {
"ldc_open: (0x%lx) channel tx queue conf failed\n",
return (EIO);
}
/*
* Unregister/Register the rx queue with the hypervisor
*/
if (rv) {
"ldc_open: (0x%lx) channel rx queue unconf failed\n",
return (EIO);
}
if (rv) {
"ldc_open: (0x%lx) channel rx queue conf failed\n",
return (EIO);
}
/* Register the channel with the channel nexus */
return (EIO);
}
/* mark channel in OPEN state */
/* Read channel state */
if (rv) {
"ldc_open: (0x%lx) cannot read channel state\n",
(void) i_ldc_unregister_channel(ldcp);
return (EIO);
}
/*
* set the ACKd head to the current head location for reliable mode
*/
/* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */
}
/*
* if channel is being opened in RAW mode - no handshake is needed
* switch the channel READY and UP state
*/
}
/*
* Increment number of open channels
*/
ldcssp->channels_open++;
"ldc_open: (0x%llx) channel (0x%p) open for use "
"(tstate=0x%x, status=0x%x)\n",
return (0);
}
/*
* Close the LDC connection. It will return EBUSY if there
* are memory segments or descriptor rings either bound to or
* mapped over the channel
*/
int
{
return (EINVAL);
}
/* return error if channel is not open */
return (EFAULT);
}
/* if any memory handles, drings, are bound or mapped cannot close */
"ldc_close: (0x%llx) channel has bound memory handles\n",
return (EBUSY);
}
"ldc_close: (0x%llx) channel has bound descriptor rings\n",
return (EBUSY);
}
"ldc_close: (0x%llx) channel has mapped descriptor rings\n",
return (EBUSY);
}
if (ldcp->cb_inprogress) {
return (EWOULDBLOCK);
}
/* Obtain Tx lock */
/*
* Wait for pending transmits to complete i.e Tx queue to drain
* if there are pending pkts - wait 1 ms and retry again
*/
for (;;) {
if (rv) {
return (EIO);
}
break;
}
if (chk_done) {
"ldc_close: (0x%llx) Tx queue drain timeout\n",
break;
}
/* wait for one ms and try again */
}
/*
* Drain the Tx and Rx queues as we are closing the
* channel. We don't care about any pending packets.
* We have to also drain the queue prior to clearing
* pending interrupts, otherwise the HV will trigger
* an interrupt the moment the interrupt state is
* cleared.
*/
(void) i_ldc_txq_reconf(ldcp);
/*
* Unregister the channel with the nexus
*/
/* if any error other than EAGAIN return back */
"ldc_close: (0x%lx) unregister failed, %d\n",
return (rv);
}
/*
* As there could be pending interrupts we need
* to wait and try again
*/
retries++;
}
/*
* Unregister queues
*/
if (rv) {
"ldc_close: (0x%lx) channel TX queue unconf failed\n",
return (EIO);
}
if (rv) {
"ldc_close: (0x%lx) channel RX queue unconf failed\n",
return (EIO);
}
/* Reset channel state information */
/* Mark channel as down and in initialized state */
ldcp->tx_ackd_head = 0;
/* Decrement number of open channels */
ldcssp->channels_open--;
return (0);
}
/*
* Register channel callback
*/
int
{
"ldc_reg_callback: invalid channel handle\n");
return (EINVAL);
}
return (EINVAL);
}
return (EIO);
}
if (ldcp->cb_inprogress) {
return (EWOULDBLOCK);
}
"ldc_reg_callback: (0x%llx) registered callback for channel\n",
return (0);
}
/*
* Unregister channel callback
*/
int
{
"ldc_unreg_callback: invalid channel handle\n");
return (EINVAL);
}
"ldc_unreg_callback: (0x%llx) no callback exists\n",
return (EIO);
}
if (ldcp->cb_inprogress) {
"ldc_unreg_callback: (0x%llx) callback active\n",
return (EWOULDBLOCK);
}
"ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
return (0);
}
/*
* Bring a channel up by initiating a handshake with the peer.
* This call is asynchronous. It will complete at a later point
* in time when the peer responds back with an RTR.
*
* Return values visible in this block:
*   EINVAL       - initial validation failed (condition not visible here)
*   0            - channel already UP, channel in RAW mode, a handshake
*                  is already in progress, or the link-state check below
*                  returned early
*   ECONNREFUSED - the current Tx tail could not be obtained
*   rv           - result of initiating the send via the HV
*
* NOTE(review): the function signature and many statements are missing
* from this view of the file; verify against the complete source.
*/
int
{
int rv;
return (EINVAL);
}
/* clear the reset state */
"ldc_up: (0x%llx) channel is already in UP state\n",
/* mark channel as up */
/*
* if channel was in reset state and there was
* pending data clear interrupt state. this will
* trigger an interrupt, causing the RX handler
* to invoke the client's callback
*/
if ((tstate & TS_IN_RESET) &&
"ldc_up: (0x%llx) channel has pending data, "
}
return (0);
}
/* if the channel is in RAW mode - mark it as UP, if READY */
return (0);
}
/* Don't start another handshake if there is one in progress */
"ldc_up: (0x%llx) channel handshake in progress\n",
return (0);
}
/* save current link state */
/* get the current tail for the LDC msg */
if (rv) {
return (ECONNREFUSED);
}
/*
* If i_ldc_get_tx_tail() changed link_state to either RESET or UP,
* from a previous state of DOWN, then mark the channel as
* being ready for handshake.
*/
if ((link_state == LDC_CHANNEL_DOWN) &&
return (0);
} else {
}
}
/* initiate the send by calling into HV and set the new tail */
if (rv) {
"ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
return (rv);
}
return (rv);
}
/*
* Bring a channel down by resetting its state and queues
*
* Returns EINVAL when the initial validation fails and 0 otherwise.
*
* NOTE(review): the function signature, the validation condition and the
* reset statements are missing from this view of the file; verify
* against the complete source.
*/
int
{
return (EINVAL);
}
return (0);
}
/*
* Get the current channel status
*
* Returns EINVAL when the initial validation fails and 0 otherwise.
*
* NOTE(review): the function signature, the validation condition and the
* statement that reports the status are missing from this view of the
* file; verify against the complete source.
*/
int
{
return (EINVAL);
}
return (0);
}
/*
* Select the callback mode for a channel according to cmode
* (LDC_CB_DISABLE or LDC_CB_ENABLE).
*
* Returns EINVAL for an invalid channel handle and 0 after the switch.
*
* NOTE(review): the invalid-handle diagnostic is prefixed with
* "ldc_set_intr_mode" while the other diagnostics use "ldc_set_cb_mode";
* confirm which name is intended. The function signature and the
* statements that update the channel are missing from this view of the
* file.
*/
int
{
"ldc_set_intr_mode: invalid channel handle\n");
return (EINVAL);
}
/*
* Record no callbacks should be invoked
*/
switch (cmode) {
case LDC_CB_DISABLE:
/* callbacks are already disabled - nothing more to do */
if (!ldcp->cb_enabled) {
"ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
break;
}
break;
case LDC_CB_ENABLE:
/* callbacks are already enabled - nothing more to do */
if (ldcp->cb_enabled) {
"ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
break;
}
break;
}
return (0);
}
/*
* Check to see if there are packets on the incoming queue.
* Will return hasdata = B_FALSE if there are no packets.
*
* Return values visible in this block:
*   EINVAL     - initial validation failed (condition not visible here)
*   ECONNRESET - returned on two paths; the second follows the
*                "channel went down" check
*   EIO        - the queue state could not be read (rv != 0), or the
*                channel mode matched none of the handled cases
*   0          - check completed
*
* NOTE(review): the function signature, the switch header and most of the
* per-mode statements are missing from this view of the file; verify
* against the complete source.
*/
int
{
int rv;
return (EINVAL);
}
return (ECONNRESET);
}
/* Read packet(s) from the queue */
&ldcp->link_state);
if (rv != 0) {
return (EIO);
}
/* reset the channel state if the channel went down */
return (ECONNRESET);
}
case LDC_MODE_RAW:
/*
* In raw mode, there are no ctrl packets, so checking
* if the queue is non-empty is sufficient.
*/
break;
case LDC_MODE_UNRELIABLE:
/*
* In unreliable mode, if the queue is non-empty, we need
* to check if it actually contains unread data packets.
* The queue may just contain ctrl packets.
*/
/*
* If no data packets were found on the queue,
* all packets must have been control packets
* which will now have been processed, leaving
* the queue empty. If the interrupt state
* is pending, we need to clear the interrupt
* here.
*/
}
}
break;
case LDC_MODE_RELIABLE:
/*
* In reliable mode, first check for 'stream_remains' > 0.
* Otherwise, if the data queue head and tail pointers
* differ, there must be data to read.
*/
if (ldcp->stream_remains > 0)
else
break;
default:
/* unrecognized channel mode */
return (EIO);
}
return (0);
}
/*
* Read 'size' amount of bytes or less. If incoming buffer
* is more than 'size', ENOBUFS is returned.
*
* On return, size contains the number of bytes read.
*
* Return values visible in this block:
*   EINVAL     - initial validation failed (condition not visible here)
*   ECONNRESET - the queue state could not be read (rv != 0)
*   exit_val   - returned on the remaining paths; its assignment is not
*                visible here
*
* NOTE(review): the function signature and most statements are missing
* from this view of the file; verify against the complete source.
*/
int
{
return (EINVAL);
}
/* channel lock */
"ldc_read: (0x%llx) channel is not in UP state\n",
/*
* For reliable mode channels, the interrupt
* state is only set to pending during
* interrupt handling when the secondary data
* queue became full, leaving unprocessed
* packets on the Rx queue. If the interrupt
* state is pending and space is now available
* on the data queue, clear the interrupt.
*/
/* data queue is not full */
}
return (exit_val);
} else {
}
/*
* if queue has been drained - clear interrupt
*/
&ldcp->link_state);
if (rv != 0) {
return (ECONNRESET);
}
if (exit_val == 0) {
}
if ((rv == 0) &&
}
}
return (exit_val);
}
/*
* Basic raw mondo read -
* no interpretation of mondo contents at all.
*
* Enter and exit with ldcp->lock held by caller
*
* Return values visible in this block:
*   ENOBUFS    - space is smaller than LDC_PAYLOAD_SIZE_RAW
*   EIO        - the queue pointers could not be read (rv != 0)
*   ECONNRESET - returned after the "channel went down" check
*   0          - queue empty; *sizep is set to 0
*   rv         - returned on the final path
*
* NOTE(review): the function signature and several statements are
* missing from this view of the file; verify against the complete source.
*/
static int
{
/* the caller's buffer must hold a full raw payload */
if (space < LDC_PAYLOAD_SIZE_RAW)
return (ENOBUFS);
/* compute mask for increment */
/*
* Read packet(s) from the queue
*/
&ldcp->link_state);
if (rv != 0) {
"ldc_read_raw: (0x%lx) unable to read queue ptrs",
return (EIO);
}
" rxt=0x%llx, st=0x%llx\n",
/* reset the channel state if the channel went down */
return (ECONNRESET);
}
/*
* Check for empty queue
*/
*sizep = 0;
return (0);
}
/* get the message */
/* if channel is in RAW mode, copy data and return */
return (rv);
}
/*
* Process LDC mondos to build larger packets
* with either unreliable or reliable delivery.
*
* Enter and exit with ldcp->lock held by caller
*
* Return values visible in this block:
*   EINVAL     - buffer/size validation failed
*   ECONNRESET - the queue pointers could not be read, a NACK path was
*                taken, or control reached the final return (presumably
*                the channel_is_reset target -- the label itself is not
*                visible here, TODO confirm)
*   EAGAIN     - retries reached ldc_max_retries while in the middle of a
*                fragmented transfer; *sizep is set to 0 and
*                ldcp->last_msg_rcd is rewound to first_fragment - 1
*   rv         - returned after the read loop exits
*
* NOTE(review): the function signature and a large number of statements
* and conditions are missing from this view of the file; verify against
* the complete source before relying on the notes above.
*/
static int
{
int rv = 0;
int retries = 0;
/* check if the buffer and size are valid */
return (EINVAL);
}
/* Set q_va and compute increment mask for the appropriate queue */
} else {
}
/*
* Read packet(s) from the queue
*/
&ldcp->link_state);
if (rv != 0) {
return (ECONNRESET);
}
/* reset the channel state if the channel went down */
goto channel_is_reset;
for (;;) {
/*
* If a data queue is being used, check the Rx HV
* queue. This will copy over any new data packets
* that have arrived.
*/
(void) i_ldc_chkq(ldcp);
if (rv != 0) {
"ldc_read: (0x%lx) cannot read queue ptrs",
return (ECONNRESET);
}
goto channel_is_reset;
/* If in the middle of a fragmented xfer */
if (first_fragment != 0) {
/* wait for ldc_delay usecs */
if (++retries < ldc_max_retries)
continue;
/* out of retries: report nothing read and rewind */
*sizep = 0;
ldcp->last_msg_rcd =
first_fragment - 1;
return (EAGAIN);
}
*sizep = 0;
break;
}
}
retries = 0;
"ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
/* get the message */
/* Check the message ID for the message received */
"error, q_ptrs=0x%lx,0x%lx",
/* throw away data */
bytes_read = 0;
/* Reset last_msg_rcd to start of message */
if (first_fragment != 0) {
first_fragment = 0;
}
/*
* Send a NACK -- invalid seqid
* get the current tail for the response
*/
if (rv) {
"ldc_read: (0x%lx) err sending "
/* if cannot send NACK - reset chan */
rv = ECONNRESET;
break;
}
/* purge receive queue */
break;
}
/*
* Process any messages of type CTRL messages
* Future implementations should try to pass these
* to LDC link by resetting the intr state.
*
* NOTE: not done as a switch() as type can be
* both ctrl+data
*/
continue;
*sizep = 0;
bytes_read = 0;
break;
}
}
/* process data ACKs */
*sizep = 0;
bytes_read = 0;
break;
}
}
/* process data NACKs */
return (ECONNRESET);
}
}
/* process data messages */
/* get the packet length */
/*
* FUTURE OPTIMIZATION:
* don't need to set q head for every
* packet we read just need to do this when
* we are done or need to wait for more
* mondos to make a full packet - this is
* currently expensive.
*/
if (first_fragment == 0) {
/*
* first packets should always have the start
* bit set (even for a single packet). If not
* throw away the packet
*/
"ldc_read: (0x%llx) not start - "
/* toss pkt, inc head, cont reading */
bytes_read = 0;
& q_size_mask;
break;
continue;
}
} else {
/* check to see if this is a pkt w/ START bit */
"ldc_read:(0x%llx) unexpected pkt"
" env=0x%x discarding %d bytes,"
" lastmsg=%d, currentmsg=%d\n",
/* throw data we have read so far */
bytes_read = 0;
break;
}
}
/* copy (next) pkt into buffer */
bytes_read += len;
} else {
/*
* there is not enough space in the buffer to
* read this pkt. throw message away & continue
* reading data from queue
*/
"ldc_read: (0x%llx) buffer too small, "
first_fragment = 0;
bytes_read = 0;
/* throw away everything received so far */
break;
/* continue reading remaining pkts */
continue;
}
}
/* set the message id */
/* move the head one position */
/*
* All pkts that are part of this fragmented transfer
* have been read or this was a single pkt read
* or there was an error
*/
/* set the queue head */
bytes_read = 0;
*sizep = bytes_read;
break;
}
/* advance head if it is a CTRL packet or a DATA ACK packet */
/* set the queue head */
bytes_read = 0;
break;
}
}
} /* for (;;) */
return (rv);
return (ECONNRESET);
}
/*
* Fetch and buffer incoming packets so we can hand them back as
* a basic byte stream.
*
* Enter and exit with ldcp->lock held by caller
*
* When ldcp->stream_remains is 0 a refill is attempted; a non-zero rv
* from that step is returned unchanged, otherwise ldcp->stream_offset is
* reset to 0. Returns 0 on the normal path.
*
* NOTE(review): the function signature, the refill call and the copy-out
* statements are missing from this view of the file; verify against the
* complete source.
*/
static int
{
int rv;
/* nothing buffered from a previous read */
if (ldcp->stream_remains == 0) {
if (rv != 0)
return (rv);
ldcp->stream_offset = 0;
}
return (0);
}
/*
* Write specified amount of bytes to the channel
* in multiple pkts of pkt_payload size. Each
* packet is tagged with an unique packet ID in
* the case of a reliable link.
*
* On return, size contains the number of bytes written.
*
* Return values visible in this block:
*   EINVAL     - returned by two validation checks (conditions not
*                visible here)
*   0          - *sizep is 0, so there is nothing to write
*   ECONNRESET - channel is not in UP state; *sizep is set to 0
*   rv         - result of the else branch, whose statements are not
*                visible here
*
* NOTE(review): the function signature and several statements are
* missing from this view of the file; verify against the complete source.
*/
int
{
int rv = 0;
return (EINVAL);
}
/* check if non-zero data to write */
return (EINVAL);
}
/* a zero-length write succeeds trivially */
if (*sizep == 0) {
return (0);
}
/* Check if channel is UP for data exchange */
"ldc_write: (0x%llx) channel is not in UP state\n",
*sizep = 0;
rv = ECONNRESET;
} else {
}
return (rv);
}
/*
* Write a raw packet to the channel
* On return, size contains the number of bytes written.
*
* Return values visible in this block:
*   EMSGSIZE    - invalid size for RAW mode; *sizep is set to 0
*   EIO         - the Tx queue pointers could not be read; *sizep is
*                 set to 0
*   ECONNRESET  - returned on two paths after *sizep is set to 0 (the
*                 guarding conditions are not visible here)
*   EWOULDBLOCK - returned before sending (condition not visible here)
*                 and when updating the tail yields EWOULDBLOCK;
*                 *sizep is set to 0 in both cases
*   rv          - returned on the final path
*
* NOTE(review): the function signature and many statements are missing
* from this view of the file; verify against the complete source.
*/
static int
{
int rv = 0;
/*
* Check to see if the packet size is less than or
* equal to packet size support in raw mode
*/
"ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
*sizep = 0;
return (EMSGSIZE);
}
/* get the qptrs for the tx queue */
if (rv != 0) {
*sizep = 0;
return (EIO);
}
*sizep = 0;
} else {
/*
* Release Tx lock, and then reacquire channel
* and Tx lock in correct order
*/
}
return (ECONNRESET);
}
*sizep = 0;
return (EWOULDBLOCK);
}
/* Send the data now */
/* copy the data into pkt */
/* increment tail */
/*
* All packets have been copied into the TX queue
* update the tail ptr in the HV
*/
if (rv) {
if (rv == EWOULDBLOCK) {
*sizep = 0;
return (EWOULDBLOCK);
}
*sizep = 0;
} else {
/*
* Release Tx lock, and then reacquire channel
* and Tx lock in correct order
*/
}
return (ECONNRESET);
}
return (rv);
}
/*
* Write specified amount of bytes to the channel
* in multiple pkts of pkt_payload size. Each
* packet is tagged with an unique packet ID in
* the case of a reliable link.
*
* On return, size contains the number of bytes written.
* This function needs to ensure that the write size is < MTU size
*
* Return values visible in this block:
*   EIO         - the Tx queue pointers could not be read; *size is
*                 set to 0
*   ECONNRESET  - returned on two paths (guarding conditions are not
*                 visible here)
*   EWOULDBLOCK - the queue is full (*size is set to 0), or the Tx
*                 queue does not have enough space
*   rv          - result of updating the tail; when it is 0, *size is set
*                 to bytes_written
*
* NOTE(review): the function signature and many statements are missing
* from this view of the file; verify against the complete source.
*/
static int
{
int rv;
/* compute mask for increment */
/* get the qptrs for the tx queue */
if (rv != 0) {
*size = 0;
return (EIO);
}
*size = 0;
} else {
/*
* Release Tx lock, and then reacquire channel
* and Tx lock in correct order
*/
}
return (ECONNRESET);
}
/*
* Check to see if the queue is full. The check is done using
* the appropriate head based on the link mode.
*/
*size = 0;
return (EWOULDBLOCK);
}
/*
* Make sure that the LDC Tx queue has enough space
*/
return (EWOULDBLOCK);
}
/* Send the data now */
bytes_written = 0;
/* loop until all *size bytes are accounted for */
while (*size > bytes_written) {
curr_seqid++;
/* copy the data into pkt */
bytes_written += len;
/* increment tail */
}
/* Set the start and stop bits */
/*
* All packets have been copied into the TX queue
* update the tail ptr in the HV
*/
if (rv == 0) {
/* report the number of bytes written */
*size = bytes_written;
} else {
int rv2;
if (rv != EWOULDBLOCK) {
*size = 0;
} else {
/*
* Release Tx lock, and then reacquire channel
* and Tx lock in correct order
*/
}
return (ECONNRESET);
}
"old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
"(head 0x%x, tail 0x%x state 0x%x)\n",
*size = 0;
}
return (rv);
}
/*
* Write specified amount of bytes to the channel
* in multiple pkts of pkt_payload size. Each
* packet is tagged with an unique packet ID in
* the case of a reliable link.
*
* On return, size contains the number of bytes written.
* This function needs to ensure that the write size is < MTU size
*
* NOTE(review): only the truncation comment of the body is visible in
* this view of the file; the function signature and all statements are
* missing. Presumably this is the stream-mode write entry point -- TODO
* confirm against the complete source.
*/
static int
{
/* Truncate packet to max of MTU size */
}
/*
* Interfaces for channel nexus to register/unregister with LDC module
* The nexus will register functions to be used to register individual
* channels with the nexus and enable interrupts for the channels
*
* Returns EINVAL when the initial validation fails (condition not
* visible here) and 0 otherwise.
*
* NOTE(review): the function signature, the registration statements and
* the list traversal step are missing from this view of the file; verify
* against the complete source.
*/
int
{
return (EINVAL);
}
/* nexus registration */
/* register any channels that might have been previously initialized */
while (ldcp) {
/* the per-channel result is deliberately ignored */
(void) i_ldc_register_channel(ldcp);
}
return (0);
}
/*
* Nexus unregister counterpart of the registration interface.
*
* Returns EINVAL from either of two validation checks (conditions not
* visible here) and 0 otherwise.
*
* NOTE(review): the function signature and the unregister statements are
* missing from this view of the file; verify against the complete source.
*/
int
{
return (EINVAL);
}
return (EINVAL);
}
/* nexus unregister */
return (0);
}
/*
* Report channel information through 'info', including
* info->direct_map_size_max.
*
* Return values visible in this block:
*   EINVAL - initial validation failed, or the channel is not
*            initialized
*   0      - info filled in; direct_map_size_max is set to 0 on the early
*            return path and when avail is below
*            ldc_direct_map_size_max
*
* NOTE(review): the diagnostic text uses the prefix "ldc_get_info"; the
* function signature, the computation of 'avail' and the non-zero
* assignment are missing from this view of the file. Verify against the
* complete source.
*/
int
{
return (EINVAL);
}
/* check to see if channel is initialized */
"ldc_get_info: (0x%llx) channel not initialized\n",
return (EINVAL);
}
/*
* ldcssp->mapin_size is the max amount of shared memory supported by
* the Hypervisor per guest. e.g, legacy HV supports 64MB; latest HV
* support 1GB. This size is read during ldc module initialization.
*
* ldc_dring_direct_map_rsvd is the amount of memory reserved for
* mapping in descriptor rings. In the initial implementation, we use a
* simple approach to determine the amount of mapin space available per
* channel. In future, we may implement strict accounting of the actual
* memory consumed to determine the exact amount available per channel.
*/
info->direct_map_size_max = 0;
return (0);
}
if (avail >= ldc_direct_map_size_max) {
} else {
/* not enough mapin space available for direct mapping */
info->direct_map_size_max = 0;
}
return (0);
}