ldc.c revision 22f747ef037d84d7799b60247ff9a3df1604a7ee
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* sun4v LDC Link Layer
*/
#include <sys/machsystm.h>
#include <sys/machcpuvar.h>
#include <vm/hat_sfmmu.h>
#include <sys/vm_machparam.h>
#include <vm/seg_kmem.h>
#include <sys/hypervisor_api.h>
#include <sys/ldc_impl.h>
/* Core internal functions */
static int i_ldc_h2v_error(int h_error);
/* Interrupt handling functions */
/* Read method functions */
/* Write method functions */
/* Pkt processing internal functions */
/* Memory synchronization internal functions */
/* LDC Version */
/* number of supported versions */
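/*
 * Sketch of the declarations the two comments above refer to. The code
 * below references ldc_versions[] and LDC_NUM_VERS; the exact version
 * list ({1, 0}) is an assumption.
 */
static ldc_ver_t ldc_versions[] = { {1, 0} };
#define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))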
/* Module State Pointer */
static ldc_soft_state_t *ldcssp;
/* restored sketch: the struct heads below were elided; fields assumed */
static struct modlmisc md = {
	&mod_miscops,			/* This is a misc module */
	"sun4v LDC module v%I%",	/* Name of the module */
};
static struct modlinkage ml = {
	MODREV_1,
	&md,
	NULL
};
/* assumed hsvc negotiation info: LDC group, API version 1.0 */
static hsvc_info_t ldc_hsvc = {
	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 0, "ldc"
};
/*
* LDC framework supports mapping remote domain's memory
* either directly or via shadow memory pages. Default
* support is currently implemented via shadow copy.
* Direct map can be enabled by setting 'ldc_shmem_enabled'
*/
int ldc_shmem_enabled = 0;
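/*
 * For example (illustrative), direct map could be enabled at boot
 * via /etc/system:
 *
 *	set ldc:ldc_shmem_enabled = 1
 */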
/*
 * The number of MTU-sized messages that can be stored in
 * the LDC Tx queue. The number of Tx queue entries is
 * then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
 */
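/* sketch of the tunable described above; type and initializer assumed */
uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;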
/*
* The minimum queue length. This is the size of the smallest
* LDC queue. If the computed value is less than this default,
* the queue length is rounded up to 'ldc_queue_entries'.
*/
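/* sketch of the tunable described above; type and initializer assumed */
uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;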
/*
 * Pages exported for remote access over each channel are
 * maintained in a table registered with the Hypervisor.
 * The default number of entries in the table is set to
 * 'ldc_mtbl_entries'.
 */
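/* sketch of the tunable named in the comment above; type assumed */
uint64_t ldc_mtbl_entries = LDC_MTBL_ENTRIES;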
/*
* LDC retry count and delay - when the HV returns EWOULDBLOCK
* the operation is retried 'ldc_max_retries' times with a
* wait of 'ldc_delay' usecs between each retry.
*/
int ldc_max_retries = LDC_MAX_RETRIES;
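/* sketch of the companion delay tunable named above; type assumed */
clock_t ldc_delay = LDC_DELAY;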
/*
* delay between each retry of channel unregistration in
* ldc_close(), to wait for pending interrupts to complete.
*/
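/* sketch of the delay tunable this comment describes; name assumed */
clock_t ldc_close_delay = LDC_CLOSE_DELAY;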
#ifdef DEBUG
/*
* Print debug messages
*
 * set ldcdbg to 0x7 to enable all msgs
* 0x4 - Warnings
* 0x2 - All debug messages
* 0x1 - Minimal debug messages
*
* set ldcdbgchan to the channel number you want to debug
* setting it to -1 prints debug messages for all channels
* NOTE: ldcdbgchan has no effect on error messages
*/
#define DBG_ALL_LDCS -1
int ldcdbg = 0x0;
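/*
 * Sketch of the debug controls read below; declarations assumed from
 * the references to ldcdbgchan and ldc_inject_err_flag in this section.
 */
static int64_t ldcdbgchan = DBG_ALL_LDCS;
uint64_t ldc_inject_err_flag = 0;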
static void
ldcdebug(int64_t id, const char *fmt, ...)	/* assumed signature */
{
	char buf[512];
	va_list ap;
/*
* Do not return if,
* caller wants to print it anyway - (id == DBG_ALL_LDCS)
* debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
* debug channel = caller specified channel
*/
if ((id != DBG_ALL_LDCS) &&
(ldcdbgchan != DBG_ALL_LDCS) &&
(ldcdbgchan != id)) {
return;
}
	/* restored sketch of the elided formatting (assumed) */
	va_start(ap, fmt);
	(void) vsprintf(buf, fmt, ap);
	va_end(ap);
	cmn_err(CE_CONT, "?%s", buf);
}
#define LDC_ERR_RESET 0x1
#define LDC_ERR_PKTLOSS 0x2
static boolean_t
i_ldc_inject_error(ldc_chan_t *ldcp, uint64_t error)	/* assumed signature */
{
	/* only inject errors on the channel being debugged */
	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
		return (B_FALSE);
if ((ldc_inject_err_flag & error) == 0)
return (B_FALSE);
/* clear the injection state */
ldc_inject_err_flag &= ~error;
return (B_TRUE);
}
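/*
 * Sketch of the error-injection macros used by the DEBUG-only checks
 * later in this file (LDC_INJECT_RESET/LDC_INJECT_PKTLOSS); assumed.
 */
#define	LDC_INJECT_RESET(_ldcp)	i_ldc_inject_error(_ldcp, LDC_ERR_RESET)
#define	LDC_INJECT_PKTLOSS(_ldcp)	i_ldc_inject_error(_ldcp, LDC_ERR_PKTLOSS)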
#define	D1		\
if (ldcdbg & 0x01)	\
	ldcdebug

#define	D2		\
if (ldcdbg & 0x02)	\
	ldcdebug

#define	DWARN		\
if (ldcdbg & 0x04)	\
	ldcdebug

/*
 * Restored sketch (assumed): hex-dump the first 64 bytes of a payload.
 * The #define line and buffer handling were elided in this revision.
 */
#define	DUMP_PAYLOAD(id, addr)					\
{								\
	char buf[65*3];						\
	int i;							\
	uint8_t *src = (uint8_t *)(addr);			\
	for (i = 0; i < 64; i++, src++)				\
		(void) sprintf(&buf[i * 3], "|%02x", *src);	\
	(void) sprintf(&buf[i * 3], "|\n");			\
	D2((id), "payload: %s", buf);				\
}
/*
 * Restored sketch (assumed): log an LDC packet header; the original
 * message formatting was elided in this revision.
 */
#define	DUMP_LDC_PKT(c, s, addr)					\
{									\
	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
	if (msg->type == LDC_DATA) {					\
		D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),	\
		    mid, msg->type, msg->stype, msg->ctrl, msg->env);	\
	} else {							\
		D2((c)->id, "%s: (/%x/%x/%x/,env=%x)", (s),		\
		    msg->type, msg->stype, msg->ctrl, msg->env);	\
	}								\
}
#else
#define DBG_ALL_LDCS -1
#define D1
#define D2
#define DWARN
#define DUMP_PAYLOAD(id, addr)
#define DUMP_LDC_PKT(c, s, addr)
#endif
/* zero out a message before use (assumed body) */
#define	ZERO_PKT(p)			\
	bzero((p), sizeof (ldc_msg_t));
int
_init(void)
{
int status;
if (status != 0) {
" group: 0x%lx major: %ld minor: %ld errno: %d",
return (-1);
}
/* allocate soft state structure */
/* Link the module into the system */
if (status != 0) {
return (status);
}
/* Initialize the LDC state structure */
/* Create a cache for memory handles */
return (-1);
}
/* Create cache for memory segment structures */
return (-1);
}
ldcssp->channel_count = 0;
ldcssp->channels_open = 0;
return (0);
}
int
_info(struct modinfo *modinfop)
{
	/* Report status of the dynamically loadable driver module */
	return (mod_info(&ml, modinfop));
}
int
_fini(void)
{
/* Unlink the driver module from the system */
if (status) {
return (EIO);
}
/* Free descriptor rings */
(void) ldc_mem_dring_unbind(
}
(void) ldc_mem_dring_unmap(
}
}
dringp = tmp_dringp;
}
/* close and finalize channels */
}
/* Destroy kmem caches */
	/*
	 * We have successfully "removed" the driver.
	 * Destroy the soft state.
	 */
(void) hsvc_unregister(&ldc_hsvc);
return (status);
}
/* -------------------------------------------------------------------------- */
/*
* LDC Link Layer Internal Functions
*/
/*
* Translate HV Errors to sun4v error codes
*/
static int
i_ldc_h2v_error(int h_error)
{
switch (h_error) {
case H_EOK:
return (0);
case H_ENORADDR:
return (EFAULT);
case H_EBADPGSZ:
case H_EINVAL:
return (EINVAL);
case H_EWOULDBLOCK:
return (EWOULDBLOCK);
case H_ENOACCESS:
case H_ENOMAP:
return (EACCES);
case H_EIO:
case H_ECPUERROR:
return (EIO);
case H_ENOTSUPPORTED:
return (ENOTSUP);
case H_ETOOMANY:
return (ENOSPC);
case H_ECHANNEL:
return (ECHRNG);
default:
break;
}
return (EIO);
}
/*
* Reconfigure the transmit queue
*/
static int
{
int rv;
if (rv) {
return (EIO);
}
if (rv) {
return (EIO);
}
ldcp->link_state);
return (0);
}
/*
* Reconfigure the receive queue
*/
static int
{
int rv;
&(ldcp->link_state));
if (rv) {
"i_ldc_rxq_reconf: (0x%lx) cannot get state",
return (EIO);
}
ldcp->rx_q_entries);
if (rv) {
"i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
return (EIO);
}
}
return (0);
}
/*
* Drain the contents of the receive queue
*/
static int
{
int rv;
&(ldcp->link_state));
if (rv) {
return (EIO);
}
/* flush contents by setting the head = tail */
}
/*
* Reset LDC state structure and its contents
*/
static void
{
ldcp->last_ack_rcd = 0;
ldcp->last_msg_rcd = 0;
} else {
}
}
}
/*
* Reset a LDC channel
*/
static void
{
/* reconfig Tx and Rx queues */
(void) i_ldc_txq_reconf(ldcp);
/* Clear Tx and Rx interrupts */
/* Reset channel state */
/* Mark channel in reset */
}
/*
* Clear pending interrupts
*/
static void
{
switch (itype) {
case CNEX_TX_INTR:
/* check Tx interrupt */
if (ldcp->tx_intr_state)
else
return;
break;
case CNEX_RX_INTR:
/* check Rx interrupt */
if (ldcp->rx_intr_state)
else
return;
break;
}
"i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
}
/*
* Set the receive queue head
* Resets connection and returns an error if it fails.
*/
static int
{
int rv;
int retries;
return (0);
if (rv != H_EWOULDBLOCK)
break;
/* wait for ldc_delay usecs */
}
return (ECONNRESET);
}
/*
* Returns the tx_head to be used for transfer
*/
static void
{
/* get current Tx head */
	/*
	 * Reliable mode will use the ACKd head instead of the regular tx_head.
	 * Also, in reliable mode, the ACKd head is advanced past all
	 * non-ACK/NACK packets, up to the current location of tx_head.
	 * This needs to be done only for reliable mode.
	 */
break;
}
/* advance ACKd head */
ldcp->tx_ackd_head =
}
}
}
/*
 * Returns the tx_tail to be used for transfer
 * Re-reads the TX queue ptrs if and only if the
 * cached head and tail are equal (queue is full)
 */
static int
{
int rv;
/* Read the head and tail ptrs from HV */
if (rv) {
"i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
return (EIO);
}
return (ECONNRESET);
}
/* increment the tail */
if (new_tail == current_head) {
"i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
return (EWOULDBLOCK);
}
return (0);
}
/*
* Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
* and retry ldc_max_retries times before returning an error.
* Returns 0, EWOULDBLOCK or EIO
*/
static int
{
int retries;
retval = 0;
break;
}
if (rv != H_EWOULDBLOCK) {
break;
}
/* wait for ldc_delay usecs */
}
return (retval);
}
/*
* Send a LDC message
*/
static int
{
int rv;
/* Obtain Tx lock */
/* get the current tail for the message */
if (rv) {
"i_ldc_send_pkt: (0x%llx) error sending pkt, "
"type=0x%x,subtype=0x%x,ctrl=0x%x\n",
return (rv);
}
/* Initialize the packet */
curr_seqid++;
}
}
/* initiate the send by calling into HV and set the new tail */
if (rv) {
"i_ldc_send_pkt:(0x%llx) error sending pkt, "
"type=0x%x,stype=0x%x,ctrl=0x%x\n",
return (EIO);
}
return (0);
}
/*
 * Checks if a packet was received in the right order
 * in the case of a reliable link.
 * Returns 0 if in order, else EIO
 */
static int
{
/* No seqid checking for RAW mode */
return (0);
/* No seqid checking for version, RTS, RTR message */
return (0);
"i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
return (EIO);
}
#ifdef DEBUG
if (LDC_INJECT_PKTLOSS(ldcp)) {
return (EIO);
}
#endif
return (0);
}
/*
* Process an incoming version ctrl message
*/
static int
{
/* get the received version */
/* Obtain Tx lock */
case LDC_INFO:
(void) i_ldc_txq_reconf(ldcp);
return (EAGAIN);
}
/* get the current tail and pkt for the response */
if (rv != 0) {
"i_ldc_process_VER: (0x%llx) err sending "
return (ECONNRESET);
}
/* initialize the packet */
for (;;) {
/* major version match - ACK version */
/*
* lower minor version to the one this endpt
* supports, if necessary
*/
break;
}
" lower idx=%d, v%u.%u\n", idx,
/* nack with next lower version */
sizeof (ldc_versions[idx]));
break;
}
/* next major version */
idx++;
if (idx == LDC_NUM_VERS) {
/* no version match - send NACK */
break;
}
}
/* initiate the send by calling into HV and set the new tail */
if (rv == 0) {
/* Save the ACK'd version */
"(0x%llx) Sent ACK, "
"Agreed on version v%u.%u\n",
}
} else {
"i_ldc_process_VER: (0x%llx) error sending "
return (ECONNRESET);
}
break;
case LDC_ACK:
/* mismatched version - reset connection */
"i_ldc_process_VER: (0x%llx) recvd"
return (ECONNRESET);
}
} else {
/* SUCCESS - we have agreed on a version */
}
/* initiate RTS-RTR-RDX handshake */
if (rv) {
"i_ldc_process_VER: (0x%llx) cannot send RTS\n",
return (ECONNRESET);
}
/* initiate the send by calling into HV and set the new tail */
if (rv) {
"i_ldc_process_VER: (0x%llx) no listener\n",
return (ECONNRESET);
}
break;
case LDC_NACK:
/* check if version in NACK is zero */
/* version handshake failure */
"i_ldc_process_VER: (0x%llx) no version match\n",
return (ECONNRESET);
}
/* get the current tail and pkt for the response */
if (rv != 0) {
"i_ldc_process_VER: (0x%lx) err sending "
return (ECONNRESET);
}
/* initialize the packet */
/* check ver in NACK msg has a match */
for (;;) {
			/*
			 * major version match - resubmit request,
			 * lowering the minor version to the one this
			 * endpt supports, if necessary
			 */
break;
}
" lower idx=%d, v%u.%u\n", idx,
/* send next lower version */
sizeof (ldc_versions[idx]));
break;
}
/* next version */
idx++;
if (idx == LDC_NUM_VERS) {
/* no version match - terminate */
return (ECONNRESET);
}
}
/* initiate the send by calling into HV and set the new tail */
if (rv == 0) {
} else {
"i_ldc_process_VER: (0x%lx) error sending version"
return (ECONNRESET);
}
break;
}
return (rv);
}
/*
* Process an incoming RTS ctrl message
*/
static int
{
int rv = 0;
case LDC_NACK:
"i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
/* Reset the channel -- as we cannot continue */
rv = ECONNRESET;
break;
case LDC_INFO:
/* check mode */
"i_ldc_process_RTS: (0x%lx) mode mismatch\n",
/*
* send NACK in response to MODE message
* get the current tail for the response
*/
if (rv) {
/* if cannot send NACK - reset channel */
rv = ECONNRESET;
break;
}
}
break;
default:
rv = ECONNRESET;
break;
}
	/*
	 * If either the connection was reset (when rv != 0) or
	 * a NACK was sent, we return. In the case of a NACK
	 * we neither consume the packet that came in nor record
	 * that we received the RTS
	 */
return (rv);
/* record RTS received */
/* store initial SEQID info */
/* Obtain Tx lock */
/* get the current tail for the response */
if (rv != 0) {
"i_ldc_process_RTS: (0x%lx) err sending RTR\n",
return (ECONNRESET);
}
/* initialize the packet */
/* initiate the send by calling into HV and set the new tail */
if (rv == 0) {
} else {
"i_ldc_process_RTS: (0x%lx) error sending RTR\n",
return (ECONNRESET);
}
return (0);
}
/*
* Process an incoming RTR ctrl message
*/
static int
{
int rv = 0;
case LDC_NACK:
/* RTR NACK received */
"i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
/* Reset the channel -- as we cannot continue */
rv = ECONNRESET;
break;
case LDC_INFO:
/* check mode */
"i_ldc_process_RTR: (0x%llx) mode mismatch, "
"expecting 0x%x, got 0x%x\n",
/*
* send NACK in response to MODE message
* get the current tail for the response
*/
if (rv) {
/* if cannot send NACK - reset channel */
rv = ECONNRESET;
break;
}
}
break;
default:
/* Reset the channel -- as we cannot continue */
rv = ECONNRESET;
break;
}
	/*
	 * If either the connection was reset (when rv != 0) or
	 * a NACK was sent, we return. In the case of a NACK
	 * we neither consume the packet that came in nor record
	 * that we received the RTR
	 */
return (rv);
if (rv) {
"i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
return (ECONNRESET);
}
return (0);
}
/*
* Process an incoming RDX ctrl message
*/
static int
{
int rv = 0;
case LDC_NACK:
/* RDX NACK received */
"i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
/* Reset the channel -- as we cannot continue */
rv = ECONNRESET;
break;
case LDC_INFO:
/*
* if channel is UP and a RDX received after data transmission
* has commenced it is an error
*/
"i_ldc_process_RDX: (0x%llx) unexpected RDX"
return (ECONNRESET);
}
break;
default:
/* Reset the channel -- as we cannot continue */
rv = ECONNRESET;
break;
}
return (rv);
}
/*
* Process an incoming ACK for a data packet
*/
static int
{
int rv;
/* Obtain Tx lock */
/*
* Read the current Tx head and tail
*/
if (rv != 0) {
"i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
/* Reset the channel -- as we cannot continue */
return (ECONNRESET);
}
	/*
	 * loop from where the previous ACK location was to the
	 * current head location. This is how far the HV has
	 * actually sent pkts. Pkts between head and tail are
	 * yet to be sent by HV.
	 */
for (;;) {
"i_ldc_process_data_ACK: (0x%llx) found packet\n",
break;
}
/* could not find packet */
"i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
/* Reset the channel -- as we cannot continue */
return (ECONNRESET);
}
}
return (0);
}
/*
* Process incoming control message
* Return 0 - session can continue
* EAGAIN - reprocess packet - state was changed
* ECONNRESET - channel was reset
*/
static int
{
int rv = 0;
case TS_OPEN:
case TS_READY:
case LDC_VER:
/* process version message */
break;
default:
"i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
break;
}
break;
case TS_VREADY:
case LDC_VER:
/* process version message */
break;
case LDC_RTS:
/* process RTS message */
break;
case LDC_RTR:
/* process RTR message */
break;
case LDC_RDX:
/* process RDX message */
break;
default:
"i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
break;
}
break;
case TS_UP:
case LDC_VER:
"i_ldc_ctrlmsg: (0x%llx) unexpected VER "
/* peer is redoing version negotiation */
(void) i_ldc_txq_reconf(ldcp);
break;
case LDC_RDX:
/* process RDX message */
break;
default:
"i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
break;
}
}
return (rv);
}
/*
* Register channel with the channel nexus
*/
static int
{
int rv = 0;
"i_ldc_register_channel: cnex has not registered\n");
return (EAGAIN);
}
if (rv) {
"i_ldc_register_channel: cannot register channel\n");
return (rv);
}
if (rv) {
"i_ldc_register_channel: cannot add Tx interrupt\n");
return (rv);
}
if (rv) {
"i_ldc_register_channel: cannot add Rx interrupt\n");
return (rv);
}
return (0);
}
/*
* Unregister a channel with the channel nexus
*/
static int
{
int rv = 0;
"i_ldc_unregister_channel: cnex has not registered\n");
return (EAGAIN);
}
/* Remove the Rx interrupt */
if (rv) {
"i_ldc_unregister_channel: err removing "
"Rx intr\n");
return (rv);
}
/*
* If interrupts are pending and handler has
* finished running, clear interrupt and try
* again
*/
return (rv);
if (rv) {
"err removing Rx interrupt\n");
return (rv);
}
}
/* Remove the Tx interrupt */
if (rv) {
"i_ldc_unregister_channel: err removing Tx intr\n");
return (rv);
}
/* Unregister the channel */
if (rv) {
"i_ldc_unregister_channel: cannot unreg channel\n");
return (rv);
}
}
return (0);
}
/*
 * LDC transmit interrupt handler
 * triggered for channel up/down/reset events
 * and Tx queue content changes
 */
static uint_t
{
int rv;
/* Get the channel for which interrupt was received */
/* Lock channel */
/* Obtain Tx lock */
/* mark interrupt as pending */
/* save current link state */
&ldcp->link_state);
if (rv) {
"i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
return (DDI_INTR_CLAIMED);
}
/*
* reset the channel state if the channel went down
* (other side unconfigured queue) or channel was reset
* (other side reconfigured its queue)
*/
}
}
}
/* if callbacks are disabled, do not notify */
if (!ldcp->cb_enabled)
if (notify_client) {
if (rv) {
}
}
return (DDI_INTR_CLAIMED);
}
/*
* LDC receive interrupt handler
* triggered for channel with data pending to read
* i.e. Rx queue content changes
*/
static uint_t
{
int rv;
uint64_t notify_event = 0;
/* Get the channel for which interrupt was received */
return (DDI_INTR_UNCLAIMED);
}
ldcp->link_state);
/* Lock channel */
/* mark interrupt as pending */
/*
* Read packet(s) from the queue
*/
for (;;) {
&ldcp->link_state);
if (rv) {
"i_ldc_rx_hdlr: (0x%lx) cannot read "
return (DDI_INTR_CLAIMED);
}
/*
* reset the channel state if the channel went down
* (other side unconfigured queue) or channel was reset
* (other side reconfigured its queue)
*/
switch (ldcp->link_state) {
case LDC_CHANNEL_DOWN:
goto loop_exit;
case LDC_CHANNEL_UP:
}
break;
case LDC_CHANNEL_RESET:
default:
#ifdef DEBUG
#endif
break;
}
}
#ifdef DEBUG
if (LDC_INJECT_RESET(ldcp))
goto force_reset;
#endif
break;
}
/* get the message */
/* if channel is in RAW mode or data pkt, notify and return */
break;
}
/* discard packet if channel is not up */
/* move the head one position */
break;
continue;
} else {
break;
}
}
/* Check the sequence ID for the message received */
if (rv != 0) {
/* Reset last_msg_rcd to start of message */
if (first_fragment != 0) {
first_fragment = 0;
}
/*
* Send a NACK due to seqid mismatch
*/
if (rv) {
"i_ldc_rx_hdlr: (0x%lx) err sending "
/* if cannot send NACK - reset channel */
break;
}
/* purge receive queue */
break;
}
/* record the message ID */
/* process control messages */
/* save current internal state */
/* re-process pkt - state was adjusted */
continue;
}
if (rv == ECONNRESET) {
break;
}
/*
* control message processing was successful
* channel transitioned to ready for communication
*/
(tstate & ~TS_IN_RESET) !=
}
}
/* process data NACKs */
break;
}
/* process data ACKs */
break;
}
}
/* move the head one position */
break;
}
} /* for */
/* if callbacks are disabled, do not notify */
if (!ldcp->cb_enabled)
	/*
	 * If there are data packets in the queue, ldc_read will
	 * clear interrupts after draining the queue; otherwise
	 * clear interrupts here
	 */
if ((notify_event & LDC_EVT_READ) == 0) {
} else
if (notify_client) {
if (rv) {
"i_ldc_rx_hdlr: (0x%llx) callback failure",
}
}
return (DDI_INTR_CLAIMED);
}
/* -------------------------------------------------------------------------- */
/*
* LDC API functions
*/
/*
 * Initialize the channel. Allocate internal structure and memory for
 * TX/RX queues, and initialize locks.
 */
int
{
return (EINVAL);
}
return (EINVAL);
}
/* check if channel is valid */
if (rv == H_ECHANNEL) {
return (EINVAL);
}
/* check if the channel has already been initialized */
id);
return (EADDRINUSE);
}
}
*handle = 0;
/* Allocate an ldcp structure */
/*
* Initialize the channel and Tx lock
*
* The channel 'lock' protects the entire channel and
* should be acquired before initializing, resetting,
* destroying or reading from a channel.
*
* The 'tx_lock' should be acquired prior to transmitting
* data over the channel. The lock should also be acquired
* prior to channel reconfiguration (in order to prevent
* concurrent writes).
*
* ORDERING: When both locks are being acquired, to prevent
* deadlocks, the channel lock should be always acquired prior
* to the tx_lock.
*/
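	/*
	 * Illustrative sketch (not part of the original file) of the
	 * ordering rule described above:
	 *
	 *	mutex_enter(&ldcp->lock);	channel lock first
	 *	mutex_enter(&ldcp->tx_lock);	then the Tx lock
	 *	...
	 *	mutex_exit(&ldcp->tx_lock);	release in reverse order
	 *	mutex_exit(&ldcp->lock);
	 */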
/* Initialize the channel */
/* Read attributes */
"ldc_init: (0x%llx) channel attributes, class=0x%x, "
"instance=0x%llx, mode=%d, mtu=%d\n",
ldcp->last_ack_rcd = 0;
ldcp->last_msg_rcd = 0;
/* Initialize payload size depending on whether channel is reliable */
case LDC_MODE_RAW:
break;
case LDC_MODE_UNRELIABLE:
break;
case LDC_MODE_RELIABLE:
break;
case LDC_MODE_STREAM:
ldcp->stream_remains = 0;
ldcp->stream_offset = 0;
break;
default:
goto cleanup_on_exit;
}
/*
* qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
* value is smaller than default length of ldc_queue_entries,
* qlen is set to ldc_queue_entries. Ensure that computed
* length is a power-of-two value.
*/
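	/*
	 * Sketch of the computation described above (the original code
	 * here is elided; nextpow2() stands in for the rounding step):
	 *
	 *	qlen = (ldc_mtu_msgs * ldcp->mtu) / ldcp->pkt_payload;
	 *	ldcp->rx_q_entries = (qlen < ldc_queue_entries) ?
	 *	    ldc_queue_entries : nextpow2(qlen);
	 */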
while (qlen) {
}
}
ldcp->rx_q_entries =
/* Create a transmit queue */
"ldc_init: (0x%lx) TX queue allocation failed\n",
goto cleanup_on_exit;
}
/* Create a receive queue */
"ldc_init: (0x%lx) RX queue allocation failed\n",
goto cleanup_on_exit;
}
/* Init descriptor ring and memory handle list lock */
/* mark status as INITialized */
/* Add to channel list */
ldcssp->channel_count++;
/* set the handle */
return (0);
if (ldcp)
return (exit_val);
}
/*
 * Finalizes the LDC connection. It will return EBUSY if the
 * channel is open. An ldc_close() has to be done before the
 * channel can be finalized.
 */
int
{
return (EINVAL);
}
return (EBUSY);
}
/* Remove from the channel list */
} else {
break;
}
}
return (EINVAL);
}
}
ldcssp->channel_count--;
/* Free the map table for this channel */
else
}
/* Destroy descriptor ring and memory handle list lock */
/* Free the stream buffer for STREAM_MODE */
/* Free the RX queue */
/* Free the TX queue */
/* Destroy mutex */
/* free channel structure */
return (0);
}
/*
 * Open the LDC channel for use. It registers the TX/RX queues
 * with the Hypervisor. It also specifies the interrupt number
 * and target CPU for this channel
 */
int
{
int rv;
return (EINVAL);
}
return (EFAULT);
}
return (EFAULT);
}
/*
* Unregister/Register the tx queue with the hypervisor
*/
if (rv) {
"ldc_open: (0x%lx) channel tx queue unconf failed\n",
return (EIO);
}
if (rv) {
"ldc_open: (0x%lx) channel tx queue conf failed\n",
return (EIO);
}
/*
* Unregister/Register the rx queue with the hypervisor
*/
if (rv) {
"ldc_open: (0x%lx) channel rx queue unconf failed\n",
return (EIO);
}
if (rv) {
"ldc_open: (0x%lx) channel rx queue conf failed\n",
return (EIO);
}
/* Register the channel with the channel nexus */
return (EIO);
}
/* mark channel in OPEN state */
/* Read channel state */
if (rv) {
"ldc_open: (0x%lx) cannot read channel state\n",
(void) i_ldc_unregister_channel(ldcp);
return (EIO);
}
/*
* set the ACKd head to current head location for reliable &
* streaming mode
*/
	/* mark channel ready if HV reports link is UP (peer alloc'd Rx queue) */
}
	/*
	 * if channel is being opened in RAW mode - no handshake is needed
	 * switch the channel to the READY and UP states
	 */
}
/*
* Increment number of open channels
*/
ldcssp->channels_open++;
"ldc_open: (0x%llx) channel (0x%p) open for use "
"(tstate=0x%x, status=0x%x)\n",
return (0);
}
/*
* Close the LDC connection. It will return EBUSY if there
* are memory segments or descriptor rings either bound to or
* mapped over the channel
*/
int
{
return (EINVAL);
}
/* return error if channel is not open */
return (EFAULT);
}
/* if any memory handles, drings, are bound or mapped cannot close */
"ldc_close: (0x%llx) channel has bound memory handles\n",
return (EBUSY);
}
"ldc_close: (0x%llx) channel has bound descriptor rings\n",
return (EBUSY);
}
"ldc_close: (0x%llx) channel has mapped descriptor rings\n",
return (EBUSY);
}
if (ldcp->cb_inprogress) {
return (EWOULDBLOCK);
}
/* Obtain Tx lock */
	/*
	 * Wait for pending transmits to complete, i.e., the Tx queue to drain;
	 * if there are pending pkts, wait 1 ms and retry
	 */
for (;;) {
if (rv) {
return (EIO);
}
break;
}
if (chk_done) {
"ldc_close: (0x%llx) Tx queue drain timeout\n",
break;
}
/* wait for one ms and try again */
}
	/*
	 * Drain the Tx and Rx queues as we are closing the
	 * channel. We don't care about any pending packets.
	 * We have to also drain the queue prior to clearing
	 * pending interrupts, otherwise the HV will trigger
	 * an interrupt the moment the interrupt state is
	 * cleared.
	 */
(void) i_ldc_txq_reconf(ldcp);
(void) i_ldc_rxq_drain(ldcp);
/*
* Unregister the channel with the nexus
*/
/* if any error other than EAGAIN return back */
"ldc_close: (0x%lx) unregister failed, %d\n",
return (rv);
}
/*
* As there could be pending interrupts we need
* to wait and try again
*/
retries++;
}
/*
* Unregister queues
*/
if (rv) {
"ldc_close: (0x%lx) channel TX queue unconf failed\n",
return (EIO);
}
if (rv) {
"ldc_close: (0x%lx) channel RX queue unconf failed\n",
return (EIO);
}
/* Reset channel state information */
/* Mark channel as down and in initialized state */
ldcp->tx_ackd_head = 0;
/* Decrement number of open channels */
ldcssp->channels_open--;
return (0);
}
/*
* Register channel callback
*/
int
{
"ldc_reg_callback: invalid channel handle\n");
return (EINVAL);
}
return (EINVAL);
}
return (EIO);
}
if (ldcp->cb_inprogress) {
return (EWOULDBLOCK);
}
"ldc_reg_callback: (0x%llx) registered callback for channel\n",
return (0);
}
/*
* Unregister channel callback
*/
int
{
"ldc_unreg_callback: invalid channel handle\n");
return (EINVAL);
}
"ldc_unreg_callback: (0x%llx) no callback exists\n",
return (EIO);
}
if (ldcp->cb_inprogress) {
"ldc_unreg_callback: (0x%llx) callback active\n",
return (EWOULDBLOCK);
}
"ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
return (0);
}
/*
* Bring a channel up by initiating a handshake with the peer
* This call is asynchronous. It will complete at a later point
* in time when the peer responds back with an RTR.
*/
int
{
int rv;
return (EINVAL);
}
/* clear the reset state */
"ldc_up: (0x%llx) channel is already in UP state\n",
/* mark channel as up */
	/*
	 * if channel was in reset state and there was
	 * pending data clear interrupt state. this will
	 * trigger an interrupt, causing the RX handler to
	 * invoke the client's callback
	 */
if ((tstate & TS_IN_RESET) &&
"ldc_up: (0x%llx) channel has pending data, "
}
return (0);
}
/* if the channel is in RAW mode - mark it as UP, if READY */
return (0);
}
/* Don't start another handshake if there is one in progress */
"ldc_up: (0x%llx) channel handshake in progress\n",
return (0);
}
/* save current link state */
/* get the current tail for the LDC msg */
if (rv) {
return (ECONNREFUSED);
}
/*
* If i_ldc_get_tx_tail() changed link_state to either RESET or UP,
* from a previous state of DOWN, then mark the channel as
* being ready for handshake.
*/
if ((link_state == LDC_CHANNEL_DOWN) &&
return (0);
} else {
}
}
/* initiate the send by calling into HV and set the new tail */
if (rv) {
"ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
return (rv);
}
return (rv);
}
/*
* Bring a channel down by resetting its state and queues
*/
int
{
return (EINVAL);
}
return (0);
}
/*
* Get the current channel status
*/
int
{
return (EINVAL);
}
return (0);
}
/*
 * Set the channel's callback mode - enable/disable callbacks
 */
int
{
"ldc_set_intr_mode: invalid channel handle\n");
return (EINVAL);
}
/*
* Record no callbacks should be invoked
*/
switch (cmode) {
case LDC_CB_DISABLE:
if (!ldcp->cb_enabled) {
"ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
break;
}
break;
case LDC_CB_ENABLE:
if (ldcp->cb_enabled) {
"ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
break;
}
break;
}
return (0);
}
/*
* Check to see if there are packets on the incoming queue
* Will return hasdata = B_FALSE if there are no packets
*/
int
{
int rv;
return (EINVAL);
}
return (ECONNRESET);
}
/* Read packet(s) from the queue */
&ldcp->link_state);
if (rv != 0) {
return (EIO);
}
/* reset the channel state if the channel went down */
return (ECONNRESET);
}
"ldc_chkq: (0x%llx) queue has pkt(s) or buffered data\n",
}
return (0);
}
/*
 * Read up to 'size' bytes. If the incoming message
 * is larger than 'size', ENOBUFS is returned.
 *
 * On return, size contains the number of bytes read.
 */
int
{
return (EINVAL);
}
/* channel lock */
"ldc_read: (0x%llx) channel is not in UP state\n",
} else {
}
/*
* if queue has been drained - clear interrupt
*/
&ldcp->link_state);
if (rv != 0) {
return (ECONNRESET);
}
if (exit_val == 0) {
}
if ((rv == 0) &&
}
}
return (exit_val);
}
/*
* Basic raw mondo read -
* no interpretation of mondo contents at all.
*
* Enter and exit with ldcp->lock held by caller
*/
static int
{
if (space < LDC_PAYLOAD_SIZE_RAW)
return (ENOBUFS);
/* compute mask for increment */
/*
* Read packet(s) from the queue
*/
&ldcp->link_state);
if (rv != 0) {
"ldc_read_raw: (0x%lx) unable to read queue ptrs",
return (EIO);
}
" rxt=0x%llx, st=0x%llx\n",
/* reset the channel state if the channel went down */
return (ECONNRESET);
}
/*
* Check for empty queue
*/
*sizep = 0;
return (0);
}
/* get the message */
/* if channel is in RAW mode, copy data and return */
return (rv);
}
/*
 * Process LDC mondos to build larger packets
 * with either unreliable or reliable delivery.
 *
 * Enter and exit with ldcp->lock held by caller
 */
static int
{
int rv = 0;
int retries = 0;
uint64_t first_fragment = 0;
/* check if the buffer and size are valid */
return (EINVAL);
}
/* compute mask for increment */
/*
* Read packet(s) from the queue
*/
&ldcp->link_state);
if (rv != 0) {
return (ECONNRESET);
}
/* reset the channel state if the channel went down */
goto channel_is_reset;
for (;;) {
if (rv != 0) {
"ldc_read: (0x%lx) cannot read queue ptrs",
return (ECONNRESET);
}
goto channel_is_reset;
/* If in the middle of a fragmented xfer */
if (first_fragment != 0) {
/* wait for ldc_delay usecs */
if (++retries < ldc_max_retries)
continue;
*sizep = 0;
return (EAGAIN);
}
*sizep = 0;
break;
}
}
retries = 0;
"ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
/* get the message */
/* Check the message ID for the message received */
/* throw away data */
bytes_read = 0;
/* Reset last_msg_rcd to start of message */
if (first_fragment != 0) {
first_fragment = 0;
}
/*
* Send a NACK -- invalid seqid
* get the current tail for the response
*/
if (rv) {
"ldc_read: (0x%lx) err sending "
/* if cannot send NACK - reset channel */
rv = ECONNRESET;
break;
}
/* purge receive queue */
break;
}
		/*
		 * Process any CTRL messages.
		 * Future implementations should try to pass these
		 * to the LDC link by resetting the intr state.
		 *
		 * NOTE: not done as a switch() as type can be both ctrl+data
		 */
continue;
*sizep = 0;
bytes_read = 0;
break;
}
}
/* process data ACKs */
*sizep = 0;
bytes_read = 0;
break;
}
}
/* process data NACKs */
return (ECONNRESET);
}
/* process data messages */
/* get the packet length */
			/*
			 * FUTURE OPTIMIZATION:
			 * don't need to set the q head for every
			 * packet we read; just need to do this when
			 * we are done or need to wait for more
			 * mondos to make a full packet - this is
			 * currently expensive.
			 */
if (first_fragment == 0) {
			/*
			 * first packets should always have the start
			 * bit set (even for a single packet). If not,
			 * throw away the packet
			 */
"ldc_read: (0x%llx) not start - "
/* toss pkt, inc head, cont reading */
bytes_read = 0;
& q_size_mask;
break;
continue;
}
} else {
/* check to see if this is a pkt w/ START bit */
"ldc_read:(0x%llx) unexpected pkt"
" env=0x%x discarding %d bytes,"
" lastmsg=%d, currentmsg=%d\n",
/* throw data we have read so far */
bytes_read = 0;
break;
}
}
/* copy (next) pkt into buffer */
bytes_read += len;
} else {
/*
* there is not enough space in the buffer to
* read this pkt. throw message away & continue
* reading data from queue
*/
"ldc_read: (0x%llx) buffer too small, "
first_fragment = 0;
bytes_read = 0;
/* throw away everything received so far */
break;
/* continue reading remaining pkts */
continue;
}
}
/* set the message id */
/* move the head one position */
/*
* All pkts that are part of this fragmented transfer
* have been read or this was a single pkt read
* or there was an error
*/
/* set the queue head */
bytes_read = 0;
*sizep = bytes_read;
break;
}
/* advance head if it is a DATA ACK */
/* set the queue head */
bytes_read = 0;
break;
}
}
} /* for (;;) */
/*
* If useful data was read - Send msg ACK
* OPTIMIZE: do not send ACK for all msgs - use some frequency
*/
/* if cannot send ACK - reset channel */
goto channel_is_reset;
}
}
return (rv);
return (ECONNRESET);
}
/*
* Use underlying reliable packet mechanism to fetch
* and buffer incoming packets so we can hand them back as
* a basic byte stream.
*
* Enter and exit with ldcp->lock held by caller
*/
static int
{
int rv;
if (ldcp->stream_remains == 0) {
if (rv != 0)
return (rv);
ldcp->stream_offset = 0;
}
return (0);
}
/*
 * Write the specified number of bytes to the channel
 * in multiple pkts of pkt_payload size. Each
 * packet is tagged with a unique packet ID in
 * the case of a reliable link.
 *
 * On return, size contains the number of bytes written.
 */
int
{
int rv = 0;
return (EINVAL);
}
/* check if writes can occur */
/*
* Could not get the lock - channel could
* be in the process of being unconfigured
* or reader has encountered an error
*/
return (EAGAIN);
}
/* check if non-zero data to write */
return (EINVAL);
}
if (*sizep == 0) {
return (0);
}
/* Check if channel is UP for data exchange */
"ldc_write: (0x%llx) channel is not in UP state\n",
*sizep = 0;
rv = ECONNRESET;
} else {
}
return (rv);
}
/*
* Write a raw packet to the channel
* On return, size contains the number of bytes written.
*/
static int
{
int rv = 0;
	/*
	 * Check to see if the packet size is less than or
	 * equal to the packet size supported in RAW mode
	 */
"ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
*sizep = 0;
return (EMSGSIZE);
}
/* get the qptrs for the tx queue */
if (rv != 0) {
*sizep = 0;
return (EIO);
}
*sizep = 0;
} else {
/*
* Release Tx lock, and then reacquire channel
* and Tx lock in correct order
*/
}
return (ECONNRESET);
}
*sizep = 0;
return (EWOULDBLOCK);
}
/* Send the data now */
/* copy the data into pkt */
/* increment tail */
/*
* All packets have been copied into the TX queue
* update the tail ptr in the HV
*/
if (rv) {
if (rv == EWOULDBLOCK) {
*sizep = 0;
return (EWOULDBLOCK);
}
*sizep = 0;
} else {
/*
* Release Tx lock, and then reacquire channel
* and Tx lock in correct order
*/
}
return (ECONNRESET);
}
return (rv);
}
/*
 * Write the specified number of bytes to the channel
 * in multiple pkts of pkt_payload size. Each
 * packet is tagged with a unique packet ID in
 * the case of a reliable link.
 *
 * On return, size contains the number of bytes written.
 * This function needs to ensure that the write size is < MTU size
 */
static int
{
int rv;
/* compute mask for increment */
/* get the qptrs for the tx queue */
if (rv != 0) {
*size = 0;
return (EIO);
}
*size = 0;
} else {
/*
* Release Tx lock, and then reacquire channel
* and Tx lock in correct order
*/
}
return (ECONNRESET);
}
/*
* Check to see if the queue is full. The check is done using
* the appropriate head based on the link mode.
*/
*size = 0;
return (EWOULDBLOCK);
}
/*
* Make sure that the LDC Tx queue has enough space
*/
return (EWOULDBLOCK);
}
/* Send the data now */
bytes_written = 0;
while (*size > bytes_written) {
curr_seqid++;
/* copy the data into pkt */
bytes_written += len;
/* increment tail */
}
/* Set the start and stop bits */
/*
* All packets have been copied into the TX queue
* update the tail ptr in the HV
*/
if (rv == 0) {
*size = bytes_written;
} else {
int rv2;
if (rv != EWOULDBLOCK) {
*size = 0;
} else {
/*
* Release Tx lock, and then reacquire channel
* and Tx lock in correct order
*/
}
return (ECONNRESET);
}
"old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
"(head 0x%x, tail 0x%x state 0x%x)\n",
*size = 0;
}
return (rv);
}
/*
 * Write the specified number of bytes to the channel
 * in multiple pkts of pkt_payload size. Each
 * packet is tagged with a unique packet ID in
 * the case of a reliable link.
 *
 * On return, size contains the number of bytes written.
 * This function needs to ensure that the write size is < MTU size
 */
static int
{
/* Truncate packet to max of MTU size */
}
/*
* Interfaces for channel nexus to register/unregister with LDC module
* The nexus will register functions to be used to register individual
* channels with the nexus and enable interrupts for the channels
*/
int
{
return (EINVAL);
}
/* nexus registration */
/* register any channels that might have been previously initialized */
while (ldcp) {
(void) i_ldc_register_channel(ldcp);
}
return (0);
}
int
{
return (EINVAL);
}
return (EINVAL);
}
/* nexus unregister */
return (0);
}
/* ------------------------------------------------------------------------- */
/*
* Allocate a memory handle for the channel and link it into the list
* Also choose which memory table to use if this is the first handle
* being assigned to this channel
*/
int
{
"ldc_mem_alloc_handle: invalid channel handle\n");
return (EINVAL);
}
	/* check to see if channel is initialized */
"ldc_mem_alloc_handle: (0x%llx) channel not initialized\n",
return (EINVAL);
}
/* allocate handle for channel */
/* initialize the lock */
/* insert memory handle (@ head) into list */
} else {
/* insert @ head */
}
/* return the handle */
return (0);
}
/*
* Free memory handle for the channel and unlink it from the list
*/
int
{
"ldc_mem_free_handle: invalid memory handle\n");
return (EINVAL);
}
"ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n",
mhdl);
return (EINVAL);
}
/* first handle */
"ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n",
} else {
/* walk the list - unlink and free */
"ldc_mem_free_handle: (0x%llx) freed "
break;
}
}
}
"ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl);
return (EINVAL);
}
return (0);
}
/*
* Bind a memory handle to a virtual address.
* The virtual address is converted to the corresponding real addresses.
* Returns pointer to the first ldc_mem_cookie and the total number
* of cookies for this virtual address. Other cookies can be obtained
* using the ldc_mem_nextcookie() call. If the pages are stored in
* consecutive locations in the table, a single cookie corresponding to
* the first location is returned. The cookie size spans all the entries.
*
* If the VA corresponds to a page that is already being exported, reuse
* the page and do not export it again. Bump the page's use count.
*/
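/*
 * Typical export-side usage (illustrative sketch; the flag values are
 * assumptions, not taken from this file):
 *
 *	ldc_mem_alloc_handle(chan, &mh);
 *	ldc_mem_bind_handle(mh, buf, len, LDC_SHADOW_MAP, LDC_MEM_RW,
 *	    &cookie, &ccount);
 */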
int
{
int i, rv;
"ldc_mem_bind_handle: invalid memory handle\n");
return (EINVAL);
}
/* clear count */
*ccount = 0;
"ldc_mem_bind_handle: (0x%x) handle already bound\n",
mhandle);
return (EINVAL);
}
/* Force address and size to be 8-byte aligned */
return (EINVAL);
}
	/*
	 * If this channel is binding a memory handle for the
	 * first time, allocate it a memory map table and initialize it
	 */
/* Allocate and initialize the map table structure */
/* Allocate the table itself */
/* allocate a page of memory using kmem_alloc */
"ldc_mem_bind_handle: (0x%llx) reduced tbl size "
}
/* zero out the memory */
/* initialize the lock */
/* register table for this channel */
if (rv != 0) {
"ldc_mem_bind_handle: (0x%lx) err %d mapping tbl",
else
return (EIO);
}
"ldc_mem_bind_handle: (0x%llx) alloc'd map table 0x%llx\n",
}
/* FUTURE: get the page size, pgsz code, and shift */
"va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
/* aligned VA and its offset */
"(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
/* lock the memory table - exclusive access to channel */
return (ENOMEM);
}
/* Allocate a memseg structure */
/* Allocate memory to store all pages and cookies */
/*
* Check if direct shared memory map is enabled, if not change
* the mapping type to include SHADOW_MAP.
*/
if (ldc_shmem_enabled == 0)
	/*
	 * Table slots are used in a round-robin manner. The algorithm permits
	 * inserting duplicate entries. Slots allocated earlier will typically
	 * get freed before we get back to reusing the slot. Inserting
	 * duplicate entries should be OK as we only lookup entries using the
	 * cookie addr, i.e. tbl index, during export, unexport and copy
	 * operations.
	 *
	 * One implementation that was tried was to search for a duplicate
	 * page entry first and reuse it. The search overhead is very high and
	 * in the vnet case dropped the perf by almost half, 50 to 24 Mbps.
	 * So it does make sense to avoid searching for duplicates.
	 *
	 * But during the process of searching for a free slot, if we find a
	 * duplicate entry we will go ahead and use it, and bump its use count.
	 */
/* index to start searching from */
cookie_idx = -1;
if (mtype & LDC_DIRECT_MAP) {
}
if (mtype & LDC_SHADOW_MAP) {
}
if (mtype & LDC_IO_MAP) {
}
/* initialize each mem table entry */
for (i = 0; i < npages; i++) {
/* check if slot is available in the table */
/* we have looped around */
"ldc_mem_bind_handle: (0x%llx) cannot find "
*ccount = 0;
/* NOTE: free memory, remove previous entries */
			/* this shouldn't happen as num_avail was ok */
return (ENOMEM);
}
}
/* get the real address */
/* build the mte */
/* update entry in table */
/* calculate the size and offset for this export range */
if (i == 0) {
/* first page */
} else if (i == (npages - 1)) {
/* last page */
if (psize == 0)
poffset = 0;
} else {
/* middle pages */
poffset = 0;
}
/* store entry for this page */
/* create the cookie */
cookie_idx++;
} else {
}
"(0x%llx) va=0x%llx, idx=0x%llx, "
"ra=0x%llx(sz=0x%x,off=0x%x)\n",
/* decrement number of available entries */
/* increment va by page size */
/* increment index */
prev_index = index;
/* save the next slot */
}
/* memory handle = bound */
/* update memseg_t */
/* return count and first cookie */
"ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, "
"pgs=0x%llx cookies=0x%llx\n",
return (0);
}
/*
* Return the next cookie associated with the specified memory handle
*/
int
{
"ldc_mem_nextcookie: invalid memory handle\n");
return (EINVAL);
}
if (cookie == 0) {
"ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n",
return (EINVAL);
}
if (memseg->next_cookie != 0) {
memseg->next_cookie++;
memseg->next_cookie = 0;
} else {
return (EINVAL);
}
"ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n",
return (0);
}
/*
 * Unbind the virtual memory region associated with the specified
 * memory handle. All associated cookies are freed and the corresponding
 * RA space is no longer exported.
 */
int
{
int i, rv;
"ldc_mem_unbind_handle: invalid memory handle\n");
return (EINVAL);
}
"ldc_mem_unbind_handle: (0x%x) handle is not bound\n",
mhandle);
return (EINVAL);
}
/* lock the memory table - exclusive access to channel */
/* undo the pages exported */
/* check for mapped pages, revocation cookie != 0 */
if (rv) {
"ldc_mem_unbind_handle: (0x%llx) cannot "
}
}
/* clear the entry from the table */
}
/* free the allocated memseg and page structures */
/* uninitialize the memory handle */
return (0);
}
/*
 * Get information about the memory segment associated with the
 * specified memory handle. The base address along with the type
 * and permission are returned back.
 */
int
{
return (EINVAL);
}
return (EINVAL);
}
}
return (0);
}
/*
* Copy data either from or to the client specified virtual address
* space to or from the exported memory associated with the cookies.
* The direction argument determines whether the data is read from or
* written to exported memory.
*/
int
{
int i, rv = 0;
return (EINVAL);
}
/* check to see if channel is UP */
chid);
return (ECONNRESET);
}
/* Force address and size to be 8-byte aligned */
return (EINVAL);
}
/* Find the size of the exported memory */
export_size = 0;
for (i = 0; i < ccount; i++)
/* check to see if offset is valid */
if (off > export_size) {
"ldc_mem_copy: (0x%llx) start offset > export mem size\n",
chid);
return (EINVAL);
}
/*
* Check to see if the export size is smaller than the size we
* are requesting to copy - if so flag an error
*/
"ldc_mem_copy: (0x%llx) copy size > export mem size\n",
chid);
return (EINVAL);
}
/* FUTURE: get the page size, pgsz code, and shift */
"(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
/* aligned VA and its offset */
"ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
len -= local_psize;
/*
* find the first cookie in the list of cookies
* if the offset passed in is not zero
*/
if (off < cookie_size)
break;
off -= cookie_size;
}
for (;;) {
"ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx,"
" loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx,"
" exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
" total_bal=0x%llx\n",
copy_size, &copied_len);
if (rv != 0) {
"ldc_mem_copy: (0x%llx) err %d during copy\n",
"ldc_mem_copy: (0x%llx) dir=0x%x, caddr=0x%lx, "
"loc_ra=0x%lx, exp_poff=0x%lx, loc_poff=0x%lx,"
" exp_psz=0x%lx, loc_psz=0x%lx, copy_sz=0x%lx,"
" copied_len=0x%lx, total_bal=0x%lx\n",
/*
* check if reason for copy error was due to
* a channel reset. we need to grab the lock
* just in case we have to do a reset.
*/
error = ECONNRESET;
}
return (error);
}
local_poff += copied_len;
total_bal -= copied_len;
if (copy_size != copied_len)
continue;
if (export_psize == 0 && total_bal != 0) {
if (cookie_size == 0) {
idx++;
} else {
export_caddr += pg_size;
export_poff = 0;
}
}
if (local_psize == 0 && total_bal != 0) {
local_valign += pg_size;
local_poff = 0;
len -= local_psize;
}
/* check if we are all done */
if (total_bal == 0)
break;
}
"ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n",
return (0);
}
/*
* Copy data either from or to the client specified virtual address
* space to or from HV physical memory.
*
* The direction argument determines whether the data is read from or
* written to HV memory. direction values are LDC_COPY_IN/OUT similar
* to the ldc_mem_copy interface
*/
int
{
int rv = 0;
"ldc_mem_rdwr_cookie: invalid channel handle\n");
return (EINVAL);
}
/* check to see if channel is UP */
"ldc_mem_rdwr_cookie: (0x%llx) channel is not UP\n",
return (ECONNRESET);
}
/* Force address and size to be 8-byte aligned */
return (EINVAL);
}
target_size = *size;
/* FUTURE: get the page size, pgsz code, and shift */
"(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
/* aligned VA and its offset */
"val=0x%llx,off=0x%x,pgs=0x%x\n",
len -= local_psize;
for (;;) {
"ldc_mem_rdwr_cookie: (0x%llx) dir=0x%x, tar_pa=0x%llx,"
" loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx,"
" tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
" total_bal=0x%llx\n",
copy_size, &copied_len);
if (rv != 0) {
"ldc_mem_rdwr_cookie: (0x%lx) err %d during copy\n",
"ldc_mem_rdwr_cookie: (0x%llx) dir=%lld, "
"tar_pa=0x%llx, loc_ra=0x%llx, tar_poff=0x%llx, "
"loc_poff=0x%llx, tar_psz=0x%llx, loc_psz=0x%llx, "
"copy_sz=0x%llx, total_bal=0x%llx\n",
return (i_ldc_h2v_error(rv));
}
local_poff += copied_len;
if (copy_size != copied_len)
continue;
if (target_psize == 0 && target_size != 0) {
target_poff = 0;
}
if (local_psize == 0 && target_size != 0) {
local_valign += pg_size;
local_poff = 0;
len -= local_psize;
}
/* check if we are all done */
if (target_size == 0)
break;
}
return (0);
}
/*
* Map an exported memory segment into the local address space. If the
* memory range was exported for direct map access, a HV call is made
* to allocate a RA range. If the map is done via a shadow copy, local
* shadow memory is allocated and the base VA is returned in 'vaddr'. If
* the mapping is a direct map then the RA is returned in 'raddr'.
*/
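/*
 * Typical import-side usage (illustrative sketch; signature assumed):
 *
 *	ldc_mem_map(mh, cookies, ccount, LDC_SHADOW_MAP, LDC_MEM_R,
 *	    &vaddr, &raddr);
 */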
int
{
return (EINVAL);
}
return (EINVAL);
}
"ldc_mem_dring_map: (0x%llx) channel is not UP\n",
return (ECONNRESET);
}
return (EINVAL);
}
/* FUTURE: get the page size, pgsz code, and shift */
/* calculate the number of pages in the exported cookie */
/* Allocate memseg structure */
/* Allocate memory to store all pages and cookies */
/*
* Check if direct map over shared memory is enabled, if not change
* the mapping type to SHADOW_MAP.
*/
if (ldc_shmem_enabled == 0)
/*
* Check to see if the client is requesting direct or shadow map
* If direct map is requested, try to map remote memory first,
* and if that fails, revert to shadow map
*/
if (mtype == LDC_DIRECT_MAP) {
/* Allocate kernel virtual space for mapping */
"ldc_mem_map: (0x%lx) memory map failed\n",
(sizeof (ldc_mem_cookie_t) * ccount));
(sizeof (ldc_page_t) * npages));
return (ENOMEM);
}
/* Unload previous mapping */
/* for each cookie passed in - map into address space */
idx = 0;
cookie_size = 0;
for (i = 0; i < npages; i++) {
if (cookie_size == 0) {
pg_size);
idx++;
}
/* map the cookie into address space */
retries++) {
break;
}
"ldc_mem_map: (0x%llx) hv mapin err %d\n",
/* remove previous mapins */
for (j = 0; j < i; j++) {
rv = hv_ldc_unmap(
if (rv) {
"ldc_mem_map: (0x%llx) "
"cannot unmap ra=0x%llx\n",
}
}
/* free kernel virtual space */
map_size);
/* direct map failed - revert to shadow map */
break;
} else {
"ldc_mem_map: (0x%llx) vtop map 0x%llx -> "
"0x%llx, cookie=0x%llx, perm=0x%llx\n",
cookie_addr, perm);
				/*
				 * NOTE: Calling hat_devload directly causes it
				 * to look for page_t using the pfn. Since this
				 * addr is greater than the memlist, it treats
				 * it as non-memory
				 */
"ldc_mem_map: (0x%llx) ra 0x%llx -> "
cookie_size -= pg_size;
cookie_addr += pg_size;
}
}
}
if (mtype == LDC_SHADOW_MAP) {
} else {
/*
* Use client supplied memory for memseg->vaddr
* WARNING: assuming that client mem is >= exp_size
*/
}
/* Save all page and cookie information */
}
}
/* save all cookies */
/* update memseg_t */
memseg->next_cookie = 0;
/* memory handle = mapped */
"va=0x%llx, pgs=0x%llx cookies=0x%llx\n",
if (mtype == LDC_SHADOW_MAP)
base_off = 0;
if (raddr)
if (vaddr)
return (0);
}
/*
* Unmap a memory segment. Free shadow memory (if any).
*/
int
{
int i, rv;
"ldc_mem_unmap: (0x%llx) handle is not mapped\n",
mhandle);
return (EINVAL);
}
/* if we allocated shadow memory - free it */
/* unmap in the case of DIRECT_MAP */
if (rv) {
"ldc_mem_map: (0x%lx) hv unmap err %d\n",
}
}
}
/* free the allocated memseg and page structures */
/* uninitialize the memory handle */
return (0);
}
/*
* Internal entry point for LDC mapped memory entry consistency
* semantics. Acquire copies the contents of the remote memory
* into the local shadow copy. The release operation copies the local
* contents into the remote memory. The offset and size specify the
* bounds for the memory range being synchronized.
*/
static int
{
int err;
"i_ldc_mem_acquire_release: invalid memory handle\n");
return (EINVAL);
}
"i_ldc_mem_acquire_release: not mapped memory\n");
return (EINVAL);
}
/* do nothing for direct map */
return (0);
}
/* do nothing if COPY_IN+MEM_W and COPY_OUT+MEM_R */
return (0);
}
"i_ldc_mem_acquire_release: memory out of range\n");
return (EINVAL);
}
/* get the channel handle and memory segment */
"i_ldc_mem_acquire_release: copy failed\n");
return (err);
}
}
return (0);
}
/*
 * Ensure that the contents of the remote memory seg are consistent
 * with the contents of the local segment
 */
int
ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
{
	/* restored sketch: copy local shadow contents out to remote memory */
	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size));
}
/*
 * Ensure that the contents of the local memory seg are consistent
 * with the contents of the remote segment
 */
int
ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
{
	/* restored sketch: copy remote memory contents into the local shadow */
	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size));
}
/*
 * Allocate a descriptor ring. The size of each descriptor
 * must be 8-byte aligned and the entire ring should be a multiple
 * of MMU_PAGESIZE.
 */
int
{
return (EINVAL);
}
if (len == 0) {
return (EINVAL);
}
/* descriptor size should be 8-byte aligned */
return (EINVAL);
}
*dhandle = 0;
/* Allocate a desc ring structure */
/* Initialize dring */
/* round off to multiple of pagesize */
	if (size & MMU_PAGEOFFSET)
		size = (size & MMU_PAGEMASK) + MMU_PAGESIZE;	/* assumed */
/* allocate descriptor ring memory */
/* initialize the desc ring lock */
/* Add descriptor ring to the head of global list */
return (0);
}
/*
* Destroy a descriptor ring.
*/
int
{
"ldc_mem_dring_destroy: invalid desc ring handle\n");
return (EINVAL);
}
"ldc_mem_dring_destroy: desc ring is bound\n");
return (EACCES);
}
/* remove from linked list - if not bound */
if (tmp_dringp == dringp) {
} else {
while (tmp_dringp != NULL) {
break;
}
}
if (tmp_dringp == NULL) {
"ldc_mem_dring_destroy: invalid descriptor\n");
return (EINVAL);
}
}
/* free the descriptor ring */
/* destroy dring lock */
/* free desc ring object */
return (0);
}
/*
 * Bind a previously allocated dring to a channel. The channel should
 * be OPEN in order to bind the ring to the channel. Returns back a
 * descriptor ring cookie. The descriptor ring is exported for remote
 * access by the client at the other end of the channel. An entry for
 * the dring pages is stored in the map table (via a call to
 * ldc_mem_bind_handle).
 */
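/*
 * Typical export-side dring sequence (illustrative sketch; signatures
 * assumed from the LDC API):
 *
 *	ldc_mem_dring_create(len, dsize, &dh, &base);
 *	ldc_mem_dring_bind(chan, dh, LDC_SHADOW_MAP, LDC_MEM_RW,
 *	    &cookie, &ccount);
 */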
int
{
int err;
	/* check to see if channel is initialized */
"ldc_mem_dring_bind: invalid channel handle\n");
return (EINVAL);
}
"ldc_mem_dring_bind: invalid desc ring handle\n");
return (EINVAL);
}
"ldc_mem_dring_bind: invalid cookie arg\n");
return (EINVAL);
}
"ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n",
return (EINVAL);
}
if ((perm & LDC_MEM_RW) == 0) {
"ldc_mem_dring_bind: invalid permissions\n");
return (EINVAL);
}
return (EINVAL);
}
	/* create a memory handle */
"ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n",
return (err);
}
/* bind the descriptor ring to channel */
if (err) {
"ldc_mem_dring_bind: (0x%llx) error binding mhandle\n",
return (err);
}
/*
* For now return error if we get more than one cookie
* FUTURE: Return multiple cookies ..
*/
if (*ccount > 1) {
(void) ldc_mem_unbind_handle(mhandle);
(void) ldc_mem_free_handle(mhandle);
*ccount = 0;
return (EAGAIN);
}
/* Add descriptor ring to channel's exported dring list */
return (0);
}
/*
* Return the next cookie associated with the specified dring handle
*/
int
{
int rv = 0;
"ldc_mem_dring_nextcookie: invalid desc ring handle\n");
return (EINVAL);
}
"ldc_mem_dring_nextcookie: descriptor ring 0x%llx "
"is not bound\n", dringp);
return (EINVAL);
}
"ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n",
return (EINVAL);
}
return (rv);
}
/*
* Unbind a previously bound dring from a channel.
*/
int
{
"ldc_mem_dring_unbind: invalid desc ring handle\n");
return (EINVAL);
}
"ldc_mem_dring_bind: descriptor ring 0x%llx is unbound\n",
dringp);
return (EINVAL);
}
if (tmp_dringp == dringp) {
} else {
while (tmp_dringp != NULL) {
break;
}
}
if (tmp_dringp == NULL) {
"ldc_mem_dring_unbind: invalid descriptor\n");
return (EINVAL);
}
}
return (0);
}
/*
* Get information about the dring. The base address of the descriptor
* ring along with the type and permission are returned back.
*/
int
{
int rv;
"ldc_mem_dring_info: invalid desc ring handle\n");
return (EINVAL);
}
if (rv) {
"ldc_mem_dring_info: error reading mem info\n");
return (rv);
}
} else {
}
return (0);
}
/*
* Map an exported descriptor ring into the local address space. If the
* descriptor ring was exported for direct map access, a HV call is made
* to allocate a RA range. If the map is done via a shadow copy, local
* shadow memory is allocated.
*/
int
{
int err;
"ldc_mem_dring_map: invalid dhandle\n");
return (EINVAL);
}
	/* check to see if channel is initialized */
"ldc_mem_dring_map: invalid channel handle\n");
return (EINVAL);
}
"ldc_mem_dring_map: (0x%llx) invalid cookie\n",
return (EINVAL);
}
/* FUTURE: For now we support only one cookie per dring */
return (EINVAL);
}
*dhandle = 0;
	/* Allocate a dring structure */
"ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
/* Initialize dring */
	/* round off to multiple of page size */
	if (dring_size & MMU_PAGEOFFSET)
		dring_size = (dring_size & MMU_PAGEMASK) + MMU_PAGESIZE; /* assumed */
	/* create a memory handle */
"ldc_mem_dring_map: cannot alloc hdl err=%d\n",
err);
return (ENOMEM);
}
/* map the dring into local memory */
"ldc_mem_dring_map: cannot map desc ring err=%d\n", err);
(void) ldc_mem_free_handle(mhandle);
return (ENOMEM);
}
/* initialize the desc ring lock */
/* Add descriptor ring to channel's imported dring list */
return (0);
}
/*
* Unmap a descriptor ring. Free shadow memory (if any).
*/
int
{
"ldc_mem_dring_unmap: invalid desc ring handle\n");
return (EINVAL);
}
"ldc_mem_dring_unmap: not a mapped desc ring\n");
return (EINVAL);
}
/* find and unlink the desc ring from channel import list */
if (tmp_dringp == dringp) {
} else {
while (tmp_dringp != NULL) {
break;
}
}
if (tmp_dringp == NULL) {
"ldc_mem_dring_unmap: invalid descriptor\n");
return (EINVAL);
}
}
/* do a LDC memory handle unmap and free */
/* destroy dring lock */
/* free desc ring object */
return (0);
}
/*
* Internal entry point for descriptor ring access entry consistency
* semantics. Acquire copies the contents of the remote descriptor ring
* into the local shadow copy. The release operation copies the local
* contents into the remote dring. The start and end locations specify
* bounds for the entries being synchronized.
*/
static int
{
int err;
"i_ldc_dring_acquire_release: invalid desc ring handle\n");
return (EINVAL);
}
"i_ldc_dring_acquire_release: not a mapped desc ring\n");
return (EINVAL);
}
"i_ldc_dring_acquire_release: index out of range\n");
return (EINVAL);
}
/* get the channel handle */
/* Calculate the relative offset for the first desc */
if (err) {
"i_ldc_dring_acquire_release: copy failed\n");
return (err);
}
/* do the balance */
soff = 0;
if (err) {
"i_ldc_dring_acquire_release: copy failed\n");
return (err);
}
}
return (0);
}
/*
 * Ensure that the contents of the local dring are consistent
 * with the contents of the remote dring
 */
int
ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
{
	/* restored sketch: copy remote dring entries into the local shadow */
	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end));
}
/*
 * Ensure that the contents of the remote dring are consistent
 * with the contents of the local dring
 */
int
ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
{
	/* restored sketch: copy local shadow entries out to the remote dring */
	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end));
}
/* ------------------------------------------------------------------------- */