/* wrsm_tl.c revision 7c478bd95313f5f23a4c958a745db2134aa03244 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
 * Transport Layer of the Wildcat RSM driver. This module provides RPC
 * and datagram messaging services for use by the other modules of the
 * driver.
 */
#include <sys/wci_common.h>
#include <sys/wrsm_transport.h>
#include <sys/wrsm_cmmu.h>
#include <sys/wrsm_intr.h>
#include <sys/wrsm_session.h>
#include <sys/wrsm_memseg.h>
#include <sys/wrsm_memseg_impl.h>
/*
 * Manifest Constants and Macros
 */
#ifdef DEBUG
/* Debug-message category bits, presumably consumed by DPRINTF -- confirm */
#define TLDBG 0x1
#define TLWARN 0x2
#define TLERR 0x4
#define TLTRACE 0x8
#define TLDUMP 0x10
/* NOTE(review): DEBUG-side DPRINTF definition appears elided from this excerpt */
#else /* DEBUG */
/* Non-DEBUG builds compile debug printing away entirely */
#define DPRINTF(a, b) { }
#endif /* DEBUG */
/*
* Set RPC message highwater mark to 44%, lowwater to 20% of packetring
* size. 44% allows reply messages and other messages that don't go through
* the flow control to have some space.
*/
/*
 * Highwater is documented above as 44% of the packet ring; the previous
 * value of 88 contradicted that comment and would leave almost no
 * headroom for reply messages that bypass flow control.
 */
#define WRSM_TL_RPC_HIGHWATER_PERCENT (44)
#define WRSM_TL_RPC_LOWWATER_PERCENT (20)
/* Absolute lowwater mark, in packets, derived from the packet ring size */
#define WRSM_TL_RPC_LOWWATER (((WRSM_TL_PACKETRING_SIZE) * \
(WRSM_TL_RPC_LOWWATER_PERCENT)) / 100)
/*
 * Message IDs go from 1 through 0x7fffffff. ID 0 is reserved as a null id.
 * The MSB of the message ID is reserved as an RPC response flag. If this
 * bit is set, the message is a response to the message whose ID is stored
 * in the lower 31 bits.
 */
#define MESSAGE_ID_INVALID 0
#define MESSAGE_ID_FIRST 1
#define MESSAGE_ID_MAX 0x7fffffff
#define MESSAGE_ID_RPCRESP 0x80000000
/* Masks an address/offset down to a 64-byte boundary (~0x3f clears low 6 bits) */
#define MASK_ALIGN (~0x3f)
/*
* RPC timeout:
* The nr_event_thread that awakens to process response events may actually
* be delayed by up to 1.5 seconds if the processor it is scheduled to run on
* is in the middle of running wrsm_lc_clear_cmmu() for another controller.
* This function calls wrsm_lc_cmmu_update() 2,097,152 times, resulting in the
* observed delay. Therefore, we set the RPC timeout to 5 seconds to be safe.
*/
/*
* This tunable controls the number of worker threads available per taskq.
* There is one taskq for each of the 7 categories of messages, so multiply
* this value by 7 to get the total number of worker threads created.
*/
/* #define msgcpy(dest, src) bcopy((dest), (src), sizeof (wrsm_message_t)) */
/*
 * NOTE(review): this function's name and parameter list are missing from
 * this excerpt. From the loop below it iterates over every byte of a
 * message body (WRSM_MESSAGE_BODY_SIZE), presumably to clear or copy it --
 * confirm against the full source before relying on this description.
 */
void
{
unsigned i;
for (i = 0; i < WRSM_MESSAGE_BODY_SIZE; i++) {
}
}
/*
 * Local Typedefs
 */
/*
 * The following is an element in a linked list of RPC's waiting for a
 * response. The message_id is used to match response with original
 * message, and *response is where the user wants the response placed.
 */
/* NOTE(review): the member lists of these structs are elided in this excerpt */
typedef struct pending_rpc {
struct pending_rpc *next;
struct pending_rpc *prev;
/*
 * The wrsm_transport holds the state of a given instance of
 * the transport.
 */
typedef struct tl_cnode {
} tl_cnode_t;
typedef struct tl_event {
} tl_event_t;
struct wrsm_transport {
/* event thread handling */
/* taskqs for WRSM_MSG_SEGMENT_* messages */
};
/*
 * Event Handling
 */
/*
 * NOTE(review): the three functions below are heavily elided in this
 * excerpt (names, parameters, and most statements missing); the comments
 * retained describe their intended roles.
 */
/* Processes events for the event thread. */
static void
{
}
}
/*
 * Event thread. Handles TL message processing so we don't have to
 * process in the interrupt thread
 */
static void
tl_event_thread(void *arg)
{
callb_generic_cpr, "tl_event_thread");
/* Loop until fini requests shutdown via tl->stop_events */
while (!tl->stop_events) {
}
/*
 * CALLB_CPR_EXIT() calls mutex_exit() on the
 * lock passed into CALLB_CPR_INIT() above, therefore
 * we don't want to call mutex_exit() here. See
 */
thread_exit();
}
/* Adds an event to the event queue and wakes up the event thread */
static void
{
/* Drop events silently once shutdown has begun */
if (tl->stop_events) {
return;
}
if (evt) {
} else {
}
}
/*
 * Utility functions
 */
/* Implements a ping service */
/* NOTE(review): signature elided; always claims success (B_TRUE) */
static boolean_t
{
return (B_TRUE);
}
#ifdef DEBUG
/* Debug function to print pending RPC list info */
static void
{
pending_rpc_t *p;
(void *)p->prev));
}
}
#endif /* DEBUG */
/*
 * Adds a pending_rpc entry to the linked list.
 * Returns 0 on success, EAGAIN if interrupted while waiting below the
 * highwater mark (see the retval comment inside).
 * NOTE(review): most statements are elided in this excerpt.
 */
static int
{
int retval;
/* Check to see if pending rpc is above highwater mark */
/* retval of 0 indicates a signal was received */
return (EAGAIN);
}
}
/* We got in! */
/* Point to who used to be first */
/* If someone else used to be first on list... */
/* Make them point back at me */
}
/* Change list to point to me */
return (0);
}
/* Removes a pending_rpc entry from the linked list */
static void
{
/*
 * If there's a previous node, make them point to our next
 * node. If not, we were first, so update head pointer.
 */
} else {
}
/*
 * If there's a next node, have them point
 * back to whomever we pointed back to.
 */
}
/* If rpc count is falling below lowwater, wake up waiting threads */
}
}
/*
 * Constructs and initializes a wrsm_transport structure.
 * NOTE(review): allocation and per-type initialization statements are
 * elided in this excerpt; only the loop skeleton and id reset remain.
 */
static wrsm_transport_t *
alloc_state(void)
{
int i;
for (i = 0; i < WRSM_MSG_TYPES_MAX; i++) {
}
/* Start with no message ids handed out; first alloc yields MESSAGE_ID_FIRST */
tl->last_message_id = 0;
return (tl);
}
/* Cleans-up and destroys a wrsm_transport structure */
static void
{
}
}
/*
 * Handles incoming messages (interrupt handler).
 * Always returns RSM_INTR_HAND_CLAIMED_EXCLUSIVE, whether the message is
 * processed, dispatched to the event thread, matched to a pending RPC,
 * or dropped after a validation failure.
 * NOTE(review): many statements are elided in this excerpt.
 */
/* ARGSUSED */
static rsm_intr_hand_ret_t
void *data,
{
NOTE("wrsm_tl::intr_handler");
#ifdef DEBUG
}
#endif /* DEBUG */
/* Drop messages whose transport version doesn't match ours */
"local version %u, remote version %u",
return (RSM_INTR_HAND_CLAIMED_EXCLUSIVE);
}
/*
 * Verify that the msg->source_cnode matches where the
 * interrupt came from.
 */
"msg->header.source_cnode %d != from_cnode %d",
return (RSM_INTR_HAND_CLAIMED_EXCLUSIVE);
}
"sender %d is not reachable; dropping message",
from_cnode));
return (RSM_INTR_HAND_CLAIMED_EXCLUSIVE);
}
/*
 * All messages with an identifier in the _SESSION_ or _RECVQ_
 * range could possibly either block, or take locks from other
 * threads that could block. Therefore, handle them on the
 * event thread.
 */
} else {
}
}
/*
 * This is an RPC response and handler was successful
 */
pending_rpc_t *p;
/* Walk the linked list, looking for matching message id */
if (p->message_id == orig_id) {
mutex_enter(&p->mutex);
/* Clear message id, so we don't reenter. */
p->message_id = MESSAGE_ID_INVALID;
/* Copy response to holding area */
/* Set flag indicating response was received */
p->resp_recvd = B_TRUE;
/* Wake up the waiting thread */
mutex_exit(&p->mutex);
break;
}
}
/* Response arrived but no RPC is waiting for it -- log and (DEBUG) dump list */
if (!p) {
"rpc resp but no one waiting: 0x%08X 0x%08X",
#ifdef DEBUG
list_print(tl);
#endif
}
/* LINTED: E_NOP_IF_STMT */
/*
 * This is a datagram and we don't have a handler
 */
"no handler for message_type %u",
}
return (RSM_INTR_HAND_CLAIMED_EXCLUSIVE);
}
/*
 * Formats and sends a message, used by dg, rpc and resp.
 * Returns 0 on success, EPIPE if the destination is unreachable, EAGAIN
 * if a registered send handler vetoes the message, or the last send
 * failure code after retries are exhausted.
 * NOTE(review): many statements are elided in this excerpt.
 */
static int
{
int retry;
int retval;
/* Validate the message. */
/* Validate that we know how to get to destination */
destination));
return (EPIPE);
}
destination));
return (EPIPE);
}
/* Call send function handler before sending message. */
if (h) {
/* Handler returning B_FALSE vetoes the send */
if (handler_rc == B_FALSE) {
"failed rc=%d handler=%p", handler_rc, (void *)h));
return (EAGAIN);
}
}
/* Calculate remote address, taking into account striping */
/*
 * Advance the offset by the link stripe stride, so we tend to
 * distribute interrupts across link and WCI stripes, but make
 * sure not to exceed the page size!
 *
 * Note that this should be atomic, but the worst thing that
 * can happen is we send two interrupts in a row on the same
 * link which is probably less of a performance impact than
 * grabbing a mutex for every single interrupt.
 */
/* Send the message */
#ifdef DEBUG
}
#endif /* DEBUG */
/* Stop retrying on success */
if (retval == 0) {
break;
}
}
return (retval);
}
/*
 * Atomically allocates a unique message id.
 * Increments tl->last_message_id; presumably wraps back to
 * MESSAGE_ID_FIRST at MESSAGE_ID_MAX -- confirm against full source.
 */
static wrsm_messageid_t
{
tl->last_message_id++;
}
return (msg_id);
}
/*
 * Transport API functions
 */
/*
 * Transport init: brings up the interrupt component, allocates transport
 * state, starts the event thread and taskqs, and registers handlers.
 * Returns 0 on success or the interrupt component's error code.
 * NOTE(review): most statements are elided in this excerpt.
 */
int
{
int retval;
/* First, initialize interrupt component */
if (retval) {
return (retval);
}
/* Allocate structure to store transport state */
tl = alloc_state();
/* Spin up the event thread for this transport */
/*
 * Allocate all the taskq's for MSG_SEGMENT handlers. You need to
 * allocate one for each message type we handle in this way in order
 * to guarantee that one message type blocking doesn't interfere with
 * any other message types.
 *
 * The global variable wrsm_tl_tqthreads controls the number of
 * threads servicing -each- taskq.
 */
/* Hook structure into network */
/* Register with the interrupt component for driver messages */
/* Register ping message handler with ourselves */
return (0);
}
/* Transport fini: tears down in reverse order of init */
void
{
/* First, validate pointers */
/* Unregister from interrupt component */
/* stop the taskq's and wait for them to drain */
/* Stop and exit the event thread */
/* Last, fini interrupt component */
}
/*
 * NOTE(review): the four functions below (new-cnode, remove-cnode,
 * reachable, unreachable -- names elided in this excerpt) manage per-cnode
 * transport state and the mapping of the remote interrupt page.
 */
int
{
int retval = RSM_SUCCESS;
if (retval) {
return (retval);
}
NULL,
if (retval) {
return (retval);
}
return (RSM_SUCCESS);
}
/* Remove-cnode: returns ENOENT if the cnode was never set up */
int
{
int rc = RSM_SUCCESS;
}
"no recvq - tl_newcnode never called\n", cnodeid));
return (ENOENT);
}
/* Unmap remote address if it had been mapped */
PAGESIZE);
}
return (rc);
}
/* Reachable: maps one page of the remote ncslice for sending interrupts */
void
{
int retval;
/* We've never been able to reach this node before... */
PAGESIZE);
if (retval != DDI_SUCCESS) {
retval);
}
}
#ifdef DEBUG
"ncslice %u off 0x%p mapped to va=0x%p, pa=0x%p, nc=%u",
(void *)pa,
ncslice));
}
#endif /* DEBUG */
}
/* Unreachable: undoes the mapping; no-op if node was never reachable */
void
{
/* Node was never reachable */
return;
}
}
/*
 * Registers receive/send handlers for a message type.
 * Fails if a handler of either kind is already registered for msg_type;
 * only installs the new handlers when rc is still 0.
 * NOTE(review): signature and several statements are elided in this excerpt.
 */
int
{
int rc = RSM_SUCCESS;
"receive handler already exists for msg type %u",
msg_type));
}
"send handler already exists for msg type %u",
msg_type));
}
if (!rc) {
}
return (rc);
}
/* Thin wrapper -- presumably datagram send or handler removal; body elided */
int
{
int rc;
return (rc);
}
/*
 * Sends an RPC and blocks (with timeout) for the matching response.
 * Allocates a message id, links a pending_rpc entry ("me", a local) onto
 * the waiting list so the interrupt handler can match the response, sends,
 * then cv_timedwait()s on me.mutex until resp_recvd or timeout.
 * Returns RSM_SUCCESS, EAGAIN if the entry couldn't be listed, or the
 * send/timeout error.
 * NOTE(review): many statements are elided in this excerpt.
 */
int
{
int rc = RSM_SUCCESS;
/* Allocate ourselves a message id now, before it's too late */
/* Create and initialize a pending rpc item in linked list */
/* Don't need to use mutex, cause we're still not listed */
/* Add me to the front of the waiting rpc list */
/* Failed to add to list, return error */
return (EAGAIN);
}
/* Grab our mutex -- response could come before we get to wait */
/* Send the message */
if (rc == RSM_SUCCESS) {
/* Wait for rpc response */
timeout_ticks = ddi_get_lbolt() +
/*
 * If cv_timedwait returns -1, condition was "not necessarily"
 * signaled. To see if response was actually received, we
 * really need to check resp_recvd flag.
 */
if (!me.resp_recvd) {
}
/* LINTED: E_NOP_ELSE_STMT */
} else {
}
if (rc != RSM_SUCCESS) {
}
#ifdef DEBUG
/*
 * can't take tl->mutex while holding me.mutex, so do debug stuff here
 */
if (!me.resp_recvd) {
list_print(tl);
}
#endif
return (rc);
}
/* Thin wrapper -- presumably sends an RPC response; body elided */
int
{
return (rc);
}
/*
 * NOTE(review): the declarator lines of the two session helpers below are
 * missing from this excerpt. The first looks up (or establishes) the
 * session id for a message; the second tests whether a session id is
 * valid.
 */
{
/*
 * If this message is a response, don't try to establish
 * a new session, just get the current session id or
 * SESS_ID_INVALID if the session has ended.
 */
} else {
/*
 * If this message is not a response, try to establish
 * a new session if one doesn't already exist.
 */
}
}
/* True when session_id is valid (and, per the elided clause, still current) */
{
return ((session_id != SESS_ID_INVALID) &&
}
/*
 * Generic memseg message handler for the following message types:
 * CONNECT SMALLPUTMAP BARRIERMAP SEGMAP DISCONNECT UNPUBLISH ACCESS
 *
 * Note: This must reside in wrsm_tl.c because it needs to know about
 * the innards of the wrsm_transport structure.
 *
 * Returns B_FALSE for unknown nodes, invalid sessions, or once taskqs are
 * stopping; otherwise dispatches to the per-type taskq and returns B_TRUE.
 * NOTE(review): the declarator and several case labels/bodies are elided
 * in this excerpt.
 */
{
/* non-existent node */
return (B_FALSE);
}
/* Verify that this is a message type we support */
/* session must not be valid */
return (B_FALSE);
}
/*
 * Allocate the args structure, to be passed to the event handlers.
 * NEEDS TO BE DEALLOCATED IN THE HANDLERS!
 */
/* grab the taskq's lock so they don't disappear out from under us */
if (tl->stop_taskqs) {
return (B_FALSE);
}
/* export segment events */
case WRSM_MSG_SEGMENT_CONNECT:
break;
break;
break;
case WRSM_MSG_SEGMENT_SEGMAP:
break;
break;
/* import segment events */
break;
case WRSM_MSG_SEGMENT_ACCESS:
break;
}
return (B_TRUE);
}
#ifdef DEBUG
/*
 * Maps a message type to a printable name for debug output.
 * NOTE(review): the function name line and many case labels are elided in
 * this excerpt; orphaned return statements below belong to those missing
 * cases.
 */
static char *
{
switch (type) {
case WRSM_MSG_ACK:
return ("ACK");
case WRSM_MSG_NACK:
return ("NACK");
case WRSM_MSG_PING:
return ("PING");
case WRSM_MSG_PING_RESPONSE:
return ("PING_RESPONSE");
case WRSM_MSG_CONFIG_COOKIE:
return ("CONFIG_COOKIE");
return ("CONFIG_PASSTHROUGH_LIST");
return ("CONFIG_PASSTHROUGH_LIST_RESPONSE");
return ("CONFIG_CNODE_ACCESS");
case WRSM_MSG_SESSION_START:
return ("SESSION_START");
return ("SESSION_START_RESPONSE");
case WRSM_MSG_SESSION_END:
return ("SESSION_END");
case WRSM_MSG_SEGMENT_CONNECT:
return ("SEGMENT_CONNECT");
return ("SEGMENT_CONNECT_RESPONSE");
return ("SEGMENT_SMALLPUTMAP");
return ("SEGMENT_SMALLPUTMAP_RESPONSE");
return ("SEGMENT_BARRIERMAP");
return ("SEGMENT_BARRIERMAP_RESPONSE");
case WRSM_MSG_SEGMENT_SEGMAP:
return ("SEGMENT_SEGMAP");
return ("SEGMENT_SEGMAP_RESPONSE");
return ("SEGMENT_DISCONNECT");
return ("SEGMENT_UNPUBLISH");
return ("SEGMENT_UNPUBLISH_RESPONSE");
case WRSM_MSG_SEGMENT_ACCESS:
return ("SEGMENT_ACCESS");
return ("SEGMENT_ACCESS_RESPONSE");
return ("INTR_RECVQ_CREATE");
return ("INTR_RECVQ_CREATE_RESPONSE");
return ("INTR_RECVQ_DESTROY");
default:
return ("Unknown");
}
}
/*
 * Hex-dumps a message body, 8 bytes per line, for debugging.
 * NOTE(review): name/parameter lines elided in this excerpt.
 */
void
{
unsigned i;
for (i = 0; i < WRSM_MESSAGE_BODY_SIZE; i += 8) {
"%02X %02X %02X %02X %02X %02X %02X %02X\n",
}
}
#endif /* DEBUG */