ds.c revision 0d63ce2b32a9e1cc8ed71d4d92536c44d66a530a
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Domain Services Module
*
* The Domain Services (DS) module is responsible for communication
* with external service entities. It provides an API for clients to
* publish capabilities and handles the low level communication and
* version negotiation required to export those capabilities to any
* interested service entity. Once a capability has been successfully
* registered with a service entity, the DS module facilitates all
* data transfers between the service entity and the client providing
* that particular capability.
*/
#include <sys/mach_descrip.h>
/*
* All DS ports in the system
*
* The list of DS ports is read in from the MD when the DS module is
* initialized and is never modified. This eliminates the need for
* locking to access the port array itself. Access to the individual
* ports is synchronized at the port level.
*/
/*
* Table of registered services
*
* Locking: Accesses to the table of services are synchronized using
* a RW lock. The reader lock must be held when looking up service
* information in the table. The writer lock must be held when any
* service information is being modified.
*/
static struct ds_svcs {
} ds_svcs;
/* initial size of the table */
#define DS_MAXSVCS_INIT 32
/*
* Lock Usage
*
* ds_svcs.rwlock
*
* See the comment above the definition of the ds_svcs structure.
*
* ds_port mutex
*
* Protects the elements of each port structure. Must be acquired for
* access to any of the elements.
*
* ds_log mutex
*
* See comment above definition of ds_log structure.
*
* Multiple lock requirements:
*
* Some code will need to access both a ds_svc_t structure and
* a ds_port_t. In that case, the acquisition order must be:
*
* ds_svcs.rwlock -> port lock
*/
/*
* Taskq for internal task processing
*/
/*
* The actual required number of parallel threads is not expected
* to be very large. Use the maximum number of CPUs in the system
* as a rough upper bound.
*/
#define DS_MAX_TASKQ_THR NCPU
/*
* Retry count and delay for LDC reads and writes
*/
static int ds_retries = DS_DEFAULT_RETRIES;
/*
* Supported versions of the DS message protocol
*
* The version array must be sorted in order from the highest
* supported version to the lowest. Support for a particular
* <major>.<minor> version implies all lower minor versions of
* that same major version are supported as well.
*/
/*
* Results of checking version array with ds_vers_isvalid()
*/
typedef enum {
/* incoming message handling functions */
/*
* DS Message Handler Dispatch Table
*
* A table used to dispatch all incoming messages. This table
* contains handlers for all the fixed message types, as well as
* the messages defined in the 1.0 version of the DS protocol.
*/
static const ds_msg_handler_t ds_msg_handlers[] = {
/*
* NOTE(review): the trailing comment on each entry names the message
* type that the handler serves, which suggests the table is indexed
* by message type. If so, the order of the entries must match the
* numeric order of those DS_* constants -- confirm against their
* definitions before adding or reordering entries.
*/
ds_handle_init_req, /* DS_INIT_REQ */
ds_handle_init_ack, /* DS_INIT_ACK */
ds_handle_init_nack, /* DS_INIT_NACK */
ds_handle_reg_req, /* DS_REG_REQ */
ds_handle_reg_ack, /* DS_REG_ACK */
ds_handle_reg_nack, /* DS_REG_NACK */
ds_handle_unreg_req, /* DS_UNREG */
ds_handle_unreg_ack, /* DS_UNREG_ACK */
ds_handle_unreg_nack, /* DS_UNREG_NACK */
ds_handle_data, /* DS_DATA */
ds_handle_nack /* DS_NACK */
};
/*
* DS message log
*
* Locking: The message log is protected by a single mutex. This
* protects all fields in the log structure itself as well as
* everything in the entry structures on both the log and the
* free list.
*/
static struct log {
} ds_log;
/* log soft limit */
/* initial pool of log entry structures */
/*
* Debugging Features
*/
#ifdef DEBUG
#define DS_DBG_FLAG_LDC 0x1
#define DS_DBG_FLAG_LOG 0x2
#define DS_DBG_FLAG_MSG 0x4
#define DS_DBG_FLAG_ALL 0xf
#else /* DEBUG */
#define DS_DBG_LDC DS_DBG
#define DS_DBG_LOG DS_DBG
#endif /* DEBUG */
/* initialization functions */
static void ds_init(void);
static void ds_fini(void);
static int ds_ports_init(void);
static int ds_ports_fini(void);
/* event processing functions */
static void ds_dispatch_event(void *arg);
static void ds_handle_recv(void *arg);
/* message sending functions */
/* walker functions */
/* service utilities */
static ds_svc_t *ds_alloc_svc(void);
/* port utilities */
/* misc utilities */
/* log functions */
static void ds_log_init(void);
static void ds_log_fini(void);
static int ds_log_remove(void);
static void ds_log_purge(void *arg);
"Domain Services %I%"
};
static struct modlinkage modlinkage = {
(void *)&modlmisc,
};
/*
* NOTE(review): presumably the loadable module _init(9E) entry
* point -- confirm.
*
* Runs the internal setup (ds_init()) and then initializes the DS
* ports (ds_ports_init()). If port initialization fails, the internal
* setup is undone with ds_fini() and the error from ds_ports_init()
* is returned to the caller.
*/
int
_init(void)
{
int rv;
/*
* Perform all internal setup before initializing
* the DS ports. This ensures that events can be
* processed as soon as the port comes up.
*/
ds_init();
if ((rv = ds_ports_init()) != 0) {
ds_fini();
return (rv);
}
/*
* NOTE(review): this ds_ports_fini()/ds_fini() pair unwinds both
* setup steps in reverse order and belongs to a second failure
* path; confirm the condition that guards it.
*/
(void) ds_ports_fini();
ds_fini();
}
return (rv);
}
int
{
}
/*
* NOTE(review): presumably the loadable module _fini(9E) entry
* point -- confirm.
*
* Tears down the DS ports with ds_ports_fini() and then undoes the
* internal setup with ds_fini(), i.e. the reverse of the order used
* by _init(). Returns rv.
*/
int
_fini(void)
{
int rv;
/* ports go first; the return value of ds_ports_fini() is ignored */
(void) ds_ports_fini();
ds_fini();
}
return (rv);
}
/*
* Perform the internal (non-port) setup for the DS module.
*
* Initializes the message log with ds_log_init() and marks the
* module as enabled by setting ds_enabled to B_TRUE. _init() calls
* this function before ds_ports_init() so that events can be
* processed as soon as a port comes up.
*/
static void
ds_init(void)
{
int tblsz;
/*
* Initialize table of registered service classes
*/
/*
* Initialize the message log.
*/
ds_log_init();
/*
* Create taskq for internal processing threads. This
* includes processing incoming request messages and
* sending out of band registration messages.
*/
/*
* The LDC event handling code returns early while ds_enabled
* is false, so nothing is processed until this is set.
*/
ds_enabled = B_TRUE;
/* catch problems with the version array */
}
/*
* Undo the setup performed by ds_init().
*
* Following the step comments below, teardown proceeds in this
* order: stop event dispatch, destroy the taskq, destroy the message
* log (ds_log_fini()), and finally deallocate the table of
* registered services.
*/
static void
ds_fini(void)
{
int idx;
/*
* Flip the enabled switch to make sure that no
* incoming events get dispatched while things
* are being torn down.
*/
/*
* Destroy the taskq.
*/
/*
* Destroy the message log.
*/
ds_log_fini();
/*
* Deallocate the table of registered services
*/
/* clear out all entries */
/* should have gone through the whole table */
/* destroy the table itself */
}
/*
* Initialize the list of ports based on the MD.
*
* Returns 0 on success or -1 on failure. Every exit taken through
* the done label releases the MD handle with md_fini_handle(); when
* rv is non-zero at that point, any ports set up so far are torn
* down again with ds_ports_fini() first.
*/
static int
ds_ports_init(void)
{
int idx;
int rv = 0;
int num_nodes;
int listsz;
int nport;
int nchan;
return (-1);
}
/* allocate temporary storage for MD scans */
/*
* The root of the search for DS port nodes is the
* DS node. Perform a scan to find that node.
*/
if (nport <= 0) {
goto done;
}
/* expecting only one DS node */
if (nport != 1) {
DS_DBG("expected one '%s' node in the MD, found %d\n",
}
/* find all the DS ports in the MD */
if (nport <= 0) {
goto done;
}
/*
* Initialize all the ports found in the MD.
*/
/* get the channels for this port */
if (nchan <= 0) {
rv = -1;
goto done;
}
/* expecting only one channel */
if (nchan != 1) {
DS_DBG("expected one '%s' node for DS port, found %d\n",
}
rv = -1;
goto done;
}
}
/*
* Initialize the LDC channel for each port.
*/
continue;
if (ds_ldc_init(port)) {
} else {
DS_DBG("ds@%lx: ports_init: initialization complete\n",
}
}
rv = 0;
done:
/* common exit: undo partial port setup on failure, always drop the MD */
if (rv != 0)
(void) ds_ports_fini();
(void) md_fini_handle(mdp);
return (rv);
}
/*
* Tear down every initialized port: shut down the LDC for the port
* with ds_ldc_fini() and then clean up the port structure.
*
* Returns 0. The result of ds_ldc_fini() is deliberately ignored, so
* a failure there is not reported to the caller.
*/
static int
ds_ports_fini(void)
{
int idx;
/*
* Tear down each initialized port.
*/
continue;
/* shut down the LDC for this port */
(void) ds_ldc_fini(port);
}
/* clean up the port structure */
}
return (0);
}
static int
{
int rv;
goto done;
}
/* register the LDC callback */
goto done;
}
goto done;
}
DS_DBG_LDC("ds@%lx: ldc_init: initial LDC state 0x%x\n",
/* if port is up, send init message */
}
done:
return (rv);
}
static int
{
int rv;
return (rv);
}
return (rv);
}
return (rv);
}
return (rv);
}
/*
* A DS event consists of a buffer on a port.
*
* NOTE(review): only the buffer member is present in this
* definition; confirm where the port half of the event is carried.
*/
typedef struct ds_event {
/* buffer belonging to this event */
char *buf;
} ds_event_t;
static uint_t
{
int rv;
if (!ds_enabled) {
return (LDC_SUCCESS);
}
/*
* Check the LDC event.
*/
/* reset the port state */
/* read status after bringing LDC up */
goto done;
}
/*
* If the channel is already up, initiate
* the handshake.
*/
goto done;
}
if (event & LDC_EVT_UP) {
goto done;
}
/* initiate the handshake */
}
if (event & LDC_EVT_READ) {
/* dispatch a thread to handle the read event */
}
}
if (event & LDC_EVT_WRITE) {
DS_DBG("ds@%lx: LDC write event received, not supported\n",
goto done;
}
/* report any unknown LDC events */
}
done:
return (LDC_SUCCESS);
}
/*
* Attempt to read a specified number of bytes from a particular LDC.
* Returns zero for success or the return code from the LDC read on
* failure. The actual number of bytes read from the LDC is returned
* in the size parameter.
*/
static int
{
int rv = 0;
int retry_count = 0;
*sizep = 0;
while (bytes_left > 0) {
nbytes = bytes_left;
break;
} else {
if (nbytes != 0) {
DS_DBG_LDC("ds@%lx: read %ld bytes, %d "
bytes_left -= nbytes;
/* reset counter on a successful read */
retry_count = 0;
continue;
}
/*
* No data was read. Check if this is the
* first attempt. If so, just return since
* nothing has been read yet.
*/
if (bytes_left == bytes_req) {
DS_DBG_LDC("ds@%lx: read zero bytes, no data "
break;
}
}
/*
* A retry is necessary because the read returned
* EAGAIN, or a zero length read occurred after
* reading a partial message.
*/
if (retry_count++ >= ds_retries) {
DS_DBG_LDC("ds@%lx: timed out waiting for "
break;
}
}
return (rv);
}
static void
ds_handle_recv(void *arg)
{
char *hbuf;
char *currp;
int rv;
/*
* Read messages from the channel until there are none
* pending. Valid messages are dispatched to be handled
* by a separate thread while any malformed messages are
* dropped.
*/
/*
* Read in the next message.
*/
/* read in the message header */
continue;
}
/*
* A zero length read is a valid signal that
* there is no data left on the channel.
*/
if (read_size != 0) {
"length, received %ld bytes, expected %ld",
}
continue;
}
/* get payload size and allocate a buffer */
/* move message header into buffer */
/* read in the message body */
continue;
}
/* validate the size of the message */
continue;
}
/*
* Send the message for processing, and store it
* in the log. The memory is deallocated only when
* the message is removed from the log.
*/
/* log the message */
/* send the message off to get processed in a new thread */
continue;
}
}
}
static void
ds_dispatch_event(void *arg)
{
goto done;
}
done:
}
/*
* Version negotiation is always initiated by the guest. Any
* attempt by a remote party to initiate the handshake gets
* nack'd with a major number equal to zero. This indicates
* that no version is supported since an init request is not
* expected.
*/
static void
{
char *msg;
/* sanity check the incoming message */
} else {
}
nack->major_vers = 0;
/* send message */
}
static void
{
/* sanity check the incoming message */
return;
}
return;
}
/* agreed upon a major version */
/*
* If the returned minor version is larger than
* the requested minor version, use the lower of
* the two, i.e. the requested version.
*/
/*
* Use the minor version specified in the
* original request.
*/
} else {
/*
* Use the lower minor version returned in
* the ack. By definition, all lower minor
* versions must be supported.
*/
}
/*
* The port came up, so update all the services
* with this information. Follow that up with an
* attempt to register any service that is not
* already registered.
*/
}
static void
{
int idx;
/* sanity check the incoming message */
return;
}
return;
}
if (nack->major_vers == 0) {
/* no supported protocol version */
return;
}
/*
* Walk the version list, looking for a major version
* that is as close to the requested major version as
* possible.
*/
/* found a version to try */
goto done;
}
}
if (idx == DS_NUM_VER) {
/* no supported version */
return;
}
done:
/* start the handshake again */
}
static void
{
char *msg;
/* the request information */
/* sanity check the incoming message */
} else {
DS_DBG("ds@%lx: <reg_req: id='%s', ver=%d.%d, hdl=0x%09lx\n",
req->svc_handle);
}
nack->major_vers = 0;
/* send message */
}
static void
{
/* sanity check the incoming message */
return;
}
/* lookup appropriate client */
goto done;
}
/* make sure the message makes sense */
goto done;
}
/* major version has been agreed upon */
/*
* Use the minor version specified in the
* original request.
*/
} else {
/*
* Use the lower minor version returned in
* the ack. By definition, all lower minor
* versions must be supported.
*/
}
/* notify the client that registration is complete */
/*
* Use a temporary version structure so that
* the copy in the svc structure cannot be
* modified by the client.
*/
}
done:
}
static void
{
int idx;
/* sanity check the incoming message */
return;
}
/* lookup appropriate client */
goto done;
}
/* make sure the message makes sense */
goto done;
}
goto done;
}
/*
* A major version of zero indicates that the
* service is not supported at all.
*/
if (nack->major_vers == 0) {
goto done;
}
/*
* Walk the version list for the service, looking for
* a major version that is as close to the requested
* major version as possible.
*/
/* found a version to try */
break;
}
}
/* no supported version */
DS_DBG("ds@%lx: <reg_nack: %s v%d.x not supported\n",
goto done;
}
/* start the handshake again */
done:
if (reset_svc)
}
static void
{
char *msg;
/* sanity check the incoming message */
return;
}
/* the request information */
/* lookup appropriate client */
goto done;
}
/* unregister the service */
/* send message */
done:
}
static void
{
/* sanity check the incoming message */
return;
}
/*
* Since the unregister request was initiated locally,
* the service structure has already been torn down.
* Just perform a sanity check to make sure the message
* is appropriate.
*/
}
}
static void
{
/* sanity check the incoming message */
return;
}
nack->svc_handle);
/*
* Since the unregister request was initiated locally,
* the service structure has already been torn down.
* Just perform a sanity check to make sure the message
* is appropriate.
*/
}
}
static void
{
char *msg;
int msgsz;
int hdrsz;
/* sanity check the incoming message */
return;
}
/* strip off the header for the client */
/* lookup appropriate client */
return;
}
/* dispatch this message to the client */
}
static void
{
/* sanity check the incoming message */
return;
}
return;
}
}
}
static int
{
int rv;
int loopcnt = 0;
/*
* ensure that no other messages can be sent on this port in case
* the write doesn't get sent with one write to guarantee that the
* message doesn't become fragmented.
*/
/* send the message */
do {
} else {
"ds@%lx: send_msg: ldc_write failed (%d)",
return (rv);
}
} else {
loopcnt = 0;
}
} while (amt_left > 0);
return (rv);
}
static void
{
return;
}
/* send the message */
}
}
static int
{
/* assumes some checking has already occurred */
/* check on the LDC to Zeus */
/* can not send message */
return (-1);
}
/* make sure port is ready */
/* can not send message */
return (-1);
}
/* allocate the message buffer */
/* copy in the header data */
/* copy in the service id */
/* send the message */
return (-1);
} else {
}
return (0);
}
static int
{
DS_DBG("send_unreg_req: service '%s' not associated with "
return (-1);
}
/* check on the LDC to Zeus */
/* can not send message */
return (-1);
}
/* make sure port is ready */
/* can not send message */
return (-1);
}
/* copy in the header data */
/* send the message */
return (-1);
}
return (0);
}
static void
{
/* check on the LDC to Zeus */
/* can not send message */
return;
}
/* make sure port is ready */
/* can not send message */
return;
}
/* copy in the header data */
/* send the message */
}
static void
{
/* check on the LDC to Zeus */
/* can not send message */
return;
}
/* make sure port is ready */
/* can not send message */
return;
}
/* copy in the header data */
/* send the message */
}
#ifdef DEBUG
#define BYTESPERLINE 8
/*
* Output a buffer formatted with a set number of bytes on
* each line. Append each line with the ASCII equivalent of
* each byte if it falls within the printable ASCII range,
* and '.' otherwise.
*/
static void
{
int i, j;
char *curr;
char *aoff;
/* abort if not debugging messages */
if (!(ds_debug & DS_DBG_FLAG_MSG)) {
return;
}
/* walk the buffer one line at a time */
for (i = 0; i < len; i += BYTESPERLINE) {
/*
* Walk the bytes in the current line, storing
* the hex value for the byte as well as the
* ASCII representation in a temporary buffer.
* All ASCII values are placed at the end of
* the line.
*/
for (j = 0; (j < BYTESPERLINE) && ((i + j) < len); j++) {
curr += 3;
aoff++;
}
/*
* Fill in to the start of the ASCII translation
* with spaces. This will only be necessary if
* this is the last line and there are not enough
* bytes to fill the whole line.
*/
*curr++ = ' ';
}
}
#endif /* DEBUG */
/*
* Walk the table of registered services, executing the specified
* callback function for each service. A non-zero return value from
* the callback is used to terminate the walk, not to indicate an
* error. Returns the index of the last service visited.
*/
static int
{
int idx;
/* walk every table entry */
/* execute the callback */
break;
}
return (idx);
}
static int
{
/*
* Looking for a free service. This may be a NULL entry
* in the table, or an unused structure that could be
* reused.
*/
if (DS_SVC_ISFREE(svc)) {
/* yes, it is free */
return (1);
}
/* not a candidate */
return (0);
}
static int
{
if (DS_SVC_ISFREE(svc)) {
return (0);
}
/* found a match */
return (1);
}
return (0);
}
static int
{
return (0);
}
}
}
return (0);
}
static int
{
int idx;
/* check the state of the service */
return (0);
/* check if there are any ports to try */
return (0);
/*
* Attempt to register the service. Start with the lowest
* numbered port and continue until a registration message
* is sent successfully, or there are no ports left to try.
*/
/*
* If the port is not in the available list,
* it is not a candidate for registration.
*/
continue;
}
if (ds_send_reg_req(svc) == 0) {
/* register sent successfully */
break;
}
/* reset the service to try the next port */
}
return (0);
}
static int
{
if (DS_SVC_ISFREE(svc)) {
return (0);
}
/* make sure the service is using this port */
return (0);
}
/* reset the service structure */
/* increment the count in the handle to prevent reuse */
/* call the client unregister callback */
/* try to initiate a new registration */
return (0);
}
static int
{
if (DS_SVC_ISFREE(svc)) {
/* nothing to do */
return (0);
}
return (0);
}
static ds_svc_t *
ds_alloc_svc(void)
{
int idx;
goto found;
}
/*
* There was no free space in the table. Grow
* the table to double its current size.
*/
/* copy old table data to the new table */
}
/* clean up the old table */
/* search for a free space again */
/* the table is locked so should find a free slot */
/* allocate a new svc structure if necessary */
/* allocate a new service */
}
/* fill in the handle */
return (newsvc);
}
static void
{
}
static ds_svc_t *
{
int idx;
if (hdl == DS_INVALID_HDL)
return (NULL);
/* check if index is out of bounds */
return (NULL);
/* check for a valid service */
if (DS_SVC_ISFREE(svc))
return (NULL);
/* make sure the handle is an exact match */
return (NULL);
return (svc);
}
static int
{
/* get the ID for this port */
return (-1);
}
/* sanity check the port id */
if (port_id > DS_MAX_PORT_ID) {
port_id);
return (-1);
}
/* get the channel ID for this port */
port_id);
return (-1);
}
/* get the port structure from the array of ports */
/* check for a duplicate port in the MD */
port_id);
return (-1);
}
/* initialize the port lock */
/* initialize the port */
/* add the port to the set of all ports */
return (0);
}
static void
{
/* connection went down, mark everything inactive */
}
/*
* Verify that a version array is sorted as expected for the
* version negotiation to work correctly.
*/
static ds_vers_check_t
{
int idx;
/*
* Walk the version array, verifying correct ordering.
* The array must be sorted from highest supported
* version to lowest supported version.
*/
DS_DBG("vers_isvalid: version array has increasing "
"major versions\n");
return (DS_VERS_INCREASING_MAJOR_ERR);
}
continue;
}
DS_DBG("vers_isvalid: version array has increasing "
"minor versions\n");
return (DS_VERS_INCREASING_MINOR_ERR);
}
}
return (DS_VERS_OK);
}
/*
* Logging Support
*/
static void
ds_log_init(void)
{
/* initialize global lock */
/* initialize the log */
/* initialize the free list */
}
DS_DBG_LOG("ds_log initialized: size=%d bytes, limit=%d bytes, "
}
static void
ds_log_fini(void)
{
/* clear out the log */
(void) ds_log_remove();
/*
* Now all the entries are on the free list.
* Clear out the free list, deallocating any
* entry that was dynamically allocated.
*/
}
}
}
static ds_log_entry_t *
ds_log_entry_alloc(void)
{
}
/* free list was empty */
}
return (new);
}
static void
{
return;
}
/* place entry on the free list */
}
/*
* Add a message to the end of the log
*/
static int
{
} else {
}
/* increase the log size, including the metadata size */
DS_DBG_LOG("ds_log: added %ld data bytes, %ld total bytes\n",
return (0);
}
/*
* Remove an entry from the head of the log
*/
static int
ds_log_remove(void)
{
/* empty list */
return (0);
/* one element list */
} else {
}
DS_DBG_LOG("ds_log: removed %ld data bytes, %ld total bytes\n",
return (0);
}
/*
* Replace the data in the entry at the front of the list with the
* new data. This has the effect of removing the oldest entry and
* adding the new entry.
*/
static int
{
DS_DBG_LOG("ds_log: replaced %ld data bytes (%ld total) with %ld data "
return (0);
}
static void
ds_log_purge(void *arg)
{
DS_DBG_LOG("ds_log: purging oldest log entries\n");
(void) ds_log_remove();
}
}
static int
{
int rv = 0;
/* check if the log is larger than the soft limit */
/*
* The log is larger than the soft limit.
* Swap the oldest entry for the newest.
*/
DS_DBG_LOG("ds_log: replacing oldest entry with new entry\n");
} else {
/*
* Still have headroom under the soft limit.
* Add the new entry to the log.
*/
new = ds_log_entry_alloc();
/* fill in message data */
}
/* check if the log is larger than the hard limit */
/*
* Wakeup the thread to remove entries
* from the log until it is smaller than
* the soft limit.
*/
DS_DBG_LOG("ds_log: log exceeded %d bytes, scheduling a "
"purge...\n", DS_LOG_LIMIT);
"start");
}
}
return (rv);
}
/*
* Client Interface
*/
int
{
int idx;
/* sanity check the args */
return (EINVAL);
}
/* sanity check the capability specifier */
return (EINVAL);
}
/* sanity check the version array */
"increasing major versions" :
"increasing minor versions");
return (EINVAL);
}
/* data and register callbacks are required */
return (EINVAL);
}
DS_DBG("ds_cap_init: svc_id='%s', data_cb=0x%lx, cb_arg=0x%lx\n",
/* check if the service is already registered */
/* already registered */
return (EALREADY);
}
svc = ds_alloc_svc();
/* copy over all the client information */
/* make a copy of the service name */
/* make a copy of the version array */
/* copy the client ops vector */
/* attempt to register the service */
DS_DBG("ds_cap_init: service '%s' assigned handle 0x%09lx\n",
return (0);
}
int
{
int idx;
/* make sure the service is registered */
/* service is not registered */
return (EINVAL);
}
/*
* Attempt to send an unregister notification. Even
* if sending the message fails, the local unregister
* request must be honored, since this indicates that
* the client will no longer handle incoming requests.
*/
(void) ds_send_unreg_req(svc);
/*
* Clear out the structure, but do not deallocate the
* memory. It can be reused for the next registration.
*/
/* save the handle to prevent reuse */
/* initialize for next use */
return (0);
}
int
{
int rv;
return (EINVAL);
}
return (ECONNRESET);
}
/* check that the LDC channel is ready */
return (ECONNRESET);
}
/* channel is up, but svc is not registered */
return (EINVAL);
}
}
DS_DBG("ds@%lx: data>: hdl=0x%09lx, len=%ld, payload_len=%d\n",
}
return (rv);
}