/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* This file contains the Starcat Solaris Mailbox Client module. This module
* handles mailbox messages from the SC to the OS (as opposed to messages sent
* to specific drivers) and vice versa. Two task queues are created upon
* startup; one handles reading and processing of all incoming messages, while
* the other handles transmission of all outgoing messages.
*/
#include <sys/sysmacros.h>
#include <sys/cpu_sgnblk_defs.h>
#include <sys/machsystm.h>
#include <sys/iosramio.h>
#include <sys/plat_ecc_unum.h>
#include <sys/plat_ecc_dimm.h>
#include <sys/plat_datapath.h>
/* mailbox keys */
/* mailbox commands */
/* general constants */
/*
* Boolean constants. Each definition is guarded so that a value already
* supplied by an included header is left untouched.
*/
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
/*
* When a message needs to be sent to the SC, an scosmb_msgdata_t should be
* populated with the data to be used for the message, and a call to
* scosmb_process_output should be dispatched on the scosmb_output_taskq, with
* the address of the scosmb_msgdata_t structure as its arg. The "length" and
* "data" fields can be used if the message needs to include data beyond the
* header fields (type, cmd, and transid) and that information must be recorded
* when the message is placed on the taskq. If appropriate for the message type
* (e.g. nodename info that should always be the most recent available), the
* "data" field can be set to NULL and the additional data can be assembled
* immediately prior to sending the message in scosmb_process_output().
*
* If log_error is set, any errors in delivering the message cause a
* cmn_err() message to be issued. If it is zero, the error is expressed
* only through return values.
*/
typedef struct {
int log_error;
void *data;
/*
* Datapath error and fault messages arrive unsolicited. The message data
* is contained in a plat_datapath_info_t structure.
*/
typedef struct {
/* externally visible routines */
/* local routines */
static void scosmb_inbox_handler();
static void scosmb_process_input(void *unused);
/* local variables */
static char *dperrtype[] = {
};
/*
* Structures from modctl.h used for loadable module support.
* SCOSMB is a "miscellaneous" module.
*/
extern struct mod_ops mod_miscops;
"Sun Fire 15000 OS Mbox Client v1.10",
};
(void *)&modlmisc,
};
/*
* _init
*
* Loadable module support routine. Initializes mutex and condition variables
* and starts thread.
*/
int
_init(void)
{
int error;
/*
* Initialize the mailboxes
*/
scosmb_inbox_handler)) != 0) {
error);
return (error);
}
error);
(void) mboxsc_fini(SCDM_KEY);
return (error);
}
/*
* Initialize the global lock
*/
/*
* Create the task queues used for processing input and output messages
*/
/*
* Attempt to install the module. If unsuccessful, uninitialize
* everything.
*/
if (error != 0) {
(void) mboxsc_fini(DMSC_KEY);
(void) mboxsc_fini(SCDM_KEY);
}
return (error);
}
/*
* _fini
*
* Loadable module support routine. Since this module shouldn't be unloaded (it
* provides a critical service, and its symbols may be referenced externally),
* EBUSY is returned to prevent unloading.
*
* Takes no arguments and unconditionally returns EBUSY.
*/
int
_fini(void)
{
/* Always refuse the unload request; no teardown is attempted here. */
return (EBUSY);
}
/*
* _info
*
* Loadable module support routine.
*/
int
{
int error = 0;
return (error);
}
/*
* scosmb_inbox_handler() - mbox API event handler.
*
* This routine adds an entry to the scosmb_input_taskq that will cause the
* scosmb_process_input() routine to be called to service the SCDM mailbox. The
* possibility that taskq_dispatch may fail when given KM_NOSLEEP is safely
* ignored because there can only be one message waiting in the mailbox at any
* given time, so the current message will end up being handled by one of the
* previously queued jobs (and a previous message presumably timed out before we
* got around to reading it).
*/
static void
{
}
/*
* dp_get_cores()
*
* Checks cpu implementation for the input cpuid and returns
* the number of cores.
* If implementation cannot be determined, returns 1
*/
static int
{
if (slot == 1)
else
/* find first with valid implementation */
break;
}
return (2);
else
return (1);
}
/*
* dp_payload_add_cpus()
*
* From datapath mailbox message, determines the number of and safari IDs
* for affected cpus, then adds this info to the datapath ereport.
*
* Input maxcat (if set) is a count of maxcat cpus actually present - it is
* a count of cpuids, which takes into account multi-core architecture.
*/
static int
{
/* check for multiple core architectures */
/*
* Determine the number of cpu cores impacted
*/
case DP_CDS_TYPE:
if (maxcat)
nummaxcpus = ncores;
else
break;
case DP_DX_TYPE:
if (maxcat)
else
break;
case DP_EX_TYPE:
if (maxcat)
else
break;
case DP_CP_TYPE:
/*
* SC-DE supplies the base cpuid affected; if a
* maxcat id was given, there's no slot 0 board
* present.
*/
if (!maxcat) {
/* Slot 0 id was given - set numcpus */
}
break;
default:
ASSERT(0);
return (-1);
}
/* Allocate space for cores */
/*
* populate dparray with impacted cores (only those present)
*/
case DP_CDS_TYPE:
/*
* For a CDS error, it's the reporting cpuid
* and its other core (if present)
*/
break;
case DP_DX_TYPE:
/*
* For a DX error, it's the reporting cpuid (all
* cores), and the other CPU sharing the same
* DX<-->DCDS interface (all cores)
*/
/* reporting cpuid */
/* find partner cpuid */
else
/* add partner cpuid */
break;
case DP_EX_TYPE:
/*
* For an EX error, it is all cpuids (all cores)
* on the reporting board
*/
count = nummaxcpus;
else
}
break;
case DP_CP_TYPE:
/*
* For a CP error, it is all cpuids (all cores)
* on both boards (SB & IO) in the boardset
*/
/* Do slot 0 */
}
/* Do slot 1 */
}
break;
}
/*
* The datapath message could not be associated with any
* configured CPU.
*/
if (!jj) {
return (-1);
}
return (0);
}
/*
* dp_trans_event() - datapath message handler.
*
* Process datapath error and fault messages received from the SC. Checks
* for, and disregards, messages associated with I/O boards. Otherwise,
* extracts message info to produce a datapath ereport.
*/
static void
{
/* check for I/O board message */
if (slot) {
for (i = 0; i < STARCAT_SLOT1_CPU_MAX; i++) {
/* maxcat cpu present */
maxcat++;
}
}
/*
* Ignore I/O board msg
*/
if (maxcat == 0)
return;
}
/* allocate space for ereport */
/*
*
* Member Name Data Type Comments
* ----------- --------- -----------
* version uint8 0
* class string "asic"
* ENA uint64 ENA Format 1
* detector fmri aggregated ID data for SC-DE
*
* Datapath ereport subclasses and data payloads:
* There will be two types of ereports (error and fault) which will be
* identified by the "type" member.
*
* ereport.asic.starcat.cds.cds-dp
* ereport.asic.starcat.dx.dx-dp
* ereport.asic.starcat.sdi.sdi-dp
*
* Member Name Data Type Comments
* ----------- --------- -----------
* erptype uint16 derived from message type: error or
* fault
* t-value uint32 SC's datapath SERD timeout threshold
* dp-list-sz uint8 number of dp-list array elements
* dp-list array of uint16 Safari IDs of affected cpus
* sn-list array of uint64 Serial numbers of affected cpus
*
*/
/* compose common ereport elements */
/*
* Create legacy FMRI for the detector
*/
case DP_CDS_TYPE:
case DP_DX_TYPE:
if (slot == 1)
else
break;
case DP_EX_TYPE:
break;
case DP_CP_TYPE:
break;
default:
break;
}
/* build ereport class name */
/* add payload elements */
if (msgtype == SCDM_DP_ERROR_MSG) {
} else {
}
/* post ereport */
}
/* free ereport memory */
}
/*
* scosmb_process_input() - incoming message processing routine
*
* This routine attempts to read a message from the SCDM mailbox and, if
* successful, processes the command. If an unrecoverable error is encountered,
* the scosmb_task thread will be terminated.
*/
/* ARGSUSED0 */
static void
{
int error;
int cap_size;
int cap_ver_len;
int max_size;
/*
* Attempt to read a message from the SCDM mailbox.
*
* Setup a local buffer to read incoming messages from the SC.
*/
/*
* If EAGAIN or ETIMEDOUT was received, give up. The SC can just try
* again if it was important. If any other non-zero error was
* encountered, the mailbox service is broken, and there's nothing more
* we can do.
*/
return;
} else if (error != 0) {
/*
* The mailbox service appears to be badly broken. If it was
* working previously, generate a warning and set a flag to
* avoid repeating the warning on subsequent failures.
*/
if (!scosmb_mboxsc_failed) {
error);
}
return;
} else {
/*
* If the mailbox module failed previously, it appears to have
* recovered, so we'll want to generate a warning if it fails
* again.
*/
}
/*
* A message was successfully received, so go ahead and process it.
*/
case SCDM_GOTO_OBP: /* jump to OBP */
debug_enter("SC requested jump to OBP");
break;
case SCDM_GOTO_PANIC: /* Panic the domain */
break;
case SCDM_SHUTDOWN: /* graceful shutdown */
/*
* In the event kadmin does not bring down the
* domain, environmental shutdown is forced
*/
/*FALLTHROUGH*/
case SCDM_ENVIRON: /* environmental shutdown */
/*
* Send SIGPWR to init(1); it will run rc0,
* which will invoke uadmin to power down.
*/
/*
* If we're still booting and init(1) isn't set up yet,
* simply halt.
*/
extern void halt(char *);
power_down((char *)NULL);
halt("Power off the System!\n");
}
/*
* else, graceful shutdown with inittab and all
* getting involved
*/
break;
case SCDM_GET_NODENAME:
break;
case SCDM_LOG_ECC_CAP_RESP:
/*
* The SC has responded to our initiator capability message
* issued during the boot flow via scosmb_update_nodename().
*
* Parse the incoming data, and appropriately set SC
* capabilities...
*/
break;
case SCDM_LOG_ECC_CAP_INIT:
/*
* The SC has initiated a capability messaging exchange with
* the OS.
*
* We start out just as we do for an SC response capability
* message, a parse of incoming data to appropriately set SC
* described capabilities...
*/
/*
* The next step is setting up our Response to the SC.
*
* Allocate memory for message data, initialize appropriately,
* and place a new job on the scosmb_output_taskq for
* SCDM_LOG_ECC_CAP_RESP, our OS capability messaging response
* to the SC initiated sequence detected here.
*/
cap_msgdatap->transid = 0;
(void) taskq_dispatch(scosmb_output_taskq,
KM_SLEEP);
break;
case SCDM_DP_ERROR_MSG:
case SCDM_DP_FAULT_MSG:
break;
case SCDM_DIMM_SERIAL_ID:
break;
default:
break;
}
/*
* Free up buffer for incoming message data that we allocated earlier
*/
}
/*
* scosmb_process_output() - outgoing message processing routine
*
* This routine handles jobs that are queued on the scosmb_output_taskq, or
* sent directly from scosmb_log_ecc_error. Each job corresponds to a single
* mailbox message that needs to be sent to the SC via the DMSC mailbox. Some
* processing of the message may be performed before it is sent to the SC,
* depending on the value of the command field.
*/
static int
{
int error;
int length;
void *free_data;
int free_data_len;
int cap_size;
int cap_ver_len;
/*
* This shouldn't ever happen, but it can't hurt to check anyway.
*/
return (EINVAL);
}
/*
* If data was passed in, we'll need to free it before returning.
*/
/*
* Some commands may need additional processing prior to transmission.
*/
/*
* Since the SC is only interested in the most recent value of
* utsname.nodename, we wait until now to collect that data. We
* also use a global flag to prevent multiple event-type
* nodename messages from being queued at the same time for the
* same reason.
*/
case SCDM_GET_NODENAME:
if (length == 0) {
} else {
}
}
break;
/*
* SCDM_LOG_ECC_CAP_INIT
* Initiator Capability message from OS to SC
*
* We construct and send an initiator capability message
* every time we go through scosmb_update_nodename(), which
* works out to getting an "initiator" capability message
* sent from the OS to the SC during the OS boot flow.
*
* The SC also issues a request to scosmb_update_nodename()
* during an SC reboot, which results in an additional
* capability message exchange during SC reboot scenarios.
*
* SCDM_LOG_ECC_CAP_RESP
* Response Capability message from SC to OS
*
* In certain scenarios, the SC could initiate a capability
* messaging exchange with the OS. Processing starts in
* scosmb_process_input(), where we detect an incoming
* initiator capability message from the SC. We finish
* processing here, by sending a response capability message
* back to the SC that reflects OS capabilities.
*/
case SCDM_LOG_ECC_CAP_INIT:
/*FALLTHROUGH*/
case SCDM_LOG_ECC_CAP_RESP:
cap_size = sizeof (plat_capability_data_t) +
/*
* Build the capability solaris_version string:
* utsname.release + " " + utsname.version
*/
/*
* The capability message is constructed, now plug it
* into the starcat msgdatap:
*/
/*
* message set up.
*
* Note that after sending an "initiator" capability
* message, we can expect a subsequent "response"
* capability message from the SC, which we will
* pick up and minimally handle later,
* in scosmb_process_input().
*
* If we're sending a "response" capability message
* to the SC, then we're done once the message is sent.
*/
}
break;
default:
break;
}
/*
* Attempt to send the message.
*/
/*
* Free any allocated memory that was passed in.
*/
}
}
/*
* If EAGAIN or ETIMEDOUT was received, give up. The sender can try
* again if it was important. If any other non-zero error was
* encountered, the mailbox service is broken, and there's nothing more
* we can do.
*/
/*
* Indictment mailbox messages use the return value to
* indicate a problem in the mailbox. For Error
* mailbox messages, we'll have to use a syslog message.
*/
"(0x%x/0x%x) to the System Controller. Error: %d",
}
} else if (error != 0) {
/*
* The mailbox service appears to be badly broken. If it was
* working previously, generate a warning and set a flag to
* avoid repeating the warning on subsequent failures.
*/
"while processing this message (0x%x/0x%x)",
}
} else {
/*
* If the mailbox module failed previously, it appears to have
* recovered, so we'll want to generate a warning if it fails
* again.
*/
}
return (error);
}
/*
* scosmb_update_nodename() - nodename update routine
*
* This routine, which may be invoked from outside of the scosmb module, will
* cause the current nodename to be sent to the SC. The mailbox message sent to
* the SC will use the indicated transaction ID, and will either be a reply
* message if the ID is non-zero or an event message if it is 0.
*
* Capability messaging enhancements:
* Every time we move through this code flow, we put an "initiator
* capability message" on the message output taskq. This action will
* get a capability message sent to the SC from the OS during boot
* scenarios. A capability message exchange will also happen for
* SC reboot scenarios, as the SC will initiate a nodename update
* as a matter of course while coming back up.
*
* We'll also get an extraneous capability message sent
* to the SC from time to time, but that won't hurt anything.
*/
void
{
/*
* If we're generating an unsolicited nodename update (presumably having
* been called from platmod:plat_nodename_set()), there's no need to add
* a new job to the queue if there is already one on it that will be
* sending the latest nodename data.
*/
if (transid == 0) {
return;
} else {
}
}
/*
* Allocate memory for the message data, initialize it, and place a new
* job on the scosmb_output_taskq for SCDM_GET_NODENAME.
*/
KM_SLEEP);
(void) taskq_dispatch(scosmb_output_taskq,
/*
* Next, allocate memory, initialize, and place a new job on the
* scosmb_output_taskq for SCDM_LOG_ECC_CAP_INIT. That's a
* capability message, where we're the initiator.
*/
(void) taskq_dispatch(scosmb_output_taskq,
}
/*
* scosmb_log_ecc_error() - Record ECC error information to SC
* For ECC error messages, send the messages through a taskq mechanism
* to prevent impaired system performance during ECC floods. Indictment
* messages have already passed through a taskq, so directly call the
* output function.
*/
int
{
/*
* Set header type and length for message
*/
switch (msg_type) {
case PLAT_ECC_ERROR_MESSAGE:
/*
* We do not want to sleep in an error logging thread. So,
* we set the NOSLEEP flag and go through a taskq before we
* send the message.
*/
msg_length = sizeof (plat_ecc_error_data_t);
log_error = 1;
do_queue = 1;
break;
case PLAT_ECC_ERROR2_MESSAGE:
msg_length = sizeof (plat_ecc_error2_data_t);
log_error = 1;
do_queue = 1;
break;
/*
* For indictment messages, we're allowed to sleep, and we
* can directly call the output function, since we've already
* gone through a taskq
*/
msg_length = sizeof (plat_ecc_indictment_data_t);
log_error = 0;
do_queue = 0;
break;
/*
* For indictment2 messages, we're allowed to sleep, and we
* can directly call the output function, since we've already
* gone through a taskq
*/
msg_length = sizeof (plat_ecc_indictment2_data_t);
log_error = 0;
do_queue = 0;
break;
/*
* For DIMM sid request messages, we're allowed to sleep, and we
* can directly call the output function, since we've already
* gone through a taskq
*/
msg_length = sizeof (plat_dimm_sid_request_data_t);
log_error = 0;
do_queue = 0;
break;
default:
return (EINVAL);
}
/*
* Allocate memory for the mailbox message header.
*/
if (msg_header_ptr == NULL) {
#ifdef DEBUG
"message header.");
#endif /* DEBUG */
return (ENOMEM);
}
msg_header_ptr->transid = 0;
/*
* Allocate memory for the mailbox message payload.
*/
#ifdef DEBUG
"message data.");
#endif /* DEBUG */
return (ENOMEM);
}
/*
* Based on our earlier look at the message type, we either go through
* a taskq or directly call the output function.
*/
if (do_queue != 0) {
/*
* Place a new job on the scosmb_output_taskq.
*/
(void *)msg_header_ptr, TQ_NOSLEEP) == 0) {
#ifdef DEBUG
"ECC mailbox message.");
#endif /* DEBUG */
return (ENOMEM);
}
return (0);
} else {
return (scosmb_process_output(msg_header_ptr));
}
}