wrsm_lc.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2003 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* This file manages WCIs for the Wildcat RSM driver. It orchestrates link
* bringup and takedown based on configuration changes and changes in
* remote connectivity, monitors links, and reports status to higher layers
* (specifically, the multi-hop layer and syseventd).
*/
#include <sys/int_fmtio.h>
#include <sys/machsystm.h>
#include <sys/cheetahregs.h>
#include <sys/wci_common.h>
#include <sys/wci_regs.h>
#include <sys/wci_offsets.h>
#include <sys/wci_masks.h>
#include <sys/wrsm_driver.h>
#include <sys/wrsm_plat.h>
#ifdef DEBUG
#define LC_DEBUG 0x001
#define LC_CSR_WRITE_DEBUG 0x002
#define LC_CSR_READ_DEBUG 0x004
#define LC_POLL 0x008
#define LC_CMMU 0x010
#define LC_CESR 0x020
#define LC_HWSH 0x040
#define LC_DEBUG_EXTRA 0x100
#define LC_WARN 0x200
#define LC_ECC 0x400
#define LC_DEBUG_PLAT 0x800
#else /* DEBUG */
#define DPRINTF(a, b) { }
#endif /* DEBUG */
#define LC_MAX_CONT_ERRS 100
/* globals */
/* weight of old average in error average */
/* minutes in shortterm error interval */
/* number of shortterm intervals per long term interval */
/*
* Artificially limit ourselves to only enough CMMU entries to map 4GB
* in that it's not 64bit clean.
*
* Normal value: 0x200000 2M entries, 16GB
* Workaround value: 0x80000 .5M entries, 4GB
*/
static clock_t wrsm_poll_hz;
static clock_t wrsm_restart_hz;
static clock_t wrsm_shortterm_hz;
/* calculate the weight over wrsm_avg_weight intervals */
#define MAX(i, j) ((i > j)?i:j)
/* prototypes for local functions */
#define NUM_PLATFORMS 7
typedef enum {
starcat_direct, /* starcat in direct connect topology */
starcat_wcx, /* starcat in WCX topology */
starcat_pt, /* starcat compute node in central switch */
serengeti_direct, /* serengeti in direct connect topology */
serengeti_wcx, /* serengeti in WCX topology */
serengeti_pt, /* serengeti compute node in central switch */
starcat_switch /* starcat as a central switch */
switch (wnode) { \
}
switch (wnode) { \
}
#define CA_TIMEOUT_CONFIG_VAL_WCI2 0x00001000000100FFULL
static gnid_t
{
/*
* For a WCX, the wnode is always equal to the gnid
* so that mapping is not stored in the table.
*/
if (WRSM_GNID_IS_WCX(wnode))
return (wnode);
/* Make sure this wnode is in a valid range */
if (wnode_tmp >= WRSM_MAX_WNODES) {
continue;
}
/* See if gnid yielded the right wnode id */
break;
}
}
return (gnid);
}
{
if (map == 0) {
} else {
}
return (link);
}
void
int map)
{
if (gnid >= WRSM_MAX_WNODES) {
wnode));
return;
}
if (map == 0) {
&(route_map0.val));
route_map0.val));
/* also update wci_gnid_map0 for wci 3 */
}
} else {
&(route_map1.val));
route_map0.val));
/* also update wci_gnid_map1 for wci 3 */
}
}
}
static void
{
if ((link_state == lc_down) ||
(link_state == sc_wait_errdown) ||
(link_state == sc_wait_down) ||
(link_state == sc_wait_up) ||
(link_state == lc_up)) {
if (link_state != lc_up) {
/*
* lc_up links are config confirmation requests,
* hence shouldn't change to sc_wait_up state.
*/
}
B_FALSE /* loopback */);
#ifdef DEBUG
} else {
"for wrsm%d link %d wci %d wci",
#endif /* DEBUG */
}
}
int
{
int rc;
if (local_link_num >= WRSM_LINKS_PER_WCI)
return (EINVAL);
return (rc);
return (0);
}
int
{
int i;
if (local_link_num >= WRSM_LINKS_PER_WCI)
return (EINVAL);
return (EACCES);
B_TRUE);
/*
* If no other links on this wci are in loopback test mode
* then release the wci for general use
*/
for (i = 0; i < WRSM_LINKS_PER_WCI; ++i) {
break;
}
}
if (release)
return (0);
}
int
{
return (EACCES);
}
/*
* Take down link, and record that this is a user-requested takedown.
*/
int
{
return (EINVAL);
}
return (0);
}
/*
* Unset user takedown boolean, and attempt to bring it up link.
*/
int
{
return (EINVAL);
}
return (ENODEV);
}
return (0);
}
void
{
int i;
if (WRSM_GNID_IS_WCX(remote_gnid)) {
for (i = 0; i < WRSM_MAX_WNODES; ++i) {
if ((gnids & (1 << i)) &&
softsp->gnid_to_wnode[i]);
}
}
} else {
}
}
static void
{
if (user_requested) {
}
/*
* ignore any request for link takedown due to linkerr
* unless the current link state is up. (A linkerr
* takedown is only generated when the link was up, but
* because the lock is dropped, this may not be the state
* by the time we get here.)
*/
"link %d not up, state = %d", local_link_num,
link_state));
return;
}
if (link_state == lc_up) {
/* Notify MH that link is going down */
}
/*
* check for a linkerr reason for links in lc_up state
*/
if (linkerr) {
/*
* Link takedown request is due to link
* errors. Change state to sc_wait_errdown.
*/
"in wrsm_lc_link_takedown: linkerr"));
/*
* Link error occured on an up link
* after a link_bringup that was
* requested by lc_installconfig.
* mh_link_is_up has not been
* called yet because
* tell_mh_link_is_up is still
* false. Increment
* newlink_waitup_cnt since we must
* now wait for this link to come
* back up - again.
*/
}
} else {
/*
* link_takedown request was intentially
* unconfigured. log system event.
*/
local_link_num, "unconfigured");
}
}
} else if (link_state == sc_wait_errdown) {
/*
* lc_cleanconfig or user requested link takedown while
* link was already coming down due to link error.
* Change state so link does not come back up automatically.
*/
" errdown state link %d wci %d linkerr true",
}
/*
* ignore link down request if link is already down
*/
}
void
{
int err = DDI_SUCCESS;
KM_SLEEP)) == DDI_SUCCESS) {
if (err == DDI_SUCCESS) {
}
if (err == DDI_SUCCESS) {
}
switch (eventtype) {
case link_up:
if (err == DDI_SUCCESS) {
}
break;
case link_down:
if (err == DDI_SUCCESS) {
reason);
}
if (err == DDI_SUCCESS) {
}
break;
default:
break;
}
}
#ifdef DEBUG
if (err != DDI_SUCCESS) {
}
#endif /* DEBUG */
}
/*
* This function is only called by the Mbox message handler function.
* it is called when the SC has completed discovery.
* The purpose of this functions is to confirm the remote config data before
* the hardware knows the link is physically 'connected' that is, the wci
* register has link_state = in-use. Once lc_phys_link_up confirms that remote
* data is correct, it will put in a request for the SC to set the link_state
* to in-use
*/
/* ARGSUSED */
void
{
/*
* since data is invalid, we are not sure
* how to send a stop discovery message
*/
"Invalid args wrsm%d link %d",
return;
}
return;
}
/* verify args */
"got controller version %ld expected %ld",
} else {
"wci %d link %d bad remote configuration: "
"got controller id %d expected %d",
}
remote_gnid != remote_gnid) {
"wci %d link %d bad remote configuration: "
}
"wci %d link %d bad remote configuration: "
}
!= remote_port) {
"wci %d link %d bad remote configuration: "
"got safari port id %d expected %d",
}
}
/* Update softstate struct with badconfig reasons */
if (badconfigcaught) {
"badconfig");
return;
}
/* If we weren't waiting for the link to come up, just bail */
return;
}
if (WRSM_GNID_IS_WCX(remote_gnid))
else
/* Is it OK to tell MH link is up? */
if (WRSM_GNID_IS_WCX(remote_gnid)) {
NULL);
return;
/* LINTED: E_NOP_IF_STMT */
if (remote_wnode >= WRSM_MAX_WNODES) {
"Bad remote wnode for wci %d link %d: %d",
} else {
}
return;
} else {
/*
* newlink_waitup_cnt is how we keep track of
* bringup link on a new link requested
* initiated by NR via lc_installconfig.
* In this case, link is up but the NR has
* not as of yet called lc_enableconfig
*/
if (softsp->newlink_waitup_cnt == 0) {
/*
* All new links are up and MH doesn't
* know - tell NR so that it will call
* lc_enableconfig
*/
return;
}
}
}
/*
* this function is only called by the mbox message handler function.
* this function processes responses from the SC (via the SBBC mailbox)
* that a link is down.
*
* This may also be called when we bring down a link in loopback mode.
* it is harmless, as the lc state is marked as down and the call is ignored
*/
void
{
/* invalid args */
"Invalid args wrsm%d link %d",
return;
}
return;
}
(softsp->oldlink_waitdown_cnt != 0)) {
/*
* If oldlink_waitdown_cnt is set, lc_installconfig
* is waiting for all links not in the config to
* come down. Decrement oldlink_waitdown_cnt here
* to reflect that another link has come down.
*/
if (softsp->oldlink_waitdown_cnt == 0) {
/*
* All all requested links are down, so
* signal installconfig() to go.
*/
"wrsm_lc_phys_link_down signal"
" lc_installconfig"));
}
/*
* Link takedown must have been due to
* a config error being caught. Set timer to
* bring up link later if one isn't already set.
*/
if (softsp->restart_timeout_id == 0) {
} else {
timeout((void (*)(void *))
"RESTART LINK "
"timeout STARTED wci %d",
}
}
}
/*
* If a new config is being installed which doesn't
* include this link, don't bring it back up.
*/
} else {
/*
* Link take down was due to link error.
* Immediately try bringing link back up,
* unless this link has had errors on the
* last LC_MAX_CONT_ERRS link error checks.
*/
} else {
/*
* Set timer to bring up link later if one
* isn't already set.
*/
if (softsp->restart_timeout_id == 0) {
} else {
timeout((void (*)(void *))
"RESTART LINK "
"timeout STARTED wci %d",
}
}
}
}
}
}
/*
* one time setup for timeout speeds
*/
void
{
"poll = 0x%x restart = 0x%x shortterm = 0x%x",
}
static int
{
int rev;
switch (jtag_id) {
case WCI_ID_WCI1:
rev = 10;
break;
case WCI_ID_WCI2:
rev = 20;
break;
case WCI_ID_WCI3:
rev = 30;
break;
case WCI_ID_WCI31:
rev = 31;
break;
case WCI_ID_WCI4:
rev = 40;
break;
case WCI_ID_WCI41:
rev = 41;
break;
default:
rev = 99;
" assuming equivalent to WCI 4.1", jtag_id));
break;
}
return (rev);
}
typedef struct {
/*
* Create a wrsm_platform_csr_t entry
*/
/*
* Create a wrsm_platform_csr_t entry for a register array
*/
/*
* Create a wrsm_platform_csr_t entry using the same value for all
* platforms.
*/
/*
* Platform specific CSRs
*
* The following is a list of all the WCI CSRs which may need to have
* different settings based on the platform type of the local node
*
* 0x00180 wci_error_pause_timer_hold
* 0x200e0 wci_ca_timeout_config
* 0x201a0 wci_ca_timeout_config_2
* 0x001c0 wci_csra_timeout_config
* 0x500c0 wci_sa_timeout_config
* 0x31100 wci_ra_timeout_config
* 0x400e0 wci_ha_timeout_config
* 0x201e0 wci_qlim_config_cag
* 0x34080 wci_qlim_config_piq
* 0x340a0 wci_qlim_config_niq
* 0x340c0 wci_qlim_config_ciq
* 0x00040 wci_config
* 0x20100 wci_ca_config
*
* 0x34040 wci_qlim_3req_priority
* 0x34060 wci_qlim_2req_priority
* 0x64160 wci_qlim_sort_ciq
* 0x64140 wci_qlim_sort_niq
* 0x64120 wci_qlim_sort_piq
*/
/*
* These CSR values come from the "Programming the Timeouts", "RSM
* Sorting and Bandwidth Balancing" and "Setting the WC{I,X}
* Network Timeouts: a programmer's guide" sections of the WCI 3 PRM.
* The seven field values correspond to the the seven platform types
* as specified by the wrsm_platform_types_t enumeration.
*/
/* Programming the Timeouts */
0x1fff, 0x1fff, 0x1fff),
0x1fff, 0x1fff, 0x1fff),
0x1fff, 0x1fff, 0x1fff),
1024, 2048),
/* RSM Sorting and Bandwidth Balancing */
0xb, 0xb, 0xaaa9aaab),
0xf000030f, 0xffffffff, 0xffffffff, 0xffffffff, 0xf000010f),
0, 0, 0, 0xffffcf0),
0xf0000000, 0, 0, 0, 0xf0000200),
/* Setting the WC{I,X} Network Timeouts: a programmer's guide */
{ 0 }
};
/*
* lc_wciinit may ONLY be called from lc_replaceconfig
* lc_wciinit sets the softstate struct with the local_cnode
* local_wnode, and initializes several different registers
* routemap0 and routemap1, wci_brd2cnid_array.
* enables cluster_disable in wci_ca_config.
*/
static void
{
int i;
softsp->restart_timeout_id = 0;
/* set striping bits to no striping */
/*
* set up the wci_error_inducement to reset state
* this must be done before writing to SRAM (CMMU)
*/
wci_error_inducement.val = 0;
/* clear all CMMU entries in SRAM */
/*
* Entry 1 of the CMMU is reserved during cmmu_init() to
* be used for clearing cluster write lockout. Here we
* initialized it to a sensible value.
*/
/* clear wci_nc_slice_config_array */
for (i = 0; i < ENTRIES_WCI_NC_SLICE_CONFIG_ARRAY; i++) {
}
/* clear wci_cluster_error_count */
/* clear inid2dnid array */
for (i = 0; i < ENTRIES_WCI_INID2DNID_ARRAY; i++) {
}
/* Clear cluster members bits */
for (i = 0; i < ENTRIES_WCI_CLUSTER_MEMBERS_BITS; i++) {
}
/* clear wci_sw_link_error_count */
for (i = 0; i < ENTRIES_WCI_SW_LINK_ERROR_COUNT; i++)
/* set wci revisions - to determine if wci 2 or 3 */
for (i = 0; i < ENTRIES_WCI_CLUSTER_ERROR_STATUS_ARRAY; i++) {
}
/* Clear the write lockout bits */
for (i = 0; i < ENTRIES_WCI_CLUSTER_WRITE_LOCKOUT; ++i) {
}
/* clear write lockout status */
/*
* disable all slave and home agent instances - they are
* not used in cluster mode.
*/
/* wci_dco_ce_count must be set before wci_dc_esr register */
/* Clear any previous freeze bits set in prior configs */
/* the following ESR registers require a TOGGLE to reset bit */
/*
* this step should always follow reseting esr
*/
/*
* The PRM recommends unmasking dco_data_parity_error.
* these errors happen all the time, so we have to mask it.
*/
/*
* although the ha (home agent) and sa (slave agent) are only used
* in a SSM configured wci, it was recommended that the mask be
* set correctly in the wrsm driver so that illegal ssm transactions
* can be caught.
*/
/*
* On timeout config registers, Hien Nguyen suggests the
* following settings.
*
* Tru(cag) = wci_ca_timeout_config.reuse_timeout
* Tdp(rag) = wci_ra_timeout_config.primary_cluster_wr_timeout
* T(cpu) = Cheetah timeout value
* T(rag) = wci_ra_timeout_config.primary_cluster_rd_timeout
* T(cag) = wci_ca_timeout_config.primary_timeout
*
* T(cag) < T(rag) < Tcpu < Tdp(rag) < Tru(cag)
*
* Here is a suggestion without much analysis.
* Tru(cag) = 256*2^24 cycles (max)
* Tdp(rag) = 32*2^24 cycles
* T(cpu) = 16*2^24 cycles (Is 2^28 the default?)
* T(rag) = 2*2^24 cycles
* T(cag) = 1*2^24 cycles
*
* These numbers are ok for small clusters. When we have larger
* machines, we will have to make some adjustments.
*/
ca_timeout_config.val = 0;
/*
* the position of the reuse_mag and reuse_val change between wci2
* wci3. Since our driver is compiled to deal with > wci 3 headers
* the values for wci 2 fields must be set manually.
* reuse_val; wci 2 0-7 th bit, wci 3 0-10 bit
* reuse_mag; wci 2 8th and 9th bit, wci 3 11 and 12
*/
/* reuse_mag already 0 since config.val = 0 above */
} else { /* wci 3 or later revision */
}
ra_timeout_config.val = 0;
/* set routemap 0 and routemap 1 to local */
/* Perforance counters settings - expand to bit fields later */
/* Set wci_misc_ctr_ctl to cluster agent */
/* set first element in board2cnid */
/*
* This write to wci_board2dnid_array will be ignored on a Starcat
* system if jtag-wr_only bit is set in wci_csr_control.
*/
/* Disable board2cnid */
/* set wnode in wci_config register */
/* turn-off cluster_disable -- that is, allow transactions */
"wciINIT ca_config = 0x%"PRIx64,
#ifdef DEBUG
#endif
/* Initialize CSRs new to WCI-3 */
/* Initialize GNID maps for loopback */
}
softsp->num_sram_ecc_errors = 0;
softsp->last_sram_ecc_errors = 0;
softsp->max_sram_ecc_errors = 0;
softsp->avg_sram_ecc_errors = 0;
/* Enable the QDL for WCI-3.1 and later */
}
/*
* error polling needs to be started here in order to check
* esr registers even if no physical links are up. an error
* may appear during initialization or a loopback transaction
*/
/* already holding lc_mutex */
} else {
}
}
static void
{
int i;
type));
for (i = 0; i < csr->array_entries; ++i) {
}
}
/*
* network topology.
*/
static void
{
int i;
int *conf_values;
switch (node_type) {
case wrsm_node_serengeti:
case wrsm_node_wssm:
switch (net_type) {
break;
case topology_san_switch:
break;
case topology_central_switch:
break;
}
break;
case wrsm_node_starcat:
switch (net_type) {
break;
case topology_san_switch:
break;
case topology_central_switch:
break;
}
}
}
for (i = 0; plat_csr_values[i].offset; ++i)
&plat_csr_values[i], plat_type);
}
/*
* Give the wrsmplat module an oportunity to make CSR settings
* specific to this platform.
*/
/*
* CSR settings which over-ride the defaults can be encoded in
* the driver .conf file using the "wci_csr" property. This
* property is array of integers where each set of 12 ints
* represents one wrsm_platform_csr_t struct. This section
* should be performed last in WCI initialization so that
* settings in the .conf file take priority to those
* hard-coded in the driver.
*/
nentries));
/*
* The number of int in the array must be a multiple
* of the size of the wrsm_platform_csr_t struct.
*/
sizeof (wrsm_platform_csr_t);
if ((num_regs * sizeof (wrsm_platform_csr_t)) ==
(nentries * sizeof (int))) {
for (i = 0; i < num_regs; ++i)
&conf_plat_array[i], plat_type);
} else {
"length for wci_csr property: %d",
nentries);
}
}
}
/*
* lc_wcifini can ONLY be called by lc_installconfig.
* lc_wcifini resets some register values to 'clean' values
* ie. WCI_ROUTE_MAP= LOOPBACK, wci_ca_config.cluster_disable
* is disabled - that is set BACK to 1, clears cmmu, etc.
*/
static void
{
softsp->err_timeout_id = 0;
/* LINTED: E_NOP_IF_STMT */
" err_timeout_id not valid"));
}
/* Clear CMMU to make all entries invalid */
/* Reset route maps */
/* enable cluster_disable */
/* set wnode in wci_config register to reset value */
/* set first element in board2cnid to 'cleared' value */
/*
* This write to wci_board2dnid_array will be ignored on a Starcat
* system if jtag-wr_only bit is set in wci_csr_control.
*/
}
/*
* lc_verifyconfig must be called before any calls to lc_replaceconfig
* verifies that a new configuration for existing links is not a mismatch
* returns TRUE if config is GOOD
*/
{
int i;
/* For WCI2, gnids must equal wnodes */
for (i = 0; i < WRSM_MAX_WNODES; i++) {
if (wnode < WRSM_MAX_WNODES &&
wnode != i) {
"gnids must equal wnodes"));
}
}
}
for (i = 0; i < WRSM_LINKS_PER_WCI; i++) {
/* a valid link that is part of a config */
/*
* Remote gnid changes. To support changing
* gnids, must bring down links. Not yet
* supported.
*/
continue;
}
if (WRSM_GNID_IS_WCX(rgnid_old)) {
/*
* We already know remote gnid isn't
* changing. If remote gnid is a switch,
* don't need to check for changes in
* cnodeids.
*/
continue;
}
if (rwnid_new >= WRSM_MAX_WNODES) {
"%d not in reachable list",
/* remote wnodeid mismatch */
} else {
[rwnid_old]) ||
"verifyconfig bad remote-cnode"
" link %d", i));
" remote_cnode for wrsm%d link"
}
}
}
}
((softsp->local_cnode) !=
}
} else { /* wnode mismatch */
"bad config local-wnode link %d", i));
}
return (config_ok);
}
/*
* lc_replaceconfig must be called before a call to lc_cleanconfig
* and lc_installconfig. lc_verify_config must be called prior to
* lc_replaceconfig. responsible for initialization required before link
* takedown and bringup request can be made.
*/
void
{
uint64_t i;
/*
* newlink_waitup_cnt is used to count the number of installconfig
* initiated link_bringup requests. It is decremented when a link
* reaches lc_up state.
*
* old_link_waitdown_cnt is used to count the number of cleanconfig
* initiated link_takedown request. It is decremented when a link
* reaches lc_down state.
*/
softsp->newlink_waitup_cnt = 0;
softsp->oldlink_waitdown_cnt = 0;
/* first time only initialize links */
/*
* tell_mh_link_is_up is needed so that the LC knows when
* it is allowed to call mh_link_is_up. On new request, the
* LC must wait until lc_enableconfig is called,
* lc_enableconfig sets tell_mh_link_is_up to TRUE, and TRUE
* is the value that tell_mh_link_is_up remains until
* another 'new' bringup link request is made in
* lc_installconfig.
*/
for (i = 0; i < WRSM_LINKS_PER_WCI; i++) {
}
}
/* Save local gnid, gnid_to_wnode map for later */
for (i = 0; i < WRSM_MAX_WNODES; i++) {
}
/* Update WCI-3 with local gnid and setup dnid2gnid */
for (i = 0; i < WRSM_MAX_WNODES; ++i) {
if (gnid2dnid[i] < WRSM_MAX_WNODES)
(gnid2dnid[i] * 4);
}
}
}
}
/*
* initiates takedown link request for any links where the config pointer
* in the softstate struct is NULL, or the link is marked as not
* present in the new configuration.
*/
void
{
int i;
/*
* Take down all links not in the config.
*/
for (i = 0; i < WRSM_LINKS_PER_WCI; i++) {
/*
* All user link down states are discarded when a
* link is removed from a config.
*/
}
}
/*
* Calculate which links not in the config are not down yet, and
* need to be waited for.
*/
for (i = 0; i < WRSM_LINKS_PER_WCI; i++) {
}
}
}
}
/*
* lc_installconfig waits for all takedown request to be fully completed.
* This function will bringup up new links and confirms existing link
* connections.
*/
void
{
int i;
/*
* wait for old links to come down
*/
while (softsp->oldlink_waitdown_cnt != 0) {
}
/*
* Cancel timeout initiated link bringup - any links we want to
* bring for the new config will be brought up now.
*/
if (softsp->restart_timeout_id != 0) {
softsp->restart_timeout_id = 0;
/* LINTED: E_NOP_IF_STMT */
"wrsm_lc_link_installconfig"
" restart_timeout_id not valid"));
}
} else {
}
/* no new config; clean up and return */
return;
}
/*
* bring up links in new config
*/
for (i = 0; i < WRSM_LINKS_PER_WCI; i++) {
/*
* It is important to attempt to bringup links in
* sc_wait_down state to allow for uniform
* treatment of down states. Links in
* sc_wait_errdown state will already come back up.
*/
/*
* For any link that is not currently up,
* the LC is not allowed to call
* mh_link_is_up for this link until
* lc_enableconfig is called.
*/
"lc_installconfig: new link %d"
" newlink_waitup_cnt = %d", i,
/* verify remote config data */
" remote config on old link %d", i));
}
}
}
}
/*
* calls mh_link_is_up() for each new link requested up in
* lc_installconfig
*/
void
{
int i;
for (i = 0; i < WRSM_LINKS_PER_WCI; i++) {
/* LINTED: E_NOP_IF_STMT */
if (WRSM_GNID_IS_WCX(remote_gnid)) {
" enable link %d to wcx %d", i,
remote_gnid));
/* LINTED: E_NOP_IF_STMT */
} else if (remote_wnode >= WRSM_MAX_WNODES) {
"Bad remote wnode for wci %d link %d: %d",
} else {
}
} else {
}
}
/*
* We no longer care about newlink_waitup_cnt. The purpose of
* this count is to determine ahead of time that all the
* requested links are up and then call nr_all_links_up which will
* force the NR into calling lc_enableconfig earlier
*/
softsp->newlink_waitup_cnt = 0;
}
{
}
int
{
}
/*
* given dev_id (and box_id from wci_config) get index into cesr
*/
static uint32_t
{
int box_id;
int index = 0;
/*
* Hardware defines the index into the CESR as follows:
* cesr index bit [0] = dev_id bit [0]
* cesr index bit [1] = dev_id bit [1]
* cesr index bit [2] = dev_id bit [3] | box_id bit [0]
* cesr index bit [3] = dev_id bit [4] | box_id bit [1]
* cesr index bit [4] = dev_id bit [2] | box_id bit [2]
* cesr index bit [5] = box_id bit [3]
* cesr index bit [6] = box_id bit [4]
* cesr index bit [7] = box_id bit [5]
*/
return (index);
}
/*
* returns entire contents of cesr[index] in entry
*/
void
{
}
/*
* writes entire entry to cesr[index]
*/
void
{
}
/*
* reads from register at reg_offset and returns entire register contents
* in entry
*/
void
{
/* check for valid offset */
}
/*
* write entire contents of entry at register at reg_offset
*/
void
{
/* LINTED: E_FUNC_SET_NOT_USED */
/* check for valid offset */
/* Write to the CSR */
/* Read back to ensure it's stuck in hardware */
}
static void
{
int link;
time_now = ddi_get_lbolt();
(CE_CONT, "lc_poll_timeout: do_shortterm"));
}
/* Retry send link bringup request periodically */
if (softsp->ctlr_config) {
case sc_wait_up:
break;
}
"wrsm_lc_poll_timeout: "
"retrying link bringup for "
break;
case sc_wait_down:
break;
}
"wrsm_lc_poll_timeout: "
"retrying link takedown for "
B_FALSE);
break;
case sc_wait_errdown:
break;
}
"wrsm_lc_poll_timeout: "
"retrying link takedown on errors for "
B_FALSE);
break;
}
}
}
if (softsp->err_timeout_id) {
}
}
/*
* Bring back up any links in the config that have been brought down due to
* prolonged link errors or bad configs.
*/
static void
{
int i;
if (softsp->restart_timeout_id == 0) {
return;
}
softsp->restart_timeout_id = 0;
for (i = 0; i < WRSM_LINKS_PER_WCI; i++) {
}
}
}
/*
* update error statistics used by status kstat
*/
static void
{
/* update shortterm error monitor */
if (do_shortterm) {
/* update longterm error monitor */
}
}
}
/*
* wrsm_lc_err_cnt checks and clears the link error count register:
* wci_sw_link_error_count
* wrsm_sw_link_error_count is and array of NUM_LINKS =
* ENTRIES_WCI_SW_LINK_ERROR_COUNT = 3
* SC controls:
* wci_sw_link_control_u.near_end_shutdown_lock (bit 4) should be set
* wci_sw_link_control_u.auto_shut_en set to enable hardware to shut
* down link. (bit 2) (default is NOT enabled)
*/
static void
{
int i;
/*
* We enable auto link shutdown in cluster mode; because of this,
* we must check the accumulated auto_shut bits to test for a
* hardware shutdown.
*
* Read registers associated with HW shutdown.
*/
&sw_link_ctrl.val);
/*
* the bit field in question we must first set all fields to 0 so
* that we don't inadvertently toggle write a bit field other than
* the one(s) in question.
*/
for (i = 0; i < WRSM_LINKS_PER_WCI; i++) {
/* only check on links that are already up */
continue;
}
/*
* Check if link has been auto-shutdown. If so, we still
* need to turn off lasers, etc, so still must call
* wrsm_lc_link_takedown().
*/
if (acc_auto_shut[i]) {
/* paroli removed while in use */
"removed from service while in use",
} else {
}
/*
* If link is configured and session is in
* progress, report this link down event.
* Otherwise don't; the link_down reason will be
* reported as unconfigured.
*/
WRSM_MAX_WNODES) ||
link_down, i,
"hardware-shutdown");
} else {
/*
* no session, link was probably
* shutdown at remote end
*/
link_down, i,
"disconnected-shutdown");
}
}
/*
* We clear the acc_auto_shut bit in wci_sw_esr so
* that the LC can use it to detect the next
* hardware shutdown. This register is a
* order to clear the bit.
*
* reset - bit fields are not arrays:
* acc_link_0 bit field 20
* acc_link_1 bit field 21
* acc_link_2 bit field 22
*/
}
/*
* Check the link error counter to see if link
* should be shutdown by hand, and to clear
* any errors.
*/
num_errs, i));
/*
* If link is configured, report this link down
* event. Otherwise don't; the link_down reason
* will be reported as unconfigured.
*/
WRSM_MAX_WNODES) ||
link_down, i,
"link-errors");
} else {
/*
* no session, link was probably
* shutdown at remote end
*/
link_down, i,
"disconnected-shutdown");
}
}
}
if (!takedown_link) {
/*
* No link problem during this
* round.
*/
}
/* take down link if needed */
if (takedown_link) {
}
/*
* Link error counter and error status registers
* must be cleared if any link errors have occured.
*/
if (num_errs > 0) {
/* write back to clear error bits */
if (wrsm_log_link_errors) {
"wci_sw_link_error_count = %lu, "
"wci_sw_link_error_sum = %lu, "
"crc = %ld, framing = %ld, clocking = %ld",
}
}
if (takedown_link) {
/*
* Don't count links errors that caused a link
* takedown in link_errors counter; they are
* counted in the takedown counters.
*/
num_errs = 0;
}
}
/* Toggle auto_shut bits back to reset mode */
}
static void
{
/* First clear the esr bit */
/* Next do the special read from page 1 */
/* clear the esr again, if necessary */
}
#ifdef DEBUG
#endif
/* For good measure, clear the status register too */
}
}
/*
* In the event the SC should crash while messages were between
* the SC and the mailbox, the SC will have no way of knowing what
* messages were lost. In this event, the SC sends a 'I am Here"
* message. When the LC gets this it knows the SC crashed. This function
* resends all messages in the sc_waitxxx state to the SC, and resends
* request to set LEDs off or On depending on if the link is lc_down or lc_up
*/
void
{
int i;
/* invalid arg */
"invalid softsp"));
return;
}
return;
}
for (i = 0; i < WRSM_LINKS_PER_WCI; i++) {
B_FALSE /* loopback */);
== sc_wait_errdown)) {
}
}
}
static void
{
int i;
#ifdef DEBUG
#endif /* DEBUG */
for (i = 0; i < numcmmu_entries; i++) {
}
#ifdef DEBUG
#endif /* DEBUG */
}
void
{
volatile uint64_t *cluster_syncp;
/* LINTED: E_FUNC_SET_NOT_USED */
"lc_cmmu_update(wci %d, index=%u, entry0=0x%08lX, "
/* Don't write off the end of the CMMU sram */
return;
}
/*
* Note cmmu_entry0 and cmmu_entry1 are interleaved.
* Because of this each Index for an entry is spaced 0x40
* apart from the previous one.
* The initial physical address starts where cmmu_entry0 begins,
* because of the cmmu_entry1 must always be offset by an
* additional 20 from the base of softsp->cmmu_addr
*/
/* used to inject SRAM ECC errors into CMMU entries by cmmu injector */
if (flags & CMMU_UPDATE_WRITEONLY) {
/* write full values */
"clean write to cmmu, no read back", index));
return;
}
if (flags & CMMU_UPDATE_WRITE_0) {
/* write full value 0 */
"clean write to cmmu entry 1, no read back", index));
return;
}
if (flags & CMMU_UPDATE_WRITE_1) {
/* write full value 1 */
"clean write to cmmu entry 0, no read back", index));
return;
}
/* store value for entry 0 in tmp_entry */
/*
* Set the cmmu entry to invalid so that the separate update of each
* half of the entry doesn't end up creating a strange state.
*
* Only need to clear the valid bit IF both entries need to change
* and the valid bit was set and the user error bit wasn't.
*/
}
if (flags == CMMU_UPDATE_ALL) {
/*
* Use passed in values for all fields.
*
* Set entry 1, then entry 0 of the cmmu entry to the
* passed in values. Entry 0 is updated second in order to
* leave the valid bit set to false until the update of
* both halves is complete.
*/
} else {
/*
* Update tmp_entry with the appropriate fields, then set
* entry 1 followed by entry 0 of the cmmu entry to the
* values in tmp_entry. Entry 0 is updated second in order
* to leave the valid bit set to false until the update of
* both halves is complete.
*/
if (flags & CMMU_UPDATE_MONDO) {
}
if (flags & CMMU_UPDATE_FROMNODE) {
}
if (flags & CMMU_UPDATE_TYPE) {
}
if (flags & CMMU_UPDATE_WRITABLE) {
}
if (flags & CMMU_UPDATE_USERERROR) {
}
if (flags & CMMU_UPDATE_FROMALL) {
}
if (flags & CMMU_UPDATE_LARGEPAGE) {
}
if (flags & CMMU_UPDATE_ENABLEPERF) {
}
if (flags & CMMU_UPDATE_VALID) {
} else { /* set back to original value */
}
/*
* Only allowed to set either the LPA or the INTRDEST fields,
* but not both. (These fields overlap.)
*/
if (flags & CMMU_UPDATE_LPA) {
} else if (flags & CMMU_UPDATE_INTRDEST) {
}
}
/* readback to assure the write to hardware occured */
if (flags & CMMU_UPDATE_FLUSH) {
/* grab lock so only one thread at a time has access */
cluster_syncp = (uint64_t *)
/*
* setting sync_in_progress tells the hardware to do
* CMMU flush. once the hardware has set sync_in_progress
* back to 0 AND there aren't any cag_busy bits set
* we can considered the CMMU successfully FLUSHED.
*/
while (cluster_sync.val) {
" CMMU_UPDATE_FLUSH addr %p wci_cluster_sync "
"0x%lx has is waiting on hardware to respond",
};
}
}
void
{
/* Don't read beyond the end of the CMMU sram */
return;
}
/*
* Note cmmu_entry0 and cmmu_entry1 are interleaved.
* Because of this each Index for an entry is spaced 0x40
* apart from the previous one.
* The initial physical address starts where cmmu_entry0 begins,
* because of the cmmu_entry1 must always be offset by an
* additional 20 from the base of softsp->cmmu_addr
*/
}
}
}
int
{
int num_entries;
return (num_entries);
}
/* The following is for the interrupt trap handler only */
{
/* phsyical addr used to access wci */
}
/*
* The following function is used to emulate a response from the sc
*/
void
{
"could not get softsp"));
return;
}
DDI_PROP_DONTPASS, "simwci", 0);
"get_remote_config_data: wci %d sim %d regs %p",
}
if (simwci)
else
}
/*
* lc_check_paroli_hotplug is called from wrsm_lc_poll_timeout to check
* occasionally if a paroli has been hot pluged.
*/
static void
{
int i;
for (i = 0; i < WRSM_LINKS_PER_WCI; i++) {
/*
* if state is both not_there and a paroli has been detected
* this is hotpluged link. Check if part of a config
* and try to bring up.
*/
== WCI_PAROLI_PRESENT)) {
/*
* No check for simwci needed since we don't use the
* not_there state for simwci's.
*/
"lc_check_paroli_hotplug: paroli detected"
": wrsm%d port id#%u, link#%d",
/* link is part of a config - bring it up */
}
}
}
}
static void
{
int i, gnid;
/* If WCI is not part of a controller, abort */
return;
}
for (i = 0; i < WRSM_LINKS_PER_WCI; ++i) {
link->remote_wnode));
/*
* Look for links that are up and connected to a WCX
*/
continue;
/*
* If it's not ok to call mh, just skip this link,
* the status bits will still be there later.
*/
if (!link->tell_mh_link_is_up)
continue;
/*
* farend_ustat1 contains a bitmask of other gnids which
* are reachable through the WCX at the remote end of this link
*/
if (active_diff) {
"changed reachable 0x%x old 0x%x", active_diff,
continue;
if (!(active_diff & mask))
continue;
/*
* Make sure that the indicated gnid is
* actually in the current config.
*/
if (remote_wnode >= WRSM_MAX_WNODES) {
"check_wcx_links: bad active "
"gnid from switch %d", gnid));
continue;
}
/* new gnid reachable */
continue;
}
/* previously reachable gnid now gone */
continue;
}
}
}
}
}
/*
* Checks and handles ECC errors noted by the driver
*
* correctable errors are noted and if possible corrected
* uncorrectable errors cause a trap
*
*/
void
{
wci_ra_esr_1_clear.val = 0;
wci_ca_esr_0_clear.val = 0;
wci_dc_esr_clear.val = 0;
/*
* Request Agent Mtag UE
*
* need to check that there is an uncorrectable error,
* and the address logged is not data [mtag],
* and the address logged is for ue [not ce]
*/
REQ_CLUSTER_MASK) == REQUEST_AGENT_MASK) &&
} else {
}
/*
* wci_ra_ecc_addr_tmp.bit.addr <42:4> is over
* bit 31 boundary (32/64 bit).
*/
/*
* clear wci_ra_esr_1 acc_uncorrectable_mtag_error bit
* The bit type is RW1TX, i.e.toggle, thus it needs to be
* set 1 to be cleared.
*
*/
}
/* Request Agent Data UE */
REQ_CLUSTER_MASK) == REQUEST_AGENT_MASK) &&
} else {
}
/*
* Address bits [42:4] of the transaction that caused the
* first UE error.
*/
/* clear wci_ra_esr_1 acc_uncorrectable_data_error bit */
}
/* Request Agent Mtag CE */
REQ_CLUSTER_MASK) == REQUEST_AGENT_MASK) &&
} else {
}
/*
* Address bits [42:4] of the transaction that caused the
* first CE error.
*/
/*
* clear wci_ra_esr_1 acc_correctable_mtag_error bit
* The bit type is RW1TX (toggle), thus needs to be
* set 1 to be cleared.
*
*/
}
/* Request Agent Data CE */
REQ_CLUSTER_MASK) == REQUEST_AGENT_MASK) &&
} else {
}
/*
* Address bits [42:4] of the transaction that caused the
* first CE error.
*/
/* clear wci_ra_esr_1 acc_correctable_data_error bit */
}
/* Cluster Agent Mtag UE */
REQ_CLUSTER_MASK) == CLUSTER_AGENT_MASK) &&
} else {
}
/*
* Address bits [36:0] of the transaction that caused the
* first CE error.
*/
/* mark if it was passthru request */
/* clear wci_ca_esr_0 acc_uncorrectable_mtag_error bit */
}
/* Cluster Agent Data UE */
REQ_CLUSTER_MASK) == CLUSTER_AGENT_MASK) &&
} else {
}
/*
* Address bits [36:0] of the transaction that caused the
* first CE error.
*/
/* mark if it was passthru request */
/* clear wci_ca_esr_0 acc_uncorrectable_data_error bit */
}
/* Cluster Agent Mtag CE */
REQ_CLUSTER_MASK) == CLUSTER_AGENT_MASK) &&
} else {
}
/*
* Address bits [36:0] of the transaction that caused the
* first CE error.
*/
/* mark if it was passthru request */
/* clear wci_ca_esr_0 acc_correctable_mtag_error bit */
}
/* Cluster Agent Data CE */
REQ_CLUSTER_MASK) == CLUSTER_AGENT_MASK) &&
} else {
}
/*
* Address bits [36:0] of the transaction that caused the
* first CE error.
*/
/* mark if it was passthru request */
/* clear wci_ca_esr_0 acc_correctable_data_error bit */
}
"ECC error(s) caught and handled "
/* reset the W1X esr registers */
/* wci_dco_ce_count counts down from ECC_MAX_CNT */
/* !! bit type is RW1TX toggle */
/* clear syndrome */
*softsp->wci_dco_state_vaddr = 0;
/* reset ca,ra address regs */
*softsp->wci_ra_ecc_addr_vaddr = 0;
*softsp->wci_ca_ecc_addr_vaddr = 0;
"wci_dco.ce.cnt = %lld,"
"wci_dc_esr.bit.acc_dco_ce = %lld,"
"wci_dc_esr.bit.dco_ce = %lld,\n"
"*softsp->wci_dco_state_vaddr = 0x%llx,\n"
"*softsp->wci_ra_ecc_addr_vaddr = 0x%llx,"
"*softsp->wci_ca_ecc_addr_vaddr = 0x%llx\n",
}
}
/*
* WRSM Correctable ecc error trap handler
*
*/
/* ARGSUSED */
static void
{
/*
* We can only scrub if the error comes from CA and it was not
* a passthru transaction. This is already marked in flt_stat
*/
"ecc->flt_in_memory = 1"));
} else {
/* RA or CA in passthru */
ecc->flt_in_memory = 0;
"ecc->flt_in_memory = 0"));
}
}
/*
* WRSM Uncorrectable ecc error trap handler
*/
/* ARGSUSED */
static void
{
#ifdef DEBUG
#else
#endif
"agent_type %d, fault address 0x%08x.%08x",
}
/*
* Examines the state of wci SRAM in wrsm (CMMU)
*/
static void
{
if (!wci_cci_esr.val) {
/* no errors found */
return;
}
/* map relevant registers */
"syndrome=0x%X",
sizeof (seprom));
/*
* Note that the CMMU remains disabled after we log the error
*/
/*
* The ecc is calculated from 34 bits of data +
* 2 parity bits taken from the address.
* We are here knowing that at least one of the address
* bits was corrupted
*/
}
wci_ca_esr_1.val = 0;
}
"CSRA, wci_csra_esr_status 0x%08x.%08x",
wci_csra_esr.val = 0;
}
/* note it for kstats */
}
"correctable error, "
"with%s address, total number of CE errors: %d",
/* note count for kstats */
if (do_shortterm) {
softsp->sram_ecc_errsum = 0;
}
}
/* clean up */
/* reset the count */
/* reset the wci_cci_esr (write 1 to toggle) */
}
/* ARGSUSED */
int
{
int retval = 0;
int num_entries; /* number of cmmu entries */
/* Only allow privileged users to do this */
return (retval);
return (EFAULT);
switch (cmd) {
case WRSM_LC_READCSR:
ADDR_LAST_CSR)) {
break;
}
flag) != 0)
break;
case WRSM_LC_WRITECSR:
ADDR_LAST_CSR)) {
break;
}
break;
case WRSM_LC_READCESR:
if (args[0] > ENTRIES_WCI_CLUSTER_ERROR_STATUS_ARRAY) {
break;
}
flag) != 0)
break;
case WRSM_LC_WRITECESR:
if (args[0] > ENTRIES_WCI_CLUSTER_ERROR_STATUS_ARRAY) {
break;
}
break;
case WRSM_LC_UPDATECMMU:
break;
}
break;
case WRSM_LC_READCMMU:
break;
}
flag) != 0)
break;
default:
return (EINVAL);
}
return (retval);
}
/*
* cancel all timeouts as part of DDI_SUSPEND
*/
void
{
timeout_id_t err_tmoid = 0;
return;
}
if (softsp->restart_timeout_id) {
softsp->restart_timeout_id = 0;
}
if (softsp->err_timeout_id) {
softsp->err_timeout_id = 0;
}
if (restart_tmoid)
(void) untimeout(restart_tmoid);
if (err_tmoid)
}
/*
* restart any timeouts cancelled for a DDI_SUSPEND
*/
void
{
return;
}
if (softsp->need_restart_timeout) {
}
if (softsp->need_err_timeout) {
}
}