/* drmach.c revision 07d06da50d310a325b457d6330165aebab1e0064 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/debug.h>
#include <sys/types.h>
#include <sys/varargs.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/dditypes.h>
#include <sys/devops.h>
#include <sys/modctl.h>
#include <sys/poll.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/ndi_impldefs.h>
#include <sys/stat.h>
#include <sys/kmem.h>
#include <sys/vmem.h>
#include <sys/opl_olympus_regs.h>
#include <sys/cpuvar.h>
#include <sys/cpupart.h>
#include <sys/mem_config.h>
#include <sys/ddi_impldefs.h>
#include <sys/systm.h>
#include <sys/machsystm.h>
#include <sys/autoconf.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/x_call.h>
#include <sys/promif.h>
#include <sys/prom_plat.h>
#include <sys/membar.h>
#include <vm/seg_kmem.h>
#include <sys/mem_cage.h>
#include <sys/stack.h>
#include <sys/archsystm.h>
#include <vm/hat_sfmmu.h>
#include <sys/pte.h>
#include <sys/mmu.h>
#include <sys/cpu_module.h>
#include <sys/obpdefs.h>
#include <sys/note.h>
#include <sys/ontrap.h>
#include <sys/cpu_sgnblk_defs.h>
#include <sys/opl.h>
#include <sys/cpu_impl.h>
#include <sys/promimpl.h>
#include <sys/prom_plat.h>
#include <sys/kobj.h>
#include <sys/sysevent.h>
#include <sys/sysevent/dr.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/drmach.h>
#include <sys/dr_util.h>
#include <sys/fcode.h>
#include <sys/opl_cfg.h>
extern void bcopy32_il(uint64_t, uint64_t);
extern void flush_cache_il(void);
extern void drmach_sleep_il(void);
typedef struct {
struct drmach_node *node;
void *data;
} drmach_node_walk_args_t;
typedef struct drmach_node {
void *here;
pnode_t (*get_dnode)(struct drmach_node *node);
int (*walk)(struct drmach_node *node, void *data,
int (*cb)(drmach_node_walk_args_t *args));
dev_info_t *(*n_getdip)(struct drmach_node *node);
int (*n_getproplen)(struct drmach_node *node, char *name,
int *len);
int (*n_getprop)(struct drmach_node *node, char *name,
void *buf, int len);
int (*get_parent)(struct drmach_node *node,
struct drmach_node *pnode);
} drmach_node_t;
typedef struct {
int min_index;
int max_index;
int arr_sz;
drmachid_t *arr;
} drmach_array_t;
typedef struct {
void *isa;
void (*dispose)(drmachid_t);
sbd_error_t *(*release)(drmachid_t);
sbd_error_t *(*status)(drmachid_t, drmach_status_t *);
char name[MAXNAMELEN];
} drmach_common_t;
typedef struct {
uint32_t core_present;
uint32_t core_hotadded;
uint32_t core_started;
} drmach_cmp_t;
typedef struct {
drmach_common_t cm;
int bnum;
int assigned;
int powered;
int connected;
int cond;
drmach_node_t *tree;
drmach_array_t *devices;
int boot_board; /* if board exists on bootup */
drmach_cmp_t cores[OPL_MAX_COREID_PER_BOARD];
} drmach_board_t;
typedef struct {
drmach_common_t cm;
drmach_board_t *bp;
int unum;
int portid;
int busy;
int powered;
const char *type;
drmach_node_t *node;
} drmach_device_t;
typedef struct drmach_cpu {
drmach_device_t dev;
processorid_t cpuid;
int sb;
int chipid;
int coreid;
int strandid;
int status;
#define OPL_CPU_HOTADDED 1
} drmach_cpu_t;
typedef struct drmach_mem {
drmach_device_t dev;
uint64_t slice_base;
uint64_t slice_size;
uint64_t base_pa; /* lowest installed memory base */
uint64_t nbytes; /* size of installed memory */
struct memlist *memlist;
} drmach_mem_t;
typedef struct drmach_io {
drmach_device_t dev;
int channel;
int leaf;
} drmach_io_t;
typedef struct drmach_domain_info {
uint32_t floating;
int allow_dr;
} drmach_domain_info_t;
drmach_domain_info_t drmach_domain;
typedef struct {
int flags;
drmach_device_t *dp;
sbd_error_t *err;
dev_info_t *dip;
} drmach_config_args_t;
typedef struct {
drmach_board_t *obj;
int ndevs;
void *a;
sbd_error_t *(*found)(void *a, const char *, int, drmachid_t);
sbd_error_t *err;
} drmach_board_cb_data_t;
static drmach_array_t *drmach_boards;
static sbd_error_t *drmach_device_new(drmach_node_t *,
drmach_board_t *, int, drmachid_t *);
static sbd_error_t *drmach_cpu_new(drmach_device_t *, drmachid_t *);
static sbd_error_t *drmach_mem_new(drmach_device_t *, drmachid_t *);
static sbd_error_t *drmach_io_new(drmach_device_t *, drmachid_t *);
static dev_info_t *drmach_node_ddi_get_dip(drmach_node_t *np);
static int drmach_node_ddi_get_prop(drmach_node_t *np,
char *name, void *buf, int len);
static int drmach_node_ddi_get_proplen(drmach_node_t *np,
char *name, int *len);
static int drmach_get_portid(drmach_node_t *);
static sbd_error_t *drmach_i_status(drmachid_t, drmach_status_t *);
static int opl_check_dr_status(void);
static void drmach_io_dispose(drmachid_t);
static sbd_error_t *drmach_io_release(drmachid_t);
static sbd_error_t *drmach_io_status(drmachid_t, drmach_status_t *);
static int drmach_init(void);
static void drmach_fini(void);
static void drmach_swap_pa(drmach_mem_t *, drmach_mem_t *);
static drmach_board_t *drmach_get_board_by_bnum(int);
/* options for the second argument in drmach_add_remove_cpu() */
#define HOTADD_CPU 1
#define HOTREMOVE_CPU 2
#define ON_BOARD_CORE_NUM(x) (((uint_t)(x) / OPL_MAX_STRANDID_PER_CORE) & \
(OPL_MAX_COREID_PER_BOARD - 1))
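/*
* Worked example (a sketch assuming OPL_MAX_STRANDID_PER_CORE == 2 and
* OPL_MAX_COREID_PER_BOARD == 16, per the CPUID layout described in
* drmach_cpu_new()): ON_BOARD_CORE_NUM() reduces to CPUID[4:1], so for
* cpuid 0x23 it yields (0x23 / 2) & 0xf == 1, i.e. on-board core 1.
*/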
extern struct cpu *SIGBCPU;
static int drmach_name2type_idx(char *);
static drmach_board_t *drmach_board_new(int, int);
#ifdef DEBUG
#define DRMACH_PR if (drmach_debug) printf
int drmach_debug = 1; /* set to non-zero to enable debug messages */
#else
#define DRMACH_PR _NOTE(CONSTANTCONDITION) if (0) printf
#endif /* DEBUG */
#define DRMACH_OBJ(id) ((drmach_common_t *)id)
#define DRMACH_NULL_ID(id) ((id) == 0)
#define DRMACH_IS_BOARD_ID(id) \
((id != 0) && \
(DRMACH_OBJ(id)->isa == (void *)drmach_board_new))
#define DRMACH_IS_CPU_ID(id) \
((id != 0) && \
(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new))
#define DRMACH_IS_MEM_ID(id) \
((id != 0) && \
(DRMACH_OBJ(id)->isa == (void *)drmach_mem_new))
#define DRMACH_IS_IO_ID(id) \
((id != 0) && \
(DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
#define DRMACH_IS_DEVICE_ID(id) \
((id != 0) && \
(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new || \
DRMACH_OBJ(id)->isa == (void *)drmach_mem_new || \
DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
#define DRMACH_IS_ID(id) \
((id != 0) && \
(DRMACH_OBJ(id)->isa == (void *)drmach_board_new || \
DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new || \
DRMACH_OBJ(id)->isa == (void *)drmach_mem_new || \
DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
#define DRMACH_INTERNAL_ERROR() \
drerr_new(1, EOPL_INTERNAL, drmach_ie_fmt, __LINE__)
static char *drmach_ie_fmt = "drmach.c %d";
static struct {
const char *name;
const char *type;
sbd_error_t *(*new)(drmach_device_t *, drmachid_t *);
} drmach_name2type[] = {
{ "cpu", DRMACH_DEVTYPE_CPU, drmach_cpu_new },
{ "pseudo-mc", DRMACH_DEVTYPE_MEM, drmach_mem_new },
{ "pci", DRMACH_DEVTYPE_PCI, drmach_io_new },
};
/* utility */
#define MBYTE (1048576ull)
/*
* drmach autoconfiguration data structures and interfaces
*/
extern struct mod_ops mod_miscops;
static struct modlmisc modlmisc = {
&mod_miscops,
"OPL DR 1.1"
};
static struct modlinkage modlinkage = {
MODREV_1,
(void *)&modlmisc,
NULL
};
static krwlock_t drmach_boards_rwlock;
typedef const char *fn_t;
int
_init(void)
{
int err;
if ((err = drmach_init()) != 0) {
return (err);
}
if ((err = mod_install(&modlinkage)) != 0) {
drmach_fini();
}
return (err);
}
int
_fini(void)
{
int err;
if ((err = mod_remove(&modlinkage)) == 0)
drmach_fini();
return (err);
}
int
_info(struct modinfo *modinfop)
{
return (mod_info(&modlinkage, modinfop));
}
struct drmach_mc_lookup {
int bnum;
drmach_board_t *bp;
dev_info_t *dip; /* rv - set if found */
};
#define _ptob64(p) ((uint64_t)(p) << PAGESHIFT)
#define _b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
static int
drmach_setup_mc_info(dev_info_t *dip, drmach_mem_t *mp)
{
uint64_t memory_ranges[128];
int len;
struct memlist *ml;
int rv;
hwd_sb_t *hwd;
hwd_memory_t *pm;
len = sizeof (memory_ranges);
if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
"sb-mem-ranges", (caddr_t)&memory_ranges[0], &len) !=
DDI_PROP_SUCCESS) {
mp->slice_base = 0;
mp->slice_size = 0;
return (-1);
}
mp->slice_base = memory_ranges[0];
mp->slice_size = memory_ranges[1];
if (!mp->dev.bp->boot_board) {
int i;
rv = opl_read_hwd(mp->dev.bp->bnum, NULL, NULL, NULL, &hwd);
if (rv != 0) {
return (-1);
}
ml = NULL;
pm = &hwd->sb_cmu.cmu_memory;
for (i = 0; i < HWD_MAX_MEM_CHUNKS; i++) {
if (pm->mem_chunks[i].chnk_size > 0) {
ml = memlist_add_span(ml,
pm->mem_chunks[i].chnk_start_address,
pm->mem_chunks[i].chnk_size);
}
}
} else {
/*
* we intersect phys_install to get base_pa.
* This only works at bootup time.
*/
memlist_read_lock();
ml = memlist_dup(phys_install);
memlist_read_unlock();
ml = memlist_del_span(ml, 0ull, mp->slice_base);
if (ml) {
uint64_t basepa, endpa;
endpa = _ptob64(physmax + 1);
basepa = mp->slice_base + mp->slice_size;
ml = memlist_del_span(ml, basepa, endpa - basepa);
}
}
if (ml) {
uint64_t nbytes = 0;
struct memlist *p;
for (p = ml; p; p = p->next) {
nbytes += p->size;
}
if ((mp->nbytes = nbytes) > 0)
mp->base_pa = ml->address;
else
mp->base_pa = 0;
mp->memlist = ml;
} else {
mp->base_pa = 0;
mp->nbytes = 0;
}
return (0);
}
struct drmach_hotcpu {
drmach_board_t *bp;
int bnum;
int core_id;
int rv;
int option;
};
static int
drmach_cpu_cb(dev_info_t *dip, void *arg)
{
struct drmach_hotcpu *p = (struct drmach_hotcpu *)arg;
char name[OBP_MAXDRVNAME];
int len = OBP_MAXDRVNAME;
int bnum, core_id, strand_id;
drmach_board_t *bp;
if (dip == ddi_root_node()) {
return (DDI_WALK_CONTINUE);
}
if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
DDI_PROP_DONTPASS, "name",
(caddr_t)name, &len) != DDI_PROP_SUCCESS) {
return (DDI_WALK_PRUNECHILD);
}
/* only cmp has board number */
bnum = -1;
len = sizeof (bnum);
if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
DDI_PROP_DONTPASS, OBP_BOARDNUM,
(caddr_t)&bnum, &len) != DDI_PROP_SUCCESS) {
bnum = -1;
}
if (strcmp(name, "cmp") == 0) {
if (bnum != p->bnum)
return (DDI_WALK_PRUNECHILD);
return (DDI_WALK_CONTINUE);
}
/* we have already pruned all unwanted cores and cpus above */
if (strcmp(name, "core") == 0) {
return (DDI_WALK_CONTINUE);
}
if (strcmp(name, "cpu") == 0) {
processorid_t cpuid;
len = sizeof (cpuid);
if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
DDI_PROP_DONTPASS, "cpuid",
(caddr_t)&cpuid, &len) != DDI_PROP_SUCCESS) {
p->rv = -1;
return (DDI_WALK_TERMINATE);
}
core_id = p->core_id;
bnum = LSB_ID(cpuid);
if (ON_BOARD_CORE_NUM(cpuid) != core_id)
return (DDI_WALK_CONTINUE);
bp = p->bp;
ASSERT(bnum == bp->bnum);
if (p->option == HOTADD_CPU) {
if (prom_hotaddcpu(cpuid) != 0) {
p->rv = -1;
return (DDI_WALK_TERMINATE);
}
strand_id = STRAND_ID(cpuid);
bp->cores[core_id].core_hotadded |= (1 << strand_id);
} else if (p->option == HOTREMOVE_CPU) {
if (prom_hotremovecpu(cpuid) != 0) {
p->rv = -1;
return (DDI_WALK_TERMINATE);
}
strand_id = STRAND_ID(cpuid);
bp->cores[core_id].core_hotadded &= ~(1 << strand_id);
}
return (DDI_WALK_CONTINUE);
}
return (DDI_WALK_PRUNECHILD);
}
static int
drmach_add_remove_cpu(int bnum, int core_id, int option)
{
struct drmach_hotcpu arg;
drmach_board_t *bp;
bp = drmach_get_board_by_bnum(bnum);
ASSERT(bp);
arg.bp = bp;
arg.bnum = bnum;
arg.core_id = core_id;
arg.rv = 0;
arg.option = option;
ddi_walk_devs(ddi_root_node(), drmach_cpu_cb, (void *)&arg);
return (arg.rv);
}
struct drmach_setup_core_arg {
drmach_board_t *bp;
};
static int
drmach_setup_core_cb(dev_info_t *dip, void *arg)
{
struct drmach_setup_core_arg *p = (struct drmach_setup_core_arg *)arg;
char name[OBP_MAXDRVNAME];
int len = OBP_MAXDRVNAME;
int bnum;
int core_id, strand_id;
if (dip == ddi_root_node()) {
return (DDI_WALK_CONTINUE);
}
if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
DDI_PROP_DONTPASS, "name",
(caddr_t)name, &len) != DDI_PROP_SUCCESS) {
return (DDI_WALK_PRUNECHILD);
}
/* only cmp has board number */
bnum = -1;
len = sizeof (bnum);
if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
DDI_PROP_DONTPASS, OBP_BOARDNUM,
(caddr_t)&bnum, &len) != DDI_PROP_SUCCESS) {
bnum = -1;
}
if (strcmp(name, "cmp") == 0) {
if (bnum != p->bp->bnum)
return (DDI_WALK_PRUNECHILD);
return (DDI_WALK_CONTINUE);
}
/* we have already pruned all unwanted cores and cpus above */
if (strcmp(name, "core") == 0) {
return (DDI_WALK_CONTINUE);
}
if (strcmp(name, "cpu") == 0) {
processorid_t cpuid;
len = sizeof (cpuid);
if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
DDI_PROP_DONTPASS, "cpuid",
(caddr_t)&cpuid, &len) != DDI_PROP_SUCCESS) {
return (DDI_WALK_TERMINATE);
}
bnum = LSB_ID(cpuid);
ASSERT(bnum == p->bp->bnum);
core_id = ON_BOARD_CORE_NUM(cpuid);
strand_id = STRAND_ID(cpuid);
p->bp->cores[core_id].core_present |= (1 << strand_id);
return (DDI_WALK_CONTINUE);
}
return (DDI_WALK_PRUNECHILD);
}
static void
drmach_setup_core_info(drmach_board_t *obj)
{
struct drmach_setup_core_arg arg;
int i;
for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
obj->cores[i].core_present = 0;
obj->cores[i].core_hotadded = 0;
obj->cores[i].core_started = 0;
}
arg.bp = obj;
ddi_walk_devs(ddi_root_node(), drmach_setup_core_cb, (void *)&arg);
for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
if (obj->boot_board) {
obj->cores[i].core_hotadded =
obj->cores[i].core_started =
obj->cores[i].core_present;
}
}
}
/*
* drmach_node_* routines serve the purpose of separating the
* rest of the code from the device tree and OBP. This is necessary
* because of In-Kernel-Probing. Devices probed after stod (start of
* day) are probed by the in-kernel prober, not OBP. These devices,
* therefore, do not have dnode ids.
*/
typedef struct {
drmach_node_walk_args_t *nwargs;
int (*cb)(drmach_node_walk_args_t *args);
int err;
} drmach_node_ddi_walk_args_t;
static int
drmach_node_ddi_walk_cb(dev_info_t *dip, void *arg)
{
drmach_node_ddi_walk_args_t *nargs;
nargs = (drmach_node_ddi_walk_args_t *)arg;
/*
* dip doesn't have to be held here as we are called
* from ddi_walk_devs() which holds the dip.
*/
nargs->nwargs->node->here = (void *)dip;
nargs->err = nargs->cb(nargs->nwargs);
/*
* Set "here" to NULL so that unheld dip is not accessible
* outside ddi_walk_devs()
*/
nargs->nwargs->node->here = NULL;
if (nargs->err)
return (DDI_WALK_TERMINATE);
else
return (DDI_WALK_CONTINUE);
}
static int
drmach_node_ddi_walk(drmach_node_t *np, void *data,
int (*cb)(drmach_node_walk_args_t *args))
{
drmach_node_walk_args_t args;
drmach_node_ddi_walk_args_t nargs;
/* initialize args structure for callback */
args.node = np;
args.data = data;
nargs.nwargs = &args;
nargs.cb = cb;
nargs.err = 0;
/*
* Root node doesn't have to be held in any way.
*/
ddi_walk_devs(ddi_root_node(), drmach_node_ddi_walk_cb, (void *)&nargs);
return (nargs.err);
}
static int
drmach_node_ddi_get_parent(drmach_node_t *np, drmach_node_t *pp)
{
dev_info_t *ndip;
static char *fn = "drmach_node_ddi_get_parent";
ndip = np->n_getdip(np);
if (ndip == NULL) {
cmn_err(CE_WARN, "%s: NULL dip", fn);
return (-1);
}
bcopy(np, pp, sizeof (drmach_node_t));
pp->here = (void *)ddi_get_parent(ndip);
if (pp->here == NULL) {
cmn_err(CE_WARN, "%s: NULL parent dip", fn);
return (-1);
}
return (0);
}
/*ARGSUSED*/
static pnode_t
drmach_node_ddi_get_dnode(drmach_node_t *np)
{
return ((pnode_t)NULL);
}
static drmach_node_t *
drmach_node_new(void)
{
drmach_node_t *np;
np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP);
np->get_dnode = drmach_node_ddi_get_dnode;
np->walk = drmach_node_ddi_walk;
np->n_getdip = drmach_node_ddi_get_dip;
np->n_getproplen = drmach_node_ddi_get_proplen;
np->n_getprop = drmach_node_ddi_get_prop;
np->get_parent = drmach_node_ddi_get_parent;
return (np);
}
static void
drmach_node_dispose(drmach_node_t *np)
{
kmem_free(np, sizeof (*np));
}
static dev_info_t *
drmach_node_ddi_get_dip(drmach_node_t *np)
{
return ((dev_info_t *)np->here);
}
static int
drmach_node_walk(drmach_node_t *np, void *param,
int (*cb)(drmach_node_walk_args_t *args))
{
return (np->walk(np, param, cb));
}
static int
drmach_node_ddi_get_prop(drmach_node_t *np, char *name, void *buf, int len)
{
int rv = 0;
dev_info_t *ndip;
static char *fn = "drmach_node_ddi_get_prop";
ndip = np->n_getdip(np);
if (ndip == NULL) {
cmn_err(CE_WARN, "%s: NULL dip", fn);
rv = -1;
} else if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ndip,
DDI_PROP_DONTPASS, name,
(caddr_t)buf, &len) != DDI_PROP_SUCCESS) {
rv = -1;
}
return (rv);
}
static int
drmach_node_ddi_get_proplen(drmach_node_t *np, char *name, int *len)
{
int rv = 0;
dev_info_t *ndip;
ndip = np->n_getdip(np);
if (ndip == NULL) {
rv = -1;
} else if (ddi_getproplen(DDI_DEV_T_ANY, ndip, DDI_PROP_DONTPASS, name,
len) != DDI_PROP_SUCCESS) {
rv = -1;
}
return (rv);
}
static drmachid_t
drmach_node_dup(drmach_node_t *np)
{
drmach_node_t *dup;
dup = drmach_node_new();
dup->here = np->here;
dup->get_dnode = np->get_dnode;
dup->walk = np->walk;
dup->n_getdip = np->n_getdip;
dup->n_getproplen = np->n_getproplen;
dup->n_getprop = np->n_getprop;
dup->get_parent = np->get_parent;
return (dup);
}
/*
* drmach_array provides convenient array construction, access,
* bounds checking and array destruction logic.
*/
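/*
* Usage sketch: drmach_init() creates the board table with
* drmach_array_new(0, MAX_BOARDS - 1); entries are stored with
* drmach_array_set() and scanned with drmach_array_first()/_next(),
* which skip NULL slots and return -1 once the index passes max_index.
*/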
static drmach_array_t *
drmach_array_new(int min_index, int max_index)
{
drmach_array_t *arr;
arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);
arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
if (arr->arr_sz > 0) {
arr->min_index = min_index;
arr->max_index = max_index;
arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
return (arr);
} else {
kmem_free(arr, sizeof (*arr));
return (0);
}
}
static int
drmach_array_set(drmach_array_t *arr, int idx, drmachid_t val)
{
if (idx < arr->min_index || idx > arr->max_index)
return (-1);
else {
arr->arr[idx - arr->min_index] = val;
return (0);
}
/*NOTREACHED*/
}
static int
drmach_array_get(drmach_array_t *arr, int idx, drmachid_t *val)
{
if (idx < arr->min_index || idx > arr->max_index)
return (-1);
else {
*val = arr->arr[idx - arr->min_index];
return (0);
}
/*NOTREACHED*/
}
static int
drmach_array_first(drmach_array_t *arr, int *idx, drmachid_t *val)
{
int rv;
*idx = arr->min_index;
while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
*idx += 1;
return (rv);
}
static int
drmach_array_next(drmach_array_t *arr, int *idx, drmachid_t *val)
{
int rv;
*idx += 1;
while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
*idx += 1;
return (rv);
}
static void
drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
{
drmachid_t val;
int idx;
int rv;
rv = drmach_array_first(arr, &idx, &val);
while (rv == 0) {
(*disposer)(val);
rv = drmach_array_next(arr, &idx, &val);
}
kmem_free(arr->arr, arr->arr_sz);
kmem_free(arr, sizeof (*arr));
}
static drmach_board_t *
drmach_get_board_by_bnum(int bnum)
{
drmachid_t id;
if (drmach_array_get(drmach_boards, bnum, &id) == 0)
return ((drmach_board_t *)id);
else
return (NULL);
}
static pnode_t
drmach_node_get_dnode(drmach_node_t *np)
{
return (np->get_dnode(np));
}
/*ARGSUSED*/
sbd_error_t *
drmach_configure(drmachid_t id, int flags)
{
drmach_device_t *dp;
sbd_error_t *err = NULL;
dev_info_t *rdip;
dev_info_t *fdip = NULL;
if (DRMACH_IS_CPU_ID(id)) {
return (NULL);
}
if (!DRMACH_IS_DEVICE_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
dp = id;
rdip = dp->node->n_getdip(dp->node);
ASSERT(rdip);
ASSERT(e_ddi_branch_held(rdip));
if (e_ddi_branch_configure(rdip, &fdip, 0) != 0) {
char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
dev_info_t *dip = (fdip != NULL) ? fdip : rdip;
(void) ddi_pathname(dip, path);
err = drerr_new(1, EOPL_DRVFAIL, path);
kmem_free(path, MAXPATHLEN);
/* If non-NULL, fdip is returned held and must be released */
if (fdip != NULL)
ddi_release_devi(fdip);
}
return (err);
}
static sbd_error_t *
drmach_device_new(drmach_node_t *node,
drmach_board_t *bp, int portid, drmachid_t *idp)
{
int i;
int rv;
drmach_device_t proto;
sbd_error_t *err;
char name[OBP_MAXDRVNAME];
rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
if (rv) {
/* every node is expected to have a name */
err = drerr_new(1, EOPL_GETPROP, "device node %s: property %s",
ddi_node_name(node->n_getdip(node)), "name");
return (err);
}
/*
* The node currently being examined is not listed in the name2type[]
* array. In this case, the node is of no interest to drmach. Both
* the returned id and the error are set here to yield nothing (no
* device and no error structure) for this case.
*/
i = drmach_name2type_idx(name);
if (i < 0) {
*idp = (drmachid_t)0;
return (NULL);
}
/* device specific new function will set unum */
bzero(&proto, sizeof (proto));
proto.type = drmach_name2type[i].type;
proto.bp = bp;
proto.node = node;
proto.portid = portid;
return (drmach_name2type[i].new(&proto, idp));
}
static void
drmach_device_dispose(drmachid_t id)
{
drmach_device_t *self = id;
self->cm.dispose(id);
}
static drmach_board_t *
drmach_board_new(int bnum, int boot_board)
{
static sbd_error_t *drmach_board_release(drmachid_t);
static sbd_error_t *drmach_board_status(drmachid_t, drmach_status_t *);
drmach_board_t *bp;
bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP);
bp->cm.isa = (void *)drmach_board_new;
bp->cm.release = drmach_board_release;
bp->cm.status = drmach_board_status;
(void) drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name));
bp->bnum = bnum;
bp->devices = NULL;
bp->connected = boot_board;
bp->tree = drmach_node_new();
bp->assigned = boot_board;
bp->powered = boot_board;
bp->boot_board = boot_board;
/*
* If this is not bootup initialization, we have to wait till
* IKP sets up the device nodes in drmach_board_connect().
*/
if (boot_board)
drmach_setup_core_info(bp);
(void) drmach_array_set(drmach_boards, bnum, bp);
return (bp);
}
static void
drmach_board_dispose(drmachid_t id)
{
drmach_board_t *bp;
ASSERT(DRMACH_IS_BOARD_ID(id));
bp = id;
if (bp->tree)
drmach_node_dispose(bp->tree);
if (bp->devices)
drmach_array_dispose(bp->devices, drmach_device_dispose);
kmem_free(bp, sizeof (*bp));
}
static sbd_error_t *
drmach_board_status(drmachid_t id, drmach_status_t *stat)
{
sbd_error_t *err = NULL;
drmach_board_t *bp;
if (!DRMACH_IS_BOARD_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
bp = id;
stat->assigned = bp->assigned;
stat->powered = bp->powered;
stat->busy = 0; /* assume not busy */
stat->configured = 0; /* assume not configured */
stat->empty = 0;
stat->cond = bp->cond = SBD_COND_OK;
(void) strncpy(stat->type, "System Brd", sizeof (stat->type));
stat->info[0] = '\0';
if (bp->devices) {
int rv;
int d_idx;
drmachid_t d_id;
rv = drmach_array_first(bp->devices, &d_idx, &d_id);
while (rv == 0) {
drmach_status_t d_stat;
err = drmach_i_status(d_id, &d_stat);
if (err)
break;
stat->busy |= d_stat.busy;
stat->configured |= d_stat.configured;
rv = drmach_array_next(bp->devices, &d_idx, &d_id);
}
}
return (err);
}
int
drmach_board_is_floating(drmachid_t id)
{
drmach_board_t *bp;
if (!DRMACH_IS_BOARD_ID(id))
return (0);
bp = (drmach_board_t *)id;
return ((drmach_domain.floating & (1 << bp->bnum)) ? 1 : 0);
}
static int
drmach_init(void)
{
dev_info_t *rdip;
int i, rv, len;
int *floating;
rw_init(&drmach_boards_rwlock, NULL, RW_DEFAULT, NULL);
drmach_boards = drmach_array_new(0, MAX_BOARDS - 1);
rdip = ddi_root_node();
if (ddi_getproplen(DDI_DEV_T_ANY, rdip, DDI_PROP_DONTPASS,
"floating-boards", &len) != DDI_PROP_SUCCESS) {
cmn_err(CE_WARN, "Cannot get floating-boards proplen\n");
} else {
floating = (int *)kmem_alloc(len, KM_SLEEP);
rv = ddi_prop_op(DDI_DEV_T_ANY, rdip, PROP_LEN_AND_VAL_BUF,
DDI_PROP_DONTPASS, "floating-boards", (caddr_t)floating,
&len);
if (rv != DDI_PROP_SUCCESS) {
cmn_err(CE_WARN, "Cannot get floating-boards prop\n");
} else {
drmach_domain.floating = 0;
for (i = 0; i < len / sizeof (int); i++) {
drmach_domain.floating |= (1 << floating[i]);
}
}
kmem_free(floating, len);
}
drmach_domain.allow_dr = opl_check_dr_status();
rdip = ddi_get_child(ddi_root_node());
do {
int bnum;
drmachid_t id;
bnum = -1;
bnum = ddi_getprop(DDI_DEV_T_ANY, rdip, DDI_PROP_DONTPASS,
OBP_BOARDNUM, -1);
if (bnum == -1)
continue;
if (drmach_array_get(drmach_boards, bnum, &id) == -1) {
cmn_err(CE_WARN, "Device node 0x%p has invalid "
"property value, %s=%d", (void *)rdip,
OBP_BOARDNUM, bnum);
goto error;
} else if (id == NULL) {
(void) drmach_board_new(bnum, 1);
}
} while ((rdip = ddi_get_next_sibling(rdip)) != NULL);
opl_hold_devtree();
/*
* Initialize the IKP feature.
*
* This can be done only after DR has acquired a hold on all the
* device nodes that are interesting to IKP.
*/
if (opl_init_cfg() != 0) {
cmn_err(CE_WARN, "DR - IKP initialization failed");
opl_release_devtree();
goto error;
}
return (0);
error:
drmach_array_dispose(drmach_boards, drmach_board_dispose);
rw_destroy(&drmach_boards_rwlock);
return (ENXIO);
}
static void
drmach_fini(void)
{
rw_enter(&drmach_boards_rwlock, RW_WRITER);
drmach_array_dispose(drmach_boards, drmach_board_dispose);
drmach_boards = NULL;
rw_exit(&drmach_boards_rwlock);
/*
* Walk immediate children of the root devinfo node
* releasing holds acquired on branches in drmach_init()
*/
opl_release_devtree();
rw_destroy(&drmach_boards_rwlock);
}
/*
* Each system board contains 2 Oberon PCI bridges and
* 1 CMUCH.
* Each Oberon has 2 channels.
* Each channel has 2 PCI-Express leaves.
* Each CMUCH has 1 PCI bus.
*
*
* Device Path:
* /pci@<portid>,reg
*
* where
* portid[10] = 0
* portid[9:0] = LLEAF_ID[9:0] of the Oberon Channel
*
* LLEAF_ID[9:8] = 0
* LLEAF_ID[8:4] = LSB_ID[4:0]
* LLEAF_ID[3:1] = IO Channel#[2:0] (0,1,2,3 for Oberon)
* channel 4 is pcicmu
* LLEAF_ID[0] = PCI Leaf Number (0 for leaf-A, 1 for leaf-B)
*
* Properties:
* name = pci
* device_type = "pciex"
* board# = LSBID
* reg = int32 * 2, Oberon CSR space of the leaf and the UBC space
* portid = Jupiter Bus Device ID ((LSB_ID << 3)|pciport#)
*/
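/*
* Worked example: for LSB_ID 1, IO channel 4 (pcicmu) and leaf 0, the
* layout above gives LLEAF_ID = (1 << 4) | (4 << 1) | 0 = 0x18;
* drmach_io_new() below recovers channel = (0x18 >> 1) & 0x7 = 4 and
* leaf = 0x18 & 0x1 = 0.
*/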
static sbd_error_t *
drmach_io_new(drmach_device_t *proto, drmachid_t *idp)
{
drmach_io_t *ip;
int portid;
portid = proto->portid;
ASSERT(portid != -1);
proto->unum = portid & (MAX_IO_UNITS_PER_BOARD - 1);
ip = kmem_zalloc(sizeof (drmach_io_t), KM_SLEEP);
bcopy(proto, &ip->dev, sizeof (ip->dev));
ip->dev.node = drmach_node_dup(proto->node);
ip->dev.cm.isa = (void *)drmach_io_new;
ip->dev.cm.dispose = drmach_io_dispose;
ip->dev.cm.release = drmach_io_release;
ip->dev.cm.status = drmach_io_status;
ip->channel = (portid >> 1) & 0x7;
ip->leaf = (portid & 0x1);
(void) snprintf(ip->dev.cm.name, sizeof (ip->dev.cm.name), "%s%d",
ip->dev.type, ip->dev.unum);
*idp = (drmachid_t)ip;
return (NULL);
}
static void
drmach_io_dispose(drmachid_t id)
{
drmach_io_t *self;
ASSERT(DRMACH_IS_IO_ID(id));
self = id;
if (self->dev.node)
drmach_node_dispose(self->dev.node);
kmem_free(self, sizeof (*self));
}
/*ARGSUSED*/
sbd_error_t *
drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts)
{
drmach_board_t *bp = (drmach_board_t *)id;
sbd_error_t *err = NULL;
/* allow status and ncm operations to always succeed */
if ((cmd == SBD_CMD_STATUS) || (cmd == SBD_CMD_GETNCM)) {
return (NULL);
}
/* check all other commands for the required option string */
if ((opts->size > 0) && (opts->copts != NULL)) {
DRMACH_PR("platform options: %s\n", opts->copts);
if (strstr(opts->copts, "opldr") == NULL) {
err = drerr_new(1, EOPL_SUPPORT, NULL);
}
} else {
err = drerr_new(1, EOPL_SUPPORT, NULL);
}
if (!err && id && DRMACH_IS_BOARD_ID(id)) {
switch (cmd) {
case SBD_CMD_TEST:
case SBD_CMD_STATUS:
case SBD_CMD_GETNCM:
break;
case SBD_CMD_CONNECT:
if (bp->connected)
err = drerr_new(0, ESBD_STATE, NULL);
else if (!drmach_domain.allow_dr)
err = drerr_new(1, EOPL_SUPPORT, NULL);
break;
case SBD_CMD_DISCONNECT:
if (!bp->connected)
err = drerr_new(0, ESBD_STATE, NULL);
else if (!drmach_domain.allow_dr)
err = drerr_new(1, EOPL_SUPPORT, NULL);
break;
default:
if (!drmach_domain.allow_dr)
err = drerr_new(1, EOPL_SUPPORT, NULL);
break;
}
}
return (err);
}
/*ARGSUSED*/
sbd_error_t *
drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts)
{
return (NULL);
}
sbd_error_t *
drmach_board_assign(int bnum, drmachid_t *id)
{
sbd_error_t *err = NULL;
rw_enter(&drmach_boards_rwlock, RW_WRITER);
if (drmach_array_get(drmach_boards, bnum, id) == -1) {
err = drerr_new(1, EOPL_BNUM, "%d", bnum);
} else {
drmach_board_t *bp;
if (*id)
rw_downgrade(&drmach_boards_rwlock);
bp = *id;
if (!(*id))
bp = *id =
(drmachid_t)drmach_board_new(bnum, 0);
bp->assigned = 1;
}
rw_exit(&drmach_boards_rwlock);
return (err);
}
/*ARGSUSED*/
sbd_error_t *
drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
{
extern int cpu_alljupiter;
drmach_board_t *obj = (drmach_board_t *)id;
unsigned cpu_impl;
if (!DRMACH_IS_BOARD_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
if (opl_probe_sb(obj->bnum, &cpu_impl) != 0)
return (drerr_new(1, EOPL_PROBE, NULL));
if (cpu_alljupiter) {
if (cpu_impl & (1 << OLYMPUS_C_IMPL)) {
(void) opl_unprobe_sb(obj->bnum);
return (drerr_new(1, EOPL_MIXED_CPU, NULL));
}
}
(void) prom_attach_notice(obj->bnum);
drmach_setup_core_info(obj);
obj->connected = 1;
return (NULL);
}
static int drmach_cache_flush_flag[NCPU];
/*ARGSUSED*/
static void
drmach_flush_cache(uint64_t id, uint64_t dummy)
{
extern void cpu_flush_ecache(void);
cpu_flush_ecache();
drmach_cache_flush_flag[id] = 0;
}
static void
drmach_flush_all()
{
cpuset_t xc_cpuset;
int i;
xc_cpuset = cpu_ready_set;
for (i = 0; i < NCPU; i++) {
if (CPU_IN_SET(xc_cpuset, i)) {
drmach_cache_flush_flag[i] = 1;
xc_one(i, drmach_flush_cache, i, 0);
while (drmach_cache_flush_flag[i]) {
DELAY(1000);
}
}
}
}
static int
drmach_disconnect_cpus(drmach_board_t *bp)
{
int i, bnum;
bnum = bp->bnum;
for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
if (bp->cores[i].core_present) {
if (bp->cores[i].core_started)
return (-1);
if (bp->cores[i].core_hotadded) {
if (drmach_add_remove_cpu(bnum, i,
HOTREMOVE_CPU)) {
cmn_err(CE_WARN, "Failed to remove "
"CMP %d on board %d\n", i, bnum);
return (-1);
}
}
}
}
return (0);
}
/*ARGSUSED*/
sbd_error_t *
drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts)
{
drmach_board_t *obj;
int rv = 0;
sbd_error_t *err = NULL;
if (DRMACH_NULL_ID(id))
return (NULL);
if (!DRMACH_IS_BOARD_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
obj = (drmach_board_t *)id;
if (drmach_disconnect_cpus(obj)) {
err = drerr_new(1, EOPL_DEPROBE, obj->cm.name);
return (err);
}
rv = opl_unprobe_sb(obj->bnum);
if (rv == 0) {
(void) prom_detach_notice(obj->bnum);
obj->connected = 0;
} else
err = drerr_new(1, EOPL_DEPROBE, obj->cm.name);
return (err);
}
static int
drmach_get_portid(drmach_node_t *np)
{
int portid;
char type[OBP_MAXPROPNAME];
if (np->n_getprop(np, "portid", &portid, sizeof (portid)) == 0)
return (portid);
/*
* Get the device_type property to see if we should
* continue processing this node.
*/
if (np->n_getprop(np, "device_type", &type, sizeof (type)) != 0)
return (-1);
if (strcmp(type, OPL_CPU_NODE) == 0) {
/*
* We return cpuid because the cpu node has no portid property
*/
if (np->n_getprop(np, "cpuid", &portid, sizeof (portid)) == 0)
return (portid);
}
return (-1);
}
/*
* This is a helper function to determine if a given
* node should be considered for a dr operation according
* to predefined dr type nodes and the node's name.
* Formal Parameter : The name of a device node.
* Return Value: -1, name does not map to a valid dr type.
* A value greater or equal to 0, name is a valid dr type.
*/
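/*
* Example: with the drmach_name2type[] table defined above,
* drmach_name2type_idx("cpu") returns 0, drmach_name2type_idx("pci")
* returns 2, and drmach_name2type_idx("core") returns -1.
*/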
static int
drmach_name2type_idx(char *name)
{
int index, ntypes;
if (name == NULL)
return (-1);
/*
* Determine how many possible types are currently supported
* for dr.
*/
ntypes = sizeof (drmach_name2type) / sizeof (drmach_name2type[0]);
/* Determine if the node's name correspond to a predefined type. */
for (index = 0; index < ntypes; index++) {
if (strcmp(drmach_name2type[index].name, name) == 0)
/* The node is an allowed type for dr. */
return (index);
}
/*
* If the name of the node does not map to any of the
* types in the array drmach_name2type then the node is not of
* interest to dr.
*/
return (-1);
}
/*
* There are some complications on OPL:
* - pseudo-mc nodes do not have a portid property
* - portid[9:5] of the cmp node is the LSB#, portid[7:3] of pci is the LSB#
* - cmp has board#
* - core and cpu nodes do not have portid and board# properties
* starcat uses portid to derive the board# but that does not work
* for us. starfire reads the board# property to filter the devices.
* That does not work either. So for these specific devices,
* we use specific hard-coded methods to get the board# -
* cpu: LSB# = CPUID[9:5]
*/
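/*
* Example: a cpu node with CPUID 0x23 maps to LSB# (0x23 >> 5) & 0x1f
* == 1; drmach_board_find_devices_cb() below uses exactly this
* derivation when a node carries no board# property.
*/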
static int
drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
{
drmach_node_t *node = args->node;
drmach_board_cb_data_t *data = args->data;
drmach_board_t *obj = data->obj;
int rv, portid;
int bnum;
drmachid_t id;
drmach_device_t *device;
char name[OBP_MAXDRVNAME];
portid = drmach_get_portid(node);
/*
* core, cpu and pseudo-mc nodes do not have a portid property.
* We use cpuid as the portid of the cpu node; for pseudo-mc,
* we do not use the portid info.
*/
rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
if (rv)
return (0);
rv = node->n_getprop(node, OBP_BOARDNUM, &bnum, sizeof (bnum));
if (rv) {
/*
* cpu does not have board# property. We use
* CPUID[9:5]
*/
if (strcmp("cpu", name) == 0) {
bnum = (portid >> 5) & 0x1f;
} else
return (0);
}
if (bnum != obj->bnum)
return (0);
if (drmach_name2type_idx(name) < 0) {
return (0);
}
/*
* Create a device data structure from this node data.
* The call may yield nothing if the node is not of interest
* to drmach.
*/
data->err = drmach_device_new(node, obj, portid, &id);
if (data->err)
return (-1);
else if (!id) {
/*
* drmach_device_new examined the node we passed in
* and determined that it was one not of interest to
* drmach. So, it is skipped.
*/
return (0);
}
rv = drmach_array_set(obj->devices, data->ndevs++, id);
if (rv) {
data->err = DRMACH_INTERNAL_ERROR();
return (-1);
}
device = id;
data->err = (*data->found)(data->a, device->type, device->unum, id);
return (data->err == NULL ? 0 : -1);
}
sbd_error_t *
drmach_board_find_devices(drmachid_t id, void *a,
sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
{
drmach_board_t *bp = (drmach_board_t *)id;
sbd_error_t *err;
int max_devices;
int rv;
drmach_board_cb_data_t data;
if (!DRMACH_IS_BOARD_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
max_devices = MAX_CPU_UNITS_PER_BOARD;
max_devices += MAX_MEM_UNITS_PER_BOARD;
max_devices += MAX_IO_UNITS_PER_BOARD;
bp->devices = drmach_array_new(0, max_devices);
if (bp->tree == NULL)
bp->tree = drmach_node_new();
data.obj = bp;
data.ndevs = 0;
data.found = found;
data.a = a;
data.err = NULL;
rv = drmach_node_walk(bp->tree, &data, drmach_board_find_devices_cb);
if (rv == 0)
err = NULL;
else {
drmach_array_dispose(bp->devices, drmach_device_dispose);
bp->devices = NULL;
if (data.err)
err = data.err;
else
err = DRMACH_INTERNAL_ERROR();
}
return (err);
}
int
drmach_board_lookup(int bnum, drmachid_t *id)
{
int rv = 0;
rw_enter(&drmach_boards_rwlock, RW_READER);
if (drmach_array_get(drmach_boards, bnum, id)) {
*id = 0;
rv = -1;
}
rw_exit(&drmach_boards_rwlock);
return (rv);
}
sbd_error_t *
drmach_board_name(int bnum, char *buf, int buflen)
{
(void) snprintf(buf, buflen, "SB%d", bnum);
return (NULL);
}
sbd_error_t *
drmach_board_poweroff(drmachid_t id)
{
drmach_board_t *bp;
sbd_error_t *err;
drmach_status_t stat;
if (DRMACH_NULL_ID(id))
return (NULL);
if (!DRMACH_IS_BOARD_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
bp = id;
err = drmach_board_status(id, &stat);
if (!err) {
if (stat.configured || stat.busy)
err = drerr_new(0, EOPL_CONFIGBUSY, bp->cm.name);
else {
bp->powered = 0;
}
}
return (err);
}
sbd_error_t *
drmach_board_poweron(drmachid_t id)
{
drmach_board_t *bp;
if (!DRMACH_IS_BOARD_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
bp = id;
bp->powered = 1;
return (NULL);
}
static sbd_error_t *
drmach_board_release(drmachid_t id)
{
if (!DRMACH_IS_BOARD_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
return (NULL);
}
/*ARGSUSED*/
sbd_error_t *
drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
{
return (NULL);
}
sbd_error_t *
drmach_board_unassign(drmachid_t id)
{
drmach_board_t *bp;
sbd_error_t *err;
drmach_status_t stat;
if (DRMACH_NULL_ID(id))
return (NULL);
if (!DRMACH_IS_BOARD_ID(id)) {
return (drerr_new(0, EOPL_INAPPROP, NULL));
}
bp = id;
rw_enter(&drmach_boards_rwlock, RW_WRITER);
err = drmach_board_status(id, &stat);
if (err) {
rw_exit(&drmach_boards_rwlock);
return (err);
}
if (stat.configured || stat.busy) {
err = drerr_new(0, EOPL_CONFIGBUSY, bp->cm.name);
} else {
if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
err = DRMACH_INTERNAL_ERROR();
else
drmach_board_dispose(bp);
}
rw_exit(&drmach_boards_rwlock);
return (err);
}
/*
* We have to do more on OPL - e.g. set up sram tte, read cpuid, strand id,
* implementation #, etc
*/
static sbd_error_t *
drmach_cpu_new(drmach_device_t *proto, drmachid_t *idp)
{
static void drmach_cpu_dispose(drmachid_t);
static sbd_error_t *drmach_cpu_release(drmachid_t);
static sbd_error_t *drmach_cpu_status(drmachid_t, drmach_status_t *);
int portid;
drmach_cpu_t *cp = NULL;
/* portid is CPUID of the node */
portid = proto->portid;
ASSERT(portid != -1);
/* unum = (CMP/CHIP ID) + (ON_BOARD_CORE_NUM * MAX_CMPID_PER_BOARD) */
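/*
* Worked example (a sketch assuming OPL_MAX_CPUID_PER_CMP == 8 and
* OPL_MAX_CMPID_PER_BOARD == 4): portid 0x23 yields
* unum = ((0x23 / 8) & 3) + ((0x23 & 7) * 4) = 0 + 12 = 12.
*/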
proto->unum = ((portid/OPL_MAX_CPUID_PER_CMP) &
(OPL_MAX_CMPID_PER_BOARD - 1)) +
((portid & (OPL_MAX_CPUID_PER_CMP - 1)) *
(OPL_MAX_CMPID_PER_BOARD));
cp = kmem_zalloc(sizeof (drmach_cpu_t), KM_SLEEP);
bcopy(proto, &cp->dev, sizeof (cp->dev));
cp->dev.node = drmach_node_dup(proto->node);
cp->dev.cm.isa = (void *)drmach_cpu_new;
cp->dev.cm.dispose = drmach_cpu_dispose;
cp->dev.cm.release = drmach_cpu_release;
cp->dev.cm.status = drmach_cpu_status;
(void) snprintf(cp->dev.cm.name, sizeof (cp->dev.cm.name), "%s%d",
cp->dev.type, cp->dev.unum);
/*
* CPU ID representation
* CPUID[9:5] = SB#
* CPUID[4:3] = Chip#
* CPUID[2:1] = Core# (Only 2 core for OPL)
* CPUID[0:0] = Strand#
*/
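/*
* Example: portid (CPUID) 0x23, binary 100011, decodes below to
* sb = 1, chipid = 0, coreid = 1 and strandid = 1.
*/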
/*
* reg property of the strand contains strand ID
* reg property of the parent node contains core ID
* We should use them.
*/
cp->cpuid = portid;
cp->sb = (portid >> 5) & 0x1f;
cp->chipid = (portid >> 3) & 0x3;
cp->coreid = (portid >> 1) & 0x3;
cp->strandid = portid & 0x1;
*idp = (drmachid_t)cp;
return (NULL);
}
static void
drmach_cpu_dispose(drmachid_t id)
{
drmach_cpu_t *self;
ASSERT(DRMACH_IS_CPU_ID(id));
self = id;
if (self->dev.node)
drmach_node_dispose(self->dev.node);
kmem_free(self, sizeof (*self));
}
static int
drmach_cpu_start(struct cpu *cp)
{
int cpuid = cp->cpu_id;
extern int restart_other_cpu(int);
ASSERT(MUTEX_HELD(&cpu_lock));
ASSERT(cpunodes[cpuid].nodeid != (pnode_t)0);
cp->cpu_flags &= ~CPU_POWEROFF;
/*
* NOTE: restart_other_cpu pauses cpus during the
* slave cpu start. This helps to quiesce the
* bus traffic a bit which makes the tick sync
* routine in the prom more robust.
*/
DRMACH_PR("COLD START for cpu (%d)\n", cpuid);
(void) restart_other_cpu(cpuid);
return (0);
}
static sbd_error_t *
drmach_cpu_release(drmachid_t id)
{
if (!DRMACH_IS_CPU_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
return (NULL);
}
static sbd_error_t *
drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
{
drmach_cpu_t *cp;
drmach_device_t *dp;
ASSERT(DRMACH_IS_CPU_ID(id));
cp = (drmach_cpu_t *)id;
dp = &cp->dev;
stat->assigned = dp->bp->assigned;
stat->powered = dp->bp->powered;
mutex_enter(&cpu_lock);
stat->configured = (cpu_get(cp->cpuid) != NULL);
mutex_exit(&cpu_lock);
stat->busy = dp->busy;
(void) strncpy(stat->type, dp->type, sizeof (stat->type));
stat->info[0] = '\0';
return (NULL);
}
sbd_error_t *
drmach_cpu_disconnect(drmachid_t id)
{
if (!DRMACH_IS_CPU_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
return (NULL);
}
sbd_error_t *
drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
{
drmach_cpu_t *cpu;
if (!DRMACH_IS_CPU_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
cpu = (drmach_cpu_t *)id;
/* get from cpu directly on OPL */
*cpuid = cpu->cpuid;
return (NULL);
}
sbd_error_t *
drmach_cpu_get_impl(drmachid_t id, int *ip)
{
drmach_device_t *cpu;
drmach_node_t *np;
drmach_node_t pp;
int impl;
char type[OBP_MAXPROPNAME];
if (!DRMACH_IS_CPU_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
cpu = id;
np = cpu->node;
if (np->get_parent(np, &pp) != 0) {
return (DRMACH_INTERNAL_ERROR());
}
/* the parent should be core */
if (pp.n_getprop(&pp, "device_type", &type, sizeof (type)) != 0) {
return (drerr_new(0, EOPL_GETPROP, NULL));
}
if (strcmp(type, OPL_CORE_NODE) == 0) {
if (pp.n_getprop(&pp, "implementation#", &impl,
sizeof (impl)) != 0) {
return (drerr_new(0, EOPL_GETPROP, NULL));
}
} else {
return (DRMACH_INTERNAL_ERROR());
}
*ip = impl;
return (NULL);
}
sbd_error_t *
drmach_get_dip(drmachid_t id, dev_info_t **dip)
{
drmach_device_t *dp;
if (!DRMACH_IS_DEVICE_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
dp = id;
*dip = dp->node->n_getdip(dp->node);
return (NULL);
}
sbd_error_t *
drmach_io_is_attached(drmachid_t id, int *yes)
{
drmach_device_t *dp;
dev_info_t *dip;
int state;
if (!DRMACH_IS_IO_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
dp = id;
dip = dp->node->n_getdip(dp->node);
if (dip == NULL) {
*yes = 0;
return (NULL);
}
state = ddi_get_devstate(dip);
*yes = ((i_ddi_node_state(dip) >= DS_ATTACHED) ||
(state == DDI_DEVSTATE_UP));
return (NULL);
}
struct drmach_io_cb {
char *name; /* name of the node */
int (*func)(dev_info_t *);
int rv;
dev_info_t *dip;
};
#define DRMACH_IO_POST_ATTACH 0
#define DRMACH_IO_PRE_RELEASE 1
static int
drmach_io_cb_check(dev_info_t *dip, void *arg)
{
struct drmach_io_cb *p = (struct drmach_io_cb *)arg;
char name[OBP_MAXDRVNAME];
int len = OBP_MAXDRVNAME;
if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "name",
(caddr_t)name, &len) != DDI_PROP_SUCCESS) {
return (DDI_WALK_PRUNECHILD);
}
if (strcmp(name, p->name) == 0) {
ndi_hold_devi(dip);
p->dip = dip;
return (DDI_WALK_TERMINATE);
}
return (DDI_WALK_CONTINUE);
}
static int
drmach_console_ops(drmachid_t *id, int state)
{
drmach_io_t *obj = (drmach_io_t *)id;
struct drmach_io_cb arg;
int (*msudetp)(dev_info_t *);
int (*msuattp)(dev_info_t *);
dev_info_t *dip, *pdip;
int circ;
/* 4 is pcicmu channel */
if (obj->channel != 4)
return (0);
arg.name = "serial";
arg.func = NULL;
if (state == DRMACH_IO_PRE_RELEASE) {
msudetp = (int (*)(dev_info_t *))
modgetsymvalue("oplmsu_dr_detach", 0);
if (msudetp != NULL)
arg.func = msudetp;
} else if (state == DRMACH_IO_POST_ATTACH) {
msuattp = (int (*)(dev_info_t *))
modgetsymvalue("oplmsu_dr_attach", 0);
if (msuattp != NULL)
arg.func = msuattp;
} else {
return (0);
}
if (arg.func == NULL) {
return (0);
}
arg.rv = 0;
arg.dip = NULL;
dip = obj->dev.node->n_getdip(obj->dev.node);
if ((pdip = ddi_get_parent(dip)) != NULL) {
ndi_hold_devi(pdip);
ndi_devi_enter(pdip, &circ);
} else {
/* this cannot happen unless something is seriously wrong */
return (-1);
}
ddi_walk_devs(dip, drmach_io_cb_check, (void *)&arg);
ndi_devi_exit(pdip, circ);
ndi_rele_devi(pdip);
if (arg.dip) {
arg.rv = (*arg.func)(arg.dip);
ndi_rele_devi(arg.dip);
} else {
arg.rv = -1;
}
return (arg.rv);
}
sbd_error_t *
drmach_io_pre_release(drmachid_t id)
{
int rv;
if (!DRMACH_IS_IO_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
rv = drmach_console_ops(id, DRMACH_IO_PRE_RELEASE);
if (rv != 0)
cmn_err(CE_WARN, "IO callback failed in pre-release\n");
return (NULL);
}
static sbd_error_t *
drmach_io_release(drmachid_t id)
{
if (!DRMACH_IS_IO_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
return (NULL);
}
sbd_error_t *
drmach_io_unrelease(drmachid_t id)
{
if (!DRMACH_IS_IO_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
return (NULL);
}
/*ARGSUSED*/
sbd_error_t *
drmach_io_post_release(drmachid_t id)
{
return (NULL);
}
/*ARGSUSED*/
sbd_error_t *
drmach_io_post_attach(drmachid_t id)
{
int rv;
if (!DRMACH_IS_IO_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
rv = drmach_console_ops(id, DRMACH_IO_POST_ATTACH);
if (rv != 0)
cmn_err(CE_WARN, "IO callback failed in post-attach\n");
return (NULL);
}
static sbd_error_t *
drmach_io_status(drmachid_t id, drmach_status_t *stat)
{
drmach_device_t *dp;
sbd_error_t *err;
int configured;
ASSERT(DRMACH_IS_IO_ID(id));
dp = id;
err = drmach_io_is_attached(id, &configured);
if (err)
return (err);
stat->assigned = dp->bp->assigned;
stat->powered = dp->bp->powered;
stat->configured = (configured != 0);
stat->busy = dp->busy;
(void) strncpy(stat->type, dp->type, sizeof (stat->type));
stat->info[0] = '\0';
return (NULL);
}
static sbd_error_t *
drmach_mem_new(drmach_device_t *proto, drmachid_t *idp)
{
static void drmach_mem_dispose(drmachid_t);
static sbd_error_t *drmach_mem_release(drmachid_t);
static sbd_error_t *drmach_mem_status(drmachid_t, drmach_status_t *);
dev_info_t *dip;
int rv;
drmach_mem_t *mp;
rv = 0;
if ((proto->node->n_getproplen(proto->node, "mc-addr", &rv) < 0) ||
(rv <= 0)) {
*idp = (drmachid_t)0;
return (NULL);
}
mp = kmem_zalloc(sizeof (drmach_mem_t), KM_SLEEP);
proto->unum = 0;
bcopy(proto, &mp->dev, sizeof (mp->dev));
mp->dev.node = drmach_node_dup(proto->node);
mp->dev.cm.isa = (void *)drmach_mem_new;
mp->dev.cm.dispose = drmach_mem_dispose;
mp->dev.cm.release = drmach_mem_release;
mp->dev.cm.status = drmach_mem_status;
(void) snprintf(mp->dev.cm.name, sizeof (mp->dev.cm.name), "%s",
mp->dev.type);
dip = mp->dev.node->n_getdip(mp->dev.node);
if (drmach_setup_mc_info(dip, mp) != 0) {
return (drerr_new(1, EOPL_MC_SETUP, NULL));
}
/* make sure we do not create memoryless nodes */
if (mp->nbytes == 0) {
*idp = (drmachid_t)NULL;
kmem_free(mp, sizeof (drmach_mem_t));
} else
*idp = (drmachid_t)mp;
return (NULL);
}
static void
drmach_mem_dispose(drmachid_t id)
{
drmach_mem_t *mp;
ASSERT(DRMACH_IS_MEM_ID(id));
mp = id;
if (mp->dev.node)
drmach_node_dispose(mp->dev.node);
if (mp->memlist) {
memlist_delete(mp->memlist);
mp->memlist = NULL;
}
kmem_free(mp, sizeof (*mp));
}
sbd_error_t *
drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
{
pfn_t basepfn = (pfn_t)(basepa >> PAGESHIFT);
pgcnt_t npages = (pgcnt_t)(size >> PAGESHIFT);
int rv;
ASSERT(size != 0);
if (!DRMACH_IS_MEM_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
rv = kcage_range_add(basepfn, npages, KCAGE_DOWN);
if (rv == ENOMEM) {
cmn_err(CE_WARN, "%lu megabytes not available to kernel cage",
(ulong_t)(size == 0 ? 0 : size / MBYTE));
} else if (rv != 0) {
/* catch this in debug kernels */
ASSERT(0);
cmn_err(CE_WARN, "unexpected kcage_range_add return value %d",
rv);
}
if (rv) {
return (DRMACH_INTERNAL_ERROR());
} else {
return (NULL);
}
}
sbd_error_t *
drmach_mem_del_span(drmachid_t id, uint64_t basepa, uint64_t size)
{
pfn_t basepfn = (pfn_t)(basepa >> PAGESHIFT);
pgcnt_t npages = (pgcnt_t)(size >> PAGESHIFT);
int rv;
if (!DRMACH_IS_MEM_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
if (size > 0) {
rv = kcage_range_delete_post_mem_del(basepfn, npages);
if (rv != 0) {
cmn_err(CE_WARN,
"unexpected kcage_range_delete_post_mem_del"
" return value %d", rv);
return (DRMACH_INTERNAL_ERROR());
}
}
return (NULL);
}
sbd_error_t *
drmach_mem_disable(drmachid_t id)
{
if (!DRMACH_IS_MEM_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
else {
drmach_flush_all();
return (NULL);
}
}
sbd_error_t *
drmach_mem_enable(drmachid_t id)
{
if (!DRMACH_IS_MEM_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
else
return (NULL);
}
sbd_error_t *
drmach_mem_get_info(drmachid_t id, drmach_mem_info_t *mem)
{
drmach_mem_t *mp;
if (!DRMACH_IS_MEM_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
mp = (drmach_mem_t *)id;
/*
* This is only used by dr to round up/down the memory
* for copying. Our unit of memory isolation is 64 MB.
*/
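/*
* 64 MB - 1 == 0x3ffffff; a caller can round a physical address down
* to a 64 MB boundary with, e.g., pa & ~mem->mi_alignment_mask.
*/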
mem->mi_alignment_mask = (64 * 1024 * 1024 - 1);
mem->mi_basepa = mp->base_pa;
mem->mi_size = mp->nbytes;
mem->mi_slice_size = mp->slice_size;
return (NULL);
}
sbd_error_t *
drmach_mem_get_base_physaddr(drmachid_t id, uint64_t *pa)
{
drmach_mem_t *mp;
if (!DRMACH_IS_MEM_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
mp = (drmach_mem_t *)id;
*pa = mp->base_pa;
return (NULL);
}
sbd_error_t *
drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
{
drmach_mem_t *mem;
#ifdef DEBUG
int rv;
#endif
struct memlist *mlist;
if (!DRMACH_IS_MEM_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
mem = (drmach_mem_t *)id;
mlist = memlist_dup(mem->memlist);
#ifdef DEBUG
/*
* Make sure the incoming memlist doesn't already
* intersect with what's present in the system (phys_install).
*/
memlist_read_lock();
rv = memlist_intersect(phys_install, mlist);
memlist_read_unlock();
if (rv) {
DRMACH_PR("Derived memlist intersects with phys_install\n");
memlist_dump(mlist);
DRMACH_PR("phys_install memlist:\n");
memlist_dump(phys_install);
memlist_delete(mlist);
return (DRMACH_INTERNAL_ERROR());
}
DRMACH_PR("Derived memlist:");
memlist_dump(mlist);
#endif
*ml = mlist;
return (NULL);
}
sbd_error_t *
drmach_mem_get_slice_size(drmachid_t id, uint64_t *bytes)
{
drmach_mem_t *mem;
if (!DRMACH_IS_MEM_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
mem = (drmach_mem_t *)id;
*bytes = mem->slice_size;
return (NULL);
}
/* ARGSUSED */
processorid_t
drmach_mem_cpu_affinity(drmachid_t id)
{
return (CPU_CURRENT);
}
static sbd_error_t *
drmach_mem_release(drmachid_t id)
{
if (!DRMACH_IS_MEM_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
return (NULL);
}
static sbd_error_t *
drmach_mem_status(drmachid_t id, drmach_status_t *stat)
{
drmach_mem_t *dp;
uint64_t pa, slice_size;
struct memlist *ml;
ASSERT(DRMACH_IS_MEM_ID(id));
dp = id;
/* get starting physical address of target memory */
pa = dp->base_pa;
/* round down to slice boundary */
slice_size = dp->slice_size;
pa &= ~(slice_size - 1);
/* stop at first span that is in slice */
memlist_read_lock();
for (ml = phys_install; ml; ml = ml->next)
if (ml->address >= pa && ml->address < pa + slice_size)
break;
memlist_read_unlock();
stat->assigned = dp->dev.bp->assigned;
stat->powered = dp->dev.bp->powered;
stat->configured = (ml != NULL);
stat->busy = dp->dev.busy;
(void) strncpy(stat->type, dp->dev.type, sizeof (stat->type));
stat->info[0] = '\0';
return (NULL);
}
sbd_error_t *
drmach_board_deprobe(drmachid_t id)
{
drmach_board_t *bp;
if (!DRMACH_IS_BOARD_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
bp = id;
cmn_err(CE_CONT, "DR: detach board %d\n", bp->bnum);
if (bp->tree) {
drmach_node_dispose(bp->tree);
bp->tree = NULL;
}
if (bp->devices) {
drmach_array_dispose(bp->devices, drmach_device_dispose);
bp->devices = NULL;
}
bp->boot_board = 0;
return (NULL);
}
/*ARGSUSED*/
static sbd_error_t *
drmach_pt_ikprobe(drmachid_t id, drmach_opts_t *opts)
{
drmach_board_t *bp = (drmach_board_t *)id;
sbd_error_t *err = NULL;
int rv;
unsigned cpu_impl;
if (!DRMACH_IS_BOARD_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
DRMACH_PR("calling opl_probe_board for bnum=%d\n", bp->bnum);
rv = opl_probe_sb(bp->bnum, &cpu_impl);
if (rv != 0) {
err = drerr_new(1, EOPL_PROBE, bp->cm.name);
return (err);
}
return (err);
}
/*ARGSUSED*/
static sbd_error_t *
drmach_pt_ikdeprobe(drmachid_t id, drmach_opts_t *opts)
{
drmach_board_t *bp;
sbd_error_t *err = NULL;
int rv;
if (!DRMACH_IS_BOARD_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
bp = (drmach_board_t *)id;
cmn_err(CE_CONT, "DR: in-kernel unprobe board %d\n", bp->bnum);
rv = opl_unprobe_sb(bp->bnum);
if (rv != 0) {
err = drerr_new(1, EOPL_DEPROBE, bp->cm.name);
}
return (err);
}
/*ARGSUSED*/
sbd_error_t *
drmach_pt_readmem(drmachid_t id, drmach_opts_t *opts)
{
struct memlist *ml;
uint64_t src_pa;
uint64_t dst_pa;
uint64_t dst;
dst_pa = va_to_pa(&dst);
memlist_read_lock();
for (ml = phys_install; ml; ml = ml->next) {
uint64_t nbytes;
src_pa = ml->address;
nbytes = ml->size;
while (nbytes != 0ull) {
/* copy 32 bytes at src_pa to dst_pa */
bcopy32_il(src_pa, dst_pa);
/* increment by 32 bytes */
src_pa += (4 * sizeof (uint64_t));
/* decrement by 32 bytes */
nbytes -= (4 * sizeof (uint64_t));
}
}
memlist_read_unlock();
return (NULL);
}
static struct {
const char *name;
sbd_error_t *(*handler)(drmachid_t id, drmach_opts_t *opts);
} drmach_pt_arr[] = {
{ "readmem", drmach_pt_readmem },
{ "ikprobe", drmach_pt_ikprobe },
{ "ikdeprobe", drmach_pt_ikdeprobe },
/* the following line must always be last */
{ NULL, NULL }
};
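/*
* drmach_passthru() below matches opts->copts against this table by
* prefix (strncmp over strlen(name)), so a platform option string
* beginning with "readmem" selects drmach_pt_readmem.
*/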
/*ARGSUSED*/
sbd_error_t *
drmach_passthru(drmachid_t id, drmach_opts_t *opts)
{
int i;
sbd_error_t *err;
i = 0;
while (drmach_pt_arr[i].name != NULL) {
int len = strlen(drmach_pt_arr[i].name);
if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
break;
i += 1;
}
if (drmach_pt_arr[i].name == NULL)
err = drerr_new(0, EOPL_UNKPTCMD, opts->copts);
else
err = (*drmach_pt_arr[i].handler)(id, opts);
return (err);
}
sbd_error_t *
drmach_release(drmachid_t id)
{
drmach_common_t *cp;
if (!DRMACH_IS_DEVICE_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
cp = id;
return (cp->release(id));
}
sbd_error_t *
drmach_status(drmachid_t id, drmach_status_t *stat)
{
drmach_common_t *cp;
sbd_error_t *err;
rw_enter(&drmach_boards_rwlock, RW_READER);
if (!DRMACH_IS_ID(id)) {
rw_exit(&drmach_boards_rwlock);
return (drerr_new(0, EOPL_NOTID, NULL));
}
cp = (drmach_common_t *)id;
err = cp->status(id, stat);
rw_exit(&drmach_boards_rwlock);
return (err);
}
static sbd_error_t *
drmach_i_status(drmachid_t id, drmach_status_t *stat)
{
drmach_common_t *cp;
if (!DRMACH_IS_ID(id))
return (drerr_new(0, EOPL_NOTID, NULL));
cp = id;
return (cp->status(id, stat));
}
/*ARGSUSED*/
sbd_error_t *
drmach_unconfigure(drmachid_t id, int flags)
{
drmach_device_t *dp;
dev_info_t *rdip, *fdip = NULL;
char name[OBP_MAXDRVNAME];
int rv;
if (DRMACH_IS_CPU_ID(id))
return (NULL);
if (!DRMACH_IS_DEVICE_ID(id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
dp = id;
rdip = dp->node->n_getdip(dp->node);
ASSERT(rdip);
rv = dp->node->n_getprop(dp->node, "name", name, OBP_MAXDRVNAME);
if (rv)
return (NULL);
/*
* Note: FORCE flag is no longer necessary under devfs
*/
ASSERT(e_ddi_branch_held(rdip));
if (e_ddi_branch_unconfigure(rdip, &fdip, 0)) {
sbd_error_t *err;
char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
/*
* If non-NULL, fdip is returned held and must be released.
*/
if (fdip != NULL) {
(void) ddi_pathname(fdip, path);
ndi_rele_devi(fdip);
} else {
(void) ddi_pathname(rdip, path);
}
err = drerr_new(1, EOPL_DRVFAIL, path);
kmem_free(path, MAXPATHLEN);
return (err);
}
return (NULL);
}
int
drmach_cpu_poweron(struct cpu *cp)
{
int bnum, cpuid, onb_core_num, strand_id;
drmach_board_t *bp;
DRMACH_PR("drmach_cpu_poweron: starting cpuid %d\n", cp->cpu_id);
cpuid = cp->cpu_id;
bnum = LSB_ID(cpuid);
onb_core_num = ON_BOARD_CORE_NUM(cpuid);
strand_id = STRAND_ID(cpuid);
bp = drmach_get_board_by_bnum(bnum);
ASSERT(bp);
if (bp->cores[onb_core_num].core_hotadded == 0) {
if (drmach_add_remove_cpu(bnum, onb_core_num,
HOTADD_CPU) != 0) {
cmn_err(CE_WARN, "Failed to add CMP %d on board %d\n",
onb_core_num, bnum);
return (EIO);
}
}
ASSERT(MUTEX_HELD(&cpu_lock));
if (drmach_cpu_start(cp) != 0) {
if (bp->cores[onb_core_num].core_started == 0) {
/*
* We must undo the hotadd here, or no one will.
* If this fails, we will try again in
* drmach_board_disconnect.
*/
if (drmach_add_remove_cpu(bnum, onb_core_num,
HOTREMOVE_CPU) != 0) {
cmn_err(CE_WARN, "Failed to remove CMP %d "
"on board %d\n", onb_core_num, bnum);
}
}
return (EBUSY);
} else {
bp->cores[onb_core_num].core_started |= (1 << strand_id);
return (0);
}
}
int
drmach_cpu_poweroff(struct cpu *cp)
{
int rv = 0;
processorid_t cpuid = cp->cpu_id;
DRMACH_PR("drmach_cpu_poweroff: stopping cpuid %d\n", cp->cpu_id);
ASSERT(MUTEX_HELD(&cpu_lock));
/*
* Capture all CPUs (except for detaching proc) to prevent
* crosscalls to the detaching proc until it has cleared its
* bit in cpu_ready_set.
*
* The CPUs remain paused and the prom_mutex is known to be free.
* This prevents the x-trap victim from blocking when doing prom
* IEEE-1275 calls at a high PIL level.
*/
promsafe_pause_cpus();
/*
* Quiesce interrupts on the target CPU. We do this by setting
* the CPU 'not ready' (i.e. removing the CPU from cpu_ready_set) to
* prevent it from receiving cross calls and cross traps.
* This prevents the processor from receiving any new soft interrupts.
*/
mp_cpu_quiesce(cp);
rv = prom_stopcpu_bycpuid(cpuid);
if (rv == 0)
cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
start_cpus();
if (rv == 0) {
int bnum, onb_core_num, strand_id;
drmach_board_t *bp;
CPU_SIGNATURE(OS_SIG, SIGST_DETACHED, SIGSUBST_NULL, cpuid);
bnum = LSB_ID(cpuid);
onb_core_num = ON_BOARD_CORE_NUM(cpuid);
strand_id = STRAND_ID(cpuid);
bp = drmach_get_board_by_bnum(bnum);
ASSERT(bp);
bp->cores[onb_core_num].core_started &= ~(1 << strand_id);
if (bp->cores[onb_core_num].core_started == 0) {
if (drmach_add_remove_cpu(bnum, onb_core_num,
HOTREMOVE_CPU) != 0) {
cmn_err(CE_WARN, "Failed to remove CMP %d LSB "
"%d\n", onb_core_num, bnum);
return (EIO);
}
}
}
return (rv);
}
/*ARGSUSED*/
int
drmach_verify_sr(dev_info_t *dip, int sflag)
{
return (0);
}
void
drmach_suspend_last(void)
{
}
void
drmach_resume_first(void)
{
}
/*
* Log a DR sysevent.
* Return value: 0 success, non-zero failure.
*/
int
drmach_log_sysevent(int board, char *hint, int flag, int verbose)
{
sysevent_t *ev;
sysevent_id_t eid;
int rv, km_flag;
sysevent_value_t evnt_val;
sysevent_attr_list_t *evnt_attr_list = NULL;
char attach_pnt[MAXNAMELEN];
km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
attach_pnt[0] = '\0';
if (drmach_board_name(board, attach_pnt, MAXNAMELEN)) {
rv = -1;
goto logexit;
}
if (verbose) {
DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
attach_pnt, hint, flag, verbose);
}
if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
SUNW_KERN_PUB"dr", km_flag)) == NULL) {
rv = -2;
goto logexit;
}
evnt_val.value_type = SE_DATA_TYPE_STRING;
evnt_val.value.sv_string = attach_pnt;
if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID, &evnt_val,
km_flag)) != 0)
goto logexit;
evnt_val.value_type = SE_DATA_TYPE_STRING;
evnt_val.value.sv_string = hint;
if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT, &evnt_val,
km_flag)) != 0) {
sysevent_free_attr(evnt_attr_list);
goto logexit;
}
(void) sysevent_attach_attributes(ev, evnt_attr_list);
/*
* Log the event but do not sleep waiting for its
* delivery. This provides insulation from syseventd.
*/
rv = log_sysevent(ev, SE_NOSLEEP, &eid);
logexit:
if (ev)
sysevent_free(ev);
if ((rv != 0) && verbose)
cmn_err(CE_WARN, "drmach_log_sysevent failed (rv %d) for %s "
" %s\n", rv, attach_pnt, hint);
return (rv);
}
#define OPL_DR_STATUS_PROP "dr-status"
static int
opl_check_dr_status(void)
{
pnode_t node;
int rtn, len;
char *str;
node = prom_rootnode();
if (node == OBP_BADNODE) {
return (1);
}
len = prom_getproplen(node, OPL_DR_STATUS_PROP);
if (len == -1) {
/*
* The dr-status property does not exist when DR is
* enabled, so no warning message is needed.
*/
return (1);
}
str = (char *)kmem_zalloc(len+1, KM_SLEEP);
rtn = prom_getprop(node, OPL_DR_STATUS_PROP, str);
kmem_free(str, len + 1);
if (rtn == -1) {
return (1);
} else {
return (0);
}
}
/* We allocate memlist elements from TLB-locked pages to avoid TLB misses. */
static struct memlist *
drmach_memlist_add_span(drmach_copy_rename_program_t *p,
struct memlist *mlist, uint64_t base, uint64_t len)
{
struct memlist *ml, *tl, *nl;
if (len == 0ull)
return (NULL);
if (mlist == NULL) {
mlist = p->free_mlist;
if (mlist == NULL)
return (NULL);
p->free_mlist = mlist->next;
mlist->address = base;
mlist->size = len;
mlist->next = mlist->prev = NULL;
return (mlist);
}
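/*
* Walk the address-sorted list: either insert a new element
* before ml, merge the span into ml, or fall off the end and
* append a new element after the tail.
*/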
for (tl = ml = mlist; ml; tl = ml, ml = ml->next) {
if (base < ml->address) {
if ((base + len) < ml->address) {
nl = p->free_mlist;
if (nl == NULL)
return (NULL);
p->free_mlist = nl->next;
nl->address = base;
nl->size = len;
nl->next = ml;
if ((nl->prev = ml->prev) != NULL)
nl->prev->next = nl;
ml->prev = nl;
if (mlist == ml)
mlist = nl;
} else {
ml->size = MAX((base + len), (ml->address +
ml->size)) - base;
ml->address = base;
}
break;
} else if (base <= (ml->address + ml->size)) {
ml->size = MAX((base + len), (ml->address + ml->size)) -
MIN(ml->address, base);
ml->address = MIN(ml->address, base);
break;
}
}
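/* Ran off the end of the list: append a new element after the tail. */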
if (ml == NULL) {
nl = p->free_mlist;
if (nl == NULL)
return (NULL);
p->free_mlist = nl->next;
nl->address = base;
nl->size = len;
nl->next = NULL;
nl->prev = tl;
tl->next = nl;
}
return (mlist);
}
/*
* The routine performs the necessary memory COPY and MC adr SWITCH.
* Both operations MUST be at the same "level" so that the stack is
* maintained correctly between the copy and switch. The switch
* portion implements a caching mechanism to guarantee the code text
* is cached prior to execution. This is to guard against possible
* memory access while the MC adr's are being modified.
*
* IMPORTANT: The _drmach_copy_rename_end() function must immediately
* follow drmach_copy_rename_prog__relocatable() so that the correct
* "length" of the drmach_copy_rename_prog__relocatable can be
* calculated. This routine MUST be a LEAF function, i.e. it can
* make NO function calls, primarily for two reasons:
*
* 1. We must keep the stack consistent across the "switch".
* 2. Function calls are compiled to relative offsets, and
* when we execute this function we'll be executing it from
* a copied version in a different area of memory, thus
* the relative offsets will be bogus.
*
* Moreover, it must have the "__relocatable" suffix to inform DTrace
* providers (and anything else, for that matter) that this
* function's text is manually relocated elsewhere before it is
* executed. That is, it cannot be safely instrumented with any
* methodology that is PC-relative.
*/
/*
* We multiply this by system_clock_freq so we
* are setting a delay of fmem_timeout seconds for
* the rename command.
*
* The FMEM command itself should complete within 15 sec.
* We add 2 more sec to be conservative.
*
* Note that there is also an SCF BUSY bit check
* in drmach_asm.s right before the FMEM command is
* issued. XSCF sets the SCF BUSY bit when the
* other domain on the same PSB reboots and it
* will not be able to service the FMEM command
* within 15 sec. After setting the SCF BUSY
* bit, XSCF will wait a while before servicing
* other reboot commands so there is no race
* condition.
*/
static int fmem_timeout = 17;
/*
* Empirical data on some OPL systems shows that
* we can copy 250 MB per second. We set it to
* 80 MB to be conservative. In the normal case,
* this timeout does not affect anything.
*/
static int min_copy_size_per_sec = 80 * 1024 * 1024;
/*
* This is the timeout value for the xcall synchronization
* to get all the CPUs ready to do the parallel copying.
* Even on a fully loaded system, 10 sec. should be long
* enough.
*/
static int cpu_xcall_delay = 10;
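/*
* When set, the parallel copy is disabled and the master CPU
* performs the entire copy by itself.
*/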
int drmach_disable_mcopy = 0;
/*
* The following delay loop executes the sleep instruction to yield
* the CPU to other strands. If this is not done, one strand can tie
* up the core in a busy loop while the other strand cannot do useful
* work. The copy procedure would take much longer without this.
*/
#define DR_DELAY_IL(ms, freq) \
{ \
uint64_t start; \
uint64_t nstick; \
volatile uint64_t now; \
nstick = ((uint64_t)ms * freq)/1000; \
start = drmach_get_stick_il(); \
now = start; \
while ((now - start) <= nstick) { \
drmach_sleep_il(); \
now = drmach_get_stick_il(); \
} \
}
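/*
* Typical use in the loops below is DR_DELAY_IL(1,
* prog->data->stick_freq), which sleeps for roughly 1 ms worth
* of stick ticks while yielding the strand.
*/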
/* Each loop is 2ms, timeout at 1000ms */
static int drmach_copy_rename_timeout = 500;
static int
drmach_copy_rename_prog__relocatable(drmach_copy_rename_program_t *prog,
int cpuid)
{
struct memlist *ml;
register int rtn;
int i;
register uint64_t curr, limit;
extern uint64_t drmach_get_stick_il();
extern void membar_sync_il();
extern void flush_instr_mem_il(void*);
extern void flush_windows_il(void);
uint64_t copy_start;
/*
* flush_windows is moved here to make sure all
* registers used in the callers are flushed to
* memory before the copy.
*
* If flush_windows() is called too early in the
* calling function, the compiler might put some
* data in the local registers after flush_windows().
* After the rename, if there is any fill trap, the
* registers will contain stale data.
*/
flush_windows_il();
prog->critical->stat[cpuid] = FMEM_LOOP_COPY_READY;
membar_sync_il();
if (prog->data->cpuid == cpuid) {
limit = drmach_get_stick_il();
limit += cpu_xcall_delay * system_clock_freq;
for (i = 0; i < NCPU; i++) {
if (CPU_IN_SET(prog->data->cpu_slave_set, i)) {
/* wait for all CPUs to be ready */
for (;;) {
if (prog->critical->stat[i] ==
FMEM_LOOP_COPY_READY) {
break;
}
DR_DELAY_IL(1, prog->data->stick_freq);
}
curr = drmach_get_stick_il();
if (curr > limit) {
prog->data->fmem_status.error =
EOPL_FMEM_XC_TIMEOUT;
return (EOPL_FMEM_XC_TIMEOUT);
}
}
}
prog->data->fmem_status.stat = FMEM_LOOP_COPY_READY;
membar_sync_il();
copy_start = drmach_get_stick_il();
} else {
for (;;) {
if (prog->data->fmem_status.stat ==
FMEM_LOOP_COPY_READY) {
break;
}
if (prog->data->fmem_status.error) {
prog->data->error[cpuid] = EOPL_FMEM_TERMINATE;
return (EOPL_FMEM_TERMINATE);
}
DR_DELAY_IL(1, prog->data->stick_freq);
}
}
/*
* DO COPY.
*/
if (CPU_IN_SET(prog->data->cpu_copy_set, cpuid)) {
for (ml = prog->data->cpu_ml[cpuid]; ml; ml = ml->next) {
uint64_t s_pa, t_pa;
uint64_t nbytes;
s_pa = prog->data->s_copybasepa + ml->address;
t_pa = prog->data->t_copybasepa + ml->address;
nbytes = ml->size;
while (nbytes != 0ull) {
/*
* If the master has detected an error, we just
* bail out.
*/
if (prog->data->fmem_status.error !=
ESBD_NOERROR) {
prog->data->error[cpuid] =
EOPL_FMEM_TERMINATE;
return (EOPL_FMEM_TERMINATE);
}
/*
* This copy does NOT use an ASI
* that avoids the Ecache, therefore
* the dst_pa addresses may remain
* in our Ecache after the dst_pa
* has been removed from the system.
* A subsequent write-back to memory
* will cause an ARB-stop because the
* physical address no longer exists
* in the system. Therefore we must
* flush out the local Ecache after we
* finish the copy.
*/
/* copy 32 bytes at src_pa to dst_pa */
bcopy32_il(s_pa, t_pa);
/*
* increment the counter to signal that we are
* alive
*/
prog->stat->nbytes[cpuid] += 32;
/* increment by 32 bytes */
s_pa += (4 * sizeof (uint64_t));
t_pa += (4 * sizeof (uint64_t));
/* decrement by 32 bytes */
nbytes -= (4 * sizeof (uint64_t));
}
}
prog->critical->stat[cpuid] = FMEM_LOOP_COPY_DONE;
membar_sync_il();
}
/*
* Since bcopy32_il() does NOT use an ASI to bypass
* the Ecache, we need to flush our Ecache after
* the copy is complete.
*/
flush_cache_il();
/*
* drmach_fmem_exec_script()
*/
if (prog->data->cpuid == cpuid) {
uint64_t last, now;
limit = copy_start + prog->data->copy_delay;
for (i = 0; i < NCPU; i++) {
if (!CPU_IN_SET(prog->data->cpu_slave_set, i))
continue;
for (;;) {
/*
* we get FMEM_LOOP_FMEM_READY in the
* normal case
*/
if (prog->critical->stat[i] ==
FMEM_LOOP_FMEM_READY) {
break;
}
/* got error traps */
if (prog->data->error[i] ==
EOPL_FMEM_COPY_ERROR) {
prog->data->fmem_status.error =
EOPL_FMEM_COPY_ERROR;
return (EOPL_FMEM_COPY_ERROR);
}
/*
* if we have not reached the limit,
* wait more
*/
curr = drmach_get_stick_il();
if (curr <= limit)
continue;
prog->data->slowest_cpuid = i;
prog->data->copy_wait_time = curr - copy_start;
/* now check if slave is alive */
last = prog->stat->nbytes[i];
DR_DELAY_IL(1, prog->data->stick_freq);
now = prog->stat->nbytes[i];
if (now <= last) {
/*
* no progress, perhaps just
* finished
*/
DR_DELAY_IL(1, prog->data->stick_freq);
if (prog->critical->stat[i] ==
FMEM_LOOP_FMEM_READY)
break;
/* copy error */
if (prog->data->error[i] ==
EOPL_FMEM_COPY_ERROR) {
prog->data->fmem_status.error =
EOPL_FMEM_COPY_ERROR;
return (EOPL_FMEM_COPY_ERROR);
}
prog->data->copy_rename_count++;
if (prog->data->copy_rename_count
< drmach_copy_rename_timeout) {
continue;
} else {
prog->data->fmem_status.error =
EOPL_FMEM_COPY_TIMEOUT;
return (EOPL_FMEM_COPY_TIMEOUT);
}
}
}
}
prog->critical->stat[cpuid] = FMEM_LOOP_FMEM_READY;
prog->data->fmem_status.stat = FMEM_LOOP_FMEM_READY;
membar_sync_il();
flush_instr_mem_il((void*) (prog->critical));
/*
* drmach_fmem_exec_script()
*/
rtn = prog->critical->fmem((void *)prog->critical, PAGESIZE);
return (rtn);
} else {
flush_instr_mem_il((void*) (prog->critical));
/*
* drmach_fmem_loop_script()
*/
rtn = prog->critical->loop((void *)(prog->critical), PAGESIZE,
(void *)&(prog->critical->stat[cpuid]));
prog->data->error[cpuid] = rtn;
/* the slave thread does not care about the return value */
return (0);
}
}
static void
drmach_copy_rename_end(void)
{
/*
* IMPORTANT: This function MUST be located immediately
* following drmach_copy_rename_prog__relocatable() so that
* the latter's size can be accurately estimated. Note that
* this assumes the compiler keeps these functions in the
* order in which they appear :-o
*/
}
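/*
* Carve the PAGESIZE memlist_buffer into a free list of memlist
* elements. Returns the number of elements available.
*/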
static int
drmach_setup_memlist(drmach_copy_rename_program_t *p)
{
struct memlist *ml;
caddr_t buf;
int nbytes, s, n_elements;
nbytes = PAGESIZE;
n_elements = 0;
s = roundup(sizeof (struct memlist), sizeof (void *));
p->free_mlist = NULL;
buf = p->memlist_buffer;
while (nbytes >= sizeof (struct memlist)) {
ml = (struct memlist *)buf;
ml->next = p->free_mlist;
p->free_mlist = ml;
buf += s;
n_elements++;
nbytes -= s;
}
return (n_elements);
}
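/*
* Map the FMEM program pages at new_va with locked 8K TTEs in both
* the D-TLB and I-TLB so that no TLB miss can occur while the
* rename is in progress. Preemption stays disabled until
* drmach_unlock_critical() is called.
*/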
static void
drmach_lock_critical(caddr_t va, caddr_t new_va)
{
tte_t tte;
int i;
kpreempt_disable();
for (i = 0; i < DRMACH_FMEM_LOCKED_PAGES; i++) {
vtag_flushpage(new_va, (uint64_t)ksfmmup);
sfmmu_memtte(&tte, va_to_pfn(va), PROC_DATA|HAT_NOSYNC, TTE8K);
tte.tte_intlo |= TTE_LCK_INT;
sfmmu_dtlb_ld_kva(new_va, &tte);
sfmmu_itlb_ld_kva(new_va, &tte);
va += PAGESIZE;
new_va += PAGESIZE;
}
}
static void
drmach_unlock_critical(caddr_t va)
{
int i;
for (i = 0; i < DRMACH_FMEM_LOCKED_PAGES; i++) {
vtag_flushpage(va, (uint64_t)ksfmmup);
va += PAGESIZE;
}
kpreempt_enable();
}
sbd_error_t *
drmach_copy_rename_init(drmachid_t t_id, drmachid_t s_id,
struct memlist *c_ml, drmachid_t *pgm_id)
{
drmach_mem_t *s_mem;
drmach_mem_t *t_mem;
struct memlist *x_ml;
uint64_t s_copybasepa, t_copybasepa;
uint_t len;
caddr_t bp, wp;
int s_bd, t_bd, cpuid, active_cpus, i;
int max_elms, mlist_size, rv;
uint64_t c_addr;
size_t c_size, copy_sz, sz;
extern void drmach_fmem_loop_script();
extern void drmach_fmem_loop_script_rtn();
extern int drmach_fmem_exec_script();
extern void drmach_fmem_exec_script_end();
sbd_error_t *err;
drmach_copy_rename_program_t *prog = NULL;
drmach_copy_rename_program_t *prog_kmem = NULL;
void (*mc_suspend)(void);
void (*mc_resume)(void);
int (*scf_fmem_start)(int, int);
int (*scf_fmem_end)(void);
int (*scf_fmem_cancel)(void);
uint64_t (*scf_get_base_addr)(void);
if (!DRMACH_IS_MEM_ID(s_id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
if (!DRMACH_IS_MEM_ID(t_id))
return (drerr_new(0, EOPL_INAPPROP, NULL));
for (i = 0; i < NCPU; i++) {
int lsb_id, onb_core_num, strand_id;
drmach_board_t *bp;
/*
* CPUs in the ready set will spin in cache during the rename; skip them here.
*/
if (CPU_IN_SET(cpu_ready_set, i))
continue;
/*
* Now check for any inactive CPUs that
* have been hotadded. This can only occur in
* an error condition in drmach_cpu_poweron().
*/
lsb_id = LSB_ID(i);
onb_core_num = ON_BOARD_CORE_NUM(i);
strand_id = STRAND_ID(i);
bp = drmach_get_board_by_bnum(lsb_id);
if (bp == NULL)
continue;
if (bp->cores[onb_core_num].core_hotadded &
(1 << strand_id)) {
if (!(bp->cores[onb_core_num].core_started &
(1 << strand_id))) {
return (drerr_new(1, EOPL_CPU_STATE, NULL));
}
}
}
mc_suspend = (void (*)(void))
modgetsymvalue("opl_mc_suspend", 0);
mc_resume = (void (*)(void))
modgetsymvalue("opl_mc_resume", 0);
if (mc_suspend == NULL || mc_resume == NULL) {
return (drerr_new(1, EOPL_MC_OPL, NULL));
}
scf_fmem_start = (int (*)(int, int))
modgetsymvalue("scf_fmem_start", 0);
if (scf_fmem_start == NULL) {
return (drerr_new(1, EOPL_SCF_FMEM, NULL));
}
scf_fmem_end = (int (*)(void))
modgetsymvalue("scf_fmem_end", 0);
if (scf_fmem_end == NULL) {
return (drerr_new(1, EOPL_SCF_FMEM, NULL));
}
scf_fmem_cancel = (int (*)(void))
modgetsymvalue("scf_fmem_cancel", 0);
if (scf_fmem_cancel == NULL) {
return (drerr_new(1, EOPL_SCF_FMEM, NULL));
}
scf_get_base_addr = (uint64_t (*)(void))
modgetsymvalue("scf_get_base_addr", 0);
if (scf_get_base_addr == NULL) {
return (drerr_new(1, EOPL_SCF_FMEM, NULL));
}
s_mem = s_id;
t_mem = t_id;
s_bd = s_mem->dev.bp->bnum;
t_bd = t_mem->dev.bp->bnum;
/* calculate source and target base pa */
s_copybasepa = s_mem->slice_base;
t_copybasepa = t_mem->slice_base;
/* adjust copy memlist addresses to be relative to copy base pa */
x_ml = c_ml;
mlist_size = 0;
while (x_ml != NULL) {
x_ml->address -= s_copybasepa;
x_ml = x_ml->next;
mlist_size++;
}
/*
* The buffer will be page aligned, since we're calling
* kmem_zalloc() with an exact multiple of PAGESIZE.
*/
prog_kmem = (drmach_copy_rename_program_t *)kmem_zalloc(
DRMACH_FMEM_LOCKED_PAGES * PAGESIZE, KM_SLEEP);
prog_kmem->prog = prog_kmem;
/*
* To avoid a multiple-TLB-hit (MTLB) error, we allocate a new
* VM range and remap the kmem_alloc buffer to that address.
* This solves two problems we found:
* - the kmem_alloc buffer can be just a chunk inside
* a much larger, e.g. 4MB, buffer and an MTLB error will occur
* if there are both a 4MB and an 8K TLB mapping to
* the same VA range.
* - the kmem mapping can get dropped into the TLB by other
* strands, unintentionally.
* Note that pointers such as data, critical, memlist_buffer,
* and stat inside the copy rename structure are mapped to this
* alternate VM space, so we must make sure we lock the TLB mapping
* whenever we access data through these pointers.
*/
prog = prog_kmem->locked_prog = vmem_alloc(heap_arena,
DRMACH_FMEM_LOCKED_PAGES * PAGESIZE, VM_SLEEP);
wp = bp = (caddr_t)prog;
/* Now remap prog_kmem to prog */
drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog);
/* All pointers in prog are based on the alternate mapping */
prog->data = (drmach_copy_rename_data_t *)roundup(((uint64_t)prog +
sizeof (drmach_copy_rename_program_t)), sizeof (void *));
ASSERT(((uint64_t)prog->data + sizeof (drmach_copy_rename_data_t))
<= ((uint64_t)prog + PAGESIZE));
prog->critical = (drmach_copy_rename_critical_t *)
(wp + DRMACH_FMEM_CRITICAL_PAGE * PAGESIZE);
prog->memlist_buffer = (caddr_t)(wp + DRMACH_FMEM_MLIST_PAGE *
PAGESIZE);
prog->stat = (drmach_cr_stat_t *)(wp + DRMACH_FMEM_STAT_PAGE *
PAGESIZE);
/* LINTED */
ASSERT(sizeof (drmach_cr_stat_t) <= ((DRMACH_FMEM_LOCKED_PAGES -
DRMACH_FMEM_STAT_PAGE) * PAGESIZE));
prog->critical->scf_reg_base = (uint64_t)-1;
prog->critical->scf_td[0] = (s_bd & 0xff);
prog->critical->scf_td[1] = (t_bd & 0xff);
for (i = 2; i < 15; i++) {
prog->critical->scf_td[i] = 0;
}
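/*
* The last byte of scf_td appears to be a simple check byte
* over the descriptor; that the 0xaa constant is what XSCF
* expects here is an assumption, as it is not documented in
* this file.
*/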
prog->critical->scf_td[15] = ((0xaa + s_bd + t_bd) & 0xff);
bp = (caddr_t)prog->critical;
len = sizeof (drmach_copy_rename_critical_t);
wp = (caddr_t)roundup((uint64_t)bp + len, sizeof (void *));
len = (uint_t)((ulong_t)drmach_copy_rename_end -
(ulong_t)drmach_copy_rename_prog__relocatable);
/*
* We always leave 1K of nops to prevent the processor's
* speculative execution from causing stray memory accesses.
*/
wp = wp + len + 1024;
len = (uint_t)((ulong_t)drmach_fmem_exec_script_end -
(ulong_t)drmach_fmem_exec_script);
/* this is the entry point of the loop script */
wp = wp + len + 1024;
len = (uint_t)((ulong_t)drmach_fmem_exec_script -
(ulong_t)drmach_fmem_loop_script);
wp = wp + len + 1024;
/* now we make sure there is 1K extra */
if ((wp - bp) > PAGESIZE) {
err = drerr_new(1, EOPL_FMEM_SETUP, NULL);
goto out;
}
bp = (caddr_t)prog->critical;
len = sizeof (drmach_copy_rename_critical_t);
wp = (caddr_t)roundup((uint64_t)bp + len, sizeof (void *));
prog->critical->run = (int (*)())(wp);
len = (uint_t)((ulong_t)drmach_copy_rename_end -
(ulong_t)drmach_copy_rename_prog__relocatable);
bcopy((caddr_t)drmach_copy_rename_prog__relocatable, wp, len);
wp = (caddr_t)roundup((uint64_t)wp + len, 1024);
prog->critical->fmem = (int (*)())(wp);
len = (int)((ulong_t)drmach_fmem_exec_script_end -
(ulong_t)drmach_fmem_exec_script);
bcopy((caddr_t)drmach_fmem_exec_script, wp, len);
len = (int)((ulong_t)drmach_fmem_exec_script_end -
(ulong_t)drmach_fmem_exec_script);
wp = (caddr_t)roundup((uint64_t)wp + len, 1024);
prog->critical->loop = (int (*)())(wp);
len = (int)((ulong_t)drmach_fmem_exec_script -
(ulong_t)drmach_fmem_loop_script);
bcopy((caddr_t)drmach_fmem_loop_script, (void *)wp, len);
len = (int)((ulong_t)drmach_fmem_loop_script_rtn -
(ulong_t)drmach_fmem_loop_script);
prog->critical->loop_rtn = (void (*)()) (wp+len);
prog->data->fmem_status.error = ESBD_NOERROR;
/* now we are committed, call SCF, soft suspend mac patrol */
if ((*scf_fmem_start)(s_bd, t_bd)) {
err = drerr_new(1, EOPL_SCF_FMEM_START, NULL);
goto out;
}
prog->data->scf_fmem_end = scf_fmem_end;
prog->data->scf_fmem_cancel = scf_fmem_cancel;
prog->data->scf_get_base_addr = scf_get_base_addr;
prog->data->fmem_status.op |= OPL_FMEM_SCF_START;
/* soft suspend mac patrol */
(*mc_suspend)();
prog->data->fmem_status.op |= OPL_FMEM_MC_SUSPEND;
prog->data->mc_resume = mc_resume;
prog->critical->inst_loop_ret =
*(uint64_t *)(prog->critical->loop_rtn);
/*
* 0x30800000 is op code "ba,a +0"
*/
*(uint_t *)(prog->critical->loop_rtn) = (uint_t)(0x30800000);
/*
* set the value of SCF FMEM TIMEOUT
*/
prog->critical->delay = fmem_timeout * system_clock_freq;
prog->data->s_mem = (drmachid_t)s_mem;
prog->data->t_mem = (drmachid_t)t_mem;
cpuid = CPU->cpu_id;
prog->data->cpuid = cpuid;
prog->data->cpu_ready_set = cpu_ready_set;
prog->data->cpu_slave_set = cpu_ready_set;
prog->data->slowest_cpuid = (processorid_t)-1;
prog->data->copy_wait_time = 0;
prog->data->copy_rename_count = 0;
CPUSET_DEL(prog->data->cpu_slave_set, cpuid);
for (i = 0; i < NCPU; i++) {
prog->data->cpu_ml[i] = NULL;
}
/*
* max_elms - the maximum number of memlist structures that
* may be allocated for the CPU memory lists.
* If there are more memory spans (because of
* fragmentation) than memlist structures
* available, we return an error.
*/
max_elms = drmach_setup_memlist(prog);
if (max_elms < mlist_size) {
err = drerr_new(1, EOPL_FMEM_SETUP, NULL);
goto err_out;
}
active_cpus = 0;
if (drmach_disable_mcopy) {
active_cpus = 1;
CPUSET_ADD(prog->data->cpu_copy_set, cpuid);
} else {
int max_cpu_num;
/*
* The parallel copy procedure is going to split some
* of the elements of the original memory copy list.
* The number of added elements can be up to
* (max_cpu_num - 1). It means that max_cpu_num
* should satisfy the following condition:
* (max_cpu_num - 1) + mlist_size <= max_elms.
*/
max_cpu_num = max_elms - mlist_size + 1;
for (i = 0; i < NCPU; i++) {
if (CPU_IN_SET(cpu_ready_set, i) &&
CPU_ACTIVE(cpu[i])) {
/*
* To reduce the level-2 cache contention only
* one strand per core will participate
* in the copy. If the strand with even cpu_id
* number is present in the ready set, we will
* include this strand in the copy set. If it
* is not present in the ready set, we check for
* the strand with the consecutive odd cpu_id
* and include it, provided that it is
* present in the ready set.
*/
if (!(i & 0x1) ||
!CPU_IN_SET(prog->data->cpu_copy_set,
i - 1)) {
CPUSET_ADD(prog->data->cpu_copy_set, i);
active_cpus++;
/*
* We cannot have more than
* max_cpu_num CPUs in the copy
* set, because each CPU needs a
* memory copy list of at least
* one element.
*/
if (active_cpus >= max_cpu_num)
break;
}
}
}
}
x_ml = c_ml;
sz = 0;
while (x_ml != NULL) {
sz += x_ml->size;
x_ml = x_ml->next;
}
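/*
* Divide the total copy size evenly among the active CPUs,
* rounding the per-CPU share up to a 4MB multiple so that
* copy_sz * active_cpus covers the whole range.
*/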
copy_sz = sz/active_cpus;
copy_sz = roundup(copy_sz, MMU_PAGESIZE4M);
while (sz > copy_sz*active_cpus) {
copy_sz += MMU_PAGESIZE4M;
}
prog->data->stick_freq = system_clock_freq;
prog->data->copy_delay = ((copy_sz / min_copy_size_per_sec) + 2) *
system_clock_freq;
x_ml = c_ml;
c_addr = x_ml->address;
c_size = x_ml->size;
for (i = 0; i < NCPU; i++) {
prog->stat->nbytes[i] = 0;
if (!CPU_IN_SET(prog->data->cpu_copy_set, i)) {
continue;
}
sz = copy_sz;
while (sz) {
if (c_size > sz) {
if ((prog->data->cpu_ml[i] =
drmach_memlist_add_span(prog,
prog->data->cpu_ml[i],
c_addr, sz)) == NULL) {
cmn_err(CE_WARN,
"Unexpected drmach_memlist_add_span"
" failure.");
err = drerr_new(1, EOPL_FMEM_SETUP,
NULL);
mc_resume();
goto out;
}
c_addr += sz;
c_size -= sz;
break;
} else {
sz -= c_size;
if ((prog->data->cpu_ml[i] =
drmach_memlist_add_span(prog,
prog->data->cpu_ml[i],
c_addr, c_size)) == NULL) {
cmn_err(CE_WARN,
"Unexpected drmach_memlist_add_span"
" failure.");
err = drerr_new(1, EOPL_FMEM_SETUP,
NULL);
mc_resume();
goto out;
}
x_ml = x_ml->next;
if (x_ml != NULL) {
c_addr = x_ml->address;
c_size = x_ml->size;
} else {
goto end;
}
}
}
}
end:
prog->data->s_copybasepa = s_copybasepa;
prog->data->t_copybasepa = t_copybasepa;
prog->data->c_ml = c_ml;
*pgm_id = prog_kmem;
/* Unmap the alternate space. It will have to be remapped again */
drmach_unlock_critical((caddr_t)prog);
return (NULL);
err_out:
mc_resume();
rv = (*prog->data->scf_fmem_cancel)();
if (rv) {
cmn_err(CE_WARN, "scf_fmem_cancel() failed rv=0x%x", rv);
}
out:
if (prog != NULL) {
drmach_unlock_critical((caddr_t)prog);
vmem_free(heap_arena, prog, DRMACH_FMEM_LOCKED_PAGES *
PAGESIZE);
}
if (prog_kmem != NULL) {
kmem_free(prog_kmem, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
}
return (err);
}
sbd_error_t *
drmach_copy_rename_fini(drmachid_t id)
{
drmach_copy_rename_program_t *prog = id;
sbd_error_t *err = NULL;
int rv;
uint_t fmem_error;
/*
* Note that we have to delay calling SCF to find out the
* status of the FMEM operation here because SCF cannot
* respond while it is suspended.
* This creates a small window during which we are not sure
* about the base address of the system board.
* If there is any call to mc-opl to get memory unum,
* mc-opl will return UNKNOWN as the unum.
*/
/*
* We have to remap again because all the pointers (such as
* data and critical) in prog are based on the alternate vmem space.
*/
(void) drmach_lock_critical((caddr_t)prog, (caddr_t)prog->locked_prog);
if (prog->data->c_ml != NULL)
memlist_delete(prog->data->c_ml);
if ((prog->data->fmem_status.op &
(OPL_FMEM_SCF_START | OPL_FMEM_MC_SUSPEND)) !=
(OPL_FMEM_SCF_START | OPL_FMEM_MC_SUSPEND)) {
cmn_err(CE_PANIC, "drmach_copy_rename_fini: invalid op "
"code %x\n", prog->data->fmem_status.op);
}
fmem_error = prog->data->fmem_status.error;
if (fmem_error != ESBD_NOERROR) {
err = drerr_new(1, fmem_error, NULL);
}
/* possible ops are SCF_START, MC_SUSPEND */
if (prog->critical->fmem_issued) {
if (fmem_error != ESBD_NOERROR) {
cmn_err(CE_PANIC, "Irrecoverable FMEM error %d\n",
fmem_error);
}
rv = (*prog->data->scf_fmem_end)();
if (rv) {
cmn_err(CE_PANIC, "scf_fmem_end() failed rv=%d", rv);
}
/*
* If we get here, rename is successful.
* Do all the copy rename post processing.
*/
drmach_swap_pa((drmach_mem_t *)prog->data->s_mem,
(drmach_mem_t *)prog->data->t_mem);
} else {
rv = (*prog->data->scf_fmem_cancel)();
if (rv) {
cmn_err(CE_WARN, "scf_fmem_cancel() failed rv=0x%x",
rv);
if (!err) {
err = drerr_new(1, EOPL_SCF_FMEM_CANCEL,
"scf_fmem_cancel() failed. rv = 0x%x", rv);
}
}
}
/* soft resume mac patrol */
(*prog->data->mc_resume)();
drmach_unlock_critical((caddr_t)prog->locked_prog);
vmem_free(heap_arena, prog->locked_prog,
DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
kmem_free(prog, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
return (err);
}
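/*
* Slave entry point, reached via drmach_sys_trap() from the x-trap
* sent by the master. Runs the relocated copy/rename program on
* this CPU under on_trap() protection so that a data error aborts
* the copy instead of panicking.
*/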
/*ARGSUSED*/
static void
drmach_copy_rename_slave(struct regs *rp, drmachid_t id)
{
drmach_copy_rename_program_t *prog =
(drmach_copy_rename_program_t *)id;
register int cpuid;
extern void drmach_flush();
extern void membar_sync_il();
extern void drmach_flush_icache();
on_trap_data_t otd;
cpuid = CPU->cpu_id;
if (on_trap(&otd, OT_DATA_EC)) {
no_trap();
prog->data->error[cpuid] = EOPL_FMEM_COPY_ERROR;
prog->critical->stat[cpuid] = FMEM_LOOP_EXIT;
drmach_flush_icache();
membar_sync_il();
return;
}
/*
* Jump to the relocated copy of drmach_copy_rename_prog__relocatable().
*/
drmach_flush(prog->critical, PAGESIZE);
(void) prog->critical->run(prog, cpuid);
drmach_flush_icache();
no_trap();
prog->critical->stat[cpuid] = FMEM_LOOP_EXIT;
membar_sync_il();
}
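/*
* After a successful rename, exchange the slice base addresses of
* the source and target memory nodes and rebase their memlists and
* base_pa accordingly.
*/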
static void
drmach_swap_pa(drmach_mem_t *s_mem, drmach_mem_t *t_mem)
{
uint64_t s_base, t_base;
drmach_board_t *s_board, *t_board;
struct memlist *ml;
s_board = s_mem->dev.bp;
t_board = t_mem->dev.bp;
if (s_board == NULL || t_board == NULL) {
cmn_err(CE_PANIC, "Cannot locate source or target board\n");
return;
}
s_base = s_mem->slice_base;
t_base = t_mem->slice_base;
s_mem->slice_base = t_base;
s_mem->base_pa = (s_mem->base_pa - s_base) + t_base;
for (ml = s_mem->memlist; ml; ml = ml->next) {
ml->address = ml->address - s_base + t_base;
}
t_mem->slice_base = s_base;
t_mem->base_pa = (t_mem->base_pa - t_base) + s_base;
for (ml = t_mem->memlist; ml; ml = ml->next) {
ml->address = ml->address - t_base + s_base;
}
/*
* IKP has to update sb-mem-ranges for the mac patrol driver;
* when it resumes, it will re-read the sb-mem-ranges property
* to get the new base address.
*/
if (oplcfg_pa_swap(s_board->bnum, t_board->bnum) != 0)
cmn_err(CE_PANIC, "Could not update device nodes\n");
}
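/*
* Master entry point for copy-rename. Called with the OS quiesced;
* cross-traps all other ready CPUs into drmach_copy_rename_slave()
* and then runs the relocated copy/rename program itself.
*/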
void
drmach_copy_rename(drmachid_t id)
{
drmach_copy_rename_program_t *prog_kmem = id;
drmach_copy_rename_program_t *prog;
cpuset_t cpuset;
int cpuid;
uint64_t inst;
register int rtn;
extern int in_sync;
int old_in_sync;
extern void drmach_sys_trap();
extern void drmach_flush();
extern void drmach_flush_icache();
extern uint64_t patch_inst(uint64_t *, uint64_t);
on_trap_data_t otd;
prog = prog_kmem->locked_prog;
/*
* We must immediately drop the mapping into the TLB because
* all pointers are based on the alternate vmem space.
*/
(void) drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog);
/*
* We call SCF to get the base address here because if SCF
* has not been suspended yet, the active path can be changing and
* sometimes it is not even mapped. We call the interface once
* the OS has been quiesced.
*/
prog->critical->scf_reg_base = (*prog->data->scf_get_base_addr)();
if (prog->critical->scf_reg_base == (uint64_t)-1 ||
prog->critical->scf_reg_base == NULL) {
prog->data->fmem_status.error = EOPL_FMEM_SCF_ERR;
drmach_unlock_critical((caddr_t)prog);
return;
}
cpuset = prog->data->cpu_ready_set;
for (cpuid = 0; cpuid < NCPU; cpuid++) {
if (CPU_IN_SET(cpuset, cpuid)) {
prog->critical->stat[cpuid] = FMEM_LOOP_START;
prog->data->error[cpuid] = ESBD_NOERROR;
}
}
old_in_sync = in_sync;
in_sync = 1;
cpuid = CPU->cpu_id;
CPUSET_DEL(cpuset, cpuid);
for (cpuid = 0; cpuid < NCPU; cpuid++) {
if (CPU_IN_SET(cpuset, cpuid)) {
xc_one(cpuid, (xcfunc_t *)drmach_lock_critical,
(uint64_t)prog_kmem, (uint64_t)prog);
}
}
cpuid = CPU->cpu_id;
xt_some(cpuset, (xcfunc_t *)drmach_sys_trap,
(uint64_t)drmach_copy_rename_slave, (uint64_t)prog);
xt_sync(cpuset);
if (on_trap(&otd, OT_DATA_EC)) {
rtn = EOPL_FMEM_COPY_ERROR;
drmach_flush_icache();
goto done;
}
/*
* Jump to the relocated copy of drmach_copy_rename_prog__relocatable().
*/
drmach_flush(prog->critical, PAGESIZE);
rtn = prog->critical->run(prog, cpuid);
drmach_flush_icache();
done:
no_trap();
if (rtn == EOPL_FMEM_HW_ERROR) {
kpreempt_enable();
prom_panic("URGENT_ERROR_TRAP is detected during FMEM.\n");
}
/*
* In the normal case, all slave CPUs are still spinning in
* the assembly code. The master has to patch the instruction
* to get them out.
* In the error case, e.g. COPY_ERROR, some slave CPUs might
* have aborted and already returned, setting the LOOP_EXIT status.
* Some CPUs might still be copying.
* In any case, some delay is necessary to give them
* enough time to set the LOOP_EXIT status.
*/
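/*
* patch_inst() returns the instruction word previously at the
* location; we retry until the readback shows the original
* instruction has been restored, releasing the spinning slaves.
*/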
for (;;) {
inst = patch_inst((uint64_t *)prog->critical->loop_rtn,
prog->critical->inst_loop_ret);
if (prog->critical->inst_loop_ret == inst) {
break;
}
}
for (cpuid = 0; cpuid < NCPU; cpuid++) {
uint64_t last, now;
if (!CPU_IN_SET(cpuset, cpuid)) {
continue;
}
last = prog->stat->nbytes[cpuid];
/*
* Wait for all CPUs to exit.
* However, we do not want an infinite loop,
* so we detect a hang here.
* If a slave CPU is still copying data,
* we will continue to wait.
* In error cases, the master has already set
* fmem_status.error to abort the copying.
* A 1 ms delay for them to abort copying and
* return to drmach_copy_rename_slave() to set
* the FMEM_LOOP_EXIT status should be enough.
*/
for (;;) {
if (prog->critical->stat[cpuid] == FMEM_LOOP_EXIT)
break;
drmach_sleep_il();
drv_usecwait(1000);
now = prog->stat->nbytes[cpuid];
if (now <= last) {
drv_usecwait(1000);
if (prog->critical->stat[cpuid] ==
FMEM_LOOP_EXIT)
break;
cmn_err(CE_PANIC, "CPU %d hang during Copy "
"Rename", cpuid);
}
last = now;
}
if (prog->data->error[cpuid] == EOPL_FMEM_HW_ERROR) {
prom_panic("URGENT_ERROR_TRAP is detected during "
"FMEM.\n");
}
}
/*
* This must be done after all strands have exited.
* Removing the TLB entry will affect both strands
* in the same core.
*/
for (cpuid = 0; cpuid < NCPU; cpuid++) {
if (CPU_IN_SET(cpuset, cpuid)) {
xc_one(cpuid, (xcfunc_t *)drmach_unlock_critical,
(uint64_t)prog, 0);
}
}
in_sync = old_in_sync;
/*
* we should unlock before the following lock to keep the kpreempt
* count correct.
*/
(void) drmach_unlock_critical((caddr_t)prog);
/*
* We must remap again; the TLB entries might have been removed by the above xcall.
*/
(void) drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog);
if (prog->data->fmem_status.error == ESBD_NOERROR)
prog->data->fmem_status.error = rtn;
if (prog->data->copy_wait_time > 0) {
DRMACH_PR("Unexpected long wait time %ld seconds "
"during copy rename on CPU %d\n",
prog->data->copy_wait_time/prog->data->stick_freq,
prog->data->slowest_cpuid);
}
drmach_unlock_critical((caddr_t)prog);
}