/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014 Nexenta Systems Inc. All rights reserved.
*/
/*
* Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a
* more detailed discussion of the overall mpxio architecture.
*
* Default locking order:
*
* _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_phci_mutex))
* _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_client_mutex))
* _NOTE(LOCK_ORDER(mdi_vhci::vh_phci_mutex mdi_phci::ph_mutex))
* _NOTE(LOCK_ORDER(mdi_vhci::vh_client_mutex mdi_client::ct_mutex))
* _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
* _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
* _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
*/
#include <sys/note.h>
#include <sys/types.h>
#include <sys/varargs.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/uio.h>
#include <sys/buf.h>
#include <sys/modctl.h>
#include <sys/open.h>
#include <sys/kmem.h>
#include <sys/poll.h>
#include <sys/conf.h>
#include <sys/bootconf.h>
#include <sys/cmn_err.h>
#include <sys/stat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ddipropdefs.h>
#include <sys/sunndi.h>
#include <sys/ndi_impldefs.h>
#include <sys/promif.h>
#include <sys/sunmdi.h>
#include <sys/mdi_impldefs.h>
#include <sys/taskq.h>
#include <sys/epm.h>
#include <sys/sunpm.h>
#include <sys/modhash.h>
#include <sys/disp.h>
#include <sys/autoconf.h>
#include <sys/sysmacros.h>
#ifdef DEBUG
#include <sys/debug.h>
int mdi_debug = 1;
int mdi_debug_logonly = 0;
#define MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel)) i_mdi_log pargs
#define MDI_WARN CE_WARN, __func__
#define MDI_NOTE CE_NOTE, __func__
#define MDI_CONT CE_CONT, __func__
static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...);
#else /* !DEBUG */
#define MDI_DEBUG(dbglevel, pargs)
#endif /* DEBUG */
int mdi_debug_consoleonly = 0;
int mdi_delay = 3;
extern pri_t minclsyspri;
extern int modrootloaded;
/*
* Global mutex:
* Protects vHCI list and structure members.
*/
kmutex_t mdi_mutex;
/*
* Registered vHCI class driver lists
*/
int mdi_vhci_count;
mdi_vhci_t *mdi_vhci_head;
mdi_vhci_t *mdi_vhci_tail;
/*
* Client Hash Table size
*/
static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
/*
* taskq interface definitions
*/
#define MDI_TASKQ_N_THREADS 8
#define MDI_TASKQ_PRI minclsyspri
#define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads)
#define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads)
taskq_t *mdi_taskq;
static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
#define TICKS_PER_SECOND (drv_usectohz(1000000))
/*
* The data should be "quiet" for this interval (in seconds) before the
* vhci cached data is flushed to the disk.
*/
static int mdi_vhcache_flush_delay = 10;
/* number of seconds the vhcache flush daemon will sleep idle before exiting */
static int mdi_vhcache_flush_daemon_idle_time = 60;
/*
* MDI falls back to discovery of all paths when a bus_config_one fails.
* The following parameters can be used to tune this operation.
*
* mdi_path_discovery_boot
* Number of times path discovery will be attempted during early boot.
* Probably there is no reason to ever set this value to greater than one.
*
* mdi_path_discovery_postboot
* Number of times path discovery will be attempted after early boot.
* Set it to a minimum of two to allow for discovery of iscsi paths which
* may happen very late during booting.
*
* mdi_path_discovery_interval
* Minimum number of seconds MDI will wait between successive discovery
* of all paths. Set it to -1 to disable discovery of all paths.
*/
static int mdi_path_discovery_boot = 1;
static int mdi_path_discovery_postboot = 2;
static int mdi_path_discovery_interval = 10;
/*
* number of seconds the asynchronous configuration thread will sleep idle
* before exiting.
*/
static int mdi_async_config_idle_time = 600;
static int mdi_bus_config_cache_hash_size = 256;
/* turns off multithreaded configuration for certain operations */
static int mdi_mtc_off = 0;
/*
* The "path" to a pathinfo node is identical to the /devices path to a
* devinfo node had the device been enumerated under a pHCI instead of
* a vHCI. This pathinfo "path" is associated with a 'path_instance'.
* This association persists across create/delete of the pathinfo nodes,
* but not across reboot.
*/
static uint_t mdi_pathmap_instance = 1; /* 0 -> any path */
static int mdi_pathmap_hash_size = 256;
static kmutex_t mdi_pathmap_mutex;
static mod_hash_t *mdi_pathmap_bypath; /* "path"->instance */
static mod_hash_t *mdi_pathmap_byinstance; /* instance->"path" */
static mod_hash_t *mdi_pathmap_sbyinstance; /* inst->shortpath */
/*
* MDI component property name/value string definitions
*/
const char *mdi_component_prop = "mpxio-component";
const char *mdi_component_prop_vhci = "vhci";
const char *mdi_component_prop_phci = "phci";
const char *mdi_component_prop_client = "client";
/*
* MDI client global unique identifier property name
*/
const char *mdi_client_guid_prop = "client-guid";
/*
* MDI client load balancing property name/value string definitions
*/
const char *mdi_load_balance = "load-balance";
const char *mdi_load_balance_none = "none";
const char *mdi_load_balance_rr = "round-robin";
const char *mdi_load_balance_lba = "logical-block";
/*
* Obsolete vHCI class definition; to be removed after Leadville update
*/
const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
static char vhci_greeting[] =
"\tThere already exists one vHCI driver for class %s\n"
"\tOnly one vHCI driver for each class is allowed\n";
/*
* Static function prototypes
*/
static int i_mdi_phci_offline(dev_info_t *, uint_t);
static int i_mdi_client_offline(dev_info_t *, uint_t);
static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
static void i_mdi_phci_post_detach(dev_info_t *,
ddi_detach_cmd_t, int);
static int i_mdi_client_pre_detach(dev_info_t *,
ddi_detach_cmd_t);
static void i_mdi_client_post_detach(dev_info_t *,
ddi_detach_cmd_t, int);
static void i_mdi_pm_hold_pip(mdi_pathinfo_t *);
static void i_mdi_pm_rele_pip(mdi_pathinfo_t *);
static int i_mdi_lba_lb(mdi_client_t *ct,
mdi_pathinfo_t **ret_pip, struct buf *buf);
static void i_mdi_pm_hold_client(mdi_client_t *, int);
static void i_mdi_pm_rele_client(mdi_client_t *, int);
static void i_mdi_pm_reset_client(mdi_client_t *);
static int i_mdi_power_all_phci(mdi_client_t *);
static void i_mdi_log_sysevent(dev_info_t *, char *, char *);
/*
* Internal mdi_pathinfo node functions
*/
static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
static mdi_vhci_t *i_mdi_vhci_class2vhci(char *);
static mdi_vhci_t *i_devi_get_vhci(dev_info_t *);
static mdi_phci_t *i_devi_get_phci(dev_info_t *);
static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
static void i_mdi_phci_unlock(mdi_phci_t *);
static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
mdi_client_t *);
static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
static void i_mdi_client_remove_path(mdi_client_t *,
mdi_pathinfo_t *);
static int i_mdi_pi_state_change(mdi_pathinfo_t *,
mdi_pathinfo_state_t, int);
static int i_mdi_pi_offline(mdi_pathinfo_t *, int);
static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
char **, int);
static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
static int i_mdi_is_child_present(dev_info_t *, dev_info_t *);
static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *);
static void i_mdi_client_update_state(mdi_client_t *);
static int i_mdi_client_compute_state(mdi_client_t *,
mdi_phci_t *);
static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
static void i_mdi_client_unlock(mdi_client_t *);
static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
static mdi_client_t *i_devi_get_client(dev_info_t *);
/*
* NOTE: this will be removed once the NWS files are changed to use the new
* mdi_{enable,disable}_path interfaces
*/
static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
int, int);
static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
mdi_vhci_t *vh, int flags, int op);
/*
* Failover related function prototypes
*/
static int i_mdi_failover(void *);
/*
* misc internal functions
*/
static int i_mdi_get_hash_key(char *);
static int i_map_nvlist_error_to_mdi(int);
static void i_mdi_report_path_state(mdi_client_t *,
mdi_pathinfo_t *);
static void setup_vhci_cache(mdi_vhci_t *);
static int destroy_vhci_cache(mdi_vhci_t *);
static int stop_vhcache_async_threads(mdi_vhci_config_t *);
static boolean_t stop_vhcache_flush_thread(void *, int);
static void free_string_array(char **, int);
static void free_vhcache_phci(mdi_vhcache_phci_t *);
static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
static void free_vhcache_client(mdi_vhcache_client_t *);
static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *);
static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
static void vhcache_pi_add(mdi_vhci_config_t *,
struct mdi_pathinfo *);
static void vhcache_pi_remove(mdi_vhci_config_t *,
struct mdi_pathinfo *);
static void free_phclient_path_list(mdi_phys_path_t *);
static void sort_vhcache_paths(mdi_vhcache_client_t *);
static int flush_vhcache(mdi_vhci_config_t *, int);
static void vhcache_dirty(mdi_vhci_config_t *);
static void free_async_client_config(mdi_async_client_config_t *);
static void single_threaded_vhconfig_enter(mdi_vhci_config_t *);
static void single_threaded_vhconfig_exit(mdi_vhci_config_t *);
static nvlist_t *read_on_disk_vhci_cache(char *);
extern int fread_nvlist(char *, nvlist_t **);
extern int fwrite_nvlist(char *, nvlist_t *);
/*
 * i_mdi_init():
 *		One-time setup of the framework-wide MDI state: the global
 *		mdi_mutex, the shared mdi_taskq, and the three
 *		'path_instance' <-> "path" hash maps together with the mutex
 *		that accompanies them.
 *
 *		mdi_vhci_register() calls this on every registration; a
 *		function-local flag turns every call after the first into a
 *		no-op.  The flag is tested and set without holding any lock.
 */
static void
i_mdi_init()
{
	static int	setup_done = 0;
	int		nchains;

	if (setup_done != 0)
		return;
	setup_done = 1;

	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);

	/* Worker pool shared by the whole framework */
	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */

	/*
	 * 'path_instance' <-> "path" maps: one keyed by path string, two
	 * keyed by instance id.  All three share the same chain count.
	 */
	mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
	nchains = mdi_pathmap_hash_size;
	mdi_pathmap_bypath = mod_hash_create_strhash("mdi_pathmap_bypath",
	    nchains, mod_hash_null_valdtor);
	mdi_pathmap_byinstance = mod_hash_create_idhash(
	    "mdi_pathmap_byinstance", nchains, mod_hash_null_valdtor);
	mdi_pathmap_sbyinstance = mod_hash_create_idhash(
	    "mdi_pathmap_sbyinstance", nchains, mod_hash_null_valdtor);
}
/*
 * mdi_get_component_type():
 *		Report the mpxio component type recorded on a devinfo node.
 *
 * Return Values:
 *		The node's devi_mdi_component value, built from:
 *		MDI_COMPONENT_NONE
 *		MDI_COMPONENT_VHCI
 *		MDI_COMPONENT_PHCI
 *		MDI_COMPONENT_CLIENT
 *
 * XXX This doesn't work under multi-level MPxIO and should be
 * removed when clients migrate to the mdi_component_is_*() interfaces.
 */
int
mdi_get_component_type(dev_info_t *dip)
{
	struct dev_info	*devi = DEVI(dip);

	return (devi->devi_mdi_component);
}
/*
 * mdi_vhci_register():
 *		Register a vHCI driver instance and its MDI entry points
 *		with the mpxio framework.  A vHCI driver is expected to call
 *		this from its attach(9e) handler.
 *
 *		Registrations can race: vHCI drivers get loaded and attached
 *		as a side effect of pHCI registration (mdi_phci_register()),
 *		so the class list is searched and updated under mdi_mutex.
 *		Only one vHCI per class is accepted; a second registration
 *		for a class that already has ops installed is rejected.
 *
 * Parameters:
 *		class	vHCI class name
 *		vdip	devinfo node of the vHCI
 *		vops	vHCI ops vector to install
 *		flags	unused
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE	the class already has a registered vHCI
 */
/*ARGSUSED*/
int
mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
    int flags)
{
	mdi_vhci_t	*vhci;

	/* The registrant's ops revision must not exceed the framework's */
	ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);

#ifdef DEBUG
	/*
	 * The IB nexus driver is loaded only when IB hardware is present.
	 * To make that work, loading and attaching of the IB nexus driver
	 * (in particular when IB hardware is plugged in dynamically) has to
	 * be driven while an IB HCA driver (PHCI) is being attached.
	 * devfs offers no clean way to drive configuration of one subtree
	 * from another, so the parent-busy ASSERT is skipped for IB.
	 */
	if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
		ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
#endif

	i_mdi_init();

	mutex_enter(&mdi_mutex);

	/* Find the first vHCI already listed under this class, if any */
	vhci = mdi_vhci_head;
	while (vhci != NULL && strcmp(vhci->vh_class, class) != 0)
		vhci = vhci->vh_next;

	/* An existing entry with ops installed means the class is taken */
	if (vhci != NULL && vhci->vh_ops != NULL) {
		mutex_exit(&mdi_mutex);
		cmn_err(CE_NOTE, vhci_greeting, class);
		return (MDI_FAILURE);
	}

	if (vhci == NULL) {
		char	*lb_value;

		/* Nothing listed yet: build and initialize a new vHCI */
		vhci = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
		vhci->vh_client_table = kmem_zalloc(
		    mdi_client_table_size * sizeof (struct client_hash),
		    KM_SLEEP);
		vhci->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
		(void) strcpy(vhci->vh_class, class);

		/* Round-robin unless the load-balance property overrides */
		vhci->vh_lb = LOAD_BALANCE_RR;
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 0,
		    LOAD_BALANCE_PROP, &lb_value) == DDI_SUCCESS) {
			if (strcmp(lb_value, LOAD_BALANCE_PROP_NONE) == 0)
				vhci->vh_lb = LOAD_BALANCE_NONE;
			else if (strcmp(lb_value, LOAD_BALANCE_PROP_LBA) == 0)
				vhci->vh_lb = LOAD_BALANCE_LBA;
			ddi_prop_free(lb_value);
		}

		mutex_init(&vhci->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&vhci->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);

		/* Record the owning node and the ops vector */
		vhci->vh_dip = vdip;
		vhci->vh_ops = vops;

		setup_vhci_cache(vhci);

		/* Append to the global vHCI list */
		if (mdi_vhci_tail != NULL)
			mdi_vhci_tail->vh_next = vhci;
		if (mdi_vhci_head == NULL)
			mdi_vhci_head = vhci;
		mdi_vhci_tail = vhci;
		mdi_vhci_count++;
	}

	/* Mark the devinfo node as a vHCI and point it back at our state */
	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vhci;

	mutex_exit(&mdi_mutex);
	return (MDI_SUCCESS);
}
/*
 * mdi_vhci_unregister():
 *		Remove a vHCI from the mpxio framework and release its
 *		state.  Called from the vHCI's detach(9E) entry point.
 *
 *		The request is refused while the vHCI still has references,
 *		registered pHCIs or clients, or when its cache cannot be
 *		destroyed.
 *
 * Parameters:
 *		vdip	devinfo node of the vHCI
 *		flags	unused
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_vhci_unregister(dev_info_t *vdip, int flags)
{
	mdi_vhci_t	*vh;
	mdi_vhci_t	*cur;
	mdi_vhci_t	*before = NULL;
	int		in_use;

	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));

	/* The node must actually be a vHCI */
	vh = i_devi_get_vhci(vdip);
	if (vh == NULL)
		return (MDI_FAILURE);

	/* Locate it on the global list, remembering its predecessor */
	mutex_enter(&mdi_mutex);
	cur = mdi_vhci_head;
	while (cur != NULL && cur != vh) {
		before = cur;
		cur = cur->vh_next;
	}
	if (cur == NULL) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Every pHCI and client must already be unregistered, and no
	 * reference may remain, before the vHCI itself can go away.
	 */
	MDI_VHCI_PHCI_LOCK(vh);
	in_use = (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count);
	MDI_VHCI_PHCI_UNLOCK(vh);
	if (in_use) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/* Unlink from the global list */
	if (mdi_vhci_head == vh)
		mdi_vhci_head = vh->vh_next;
	else
		before->vh_next = vh->vh_next;
	if (mdi_vhci_tail == vh)
		mdi_vhci_tail = before;
	mdi_vhci_count--;
	mutex_exit(&mdi_mutex);

	/* Detach from the devinfo node, then release everything we own */
	vh->vh_ops = NULL;
	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
	DEVI(vdip)->devi_mdi_xhci = NULL;

	kmem_free(vh->vh_class, strlen(vh->vh_class) + 1);
	kmem_free(vh->vh_client_table,
	    mdi_client_table_size * sizeof (struct client_hash));
	mutex_destroy(&vh->vh_phci_mutex);
	mutex_destroy(&vh->vh_client_mutex);
	kmem_free(vh, sizeof (mdi_vhci_t));

	return (MDI_SUCCESS);
}
/*
 * i_mdi_vhci_class2vhci():
 *		Map a vHCI class name to the first vHCI on the global list
 *		registered under that name.  The list is walked under
 *		mdi_mutex, which the caller must not already hold.
 *
 * Return Values:
 *		Handle to the matching vHCI component
 *		NULL if no vHCI is listed for the class
 */
static mdi_vhci_t *
i_mdi_vhci_class2vhci(char *class)
{
	mdi_vhci_t	*match;

	ASSERT(!MUTEX_HELD(&mdi_mutex));

	mutex_enter(&mdi_mutex);
	match = mdi_vhci_head;
	while (match != NULL && strcmp(match->vh_class, class) != 0)
		match = match->vh_next;
	mutex_exit(&mdi_mutex);

	return (match);
}
/*
 * i_devi_get_vhci():
 *		Utility function to get the handle to a vHCI component
 *		from its devinfo node.
 *
 * Parameters:
 *		vdip	devinfo node to inspect
 *
 * Return Values:
 *		Handle to the vHCI component stored in devi_mdi_xhci
 *		NULL if MDI_VHCI() does not hold for the node
 *
 * The definition is marked static to agree with its file-scope prototype
 * and with the sibling i_devi_get_phci()/i_devi_get_client() helpers.
 */
static mdi_vhci_t *
i_devi_get_vhci(dev_info_t *vdip)
{
	if (!MDI_VHCI(vdip))
		return (NULL);

	return ((mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci);
}
/*
 * mdi_phci_register():
 *		Register a pHCI driver instance with the mpxio framework and
 *		bind it to the vHCI of the given class.  A pHCI driver is
 *		expected to call this from its attach(9e) handler.
 *
 * Parameters:
 *		class	class name of the vHCI to bind to
 *		pdip	devinfo node of the pHCI
 *		flags	unused
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE	"mpxio-disable" is "yes" on the node, or no
 *				vHCI is registered for the class
 */
/*ARGSUSED*/
int
mdi_phci_register(char *class, dev_info_t *pdip, int flags)
{
	mdi_phci_t	*ph;
	mdi_vhci_t	*vh;
	char		*disable_val;

	/*
	 * Some subsystems, like fcp, register the pHCI from a thread other
	 * than the one running the pHCI attach(9E); the attach code waits
	 * for that thread to finish.  So all we can ASSERT is that the
	 * parent is DEVI_BUSY_CHANGING (some thread did an ndi_devi_enter
	 * of it), not DEVI_BUSY_OWNED (which would mean this thread did).
	 */
	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	/*
	 * mpxio stays enabled unless the mpxio-disable property exists and
	 * is set to "yes"; in that case emit a brief message and refuse.
	 */
	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
	    &disable_val) == DDI_SUCCESS) {
		int	disabled = (strcmp(disable_val, "yes") == 0);

		if (disabled) {
			MDI_DEBUG(1, (MDI_CONT, pdip,
			    "?multipath capabilities disabled via %s.conf.",
			    ddi_driver_name(pdip)));
		}
		ddi_prop_free(disable_val);
		if (disabled)
			return (MDI_FAILURE);
	}

	/* A vHCI of this class must already be registered */
	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
	if (vh == NULL)
		return (MDI_FAILURE);

	/* Build the pHCI extension */
	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
	ph->ph_dip = pdip;
	ph->ph_vhci = vh;
	ph->ph_next = NULL;
	ph->ph_unstable = 0;
	ph->ph_vprivate = 0;

	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_POWER_UP(ph);
	MDI_PHCI_UNLOCK(ph);

	/* Mark the devinfo node as a pHCI and point it back at our state */
	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;

	vhcache_phci_add(vh->vh_config, ph);

	/* Append to the vHCI's pHCI list */
	MDI_VHCI_PHCI_LOCK(vh);
	if (vh->vh_phci_tail != NULL)
		vh->vh_phci_tail->ph_next = ph;
	if (vh->vh_phci_head == NULL)
		vh->vh_phci_head = ph;
	vh->vh_phci_tail = ph;
	vh->vh_phci_count++;
	MDI_VHCI_PHCI_UNLOCK(vh);

	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
	return (MDI_SUCCESS);
}
/*
 * mdi_phci_unregister():
 *		Unregister a pHCI module from mpxio framework
 *		mdi_phci_unregister() is called by the pHCI drivers from their
 *		detach(9E) handler to unregister their instances from the
 *		framework.
 *
 * Parameters:
 *		pdip	devinfo node of the pHCI
 *		flags	unused
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE	the node is not a pHCI, has no vHCI, or the
 *				pHCI is not on its vHCI's pHCI list; nothing
 *				is modified or freed in these cases
 */
/*ARGSUSED*/
int
mdi_phci_unregister(dev_info_t *pdip, int flags)
{
	mdi_vhci_t		*vh;
	mdi_phci_t		*ph;
	mdi_phci_t		*tmp;
	mdi_phci_t		*prev = NULL;
	mdi_pathinfo_t		*pip;

	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
		return (MDI_FAILURE);
	}

	/* Locate the pHCI on the vHCI's list, tracking its predecessor */
	MDI_VHCI_PHCI_LOCK(vh);
	for (tmp = vh->vh_phci_head; tmp != NULL && tmp != ph;
	    tmp = tmp->ph_next)
		prev = tmp;

	if (tmp == NULL) {
		/*
		 * The pHCI is not on the list.  Unlinking it anyway would
		 * either dereference a NULL 'prev' (empty list) or truncate
		 * the list at its tail, and would skew vh_phci_count, so
		 * leave everything untouched and fail.
		 */
		MDI_VHCI_PHCI_UNLOCK(vh);
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "!pHCI not on vHCI pHCI list"));
		return (MDI_FAILURE);
	}

	if (ph == vh->vh_phci_head) {
		vh->vh_phci_head = ph->ph_next;
	} else {
		prev->ph_next = ph->ph_next;
	}
	if (ph == vh->vh_phci_tail) {
		vh->vh_phci_tail = prev;
	}
	vh->vh_phci_count--;
	MDI_VHCI_PHCI_UNLOCK(vh);

	/* Walk remaining pathinfo nodes and disassociate them from pHCI */
	MDI_PHCI_LOCK(ph);
	for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
		MDI_PI(pip)->pi_phci = NULL;
	MDI_PHCI_UNLOCK(ph);

	i_mdi_log_sysevent(pdip, vh->vh_class, ESC_DDI_INITIATOR_UNREGISTER);
	vhcache_phci_remove(vh->vh_config, ph);

	/*
	 * Drop the devinfo node's back reference before the pHCI is freed
	 * so the node never points at released memory.
	 */
	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = NULL;

	cv_destroy(&ph->ph_unstable_cv);
	mutex_destroy(&ph->ph_mutex);
	kmem_free(ph, sizeof (mdi_phci_t));
	return (MDI_SUCCESS);
}
/*
 * i_devi_get_phci():
 *		Fetch the pHCI extension hanging off a devinfo node.
 *
 * Return Values:
 *		The node's devi_mdi_xhci pointer as an mdi_phci_t
 *		NULL if MDI_PHCI() does not hold for the node
 */
static mdi_phci_t *
i_devi_get_phci(dev_info_t *pdip)
{
	if (!MDI_PHCI(pdip))
		return (NULL);

	return ((mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci);
}
/*
* Single thread mdi entry into devinfo node for modifying its children.
* If necessary we perform an ndi_devi_enter of the vHCI before doing
* an ndi_devi_enter of 'phci_dip'. We maintain circular in two parts: one
* for the vHCI and one for the pHCI.
*
* On return *circular holds the vHCI value in its upper 16 bits and the
* pHCI value in its lower 16 bits. A vHCI part of -1 records that this
* call did not keep an enter of the vHCI; mdi_devi_exit() checks for that
* value before exiting the vHCI.
*/
void
mdi_devi_enter(dev_info_t *phci_dip, int *circular)
{
dev_info_t *vdip;
int vcircular, pcircular;
/* Verify calling context */
ASSERT(MDI_PHCI(phci_dip));
vdip = mdi_devi_get_vdip(phci_dip);
ASSERT(vdip); /* A pHCI always has a vHCI */
/*
* If the pHCI is detaching then the framework has already entered the
* vHCI on a thread that went down the code path leading to
* detach_node(). This framework enter of the vHCI during pHCI
* detach is done to avoid deadlock with vHCI power management
* operations, which enter the vHCI and then enter down the path
* to the pHCI. If the pHCI is detaching then we piggyback this call's
* enter of the vHCI on the framework's vHCI enter that has already
* occurred - this is OK because we know that the framework thread
* doing detach is waiting for our completion.
*
* We should check DEVI_IS_DETACHING under an enter of the parent to
* avoid a race with detach - but we can't do that because the framework
* has already entered the parent, so we have some complexity instead.
*/
for (;;) {
if (ndi_devi_tryenter(vdip, &vcircular)) {
ASSERT(vcircular != -1);
/* Got the vHCI; give it back if the pHCI is detaching. */
if (DEVI_IS_DETACHING(phci_dip)) {
ndi_devi_exit(vdip, vcircular);
vcircular = -1;
}
break;
} else if (DEVI_IS_DETACHING(phci_dip)) {
/* vHCI busy and pHCI detaching: proceed without the vHCI. */
vcircular = -1;
break;
} else if (servicing_interrupt()) {
/*
* Don't delay an interrupt (and ensure adaptive
* mutex inversion support).
*/
ndi_devi_enter(vdip, &vcircular);
break;
} else {
/* vHCI busy: back off for a random delay, then retry. */
delay_random(mdi_delay);
}
}
ndi_devi_enter(phci_dip, &pcircular);
/* Pack both values into one int: vHCI high half, pHCI low half. */
*circular = (vcircular << 16) | (pcircular & 0xFFFF);
}
/*
 * mdi_devi_tryenter():
 *		Non-blocking variant of mdi_devi_enter(): try to enter the
 *		vHCI and then the pHCI.  If the pHCI cannot be entered the
 *		vHCI is exited again, so a failed attempt holds nothing.
 *
 * Return Values:
 *		1	both nodes entered; *circular holds the vHCI value in
 *			its upper 16 bits and the pHCI value in its lower 16
 *		0	busy; *circular is not written
 */
int
mdi_devi_tryenter(dev_info_t *phci_dip, int *circular)
{
	dev_info_t	*vhci_dip;
	int		vcirc, pcirc;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vhci_dip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vhci_dip);		/* A pHCI always has a vHCI */

	if (!ndi_devi_tryenter(vhci_dip, &vcirc))
		return (0);		/* busy */

	if (!ndi_devi_tryenter(phci_dip, &pcirc)) {
		ndi_devi_exit(vhci_dip, vcirc);
		return (0);		/* busy */
	}

	*circular = (vcirc << 16) | (pcirc & 0xFFFF);
	return (1);			/* locked */
}
/*
 * mdi_devi_exit():
 *		Undo an mdi_devi_enter() or a successful mdi_devi_tryenter().
 *		The pHCI is exited first; the vHCI is exited only when the
 *		vHCI half of 'circular' is not -1.
 */
void
mdi_devi_exit(dev_info_t *phci_dip, int circular)
{
	dev_info_t	*vhci_dip;
	/* Split the packed value: low 16 bits pHCI, high 16 bits vHCI */
	const int	pcirc = (short)(circular & 0xFFFF);
	const int	vcirc = (short)((circular >> 16) & 0xFFFF);

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vhci_dip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vhci_dip);		/* A pHCI always has a vHCI */

	ndi_devi_exit(phci_dip, pcirc);
	if (vcirc == -1)
		return;
	ndi_devi_exit(vhci_dip, vcirc);
}
/*
 * mdi_devi_exit_phci() and mdi_devi_enter_phci() bracket a pHCI driver's
 * calls to mdi_pi_online/offline (made after holding the pathinfo node via
 * mdi_hold_path/mdi_rele_path) to avoid deadlock with vHCI power
 * management code during path online/offline.  Every mdi_devi_exit_phci
 * needs a matching mdi_devi_enter_phci, and both must occur within the
 * scope of an active mdi_devi_enter that established the circular value.
 *
 * mdi_devi_exit_phci():
 *		Take a hold on the pHCI node, then exit it using the pHCI
 *		(low 16 bit) half of 'circular'.
 */
void
mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
{
	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));

	/* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */
	ndi_hold_devi(phci_dip);

	ndi_devi_exit(phci_dip, (short)(circular & 0xFFFF));
}
/*
 * mdi_devi_enter_phci():
 *		Re-enter the pHCI node and release the hold taken by
 *		mdi_devi_exit_phci().  '*circular' is only read, to ASSERT
 *		that the value obtained on re-entry equals its pHCI half.
 */
void
mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
{
	int	reentered;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));

	ndi_devi_enter(phci_dip, &reentered);

	/* Drop hold from mdi_devi_exit_phci. */
	ndi_rele_devi(phci_dip);

	/* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
	ASSERT(reentered == ((short)(*circular & 0xFFFF)));
}
/*
 * mdi_devi_get_vdip():
 *		Translate a pHCI devinfo node into the devinfo node of the
 *		vHCI it is registered with.
 *
 * Return Values:
 *		vHCI devinfo node
 *		NULL if the node has no pHCI extension or no vHCI attached
 */
dev_info_t *
mdi_devi_get_vdip(dev_info_t *pdip)
{
	mdi_phci_t	*ph = i_devi_get_phci(pdip);

	if (ph == NULL || ph->ph_vhci == NULL)
		return (NULL);

	return (ph->ph_vhci->vh_dip);
}
/*
 * mdi_devi_pdip_entered():
 *		Tell whether 'vdip' is a vHCI with at least one registered
 *		pHCI whose devinfo node satisfies DEVI_BUSY_OWNED(), i.e. we
 *		have done an ndi_devi_enter of a pHCI.  The pHCI list is
 *		scanned under the vHCI's pHCI lock.
 *
 * Return Values:
 *		1	such a pHCI exists
 *		0	otherwise, including when 'vdip' is not a vHCI
 */
int
mdi_devi_pdip_entered(dev_info_t *vdip)
{
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	int		entered = 0;

	vh = i_devi_get_vhci(vdip);
	if (vh == NULL)
		return (0);

	MDI_VHCI_PHCI_LOCK(vh);
	for (ph = vh->vh_phci_head; ph != NULL; ph = ph->ph_next) {
		if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
			entered = 1;
			break;
		}
	}
	MDI_VHCI_PHCI_UNLOCK(vh);

	return (entered);
}
/*
 * mdi_phci_path2devinfo():
 *		Search the pHCIs registered with a vHCI for the one whose
 *		devfs pathname (as produced by ddi_pathname()) equals
 *		'pathname'.
 *
 * Return Values:
 *		devinfo node of the matching pHCI
 *		NULL if 'vdip' is not a vHCI or no pHCI matches
 */
dev_info_t *
mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
{
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	char		*pathbuf;
	dev_info_t	*match = NULL;

	vh = i_devi_get_vhci(vdip);
	ASSERT(vh != NULL);
	if (vh == NULL) {
		/* Not a vHCI component: nothing to search */
		return (NULL);
	}

	/* Scratch buffer reused for each candidate's pathname */
	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

	MDI_VHCI_PHCI_LOCK(vh);
	for (ph = vh->vh_phci_head; ph != NULL; ph = ph->ph_next) {
		dev_info_t	*candidate = ph->ph_dip;

		ASSERT(candidate != NULL);
		*pathbuf = '\0';
		(void) ddi_pathname(candidate, pathbuf);
		if (strcmp(pathbuf, pathname) == 0) {
			match = candidate;
			break;
		}
	}
	MDI_VHCI_PHCI_UNLOCK(vh);

	kmem_free(pathbuf, MAXPATHLEN);
	return (match);
}
/*
 * mdi_phci_get_path_count():
 *		Report how many path information nodes are associated with
 *		a pHCI device.  The count is read without taking the pHCI
 *		lock.
 *
 * Return Values:
 *		The pHCI's ph_path_count
 *		0 if the node has no pHCI extension
 */
int
mdi_phci_get_path_count(dev_info_t *pdip)
{
	mdi_phci_t	*ph = i_devi_get_phci(pdip);

	if (ph == NULL)
		return (0);

	return (ph->ph_path_count);
}
/*
* i_mdi_phci_lock():
* Lock a pHCI device
* Parameters:
* ph - pHCI whose lock is to be acquired.
* pip - NULL for a plain blocking acquire. Non-NULL requests the
* reverse-order (try-lock) path described below; presumably the
* caller already holds pip's lock, since the retry path starts
* by unlocking it - TODO confirm against callers.
* Return Values:
* None
* Note:
* The default locking order is:
* _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
* But there are number of situations where locks need to be
* grabbed in reverse order. This routine implements try and lock
* mechanism depending on the requested parameter option.
*/
static void
i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
if (pip) {
/* Reverse locking is requested. */
while (MDI_PHCI_TRYLOCK(ph) == 0) {
if (servicing_interrupt()) {
/*
* Servicing an interrupt: do not delay. Drop the
* pathinfo lock (keeping a hold on the node), then
* take both locks in the default order and stop
* retrying.
*/
MDI_PI_HOLD(pip);
MDI_PI_UNLOCK(pip);
MDI_PHCI_LOCK(ph);
MDI_PI_LOCK(pip);
MDI_PI_RELE(pip);
break;
} else {
/*
* tryenter failed. Try to grab again
* after a small delay. The pathinfo lock is
* released for the duration of the delay, with a
* hold kept on the node.
*/
MDI_PI_HOLD(pip);
MDI_PI_UNLOCK(pip);
delay_random(mdi_delay);
MDI_PI_LOCK(pip);
MDI_PI_RELE(pip);
}
}
} else {
/* No pathinfo involved: plain blocking acquire. */
MDI_PHCI_LOCK(ph);
}
}
/*
 * i_mdi_phci_unlock():
 *		Release the pHCI component lock; counterpart of
 *		i_mdi_phci_lock().
 *
 * Return Values:
 *		None
 */
static void
i_mdi_phci_unlock(mdi_phci_t *ph)
{
	MDI_PHCI_UNLOCK(ph);
}
/*
 * i_mdi_devinfo_create():
 *		Allocate a client devinfo node under the vHCI and decorate
 *		it with the client GUID property and, when supplied, the
 *		"compatible" property.  The caller must hold the vHCI client
 *		lock.
 *
 *		An existing node with the same name and guid trips an ASSERT
 *		and is warned about, but a new node is allocated regardless.
 *
 * Return Values:
 *		The new dev_info node
 *		NULL if allocation or a property update failed; a partially
 *		set up node is stripped of its properties and freed
 */
static dev_info_t *
i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
    char **compatible, int ncompatible)
{
	dev_info_t	*node;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/* A node for this name/guid should not exist yet */
	node = i_mdi_devinfo_find(vh, name, guid);
	ASSERT(node == NULL);
	if (node != NULL) {
		cmn_err(CE_WARN,
		    "i_mdi_devinfo_create: client %s@%s already exists",
		    name ? name : "", guid ? guid : "");
	}

	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &node);
	if (node == NULL)
		return (NULL);

	/*
	 * Attach the GUID property first, then the compatible property if
	 * the caller provided one; the node is returned only when every
	 * attempted update succeeded.
	 */
	if (ndi_prop_update_string(DDI_DEV_T_NONE, node,
	    MDI_CLIENT_GUID_PROP, guid) == DDI_PROP_SUCCESS &&
	    (compatible == NULL ||
	    ndi_prop_update_string_array(DDI_DEV_T_NONE, node,
	    "compatible", compatible, ncompatible) == DDI_PROP_SUCCESS)) {
		return (node);
	}

	/* Property setup failed: tear the node down again */
	(void) ndi_prop_remove_all(node);
	(void) ndi_devi_free(node);
	return (NULL);
}
/*
 * i_mdi_devinfo_find():
 *		Scan the vHCI's children, with the vHCI node entered, for the
 *		first one whose node name equals 'name' and whose client GUID
 *		property equals 'guid'.  Children without the GUID property
 *		are skipped.
 *
 * Return Values:
 *		Handle to the matching dev_info node or NULL
 */
static dev_info_t *
i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
{
	dev_info_t	*child;
	char		*child_guid;
	int		circ;
	int		same_guid;

	ndi_devi_enter(vh->vh_dip, &circ);
	for (child = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
	    child != NULL;
	    child = (dev_info_t *)DEVI(child)->devi_sibling) {
		if (strcmp(DEVI(child)->devi_node_name, name) != 0)
			continue;

		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, child,
		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
		    &child_guid) != DDI_PROP_SUCCESS)
			continue;

		/* Compare, then release the looked-up value either way */
		same_guid = (strcmp(child_guid, guid) == 0);
		ddi_prop_free(child_guid);
		if (same_guid)
			break;
	}
	ndi_devi_exit(vh->vh_dip, circ);

	return (child);
}
/*
 * i_mdi_devinfo_remove():
 *		Offline and remove a client device node.  The removal is
 *		attempted when the node is currently a child of 'vdip' or
 *		when MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED is set in 'flags';
 *		otherwise nothing is done.
 *
 * Return Values:
 *		MDI_SUCCESS	node removed, or no removal was attempted
 *		MDI_BUSY	ndi_devi_offline() reported NDI_BUSY
 *		MDI_FAILURE	ndi_devi_offline() failed for another reason
 */
static int
i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int	ndi_rv;

	if (i_mdi_is_child_present(vdip, cdip) != MDI_SUCCESS &&
	    !(flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED))
		return (MDI_SUCCESS);

	ndi_rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE);
	if (ndi_rv == NDI_SUCCESS)
		return (MDI_SUCCESS);

	MDI_DEBUG(1, (MDI_NOTE, cdip, "!failed: cdip %p", (void *)cdip));

	/* Convert the NDI failure into an MDI error code */
	return ((ndi_rv == NDI_BUSY) ? MDI_BUSY : MDI_FAILURE);
}
/*
 * i_devi_get_client()
 *		Fetch the client extension hanging off a devinfo node.
 *
 * Return Values:
 *		The node's devi_mdi_client pointer as an mdi_client_t
 *		NULL if MDI_CLIENT() does not hold for the node
 */
static mdi_client_t *
i_devi_get_client(dev_info_t *cdip)
{
	if (!MDI_CLIENT(cdip))
		return (NULL);

	return ((mdi_client_t *)DEVI(cdip)->devi_mdi_client);
}
/*
 * i_mdi_is_child_present():
 *		Check, with 'vdip' entered, whether 'cdip' is on the child
 *		list of 'vdip'.
 *
 * Return Values:
 *		MDI_SUCCESS	'cdip' is a child of 'vdip'
 *		MDI_FAILURE	it is not
 */
static int
i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
{
	struct dev_info	*wanted = DEVI(cdip);
	struct dev_info	*kid;
	int		circ;
	int		found = MDI_FAILURE;

	ndi_devi_enter(vdip, &circ);
	for (kid = DEVI(vdip)->devi_child; kid != NULL;
	    kid = kid->devi_sibling) {
		if (kid == wanted) {
			found = MDI_SUCCESS;
			break;
		}
	}
	ndi_devi_exit(vdip, circ);

	return (found);
}
/*
* i_mdi_client_lock():
* Grab client component lock
* Parameters:
* ct - client whose lock is to be acquired.
* pip - NULL for a plain blocking acquire. Non-NULL requests the
* reverse-order (try-lock) path described below; presumably the
* caller already holds pip's lock, since the retry path starts
* by unlocking it - TODO confirm against callers.
* Return Values:
* None
* Note:
* The default locking order is:
* _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
* But there are number of situations where locks need to be
* grabbed in reverse order. This routine implements try and lock
* mechanism depending on the requested parameter option.
*/
static void
i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
if (pip) {
/*
* Reverse locking is requested.
*/
while (MDI_CLIENT_TRYLOCK(ct) == 0) {
if (servicing_interrupt()) {
/*
* Servicing an interrupt: do not delay. Drop the
* pathinfo lock (keeping a hold on the node), then
* take both locks in the default order and stop
* retrying.
*/
MDI_PI_HOLD(pip);
MDI_PI_UNLOCK(pip);
MDI_CLIENT_LOCK(ct);
MDI_PI_LOCK(pip);
MDI_PI_RELE(pip);
break;
} else {
/*
* tryenter failed. Try to grab again
* after a small delay. The pathinfo lock is
* released for the duration of the delay, with a
* hold kept on the node.
*/
MDI_PI_HOLD(pip);
MDI_PI_UNLOCK(pip);
delay_random(mdi_delay);
MDI_PI_LOCK(pip);
MDI_PI_RELE(pip);
}
}
} else {
/* No pathinfo involved: plain blocking acquire. */
MDI_CLIENT_LOCK(ct);
}
}
/*
 * i_mdi_client_unlock():
 *		Drop the client component lock taken by i_mdi_client_lock().
 */
static void
i_mdi_client_unlock(mdi_client_t *ct)
{
	MDI_CLIENT_UNLOCK(ct);
}
/*
 * i_mdi_client_alloc():
 *		Allocate a client structure, initialize it and insert it into
 *		the vHCI's client hash table.  Caller should hold the vhci
 *		client lock.
 *
 *		The new client keeps private copies of 'name' and 'lguid',
 *		has no devinfo node and no paths, inherits the vHCI's load
 *		balancing policy, and starts in MDI_CLIENT_STATE_FAILED with
 *		the offline, detach and power-up state macros applied.
 * Return Values:
 *		Handle to a client component
 */
/*ARGSUSED*/
static mdi_client_t *
i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
{
	mdi_client_t	*ct;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/*
	 * Allocate the component and set up its synchronization objects.
	 */
	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);

	/*
	 * Identity and linkage.
	 */
	ct->ct_vhci = vh;
	ct->ct_dip = NULL;
	ct->ct_hnext = NULL;
	ct->ct_hprev = NULL;
	ct->ct_cprivate = NULL;
	ct->ct_vprivate = NULL;

	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(ct->ct_drvname, name);
	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
	(void) strcpy(ct->ct_guid, lguid);

	/*
	 * Initial state.
	 */
	ct->ct_flags = 0;
	ct->ct_state = MDI_CLIENT_STATE_FAILED;
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_SET_OFFLINE(ct);
	MDI_CLIENT_SET_DETACH(ct);
	MDI_CLIENT_SET_POWER_UP(ct);
	MDI_CLIENT_UNLOCK(ct);
	ct->ct_failover_flags = 0;
	ct->ct_failover_status = 0;
	ct->ct_unstable = 0;

	/*
	 * Load balancing: policy comes from the vHCI, region size from the
	 * default.
	 */
	ct->ct_lb = vh->vh_lb;
	ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;

	/*
	 * Empty path list.
	 */
	ct->ct_path_count = 0;
	ct->ct_path_head = NULL;
	ct->ct_path_tail = NULL;
	ct->ct_path_last = NULL;

	/*
	 * Add this client component to our client hash queue
	 */
	i_mdi_client_enlist_table(vh, ct);

	return (ct);
}
/*
 * i_mdi_client_enlist_table():
 *		Insert the client at the head of the hash chain selected by
 *		its GUID and bump the per-bucket and per-vHCI client counts.
 *		Caller should hold the vhci client lock.
 */
static void
i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
{
	struct client_hash	*bucket;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	bucket = &vh->vh_client_table[i_mdi_get_hash_key(ct->ct_guid)];

	ct->ct_hnext = (mdi_client_t *)bucket->ct_hash_head;
	bucket->ct_hash_head = ct;

	bucket->ct_hash_count++;
	vh->vh_client_count++;
}
/*
 * i_mdi_client_delist_table():
 *		Remove the client from the hash chain selected by its GUID.
 *		If the client is not on that chain nothing is changed;
 *		otherwise it is unlinked and the per-bucket and per-vHCI
 *		client counts are decremented.
 *		Caller should hold the vhci client lock.
 */
static void
i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
{
	struct client_hash	*bucket;
	mdi_client_t		*cur;
	mdi_client_t		*prev;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	bucket = &vh->vh_client_table[i_mdi_get_hash_key(ct->ct_guid)];

	/* Locate ct on the chain, remembering its predecessor. */
	prev = NULL;
	for (cur = (mdi_client_t *)bucket->ct_hash_head;
	    cur != NULL && cur != ct; cur = cur->ct_hnext) {
		prev = cur;
	}
	if (cur == NULL) {
		return;
	}

	if (prev == NULL) {
		bucket->ct_hash_head = ct->ct_hnext;
	} else {
		prev->ct_hnext = ct->ct_hnext;
	}
	ct->ct_hnext = NULL;

	bucket->ct_hash_count--;
	vh->vh_client_count--;
}
/*
 * i_mdi_client_free():
 *		Free a client component.
 *
 *		Detaches the client from its devinfo node (if it has one),
 *		removes it from the vHCI client hash table, releases every
 *		resource owned by the client structure and finally removes
 *		the devinfo node itself.
 *
 *		Caller must hold the vhci client lock.  The lock is dropped
 *		and re-acquired around the devinfo node removal, so the hash
 *		table may change across this call.
 *
 *		A client may legitimately have no devinfo node yet
 *		(ct->ct_dip == NULL), e.g. when devinfo creation failed right
 *		after the client was allocated; in that case all devinfo
 *		related steps are skipped.
 * Return Values:
 *		MDI_SUCCESS (always; a failure to remove the devinfo node is
 *		not reported)
 */
static int
i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int		rv = MDI_SUCCESS;
	int		flags = ct->ct_flags;
	dev_info_t	*cdip;
	dev_info_t	*vdip;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	vdip = vh->vh_dip;
	cdip = ct->ct_dip;

	/*
	 * Break the devinfo node -> client association.  Guarded because
	 * the client may never have been given a devinfo node.
	 */
	if (cdip != NULL) {
		(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip,
		    MDI_CLIENT_GUID_PROP);
		DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
		DEVI(cdip)->devi_mdi_client = NULL;
	}

	/*
	 * Clear out back ref. to dev_info_t node
	 */
	ct->ct_dip = NULL;

	/*
	 * Remove this client from our hash queue
	 */
	i_mdi_client_delist_table(vh, ct);

	/*
	 * Uninitialize and free the component
	 */
	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
	cv_destroy(&ct->ct_failover_cv);
	cv_destroy(&ct->ct_unstable_cv);
	cv_destroy(&ct->ct_powerchange_cv);
	mutex_destroy(&ct->ct_mutex);
	kmem_free(ct, sizeof (*ct));

	/*
	 * The devinfo node is removed without the vhci client lock held;
	 * 'flags' was sampled before the client structure was freed.
	 */
	if (cdip != NULL) {
		MDI_VHCI_CLIENT_UNLOCK(vh);
		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
		MDI_VHCI_CLIENT_LOCK(vh);
	}
	return (rv);
}
/*
 * i_mdi_client_find():
 *		Look up a client by guid in the vHCI client hash table.  When
 *		cname is non-NULL the client's driver name must match as
 *		well.  Returns NULL if no client matches.
 *		Caller should hold the vhci client lock.
 */
static mdi_client_t *
i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
{
	struct client_hash	*bucket;
	mdi_client_t		*ct;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	bucket = &vh->vh_client_table[i_mdi_get_hash_key(guid)];

	for (ct = bucket->ct_hash_head; ct != NULL; ct = ct->ct_hnext) {
		if (strcmp(ct->ct_guid, guid) != 0) {
			continue;
		}
		if (cname == NULL || strcmp(ct->ct_drvname, cname) == 0) {
			return (ct);
		}
	}
	return (NULL);
}
/*
 * i_mdi_client_update_state():
 *		Recompute the client device state from its current paths and
 *		store it in the client.  Caller must hold the client lock.
 * Notes:
 *		A client device can be in any of three possible states:
 *
 *		MDI_CLIENT_STATE_OPTIMAL - more than one online/standby path;
 *		a path failure can be tolerated.
 *		MDI_CLIENT_STATE_DEGRADED - exactly one online/standby path;
 *		a failure of that path would result in loss of access to
 *		device data.
 *		MDI_CLIENT_STATE_FAILED - no paths available to access the
 *		device.
 */
static void
i_mdi_client_update_state(mdi_client_t *ct)
{
	int	new_state;

	ASSERT(MDI_CLIENT_LOCKED(ct));

	/* No pHCI is excluded from the computation. */
	new_state = i_mdi_client_compute_state(ct, NULL);
	MDI_CLIENT_SET_STATE(ct, new_state);
}
/*
 * i_mdi_client_compute_state():
 *		Compute (but do not store) the client device state.
 *
 *		ct	Client whose paths are examined; its lock must be
 *			held by the caller.
 *		ph	pHCI whose paths are to be ignored while computing
 *			the new value (NULL to consider every path).  Used by
 *			i_mdi_phci_offline() to find the new client state
 *			after DR of a pHCI.
 *
 *		Paths that are ONLINE or STANDBY (after masking pi_state with
 *		MDI_PATHINFO_STATE_MASK) count as usable:
 *			no usable path		MDI_CLIENT_STATE_FAILED
 *			one usable path		MDI_CLIENT_STATE_DEGRADED
 *			two or more		MDI_CLIENT_STATE_OPTIMAL
 */
static int
i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
{
	int		usable = 0;
	mdi_pathinfo_t	*pip, *next;

	ASSERT(MDI_CLIENT_LOCKED(ct));

	for (pip = ct->ct_path_head; pip != NULL; pip = next) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
		if (MDI_PI(pip)->pi_phci != ph) {
			switch (MDI_PI(pip)->pi_state &
			    MDI_PATHINFO_STATE_MASK) {
			case MDI_PATHINFO_STATE_ONLINE:
			case MDI_PATHINFO_STATE_STANDBY:
				usable++;
				break;
			default:
				break;
			}
		}
		MDI_PI_UNLOCK(pip);
	}

	if (usable == 0) {
		MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
		    "client state failed: ct = %p", (void *)ct));
		return (MDI_CLIENT_STATE_FAILED);
	}
	if (usable == 1) {
		return (MDI_CLIENT_STATE_DEGRADED);
	}
	return (MDI_CLIENT_STATE_OPTIMAL);
}
/*
 * i_mdi_client2devinfo():
 *		Return the devinfo node recorded in a client structure
 *		(ct_dip).  No locking is performed here.
 */
dev_info_t *
i_mdi_client2devinfo(mdi_client_t *ct)
{
	dev_info_t	*cdip = ct->ct_dip;

	return (cdip);
}
/*
 * mdi_client_path2devinfo():
 *		Given the parent devinfo and child devfs pathname, search
 *		the parent's children for the node whose ddi_pathname()
 *		equals 'pathname'.  Returns that node, or NULL if none of
 *		the children matches.
 */
dev_info_t *
mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
{
	dev_info_t	*cdip;
	char		*pathbuf;
	int		circular;

	/* Scratch buffer for each child's path. */
	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

	/*
	 * Lock parent against changes while its child list is walked.
	 */
	ndi_devi_enter(vdip, &circular);
	for (cdip = (dev_info_t *)DEVI(vdip)->devi_child; cdip != NULL;
	    cdip = (dev_info_t *)DEVI(cdip)->devi_sibling) {
		pathbuf[0] = '\0';
		(void) ddi_pathname(cdip, pathbuf);
		if (strcmp(pathbuf, pathname) == 0) {
			break;
		}
	}
	ndi_devi_exit(vdip, circular);

	kmem_free(pathbuf, MAXPATHLEN);
	return (cdip);
}
/*
 * mdi_client_get_path_count():
 *		Return the number of path information nodes associated with
 *		a given client device, or 0 when cdip is not an MDI client.
 *		The count is read without taking the client lock.
 */
int
mdi_client_get_path_count(dev_info_t *cdip)
{
	mdi_client_t	*ct = i_devi_get_client(cdip);

	if (ct == NULL) {
		return (0);
	}
	return (ct->ct_path_count);
}
/*
 * i_mdi_get_hash_key():
 *		Hash a NUL-terminated string to a client hash table index.
 *		The characters are summed (modulo 2^32) and the sum is
 *		reduced modulo (CLIENT_HASH_TABLE_SIZE - 1).
 */
static int
i_mdi_get_hash_key(char *str)
{
	uint32_t	sum = 0;
	char		*cp = str;

	while (*cp != '\0') {
		sum += (uint32_t)*cp;
		cp++;
	}
	return (sum % (CLIENT_HASH_TABLE_SIZE - 1));
}
/*
 * mdi_get_lb_policy():
 *		Get current load balancing policy for a given client device.
 *		Returns LOAD_BALANCE_NONE when cdip is not an MDI client.
 */
client_lb_t
mdi_get_lb_policy(dev_info_t *cdip)
{
	mdi_client_t	*ct = i_devi_get_client(cdip);

	if (ct == NULL) {
		return (LOAD_BALANCE_NONE);
	}
	return (ct->ct_lb);
}
/*
 * mdi_set_lb_region_size():
 *		Set current region size for the load-balance.  The value is
 *		stored as given; it is not range checked here.
 * Return Values:
 *		MDI_SUCCESS	region size recorded
 *		MDI_FAILURE	cdip is not an MDI client, or the client has
 *				no load balancing arguments
 */
int
mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
{
	mdi_client_t	*ct = i_devi_get_client(cdip);

	if (ct == NULL || ct->ct_lb_args == NULL) {
		return (MDI_FAILURE);
	}
	ct->ct_lb_args->region_size = region_size;
	return (MDI_SUCCESS);
}
/*
 * mdi_set_lb_policy():
 *		Set current load balancing policy for a given client device.
 * Return Values:
 *		MDI_SUCCESS	policy recorded
 *		MDI_FAILURE	cdip is not an MDI client
 */
int
mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
{
	mdi_client_t	*ct = i_devi_get_client(cdip);

	if (ct == NULL) {
		return (MDI_FAILURE);
	}
	ct->ct_lb = lb;
	return (MDI_SUCCESS);
}
/*
 * mdi_failover():
 *		failover function called by the vHCI drivers to initiate
 *		a failover operation.  This is typically due to
 *		non-availability of online paths to route I/O requests.
 *		Failover can be triggered through user application also.
 *
 *		The vHCI driver calls mdi_failover() rather than executing
 *		its vo_failover() entry point directly so that the mdi
 *		framework can keep track of the client state properly;
 *		mdi_failover() calls back into vo_failover() to perform the
 *		actual operation.  As a convenience the operation can be run
 *		synchronously or asynchronously.
 *
 *		Upon successful completion of the failover operation, the
 *		paths that were previously ONLINE will be in the STANDBY
 *		state, and the newly activated paths will be in the ONLINE
 *		state.
 *
 *		flags selects the mode: with MDI_FAILOVER_ASYNC the request
 *		is handed to a taskq and the call returns at once; any other
 *		value (e.g. MDI_FAILOVER_SYNC) runs the failover in the
 *		calling thread, waiting first for pending path state changes
 *		to settle.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE	not a client, or client failed/detached/not
 *				attached
 *		MDI_BUSY	a path is being freed, a failover is already
 *				running, or (async only) the client is
 *				unstable
 *		MDI_ACCEPT	asynchronous request was dispatched
 *		For synchronous requests the status of i_mdi_failover() is
 *		returned.
 */
/*ARGSUSED*/
int
mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int		rv;
	mdi_client_t	*ct;

	ct = i_devi_get_client(cdip);
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/* cdip is not a valid client device. Nothing more to do. */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
		/* A path to the client is being freed */
		rv = MDI_BUSY;
		goto unlock_out;
	}
	if (MDI_CLIENT_IS_FAILED(ct)) {
		/* Client is in failed state. Nothing more to do. */
		rv = MDI_FAILURE;
		goto unlock_out;
	}
	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
		/* Failover is already in progress; return BUSY */
		rv = MDI_BUSY;
		goto unlock_out;
	}

	/*
	 * Make sure that mdi_pathinfo node state changes are processed.
	 * We do not allow failovers to progress while client path state
	 * changes are in progress: asynchronous callers are turned away,
	 * synchronous callers wait for the client to become stable.
	 */
	if (ct->ct_unstable && flags == MDI_FAILOVER_ASYNC) {
		rv = MDI_BUSY;
		goto unlock_out;
	}
	while (ct->ct_unstable) {
		cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
	}

	/*
	 * Client device is in stable state. Before proceeding, perform sanity
	 * checks again.
	 */
	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
	    (!i_ddi_devi_attached(cdip))) {
		/* Client can not be failed over. Nothing more to do. */
		rv = MDI_FAILURE;
		goto unlock_out;
	}

	/*
	 * Set the client state as failover in progress.
	 */
	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
	ct->ct_failover_flags = flags;
	MDI_CLIENT_UNLOCK(ct);

	if (flags != MDI_FAILOVER_ASYNC) {
		/*
		 * Synchronous failover mode. Typically invoked from the user
		 * land.
		 */
		return (i_mdi_failover(ct));
	}

	/*
	 * Submit the initiate failover request via CPR safe
	 * taskq threads.
	 */
	(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
	    ct, KM_SLEEP);
	return (MDI_ACCEPT);

unlock_out:
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}
/*
 * i_mdi_failover():
 *		internal failover function.  Invokes the vHCI driver's
 *		vo_failover() callback (if one is registered) and processes
 *		the failover status: the failover-in-progress indication is
 *		cleared, the status is saved in the client, the client state
 *		is recomputed and waiters on ct_failover_cv are woken.
 *
 *		arg is the mdi_client_t to fail over.  The client lock must
 *		not be held on entry.
 * Return Values:
 *		Status returned by vo_failover(), or MDI_SUCCESS when the
 *		vHCI driver has no vo_failover() entry point.
 *
 * Note: A client device in failover state can not be detached or freed.
 */
static int
i_mdi_failover(void *arg)
{
	mdi_client_t	*ct = (mdi_client_t *)arg;
	mdi_vhci_t	*vh = ct->ct_vhci;
	int		status = MDI_SUCCESS;

	ASSERT(!MDI_CLIENT_LOCKED(ct));

	/*
	 * Call vHCI drivers callback routine
	 */
	if (vh->vh_ops->vo_failover != NULL) {
		status = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
		    ct->ct_failover_flags);
	}

	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);

	/* Save the failover return status */
	ct->ct_failover_status = status;

	/*
	 * As a result of failover, client status would have been changed.
	 * Update the client state and wake up anyone waiting on this client
	 * device.
	 */
	i_mdi_client_update_state(ct);
	cv_broadcast(&ct->ct_failover_cv);
	MDI_CLIENT_UNLOCK(ct);

	return (status);
}
/*
 * i_mdi_lba_lb():
 *		Logical-block load balancing.
 *
 *		IOs within the range described by region_size would go on
 *		the same path.  This would improve the performance by
 *		cache-hit on some of the RAID devices.  Only ONLINE paths are
 *		considered (at some point we may want to balance across
 *		target ports): preferred ONLINE paths if there are any,
 *		otherwise non-preferred ONLINE paths.
 *
 *		The block number of the request is shifted right by
 *		region_size and reduced modulo the number of eligible paths;
 *		the result is the ordinal, among the eligible paths on the
 *		client's path list, of the path to use.  Paths that are not
 *		eligible are skipped without consuming the ordinal, so the
 *		same region consistently maps to the same eligible path.
 *
 *		Called from mdi_select_path() with the client lock held.
 *
 *		ct	client device; ct_lb_args must be non-NULL
 *		ret_pip	on success, set to the selected path, which is
 *			returned held (MDI_PI_HOLD)
 *		bp	I/O request supplying the block number
 * Return Values:
 *		MDI_SUCCESS	a path was selected
 *		MDI_FAILURE	no eligible path; the caller falls back to
 *				round-robin
 */
static int
i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
{
	int		path_index = -1;
	int		online_path_count = 0;
	int		online_nonpref_path_count = 0;
	int		region_size = ct->ct_lb_args->region_size;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	int		preferred = 1;
	int		path_cnt;

	/* Pass 1: count the ONLINE paths, split by preference. */
	pip = ct->ct_path_head;
	while (pip) {
		MDI_PI_LOCK(pip);
		if (MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_ONLINE) {
			if (MDI_PI(pip)->pi_preferred) {
				online_path_count++;
			} else {
				online_nonpref_path_count++;
			}
		}
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
		MDI_PI_UNLOCK(pip);
		pip = next;
	}

	/* if found any online/preferred then use this type */
	if (online_path_count > 0) {
		path_cnt = online_path_count;
		preferred = 1;
	} else if (online_nonpref_path_count > 0) {
		path_cnt = online_nonpref_path_count;
		preferred = 0;
	} else {
		path_cnt = 0;
	}

	if (path_cnt) {
		path_index = (bp->b_blkno >> region_size) % path_cnt;

		/*
		 * Pass 2: pick the path_index'th eligible path.  A negative
		 * index (negative block number) can never match.
		 */
		pip = ct->ct_path_head;
		while (pip && path_index >= 0) {
			MDI_PI_LOCK(pip);
			if ((MDI_PI(pip)->pi_state ==
			    MDI_PATHINFO_STATE_ONLINE) &&
			    MDI_PI(pip)->pi_preferred == preferred) {
				if (path_index == 0) {
					/* Return the path in hold state. */
					MDI_PI_HOLD(pip);
					MDI_PI_UNLOCK(pip);
					*ret_pip = pip;
					return (MDI_SUCCESS);
				}
				/* Only eligible paths consume the ordinal. */
				path_index--;
			}
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
		    "lba %llx: path %s %p",
		    bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip));
	}
	return (MDI_FAILURE);
}
/*
* mdi_select_path():
* select a path to access a client device.
*
* mdi_select_path() function is called by the vHCI drivers to
* select a path to route the I/O request to. The caller passes
* the block I/O data transfer structure ("buf") as one of the
* parameters. The mpxio framework uses the buf structure
* contents to maintain per path statistics (total I/O size /
* count pending). If more than one online paths are available to
* select, the framework automatically selects a suitable path
* for routing I/O request. If a failover operation is active for
* this client device the call shall be failed with MDI_BUSY error
* code.
*
* By default this function returns a suitable path in online
* state based on the current load balancing policy. Currently
* we support LOAD_BALANCE_NONE (Previously selected online path
* will continue to be used till the path is usable) and
* LOAD_BALANCE_RR (Online paths will be selected in a round
* robin fashion), LOAD_BALANCE_LBA (Online paths will be selected
* based on the logical block). The load balancing policy is
* specified through the vHCI driver's configuration file (driver.conf).
*
* vHCI drivers may override this default behavior by specifying
* appropriate flags. The meaning of the "arg" argument depends
* on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
* then the argument is the "path instance" of the path to select.
* If MDI_SELECT_PATH_INSTANCE is not set then the argument is
* "start_pip". A non NULL "start_pip" is the starting point to
* walk and find the next appropriate path. The following values
* are currently defined: MDI_SELECT_ONLINE_PATH (to select an
* ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
* STANDBY path).
*
* The non-standard behavior is used by the scsi_vhci driver,
* whenever it has to use a STANDBY/FAULTED path. Eg. during
* attach of client devices (to avoid an unnecessary failover
* when the STANDBY path comes up first), during failover
* (to activate a STANDBY path as ONLINE).
*
* The selected path is returned in a mdi_hold_path() state
* (pi_ref_cnt). Caller should release the hold by calling
* mdi_rele_path().
*
* Return Values:
* MDI_SUCCESS - Completed successfully
* MDI_BUSY - Client device is busy failing over
* MDI_NOPATH - Client device is online, but no valid path are
* available to access this client device
* MDI_FAILURE - Invalid client device or state
* MDI_DEVI_ONLINING
* - Client device (struct dev_info state) is in
* onlining state.
*/
/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
void *arg, mdi_pathinfo_t **ret_pip)
{
/*
* Outline of the body:
* 1. Decode 'arg' from 'flags': either a path instance number or the
* pathinfo node to start the walk from.
* 2. When flags == 0 ("standard behavior"), refuse clients that are
* failed, failing over, or not yet attached.
* 3. If a path instance was requested, return exactly that path.
* 4. Otherwise walk the client's path list according to the load
* balancing policy; any non-zero flags force LOAD_BALANCE_RR.
* A path handed back through *ret_pip has had MDI_PI_HOLD() applied.
* The client lock is held from step 2 until just before returning.
*/
mdi_client_t *ct;
mdi_pathinfo_t *pip;
mdi_pathinfo_t *next;
mdi_pathinfo_t *head;
mdi_pathinfo_t *start;
client_lb_t lbp; /* load balancing policy */
int sb = 1; /* standard behavior */
int preferred = 1; /* preferred path */
/* cond: current path satisfies the request; cont: keep walking */
int cond, cont = 1;
/* set when a path in a transient state was passed over */
int retry = 0;
mdi_pathinfo_t *start_pip; /* request starting pathinfo */
int path_instance; /* request specific path instance */
/* determine type of arg based on flags */
if (flags & MDI_SELECT_PATH_INSTANCE) {
path_instance = (int)(intptr_t)arg;
start_pip = NULL;
} else {
path_instance = 0;
start_pip = (mdi_pathinfo_t *)arg;
}
if (flags != 0) {
/*
* disable default behavior
*/
sb = 0;
}
*ret_pip = NULL;
ct = i_devi_get_client(cdip);
if (ct == NULL) {
/* mdi extensions are NULL, Nothing more to do */
return (MDI_FAILURE);
}
MDI_CLIENT_LOCK(ct);
if (sb) {
if (MDI_CLIENT_IS_FAILED(ct)) {
/*
* Client is not ready to accept any I/O requests.
* Fail this request.
*/
MDI_DEBUG(2, (MDI_NOTE, cdip,
"client state offline ct = %p", (void *)ct));
MDI_CLIENT_UNLOCK(ct);
return (MDI_FAILURE);
}
if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
/*
* Check for Failover is in progress. If so tell the
* caller that this device is busy.
*/
MDI_DEBUG(2, (MDI_NOTE, cdip,
"client failover in progress ct = %p",
(void *)ct));
MDI_CLIENT_UNLOCK(ct);
return (MDI_BUSY);
}
/*
* Check to see whether the client device is attached.
* If not so, let the vHCI driver manually select a path
* (standby) and let the probe/attach process to continue.
*/
if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
MDI_DEBUG(4, (MDI_NOTE, cdip,
"devi is onlining ct = %p", (void *)ct));
MDI_CLIENT_UNLOCK(ct);
return (MDI_DEVI_ONLINING);
}
}
/*
* Cache in the client list head. If head of the list is NULL
* return MDI_NOPATH
*/
head = ct->ct_path_head;
if (head == NULL) {
MDI_CLIENT_UNLOCK(ct);
return (MDI_NOPATH);
}
/*
* Caller is specifying a specific pathinfo path by path_instance.
* A path_instance of 0 is treated as "no instance requested", even
* when MDI_SELECT_PATH_INSTANCE was set.
*/
if (path_instance) {
/* search for pathinfo with correct path_instance */
for (pip = head;
pip && (mdi_pi_get_path_instance(pip) != path_instance);
pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
;
/* If path can't be selected then MDI_NOPATH is returned. */
if (pip == NULL) {
MDI_CLIENT_UNLOCK(ct);
return (MDI_NOPATH);
}
/*
* Verify state of path. When asked to select a specific
* path_instance, we select the requested path in any
* state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT.
* We don't however select paths where the pHCI has detached.
* NOTE: last pathinfo node of an opened client device may
* exist in an OFFLINE state after the pHCI associated with
* that path has detached (but pi_phci will be NULL if that
* has occurred).
*/
MDI_PI_LOCK(pip);
if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) ||
(MDI_PI(pip)->pi_phci == NULL)) {
MDI_PI_UNLOCK(pip);
MDI_CLIENT_UNLOCK(ct);
return (MDI_FAILURE);
}
/* Return MDI_BUSY if we have a transient condition */
if (MDI_PI_IS_TRANSIENT(pip)) {
MDI_PI_UNLOCK(pip);
MDI_CLIENT_UNLOCK(ct);
return (MDI_BUSY);
}
/*
* Return the path in hold state. Caller should release the
* lock by calling mdi_rele_path()
*/
MDI_PI_HOLD(pip);
MDI_PI_UNLOCK(pip);
*ret_pip = pip;
MDI_CLIENT_UNLOCK(ct);
return (MDI_SUCCESS);
}
/*
* for non default behavior, bypass current
* load balancing policy and always use LOAD_BALANCE_RR
* except that the start point will be adjusted based
* on the provided start_pip
*/
lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
switch (lbp) {
case LOAD_BALANCE_NONE:
/*
* Load balancing is None or Alternate path mode
* Start looking for a online mdi_pathinfo node starting from
* last known selected path
*/
preferred = 1;
pip = (mdi_pathinfo_t *)ct->ct_path_last;
if (pip == NULL) {
pip = head;
}
start = pip;
/*
* The list is treated as circular.  The first lap accepts only
* preferred ONLINE paths; when the walk comes back to 'start' a
* second lap accepts non-preferred ONLINE paths, and coming back
* to 'start' once more ends the search.
*/
do {
MDI_PI_LOCK(pip);
/*
* No need to explicitly check if the path is disabled.
* Since we are checking for state == ONLINE and the
* same variable is used for DISABLE/ENABLE information.
*/
if ((MDI_PI(pip)->pi_state ==
MDI_PATHINFO_STATE_ONLINE) &&
preferred == MDI_PI(pip)->pi_preferred) {
/*
* Return the path in hold state. Caller should
* release the lock by calling mdi_rele_path()
*/
MDI_PI_HOLD(pip);
MDI_PI_UNLOCK(pip);
ct->ct_path_last = pip;
*ret_pip = pip;
MDI_CLIENT_UNLOCK(ct);
return (MDI_SUCCESS);
}
/*
* Path is busy.
*/
if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
MDI_PI_IS_TRANSIENT(pip))
retry = 1;
/*
* Keep looking for a next available online path
*/
next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
if (next == NULL) {
next = head;
}
MDI_PI_UNLOCK(pip);
pip = next;
if (start == pip && preferred) {
preferred = 0;
} else if (start == pip && !preferred) {
cont = 0;
}
} while (cont);
break;
case LOAD_BALANCE_LBA:
/*
* Make sure we are looking
* for an online path. Otherwise, if it is for a STANDBY
* path request, it will go through and fetch an ONLINE
* path which is not desirable.
*/
if ((ct->ct_lb_args != NULL) &&
(ct->ct_lb_args->region_size) && bp &&
(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
/*
* On MDI_SUCCESS i_mdi_lba_lb() has already stored the
* held path in *ret_pip.
*/
if (i_mdi_lba_lb(ct, ret_pip, bp)
== MDI_SUCCESS) {
MDI_CLIENT_UNLOCK(ct);
return (MDI_SUCCESS);
}
}
/* FALLTHROUGH */
case LOAD_BALANCE_RR:
/*
* Load balancing is Round Robin. Start looking for a online
* mdi_pathinfo node starting from last known selected path
* as the start point. If override flags are specified,
* process accordingly.
* If the search is already in effect(start_pip not null),
* then lets just use the same path preference to continue the
* traversal.
*/
if (start_pip != NULL) {
preferred = MDI_PI(start_pip)->pi_preferred;
} else {
preferred = 1;
}
/*
* Standard behavior resumes after the path handed out last time;
* otherwise the walk resumes after the caller's start_pip.
*/
start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
if (start == NULL) {
pip = head;
} else {
pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
if (pip == NULL) {
if ( flags & MDI_SELECT_NO_PREFERRED) {
/*
* Return since we hit the end of list
*/
MDI_CLIENT_UNLOCK(ct);
return (MDI_NOPATH);
}
if (!sb) {
if (preferred == 0) {
/*
* Looks like we have completed
* the traversal as preferred
* value is 0. Time to bail out.
*/
*ret_pip = NULL;
MDI_CLIENT_UNLOCK(ct);
return (MDI_NOPATH);
} else {
/*
* Looks like we reached the
* end of the list. Lets enable
* traversal of non preferred
* paths.
*/
preferred = 0;
}
}
pip = head;
}
}
start = pip;
do {
MDI_PI_LOCK(pip);
/*
* Decide whether this path satisfies the request.  With
* override flags only the exact flag combinations tested
* below are recognized; any other combination never matches.
*/
if (sb) {
cond = ((MDI_PI(pip)->pi_state ==
MDI_PATHINFO_STATE_ONLINE &&
MDI_PI(pip)->pi_preferred ==
preferred) ? 1 : 0);
} else {
if (flags == MDI_SELECT_ONLINE_PATH) {
cond = ((MDI_PI(pip)->pi_state ==
MDI_PATHINFO_STATE_ONLINE &&
MDI_PI(pip)->pi_preferred ==
preferred) ? 1 : 0);
} else if (flags == MDI_SELECT_STANDBY_PATH) {
cond = ((MDI_PI(pip)->pi_state ==
MDI_PATHINFO_STATE_STANDBY &&
MDI_PI(pip)->pi_preferred ==
preferred) ? 1 : 0);
} else if (flags == (MDI_SELECT_ONLINE_PATH |
MDI_SELECT_STANDBY_PATH)) {
cond = (((MDI_PI(pip)->pi_state ==
MDI_PATHINFO_STATE_ONLINE ||
(MDI_PI(pip)->pi_state ==
MDI_PATHINFO_STATE_STANDBY)) &&
MDI_PI(pip)->pi_preferred ==
preferred) ? 1 : 0);
} else if (flags ==
(MDI_SELECT_STANDBY_PATH |
MDI_SELECT_ONLINE_PATH |
MDI_SELECT_USER_DISABLE_PATH)) {
cond = (((MDI_PI(pip)->pi_state ==
MDI_PATHINFO_STATE_ONLINE ||
(MDI_PI(pip)->pi_state ==
MDI_PATHINFO_STATE_STANDBY) ||
(MDI_PI(pip)->pi_state ==
(MDI_PATHINFO_STATE_ONLINE|
MDI_PATHINFO_STATE_USER_DISABLE)) ||
(MDI_PI(pip)->pi_state ==
(MDI_PATHINFO_STATE_STANDBY |
MDI_PATHINFO_STATE_USER_DISABLE)))&&
MDI_PI(pip)->pi_preferred ==
preferred) ? 1 : 0);
} else if (flags ==
(MDI_SELECT_STANDBY_PATH |
MDI_SELECT_ONLINE_PATH |
MDI_SELECT_NO_PREFERRED)) {
/* the preferred setting of the path is ignored here */
cond = (((MDI_PI(pip)->pi_state ==
MDI_PATHINFO_STATE_ONLINE) ||
(MDI_PI(pip)->pi_state ==
MDI_PATHINFO_STATE_STANDBY))
? 1 : 0);
} else {
cond = 0;
}
}
/*
* No need to explicitly check if the path is disabled.
* Since we are checking for state == ONLINE and the
* same variable is used for DISABLE/ENABLE information.
*/
if (cond) {
/*
* Return the path in hold state. Caller should
* release the lock by calling mdi_rele_path()
*/
MDI_PI_HOLD(pip);
MDI_PI_UNLOCK(pip);
/* only standard behavior advances the round-robin cursor */
if (sb)
ct->ct_path_last = pip;
*ret_pip = pip;
MDI_CLIENT_UNLOCK(ct);
return (MDI_SUCCESS);
}
/*
* Path is busy.
*/
if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
MDI_PI_IS_TRANSIENT(pip))
retry = 1;
/*
* Keep looking for a next available online path
*/
/* pip is locked whenever control reaches do_again */
do_again:
next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
if (next == NULL) {
if ( flags & MDI_SELECT_NO_PREFERRED) {
/*
* Bail out since we hit the end of list
*/
MDI_PI_UNLOCK(pip);
break;
}
if (!sb) {
if (preferred == 1) {
/*
* Looks like we reached the
* end of the list. Lets enable
* traversal of non preferred
* paths.
*/
preferred = 0;
next = head;
} else {
/*
* We have done both the passes
* Preferred as well as for
* Non-preferred. Bail out now.
*/
cont = 0;
}
} else {
/*
* Standard behavior case.
*/
next = head;
}
}
MDI_PI_UNLOCK(pip);
if (cont == 0) {
break;
}
pip = next;
if (!sb) {
/*
* We need to handle the selection of
* non-preferred path in the following
* case:
*
* +------+ +------+ +------+ +-----+
* | A : 1| - | B : 1| - | C : 0| - |NULL |
* +------+ +------+ +------+ +-----+
*
* If we start the search with B, we need to
* skip beyond B to pick C which is non -
* preferred in the second pass. The following
* test, if true, will allow us to skip over
* the 'start'(B in the example) to select
* other non preferred elements.
*/
if ((start_pip != NULL) && (start_pip == pip) &&
(MDI_PI(start_pip)->pi_preferred
!= preferred)) {
/*
* try again after going past the start
* pip
*/
MDI_PI_LOCK(pip);
goto do_again;
}
} else {
/*
* Standard behavior case
*/
if (start == pip && preferred) {
/* look for nonpreferred paths */
preferred = 0;
} else if (start == pip && !preferred) {
/*
* Exit condition
*/
cont = 0;
}
}
} while (cont);
break;
}
/*
* No path was selected.  If a path in a transient state was seen
* along the way report MDI_BUSY, otherwise MDI_NOPATH.
*/
MDI_CLIENT_UNLOCK(ct);
if (retry == 1) {
return (MDI_BUSY);
} else {
return (MDI_NOPATH);
}
}
/*
 * For a client, return the next available path to any phci
 *
 * With pip == NULL the first path on the client's path list is returned;
 * otherwise the path following pip on that list.  NULL is returned when
 * ct_dip is not an MDI client or the end of the list has been reached.
 *
 * Note:
 *		Caller should hold the branch's devinfo node to get a
 *		consistent snap shot of the mdi_pathinfo nodes.
 *
 *		Please note that even if the list is stable the mdi_pathinfo
 *		node state and properties are volatile.  The caller should
 *		lock and unlock the nodes by calling mdi_pi_lock() and
 *		mdi_pi_unlock() functions to get stable properties.
 *
 *		If there is a need to use the nodes beyond the hold of the
 *		devinfo node period (For ex. I/O), then mdi_pathinfo node
 *		need to be held against unexpected removal by calling
 *		mdi_hold_path() and should be released by calling
 *		mdi_rele_path() on completion.
 */
mdi_pathinfo_t *
mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
{
	mdi_client_t	*ct;

	if (!MDI_CLIENT(ct_dip)) {
		return (NULL);
	}

	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
	ASSERT(ct != NULL);

	/*
	 * Walk through client link
	 */
	return ((pip == NULL) ?
	    (mdi_pathinfo_t *)ct->ct_path_head :
	    (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
}
/*
 * For a phci, return the next available path to any client
 *
 * With pip == NULL the first path on the pHCI's path list is returned;
 * otherwise the path following pip on that list.  NULL is returned when
 * ph_dip is not an MDI pHCI or the end of the list has been reached.
 *
 * Note: ditto mdi_get_next_phci_path()
 */
mdi_pathinfo_t *
mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
{
	mdi_phci_t	*ph;

	if (!MDI_PHCI(ph_dip)) {
		return (NULL);
	}

	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
	ASSERT(ph != NULL);

	/*
	 * Walk through pHCI link
	 */
	return ((pip == NULL) ?
	    (mdi_pathinfo_t *)ph->ph_path_head :
	    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
}
/*
 * mdi_hold_path():
 *		Hold the mdi_pathinfo node against unexpected free.  The
 *		hold is taken under the pathinfo lock.  A NULL pip is
 *		ignored.
 * Return Values:
 *		None
 */
void
mdi_hold_path(mdi_pathinfo_t *pip)
{
	if (pip == NULL) {
		return;
	}
	MDI_PI_LOCK(pip);
	MDI_PI_HOLD(pip);
	MDI_PI_UNLOCK(pip);
}
/*
 * mdi_rele_path():
 *		Release the mdi_pathinfo node which was selected
 *		through mdi_select_path() mechanism or manually held by
 *		calling mdi_hold_path().  When the reference count drops to
 *		zero, threads waiting on pi_ref_cv are woken.  A NULL pip is
 *		ignored.
 * Return Values:
 *		None
 */
void
mdi_rele_path(mdi_pathinfo_t *pip)
{
	if (pip == NULL) {
		return;
	}
	MDI_PI_LOCK(pip);
	MDI_PI_RELE(pip);
	if (MDI_PI(pip)->pi_ref_cnt == 0) {
		cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
	}
	MDI_PI_UNLOCK(pip);
}
/*
 * mdi_pi_lock():
 *		Lock the mdi_pathinfo node.  pip is expected to be non-NULL
 *		(asserted); a NULL pip is otherwise ignored.
 * Note:
 *		The caller should release the lock by calling mdi_pi_unlock()
 */
void
mdi_pi_lock(mdi_pathinfo_t *pip)
{
	ASSERT(pip != NULL);
	if (pip == NULL) {
		return;
	}
	MDI_PI_LOCK(pip);
}
/*
 * mdi_pi_unlock():
 *		Unlock the mdi_pathinfo node.  pip is expected to be non-NULL
 *		(asserted); a NULL pip is otherwise ignored.
 * Note:
 *		The mdi_pathinfo node should have been locked with
 *		mdi_pi_lock()
 */
void
mdi_pi_unlock(mdi_pathinfo_t *pip)
{
	ASSERT(pip != NULL);
	if (pip == NULL) {
		return;
	}
	MDI_PI_UNLOCK(pip);
}
/*
 * mdi_pi_find():
 *		Search the list of mdi_pathinfo nodes attached to the
 *		pHCI/Client device node whose path address matches "paddr".
 *
 *		pdip	pHCI devinfo node
 *		caddr	client address (guid); when NULL the pHCI's own path
 *			list is searched (and nothing is returned if the pHCI
 *			is offline), otherwise the path list of the client
 *			with that address is searched for a path that belongs
 *			to this pHCI
 *		paddr	path unit address to match
 * Return Values:
 *		mdi_pathinfo node handle
 *		NULL
 * Notes:
 *		Caller need not hold any locks to call this function.
 *		The node is returned without a hold being placed on it.
 */
mdi_pathinfo_t *
mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
{
	mdi_phci_t	*ph;
	mdi_vhci_t	*vh;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;

	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
	if (pdip == NULL || paddr == NULL) {
		return (NULL);
	}

	if ((ph = i_devi_get_phci(pdip)) == NULL) {
		/*
		 * Invalid pHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
		return (NULL);
	}

	if ((vh = ph->ph_vhci) == NULL) {
		/*
		 * Invalid vHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
		return (NULL);
	}

	/*
	 * Look for pathinfo node identified by paddr.
	 */
	if (caddr == NULL) {
		/*
		 * Find a mdi_pathinfo node under pHCI list for a matching
		 * unit address.
		 */
		MDI_PHCI_LOCK(ph);
		if (MDI_PHCI_IS_OFFLINE(ph)) {
			MDI_DEBUG(2, (MDI_WARN, pdip,
			    "offline phci %p", (void *)ph));
			MDI_PHCI_UNLOCK(ph);
			return (NULL);
		}
		for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip != NULL;
		    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link) {
			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
				break;
			}
		}
		MDI_PHCI_UNLOCK(ph);
		MDI_DEBUG(2, (MDI_NOTE, pdip,
		    "found %s %p", mdi_pi_spathname(pip), (void *)pip));
		return (pip);
	}

	/*
	 * XXX - Is the rest of the code in this function really necessary?
	 * The consumers of mdi_pi_find() can search for the desired pathinfo
	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
	 * whether the search is based on the pathinfo nodes attached to
	 * the pHCI or the client node, the result will be the same.
	 */

	/*
	 * Find the client device corresponding to 'caddr'
	 *
	 * XXX - Passing NULL to the following function works as long as the
	 * the client addresses (caddr) are unique per vhci basis.
	 */
	MDI_VHCI_CLIENT_LOCK(vh);
	ct = i_mdi_client_find(vh, NULL, caddr);
	if (ct == NULL) {
		/*
		 * Client not found, Obviously mdi_pathinfo node has not been
		 * created yet.
		 */
		MDI_VHCI_CLIENT_UNLOCK(vh);
		MDI_DEBUG(2, (MDI_NOTE, pdip,
		    "client not found for caddr @%s", caddr ? caddr : ""));
		return (NULL);
	}

	/*
	 * Take the client lock before releasing the global mutex, which is
	 * no more needed after that. Note: We always respect the locking
	 * order while acquiring.
	 */
	MDI_CLIENT_LOCK(ct);
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/*
	 * Look for a mdi_pathinfo node with matching pHCI and unit address
	 */
	for (pip = (mdi_pathinfo_t *)ct->ct_path_head; pip != NULL;
	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) {
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
	return (pip);
}
/*
* mdi_pi_alloc():
* Allocate and initialize a new instance of a mdi_pathinfo node.
* The mdi_pathinfo node returned by this function identifies a
* unique device path and is capable of having properties attached
* and passed to mdi_pi_online() to fully attach and online the
* path and client device node.
* The mdi_pathinfo node returned by this function must be
* destroyed using mdi_pi_free() if the path is no longer
* operational or if the caller fails to attach a client device
* node when calling mdi_pi_online(). The framework will not free
* the resources allocated.
* This function can be called from both interrupt and kernel
* contexts. DDI_NOSLEEP flag should be used while calling
* from interrupt contexts.
* Return Values:
* MDI_SUCCESS
* MDI_FAILURE
* MDI_NOMEM
*/
/*ARGSUSED*/
int
mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
{
mdi_vhci_t *vh;
mdi_phci_t *ph;
mdi_client_t *ct;
mdi_pathinfo_t *pip = NULL;
dev_info_t *cdip;
int rv = MDI_NOMEM;
int path_allocated = 0;
MDI_DEBUG(2, (MDI_NOTE, pdip,
"cname %s: caddr@%s paddr@%s",
cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));
if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
ret_pip == NULL) {
/* Nothing more to do */
return (MDI_FAILURE);
}
*ret_pip = NULL;
/* No allocations on detaching pHCI */
if (DEVI_IS_DETACHING(pdip)) {
/* Invalid pHCI device, return failure */
MDI_DEBUG(1, (MDI_WARN, pdip,
"!detaching pHCI=%p", (void *)pdip));
return (MDI_FAILURE);
}
ph = i_devi_get_phci(pdip);
ASSERT(ph != NULL);
if (ph == NULL) {
/* Invalid pHCI device, return failure */
MDI_DEBUG(1, (MDI_WARN, pdip,
"!invalid pHCI=%p", (void *)pdip));
return (MDI_FAILURE);
}
MDI_PHCI_LOCK(ph);
vh = ph->ph_vhci;
if (vh == NULL) {
/* Invalid vHCI device, return failure */
MDI_DEBUG(1, (MDI_WARN, pdip,
"!invalid vHCI=%p", (void *)pdip));
MDI_PHCI_UNLOCK(ph);
return (MDI_FAILURE);
}
if (MDI_PHCI_IS_READY(ph) == 0) {
/*
* Do not allow new node creation when pHCI is in
* offline/suspended states
*/
MDI_DEBUG(1, (MDI_WARN, pdip,
"pHCI=%p is not ready", (void *)ph));
MDI_PHCI_UNLOCK(ph);
return (MDI_BUSY);
}
MDI_PHCI_UNSTABLE(ph);
MDI_PHCI_UNLOCK(ph);
/* look for a matching client, create one if not found */
MDI_VHCI_CLIENT_LOCK(vh);
ct = i_mdi_client_find(vh, cname, caddr);
if (ct == NULL) {
ct = i_mdi_client_alloc(vh, cname, caddr);
ASSERT(ct != NULL);
}
if (ct->ct_dip == NULL) {
/*
* Allocate a devinfo node
*/
ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
compatible, ncompatible);
if (ct->ct_dip == NULL) {
(void) i_mdi_client_free(vh, ct);
goto fail;
}
}
cdip = ct->ct_dip;
DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
MDI_CLIENT_LOCK(ct);
pip = (mdi_pathinfo_t *)ct->ct_path_head;
while (pip != NULL) {
/*
* Compare the unit address
*/
if ((MDI_PI(pip)->pi_phci == ph) &&
strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
break;
}
pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
}
MDI_CLIENT_UNLOCK(ct);
if (pip == NULL) {
/*
* This is a new path for this client device. Allocate and
* initialize a new pathinfo node
*/
pip = i_mdi_pi_alloc(ph, paddr, ct);
ASSERT(pip != NULL);
path_allocated = 1;
}
rv = MDI_SUCCESS;
fail:
/*
* Release the global mutex.
*/
MDI_VHCI_CLIENT_UNLOCK(vh);
/*
* Mark the pHCI as stable
*/
MDI_PHCI_LOCK(ph);
MDI_PHCI_STABLE(ph);
MDI_PHCI_UNLOCK(ph);
*ret_pip = pip;
MDI_DEBUG(2, (MDI_NOTE, pdip,
"alloc %s %p", mdi_pi_spathname(pip), (void *)pip));
if (path_allocated)
vhcache_pi_add(vh->vh_config, MDI_PI(pip));
return (rv);
}
/*
 * mdi_pi_alloc():
 *		Form of mdi_pi_alloc_compatible() for callers that have no
 *		'compatible' list: a NULL list and a count of 0 are passed and
 *		the result is returned unchanged.
 */
/*ARGSUSED*/
int
mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    int flags, mdi_pathinfo_t **ret_pip)
{
	int	status;

	status = mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
	    flags, ret_pip);
	return (status);
}
/*
 * i_mdi_pi_alloc():
 *		Allocate a mdi_pathinfo node, assign it a 'path_instance' and
 *		add it to both the pHCI and the client path lists.
 *		The caller must hold the vHCI client lock (asserted below).
 *		All allocations are made with KM_SLEEP.
 * Return Values:
 *		mdi_pathinfo
 */
/*ARGSUSED*/
static mdi_pathinfo_t *
i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		ct_circular;
	int		ph_circular;
	static char	path[MAXPATHLEN];	/* mdi_pathmap_mutex protects */
	char		*path_persistent;
	int		path_instance;
	mod_hash_val_t	hv;
	size_t		plen;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));

	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
	    MDI_PATHINFO_STATE_TRANSIENT;

	/* A new path inherits the disable state of its pHCI */
	if (MDI_PHCI_IS_USER_DISABLED(ph))
		MDI_PI_SET_USER_DISABLE(pip);

	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
		MDI_PI_SET_DRV_DISABLE_TRANS(pip);

	if (MDI_PHCI_IS_DRV_DISABLED(ph))
		MDI_PI_SET_DRV_DISABLE(pip);

	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
	MDI_PI(pip)->pi_client = ct;
	MDI_PI(pip)->pi_phci = ph;
	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);

	/*
	 * We form the "path" to the pathinfo node, and see if we have
	 * already allocated a 'path_instance' for that "path". If so,
	 * we use the already allocated 'path_instance'. If not, we
	 * allocate a new 'path_instance' and associate it with a copy of
	 * the "path" string (which is never freed). The association
	 * between a 'path_instance' and this "path" string persists until
	 * reboot.
	 */
	mutex_enter(&mdi_pathmap_mutex);
	(void) ddi_pathname(ph->ph_dip, path);
	/*
	 * Append "/<node>@<addr>" without writing past the end of 'path';
	 * an over-long name is truncated instead of overflowing the static
	 * buffer.  Assumes ddi_pathname() leaves a NUL-terminated string
	 * shorter than MAXPATHLEN in 'path' - TODO confirm.
	 */
	plen = strlen(path);
	(void) snprintf(path + plen, sizeof (path) - plen, "/%s@%s",
	    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
	if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
		path_instance = (uint_t)(intptr_t)hv;
	} else {
		/* allocate a new 'path_instance' and persistent "path" */
		path_instance = mdi_pathmap_instance++;
		path_persistent = i_ddi_strdup(path, KM_SLEEP);
		(void) mod_hash_insert(mdi_pathmap_bypath,
		    (mod_hash_key_t)path_persistent,
		    (mod_hash_val_t)(intptr_t)path_instance);
		(void) mod_hash_insert(mdi_pathmap_byinstance,
		    (mod_hash_key_t)(intptr_t)path_instance,
		    (mod_hash_val_t)path_persistent);

		/* create shortpath name */
		(void) snprintf(path, sizeof (path), "%s%d/%s@%s",
		    ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
		    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
		path_persistent = i_ddi_strdup(path, KM_SLEEP);
		(void) mod_hash_insert(mdi_pathmap_sbyinstance,
		    (mod_hash_key_t)(intptr_t)path_instance,
		    (mod_hash_val_t)path_persistent);
	}
	mutex_exit(&mdi_pathmap_mutex);
	MDI_PI(pip)->pi_path_instance = path_instance;

	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
	ASSERT(MDI_PI(pip)->pi_prop != NULL);
	MDI_PI(pip)->pi_pprivate = NULL;
	MDI_PI(pip)->pi_cprivate = NULL;
	MDI_PI(pip)->pi_vprivate = NULL;
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_ref_cnt = 0;
	MDI_PI(pip)->pi_kstats = NULL;
	MDI_PI(pip)->pi_preferred = 1;
	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Lock both dev_info nodes against changes in parallel.
	 *
	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
	 * This atypical operation is done to synchronize pathinfo nodes
	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
	 * the pathinfo nodes are children of the Client.
	 */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_phci_add_path(ph, pip);
	i_mdi_client_add_path(ct, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	return (pip);
}
/*
 * mdi_pi_pathname_by_instance():
 *		Look up the "path" string registered for 'path_instance' in
 *		the by-instance pathmap hash, under mdi_pathmap_mutex.
 *		Returns NULL when the instance is not in the hash.
 *	NOTE: returned "path" remains valid forever (until reboot).
 */
char *
mdi_pi_pathname_by_instance(int path_instance)
{
	mod_hash_val_t	val;
	char		*result = NULL;

	mutex_enter(&mdi_pathmap_mutex);
	if (mod_hash_find(mdi_pathmap_byinstance,
	    (mod_hash_key_t)(intptr_t)path_instance, &val) == 0) {
		result = (char *)val;
	}
	mutex_exit(&mdi_pathmap_mutex);

	return (result);
}
/*
 * mdi_pi_spathname_by_instance():
 *		Look up the "shortpath" string registered for 'path_instance'
 *		in the short-path pathmap hash, under mdi_pathmap_mutex.
 *		Returns NULL when the instance is not in the hash.
 *	NOTE: returned "shortpath" remains valid forever (until reboot).
 */
char *
mdi_pi_spathname_by_instance(int path_instance)
{
	mod_hash_val_t	val;
	char		*result = NULL;

	mutex_enter(&mdi_pathmap_mutex);
	if (mod_hash_find(mdi_pathmap_sbyinstance,
	    (mod_hash_key_t)(intptr_t)path_instance, &val) == 0) {
		result = (char *)val;
	}
	mutex_exit(&mdi_pathmap_mutex);

	return (result);
}
/*
 * i_mdi_phci_add_path():
 *		Append a mdi_pathinfo node to the tail of the pHCI path list
 *		and bump the pHCI path count.
 * Notes:
 *		The caller must own the pHCI devinfo node (DEVI_BUSY_OWNED is
 *		asserted); the per-pHCI mutex is taken and dropped here.
 */
static void
i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	MDI_PHCI_LOCK(ph);
	if (ph->ph_path_head != NULL) {
		/* non-empty list: chain after the current tail */
		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
	} else {
		/* empty list: the new node is also the head */
		ph->ph_path_head = pip;
	}
	ph->ph_path_tail = pip;
	ph->ph_path_count++;
	MDI_PHCI_UNLOCK(ph);
}
/*
 * i_mdi_client_add_path():
 *		Append a mdi_pathinfo node to the tail of the client path list
 *		and bump the client path count.
 * Notes:
 *		The caller must own the client devinfo node (DEVI_BUSY_OWNED
 *		is asserted); the client mutex is taken and dropped here.
 */
static void
i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	MDI_CLIENT_LOCK(ct);
	if (ct->ct_path_head != NULL) {
		/* non-empty list: chain after the current tail */
		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
	} else {
		/* empty list: the new node is also the head */
		ct->ct_path_head = pip;
	}
	ct->ct_path_tail = pip;
	ct->ct_path_count++;
	MDI_CLIENT_UNLOCK(ct);
}
/*
 * mdi_pi_free():
 *		Free the mdi_pathinfo node and also client device node if this
 *		is the last path to the device.
 *
 *		The node may only be freed while it is offline, init or
 *		initing.  Outstanding references (pi_ref_cnt) are given time
 *		to drain, and any failover in progress on the client is waited
 *		for.  Unless the path is still initing, the vHCI vo_pi_uninit
 *		callback (when one is registered) is invoked before the node is
 *		released; a vHCI without that callback is treated as having
 *		uninitialized the path successfully.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 *		or whatever other status vo_pi_uninit returns
 */
/*ARGSUSED*/
int
mdi_pi_free(mdi_pathinfo_t *pip, int flags)
{
	/*
	 * Start from success so that rv is well defined when the path is
	 * initing or when the vHCI registered no vo_pi_uninit callback.
	 */
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	int		client_held = 0;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, return failure
		 */
		MDI_DEBUG(1, (MDI_WARN, NULL,
		    "!invalid pHCI: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid vHCI: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid Client device, return failure
		 */
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid client: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	/*
	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
	 * if the node state is either offline or init and the reference count
	 * is zero.
	 */
	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
	    MDI_PI_IS_INITING(pip))) {
		/*
		 * Node is busy
		 */
		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
		    "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_BUSY);
	}

	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.
		 */
		MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
		    "!%d cmds still pending on path: %s %p",
		    MDI_PI(pip)->pi_ref_cnt,
		    mdi_pi_spathname(pip), (void *)pip));
		if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
		    TR_CLOCK_TICK) == -1) {
			/*
			 * The timeout time reached without ref_cnt being zero
			 * being signaled.
			 */
			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
			    "!Timeout reached on path %s %p without the cond",
			    mdi_pi_spathname(pip), (void *)pip));
			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
			    "!%d cmds still pending on path %s %p",
			    MDI_PI(pip)->pi_ref_cnt,
			    mdi_pi_spathname(pip), (void *)pip));
			MDI_PI_UNLOCK(pip);
			return (MDI_BUSY);
		}
	}
	if (MDI_PI(pip)->pi_pm_held) {
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));

	MDI_CLIENT_LOCK(ct);

	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * Wait till failover is complete before removing this node.
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Drop the client lock so that the vHCI client lock and the client
	 * lock can be acquired in that order.
	 */
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_LOCK(vh);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);

	if (!MDI_PI_IS_INITING(pip)) {
		f = vh->vh_ops->vo_pi_uninit;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
		}
	}

	/*
	 * If vo_pi_uninit() completed successfully.
	 */
	if (rv == MDI_SUCCESS) {
		if (client_held) {
			MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, 1);
		}
		i_mdi_pi_free(ph, pip, ct);
		if (ct->ct_path_count == 0) {
			/*
			 * Client lost its last path.
			 * Clean up the client device
			 */
			MDI_CLIENT_UNLOCK(ct);
			(void) i_mdi_client_free(ct->ct_vhci, ct);
			MDI_VHCI_CLIENT_UNLOCK(vh);
			return (rv);
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/* The path was not freed: put it back into the vhci cache */
	if (rv == MDI_FAILURE)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}
/*
 * i_mdi_pi_free():
 *		Tear down and release a mdi_pathinfo node: destroy its kstats,
 *		unlink it from the client and pHCI path lists, destroy its
 *		mutex and condition variables, and free its address string,
 *		property list and the node itself.
 *		The caller must hold the client lock (asserted below).
 */
static void
i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
{
	int	client_circ;
	int	phci_circ;

	ASSERT(MDI_CLIENT_LOCKED(ct));

	/* per-path kstats go first */
	i_mdi_pi_kstat_destroy(pip);

	/* See comments in i_mdi_pi_alloc() */
	ndi_devi_enter(ct->ct_dip, &client_circ);
	ndi_devi_enter(ph->ph_dip, &phci_circ);

	i_mdi_client_remove_path(ct, pip);
	i_mdi_phci_remove_path(ph, pip);

	ndi_devi_exit(ph->ph_dip, phci_circ);
	ndi_devi_exit(ct->ct_dip, client_circ);

	/* the node is off both lists; release its synchronization objects */
	mutex_destroy(&MDI_PI(pip)->pi_mutex);
	cv_destroy(&MDI_PI(pip)->pi_state_cv);
	cv_destroy(&MDI_PI(pip)->pi_ref_cv);

	if (MDI_PI(pip)->pi_addr != NULL) {
		kmem_free(MDI_PI(pip)->pi_addr,
		    strlen(MDI_PI(pip)->pi_addr) + 1);
		MDI_PI(pip)->pi_addr = NULL;
	}

	if (MDI_PI(pip)->pi_prop != NULL) {
		(void) nvlist_free(MDI_PI(pip)->pi_prop);
		MDI_PI(pip)->pi_prop = NULL;
	}

	kmem_free(pip, sizeof (struct mdi_pathinfo));
}
/*
 * i_mdi_phci_remove_path():
 *		Unlink a mdi_pathinfo node from the pHCI path list (if it is
 *		on it) and clear the node's pHCI link and pHCI back pointer.
 * Notes:
 *		The caller must own the pHCI devinfo node (DEVI_BUSY_OWNED is
 *		asserted); the per-pHCI mutex is taken and dropped here.
 */
static void
i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*before = NULL;
	mdi_pathinfo_t	*cur = NULL;

	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	MDI_PHCI_LOCK(ph);

	/* walk the list until 'pip' is found, remembering its predecessor */
	for (cur = ph->ph_path_head; cur != NULL && cur != pip;
	    cur = (mdi_pathinfo_t *)MDI_PI(cur)->pi_phci_link) {
		before = cur;
	}

	if (cur != NULL) {
		ph->ph_path_count--;
		if (before != NULL) {
			MDI_PI(before)->pi_phci_link =
			    MDI_PI(cur)->pi_phci_link;
		} else {
			ph->ph_path_head =
			    (mdi_pathinfo_t *)MDI_PI(cur)->pi_phci_link;
		}
		if (ph->ph_path_tail == cur) {
			ph->ph_path_tail = before;
		}
	}

	/*
	 * The node's own pHCI linkage is cleared whether or not it was found
	 * on the list.
	 */
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_phci = NULL;
	MDI_PHCI_UNLOCK(ph);
}
/*
 * i_mdi_client_remove_path():
 *		Unlink a mdi_pathinfo node from the client path list (if it is
 *		on it) and clear the node's client link and client back
 *		pointer.  If the node was the client's ct_path_last, that
 *		pointer is reset to the list head.
 * Notes:
 *		The caller must own the client devinfo node and hold the
 *		client lock (both asserted below).
 */
static void
i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*before = NULL;
	mdi_pathinfo_t	*cur;

	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
	ASSERT(MDI_CLIENT_LOCKED(ct));

	/* walk the list until 'pip' is found, remembering its predecessor */
	for (cur = ct->ct_path_head; cur != NULL && cur != pip;
	    cur = (mdi_pathinfo_t *)MDI_PI(cur)->pi_client_link) {
		before = cur;
	}

	if (cur != NULL) {
		ct->ct_path_count--;
		if (before != NULL) {
			MDI_PI(before)->pi_client_link =
			    MDI_PI(cur)->pi_client_link;
		} else {
			ct->ct_path_head =
			    (mdi_pathinfo_t *)MDI_PI(cur)->pi_client_link;
		}
		if (ct->ct_path_tail == cur) {
			ct->ct_path_tail = before;
		}
		if (ct->ct_path_last == cur) {
			ct->ct_path_last = ct->ct_path_head;
		}
	}

	/* cleared whether or not the node was found on the list */
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_client = NULL;
}
/*
* i_mdi_pi_state_change():
* Common worker that moves a mdi_pathinfo node to the requested
* state (online, standby, fault or offline). It is used by
* mdi_pi_online(), mdi_pi_standby(), mdi_pi_fault() and
* mdi_pi_offline().
*
* Outline:
* - validate the pHCI, vHCI and client behind 'pip';
* - if the path is still initing, call the vHCI vo_pi_init entry;
* - refuse the change (MDI_BUSY) when the pHCI is not ready;
* - wait out any transient state and any client failover;
* - mark the path transient for the target state and call the
* vHCI vo_pi_state_change entry;
* - on success update the client state and online/offline the
* client devinfo node as needed.
*
* Return Values:
* MDI_SUCCESS
* MDI_FAILURE
* MDI_BUSY
* or any other status returned by vo_pi_state_change
*/
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
int rv = MDI_SUCCESS;
mdi_vhci_t *vh;
mdi_phci_t *ph;
mdi_client_t *ct;
int (*f)();
dev_info_t *cdip;
MDI_PI_LOCK(pip);
ph = MDI_PI(pip)->pi_phci;
ASSERT(ph);
if (ph == NULL) {
/*
* Invalid pHCI device, fail the request
*/
MDI_PI_UNLOCK(pip);
MDI_DEBUG(1, (MDI_WARN, NULL,
"!invalid phci: pip %s %p",
mdi_pi_spathname(pip), (void *)pip));
return (MDI_FAILURE);
}
vh = ph->ph_vhci;
ASSERT(vh);
if (vh == NULL) {
/*
* Invalid vHCI device, fail the request
*/
MDI_PI_UNLOCK(pip);
MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
"!invalid vhci: pip %s %p",
mdi_pi_spathname(pip), (void *)pip));
return (MDI_FAILURE);
}
ct = MDI_PI(pip)->pi_client;
ASSERT(ct != NULL);
if (ct == NULL) {
/*
* Invalid client device, fail the request
*/
MDI_PI_UNLOCK(pip);
MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
"!invalid client: pip %s %p",
mdi_pi_spathname(pip), (void *)pip));
return (MDI_FAILURE);
}
/*
* If this path has not been initialized yet, Callback vHCI driver's
* pathinfo node initialize entry point
*/
if (MDI_PI_IS_INITING(pip)) {
/* the callback is made without the pathinfo lock held */
MDI_PI_UNLOCK(pip);
f = vh->vh_ops->vo_pi_init;
if (f != NULL) {
rv = (*f)(vh->vh_dip, pip, 0);
if (rv != MDI_SUCCESS) {
MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
"!vo_pi_init failed: vHCI %p, pip %s %p",
(void *)vh, mdi_pi_spathname(pip),
(void *)pip));
return (MDI_FAILURE);
}
}
MDI_PI_LOCK(pip);
MDI_PI_CLEAR_TRANSIENT(pip);
}
/*
* Do not allow state transition when pHCI is in offline/suspended
* states
*/
i_mdi_phci_lock(ph, pip);
if (MDI_PHCI_IS_READY(ph) == 0) {
MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
"!pHCI not ready, pHCI=%p", (void *)ph));
MDI_PI_UNLOCK(pip);
i_mdi_phci_unlock(ph);
return (MDI_BUSY);
}
/* marked stable again at state_change_exit */
MDI_PHCI_UNSTABLE(ph);
i_mdi_phci_unlock(ph);
/*
* Check if mdi_pathinfo state is in transient state.
* If yes, offlining is in progress and wait till transient state is
* cleared.
*/
if (MDI_PI_IS_TRANSIENT(pip)) {
while (MDI_PI_IS_TRANSIENT(pip)) {
cv_wait(&MDI_PI(pip)->pi_state_cv,
&MDI_PI(pip)->pi_mutex);
}
}
/*
* Grab the client lock in reverse order sequence and release the
* mdi_pathinfo mutex.
*/
i_mdi_client_lock(ct, pip);
MDI_PI_UNLOCK(pip);
/*
* Wait till failover state is cleared
*/
while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
/*
* Mark the mdi_pathinfo node state as transient
*/
MDI_PI_LOCK(pip);
switch (state) {
case MDI_PATHINFO_STATE_ONLINE:
MDI_PI_SET_ONLINING(pip);
break;
case MDI_PATHINFO_STATE_STANDBY:
MDI_PI_SET_STANDBYING(pip);
break;
case MDI_PATHINFO_STATE_FAULT:
/*
* Mark the pathinfo state as FAULTED
*/
MDI_PI_SET_FAULTING(pip);
MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
break;
case MDI_PATHINFO_STATE_OFFLINE:
/*
* ndi_devi_offline() cannot hold pip or ct locks.
*/
MDI_PI_UNLOCK(pip);
/*
* If this is a user initiated path online->offline operation
* whose success would transition a client from DEGRADED to
* FAILED then only proceed if we can offline the client first.
*/
cdip = ct->ct_dip;
if ((flag & NDI_USER_REQ) &&
MDI_PI_IS_ONLINE(pip) &&
(MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
i_mdi_client_unlock(ct);
rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
if (rv != NDI_SUCCESS) {
/*
* Convert to MDI error code
*/
switch (rv) {
case NDI_BUSY:
rv = MDI_BUSY;
break;
default:
rv = MDI_FAILURE;
break;
}
/* neither the pip nor the client lock is held here */
goto state_change_exit;
} else {
i_mdi_client_lock(ct, NULL);
}
}
/*
* Mark the mdi_pathinfo node state as transient
*/
MDI_PI_LOCK(pip);
MDI_PI_SET_OFFLINING(pip);
break;
}
MDI_PI_UNLOCK(pip);
MDI_CLIENT_UNSTABLE(ct);
i_mdi_client_unlock(ct);
/* call the vHCI with neither the pip nor the client lock held */
f = vh->vh_ops->vo_pi_state_change;
if (f != NULL)
rv = (*f)(vh->vh_dip, pip, state, 0, flag);
MDI_CLIENT_LOCK(ct);
MDI_PI_LOCK(pip);
if (rv == MDI_NOT_SUPPORTED) {
MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
}
if (rv != MDI_SUCCESS) {
MDI_DEBUG(2, (MDI_WARN, ct->ct_dip,
"vo_pi_state_change failed: rv %x", rv));
}
/*
* Commit the new state on success; otherwise fall back to the state
* the path had before this request.
*/
if (MDI_PI_IS_TRANSIENT(pip)) {
if (rv == MDI_SUCCESS) {
MDI_PI_CLEAR_TRANSIENT(pip);
} else {
MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
}
}
/*
* Wake anyone waiting for this mdi_pathinfo node
*/
cv_broadcast(&MDI_PI(pip)->pi_state_cv);
MDI_PI_UNLOCK(pip);
/*
* Mark the client device as stable
*/
MDI_CLIENT_STABLE(ct);
if (rv == MDI_SUCCESS) {
if (ct->ct_unstable == 0) {
cdip = ct->ct_dip;
/*
* Changing the mdi_pathinfo node state will impact the
* client state. Update the client and dev_info node
* state accordingly.
*/
rv = NDI_SUCCESS;
i_mdi_client_update_state(ct);
switch (MDI_CLIENT_STATE(ct)) {
case MDI_CLIENT_STATE_OPTIMAL:
case MDI_CLIENT_STATE_DEGRADED:
if (cdip && !i_ddi_devi_attached(cdip) &&
((state == MDI_PATHINFO_STATE_ONLINE) ||
(state == MDI_PATHINFO_STATE_STANDBY))) {
/*
* Must do ndi_devi_online() through
* hotplug thread for deferred
* attach mechanism to work
*/
MDI_CLIENT_UNLOCK(ct);
rv = ndi_devi_online(cdip, 0);
MDI_CLIENT_LOCK(ct);
if ((rv != NDI_SUCCESS) &&
(MDI_CLIENT_STATE(ct) ==
MDI_CLIENT_STATE_DEGRADED)) {
MDI_DEBUG(1, (MDI_WARN, cdip,
"!ndi_devi_online failed "
"error %x", rv));
}
/*
* A failure to online the client is only
* logged; the path state change itself is
* still reported as successful.
*/
rv = NDI_SUCCESS;
}
break;
case MDI_CLIENT_STATE_FAILED:
/*
* This is the last path case for
* non-user initiated events.
*/
if (((flag & NDI_USER_REQ) == 0) &&
cdip && (i_ddi_node_state(cdip) >=
DS_INITIALIZED)) {
MDI_CLIENT_UNLOCK(ct);
rv = ndi_devi_offline(cdip,
NDI_DEVFS_CLEAN);
MDI_CLIENT_LOCK(ct);
if (rv != NDI_SUCCESS) {
/*
* ndi_devi_offline failed.
* Reset client flags to
* online as the path could not
* be offlined.
*/
MDI_DEBUG(1, (MDI_WARN, cdip,
"!ndi_devi_offline failed: "
"error %x", rv));
MDI_CLIENT_SET_ONLINE(ct);
}
}
break;
}
/*
* Convert to MDI error code
*/
switch (rv) {
case NDI_SUCCESS:
MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
i_mdi_report_path_state(ct, pip);
rv = MDI_SUCCESS;
break;
case NDI_BUSY:
rv = MDI_BUSY;
break;
default:
rv = MDI_FAILURE;
break;
}
}
}
MDI_CLIENT_UNLOCK(ct);
state_change_exit:
/*
* Mark the pHCI as stable again.
*/
MDI_PHCI_LOCK(ph);
MDI_PHCI_STABLE(ph);
MDI_PHCI_UNLOCK(ph);
return (rv);
}
/*
 * mdi_pi_online():
 *		Place the path_info node in the online state.  The path is
 *		now available to be selected by mdi_select_path() for
 *		transporting I/O requests to client devices.
 *		After a successful state change a power-management hold is
 *		taken on the path if it does not have one yet, and in that
 *		case a matching hold is also taken on the client (powering
 *		all pHCIs first when the client power count is zero).
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		or the status of the state change / i_mdi_power_all_phci()
 */
int
mdi_pi_online(mdi_pathinfo_t *pip, int flags)
{
	mdi_client_t	*ct = MDI_PI(pip)->pi_client;
	int		took_hold = 0;
	int		rv;

	ASSERT(ct != NULL);

	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
	if (rv != MDI_SUCCESS)
		return (rv);

	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_pm_held == 0) {
		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
		    "i_mdi_pm_hold_pip %p", (void *)pip));
		i_mdi_pm_hold_pip(pip);
		took_hold = 1;
	}
	MDI_PI_UNLOCK(pip);

	/* nothing more to do if the path already carried a hold */
	if (!took_hold)
		return (rv);

	MDI_CLIENT_LOCK(ct);
	if (ct->ct_power_cnt == 0) {
		rv = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
	    "i_mdi_pm_hold_client %p", (void *)ct));
	i_mdi_pm_hold_client(ct, 1);
	MDI_CLIENT_UNLOCK(ct);

	return (rv);
}
/*
 * mdi_pi_standby():
 *		Place the mdi_pathinfo node in standby state by requesting
 *		MDI_PATHINFO_STATE_STANDBY from i_mdi_pi_state_change().
 *
 * Return Values:
 *		the status returned by i_mdi_pi_state_change()
 *		(MDI_SUCCESS, MDI_FAILURE, ...)
 */
int
mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
{
	int	status;

	status = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags);
	return (status);
}
/*
 * mdi_pi_fault():
 *		Place the mdi_pathinfo node in fault'ed state by requesting
 *		MDI_PATHINFO_STATE_FAULT from i_mdi_pi_state_change().
 * Return Values:
 *		the status returned by i_mdi_pi_state_change()
 *		(MDI_SUCCESS, MDI_FAILURE, ...)
 */
int
mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
{
	int	status;

	status = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags);
	return (status);
}
/*
 * mdi_pi_offline():
 *		Offline a mdi_pathinfo node.  When the state change succeeds
 *		and the path carries a power-management hold (pi_pm_held), one
 *		client power-management hold is released as well.
 * Return Values:
 *		the status returned by i_mdi_pi_state_change()
 *		(MDI_SUCCESS, MDI_FAILURE, ...)
 */
int
mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
{
	mdi_client_t	*ct;
	int		rele_needed = 0;
	int		ret;

	/*
	 * Original code overloaded NDI_DEVI_REMOVE to this interface, and
	 * used it to mean "user initiated operation" (i.e. devctl).  Callers
	 * should now just use NDI_USER_REQ.
	 */
	if (flags & NDI_DEVI_REMOVE)
		flags = (flags & ~NDI_DEVI_REMOVE) | NDI_USER_REQ;

	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
	if (ret != MDI_SUCCESS)
		return (ret);

	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_pm_held)
		rele_needed = 1;
	MDI_PI_UNLOCK(pip);

	if (rele_needed) {
		ct = MDI_PI(pip)->pi_client;
		MDI_CLIENT_LOCK(ct);
		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
		    "i_mdi_pm_rele_client\n"));
		i_mdi_pm_rele_client(ct, 1);
		MDI_CLIENT_UNLOCK(ct);
	}

	return (ret);
}
/*
 * i_mdi_pi_offline():
 *		Offline a mdi_pathinfo node and call the vHCI driver's
 *		vo_pi_state_change callback (when one is registered).
 *
 *		Waits for outstanding references on the path to drain; a
 *		timed-out wait is logged and the wait is then repeated.  The
 *		path is marked offline regardless of the callback status.  When
 *		the callback succeeded (or there is none) the client state is
 *		updated, and a client that ends up FAILED has its devinfo node
 *		offlined.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_BUSY
 *		MDI_FAILURE
 *		or the failure status returned by vo_pi_state_change
 */
static int
i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
{
	dev_info_t	*vdip = NULL;
	mdi_vhci_t	*vh = NULL;
	mdi_client_t	*ct = NULL;
	int		(*f)();
	/*
	 * Start from success so that rv is well defined when the vHCI has
	 * not registered a vo_pi_state_change callback.
	 */
	int		rv = MDI_SUCCESS;

	MDI_PI_LOCK(pip);
	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);

	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.
		 */
		MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
		    "!%d cmds still pending on path %s %p",
		    MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip),
		    (void *)pip));
		if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
		    TR_CLOCK_TICK) == -1) {
			/*
			 * The timeout time reached without ref_cnt being zero
			 * being signaled.  This is only reported; the loop
			 * keeps waiting until the count drains.
			 */
			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
			    "!Timeout reached on path %s %p without the cond",
			    mdi_pi_spathname(pip), (void *)pip));
			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
			    "!%d cmds still pending on path %s %p",
			    MDI_PI(pip)->pi_ref_cnt,
			    mdi_pi_spathname(pip), (void *)pip));
		}
	}
	vh = ct->ct_vhci;
	vdip = vh->vh_dip;

	/*
	 * Notify vHCI that has registered this event
	 */
	ASSERT(vh->vh_ops);
	f = vh->vh_ops->vo_pi_state_change;

	if (f != NULL) {
		/* the callback is made without the pathinfo lock held */
		MDI_PI_UNLOCK(pip);
		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
		    flags)) != MDI_SUCCESS) {
			MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
			    "!vo_path_offline failed: vdip %s%d %p: path %s %p",
			    ddi_driver_name(vdip), ddi_get_instance(vdip),
			    (void *)vdip, mdi_pi_spathname(pip), (void *)pip));
		}
		MDI_PI_LOCK(pip);
	}

	/*
	 * Set the mdi_pathinfo node state and clear the transient condition
	 */
	MDI_PI_SET_OFFLINE(pip);
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	MDI_CLIENT_LOCK(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			dev_info_t	*cdip = ct->ct_dip;

			/*
			 * Offlining the mdi_pathinfo node will impact the
			 * client state.  Update the client and dev_info node
			 * state accordingly.
			 */
			i_mdi_client_update_state(ct);
			rv = NDI_SUCCESS;
			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
				if (cdip &&
				    (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_offline(cdip,
					    NDI_DEVFS_CLEAN);
					MDI_CLIENT_LOCK(ct);
					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online.
						 */
						MDI_DEBUG(4, (MDI_WARN, cdip,
						    "ndi_devi_offline failed: "
						    "error %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
		i_mdi_report_path_state(ct, pip);
	}

	MDI_CLIENT_UNLOCK(ct);

	/*
	 * Change in the mdi_pathinfo node state will impact the client state
	 */
	MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
	    "ct = %p pip = %p", (void *)ct, (void *)pip));
	return (rv);
}
/*
 * i_mdi_pi_online():
 *		Online a mdi_pathinfo node and call the vHCI driver's
 *		vo_pi_state_change callback (when one is registered).
 *
 *		When the callback succeeded (or there is none) the client
 *		state is updated and, if the client is OPTIMAL or DEGRADED but
 *		its devinfo node is not attached, the node is onlined.  If
 *		either the callback or that online fails, the path is put back
 *		into its previous state.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_BUSY
 *		MDI_FAILURE
 *		or the failure status returned by vo_pi_state_change
 */
static int
i_mdi_pi_online(mdi_pathinfo_t *pip, int flags)
{
	mdi_vhci_t	*vh = NULL;
	mdi_client_t	*ct = NULL;
	mdi_phci_t	*ph;
	int		(*f)();
	/*
	 * Start from success so that rv is well defined when the vHCI has
	 * not registered a vo_pi_state_change callback.
	 */
	int		rv = MDI_SUCCESS;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	vh = ph->ph_vhci;
	ct = MDI_PI(pip)->pi_client;
	MDI_PI_SET_ONLINING(pip);
	MDI_PI_UNLOCK(pip);

	/* the callback is made without the pathinfo lock held */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL)
		rv = (*f)(vh->vh_dip, pip, MDI_PATHINFO_STATE_ONLINE, 0,
		    flags);

	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	if (rv == MDI_SUCCESS) {
		dev_info_t	*cdip = ct->ct_dip;

		rv = MDI_SUCCESS;
		i_mdi_client_update_state(ct);
		if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL ||
		    MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
			if (cdip && !i_ddi_devi_attached(cdip)) {
				MDI_CLIENT_UNLOCK(ct);
				rv = ndi_devi_online(cdip, 0);
				MDI_CLIENT_LOCK(ct);
				if ((rv != NDI_SUCCESS) &&
				    (MDI_CLIENT_STATE(ct) ==
				    MDI_CLIENT_STATE_DEGRADED)) {
					MDI_CLIENT_SET_OFFLINE(ct);
				}
				if (rv != NDI_SUCCESS) {
					/* Reset the path state */
					MDI_PI_LOCK(pip);
					MDI_PI(pip)->pi_state =
					    MDI_PI_OLD_STATE(pip);
					MDI_PI_UNLOCK(pip);
				}
			}
		}
		/*
		 * Convert to MDI error code
		 */
		switch (rv) {
		case NDI_SUCCESS:
			MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
			i_mdi_report_path_state(ct, pip);
			rv = MDI_SUCCESS;
			break;
		case NDI_BUSY:
			rv = MDI_BUSY;
			break;
		default:
			rv = MDI_FAILURE;
			break;
		}
	} else {
		/* Reset the path state */
		MDI_PI_LOCK(pip);
		MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		MDI_PI_UNLOCK(pip);
	}
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}
/*
 * mdi_pi_get_node_name():
 *		Get the name associated with a mdi_pathinfo node.
 *		Since pathinfo nodes are not directly named, we
 *		return the node_name of the client.
 *
 * Return Values:
 *		char * (NULL when 'pip', its client, or the client's devinfo
 *		node is NULL)
 */
char *
mdi_pi_get_node_name(mdi_pathinfo_t *pip)
{
	mdi_client_t	*ct;

	if (pip != NULL) {
		ct = MDI_PI(pip)->pi_client;
		if (ct != NULL && ct->ct_dip != NULL)
			return (ddi_node_name(ct->ct_dip));
	}

	return (NULL);
}
/*
 * mdi_pi_get_addr():
 *		Get the unit address associated with a mdi_pathinfo node
 *
 * Return Values:
 *		char * (NULL when 'pip' is NULL)
 */
char *
mdi_pi_get_addr(mdi_pathinfo_t *pip)
{
	return (pip != NULL ? MDI_PI(pip)->pi_addr : NULL);
}
/*
 * mdi_pi_get_path_instance():
 *		Get the 'path_instance' of a mdi_pathinfo node
 *
 * Return Values:
 *		path_instance (0 when 'pip' is NULL)
 */
int
mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
{
	return (pip != NULL ? MDI_PI(pip)->pi_path_instance : 0);
}
/*
 * mdi_pi_pathname():
 *		Return pointer to path to pathinfo node, looked up through the
 *		node's 'path_instance'.  Returns NULL when 'pip' is NULL.
 */
char *
mdi_pi_pathname(mdi_pathinfo_t *pip)
{
	int	instance;

	if (pip == NULL)
		return (NULL);

	instance = mdi_pi_get_path_instance(pip);
	return (mdi_pi_pathname_by_instance(instance));
}
/*
 * mdi_pi_spathname():
 *		Return pointer to shortpath to pathinfo node.  Used for debug
 *		messages, so return "" instead of NULL when unknown (that is,
 *		when 'pip' is NULL or no shortpath is found for its instance).
 */
char *
mdi_pi_spathname(mdi_pathinfo_t *pip)
{
	char	*found;

	if (pip == NULL)
		return ("");

	found = mdi_pi_spathname_by_instance(mdi_pi_get_path_instance(pip));
	return (found != NULL ? found : "");
}
/*
 * mdi_pi_pathname_obp():
 *		Copy the value of the pathinfo node's "obp-path" property into
 *		the caller supplied buffer 'path'.
 *		NOTE(review): the copy is an unbounded strcpy(); the caller's
 *		buffer presumably has to be MAXPATHLEN bytes - confirm against
 *		callers.
 * Return Values:
 *		'path' when the property was found and copied
 *		NULL when 'pip' or 'path' is NULL, or the lookup fails
 */
char *
mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path)
{
	char	*prop_val = NULL;

	if (pip == NULL || path == NULL)
		return (NULL);

	if (mdi_prop_lookup_string(pip, "obp-path", &prop_val) != MDI_SUCCESS)
		return (NULL);

	(void) strcpy(path, prop_val);
	(void) mdi_prop_free(prop_val);
	return (path);
}
/*
 * mdi_pi_pathname_obp_set():
 *		Build "<pHCI path>[/<component>]" and store it as the
 *		"obp-path" string property of the pathinfo node.  The pHCI
 *		path comes from ddi_pathname_obp(), falling back to
 *		ddi_pathname() when that returns NULL.  A result that would
 *		not fit in MAXPATHLEN bytes is truncated.
 * Return Values:
 *		MDI_FAILURE when 'pip' is NULL or has no pHCI devinfo node
 *		otherwise the status of mdi_prop_update_string()
 */
int
mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component)
{
	dev_info_t	*pdip;
	char		*obp_path;
	int		rc;

	if (pip == NULL)
		return (MDI_FAILURE);

	pdip = mdi_pi_get_phci(pip);
	if (pdip == NULL)
		return (MDI_FAILURE);

	obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

	if (ddi_pathname_obp(pdip, obp_path) == NULL) {
		(void) ddi_pathname(pdip, obp_path);
	}

	if (component) {
		/*
		 * strncat()'s length argument limits the number of bytes
		 * appended, not the size of the destination, so pass the
		 * space that is actually left (less one for the terminator).
		 * Assumes the pathname routines above leave a NUL-terminated
		 * string shorter than MAXPATHLEN - TODO confirm.
		 */
		(void) strncat(obp_path, "/",
		    MAXPATHLEN - strlen(obp_path) - 1);
		(void) strncat(obp_path, component,
		    MAXPATHLEN - strlen(obp_path) - 1);
	}
	rc = mdi_prop_update_string(pip, "obp-path", obp_path);

	/* obp_path has already been dereferenced above; no NULL check needed */
	kmem_free(obp_path, MAXPATHLEN);
	return (rc);
}
/*
 * mdi_pi_get_client():
 *		Get the client devinfo associated with a mdi_pathinfo node
 *
 * Return Values:
 *		Handle to client device dev_info node, or NULL when 'pip' is
 *		NULL or the node has no client (pi_client is cleared when a
 *		path is removed from its client).
 */
dev_info_t *
mdi_pi_get_client(mdi_pathinfo_t *pip)
{
	mdi_client_t	*ct;

	if (pip == NULL)
		return (NULL);

	/* guard the client pointer the same way mdi_pi_get_phci() does */
	ct = MDI_PI(pip)->pi_client;
	return (ct != NULL ? ct->ct_dip : NULL);
}
/*
 * mdi_pi_get_phci():
 *		Get the pHCI devinfo associated with the mdi_pathinfo node
 * Return Values:
 *		Handle to dev_info node, or NULL when 'pip' is NULL or the
 *		node has no pHCI
 */
dev_info_t *
mdi_pi_get_phci(mdi_pathinfo_t *pip)
{
	mdi_phci_t	*ph;

	if (pip == NULL)
		return (NULL);

	ph = MDI_PI(pip)->pi_phci;
	if (ph == NULL)
		return (NULL);

	return (ph->ph_dip);
}
/*
 * mdi_pi_get_client_private():
 *		Get the client private information associated with the
 *		mdi_pathinfo node; NULL when 'pip' is NULL.
 */
void *
mdi_pi_get_client_private(mdi_pathinfo_t *pip)
{
	if (pip == NULL)
		return (NULL);

	return (MDI_PI(pip)->pi_cprivate);
}
/*
 * mdi_pi_set_client_private():
 *		Set the client private information in the mdi_pathinfo node.
 *		A NULL 'pip' is ignored.
 */
void
mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
{
	if (pip == NULL)
		return;

	MDI_PI(pip)->pi_cprivate = priv;
}
/*
 * mdi_pi_get_phci_private():
 *		Get the pHCI private information associated with the
 *		mdi_pathinfo node; NULL when 'pip' is NULL.
 */
caddr_t
mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
{
	if (pip == NULL)
		return (NULL);

	return (MDI_PI(pip)->pi_pprivate);
}
/*
 * mdi_pi_set_phci_private():
 *		Set the pHCI private information in the mdi_pathinfo node.
 *		A NULL 'pip' is ignored.
 */
void
mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
{
	if (pip == NULL)
		return;

	MDI_PI(pip)->pi_pprivate = priv;
}
/*
 * mdi_pi_get_state():
 *		Get the mdi_pathinfo node state.  Transient states are internal
 *		and not provided to the users: while a transition is in
 *		progress the last good (old) state is reported instead.
 *		A NULL 'pip' yields MDI_PATHINFO_STATE_INIT.
 */
mdi_pathinfo_state_t
mdi_pi_get_state(mdi_pathinfo_t *pip)
{
	mdi_pathinfo_state_t	reported = MDI_PATHINFO_STATE_INIT;

	if (pip == NULL)
		return (reported);

	if (MDI_PI_IS_TRANSIENT(pip))
		reported = MDI_PI_OLD_STATE(pip);
	else
		reported = MDI_PI_STATE(pip);

	return (reported);
}
/*
 * mdi_pi_get_flags():
 *		Get the mdi_pathinfo node flags; 0 when 'pip' is NULL.
 */
uint_t
mdi_pi_get_flags(mdi_pathinfo_t *pip)
{
	if (pip == NULL)
		return (0);

	return (MDI_PI(pip)->pi_flags);
}
/*
 * mdi_pi_get_state2():
 *		Report both the state and the extended state of a mdi_pathinfo
 *		node.  While a transition is in progress the last good (old)
 *		values are reported.  For a NULL 'pip' the state is
 *		MDI_PATHINFO_STATE_INIT and the extended state is 0.
 *
 * Note that the following function needs to be the new interface for
 * mdi_pi_get_state when mpxio gets integrated to ON.
 *
 * Return Values:
 *		MDI_SUCCESS (always)
 */
int
mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
    uint32_t *ext_state)
{
	*state = MDI_PATHINFO_STATE_INIT;

	if (pip == NULL) {
		/*
		 * Success is still returned, so give the caller a defined
		 * extended state rather than leaving it untouched.  The
		 * pointer is checked because this path never dereferenced it
		 * before.
		 */
		if (ext_state != NULL)
			*ext_state = 0;
		return (MDI_SUCCESS);
	}

	if (MDI_PI_IS_TRANSIENT(pip)) {
		/*
		 * mdi_pathinfo is in state transition.  Return the
		 * last good state.
		 */
		*state = MDI_PI_OLD_STATE(pip);
		*ext_state = MDI_PI_OLD_EXT_STATE(pip);
	} else {
		*state = MDI_PI_STATE(pip);
		*ext_state = MDI_PI_EXT_STATE(pip);
	}
	return (MDI_SUCCESS);
}
/*
 * mdi_pi_get_preferred:
 *		Get the preferred path flag; 0 when 'pip' is NULL.
 */
int
mdi_pi_get_preferred(mdi_pathinfo_t *pip)
{
	return (pip != NULL ? MDI_PI(pip)->pi_preferred : 0);
}
/*
 * mdi_pi_set_preferred:
 *		Set the preferred path flag.  A NULL 'pip' is ignored.
 */
void
mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
{
	if (pip == NULL)
		return;

	MDI_PI(pip)->pi_preferred = preferred;
}
/*
 * mdi_pi_set_state():
 *		Set the mdi_pathinfo node state.  The extended-state bits
 *		currently set in pi_state are carried over into the new value,
 *		and the DINFOCACHE snapshot is invalidated.
 *		A NULL 'pip' is ignored.
 */
void
mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
{
	uint32_t	kept_ext;

	if (pip == NULL)
		return;

	/* remember the extended-state bits before overwriting pi_state */
	kept_ext = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
	MDI_PI(pip)->pi_state = state;
	MDI_PI(pip)->pi_state |= kept_ext;

	/* Path has changed state, invalidate DINFOCACHE snap shot. */
	i_ddi_di_cache_invalidate();
}
/*
 * Property functions:
 */

/*
 * i_map_nvlist_error_to_mdi():
 *		Translate an errno-style status from the nvlist routines into
 *		a DDI_PROP_* code:
 *		0		-> DDI_PROP_SUCCESS
 *		EINVAL, ENOTSUP	-> DDI_PROP_INVAL_ARG
 *		ENOMEM		-> DDI_PROP_NO_MEMORY
 *		anything else	-> DDI_PROP_NOT_FOUND
 */
int
i_map_nvlist_error_to_mdi(int val)
{
	switch (val) {
	case 0:
		return (DDI_PROP_SUCCESS);
	case EINVAL:
	case ENOTSUP:
		return (DDI_PROP_INVAL_ARG);
	case ENOMEM:
		return (DDI_PROP_NO_MEMORY);
	default:
		return (DDI_PROP_NOT_FOUND);
	}
}
/*
 * mdi_pi_get_next_prop():
 *		Property walk function: return the nvpair that follows 'prev'
 *		in the node's property list.  The caller should hold
 *		mdi_pi_lock() for the duration of the walk, and release it
 *		with mdi_pi_unlock() afterwards, to get a consistent view;
 *		this is asserted.  Returns NULL when pip is NULL or the node
 *		has no property list.
 */
nvpair_t *
mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
{
	if (pip == NULL)
		return (NULL);
	if (MDI_PI(pip)->pi_prop == NULL)
		return (NULL);

	ASSERT(MDI_PI_LOCKED(pip));
	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
}
/*
 * mdi_prop_remove():
 *		Remove the named property from the node's property list.  When
 *		'name' is NULL every property on the list is removed.  The
 *		node lock is taken here, so the caller must not already hold
 *		it (asserted).
 *
 * Return Values:
 *		DDI_PROP_NOT_FOUND	pip is NULL or it has no property list
 *		DDI_PROP_SUCCESS	otherwise (the nvlist_remove_all()
 *					result is deliberately ignored)
 */
int
mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
{
	char		nvp_name[MAXNAMELEN];
	nvpair_t	*cur;
	nvpair_t	*following;

	if (pip == NULL)
		return (DDI_PROP_NOT_FOUND);

	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop == NULL) {
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_NOT_FOUND);
	}

	if (name != NULL) {
		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_SUCCESS);
	}

	/*
	 * No name given: walk the list and remove each pair.  The successor
	 * is fetched and the name copied out before the removal, since the
	 * current pair is gone once nvlist_remove_all() returns.
	 */
	for (cur = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
	    cur != NULL; cur = following) {
		following = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, cur);
		(void) snprintf(nvp_name, sizeof (nvp_name), "%s",
		    nvpair_name(cur));
		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, nvp_name);
	}

	MDI_PI_UNLOCK(pip);
	return (DDI_PROP_SUCCESS);
}
/*
 * mdi_prop_size():
 *		Get buffer size needed to pack the property data
 *		(NV_ENCODE_NATIVE encoding).  Caller should hold the
 *		mdi_pathinfo_t lock to get a consistent buffer size; this is
 *		asserted.
 *
 *		pip		path node whose property list is sized
 *		buflenp		out: required size; 0 when the node or its
 *				property list is missing
 *
 * Return Values:
 *		DDI_PROP_NOT_FOUND	pip is NULL or it has no property list
 *		otherwise the nvlist_size() result mapped through
 *		i_map_nvlist_error_to_mdi()
 */
int
mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
{
	int	rv;
	/*
	 * Start from 0 so that *buflenp never receives an indeterminate
	 * value should nvlist_size() fail before storing a size.
	 */
	size_t	bufsize = 0;

	*buflenp = 0;
	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (DDI_PROP_NOT_FOUND);
	}
	ASSERT(MDI_PI_LOCKED(pip));
	rv = nvlist_size(MDI_PI(pip)->pi_prop,
	    &bufsize, NV_ENCODE_NATIVE);
	*buflenp = bufsize;
	return (i_map_nvlist_error_to_mdi(rv));
}
/*
 * mdi_prop_pack():
 *		Pack the node's property list with NV_ENCODE_NATIVE encoding
 *		via nvlist_pack(), passing 'bufp' and 'buflen' through.  The
 *		caller should hold the mdi_pathinfo_t lock to get consistent
 *		data; this is asserted.
 *
 * Return Values:
 *		DDI_PROP_NOT_FOUND	pip is NULL or it has no property list
 *		otherwise the nvlist_pack() result mapped through
 *		i_map_nvlist_error_to_mdi()
 */
int
mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
{
	size_t	packlen;
	int	err;

	if (pip == NULL || MDI_PI(pip)->pi_prop == NULL)
		return (DDI_PROP_NOT_FOUND);

	ASSERT(MDI_PI_LOCKED(pip));

	/* nvlist_pack() wants the length as a size_t it can update. */
	packlen = buflen;
	err = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, &packlen,
	    NV_ENCODE_NATIVE, KM_SLEEP);
	return (i_map_nvlist_error_to_mdi(err));
}
/*
 * mdi_prop_update_byte():
 *		Create/Update a byte property on the node.  Takes the node
 *		lock itself, so the caller must not hold it (asserted).
 *
 * Return Values:
 *		DDI_PROP_INVAL_ARG	pip is NULL
 *		DDI_PROP_NOT_FOUND	the node has no property list
 *		otherwise the nvlist_add_byte() result mapped through
 *		i_map_nvlist_error_to_mdi()
 */
int
mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
{
	int	result = DDI_PROP_NOT_FOUND;

	if (pip == NULL)
		return (DDI_PROP_INVAL_ARG);

	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop != NULL) {
		result = i_map_nvlist_error_to_mdi(
		    nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data));
	}
	MDI_PI_UNLOCK(pip);
	return (result);
}
/*
 * mdi_prop_update_byte_array():
 *		Create/Update a byte array property of 'nelements' entries.
 *		Takes the node lock itself, so the caller must not hold it
 *		(asserted).
 *
 * Return Values:
 *		DDI_PROP_INVAL_ARG	pip is NULL
 *		DDI_PROP_NOT_FOUND	the node has no property list
 *		otherwise the nvlist_add_byte_array() result mapped through
 *		i_map_nvlist_error_to_mdi()
 */
int
mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
    uint_t nelements)
{
	int	result = DDI_PROP_NOT_FOUND;

	if (pip == NULL)
		return (DDI_PROP_INVAL_ARG);

	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop != NULL) {
		result = i_map_nvlist_error_to_mdi(
		    nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data,
		    nelements));
	}
	MDI_PI_UNLOCK(pip);
	return (result);
}
/*
 * mdi_prop_update_int():
 *		Create/Update a 32 bit integer property; 'data' is stored as
 *		an int32_t.  Takes the node lock itself, so the caller must
 *		not hold it (asserted).
 *
 * Return Values:
 *		DDI_PROP_INVAL_ARG	pip is NULL
 *		DDI_PROP_NOT_FOUND	the node has no property list
 *		otherwise the nvlist_add_int32() result mapped through
 *		i_map_nvlist_error_to_mdi()
 */
int
mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
{
	int	result = DDI_PROP_NOT_FOUND;

	if (pip == NULL)
		return (DDI_PROP_INVAL_ARG);

	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop != NULL) {
		result = i_map_nvlist_error_to_mdi(
		    nvlist_add_int32(MDI_PI(pip)->pi_prop, name,
		    (int32_t)data));
	}
	MDI_PI_UNLOCK(pip);
	return (result);
}
/*
 * mdi_prop_update_int64():
 *		Create/Update a 64 bit integer property.  Takes the node lock
 *		itself, so the caller must not hold it (asserted).
 *
 * Return Values:
 *		DDI_PROP_INVAL_ARG	pip is NULL
 *		DDI_PROP_NOT_FOUND	the node has no property list
 *		otherwise the nvlist_add_int64() result mapped through
 *		i_map_nvlist_error_to_mdi()
 */
int
mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
{
	int	result = DDI_PROP_NOT_FOUND;

	if (pip == NULL)
		return (DDI_PROP_INVAL_ARG);

	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop != NULL) {
		result = i_map_nvlist_error_to_mdi(
		    nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data));
	}
	MDI_PI_UNLOCK(pip);
	return (result);
}
/*
 * mdi_prop_update_int_array():
 *		Create/Update an int array property of 'nelements' entries;
 *		'data' is handed to the nvlist as an int32_t array.  Takes the
 *		node lock itself, so the caller must not hold it (asserted).
 *
 * Return Values:
 *		DDI_PROP_INVAL_ARG	pip is NULL
 *		DDI_PROP_NOT_FOUND	the node has no property list
 *		otherwise the nvlist_add_int32_array() result mapped through
 *		i_map_nvlist_error_to_mdi()
 */
int
mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
    uint_t nelements)
{
	int	result = DDI_PROP_NOT_FOUND;

	if (pip == NULL)
		return (DDI_PROP_INVAL_ARG);

	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop != NULL) {
		result = i_map_nvlist_error_to_mdi(
		    nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name,
		    (int32_t *)data, nelements));
	}
	MDI_PI_UNLOCK(pip);
	return (result);
}
/*
 * mdi_prop_update_string():
 *		Create/Update a string property.  Takes the node lock itself,
 *		so the caller must not hold it (asserted).
 *
 * Return Values:
 *		DDI_PROP_INVAL_ARG	pip is NULL
 *		DDI_PROP_NOT_FOUND	the node has no property list
 *		otherwise the nvlist_add_string() result mapped through
 *		i_map_nvlist_error_to_mdi()
 */
int
mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
{
	int	result = DDI_PROP_NOT_FOUND;

	if (pip == NULL)
		return (DDI_PROP_INVAL_ARG);

	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop != NULL) {
		result = i_map_nvlist_error_to_mdi(
		    nvlist_add_string(MDI_PI(pip)->pi_prop, name, data));
	}
	MDI_PI_UNLOCK(pip);
	return (result);
}
/*
 * mdi_prop_update_string_array():
 *		Create/Update a string array property of 'nelements' entries.
 *		Takes the node lock itself, so the caller must not hold it
 *		(asserted).
 *
 * Return Values:
 *		DDI_PROP_INVAL_ARG	pip is NULL
 *		DDI_PROP_NOT_FOUND	the node has no property list
 *		otherwise the nvlist_add_string_array() result mapped through
 *		i_map_nvlist_error_to_mdi()
 */
int
mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
    uint_t nelements)
{
	int	result = DDI_PROP_NOT_FOUND;

	if (pip == NULL)
		return (DDI_PROP_INVAL_ARG);

	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop != NULL) {
		result = i_map_nvlist_error_to_mdi(
		    nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
		    nelements));
	}
	MDI_PI_UNLOCK(pip);
	return (result);
}
/*
 * mdi_prop_lookup_byte():
 *		Look for the byte property identified by name; the result of
 *		the lookup is delivered through 'data'.  No lock is taken
 *		here.
 *
 * Return Values:
 *		DDI_PROP_NOT_FOUND	pip is NULL or it has no property list
 *		otherwise the nvlist_lookup_byte() result mapped through
 *		i_map_nvlist_error_to_mdi()
 */
int
mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
{
	if (pip == NULL || MDI_PI(pip)->pi_prop == NULL)
		return (DDI_PROP_NOT_FOUND);

	return (i_map_nvlist_error_to_mdi(
	    nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data)));
}
/*
 * mdi_prop_lookup_byte_array():
 *		Look for the byte array property identified by name.  The
 *		data returned is the actual property, not a copy, and is
 *		valid as long as the mdi_pathinfo_t node is alive.  No lock
 *		is taken here.
 *
 * Return Values:
 *		DDI_PROP_NOT_FOUND	pip is NULL or it has no property list
 *		otherwise the nvlist_lookup_byte_array() result mapped through
 *		i_map_nvlist_error_to_mdi()
 */
int
mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
    uint_t *nelements)
{
	if (pip == NULL || MDI_PI(pip)->pi_prop == NULL)
		return (DDI_PROP_NOT_FOUND);

	return (i_map_nvlist_error_to_mdi(
	    nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
	    nelements)));
}
/*
 * mdi_prop_lookup_int():
 *		Look for the int property identified by name; the lookup is
 *		done as an int32 and the result is delivered through 'data'.
 *		No lock is taken here.
 *
 * Return Values:
 *		DDI_PROP_NOT_FOUND	pip is NULL or it has no property list
 *		otherwise the nvlist_lookup_int32() result mapped through
 *		i_map_nvlist_error_to_mdi()
 */
int
mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
{
	if (pip == NULL || MDI_PI(pip)->pi_prop == NULL)
		return (DDI_PROP_NOT_FOUND);

	return (i_map_nvlist_error_to_mdi(
	    nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name,
	    (int32_t *)data)));
}
/*
 * mdi_prop_lookup_int64():
 *		Look for the int64 property identified by name; the result of
 *		the lookup is delivered through 'data'.  No lock is taken
 *		here.
 *
 * Return Values:
 *		DDI_PROP_NOT_FOUND	pip is NULL or it has no property list
 *		otherwise the nvlist_lookup_int64() result mapped through
 *		i_map_nvlist_error_to_mdi()
 */
int
mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
{
	if (pip == NULL || MDI_PI(pip)->pi_prop == NULL)
		return (DDI_PROP_NOT_FOUND);

	return (i_map_nvlist_error_to_mdi(
	    nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data)));
}
/*
 * mdi_prop_lookup_int_array():
 *		Look for the int array property identified by name (looked up
 *		as an int32 array).  The data returned is the actual property,
 *		not a copy, and is valid as long as the mdi_pathinfo_t node is
 *		alive.  No lock is taken here.
 *
 * Return Values:
 *		DDI_PROP_NOT_FOUND	pip is NULL or it has no property list
 *		otherwise the nvlist_lookup_int32_array() result mapped
 *		through i_map_nvlist_error_to_mdi()
 */
int
mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
    uint_t *nelements)
{
	if (pip == NULL || MDI_PI(pip)->pi_prop == NULL)
		return (DDI_PROP_NOT_FOUND);

	return (i_map_nvlist_error_to_mdi(
	    nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
	    (int32_t **)data, nelements)));
}
/*
 * mdi_prop_lookup_string():
 *		Look for the string property identified by name.  The data
 *		returned is the actual property, not a copy, and is valid as
 *		long as the mdi_pathinfo_t node is alive.  No lock is taken
 *		here.
 *
 * Return Values:
 *		DDI_PROP_NOT_FOUND	pip is NULL or it has no property list
 *		otherwise the nvlist_lookup_string() result mapped through
 *		i_map_nvlist_error_to_mdi()
 */
int
mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
{
	if (pip == NULL || MDI_PI(pip)->pi_prop == NULL)
		return (DDI_PROP_NOT_FOUND);

	return (i_map_nvlist_error_to_mdi(
	    nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data)));
}
/*
 * mdi_prop_lookup_string_array():
 *		Look for the string array property identified by name.  The
 *		data returned is the actual property, not a copy, and is
 *		valid as long as the mdi_pathinfo_t node is alive.  No lock
 *		is taken here.
 *
 * Return Values:
 *		DDI_PROP_NOT_FOUND	pip is NULL or it has no property list
 *		otherwise the nvlist_lookup_string_array() result mapped
 *		through i_map_nvlist_error_to_mdi()
 */
int
mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
    uint_t *nelements)
{
	if (pip == NULL || MDI_PI(pip)->pi_prop == NULL)
		return (DDI_PROP_NOT_FOUND);

	return (i_map_nvlist_error_to_mdi(
	    nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
	    nelements)));
}
/*
 * mdi_prop_free():
 *		Counterpart of ddi_prop_free() for the mdi_prop_lookup_xx()
 *		family.  There is nothing to release: the nvlist_lookup_xx()
 *		functions hand back a pointer to the actual property data
 *		rather than a copy, and that data stays valid for as long as
 *		the mdi_pathinfo_t node is valid.  The argument is ignored.
 *
 * Return Values:
 *		DDI_PROP_SUCCESS (always)
 */
/*ARGSUSED*/
int
mdi_prop_free(void *data)
{
	return (DDI_PROP_SUCCESS);
}
/*
 * i_mdi_report_path_state():
 *		Emit a one-line "multipath status" message describing the
 *		client state and the state of the given path.  Nothing is
 *		reported unless the client has a dev_info node with an
 *		instance other than -1 and its "report dev needed" flag is
 *		set; the flag is cleared once the message has been issued.
 *		The load balancing policy is appended only when the client is
 *		optimal and the path is online.  The client lock must be held
 *		by the caller (asserted).
 */
/*ARGSUSED*/
static void
i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	dev_info_t	*cdip = ct->ct_dip;
	char		*ct_path;
	char		*ct_status;
	char		*status;
	char		lb_buf[64];
	int		client_optimal = 0;
	int		path_online = 0;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) ||
	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
		return;
	}

	/* Textual form of the client state. */
	switch (MDI_CLIENT_STATE(ct)) {
	case MDI_CLIENT_STATE_OPTIMAL:
		ct_status = "optimal";
		client_optimal = 1;
		break;
	case MDI_CLIENT_STATE_DEGRADED:
		ct_status = "degraded";
		break;
	case MDI_CLIENT_STATE_FAILED:
		ct_status = "failed";
		break;
	default:
		ct_status = "unknown";
		break;
	}

	lb_buf[0] = 0; /* not interested in load balancing config */

	/* Textual form of the path state; the first match wins. */
	if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) {
		status = "removed";
	} else if (MDI_PI_IS_OFFLINE(pip)) {
		status = "offline";
	} else if (MDI_PI_IS_ONLINE(pip)) {
		status = "online";
		path_online = 1;
	} else if (MDI_PI_IS_STANDBY(pip)) {
		status = "standby";
	} else if (MDI_PI_IS_FAULT(pip)) {
		status = "faulted";
	} else {
		status = "unknown";
	}

	/* cdip is known to be non-NULL here (checked above). */
	ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);

	/*
	 * NOTE: Keeping "multipath status: %s" and
	 * "Load balancing: %s" format unchanged in case someone
	 * scrubs /var/adm/messages looking for these messages.
	 */
	if (client_optimal && path_online) {
		if (ct->ct_lb == LOAD_BALANCE_LBA) {
			(void) snprintf(lb_buf, sizeof (lb_buf),
			    "%s, region-size: %d", mdi_load_balance_lba,
			    ct->ct_lb_args->region_size);
		} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
			(void) snprintf(lb_buf, sizeof (lb_buf),
			    "%s", mdi_load_balance_none);
		} else {
			(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
			    mdi_load_balance_rr);
		}

		cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
		    "?%s (%s%d) multipath status: %s: "
		    "path %d %s is %s: Load balancing: %s\n",
		    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
		    ddi_get_instance(cdip), ct_status,
		    mdi_pi_get_path_instance(pip),
		    mdi_pi_spathname(pip), status, lb_buf);
	} else {
		cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
		    "?%s (%s%d) multipath status: %s: "
		    "path %d %s is %s\n",
		    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
		    ddi_get_instance(cdip), ct_status,
		    mdi_pi_get_path_instance(pip),
		    mdi_pi_spathname(pip), status);
	}

	kmem_free(ct_path, MAXPATHLEN);
	MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
}
#ifdef DEBUG
/*
* i_mdi_log():
* Utility function for error message management
*
* level cmn_err level (compared against CE_NOTE, CE_CONT, CE_WARN
* and CE_PANIC below, and handed to cmn_err)
* func name printed after the optional device prefix
* dip optional device; when non-NULL the message is prefixed
* with "<driver name><instance>: "
* fmt printf-style format; its expansion is limited to the
* 512 byte local buffer
*
* NOTE: Implementation takes care of trailing \n for cmn_err,
* MDI_DEBUG should not terminate fmt strings with \n.
*
* NOTE: If the level is >= 2, and there is no leading !?^
* then a leading ! is implied (but can be overridden via
* mdi_debug_consoleonly). If you are using kmdb on the console,
* consider setting mdi_debug_consoleonly to 1 as an aid.
*/
/*PRINTFLIKE4*/
static void
i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
{
char name[MAXNAMELEN];
char buf[512];
char *bp;
va_list ap;
int log_only = 0;
int boot_only = 0;
int console_only = 0;
/* Build the "<driver><instance>: " prefix, or an empty one. */
if (dip) {
(void) snprintf(name, sizeof(name), "%s%d: ",
ddi_driver_name(dip), ddi_get_instance(dip));
} else {
name[0] = 0;
}
va_start(ap, fmt);
(void) vsnprintf(buf, sizeof(buf), fmt, ap);
va_end(ap);
/*
* A leading '!', '?' or '^' in the expanded message selects the
* destination and is stripped (bp skips it); it is re-added in
* front of the "mdi: " prefix when the message is emitted.
*/
switch (buf[0]) {
case '!':
bp = &buf[1];
log_only = 1;
break;
case '?':
bp = &buf[1];
boot_only = 1;
break;
case '^':
bp = &buf[1];
console_only = 1;
break;
default:
if (level >= 2)
log_only = 1; /* ! implied */
bp = buf;
break;
}
/* Global override: force everything to the log only. */
if (mdi_debug_logonly) {
log_only = 1;
boot_only = 0;
console_only = 0;
}
/*
* Global override: force everything to the console only. This
* takes precedence over mdi_debug_logonly, forces the level to
* CE_NOTE and jumps straight to the "console" label below, which
* emits the message without an explicit trailing newline.
*/
if (mdi_debug_consoleonly) {
log_only = 0;
boot_only = 0;
console_only = 1;
level = CE_NOTE;
goto console;
}
switch (level) {
case CE_NOTE:
/* CE_NOTE is issued as CE_CONT with an explicit newline. */
level = CE_CONT;
/* FALLTHROUGH */
case CE_CONT:
if (boot_only) {
cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp);
} else if (console_only) {
cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp);
} else if (log_only) {
cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp);
} else {
cmn_err(level, "mdi: %s%s: %s\n", name, func, bp);
}
break;
case CE_WARN:
case CE_PANIC:
console:
/* Also the target of the mdi_debug_consoleonly goto above. */
if (boot_only) {
cmn_err(level, "?mdi: %s%s: %s", name, func, bp);
} else if (console_only) {
cmn_err(level, "^mdi: %s%s: %s", name, func, bp);
} else if (log_only) {
cmn_err(level, "!mdi: %s%s: %s", name, func, bp);
} else {
cmn_err(level, "mdi: %s%s: %s", name, func, bp);
}
break;
default:
/* Any other level: no destination prefix and func is omitted. */
cmn_err(level, "mdi: %s%s", name, bp);
break;
}
}
#endif /* DEBUG */
/*
 * i_mdi_client_online():
 *		Client online notification.  Under the client lock: mark the
 *		client online, restore its binding to the dev_info node,
 *		power up the pHCIs if the client's power count is zero, and
 *		take one PM hold on the client.  The dev_info node must have
 *		a client attached (asserted).
 */
void
i_mdi_client_online(dev_info_t *ct_dip)
{
	mdi_client_t	*ct = i_devi_get_client(ct_dip);

	ASSERT(ct != NULL);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_SET_ONLINE(ct);

	/* catch for any memory leaks */
	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
	ct->ct_dip = ct_dip;

	if (ct->ct_power_cnt == 0) {
		(void) i_mdi_power_all_phci(ct);
	}

	MDI_DEBUG(4, (MDI_NOTE, ct_dip,
	    "i_mdi_pm_hold_client %p", (void *)ct));
	i_mdi_pm_hold_client(ct, 1);

	MDI_CLIENT_UNLOCK(ct);
}
/*
 * i_mdi_phci_online():
 *		pHCI online notification: mark the pHCI online while holding
 *		its lock.  The dev_info node must have a pHCI attached
 *		(asserted).
 */
void
i_mdi_phci_online(dev_info_t *ph_dip)
{
	mdi_phci_t	*ph = i_devi_get_phci(ph_dip);

	ASSERT(ph != NULL);

	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_ONLINE(ph);
	MDI_PHCI_UNLOCK(ph);
}
/*
 * mdi_devi_online():
 *		Online notification from NDI framework on pHCI/client
 *		device online.  A node may be both a pHCI and a client; the
 *		pHCI side is handled first.  'flags' is unused.
 * Return Values:
 *		NDI_SUCCESS (always)
 */
/*ARGSUSED*/
int
mdi_devi_online(dev_info_t *dip, uint_t flags)
{
	if (MDI_PHCI(dip))
		i_mdi_phci_online(dip);

	if (MDI_CLIENT(dip))
		i_mdi_client_online(dip);

	return (NDI_SUCCESS);
}
/*
 * mdi_devi_offline():
 *		Offline notification from NDI framework on pHCI/Client device
 *		offline.  The client side (if any) is offlined first and a
 *		failure there is returned immediately.  If the pHCI side then
 *		fails and the node is also a client, the client is put back
 *		online before the failure is returned.
 *
 * Return Values:
 *		NDI_SUCCESS
 *		otherwise whatever i_mdi_client_offline() or
 *		i_mdi_phci_offline() reported
 */
/*ARGSUSED*/
int
mdi_devi_offline(dev_info_t *dip, uint_t flags)
{
	int	rv;

	if (MDI_CLIENT(dip)) {
		rv = i_mdi_client_offline(dip, flags);
		if (rv != NDI_SUCCESS)
			return (rv);
	}

	if (!MDI_PHCI(dip))
		return (NDI_SUCCESS);

	rv = i_mdi_phci_offline(dip, flags);
	if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
		/* set client back online */
		i_mdi_client_online(dip);
	}
	return (rv);
}
/*
* i_mdi_phci_offline():
* Offline the pHCI attached to dip together with all of its paths.
*
* The work is done in phases:
* 1. Bail out early: no pHCI or pHCI already offline -> NDI_SUCCESS;
* pHCI unstable -> NDI_BUSY.
* 2. Walk the path list. A client with failover in progress or
* marked unstable fails the request with NDI_BUSY. For a client
* whose dev_info node is at least DS_INITIALIZED and for which
* i_mdi_client_compute_state() reports MDI_CLIENT_STATE_FAILED,
* the locks are dropped and the client device is offlined.
* 3. If one of those client offlines failed, walk the paths that
* precede the failing one, re-online or offline their clients
* according to the client state, and return NDI_BUSY.
* 4. Otherwise mark the pHCI offline and every path as offlining,
* wait via delay_random(mdi_delay), then offline each path. A
* path that does not end up offline puts the pHCI back online
* and returns NDI_BUSY.
*
* Return Values:
* NDI_SUCCESS
* NDI_BUSY
*/
/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
int rv = NDI_SUCCESS;
mdi_phci_t *ph;
mdi_client_t *ct;
mdi_pathinfo_t *pip;
mdi_pathinfo_t *next;
mdi_pathinfo_t *failed_pip = NULL;
dev_info_t *cdip;
/*
* pHCI component offline notification
* Make sure that this pHCI instance is free to be offlined.
* If it is OK to proceed, Offline and remove all the child
* mdi_pathinfo nodes. This process automatically offlines
* corresponding client devices, for which this pHCI provides
* critical services.
*/
ph = i_devi_get_phci(dip);
MDI_DEBUG(2, (MDI_NOTE, dip,
"called %p %p", (void *)dip, (void *)ph));
if (ph == NULL) {
/* rv is still NDI_SUCCESS here */
return (rv);
}
MDI_PHCI_LOCK(ph);
if (MDI_PHCI_IS_OFFLINE(ph)) {
MDI_DEBUG(1, (MDI_WARN, dip,
"!pHCI already offlined: %p", (void *)dip));
MDI_PHCI_UNLOCK(ph);
return (NDI_SUCCESS);
}
/*
* Check to see if the pHCI can be offlined
*/
if (ph->ph_unstable) {
MDI_DEBUG(1, (MDI_WARN, dip,
"!One or more target devices are in transient state. "
"This device can not be removed at this moment. "
"Please try again later."));
MDI_PHCI_UNLOCK(ph);
return (NDI_BUSY);
}
pip = ph->ph_path_head;
while (pip != NULL) {
MDI_PI_LOCK(pip);
/* Sample the successor while the path lock is held. */
next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
/*
* The mdi_pathinfo state is OK. Check the client state.
* If failover in progress fail the pHCI from offlining
*/
ct = MDI_PI(pip)->pi_client;
i_mdi_client_lock(ct, pip);
if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
(ct->ct_unstable)) {
/*
* Failover is in progress, Fail the DR
*/
MDI_DEBUG(1, (MDI_WARN, dip,
"!pHCI device is busy. "
"This device can not be removed at this moment. "
"Please try again later."));
MDI_PI_UNLOCK(pip);
i_mdi_client_unlock(ct);
MDI_PHCI_UNLOCK(ph);
return (NDI_BUSY);
}
MDI_PI_UNLOCK(pip);
/*
* Check to see of we are removing the last path of this
* client device...
*/
cdip = ct->ct_dip;
if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
(i_mdi_client_compute_state(ct, ph) ==
MDI_CLIENT_STATE_FAILED)) {
/*
* Both the client and the pHCI lock are dropped around
* ndi_devi_offline().
* NOTE(review): 'next' was sampled before the pHCI lock
* was dropped - confirm the path list cannot change in
* the meantime.
*/
i_mdi_client_unlock(ct);
MDI_PHCI_UNLOCK(ph);
if (ndi_devi_offline(cdip,
NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
/*
* ndi_devi_offline() failed.
* This pHCI provides the critical path
* to one or more client devices.
* Return busy.
*/
MDI_PHCI_LOCK(ph);
MDI_DEBUG(1, (MDI_WARN, dip,
"!pHCI device is busy. "
"This device can not be removed at this "
"moment. Please try again later."));
/* Remember where to stop the rollback walk below. */
failed_pip = pip;
break;
} else {
MDI_PHCI_LOCK(ph);
pip = next;
}
} else {
i_mdi_client_unlock(ct);
pip = next;
}
}
if (failed_pip) {
/*
* Rollback: revisit every path that precedes the one whose
* client could not be offlined. The pHCI lock is held on entry
* (re-taken before the break above).
*/
pip = ph->ph_path_head;
while (pip != failed_pip) {
MDI_PI_LOCK(pip);
next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
ct = MDI_PI(pip)->pi_client;
i_mdi_client_lock(ct, pip);
cdip = ct->ct_dip;
switch (MDI_CLIENT_STATE(ct)) {
case MDI_CLIENT_STATE_OPTIMAL:
case MDI_CLIENT_STATE_DEGRADED:
/* Client still has usable paths: bring it back online. */
if (cdip) {
MDI_PI_UNLOCK(pip);
i_mdi_client_unlock(ct);
MDI_PHCI_UNLOCK(ph);
(void) ndi_devi_online(cdip, 0);
MDI_PHCI_LOCK(ph);
pip = next;
continue;
}
break;
case MDI_CLIENT_STATE_FAILED:
/* Client state is failed: offline its device node. */
if (cdip) {
MDI_PI_UNLOCK(pip);
i_mdi_client_unlock(ct);
MDI_PHCI_UNLOCK(ph);
(void) ndi_devi_offline(cdip,
NDI_DEVFS_CLEAN);
MDI_PHCI_LOCK(ph);
pip = next;
continue;
}
break;
}
/* Reached when cdip is NULL or the state matched no case. */
MDI_PI_UNLOCK(pip);
i_mdi_client_unlock(ct);
pip = next;
}
MDI_PHCI_UNLOCK(ph);
return (NDI_BUSY);
}
/*
* Mark the pHCI as offline
*/
MDI_PHCI_SET_OFFLINE(ph);
/*
* Mark the child mdi_pathinfo nodes as transient
*/
pip = ph->ph_path_head;
while (pip != NULL) {
MDI_PI_LOCK(pip);
next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
MDI_PI_SET_OFFLINING(pip);
MDI_PI_UNLOCK(pip);
pip = next;
}
MDI_PHCI_UNLOCK(ph);
/*
* Give a chance for any pending commands to execute
*/
delay_random(mdi_delay);
MDI_PHCI_LOCK(ph);
pip = ph->ph_path_head;
while (pip != NULL) {
/* Here the successor is read without taking the path lock. */
next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
(void) i_mdi_pi_offline(pip, flags);
MDI_PI_LOCK(pip);
/* ct is assigned here but not read again in this loop. */
ct = MDI_PI(pip)->pi_client;
if (!MDI_PI_IS_OFFLINE(pip)) {
MDI_DEBUG(1, (MDI_WARN, dip,
"!pHCI device is busy. "
"This device can not be removed at this moment. "
"Please try again later."));
MDI_PI_UNLOCK(pip);
/* Undo the offline marking of the pHCI itself. */
MDI_PHCI_SET_ONLINE(ph);
MDI_PHCI_UNLOCK(ph);
return (NDI_BUSY);
}
MDI_PI_UNLOCK(pip);
pip = next;
}
MDI_PHCI_UNLOCK(ph);
return (rv);
}
/*
 * mdi_phci_mark_retiring():
 *		Walk the paths of the pHCI attached to dip and call
 *		e_ddi_mark_retiring() on every client device for which this
 *		pHCI provides the last path, i.e. whose dev_info node is at
 *		least DS_INITIALIZED and for which
 *		i_mdi_client_compute_state() reports MDI_CLIENT_STATE_FAILED.
 *		Nothing is done when dip is not a pHCI, has no pHCI
 *		structure, or the pHCI is already offline.
 */
void
mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;
	int		last_path;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL)
		return;

	MDI_PHCI_LOCK(ph);
	if (MDI_PHCI_IS_OFFLINE(ph)) {
		/* has no last path */
		MDI_PHCI_UNLOCK(ph);
		return;
	}

	for (pip = ph->ph_path_head; pip != NULL; pip = next) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		MDI_PI_UNLOCK(pip);

		/* Decide under the client lock, then drop it. */
		cdip = ct->ct_dip;
		last_path = (cdip &&
		    (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED));
		i_mdi_client_unlock(ct);

		if (!last_path)
			continue;

		/* Last path. Mark client dip as retiring */
		MDI_PHCI_UNLOCK(ph);
		(void) e_ddi_mark_retiring(cdip, cons_array);
		MDI_PHCI_LOCK(ph);
	}

	MDI_PHCI_UNLOCK(ph);
}
/*
 * mdi_phci_retire_notify():
 *		Walk the paths of the pHCI attached to dip and call
 *		e_ddi_retire_notify() on every client device for which this
 *		pHCI provides the last path.  *constraint is set to 0 when
 *		the constraints cannot be checked: the pHCI is unstable, or a
 *		client is unstable or has a failover in progress.  Nothing is
 *		done when dip is not a pHCI, has no pHCI structure, or the
 *		pHCI is already offline.
 */
void
mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;
	int		last_path;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL)
		return;

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_PHCI_UNLOCK(ph);
		/* not last path */
		return;
	}

	if (ph->ph_unstable) {
		MDI_PHCI_UNLOCK(ph);
		/* can't check for constraints */
		*constraint = 0;
		return;
	}

	for (pip = ph->ph_path_head; pip != NULL; pip = next) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, can't check for constraints
			 */
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			*constraint = 0;
			return;
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see if we are retiring the last path of this
		 * client device.  Decide under the client lock, then drop it.
		 */
		cdip = ct->ct_dip;
		last_path = (cdip &&
		    (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED));
		i_mdi_client_unlock(ct);

		if (!last_path)
			continue;

		MDI_PHCI_UNLOCK(ph);
		(void) e_ddi_retire_notify(cdip, constraint);
		MDI_PHCI_LOCK(ph);
	}

	MDI_PHCI_UNLOCK(ph);
}
/*
* mdi_phci_retire_finalize():
* offline the path(s) hanging off the pHCI. If the
* last path to any client, check that constraints
* have been applied.
*
* If constraint is 0, we aren't going to retire the
* pHCI. However we still need to go through the paths
* calling e_ddi_retire_finalize() to clear their
* contract barriers.
*
* dip pHCI dev_info node; nothing is done if it is not a pHCI
* phci_only when non-zero the per-client e_ddi_retire_finalize()
* calls are skipped and the constraint value is not consulted
* constraint read as a pointer to int
*
* Failures are reported with cmn_err(CE_WARN, ...) only; the function
* returns nothing.
*/
void
mdi_phci_retire_finalize(dev_info_t *dip, int phci_only, void *constraint)
{
mdi_phci_t *ph;
mdi_client_t *ct;
mdi_pathinfo_t *pip;
mdi_pathinfo_t *next;
dev_info_t *cdip;
int unstable = 0;
int tmp_constraint;
if (!MDI_PHCI(dip))
return;
ph = i_devi_get_phci(dip);
if (ph == NULL) {
/* no last path and no pips */
return;
}
MDI_PHCI_LOCK(ph);
if (MDI_PHCI_IS_OFFLINE(ph)) {
MDI_PHCI_UNLOCK(ph);
/* no last path and no pips */
return;
}
/*
* Check to see if the pHCI can be offlined
*/
if (ph->ph_unstable) {
unstable = 1;
}
/*
* First pass: note any instability and finalize the clients for
* which this pHCI is the last path. Unlike the offline path this
* walk does not stop early; 'unstable' is only acted upon later.
*/
pip = ph->ph_path_head;
while (pip != NULL) {
MDI_PI_LOCK(pip);
next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
/*
* if failover in progress fail the pHCI from offlining
*/
ct = MDI_PI(pip)->pi_client;
i_mdi_client_lock(ct, pip);
if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
(ct->ct_unstable)) {
unstable = 1;
}
MDI_PI_UNLOCK(pip);
/*
* Check to see of we are removing the last path of this
* client device...
*/
cdip = ct->ct_dip;
if (!phci_only && cdip &&
(i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
(i_mdi_client_compute_state(ct, ph) ==
MDI_CLIENT_STATE_FAILED)) {
i_mdi_client_unlock(ct);
MDI_PHCI_UNLOCK(ph);
/*
* This is the last path to this client.
*
* Constraint will only be set to 1 if this client can
* be retired (as already determined by
* mdi_phci_retire_notify). However we don't actually
* need to retire the client (we just retire the last
* path - MPXIO will then fail all I/Os to the client).
* But we still need to call e_ddi_retire_finalize so
* the contract barriers can be cleared. Therefore we
* temporarily set constraint = 0 so that the client
* dip is not retired.
*/
tmp_constraint = 0;
(void) e_ddi_retire_finalize(cdip, &tmp_constraint);
MDI_PHCI_LOCK(ph);
pip = next;
} else {
i_mdi_client_unlock(ct);
pip = next;
}
}
/*
* Constraint of 0: the pHCI is not going to be retired, so stop
* after the finalize calls above. Not checked when phci_only is set.
*/
if (!phci_only && *((int *)constraint) == 0) {
MDI_PHCI_UNLOCK(ph);
return;
}
/*
* Cannot offline pip(s)
*/
if (unstable) {
cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: "
"pHCI in transient state, cannot retire",
ddi_driver_name(dip), ddi_get_instance(dip));
MDI_PHCI_UNLOCK(ph);
return;
}
/*
* Mark the pHCI as offline
*/
MDI_PHCI_SET_OFFLINE(ph);
/*
* Mark the child mdi_pathinfo nodes as transient
*/
pip = ph->ph_path_head;
while (pip != NULL) {
MDI_PI_LOCK(pip);
next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
MDI_PI_SET_OFFLINING(pip);
MDI_PI_UNLOCK(pip);
pip = next;
}
MDI_PHCI_UNLOCK(ph);
/*
* Give a chance for any pending commands to execute
*/
delay_random(mdi_delay);
MDI_PHCI_LOCK(ph);
pip = ph->ph_path_head;
while (pip != NULL) {
/* Here the successor is read without taking the path lock. */
next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
(void) i_mdi_pi_offline(pip, 0);
MDI_PI_LOCK(pip);
/* ct is assigned here but not read again in this loop. */
ct = MDI_PI(pip)->pi_client;
if (!MDI_PI_IS_OFFLINE(pip)) {
cmn_err(CE_WARN, "mdi_phci_retire_finalize: "
"path %d %s busy, cannot offline",
mdi_pi_get_path_instance(pip),
mdi_pi_spathname(pip));
MDI_PI_UNLOCK(pip);
/* Undo the offline marking of the pHCI itself. */
MDI_PHCI_SET_ONLINE(ph);
MDI_PHCI_UNLOCK(ph);
return;
}
MDI_PI_UNLOCK(pip);
pip = next;
}
MDI_PHCI_UNLOCK(ph);
return;
}
/*
 * mdi_phci_unretire():
 *		Bring the pHCI attached to dip back online and then call
 *		i_mdi_pi_online() on each of its paths.  dip must be a pHCI
 *		(asserted).
 */
void
mdi_phci_unretire(dev_info_t *dip)
{
	mdi_phci_t	*ph;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;

	ASSERT(MDI_PHCI(dip));

	/*
	 * Online the phci
	 */
	i_mdi_phci_online(dip);

	ph = i_devi_get_phci(dip);
	MDI_PHCI_LOCK(ph);
	for (pip = ph->ph_path_head; pip != NULL; pip = next) {
		/* Sample the successor under the path lock. */
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_UNLOCK(pip);

		(void) i_mdi_pi_online(pip, 0);
	}
	MDI_PHCI_UNLOCK(ph);
}
/*
 * i_mdi_client_offline():
 *		Client component to go offline.  The request is refused while
 *		one or more paths are in a transient state (ct_unstable) or a
 *		failover is in progress; otherwise the client is marked
 *		offline and, when NDI_DEVI_REMOVE is set in flags, its binding
 *		to the dev_info node is dropped.
 *
 * Return Values:
 *		NDI_SUCCESS	client offlined, or dip has no client
 *		NDI_BUSY	client is unstable or failing over
 */
/*ARGSUSED*/
static int
i_mdi_client_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(dip);
	MDI_DEBUG(2, (MDI_NOTE, dip,
	    "called %p %p", (void *)dip, (void *)ct));
	if (ct == NULL)
		return (rv);

	MDI_CLIENT_LOCK(ct);
	if (ct->ct_unstable) {
		/*
		 * One or more paths are in transient state,
		 * Dont allow offline of a client device
		 */
		MDI_DEBUG(1, (MDI_WARN, dip,
		    "!One or more paths to "
		    "this device are in transient state. "
		    "This device can not be removed at this moment. "
		    "Please try again later."));
		rv = NDI_BUSY;
	} else if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
		/*
		 * Failover is in progress, Dont allow DR of
		 * a client device
		 */
		MDI_DEBUG(1, (MDI_WARN, dip,
		    "!Client device is Busy. "
		    "This device can not be removed at this moment. "
		    "Please try again later."));
		rv = NDI_BUSY;
	} else {
		MDI_CLIENT_SET_OFFLINE(ct);

		/*
		 * Unbind our relationship with the dev_info node
		 */
		if (flags & NDI_DEVI_REMOVE) {
			ct->ct_dip = NULL;
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}
/*
 * mdi_pre_attach():
 *		Pre attach() notification handler.  The old DDI_PM_RESUME
 *		command is not supported on any node that is an MDI component.
 *
 * Return Values:
 *		DDI_FAILURE	DDI_PM_RESUME requested on an MDI component
 *		DDI_SUCCESS	otherwise
 */
/*ARGSUSED*/
int
mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	/* Nodes that are not MDI components are never refused. */
	if (DEVI(dip)->devi_mdi_component == MDI_COMPONENT_NONE)
		return (DDI_SUCCESS);

	/* don't support old DDI_PM_RESUME */
	return ((cmd == DDI_PM_RESUME) ? DDI_FAILURE : DDI_SUCCESS);
}
/*
 * mdi_post_attach():
 *		Post attach() notification handler.  Records the outcome of a
 *		DDI_ATTACH or DDI_RESUME in the MDI state of the node; a node
 *		may be a pHCI, a client, or both.  Other commands are ignored.
 *
 *		dip	node that was attached/resumed
 *		cmd	DDI_ATTACH or DDI_RESUME
 *		error	result of the attach(); DDI_SUCCESS on success
 *
 *		pHCI:	attach success -> attached, failure -> detached;
 *			resume success -> resumed, failure -> suspended.
 *		client:	attach failure -> detached and PM state reset;
 *			attach success -> vHCI notified through
 *			vo_client_attached (if provided), then attached;
 *			resume success -> resumed, failure -> suspended.
 */
/*ARGSUSED*/
void
mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_vhci_t	*vh;

	if (MDI_PHCI(dip)) {
		ph = i_devi_get_phci(dip);
		ASSERT(ph != NULL);

		MDI_PHCI_LOCK(ph);
		switch (cmd) {
		case DDI_ATTACH:
			MDI_DEBUG(2, (MDI_NOTE, dip,
			    "phci post_attach called %p", (void *)ph));
			if (error == DDI_SUCCESS) {
				MDI_PHCI_SET_ATTACH(ph);
			} else {
				MDI_DEBUG(1, (MDI_NOTE, dip,
				    "!pHCI post_attach failed: error %d",
				    error));
				MDI_PHCI_SET_DETACH(ph);
			}
			break;

		case DDI_RESUME:
			MDI_DEBUG(2, (MDI_NOTE, dip,
			    "pHCI post_resume: called %p", (void *)ph));
			if (error == DDI_SUCCESS) {
				MDI_PHCI_SET_RESUME(ph);
			} else {
				MDI_DEBUG(1, (MDI_NOTE, dip,
				    "!pHCI post_resume failed: error %d",
				    error));
				MDI_PHCI_SET_SUSPEND(ph);
			}
			break;

		default:
			/* No MDI state to update for other commands. */
			break;
		}
		MDI_PHCI_UNLOCK(ph);
	}

	if (MDI_CLIENT(dip)) {
		ct = i_devi_get_client(dip);
		ASSERT(ct != NULL);

		MDI_CLIENT_LOCK(ct);
		switch (cmd) {
		case DDI_ATTACH:
			MDI_DEBUG(2, (MDI_NOTE, dip,
			    "client post_attach called %p", (void *)ct));
			if (error != DDI_SUCCESS) {
				MDI_DEBUG(1, (MDI_NOTE, dip,
				    "!client post_attach failed: error %d",
				    error));
				MDI_CLIENT_SET_DETACH(ct);
				MDI_DEBUG(4, (MDI_WARN, dip,
				    "i_mdi_pm_reset_client"));
				i_mdi_pm_reset_client(ct);
				break;
			}

			/*
			 * Client device has successfully attached, inform
			 * the vhci.
			 */
			vh = ct->ct_vhci;
			if (vh->vh_ops->vo_client_attached)
				(*vh->vh_ops->vo_client_attached)(dip);

			MDI_CLIENT_SET_ATTACH(ct);
			break;

		case DDI_RESUME:
			/*
			 * This message used to read "client post_attach",
			 * which made a resume indistinguishable from an
			 * attach in the debug log.
			 */
			MDI_DEBUG(2, (MDI_NOTE, dip,
			    "client post_resume: called %p", (void *)ct));
			if (error == DDI_SUCCESS) {
				MDI_CLIENT_SET_RESUME(ct);
			} else {
				MDI_DEBUG(1, (MDI_NOTE, dip,
				    "!client post_resume failed: error %d",
				    error));
				MDI_CLIENT_SET_SUSPEND(ct);
			}
			break;

		default:
			/* No MDI state to update for other commands. */
			break;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
}
/*
 * mdi_pre_detach():
 *		Pre detach notification handler.  The client side (if any) is
 *		notified first and its result is deliberately ignored; the
 *		value returned comes from the pHCI side alone.
 *
 * Return Values:
 *		DDI_SUCCESS	dip is not a pHCI
 *		otherwise the i_mdi_phci_pre_detach() result
 */
/*ARGSUSED*/
int
mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	if (MDI_CLIENT(dip))
		(void) i_mdi_client_pre_detach(dip, cmd);

	if (!MDI_PHCI(dip))
		return (DDI_SUCCESS);

	return (i_mdi_phci_pre_detach(dip, cmd));
}
/*
* i_mdi_phci_pre_detach():
* pHCI side of the pre detach notification.
*
* DDI_DETACH: allowed only when the pHCI is already offline, in
* which case it is marked detached.
* DDI_SUSPEND: every client reachable through this pHCI that is
* neither detached nor suspended is suspended first via
* devi_detach(cdip, DDI_SUSPEND). If one of them fails, the
* clients on the preceding paths that report suspended are resumed
* again and the failure is returned; otherwise the pHCI is marked
* suspended.
* Any other command fails.
*
* Return Values:
* DDI_SUCCESS (also when dip has no pHCI structure)
* DDI_FAILURE
* otherwise the failing devi_detach() result
*/
/*ARGSUSED*/
static int
i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
int rv = DDI_SUCCESS;
mdi_phci_t *ph;
mdi_client_t *ct;
mdi_pathinfo_t *pip;
mdi_pathinfo_t *failed_pip = NULL;
mdi_pathinfo_t *next;
ph = i_devi_get_phci(dip);
if (ph == NULL) {
return (rv);
}
MDI_PHCI_LOCK(ph);
switch (cmd) {
case DDI_DETACH:
MDI_DEBUG(2, (MDI_NOTE, dip,
"pHCI pre_detach: called %p", (void *)ph));
if (!MDI_PHCI_IS_OFFLINE(ph)) {
/*
* mdi_pathinfo nodes are still attached to
* this pHCI. Fail the detach for this pHCI.
*/
MDI_DEBUG(2, (MDI_WARN, dip,
"pHCI pre_detach: paths are still attached %p",
(void *)ph));
rv = DDI_FAILURE;
break;
}
MDI_PHCI_SET_DETACH(ph);
break;
case DDI_SUSPEND:
/*
* pHCI is getting suspended. Since mpxio client
* devices may not be suspended at this point, to avoid
* a potential stack overflow, it is important to suspend
* client devices before pHCI can be suspended.
*/
MDI_DEBUG(2, (MDI_NOTE, dip,
"pHCI pre_suspend: called %p", (void *)ph));
/*
* Suspend all the client devices accessible through this pHCI
*/
pip = ph->ph_path_head;
while (pip != NULL && rv == DDI_SUCCESS) {
dev_info_t *cdip;
MDI_PI_LOCK(pip);
next =
(mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
ct = MDI_PI(pip)->pi_client;
i_mdi_client_lock(ct, pip);
cdip = ct->ct_dip;
MDI_PI_UNLOCK(pip);
if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
/*
* The client lock is dropped before devi_detach().
* NOTE(review): the pHCI lock taken above is not
* released around this call - confirm that is
* intended.
*/
i_mdi_client_unlock(ct);
if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
DDI_SUCCESS) {
/*
* Suspend of one of the client
* device has failed.
*/
MDI_DEBUG(1, (MDI_WARN, dip,
"!suspend of device (%s%d) failed.",
ddi_driver_name(cdip),
ddi_get_instance(cdip)));
/* Remember where to stop the rollback walk below. */
failed_pip = pip;
break;
}
} else {
i_mdi_client_unlock(ct);
}
pip = next;
}
if (rv == DDI_SUCCESS) {
/*
* Suspend of client devices is complete. Proceed
* with pHCI suspend.
*/
MDI_PHCI_SET_SUSPEND(ph);
} else {
/*
* Revert back all the suspended client device states
* to converse.
*/
pip = ph->ph_path_head;
/* Only paths that precede the failing one are revisited. */
while (pip != failed_pip) {
dev_info_t *cdip;
MDI_PI_LOCK(pip);
next =
(mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
ct = MDI_PI(pip)->pi_client;
i_mdi_client_lock(ct, pip);
cdip = ct->ct_dip;
MDI_PI_UNLOCK(pip);
if (MDI_CLIENT_IS_SUSPENDED(ct)) {
i_mdi_client_unlock(ct);
(void) devi_attach(cdip, DDI_RESUME);
} else {
i_mdi_client_unlock(ct);
}
pip = next;
}
}
break;
default:
/* Only DDI_DETACH and DDI_SUSPEND are handled. */
rv = DDI_FAILURE;
break;
}
MDI_PHCI_UNLOCK(ph);
return (rv);
}
/*
 * i_mdi_client_pre_detach():
 *		Client side of the pre detach notification.  Under the client
 *		lock the client is marked detached for DDI_DETACH or
 *		suspended for DDI_SUSPEND.
 *
 * Return Values:
 *		DDI_SUCCESS	state updated, or dip has no client
 *		DDI_FAILURE	cmd is neither DDI_DETACH nor DDI_SUSPEND
 */
/*ARGSUSED*/
static int
i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_client_t	*ct = i_devi_get_client(dip);

	if (ct == NULL)
		return (rv);

	MDI_CLIENT_LOCK(ct);
	if (cmd == DDI_DETACH) {
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "client pre_detach: called %p",
		    (void *)ct));
		MDI_CLIENT_SET_DETACH(ct);
	} else if (cmd == DDI_SUSPEND) {
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "client pre_suspend: called %p",
		    (void *)ct));
		MDI_CLIENT_SET_SUSPEND(ct);
	} else {
		rv = DDI_FAILURE;
	}
	MDI_CLIENT_UNLOCK(ct);

	return (rv);
}
/*
 * mdi_post_detach():
 *	Post detach/suspend notification handler.  Forwards the outcome
 *	(error) of the operation to the pHCI and/or client specific handler,
 *	depending on which MDI roles dip carries.
 */
/*ARGSUSED*/
void
mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	/* The two role checks are independent; each one is tested. */
	if (MDI_PHCI(dip)) {
		i_mdi_phci_post_detach(dip, cmd, error);
	}

	if (MDI_CLIENT(dip)) {
		i_mdi_client_post_detach(dip, cmd, error);
	}
}
/*
 * i_mdi_phci_post_detach():
 *	pHCI side of the post detach/suspend notification.  When the
 *	operation failed (error != DDI_SUCCESS) the pHCI is put back into
 *	the converse state: attached for DDI_DETACH, resumed for
 *	DDI_SUSPEND.  Other commands, and a dip without a pHCI structure,
 *	are ignored.
 */
/*ARGSUSED*/
static void
i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_phci_t	*phci = i_devi_get_phci(dip);

	if (phci == NULL)
		return;

	MDI_PHCI_LOCK(phci);
	if (cmd == DDI_DETACH) {
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "pHCI post_detach: called %p",
		    (void *)phci));
		if (error != DDI_SUCCESS) {
			/* detach failed: the pHCI is still attached */
			MDI_PHCI_SET_ATTACH(phci);
		}
	} else if (cmd == DDI_SUSPEND) {
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "pHCI post_suspend: called %p",
		    (void *)phci));
		if (error != DDI_SUCCESS) {
			/* suspend failed: the pHCI keeps running */
			MDI_PHCI_SET_RESUME(phci);
		}
	}
	MDI_PHCI_UNLOCK(phci);
}
/*
* i_mdi_client_post_detach():
* Client side of the post detach/suspend notification.
*
* dip: client devinfo node
* cmd: DDI_DETACH or DDI_SUSPEND; any other command changes nothing
* error: outcome of the detach/suspend attempt
*
* Nothing is done when dip has no client structure.
*/
/*ARGSUSED*/
static void
i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
mdi_client_t *ct;
ct = i_devi_get_client(dip);
if (ct == NULL) {
return;
}
MDI_CLIENT_LOCK(ct);
/*
* If the detach/suspend of the client failed, restore the converse
* (attached/resumed) state.
*/
switch (cmd) {
case DDI_DETACH:
MDI_DEBUG(2, (MDI_NOTE, dip,
"client post_detach: called %p", (void *)ct));
/*
* The power accounting below is done whether or not the detach
* succeeded: drop one hold per path if the node is marked as
* attaching, otherwise reset the client's power count entirely.
*/
if (DEVI_IS_ATTACHING(dip)) {
MDI_DEBUG(4, (MDI_NOTE, dip,
"i_mdi_pm_rele_client\n"));
i_mdi_pm_rele_client(ct, ct->ct_path_count);
} else {
MDI_DEBUG(4, (MDI_NOTE, dip,
"i_mdi_pm_reset_client\n"));
i_mdi_pm_reset_client(ct);
}
/* detach failed: the client is still attached */
if (error != DDI_SUCCESS)
MDI_CLIENT_SET_ATTACH(ct);
break;
case DDI_SUSPEND:
MDI_DEBUG(2, (MDI_NOTE, dip,
"called %p", (void *)ct));
/* suspend failed: the client keeps running */
if (error != DDI_SUCCESS)
MDI_CLIENT_SET_RESUME(ct);
break;
}
MDI_CLIENT_UNLOCK(ct);
}
/*
 * mdi_pi_kstat_exists():
 *	Return 1 if per-path kstats are attached to this pathinfo node,
 *	0 otherwise.
 */
int
mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
{
	if (MDI_PI(pip)->pi_kstats) {
		return (1);
	}
	return (0);
}
/*
* mdi_pi_kstat_create():
* Create and install per-path (client - pHCI) statistics.
* I/O stats supported: nread, nwritten, reads, and writes
* Error stats - hard errors, soft errors, & transport errors
*
* pip: pathinfo node the kstats are attached to
* ksname: name of the I/O kstat. NOTE: ",err" is appended to this
* buffer in place to form the name of the error kstat, so the buffer
* is modified and must have room for four more characters
* (NOTE(review): the required buffer size is not checked here -
* confirm against callers).
*
* Return Values:
* MDI_SUCCESS kstats already existed or were created and installed
* MDI_FAILURE either kstat_create() call failed
*/
int
mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
{
kstat_t *kiosp, *kerrsp;
struct pi_errs *nsp;
struct mdi_pi_kstats *mdi_statp;
/* nothing to do if this path already has kstats */
if (MDI_PI(pip)->pi_kstats != NULL)
return (MDI_SUCCESS);
/* I/O kstat: module "mdi", instance 0, class "iopath" */
if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
return (MDI_FAILURE);
}
/* the error kstat is named "<ksname>,err" (modifies caller's buffer) */
(void) strcat(ksname, ",err");
kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
KSTAT_TYPE_NAMED,
sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
if (kerrsp == NULL) {
/* do not leave a half-created pair behind */
kstat_delete(kiosp);
return (MDI_FAILURE);
}
/* name each error counter; all of them are 32-bit unsigned */
nsp = (struct pi_errs *)kerrsp->ks_data;
kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
KSTAT_DATA_UINT32);
kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
KSTAT_DATA_UINT32);
kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
KSTAT_DATA_UINT32);
kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
KSTAT_DATA_UINT32);
kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
KSTAT_DATA_UINT32);
kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
KSTAT_DATA_UINT32);
kstat_named_init(&nsp->pi_failedfrom, "Failed From",
KSTAT_DATA_UINT32);
kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
/* bundle both kstats in a reference-counted holder owned by this path */
mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
mdi_statp->pi_kstat_ref = 1;
mdi_statp->pi_kstat_iostats = kiosp;
mdi_statp->pi_kstat_errstats = kerrsp;
kstat_install(kiosp);
kstat_install(kerrsp);
MDI_PI(pip)->pi_kstats = mdi_statp;
return (MDI_SUCCESS);
}
/*
 * i_mdi_pi_kstat_destroy():
 *	Detach the per-path kstats from a pathinfo node.  The kstat holder
 *	may be shared between multiple pathinfo nodes, so this only drops
 *	this node's reference; the kstats and the holder are deleted when
 *	the last reference goes away.
 */
static void
i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
{
	struct mdi_pi_kstats	*stats = MDI_PI(pip)->pi_kstats;

	if (stats == NULL)
		return;

	MDI_PI(pip)->pi_kstats = NULL;

	ASSERT(stats->pi_kstat_ref > 0);
	stats->pi_kstat_ref--;
	if (stats->pi_kstat_ref == 0) {
		/* last user: tear down both kstats and free the holder */
		kstat_delete(stats->pi_kstat_iostats);
		kstat_delete(stats->pi_kstat_errstats);
		kmem_free(stats, sizeof (*stats));
	}
}
/*
 * mdi_pi_kstat_iosupdate():
 *	Account a completed buf in the path's I/O kstat: bump the read or
 *	write operation count and add the number of bytes transferred
 *	(b_bcount - b_resid) to the matching byte counter.
 */
void
mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
{
	kstat_t	*kio;
	size_t	nbytes;

	ASSERT(pip != NULL);

	/*
	 * I/O can be driven across a path before its statistics exist,
	 * e.g. from probe(9e); silently skip the update in that case.
	 */
	if (bp == NULL || MDI_PI(pip)->pi_kstats == NULL)
		return;

	kio = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
	nbytes = bp->b_bcount - bp->b_resid;

	if (bp->b_flags & B_READ) {
		KSTAT_IO_PTR(kio)->reads++;
		KSTAT_IO_PTR(kio)->nread += nbytes;
	} else {
		KSTAT_IO_PTR(kio)->writes++;
		KSTAT_IO_PTR(kio)->nwritten += nbytes;
	}
}
/*
 * mdi_pi_enable_path():
 *	Enable the path (specific client/target/initiator).
 *	Enabling a path means that MPxIO may select the enabled path for
 *	routing future I/O requests, subject to other path state
 *	constraints.
 *
 * Return Values:
 *	MDI_SUCCESS	the enable request was issued
 *	MDI_FAILURE	the path has no pHCI
 */
int
mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
{
	mdi_phci_t	*phci = MDI_PI(pip)->pi_phci;

	if (phci != NULL) {
		(void) i_mdi_enable_disable_path(pip, phci->ph_vhci, flags,
		    MDI_ENABLE_OP);
		MDI_DEBUG(5, (MDI_NOTE, phci->ph_dip,
		    "!returning success pip = %p. ph = %p",
		    (void *)pip, (void *)phci));
		return (MDI_SUCCESS);
	}

	MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
	    "!failed: path %s %p: NULL ph",
	    mdi_pi_spathname(pip), (void *)pip));
	return (MDI_FAILURE);
}
/*
 * mdi_pi_disable_path():
 *	Disable the path (specific client/target/initiator).
 *	Disabling a path means that MPxIO will not select the disabled
 *	path for routing any new I/O requests.
 *
 * Return Values:
 *	MDI_SUCCESS	the disable request was issued
 *	MDI_FAILURE	the path has no pHCI
 */
int
mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
{
	mdi_phci_t	*phci = MDI_PI(pip)->pi_phci;

	if (phci != NULL) {
		(void) i_mdi_enable_disable_path(pip,
		    phci->ph_vhci, flags, MDI_DISABLE_OP);
		MDI_DEBUG(5, (MDI_NOTE, phci->ph_dip,
		    "!returning success pip = %p. ph = %p",
		    (void *)pip, (void *)phci));
		return (MDI_SUCCESS);
	}

	MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
	    "!failed: path %s %p: NULL ph",
	    mdi_pi_spathname(pip), (void *)pip));
	return (MDI_FAILURE);
}
/*
 * mdi_pi_disable():
 *	Disable the path to a particular pHCI (pdip) for a particular
 *	client (cdip).  Disabling a path means that MPxIO will not select
 *	the disabled path for routing any new I/O requests.
 *
 *	NOTE: this will be removed once the NWS files are changed to use
 *	the new mdi_{enable,disable}_path interfaces.
 */
int
mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
{
	int	status;

	status = i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP);
	return (status);
}
/*
 * mdi_pi_enable():
 *	Enable the path to a particular pHCI (pdip) for a particular
 *	client (cdip).  Enabling a path means that MPxIO may select the
 *	enabled path for routing future I/O requests, subject to other
 *	path state constraints.
 *
 *	NOTE: this will be removed once the NWS files are changed to use
 *	the new mdi_{enable,disable}_path interfaces.
 */
int
mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
{
	int	status;

	status = i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP);
	return (status);
}
/*
 * i_mdi_enable_disable_path():
 *	Common routine for enabling or disabling a single path.
 *
 *	pip	path to operate on
 *	vh	vHCI whose vo_pi_state_change callback, if one is
 *		registered, is invoked before and after the change
 *	flags	selects which disable state is changed: USER_DISABLE,
 *		DRIVER_DISABLE or DRIVER_DISABLE_TRANSIENT.  Any other
 *		value changes no path state (the callbacks still run).
 *	op	MDI_ENABLE_OP or MDI_DISABLE_OP
 *
 * Return Values:
 *	The next path on the pHCI's path list (pi_phci_link), so that a
 *	caller can walk all paths of a pHCI by feeding the result back in.
 */
static mdi_pathinfo_t *
i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
    int op)
{
	int		sync_flag = 0;
	/*
	 * rv is consulted by the DRIVER_DISABLE_TRANSIENT case below even
	 * when the vHCI has no state change callback.  With no callback
	 * there is nobody to veto the change, so start out as success
	 * instead of reading an uninitialized value.
	 */
	int		rv = MDI_SUCCESS;
	mdi_pathinfo_t	*next;
	int		(*f)() = NULL;

	/*
	 * Check to make sure the path is not already in the
	 * requested state. If it is just return the next path
	 * as we have nothing to do here.
	 */
	if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) ||
	    (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_UNLOCK(pip);
		return (next);
	}

	f = vh->vh_ops->vo_pi_state_change;

	/* move the caller's flags into bits 8-11 of the callback flags */
	sync_flag = (flags << 8) & 0xf00;

	/*
	 * Do a callback into the mdi consumer to let it
	 * know that path is about to get enabled/disabled.
	 */
	if (f != NULL) {
		rv = (*f)(vh->vh_dip, pip, 0,
		    MDI_PI_EXT_STATE(pip),
		    MDI_EXT_STATE_CHANGE | sync_flag |
		    op | MDI_BEFORE_STATE_CHANGE);
		if (rv != MDI_SUCCESS) {
			MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
			    "vo_pi_state_change: failed rv = %x", rv));
		}
	}

	MDI_PI_LOCK(pip);
	next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

	switch (flags) {
	case USER_DISABLE:
		if (op == MDI_DISABLE_OP) {
			MDI_PI_SET_USER_DISABLE(pip);
		} else {
			MDI_PI_SET_USER_ENABLE(pip);
		}
		break;
	case DRIVER_DISABLE:
		if (op == MDI_DISABLE_OP) {
			MDI_PI_SET_DRV_DISABLE(pip);
		} else {
			MDI_PI_SET_DRV_ENABLE(pip);
		}
		break;
	case DRIVER_DISABLE_TRANSIENT:
		/*
		 * A transient disable only takes effect when the
		 * pre-change callback (if any) accepted it; otherwise
		 * the transient disable is cleared.
		 */
		if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
			MDI_PI_SET_DRV_DISABLE_TRANS(pip);
		} else {
			MDI_PI_SET_DRV_ENABLE_TRANS(pip);
		}
		break;
	default:
		/* unknown flag: no path state is changed */
		break;
	}
	MDI_PI_UNLOCK(pip);

	/*
	 * Do a callback into the mdi consumer to let it
	 * know that path is now enabled/disabled.
	 */
	if (f != NULL) {
		rv = (*f)(vh->vh_dip, pip, 0,
		    MDI_PI_EXT_STATE(pip),
		    MDI_EXT_STATE_CHANGE | sync_flag |
		    op | MDI_AFTER_STATE_CHANGE);
		if (rv != MDI_SUCCESS) {
			MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
			    "vo_pi_state_change failed: rv = %x", rv));
		}
	}
	return (next);
}
/*
 * i_mdi_pi_enable_disable():
 *	Common routine for doing enable/disable, addressed by devinfo
 *	nodes rather than by pathinfo node.
 *
 *	cdip	client node, or NULL to operate on the pHCI itself and on
 *		every path attached to it
 *	pdip	pHCI node
 *	flags	USER_DISABLE, DRIVER_DISABLE or DRIVER_DISABLE_TRANSIENT
 *	op	MDI_ENABLE_OP or MDI_DISABLE_OP
 *
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE	pdip is not a pHCI, op is invalid, flags is invalid
 *			(pHCI-wide request), cdip is not a client, or the
 *			client has no path through this pHCI
 *
 *	NOTE: this will be removed once the NWS files are changed to use
 *	the new mdi_{enable,disable}_path interfaces.
 */
int
i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
{
	mdi_phci_t	*ph;
	mdi_vhci_t	*vh = NULL;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*next, *pip;
	int		found_it;

	ph = i_devi_get_phci(pdip);
	MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
	    "!op = %d pdip = %p cdip = %p", op, (void *)pdip,
	    (void *)cdip));
	if (ph == NULL) {
		MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
		    "!failed: operation %d: NULL ph", op));
		return (MDI_FAILURE);
	}

	if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
		MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
		    "!failed: invalid operation %d", op));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;

	if (cdip == NULL) {
		/*
		 * Need to mark the Phci as enabled/disabled.
		 */
		MDI_DEBUG(4, (MDI_NOTE, cdip ? cdip : pdip,
		    "op %d for the phci", op));
		MDI_PHCI_LOCK(ph);
		switch (flags) {
		case USER_DISABLE:
			if (op == MDI_DISABLE_OP) {
				MDI_PHCI_SET_USER_DISABLE(ph);
			} else {
				MDI_PHCI_SET_USER_ENABLE(ph);
			}
			break;
		case DRIVER_DISABLE:
			if (op == MDI_DISABLE_OP) {
				MDI_PHCI_SET_DRV_DISABLE(ph);
			} else {
				MDI_PHCI_SET_DRV_ENABLE(ph);
			}
			break;
		case DRIVER_DISABLE_TRANSIENT:
			if (op == MDI_DISABLE_OP) {
				MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
			} else {
				MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
			}
			break;
		default:
			/*
			 * Invalid flag: bail out here.  The pHCI lock has
			 * just been dropped, so continuing would walk the
			 * path list unlocked and then release the lock a
			 * second time.
			 */
			MDI_PHCI_UNLOCK(ph);
			MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
			    "!invalid flag argument= %d", flags));
			return (MDI_FAILURE);
		}

		/*
		 * Phci has been disabled. Now try to enable/disable
		 * path info's to each client.
		 */
		pip = ph->ph_path_head;
		while (pip != NULL) {
			/* the helper hands back the next path on the pHCI */
			pip = i_mdi_enable_disable_path(pip, vh, flags, op);
		}
		MDI_PHCI_UNLOCK(ph);
	} else {
		/*
		 * Disable a specific client.
		 */
		ct = i_devi_get_client(cdip);
		if (ct == NULL) {
			MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
			    "!failed: operation = %d: NULL ct", op));
			return (MDI_FAILURE);
		}

		/* find the client's path that goes through this pHCI */
		MDI_CLIENT_LOCK(ct);
		pip = ct->ct_path_head;
		found_it = 0;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI(pip)->pi_phci == ph) {
				MDI_PI_UNLOCK(pip);
				found_it = 1;
				break;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
		}

		MDI_CLIENT_UNLOCK(ct);
		if (found_it == 0) {
			MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
			    "!failed. Could not find corresponding pip\n"));
			return (MDI_FAILURE);
		}

		(void) i_mdi_enable_disable_path(pip, vh, flags, op);
	}

	MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
	    "!op %d returning success pdip = %p cdip = %p",
	    op, (void *)pdip, (void *)cdip));
	return (MDI_SUCCESS);
}
/*
* i_mdi_pm_hold_pip():
* Ensure phci powered up: place a power hold on the path's pHCI
* unless this path already holds one (pi_pm_held).
*
* The caller must hold the pathinfo lock. The lock is dropped around
* the pm_hold_power() call and re-acquired before returning, so path
* state may have changed by the time this function returns.
*/
static void
i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
{
dev_info_t *ph_dip;
ASSERT(pip != NULL);
ASSERT(MDI_PI_LOCKED(pip));
/* this path already holds its pHCI */
if (MDI_PI(pip)->pi_pm_held) {
return;
}
ph_dip = mdi_pi_get_phci(pip);
MDI_DEBUG(4, (MDI_NOTE, ph_dip,
"%s %p", mdi_pi_spathname(pip), (void *)pip));
/* no pHCI devinfo node: nothing to hold */
if (ph_dip == NULL) {
return;
}
/* the hold is taken without the pathinfo lock held */
MDI_PI_UNLOCK(pip);
MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d",
DEVI(ph_dip)->devi_pm_kidsupcnt));
pm_hold_power(ph_dip);
MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d",
DEVI(ph_dip)->devi_pm_kidsupcnt));
MDI_PI_LOCK(pip);
/* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
if (DEVI(ph_dip)->devi_pm_info)
MDI_PI(pip)->pi_pm_held = 1;
}
/*
* i_mdi_pm_rele_pip():
* Allow phci powered down: release the power hold this path has on
* its pHCI, if it has one (pi_pm_held).
*
* The caller must hold the pathinfo lock. The lock is dropped around
* the pm_rele_power() call and re-acquired before pi_pm_held is
* cleared.
*/
static void
i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
{
dev_info_t *ph_dip = NULL;
ASSERT(pip != NULL);
ASSERT(MDI_PI_LOCKED(pip));
/* no hold outstanding for this path */
if (MDI_PI(pip)->pi_pm_held == 0) {
return;
}
ph_dip = mdi_pi_get_phci(pip);
ASSERT(ph_dip != NULL);
MDI_DEBUG(4, (MDI_NOTE, ph_dip,
"%s %p", mdi_pi_spathname(pip), (void *)pip));
/* the release is done without the pathinfo lock held */
MDI_PI_UNLOCK(pip);
MDI_DEBUG(4, (MDI_NOTE, ph_dip,
"kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
pm_rele_power(ph_dip);
MDI_DEBUG(4, (MDI_NOTE, ph_dip,
"kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
MDI_PI_LOCK(pip);
MDI_PI(pip)->pi_pm_held = 0;
}
/*
 * i_mdi_pm_hold_client():
 *	Add incr to the client's power hold count.  The caller must hold
 *	the client lock.
 */
static void
i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
{
	ASSERT(MDI_CLIENT_LOCKED(ct));

	ct->ct_power_cnt = ct->ct_power_cnt + incr;
	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
	    "%p ct_power_cnt = %d incr = %d",
	    (void *)ct, ct->ct_power_cnt, incr));

	/* the count is never expected to go negative */
	ASSERT(ct->ct_power_cnt >= 0);
}
/*
 * i_mdi_rele_all_phci():
 *	Release the pHCI power hold of every path on the client's path
 *	list.  The caller must hold the client lock.  Each path is held
 *	(mdi_hold_path) for the duration of its release.
 */
static void
i_mdi_rele_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t	*path;

	ASSERT(MDI_CLIENT_LOCKED(ct));

	for (path = (mdi_pathinfo_t *)ct->ct_path_head; path != NULL;
	    path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link) {
		mdi_hold_path(path);
		MDI_PI_LOCK(path);
		i_mdi_pm_rele_pip(path);
		MDI_PI_UNLOCK(path);
		mdi_rele_path(path);
	}
}
/*
 * i_mdi_pm_rele_client():
 *	Drop decr from the client's power hold count (only while the
 *	client node is attached) and, once the count is zero, release the
 *	pHCI power holds of all of the client's paths.  The caller must
 *	hold the client lock.
 */
static void
i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
{
	ASSERT(MDI_CLIENT_LOCKED(ct));

	if (i_ddi_devi_attached(ct->ct_dip)) {
		ct->ct_power_cnt = ct->ct_power_cnt - decr;
		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
		    "%p ct_power_cnt = %d decr = %d",
		    (void *)ct, ct->ct_power_cnt, decr));
	}

	ASSERT(ct->ct_power_cnt >= 0);

	/* no holds left on the client: let the pHCIs power down */
	if (ct->ct_power_cnt == 0) {
		i_mdi_rele_all_phci(ct);
	}
}
/*
* i_mdi_pm_reset_client():
* Forcibly clear the client's power accounting: zero the power hold
* count, release the pHCI power hold of every path, clear the
* config/unconfig hold markers and set ct_powercnt_reset so that the
* post-config/post-unconfig handlers can tell a reset took place.
* The caller must hold the client lock.
*/
static void
i_mdi_pm_reset_client(mdi_client_t *ct)
{
MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
"%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt));
ASSERT(MDI_CLIENT_LOCKED(ct));
ct->ct_power_cnt = 0;
i_mdi_rele_all_phci(ct);
ct->ct_powercnt_config = 0;
ct->ct_powercnt_unconfig = 0;
ct->ct_powercnt_reset = 1;
}
/*
 * i_mdi_power_one_phci():
 *	Take a power hold on the pHCI behind pip and bring that pHCI to
 *	full power.  If pm_powerup() fails the hold is released again.
 *
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE	pm_powerup() returned DDI_FAILURE
 */
static int
i_mdi_power_one_phci(mdi_pathinfo_t *pip)
{
	dev_info_t	*phci_dip;
	int		rc;

	MDI_PI_LOCK(pip);
	i_mdi_pm_hold_pip(pip);
	phci_dip = mdi_pi_get_phci(pip);
	MDI_PI_UNLOCK(pip);

	/* bring all components of phci to full power */
	MDI_DEBUG(4, (MDI_NOTE, phci_dip,
	    "pm_powerup for %s%d %p", ddi_driver_name(phci_dip),
	    ddi_get_instance(phci_dip), (void *)pip));

	rc = pm_powerup(phci_dip);
	if (rc != DDI_FAILURE) {
		return (MDI_SUCCESS);
	}

	MDI_DEBUG(4, (MDI_NOTE, phci_dip,
	    "pm_powerup FAILED for %s%d %p",
	    ddi_driver_name(phci_dip), ddi_get_instance(phci_dip),
	    (void *)pip));

	/* undo the hold taken above */
	MDI_PI_LOCK(pip);
	i_mdi_pm_rele_pip(pip);
	MDI_PI_UNLOCK(pip);

	return (MDI_FAILURE);
}
/*
 * i_mdi_power_all_phci():
 *	Power up the pHCI behind every usable path of the client.  The
 *	caller must hold the client lock; the lock is dropped while each
 *	pHCI is being powered and re-acquired afterwards, with the path
 *	held (mdi_hold_path) across that window.
 *
 * Return Values:
 *	MDI_SUCCESS	at least one pHCI was powered up
 *	MDI_FAILURE	no pHCI could be powered up
 */
static int
i_mdi_power_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t	*path;
	int		any_powered = 0;

	ASSERT(MDI_CLIENT_LOCKED(ct));

	for (path = (mdi_pathinfo_t *)ct->ct_path_head; path != NULL;
	    path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link) {
		/*
		 * Only INIT, ONLINE and STANDBY paths are powered; in
		 * particular MDI_PATHINFO_STATE_FAULT and
		 * MDI_PATHINFO_STATE_OFFLINE paths are skipped.
		 */
		if (!(MDI_PI_IS_INIT(path) ||
		    MDI_PI_IS_ONLINE(path) || MDI_PI_IS_STANDBY(path))) {
			continue;
		}

		mdi_hold_path(path);
		MDI_CLIENT_UNLOCK(ct);
		if (i_mdi_power_one_phci(path) == MDI_SUCCESS) {
			any_powered = 1;
		}

		ASSERT(ct == MDI_PI(path)->pi_client);
		MDI_CLIENT_LOCK(ct);
		mdi_rele_path(path);
	}

	if (any_powered) {
		return (MDI_SUCCESS);
	}
	return (MDI_FAILURE);
}
/*
* mdi_bus_power():
* 1. Place the phci(s) into powered up state so that
* client can do power management
* 2. Ensure phci powered up as client power managing
*
* parent, impl_arg, result: passed through to pm_busop_bus_power() for
* operations not handled here; result is also read as an int for
* BUS_POWER_POST_NOTIFICATION.
* op: bus power operation
* arg: pm_bp_child_pwrchg_t for the PRE/POST notifications,
* pm_bp_has_changed_t for BUS_POWER_HAS_CHANGED
*
* Return Values:
* MDI_SUCCESS
* MDI_FAILURE (BUS_POWER_NOINVOL, no client structure for the child,
* or failure to power up any pHCI)
* For all other operations, whatever pm_busop_bus_power() returns.
*/
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
void *arg, void *result)
{
int ret = MDI_SUCCESS;
pm_bp_child_pwrchg_t *bpc;
mdi_client_t *ct;
dev_info_t *cdip;
pm_bp_has_changed_t *bphc;
/*
* BUS_POWER_NOINVOL not supported
*/
if (op == BUS_POWER_NOINVOL)
return (MDI_FAILURE);
/*
* ignore other OPs.
* return quickly to save cpu cycles on the ct processing
*/
switch (op) {
case BUS_POWER_PRE_NOTIFICATION:
case BUS_POWER_POST_NOTIFICATION:
bpc = (pm_bp_child_pwrchg_t *)arg;
cdip = bpc->bpc_dip;
break;
case BUS_POWER_HAS_CHANGED:
bphc = (pm_bp_has_changed_t *)arg;
cdip = bphc->bphc_dip;
break;
default:
return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
}
ASSERT(MDI_CLIENT(cdip));
ct = i_devi_get_client(cdip);
if (ct == NULL)
return (MDI_FAILURE);
/*
* wait till the mdi_pathinfo node state change are processed
*/
MDI_CLIENT_LOCK(ct);
switch (op) {
case BUS_POWER_PRE_NOTIFICATION:
MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
"BUS_POWER_PRE_NOTIFICATION:"
"%s@%s, olevel=%d, nlevel=%d, comp=%d",
ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));
/* serialize power level change per client */
while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
/*
* Mark the transition in progress; it is cleared again (and
* waiters are woken) in the POST notification below.
*/
MDI_CLIENT_SET_POWER_TRANSITION(ct);
/* first hold on this client: make sure the pHCI(s) are up */
if (ct->ct_power_cnt == 0) {
ret = i_mdi_power_all_phci(ct);
}
/*
* if new_level > 0:
* - hold phci(s)
* - power up phci(s) if not already
* ignore power down
*/
if (bpc->bpc_nlevel > 0) {
if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
"i_mdi_pm_hold_client\n"));
i_mdi_pm_hold_client(ct, ct->ct_path_count);
}
}
break;
case BUS_POWER_POST_NOTIFICATION:
MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
"BUS_POWER_POST_NOTIFICATION:"
"%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d",
ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
*(int *)result));
/* record the new power state only if the change succeeded */
if (*(int *)result == DDI_SUCCESS) {
if (bpc->bpc_nlevel > 0) {
MDI_CLIENT_SET_POWER_UP(ct);
} else {
MDI_CLIENT_SET_POWER_DOWN(ct);
}
}
/* release the hold we did in pre-notification */
if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
!DEVI_IS_ATTACHING(ct->ct_dip)) {
MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
"i_mdi_pm_rele_client\n"));
i_mdi_pm_rele_client(ct, ct->ct_path_count);
}
/* successful power down: drop the holds kept for the client */
if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
/* another thread might started attaching */
if (DEVI_IS_ATTACHING(ct->ct_dip)) {
MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
"i_mdi_pm_rele_client\n"));
i_mdi_pm_rele_client(ct, ct->ct_path_count);
/* detaching has been taken care in pm_post_unconfig */
} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
"i_mdi_pm_reset_client\n"));
i_mdi_pm_reset_client(ct);
}
}
/* transition finished: wake threads waiting in cv_wait() */
MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
cv_broadcast(&ct->ct_powerchange_cv);
break;
/* need to do more */
case BUS_POWER_HAS_CHANGED:
MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
"BUS_POWER_HAS_CHANGED:"
"%s@%s, olevel=%d, nlevel=%d, comp=%d",
ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));
/* level went up: power the pHCI(s) if needed and take a hold */
if (bphc->bphc_nlevel > 0 &&
bphc->bphc_nlevel > bphc->bphc_olevel) {
if (ct->ct_power_cnt == 0) {
ret = i_mdi_power_all_phci(ct);
}
MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
"i_mdi_pm_hold_client\n"));
i_mdi_pm_hold_client(ct, ct->ct_path_count);
}
/* powered off from a known (not -1) old level: drop the hold */
if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
"i_mdi_pm_rele_client\n"));
i_mdi_pm_rele_client(ct, ct->ct_path_count);
}
break;
}
MDI_CLIENT_UNLOCK(ct);
return (ret);
}
/*
* i_mdi_pm_pre_config_one():
* Pre-config power handling for one client node: power up its pHCI(s)
* if no hold exists yet and take a power hold on the client, marking
* it with ct_powercnt_config so that the hold is taken only once.
*
* Return Values:
* MDI_SUCCESS hold taken, or nothing to do (the client is not in
* the failed state, or a config hold is already in place)
* MDI_FAILURE child has no client structure, or no pHCI could be
* powered up (the hold is still taken in that case)
*/
static int
i_mdi_pm_pre_config_one(dev_info_t *child)
{
int ret = MDI_SUCCESS;
mdi_client_t *ct;
ct = i_devi_get_client(child);
if (ct == NULL)
return (MDI_FAILURE);
MDI_CLIENT_LOCK(ct);
/* wait for any power level change in progress to finish */
while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
if (!MDI_CLIENT_IS_FAILED(ct)) {
MDI_CLIENT_UNLOCK(ct);
MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n"));
return (MDI_SUCCESS);
}
if (ct->ct_powercnt_config) {
MDI_CLIENT_UNLOCK(ct);
MDI_DEBUG(4, (MDI_NOTE, child, "already held\n"));
return (MDI_SUCCESS);
}
/* first hold on this client: make sure the pHCI(s) are up */
if (ct->ct_power_cnt == 0) {
ret = i_mdi_power_all_phci(ct);
}
MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
i_mdi_pm_hold_client(ct, ct->ct_path_count);
/* remember the config hold; a fresh hold supersedes an earlier reset */
ct->ct_powercnt_config = 1;
ct->ct_powercnt_reset = 0;
MDI_CLIENT_UNLOCK(ct);
return (ret);
}
/*
 * i_mdi_pm_pre_config():
 *	Pre-config power handling for a vHCI.  With a specific child
 *	(ndi_devi_config_one case) only that client is processed; without
 *	one (devi_config_common case) every child of the vHCI is
 *	processed, stopping at the first failure.
 *
 * Return Values:
 *	Result of the last i_mdi_pm_pre_config_one() call, or MDI_SUCCESS
 *	if the vHCI has no children.
 */
static int
i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
{
	dev_info_t	*cdip;
	dev_info_t	*sibling;
	int		status = MDI_SUCCESS;
	int		circ;

	ASSERT(MDI_VHCI(vdip));

	/* ndi_devi_config_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		return (i_mdi_pm_pre_config_one(child));
	}

	/* devi_config_common */
	ndi_devi_enter(vdip, &circ);
	for (cdip = ddi_get_child(vdip); cdip != NULL; cdip = sibling) {
		/* fetch the sibling before cdip is processed */
		sibling = ddi_get_next_sibling(cdip);

		status = i_mdi_pm_pre_config_one(cdip);
		if (status != MDI_SUCCESS) {
			break;
		}
	}
	ndi_devi_exit(vdip, circ);

	return (status);
}
/*
* i_mdi_pm_pre_unconfig_one():
* Pre-unconfig power handling for one client node: power up its
* pHCI(s) if no hold exists yet and take a power hold on the client,
* marking it with ct_powercnt_unconfig.
*
* child: client devinfo node
* held: set to 1 when a hold is (or already was) in place; left
* untouched otherwise
* flags: NDI_AUTODETACH causes a powered down client to be refused
*
* Return Values:
* MDI_SUCCESS hold in place, or the node is already detached
* MDI_FAILURE child has no client structure, auto-detach of a
* powered down client, or no pHCI could be powered up
*/
static int
i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
{
int ret = MDI_SUCCESS;
mdi_client_t *ct;
ct = i_devi_get_client(child);
if (ct == NULL)
return (MDI_FAILURE);
MDI_CLIENT_LOCK(ct);
/* wait for any power level change in progress to finish */
while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
if (!i_ddi_devi_attached(child)) {
MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n"));
MDI_CLIENT_UNLOCK(ct);
return (MDI_SUCCESS);
}
/* refuse an auto-detach request for a powered down client */
if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
(flags & NDI_AUTODETACH)) {
MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n"));
MDI_CLIENT_UNLOCK(ct);
return (MDI_FAILURE);
}
/* an unconfig hold is already in place: just report it */
if (ct->ct_powercnt_unconfig) {
MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n"));
MDI_CLIENT_UNLOCK(ct);
*held = 1;
return (MDI_SUCCESS);
}
/* first hold on this client: make sure the pHCI(s) are up */
if (ct->ct_power_cnt == 0) {
ret = i_mdi_power_all_phci(ct);
}
MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
i_mdi_pm_hold_client(ct, ct->ct_path_count);
ct->ct_powercnt_unconfig = 1;
ct->ct_powercnt_reset = 0;
MDI_CLIENT_UNLOCK(ct);
/*
* The hold is taken even when powering up failed, but it is only
* reported to the caller on success.
*/
if (ret == MDI_SUCCESS)
*held = 1;
return (ret);
}
/*
 * i_mdi_pm_pre_unconfig():
 *	Pre-unconfig power handling for a vHCI.  With a specific child
 *	(ndi_devi_unconfig_one case) only that client is processed;
 *	without one (devi_unconfig_common case) every child of the vHCI is
 *	processed, regardless of individual failures.
 *
 *	*held is cleared on entry and set to 1 if a hold was placed on (or
 *	already existed for) any processed child.
 *
 * Return Values:
 *	Single child: result of i_mdi_pm_pre_unconfig_one().
 *	All children: MDI_SUCCESS if *held ended up set, otherwise the
 *	result for the last child (MDI_SUCCESS if there are no children).
 */
static int
i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
    int flags)
{
	dev_info_t	*cdip;
	dev_info_t	*sibling;
	int		status = MDI_SUCCESS;
	int		circ;

	ASSERT(MDI_VHCI(vdip));
	*held = 0;

	/* ndi_devi_unconfig_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
	}

	/* devi_unconfig_common */
	ndi_devi_enter(vdip, &circ);
	for (cdip = ddi_get_child(vdip); cdip != NULL; cdip = sibling) {
		/* fetch the sibling before cdip is processed */
		sibling = ddi_get_next_sibling(cdip);
		status = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
	}
	ndi_devi_exit(vdip, circ);

	/* one hold anywhere is enough to report success */
	if (*held) {
		status = MDI_SUCCESS;
	}

	return (status);
}
/*
* i_mdi_pm_post_config_one():
* Post-config power handling for one client node: undo the hold
* taken by the pre-config step. Nothing is done when child has no
* client structure, when no config hold is recorded (or the power
* count was reset in the meantime), or when the client is still in
* the failed state.
*/
static void
i_mdi_pm_post_config_one(dev_info_t *child)
{
mdi_client_t *ct;
ct = i_devi_get_client(child);
if (ct == NULL)
return;
MDI_CLIENT_LOCK(ct);
/* wait for any power level change in progress to finish */
while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n"));
MDI_CLIENT_UNLOCK(ct);
return;
}
/* client has not been updated */
if (MDI_CLIENT_IS_FAILED(ct)) {
MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n"));
MDI_CLIENT_UNLOCK(ct);
return;
}
/* another thread might have powered it down or detached it */
if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
!DEVI_IS_ATTACHING(child)) ||
(!i_ddi_devi_attached(child) &&
!DEVI_IS_ATTACHING(child))) {
MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
i_mdi_pm_reset_client(ct);
} else {
mdi_pathinfo_t *pip, *next;
int valid_path_count = 0;
MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
/* release one hold per path that is currently ONLINE or STANDBY */
pip = ct->ct_path_head;
while (pip != NULL) {
MDI_PI_LOCK(pip);
next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
valid_path_count ++;
MDI_PI_UNLOCK(pip);
pip = next;
}
i_mdi_pm_rele_client(ct, valid_path_count);
}
/* the config hold is gone on both paths above */
ct->ct_powercnt_config = 0;
MDI_CLIENT_UNLOCK(ct);
}
/*
 * i_mdi_pm_post_config():
 *	Post-config power handling for a vHCI.  With a specific child
 *	(ndi_devi_config_one case) only that client is processed; without
 *	one (devi_config_common case) every child of the vHCI is
 *	processed.
 */
static void
i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
{
	dev_info_t	*cdip;
	dev_info_t	*sibling;
	int		circ;

	ASSERT(MDI_VHCI(vdip));

	/* ndi_devi_config_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		i_mdi_pm_post_config_one(child);
		return;
	}

	/* devi_config_common */
	ndi_devi_enter(vdip, &circ);
	for (cdip = ddi_get_child(vdip); cdip != NULL; cdip = sibling) {
		/* fetch the sibling before cdip is processed */
		sibling = ddi_get_next_sibling(cdip);
		i_mdi_pm_post_config_one(cdip);
	}
	ndi_devi_exit(vdip, circ);
}
/*
* i_mdi_pm_post_unconfig_one():
* Post-unconfig power handling for one client node: undo the hold
* taken by the pre-unconfig step. Nothing is done when child has no
* client structure, when no unconfig hold is recorded, or when the
* power count was reset in the meantime.
*/
static void
i_mdi_pm_post_unconfig_one(dev_info_t *child)
{
mdi_client_t *ct;
ct = i_devi_get_client(child);
if (ct == NULL)
return;
MDI_CLIENT_LOCK(ct);
/* wait for any power level change in progress to finish */
while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
MDI_DEBUG(4, (MDI_NOTE, child, "not held\n"));
MDI_CLIENT_UNLOCK(ct);
return;
}
/* failure detaching or another thread just attached it */
if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
i_ddi_devi_attached(child)) ||
(!i_ddi_devi_attached(child) &&
!DEVI_IS_ATTACHING(child))) {
MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
/* the reset also clears ct_powercnt_unconfig */
i_mdi_pm_reset_client(ct);
} else {
mdi_pathinfo_t *pip, *next;
int valid_path_count = 0;
MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
/* release one hold per path that is currently ONLINE or STANDBY */
pip = ct->ct_path_head;
while (pip != NULL) {
MDI_PI_LOCK(pip);
next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
valid_path_count ++;
MDI_PI_UNLOCK(pip);
pip = next;
}
i_mdi_pm_rele_client(ct, valid_path_count);
ct->ct_powercnt_unconfig = 0;
}
MDI_CLIENT_UNLOCK(ct);
}
/*
 * i_mdi_pm_post_unconfig():
 *	Post-unconfig power handling for a vHCI.  Does nothing unless the
 *	pre-unconfig step reported a hold (held != 0).  With a specific
 *	child only that client is processed; otherwise every child of the
 *	vHCI is processed.
 */
static void
i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
{
	dev_info_t	*cdip;
	dev_info_t	*sibling;
	int		circ;

	ASSERT(MDI_VHCI(vdip));

	if (!held) {
		MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held));
		return;
	}

	/* single, named child */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		i_mdi_pm_post_unconfig_one(child);
		return;
	}

	/* all children of the vHCI */
	ndi_devi_enter(vdip, &circ);
	for (cdip = ddi_get_child(vdip); cdip != NULL; cdip = sibling) {
		/* fetch the sibling before cdip is processed */
		sibling = ddi_get_next_sibling(cdip);
		i_mdi_pm_post_unconfig_one(cdip);
	}
	ndi_devi_exit(vdip, circ);
}
/*
* mdi_power():
* Entry point for MDI power management operations on a vHCI.
*
* vdip: vHCI devinfo node
* op: operation to perform (see the switch below)
* args: operation specific:
* MDI_PM_PRE_UNCONFIG pointer to an int that receives "held"
* MDI_PM_POST_UNCONFIG pointer to the int "held" from pre-unconfig
* MDI_PM_HOLD_POWER,
* MDI_PM_RELE_POWER client dev_info_t pointer
* devnm: name of a single child of vdip to operate on, or NULL to
* operate on all children (config/unconfig operations only)
* flags: passed to the pre-unconfig handling
*
* Return Values:
* Result of the pre-config/pre-unconfig handling for those two
* operations; MDI_SUCCESS for every other operation.
*/
int
mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
{
int circ, ret = MDI_SUCCESS;
dev_info_t *client_dip = NULL;
mdi_client_t *ct;
/*
* Handling ndi_devi_config_one and ndi_devi_unconfig_one.
* Power up pHCI for the named client device.
* Note: Before the client is enumerated under vhci by phci,
* client_dip can be NULL. Then proceed to power up all the
* pHCIs.
*/
if (devnm != NULL) {
/* vdip stays entered until the matching exit at the bottom */
ndi_devi_enter(vdip, &circ);
client_dip = ndi_devi_findchild(vdip, devnm);
}
MDI_DEBUG(4, (MDI_NOTE, vdip,
"op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip));
switch (op) {
case MDI_PM_PRE_CONFIG:
ret = i_mdi_pm_pre_config(vdip, client_dip);
break;
case MDI_PM_PRE_UNCONFIG:
ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
flags);
break;
case MDI_PM_POST_CONFIG:
i_mdi_pm_post_config(vdip, client_dip);
break;
case MDI_PM_POST_UNCONFIG:
i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
break;
case MDI_PM_HOLD_POWER:
case MDI_PM_RELE_POWER:
/* for these two operations the client node comes in through args */
ASSERT(args);
client_dip = (dev_info_t *)args;
ASSERT(MDI_CLIENT(client_dip));
ct = i_devi_get_client(client_dip);
MDI_CLIENT_LOCK(ct);
if (op == MDI_PM_HOLD_POWER) {
/* a hold is only taken when the client has none yet */
if (ct->ct_power_cnt == 0) {
(void) i_mdi_power_all_phci(ct);
MDI_DEBUG(4, (MDI_NOTE, client_dip,
"i_mdi_pm_hold_client\n"));
i_mdi_pm_hold_client(ct, ct->ct_path_count);
}
} else {
/*
* Release: drop one hold per path while the node is attaching,
* otherwise reset the client's power accounting entirely.
*/
if (DEVI_IS_ATTACHING(client_dip)) {
MDI_DEBUG(4, (MDI_NOTE, client_dip,
"i_mdi_pm_rele_client\n"));
i_mdi_pm_rele_client(ct, ct->ct_path_count);
} else {
MDI_DEBUG(4, (MDI_NOTE, client_dip,
"i_mdi_pm_reset_client\n"));
i_mdi_pm_reset_client(ct);
}
}
MDI_CLIENT_UNLOCK(ct);
break;
default:
break;
}
/* matches the ndi_devi_enter() done above when a name was given */
if (devnm)
ndi_devi_exit(vdip, circ);
return (ret);
}
/*
 * mdi_component_is_vhci():
 *	Report whether dip is an MDI vHCI.  When it is and mdi_class is
 *	not NULL, *mdi_class is set to the vHCI's class name.
 *
 * Return Values:
 *	MDI_SUCCESS	dip is a vHCI
 *	MDI_FAILURE	dip is not a vHCI (*mdi_class is left untouched)
 */
int
mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
{
	mdi_vhci_t	*vh;

	if (!MDI_VHCI(dip))
		return (MDI_FAILURE);

	/* caller is not interested in the class name */
	if (mdi_class == NULL)
		return (MDI_SUCCESS);

	vh = DEVI(dip)->devi_mdi_xhci;
	ASSERT(vh);
	*mdi_class = vh->vh_class;

	return (MDI_SUCCESS);
}
/*
 * mdi_component_is_phci():
 *	Report whether dip is an MDI pHCI.  When it is and mdi_class is
 *	not NULL, *mdi_class is set to the class name of the vHCI the pHCI
 *	belongs to.
 *
 * Return Values:
 *	MDI_SUCCESS	dip is a pHCI
 *	MDI_FAILURE	dip is not a pHCI (*mdi_class is left untouched)
 */
int
mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
{
	mdi_phci_t	*ph;

	if (!MDI_PHCI(dip))
		return (MDI_FAILURE);

	/* caller is not interested in the class name */
	if (mdi_class == NULL)
		return (MDI_SUCCESS);

	ph = DEVI(dip)->devi_mdi_xhci;
	ASSERT(ph);
	*mdi_class = ph->ph_vhci->vh_class;

	return (MDI_SUCCESS);
}
/*
 * mdi_component_is_client():
 *	Report whether dip is an MDI client.  When it is and mdi_class is
 *	not NULL, *mdi_class is set to the class name of the vHCI the
 *	client belongs to.
 *
 * Return Values:
 *	MDI_SUCCESS	dip is a client
 *	MDI_FAILURE	dip is not a client (*mdi_class is left untouched)
 */
int
mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
{
	mdi_client_t	*ct;

	if (!MDI_CLIENT(dip))
		return (MDI_FAILURE);

	/* caller is not interested in the class name */
	if (mdi_class == NULL)
		return (MDI_SUCCESS);

	ct = DEVI(dip)->devi_mdi_client;
	ASSERT(ct);
	*mdi_class = ct->ct_vhci->vh_class;

	return (MDI_SUCCESS);
}
/*
 * mdi_client_get_vhci_private():
 *	Get the vhci private information associated with the client node.
 *	Returns NULL when dip is not an MDI client.
 */
void *
mdi_client_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);

	if (mdi_component_is_client(dip, NULL) != MDI_SUCCESS)
		return (NULL);

	return (i_devi_get_client(dip)->ct_vprivate);
}
/*
 * mdi_client_set_vhci_private():
 *	Set the vhci private information in the client node.  Does
 *	nothing when dip is not an MDI client.
 */
void
mdi_client_set_vhci_private(dev_info_t *dip, void *data)
{
	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);

	if (mdi_component_is_client(dip, NULL) != MDI_SUCCESS)
		return;

	i_devi_get_client(dip)->ct_vprivate = data;
}
/*
 * mdi_pi_get_vhci_private():
 *	Get the vhci private information associated with the
 *	mdi_pathinfo node.  Returns NULL for a NULL pip.
 */
void *
mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
{
	caddr_t	priv;

	if (pip == NULL)
		return (NULL);

	priv = MDI_PI(pip)->pi_vprivate;
	return (priv);
}
/*
 * mdi_pi_set_vhci_private():
 *	Set the vhci private information in the mdi_pathinfo node.
 *	A NULL pip is ignored.
 */
void
mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
{
	if (pip == NULL)
		return;

	MDI_PI(pip)->pi_vprivate = priv;
}
/*
 * mdi_phci_get_vhci_private():
 *	Get the vhci private information associated with the
 *	mdi_phci node.  Returns NULL when dip is not an MDI pHCI.
 */
void *
mdi_phci_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);

	if (mdi_component_is_phci(dip, NULL) != MDI_SUCCESS)
		return (NULL);

	return (i_devi_get_phci(dip)->ph_vprivate);
}
/*
 * mdi_phci_set_vhci_private():
 *	Set the vhci private information in the mdi_phci node.  Does
 *	nothing when dip is not an MDI pHCI.
 */
void
mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);

	if (mdi_component_is_phci(dip, NULL) != MDI_SUCCESS)
		return;

	i_devi_get_phci(dip)->ph_vprivate = priv;
}
/*
 * mdi_pi_ishidden():
 *	Return the result of the hidden-flag test on the pathinfo node.
 */
int
mdi_pi_ishidden(mdi_pathinfo_t *pip)
{
	int	hidden;

	hidden = MDI_PI_FLAGS_IS_HIDDEN(pip);
	return (hidden);
}
/*
 * mdi_pi_device_isremoved():
 *	Return the result of the device-removed flag test on the pathinfo
 *	node.
 */
int
mdi_pi_device_isremoved(mdi_pathinfo_t *pip)
{
	int	removed;

	removed = MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip);
	return (removed);
}
/*
 * i_mdi_client_all_devices_removed():
 *	Return 1 if all client paths are device_removed (this includes a
 *	client with no paths at all), 0 as soon as one path is found that
 *	is not.  The client lock is taken for the duration of the walk.
 */
static int
i_mdi_client_all_devices_removed(mdi_client_t *ct)
{
	mdi_pathinfo_t	*path;
	int		all_removed = 1;

	MDI_CLIENT_LOCK(ct);
	path = ct->ct_path_head;
	while (path) {
		if (!mdi_pi_device_isremoved(path)) {
			all_removed = 0;
			break;
		}
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);

	return (all_removed);
}
/*
 * mdi_pi_device_remove():
 *	When processing path hotunplug, represent device removal: mark the
 *	path as device-removed and hidden.
 *
 * Return Values:
 *	0	the path was already marked device-removed; nothing changed
 *	1	the path has been marked
 */
int
mdi_pi_device_remove(mdi_pathinfo_t *pip)
{
	mdi_client_t	*client;

	MDI_PI_LOCK(pip);
	if (mdi_pi_device_isremoved(pip)) {
		MDI_PI_UNLOCK(pip);
		return (0);
	}
	MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip);
	MDI_PI_FLAGS_SET_HIDDEN(pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * If all paths associated with the client are now DEVICE_REMOVED,
	 * reflect DEVICE_REMOVED in the client.  Otherwise only the
	 * devinfo snapshot cache needs to be invalidated.
	 */
	client = MDI_PI(pip)->pi_client;
	if (client == NULL || client->ct_dip == NULL ||
	    !i_mdi_client_all_devices_removed(client)) {
		i_ddi_di_cache_invalidate();
	} else {
		(void) ndi_devi_device_remove(client->ct_dip);
	}

	return (1);
}
/*
 * mdi_pi_device_insert():
 *	When processing hotplug, if a path marked mdi_pi_device_isremoved()
 *	is now accessible then this interface is used to represent device
 *	insertion: the device-removed and hidden marks are cleared.
 *
 * Return Values:
 *	0	the path was not marked device-removed; nothing changed
 *	1	the marks have been cleared
 */
int
mdi_pi_device_insert(mdi_pathinfo_t *pip)
{
	int	was_removed;

	MDI_PI_LOCK(pip);
	was_removed = mdi_pi_device_isremoved(pip);
	if (!was_removed) {
		MDI_PI_UNLOCK(pip);
		return (0);
	}
	MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip);
	MDI_PI_FLAGS_CLR_HIDDEN(pip);
	MDI_PI_UNLOCK(pip);

	i_ddi_di_cache_invalidate();

	return (1);
}
/*
* List of vhci class names:
* A vhci class name must be in this list only if the corresponding vhci
* driver intends to use the mdi provided bus config implementation
* (i.e., mdi_vhci_bus_config()).
*/
static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
/* number of entries in vhci_class_list[] */
#define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *))
/*
* During boot time, the on-disk vhci cache for every vhci class is read
* in the form of an nvlist and stored here. The array has one slot per
* entry of vhci_class_list[] (presumably indexed in parallel with it -
* confirm against the code that fills it in).
*/
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];
/* version number of the vhci cache nvlist format */
#define MDI_VHCI_CACHE_VERSION 1
/* nvpair names in vhci cache nvlist */
#define MDI_NVPNAME_VERSION "version"
#define MDI_NVPNAME_PHCIS "phcis"
#define MDI_NVPNAME_CTADDRMAP "clientaddrmap"
/*
 * Map a vhci class name to the full path of its on-disk vhci cache file;
 * for scsi_vhci this yields "/etc/devices/mdi_scsi_vhci_cache".
 *
 * The returned string is allocated here with exactly strlen() + 1 bytes;
 * the caller owns it.
 */
static char *
vhclass2vhcache_filename(char *vhclass)
{
	static char	*fmt = "/etc/devices/mdi_%s_cache";
	char		*path;
	int		bufsz;

	/*
	 * The two characters of "%s" are replaced by vhclass and one byte
	 * is needed for the terminating NUL, hence the net -1.
	 */
	bufsz = strlen(fmt) + strlen(vhclass) - 1;
	path = kmem_alloc(bufsz, KM_SLEEP);
	(void) snprintf(path, bufsz, fmt, vhclass);
	ASSERT(bufsz == (strlen(path) + 1));

	return (path);
}
/*
 * Initialize the vhci cache related data structures and read the on-disk
 * vhci cached data into memory.
 *
 * A zeroed mdi_vhci_config_t is allocated and attached to vh->vh_config.
 * The cache contents are taken from the nvlist stashed in vhcache_nvl[]
 * for this vhci class or, if there is none and modrootloaded is set, from
 * read_on_disk_vhci_cache().  MDI_VHCI_CACHE_SETUP_DONE is set only when an
 * nvlist was found and converted successfully.
 */
static void
setup_vhci_cache(mdi_vhci_t *vh)
{
mdi_vhci_config_t *vhc;
mdi_vhci_cache_t *vhcache;
int i;
nvlist_t *nvl = NULL;
vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
vh->vh_config = vhc;
vhcache = &vhc->vhc_vhcache;
vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
/*
 * Create string hash; same as mod_hash_create_strhash() except that
 * we use NULL key destructor.
 */
vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
mdi_bus_config_cache_hash_size,
mod_hash_null_keydtor, mod_hash_null_valdtor,
mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
/*
 * The on-disk vhci cache is read during booting prior to the
 * lights-out period by mdi_read_devices_files().
 * Take ownership of the nvlist saved for this class and clear the slot
 * so that it is consumed only once.
 */
for (i = 0; i < N_VHCI_CLASSES; i++) {
if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
nvl = vhcache_nvl[i];
vhcache_nvl[i] = NULL;
break;
}
}
/*
 * This is to cover the case of someone manually causing unloading
 * (or detaching) and reloading (or attaching) of a vhci driver.
 */
if (nvl == NULL && modrootloaded)
nvl = read_on_disk_vhci_cache(vh->vh_class);
if (nvl != NULL) {
/* mainnvl_to_vhcache() requires the cache lock held as writer */
rw_enter(&vhcache->vhcache_lock, RW_WRITER);
if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
else {
cmn_err(CE_WARN,
"%s: data file corrupted, will recreate",
vhc->vhc_vhcache_filename);
}
rw_exit(&vhcache->vhcache_lock);
/* the cache holds its own copies of the strings; release the nvlist */
nvlist_free(nvl);
}
/* register stop_vhcache_flush_thread() as a CB_CL_UADMIN_PRE_VFS callback */
vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
/* snapshot the path discovery tunables into this vhci's config */
vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
}
/*
 * Free all vhci cache related resources hanging off vh->vh_config.
 *
 * Returns MDI_FAILURE, with nothing freed by this function, if
 * stop_vhcache_async_threads() fails.  Otherwise the cache filename, the
 * client hash, every cached phci/client/pathinfo entry, the locks and the
 * config structure itself are released and MDI_SUCCESS is returned.
 * vh->vh_config is not reset here.
 */
static int
destroy_vhci_cache(mdi_vhci_t *vh)
{
mdi_vhci_config_t *vhc = vh->vh_config;
mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
mdi_vhcache_phci_t *cphci, *cphci_next;
mdi_vhcache_client_t *cct, *cct_next;
mdi_vhcache_pathinfo_t *cpi, *cpi_next;
/* the async threads must be stopped before anything they use is freed */
if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
return (MDI_FAILURE);
kmem_free(vhc->vhc_vhcache_filename,
strlen(vhc->vhc_vhcache_filename) + 1);
mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
/* each walk saves the next pointer before freeing the current entry */
for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
cphci = cphci_next) {
cphci_next = cphci->cphci_next;
free_vhcache_phci(cphci);
}
for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
cct_next = cct->cct_next;
/* free the client's pathinfo entries, then the client itself */
for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
cpi_next = cpi->cpi_next;
free_vhcache_pathinfo(cpi);
}
free_vhcache_client(cct);
}
rw_destroy(&vhcache->vhcache_lock);
mutex_destroy(&vhc->vhc_lock);
cv_destroy(&vhc->vhc_cv);
kmem_free(vhc, sizeof (mdi_vhci_config_t));
return (MDI_SUCCESS);
}
/*
 * Stop all vhci cache related async threads and free their resources.
 *
 * Sets MDI_VHC_EXIT and polls until the flush thread flag is clear and
 * vhc_acc_thrcount is zero, then frees any queued async client config
 * requests.  If the cache is still dirty it is flushed synchronously.
 *
 * Returns MDI_SUCCESS, or MDI_FAILURE if that final flush fails (the cache
 * is re-marked dirty via vhcache_dirty()) or if callb_delete() fails.
 */
static int
stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
{
mdi_async_client_config_t *acc, *acc_next;
mutex_enter(&vhc->vhc_lock);
/* ask the threads to exit and wake any that are waiting on vhc_cv */
vhc->vhc_flags |= MDI_VHC_EXIT;
ASSERT(vhc->vhc_acc_thrcount >= 0);
cv_broadcast(&vhc->vhc_cv);
/* drop the lock while delaying so the threads can make progress */
while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
vhc->vhc_acc_thrcount != 0) {
mutex_exit(&vhc->vhc_lock);
delay_random(mdi_delay);
mutex_enter(&vhc->vhc_lock);
}
vhc->vhc_flags &= ~MDI_VHC_EXIT;
/* discard the async client config requests that are still queued */
for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
acc_next = acc->acc_next;
free_async_client_config(acc);
}
vhc->vhc_acc_list_head = NULL;
vhc->vhc_acc_list_tail = NULL;
vhc->vhc_acc_count = 0;
if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
/* flush_vhcache() takes vhc_lock itself, so release it first */
mutex_exit(&vhc->vhc_lock);
if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
vhcache_dirty(vhc);
return (MDI_FAILURE);
}
} else
mutex_exit(&vhc->vhc_lock);
if (callb_delete(vhc->vhc_cbid) != 0)
return (MDI_FAILURE);
return (MDI_SUCCESS);
}
/*
 * Stop vhci cache flush thread.
 *
 * Callback with the signature expected by callb_add(); arg is the
 * mdi_vhci_config_t, code is unused.  Sets MDI_VHC_EXIT (which is left
 * set), waits for the flush thread to clear its flag, and then flushes a
 * still-dirty cache with the force flag set.  Always returns B_TRUE; the
 * result of that flush is ignored.
 */
/* ARGSUSED */
static boolean_t
stop_vhcache_flush_thread(void *arg, int code)
{
mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
mutex_enter(&vhc->vhc_lock);
vhc->vhc_flags |= MDI_VHC_EXIT;
cv_broadcast(&vhc->vhc_cv);
/* poll, dropping the lock, until the flush thread has exited */
while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
mutex_exit(&vhc->vhc_lock);
delay_random(mdi_delay);
mutex_enter(&vhc->vhc_lock);
}
if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
mutex_exit(&vhc->vhc_lock);
/* force flag 1: flush even if i_ddi_io_initialized() is false */
(void) flush_vhcache(vhc, 1);
} else
mutex_exit(&vhc->vhc_lock);
return (B_TRUE);
}
/*
 * Append the vhcache phci (cphci) to the tail of the cache's phci list.
 */
static void
enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
{
	mdi_vhcache_phci_t	**linkp;

	/* the link to fill in: the list head if empty, else tail's next */
	if (vhcache->vhcache_phci_head == NULL)
		linkp = &vhcache->vhcache_phci_head;
	else
		linkp = &vhcache->vhcache_phci_tail->cphci_next;

	cphci->cphci_next = NULL;
	*linkp = cphci;
	vhcache->vhcache_phci_tail = cphci;
}
/*
 * Append the vhcache pathinfo (cpi) to the tail of the client's path list.
 */
static void
enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
    mdi_vhcache_pathinfo_t *cpi)
{
	cpi->cpi_next = NULL;

	if (cct->cct_cpi_head != NULL)
		cct->cct_cpi_tail->cpi_next = cpi;
	else
		cct->cct_cpi_head = cpi;

	cct->cct_cpi_tail = cpi;
}
/*
 * Insert the vhcache pathinfo (newcpi) at its place in the ordered list:
 * cpis without MDI_CPI_HINT_PATH_DOES_NOT_EXIST come first, cpis with the
 * flag come last.  A flagged cpi goes to the tail; an unflagged one is
 * placed just in front of the first flagged cpi (or at the tail if there
 * is none).
 */
static void
enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
    mdi_vhcache_pathinfo_t *newcpi)
{
	mdi_vhcache_pathinfo_t	**linkp, *succ;

	if (cct->cct_cpi_head == NULL ||
	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) {
		enqueue_tail_vhcache_pathinfo(cct, newcpi);
		return;
	}

	/* advance to the link that points at the first flagged cpi */
	linkp = &cct->cct_cpi_head;
	while (*linkp != NULL &&
	    !((*linkp)->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
		linkp = &(*linkp)->cpi_next;

	succ = *linkp;
	*linkp = newcpi;
	newcpi->cpi_next = succ;
	if (succ == NULL)
		cct->cct_cpi_tail = newcpi;
}
/*
 * Append the vhcache client (cct) to the tail of the cache's client list.
 */
static void
enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
    mdi_vhcache_client_t *cct)
{
	mdi_vhcache_client_t	**linkp;

	/* the link to fill in: the list head if empty, else tail's next */
	if (vhcache->vhcache_client_head == NULL)
		linkp = &vhcache->vhcache_client_head;
	else
		linkp = &vhcache->vhcache_client_tail->cct_next;

	cct->cct_next = NULL;
	*linkp = cct;
	vhcache->vhcache_client_tail = cct;
}
/*
 * Free an array of nelem strings together with the array itself.
 * NULL elements are skipped; a NULL array is a no-op.  Every string is
 * assumed to have been allocated with exactly strlen() + 1 bytes.
 */
static void
free_string_array(char **str, int nelem)
{
	int	idx;

	if (str == NULL)
		return;

	for (idx = 0; idx < nelem; idx++) {
		if (str[idx] != NULL)
			kmem_free(str[idx], strlen(str[idx]) + 1);
	}
	kmem_free(str, sizeof (char *) * nelem);
}
/*
 * Free a vhcache phci entry and its path string.
 */
static void
free_vhcache_phci(mdi_vhcache_phci_t *cphci)
{
	size_t	pathsz = strlen(cphci->cphci_path) + 1;

	kmem_free(cphci->cphci_path, pathsz);
	kmem_free(cphci, sizeof (*cphci));
}
/*
 * Free a vhcache pathinfo entry and its address string.
 */
static void
free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
{
	size_t	addrsz = strlen(cpi->cpi_addr) + 1;

	kmem_free(cpi->cpi_addr, addrsz);
	kmem_free(cpi, sizeof (*cpi));
}
/*
 * Free a vhcache client entry and its "name@addr" string.
 * The client's pathinfo entries are not freed here.
 */
static void
free_vhcache_client(mdi_vhcache_client_t *cct)
{
	size_t	namesz = strlen(cct->cct_name_addr) + 1;

	kmem_free(cct->cct_name_addr, namesz);
	kmem_free(cct, sizeof (*cct));
}
/*
 * Build a newly allocated "<ct_name>@<ct_addr>" string.
 * If ret_len is not NULL, the size of the allocation (string length plus
 * the terminating NUL) is stored through it so that the caller can free
 * the buffer.
 */
static char *
vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
{
	char	*buf;
	int	bufsz;

	/* +2: one byte for the '@' and one for the terminating NUL */
	bufsz = strlen(ct_name) + strlen(ct_addr) + 2;
	buf = kmem_alloc(bufsz, KM_SLEEP);
	(void) snprintf(buf, bufsz, "%s@%s", ct_name, ct_addr);

	if (ret_len != NULL)
		*ret_len = bufsz;
	return (buf);
}
/*
 * Copy the contents of paddrnvl to the vhci cache.
 * The paddrnvl nvlist holds the path information of one vhci client; see
 * the comment in mainnvl_to_vhcache() for its format.
 *
 * One cpi is appended to cct for every nvpair.  The first array element
 * (phci-id) is used directly as an index into cphci_list[]; no range check
 * is made here beyond the ASSERTs.
 */
static void
paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
    mdi_vhcache_client_t *cct)
{
	mdi_vhcache_pathinfo_t	*cpi;
	nvpair_t		*pair;
	uint32_t		*val;
	uint_t			nelem;

	for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL;
	    pair = nvlist_next_nvpair(nvl, pair)) {
		ASSERT(nvpair_type(pair) == DATA_TYPE_UINT32_ARRAY);

		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
		cpi->cpi_addr = i_ddi_strdup(nvpair_name(pair), KM_SLEEP);

		/* val[] is (phci-id, cpi_flags) */
		(void) nvpair_value_uint32_array(pair, &val, &nelem);
		ASSERT(nelem == 2);
		cpi->cpi_cphci = cphci_list[val[0]];
		cpi->cpi_flags = val[1];

		enqueue_tail_vhcache_pathinfo(cct, cpi);
	}
}
/*
 * Copy the contents of caddrmapnvl to the vhci cache.
 * The caddrmapnvl nvlist holds the vhci client address to phci client
 * address mappings; see the comment in mainnvl_to_vhcache() for its format.
 *
 * For every nvpair a vhcache client is created, filled with its paths,
 * appended to the client list and inserted into the client hash keyed by
 * its "name@addr" string.
 */
static void
caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
    mdi_vhcache_phci_t *cphci_list[])
{
	mdi_vhcache_client_t	*cct;
	nvlist_t		*paddrnvl;
	nvpair_t		*pair;

	for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL;
	    pair = nvlist_next_nvpair(nvl, pair)) {
		ASSERT(nvpair_type(pair) == DATA_TYPE_NVLIST);

		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
		cct->cct_name_addr = i_ddi_strdup(nvpair_name(pair), KM_SLEEP);

		(void) nvpair_value_nvlist(pair, &paddrnvl);
		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
		/* the client must contain at least one path */
		ASSERT(cct->cct_cpi_head != NULL);

		enqueue_vhcache_client(vhcache, cct);
		(void) mod_hash_insert(vhcache->vhcache_client_hash,
		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
	}
}
/*
 * Copy the contents of the main nvlist to the vhci cache.
 *
 * VHCI busconfig cached data is stored on disk in the form of an nvlist.
 * The nvlist contains the mappings between the vhci client addresses and
 * their corresponding phci client addresses.
 *
 * The structure of the nvlist is as follows:
 *
 * Main nvlist:
 *	NAME		TYPE		DATA
 *	version		int32		version number
 *	phcis		string array	array of phci paths
 *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
 *
 * structure of c2paddrs_nvl:
 *	NAME		TYPE		DATA
 *	caddr1		nvlist_t	paddrs_nvl1
 *	caddr2		nvlist_t	paddrs_nvl2
 *	...
 * where caddr1, caddr2, ... are vhci client names and addresses in the
 * form of "<clientname>@<clientaddress>"
 * (for example: "ssd@2000002037cd9f72");
 * paddrs_nvl1, paddrs_nvl2, ... are nvlists that contain path information.
 *
 * structure of paddrs_nvl:
 *	NAME		TYPE		DATA
 *	pi_addr1	uint32_array	(phci-id, cpi_flags)
 *	pi_addr2	uint32_array	(phci-id, cpi_flags)
 *	...
 * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
 * (so called pi_addrs, for example: "w2100002037cd9f72,0");
 * phci-ids are integers that identify the pHCIs to which the bus specific
 * address belongs.  These integers are used as an index into the phcis
 * string array in the main nvlist to get the pHCI path.
 *
 * The caller must hold vhcache_lock as writer.
 * Returns MDI_FAILURE if the version is missing or does not match,
 * MDI_SUCCESS otherwise (including when the nvlist has no phcis array, in
 * which case nothing is added to the cache).
 */
static int
mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
{
	mdi_vhcache_phci_t	**cphci_list, *cphci;
	nvlist_t		*caddrmapnvl;
	char			**phcis;
	size_t			cphci_list_size;
	uint_t			nphcis;
	int32_t			ver;
	int			i;

	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));

	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0)
		return (MDI_FAILURE);
	if (ver != MDI_VHCI_CACHE_VERSION)
		return (MDI_FAILURE);

	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
	    &nphcis) != 0)
		return (MDI_SUCCESS);

	ASSERT(nphcis > 0);

	/*
	 * cphci_list[] maps a phci-id to its vhcache phci while the client
	 * address map is being converted; it is only needed temporarily.
	 */
	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
	for (i = 0; i < nphcis; i++) {
		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
		cphci->cphci_path = i_ddi_strdup(phcis[i], KM_SLEEP);
		enqueue_vhcache_phci(vhcache, cphci);
		cphci_list[i] = cphci;
	}

	ASSERT(vhcache->vhcache_phci_head != NULL);

	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP,
	    &caddrmapnvl) == 0) {
		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
	}

	kmem_free(cphci_list, cphci_list_size);
	return (MDI_SUCCESS);
}
/*
 * Build the paddrnvl of the specified client from the information in the
 * vhci cache and add it to caddrmapnvl under the client's "name@addr".
 * The caller must hold vhcache_lock.
 * Returns 0 on success, errno on failure.
 */
static int
vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
    nvlist_t *caddrmapnvl)
{
	mdi_vhcache_pathinfo_t	*cpi;
	nvlist_t		*paddrnvl;
	uint32_t		pair[2];
	int			err;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	err = nvlist_alloc(&paddrnvl, 0, KM_SLEEP);
	if (err != 0)
		return (err);

	/* one (phci-id, cpi_flags) array per path; stop at the first error */
	for (cpi = cct->cct_cpi_head; cpi != NULL && err == 0;
	    cpi = cpi->cpi_next) {
		pair[0] = cpi->cpi_cphci->cphci_id;
		pair[1] = cpi->cpi_flags;
		err = nvlist_add_uint32_array(paddrnvl, cpi->cpi_addr,
		    pair, 2);
	}

	if (err == 0) {
		err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr,
		    paddrnvl);
	}

	/* the local nvlist is released on success and on failure alike */
	nvlist_free(paddrnvl);
	return (err);
}
/*
 * Build caddrmapnvl from the information in the vhci cache and add it to
 * mainnvl under MDI_NVPNAME_CTADDRMAP.
 * The caller must hold vhcache_lock.
 * Returns 0 on success, errno on failure.
 */
static int
vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
{
	mdi_vhcache_client_t	*cct;
	nvlist_t		*caddrmapnvl;
	int			err;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	err = nvlist_alloc(&caddrmapnvl, NV_UNIQUE_NAME, KM_SLEEP);
	if (err != 0)
		return (err);

	/* add every cached client; stop at the first error */
	for (cct = vhcache->vhcache_client_head; cct != NULL && err == 0;
	    cct = cct->cct_next)
		err = vhcache_to_paddrnvl(vhcache, cct, caddrmapnvl);

	if (err == 0) {
		err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP,
		    caddrmapnvl);
	}

	/* the local nvlist is released on success and on failure alike */
	nvlist_free(caddrmapnvl);
	return (err);
}
/*
 * Build an nvlist from the information in the vhci cache.
 * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
 *
 * vhcache_lock is taken as reader while the cache is walked; as a side
 * effect every cached phci gets its cphci_id set to its list position.
 * If the cache has no phcis, the nvlist holds only the version.
 * Returns nvl on success, NULL on failure.
 */
static nvlist_t *
vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
{
	mdi_vhcache_phci_t	*cphci;
	nvlist_t		*nvl;
	char			**phcis;
	uint_t			phci_count;
	int			err, i;

	err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
	if (err != 0) {
		nvl = NULL;
		goto fail;
	}

	err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
	    MDI_VHCI_CACHE_VERSION);
	if (err != 0)
		goto fail;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (vhcache->vhcache_phci_head != NULL) {
		/* number the phcis; the numbers become the phci-ids */
		phci_count = 0;
		for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
		    cphci = cphci->cphci_next)
			cphci->cphci_id = phci_count++;

		/* build phci pathname list */
		phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
		i = 0;
		for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
		    cphci = cphci->cphci_next) {
			phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
			i++;
		}

		err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
		    phci_count);
		free_string_array(phcis, phci_count);

		if (err == 0)
			err = vhcache_to_caddrmapnvl(vhcache, nvl);
	}
	rw_exit(&vhcache->vhcache_lock);

	if (err == 0)
		return (nvl);

fail:
	nvlist_free(nvl);
	return (NULL);
}
/*
 * Find the vhcache phci whose path equals phci_path.
 * The caller must hold vhcache_lock.  Returns NULL if there is no match.
 */
static mdi_vhcache_phci_t *
lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
{
	mdi_vhcache_phci_t	*cphci;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	cphci = vhcache->vhcache_phci_head;
	while (cphci != NULL && strcmp(cphci->cphci_path, phci_path) != 0)
		cphci = cphci->cphci_next;

	return (cphci);
}
/*
 * Find the vhcache phci that currently references the specified phci.
 * The caller must hold vhcache_lock.  Returns NULL if there is no match.
 */
static mdi_vhcache_phci_t *
lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
{
	mdi_vhcache_phci_t	*cphci;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	cphci = vhcache->vhcache_phci_head;
	while (cphci != NULL && cphci->cphci_phci != ph)
		cphci = cphci->cphci_next;

	return (cphci);
}
/*
 * Add the specified phci to the vhci cache if not already present.
 *
 * An existing entry with the same device path is simply re-pointed at ph;
 * otherwise a new entry is created and the cache is marked dirty.
 */
static void
vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
{
	mdi_vhci_cache_t	*vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t	*cphci;
	char			*pathname;
	int			is_new;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(ph->ph_dip, pathname);

	cphci = lookup_vhcache_phci_by_name(vhcache, pathname);
	is_new = (cphci == NULL);
	if (is_new) {
		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
		cphci->cphci_phci = ph;
		enqueue_vhcache_phci(vhcache, cphci);
	} else {
		cphci->cphci_phci = ph;
	}

	rw_exit(&vhcache->vhcache_lock);

	/*
	 * Since a new phci has been added, reset
	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
	 * during next vhcache_discover_paths().
	 */
	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_path_discovery_cutoff_time = 0;
	mutex_exit(&vhc->vhc_lock);

	kmem_free(pathname, MAXPATHLEN);

	if (is_new)
		vhcache_dirty(vhc);
}
/*
 * Drop the vhci cache's reference to the specified phci.
 * The mdi_vhcache_phci structure itself stays in the cache; only its
 * pointer to the phci is cleared.
 */
static void
vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
{
	mdi_vhci_cache_t	*vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t	*cphci;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
	if (cphci != NULL)
		cphci->cphci_phci = NULL;
	rw_exit(&vhcache->vhcache_lock);
}
/*
 * Initialize the lookup token dst: copy it from src if src is given,
 * otherwise reset it to the empty state (no cct, lookup time 0).
 */
static void
init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
    mdi_vhcache_lookup_token_t *src)
{
	if (src != NULL) {
		dst->lt_cct = src->lt_cct;
		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
		return;
	}

	dst->lt_cct = NULL;
	dst->lt_cct_lookup_time = 0;
}
/*
 * Look up the vhcache client for the specified client name and address.
 * The caller must hold vhcache_lock.
 *
 * token is optional.  When given, it caches the result of the lookup:
 * a token recorded after the last vhcache clean short-circuits the hash
 * lookup, and every hash lookup refreshes (or resets) the token.
 * Returns NULL if the client is not in the cache.
 */
static mdi_vhcache_client_t *
lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *token)
{
	mdi_vhcache_client_t	*cct = NULL;
	mod_hash_val_t		hv;
	char			*key;
	int			keysz;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	/*
	 * If no vhcache clean occurred since the last lookup, we can
	 * simply return the cct from the last lookup operation.
	 * It works because ccts are never freed except during the vhcache
	 * cleanup operation.
	 */
	if (token != NULL &&
	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
		return (token->lt_cct);

	key = vhcache_mknameaddr(ct_name, ct_addr, &keysz);
	if (mod_hash_find(vhcache->vhcache_client_hash,
	    (mod_hash_key_t)key, &hv) == 0) {
		cct = (mdi_vhcache_client_t *)hv;
		if (token != NULL) {
			token->lt_cct = cct;
			token->lt_cct_lookup_time = ddi_get_lbolt64();
		}
	} else if (token != NULL) {
		token->lt_cct = NULL;
		token->lt_cct_lookup_time = 0;
	}
	kmem_free(key, keysz);

	return (cct);
}
/*
 * Add the specified path to the vhci cache if not already present.
 * Also add the vhcache client for the client corresponding to this path
 * if it doesn't already exist.
 *
 * Runs with vhcache_lock held as writer.  The cache is marked dirty (after
 * the lock is dropped) only if a client or path entry was created, or if
 * an existing path had its MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag cleared.
 */
static void
vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
mdi_vhcache_client_t *cct;
mdi_vhcache_pathinfo_t *cpi;
mdi_phci_t *ph = pip->pi_phci;
mdi_client_t *ct = pip->pi_client;
int cache_updated = 0;
rw_enter(&vhcache->vhcache_lock, RW_WRITER);
/* if vhcache client for this pip doesn't already exist, add it */
if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
NULL)) == NULL) {
cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
ct->ct_guid, NULL);
enqueue_vhcache_client(vhcache, cct);
(void) mod_hash_insert(vhcache->vhcache_client_hash,
(mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
cache_updated = 1;
}
/* look for an existing entry for this path: same phci and same address */
for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
if (cpi->cpi_cphci->cphci_phci == ph &&
strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
cpi->cpi_pip = pip;
if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
cpi->cpi_flags &=
~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
/*
 * The flag determines list order, so re-sort.  This relinks the
 * list being walked; it is safe only because the loop is left
 * immediately afterwards.
 */
sort_vhcache_paths(cct);
cache_updated = 1;
}
break;
}
}
/* cpi is NULL only if the walk found no matching entry: create one */
if (cpi == NULL) {
cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
ASSERT(cpi->cpi_cphci != NULL);
cpi->cpi_pip = pip;
enqueue_vhcache_pathinfo(cct, cpi);
cache_updated = 1;
}
rw_exit(&vhcache->vhcache_lock);
if (cache_updated)
vhcache_dirty(vhc);
}
/*
 * Drop the vhci cache's reference to the specified path.
 * The cached pathinfo entry stays in the cache; only its pointer to the
 * pathinfo node is cleared.
 */
static void
vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
	mdi_vhci_cache_t	*vhcache = &vhc->vhc_vhcache;
	mdi_client_t		*ct = pip->pi_client;
	mdi_vhcache_client_t	*cct;
	mdi_vhcache_pathinfo_t	*cpi;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
	    NULL);
	if (cct != NULL) {
		/* find the first cached entry that points at pip, if any */
		cpi = cct->cct_cpi_head;
		while (cpi != NULL && cpi->cpi_pip != pip)
			cpi = cpi->cpi_next;
		if (cpi != NULL)
			cpi->cpi_pip = NULL;
	}

	rw_exit(&vhcache->vhcache_lock);
}
/*
 * Flush the vhci cache to disk.
 *
 * force_flag: when 0, the flush is refused (MDI_FAILURE) unless
 * i_ddi_io_initialized() is true; when non-zero that check is skipped.
 *
 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.  A write that
 * fails with EROFS is not reported as a failure: the cache is marked
 * MDI_VHC_READONLY_FS and MDI_SUCCESS is returned.
 */
static int
flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
{
nvlist_t *nvl;
int err;
int rv;
/*
 * It is possible that the system may shutdown before
 * i_ddi_io_initialized (during stmsboot for example). To allow for
 * flushing the cache in this case do not check for
 * i_ddi_io_initialized when force flag is set.
 */
if (force_flag == 0 && !i_ddi_io_initialized())
return (MDI_FAILURE);
/* failure to build the nvlist is reported as EFAULT */
if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
nvlist_free(nvl);
} else
err = EFAULT;
rv = MDI_SUCCESS;
mutex_enter(&vhc->vhc_lock);
if (err != 0) {
if (err == EROFS) {
/* read-only filesystem: clear the error and dirty state */
vhc->vhc_flags |= MDI_VHC_READONLY_FS;
vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
MDI_VHC_VHCACHE_DIRTY);
} else {
/* log only on the transition into the error state */
if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
cmn_err(CE_CONT, "%s: update failed\n",
vhc->vhc_vhcache_filename);
vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
}
rv = MDI_FAILURE;
}
} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
/* first successful flush after an earlier failure */
cmn_err(CE_CONT,
"%s: update now ok\n", vhc->vhc_vhcache_filename);
vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
}
mutex_exit(&vhc->vhc_lock);
return (rv);
}
/*
 * Call flush_vhcache() to flush the vhci cache at the scheduled time.
 * Exits itself if left idle for the idle timeout period.
 *
 * arg is the mdi_vhci_config_t.  The thread runs with vhc_lock held except
 * around the flush itself, and clears MDI_VHC_VHCACHE_FLUSH_THREAD when it
 * exits, either because MDI_VHC_EXIT was set or because the cache stayed
 * clean for the whole idle period.
 */
static void
vhcache_flush_thread(void *arg)
{
mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
clock_t idle_time, quit_at_ticks;
callb_cpr_t cprinfo;
/* number of seconds to sleep idle before exiting */
idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;
CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
"mdi_vhcache_flush");
mutex_enter(&vhc->vhc_lock);
for (; ; ) {
/* phase 1: while dirty, wait for the scheduled time and then flush */
while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
CALLB_CPR_SAFE_BEGIN(&cprinfo);
(void) cv_timedwait(&vhc->vhc_cv,
&vhc->vhc_lock, vhc->vhc_flush_at_ticks);
CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
} else {
/*
 * Clear the dirty flag before flushing and drop the lock around
 * the flush; a failed flush re-marks the cache dirty.
 */
vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
mutex_exit(&vhc->vhc_lock);
if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
vhcache_dirty(vhc);
mutex_enter(&vhc->vhc_lock);
}
}
/* phase 2: idle until dirtied again, told to exit, or timed out */
quit_at_ticks = ddi_get_lbolt() + idle_time;
while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
!(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
ddi_get_lbolt() < quit_at_ticks) {
CALLB_CPR_SAFE_BEGIN(&cprinfo);
(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
quit_at_ticks);
CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
}
/* keep looping only if the cache became dirty and no exit was requested */
if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
!(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
goto out;
}
out:
vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
CALLB_CPR_EXIT(&cprinfo);
}
/*
 * Make vhci cache dirty and schedule flushing by vhcache flush thread.
 *
 * Does nothing if the cache has not been fully set up
 * (MDI_VHCI_CACHE_SETUP_DONE clear) or if MDI_VHC_READONLY_FS is set.
 * Otherwise the flush is (re)scheduled mdi_vhcache_flush_delay seconds from
 * now, and the flush thread is woken up or, if there is none, created.
 */
static void
vhcache_dirty(mdi_vhci_config_t *vhc)
{
mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
int create_thread;
rw_enter(&vhcache->vhcache_lock, RW_READER);
/* do not flush cache until the cache is fully built */
if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
rw_exit(&vhcache->vhcache_lock);
return;
}
rw_exit(&vhcache->vhcache_lock);
mutex_enter(&vhc->vhc_lock);
if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
mutex_exit(&vhc->vhc_lock);
return;
}
vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
mdi_vhcache_flush_delay * TICKS_PER_SECOND;
if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
/* a flush thread exists: wake it so it sees the new schedule */
cv_broadcast(&vhc->vhc_cv);
create_thread = 0;
} else {
/*
 * Set the flag while still holding the lock so that concurrent
 * callers do not each create a thread.
 */
vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
create_thread = 1;
}
mutex_exit(&vhc->vhc_lock);
/* the thread itself is created after vhc_lock has been dropped */
if (create_thread)
(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
0, &p0, TS_RUN, minclsyspri);
}
/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 * Allocated by bus_config_all_phcis() and freed, together with its path
 * string, by bus_config_phci().
 */
typedef struct mdi_phci_bus_config_s {
char *phbc_phci_path; /* private copy of the phci device path */
struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */
struct mdi_phci_bus_config_s *phbc_next; /* next request in the list */
} mdi_phci_bus_config_t;
/*
 * vhci bus config structure - one for each vhci bus config operation.
 * Shared by all phci bus config requests of that operation.
 */
typedef struct mdi_vhci_bus_config_s {
ddi_bus_config_op_t vhbc_op; /* bus config op */
major_t vhbc_op_major; /* bus config op major */
uint_t vhbc_op_flags; /* bus config op flags */
kmutex_t vhbc_lock; /* protects vhbc_thr_count */
kcondvar_t vhbc_cv; /* signalled when vhbc_thr_count reaches 0 */
int vhbc_thr_count; /* phci bus config requests still outstanding */
} mdi_vhci_bus_config_t;
/*
 * Bus config the specified phci.
 *
 * arg is an mdi_phci_bus_config_t, which is consumed: its path string and
 * the structure itself are freed here.  On completion the outstanding
 * request count of the owning vhci bus config is decremented and, when it
 * reaches zero, the waiter on vhbc_cv is woken up.
 */
static void
bus_config_phci(void *arg)
{
	mdi_phci_bus_config_t	*phbc = (mdi_phci_bus_config_t *)arg;
	mdi_vhci_bus_config_t	*vhbc = phbc->phbc_vhbusconfig;
	dev_info_t		*ph_dip;

	/*
	 * first configure all path components up to the phci and then
	 * configure the phci children.
	 */
	ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0);
	if (ph_dip != NULL) {
		switch (vhbc->vhbc_op) {
		case BUS_CONFIG_DRIVER:
		case BUS_UNCONFIG_DRIVER:
			(void) ndi_devi_config_driver(ph_dip,
			    vhbc->vhbc_op_flags, vhbc->vhbc_op_major);
			break;
		default:
			(void) ndi_devi_config(ph_dip, vhbc->vhbc_op_flags);
			break;
		}

		/* release the hold that e_ddi_hold_devi_by_path() placed */
		ndi_rele_devi(ph_dip);
	}

	/* vhbc was saved above, so phbc can be released before it is used */
	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
	kmem_free(phbc, sizeof (*phbc));

	mutex_enter(&vhbc->vhbc_lock);
	if (--vhbc->vhbc_thr_count == 0)
		cv_broadcast(&vhbc->vhbc_cv);
	mutex_exit(&vhbc->vhbc_lock);
}
/*
 * Bus config all phcis associated with the vhci in parallel.
 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
 *
 * One request is built for every cached phci while vhcache_lock is held
 * as reader; the requests are then handed to bus_config_phci(), each in
 * its own thread (or inline when mdi_mtc_off is set).  The function
 * returns only after all of them have completed.  It returns immediately
 * if the cache has no phcis.
 */
static void
bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
ddi_bus_config_op_t op, major_t maj)
{
mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
mdi_vhci_bus_config_t *vhbc;
mdi_vhcache_phci_t *cphci;
rw_enter(&vhcache->vhcache_lock, RW_READER);
if (vhcache->vhcache_phci_head == NULL) {
rw_exit(&vhcache->vhcache_lock);
return;
}
vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
cphci = cphci->cphci_next) {
/* skip phcis that haven't attached before root is available */
if (!modrootloaded && (cphci->cphci_phci == NULL))
continue;
/* the request gets its own copy of the path; it outlives the lock */
phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
KM_SLEEP);
phbc->phbc_vhbusconfig = vhbc;
phbc->phbc_next = phbc_head;
phbc_head = phbc;
/* counted up front, before any request starts running */
vhbc->vhbc_thr_count++;
}
rw_exit(&vhcache->vhcache_lock);
vhbc->vhbc_op = op;
vhbc->vhbc_op_major = maj;
/* of the caller's flags only the two REPROBE flags are passed on */
vhbc->vhbc_op_flags = NDI_NO_EVENT |
(flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
/* now create threads to initiate bus config on all phcis in parallel */
for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
/* bus_config_phci() frees phbc, so fetch the next pointer first */
phbc_next = phbc->phbc_next;
if (mdi_mtc_off)
bus_config_phci((void *)phbc);
else
(void) thread_create(NULL, 0, bus_config_phci, phbc,
0, &p0, TS_RUN, minclsyspri);
}
mutex_enter(&vhbc->vhbc_lock);
/* wait until all threads exit */
while (vhbc->vhbc_thr_count > 0)
cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
mutex_exit(&vhbc->vhbc_lock);
mutex_destroy(&vhbc->vhbc_lock);
cv_destroy(&vhbc->vhbc_cv);
kmem_free(vhbc, sizeof (*vhbc));
}
/*
 * Single threaded version of bus_config_all_phcis(): the call is
 * bracketed by single_threaded_vhconfig_enter()/_exit() on the vhci.
 */
static void
st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
    ddi_bus_config_op_t op, major_t maj)
{
	single_threaded_vhconfig_enter(vhc);
	bus_config_all_phcis(&vhc->vhc_vhcache, flags, op, maj);
	single_threaded_vhconfig_exit(vhc);
}
/*
 * Perform BUS_CONFIG_ONE on the specified child of the phci.
 * The path includes the child component in addition to the phci path.
 *
 * The path buffer is split in place at its last '/' while the phci is
 * looked up and is restored before returning, so the caller must pass a
 * writable buffer.
 *
 * Returns MDI_SUCCESS if the child was configured.  Returns MDI_FAILURE
 * otherwise, including when path contains no '/' and therefore has no
 * child component to extract.
 */
static int
bus_config_one_phci_child(char *path)
{
	dev_info_t	*ph_dip, *child;
	char		*sep, *devnm;
	int		rv = MDI_FAILURE;

	/*
	 * extract the child component of the phci; strrchr() returns NULL
	 * when there is no separator, which must not be written through.
	 */
	sep = strrchr(path, '/');
	if (sep == NULL)
		return (rv);
	*sep = '\0';
	devnm = sep + 1;

	/*
	 * first configure all path components up to the phci and then
	 * configure the phci child.
	 */
	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
		    NDI_SUCCESS) {
			/*
			 * release the hold that ndi_devi_config_one() placed
			 */
			ndi_rele_devi(child);
			rv = MDI_SUCCESS;
		}

		/* release the hold that e_ddi_hold_devi_by_path() placed */
		ndi_rele_devi(ph_dip);
	}

	/* put the separator back so that the caller's path is unchanged */
	*sep = '/';

	return (rv);
}
/*
 * Build a list of phci client paths for the specified vhci client.
 * The list includes only those phci client paths which aren't configured
 * yet: cached paths that have no pathinfo node, or whose pathinfo node is
 * still in the INIT state.  Each entry holds a newly allocated
 * "<phci path>/<ct_name>@<path addr>" string.
 * Returns the head of the list, or NULL if no path qualifies.
 */
static mdi_phys_path_t *
build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
{
	mdi_vhcache_pathinfo_t	*cpi;
	mdi_phys_path_t		*pp_head = NULL, *pp_tail = NULL, *pp;
	int			len, is_init;

	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		/*
		 * include only those paths that aren't configured.
		 */
		if (cpi->cpi_pip != NULL) {
			MDI_PI_LOCK(cpi->cpi_pip);
			is_init = MDI_PI_IS_INIT(cpi->cpi_pip) ? 1 : 0;
			MDI_PI_UNLOCK(cpi->cpi_pip);
			if (!is_init)
				continue;
		}

		pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
		/* +3: the '/' and '@' separators and the terminating NUL */
		len = strlen(cpi->cpi_cphci->cphci_path) +
		    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
		pp->phys_path = kmem_alloc(len, KM_SLEEP);
		(void) snprintf(pp->phys_path, len, "%s/%s@%s",
		    cpi->cpi_cphci->cphci_path, ct_name,
		    cpi->cpi_addr);
		pp->phys_path_next = NULL;

		/* append, so that the list keeps the order of the cache */
		if (pp_head != NULL)
			pp_tail->phys_path_next = pp;
		else
			pp_head = pp;
		pp_tail = pp;
	}

	return (pp_head);
}
/*
 * Free a phci client path list: every entry and its path string.
 */
static void
free_phclient_path_list(mdi_phys_path_t *pp_head)
{
	mdi_phys_path_t	*cur, *rest;

	rest = pp_head;
	while ((cur = rest) != NULL) {
		/* unlink before freeing */
		rest = cur->phys_path_next;
		kmem_free(cur->phys_path, strlen(cur->phys_path) + 1);
		kmem_free(cur, sizeof (*cur));
	}
}
/*
 * Allocate an async client config structure and initialize it with the
 * specified values.  The client name and address are duplicated; pp_head
 * is stored as is, so the new structure refers to the caller's list.
 * tok may be NULL, in which case the embedded token is reset.
 */
static mdi_async_client_config_t *
alloc_async_client_config(char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_async_client_config_t	*req;

	req = kmem_alloc(sizeof (*req), KM_SLEEP);

	req->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
	req->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
	req->acc_phclient_path_list_head = pp_head;
	init_vhcache_lookup_token(&req->acc_token, tok);
	req->acc_next = NULL;

	return (req);
}
/*
 * Free an async client config structure together with its members: the
 * phci client path list (if any) and the client name and address strings.
 */
static void
free_async_client_config(mdi_async_client_config_t *acc)
{
	mdi_phys_path_t	*paths = acc->acc_phclient_path_list_head;

	if (paths != NULL)
		free_phclient_path_list(paths);

	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
	kmem_free(acc, sizeof (*acc));
}
/*
 * Sort the vhcache pathinfos (cpis) of the specified client so that all
 * cpis without the MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag come first and
 * all cpis with the flag come last.
 *
 * The list is detached and rebuilt by feeding every cpi back through
 * enqueue_vhcache_pathinfo().
 */
static void
sort_vhcache_paths(mdi_vhcache_client_t *cct)
{
	mdi_vhcache_pathinfo_t	*pending, *cur;

	pending = cct->cct_cpi_head;
	cct->cct_cpi_head = NULL;
	cct->cct_cpi_tail = NULL;

	while ((cur = pending) != NULL) {
		/* the enqueue rewrites cpi_next, so advance first */
		pending = cur->cpi_next;
		enqueue_vhcache_pathinfo(cct, cur);
	}
}
/*
* Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
* every vhcache pathinfo of the specified client. If not, adjust the flag
* setting appropriately and re-sort the client's path list.
*
* The flag is treated as correct when it is clear on a cpi that has a
* pathinfo node (cpi_pip != NULL) and set on a cpi that has none.
*
* Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
* on-disk vhci cache. So every time this flag is updated the cache must be
* flushed.
*
* vhc		vhci config whose cache is examined
* ct_name	client node name
* ct_addr	client address
* tok		lookup token passed to lookup_vhcache_client()
*
* This function acquires and releases vhcache_lock itself. It returns
* without changing anything if the client is not found in the cache.
*/
static void
adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
mdi_vhcache_lookup_token_t *tok)
{
mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
mdi_vhcache_client_t *cct;
mdi_vhcache_pathinfo_t *cpi;
rw_enter(&vhcache->vhcache_lock, RW_READER);
if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
== NULL) {
rw_exit(&vhcache->vhcache_lock);
return;
}
/*
* to avoid unnecessary on-disk cache updates, first check if an
* update is really needed. If no update is needed simply return.
*/
for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
/* stop at the first cpi whose flag disagrees with cpi_pip */
if ((cpi->cpi_pip != NULL &&
(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
(cpi->cpi_pip == NULL &&
!(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
break;
}
}
/* walked the whole list without a mismatch: nothing to update */
if (cpi == NULL) {
rw_exit(&vhcache->vhcache_lock);
return;
}
/*
* The flags are about to be modified, so the writer lock is needed.
* If the in-place upgrade fails the lock is dropped and re-acquired
* as writer; the client is then looked up again since the earlier
* cct pointer was obtained under a lock that is no longer held.
*/
if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
rw_exit(&vhcache->vhcache_lock);
rw_enter(&vhcache->vhcache_lock, RW_WRITER);
if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
tok)) == NULL) {
rw_exit(&vhcache->vhcache_lock);
return;
}
}
/* bring every flag in line with the presence of a pathinfo node */
for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
if (cpi->cpi_pip != NULL)
cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
else
cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
}
sort_vhcache_paths(cct);
rw_exit(&vhcache->vhcache_lock);
/* mark the cache dirty only after the cache lock has been released */
vhcache_dirty(vhc);
}
/*
 * Configure all specified paths of the client.
 *
 * Each physical path on pp_head is bus configured in list order; the
 * individual results are ignored.  Afterwards the client's vhcache path
 * hints are adjusted and sorted.  The path list is not freed here.
 */
static void
config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_phys_path_t *cur = pp_head;

	while (cur != NULL) {
		(void) bus_config_one_phci_child(cur->phys_path);
		cur = cur->phys_path_next;
	}

	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
}
/*
* Dequeue elements from vhci async client config list and bus configure
* their corresponding phci clients.
*
* arg is the mdi_vhci_config_t whose vhc_acc_list_head/tail queue is
* serviced. The thread keeps taking requests off the head of the queue
* until either MDI_VHC_EXIT is set in vhc_flags or the queue has stayed
* empty for mdi_async_config_idle_time seconds; it then decrements
* vhc_acc_thrcount and returns.
*/
static void
config_client_paths_thread(void *arg)
{
mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
mdi_async_client_config_t *acc;
clock_t quit_at_ticks;
clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
callb_cpr_t cprinfo;
CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
"mdi_config_client_paths");
for (; ; ) {
/* the idle deadline restarts after every processed request */
quit_at_ticks = ddi_get_lbolt() + idle_time;
mutex_enter(&vhc->vhc_lock);
/* sleep until told to exit, work arrives, or the deadline passes */
while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
vhc->vhc_acc_list_head == NULL &&
ddi_get_lbolt() < quit_at_ticks) {
CALLB_CPR_SAFE_BEGIN(&cprinfo);
(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
quit_at_ticks);
CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
}
/* leave (with vhc_lock still held) on exit request or idle timeout */
if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
vhc->vhc_acc_list_head == NULL)
goto out;
/* unlink the request at the head of the queue */
acc = vhc->vhc_acc_list_head;
vhc->vhc_acc_list_head = acc->acc_next;
if (vhc->vhc_acc_list_head == NULL)
vhc->vhc_acc_list_tail = NULL;
vhc->vhc_acc_count--;
mutex_exit(&vhc->vhc_lock);
/* do the configuration work without holding vhc_lock */
config_client_paths_sync(vhc, acc->acc_ct_name,
acc->acc_ct_addr, acc->acc_phclient_path_list_head,
&acc->acc_token);
/* frees the request along with the path list it carries */
free_async_client_config(acc);
}
out:
/* vhc_lock is held here, so the thread count update is protected */
vhc->vhc_acc_thrcount--;
/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
CALLB_CPR_EXIT(&cprinfo);
}
/*
 * Arrange for all the phci client paths (pp_head) for the specified client
 * to be bus configured asynchronously by a thread.
 *
 * This function takes over the pp_head list in every case:
 *  - if mdi_mtc_off is set the paths are configured synchronously right
 *    here and the list is freed;
 *  - if a request for the same client name and address is already queued,
 *    the new request (and with it the list) is discarded;
 *  - otherwise the request is appended to the vhci's async queue.
 *
 * After queueing, the waiting threads are woken if there are at least as
 * many threads as queued requests; otherwise one more
 * config_client_paths_thread is created.
 */
static void
config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_async_client_config_t *queued, *request;
	int spawn_worker = 0;

	if (pp_head == NULL)
		return;

	if (mdi_mtc_off) {
		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
		free_phclient_path_list(pp_head);
		return;
	}

	request = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
	ASSERT(request);

	mutex_enter(&vhc->vhc_lock);

	/* drop the new request if this client is already on the queue */
	queued = vhc->vhc_acc_list_head;
	while (queued != NULL) {
		if (strcmp(ct_name, queued->acc_ct_name) == 0 &&
		    strcmp(ct_addr, queued->acc_ct_addr) == 0) {
			free_async_client_config(request);
			mutex_exit(&vhc->vhc_lock);
			return;
		}
		queued = queued->acc_next;
	}

	/* append to the tail of the queue */
	if (vhc->vhc_acc_list_head != NULL)
		vhc->vhc_acc_list_tail->acc_next = request;
	else
		vhc->vhc_acc_list_head = request;
	vhc->vhc_acc_list_tail = request;
	vhc->vhc_acc_count++;

	if (vhc->vhc_acc_count > vhc->vhc_acc_thrcount) {
		/* more requests than threads: account for a new thread */
		vhc->vhc_acc_thrcount++;
		spawn_worker = 1;
	} else {
		cv_broadcast(&vhc->vhc_cv);
	}

	mutex_exit(&vhc->vhc_lock);

	if (spawn_worker)
		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
		    0, &p0, TS_RUN, minclsyspri);
}
/*
 * Return number of online paths for the specified client.
 *
 * Only vhcache pathinfos that have a pathinfo node attached are counted;
 * each node's state is read under its own pathinfo lock.
 */
static int
nonline_paths(mdi_vhcache_client_t *cct)
{
	mdi_vhcache_pathinfo_t *entry;
	int total = 0;

	for (entry = cct->cct_cpi_head; entry != NULL;
	    entry = entry->cpi_next) {
		if (entry->cpi_pip == NULL)
			continue;

		MDI_PI_LOCK(entry->cpi_pip);
		if (entry->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
			total++;
		MDI_PI_UNLOCK(entry->cpi_pip);
	}

	return (total);
}
/*
* Bus configure all paths for the specified vhci client.
* If at least one path for the client is already online, the remaining paths
* will be configured asynchronously. Otherwise, it synchronously configures
* the paths until at least one path is online and then rest of the paths
* will be configured asynchronously.
*
* Locking: the caller must hold vhcache_lock on entry (asserted below).
* This function releases that lock on every return path, so the caller
* must not release it again.
*
* vhc		vhci config of the client's vhci
* ct_name	client node name; nothing is done if NULL
* ct_addr	client address; nothing is done if NULL
*/
static void
config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
{
mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
mdi_phys_path_t *pp_head, *pp;
mdi_vhcache_client_t *cct;
mdi_vhcache_lookup_token_t tok;
ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
init_vhcache_lookup_token(&tok, NULL);
/* bail out on bad arguments, unknown client, or no paths to configure */
if (ct_name == NULL || ct_addr == NULL ||
(cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
== NULL ||
(pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
rw_exit(&vhcache->vhcache_lock);
return;
}
/* if at least one path is online, configure the rest asynchronously */
if (nonline_paths(cct) > 0) {
rw_exit(&vhcache->vhcache_lock);
/* the whole list is handed over; it is not freed here */
config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
return;
}
rw_exit(&vhcache->vhcache_lock);
/* no path online yet: configure one path at a time, without the lock */
for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
rw_enter(&vhcache->vhcache_lock, RW_READER);
/* the lock was dropped, so the client has to be looked up again */
if ((cct = lookup_vhcache_client(vhcache, ct_name,
ct_addr, &tok)) == NULL) {
rw_exit(&vhcache->vhcache_lock);
goto out;
}
if (nonline_paths(cct) > 0 &&
pp->phys_path_next != NULL) {
rw_exit(&vhcache->vhcache_lock);
/*
* A path is online now. Hand the remainder of the
* list to the async code and cut it off from the
* part that is freed at "out" below.
*/
config_client_paths_async(vhc, ct_name, ct_addr,
pp->phys_path_next, &tok);
pp->phys_path_next = NULL;
goto out;
}
rw_exit(&vhcache->vhcache_lock);
}
}
/* every path was tried synchronously; fix up hints and ordering */
adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
out:
free_phclient_path_list(pp_head);
}
/*
 * Enter the single threaded vhci configuration section: wait on vhc_cv
 * until MDI_VHC_SINGLE_THREADED is clear in vhc_flags, then set it.
 * Paired with single_threaded_vhconfig_exit().
 */
static void
single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);

	for (;;) {
		if (!(vhc->vhc_flags & MDI_VHC_SINGLE_THREADED))
			break;
		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
	}
	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;

	mutex_exit(&vhc->vhc_lock);
}
/*
 * Leave the single threaded vhci configuration section: clear
 * MDI_VHC_SINGLE_THREADED in vhc_flags and wake all waiters on vhc_cv.
 */
static void
single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);

	vhc->vhc_flags = vhc->vhc_flags & ~MDI_VHC_SINGLE_THREADED;
	cv_broadcast(&vhc->vhc_cv);

	mutex_exit(&vhc->vhc_lock);
}
/*
* One entry of a built-in phci driver table: the driver's name and whether
* it supports the root device.
*/
typedef struct mdi_phci_driver_info {
char *phdriver_name; /* name of the phci driver */
/* set to non zero if the phci driver supports root device */
int phdriver_root_support;
} mdi_phci_driver_info_t;
/*
* vhci class and root support capability of a phci driver can be
* specified using ddi-vhci-class and ddi-no-root-support properties in the
* phci driver.conf file. The built-in tables below contain this information
* for those phci drivers whose driver.conf files don't yet contain this info.
*
* All phci drivers except iscsi have root device support.
*/
/* built-in phci drivers of the scsi vhci class */
static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
{ "fp", 1 },
{ "iscsi", 0 },
{ "ibsrp", 1 }
};
/* built-in phci drivers of the ib vhci class (a single entry) */
static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 };
/*
 * Resize a kmem allocation.
 *
 * A zero-filled buffer of new_size bytes is allocated.  If old_ptr is not
 * NULL, the smaller of old_size and new_size bytes are copied over from it
 * and the old buffer (of old_size bytes) is freed.  Returns the new buffer.
 */
static void *
mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
{
	void *fresh = kmem_zalloc(new_size, KM_SLEEP);

	if (old_ptr == NULL)
		return (fresh);

	bcopy(old_ptr, fresh, MIN(old_size, new_size));
	kmem_free(old_ptr, old_size);
	return (fresh);
}
/*
 * Append a phci driver to a pair of parallel, growable arrays.
 *
 * driver_list		array of driver name strings
 * root_support_list	array of root support values, same indexing
 * cur_elements		number of slots in use; incremented on return
 * max_elements		number of slots allocated; grown by 10 when full
 * driver_name		name to add; a duplicate of the string is stored
 * root_support		root support value stored alongside the name
 */
static void
add_to_phci_list(char ***driver_list, int **root_support_list,
    int *cur_elements, int *max_elements, char *driver_name, int root_support)
{
	int used = *cur_elements;

	ASSERT(used <= *max_elements);

	if (used == *max_elements) {
		int grown = *max_elements + 10;

		/* both arrays are full; enlarge them together */
		*max_elements = grown;
		*driver_list = mdi_realloc(*driver_list,
		    sizeof (char *) * used, sizeof (char *) * grown);
		*root_support_list = mdi_realloc(*root_support_list,
		    sizeof (int) * used, sizeof (int) * grown);
	}

	(*driver_list)[used] = i_ddi_strdup(driver_name, KM_SLEEP);
	(*root_support_list)[used] = root_support;
	*cur_elements = used + 1;
}
/*
 * Build the list of phci drivers belonging to the specified vhci class.
 *
 * Two sources are combined:
 *  1. drivers flagged DN_PHCI_DRIVER whose global properties carry a
 *     DDI_VHCI_CLASS string equal to vhci_class; such a driver has root
 *     support unless it also carries the DDI_NO_ROOT_SUPPORT property;
 *  2. the built-in table for the class (scsi or ib), skipping any name
 *     already found through step 1.
 *
 * On return *driver_list and *root_support_list are parallel arrays (NULL
 * if nothing was found), *cur_elements is the number of entries and
 * *max_elements the number of allocated slots.  The caller frees them.
 */
static void
get_phci_driver_list(char *vhci_class, char ***driver_list,
    int **root_support_list, int *cur_elements, int *max_elements)
{
	mdi_phci_driver_info_t *builtin;
	int nbuiltin, has_root, b, k, nconf;
	major_t maj;
	struct devnames *dnp;
	ddi_prop_t *classprop;

	*driver_list = NULL;
	*root_support_list = NULL;
	*cur_elements = 0;
	*max_elements = 0;

	/* add the phci drivers derived from the phci driver.conf files */
	for (maj = 0; maj < devcnt; maj++) {
		dnp = &devnamesp[maj];
		if (!(dnp->dn_flags & DN_PHCI_DRIVER))
			continue;

		LOCK_DEV_OPS(&dnp->dn_lock);
		if (dnp->dn_global_prop_ptr != NULL &&
		    (classprop = i_ddi_prop_search(DDI_DEV_T_ANY,
		    DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
		    &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
		    strcmp(classprop->prop_val, vhci_class) == 0) {
			has_root = 1;
			if (i_ddi_prop_search(DDI_DEV_T_ANY,
			    DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
			    &dnp->dn_global_prop_ptr->prop_list) != NULL)
				has_root = 0;

			add_to_phci_list(driver_list, root_support_list,
			    cur_elements, max_elements, dnp->dn_name,
			    has_root);
		}
		UNLOCK_DEV_OPS(&dnp->dn_lock);
	}

	/* only driver.conf derived entries are checked for duplicates */
	nconf = *cur_elements;

	/* add the phci drivers specified in the built-in tables */
	if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
		builtin = scsi_phci_driver_list;
		nbuiltin = sizeof (scsi_phci_driver_list) /
		    sizeof (mdi_phci_driver_info_t);
	} else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
		builtin = ib_phci_driver_list;
		nbuiltin = sizeof (ib_phci_driver_list) /
		    sizeof (mdi_phci_driver_info_t);
	} else {
		builtin = NULL;
		nbuiltin = 0;
	}

	for (b = 0; b < nbuiltin; b++) {
		char *name = builtin[b].phdriver_name;

		/* add this phci driver if not already added before */
		for (k = 0; k < nconf; k++) {
			if (strcmp((*driver_list)[k], name) == 0)
				break;
		}
		if (k < nconf)
			continue;

		add_to_phci_list(driver_list, root_support_list,
		    cur_elements, max_elements, name,
		    builtin[b].phdriver_root_support);
	}
}
/*
 * Attach the phci driver instances associated with the specified vhci class.
 * If root is mounted attach all phci driver instances.
 * If root is not mounted, attach the instances of only those phci
 * drivers that have the root support.
 *
 * Each eligible driver is held with ddi_hold_installed_driver() and, when
 * that succeeds, released again right away.  The driver list obtained
 * from get_phci_driver_list() is freed before returning.
 */
static void
attach_phci_drivers(char *vhci_class)
{
	char **names;
	int *root_ok;
	int count, capacity, idx;
	major_t maj;

	get_phci_driver_list(vhci_class, &names, &root_ok, &count, &capacity);

	for (idx = 0; idx < count; idx++) {
		/* before root is loaded, skip drivers lacking root support */
		if (!modrootloaded && !root_ok[idx])
			continue;

		maj = ddi_name_to_major(names[idx]);
		if (maj == DDI_MAJOR_T_NONE)
			continue;

		if (ddi_hold_installed_driver(maj))
			ddi_rele_driver(maj);
	}

	if (names == NULL)
		return;

	for (idx = 0; idx < count; idx++)
		kmem_free(names[idx], strlen(names[idx]) + 1);
	kmem_free(names, sizeof (char *) * capacity);
	kmem_free(root_ok, sizeof (int) * capacity);
}
/*
 * Build vhci cache:
 *
 * Attach phci driver instances and then drive BUS_CONFIG_ALL on
 * the phci driver instances. During this process the cache gets built.
 *
 * Cache is built fully if the root is mounted.
 * If the root is not mounted, phci drivers that do not have root support
 * are not attached. As a result the cache is built partially. The entries
 * in the cache reflect only those phci drivers that have root support.
 *
 * Returns 0 if the cache was already marked MDI_VHCI_CACHE_SETUP_DONE and
 * nothing was done, 1 if the build was carried out by this call.  The
 * whole operation runs inside the single threaded vhconfig section.
 */
static int
build_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int already_built;

	single_threaded_vhconfig_enter(vhc);

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	already_built =
	    (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) != 0;
	rw_exit(&vhcache->vhcache_lock);

	if (already_built) {
		single_threaded_vhconfig_exit(vhc);
		return (0);
	}

	attach_phci_drivers(vh->vh_class);
	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
	    BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
	rw_exit(&vhcache->vhcache_lock);

	single_threaded_vhconfig_exit(vhc);
	vhcache_dirty(vhc);
	return (1);
}
/*
 * Determine if discovery of paths is needed.
 *
 * Returns 1 when a full path discovery should be done now, 0 otherwise.
 * The decision is made under vhc_lock, in this order:
 *
 *  - While I/O is not yet initialized, each call uses up one unit of
 *    vhc_path_discovery_boot (if any is left) and asks for discovery;
 *    once I/O is initialized the same is done with
 *    vhc_path_discovery_postboot.
 *  - Otherwise do full path discovery at most once per
 *    mdi_path_discovery_interval: discovery is requested only when the
 *    interval is not -1 and the current lbolt has reached
 *    vhc_path_discovery_cutoff_time.  This is to avoid a series of full
 *    path discoveries when opening stale /dev/[r]dsk links.
 */
static int
vhcache_do_discovery(mdi_vhci_config_t *vhc)
{
	int io_up, discover;

	mutex_enter(&vhc->vhc_lock);

	io_up = (i_ddi_io_initialized() != 0);

	if (!io_up && vhc->vhc_path_discovery_boot > 0) {
		vhc->vhc_path_discovery_boot--;
		discover = 1;
	} else if (io_up && vhc->vhc_path_discovery_postboot > 0) {
		vhc->vhc_path_discovery_postboot--;
		discover = 1;
	} else if (mdi_path_discovery_interval != -1 &&
	    ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time) {
		discover = 1;
	} else {
		discover = 0;
	}

	mutex_exit(&vhc->vhc_lock);
	return (discover);
}
/*
 * Discover all paths:
 *
 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
 * driver instances. During this process all paths will be discovered.
 *
 * The discovery is only carried out when vhcache_do_discovery() asks for
 * it; afterwards the next cutoff time is set to the current lbolt plus
 * mdi_path_discovery_interval seconds.  Returns 1 if discovery was done,
 * 0 if it was skipped.
 */
static int
vhcache_discover_paths(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;

	single_threaded_vhconfig_enter(vhc);

	if (!vhcache_do_discovery(vhc)) {
		single_threaded_vhconfig_exit(vhc);
		return (0);
	}

	attach_phci_drivers(vh->vh_class);
	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
	    NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);

	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() +
	    mdi_path_discovery_interval * TICKS_PER_SECOND;
	mutex_exit(&vhc->vhc_lock);

	single_threaded_vhconfig_exit(vhc);
	return (1);
}
/*
* Generic vhci bus config implementation:
*
* Parameters
* vdip vhci dip
* flags bus config flags
* op bus config operation
* The remaining parameters are bus config operation specific
*
* for BUS_CONFIG_ONE
* arg pointer to name@addr
* child upon successful return from this function, *child will be
* set to the configured and held devinfo child node of vdip.
* ct_addr pointer to client address (i.e. GUID)
*
* for BUS_CONFIG_DRIVER
* arg major number of the driver
* child and ct_addr parameters are ignored
*
* for BUS_CONFIG_ALL
* arg, child, and ct_addr parameters are ignored
*
* Note that for the rest of the bus config operations, this function simply
* calls the framework provided default bus config routine.
*
* Return Values:
* MDI_SUCCESS if ndi_busop_bus_config() succeeds, either on the first
* attempt or, for BUS_CONFIG_ONE, after a full path discovery
* MDI_FAILURE otherwise
*
* For BUS_CONFIG_ONE the string at arg is modified in place for the
* duration of the config_client_paths() call (the '@' is replaced by a
* terminator and restored afterwards), so arg must be writable.
*/
int
mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
void *arg, dev_info_t **child, char *ct_addr)
{
mdi_vhci_t *vh = i_devi_get_vhci(vdip);
mdi_vhci_config_t *vhc = vh->vh_config;
mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
/* rv becomes non-zero only if this call built the vhci cache */
int rv = 0;
/* set once a BUS_CONFIG_ONE request carried a usable name@addr */
int params_valid = 0;
char *cp;
/*
* To bus config vhcis we relay operation, possibly using another
* thread, to phcis. The phci driver then interacts with MDI to cause
* vhci child nodes to be enumerated under the vhci node. Adding a
* vhci child requires an ndi_devi_enter of the vhci. Since another
* thread may be adding the child, to avoid deadlock we can't wait
* for the relayed operations to complete if we have already entered
* the vhci node.
*/
if (DEVI_BUSY_OWNED(vdip)) {
MDI_DEBUG(2, (MDI_NOTE, vdip,
"vhci dip is busy owned %p", (void *)vdip));
goto default_bus_config;
}
rw_enter(&vhcache->vhcache_lock, RW_READER);
if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
/* the cache lock is dropped while the cache is being built */
rw_exit(&vhcache->vhcache_lock);
rv = build_vhci_cache(vh);
rw_enter(&vhcache->vhcache_lock, RW_READER);
}
/* every case below is responsible for releasing vhcache_lock */
switch (op) {
case BUS_CONFIG_ONE:
if (arg != NULL && ct_addr != NULL) {
/* extract node name */
cp = (char *)arg;
while (*cp != '\0' && *cp != '@')
cp++;
if (*cp == '@') {
params_valid = 1;
/* temporarily terminate arg at the '@' to pass just the name */
*cp = '\0';
config_client_paths(vhc, (char *)arg, ct_addr);
/* config_client_paths() releases cache_lock */
*cp = '@';
break;
}
}
rw_exit(&vhcache->vhcache_lock);
break;
case BUS_CONFIG_DRIVER:
rw_exit(&vhcache->vhcache_lock);
/* skipped when the cache build above already configured the phcis */
if (rv == 0)
st_bus_config_all_phcis(vhc, flags, op,
(major_t)(uintptr_t)arg);
break;
case BUS_CONFIG_ALL:
rw_exit(&vhcache->vhcache_lock);
/*
* NOTE(review): the -1 major presumably means "no specific
* driver", like DDI_MAJOR_T_NONE used elsewhere in this file;
* confirm against st_bus_config_all_phcis().
*/
if (rv == 0)
st_bus_config_all_phcis(vhc, flags, op, -1);
break;
default:
rw_exit(&vhcache->vhcache_lock);
break;
}
default_bus_config:
/*
* All requested child nodes are enumerated under the vhci.
* Now configure them.
*/
if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
NDI_SUCCESS) {
return (MDI_SUCCESS);
} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
/* discover all paths and try configuring again */
if (vhcache_discover_paths(vh) &&
ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
NDI_SUCCESS)
return (MDI_SUCCESS);
}
return (MDI_FAILURE);
}
/*
* Read the on-disk vhci cache into an nvlist for the specified vhci class.
*/
static nvlist_t *
read_on_disk_vhci_cache(char *vhci_class)
{
nvlist_t *nvl;
int err;
char *filename;
filename = vhclass2vhcache_filename(vhci_class);
if ((err = fread_nvlist(filename, &nvl)) == 0) {
kmem_free(filename, strlen(filename) + 1);
return (nvl);
} else if (err == EIO)
cmn_err(CE_WARN, "%s: I/O error, will recreate", filename);
else if (err == EINVAL)
cmn_err(CE_WARN,
"%s: data file corrupted, will recreate", filename);
kmem_free(filename, strlen(filename) + 1);
return (NULL);
}
/*
 * Read on-disk vhci cache into nvlists for all vhci classes.
 * Called during booting by i_ddi_read_devices_files().
 *
 * The result for each class (possibly NULL) is stored in the matching
 * slot of vhcache_nvl[].
 */
void
mdi_read_devices_files(void)
{
	int cls = 0;

	while (cls < N_VHCI_CLASSES) {
		vhcache_nvl[cls] =
		    read_on_disk_vhci_cache(vhci_class_list[cls]);
		cls++;
	}
}
/*
* Remove all stale entries from vhci cache.
*
* Under the vhcache writer lock, the client list, each client's path list
* and the phci list are emptied and rebuilt from the entries that are
* still in use:
* - a path is kept only if it has both a pathinfo node (cpi_pip) and a
* registered phci (cpi_cphci->cphci_phci);
* - a client is kept only if at least one of its paths was kept; otherwise
* it is removed from the client hash and freed;
* - a cached phci is kept only if cphci_phci is set; otherwise it is freed.
* The clean time is recorded and the cache is marked dirty afterwards.
*/
static void
clean_vhcache(mdi_vhci_config_t *vhc)
{
mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
mdi_vhcache_phci_t *phci, *nxt_phci;
mdi_vhcache_client_t *client, *nxt_client;
mdi_vhcache_pathinfo_t *path, *nxt_path;
rw_enter(&vhcache->vhcache_lock, RW_WRITER);
/* detach the client list; surviving clients are re-enqueued below */
client = vhcache->vhcache_client_head;
vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
for ( ; client != NULL; client = nxt_client) {
nxt_client = client->cct_next;
/* same approach for this client's path list */
path = client->cct_cpi_head;
client->cct_cpi_head = client->cct_cpi_tail = NULL;
for ( ; path != NULL; path = nxt_path) {
nxt_path = path->cpi_next;
if ((path->cpi_cphci->cphci_phci != NULL) &&
(path->cpi_pip != NULL)) {
enqueue_tail_vhcache_pathinfo(client, path);
} else if (path->cpi_pip != NULL) {
/* Not valid to have a path without a phci. */
free_vhcache_pathinfo(path);
}
/*
* NOTE(review): a path with cpi_pip == NULL is neither
* re-enqueued nor freed here, so it is dropped from the list
* without being released. This looks like a memory leak;
* confirm whether such paths should also be passed to
* free_vhcache_pathinfo().
*/
}
if (client->cct_cpi_head != NULL)
enqueue_vhcache_client(vhcache, client);
else {
/* no path survived: drop the client from the hash and free it */
(void) mod_hash_destroy(vhcache->vhcache_client_hash,
(mod_hash_key_t)client->cct_name_addr);
free_vhcache_client(client);
}
}
/* rebuild the phci list from entries that still have a phci */
phci = vhcache->vhcache_phci_head;
vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
for ( ; phci != NULL; phci = nxt_phci) {
nxt_phci = phci->cphci_next;
if (phci->cphci_phci != NULL)
enqueue_vhcache_phci(vhcache, phci);
else
free_vhcache_phci(phci);
}
vhcache->vhcache_clean_time = ddi_get_lbolt64();
rw_exit(&vhcache->vhcache_lock);
vhcache_dirty(vhc);
}
/*
 * Remove all stale entries from vhci cache.
 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
 *
 * Every registered vhci is visited.  mdi_mutex is dropped around the
 * clean_vhcache() call; vh_refcnt is raised for the duration of that call.
 */
void
mdi_clean_vhcache(void)
{
	mdi_vhci_t *cur;

	mutex_enter(&mdi_mutex);

	cur = mdi_vhci_head;
	while (cur != NULL) {
		cur->vh_refcnt++;
		mutex_exit(&mdi_mutex);

		clean_vhcache(cur->vh_config);

		mutex_enter(&mdi_mutex);
		cur->vh_refcnt--;
		cur = cur->vh_next;
	}

	mutex_exit(&mdi_mutex);
}
/*
 * mdi_vhci_walk_clients():
 *		Walker routine to traverse client dev_info nodes
 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
 * below the client, including nexus devices, which we don't want.
 * So we just traverse the immediate siblings, starting from 1st client.
 *
 * f is called for each client with the vhci client lock and that client's
 * own lock held.  The walk stops as soon as f returns anything other than
 * DDI_WALK_CONTINUE.
 */
void
mdi_vhci_walk_clients(dev_info_t *vdip,
    int (*f)(dev_info_t *, void *), void *arg)
{
	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
	dev_info_t *cdip, *upcoming;
	mdi_client_t *ct;

	MDI_VHCI_CLIENT_LOCK(vh);

	for (cdip = ddi_get_child(vdip); cdip != NULL; cdip = upcoming) {
		ct = i_devi_get_client(cdip);
		MDI_CLIENT_LOCK(ct);

		/* pick the next sibling while the client lock is held */
		upcoming = ((*f)(cdip, arg) == DDI_WALK_CONTINUE) ?
		    ddi_get_next_sibling(cdip) : NULL;

		MDI_CLIENT_UNLOCK(ct);
	}

	MDI_VHCI_CLIENT_UNLOCK(vh);
}
/*
 * mdi_vhci_walk_phcis():
 *		Walker routine to traverse phci dev_info nodes
 *
 * f is called with each phci's dip while the vhci phci lock and that
 * phci's own lock are held.  The walk stops as soon as f returns anything
 * other than DDI_WALK_CONTINUE.
 */
void
mdi_vhci_walk_phcis(dev_info_t *vdip,
    int (*f)(dev_info_t *, void *), void *arg)
{
	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
	mdi_phci_t *cur, *upcoming;

	MDI_VHCI_PHCI_LOCK(vh);

	for (cur = vh->vh_phci_head; cur != NULL; cur = upcoming) {
		MDI_PHCI_LOCK(cur);

		/* read ph_next while the phci lock is still held */
		upcoming = ((*f)(cur->ph_dip, arg) == DDI_WALK_CONTINUE) ?
		    cur->ph_next : NULL;

		MDI_PHCI_UNLOCK(cur);
	}

	MDI_VHCI_PHCI_UNLOCK(vh);
}
/*
 * mdi_walk_vhcis():
 *		Walker routine to traverse vhci dev_info nodes
 *
 * f is called with each registered vhci's dip.  mdi_mutex is not held
 * during the callback; the vhci's vh_refcnt is raised instead.  The walk
 * stops as soon as f returns anything other than DDI_WALK_CONTINUE.
 */
void
mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
{
	mdi_vhci_t *vh;

	mutex_enter(&mdi_mutex);

	/* scan the already registered vhcis */
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		int verdict;

		vh->vh_refcnt++;
		mutex_exit(&mdi_mutex);

		verdict = (*f)(vh->vh_dip, arg);

		mutex_enter(&mdi_mutex);
		vh->vh_refcnt--;

		if (verdict != DDI_WALK_CONTINUE)
			break;
	}

	mutex_exit(&mdi_mutex);
}
/*
 * i_mdi_log_sysevent():
 *		Logs events for pickup by syseventd
 *
 * The event is posted with class EC_DDI and the given subclass.  Its
 * attribute list carries the driver name, driver major, instance and
 * device path of dip, plus ph_vh_class.  If any attribute cannot be
 * added, no event is logged.
 */
static void
i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
{
	char *devpath;
	nvlist_t *attrs;

	if (nvlist_alloc(&attrs, NV_UNIQUE_NAME_TYPE,
	    KM_SLEEP) != DDI_SUCCESS) {
		MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent"));
		return;
	}

	devpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, devpath);

	/* attributes are added in order; the first failure ends the chain */
	if (nvlist_add_string(attrs, DDI_DRIVER_NAME,
	    ddi_driver_name(dip)) == DDI_SUCCESS &&
	    nvlist_add_int32(attrs, DDI_DRIVER_MAJOR,
	    (int32_t)ddi_driver_major(dip)) == DDI_SUCCESS &&
	    nvlist_add_int32(attrs, DDI_INSTANCE,
	    (int32_t)ddi_get_instance(dip)) == DDI_SUCCESS &&
	    nvlist_add_string(attrs, DDI_PATHNAME,
	    devpath) == DDI_SUCCESS &&
	    nvlist_add_string(attrs, DDI_CLASS,
	    ph_vh_class) == DDI_SUCCESS) {
		(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
		    attrs, NULL, DDI_SLEEP);
	}

	kmem_free(devpath, MAXPATHLEN);
	nvlist_free(attrs);
}
/*
 * Return the names of the phci drivers of the specified vhci class.
 *
 * The result is an array of exactly *ndrivers name strings, or NULL when
 * no driver was found (*ndrivers is still set).  The array is trimmed to
 * the number of entries in use, which is the size that
 * mdi_free_phci_driver_list() frees it with.
 */
char **
mdi_get_phci_driver_list(char *vhci_class, int *ndrivers)
{
	char **names, **trimmed;
	int *root_ok;
	int count, capacity;

	get_phci_driver_list(vhci_class, &names, &root_ok, &count, &capacity);

	if (names == NULL) {
		trimmed = NULL;
	} else {
		/* root support information is not part of the result */
		kmem_free(root_ok, sizeof (int) * capacity);
		trimmed = mdi_realloc(names, sizeof (char *) * capacity,
		    sizeof (char *) * count);
	}

	*ndrivers = count;
	return (trimmed);
}
/*
 * Free a phci driver name list of ndrivers entries: each name string and
 * then the array itself.  A NULL list is a no-op.
 */
void
mdi_free_phci_driver_list(char **driver_list, int ndrivers)
{
	int idx;

	if (driver_list == NULL)
		return;

	for (idx = 0; idx < ndrivers; idx++)
		kmem_free(driver_list[idx], strlen(driver_list[idx]) + 1);

	kmem_free(driver_list, sizeof (char *) * ndrivers);
}
/*
 * mdi_is_dev_supported():
 *		function called by pHCI bus config operation to determine if a
 *		device should be represented as a child of the vHCI or the
 *		pHCI.  This decision is made by the vHCI, using cinfo identity
 *		information passed by the pHCI - specifics of the cinfo
 *		representation are by agreement between the pHCI and vHCI.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo)
{
	mdi_vhci_t *vh;

	ASSERT(class && pdip);

	/*
	 * For dev_supported, mdi_phci_register() must have established pdip as
	 * a pHCI.
	 *
	 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and
	 * MDI_PHCI(pdip) will return false if mpxio is disabled.
	 */
	if (!MDI_PHCI(pdip))
		return (MDI_FAILURE);

	/* No vHCI for this class: there is nobody to ask. */
	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
	if (vh == NULL)
		return (MDI_FAILURE);

	/* The vHCI does not support asking the question. */
	if (vh->vh_ops->vo_is_dev_supported == NULL)
		return (MDI_FAILURE);

	/* Return vHCI answer */
	return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo));
}
/*
 * Translate the state of a pathinfo node into devctl device state bits and
 * copy the result out to dcp->cpyout_buf.
 *
 *	INIT, FAULT	DEVICE_DOWN
 *	ONLINE		DEVICE_ONLINE, plus DEVICE_BUSY if the path has a
 *			client dip that is still referenced
 *	STANDBY		DEVICE_ONLINE
 *	OFFLINE		DEVICE_OFFLINE
 *
 * Returns MDI_FAILURE if pip or dcp is NULL or the copyout fails,
 * MDI_SUCCESS otherwise.
 */
int
mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp)
{
	uint_t devstate = 0;
	dev_info_t *cdip;

	if (pip == NULL || dcp == NULL)
		return (MDI_FAILURE);

	cdip = mdi_pi_get_client(pip);

	switch (mdi_pi_get_state(pip)) {
	case MDI_PATHINFO_STATE_INIT:
	case MDI_PATHINFO_STATE_FAULT:
		devstate = DEVICE_DOWN;
		break;
	case MDI_PATHINFO_STATE_STANDBY:
		devstate = DEVICE_ONLINE;
		break;
	case MDI_PATHINFO_STATE_ONLINE:
		devstate = DEVICE_ONLINE;
		if (cdip != NULL &&
		    devi_stillreferenced(cdip) == DEVI_REFERENCED)
			devstate |= DEVICE_BUSY;
		break;
	case MDI_PATHINFO_STATE_OFFLINE:
		devstate = DEVICE_OFFLINE;
		break;
	default:
		ASSERT(MDI_PI(pip)->pi_state);
		break;
	}

	if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0)
		return (MDI_FAILURE);

	return (MDI_SUCCESS);
}