/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
*/
/*
* This file works with libdevinfo library and implements devinfo device tree
* specific operations for LU and iSCSI URIs.
*/
#include <hbaapi.h>
#include <unistd.h>
#include <assert.h>
#include <string.h>
#include <libintl.h>
#include <netinet/in.h>
#include <libdevinfo.h>
#include "suri_impl.h"
#include "suri_strings.h"
/*
 * Helper structure passed as the callback argument to di_walk_node() and
 * di_devlink_walk(). It carries both the inputs of a mapping/lookup request
 * and the state accumulated while the device tree is being walked.
 */
typedef struct lu_data {
/* Devlink snapshot handle (from di_devlink_init()) used by the link walker. */
di_devlink_handle_t devlink_hdl;
/* First matching devlink remembered while mapping; heap allocated. */
char *devname;
/* Scratch slot: strdup'ed devlink path stored by get_devlink_name(). */
char *tmp_dlink;
/* Regular expression handed to di_devlink_walk(); heap allocated. */
char *regexp;
/* Set once a node matched the GUID of an lu:initiator,target,luname URI. */
boolean_t got_luname_match;
/* Whether the most recent such GUID match was on a node with client paths. */
boolean_t multipathing;
/* Size of the handle array as maintained by suri_expand_handle_array(). */
int psh_size;
/* Number of handles cloned into the array so far. */
int psh_count;
/* Caller's handle array; used when looking up URIs, NULL when mapping. */
struct suri_handle ***psh;
/* Handle used for error reporting and as the template for cloning. */
struct suri_handle *sh;
/* One of the SURI_*_MAP / SURI_*_LOOKUP_URI actions. */
int action;
/* Error status set by the callbacks; ESURI_OK while nothing failed. */
suri_err_t ret;
/* URI type of 'sh', recorded when the request is set up. */
suri_type_t requested_node_type;
} lu_data_t;
/*
 * di_devlink_walk() callback that records the path of the visited devlink.
 * The walk is driven by a specific regular expression, so the only work left
 * is to duplicate the path string into data->tmp_dlink.
 *
 * On allocation failure data->ret and the error description in data->sh are
 * set for the caller to inspect, and the handle array (if any) is destroyed.
 * The walk is always terminated after the first link.
 */
static int
get_devlink_name(di_devlink_t devlink, void *arg)
{
	lu_data_t *ld = arg;
	char *copy;

	assert(ld->tmp_dlink == NULL);

	/*
	 * The path string belongs to the walker and must not be used once
	 * the walker returns, hence the private copy.
	 */
	copy = strdup(di_devlink_path(devlink));
	ld->tmp_dlink = copy;
	if (copy != NULL)
		return (DI_WALK_TERMINATE);

	ld->ret = suri_err_set_static_desc(ld->sh, ESURI_NOMEM);
	if (ld->psh != NULL)
		suri_destroy_handle_array(ld->psh);

	return (DI_WALK_TERMINATE);
}
/*
 * Some ports show up in the device tree with a stray leading 'w'. URIs never
 * carry that character, so skip over it when present. A NULL port is passed
 * through unchanged. The returned pointer aliases the input string.
 */
static char *
strip_w(char *port)
{
	return ((port != NULL && port[0] == 'w') ? port + 1 : port);
}
/*
 * Tell whether a node sits under an scsi_vhci subtree. Having at least one
 * client path is a sufficient test; no paths means no multipathing.
 */
static boolean_t
is_multipath_node(di_node_t node)
{
	di_path_t first = di_path_client_next_path(node, DI_PATH_NIL);

	return (first != DI_PATH_NIL ? B_TRUE : B_FALSE);
}
/*
 * Return B_TRUE if the node name is one of the known disk node names,
 * B_FALSE otherwise.
 */
static boolean_t
disk_node(di_node_t node)
{
	static const char *const disk_names[] = {
		"disk",	/* iSCSI both cases, SCSI with multipathing */
		"ssd",	/* Fibre Channel (both cases) */
		"sd"	/* SAS, SCSI in non-multipathing case */
	};
	const char *name = di_node_name(node);
	size_t i;

	for (i = 0; i < sizeof (disk_names) / sizeof (disk_names[0]); ++i) {
		if (strcmp(disk_names[i], name) == 0)
			return (B_TRUE);
	}

	return (B_FALSE);
}
/*
 * Get the string value of the path property named 'prop_name'.
 *
 * Returns a pointer to the property string, or NULL if the path has no such
 * property or its strings cannot be retrieved. The string is not copied, so
 * the caller must neither modify nor free it.
 */
static char *
get_string_path_prop(di_path_t pi, char *prop_name)
{
	char *prop_str = NULL;
	di_path_prop_t pprop = DI_PROP_NIL;

	while ((pprop = di_path_prop_next(pi, pprop)) != DI_PROP_NIL) {
		int nstr;

		if (strcmp(prop_name, di_path_prop_name(pprop)) != 0)
			continue;

		/*
		 * The call must live outside of assert(): with NDEBUG the
		 * whole assert() expression is compiled out and the property
		 * would never be fetched.
		 */
		nstr = di_path_prop_strings(pprop, &prop_str);
		assert(nstr != 0);
		if (nstr <= 0)
			prop_str = NULL;

		/* We got our property */
		break;
	}

	return (prop_str);
}
/*
 * Get the string value of the node property named 'prop_name'. The 'fn'
 * iterator defines which property list is searched and how the next property
 * is obtained (for example di_prop_drv_next or di_prop_hw_next).
 *
 * Returns a pointer to the property string, or NULL if the node has no such
 * property or its strings cannot be retrieved. The string is not copied, so
 * the caller must neither modify nor free it.
 */
static char *
get_string_prop(di_node_t node, char *prop_name,
    di_prop_t fn(di_node_t, di_prop_t))
{
	char *prop_str = NULL;
	di_prop_t prop = DI_PROP_NIL;

	while ((prop = fn(node, prop)) != DI_PROP_NIL) {
		int nstr;

		if (strcmp(prop_name, di_prop_name(prop)) != 0)
			continue;

		/*
		 * The call must live outside of assert(): with NDEBUG the
		 * whole assert() expression is compiled out and the property
		 * would never be fetched.
		 */
		nstr = di_prop_strings(prop, &prop_str);
		assert(nstr != 0);
		if (nstr <= 0)
			prop_str = NULL;

		/* We got our property */
		break;
	}

	return (prop_str);
}
/*
 * Fetch "initiator-port" from the driver properties of the parent of 'node',
 * with any leading 'w' stripped. Only used in the non-multipathing case.
 * Returns NULL if the node has no parent or the property is not found.
 */
static char *
get_parent_iport(di_node_t node)
{
	di_node_t hba = di_parent_node(node);
	char *iport;

	if (hba == DI_NODE_NIL)
		return (NULL);

	iport = get_string_prop(hba, "initiator-port", di_prop_drv_next);
	return (strip_w(iport));
}
/*
 * Convenience wrapper: return the "target-port" hardware property of 'node'
 * with any leading 'w' stripped. The property is asserted to exist.
 */
static char *
get_target_port(di_node_t node)
{
	char *tport = get_string_prop(node, "target-port", di_prop_hw_next);

	assert(tport != NULL);
	return (strip_w(tport));
}
/*
 * Try to match data->regexp against the device links that belong to 'node'
 * only. This is done by walking the devlinks rooted under each minor node of
 * the given node in turn.
 *
 * Returns the first matching link name, allocated by get_devlink_name(), or
 * NULL if nothing matched. NULL is also returned when the callback reported a
 * failure through data->ret, so callers must check data->ret first.
 */
static char *
match_devlink_in_minor_nodes(di_node_t node, lu_data_t *data)
{
	di_minor_t m;

	for (m = di_minor_next(node, DI_MINOR_NIL); m != DI_MINOR_NIL;
	    m = di_minor_next(node, m)) {
		char *minor_path = di_devfs_minor_path(m);

		assert(minor_path != NULL);

		/* The callback asserts that the slot is empty. */
		data->tmp_dlink = NULL;
		(void) di_devlink_walk(data->devlink_hdl, data->regexp,
		    minor_path, 0, data, get_devlink_name);
		di_devfs_path_free(minor_path);

		if (data->ret != ESURI_OK)
			break;
		if (data->tmp_dlink != NULL)
			return (data->tmp_dlink);
	}

	return (NULL);
}
/*
 * Find the "target-port" property of a path.
 *
 * On return *target points to the property string, or is NULL when the path
 * has no such property or its strings cannot be retrieved. The string is not
 * copied, so the caller must neither modify nor free it.
 */
static void
get_tgt_in_path(di_path_t path, char **target)
{
	di_path_prop_t pprop = DI_PROP_NIL;

	assert(target != NULL);

	/* Never let a value from a previous lookup survive a miss. */
	*target = NULL;

	/* Target port is in path properties. */
	while ((pprop = di_path_prop_next(path, pprop)) != DI_PROP_NIL) {
		int nstr;

		if (strcmp("target-port", di_path_prop_name(pprop)) != 0)
			continue;

		/*
		 * The call must live outside of assert(): with NDEBUG the
		 * whole assert() expression is compiled out and the property
		 * would never be fetched.
		 */
		nstr = di_path_prop_strings(pprop, target);
		assert(nstr != 0);
		if (nstr <= 0)
			*target = NULL;
		break;
	}
}
/*
 * Clone the template handle data->sh into the next free slot of the handle
 * array, growing the array first when there is no room left. The array is
 * always kept NULL terminated, which is why one slot is held in reserve.
 *
 * Returns 1 on success and 0 on failure. On failure (e.g. ENOMEM) data->ret
 * and the error description in data->sh are set. A failed clone also destroys
 * the handle array since the failure is treated as non-recoverable.
 */
static int
clone_sh_into_psh(lu_data_t *data)
{
	int has_room = (data->psh_count + 1) < data->psh_size;

	if (!has_room) {
		*data->psh = suri_expand_handle_array(*data->psh,
		    &data->psh_size);
		if (*data->psh == NULL) {
			data->ret = suri_err_set_static_desc(data->sh,
			    ESURI_NOMEM);
			return (0);
		}
	}

	data->ret = suri_clone(data->sh, &(*data->psh)[data->psh_count]);
	if (data->ret != ESURI_OK) {
		suri_destroy_handle_array(data->psh);
		return (0);
	}

	data->psh_count++;
	return (1);
}
/*
 * For a path of a multipathing disk node, report whether the physical (pHCI)
 * node behind that path is named "iscsi".
 */
static boolean_t
is_physical_node_iscsi(di_path_t path)
{
	/* Node of the physical device this path goes through. */
	di_node_t phci = di_path_phci_node(path);

	assert(phci != DI_NODE_NIL);
	return (strcmp("iscsi", di_node_name(phci)) == 0 ? B_TRUE : B_FALSE);
}
/*
 * For a non-multipathing disk node, report whether its parent node is named
 * "iscsi".
 */
static boolean_t
is_parent_node_iscsi(di_node_t node)
{
	di_node_t up = di_parent_node(node);

	assert(up != DI_NODE_NIL);
	return (strcmp("iscsi", di_node_name(up)) == 0 ? B_TRUE : B_FALSE);
}
/*
 * Report whether a disk node is an iSCSI one. A node with client paths
 * (multipathing) is judged by the physical node of its first path, a node
 * without paths by its parent node.
 */
static boolean_t
iscsi_disk_node(di_node_t node)
{
	di_path_t first;

	/* Make sure this is called on disks only. */
	assert(disk_node(node) == B_TRUE);

	first = di_path_client_next_path(node, DI_PATH_NIL);
	return (first != DI_PATH_NIL ?
	    is_physical_node_iscsi(first) : is_parent_node_iscsi(node));
}
/*
 * Lookup LU URIs for a given device name and a given device tree node.
 *
 * 'lu_id' is the GUID taken from the node's devid. If one of the node's
 * devlinks matches data->regexp, the template handle data->sh is filled in
 * and cloned into the handle array once for the lu:luname form and once for
 * every lu:initiator,target,luname form found.
 *
 * Returns DI_WALK_CONTINUE when the node is of no interest (iSCSI, or no
 * devlink match) and DI_WALK_TERMINATE otherwise. Failures are reported
 * through data->ret.
 */
static int
lookup_lu_uri(di_node_t node, char *lu_id, lu_data_t *data)
{
	suri_lu_t *suri_lu;
	di_path_t pi = DI_PATH_NIL;
	boolean_t multipathing_enabled = B_FALSE;
	char *target = NULL, *parent_iport = NULL, *devlink = NULL;

	/* iSCSI has its own URI, we lookup only LU URIs here. */
	if (iscsi_disk_node(node))
		return (DI_WALK_CONTINUE);

	devlink = match_devlink_in_minor_nodes(node, data);
	if (data->ret != ESURI_OK)
		return (DI_WALK_TERMINATE);
	if (devlink == NULL)
		return (DI_WALK_CONTINUE);
	/* Only the fact that the link matched matters, not its name. */
	free(devlink);

	/*
	 * Get a URI with an luname now. For a given device name, we can only
	 * find at most one such node with this GUID. This stands true for both
	 * MPxIO and non-MPxIO case.
	 */
	suri_lu = SURIH2LU(data->sh);
	(void) snprintf(suri_lu->sl_prop_luname,
	    sizeof (suri_lu->sl_prop_luname), "naa.%s", lu_id);
	(void) snprintf(data->sh->sh_uri, sizeof (data->sh->sh_uri),
	    "lu:luname.naa.%s", lu_id);
	data->sh->sh_state = SURI_STATE_MAPPED;
	if (clone_sh_into_psh(data) == 0)
		return (DI_WALK_TERMINATE);

	/*
	 * For each path in a multipathing case, an initiator-target pair to
	 * form a lu:initiator,target,luname URI is needed. Note that we deal
	 * with non-multipathing after this loop.
	 */
	while ((pi = di_path_client_next_path(node, pi)) != DI_PATH_NIL) {
		di_node_t phci_node;
		char *iport = NULL;

		multipathing_enabled = B_TRUE;

		/* Give me a node for the physical device. */
		phci_node = di_path_phci_node(pi);
		assert(phci_node != DI_NODE_NIL);

		/*
		 * The initiator port is in the physical node for the adapter
		 * that corresponds to this particular path.
		 */
		iport = get_string_prop(phci_node, "initiator-port",
		    di_prop_drv_next);
		/* Check the pointer itself, not the character behind it. */
		assert(iport != NULL);
		iport = strip_w(iport);

		target = NULL;
		get_tgt_in_path(pi, &target);
		assert(target != NULL);
		target = strip_w(target);

		(void) snprintf(data->sh->sh_uri, sizeof (data->sh->sh_uri),
		    "lu:initiator.naa.%s,target.naa.%s,luname.naa.%s",
		    iport, target, lu_id);
		(void) snprintf(suri_lu->sl_prop_initiator,
		    sizeof (suri_lu->sl_prop_initiator), "naa.%s", iport);
		(void) snprintf(suri_lu->sl_prop_target,
		    sizeof (suri_lu->sl_prop_target), "naa.%s", target);
		if (clone_sh_into_psh(data) == 0)
			return (DI_WALK_TERMINATE);
	}

	/*
	 * In case of multipathing, we are done since we cannot find more
	 * lu:initiator,target,luname URIs than we already got. All storage
	 * paths for our device path are under this virtual node since we
	 * already successfully matched the device path - see above.
	 */
	if (multipathing_enabled)
		return (DI_WALK_TERMINATE);

	/*
	 * Non-multipathing code starts here.
	 *
	 * The initiator port is in the driver properties of the parent node.
	 * NOTE(review): get_parent_iport() may return NULL; the code below
	 * assumes the property is always present - confirm.
	 */
	parent_iport = get_parent_iport(node);
	target = get_target_port(node);

	/*
	 * For a non-multipathing case we get one and one only URI like this for
	 * a given node. Never less, never more.
	 */
	(void) snprintf(data->sh->sh_uri, sizeof (data->sh->sh_uri),
	    "lu:initiator.naa.%s,"
	    "target.naa.%s,luname.naa.%s", parent_iport, target, lu_id);
	(void) snprintf(suri_lu->sl_prop_initiator,
	    sizeof (suri_lu->sl_prop_initiator), "naa.%s", parent_iport);
	(void) snprintf(suri_lu->sl_prop_target,
	    sizeof (suri_lu->sl_prop_target), "naa.%s", target);

	/* If this failed the error was set in data and we return anyway. */
	(void) clone_sh_into_psh(data);
	return (DI_WALK_TERMINATE);
}
/*
 * Lookup iSCSI URIs for a given device name and a given device tree node.
 *
 * The handle in 'data' serves two purposes: errors are reported through it,
 * and it is the "template" that gets cloned. One new handle is put into the
 * handle array for every discovery address found as a source of our target
 * name, or a single one without an authority section if there is none.
 *
 * Returns DI_WALK_CONTINUE for nodes of no interest (non-iSCSI, or no devlink
 * match) and DI_WALK_TERMINATE otherwise.
 */
static int
lookup_iscsi_uri(di_node_t node, char *lu_id, lu_data_t *data)
{
	int idx;
	suri_err_t rc;
	di_path_t first_path;
	IMA_OID_LIST *disc_addrs = NULL;
	char *link = NULL, *tgt_iqn = NULL;
	suri_iscsi_t *iscsi = SURIH2ISCSI(data->sh);

	/* Not interested in non-iSCSI devices as fibre channel or SAS. */
	if (!iscsi_disk_node(node))
		return (DI_WALK_CONTINUE);

	link = match_devlink_in_minor_nodes(node, data);
	if (data->ret != ESURI_OK)
		return (DI_WALK_TERMINATE);
	if (link == NULL)
		return (DI_WALK_CONTINUE);
	free(link);

	/*
	 * From here on we know our device path lives in the current node. For
	 * a given device path there is at most one node with this GUID, with
	 * MPxIO as well as without it.
	 */
	first_path = di_path_client_next_path(node, DI_PATH_NIL);
	if (first_path == DI_PATH_NIL) {
		/*
		 * Non-multipathing case. The device tree does not carry the
		 * target name here, so it is looked up via the device path.
		 * That is inherently racy: the iSCSI naming services may have
		 * been reconfigured in between and the device may be gone
		 * while still present in our devinfo snapshot. ESURI_NOENT is
		 * therefore treated as if the logical unit did not exist.
		 */
		rc = suri_get_tgt_name_for_devpath(data->sh,
		    data->sh->sh_mdev, &tgt_iqn);
		if (rc != ESURI_OK) {
			if (rc != ESURI_NOENT)
				data->ret = rc;
			goto out;
		}
	} else {
		char *tport, *after_session, *tpgt;

		/*
		 * Multipathing case. Any path will do since one target-port
		 * is enough to tell which of the discovery addresses
		 * configured on the system are relevant for this GUID.
		 */
		tport = get_string_path_prop(first_path, "target-port");
		assert(tport != NULL);

		/*
		 * The iSCSI target-port path property is a 2 or 3-tuple
		 * "session,iqn-name[,TPGT]". The TPGT (Target Portal Group
		 * Tag) is absent when the tag is ISCSI_DEFAULT_TPGT.
		 */
		after_session = strchr(tport, ',');
		assert(after_session != NULL);
		/* Skip comma */
		++after_session;

		/*
		 * The optional TPGT is cut off, but the property string
		 * itself must stay intact, so operate on a copy.
		 */
		tgt_iqn = strdup(after_session);
		if (tgt_iqn == NULL) {
			data->ret = suri_err_set_static_desc(data->sh,
			    ESURI_NOMEM);
			goto out;
		}
		tpgt = strchr(tgt_iqn, ',');
		if (tpgt != NULL)
			*tpgt = '\0';
	}

	/* Fill properties in our "template" handle. */
	(void) snprintf(iscsi->si_prop_luname,
	    sizeof (iscsi->si_prop_luname), "naa.%s", lu_id);
	data->sh->sh_state = SURI_STATE_MAPPED;

	rc = suri_get_discovery_addresses(data->sh, &disc_addrs);
	if (rc != ESURI_OK) {
		data->ret = rc;
		goto out;
	}

	/*
	 * Without any discovery address the URI has no authority section and
	 * a single clone of the template is all there is.
	 */
	if (disc_addrs->oidCount == 0) {
		if (clone_sh_into_psh(data) == 0)
			suri_destroy_handle_array(data->psh);
		goto out;
	}

	for (idx = 0; idx < disc_addrs->oidCount; ++idx) {
		data->ret = suri_get_auth_section_for_iscsi_tgt(data->sh,
		    tgt_iqn, disc_addrs->oids[idx]);
		if (data->ret != ESURI_OK || clone_sh_into_psh(data) == 0) {
			suri_destroy_handle_array(data->psh);
			break;
		}
	}

out:
	if (disc_addrs != NULL)
		(void) IMA_FreeMemory(disc_addrs);
	free(tgt_iqn);
	return (DI_WALK_TERMINATE);
}
/*
 * Release a devlink string obtained for the current node unless it has been
 * handed over to data->devname, in which case 'data' owns it.
 */
static void
lu_drop_devlink(lu_data_t *data, char *devlink)
{
	if (devlink != data->devname)
		free(devlink);
}

/*
 * Make 'devlink' the mapped device of data->sh. After this neither 'devlink'
 * nor a previously remembered data->devname is needed, so both are released.
 */
static void
lu_commit_devlink(lu_data_t *data, char *devlink)
{
	(void) strlcpy(data->sh->sh_mdev, devlink, MAXPATHLEN);
	lu_drop_devlink(data, devlink);
	free(data->devname);
	data->devname = NULL;
}

/*
 * Map an LU URI to a device name. Note that we accept both lu:luname and
 * lu:initiator,target,luname formats. In a non-multipathing case we map to the
 * first device path found and we work with data->sh, never with data->psh.
 *
 * 'lu_id' is the GUID taken from the node's devid. Returns DI_WALK_CONTINUE
 * when the walk should go on to other nodes and DI_WALK_TERMINATE when the
 * mapping is settled or an error was recorded in data->ret.
 *
 * data->devname is either NULL or an owned heap string at all times; a devlink
 * string looked up here is either stored there or freed before returning.
 */
static int
map_lu_uri_to_devname(di_node_t node, char *lu_id, lu_data_t *data)
{
	char *luname;
	di_path_t pi = DI_PATH_NIL;
	boolean_t multipathing_enabled = B_FALSE;
	suri_lu_t *suri_lu = SURIH2LU(data->sh);
	char *iport = NULL, *target = NULL, *devlink = NULL;

	luname = suri_lu->sl_prop_luname;
	assert(luname[0] != '\0');

	/* Let's try to match the GUID. +4 is to skip "naa." */
	if (strcasecmp(luname + 4, lu_id) != 0)
		return (DI_WALK_CONTINUE);

	/*
	 * This is for lu:luname form only, lu:initiator,target,luname URI form
	 * is processed below.
	 */
	if (suri_lu->sl_prop_initiator[0] == '\0') {
		/* We have a GUID match so get an s0 slice if present. */
		devlink = match_devlink_in_minor_nodes(node, data);
		if (data->ret != ESURI_OK)
			return (DI_WALK_TERMINATE);
		if (devlink == NULL) {
			suri_err_set_desc(data->sh, "%s", ERR_SURI_NO_S0_SLICE);
			data->ret = ESURI_NOSLICE;
			return (DI_WALK_TERMINATE);
		}

		/* If multipathing is enabled we are done here. */
		if (is_multipath_node(node)) {
			(void) strlcpy(data->sh->sh_mdev, devlink, MAXPATHLEN);
			free(devlink);
			return (DI_WALK_TERMINATE);
		}

		/*
		 * Non-multipathing case for lu:luname URI form. We have to
		 * check whether we have multiple device paths to the same
		 * logical unit to warn the user so we must continue until we
		 * find two or finish searching the tree.
		 */
		if (data->devname == NULL) {
			/* Remember the device for the first path. */
			data->devname = devlink;
			return (DI_WALK_CONTINUE);
		}

		/*
		 * This is the second device path for the same logical unit.
		 * 'luname' already starts with "naa." while the message
		 * supplies that prefix itself, so skip it here.
		 */
		suri_set_warn(data->sh, SURIGTEXT("A path specific "
		    "\"lu:initiator.naa.<GUID>,target.naa.<GUID>,"
		    "luname.naa.%s\" URI "
		    "should be used to identify devices which "
		    "have multipathing disabled and are accessible via "
		    "multiple paths. Use \"lookup-uri\" subcommand "
		    "with suriadm(1M) to list available URIs.\n"),
		    luname + 4);
		(void) strlcpy(data->sh->sh_mdev, data->devname,
		    MAXPATHLEN);
		free(data->devname);
		/* The caller inspects devname later; never leave it dangling. */
		data->devname = NULL;
		free(devlink);
		return (DI_WALK_TERMINATE);
	}

	/*
	 * Now, process the lu:initiator,target,luname case.
	 */

	/*
	 * If we do not match the nexus this is to indicate that we matched the
	 * logical unit GUID at least.
	 */
	data->got_luname_match = B_TRUE;

	/*
	 * Multipathing case first. Go through the paths for this node and try
	 * to match the target name.
	 */
	while ((pi = di_path_client_next_path(node, pi)) != DI_PATH_NIL) {
		di_node_t phci_node;

		multipathing_enabled = B_TRUE;

		/*
		 * We have an luname match so get any s0 slice. The link
		 * belongs to the node, not to the path, so one lookup serves
		 * all paths.
		 */
		if (devlink == NULL) {
			devlink = match_devlink_in_minor_nodes(node, data);
			if (data->ret != ESURI_OK)
				return (DI_WALK_TERMINATE);
			if (devlink == NULL) {
				suri_err_set_desc(data->sh, "%s",
				    ERR_SURI_NO_S0_SLICE);
				data->ret = ESURI_NOSLICE;
				return (DI_WALK_TERMINATE);
			}
			/*
			 * Remember the first mapped device name. If we have no
			 * initiator-target match after we search the whole
			 * tree we will use this remembered devname to print it
			 * out.
			 */
			if (data->devname == NULL)
				data->devname = devlink;
		}

		/*
		 * We need this to later decide whether to issue a warning or
		 * not. It means that we matched the logical unit under a node
		 * in a scsi_vhci subtree.
		 */
		data->multipathing = B_TRUE;

		/* Now, try to match the target; forget the previous path's. */
		target = NULL;
		get_tgt_in_path(pi, &target);
		if (target == NULL)
			continue;
		target = strip_w(target);
		/* +4 is to skip "naa." */
		if (strcasecmp(suri_lu->sl_prop_target + 4, target) != 0)
			continue;

		/* Let us check if we also have an initiator-port match. */
		phci_node = di_path_phci_node(pi);
		assert(phci_node != DI_NODE_NIL);
		iport = get_string_prop(phci_node, "initiator-port",
		    di_prop_drv_next);
		assert(iport != NULL);
		iport = strip_w(iport);

		/*
		 * If we matched this, we matched all we could and are done. +4
		 * is for "naa."
		 */
		if (strcasecmp(suri_lu->sl_prop_initiator + 4, iport) == 0) {
			suri_set_warn(data->sh,
			    "%s \"lu:luname.%s\" %s \"%s\" %s",
			    SURIGTEXT("Non-path specific"), luname,
			    SURIGTEXT("URI should be used to identify device"),
			    devlink,
			    SURIGTEXT("which has multipathing enabled."));
			lu_commit_devlink(data, devlink);
			return (DI_WALK_TERMINATE);
		}
	}

	/*
	 * If node was under multipathing and we found nothing we are done and
	 * move on to another node.
	 */
	if (multipathing_enabled) {
		lu_drop_devlink(data, devlink);
		return (DI_WALK_CONTINUE);
	}

	/*
	 * Non-multipathing code starts here.
	 */

	/* We need this to later decide whether to issue a warning or not. */
	data->multipathing = B_FALSE;

	/* We have an luname match so get an s0 slice. */
	devlink = match_devlink_in_minor_nodes(node, data);
	if (data->ret != ESURI_OK)
		return (DI_WALK_TERMINATE);
	if (devlink == NULL) {
		/*
		 * NOTE(review): the other no-slice paths report ESURI_NOSLICE
		 * with ERR_SURI_NO_S0_SLICE; confirm ESURI_ERR is intended.
		 */
		suri_err_set_desc(data->sh, "%s",
		    SURIGTEXT("Logical unit has no s0 slice"));
		data->ret = ESURI_ERR;
		return (DI_WALK_TERMINATE);
	}

	/*
	 * Remember the first matched devname in case we do not have an
	 * initiator port match and will have to continue in searching the tree.
	 */
	if (data->devname == NULL)
		data->devname = devlink;

	/* Now, try to match the nexus. Skip "naa." in sl_prop_target. */
	target = get_target_port(node);
	if (strcasecmp(suri_lu->sl_prop_target + 4, target) != 0) {
		/* No target match, let us move on to another node. */
		lu_drop_devlink(data, devlink);
		return (DI_WALK_CONTINUE);
	}

	/*
	 * Let us check if we also have an initiator-port match. In non-mpxio
	 * case, the initiator port is in the driver properties of the parent
	 * node. A missing property simply means no match.
	 */
	iport = get_parent_iport(node);

	/*
	 * We are done here, we matched all we could. If not, we just continue
	 * and try to find the matching initiator in another node. Skip "naa."
	 * in the initiator property.
	 */
	if (iport != NULL &&
	    strcasecmp(suri_lu->sl_prop_initiator + 4, iport) == 0) {
		lu_commit_devlink(data, devlink);
		return (DI_WALK_TERMINATE);
	}

	lu_drop_devlink(data, devlink);
	return (DI_WALK_CONTINUE);
}
/*
 * Map an iSCSI URI to a device name using data->sh only; data->psh is never
 * touched. Whether multipathing is enabled or not makes no difference for
 * iSCSI: should the default be changed and multipathing disabled, the first
 * device name found for the GUID is returned.
 *
 * Returns DI_WALK_CONTINUE if the node's GUID does not match the URI and
 * DI_WALK_TERMINATE otherwise.
 */
static int
map_iscsi_uri_to_devname(di_node_t node, char *lu_id, lu_data_t *data)
{
	suri_iscsi_t *iscsi = SURIH2ISCSI(data->sh);
	char *uri_luname = iscsi->si_prop_luname;
	char *link;

	assert(uri_luname[0] != '\0');

	/* Compare GUIDs; the first 4 characters of the luname are "naa." */
	if (strcasecmp(uri_luname + 4, lu_id) != 0)
		return (DI_WALK_CONTINUE);

	/* The GUID matched, now an s0 slice is needed. */
	link = match_devlink_in_minor_nodes(node, data);
	if (data->ret == ESURI_OK) {
		if (link != NULL) {
			(void) strlcpy(data->sh->sh_mdev, link, MAXPATHLEN);
			free(link);
		} else {
			suri_err_set_desc(data->sh, "%s",
			    ERR_SURI_NO_S0_SLICE);
			data->ret = ESURI_NOSLICE;
		}
	}

	return (DI_WALK_TERMINATE);
}
/*
 * Callback for the main device tree walker. Filters out everything but disk
 * nodes carrying an NAA type devid, extracts the GUID from the devid and
 * dispatches to the handler selected by data->action.
 */
static int
devinfo_cback(di_node_t node, void *arg)
{
	lu_data_t *ld = arg;
	char *devid, *at, *guid;
	char id_type;

	/* We operate only on disk nodes. */
	if (!disk_node(node))
		return (DI_WALK_CONTINUE);

	/* A node without a devid is of no interest. */
	devid = get_string_prop(node, "devid", di_prop_hw_next);
	if (devid == NULL)
		return (DI_WALK_CONTINUE);

	/*
	 * Device id has a specific format, see uts/common/sys/ddi_impldefs.h.
	 * "id" is a magic number, "1" is the currently supported version.
	 */
	assert(strncmp(devid, "id1", 3) == 0);
	at = strchr(devid, '@');
	assert(at != NULL);

	/* We accept only devid's with NAA ID type for now. */
	id_type = at[1];
	if (id_type != 'n' && id_type != 'N')
		return (DI_WALK_CONTINUE);

	/* The GUID follows '@' and the ID type character. */
	guid = at + 2;

	if (ld->action == SURI_LU_MAP)
		return (map_lu_uri_to_devname(node, guid, ld));
	if (ld->action == SURI_ISCSI_MAP)
		return (map_iscsi_uri_to_devname(node, guid, ld));
	if (ld->action == SURI_LU_LOOKUP_URI)
		return (lookup_lu_uri(node, guid, ld));
	if (ld->action == SURI_ISCSI_LOOKUP_URI)
		return (lookup_iscsi_uri(node, guid, ld));

	assert(0);
	/* NOTREACHED */
	return (DI_WALK_TERMINATE);
}
/*
 * For SURI_(ISCSI|LU)_MAP, find the first system device name for a given
 * logical unit URI, and use 'sh' to set its mapped device property. psh is
 * never used and must be NULL.
 *
 * For SURI_(ISCSI|LU)_LOOKUP_URI, find logical unit URIs for a given device
 * name. For each URI found, create a new handle and put it to *psh. 'sh' will
 * be changed in this function. If we return an error, it will be set in sh and
 * the handle array is destroyed.
 *
 * 'force_dev_discovery' selects a forced device tree snapshot and devlink
 * creation instead of the cached snapshot.
 *
 * Returns ESURI_OK on success, otherwise an error code with the description
 * set in 'sh'. All resources acquired here, including a remembered device
 * name and the regular expression, are released on every exit path.
 */
static suri_err_t
devinfo_target_mapping_impl(struct suri_handle *sh, int action,
    boolean_t force_dev_discovery, struct suri_handle ***psh)
{
	int err;
	char *luname = NULL;
	lu_data_t data;
	suri_lu_t *suri_lu = NULL;
	suri_iscsi_t *so_iscsi = NULL;
	suri_err_t ret = ESURI_ERR;
	di_node_t rnode = DI_NODE_NIL;

	(void) memset(&data, '\0', sizeof (data));
	data.got_luname_match = B_FALSE;
	data.multipathing = B_FALSE;
	data.sh = sh;
	data.psh = psh;
	data.action = action;
	data.ret = ESURI_OK;
	data.devlink_hdl = DI_LINK_NIL;

	if (sh->sh_uri_type == SURI_TYPE_LU) {
		suri_lu = SURIH2LU(sh);
		luname = suri_lu->sl_prop_luname;
		data.requested_node_type = SURI_TYPE_LU;
	} else if (sh->sh_uri_type == SURI_TYPE_ISCSI) {
		so_iscsi = SURIH2ISCSI(sh);
		luname = so_iscsi->si_prop_luname;
		data.requested_node_type = SURI_TYPE_ISCSI;
	} else {
		assert(0);
	}

	/* We got an URI, looking up a devname. psh is not used. */
	if (action == SURI_LU_MAP || action == SURI_ISCSI_MAP) {
		assert(psh == NULL);
		/* When we find the node only one s0 slice may be there. */
		if ((data.regexp = strdup("^dsk/.*s0$")) == NULL)
			return (suri_err_set_static_desc(sh, ESURI_NOMEM));
	} else {
		/*
		 * We got a devname, looking up URIs. Use psh to store them.
		 *
		 * Prepare the regular expression we are going to use later.
		 * Note that the walker needs the device name without the
		 * "/dev/" prefix.
		 */
		assert(psh != NULL);
		*psh = suri_expand_handle_array(NULL, &(data.psh_size));
		if (*psh == NULL)
			return (suri_err_set_static_desc(sh, ESURI_NOMEM));

		/* "/dev/dsk/c0t0d0s0" -> "/dsk/c0t0d0s0" */
		if ((data.regexp = strdup(sh->sh_mdev + 4)) == NULL) {
			suri_destroy_handle_array(psh);
			return (suri_err_set_static_desc(sh, ESURI_NOMEM));
		}
		/*
		 * We must anchor it in order not to get rdsk devlinks. For
		 * example, regexp will contain "^dsk/c0t0d0s0" after this.
		 */
		*data.regexp = '^';
	}

	if (force_dev_discovery)
		rnode = di_init("/", DINFOCPYALL | DINFOPATH | DINFOFORCE);
	else
		rnode = di_init("/", DINFOCACHE);
	if (rnode == DI_NODE_NIL) {
		/* Save errno before any other call can overwrite it. */
		err = errno;
		suri_err_set_desc(sh, "%s: %s",
		    SURIGTEXT("Cannot create a kernel device tree snapshot"),
		    strerror(err));
		/* Common cleanup also releases the regular expression. */
		goto out;
	}

	if (force_dev_discovery)
		data.devlink_hdl = di_devlink_init(NULL, DI_MAKE_LINK);
	else
		data.devlink_hdl = di_devlink_init(NULL, 0);
	if (data.devlink_hdl == DI_LINK_NIL) {
		err = errno;
		suri_err_set_desc(sh, "%s: %s",
		    SURIGTEXT("Cannot create a devlink snapshot"),
		    strerror(err));
		goto out;
	}

	/* All the work is done in the callback function devinfo_cback() */
	if (di_walk_node(rnode, DI_WALK_CLDFIRST, &data, devinfo_cback) != 0) {
		err = errno;
		suri_err_set_desc(sh, "%s: %s",
		    SURIGTEXT("Failed to traverse libdevinfo device nodes"),
		    strerror(err));
		goto out;
	}

	/*
	 * Major error is indicated by data->ret having been set. In that case,
	 * data->psh is empty and data->sh contains the specific description
	 * error.
	 */
	if (data.ret != ESURI_OK) {
		ret = data.ret;
		goto out;
	}

	/*
	 * There are situations where even when we find a device path we want to
	 * warn the user.
	 *
	 * When we have an luname GUID match but cannot match our nexus
	 * (initiator,target), we leave sh->sh_mdev unset in hope that we will
	 * eventually get the nexus match while keeping walking the tree. So, if
	 * we got our luname match, had initiator/target in the URI, but
	 * sh->sh_mdev was left unset, it means that we did not find our storage
	 * path. So, we are gonna set sh->sh_mdev with the first device path
	 * that matched our luname GUID.
	 */
	if (action == SURI_LU_MAP && suri_lu->sl_prop_target[0] != '\0' &&
	    data.got_luname_match == B_TRUE && sh->sh_mdev[0] == '\0') {
		if (data.multipathing) {
			/*
			 * lu:luname form should be always used with
			 * multipathing.
			 */
			suri_set_warn(data.sh,
			    "%s \"lu:luname.%s\" %s \"%s\" %s",
			    SURIGTEXT("Non-path specific"), luname,
			    SURIGTEXT("URI should be used to identify device"),
			    data.devname,
			    SURIGTEXT("which has multipathing enabled."));
		}
		/*
		 * This is for a logical unit match without a target-initiator
		 * path match.
		 */
		suri_set_warn(sh, SURIGTEXT("The path "
		    "\"initiator.%s,target.%s\" "
		    "cannot be found but the logical unit \"%s\" "
		    "is still accessible and will be accessed via "
		    "an alternate path."), suri_lu->sl_prop_initiator,
		    suri_lu->sl_prop_target, suri_lu->sl_prop_luname);
		/*
		 * This is the devlink name remembered from a target/LUN
		 * match.
		 */
		(void) strlcpy(sh->sh_mdev, data.devname, MAXPATHLEN);
		free(data.devname);
		data.devname = NULL;
	}

	/*
	 * This is mapping in a non-multipathing case with an lu:luname URI form
	 * and with only one path found. If we had more paths we would have had
	 * set sh->sh_mdev already. Note that we had to go through the whole
	 * tree to make sure only one path existed.
	 */
	if (action == SURI_LU_MAP && suri_lu->sl_prop_target[0] == '\0' &&
	    data.devname != NULL) {
		(void) strlcpy(sh->sh_mdev, data.devname, MAXPATHLEN);
		free(data.devname);
		data.devname = NULL;
	}

	if ((action == SURI_LU_MAP || action == SURI_ISCSI_MAP) &&
	    sh->sh_mdev[0] == '\0') {
		suri_err_set_desc(sh, "%s: \"%s\"",
		    SURIGTEXT("No such logical unit name found"), luname);
		ret = ESURI_NOENT;
		goto out;
	}

	if ((action == SURI_LU_LOOKUP_URI || action == SURI_ISCSI_LOOKUP_URI) &&
	    data.psh_count == 0) {
		/*
		 * Do not put the device name into the description. It is
		 * already in the action string.
		 */
		suri_err_set_desc(sh, "%s",
		    SURIGTEXT("No logical unit found for device"));
		ret = ESURI_NOENT;
		goto out;
	}

	ret = ESURI_OK;

out:
	if (rnode != DI_NODE_NIL)
		di_fini(rnode);
	if (data.devlink_hdl != DI_LINK_NIL)
		(void) di_devlink_fini(&data.devlink_hdl);
	if (ret != ESURI_OK && psh != NULL)
		suri_destroy_handle_array(psh);
	/* A device name remembered by the callbacks may still be pending. */
	free(data.devname);
	free(data.regexp);
	return (ret);
}
/*
 * No retry count is 100% safe here. Tests showed ESURI_NOSLICE only during
 * the race where links have not been created yet after the drivers were
 * forcefully loaded, and it took 1-2 seconds for the links to appear. The
 * value below therefore looks reasonable even for a stressed machine. Since
 * ESURI_NOSLICE is used for this special situation only, we busy wait only
 * when there is a chance to succeed, but never forever in case something
 * unexpected happens.
 */
#define SURI_NO_S0_SLICE_RETRIES 10
/*
 * Entry point for URI mapping and URI lookup via the device tree.
 *
 * The cached device tree is tried first. Lookups report whatever that first
 * attempt returned. Mapping that failed with ESURI_NOENT is repeated once
 * with forced device discovery, and if that ends with ESURI_NOSLICE it is
 * retried from the cache once a second, SURI_NO_S0_SLICE_RETRIES times at
 * most. ESURI_NOSLICE is internal and never returned; callers get ESURI_ERR.
 */
suri_err_t
devinfo_target_mapping(struct suri_handle *sh, int action,
    struct suri_handle ***psh)
{
	suri_err_t rc;
	int attempt;
	int is_lookup = (action == SURI_LU_LOOKUP_URI ||
	    action == SURI_ISCSI_LOOKUP_URI);

	rc = devinfo_target_mapping_impl(sh, action, B_FALSE, psh);

	/*
	 * A device whose URIs are looked up has obviously been enumerated
	 * already, so a lookup reports any result right away.
	 */
	if (rc == ESURI_OK || is_lookup)
		return (rc);

	/*
	 * Mapping needs more work only for ESURI_NOENT. Everything else is
	 * final, with the internal ESURI_NOSLICE turned into ESURI_ERR.
	 */
	if (rc == ESURI_NOSLICE)
		return (ESURI_ERR);
	if (rc != ESURI_NOENT)
		return (rc);

	/*
	 * ESURI_NOENT may just mean that the logical unit exists but has not
	 * been enumerated yet. Force loading of all available drivers and
	 * creation of all links in /dev. Full rediscovery can be very
	 * expensive, which is why it is done for ESURI_NOENT only. psh is not
	 * needed for mapping so NULL is passed from now on.
	 */
	suri_err_clear(sh);
	rc = devinfo_target_mapping_impl(sh, action, B_TRUE, NULL);

	/*
	 * Whatever the forced attempt returned is final unless it is
	 * ESURI_NOSLICE. Possibly due to a bug in libdevinfo, the link in /dev
	 * may still be missing for an enumerated device even after
	 * di_devlink_init() was called with DI_MAKE_LINK. Give it more time
	 * and try again a few times.
	 */
	for (attempt = 0;
	    rc == ESURI_NOSLICE && attempt < SURI_NO_S0_SLICE_RETRIES;
	    ++attempt) {
		(void) sleep(1);
		suri_err_clear(sh);
		rc = devinfo_target_mapping_impl(sh, action, B_FALSE, NULL);
	}

	if (rc != ESURI_NOSLICE)
		return (rc);

	/* ESURI_NOSLICE is an internal error only. */
	return (ESURI_ERR);
}