/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
/*
* This file works with the libdevinfo library and implements devinfo device
* tree specific operations for LU and iSCSI URIs.
*/
#include <hbaapi.h>
#include <unistd.h>
#include <assert.h>
#include <string.h>
#include <libintl.h>
#include <libdevinfo.h>
#include "suri_impl.h"
#include "suri_strings.h"
/*
* Helper structure used in the callback with di_walk_node().
*
* NOTE(review): the member descriptions below are inferred from the member
* names and from comments elsewhere in this file. Confirm them against the
* code that reads and writes the members.
*/
typedef struct lu_data {
/* Presumably the device name being mapped or looked up. */
char *devname;
/* Presumably a private copy of a matched device link name. */
char *tmp_dlink;
/* Presumably the regular expression used to match device links. */
char *regexp;
/* Presumably the allocated size of the handle array. */
int psh_size;
/* Presumably the number of handles currently stored in the array. */
int psh_count;
/* Presumably one of the SURI_*_MAP or SURI_*_LOOKUP_URI actions. */
int action;
} lu_data_t;
/*
* Get the device link name. This callback is used with a specific regular
* expression so we only make a string dup here.
*
* Set data->ret on error so that the caller can check it. Also, set an error
* description string in that case.
*/
static int
{
/*
* The devlink string is on the stack of one of the functions used by the
* walker. As soon as the walker returns, we cannot use that memory. So,
* we must make a copy here.
*/
}
return (DI_WALK_TERMINATE);
}
/*
* There is a bug causing some ports to have a character 'w' prepended. We do
* not accept those in URIs so we must strip those if we find them in the device
* tree.
*/
static char *
{
return (NULL);
if (port[0] == 'w')
return (port + 1);
return (port);
}
/*
* For a given node find out whether it is under an scsi_vhci subtree or not. It
* is fine to check whether we have any paths.
*/
static boolean_t
{
/* No paths means no multipathing */
return (B_FALSE);
return (B_TRUE);
}
/*
* Return B_TRUE if the node is a disk node, B_FALSE otherwise.
*/
static boolean_t
{
/* iSCSI both cases, SCSI with multipathing */
return (B_TRUE);
/* Fibre Channel (both cases) */
return (B_TRUE);
/* SAS, SCSI in non-multipathing case */
return (B_TRUE);
return (B_FALSE);
}
/*
* Get one string property. The 'fn' defines how to get the (next) property.
*/
static char *
{
/* We got our property */
break;
}
}
return (prop_str);
}
/*
* Get one string property. The 'fn' defines how to get the (next) property.
*/
static char *
{
/* We got our property */
break;
}
}
return (prop_str);
}
/*
* Get initiator-port from the driver properties of the parent node. It is used
* in a non-multipathing case only.
*/
static char *
{
return (NULL);
di_prop_drv_next)));
}
/*
* Convenience function.
*/
static char *
{
char *target;
}
/*
* Search minor nodes of a given node and try to match the regular expression
* provided against the list of device name links under this node only. We do
* this by walking the device links rooted under each minor node found for the
* given node.
*/
static char *
{
char *path;
return (NULL);
continue;
}
return (NULL);
}
/*
* Find the target-port and lun properties in a path.
*/
static void
{
/* Target port is in path properties. */
break;
}
}
}
/*
* Take a suri handle from 'data' and clone it into the handle array. Expand the
* handle array if necessary. The handle array is always terminated by NULL.
*
* Return 1 on success, 0 otherwise. In case of an error (i.e. ENOMEM), data->ret
* is set, the error description string in data->sh, too, and the existing
* handles in the array are freed since we treat this as a non-recoverable
* failure.
*/
static int
{
return (0);
}
return (1);
}
return (0);
}
return (0);
}
return (1);
}
/*
* Given a path from a multipathing disk node, find out whether the physical
* node is iSCSI or not.
*/
static boolean_t
{
/* Give me a node for the physical device. */
return (B_FALSE);
return (B_TRUE);
}
/*
* Given a non-multipathing disk node, find out whether the parent is an iSCSI
* node or not.
*/
static boolean_t
{
return (B_FALSE);
return (B_TRUE);
}
/*
* Given a disk node, find out whether it is an iSCSI disk or not. Works for
* both the multipathing and the non-multipathing case.
*
* Returns the result of is_parent_node_iscsi() when no path is available for
* the node, and the result of is_physical_node_iscsi() otherwise.
*/
static boolean_t
{
/* Make sure this is called on disks only. */
/*
* DI_PATH_NIL means there is no path to inspect, so the decision is delegated
* to the parent node check. Otherwise the path is handed to the physical node
* check.
*/
if (path == DI_PATH_NIL)
return (is_parent_node_iscsi(node));
return (is_physical_node_iscsi(path));
}
/*
* Lookup LU URIs for a given device name and a given device tree node.
*/
static int
{
/* iSCSI has its own URI, we lookup only LU URIs here. */
if (iscsi_disk_node(node))
return (DI_WALK_CONTINUE);
return (DI_WALK_TERMINATE);
return (DI_WALK_CONTINUE);
/*
* Get a URI with an luname now. For a given device name, we can only
* find at most one such node with this GUID. This stands true for both
* MPxIO and non-MPxIO case.
*/
"lu:luname.naa.%s", lu_id);
if (clone_sh_into_psh(data) == 0)
return (DI_WALK_TERMINATE);
/*
* For each path in a multipathing case, an initiator-target pair to
* form a lu:initiator,target,luname URI is needed. Note that we deal
* with non-multipathing after this loop.
*/
pi = DI_PATH_NIL;
/* Give me a node for the physical device. */
/*
* The initiator port is in the physical node for the adapter
* that corresponds to this particular path.
*/
"lu:initiator.naa.%s,target.naa.%s,luname.naa.%s",
if (clone_sh_into_psh(data) == 0)
return (DI_WALK_TERMINATE);
}
/*
* In case of multipathing, we are done since we cannot find more
* lu:initiator,target,luname URIs than we already got. All storage
* paths for our device path are under this virtual node since we
* already successfully matched the device path - see above.
*/
if (multipathing_enabled)
return (DI_WALK_TERMINATE);
/*
* Non-multipathing code starts here.
*/
/*
* The initiator port is in the driver properties of the parent node.
*/
/*
* For a non-multipathing case we get one and only one URI like this for
* a given node. Never less, never more.
*/
"lu:initiator.naa.%s,"
/* If this failed the error was set in data and we return anyway. */
(void) clone_sh_into_psh(data);
return (DI_WALK_TERMINATE);
}
/*
* Lookup iSCSI URIs for a given device name and a given device tree node.
*
* The handle from 'data' is used to report an error if hit, and also as a
* "template" handle for cloning. A new handle is inserted into the handle array
* for every discovery address we find as a source of our target name.
*/
static int
{
int i;
/* Not interested in non-iSCSI devices such as Fibre Channel or SAS. */
if (!iscsi_disk_node(node))
return (DI_WALK_CONTINUE);
return (DI_WALK_TERMINATE);
return (DI_WALK_CONTINUE);
/*
* Now we know we found our device path in the current node.
*
* For a given device path, we can only find at most one such node with
* this GUID. This stands true for both MPxIO and non-MPxIO case.
*/
/*
* Check multipathing case first. We need just the first path since any
* port is enough for us to look up discovery addresses for authority
* sections.
*/
/*
* We need a target-port to find out what discovery addresses
* configured on the system are relevant for this GUID.
*/
/*
* target-port iSCSI path property value contains a 2 or 3-tuple
* "session,iqn-name[,TPGT]". The TPGT (Target Portal Group Tag)
* part is not present if the tag is ISCSI_DEFAULT_TPGT.
*/
/* Skip comma */
++tgtprop;
/*
* Not interested in the optional TPGT number so get rid of it
* but we must not modify the property string. Work with a copy.
*/
goto out;
}
*c = '\0';
} else {
/*
* Non-multipathing case. The target name is not in the device
* tree as in the multipathing case. We have to look it up using
* the device path.
*/
/*
* To find the target name, we have to use libima (see above as
* to why). However, this approach is inherently racy since in
* between the iSCSI naming services might have been
* reconfigured and the device may no longer be accessible while
* still in our devinfo snapshot. If that happens, we treat it
* as if the logical unit does not exist.
*/
if (ret == ESURI_NOENT)
goto out;
goto out;
}
}
/* Fill properties in our "template" handle. */
goto out;
}
/*
* If we have no discovery address, we have a URI without an authority
* section and we are done here.
*/
if (clone_sh_into_psh(data) == 0)
goto out;
}
goto out;
}
if (clone_sh_into_psh(data) == 0) {
goto out;
}
}
out:
(void) IMA_FreeMemory(daddrs);
return (DI_WALK_TERMINATE);
}
/*
* Map an LU URI to a device name. Note that we accept both lu:luname and
* lu:initiator,target,luname formats. In a non-multipathing case we map to the
* first device path found and we work with data->sh, never with data->psh.
*/
static int
{
char *luname;
/* Let's try to match the GUID. +4 is to skip "naa." */
return (DI_WALK_CONTINUE);
/*
* This is for lu:luname form only, lu:initiator,target,luname URI form
* is processed below.
*/
/* We have a GUID match so get an s0 slice if present. */
return (DI_WALK_TERMINATE);
return (DI_WALK_TERMINATE);
}
/* If multipathing is enabled we are done here. */
if (is_multipath_node(node)) {
return (DI_WALK_TERMINATE);
}
/*
* Non-multipathing case for lu:luname URI form. We have to
* check whether we have multiple device paths to the same
* logical unit to warn the user so we must continue until we
* find two or finish searching the tree.
*/
/* Remember the device for the first path. */
return (DI_WALK_CONTINUE);
} else {
/*
* This is the second device path for the same logical
* unit.
*/
"\"lu:initiator.naa.<GUID>,target.naa.<GUID>,"
"luname.naa.%s\" URI "
"should be used to identify devices which "
"have multipathing disabled and are accessible via "
"multiple paths. Use \"lookup-uri\" subcommand "
"with suriadm(1M) to list available URIs.\n"),
luname);
return (DI_WALK_TERMINATE);
}
}
/*
* Now, process the lu:initiator,target,luname case.
*/
/*
* If we do not match the nexus this is to indicate that we matched the
* logical unit GUID at least.
*/
/*
* Multipathing case first. Go through the paths for this node and try
* to match the target name.
*/
/* We got our target match so get any s0 slice. */
return (DI_WALK_TERMINATE);
return (DI_WALK_TERMINATE);
}
/*
* Remember the first mapped device name. If we have no
* initiator-target match after we search the whole tree we will
* use this remembered devname to print it out.
*/
/*
* We need this to later decide whether to issue a warning or
* not. It means that we matched the logical unit under a node
* in a scsi_vhci subtree.
*/
/* Now, try to match the target. */
continue;
/* +4 is to skip "naa." */
continue;
/* Let us check if we also have an initiator-port match. */
/*
* If we matched this, we matched all we could and are done. +4
* is for "naa."
*/
"%s \"lu:luname.%s\" %s \"%s\" %s",
SURIGTEXT("URI should be used to identify device"),
SURIGTEXT("which has multipathing enabled."));
return (DI_WALK_TERMINATE);
}
}
/*
* If node was under multipathing and we found nothing we are done and
* move on to another node.
*/
if (multipathing_enabled)
return (DI_WALK_CONTINUE);
/*
* Non-multipathing code starts here.
*/
/* We need this to later decide whether to issue a warning or not. */
/* We have an luname match so get an s0 slice. */
return (DI_WALK_TERMINATE);
SURIGTEXT("Logical unit has no s0 slice"));
return (DI_WALK_TERMINATE);
}
/*
* Remember the first matched devname in case we do not have an
* initiator port match and will have to continue in searching the tree.
*/
/* Now, try to match the nexus. */
/* Skip "naa." in suri_lu->sl_prop_target */
/* No target match, let us move on to another node. */
return (DI_WALK_CONTINUE);
}
/*
* Let us check if we also have an initiator-port match. In non-mpxio
* case, the initiator port is in the driver properties of the parent
* node.
*/
/*
* We are done here, we matched all we could. If not, we just continue
* and try to find the matching initiator in another node. Skip "naa."
* in the initiator property.
*/
return (DI_WALK_TERMINATE);
}
return (DI_WALK_CONTINUE);
}
/*
* Map an iSCSI URI to a device name. We work with data->sh, never with
* data->psh. Note that we do not care whether multipathing is enabled or
* disabled for iSCSI. If the default is changed and multipathing is disabled,
* we return the first device name we find for the GUID.
*/
static int
{
/* Let's try to match the GUID. +4 is to skip "naa." */
return (DI_WALK_CONTINUE);
/* We have a GUID match so get an s0 slice if present. */
return (DI_WALK_TERMINATE);
return (DI_WALK_TERMINATE);
}
return (DI_WALK_TERMINATE);
}
/*
* Callback for the main device tree walker.
*/
static int
{
/* We operate only on disk nodes. */
return (DI_WALK_CONTINUE);
/* If we do not get a devid we are not interested in this node. */
return (DI_WALK_CONTINUE);
/*
* Device id has a specific format. See uts/common/sys/ddi_impldefs.h
*/
/* "id" is a magic number, "1" is the currently supported version. */
/* We accept only devid's with NAA ID type for now. */
if (*(c + 1) != 'n' && *(c + 1) != 'N')
return (DI_WALK_CONTINUE);
/* Now, this is where the GUID starts */
guid = c + 2;
/*
* Note we skip '@' and the GUID type character in the devid property.
* The GUID itself follows the type character.
*/
case SURI_LU_MAP:
case SURI_ISCSI_MAP:
case SURI_LU_LOOKUP_URI:
case SURI_ISCSI_LOOKUP_URI:
default:
assert(0);
}
/* NOTREACHED */
return (DI_WALK_TERMINATE);
}
/*
* For SURI_(ISCSI|LU)_MAP, find the first system device name for a given
* logical unit URI, and use 'sh' to set its mapped device property. psh is
* never used.
*
* For SURI_(ISCSI|LU)_LOOKUP_URI, find logical unit URIs for a given device
* name. For each URI found, create a new handle and put it to *psh. 'sh' will
* be changed in this function. If we return an error, it will be set in sh and
* *psh will be NULL.
*/
static suri_err_t
{
char *luname;
} else {
assert(0);
}
/* We got a URI, looking up a devname. psh is not used. */
/* When we find the node only one s0 slice may be there. */
} else {
/*
* We got a devname, looking up URIs. Use psh to store them.
*
* Prepare the regular expression we are going to use later.
* Note that the walker needs the device name without the
* "/dev/" prefix.
*/
}
/*
* We must anchor it in order not to get rdsk devlinks.
*/
}
if (force_dev_discovery)
else
if (rnode == DI_NODE_NIL) {
SURIGTEXT("Cannot create a kernel device tree snapshot"),
return (ESURI_ERR);
}
if (force_dev_discovery)
else
SURIGTEXT("Cannot create a devlink snapshot"),
goto out;
}
/* All the work is done in the callback function devinfo_cback() */
SURIGTEXT("Failed to traverse libdevinfo device nodes"),
goto out;
}
/*
* Major error is indicated by data->ret having been set. In that case,
* data->psh is empty and data->sh contains the specific description
* error.
*/
goto out;
}
/*
* There are situations where even when we find a device path we want to
* warn the user.
*
* When we have an luname GUID match but cannot match our nexus
* (initiator,target), we leave sh->sh_mdev unset in hope that we will
* eventually get the nexus match while keeping walking the tree. So, if
* sh->sh_mdev was left unset, it means that we did not find our storage
* path. So, we are going to set sh->sh_mdev to the first device path
* that matched our luname GUID.
*/
if (data.multipathing) {
/*
* lu:luname form should be always used with
* multipathing.
*/
"%s \"lu:luname.%s\" %s \"%s\" %s",
SURIGTEXT("URI should be used to identify device"),
SURIGTEXT("which has multipathing enabled."));
}
/*
* This is for a logical unit match without a target-initiator
* path match.
*/
"\"initiator.%s,target.%s\" "
"cannot be found but the logical unit \"%s\" "
"is still accessible and will be accessed via "
/*
* match.
*/
}
/*
* This is mapping in a non-multipathing case with an lu:luname URI form
* and with only one path found. If we had more paths we would have
* set sh->sh_mdev already. Note that we had to go through the whole
* tree to make sure only one path existed.
*/
}
ret = ESURI_NOENT;
goto out;
}
/*
* Do not put the device name into the description. It is
* already in the action string.
*/
SURIGTEXT("No logical unit found for device"));
ret = ESURI_NOENT;
goto out;
}
out:
return (ret);
}
/*
* There is really no 100% safe maximum retry number here but tests showed that
* ESURI_NOSLICE was hit only during the race when links have not been created
* yet after the drivers were forcefully loaded. 1-2 seconds was what it took
* for the links to be made so the number below seems like a reasonable value
* even for a stressed machine. Note that since ESURI_NOSLICE is used for this
* special situation only, we should really busy wait only when there is a
* chance to succeed. However, we do not want to wait forever in case we hit
* unexpected issues.
*/
struct suri_handle ***psh)
{
int n;
return (ret);
/*
* When looking up URIs, obviously the device has been already
* enumerated so any error is reported back to the caller right away.
*/
return (ret);
/*
* When mapping URIs, more work is needed for ESURI_NOENT. We can return
* right away in other cases, even in the ESURI_NOSLICE case.
*/
if (ret != ESURI_NOENT) {
/* ESURI_NOSLICE is an internal error code only */
}
/*
* With ESURI_NOENT it is possible that the logical unit exists but the
* device has just not been enumerated yet. Call the function again with
* a force option to load all available drivers and to create all the
* links in /dev. Note that we want to resort to full device rediscovery
* only if we hit ESURI_NOENT since doing that can be a very expensive
* operation.
*/
/*
* We only get here if we are mapping URIs so it is safe to use NULL
* instead of psh.
*/
/*
* We retry just once, whatever we get as a return value is the
* final result unless we hit ESURI_NOSLICE.
*/
if (ret != ESURI_NOSLICE)
return (ret);
/*
* If here, the return value was ESURI_NOSLICE after the initial
* ESURI_NOENT. Due to a bug possibly in libdevinfo, we may still be
* missing the link in /dev for an enumerated device even after we
* called di_devlink_init() with the DI_MAKE_LINK flag. If that was the
* case, give it more time and try again a few times.
*/
for (n = 0; n < SURI_NO_S0_SLICE_RETRIES; ++n) {
(void) sleep(1);
/*
* We only get here if we are mapping URIs so it is safe to use
* NULL instead of psh.
*/
/* We try again only if we hit ESURI_NOSLICE */
if (ret != ESURI_NOSLICE)
return (ret);
}
/* ESURI_NOSLICE is an internal error only. */
return (ESURI_ERR);
}