librsmwrsm.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2001-2003 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* this library is the plugin module used by RSMAPI to communicate
* with the Wildcat RSM driver. The library offers functions to
* setup a connection with the driver, to enable users of RSMAPI to perform
* put, get and barrier operations.
*/
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <synch.h>
#include <assert.h>
#include <strings.h>
#include <sys/wrsm_plugin.h>
#include "librsmwrsm.h"
#ifdef DEBUG
#define PLUGIN_DEBUG 0x0001
#define PLUGIN_WARN 0x0002
#define PLUGIN_PUT 0x0004
#define PLUGIN_GET 0x0008
#define PLUGIN_BARRIER 0x0010
/*
* for debugging:
* - compile with DEBUG and set environment variable PLUGIN_VERBOSITY.
* PLUGIN_VERBOSITY = 0x1F to turn on ALL debug options
* or an OR'ed combination of:
* PLUGIN_DEBUG 0x0001 -This option is used for messages in all
* initialization functions and local functions
* PLUGIN_WARN 0x0002 - This option spans all areas, whenever an error
* or failure of any sort occurs. Minimally Set THIS ONE!
* PLUGIN_PUT 0x0004 - This option for all put related request.
* PLUGIN_GET 0x0008 - This option for all get related request
* PLUGIN_BARRIER 0x0010- This option for ALL barrier operations (close, open
* init, destroy, etc)
* Note, not all possible errors have a corresponding message printed.
*/
/*
* printf-style debug output hook (format string plus variadic arguments),
* compiled only when DEBUG is defined.
* NOTE(review): the body is empty in this view, so nothing is printed here;
* confirm against the full source.
*/
static void
plugin_debug_print(char *format, ...)
{
}
static int plugin_debug = 0; /* initialize to 0 */
#else
#define DEBUGP(a, b) { }
#endif
/*
* the following is based on wci_cluster_error_status_array_u defined
* in wci_regs.h. The plugin is unable to include wci_regs.h. Not
* only is wci_regs.h for use by Kernel modules, wci_regs.h can not
* be used with 32 bit applications.
*/
typedef union {
struct wci_CESR {
} bit;
} wci_CESR_u;
/* Internal functions */
static rsm_ndlib_attr_t wrsm_rsm_ndlib_attr = {
};
static rsm_segops_t wrsm_ops = {
};
static rsm_lib_funcs_t *rsm_lib_funcs;
/* list of file descriptors */
{{0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1},
{0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1},
{0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1},
{0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1},
{0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1},
{0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1},
{0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1},
{0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1},
{0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1},
{0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1},
{0, -1, -1}, {0, -1, -1}, {0, -1, -1}, {0, -1, -1}};
/*
* local function used to return a page-aligned structure
*/
static void *
{
void *addr;
return (addr);
}
/*
* writes and then reads to barrier scratchpage up to two way link striping
* and 4 way wci striping during the close_barrier that is called for
* a put routine. It is needed to flush out the buffer
* to assure that the transaction has made it to the remote node.
*/
static void
{
int i;
/*
* wrsmlib_blkcopy does a membar sync before, during and after
*/
/* No need to check data, since we check wci_error_cluster_count */
/* write all request */
for (i = 0; i < (MAXWCISTRIPING * 2); i++) {
STRIPE_STRIDE * i;
"write_read_scratchpage write for %dth stripe "
"offset,addr is 0x%lx \n", i, scratch_addr));
}
}
/* read all request */
for (i = 0; i < (MAXWCISTRIPING * 2); i++) {
STRIPE_STRIDE * i;
"write_read_scratchpag read for %dth stripe "
"offset, addr is 0x%lx \n", i, scratch_addr));
}
}
}
/*
* sums up all the wci_cluster_error_count values at the first four
* offsets in barrier_ncslice page (mapped in, in the connect call)
* if the striping bit is set for that offset
* The relation between which bit is set and at what offset to read is as
* follows:
* starting at barriermap->ncslice_addr
* If Bit 0 is set, stripe offset 0
* If Bit 1 is set, stripe offset 128
* IF Bit 2 is set, stripe offset 256 (+ 128 from the previous)
* If Bit 3 is set, stripe offset 384 (128 chunks)
*/
static void
{
int i;
/*
* the location of the wci_cluster_error_count in ncslice page 0
* is (based on wci -2 prm) byte offset 64.
*/
" barrier_ncslice addr start is %p \n",
*total = 0;
for (i = 0; i < MAXWCISTRIPING; i++) {
/*
* STRIPE_STRIDE * i - is the start pointer between
* striping, and SAFARI_OFFSET is the offset into the
* location that the wci_cluster_error_count can be found
*/
/* LINTED */
+ (STRIPE_STRIDE * i) + SAFARI_OFFSET));
}
}
" total is %ld\n", *total));
}
/* gets at most 64 bytes of len from a single cacheline */
static int
{
importsegp->segment_id));
if (!importsegp->isloopback) {
/* offset into aligned cacheline */
/*
* aligned offset relevant to which cacheline to copy from
* segment so mask out the lower 14 bits
*/
/* get entire cacheline starting at seg */
/*
* copy requested len of bytes at cacheline_offset
* into buf
*/
} else {
/* If we're in loopback, just copy */
}
return (RSM_SUCCESS);
}
/*
* local function that performs the meat of the close_barrier routine
* using the plugin's barrier structure. the use of this function is
* needed because when the IMPLICIT barriers are done, there is
* no way to pass the RSMAPI defined barrier structure of which is
* considered to be an opaque type according to the plugin.
* When flag is set to TRUE, close_barrier request is due to a put
* request, hence write_read_scratchpage is required, or the call request
* is due to an EXPLICIT barrier and we do not know whether or not the
* data request was a read or a write.
*/
static int
{
/* export_cnode = local cnode, loopback mode */
return (RSM_SUCCESS);
}
/*
* If set to FAILED, return RSMERR_BARRIER_FAILURE
* rerouting occurred on open_barrier
*/
return (RSMERR_BARRIER_FAILURE);
}
/* If set to OPENED, then open_barrier was not previously called */
return (RSMERR_BARRIER_NOT_OPENED);
}
if (flag) {
}
if (bar->wci_cluster_error_count_initial !=
/* errors occurred during the span of time - fail */
"cluster errors detected (initial != final) FAIL\n"));
return (RSMERR_BARRIER_FAILURE);
}
"failure: either routing is changing %d (should be 0)\n"
"\t or route has changed: initial route = %d and should "
" be equal to final route = %d\n",
return (RSMERR_BARRIER_FAILURE);
}
return (RSM_SUCCESS);
}
/*
* local function that performs the meat of the open_barrier routine
* using the plugin's barrier structure. the use of this function is
* needed because when the IMPLICIT barriers are done, there is
* no way to pass the RSMAPI defined barrier structure of which is
* considered to be an opaque type according to the plugin.
*/
static int
{
/* export_cnode = local cnode, loopback mode */
"LOOPBACK\n"));
return (RSM_SUCCESS);
}
/* if reroutingp is set, driver is in process of route change */
"FAILURE occuring - rerouting in progress"
"this will cause close barriers to FAIL\n"));
return (RSM_SUCCESS);
}
/*
* by using the bar->importsegp->init_route_counter initialized
* in the connect call, we avoid taking the lock for every
* call to open_barrier. instead, we only take the barrier lock
* when there is a route change.
*/
/*
* if these don't match then there has been a route change
* we must now update the importseg->init_route_counter
* and the local bar->route_counter (used for comparison in
* close) recall importseg->route_counterp, is actually the
* read only pointer to the drivers address space.
*/
/*
* bar->route_counter will be used later for comparison
* in close_barrier which will check it against the drivers
* counter - *bar->importseg->route_counterp.
*/
}
return (RSM_SUCCESS);
}
/*
* Initialization routine called from rsm library framework
*/
int
{
int tmpfd;
char devicename[12];
#ifdef DEBUG
char *env;
/* set debug variables */
/* LINTED cast from 64-bit integer to 32-bit integer */
plugin_debug = (int)
} else {
plugin_debug = 0;
}
plugin_debug));
#endif
"%d\n", unit));
/* first opendevice called, initialize count */
} else {
/*
* device already opened - keep count of number of times
* requested so that we only call the device's close
* routine once. This will save us the hassle of keeping
* track of additional fd's.
*/
return (RSM_SUCCESS);
}
if (tmpfd == -1) {
/* no config exist for this controller */
return (RSMERR_BAD_CTLR_HNDL);
} else {
return (RSMERR_CTLR_NOT_PRESENT);
}
}
/*
* libc can only handle fd < 256
* because of this, other libraries need to request fd >= 256
*/
/* then keep tmpfd */
} else {
/* close tmpfd since new fd was assigned */
}
/* get local cnode - for use with loopback */
WRSM_CTLR_PLUGIN_GETLOCALNODE, &args) == 0) {
} else {
/*
* If a local cnode is not returned, that is because
* the controller is not part of network.
*/
"controller %d not part of network \n", unit));
return (RSMERR_CTLR_NOT_PRESENT);
}
/* initialize for use in read_write_barrier_scratch routine */
(void)
return (RSM_SUCCESS);
}
/*
* Translates the current errno into the status code returned in retval.
* The callers visible in this file invoke it right after an mmap() call
* has returned MAP_FAILED.
* NOTE(review): the per-case retval assignments and the start of the debug
* statement in the default case are not visible in this view. As shown,
* retval would be returned uninitialized - confirm against the full source
* before relying on this function.
*/
static int
seterr()
{
int retval;
/* errno values are grouped by the status they map to */
switch (errno) {
case ENXIO:
case EINVAL:
break;
case ENODEV:
break;
case EACCES:
break;
case ENOMEM:
break;
case EPROTO:
break;
case EHOSTUNREACH:
break;
case ENOTSUP:
case EOVERFLOW:
case EAGAIN:
case EBADF:
break;
default:
/* unknown return value from mmap */
errno));
}
return (retval);
}
/* ARGSUSED */
static int
{
int prot;
int ctrl_num;
int err = 0;
&export_cnodeid)) != RSM_SUCCESS) {
return (err);
}
if (importsegp == NULL) {
return (RSMERR_INSUFFICIENT_MEM);
}
/* initialize plugin specific importseg */
" controller number is %d, segid is %d cnodeid %lld\n",
/* determine if we should be doing loopback ie, export_cnode = local */
"\n"));
} else {
" mode \n"));
}
/* if not loopback test, set up driver mappings */
if (!importsegp->isloopback) {
/* prepare generic part of pseudo offset */
/* cnodeids are never > 255 that is why we can cast this */
/*
* Remote memory scratch page used by close barrier to ensure
* completion of previous writes
*/
if ((importsegp->barrier_scratch_addr =
== MAP_FAILED) {
" scratch failed on controller number %d export"
" cnode %d errno %d.\n", ctrl_num,
return (seterr());
}
" successfull addr = %p for importsegp %p \n",
/*
* Local WCI error registers to check during barrier close, in
* particular, the plugin is currently only interested in
* wci_cluster_error_count that is accessible via ncslice page 0
*/
if ((importsegp->barrier_ncslice_addr =
== MAP_FAILED) {
" REGS failed on controller number %d export"
return (seterr());
}
"succesfull addr = %p for importsegp %p\n",
/*
* the wci wrsm driver maps the rerouting and the route_counter
* and striping (referred to by the driver as link_stripesp)
* into one address space. the plugin mmaps that address space
* route_info_addr, and then, for clarity, reads them with more
* meaningful names.
*/
if ((importsegp->route_info_addr =
== MAP_FAILED) {
" failed on controller number %d export "
return (seterr());
}
/*
* a counter of the number of times the route has changed
* plugin needs to confirm that the number hasn't changed
* between a barrier open and a barrier close
*/
/* LINTED */
/*
* if reroutingp is > 0, then a rerouting is in progress,
* barrier_open and barrier_close need to check this.
*/
/* LINTED */
/* needed to determine which stripe offsets to read */
/* LINTED */
"addr is %p *route_counterp %d\n\t and rerouting %d "
"(should be 0) and striping DATA 0x%x for importsegp %p\n",
importsegp));
/*
* initialize init_route_counter. if init_route_counter doesn't
* ever differ from the drivers route_counter
* (*importsegp->route_counterp) we know that routing has
* not changed.
*/
}
"space for mutex\n"));
return (RSMERR_INSUFFICIENT_MEM);
}
return (RSM_SUCCESS);
}
static int
{
int error = RSM_SUCCESS;
if (!importsegp->isloopback) {
"route_info_addr is NULL\n"));
} else {
}
"barrier_scratch_addr is NULL\n"));
} else {
}
"barrier_ncslice_addr is NULL\n"));
} else {
}
}
return (error);
}
/* ARGSUSED */
static int
{
}
/* ARGSUSED */
static int
{
/* Check for valid alignment */
return (RSMERR_BAD_MEM_ALIGNMENT);
}
}
/* ARGSUSED */
static int
{
/* Check for valid alignment */
return (RSMERR_BAD_MEM_ALIGNMENT);
}
}
/* ARGSUSED */
static int
{
/* Check for valid alignment */
return (RSMERR_BAD_MEM_ALIGNMENT);
}
}
/*
* from segment addr, get len bytes starting at offset returned in dst_addr
* caller must guarantee that the offset + len doesn't exceed segment size.
*/
static int
{
int err = 0;
#ifdef DEBUG
#endif /* DEBUG */
if (len == 0) {
return (RSM_SUCCESS);
}
#ifdef DEBUG
#endif
" mode \n"));
return (err);
}
}
/* handle partial cacheline read at start of buffer */
if (offset & WRSMLIB_CACHELINE_MASK) {
"partial, start of buf at offset 0x%lx "
/* get length within given cacheline */
!= RSM_SUCCESS) {
if (importsegp->barrier_mode ==
return (err);
}
}
if (len == 0) {
if (importsegp->barrier_mode ==
} else {
return (RSM_SUCCESS);
}
}
/* increment to next unread part in buffer */
len -= partial_cacheline;
dp += partial_cacheline;
}
/* handle cacheline size reads */
if (num_cachelines) {
"for %d num_cachelines at offset 0x%lx length %ld\n",
/* get virtual address of offset in mapped segment */
== 0) {
/* aligned cacheline - this is to be fixed */
if (!importsegp->isloopback) {
} else {
}
} else {
while (num_cachelines) {
if (!importsegp->isloopback) {
1);
} else {
}
}
}
}
/* get partial cacheline at end of buffer */
if (len) {
"end of buf at offset 0x%lx length %ld\n",
!= RSM_SUCCESS) {
if (importsegp->barrier_mode ==
}
return (err);
}
}
}
return (err);
}
/* ARGSUSED */
static int
{
}
/* ARGSUSED */
static int
{
/* Check for valid alignment */
return (RSMERR_BAD_MEM_ALIGNMENT);
}
}
/* ARGSUSED */
static int
{
/* Check for valid alignment */
return (RSMERR_BAD_MEM_ALIGNMENT);
}
}
/* ARGSUSED */
static int
{
/* Check for valid alignment */
return (RSMERR_BAD_MEM_ALIGNMENT);
}
}
static int
{
int err = 0;
int ctrl_num;
if (length == 0) {
return (RSM_SUCCESS);
}
/* set unchanging fields in msgargs */
" Barriers\n"));
RSM_SUCCESS) {
return (err);
}
}
/*
* WARNING - if thread-barriers are supported, small put errors must
* be recorded because these errors are currently recorded in the CESR
* register but they are cleared by the driver prior to return from
* the ioctl.
*/
/* handle partial line write at start of buff */
if (offset & WRSMLIB_CACHELINE_MASK) {
" of buf for controller %d, and segid %d offset 0x%lx"
" length %ld buf addr %p export_cnode %d\n",
if (!importsegp->isloopback) {
WRSM_CTLR_PLUGIN_SMALLPUT, &args)) != 0) {
if (importsegp->barrier_mode ==
(void) close_barrier(&bar_implicit,
B_TRUE);
}
"write, start of buf for controller %d, "
"and segid %d IOCTL failed with err %d "
"errno is %d\n",
return (RSMERR_BARRIER_FAILURE);
}
} else {
}
if (length == 0) {
if (importsegp->barrier_mode ==
} else {
return (RSM_SUCCESS);
}
}
}
/* handle cacheline size writes */
if (num_cachelines) {
length));
if (importsegp->isloopback) {
} else {
/*
* args.buf (ie. src_addr) can be any alignment
* and dst is cacheline aligned so we can
* wrsmlib_blkwrite once to send all cachelines
*/
}
}
/* handle partial cacheline write at end of buffer */
if (length) {
"for ctrl_num %d segment id %d offset 0x%lx length "
if (!importsegp->isloopback) {
WRSM_CTLR_PLUGIN_SMALLPUT, &args)) != 0) {
if (importsegp->barrier_mode ==
(void) close_barrier(&bar_implicit,
B_TRUE);
}
"write, end of buf for controller %d, "
"and segid %d IOCTL FAILED WITH ERR %d "
"errno is %d\n",
return (RSMERR_BARRIER_FAILURE);
}
} else {
}
}
} else {
return (RSM_SUCCESS);
}
}
static int
{
return (RSMERR_INSUFFICIENT_RESOURCES);
}
return (RSM_SUCCESS);
}
static int
{
int err = 0;
"wrsm_memseg_import_open_barrier\n"));
return (err);
}
/*
* Order-barrier entry point (identified as wrsm_memseg_import_order_barrier
* by the debug message below); returns err.
* NOTE(review): the signature line and the statements that call
* close_barrier and reset the barrier state are not visible in this view -
* confirm against the full source.
*/
static int
{
int err = 0;
"wrsm_memseg_import_order_barrier\n"));
/*
* to allow code reuse, we call close_barrier here since order_barrier
* and close barrier perform the same function except that
* order_barrier does not change the barrier_state to CLOSED. We set
* it back to opened here so that close_barrier doesn't need to
* perform an additional check
*/
return (err);
}
static int
{
int err = 0;
return (err);
}
static int
{
"wrsm_memseg_import_destroy_barrier\n"));
if (bar)
return (RSM_SUCCESS);
}
static int
{
return (RSM_SUCCESS);
}
static int
{
return (RSM_SUCCESS);
}
/*
* Scatter/gather put (identified as wrsm_memseg_import_putv by the debug
* message below).
* Walks sg_io->io_request_count iovec entries in order. The first entry
* whose status is not RSM_SUCCESS ends the loop and that status is returned
* immediately. If every entry succeeds, sg_io->io_residual_count is set to
* 0 and err is returned.
* NOTE(review): the signature line, the per-entry transfer call and the
* statement that records io_residual_count on failure are not visible in
* this view - confirm against the full source.
*/
static int
{
int64_t i;
int err = 0;
/*
* iovec for Wildcat always just uses local.vaddr
*/
for (i = 0; i < sg_io->io_request_count; i++) {
iovec->transfer_length));
if (err != RSM_SUCCESS) {
" wrsm_memseg_import_putv err detected\n"));
/*
* set io_residual_count to the number of putv's that
* failed including this one.
*/
return (err);
}
/* advance to the next scatter/gather element */
iovec++;
}
/* every element was transferred, nothing is left over */
sg_io->io_residual_count = 0;
return (err);
}
/*
* Scatter/gather get (identified as wrsm_memseg_import_getv by the debug
* message below).
* Walks sg_io->io_request_count iovec entries in order. The first entry
* whose status is not RSM_SUCCESS ends the loop and that status is returned
* immediately. If every entry succeeds, sg_io->io_residual_count is set to
* 0 and err is returned.
* NOTE(review): the signature line, the per-entry transfer call and the
* statement that records io_residual_count on failure are not visible in
* this view - confirm against the full source.
*/
static int
{
int64_t i;
int err = 0;
/*
* iovec for Wildcat always just uses local.vaddr
*/
for (i = 0; i < sg_io->io_request_count; i++) {
iovec->transfer_length));
if (err != RSM_SUCCESS) {
" wrsm_memseg_import_getv err detected\n"));
/*
* set io_residual_count to the number of getv's that
* failed including this one.
*/
return (err);
}
/* advance to the next scatter/gather element */
iovec++;
}
/* every element was transferred, nothing is left over */
sg_io->io_residual_count = 0;
return (err);
}
/* ARGSUSED */
static int
{
return (RSM_SUCCESS);
}
/* ARGSUSED */
static int
{
return (RSM_SUCCESS);
}
static int
{
return (RSM_SUCCESS);
}
static int
{
return (RSM_SUCCESS);
}
/*
* the kernel will call the driver's close only on the last close
* for all open instances. Hence, this close routine spares the kernel
* the added work, and only requests a close on the last close. By doing
* this, we save ourselves the hassle of keeping track of additional
* fds.
*/
static int
{
int ctrl_num;
"%d\n", ctrl_num));
}
}
return (RSM_SUCCESS);
}