/* wrsm_memseg_export.c, revision 7c478bd95313f5f23a4c958a745db2134aa03244 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* This file implements the RSMPI export side memory segment functions
* for the Wildcat RSM driver.
*/
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/vmsystm.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/buf.h>
#include <vm/seg_kmem.h>
#include <vm/page.h>
#include <sys/sunddi.h>
#include <sys/ddi.h>
#include <sys/ddimapreq.h>
#include <sys/rsm/rsmpi.h>
#include <sys/wrsm_common.h>
#include <sys/wrsm_nc.h>
#include <sys/wrsm_session.h>
#include <sys/wrsm_memseg.h>
#include <sys/wrsm_memseg_impl.h>
#include <sys/wrsm_intr.h>
#ifdef DEBUG
#define DBG_WARN 0x001
#define DBG_EXPORT 0x002
#define DBG_EXPORT_EXTRA 0x040
static uint_t wrsm_export_memseg_debug = DBG_WARN;
#define DPRINTF(a, b) { if (wrsm_export_memseg_debug & a) wrsmdprintf b; }
#else /* DEBUG */
#define DPRINTF(a, b) { }
#endif /* DEBUG */
static int wrsm_hw_protection = 0;
/*
* lock hierarchy:
* network->lock
* all_exportsegs_lock
* exportseg->lock
* node->memseg->lock
*
* Note: it is always safe to take all_exportsegs_lock.
* It is also safe to take network->lock: the network must
* unregister (unregister_controller), which it can't do
* until clients all release the network (release_controller).
* If a client accesses these functions after doing a release
* controller, all bets are off.
*/
static exportseg_t *all_exportsegs_hash[WRSM_PTR_HASH_SIZE];
static kmutex_t all_exportsegs_lock;
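/*
 * Illustrative sketch only (hypothetical helper, not called by the
 * driver): a thread that needs several of these locks must take them
 * in hierarchy order, outermost first. Code already holding
 * exportseg->lock must drop it before taking network->lock, as
 * exportseg_set_segid() does below.
 */
#ifdef DEBUG
static void
example_lock_order(wrsm_network_t *network, exportseg_t *exportseg)
{
	mutex_enter(&network->lock);		/* outermost lock first */
	mutex_enter(&all_exportsegs_lock);	/* always safe to take */
	mutex_enter(&exportseg->lock);		/* most specific lock last */
	/* code touching the network, hash, and exportseg would go here */
	mutex_exit(&exportseg->lock);
	mutex_exit(&all_exportsegs_lock);
	mutex_exit(&network->lock);
}
#endif /* DEBUG */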
/*
* Find the right starting cmmugrp for offset <off>. <sz> is the size of
* the region starting at <off> that falls within this cmmugrp. <ci> is
* the index of the cmmu entry within the entire cmmugrp's tuples array of
* the entry for this offset.
*/
static void
get_start_cmmugrp(cmmugrp_t **grpp, size_t off, unsigned *ci, size_t *sz)
{
off_t remainder;
while ((*grpp)->offset + (*grpp)->len < off) {
(*grpp) = (*grpp)->next;
		ASSERT(*grpp);
}
ASSERT((*grpp)->offset <= off);
ASSERT((*grpp)->offset + (*grpp)->len > off);
remainder = off - (*grpp)->offset;
ASSERT(remainder < (*grpp)->len);
*sz = (*grpp)->len - remainder;
*ci = remainder / (*grpp)->pgbytes;
}
/*
* Get the next cmmugrp. <cc> is the index into the new cmmugrp's tuples
* array. <ci> is the cmmu entry within the tuple. (Both are set to 0.)
*/
static void
get_next_cmmugrp(cmmugrp_t **grpp, unsigned *cc, unsigned *ci, size_t *sz,
wrsm_cmmu_tuple_t **tp)
{
*grpp = (*grpp)->next;
ASSERT(*grpp);
*sz = (*grpp)->len;
*cc = 0;
*ci = 0;
*tp = &((*grpp)->tuples[(*cc)]);
}
/*
* Get the starting tuple and index into this tuple within cmmugrp <grp>
* for this offset. <cc> is the index into the cmmugrp's tuples array.
 * <tp> is the tuple. The index into the cmmugrp for this offset is passed
* in through <ci>. <ci> is modified to contain the cmmu entry within the tuple
* for this offset.
*/
static void
get_start_entry(cmmugrp_t *grp, wrsm_cmmu_tuple_t **tp, unsigned *cc,
unsigned *ci)
{
(*cc) = 0;
*tp = &(grp->tuples[(*cc)]);
	while ((*tp)->count <= *ci) {
		*ci -= (*tp)->count;
		(*cc)++;
		ASSERT((*cc) < grp->num_tuples);
		(*tp) = &(grp->tuples[(*cc)]);
		ASSERT((*tp));
	}
	ASSERT(*ci < (*tp)->count);
DPRINTF(DBG_EXPORT_EXTRA, (CE_CONT, "get_start_entry: tuple_index %d "
"cmmu_index %d\n", *cc, *ci));
}
/*
* Get next entry from this tuple. If no more entries from this tuple, use
* entries from next tuple. <cc> is the index into the tuples array. <tp>
* is the tuple. <ci> is the cmmu entry within the tuple for this offset.
*/
void
get_next_entry(wrsm_cmmu_tuple_t *tuple_list, wrsm_cmmu_tuple_t **tp,
unsigned *cc, unsigned *ci)
{
(*ci)++;
if ((*ci) == (*tp)->count) {
(*cc)++;
(*tp) = &(tuple_list[(*cc)]);
(*ci) = 0;
}
DPRINTF(DBG_EXPORT_EXTRA, (CE_CONT, "get_next_entry: tuple_index %d "
"cmmu_index %d\n", *cc, *ci));
}
/*
* Get the number of entries in this cmmugrp needed to cover region of size
* <len>, or the maximum number of entries. <sz> is the size in bytes of
* the cmmugrp. <pgbytes> is the number of bytes covered by each entry.
* <num> returns the number of entries.
*/
static void
get_num_entries(size_t *len, unsigned *num, size_t sz, size_t pgbytes)
{
DPRINTF(DBG_EXPORT_EXTRA, (CE_CONT, "get_num_entries start: len 0x%lx "
"num %d sz 0x%lx pgbytes 0x%lx\n", *len, *num, sz, pgbytes));
if ((*len) > sz) {
(*num) = sz / pgbytes;
(*len) -= sz;
} else {
ASSERT((*len) % pgbytes == 0);
(*num) = (*len) / pgbytes;
(*len) = 0;
}
ASSERT((*num) >= 1);
DPRINTF(DBG_EXPORT_EXTRA, (CE_CONT, "get_num_entries end: len 0x%lx "
"num %d sz 0x%lx pgbytes 0x%lx\n", *len, *num, sz, pgbytes));
}
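/*
 * A minimal sketch (hypothetical, not called by the driver) of how the
 * five helpers above compose to visit every cmmu entry backing bytes
 * <off, off+len> of a segment; update_cmmu_fields() and
 * clear_lpa_fields() below follow exactly this pattern.
 */
#ifdef DEBUG
static void
example_walk_cmmu_range(exportseg_t *exportseg, size_t off, size_t len)
{
	cmmugrp_t *grp = exportseg->cmmugrps;
	wrsm_cmmu_tuple_t *tuple;
	unsigned tuple_index;
	unsigned entry_index;
	unsigned num_entries;
	size_t grp_size;

	/* position at the cmmugrp and cmmu entry covering <off> */
	get_start_cmmugrp(&grp, off, &entry_index, &grp_size);
	get_start_entry(grp, &tuple, &tuple_index, &entry_index);
	while (len > 0) {
		/* entries this cmmugrp contributes; decrements len */
		get_num_entries(&len, &num_entries, grp_size, grp->pgbytes);
		while (num_entries) {
			/*
			 * tuple->index + entry_index names the hardware
			 * cmmu slot; a real walk would operate on it
			 * (e.g. via wrsm_cmmu_update()) before advancing
			 */
			get_next_entry(grp->tuples, &tuple, &tuple_index,
			    &entry_index);
			num_entries--;
		}
		if (len == 0)
			break;
		get_next_cmmugrp(&grp, &tuple_index, &entry_index,
		    &grp_size, &tuple);
	}
}
#endif /* DEBUG */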
/*
* Find exportseg structure in network exportseg hash from segment id.
*/
static exportseg_t *
segid_to_exportseg(wrsm_network_t *network, rsm_memseg_id_t segid)
{
int index;
exportseg_t *exportseg;
ASSERT(MUTEX_HELD(&network->lock));
index = WRSM_SEGID_HASH_FUNC(segid);
ASSERT(index < WRSM_SEGID_HASH_SIZE);
exportseg = network->memseg->exportseg_hash[index];
while (exportseg) {
if (exportseg->segid == segid)
return (exportseg);
exportseg = exportseg->segid_next;
}
return (NULL);
}
/*
* Set segid of exportseg, add to network hash table.
*/
static int
exportseg_set_segid(exportseg_t *exportseg, rsm_memseg_id_t segid)
{
wrsm_network_t *network = exportseg->network;
int index;
boolean_t found = B_FALSE;
exportseg_t *expsg;
ASSERT(MUTEX_HELD(&exportseg->lock));
/*
* release exportseg lock in order to take network lock
*/
index = WRSM_PTR_HASH_FUNC(exportseg);
mutex_exit(&exportseg->lock);
mutex_enter(&network->lock);
mutex_enter(&all_exportsegs_lock);
expsg = all_exportsegs_hash[index];
while (expsg) {
if (expsg == exportseg) {
mutex_enter(&exportseg->lock);
found = B_TRUE;
break;
}
expsg = expsg->all_next;
}
mutex_exit(&all_exportsegs_lock);
if (!found) {
mutex_exit(&network->lock);
return (RSMERR_BAD_SEG_HNDL);
}
if (exportseg->state != memseg_unpublished) {
/* segment is already published */
mutex_exit(&network->lock);
return (RSMERR_SEG_ALREADY_PUBLISHED);
}
if (segid_to_exportseg(network, segid)) {
/* segment id already in use */
mutex_exit(&network->lock);
return (RSMERR_SEGID_IN_USE);
}
exportseg->segid = segid;
exportseg->state = memseg_published;
network->memseg->export_published++;
/*
* add to hash
*/
index = WRSM_SEGID_HASH_FUNC(segid);
ASSERT(index < WRSM_SEGID_HASH_SIZE);
exportseg->segid_next = network->memseg->exportseg_hash[index];
network->memseg->exportseg_hash[index] = exportseg;
mutex_exit(&network->lock);
return (RSM_SUCCESS);
}
/*
* Stop using current segment id, and remove exportseg structure from
* network hash. Note: exportseg is prevented from disappearing until
* exportseg->state is unpublished.
*/
static void
exportseg_unset_segid(exportseg_t *exportseg, rsm_memseg_id_t segid)
{
wrsm_network_t *network = exportseg->network;
exportseg_t **exportsegp;
int index;
index = WRSM_SEGID_HASH_FUNC(segid);
ASSERT(index < WRSM_SEGID_HASH_SIZE);
mutex_enter(&network->lock);
/*
* find and remove exportseg from hash table
*/
exportsegp = &(network->memseg->exportseg_hash[index]);
while (*exportsegp != NULL && *exportsegp != exportseg) {
exportsegp = &((*exportsegp)->segid_next);
}
if (*exportsegp == NULL) {
/* someone else already unpublished this segment */
DPRINTF(DBG_EXPORT, (CE_NOTE, "exportseg %p (segid %d) not "
"in hash table", (void *) exportseg, exportseg->segid));
mutex_exit(&network->lock);
return;
}
/*
* Found exportseg; remove from segid hash table.
* If exportseg is in segid hash table, it cannot
* be in unpublished state.
*/
mutex_enter(&exportseg->lock);
*exportsegp = (*exportsegp)->segid_next;
network->memseg->export_published--;
mutex_exit(&network->lock);
ASSERT(exportseg->state != memseg_unpublished);
exportseg->state = memseg_unpublished;
mutex_exit(&exportseg->lock);
}
/*
* Find an exportseg with specified segid in network's exportseg hash and
* lock it.
*/
static exportseg_t *
find_and_lock_exportseg(wrsm_network_t *network, rsm_memseg_id_t segid)
{
exportseg_t *exportseg = NULL;
mutex_enter(&network->lock);
exportseg = segid_to_exportseg(network, segid);
if (exportseg)
mutex_enter(&exportseg->lock);
mutex_exit(&network->lock);
return (exportseg);
}
/*
* Make sure this exportseg is still in all_exportsegs_hash.
*/
static int
lock_exportseg(exportseg_t *exportseg)
{
exportseg_t *expsg;
uint_t index;
int err = RSMERR_BAD_SEG_HNDL;
index = WRSM_PTR_HASH_FUNC(exportseg);
ASSERT(index < WRSM_PTR_HASH_SIZE);
mutex_enter(&all_exportsegs_lock);
expsg = all_exportsegs_hash[index];
while (expsg) {
if (expsg == exportseg) {
mutex_enter(&exportseg->lock);
err = RSM_SUCCESS;
break;
}
expsg = expsg->all_next;
}
mutex_exit(&all_exportsegs_lock);
/*
* make sure exportseg is not currently being removed
*/
if ((err == RSM_SUCCESS) && (exportseg->valid == B_FALSE)) {
mutex_exit(&exportseg->lock);
err = RSMERR_BAD_SEG_HNDL;
}
#ifdef DEBUG
if (err == RSMERR_BAD_SEG_HNDL) {
DPRINTF(DBG_EXPORT, (CE_CONT, "lock_exportseg - "
"invalid memseg 0x%p\n", (void *)exportseg));
}
#endif
return (err);
}
/*
* Free all cmmu entries for this exported segment.
*/
static void
mseg_free_cmmus(exportseg_t *exportseg)
{
cmmugrp_t *cmmugrp, *ocmmugrp;
wrsm_cmmu_t cmmu;
wrsm_cmmu_index_t index;
unsigned count;
unsigned i, j;
DPRINTF(DBG_EXPORT, (CE_CONT, "mseg_free_cmmus() exportseg 0x%p\n",
(void *)exportseg));
cmmu.entry_0.bit.valid = B_FALSE;
cmmugrp = exportseg->cmmugrps;
while (cmmugrp != NULL) {
/*
* invalidate and free cmmu entries
*/
for (i = 0; i < cmmugrp->num_tuples; i++) {
index = cmmugrp->tuples[i].index;
count = cmmugrp->tuples[i].count;
DPRINTF(DBG_EXPORT_EXTRA, (CE_CONT,
"mseg_free_cmmus() freeing tuples %d - %d\n",
index, index + count - 1));
for (j = 0; j < count; j++) {
wrsm_cmmu_update(exportseg->network, &cmmu,
index, CMMU_UPDATE_VALID);
index++;
}
}
wrsm_cmmu_free(exportseg->network, cmmugrp->num_tuples,
cmmugrp->tuples);
/*
* free cmmugrp structures
*/
ocmmugrp = cmmugrp;
cmmugrp = cmmugrp->next;
kmem_free(ocmmugrp, sizeof (cmmugrp_t));
}
exportseg->cmmugrps = NULL;
}
/*
 * In the cmmu entries in the range specified by <seg_offset, len>, set or
* unset the valid field and set or unset the writable field as specified
* by <flag>.
*/
static void
update_cmmu_fields(exportseg_t *exportseg, size_t seg_offset, size_t len,
memseg_cmmufield_t flag)
{
wrsm_network_t *network = exportseg->network;
cmmugrp_t *cmmugrp = exportseg->cmmugrps;
size_t cmmugrp_size;
unsigned cmmutuples;
unsigned cmmu_index;
unsigned num_entries;
unsigned pfn_index;
wrsm_cmmu_tuple_t *tuple;
wrsm_cmmu_t cmmu;
wrsm_cmmu_flags_t cmmu_flag;
DPRINTF(DBG_EXPORT, (CE_CONT, "update_cmmu_fields() - "
"seg_offset 0x%lx len 0x%lx flag %s\n", seg_offset, len,
CMMU_UPDATE_STR(flag)));
ASSERT(MUTEX_HELD(&exportseg->lock));
if (exportseg->size == 0) {
/* nothing to do */
return;
}
/*
* Update the valid field; also update the writable field if this
* was requested.
*/
cmmu_flag = CMMU_UPDATE_VALID;
if (flag == memseg_set_writeable) {
cmmu.entry_0.bit.writable = 1;
cmmu_flag |= CMMU_UPDATE_WRITABLE;
} else if (flag == memseg_unset_writeable) {
cmmu.entry_0.bit.writable = 0;
cmmu_flag |= CMMU_UPDATE_WRITABLE;
} else if (flag == memseg_unset_valid) {
cmmu_flag |= CMMU_UPDATE_FLUSH;
}
pfn_index = seg_offset >> MMU_PAGESHIFT;
/*
* Find the right cmmugrp structure, tuple, and cmmu entry within
* the tuple (as indicated by cmmu_index) for <seg_offset>.
*/
get_start_cmmugrp(&cmmugrp, seg_offset, &cmmu_index, &cmmugrp_size);
get_start_entry(cmmugrp, &tuple, &cmmutuples, &cmmu_index);
while (len > 0) {
/*
* Calculate the number of entries from this cmmugrp that
* should be reset, and subtract covered bytes from len.
*/
get_num_entries(&len, &num_entries, cmmugrp_size,
cmmugrp->pgbytes);
while (num_entries) {
/*
			 * If the writable field is being updated, the valid
			 * field is also set to true if there is memory
			 * backing the cmmu entry.
*/
if (flag == memseg_unset_valid ||
exportseg->pfn_list[pfn_index] == PFN_INVALID) {
cmmu.entry_0.bit.valid = 0;
} else {
cmmu.entry_0.bit.valid = 1;
}
wrsm_cmmu_update(network, &cmmu,
tuple->index + cmmu_index, cmmu_flag);
DPRINTF(DBG_EXPORT, (CE_CONT, "updated "
"index %d\n", tuple->index + cmmu_index));
/* get next CMMU entry in this cmmugrp */
get_next_entry(cmmugrp->tuples, &tuple, &cmmutuples,
&cmmu_index);
num_entries--;
pfn_index += cmmugrp->pgbytes >> MMU_PAGESHIFT;
}
if (len == 0)
break;
get_next_cmmugrp(&cmmugrp, &cmmutuples, &cmmu_index,
&cmmugrp_size, &tuple);
}
}
/*
* The lpa fields in cmmu entries in the range specified by <seg_offset,
* len> are no longer valid. Set the valid field of all cmmu entries in
* this range to invalid, and set the affected entries in the pfn_list to
* PFN_INVALID.
*/
static int
clear_lpa_fields(exportseg_t *exportseg, size_t seg_offset, size_t len,
boolean_t mapping_required)
{
wrsm_network_t *network = exportseg->network;
cmmugrp_t *cmmugrp = exportseg->cmmugrps;
size_t cmmugrp_size;
unsigned cmmutuples;
unsigned cmmu_index;
unsigned num_entries;
unsigned pfn_index;
wrsm_cmmu_tuple_t *tuple;
wrsm_cmmu_t cmmu;
int i;
DPRINTF(DBG_EXPORT, (CE_CONT, "clear_lpa_fields() - "
"seg_offset 0x%lx len 0x%lx\n", seg_offset, len));
/*
* Check if any pfn fields are not valid. Fail with
* RSMERR_MEM_NOT_BOUND if it is required that they be valid.
*/
pfn_index = seg_offset >> MMU_PAGESHIFT;
if (mapping_required) {
for (i = 0; i < (len >> MMU_PAGESHIFT); i++) {
if (exportseg->pfn_list[pfn_index + i] == PFN_INVALID) {
return (RSMERR_MEM_NOT_BOUND);
}
}
}
/*
* Invalidate all affected entries in the pfn list.
*/
for (i = pfn_index; i < ((seg_offset + len) >> MMU_PAGESHIFT); i++) {
if (exportseg->pfn_list[i] != PFN_INVALID) {
network->memseg->bytes_bound -= MMU_PAGESIZE;
}
exportseg->pfn_list[i] = PFN_INVALID;
}
/*
* Set all cmmu entries in range to invalid if segment is published.
* Otherwise, they are already set to invalid.
*/
if (exportseg->state != memseg_published) {
return (WRSM_SUCCESS);
}
cmmu.entry_0.bit.valid = 0;
/*
* Find the right cmmugrp structure, tuple, and cmmu entry within
* the tuple (as indicated by cmmu_index) for <seg_offset>.
*/
get_start_cmmugrp(&cmmugrp, seg_offset, &cmmu_index, &cmmugrp_size);
get_start_entry(cmmugrp, &tuple, &cmmutuples, &cmmu_index);
while (len > 0) {
/*
* Calculate the number of entries from this cmmugrp that
* should be cleared, and subtract covered bytes from len.
*/
get_num_entries(&len, &num_entries, cmmugrp_size,
cmmugrp->pgbytes);
while (num_entries) {
wrsm_cmmu_update(network,
&cmmu,
tuple->index + cmmu_index,
CMMU_UPDATE_VALID);
/* get next CMMU entry in this cmmugrp */
get_next_entry(cmmugrp->tuples, &tuple, &cmmutuples,
&cmmu_index);
num_entries--;
}
if (len == 0)
break;
get_next_cmmugrp(&cmmugrp, &cmmutuples, &cmmu_index,
&cmmugrp_size, &tuple);
}
return (WRSM_SUCCESS);
}
/*
* Set up the cmmu lpa fields to point to the physical memory backing the
* region pointed to by <as, vaddr> or to the pages in the pagelist
* starting with <startpp>. Use as many entries as needed to map <len>
* bytes.
*
* For each physical page backing the region, update the lpa fields of as
* many cmmu entries as are needed to map the page -- either one cmmu entry
* if the passed in page size matches the CMMU entry page size, or multiple
* cmmu entries if a large page is passed in but small page cmmu entries
* are being used. Also record the pfn for each 8k region in the segment
* pfn_list, and set the entry to valid if it is published.
*
* Update the cmmu entries in cmmugrp/tuple sequential order starting with
* the entry specified by <seg_offset>.
*/
static int
set_lpa_fields(exportseg_t *exportseg, size_t seg_offset, size_t len,
struct as *as, caddr_t vaddr, page_t *startpp)
{
int err = 0;
int pgbytes;
size_t bytesleft;
size_t used_in_group;
pfn_t pfn, pfn_8k;
wrsm_network_t *network = exportseg->network;
cmmugrp_t *cmmugrp = exportseg->cmmugrps;
size_t cmmugrp_size;
unsigned cmmutuples;
unsigned cmmu_index;
unsigned num_entries;
unsigned pfn_index;
wrsm_cmmu_tuple_t *tuple;
wrsm_cmmu_t cmmu;
off_t cur_offset = 0;
page_t *pp = startpp;
int i;
DPRINTF(DBG_EXPORT, (CE_CONT, "set_lpa_fields() - "
"seg_offset 0x%lx len 0x%lx\n", seg_offset, len));
ASSERT(cmmugrp);
/*
* If any pfn entries are already valid, fail with
* RSMERR_MEM_ALREADY_BOUND.
*/
pfn_index = seg_offset >> MMU_PAGESHIFT;
for (i = 0; i < (len >> MMU_PAGESHIFT); i++) {
if (exportseg->pfn_list[pfn_index + i] != PFN_INVALID) {
return (RSMERR_MEM_ALREADY_BOUND);
}
}
/*
* Set cmmu entries to valid if segment has been published.
*/
if (exportseg->state == memseg_published) {
cmmu.entry_0.bit.valid = 1;
} else {
cmmu.entry_0.bit.valid = 0;
}
/*
* Find the right cmmugrp structure, tuple, and cmmu entry within
* the tuple (as indicated by cmmu_index) for <seg_offset>.
*/
get_start_cmmugrp(&cmmugrp, seg_offset, &cmmu_index, &cmmugrp_size);
get_start_entry(cmmugrp, &tuple, &cmmutuples, &cmmu_index);
used_in_group = 0;
while (len > 0) {
/*
* Get the pfn and size of the next page.
*/
if (startpp) {
/*
* Get the pfn for next page in pagelist. This is
* guaranteed to be real memory, as we have been
* given page structures.
*/
if (!pp) {
err = RSMERR_NO_BACKING_MEM;
goto bad_memory;
}
pfn = page_pptonum(pp);
pgbytes = PAGESIZE; /* same as bp_mapin */
page_unlock(pp);
pp = pp->p_next;
} else {
/*
* Get the pfn for the page backing <as, vaddr +
* cur_offset>. Make sure this is real memory.
* Grab AS_LOCK to make sure as mappings don't
* change.
*/
AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
pfn = hat_getpfnum(as->a_hat, vaddr + cur_offset);
AS_LOCK_EXIT(as, &as->a_lock);
if (pfn == PFN_INVALID) {
err = RSMERR_NO_BACKING_MEM;
goto bad_memory;
}
if (!pf_is_memory(pfn)) {
err = RSMERR_NOT_MEM;
goto bad_memory;
}
pgbytes = MMU_PAGESIZE;
}
DPRINTF(DBG_EXPORT_EXTRA, (CE_CONT,
"mapping page with pfn 0x%lx size 0x%x\n",
pfn, pgbytes));
ASSERT(pgbytes == MMU_PAGESIZE || pgbytes == MMU_PAGESIZE4M);
ASSERT(pgbytes >= cmmugrp->pgbytes);
pfn_8k = pfn;
bytesleft = pgbytes;
/*
* If we've already allocated all the entries from the
* current cmmugrp, move to the next one.
*/
if (used_in_group >= cmmugrp_size) {
DPRINTF(DBG_EXPORT_EXTRA, (CE_CONT,
"set_lpa_fields used all in one group"
" used_in_group = %lx, size = %lx\n",
used_in_group, cmmugrp_size));
get_next_cmmugrp(&cmmugrp, &cmmutuples, &cmmu_index,
&cmmugrp_size, &tuple);
used_in_group = 0;
}
while (bytesleft > 0) {
/*
* Calculate the number of cmmu entries from this
* cmmugrp that will be used to map this page, and
* subtract covered bytes from bytesleft.
*/
get_num_entries(&bytesleft, &num_entries,
cmmugrp_size, cmmugrp->pgbytes);
while (num_entries) {
/*
* record lpa for this region of the page
*/
cmmu.entry_1.addr.lpa_page = pfn;
wrsm_cmmu_update(network, &cmmu,
tuple->index + cmmu_index,
CMMU_UPDATE_LPA | CMMU_UPDATE_VALID);
DPRINTF(DBG_EXPORT_EXTRA,
(CE_CONT, "set_lpa_fields "
"cmmu index %d pfn 0x%lx valid %ld\n",
tuple->index + cmmu_index,
cmmu.entry_1.addr.lpa_page,
cmmu.entry_0.bit.valid));
/* get next CMMU entry */
get_next_entry(cmmugrp->tuples, &tuple,
&cmmutuples, &cmmu_index);
num_entries--;
/*
* If cmmu entries are for small pages,
* get physaddr (pfn) for next 8k page.
*/
if (cmmugrp->pgbytes == MMU_PAGESIZE)
pfn += MMU_PAGESIZE >> MMU_PAGESHIFT;
}
if (bytesleft == 0)
break;
get_next_cmmugrp(&cmmugrp, &cmmutuples, &cmmu_index,
&cmmugrp_size, &tuple);
used_in_group = 0;
}
/*
* record the 8k-based pfns for this page in pfn_list
*/
for (i = 0; i < (pgbytes >> MMU_PAGESHIFT); i++) {
exportseg->pfn_list[pfn_index + i] = pfn_8k;
pfn_8k += MMU_PAGESIZE >> MMU_PAGESHIFT;
}
pfn_index += pgbytes >> MMU_PAGESHIFT;
used_in_group += pgbytes;
cur_offset += pgbytes;
network->memseg->bytes_bound += pgbytes;
		len -= pgbytes;
		ASSERT((ssize_t)len >= 0);
}
return (WRSM_SUCCESS);
bad_memory:
/*
* There was a problem with the backing memory. Tear down
* previously set up stuff, and return error.
*/
pfn_index = seg_offset >> MMU_PAGESHIFT;
for (i = 0; i < (cur_offset >> MMU_PAGESHIFT); i++) {
exportseg->pfn_list[pfn_index + i] = PFN_INVALID;
}
(void) clear_lpa_fields(exportseg, seg_offset, cur_offset, B_FALSE);
return (err);
}
/*
* Allocate <num_entries> cmmu entries of <pgbytes> page size from the cmmu
* allocator. Create a cmmugrp entry to store info about these entries.
*/
static int
alloc_cmmu_tuples(exportseg_t *exportseg, int num_entries, off_t seg_offset,
int pgbytes, cmmugrp_t **cmmugrpp, boolean_t sleep)
{
wrsm_network_t *network = exportseg->network;
int err = WRSM_SUCCESS;
size_t cmmu_page_size;
cmmugrp_t *cmmugrp;
wrsm_cmmu_t cmmu;
wrsm_cmmu_index_t index;
boolean_t lg_page;
int i, j;
DPRINTF(DBG_EXPORT_EXTRA, (CE_CONT, "alloc_cmmu_tuples() - "
"num_entries %d seg_offset 0x%lx pgbytes 0x%x\n",
num_entries, seg_offset, pgbytes));
cmmugrp = (cmmugrp_t *)kmem_zalloc(sizeof (cmmugrp_t),
KM_SLEEP);
cmmugrp->offset = seg_offset;
cmmugrp->len = num_entries * pgbytes;
cmmugrp->pgbytes = pgbytes;
cmmu_page_size = (pgbytes == MMU_PAGESIZE) ?
CMMU_PAGE_SIZE_SMALL : CMMU_PAGE_SIZE_LARGE;
lg_page = (pgbytes == MMU_PAGESIZE4M) ? B_TRUE : B_FALSE;
if ((err = wrsm_cmmu_alloc(network, cmmu_page_size, num_entries,
&cmmugrp->tuples, &cmmugrp->num_tuples, sleep)) !=
WRSM_SUCCESS) {
if (cmmu_page_size == CMMU_PAGE_SIZE_LARGE) {
/*
* try allocating cmmu entries for small pages
*/
lg_page = B_FALSE;
cmmugrp->pgbytes = MMU_PAGESIZE;
cmmu_page_size = CMMU_PAGE_SIZE_SMALL;
num_entries *= MMU_PAGESIZE4M >> MMU_PAGESHIFT;
if ((err = wrsm_cmmu_alloc(network,
cmmu_page_size, num_entries,
&cmmugrp->tuples, &cmmugrp->num_tuples, sleep)) !=
WRSM_SUCCESS) {
kmem_free(cmmugrp, sizeof (cmmugrp_t));
				/* return RSMPI compliant error code */
return (RSMERR_INSUFFICIENT_RESOURCES);
}
} else {
/* give up */
kmem_free(cmmugrp, sizeof (cmmugrp_t));
return (err);
}
}
/*
* Update each CMMU entry to reflect how it is being used
*/
cmmu.entry_0.val = 0;
cmmu.entry_0.bit.count_enable = B_FALSE;
cmmu.entry_0.bit.large_page = lg_page;
cmmu.entry_0.bit.user_err = B_FALSE;
cmmu.entry_0.bit.writable = B_FALSE;
cmmu.entry_0.bit.from_all = B_TRUE;
cmmu.entry_0.bit.from_node = 255;
cmmu.entry_0.bit.valid = B_FALSE;
cmmu.entry_0.bit.type = CMMU_TYPE_CACHEABLE;
cmmu.entry_1.val = 0;
for (i = 0; i < cmmugrp->num_tuples; i++) {
index = cmmugrp->tuples[i].index;
DPRINTF(DBG_EXPORT_EXTRA, (CE_CONT,
"alloc_cmmu_tuples() alloced tuples %d - %d\n",
index, index + cmmugrp->tuples[i].count - 1));
for (j = 0; j < cmmugrp->tuples[i].count; j++) {
wrsm_cmmu_update(network, &cmmu, index,
CMMU_UPDATE_ALL);
index++;
}
}
exportseg->total_tuples += cmmugrp->num_tuples;
exportseg->num_cmmugrps++;
*cmmugrpp = cmmugrp;
return (WRSM_SUCCESS);
}
/*
* Allocate enough cmmu entries for a segment of size <size>. Allocate
* large pages where possible. Set up mappings to any passed in memory.
*/
static int
alloc_seg_cmmu_entries(exportseg_t *exportseg, rsm_memory_local_t *memory,
size_t size, boolean_t allow_lg_pages, boolean_t sleep)
{
int pgbytes, opgbytes, num_entries, need_entries;
off_t seg_offset;
cmmugrp_t **cmmugrp_nextp;
size_t nbytes = 0;
pfn_t pfn;
page_t *pp;
page_t *startpp = NULL;
struct buf *bp = NULL;
struct as *as = NULL;
off_t offset = 0;
void *vaddr = NULL;
/* LINTED: E_FUNC_SET_NOT_USED */
int err;
#ifdef DEBUG
pfn = 0;
#endif
if (memory->ms_type == RSM_MEM_BUF) {
bp = memory->ms_bp;
DPRINTF(DBG_EXPORT, (CE_CONT, "alloc_seg_cmmu_entries() - "
"bp 0x%p size 0x%lx\n", (void *)bp, size));
ASSERT(bp);
ASSERT(SEMA_HELD(&bp->b_sem));
nbytes = bp->b_bcount;
if (bp->b_flags & B_PAGEIO) {
if (!bp->b_pages) {
return (RSMERR_NO_BACKING_MEM);
} else {
pp = startpp = bp->b_pages;
}
} else {
vaddr = (void *)bp->b_un.b_addr;
if (bp->b_flags & B_PHYS) {
if (bp->b_proc == NULL ||
(as = bp->b_proc->p_as) == NULL)
as = &kas;
} else {
as = &kas;
}
}
} else {
ASSERT(memory->ms_type == RSM_MEM_VADDR);
as = memory->ms_as;
if (as == NULL) {
as = &kas;
}
vaddr = memory->ms_vaddr;
nbytes = memory->ms_length;
DPRINTF(DBG_EXPORT, (CE_CONT, "alloc_seg_cmmu_entries() - "
"as 0x%p vaddr 0x%p length 0x%lx size 0x%lx\n", (void *)as,
vaddr, nbytes, size));
}
if ((uint64_t)vaddr & (uint64_t)MMU_PAGEOFFSET) {
		/* vaddr must be properly aligned */
DPRINTF(DBG_EXPORT, (CE_CONT, "vaddr 0x%p not aligned\n",
(void *)vaddr));
return (RSMERR_BAD_MEM_ALIGNMENT);
}
if (nbytes > size) {
/* size range can't exceed segment size */
DPRINTF(DBG_EXPORT, (CE_CONT, "nbytes %ld > size %ld\n",
nbytes, size));
return (RSMERR_BAD_LENGTH);
}
if (nbytes & MMU_PAGEOFFSET) {
		/* nbytes must be a page-aligned number of bytes */
		DPRINTF(DBG_EXPORT, (CE_CONT, "nbytes %ld not page aligned\n",
		    nbytes));
return (RSMERR_BAD_MEM_ALIGNMENT);
}
size -= nbytes; /* size of region not backed by memory */
pgbytes = opgbytes = MMU_PAGESIZE;
num_entries = 0;
seg_offset = 0;
cmmugrp_nextp = &exportseg->cmmugrps;
ASSERT(*cmmugrp_nextp == NULL);
/*
* Use large page CMMU entries for all large physical pages if
* allowed and available. We could try seeing if the small pages
* happen to be allocated consecutively, but the caller apparently
* didn't care enough to use large pages, so don't bother.
*/
while (nbytes > 0) {
if (startpp) {
if (!pp) {
mseg_free_cmmus(exportseg);
DPRINTF(DBG_EXPORT,
(CE_CONT, "invalid buf pp\n"));
return (RSMERR_NO_BACKING_MEM);
}
pgbytes = PAGESIZE; /* same as bp_mapin */
pp = pp->p_next;
} else {
/*
* make sure the next region of the vaddr range
* points to valid physical memory. Grab AS_LOCK
* to make sure as mappings don't change.
*/
AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
pfn = hat_getpfnum(as->a_hat, (caddr_t)vaddr +
offset);
AS_LOCK_EXIT(as, &as->a_lock);
if (pfn == PFN_INVALID) {
/* not backed by anything! */
mseg_free_cmmus(exportseg);
DPRINTF(DBG_EXPORT, (CE_CONT, "vaddr 0x%p "
"not backed by memory\n",
(void *)((caddr_t)vaddr + offset)));
return (RSMERR_NO_BACKING_MEM);
}
if (!pf_is_memory(pfn)) {
/* tear down previously set up stuff */
mseg_free_cmmus(exportseg);
DPRINTF(DBG_EXPORT, (CE_CONT, "IO "
"pfn 0x%lx at vaddr 0x%p\n", pfn,
(void *)((caddr_t)vaddr + offset)));
return (RSMERR_NOT_MEM);
}
pgbytes = MMU_PAGESIZE;
offset += pgbytes;
}
ASSERT(pgbytes == MMU_PAGESIZE || pgbytes == MMU_PAGESIZE4M);
if (pgbytes == MMU_PAGESIZE4M && !allow_lg_pages) {
/*
* large pages not allowed: translate to small pages
*/
DPRINTF(DBG_EXPORT, (CE_CONT, "no large pages; convert "
"pages from size %d to %d (MMU_PAGESIZE)\n",
pgbytes, MMU_PAGESIZE));
ASSERT((pgbytes & MMU_PAGEOFFSET) == 0);
need_entries = pgbytes >> MMU_PAGESHIFT;
pgbytes = MMU_PAGESIZE;
} else {
need_entries = 1;
}
nbytes -= pgbytes;
if (pgbytes != opgbytes) {
/*
* a different page size is being used
*/
if (num_entries != 0) {
/*
* Allocate cmmu entries for the num_entries
* previous pages.
*/
if ((alloc_cmmu_tuples(exportseg,
num_entries, seg_offset, opgbytes,
cmmugrp_nextp, sleep)) != WRSM_SUCCESS) {
mseg_free_cmmus(exportseg);
DPRINTF(DBG_EXPORT, (CE_CONT,
"couldn't alloc cmmu tuples to "
"back memory\n"));
return (RSMERR_INSUFFICIENT_RESOURCES);
}
cmmugrp_nextp = &((*cmmugrp_nextp)->next);
/*
* record the physical addresses of this
* range of memory into the LPA fields in
* the cmmu entries
*/
err = set_lpa_fields(exportseg, seg_offset,
num_entries * opgbytes, as,
(caddr_t)vaddr + seg_offset, startpp);
ASSERT(err == WRSM_SUCCESS);
seg_offset += num_entries * opgbytes;
if (startpp) {
startpp = pp;
ASSERT(startpp || nbytes == 0);
}
				num_entries = 0;
			}
			/*
			 * remember the page size now in effect, even if
			 * no entries had accumulated at the old size
			 */
			opgbytes = pgbytes;
		}
num_entries += need_entries;
}
ASSERT(nbytes == 0);
/*
* allocate tuples for last set of physical pages
*/
	if (num_entries != 0) {
		ASSERT(pgbytes == MMU_PAGESIZE || pgbytes == MMU_PAGESIZE4M);
		if ((alloc_cmmu_tuples(exportseg, num_entries,
		    seg_offset, pgbytes, cmmugrp_nextp, sleep)) !=
		    WRSM_SUCCESS) {
			mseg_free_cmmus(exportseg);
			DPRINTF(DBG_EXPORT, (CE_CONT,
			    "couldn't alloc cmmu tuples for "
			    "last set of backing memory\n"));
			return (RSMERR_INSUFFICIENT_RESOURCES);
		}
		cmmugrp_nextp = &((*cmmugrp_nextp)->next);
		err = set_lpa_fields(exportseg, seg_offset,
		    num_entries * pgbytes, as,
		    (caddr_t)vaddr + seg_offset, startpp);
		ASSERT(err == WRSM_SUCCESS);
		seg_offset += num_entries * pgbytes;
	}
/*
* Allocate tuples for the end of the segment if some of it
* has no memory backing it. Allocate small pages for this
* part, as we don't know what memory will eventually back it.
*/
if (size != 0) {
num_entries = size >> MMU_PAGESHIFT;
ASSERT(num_entries != 0);
if ((alloc_cmmu_tuples(exportseg, num_entries,
seg_offset, MMU_PAGESIZE, cmmugrp_nextp, sleep))
!= WRSM_SUCCESS) {
mseg_free_cmmus(exportseg);
DPRINTF(DBG_EXPORT, (CE_CONT,
"couldn't alloc cmmu tuples for unbacked "
			    "memory\n"));
return (RSMERR_INSUFFICIENT_RESOURCES);
}
}
return (WRSM_SUCCESS);
}
/*
* Parse the passed in access list, calculate new per node access
* permissions (based on the old and new permissions), store the new
* permissions, and apply the access permissions for an exported segment
* to the appropriate cmmu entries.
*/
static int
apply_access_list(exportseg_t *exportseg,
rsm_access_entry_t access_list[], uint_t access_list_length)
{
rsm_addr_t addr;
wrsm_network_t *network;
cnodeid_t cnodeid;
cnode_bitmask_t bitmask;
rsm_permission_t perms = RSM_PERM_NONE;
uint_t i;
int j;
boolean_t changed[WRSM_MAX_CNODES];
boolean_t old_import_vals[WRSM_MAX_CNODES];
rsm_permission_t old_perms_vals[WRSM_MAX_CNODES];
DPRINTF(DBG_EXPORT, (CE_CONT, "apply_access_list()\n"));
ASSERT(MUTEX_HELD(&exportseg->lock));
WRSMSET_ZERO(bitmask);
network = exportseg->network;
for (i = 0; i < WRSM_MAX_CNODES; i++) {
changed[i] = B_FALSE;
}
/*
* If no access list, assume default of all nodes, with a
* permission of RSM_PERM_RDWR. If the access list's first entry
* specifies single hardware address of RSM_ACCESS_PUBLIC, apply
* the specified permission to all nodes. Otherwise, parse the
* access list.
*/
if (access_list == NULL ||
access_list[0].ae_addr == RSM_ACCESS_PUBLIC) {
perms = access_list ? access_list[0].ae_permission :
RSM_PERM_RDWR;
if ((perms & ~RSM_PERM_RDWR) != 0) {
return (RSMERR_BAD_ACL);
}
/* all nodes are allowed to import this segment */
for (i = 0; i < WRSM_MAX_CNODES; i++) {
if (network->nodes[i]) {
exportseg->nodes[i].allow_import = B_TRUE;
exportseg->nodes[i].perms = perms;
WRSMSET_ADD(bitmask, i);
}
}
} else {
for (i = 0; i < access_list_length; i++) {
/*
* wrsm hardware addresses must be cnodeids.
* Only allowed bits in perms are RSM_PERM_READ
* and RSM_PERM_WRITE
*/
addr = access_list[i].ae_addr;
if ((addr >= WRSM_MAX_CNODES) ||
!network->nodes[addr] ||
((access_list[i].ae_permission &
~RSM_PERM_RDWR) != 0)) {
/*
* invalid hardware address or perms --
* reinstate old settings, then fail
*/
				for (j = 0; j < WRSM_MAX_CNODES; j++) {
if (changed[j]) {
exportseg->nodes[j].
allow_import =
old_import_vals[j];
exportseg->nodes[j].perms =
old_perms_vals[j];
}
}
return (RSMERR_BAD_ACL);
}
cnodeid = access_list[i].ae_addr;
WRSMSET_ADD(bitmask, cnodeid);
changed[cnodeid] = B_TRUE;
old_import_vals[cnodeid] =
exportseg->nodes[cnodeid].allow_import;
old_perms_vals[cnodeid] =
exportseg->nodes[cnodeid].perms;
exportseg->nodes[cnodeid].allow_import = B_TRUE;
exportseg->nodes[cnodeid].perms =
access_list[i].ae_permission;
/*
* make sure perms is set to the most permissive
* of each node's old and new permissions.
* perms starts out as RSM_PERM_NONE, and gets
* changed if the current node has greater
* permissions.
*/
switch (exportseg->nodes[cnodeid].perms) {
case RSM_PERM_RDWR:
case RSM_PERM_WRITE:
perms = RSM_PERM_RDWR;
break;
case RSM_PERM_READ:
if (perms == RSM_PERM_NONE)
perms = RSM_PERM_READ;
break;
#ifdef DEBUG
default:
ASSERT(exportseg->nodes[cnodeid].perms
== RSM_PERM_NONE);
break;
#endif
}
switch (exportseg->nodes[cnodeid].actual_perms) {
case RSM_PERM_RDWR:
case RSM_PERM_WRITE:
perms = RSM_PERM_RDWR;
break;
case RSM_PERM_READ:
if (perms == RSM_PERM_NONE)
perms = RSM_PERM_READ;
break;
#ifdef DEBUG
default:
ASSERT(exportseg->nodes[cnodeid].actual_perms
== RSM_PERM_NONE);
break;
#endif
}
}
}
/*
* Make sure the actual per node perms are the max permissions of
* previous actual perms and the newly installed perms.
*/
for (i = 0; i < WRSM_MAX_CNODES; i++) {
switch (exportseg->nodes[i].perms) {
case RSM_PERM_RDWR:
case RSM_PERM_WRITE:
exportseg->nodes[i].actual_perms = RSM_PERM_RDWR;
break;
case RSM_PERM_READ:
if (exportseg->nodes[i].actual_perms == RSM_PERM_NONE)
exportseg->nodes[i].actual_perms =
RSM_PERM_READ;
break;
}
DPRINTF(DBG_EXPORT_EXTRA, (CE_CONT, "cnode %d: allow_import %d "
"perms 0x%x actual perms 0x%x\n",
i,
exportseg->nodes[i].allow_import,
exportseg->nodes[i].perms,
exportseg->nodes[i].actual_perms));
}
if (!wrsm_hw_protection) {
/*
* Set all CMMU entries to valid (if they have a valid
* lpa). Set writeable to true if any node is allowed
* write permission, or if there were previous permissions
* that allowed writing. (This handles the case of
* republish calling this function with stricter
* permissions.)
*/
/* LINTED: E_PRECEDENCE_CONFUSION */
if ((perms == RSM_PERM_RDWR) || (perms == RSM_PERM_WRITE) ||
exportseg->writeable) {
exportseg->writeable = B_TRUE;
update_cmmu_fields(exportseg, 0, exportseg->size,
memseg_set_writeable);
} else {
exportseg->writeable = B_FALSE;
update_cmmu_fields(exportseg, 0, exportseg->size,
memseg_unset_writeable);
}
}
WRSMSET_COPY(bitmask, exportseg->import_bitmask);
return (RSM_SUCCESS);
}
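/*
 * The permission-merge rule applied twice in apply_access_list(),
 * restated as a hypothetical standalone helper for clarity (not called
 * by the driver): WRITE implies RDWR on this hardware, and the merged
 * result is the most permissive of the two inputs.
 */
#ifdef DEBUG
static rsm_permission_t
example_merge_perms(rsm_permission_t a, rsm_permission_t b)
{
	if (a == RSM_PERM_RDWR || a == RSM_PERM_WRITE ||
	    b == RSM_PERM_RDWR || b == RSM_PERM_WRITE)
		return (RSM_PERM_RDWR);
	if (a == RSM_PERM_READ || b == RSM_PERM_READ)
		return (RSM_PERM_READ);
	return (RSM_PERM_NONE);
}
#endif /* DEBUG */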
/*
* Enable the small page interrupt cmmu entry for this exportseg.
*/
static void
enable_smallput_intr_page(exportseg_t *exportseg)
{
wrsm_cmmu_t cmmu;
wrsm_cmmu_index_t index;
index = exportseg->small_put_intr.tuple->index;
cmmu.entry_0.bit.valid = B_TRUE;
wrsm_cmmu_update(exportseg->network, &cmmu, index, CMMU_UPDATE_VALID);
}
/*
* Disable the small page interrupt cmmu entry for this exportseg.
*/
static void
disable_smallput_intr_page(exportseg_t *exportseg)
{
wrsm_cmmu_t cmmu;
wrsm_cmmu_index_t index;
index = exportseg->small_put_intr.tuple->index;
cmmu.entry_0.bit.valid = B_FALSE;
wrsm_cmmu_update(exportseg->network, &cmmu, index,
CMMU_UPDATE_VALID | CMMU_UPDATE_FLUSH);
/* make sure any in-process interrupts have completed */
wrsm_intr_flush_recvq(exportseg->small_put_intr.recvq);
}
/*
* Translate an exportseg's stored cmmu entry information into the format
* needed for connection messages.
*/
static void
cmmutuple_to_ncslicetuple(wrsm_network_t *network, wrsm_cmmu_tuple_t *cmmutuple,
import_ncslice_t *ncslicetuple, size_t seg_offset, size_t cmmu_page_size,
cnodeid_t cnodeid)
{
int i;
#ifdef DEBUG
boolean_t found_ncslice = B_FALSE;
#endif
ncslice_t ncslice;
ncslicetuple->seg_offset = seg_offset;
ncslice = cmmutuple->ncslice;
/* set ncslice to the equivalent ncslice imported by this node */
for (i = 0; i < WRSM_NODE_NCSLICES; i++) {
if (network->exported_ncslices.id[i] == ncslice) {
#ifdef DEBUG
found_ncslice = B_TRUE;
#endif
ncslicetuple->ncslice = network->nodes[cnodeid]->
config->imported_ncslices.id[i];
break;
}
}
#ifdef DEBUG
ASSERT(found_ncslice);
#endif
ncslicetuple->ncslice_offset = (off_t)cmmutuple->offset;
ncslicetuple->len = cmmutuple->count * cmmu_page_size;
}
/*
* Send requestor information about a published exported segment.
*/
void
wrsm_connect_msg_evt(void *arg)
{
wrsm_network_t *network = ((wrsm_memseg_evt_args_t *)arg)->network;
wrsm_message_t *msg = &((wrsm_memseg_evt_args_t *)arg)->msg;
cnodeid_t cnodeid = msg->header.source_cnode;
connect_msg_t args;
wrsm_node_t *node = network->nodes[msg->header.source_cnode];
wrsm_raw_message_t msgbuf;
wrsm_message_t *respmsg = (wrsm_message_t *)&msgbuf;
connect_resp_t respargs;
exportseg_t *exportseg;
connect_info_t *connected;
int connection = 0;
DPRINTF(DBG_EXPORT, (CE_CONT, "ctlr %d: connect_msg_evt() "
"node %d\n", network->rsm_ctlr_id, cnodeid));
if (node == NULL) {
/* non-existent node */
return;
}
if (wrsm_tl_rxhandler_sessionid(network, msg) == B_FALSE) {
/* session must not be valid */
return;
}
bcopy(&msg->body, &args, sizeof (args));
respmsg->header.message_type = WRSM_MSG_SEGMENT_CONNECT_RESPONSE;
/*
* does segment exist?
*/
exportseg = find_and_lock_exportseg(network, args.segid);
if (exportseg == NULL) {
respargs.err = ENOENT;
bcopy(&respargs, &respmsg->body, sizeof (respargs));
(void) wrsm_tl_rsp(network, msg, respmsg);
return;
}
/*
* is segment published?
*/
if (exportseg->state != memseg_published) {
mutex_exit(&exportseg->lock);
respargs.err = ENOENT;
bcopy(&respargs, &respmsg->body, sizeof (respargs));
(void) wrsm_tl_rsp(network, msg, respmsg);
return;
}
/*
* does requesting node have permission to connect to it?
*/
if (!WRSM_IN_SET(exportseg->import_bitmask, cnodeid)) {
mutex_exit(&exportseg->lock);
respargs.err = EACCES;
bcopy(&respargs, &respmsg->body, sizeof (respargs));
(void) wrsm_tl_rsp(network, msg, respmsg);
return;
}
if (exportseg->nodes[cnodeid].inuse != B_TRUE) {
/*
* add to list of segments the remote node is importing
*/
exportseg->nodes[cnodeid].inuse = B_TRUE;
connection = 1;
mutex_enter(&node->memseg->lock);
connected = kmem_zalloc(sizeof (connect_info_t), KM_SLEEP);
connected->exportseg = exportseg;
connected->next = node->memseg->connected;
node->memseg->connected = connected;
mutex_exit(&node->memseg->lock);
#ifdef DEBUG
} else {
DPRINTF(DBG_WARN, (CE_WARN,
"unexpected connect request from node %d "
"for segment id %d\n", cnodeid, args.segid));
#endif
}
respargs.perms = exportseg->nodes[cnodeid].perms;
respargs.size = exportseg->size;
respargs.num_seg_tuples = exportseg->total_tuples;
respargs.err = RSM_SUCCESS;
mutex_exit(&exportseg->lock);
/*
* Transport Layer tears down the session if there is a message
* delivery failure.
*/
bcopy(&respargs, &respmsg->body, sizeof (respargs));
(void) wrsm_tl_rsp(network, msg, respmsg);
mutex_enter(&network->lock);
network->memseg->export_connected += connection;
mutex_exit(&network->lock);
/* We're done, deallocate our incoming args struct and the message */
kmem_free(arg, sizeof (wrsm_memseg_evt_args_t));
}
/*
* Send requestor small put interrupt page mapping information for a
* published exported segment.
*/
void
wrsm_smallputmap_msg_evt(void *arg)
{
wrsm_network_t *network = ((wrsm_memseg_evt_args_t *)arg)->network;
wrsm_message_t *msg = &((wrsm_memseg_evt_args_t *)arg)->msg;
cnodeid_t cnodeid = msg->header.source_cnode;
smallputmap_msg_t args;
wrsm_node_t *node = network->nodes[msg->header.source_cnode];
wrsm_raw_message_t msgbuf;
wrsm_message_t *respmsg = (wrsm_message_t *)&msgbuf;
smallputmap_resp_t respargs;
exportseg_t *exportseg;
DPRINTF(DBG_EXPORT, (CE_CONT, "ctlr %d: smallputmap_msg_evt() "
"node %d\n", network->rsm_ctlr_id, cnodeid));
if (node == NULL) {
/* non-existent node */
return;
}
if (wrsm_tl_rxhandler_sessionid(network, msg) == B_FALSE) {
/* session must not be valid */
return;
}
bcopy(&msg->body, &args, sizeof (args));
respmsg->header.message_type = WRSM_MSG_SEGMENT_SMALLPUTMAP_RESPONSE;
/*
* does segment exist?
*/
exportseg = find_and_lock_exportseg(network, args.segid);
if (exportseg == NULL) {
respargs.err = ENOENT;
bcopy(&respargs, &respmsg->body, sizeof (respargs));
(void) wrsm_tl_rsp(network, msg, respmsg);
return;
}
respargs.err = RSM_SUCCESS;
/*
* is segment published?
*/
if (exportseg->state != memseg_published) {
respargs.err = ENOENT;
}
/*
* does requesting node have permission to connect to it?
*/
else if (!WRSM_IN_SET(exportseg->import_bitmask, cnodeid)) {
respargs.err = EACCES;
}
/*
* 0 length segment -- no small put page to report
*/
else if (exportseg->size == 0) {
respargs.err = EINVAL;
}
if (respargs.err != RSM_SUCCESS) {
mutex_exit(&exportseg->lock);
bcopy(&respargs, &respmsg->body, sizeof (respargs));
(void) wrsm_tl_rsp(network, msg, respmsg);
return;
}
#ifdef DEBUG
if (exportseg->nodes[cnodeid].inuse != B_TRUE) {
		DPRINTF(DBG_WARN, (CE_WARN, "ctlr %d: smallputmap_msg_evt() "
		    "unexpected smallputmap request from node %d "
		    "for segment id %d\n", network->rsm_ctlr_id, cnodeid,
		    args.segid));
}
#endif
cmmutuple_to_ncslicetuple(network, exportseg->small_put_intr.tuple,
&respargs.small_put_tuple, 0, MMU_PAGESIZE, cnodeid);
mutex_exit(&exportseg->lock);
/*
* Transport Layer tears down the session if there is a message
* delivery failure.
*/
bcopy(&respargs, &respmsg->body, sizeof (respargs));
(void) wrsm_tl_rsp(network, msg, respmsg);
/* We're done, deallocate our incoming args struct and the message */
kmem_free(arg, sizeof (wrsm_memseg_evt_args_t));
}
/*
* Send requestor barrier page mapping information for a published exported
* segment.
*/
void
wrsm_barriermap_msg_evt(void *arg)
{
wrsm_network_t *network = ((wrsm_memseg_evt_args_t *)arg)->network;
wrsm_message_t *msg = &((wrsm_memseg_evt_args_t *)arg)->msg;
cnodeid_t cnodeid = msg->header.source_cnode;
barriermap_msg_t args;
wrsm_node_t *node = network->nodes[msg->header.source_cnode];
wrsm_raw_message_t msgbuf;
wrsm_message_t *respmsg = (wrsm_message_t *)&msgbuf;
barriermap_resp_t respargs;
exportseg_t *exportseg;
DPRINTF(DBG_EXPORT, (CE_CONT, "ctlr %d: barriermap_msg_evt() "
"node %d\n", network->rsm_ctlr_id, cnodeid));
if (node == NULL) {
/* non-existent node */
return;
}
if (wrsm_tl_rxhandler_sessionid(network, msg) == B_FALSE) {
/* session must not be valid */
return;
}
bcopy(&msg->body, &args, sizeof (args));
respmsg->header.message_type = WRSM_MSG_SEGMENT_BARRIERMAP_RESPONSE;
/*
* does segment exist?
*/
exportseg = find_and_lock_exportseg(network, args.segid);
if (exportseg == NULL) {
respargs.err = ENOENT;
bcopy(&respargs, &respmsg->body, sizeof (respargs));
(void) wrsm_tl_rsp(network, msg, respmsg);
return;
}
respargs.err = RSM_SUCCESS;
/*
* is segment published?
*/
if (exportseg->state != memseg_published) {
respargs.err = ENOENT;
}
/*
* does requesting node have permission to connect to it?
*/
else if (!WRSM_IN_SET(exportseg->import_bitmask, cnodeid)) {
respargs.err = EACCES;
}
/*
	 * 0 length segment -- no barrier page to report
*/
else if (exportseg->size == 0) {
respargs.err = EINVAL;
}
if (respargs.err != RSM_SUCCESS) {
mutex_exit(&exportseg->lock);
bcopy(&respargs, &respmsg->body, sizeof (respargs));
(void) wrsm_tl_rsp(network, msg, respmsg);
return;
}
#ifdef DEBUG
if (exportseg->nodes[cnodeid].inuse != B_TRUE) {
DPRINTF(DBG_WARN, (CE_WARN,
"unexpected barriermap request from node %d "
"for segment id %d\n", cnodeid, args.segid));
}
#endif
cmmutuple_to_ncslicetuple(network, exportseg->barrier_page.tuple,
&respargs.barrier_tuple, 0, MMU_PAGESIZE, cnodeid);
mutex_exit(&exportseg->lock);
/*
* Transport Layer tears down the session if there is a message
* delivery failure.
*/
bcopy(&respargs, &respmsg->body, sizeof (respargs));
(void) wrsm_tl_rsp(network, msg, respmsg);
/* We're done, deallocate our incoming args struct and the message */
kmem_free(arg, sizeof (wrsm_memseg_evt_args_t));
}
/*
* Send segment mapping information for a published exported segment.
*/
void
wrsm_segmap_msg_evt(void *arg)
{
wrsm_network_t *network = ((wrsm_memseg_evt_args_t *)arg)->network;
wrsm_message_t *msg = &((wrsm_memseg_evt_args_t *)arg)->msg;
segmap_msg_t args;
cnodeid_t cnodeid = msg->header.source_cnode;
wrsm_node_t *node = network->nodes[msg->header.source_cnode];
wrsm_raw_message_t msgbuf;
wrsm_message_t *respmsg = (wrsm_message_t *)&msgbuf;
segmap_resp_t respargs;
exportseg_t *exportseg;
int i, j;
cmmugrp_t *cmmugrp;
uint64_t tuple_offset;
int tuple_index;
DPRINTF(DBG_EXPORT, (CE_CONT, "ctlr %d: segmap_msg_evt() "
"node %d\n", network->rsm_ctlr_id, cnodeid));
if (node == NULL) {
/* non-existent node */
return;
}
if (wrsm_tl_rxhandler_sessionid(network, msg) == B_FALSE) {
/* session must not be valid */
return;
}
bcopy(&msg->body, &args, sizeof (args));
respmsg->header.message_type = WRSM_MSG_SEGMENT_SEGMAP_RESPONSE;
/*
* does segment exist?
*/
exportseg = find_and_lock_exportseg(network, args.segid);
if (exportseg == NULL) {
respargs.err = ENOENT;
bcopy(&respargs, &respmsg->body, sizeof (respargs));
(void) wrsm_tl_rsp(network, msg, respmsg);
return;
}
/*
* is segment published?
*/
if (exportseg->state != memseg_published) {
mutex_exit(&exportseg->lock);
respargs.err = ENOENT;
bcopy(&respargs, &respmsg->body, sizeof (respargs));
(void) wrsm_tl_rsp(network, msg, respmsg);
return;
}
/*
* does requesting node have permission to connect to it?
*/
if (!WRSM_IN_SET(exportseg->import_bitmask, cnodeid)) {
mutex_exit(&exportseg->lock);
respargs.err = EACCES;
bcopy(&respargs, &respmsg->body, sizeof (respargs));
(void) wrsm_tl_rsp(network, msg, respmsg);
return;
}
#ifdef DEBUG
if (exportseg->nodes[cnodeid].inuse != B_TRUE) {
DPRINTF(DBG_WARN, (CE_WARN,
"unexpected map request from node %d "
"for segment id %d\n", cnodeid, args.segid));
}
#endif
DPRINTF(DBG_EXPORT, (CE_CONT, "ctlr %d: segmap_msg_evt() "
"tuple_index %d\n", network->rsm_ctlr_id, args.tuple_index));
tuple_index = args.tuple_index;
if (tuple_index >= exportseg->total_tuples) {
mutex_exit(&exportseg->lock);
/* bad message */
respargs.err = EINVAL;
bcopy(&respargs, &respmsg->body, sizeof (respargs));
(void) wrsm_tl_rsp(network, msg, respmsg);
return;
}
/*
* find the cmmugrp containing the desired starting tuple
*/
i = 0;
cmmugrp = exportseg->cmmugrps;
while (i < tuple_index && ((i + cmmugrp->num_tuples) <= tuple_index)) {
i += cmmugrp->num_tuples;
cmmugrp = cmmugrp->next;
ASSERT(cmmugrp);
}
/* calculate index within the cmmugrp */
i = tuple_index - i;
/*
* If this is not the first cmmu tuple in a cmmugrp, then
* compute its offset.
*/
tuple_offset = 0;
if (i > 0) {
for (j = 0; j < i; j++) {
tuple_offset += (cmmugrp->tuples[j].count *
cmmugrp->pgbytes);
}
}
j = 0;
/*
* copy as many tuples as possible into the response message
*/
while (j < MAP_MSG_TUPLES && tuple_index < exportseg->total_tuples) {
cmmutuple_to_ncslicetuple(network, &(cmmugrp->tuples[i]),
&(respargs.tuples[j]),
cmmugrp->offset + tuple_offset,
cmmugrp->pgbytes, cnodeid);
tuple_offset += (cmmugrp->tuples[i].count *
cmmugrp->pgbytes);
i++;
j++;
tuple_index++;
if (tuple_index == exportseg->total_tuples)
break;
if (i == cmmugrp->num_tuples) {
cmmugrp = cmmugrp->next;
ASSERT(cmmugrp);
i = 0;
tuple_offset = 0;
}
}
respargs.num_tuples = j;
respargs.err = RSM_SUCCESS;
mutex_exit(&exportseg->lock);
/*
* Transport Layer tears down the session if there is a message
* delivery failure.
*/
bcopy(&respargs, &respmsg->body, sizeof (respargs));
(void) wrsm_tl_rsp(network, msg, respmsg);
/* We're done, deallocate our incoming args struct and the message */
kmem_free(arg, sizeof (wrsm_memseg_evt_args_t));
}
/*
* Mark this exportseg as no longer imported by node sending this message.
*/
void
wrsm_disconnect_msg_evt(void *arg)
{
wrsm_network_t *network = ((wrsm_memseg_evt_args_t *)arg)->network;
wrsm_message_t *msg = &((wrsm_memseg_evt_args_t *)arg)->msg;
disconnect_msg_t args;
cnodeid_t cnodeid = msg->header.source_cnode;
wrsm_node_t *node = network->nodes[msg->header.source_cnode];
exportseg_t *exportseg;
connect_info_t **connpp, *connp;
DPRINTF(DBG_EXPORT, (CE_CONT, "ctlr %d: disconnect_msg_evt() "
"node %d\n", network->rsm_ctlr_id, cnodeid));
if (node == NULL) {
/* non-existent node */
return;
}
if (wrsm_tl_rxhandler_sessionid(network, msg) == B_FALSE) {
/* session must not be valid */
return;
}
/*
* does segment exist?
*/
bcopy(&msg->body, &args, sizeof (args));
exportseg = find_and_lock_exportseg(network, args.segid);
if (exportseg == NULL) {
/* ignore */
DPRINTF(DBG_EXPORT, (CE_CONT, "ctlr %d: unexpected disconnect "
"from node %d for non-existent segment %d\n",
network->rsm_ctlr_id, node->config->cnodeid, args.segid));
return;
}
if (exportseg->nodes[cnodeid].inuse == B_FALSE) {
DPRINTF(DBG_EXPORT, (CE_CONT, "ctlr %d: unexpected disconnect "
"from disconnected node %d for segment %d\n",
network->rsm_ctlr_id, node->config->cnodeid, args.segid));
mutex_exit(&exportseg->lock);
return;
}
/*
* remove from list of segments the remote node is importing
*/
exportseg->nodes[cnodeid].inuse = B_FALSE;
mutex_enter(&node->memseg->lock);
for (connpp = &node->memseg->connected; *connpp != NULL;
connpp = &((*connpp)->next)) {
if ((*connpp)->exportseg == exportseg) {
connp = *connpp;
*connpp = (*connpp)->next;
kmem_free(connp, sizeof (*connp));
break;
}
}
mutex_exit(&node->memseg->lock);
if (exportseg->wait_for_disconnects > 0) {
DPRINTF(DBG_EXPORT, (CE_CONT, "disconnect_evt: "
"wait_for_disconnects %d\n",
exportseg->wait_for_disconnects));
exportseg->wait_for_disconnects--;
}
mutex_exit(&exportseg->lock);
mutex_enter(&network->lock);
network->memseg->export_connected--;
mutex_exit(&network->lock);
/* We're done, deallocate our incoming args struct and the message */
kmem_free(arg, sizeof (wrsm_memseg_evt_args_t));
}
/*
 * Send specified node a message indicating that the exported segment
* is no longer published. Record based on the response message whether
* the node has released all connections to the segment. Function
* returns 1 if it received a disconnect response from the remote
* node, otherwise it returns 0.
*/
static int
send_unpublish_msg(wrsm_node_t *node, exportseg_t *exportseg)
{
wrsm_raw_message_t msgbuf;
wrsm_message_t *msg = (wrsm_message_t *)&msgbuf;
unpublish_msg_t args;
wrsm_raw_message_t recvmsgbuf;
wrsm_message_t *recvmsg = (wrsm_message_t *)&recvmsgbuf;
wrsm_network_t *network = node->network;
unpublish_resp_t recvargs;
connect_info_t **connpp, *connp;
int disconnect = 0;
DPRINTF(DBG_EXPORT, (CE_CONT, "ctlr %d: send_unpublish_msg() "
"node %d\n", network->rsm_ctlr_id, node->config->cnodeid));
/* LINTED */
ASSERT(sizeof (unpublish_msg_t) <= WRSM_MESSAGE_BODY_SIZE);
msg->header.message_type = WRSM_MSG_SEGMENT_UNPUBLISH;
args.segid = exportseg->segid;
bcopy(&args, &msg->body, sizeof (args));
if (wrsm_tl_rpc(network, node->config->cnodeid, msg, recvmsg)
!= WRSM_SUCCESS) {
/*
* This node is not responding (message not
* delivered or response not received). (Transport
* Layer tears down the session if there is a
* message delivery failure).
*
* Assume session teardown will remove all accesses
* to this segment.
*/
return (0);
}
#ifdef DEBUG
if (wrsm_export_memseg_debug & DBG_EXPORT_EXTRA)
wrsm_tl_dump_message("UNPUBLISH_RESPONSE: ", recvmsg);
#endif
if (recvmsg->header.message_type !=
WRSM_MSG_SEGMENT_UNPUBLISH_RESPONSE) {
DPRINTF(DBG_EXPORT, (CE_WARN,
"send_unpublish_msg got invalid response\n"));
return (0);
}
bcopy(&recvmsg->body, &recvargs, sizeof (recvargs));
if (recvargs.status == WC_DISCONNECTED) {
disconnect = 1;
/*
* remove from list of segments the remote node is
* importing
*/
mutex_enter(&node->memseg->lock);
for (connpp = &node->memseg->connected; *connpp != NULL;
connpp = &((*connpp)->next)) {
if ((*connpp)->exportseg == exportseg) {
connp = *connpp;
*connpp = (*connpp)->next;
kmem_free(connp, sizeof (*connp));
break;
}
}
mutex_exit(&node->memseg->lock);
}
return (disconnect);
}
/*
* Send the specified node a message indicating new access permissions
* for the exported segment.
*/
static void
send_access_msg(wrsm_node_t *node, rsm_memseg_id_t segid,
rsm_permission_t perms)
{
wrsm_raw_message_t msgbuf;
wrsm_message_t *msg = (wrsm_message_t *)&msgbuf;
access_msg_t args;
wrsm_raw_message_t recvmsgbuf;
wrsm_message_t *recvmsg = (wrsm_message_t *)&recvmsgbuf;
wrsm_network_t *network = node->network;
DPRINTF(DBG_EXPORT, (CE_CONT, "ctlr %d: send_access_msg() "
"node %d\n", network->rsm_ctlr_id, node->config->cnodeid));
/* LINTED */
ASSERT(sizeof (access_msg_t) <= WRSM_MESSAGE_BODY_SIZE);
msg->header.message_type = WRSM_MSG_SEGMENT_ACCESS;
args.segid = segid;
args.perms = perms;
bcopy(&args, &msg->body, sizeof (args));
if (wrsm_tl_rpc(network, node->config->cnodeid, msg, recvmsg)
!= WRSM_SUCCESS) {
/*
* This node is not responding (message not
* delivered or response not received). (Transport
* Layer tears down the session if there is a
* message delivery failure).
*
* Assume session teardown will remove all accesses
* to this segment.
*/
return;
}
#ifdef DEBUG
if (wrsm_export_memseg_debug & DBG_EXPORT_EXTRA)
wrsm_tl_dump_message("ACCESS_RESPONSE: ", recvmsg);
#endif
}
/*
* The session to the specified node has been torn down. Clean up
* references by this node to any exported segments.
*/
boolean_t
exportseg_sess_teardown(wrsm_node_t *node)
{
exportseg_t *exportseg;
rsm_memseg_id_t segid;
connect_info_t *connp;
int disconnects = 0;
wrsm_network_t *network = node->network;
	DPRINTF(DBG_EXPORT, (CE_CONT, "exportseg_sess_teardown\n"));
/*
* it is presumed that at this point the node was removed from the
* cluster_members_bits registers in all wcis
*/
ASSERT(MUTEX_HELD(&node->memseg->lock));
/*
* clean up exports to the remote node
*/
while (node->memseg->connected) {
connp = node->memseg->connected;
exportseg = connp->exportseg;
segid = exportseg->segid;
node->memseg->connected = node->memseg->connected->next;
kmem_free(connp, sizeof (*connp));
mutex_exit(&node->memseg->lock);
/*
* Must release node->memseg->lock in order to take
* exportseg lock; meanwhile, exportseg could disappear, so
* use find_and_lock_exportseg to verify it's still around.
*/
exportseg = find_and_lock_exportseg(node->network, segid);
if (exportseg) {
if (exportseg->nodes[node->config->cnodeid].inuse) {
exportseg->nodes[node->config->cnodeid].inuse =
B_FALSE;
disconnects++;
if (exportseg->wait_for_disconnects > 0) {
exportseg->wait_for_disconnects--;
}
}
mutex_exit(&exportseg->lock);
}
mutex_enter(&network->lock);
network->memseg->export_connected -= disconnects;
mutex_exit(&network->lock);
disconnects = 0;
mutex_enter(&node->memseg->lock);
}
return (B_TRUE);
}
/*
* Allocate and set up cmmu entries for the segment.
* The exportseg lock is not needed because segment is not yet visible to
* other threads.
*/
static int
setup_segment_memory(exportseg_t *exportseg, int flags,
rsm_memory_local_t *memory, boolean_t sleep)
{
wrsm_network_t *network = exportseg->network;
boolean_t allow_lg_pages = B_TRUE;
int i;
int err;
DPRINTF(DBG_EXPORT, (CE_CONT, "setup segment memory\n"));
exportseg->num_pages = exportseg->size >> MMU_PAGESHIFT;
ASSERT(exportseg->num_pages != 0);
exportseg->pfn_list = kmem_zalloc(
exportseg->num_pages * sizeof (pfn_t), KM_SLEEP);
for (i = 0; i < exportseg->num_pages; i++) {
exportseg->pfn_list[i] = PFN_INVALID;
}
/*
* Allocate CMMU entries for this segment. We can't use 4 Meg
* entries if we don't export a 4 meg ncslice, if REBIND is
* permitted, or if there is no memory backing the segment.
*
* If backing memory was provided, calculate the physical address
* for each CMMU entry, and store it in the CMMU's LPA field. Note:
* there is no guarantee that a buf struct will hang around, so can't
* just save a pointer to it. Similarly, there is no guarantee
* that the particular address space mapping will remain the same
* (although it must be mapped somewhere and locked down).
*
* If we need the physical addresses for any reason (such as to
* create HW based per node entries), the LPA can be read from the
* CMMU entry or found in the pfn_list. (The LPA field is
* RW.)
*/
if (!network->have_lg_page_ncslice ||
(flags & RSM_ALLOW_UNBIND_REBIND)) {
allow_lg_pages = B_FALSE;
}
DPRINTF(DBG_EXPORT_EXTRA, (CE_CONT, "setup seg cmmu entries\n"));
if (memory == NULL) {
/* use small page CMMU entries */
if ((alloc_cmmu_tuples(exportseg, exportseg->num_pages, 0,
MMU_PAGESIZE, &exportseg->cmmugrps, sleep)) !=
WRSM_SUCCESS) {
kmem_free(exportseg->pfn_list,
exportseg->num_pages * sizeof (pfn_t));
return (RSMERR_INSUFFICIENT_RESOURCES);
}
} else if (memory->ms_type == RSM_MEM_VADDR ||
memory->ms_type == RSM_MEM_BUF) {
if ((err = alloc_seg_cmmu_entries(exportseg, memory,
exportseg->size, allow_lg_pages, sleep)) != WRSM_SUCCESS) {
kmem_free(exportseg->pfn_list,
exportseg->num_pages * sizeof (pfn_t));
return (err);
}
} else {
kmem_free(exportseg->pfn_list,
exportseg->num_pages * sizeof (pfn_t));
return (RSMERR_BAD_MSTYPE);
}
return (RSM_SUCCESS);
}
/*
* Invalidate and remove cmmu entries for the segment.
*/
static void
teardown_segment_memory(exportseg_t *exportseg)
{
/*
* Unbind all pages, free CMMU entries.
*/
(void) clear_lpa_fields(exportseg, 0, exportseg->size, B_FALSE);
mseg_free_cmmus(exportseg);
kmem_free(exportseg->pfn_list, exportseg->num_pages * sizeof (pfn_t));
}
/*
* Allocate and set up cmmu entry for the smallput interrupt page.
* The exportseg lock is not needed because segment is not yet visible to
* other threads.
*/
static int
setup_smallput_interrupt(exportseg_t *exportseg, boolean_t sleep)
{
wrsm_network_t *network = exportseg->network;
unsigned num_tuples;
int err;
int flags;
DPRINTF(DBG_EXPORT, (CE_CONT, "setup smallput interrupt\n"));
/*
* Set up an interrupt page for small puts. Allocate a CMMU entry,
* then create a receive queue.
*/
if ((err = wrsm_cmmu_alloc(network, CMMU_PAGE_SIZE_SMALL, 1,
&(exportseg->small_put_intr.tuple), &num_tuples, sleep)) !=
WRSM_SUCCESS) {
ASSERT(err != ENOSPC);
return (RSMERR_INSUFFICIENT_RESOURCES);
}
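/* The smallput page must live in this node's comm ncslice. */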
ASSERT(exportseg->small_put_intr.tuple->ncslice ==
network->nodes[network->cnodeid]->config->comm_ncslice);
DPRINTF(DBG_EXPORT_EXTRA, (CE_CONT, "smallput interrupt: index %d\n",
exportseg->small_put_intr.tuple->index));
/*
* wrsm_intr_create_recvq() sets up the cmmu entry identified by
* the passed-in cmmu index. Create the recvq with the invalid
* flag set, and sleep waiting for resources if the caller set
* the sleep flag.
*/
flags = WRSM_CREATE_RECVQ_INVALID;
if (sleep) {
flags |= WRSM_CREATE_RECVQ_SLEEP;
}
err = wrsm_intr_create_recvq(network,
WRSM_SMPUT_INTR_TYPE,
WRSM_SMPUT_PACKETRING_SIZE,
exportseg->small_put_intr.tuple->index,
&(exportseg->small_put_intr.recvq),
0, /* from_node - N/A for memsegs */
exportseg,
flags);
if (err != WRSM_SUCCESS) {
DPRINTF(DBG_EXPORT_EXTRA, (CE_CONT, "smallput interrupt: "
"freeing index %d\n",
exportseg->small_put_intr.tuple->index));
wrsm_cmmu_free(network, 1, exportseg->small_put_intr.tuple);
return (RSMERR_INSUFFICIENT_RESOURCES);
}
DPRINTF(DBG_EXPORT, (CE_CONT, "small put recvq 0x%p\n",
(void *)exportseg->small_put_intr.recvq));
return (RSM_SUCCESS);
}
/*
* Invalidate and remove cmmu entry for the smallput interrupt page.
*/
static void
teardown_smallput_interrupt(exportseg_t *exportseg)
{
wrsm_network_t *network = exportseg->network;
/*
* Release the small put interrupt page recvq
* and free the cmmu entry.
*/
wrsm_intr_destroy_recvq(network,
exportseg->small_put_intr.recvq);
DPRINTF(DBG_EXPORT_EXTRA, (CE_CONT, "teardown_smallput interrupt: "
"freeing index %d\n",
exportseg->small_put_intr.tuple->index));
wrsm_cmmu_free(network, 1, exportseg->small_put_intr.tuple);
}
/*
* Allocate and set up cmmu entry for the barrier page.
* The exportseg lock is not needed because the segment is not yet
* visible to other threads.
*/
static int
setup_barrier_page(exportseg_t *exportseg, boolean_t sleep)
{
wrsm_network_t *network = exportseg->network;
unsigned num_tuples;
caddr_t aligned_vaddr;
wrsm_cmmu_t cmmu;
pfn_t pfn;
/* LINTED: E_FUNC_SET_NOT_USED */
int err;
DPRINTF(DBG_EXPORT, (CE_CONT, "setup barrier page\n"));
/*
* Set up a barrier page: allocate a page of memory, allocate a
* cmmu entry, and point the cmmu entry at the memory page.
*/
/*
* Need a page-aligned page of memory, so allocate 2 pages and
* round the start address up to the next page boundary.
*/
exportseg->barrier_page.vaddr = wrsm_alloc(MMU_PAGESIZE * 2, VM_SLEEP);
bzero(exportseg->barrier_page.vaddr, (MMU_PAGESIZE * 2));
if ((err = wrsm_cmmu_alloc(network, CMMU_PAGE_SIZE_SMALL, 1,
&(exportseg->barrier_page.tuple), &num_tuples, sleep)) !=
WRSM_SUCCESS) {
ASSERT(err != ENOSPC);
return (RSMERR_INSUFFICIENT_RESOURCES);
}
ASSERT(exportseg->barrier_page.tuple->ncslice ==
network->nodes[network->cnodeid]->config->comm_ncslice);
DPRINTF(DBG_EXPORT_EXTRA, (CE_CONT, "setup_barrier interrupt: "
"index %d\n",
exportseg->barrier_page.tuple->index));
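/*
* Build the barrier page cmmu entry: valid, writable, cacheable,
* and accessible from all nodes.
*/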
cmmu.entry_0.val = 0;
cmmu.entry_0.bit.count_enable = B_FALSE;
cmmu.entry_0.bit.large_page = B_FALSE;
cmmu.entry_0.bit.user_err = B_FALSE;
cmmu.entry_0.bit.writable = B_TRUE;
cmmu.entry_0.bit.from_all = B_TRUE;
cmmu.entry_0.bit.from_node = 255;
cmmu.entry_0.bit.valid = B_TRUE;
cmmu.entry_0.bit.type = CMMU_TYPE_CACHEABLE;
cmmu.entry_1.val = 0;
aligned_vaddr = (caddr_t)
((uint64_t)((caddr_t)exportseg->barrier_page.vaddr +
MMU_PAGEOFFSET) & (uint64_t)MMU_PAGEMASK);
pfn = hat_getpfnum(kas.a_hat, aligned_vaddr);
cmmu.entry_1.addr.lpa_page = pfn;
DPRINTF(DBG_EXPORT, (CE_CONT, "setup barrier cmmu entry to "
"point to paddr 0x%lx (pfn 0x%lx)\n", va_to_pa(aligned_vaddr),
pfn));
wrsm_cmmu_update(network, &cmmu, exportseg->barrier_page.tuple->index,
CMMU_UPDATE_ALL);
return (RSM_SUCCESS);
}
/*
* Invalidate and remove cmmu entry for the barrier page.
*/
static void
teardown_barrier_page(exportseg_t *exportseg)
{
wrsm_network_t *network = exportseg->network;
wrsm_cmmu_t cmmu;
/*
* Invalidate and free barrier page cmmu entry, and free the
* barrier page memory.
*/
cmmu.entry_0.bit.valid = B_FALSE;
wrsm_cmmu_update(network, &cmmu, exportseg->barrier_page.tuple->index,
CMMU_UPDATE_VALID);
DPRINTF(DBG_EXPORT_EXTRA, (CE_CONT, "teardown_barrier interrupt: "
"freeing index %d\n",
exportseg->barrier_page.tuple->index));
wrsm_cmmu_free(network, 1, exportseg->barrier_page.tuple);
wrsm_free(exportseg->barrier_page.vaddr, MMU_PAGESIZE * 2);
}
/*
* Free any remaining exportsegs when the network is being removed.
* This only happens if a client calls release_controller without
* first releasing its exported segments.
*/
void
wrsm_free_exportsegs(wrsm_network_t *network)
{
exportseg_t *exportseg;
exportseg_t **exportsegp;
int i;
DPRINTF(DBG_EXPORT, (CE_CONT, "wrsm_free_exportseg: ctlr %d\n",
network->rsm_ctlr_id));
mutex_enter(&network->lock);
if (network->memseg->export_count == 0) {
mutex_exit(&network->lock);
return;
}
mutex_enter(&all_exportsegs_lock);
for (i = 0; i < WRSM_SEGID_HASH_SIZE; i++) {
exportsegp = &(all_exportsegs_hash[i]);
while (*exportsegp != NULL) {
exportseg = *exportsegp;
if (exportseg->network == network) {
/*
* remove exportseg from all_exportsegs_hash
* and destroy it
*/
*exportsegp = exportseg->all_next;
mutex_destroy(&exportseg->lock);
if (exportseg->size > 0) {
teardown_segment_memory(exportseg);
teardown_smallput_interrupt(exportseg);
teardown_barrier_page(exportseg);
}
kmem_free(exportseg, sizeof (exportseg_t));
ASSERT(network->memseg->export_count > 0);
network->memseg->export_count--;
} else {
exportsegp = &((*exportsegp)->all_next);
}
}
}
mutex_exit(&all_exportsegs_lock);
#ifdef DEBUG
if (network->memseg->export_count > 0) {
DPRINTF(DBG_WARN, (CE_WARN, "wrsm_free_exportseg: network "
"exportseg count %d after exportseg cleanup\n",
network->memseg->export_count));
}
#endif
mutex_exit(&network->lock);
}
/*
*
* RSMPI entry points
*
*/
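/*
* A minimal sketch of the expected client call sequence through
* these entry points (reached via the RSMPI controller ops vector;
* error handling omitted, and the local names are illustrative):
*
*	rsm_memseg_export_handle_t memseg;
*
*	wrsmrsm_seg_create(ctlr, &memseg, size, flags, &mem,
*	    RSM_RESOURCE_SLEEP, 0);
*	wrsmrsm_publish(memseg, acl, acl_len, segid,
*	    RSM_RESOURCE_SLEEP, 0);
*	(importers connect, map and transfer)
*	wrsmrsm_unpublish(memseg);   - may return RSMERR_SEG_IN_USE
*	wrsmrsm_seg_destroy(memseg);
*/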
/* ARGSUSED */
int
wrsmrsm_seg_create(rsm_controller_handle_t controller,
rsm_memseg_export_handle_t *memsegp,
size_t size, uint_t flags, rsm_memory_local_t *memory,
rsm_resource_callback_t callback,
rsm_resource_callback_arg_t callback_arg)
{
wrsm_network_t *network = (wrsm_network_t *)controller;
exportseg_t *exportseg;
int err;
int i;
int index;
boolean_t sleep = B_FALSE;
DPRINTF(DBG_EXPORT, (CE_CONT, "wrsmrsm_seg_create(ctlr %d)\n",
network->rsm_ctlr_id));
if (callback != RSM_RESOURCE_SLEEP &&
callback != RSM_RESOURCE_DONTWAIT) {
/* we don't support callbacks */
return (RSMERR_CALLBACKS_NOT_SUPPORTED);
}
if (callback == RSM_RESOURCE_SLEEP)
sleep = B_TRUE;
if ((size & MMU_PAGEOFFSET) != 0) {
/* size must be full pages */
DPRINTF(DBG_WARN, (CE_WARN, "seg_create: bad size 0x%lx\n",
size));
return (RSMERR_BAD_MEM_ALIGNMENT);
}
/*
* ddi_map() in sun4u's rootnex.c limits us to 4GB of total
* mappable space per segment.
*/
if (size > (uint64_t)UINT_MAX) {
DPRINTF(DBG_WARN, (CE_WARN, "seg_create: bad size 0x%llx\n",
size));
return (RSMERR_INSUFFICIENT_RESOURCES);
}
exportseg = kmem_zalloc(sizeof (exportseg_t), KM_SLEEP);
exportseg->network = network;
exportseg->size = size;
exportseg->state = memseg_unpublished;
DPRINTF(DBG_EXPORT, (CE_CONT, "wrsmrsm_seg_create: exportseg 0x%p "
"(8k) size 0x%lx\n", (void *)exportseg, exportseg->size));
if (exportseg->size > 0) {
if ((err = setup_segment_memory(exportseg, flags, memory,
sleep)) != RSM_SUCCESS) {
kmem_free(exportseg, sizeof (exportseg_t));
return (err);
}
if ((err = setup_smallput_interrupt(exportseg, sleep))
!= RSM_SUCCESS) {
teardown_segment_memory(exportseg);
kmem_free(exportseg, sizeof (exportseg_t));
return (err);
}
if ((err = setup_barrier_page(exportseg, sleep))
!= RSM_SUCCESS) {
teardown_segment_memory(exportseg);
teardown_smallput_interrupt(exportseg);
kmem_free(exportseg, sizeof (exportseg_t));
return (err);
}
}
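/*
* The segment is fully set up; initialize its lock and mark it
* valid before it becomes findable in the hash.
*/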
mutex_init(&exportseg->lock, NULL, MUTEX_DRIVER, NULL);
exportseg->valid = B_TRUE;
/* save flags */
if (flags & RSM_ALLOW_UNBIND_REBIND) {
exportseg->allow_rebind = B_TRUE;
}
for (i = 0; i < WRSM_MAX_CNODES; i++) {
exportseg->nodes[i].perms = RSM_PERM_NONE;
exportseg->nodes[i].actual_perms = RSM_PERM_NONE;
}
/*
* add exportseg to all_exportsegs_hash
*/
index = WRSM_PTR_HASH_FUNC(exportseg);
mutex_enter(&network->lock);
network->memseg->export_count++;
mutex_exit(&network->lock);
mutex_enter(&all_exportsegs_lock);
exportseg->all_next = all_exportsegs_hash[index];
all_exportsegs_hash[index] = exportseg;
mutex_exit(&all_exportsegs_lock);
*memsegp = (rsm_memseg_export_handle_t)exportseg;
ASSERT(MUTEX_NOT_HELD(&exportseg->lock));
return (RSM_SUCCESS);
}
int
wrsmrsm_seg_destroy(rsm_memseg_export_handle_t handle)
{
exportseg_t *exportseg = (exportseg_t *)handle;
exportseg_t **exportsegp;
wrsm_network_t *network;
boolean_t found_exportseg;
int err;
int index;
DPRINTF(DBG_EXPORT, (CE_CONT, "wrsmrsm_seg_destroy(0x%p)\n",
(void *)exportseg));
if ((err = lock_exportseg(exportseg)) != RSM_SUCCESS) {
return (err);
}
/*
* make sure the segment is not published
*/
if (exportseg->state != memseg_unpublished) {
DPRINTF(DBG_EXPORT,
(CE_CONT, "seg_destroy - memseg 0x%p is published "
"with segid %d\n", (void *)exportseg, exportseg->segid));
mutex_exit(&exportseg->lock);
return (RSMERR_SEG_PUBLISHED);
}
network = exportseg->network;
/*
* Remove exportseg from all_exportsegs_hash. exportseg->lock
* can't be held prior to taking all_exportsegs_lock, so mark
* exportseg as invalid until it is actually removed from the hash.
* Searching for exportseg in the hash fails when exportseg->valid
* is B_FALSE.
*/
exportseg->valid = B_FALSE;
mutex_exit(&exportseg->lock);
index = WRSM_PTR_HASH_FUNC(exportseg);
mutex_enter(&all_exportsegs_lock);
found_exportseg = B_FALSE;
for (exportsegp = &(all_exportsegs_hash[index]);
*exportsegp != NULL;
exportsegp = &((*exportsegp)->all_next)) {
/* make sure no one else got here first */
if ((*exportsegp == exportseg) &&
(exportseg->valid == B_FALSE)) {
*exportsegp = exportseg->all_next;
found_exportseg = B_TRUE;
break;
}
}
mutex_exit(&all_exportsegs_lock);
if (found_exportseg) {
mutex_enter(&network->lock);
network->memseg->export_count--;
mutex_exit(&network->lock);
mutex_destroy(&exportseg->lock);
if (exportseg->size > 0) {
teardown_segment_memory(exportseg);
teardown_smallput_interrupt(exportseg);
teardown_barrier_page(exportseg);
}
kmem_free(exportseg, sizeof (exportseg_t));
}
return (RSM_SUCCESS);
}
/* ARGSUSED */
int
wrsmrsm_bind(rsm_memseg_export_handle_t memseg,
off_t offset,
rsm_memory_local_t *memory,
rsm_resource_callback_t callback,
rsm_resource_callback_arg_t callback_arg)
{
exportseg_t *exportseg = (exportseg_t *)memseg;
size_t nbytes;
struct buf *bp;
page_t *startpp = NULL;
struct as *as = NULL;
void *vaddr = NULL;
int err;
DPRINTF(DBG_EXPORT, (CE_CONT, "wrsmrsm_bind(0x%p)\n",
(void *)exportseg));
if (callback != RSM_RESOURCE_SLEEP &&
callback != RSM_RESOURCE_DONTWAIT) {
/* we don't support callbacks */
return (RSMERR_CALLBACKS_NOT_SUPPORTED);
}
if (offset & (off_t)MMU_PAGEOFFSET) {
/* can only bind starting at page boundaries */
return (RSMERR_BAD_MEM_ALIGNMENT);
}
if ((err = lock_exportseg(exportseg)) != RSM_SUCCESS) {
return (err);
}
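/*
* Decode the backing memory: a B_PAGEIO buf supplies a page list;
* other bufs supply a virtual address (in the owning process's
* address space for B_PHYS bufs, otherwise the kernel's); and
* RSM_MEM_VADDR supplies an address space and address directly.
*/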
if (memory->ms_type == RSM_MEM_BUF) {
bp = memory->ms_bp;
ASSERT(bp);
ASSERT(SEMA_HELD(&bp->b_sem));
nbytes = bp->b_bcount;
if (bp->b_flags & B_PAGEIO) {
if (!bp->b_pages) {
mutex_exit(&exportseg->lock);
return (RSMERR_NO_BACKING_MEM);
} else {
startpp = bp->b_pages;
}
} else {
vaddr = (void *)bp->b_un.b_addr;
if (bp->b_flags & B_PHYS) {
if (bp->b_proc == NULL ||
(as = bp->b_proc->p_as) == NULL)
as = &kas;
} else {
as = &kas;
}
}
} else if (memory->ms_type == RSM_MEM_VADDR) {
nbytes = memory->ms_length;
as = memory->ms_as;
vaddr = memory->ms_vaddr;
} else {
mutex_exit(&exportseg->lock);
return (RSMERR_BAD_MSTYPE);
}
if (nbytes + offset > exportseg->size) {
mutex_exit(&exportseg->lock);
return (RSMERR_BAD_LENGTH);
}
if (nbytes & MMU_PAGEOFFSET) {
mutex_exit(&exportseg->lock);
return (RSMERR_BAD_MEM_ALIGNMENT);
}
if ((uint64_t)vaddr & MMU_PAGEOFFSET) {
mutex_exit(&exportseg->lock);
return (RSMERR_BAD_MEM_ALIGNMENT);
}
if (exportseg->size == 0) {
/* don't touch cmmu entries for a 0-length segment */
mutex_exit(&exportseg->lock);
return (RSM_SUCCESS);
}
/*
* set up cmmu entries to point at the specified memory
*/
err = set_lpa_fields(exportseg, offset, nbytes, as, vaddr,
startpp);
mutex_exit(&exportseg->lock);
return (err);
}
int
wrsmrsm_unbind(rsm_memseg_export_handle_t memseg, off_t offset,
size_t length)
{
exportseg_t *exportseg = (exportseg_t *)memseg;
int err;
DPRINTF(DBG_EXPORT, (CE_CONT, "wrsmrsm_unbind(0x%p)\n",
(void *)exportseg));
if (offset & MMU_PAGEOFFSET) {
/* can only unbind starting at page boundaries */
return (RSMERR_BAD_MEM_ALIGNMENT);
}
if (length & MMU_PAGEOFFSET) {
/* can only unbind page aligned regions */
return (RSMERR_BAD_MEM_ALIGNMENT);
}
if ((err = lock_exportseg(exportseg)) != RSM_SUCCESS) {
return (err);
}
if (!exportseg->allow_rebind) {
mutex_exit(&exportseg->lock);
return (RSMERR_UNBIND_REBIND_NOT_ALLOWED);
}
if (offset + length > exportseg->size) {
mutex_exit(&exportseg->lock);
return (RSMERR_BAD_LENGTH);
}
if (exportseg->size == 0) {
/* don't touch cmmu entries for a 0-length segment */
mutex_exit(&exportseg->lock);
return (RSM_SUCCESS);
}
/*
* modify cmmu entries to no longer point to this memory
*/
err = clear_lpa_fields(exportseg, offset, length, B_TRUE);
mutex_exit(&exportseg->lock);
return (err);
}
/* ARGSUSED */
int
wrsmrsm_rebind(rsm_memseg_export_handle_t memseg, off_t offset,
rsm_memory_local_t *memory, rsm_resource_callback_t callback,
rsm_resource_callback_arg_t callback_arg)
{
exportseg_t *exportseg = (exportseg_t *)memseg;
size_t nbytes;
struct buf *bp;
page_t *startpp = NULL;
struct as *as = NULL;
void *vaddr = NULL;
int err;
DPRINTF(DBG_EXPORT, (CE_CONT, "wrsmrsm_rebind(0x%p)\n",
(void *)exportseg));
if (callback != RSM_RESOURCE_SLEEP &&
callback != RSM_RESOURCE_DONTWAIT) {
/* we don't support callbacks */
return (RSMERR_CALLBACKS_NOT_SUPPORTED);
}
if (offset & MMU_PAGEOFFSET) {
/* can only rebind starting at page boundaries */
return (RSMERR_BAD_MEM_ALIGNMENT);
}
if ((err = lock_exportseg(exportseg)) != RSM_SUCCESS) {
return (err);
}
if (!exportseg->allow_rebind) {
mutex_exit(&exportseg->lock);
return (RSMERR_UNBIND_REBIND_NOT_ALLOWED);
}
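/*
* Decode the backing memory exactly as in wrsmrsm_bind() above.
*/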
if (memory->ms_type == RSM_MEM_BUF) {
bp = memory->ms_bp;
ASSERT(bp);
ASSERT(SEMA_HELD(&bp->b_sem));
nbytes = bp->b_bcount;
if (bp->b_flags & B_PAGEIO) {
if (!bp->b_pages) {
mutex_exit(&exportseg->lock);
return (RSMERR_NO_BACKING_MEM);
} else {
startpp = bp->b_pages;
}
} else {
vaddr = (void *)bp->b_un.b_addr;
if (bp->b_flags & B_PHYS) {
if (bp->b_proc == NULL ||
(as = bp->b_proc->p_as) == NULL)
as = &kas;
} else {
as = &kas;
}
}
} else if (memory->ms_type == RSM_MEM_VADDR) {
nbytes = memory->ms_length;
as = memory->ms_as;
vaddr = memory->ms_vaddr;
} else {
mutex_exit(&exportseg->lock);
return (RSMERR_BAD_MSTYPE);
}
if (nbytes + offset > exportseg->size) {
mutex_exit(&exportseg->lock);
return (RSMERR_BAD_LENGTH);
}
if (nbytes & MMU_PAGEOFFSET) {
mutex_exit(&exportseg->lock);
return (RSMERR_BAD_MEM_ALIGNMENT);
}
if ((uint64_t)vaddr & MMU_PAGEOFFSET) {
mutex_exit(&exportseg->lock);
return (RSMERR_BAD_MEM_ALIGNMENT);
}
if (exportseg->size == 0) {
/* don't touch cmmu entries for a 0-length segment */
mutex_exit(&exportseg->lock);
return (RSM_SUCCESS);
}
/*
* modify cmmu entries to remove old mappings
*/
if ((err = clear_lpa_fields(exportseg, offset, nbytes, B_FALSE)) !=
WRSM_SUCCESS) {
mutex_exit(&exportseg->lock);
return (err);
}
/*
* modify cmmu entries to point to new memory
*/
err = set_lpa_fields(exportseg, offset, nbytes, as, vaddr,
startpp);
mutex_exit(&exportseg->lock);
return (err);
}
/* ARGSUSED */
int
wrsmrsm_publish(rsm_memseg_export_handle_t memseg,
rsm_access_entry_t access_list[],
uint_t access_list_length,
rsm_memseg_id_t segid,
rsm_resource_callback_t callback,
rsm_resource_callback_arg_t callback_arg)
{
exportseg_t *exportseg = (exportseg_t *)memseg;
int err;
DPRINTF(DBG_EXPORT, (CE_CONT, "wrsmrsm_publish(0x%p)\n",
(void *)exportseg));
if (callback != RSM_RESOURCE_SLEEP &&
callback != RSM_RESOURCE_DONTWAIT) {
/* we don't support callbacks */
return (RSMERR_CALLBACKS_NOT_SUPPORTED);
}
if ((err = lock_exportseg(exportseg)) != RSM_SUCCESS) {
return (err);
}
if ((err = exportseg_set_segid(exportseg, segid)) != RSM_SUCCESS) {
if (err != RSMERR_BAD_SEG_HNDL) {
mutex_exit(&exportseg->lock);
}
return (err);
}
if ((err = apply_access_list(exportseg, access_list,
access_list_length)) != RSM_SUCCESS) {
mutex_exit(&exportseg->lock);
exportseg_unset_segid(exportseg, segid);
return (err);
}
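/* segments of size 0 have no smallput interrupt page to enable */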
if (exportseg->size > 0) {
enable_smallput_intr_page(exportseg);
}
mutex_exit(&exportseg->lock);
return (RSM_SUCCESS);
}
int
wrsmrsm_unpublish(rsm_memseg_export_handle_t memseg)
{
exportseg_t *exportseg = (exportseg_t *)memseg;
rsm_memseg_id_t segid;
wrsm_network_t *network;
int err;
int i;
int disconnects = 0;
int rcv_disconnect;
int num_waiting;
DPRINTF(DBG_EXPORT, (CE_CONT, "wrsmrsm_unpublish(0x%p)\n",
(void *)exportseg));
if ((err = lock_exportseg(exportseg)) != RSM_SUCCESS) {
return (err);
}
segid = exportseg->segid;
if (exportseg->state == memseg_wait_for_disconnects) {
/*
* segment was already unpublished, but wasn't able
* to complete cleanup. Check whether cleanup has
* now completed.
*/
if (exportseg->wait_for_disconnects) {
mutex_exit(&exportseg->lock);
return (RSMERR_SEG_IN_USE);
} else {
mutex_exit(&exportseg->lock);
exportseg_unset_segid(exportseg, segid);
return (RSM_SUCCESS);
}
}
if (exportseg->state != memseg_published) {
/* segment is not published */
mutex_exit(&exportseg->lock);
return (RSMERR_SEG_NOT_PUBLISHED);
}
network = exportseg->network;
/*
* Set state to reflect that we're doing an unpublish.
*
* Update the state prior to releasing the lock, so that subsequent
* publish or republish calls fail.
*
* exportseg->wait_for_disconnects is used as a reference count
* for the export_seg. The segment can't be freed until
* the count goes to zero.
*
* Note that the export segment lock is released prior
* to sending the RPC and thus the export seg state can change.
*/
exportseg->state = memseg_wait_for_disconnects;
exportseg->wait_for_disconnects = 0;
/*
* Notify all importers that the segment is being unpublished.
*/
for (i = 0; i < WRSM_MAX_CNODES; i++) {
exportseg->nodes[i].allow_import = B_FALSE;
exportseg->nodes[i].perms = RSM_PERM_NONE;
exportseg->nodes[i].actual_perms = RSM_PERM_NONE;
if (exportseg->nodes[i].inuse) {
exportseg->wait_for_disconnects++;
mutex_exit(&exportseg->lock);
rcv_disconnect = send_unpublish_msg(network->nodes[i],
exportseg);
/*
* If a session teardown occurs while we are waiting
* for the response to the RPC,
* exportseg_sess_teardown(), while holding exportseg
* lock, will decrement wait_for_disconnects,
* decrement network->memseg->export_connected,
* and clear inuse.
* So, if inuse is cleared, we don't want to
* do those actions again here.
*/
mutex_enter(&exportseg->lock);
if (rcv_disconnect && exportseg->nodes[i].inuse) {
disconnects++;
exportseg->wait_for_disconnects--;
exportseg->nodes[i].inuse = B_FALSE;
}
}
}
/*
* disable ability to write to segment
*/
exportseg->writeable = B_FALSE;
/* only need to update cmmu entries if size > 0 */
if (exportseg->size != 0) {
disable_smallput_intr_page(exportseg);
update_cmmu_fields(exportseg, 0, exportseg->size,
memseg_unset_valid);
}
/*
* The kernel agent on the importer doesn't always release mappings
* (i.e. call rsm_unmap) in a timely fashion. So instead of
* waiting for the disconnects to complete or tearing down the
* session, return RSMERR_SEG_IN_USE.
*/
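/* capture the count before dropping the lock; it can change after */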
num_waiting = exportseg->wait_for_disconnects;
mutex_exit(&exportseg->lock);
mutex_enter(&network->lock);
network->memseg->export_connected -= disconnects;
mutex_exit(&network->lock);
if (num_waiting) {
return (RSMERR_SEG_IN_USE);
}
exportseg_unset_segid(exportseg, segid);
return (RSM_SUCCESS);
}
/* ARGSUSED */
int
wrsmrsm_republish(rsm_memseg_export_handle_t memseg,
rsm_access_entry_t access_list[], uint_t access_list_length,
rsm_resource_callback_t callback, rsm_resource_callback_arg_t callback_arg)
{
exportseg_t *exportseg = (exportseg_t *)memseg;
int err;
int i;
DPRINTF(DBG_EXPORT, (CE_CONT, "wrsmrsm_republish(0x%p)\n",
(void *)exportseg));
if (callback != RSM_RESOURCE_SLEEP &&
callback != RSM_RESOURCE_DONTWAIT) {
/* we don't support callbacks */
return (RSMERR_CALLBACKS_NOT_SUPPORTED);
}
if ((err = lock_exportseg(exportseg)) != RSM_SUCCESS) {
return (err);
}
if (exportseg->state != memseg_published) {
/* segment is not published */
mutex_exit(&exportseg->lock);
return (RSMERR_SEG_NOT_PUBLISHED);
}
/*
* apply new permissions
*/
if ((err = apply_access_list(exportseg, access_list,
access_list_length)) != RSM_SUCCESS) {
mutex_exit(&exportseg->lock);
return (err);
}
/*
* Notify current importers of permission changes.
*/
for (i = 0; i < WRSM_MAX_CNODES; i++) {
if (exportseg->nodes[i].inuse) {
mutex_exit(&exportseg->lock);
send_access_msg(exportseg->network->nodes[i],
exportseg->segid,
exportseg->nodes[i].perms);
mutex_enter(&exportseg->lock);
}
}
mutex_exit(&exportseg->lock);
return (RSM_SUCCESS);
}