/* seg_dev.c revision 17965fd8a9b2cdd0c3955036055590edd4fe1d85 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* University Copyright- Copyright (c) 1982, 1986, 1988
* The Regents of the University of California
* All Rights Reserved
*
* University Acknowledgment- Portions of this document are derived from
* software developed by the University of California, Berkeley, and its
* contributors.
*/
/*
* VM - segment of a mapped device.
*
* This segment driver is used when mapping character special devices.
*/
#include <sys/sysmacros.h>
#include <sys/ddidevmap.h>
#include <sys/ddi_implfuncs.h>
#include <vm/seg_kmem.h>
#if DEBUG
int segdev_debug;
#else
#endif
/* Default timeout for devmap context management */
#define CTX_TIMEOUT_VALUE 0
#define round_down_p2(a, s) ((a) & ~((s) - 1))
/*
* VA_PA_ALIGNED checks to see if both VA and PA are on pgsize boundary
* VA_PA_PGSIZE_ALIGNED checks to see if VA is aligned with PA w.r.t. pgsize
*/
/*
* mutex, vnode and page for the page of zeros we use for the trash mappings.
* One trash page is allocated on the first ddi_umem_setup call that uses it
* XXX Eventually, we may want to combine this with what segnf does when all
* hat layers implement HAT_NOFAULT.
*
* The trash page is used when the backing store for a userland mapping is
* removed but the application semantics do not take kindly to a SIGBUS.
* In that scenario, the application's pages are mapped to some dummy page
* which returns garbage on reads, and writes go into a common place.
* (Perfect for NO_FAULT semantics)
* The device driver is responsible for communicating to the app through some
* other mechanism that such remapping has happened and the app should take
* corrective action.
* We can also use an anonymous memory page as there is no requirement to
* keep the page locked, however this complicates the fault code. RFE.
*/
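/*
 * A minimal, hypothetical sketch of the driver side of the trash-page
 * mechanism described above: allocate a trash cookie (the DDI_UMEM_TRASH
 * case handled by ddi_umem_alloc() later in this file) and swing an
 * existing mapping over to it with devmap_umem_remap(9F).  The xx_* names
 * are illustrative assumptions, not part of this driver, and error
 * handling is omitted.
 */
static int
xx_remap_to_trash(devmap_cookie_t dhp, dev_info_t *dip, size_t len)
{
	ddi_umem_cookie_t tcookie;
	size_t maplen = ptob(btopr(len));

	/* Reads of trash-backed pages return garbage; writes are discarded */
	(void) ddi_umem_alloc(maplen, DDI_UMEM_TRASH | DDI_UMEM_SLEEP,
	    &tcookie);

	/* Replace the mapping's current backing store with the trash page */
	return (devmap_umem_remap(dhp, dip, tcookie, 0, maplen,
	    PROT_ALL, 0, NULL));
}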
/* Non-pageable kernel memory is allocated from the umem_np_arena. */
static vmem_t *umem_np_arena;
/* Set the cookie to a value we know will never be a valid umem_cookie */
/*
* Macros to check the type of a devmap handle
*/
#define cookie_is_devmem(c) \
((c) == (struct ddi_umem_cookie *)DEVMAP_DEVMEM_COOKIE)
#define cookie_is_pmem(c) \
((c) == (struct ddi_umem_cookie *)DEVMAP_PMEM_COOKIE)
#define cookie_is_kpmem(c) (!cookie_is_devmem(c) && !cookie_is_pmem(c) &&\
((c)->type == KMEM_PAGEABLE))
#define dhp_is_devmem(dhp) \
(cookie_is_devmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
#define dhp_is_pmem(dhp) \
(cookie_is_pmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
#define dhp_is_kpmem(dhp) \
(cookie_is_kpmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
/*
* Private seg op routines.
*/
static void segdev_free(struct seg *);
enum fault_type, enum seg_rw);
static void segdev_badop(void);
static void segdev_dump(struct seg *);
/*
* XXX this struct is used by rootnex_map_fault to identify
* the segment it has been passed. So if you make it
* "static" you'll need to fix rootnex_map_fault.
*/
struct seg_ops segdev_ops = {
(int (*)())segdev_badop, /* kluster */
segdev_sync, /* sync */
segdev_lockop, /* lockop */
};
/*
* Private segdev support routines
*/
static struct segdev_data *sdp_alloc(void);
static void devmap_softlock_rele(devmap_handle_t *);
static void devmap_ctx_rele(devmap_handle_t *);
static void devmap_ctxto(void *);
/*
* routines to lock and unlock underlying segkp segment for
* KMEM_PAGEABLE type cookies.
*/
/*
* Routines to synchronize F_SOFTLOCK and F_INVAL faults for
* drivers with devmap_access callbacks
*/
enum fault_type);
enum fault_type);
static kmutex_t devmapctx_lock;
static kmutex_t devmap_slock;
/*
* Initialize the thread callbacks and thread private data.
*/
static struct devmap_ctx *
{
struct devmap_ctx *devctx;
struct devmap_ctx *tmp;
break;
} else
return (devctx);
}
/*
* Timeout callback called if a CPU has not given up the device context
* within dhp->dh_timeout_length ticks
*/
static void
devmap_ctxto(void *data)
{
"devmap_ctxto:timeout expired, devctx=%p", (void *)devctx);
/*
* Set oncpu = 0 so the next mapping trying to get the device context
* can.
*/
}
/*
* Create a device segment.
*/
int
{
struct segdev_data *sdp;
int error;
/*
* Since the address space is "write" locked, we
* don't need the segment lock to protect "segdev" data.
*/
sdp->softlockcnt = 0;
else
/*
* Currently, hat_flags supports only HAT_LOAD_NOCONSIST
*/
/*
* Hold shadow vnode -- segdev only deals with
* character (VCHR) devices. We use the common
* vp to hang pages on.
*/
}
/*
* Inform the vnode of the new mapping.
*/
/*
* It is ok to pass sdp->maxprot to ADDMAP rather than to use
* dhp specific maxprot because spec_addmap does not use maxprot.
*/
if (error != 0) {
} else {
/*
*/
MAP_PRIVATE)) == 0) {
}
}
return (error);
}
static struct segdev_data *
sdp_alloc(void)
{
struct segdev_data *sdp;
return (sdp);
}
/*
* Duplicate seg and return new segment in newseg.
*/
static int
{
struct segdev_data *newsdp;
int ret;
/*
* Since the address space is "write" locked, we
* don't need the segment lock to protect "segdev" data.
*/
newsdp->softlockcnt = 0;
/*
* Initialize per page data if the segment we are
* dup'ing has per page information.
*/
} else
/*
* duplicate devmap handles
*/
if (ret != 0) {
"segdev_dup:ret1 ret=%x, dhp=%p seg=%p",
"segdev_dup: ret %x dhp %p seg %p\n",
return (ret);
}
}
/*
* Inform the common vnode of the new mapping.
*/
}
/*
* duplicate devmap handles
*/
static int
{
struct devmap_callback_ctl *callbackops;
/* Need to lock the original dhp while copying if REMAP */
if (newdhp_save != NULL)
else
/*
* Initialize dh_lock if we want to do remap.
*/
}
int ret;
/*
* Call the dup callback so that the driver can
* duplicate its private data.
*/
if (ret != 0) {
/*
* We want to free up this segment as the driver
* has indicated that we can't dup it. But we
* don't want to call the driver's devmap_unmap
* callback function, as the driver does not
* think this segment exists. The caller of
* devmap_dup will call seg_free on newseg
* as it was the caller that allocated the
* segment.
*/
"newdhp %p dhp %p\n", (void *)newdhp,
(void *)dhp));
return (ret);
}
}
}
return (0);
}
/*
* Split a segment at addr for length len.
*/
/*ARGSUSED*/
static int
{
register struct segdev_data *nsdp;
struct devmap_callback_ctl *callbackops;
"segdev_unmap:start dhp=%p, seg=%p addr=%p len=%lx",
/*
* Since the address space is "write" locked, we
* don't need the segment lock to protect "segdev" data.
*/
/*
* Fail the unmap if pages are SOFTLOCKed through this mapping.
* softlockcnt is protected from change by the as write lock.
*/
"segdev_unmap:error softlockcnt = %ld", sz);
return (EAGAIN);
}
/*
* Check for bad sizes
*/
panic("segdev_unmap");
/*
* If large page size was used in hat_devload(),
* the same page size must be used in hat_unload().
*/
break;
}
}
while (slen != 0) {
soff = 0;
}
} else
} else {
/*
* Unload any hardware translations in the range
* to be taken out.
*/
}
/*
* get the user offset, which will be used in the driver callbacks
*/
/*
* Inform the vnode of the unmapping.
*/
/*
* Check for entire segment
*/
return (0);
}
/*
* Check for beginning of segment
*/
/* free up old vpage */
}
/*
* free devmap handles from the beginning of the mapping.
*/
return (0);
}
/*
* Check for end of segment
*/
/* free up old vpage */
}
/*
* free devmap handles from addr to the end of the mapping.
*/
return (0);
}
/*
* The section to go is in the middle of the segment,
* have to make it into two segments. nseg is made for
* the high end while seg is cut down at the low end.
*/
panic("segdev_unmap seg_alloc");
nsdp->softlockcnt = 0;
/*
* Initialize per page data if the segment we are
* dup'ing has per page information.
*/
/* need to split vpage into two arrays */
/* free up old vpage */
} else
/*
* unmap dhps.
*/
return (0);
}
/*
* <addr, addr+len> is enclosed by dhp.
* create a newdhp that begins at addr+len and
* ends at dhp->dh_uvaddr+dhp->dh_len.
*/
}
/* XX Changing len should recalculate LARGE flag */
/*
* <addr, addr+len> spans over dhps.
*/
/* XX Changing len should recalculate LARGE flag */
/*
* dhp is enclosed by <addr, addr+len>.
*/
} else {
}
}
return (0);
}
/*
* Utility function that handles reducing the length of a devmap handle during
* unmap. Note that this is only used for unmapping the front portion of the
* handle; do not use it when reducing the length at the tail.
*/
static void
{
struct ddi_umem_cookie *cp;
struct devmap_pmem_cookie *pcp;
/*
* adjust devmap handle fields
*/
/* Make sure only page-aligned changes are done */
/* Need to grab dhp lock if REMAP */
if (cookie_is_devmem(cp)) {
} else if (cookie_is_pmem(cp)) {
} else {
}
}
/* XXX - Should recalculate the DEVMAP_FLAG_LARGE after changes */
}
/*
* Free devmap handle, dhp.
* Return the next devmap handle on the linked list.
*/
static devmap_handle_t *
{
/*
* before we free up dhp, call the driver's devmap_unmap entry point
* to free resources allocated for this dhp.
*/
}
} else {
}
}
}
return (dhpp);
}
/*
* Free complete devmap handles from dhp for len bytes
* dhp can be either the first handle or a subsequent handle
*/
static void
{
struct devmap_callback_ctl *callbackops;
/*
* free the devmap handles covered by len.
*/
}
if (len != 0) { /* partial unmap at head of first remaining dhp */
/*
* Call the unmap callback so the drivers can make
* adjustment on its private data.
*/
}
}
/*
* Free devmap handles to truncate the mapping after addr
* RFE: Simpler to pass in dhp pointing at correct dhp (avoid find again)
* Also could then use the routine in middle unmap case too
*/
static void
{
struct devmap_callback_ctl *callbackops;
register devmap_handle_t *dhpp;
if (maplen == 0) {
} else {
/*
* Call the unmap callback so the driver
* can make adjustments on its private data.
*/
(devmap_cookie_t *)dhph,
/* XXX Reducing len needs to recalculate LARGE flag */
maplen = 0;
}
} /* end while */
}
/*
* Free a segment.
*/
static void
{
/*
* Since the address space is "write" locked, we
* don't need the segment lock to protect "segdev" data.
*/
}
static void
{
register devmap_handle_t *dhpp;
/*
* free up devmap handle
*/
}
}
}
/*
* routines to lock and unlock underlying segkp segment for
* KMEM_PAGEABLE type cookies.
* segkp only allows a single pending F_SOFTLOCK
* we keep track of number of locks in the cookie so we can
* have multiple pending faults and manage the calls to segkp.
* RFE: if segkp supports either pagelock or can support multiple
* calls to F_SOFTLOCK, then these routines can go away.
* If pagelock, segdev_faultpage can fault on a page by page basis
* and simplifies the code quite a bit.
* if multiple calls allowed but not partial ranges, then need for
* cookie->lock and locked count goes away, code can call as_fault directly
*/
static faultcode_t
{
int err = 0;
/*
* Fault in pages in segkp with F_SOFTLOCK.
* We want to hold the lock until all pages have been loaded.
* segkp only allows a single caller to hold SOFTLOCK, so the cookie
* holds a count so we don't call into segkp multiple times
*/
/*
* Check for overflow in locked field
*/
/* First time locking */
}
if (!err) {
}
return (err);
}
static void
{
/* Last unlock */
}
}
/*
* Routines to synchronize F_SOFTLOCK and F_INVAL faults for
* drivers with devmap_access callbacks
* slock->softlocked basically works like a rw lock
* -ve counts => F_SOFTLOCK in progress
* We allow only one F_SOFTLOCK at a time
*
* This routine waits using cv_wait_sig so killing processes is more graceful
* Returns EINTR if coming out of this routine due to a signal, 0 otherwise
*/
static int devmap_softlock_enter(
struct devmap_softlock *slock,
enum fault_type type)
{
if (npages == 0)
return (0);
switch (type) {
case F_SOFTLOCK :
while (slock->softlocked) {
/* signalled */
return (EINTR);
}
}
break;
case F_INVAL :
case F_PROT :
while (slock->softlocked < 0)
/* signalled */
return (EINTR);
}
break;
default:
ASSERT(0);
}
return (0);
}
static void devmap_softlock_exit(
struct devmap_softlock *slock,
enum fault_type type)
{
return;
switch (type) {
case F_SOFTLOCK :
if (slock->softlocked == 0)
break;
case F_INVAL :
case F_PROT:
if (slock->softlocked == 0)
break;
default:
ASSERT(0);
}
}
/*
* Do a F_SOFTUNLOCK call over the range requested.
* The range must have already been F_SOFTLOCK'ed.
* The segment lock should be held, (but not the segment private lock?)
* The softunlock code below does not adjust for large page sizes; it
* assumes the caller already did any addr/len adjustments for
* pagesize mappings before calling.
*/
/*ARGSUSED*/
static void
{
"segdev_softunlock:dhp_head=%p sdp=%p addr=%p len=%lx",
"addr %p len %lx\n",
while (tlen != 0) {
/*
* unlock segkp memory, locked during F_SOFTLOCK
*/
if (dhp_is_kpmem(dhp)) {
}
/*
* Do the softlock accounting for devmap_access
*/
}
off = 0;
}
}
if (sdp->softlockcnt == 0) {
/*
* All SOFTLOCKS are gone. Wakeup any waiting
* unmappers so they can try again to unmap.
* Check for waiters first without the mutex
* held so we don't always grab the mutex on
* softunlocks.
*/
}
}
}
}
/*
* Handle fault for a single page.
* Done in a separate routine so we can handle errors more easily.
* This routine is called only from segdev_faultpages()
* when looping over the range of addresses requested. The segment lock is held.
*/
static faultcode_t
{
/*
* Initialize protection value for this page.
* If we have per page protection values check it now.
*/
switch (rw) {
case S_READ:
break;
case S_WRITE:
break;
case S_EXEC:
break;
case S_OTHER:
default:
break;
}
return (FC_PROT); /* illegal access type */
} else {
/* caller has already done segment level protection check */
}
if (type == F_SOFTLOCK) {
sdp->softlockcnt++;
}
/*
* In the devmap framework, sdp->mapfunc is set to NULL, so we can get
* pfnum from dhp->dh_pfn (at beginning of segment) and offset from
* seg->s_base.
*/
/* If segment has devmap_data, then dhp should be non-NULL */
} else {
struct ddi_umem_cookie *cp;
struct devmap_pmem_cookie *pcp;
/* ensure the dhp passed in contains addr. */
/*
* This routine assumes that the caller makes sure that the
* fields in dhp used below are unchanged due to remap during
* this call. Caller does HOLD_DHP_LOCK if needed
*/
pfnum = PFN_INVALID;
} else if (cookie_is_devmem(cp)) {
} else if (cookie_is_pmem(cp)) {
} else {
case UMEM_LOCKED :
PAGEOFFSET) == 0);
} else {
}
break;
case UMEM_TRASH :
/*
* We should set hat_flags to HAT_NOFAULT also
* However, not all hat layers implement this
*/
break;
case KMEM_PAGEABLE:
case KMEM_NON_PAGEABLE:
break;
default :
pfnum = PFN_INVALID;
break;
}
}
}
if (pfnum == PFN_INVALID) {
return (FC_MAKE_ERR(EFAULT));
}
/* prot should already be OR'ed in with hat_attributes if needed */
"segdev_faultpage: pfnum=%lx memory=%x prot=%x flags=%x",
/*
* It's not _really_ required here to pass sdp->hat_flags
* to hat_devload even though we do it.
* This is because the hat figures out that DEVMEM mappings
* are non-consistent anyway.
*/
return (0);
}
/*
* Fall through to the case where devmap is not used and need to call
* up the device tree to set up the mapping
*/
/*
* When calling ddi_map_fault, we do not OR in sdp->hat_attr
* because this calls drivers which may not expect
* prot to have any values other than PROT_ALL.
* The root nexus driver has a hack to peek into the segment
* structure and then OR in sdp->hat_attr.
* XX In case the bus_ops interfaces are ever revisited
* we need to fix this: prot should include other hat attributes.
*/
return (FC_MAKE_ERR(EFAULT));
}
return (0);
}
static faultcode_t
{
int err;
int err_is_faultcode = 0;
"segdev_fault: dhp_head=%p seg=%p addr=%p len=%lx type=%x",
"addr %p len %lx type %x\n",
/* Handle non-devmap case */
/* Find devmap handle */
return (FC_NOMAP);
/*
* The seg_dev driver does not implement copy-on-write,
* and always loads translations with maximal allowed permissions
* but we got a fault trying to access the device.
* Servicing the fault is not going to produce a better result.
* RFE: If we want devmap_access callbacks to be involved in F_PROT
* faults, then the code below is written for that
* Pending resolution of the following:
* - determine if the F_INVAL/F_SOFTLOCK syncing
* is needed for F_PROT also or not. The code below assumes it does
* - If driver sees F_PROT and calls devmap_load with same type,
* then segdev_faultpages will fail with FC_PROT anyway, need to
* change that so calls from devmap_load to segdev_faultpages for
* F_PROT type are retagged to F_INVAL.
* RFE: Today we don't have drivers that use devmap and want to handle
* F_PROT calls. The code in segdev_fault* is written to allow
* this case but is not tested. A driver that needs this capability
* should be able to remove the short-circuit case; resolve the
* above issues and "should" work.
*/
return (FC_PROT);
}
/*
* Loop through dhp list calling devmap_access or segdev_faultpages for
* each devmap handle.
* drivers which implement devmap_access can interpose on faults and do
* device-appropriate special actions before calling devmap_load.
*/
/*
* Unfortunately, this simple loop has turned out to expose a variety
* of complex problems which results in the following convoluted code.
*
* First, a desire to handle a serialization of F_SOFTLOCK calls
* to the driver within the framework.
* This results in a dh_softlock structure that is on a per device
* (or device instance) basis and serializes devmap_access calls.
* Ideally we would need to do this for the underlying
* memory/device regions being faulted on,
* but that is hard to identify and, with REMAP, harder.
* Second, a desire to serialize F_INVAL(and F_PROT) calls w.r.t.
* to F_SOFTLOCK calls to the driver.
* These serializations are to simplify the driver programmer model.
* To support these two features, the code first goes through the
* devmap handles and counts the pages (slpage) that are covered
* by devmap_access callbacks.
* This part ends with a devmap_softlock_enter call
* which allows only one F_SOFTLOCK active on a device instance,
* but multiple F_INVAL/F_PROT faults can be active except when an
* F_SOFTLOCK is active
*
* Next, we don't short-circuit the fault code upfront to call
* segdev_softunlock for F_SOFTUNLOCK, because we must use
* the same length when we softlock and softunlock.
*
* -Hat layers may not support softunlocking lengths less than the
* original length when there is large page support.
* -kpmem locking is dependent on keeping the lengths same.
* -if drivers handled F_SOFTLOCK, they probably also expect to
* see an F_SOFTUNLOCK of the same length
* Hence, if extending lengths during softlock,
* softunlock has to make the same adjustments and goes through
* the same loop calling segdev_faultpages/segdev_softunlock
* But some of the synchronization and error handling is different
*/
if (type != F_SOFTUNLOCK) {
/*
* Calculate count of pages that are :
* a) within the (potentially extended) fault region
* b) AND covered by devmap handle with devmap_access
*/
while (slen != 0) {
/*
* Softlocking on a region that allows remap is
* unsupported due to unresolved locking issues
* XXX: unclear what these are?
* One potential is that if there is a pending
* softlock, then a remap should not be allowed
* until the unlock is done. This is easily
* fixed by returning error in devmap*remap on
* checking the dh->dh_softlock->softlocked value
*/
if ((type == F_SOFTLOCK) &&
return (FC_NOSUPPORT);
}
/*
* use extended length for large page mappings
*/
} else {
}
}
off = 0;
}
/*
* synchronize with other faulting threads and wait till safe
* devmap_softlock_enter might return due to signal in cv_wait
*
* devmap_softlock_enter has to be called outside of while loop
* to prevent a deadlock if len spans over multiple dhps.
* dh_softlock is based on device instance and if multiple dhps
* use the same device instance, the second dhp's LOCK call
* will hang waiting on the first to complete.
* devmap_setup verifies that slocks in a dhp_chain are same.
* RFE: this deadlock only holds true for F_SOFTLOCK. For
* F_INVAL/F_PROT faults, we could have done the softlock_enter inside the loop
* and supported multi-dhp mappings with dissimilar devices
*/
return (FC_MAKE_ERR(err));
}
/* reset 'maddr' to the start addr of the range of fault. */
/* calculate the offset corresponding to 'addr' in the first dhp. */
/*
* The fault length may span over multiple dhps.
* Loop until the total length is satisfied.
*/
while (len != 0) {
/*
* mlen is the smaller of 'len' and the length
* from addr to the end of mapping defined by dhp.
*/
/*
* Pass the extended length and address to devmap_access
* if large pagesize is used for loading address translations.
*/
} else {
}
/*
* call driver's devmap_access entry point which will
* call devmap_load/contextmgmt to load the translations
*
* We drop the dhp_lock before calling access so
* drivers can call devmap_*_remap within access
*/
} else {
/*
* If no devmap_access entry point, then load mappings
* hold dhp_lock across faultpages if REMAP
*/
err_is_faultcode = 1;
}
if (err) {
/*
* If not first dhp, use
* segdev_fault(F_SOFTUNLOCK) for prior dhps
* While this is recursion, it is incorrect to
* call just segdev_softunlock
* if we are using either large pages
* or devmap_access. It will be more right
* to go through the same loop as above
* rather than call segdev_softunlock directly
* It will use the right lengths as well as
* call into the driver devmap_access routines.
*/
/*
* reduce slpage by number of pages
* released by segdev_softunlock
*/
} else {
}
/*
* Segdev_faultpages() already returns a faultcode,
* hence, the result from segdev_faultpages() should be
* returned directly.
*/
if (err_is_faultcode)
return (err);
return (FC_MAKE_ERR(err));
}
off = 0;
}
/*
* release the softlock count at the end of the fault.
* For F_SOFTLOCK this is done in the later F_SOFTUNLOCK
*/
return (0);
}
/*
* segdev_faultpages
*
* Used to fault in seg_dev segment pages. Called by segdev_fault or devmap_load.
* This routine assumes that the caller makes sure that the fields
* in dhp used below are not changed due to remap during this call.
* Caller does HOLD_DHP_LOCK if needed
* This routine returns a faultcode_t as a return value for segdev_fault.
*/
static faultcode_t
{
register caddr_t a;
int err;
"segdev_faultpages: dhp=%p seg=%p addr=%p len=%lx",
"dhp %p seg %p addr %p len %lx\n",
/*
* The seg_dev driver does not implement copy-on-write,
* and always loads translations with maximal allowed permissions
* but we got a fault trying to access the device.
* Servicing the fault is not going to produce a better result.
* XXX: If we want to allow devmap_access to handle F_PROT calls,
* this code should be removed to let the normal fault handling
* take care of finding the error
*/
return (FC_PROT);
}
if (type == F_SOFTUNLOCK) {
return (0);
}
/*
* We hold this until the completion of this
*/
return (err);
}
/*
* If we have the same protections for the entire segment,
* ensure that the access being attempted is legitimate.
*/
switch (rw) {
case S_READ:
break;
case S_WRITE:
break;
case S_EXEC:
break;
case S_OTHER:
default:
break;
}
/* undo kpmem locking */
if (kpmem_cookie != NULL) {
}
return (FC_PROT); /* illegal access type */
}
}
/*
* we do a single hat_devload for the range if
* - devmap framework (dhp is not NULL),
* - pageprot == 0, i.e., no per-page protection set and
* - the pages are device pages, irrespective of whether we are using large pages
*/
return (FC_NOMAP);
}
if (type == F_SOFTLOCK) {
}
return (0);
}
/* Handle cases where we have to loop through fault handling per-page */
else
/* loop over the address range handling each fault */
break;
}
vpage++;
}
if (done > 0) {
/* use softunlock for those pages */
}
if (kpmem_cookie != NULL) {
/* release kpmem lock for rest of pages */
}
/* for non-SOFTLOCK cases, release kpmem */
}
return (err);
}
/*
* Asynchronous page fault. We simply do nothing since this
* entry point is not supposed to load up the translation.
*/
/*ARGSUSED*/
static faultcode_t
{
return (0);
}
static int
{
register devmap_handle_t *dhp;
"segdev_setprot:start seg=%p addr=%p len=%lx prot=%x",
/*
* Fail the setprot if pages are SOFTLOCKed through this
* mapping.
* Softlockcnt is protected from change by the as read lock.
*/
"segdev_setprot:error softlockcnt=%lx", sz);
return (EAGAIN);
}
return (EINVAL);
/*
* Check if the requested protections violate maxprot.
*/
while (dhp) {
return (EACCES); /* violated maxprot */
off = 0;
} else
break;
}
} else {
return (EACCES);
}
return (0); /* all done */
}
} else {
/*
* First time through setting per page permissions,
* initialize all the vpage structures to prot
*/
KM_SLEEP);
}
/*
* Now go change the needed vpages protections.
*/
}
/*
* If large page size was used in hat_devload(),
* the same page size must be used in hat_unload().
*/
break;
}
}
if (tdhp) {
while (slen != 0) {
soff = 0;
}
return (0);
}
}
} else {
/*
* RFE: the segment should keep track of all attributes
* allowing us to remove the deprecated hat_chgprot
* and use hat_chgattr.
*/
}
return (0);
}
static int
{
"segdev_checkprot:start seg=%p addr=%p len=%lx prot=%x",
/*
* If the segment protections can be used, simply check against them
*/
register int err;
return (err);
}
/*
* Have to check down to the vpage level
*/
return (EACCES);
}
}
return (0);
}
static int
{
"segdev_getprot:start seg=%p addr=%p len=%lx protv=%p",
if (pgno != 0) {
do {
} while (pgno != 0);
} else {
do {
pgno--;
} while (pgno != 0);
}
}
return (0);
}
static u_offset_t
{
}
/*ARGSUSED*/
static int
{
}
/*ARGSUSED*/
static int
{
/*
* Note that this vp is the common_vp of the device, where the
* pages are hung ..
*/
return (0);
}
static void
segdev_badop(void)
{
"segdev_badop:start");
panic("segdev_badop");
/*NOTREACHED*/
}
/*
* segdev pages are not in the cache, and thus can't really be controlled.
* Hence, syncs are simply always successful.
*/
/*ARGSUSED*/
static int
{
return (0);
}
/*
* segdev pages are always "in core".
*/
/*ARGSUSED*/
static size_t
{
size_t v = 0;
v += PAGESIZE)
*vec++ = 1;
return (v);
}
/*
* segdev pages are not in the cache, and thus can't really be controlled.
* Hence, locks are simply always successful.
*/
/*ARGSUSED*/
static int
{
return (0);
}
/*
* segdev pages are not in the cache, and thus can't really be controlled.
* Hence, advise is simply always successful.
*/
/*ARGSUSED*/
static int
{
return (0);
}
/*
* segdev pages are not dumped, so we just return
*/
/*ARGSUSED*/
static void
{}
/*
* ddi_segmap_setup: Used by drivers who wish to specify mapping attributes
* for a segment. Called from a driver's segmap(9E)
* routine.
*/
/*ARGSUSED*/
int
{
struct segdev_crargs dev_a;
int error, i;
"ddi_segmap_setup:start");
return (ENODEV);
/*
* Character devices that support the d_mmap
* interface can only be mmap'ed shared.
*/
return (EINVAL);
/*
* Check that this region is indeed mappable on this platform.
* Use the mapping function.
*/
return (ENXIO);
/*
* Check to ensure that the entire range is
* legal and we are not trying to map in
* more than the device will let us.
*/
if (i == 0) {
/*
* Save the pfn at offset here. This pfn will be
* used later to get user address.
*/
maxprot)) == PFN_INVALID)
return (ENXIO);
} else {
return (ENXIO);
}
}
/* Pick an address w/o worrying about any vac alignment constraints. */
if (error != 0) {
return (error);
}
return (error);
}
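/*
 * A minimal, hypothetical sketch of a driver segmap(9E) entry point built on
 * ddi_segmap_setup() as described above.  The xx_* names, the access
 * attributes and register number 0 are illustrative assumptions.
 */
static ddi_device_acc_attr_t xx_acc_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

static int
xx_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
    uint_t prot, uint_t maxprot, uint_t flags, cred_t *cred)
{
	/* Let the framework create the segdev segment with our attributes */
	return (ddi_segmap_setup(dev, off, as, addrp, len, prot, maxprot,
	    flags, cred, &xx_acc_attr, 0));
}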
/*ARGSUSED*/
static int
{
"segdev_pagelock:start");
return (ENOTSUP);
}
/*ARGSUSED*/
static int
{
return (ENOTSUP);
}
/*
* devmap_device: Used by devmap framework to establish mapping
* called by devmap_setup(9F) during map setup time.
*/
/*ARGSUSED*/
static int
{
struct segdev_crargs dev_a;
int err;
struct devmap_pmem_cookie *pcp;
"devmap_device:start dhp=%p addr=%p off=%llx, len=%lx",
}
/*
* Use the dhp that has the
* largest len to get user address.
*/
/*
* use 0 which is as good as any other.
*/
aligned_off = (offset_t)0;
} else if (dhp_is_devmem(maxdhp)) {
} else if (dhp_is_pmem(maxdhp)) {
pfn = page_pptonum(
} else {
}
/*
* Pick an address aligned to dh_cookie.
* for device memory, cookie is physical address.
*/
return (ENOMEM);
}
} else {
/*
* User-specified address; blow away any previous mappings.
*/
}
/*
* sdp->maxprot has the least restrictive protection of all dhps.
*/
/*
* devmap uses dhp->dh_hat_attr for hat.
*/
return (err);
}
int
{
struct devmap_ctx *devctx;
int do_timeout = 0;
int ret;
#ifdef lint
#endif
"devmap_do_ctxmgt:start dhp=%p off=%llx, len=%lx",
return (FC_HWERR);
/*
* If we are on an MP system with more than one cpu running
* and if a thread on some CPU already has the context, wait
* for it to finish if there is a hysteresis timeout.
*
* We call cv_wait() instead of cv_wait_sig() because
* it does not matter much if it returned due to a signal
* or due to a cv_signal() or cv_broadcast(). In either event
* we need to complete the mapping otherwise the processes
* will die with a SEGV.
*/
"devmap_do_ctxmgt:doing hysteresis, devctl %p dhp %p",
do_timeout = 1;
}
/*
* Call the contextmgt callback so that the driver can handle
* the fault.
*/
/*
* If devmap_access() returned -1, then there was a hardware
* error so we need to convert the return value to something
* that trap() will understand. Otherwise, the return value
* is already a fault code generated by devmap_unload()
* or devmap_load().
*/
if (ret) {
"devmap_do_ctxmgt: ret=%x dhp=%p devctx=%p",
}
return (FC_HWERR);
}
/*
* Setup the timeout if we need to
*/
if (do_timeout) {
if (dhp->dh_timeout_length > 0) {
"devmap_do_ctxmgt:timeout set");
} else {
/*
* We don't want to wait so set oncpu to
* 0 and wake up anyone waiting.
*/
"devmap_do_ctxmgt:timeout not set");
}
}
return (DDI_SUCCESS);
}
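/*
 * A minimal, hypothetical sketch of the driver side of devmap_do_ctxmgt():
 * a devmap_access(9E) callback that defers all context handling to the
 * framework, passing its own context-management routine (xx_contextmgt,
 * sketched after devmap_load() below).  The xx_* names are illustrative
 * assumptions.
 */
static int xx_contextmgt(devmap_cookie_t, void *, offset_t, size_t,
    uint_t, uint_t);

static int
xx_access(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
    uint_t type, uint_t rw)
{
	/* Serialize on the device context, then load/unload as needed */
	return (devmap_do_ctxmgt(dhp, pvtp, off, len, type, rw,
	    xx_contextmgt));
}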
/*
* end of mapping
* poff fault_offset |
* base | | |
* | | | |
* V V V V
* +-----------+---------------+-------+---------+-------+
* ^ ^ ^ ^
* |<--- offset--->|<-len->| |
* |<--- dh_len(size of mapping) --->|
* |<-- pg -->|
* -->|rlen|<--
*/
static ulong_t
{
register int level;
long rlen;
"devmap_roundup:start dhp=%p off=%lx len=%lx",
/*
* get the max. pagesize that is aligned within the range
* <dh_pfn, dh_pfn+offset>.
*
* The calculations below use the physical address to determine
* the page size to use. The same calculations can use the
* virtual address to determine the page size.
*/
break;
}
"devmap_roundup: base=%lx poff=%lx dhp=%p",
"devmap_roundup:ret dhp=%p level=%x rlen=%lx psiz=%p opfn=%p",
"level %x rlen %lx psize %lx opfn %lx\n",
}
/*
* find the dhp that contains addr.
*/
static devmap_handle_t *
{
"devmap_find_handle:start");
while (dhp) {
return (dhp);
}
return ((devmap_handle_t *)NULL);
}
/*
* devmap_unload:
* Marks a segdev segment or pages if offset->offset+len
* is not the entire segment as intercept and unloads the
* pages in the range offset -> offset+len.
*/
int
{
"devmap_unload:start dhp=%p offset=%llx len=%lx",
return (FC_MAKE_ERR(EINVAL));
/*
* Address and size must be page aligned. Len is set to the
* number of bytes in the number of pages that are required to
* support len. Offset is set to the byte offset of the first byte
* of the page that contains offset.
*/
/*
* If len is == 0, then calculate the size by getting
* the number of bytes from offset to the end of the segment.
*/
if (len == 0)
else {
return (FC_MAKE_ERR(EINVAL));
}
/*
* The address is offset bytes from the base address of
* the dhp.
*/
/*
* If large page size was used in hat_devload(),
* the same page size must be used in hat_unload().
*/
} else {
}
return (0);
}
/*
* calculates the optimal page size that will be used for hat_devload().
*/
static void
{
"devmap_get_large_pgsize:start");
/*
* RFE - Code only supports large page mappings for devmem.
* This code could be changed in the future if we want to support
* large page mappings for kernel exported memory.
*/
*llen = 0;
while ((long)len > 0) {
/*
* get the optimal pfn to minimize address translations.
* devmap_roundup() returns residue bytes for next round
* calculations.
*/
if (first) {
first = 0;
}
}
}
/*
* Initialize the devmap_softlock structure.
*/
static struct devmap_softlock *
{
struct devmap_softlock *slock;
struct devmap_softlock *tmp;
"devmap_softlock_init:start");
break;
} else
return (slock);
}
/*
* Wake up processes that sleep on softlocked.
* Free dh_softlock if refcnt is 0.
*/
static void
{
struct devmap_softlock *tmp;
struct devmap_softlock *parent;
"devmap_softlock_rele:start");
/*
* If no one is using the device, free up the slock data.
*/
slock->softlocked = 0;
if (devmap_slist == slock)
else {
break;
}
}
}
} else
}
/*
* Wake up processes that sleep on dh_ctx->locked.
* Free dh_ctx if refcnt is 0.
*/
static void
{
struct devmap_ctx *tmp;
struct devmap_ctx *parent;
"devmap_ctx_rele:start");
/*
* If no one is using the device, free up the devctx data.
*/
/*
* Untimeout any threads using this mapping as they are about
* to go away.
*/
"devmap_ctx_rele:untimeout ctx->timeout");
}
if (devmapctx_list == devctx)
else {
break;
}
}
}
} else
}
/*
* devmap_load:
* Marks a segdev segment or pages if offset->offset+len
* is not the entire segment as nointercept and faults in
* the pages in the range offset -> offset+len.
*/
int
{
int rc;
"devmap_load:start dhp=%p offset=%llx len=%lx",
/*
* Hat layer only supports devload to process' context for which
* the as lock is held. Verify here and return error if drivers
* inadvertently call devmap_load on a wrong devmap handle.
*/
return (FC_MAKE_ERR(EINVAL));
return (FC_MAKE_ERR(EINVAL));
/*
* Address and size must be page aligned. Len is set to the
* number of bytes in the number of pages that are required to
* support len. Offset is set to the byte offset of the first byte
* of the page that contains offset.
*/
/*
* If len == 0, then calculate the size by getting
* the number of bytes from offset to the end of the segment.
*/
if (len == 0)
else {
return (FC_MAKE_ERR(EINVAL));
}
/*
* The address is offset bytes from the base address of
* the segment.
*/
return (rc);
}
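/*
 * A minimal, hypothetical sketch of a context-management routine of the
 * kind handed to devmap_do_ctxmgt(): invalidate the mapping that currently
 * owns the device context with devmap_unload(), then validate the faulting
 * mapping with devmap_load().  A real driver would keep the current owner
 * in per-device state protected by a mutex; the xx_* names are illustrative
 * assumptions.
 */
static devmap_cookie_t xx_ctx_owner;

static int
xx_contextmgt(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
    uint_t type, uint_t rw)
{
	int err;

	/* Unload the mapping that currently holds the device context */
	if (xx_ctx_owner != NULL && xx_ctx_owner != dhp) {
		if ((err = devmap_unload(xx_ctx_owner, off, len)) != 0)
			return (err);
	}

	/* Fault in the new mapping and record it as the context owner */
	err = devmap_load(dhp, off, len, type, rw);
	if (err == 0)
		xx_ctx_owner = dhp;
	return (err);
}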
int
{
register devmap_handle_t *dhp;
struct devmap_callback_ctl *callbackops;
int map_flag;
int ret;
#ifdef lint
#endif
/*
* driver must provide devmap(9E) entry point in cb_ops to use the
* devmap framework.
*/
return (EINVAL);
/*
* To protect from an inadvertent entry because the devmap entry point
* is not NULL, return error if D_DEVMAP bit is not set in cb_flag and
* mmap is NULL.
*/
return (EINVAL);
/*
* devmap allows mmap(2) to map multiple registers.
* one devmap_handle is created for each register mapped.
*/
else
/*
* Get mapping specific info from
* the driver, such as rnumber, roff, len, callbackops,
* accattrp and, if the mapping is for kernel memory,
* ddi_umem_cookie.
*/
return (ENXIO);
}
if (map_len & PAGEOFFSET) {
return (EINVAL);
}
/*
* Normally devmap does not support MAP_PRIVATE unless
* the drivers provide a valid devmap_access routine.
*/
if ((flags & MAP_PRIVATE) != 0) {
return (EINVAL);
}
} else {
/*
* Initialize dhp_softlock and dh_ctx if the drivers
* provide devmap_access.
*/
/*
* segdev_fault can only work when all
* dh_softlock in a multi-dhp mapping
* are same. see comments in segdev_fault
* This code keeps track of the first
* dh_softlock allocated in slock and
* compares all later allocations and if
* not similar, returns an error.
*/
return (ENOTSUP);
}
}
}
/*
* get the user virtual address and establish the mapping between
* uvaddr and device physical address.
*/
!= 0) {
/*
* free devmap handles if error during the mapping.
*/
return (ret);
}
/*
* call the driver's devmap_map callback to do more after the mapping,
* such as to allocate driver private data for context management.
*/
if (ret != 0) {
struct segdev_data *sdp;
/*
* call driver's devmap_unmap entry point
* to free driver resources.
*/
(*callbackops->devmap_unmap)(
}
}
return (ENXIO);
}
}
}
return (0);
}
int
{
"devmap_segmap:start");
}
/*
* Called from devmap_devmem_setup/remap to see if we can use large pages for
* this device mapping.
* Also calculate the max. page size for this mapping.
* This page size will be used in the fault routine for
* optimal page size calculations.
*/
static void
{
dhp->dh_mmulevel = 0;
/*
* Use large page size only if:
* 1. the mapping is device memory,
* 2. the mmu supports multiple page sizes,
* 3. the driver did not disallow it,
* 4. the dhp length is at least as big as the large pagesize, and
* 5. the uvaddr and pfn are large-pagesize aligned.
*/
if (page_num_pagesizes() > 1 &&
int level;
break;
}
}
}
if (dhp->dh_mmulevel > 0) {
} else {
}
}
/*
* Called by driver devmap routine to pass device specific info to
* the framework. Used for device memory mapping only.
*/
int
{
int err;
"devmap_devmem_setup:start dhp=%p offset=%llx rnum=%d len=%lx",
/*
* First check if this function has already been called for this dhp.
*/
return (DDI_FAILURE);
return (DDI_FAILURE);
if (flags & DEVMAP_MAPPING_INVALID) {
/*
* Don't go up the tree to get pfn if the driver specifies
* DEVMAP_MAPPING_INVALID in flags.
*
* If DEVMAP_MAPPING_INVALID is specified, we have to grant
* remap permission.
*/
if (!(flags & DEVMAP_ALLOW_REMAP)) {
return (DDI_FAILURE);
}
} else {
return (DDI_FAILURE);
/*
* Go up the device tree to get the pfn.
* The rootnex_map_regspec() routine in nexus drivers has been
* modified to return pfn if map_flags is DDI_MF_DEVICE_MAPPING.
*/
if (err)
return (DDI_FAILURE);
}
/* Should not be using devmem setup for memory pages */
/* Only some of the flags bits are settable by the driver */
/* setup the dh_mmulevel and DEVMAP_FLAG_LARGE */
if (callbackops != NULL) {
sizeof (struct devmap_callback_ctl));
}
/*
* Initialize dh_lock if we want to do remap.
*/
}
return (DDI_SUCCESS);
}
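/*
 * A minimal, hypothetical sketch of a devmap(9E) entry point exporting
 * register space through devmap_devmem_setup() as described above.
 * xx_dip, the access attributes and register number 0 are illustrative
 * assumptions; no callback structure is registered.
 */
static dev_info_t *xx_dip;
static ddi_device_acc_attr_t xx_reg_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

static int
xx_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	size_t length = ptob(btopr(len));

	/* Export register set 0; NULL callbackops means default handling */
	if (devmap_devmem_setup(dhp, xx_dip, NULL, 0, off, length,
	    PROT_ALL, 0, &xx_reg_attr) != DDI_SUCCESS)
		return (ENXIO);
	*maplen = length;
	return (0);
}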
int
{
int err;
"devmap_devmem_setup:start dhp=%p offset=%llx rnum=%d len=%lx",
/*
* Return failure if setup has not been done or no remap permission
* has been granted during the setup.
*/
return (DDI_FAILURE);
/* Only DEVMAP_MAPPING_INVALID flag supported for remap */
return (DDI_FAILURE);
return (DDI_FAILURE);
if (!(flags & DEVMAP_MAPPING_INVALID)) {
return (DDI_FAILURE);
}
/*
* Unload the old mapping, so the next fault will set up the new mappings.
* Do this while holding the dhp lock so other faults don't reestablish
* the mappings
*/
if (flags & DEVMAP_MAPPING_INVALID) {
} else {
/* clear any prior DEVMAP_MAPPING_INVALID flag */
/*
* Go up the device tree to get the pfn.
* The rootnex_map_regspec() routine in nexus drivers has been
* modified to return pfn if map_flags is DDI_MF_DEVICE_MAPPING.
*/
if (err) {
return (DDI_FAILURE);
}
/*
* Store result of ddi_map first in local variables, as we do
* not want to overwrite the existing dhp with wrong data.
*/
}
/* clear the large page size flag */
/* setup the dh_mmulevel and DEVMAP_FLAG_LARGE */
return (DDI_SUCCESS);
}
/*
* Called by the driver devmap routine to pass kernel virtual address mapping
* info to the framework. Used only for kernel memory
* allocated from ddi_umem_alloc().
*/
int
{
#ifdef lint
#endif
"devmap_umem_setup:start dhp=%p offset=%llx cookie=%p len=%lx",
return (DDI_FAILURE);
/* For UMEM_TRASH, this restriction is not needed */
return (DDI_FAILURE);
/* check if the cache attributes are supported */
return (DDI_FAILURE);
/*
* First check if this function has already been called for this dhp.
*/
return (DDI_FAILURE);
return (DDI_FAILURE);
if (flags & DEVMAP_MAPPING_INVALID) {
/*
* If DEVMAP_MAPPING_INVALID is specified, we have to grant
* remap permission.
*/
if (!(flags & DEVMAP_ALLOW_REMAP)) {
return (DDI_FAILURE);
}
} else {
/* set HAT cache attributes */
/* set HAT endianness attributes */
}
/*
* The default is _not_ to pass HAT_LOAD_NOCONSIST to hat_devload();
* we pass HAT_LOAD_NOCONSIST _only_ in cases where hat tries to
* create consistent mappings but our intention was to create
* non-consistent mappings.
*
* DEVMEM: hat figures out it's DEVMEM and creates non-consistent
* mappings.
*
* kernel exported memory: hat figures out it's memory and always
* creates consistent mappings.
*
*
* MAP_FIXED. We _explicitly_ tell hat to create non-consistent
* mappings by passing HAT_LOAD_NOCONSIST in case of MAP_FIXED
*/
/* Only some of the flags bits are settable by the driver */
if (callbackops != NULL) {
sizeof (struct devmap_callback_ctl));
}
/*
* Initialize dh_lock if we want to do remap.
*/
}
return (DDI_SUCCESS);
}
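/*
 * A minimal, hypothetical sketch of exporting ddi_umem_alloc()ed kernel
 * memory through devmap_umem_setup() as described above.  The xx_kmem_*
 * names are illustrative assumptions; NULL is passed for the access
 * attributes and callback structure, and error handling is abbreviated.
 */
static dev_info_t *xx_kmem_dip;
static ddi_umem_cookie_t xx_kmem_cookie;
static void *xx_kmem_buf;

static int
xx_kmem_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	size_t length = ptob(btopr(len));

	/* Allocate the backing kernel memory once, on the first mapping */
	if (xx_kmem_buf == NULL)
		xx_kmem_buf = ddi_umem_alloc(length, DDI_UMEM_SLEEP,
		    &xx_kmem_cookie);

	if (devmap_umem_setup(dhp, xx_kmem_dip, NULL, xx_kmem_cookie,
	    off, length, PROT_ALL, 0, NULL) != DDI_SUCCESS)
		return (ENXIO);
	*maplen = length;
	return (0);
}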
int
{
"devmap_umem_remap:start dhp=%p offset=%llx cookie=%p len=%lx",
#ifdef lint
#endif
/*
* Return failure if setup has not been done or no remap permission
* has been granted during the setup.
*/
return (DDI_FAILURE);
/* No flags supported for remap yet */
if (flags != 0)
return (DDI_FAILURE);
/* check if the cache attributes are supported */
return (DDI_FAILURE);
return (DDI_FAILURE);
/* For UMEM_TRASH, this restriction is not needed */
return (DDI_FAILURE);
/*
* Unload the old mapping, so the next fault will set up the new mappings.
* Do this while holding the dhp lock so other faults don't reestablish
* the mappings
*/
/* set HAT cache attributes */
/* set HAT endianness attributes */
/* clear the large page size flag */
return (DDI_SUCCESS);
}
/*
* To set the timeout value for the driver's context management callback, e.g.
* devmap_access().
*/
void
{
"devmap_set_ctx_timeout:start dhp=%p ticks=%x",
}
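/*
 * A minimal, hypothetical sketch of a devmap_map(9E) callback that arms the
 * context-management timeout for a new mapping; the 40ms value and xx_*
 * names are illustrative assumptions.
 */
static int
xx_devmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
    size_t len, void **pvtp)
{
	/* Let a CPU keep the device context for up to ~40ms after a fault */
	devmap_set_ctx_timeout(dhp, drv_usectohz(40000));
	*pvtp = NULL;
	return (0);
}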
int
{
#ifdef lint
#endif
"devmap_default_access:start");
}
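/*
 * A minimal, hypothetical sketch: a driver that registers a devmap_access
 * callback but needs no special context handling can simply forward the
 * fault to devmap_default_access(); the xx_ name is illustrative.
 */
static int
xx_simple_access(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
    uint_t type, uint_t rw)
{
	return (devmap_default_access(dhp, pvtp, off, len, type, rw));
}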
/*
* segkmem_alloc() wrapper to allocate memory which is both
* non-relocatable (for DR) and sharelocked, since the rest
* of this segment driver requires it.
*/
static void *
{
}
/*
* This is where things are a bit incestuous with seg_kmem: unlike
* seg_kp, seg_kmem does not keep its pages long-term sharelocked, so
* we need to do a bit of a dance around that to prevent duplication of
* code until we decide to bite the bullet and implement a new kernel
* segment for driver-allocated memory that is exported to user space.
*/
static void
{
/*
* Use page_find() instead of page_lookup() to find the page
* since we know that it is hashed and has a shared lock.
*/
panic("devmap_free_pages: page not found");
if (!page_tryupgrade(pp)) {
SE_EXCL);
panic("devmap_free_pages: page already freed");
}
/* Clear p_lckcnt so page_destroy() doesn't update availrmem */
page_destroy(pp, 0);
}
}
/*
* devmap_umem_alloc_np() replaces kmem_zalloc() as the method for
* allocating non-pageable kmem in response to a ddi_umem_alloc()
* default request. For now we allocate our own pages and we keep
* them long-term sharelocked, since: A) the fault routines expect the
* memory to already be locked; B) pageable umem is already long-term
* locked; C) it's a lot of work to make it otherwise, particularly
* since the nexus layer expects the pages to never fault. An RFE is to
* not keep the pages long-term locked, but instead to be able to
* take faults on them and simply look them up in kvp in case we
* fault on them. Even then, we must take care not to let pageout
* steal them from us since the data must remain resident; if we
* do this we must come up with some way to pin the pages to prevent
*/
static void *
{
void *buf;
return (buf);
}
static void
{
}
/*
* allocate page aligned kernel memory for exporting to user land.
* The devmap framework will use the cookie allocated by ddi_umem_alloc()
* to find a user virtual address that is in the same color as the address
* allocated here.
*/
void *
{
struct ddi_umem_cookie *cp;
int iflags = 0;
"devmap_umem_alloc:start");
if (len == 0)
return ((void *)NULL);
/*
* allocate cookie
*/
return ((void *)NULL);
}
if (flags & DDI_UMEM_PAGEABLE) {
/* Only one of the flags is allowed */
/* initialize resource with 0 */
/*
* to allocate unlocked pageable memory, use segkp_get() to
* create a segkp segment. Since segkp can only service kas,
* other segment drivers such as segdev have to do
* as_fault(segkp, SOFTLOCK) in their fault routines.
*/
if (flags & DDI_UMEM_NOSLEEP)
iflags |= KPD_NOWAIT;
return ((void *)NULL);
}
} else if (flags & DDI_UMEM_TRASH) {
/* Only one of the flags is allowed */
} else {
return ((void *)NULL);
}
}
/*
* need to save the size here. The size will be used when
* we do kmem_free.
*/
return (buf);
}
void
{
struct ddi_umem_cookie *cp;
"devmap_umem_free:start");
/*
* if cookie is NULL, no effects on the system
*/
return;
case KMEM_PAGEABLE :
/*
* Check if there are still any pending faults on the cookie
* while the driver is deleting it.
* XXX - could change to an ASSERT but won't catch errant drivers
*/
panic("ddi_umem_free for cookie with pending faults %p",
(void *)cp);
return;
}
/*
* release mutex associated with this cookie.
*/
break;
case KMEM_NON_PAGEABLE :
break;
case UMEM_TRASH :
break;
case UMEM_LOCKED :
/* Callers should use ddi_umem_unlock for this type */
/* Frees the cookie too */
return;
default:
/* panic so we can diagnose the underlying cause */
panic("ddi_umem_free: illegal cookie type 0x%x\n",
}
}
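/*
 * A minimal, hypothetical sketch of the ddi_umem_alloc()/ddi_umem_free()
 * pairing for pageable kernel export memory; the xx_ name and four-page
 * size are illustrative assumptions.
 */
static void
xx_umem_example(void)
{
	ddi_umem_cookie_t cookie;
	void *kva;

	kva = ddi_umem_alloc(ptob(4), DDI_UMEM_PAGEABLE | DDI_UMEM_SLEEP,
	    &cookie);
	if (kva == NULL)
		return;

	/* ... export through devmap_umem_setup() and use, then tear down ... */
	ddi_umem_free(cookie);
}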
static int
{
/*
* It looks as if it is always mapped shared
*/
"segdev_getmemid:start");
return (0);
}
/*ARGSUSED*/
static lgrp_mem_policy_info_t *
{
return (NULL);
}
/*ARGSUSED*/
static int
{
return (0);
}
/*
* ddi_umem_alloc() non-pageable quantum cache max size.
* This is just a SWAG.
*/
/*
* Initialize seg_dev from boot. This routine sets up the trash page
* and creates the umem_np_arena used to back non-pageable memory
* requests.
*/
void
segdev_init(void)
{
panic("segdev_init: failed to create trash page");
}
/*
* Invoke platform-dependent support routines so that /proc can have
* the platform code deal with curious hardware.
*/
int
{
}
int
{
}