winlockio.c revision d3d50737e566cade9a08d73d2af95105ac7cd960
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* This is the lock device driver.
*
* The lock driver provides a variation of inter-process mutexes with the
* following twist in semantics:
* A waiter for a lock after a set timeout can "break" the lock and
* grab it from the current owner (without informing the owner).
*
* These semantics result in temporarily multiple processes thinking they
* own the lock. This usually does not make sense for cases where locks are
* used to protect a critical region and it is important to serialize access
* to data structures. As breaking the lock will also lose the serialization
* and result in corrupt data structures.
*
* The usage for winlock driver is primarily driven by the graphics system
* when doing DGA (direct graphics access) graphics. The locks are used to
* protect access to the frame buffer (presumably reflects back to the screen)
* between competing processes that directly write to the screen as opposed
* to going through the window server etc.
* In this case, the result of breaking the lock at worst causes the screen
* image to be distorted and is easily fixed by doing a "refresh"
*
* In well-behaved applications, the lock is held for a very short time and
* the breaking semantics do not come into play. Not having this feature and
* using normal inter-process mutexes will result in a misbehaved application
* from grabbing the screen writing capability from the window manager and
* effectively make the system look like it is hung (mouse pointer does not
* move).
*
* A secondary aspect of the winlock driver is that it allows for extremely
* fast lock acquire/release in cases where there is low contention. A memory
* write is all that is needed (not even a function call). And the window
* manager is the only DGA writer usually and this optimized for. Occasionally
* some processes might do DGA graphics and cause kernel faults to handle
* the contention/locking (and that has got to be slow!).
*
* The following IOCTLs are supported:
*
* GRABPAGEALLOC:
* Compatibility with old cgsix device driver lockpage ioctls.
* Lockpages created this way must be an entire page for compatibility with
* older software. This ioctl allocates a lock context with its own
* private lock page. The unique "ident" that identifies this lock is
* returned.
*
* GRABPAGEFREE:
* Compatibility with cgsix device driver lockpage ioctls. This
* ioctl releases the lock context allocated by GRABPAGEALLOC.
*
* GRABLOCKINFO:
* Returns a one-word flag. '1' means that multiple clients may
* access this lock page. Older device drivers returned '0',
* meaning that only two clients could access a lock page.
*
* GRABATTACH:
* Not supported. This ioctl would have grabbed all lock pages
* on behalf of the calling program.
*
* WINLOCKALLOC:
* Allocate a lock context. This ioctl accepts a key value. as
* its argument. If the key is zero, a new lock context is
* created, and its "ident" is returned. If the key is nonzero,
* all existing contexts are checked to see if they match they
* key. If a match is found, its reference count is incremented
* and its ident is returned, otherwise a new context is created
* and its ident is returned.
*
* WINLOCKFREE:
* Free a lock context. This ioctl accepts the ident of a lock
* context and decrements its reference count. Once the reference
* count reaches zero *and* all mappings are released, the lock
* context is freed. When all the lock context in the lock page are
* freed, the lock page is freed as well.
*
* WINLOCKSETTIMEOUT:
* Set lock timeout for a context. This ioctl accepts the ident
* of a lock context and a timeout value in milliseconds.
* Whenever lock contention occurs, the timer is started and the lock is
* broken after the timeout expires. If timeout value is zero, lock does
* not timeout. This value will be rounded to the nearest clock
* tick, so don't try to use it for real-time control or something.
*
* WINLOCKGETTIMEOUT:
* Get lock timeout from a context.
*
* WINLOCKDUMP:
* Dump state of this device.
*
*
* How /dev/winlock works:
*
* Every lock context consists of two mappings for the client to the lock
* page. These mappings are known as the "lock page" and "unlock page"
* to the client. The first mmap to the lock context (identified by the
* sy_ident field returns during alloc) allocates mapping to the lock page,
* the second mmap allocates a mapping to the unlock page.
* The mappings dont have to be ordered in virtual address space, but do
* need to be ordered in time. Mapping and unmapping of these lock and unlock
* pages should happen in pairs. Doing them one at a time or unmapping one
* and leaving one mapped etc cause undefined behaviors.
* The mappings are always of length PAGESIZE, and type MAP_SHARED.
*
* The first ioctl is to ALLOC a lock, either based on a key (if trying to
* grab a preexisting lock) or 0 (gets a default new one)
* This ioctl returns a value in sy_ident which is needed to do the
* later mmaps and FREE/other ioctls.
*
* The "page number" portion of the sy_ident needs to be passed as the
* file offset when doing an mmap for both the lock page and unlock page
*
* The value returned by mmap ( a user virtual address) needs to be
* incremented by the "page offset" portion of sy_ident to obtain the
* pointer to the actual lock. (Skipping this step, does not cause any
* visible error, but the process will be using the wrong lock!)
*
* On a fork(), the child process will inherit the mappings for free, but
* will not inherit the parent's lock ownership if any. The child should NOT
* do an explicit FREE on the lock context unless it did an explicit ALLOC.
* Only one process at a time is allowed to have a valid hat
* mapping to a lock page. This is enforced by this driver.
* A client acquires a lock by writing a '1' to the lock page.
* Note, that it is not necessary to read and veryify that the lock is '0'
* prior to writing a '1' in it.
* If it does not already have a valid mapping to that page, the driver
* takes a fault (devmap_access), loads the client mapping
* and allows the client to continue. The client releases the lock by
* writing a '0' to the unlock page. Again, if it does not have a valid
* mapping to the unlock page, the segment driver takes a fault,
* loads the mapping, and lets the client continue. From this point
* forward, the client can make as many locks and unlocks as it
* wants, without any more faults into the kernel.
*
* If a different process wants to acquire a lock, it takes a page fault
* when it writes the '1' to the lock page. If the segment driver sees
* that the lock page contained a zero, then it invalidates the owner's
* mappings and gives the mappings to this process.
*
* If there is already a '1' in the lock page when the second client
* tries to access the lock page, then a lock exists. The segment
* driver sleeps the second client and, if applicable, starts the
* timeout on the lock. The owner's mapping to the unlock page
* is invalidated so that the driver will be woken again when the owner
* releases the lock.
*
* When the locking client finally writes a '0' to the unlock page, the
* segment driver takes another fault. The client is given a valid
* mapping, not to the unlock page, but to the "trash page", and allowed
* to continue. Meanwhile, the sleeping client is given a valid mapping
* to the lock/unlock pages and allowed to continue as well.
*
* RFE: There is a leak if process exits before freeing allocated locks
* But currently not tracking which locks were allocated by which
* process and we do not have a clean entry point into the driver
* to do garbage collection. If the interface used a file descriptor for each
* lock it allocs, then the driver can free up stuff in the _close routine
*/
#include <sys/types.h> /* various type defn's */
#include <sys/debug.h>
#include <sys/param.h> /* various kernel limits */
#include <sys/time.h>
#include <sys/errno.h>
#include <sys/kmem.h> /* defines kmem_alloc() */
#include <sys/conf.h> /* defines cdevsw */
#include <sys/file.h> /* various file modes, etc. */
#include <sys/uio.h> /* UIO stuff */
#include <sys/ioctl.h>
#include <sys/cred.h> /* defines cred struct */
#include <sys/mman.h> /* defines mmap(2) parameters */
#include <sys/stat.h> /* defines S_IFCHR */
#include <sys/cmn_err.h> /* use cmn_err */
#include <sys/ddi.h> /* ddi stuff */
#include <sys/sunddi.h> /* ddi stuff */
#include <sys/ddi_impldefs.h> /* ddi stuff */
#include <sys/winlockio.h> /* defines ioctls, flags, data structs */
static int winlock_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int winlock_devmap(dev_t, devmap_cookie_t, offset_t, size_t,
size_t *, uint_t);
static int winlocksegmap(dev_t, off_t, struct as *, caddr_t *, off_t,
uint_t, uint_t, uint_t, cred_t *);
static struct cb_ops winlock_cb_ops = {
nulldev, /* open */
nulldev, /* close */
nodev, /* strategy */
nodev, /* print */
nodev, /* dump */
nodev, /* read */
nodev, /* write */
winlock_ioctl, /* ioctl */
winlock_devmap, /* devmap */
nodev, /* mmap */
winlocksegmap, /* segmap */
nochpoll, /* poll */
ddi_prop_op, /* prop_op */
NULL, /* streamtab */
D_NEW|D_MP|D_DEVMAP, /* Driver compatibility flag */
0, /* rev */
nodev, /* aread */
nodev /* awrite */
};
static int winlock_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int winlock_attach(dev_info_t *, ddi_attach_cmd_t);
static int winlock_detach(dev_info_t *, ddi_detach_cmd_t);
static struct dev_ops winlock_ops = {
DEVO_REV,
0, /* refcount */
winlock_info, /* info */
nulldev, /* identify */
nulldev, /* probe */
winlock_attach, /* attach */
winlock_detach, /* detach */
nodev, /* reset */
&winlock_cb_ops, /* driver ops */
NULL, /* bus ops */
NULL, /* power */
ddi_quiesce_not_needed, /* quiesce */
};
static int winlockmap_map(devmap_cookie_t, dev_t, uint_t, offset_t, size_t,
void **);
static void winlockmap_unmap(devmap_cookie_t, void *, offset_t, size_t,
devmap_cookie_t, void **, devmap_cookie_t, void **);
static int winlockmap_dup(devmap_cookie_t, void *,
devmap_cookie_t, void **);
static int winlockmap_access(devmap_cookie_t, void *, offset_t, size_t,
uint_t, uint_t);
static
struct devmap_callback_ctl winlockmap_ops = {
DEVMAP_OPS_REV,
winlockmap_map,
winlockmap_access,
winlockmap_dup,
winlockmap_unmap,
};
#if DEBUG
static int lock_debug = 0;
#define DEBUGF(level, args) { if (lock_debug >= (level)) cmn_err args; }
#else
#define DEBUGF(level, args)
#endif
/* Driver supports two styles of locks */
enum winlock_style { NEWSTYLE_LOCK, OLDSTYLE_LOCK };
/*
* These structures describe a lock context. We permit multiple
* clients (not just two) to access a lock page
*
* The "cookie" identifies the lock context. It is the page number portion
* sy_ident returned on lock allocation. Cookie is used in later ioctls.
* "cookie" is lockid * PAGESIZE
* "lockptr" is the kernel virtual address to the lock itself
* The page offset portion of lockptr is the page offset portion of sy_ident
*/
/*
* per-process information about locks. This is the private field of
* a devmap mapping. Note that usually *two* mappings point to this.
*/
/*
* Each process using winlock is associated with a segproc structure
* In various driver entry points, we need to search to find the right
* segproc structure (If we were using file handles for each lock this
* would not have been necessary).
* It would have been simple to use the process pid (and ddi_get_pid)
* However, during fork devmap_dup is called in the parent process context
* and using the pid complicates the code by introducing orphans.
* Instead we use the as pointer for the process as a cookie
* which requires delving into various non-DDI kosher structs
*/
typedef struct segproc {
struct segproc *next; /* next client of this lock */
struct seglock *lp; /* associated lock context */
devmap_cookie_t lockseg; /* lock mapping, if any */
devmap_cookie_t unlockseg; /* unlock mapping, if any */
void *tag; /* process as pointer as tag */
uint_t flag; /* see "flag bits" in winlockio.h */
} SegProc;
#define ID(sdp) ((sdp)->tag)
#define CURPROC_ID (void *)(curproc->p_as)
/* per lock context information */
typedef struct seglock {
struct seglock *next; /* next lock */
uint_t sleepers; /* nthreads sleeping on this lock */
uint_t alloccount; /* how many times created? */
uint_t cookie; /* mmap() offset (page #) into device */
uint_t key; /* key, if any */
enum winlock_style style; /* style of lock - OLDSTYLE, NEWSTYLE */
clock_t timeout; /* sleep time in ticks */
ddi_umem_cookie_t umem_cookie; /* cookie for umem allocated memory */
int *lockptr; /* kernel virtual addr of lock */
struct segproc *clients; /* list of clients of this lock */
struct segproc *owner; /* current owner of lock */
kmutex_t mutex; /* mutex for lock */
kcondvar_t locksleep; /* for sleeping on lock */
} SegLock;
#define LOCK(lp) (*((lp)->lockptr))
/*
* Number of locks that can fit in a page. Driver can support only that many.
* For oldsytle locks, it is relatively easy to increase the limit as each
* is in a separate page (MAX_LOCKS mostly serves to prevent runaway allocation
* For newstyle locks, this is trickier as the code needs to allow for mapping
* into the second or third page of the cookie for some locks.
*/
#define MAX_LOCKS (PAGESIZE/sizeof (int))
#define LOCKTIME 3 /* Default lock timeout in seconds */
/* Protections setting for winlock user mappings */
#define WINLOCK_PROT (PROT_READ|PROT_WRITE|PROT_USER)
/*
* The trash page is where unwanted writes go
* when a process is releasing a lock.
*/
static ddi_umem_cookie_t trashpage_cookie = NULL;
/* For newstyle allocations a common page of locks is used */
static caddr_t lockpage = NULL;
static ddi_umem_cookie_t lockpage_cookie = NULL;
static dev_info_t *winlock_dip = NULL;
static kmutex_t winlock_mutex;
/*
* winlock_mutex protects
* lock_list
* lock_free_list
* "next" field in SegLock
* next_lock
* trashpage_cookie
* lockpage & lockpage_cookie
*
* SegLock_mutex protects
* rest of fields in SegLock
* All fields in list of SegProc (lp->clients)
*
* Lock ordering is winlock_mutex->SegLock_mutex
* During devmap/seg operations SegLock_mutex acquired without winlock_mutex
*
* During devmap callbacks, the pointer to SegProc is stored as the private
* data in the devmap handle. This pointer will not go stale (i.e., the
* SegProc getting deleted) as the SegProc is not deleted until both the
* lockseg and unlockseg have been unmapped and the pointers stored in
* the devmap handles have been NULL'ed.
* But before this pointer is used to access any fields (other than the 'lp')
* lp->mutex must be held.
*/
/*
* The allocation code tries to allocate from lock_free_list
* first, otherwise it uses kmem_zalloc. When lock list is idle, all
* locks in lock_free_list are kmem_freed
*/
static SegLock *lock_list = NULL; /* in-use locks */
static SegLock *lock_free_list = NULL; /* free locks */
static int next_lock = 0; /* next lock cookie */
/* Routines to find a lock in lock_list based on offset or key */
static SegLock *seglock_findlock(uint_t);
static SegLock *seglock_findkey(uint_t);
/* Routines to find and allocate SegProc structures */
static SegProc *seglock_find_specific(SegLock *, void *);
static SegProc *seglock_alloc_specific(SegLock *, void *);
#define seglock_findclient(lp) seglock_find_specific((lp), CURPROC_ID)
#define seglock_allocclient(lp) seglock_alloc_specific((lp), CURPROC_ID)
/* Delete client from lock's client list */
static void seglock_deleteclient(SegLock *, SegProc *);
static void garbage_collect_lock(SegLock *, SegProc *);
/* Create a new lock */
static SegLock *seglock_createlock(enum winlock_style);
/* Destroy lock */
static void seglock_destroylock(SegLock *);
static void lock_destroyall(void);
/* Helper functions in winlockmap_access */
static int give_mapping(SegLock *, SegProc *, uint_t);
static int lock_giveup(SegLock *, int);
static int seglock_lockfault(devmap_cookie_t, SegProc *, SegLock *, uint_t);
/* routines called from ioctl */
static int seglock_graballoc(intptr_t, enum winlock_style, int);
static int seglock_grabinfo(intptr_t, int);
static int seglock_grabfree(intptr_t, int);
static int seglock_gettimeout(intptr_t, int);
static int seglock_settimeout(intptr_t, int);
static void seglock_dump_all(void);
static int
winlock_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
DEBUGF(1, (CE_CONT, "winlock_attach, devi=%p, cmd=%d\n",
(void *)devi, (int)cmd));
if (cmd != DDI_ATTACH)
return (DDI_FAILURE);
if (ddi_create_minor_node(devi, "winlock", S_IFCHR, 0, DDI_PSEUDO, 0)
== DDI_FAILURE) {
return (DDI_FAILURE);
}
winlock_dip = devi;
ddi_report_dev(devi);
return (DDI_SUCCESS);
}
/*ARGSUSED*/
static int
winlock_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
DEBUGF(1, (CE_CONT, "winlock_detach, devi=%p, cmd=%d\n",
(void *)devi, (int)cmd));
if (cmd != DDI_DETACH)
return (DDI_FAILURE);
mutex_enter(&winlock_mutex);
if (lock_list != NULL) {
mutex_exit(&winlock_mutex);
return (DDI_FAILURE);
}
ASSERT(lock_free_list == NULL);
DEBUGF(1, (CE_CONT, "detach freeing trashpage and lockpage\n"));
/* destroy any common stuff created */
if (trashpage_cookie != NULL) {
ddi_umem_free(trashpage_cookie);
trashpage_cookie = NULL;
}
if (lockpage != NULL) {
ddi_umem_free(lockpage_cookie);
lockpage = NULL;
lockpage_cookie = NULL;
}
winlock_dip = NULL;
mutex_exit(&winlock_mutex);
return (DDI_SUCCESS);
}
/*ARGSUSED*/
static int
winlock_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
register int error;
/* initialize result */
*result = NULL;
/* only valid instance (i.e., getminor) is 0 */
if (getminor((dev_t)arg) >= 1)
return (DDI_FAILURE);
switch (infocmd) {
case DDI_INFO_DEVT2DEVINFO:
if (winlock_dip == NULL)
error = DDI_FAILURE;
else {
*result = (void *)winlock_dip;
error = DDI_SUCCESS;
}
break;
case DDI_INFO_DEVT2INSTANCE:
*result = (void *)0;
error = DDI_SUCCESS;
break;
default:
error = DDI_FAILURE;
}
return (error);
}
/*ARGSUSED*/
int
winlock_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
cred_t *cred, int *rval)
{
DEBUGF(1, (CE_CONT, "winlockioctl: cmd=%d, arg=0x%p\n",
cmd, (void *)arg));
switch (cmd) {
/*
* ioctls that used to be handled by framebuffers (defined in fbio.h)
* RFE: No code really calls the GRAB* ioctls now. Should EOL.
*/
case GRABPAGEALLOC:
return (seglock_graballoc(arg, OLDSTYLE_LOCK, mode));
case GRABPAGEFREE:
return (seglock_grabfree(arg, mode));
case GRABLOCKINFO:
return (seglock_grabinfo(arg, mode));
case GRABATTACH:
return (EINVAL); /* GRABATTACH is not supported (never was) */
case WINLOCKALLOC:
return (seglock_graballoc(arg, NEWSTYLE_LOCK, mode));
case WINLOCKFREE:
return (seglock_grabfree(arg, mode));
case WINLOCKSETTIMEOUT:
return (seglock_settimeout(arg, mode));
case WINLOCKGETTIMEOUT:
return (seglock_gettimeout(arg, mode));
case WINLOCKDUMP:
seglock_dump_all();
return (0);
#ifdef DEBUG
case (WIOC|255):
lock_debug = arg;
return (0);
#endif
default:
return (ENOTTY); /* Why is this not EINVAL */
}
}
int
winlocksegmap(
dev_t dev, /* major:minor */
off_t off, /* device offset from mmap(2) */
struct as *as, /* user's address space. */
caddr_t *addr, /* address from mmap(2) */
off_t len, /* length from mmap(2) */
uint_t prot, /* user wants this access */
uint_t maxprot, /* this is the maximum the user can have */
uint_t flags, /* flags from mmap(2) */
cred_t *cred)
{
DEBUGF(1, (CE_CONT, "winlock_segmap off=%lx, len=0x%lx\n", off, len));
/* Only MAP_SHARED mappings are supported */
if ((flags & MAP_TYPE) == MAP_PRIVATE) {
return (EINVAL);
}
/* Use devmap_setup to setup the mapping */
return (devmap_setup(dev, (offset_t)off, as, addr, (size_t)len, prot,
maxprot, flags, cred));
}
/*ARGSUSED*/
int
winlock_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
size_t *maplen, uint_t model)
{
SegLock *lp;
int err;
DEBUGF(1, (CE_CONT, "winlock devmap: off=%llx, len=%lx, dhp=%p\n",
off, len, (void *)dhp));
*maplen = 0;
/* Check if the lock exists, i.e., has been created by alloc */
/* off is the sy_ident returned in the alloc ioctl */
if ((lp = seglock_findlock((uint_t)off)) == NULL) {
return (ENXIO);
}
/*
* The offset bits in mmap(2) offset has to be same as in lockptr
* OR the offset should be 0 (i.e. masked off)
*/
if (((off & PAGEOFFSET) != 0) &&
((off ^ (uintptr_t)(lp->lockptr)) & (offset_t)PAGEOFFSET) != 0) {
DEBUGF(2, (CE_CONT,
"mmap offset %llx mismatch with lockptr %p\n",
off, (void *)lp->lockptr));
mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
return (EINVAL);
}
/* Only supports PAGESIZE length mappings */
if (len != PAGESIZE) {
mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
return (EINVAL);
}
/*
* Set up devmap to point at page associated with lock
* RFE: At this point we dont know if this is a lockpage or unlockpage
* a lockpage would not need DEVMAP_ALLOW_REMAP setting
* We could have kept track of the mapping order here,
* but devmap framework does not support storing any state in this
* devmap callback as it does not callback for error cleanup if some
* other error happens in the framework.
* RFE: We should modify the winlock mmap interface so that the
* user process marks in the offset passed in whether this is for a
* lock or unlock mapping instead of guessing based on order of maps
* This would cleanup other things (such as in fork)
*/
if ((err = devmap_umem_setup(dhp, winlock_dip, &winlockmap_ops,
lp->umem_cookie, 0, PAGESIZE, WINLOCK_PROT,
DEVMAP_ALLOW_REMAP, 0)) < 0) {
mutex_exit(&lp->mutex); /* held by seglock_findlock */
return (err);
}
/*
* No mappings are loaded to those segments yet. The correctness
* of the winlock semantics depends on the devmap framework/seg_dev NOT
* loading the translations without calling _access callback.
*/
mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
*maplen = PAGESIZE;
return (0);
}
/*
* This routine is called by the devmap framework after the devmap entry point
* above and the mapping is setup in seg_dev.
* We store the pointer to the per-process context in the devmap private data.
*/
/*ARGSUSED*/
static int
winlockmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
size_t len, void **pvtp)
{
SegLock *lp = seglock_findlock((uint_t)off); /* returns w/ mutex held */
SegProc *sdp;
ASSERT(len == PAGESIZE);
/* Find the per-process context for this lock, alloc one if not found */
sdp = seglock_allocclient(lp);
/*
* RFE: Determining which is a lock vs unlock seg is based on order
* of mmaps, we should change that to be derivable from off
*/
if (sdp->lockseg == NULL) {
sdp->lockseg = dhp;
} else if (sdp->unlockseg == NULL) {
sdp->unlockseg = dhp;
} else {
/* attempting to map lock more than twice */
mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
return (ENOMEM);
}
*pvtp = sdp;
mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
return (DDI_SUCCESS);
}
/*
* duplicate a segment, as in fork()
* On fork, the child inherits the mappings to the lock
* lp->alloccount is NOT incremented, so child should not do a free().
* Semantics same as if done an alloc(), map(), map().
* This way it would work fine if doing an exec() variant later
* Child does not inherit any UFLAGS set in parent
* The lock and unlock pages are started off unmapped, i.e., child does not
* own the lock.
* The code assumes that the child process has a valid pid at this point
* RFE: This semantics depends on fork not duplicating the hat mappings
* (which is the current implementation). To enforce it would need to
* call devmap_unload from here - not clear if that is allowed.
*/
static int
winlockmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
void **newpvt)
{
SegProc *sdp = (SegProc *)oldpvt;
SegProc *ndp;
SegLock *lp = sdp->lp;
mutex_enter(&lp->mutex);
ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
/*
* Note: At this point, the child process does have a pid, but
* the arguments passed to as_dup and hence to devmap_dup dont pass it
* down. So we cannot use normal seglock_findclient - which finds the
* parent sdp itself!
* Instead we allocate the child's SegProc by using the child as pointer
* RFE: we are using the as stucture which means peeking into the
* devmap_cookie. This is not DDI-compliant. Need a compliant way of
* getting at either the as or, better, a way to get the child's new pid
*/
ndp = seglock_alloc_specific(lp,
(void *)((devmap_handle_t *)new_dhp)->dh_seg->s_as);
ASSERT(ndp != sdp);
if (sdp->lockseg == dhp) {
ASSERT(ndp->lockseg == NULL);
ndp->lockseg = new_dhp;
} else {
ASSERT(sdp->unlockseg == dhp);
ASSERT(ndp->unlockseg == NULL);
ndp->unlockseg = new_dhp;
if (sdp->flag & TRASHPAGE) {
ndp->flag |= TRASHPAGE;
}
}
mutex_exit(&lp->mutex);
*newpvt = (void *)ndp;
return (0);
}
/*ARGSUSED*/
static void
winlockmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
devmap_cookie_t new_dhp1, void **newpvtp1,
devmap_cookie_t new_dhp2, void **newpvtp2)
{
SegProc *sdp = (SegProc *)pvtp;
SegLock *lp = sdp->lp;
/*
* We always create PAGESIZE length mappings, so there should never
* be a partial unmapping case
*/
ASSERT((new_dhp1 == NULL) && (new_dhp2 == NULL));
mutex_enter(&lp->mutex);
ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
/* make sure this process doesn't own the lock */
if (sdp == lp->owner) {
/*
* Not handling errors - i.e., errors in unloading mapping
* As part of unmapping hat/seg structure get torn down anyway
*/
(void) lock_giveup(lp, 0);
}
ASSERT(sdp != lp->owner);
if (sdp->lockseg == dhp) {
sdp->lockseg = NULL;
} else {
ASSERT(sdp->unlockseg == dhp);
sdp->unlockseg = NULL;
sdp->flag &= ~TRASHPAGE; /* clear flag if set */
}
garbage_collect_lock(lp, sdp);
}
/*ARGSUSED*/
static int
winlockmap_access(devmap_cookie_t dhp, void *pvt, offset_t off, size_t len,
uint_t type, uint_t rw)
{
SegProc *sdp = (SegProc *)pvt;
SegLock *lp = sdp->lp;
int err;
/* Driver handles only DEVMAP_ACCESS type of faults */
if (type != DEVMAP_ACCESS)
return (-1);
mutex_enter(&lp->mutex);
ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
/* should be using a SegProc that corresponds to current process */
ASSERT(ID(sdp) == CURPROC_ID);
/*
* If process is faulting but does not have both segments mapped
* return error (should cause a segv).
* RFE: could give it a permanent trashpage
*/
if ((sdp->lockseg == NULL) || (sdp->unlockseg == NULL)) {
err = -1;
} else {
err = seglock_lockfault(dhp, sdp, lp, rw);
}
mutex_exit(&lp->mutex);
return (err);
}
/* INTERNAL ROUTINES START HERE */
/*
* search the lock_list list for the specified cookie
* The cookie is the sy_ident field returns by ALLOC ioctl.
* This has two parts:
* the pageoffset bits contain offset into the lock page.
* the pagenumber bits contain the lock id.
* The user code is supposed to pass in only the pagenumber portion
* (i.e. mask off the pageoffset bits). However the code below
* does the mask in case the users are not diligent
* if found, returns with mutex for SegLock structure held
*/
static SegLock *
seglock_findlock(uint_t cookie)
{
SegLock *lp;
cookie &= (uint_t)PAGEMASK; /* remove pageoffset bits to get cookie */
mutex_enter(&winlock_mutex);
for (lp = lock_list; lp != NULL; lp = lp->next) {
mutex_enter(&lp->mutex);
if (cookie == lp->cookie) {
break; /* return with lp->mutex held */
}
mutex_exit(&lp->mutex);
}
mutex_exit(&winlock_mutex);
return (lp);
}
/*
* search the lock_list list for the specified non-zero key
* if found, returns with lock for SegLock structure held
*/
static SegLock *
seglock_findkey(uint_t key)
{
SegLock *lp;
ASSERT(MUTEX_HELD(&winlock_mutex));
/* The driver allows multiple locks with key 0, dont search */
if (key == 0)
return (NULL);
for (lp = lock_list; lp != NULL; lp = lp->next) {
mutex_enter(&lp->mutex);
if (key == lp->key)
break;
mutex_exit(&lp->mutex);
}
return (lp);
}
/*
* Create a new lock context.
* Returns with SegLock mutex held
*/
static SegLock *
seglock_createlock(enum winlock_style style)
{
SegLock *lp;
DEBUGF(3, (CE_CONT, "seglock_createlock: free_list=%p, next_lock %d\n",
(void *)lock_free_list, next_lock));
ASSERT(MUTEX_HELD(&winlock_mutex));
if (lock_free_list != NULL) {
lp = lock_free_list;
lock_free_list = lp->next;
} else if (next_lock >= MAX_LOCKS) {
return (NULL);
} else {
lp = kmem_zalloc(sizeof (SegLock), KM_SLEEP);
lp->cookie = (next_lock + 1) * (uint_t)PAGESIZE;
mutex_init(&lp->mutex, NULL, MUTEX_DEFAULT, NULL);
cv_init(&lp->locksleep, NULL, CV_DEFAULT, NULL);
++next_lock;
}
mutex_enter(&lp->mutex);
ASSERT((lp->cookie/PAGESIZE) <= next_lock);
if (style == OLDSTYLE_LOCK) {
lp->lockptr = (int *)ddi_umem_alloc(PAGESIZE,
DDI_UMEM_SLEEP, &(lp->umem_cookie));
} else {
lp->lockptr = ((int *)lockpage) + ((lp->cookie/PAGESIZE) - 1);
lp->umem_cookie = lockpage_cookie;
}
ASSERT(lp->lockptr != NULL);
lp->style = style;
lp->sleepers = 0;
lp->alloccount = 1;
lp->timeout = LOCKTIME*hz;
lp->clients = NULL;
lp->owner = NULL;
LOCK(lp) = 0;
lp->next = lock_list;
lock_list = lp;
return (lp);
}
/*
* Routine to destory a lock structure.
* This routine is called while holding the lp->mutex but not the
* winlock_mutex.
*/
static void
seglock_destroylock(SegLock *lp)
{
ASSERT(MUTEX_HELD(&lp->mutex));
ASSERT(!MUTEX_HELD(&winlock_mutex));
DEBUGF(3, (CE_CONT, "destroying lock cookie %d key %d\n",
lp->cookie, lp->key));
ASSERT(lp->alloccount == 0);
ASSERT(lp->clients == NULL);
ASSERT(lp->owner == NULL);
ASSERT(lp->sleepers == 0);
/* clean up/release fields in lp */
if (lp->style == OLDSTYLE_LOCK) {
ddi_umem_free(lp->umem_cookie);
}
lp->umem_cookie = NULL;
lp->lockptr = NULL;
lp->key = 0;
/*
* Reduce cookie by 1, makes it non page-aligned and invalid
* This prevents any valid lookup from finding this lock
* so when we drop the lock and regrab it it will still
* be there and nobody else would have attached to it
*/
lp->cookie--;
/* Drop and reacquire mutexes in right order */
mutex_exit(&lp->mutex);
mutex_enter(&winlock_mutex);
mutex_enter(&lp->mutex);
/* reincrement the cookie to get the original valid cookie */
lp->cookie++;
ASSERT((lp->cookie & PAGEOFFSET) == 0);
ASSERT(lp->alloccount == 0);
ASSERT(lp->clients == NULL);
ASSERT(lp->owner == NULL);
ASSERT(lp->sleepers == 0);
/* Remove lp from lock_list */
if (lock_list == lp) {
lock_list = lp->next;
} else {
SegLock *tmp = lock_list;
while (tmp->next != lp) {
tmp = tmp->next;
ASSERT(tmp != NULL);
}
tmp->next = lp->next;
}
/* Add to lock_free_list */
lp->next = lock_free_list;
lock_free_list = lp;
mutex_exit(&lp->mutex);
/* Check if all locks deleted and cleanup */
if (lock_list == NULL) {
lock_destroyall();
}
mutex_exit(&winlock_mutex);
}
/* Routine to find a SegProc corresponding to the tag */
static SegProc *
seglock_find_specific(SegLock *lp, void *tag)
{
SegProc *sdp;
ASSERT(MUTEX_HELD(&lp->mutex));
ASSERT(tag != NULL);
for (sdp = lp->clients; sdp != NULL; sdp = sdp->next) {
if (ID(sdp) == tag)
break;
}
return (sdp);
}
/* Routine to find (and if needed allocate) a SegProc corresponding to tag */
static SegProc *
seglock_alloc_specific(SegLock *lp, void *tag)
{
SegProc *sdp;
ASSERT(MUTEX_HELD(&lp->mutex));
ASSERT(tag != NULL);
/* Search and return if existing one found */
sdp = seglock_find_specific(lp, tag);
if (sdp != NULL)
return (sdp);
DEBUGF(3, (CE_CONT, "Allocating segproc structure for tag %p lock %d\n",
tag, lp->cookie));
/* Allocate a new SegProc */
sdp = kmem_zalloc(sizeof (SegProc), KM_SLEEP);
sdp->next = lp->clients;
lp->clients = sdp;
sdp->lp = lp;
ID(sdp) = tag;
return (sdp);
}
/*
* search a context's client list for the given client and delete
*/
static void
seglock_deleteclient(SegLock *lp, SegProc *sdp)
{
ASSERT(MUTEX_HELD(&lp->mutex));
ASSERT(lp->owner != sdp); /* Not current owner of lock */
ASSERT(sdp->lockseg == NULL); /* Mappings torn down */
ASSERT(sdp->unlockseg == NULL);
DEBUGF(3, (CE_CONT, "Deleting segproc structure for pid %d lock %d\n",
ddi_get_pid(), lp->cookie));
if (lp->clients == sdp) {
lp->clients = sdp->next;
} else {
SegProc *tmp = lp->clients;
while (tmp->next != sdp) {
tmp = tmp->next;
ASSERT(tmp != NULL);
}
tmp->next = sdp->next;
}
kmem_free(sdp, sizeof (SegProc));
}
/*
* Routine to verify if a SegProc and SegLock
* structures are empty/idle.
* Destroys the structures if they are ready
* Can be called with sdp == NULL if want to verify only the lock state
* caller should hold the lp->mutex
* and this routine drops the mutex
*/
static void
garbage_collect_lock(SegLock *lp, SegProc *sdp)
{
ASSERT(MUTEX_HELD(&lp->mutex));
/* see if both segments unmapped from client structure */
if ((sdp != NULL) && (sdp->lockseg == NULL) && (sdp->unlockseg == NULL))
seglock_deleteclient(lp, sdp);
/* see if this is last client in the entire lock context */
if ((lp->clients == NULL) && (lp->alloccount == 0)) {
seglock_destroylock(lp);
} else {
mutex_exit(&lp->mutex);
}
}
/* IOCTLS START HERE */
static int
seglock_grabinfo(intptr_t arg, int mode)
{
int i = 1;
/* multiple clients per lock supported - see comments up top */
if (ddi_copyout((caddr_t)&i, (caddr_t)arg, sizeof (int), mode) != 0)
return (EFAULT);
return (0);
}
static int
seglock_graballoc(intptr_t arg, enum winlock_style style, int mode) /* IOCTL */
{
struct seglock *lp;
uint_t key;
struct winlockalloc wla;
int err;
if (style == OLDSTYLE_LOCK) {
key = 0;
} else {
if (ddi_copyin((caddr_t)arg, (caddr_t)&wla, sizeof (wla),
mode)) {
return (EFAULT);
}
key = wla.sy_key;
}
DEBUGF(3, (CE_CONT,
"seglock_graballoc: key=%u, style=%d\n", key, style));
mutex_enter(&winlock_mutex);
/* Allocate lockpage on first new style alloc */
if ((lockpage == NULL) && (style == NEWSTYLE_LOCK)) {
lockpage = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP,
&lockpage_cookie);
}
/* Allocate trashpage on first alloc (any style) */
if (trashpage_cookie == NULL) {
(void) ddi_umem_alloc(PAGESIZE, DDI_UMEM_TRASH | DDI_UMEM_SLEEP,
&trashpage_cookie);
}
if ((lp = seglock_findkey(key)) != NULL) {
DEBUGF(2, (CE_CONT, "alloc: found lock key %d cookie %d\n",
key, lp->cookie));
++lp->alloccount;
} else if ((lp = seglock_createlock(style)) != NULL) {
DEBUGF(2, (CE_CONT, "alloc: created lock key %d cookie %d\n",
key, lp->cookie));
lp->key = key;
} else {
DEBUGF(2, (CE_CONT, "alloc: cannot create lock key %d\n", key));
mutex_exit(&winlock_mutex);
return (ENOMEM);
}
ASSERT((lp != NULL) && MUTEX_HELD(&lp->mutex));
mutex_exit(&winlock_mutex);
if (style == OLDSTYLE_LOCK) {
err = ddi_copyout((caddr_t)&lp->cookie, (caddr_t)arg,
sizeof (lp->cookie), mode);
} else {
wla.sy_ident = lp->cookie +
(uint_t)((uintptr_t)(lp->lockptr) & PAGEOFFSET);
err = ddi_copyout((caddr_t)&wla, (caddr_t)arg,
sizeof (wla), mode);
}
if (err) {
/* On error, should undo allocation */
lp->alloccount--;
/* Verify and delete if lock is unused now */
garbage_collect_lock(lp, NULL);
return (EFAULT);
}
mutex_exit(&lp->mutex);
return (0);
}
static int
seglock_grabfree(intptr_t arg, int mode) /* IOCTL */
{
struct seglock *lp;
uint_t offset;
if (ddi_copyin((caddr_t)arg, &offset, sizeof (offset), mode)
!= 0) {
return (EFAULT);
}
DEBUGF(2, (CE_CONT, "seglock_grabfree: offset=%u", offset));
if ((lp = seglock_findlock(offset)) == NULL) {
DEBUGF(2, (CE_CONT, "did not find lock\n"));
return (EINVAL);
}
DEBUGF(3, (CE_CONT, " lock key %d, cookie %d, alloccount %d\n",
lp->key, lp->cookie, lp->alloccount));
if (lp->alloccount > 0)
lp->alloccount--;
/* Verify and delete if lock is unused now */
garbage_collect_lock(lp, NULL);
return (0);
}
/*
* Sets timeout in lock and UFLAGS in client
* the UFLAGS are stored in the client structure and persistent only
* till the unmap of the lock pages. If the process sets UFLAGS
* does a map of the lock/unlock pages and unmaps them, the client
* structure will get deleted and the UFLAGS will be lost. The process
* will need to resetup the flags.
*/
static int
seglock_settimeout(intptr_t arg, int mode) /* IOCTL */
{
SegLock *lp;
SegProc *sdp;
struct winlocktimeout wlt;
if (ddi_copyin((caddr_t)arg, &wlt, sizeof (wlt), mode) != 0) {
return (EFAULT);
}
if ((lp = seglock_findlock(wlt.sy_ident)) == NULL)
return (EINVAL);
lp->timeout = MSEC_TO_TICK_ROUNDUP(wlt.sy_timeout);
/* if timeout modified, wake up any sleepers */
if (lp->sleepers > 0) {
cv_broadcast(&lp->locksleep);
}
/*
* If the process is trying to set UFLAGS,
* Find the client segproc and allocate one if needed
* Set the flags preserving the kernel flags
* If the process is clearing UFLAGS
* Find the client segproc but dont allocate one if does not exist
*/
if (wlt.sy_flags & UFLAGS) {
sdp = seglock_allocclient(lp);
sdp->flag = sdp->flag & KFLAGS | wlt.sy_flags & UFLAGS;
} else if ((sdp = seglock_findclient(lp)) != NULL) {
sdp->flag = sdp->flag & KFLAGS;
/* If clearing UFLAGS leaves the segment or lock idle, delete */
garbage_collect_lock(lp, sdp);
return (0);
}
mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
return (0);
}
static int
seglock_gettimeout(intptr_t arg, int mode)
{
SegLock *lp;
SegProc *sdp;
struct winlocktimeout wlt;
if (ddi_copyin((caddr_t)arg, &wlt, sizeof (wlt), mode) != 0)
return (EFAULT);
if ((lp = seglock_findlock(wlt.sy_ident)) == NULL)
return (EINVAL);
wlt.sy_timeout = TICK_TO_MSEC(lp->timeout);
/*
* If this process has an active allocated lock return those flags
* Dont allocate a client structure on gettimeout
* If not, return 0.
*/
if ((sdp = seglock_findclient(lp)) != NULL) {
wlt.sy_flags = sdp->flag & UFLAGS;
} else {
wlt.sy_flags = 0;
}
mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
if (ddi_copyout(&wlt, (caddr_t)arg, sizeof (wlt), mode) != 0)
return (EFAULT);
return (0);
}
/*
* Handle lock segment faults here...
*
* This is where the magic happens.
*/
/* ARGSUSED */
static int
seglock_lockfault(devmap_cookie_t dhp, SegProc *sdp, SegLock *lp, uint_t rw)
{
SegProc *owner = lp->owner;
int err;
ASSERT(MUTEX_HELD(&lp->mutex));
DEBUGF(3, (CE_CONT,
"seglock_lockfault: hdl=%p, sdp=%p, lp=%p owner=%p\n",
(void *)dhp, (void *)sdp, (void *)lp, (void *)owner));
/* lockfault is always called with sdp in current process context */
ASSERT(ID(sdp) == CURPROC_ID);
/* If Lock has no current owner, give the mapping to new owner */
if (owner == NULL) {
DEBUGF(4, (CE_CONT, " lock has no current owner\n"));
return (give_mapping(lp, sdp, rw));
}
if (owner == sdp) {
/*
* Current owner is faulting on owned lock segment OR
* Current owner is faulting on unlock page and has no waiters
* Then can give the mapping to current owner
*/
if ((sdp->lockseg == dhp) || (lp->sleepers == 0)) {
DEBUGF(4, (CE_CONT, "lock owner faulting\n"));
return (give_mapping(lp, sdp, rw));
} else {
/*
* Owner must be writing to unlock page and there are waiters.
* other cases have been checked earlier.
* Release the lock, owner, and owners mappings
* As the owner is trying to write to the unlock page, leave
* it with a trashpage mapping and wake up the sleepers
*/
ASSERT((dhp == sdp->unlockseg) && (lp->sleepers != 0));
DEBUGF(4, (CE_CONT, " owner fault on unlock seg w/ sleeper\n"));
return (lock_giveup(lp, 1));
}
}
ASSERT(owner != sdp);
/*
* If old owner faulting on trash unlock mapping,
* load hat mappings to trash page
* RFE: non-owners should NOT be faulting on unlock mapping as they
* as first supposed to fault on the lock seg. We could give them
* a trash page or return error.
*/
if ((sdp->unlockseg == dhp) && (sdp->flag & TRASHPAGE)) {
DEBUGF(4, (CE_CONT, " old owner reloads trash mapping\n"));
return (devmap_load(sdp->unlockseg, lp->cookie, PAGESIZE,
DEVMAP_ACCESS, rw));
}
/*
* Non-owner faulting. Need to check current LOCK state.
*
* Before reading lock value in LOCK(lp), we must make sure that
* the owner cannot change its value before we change mappings
* or else we could end up either with a hung process
* or more than one process thinking they have the lock.
* We do that by unloading the owner's mappings
*/
DEBUGF(4, (CE_CONT, " owner loses mappings to check lock state\n"));
err = devmap_unload(owner->lockseg, lp->cookie, PAGESIZE);
err |= devmap_unload(owner->unlockseg, lp->cookie, PAGESIZE);
if (err != 0)
return (err); /* unable to remove owner mapping */
/*
* If lock is not held, then current owner mappings were
* unloaded above and we can give the lock to the new owner
*/
if (LOCK(lp) == 0) {
DEBUGF(4, (CE_CONT,
"Free lock (%p): Giving mapping to new owner %d\n",
(void *)lp, ddi_get_pid()));
return (give_mapping(lp, sdp, rw));
}
DEBUGF(4, (CE_CONT, " lock held, sleeping\n"));
/*
* A non-owning process tried to write (presumably to the lockpage,
* but it doesn't matter) but the lock is held; we need to sleep for
* the lock while there is an owner.
*/
lp->sleepers++;
while ((owner = lp->owner) != NULL) {
int rval;
if ((lp->timeout == 0) || (owner->flag & SY_NOTIMEOUT)) {
/*
* No timeout has been specified for this lock;
* we'll simply sleep on the condition variable.
*/
rval = cv_wait_sig(&lp->locksleep, &lp->mutex);
} else {
/*
* A timeout _has_ been specified for this lock. We need
* to wake up and possibly steal this lock if the owner
* does not let it go. Note that all sleepers on a lock
* with a timeout wait; the sleeper with the earliest
* timeout will wakeup, and potentially steal the lock
* Stealing the lock will cause a broadcast on the
* locksleep cv and thus kick the other timed waiters
* and cause everyone to restart in a new timedwait
*/
rval = cv_reltimedwait_sig(&lp->locksleep,
&lp->mutex, lp->timeout, TR_CLOCK_TICK);
}
/*
* Timeout and still old owner - steal lock
* Force-Release lock and give old owner a trashpage mapping
*/
if ((rval == -1) && (lp->owner == owner)) {
/*
* if any errors in lock_giveup, go back and sleep/retry
* If successful, will break out of loop
*/
cmn_err(CE_NOTE, "Process %d timed out on lock %d\n",
ddi_get_pid(), lp->cookie);
(void) lock_giveup(lp, 1);
} else if (rval == 0) { /* signal pending */
cmn_err(CE_NOTE,
"Process %d signalled while waiting on lock %d\n",
ddi_get_pid(), lp->cookie);
lp->sleepers--;
return (FC_MAKE_ERR(EINTR));
}
}
lp->sleepers--;
/*
* Give mapping to this process and save a fault later
*/
return (give_mapping(lp, sdp, rw));
}
/*
* Utility: give a valid mapping to lock and unlock pages to current process.
* Caller responsible for unloading old owner's mappings
*/
static int
give_mapping(SegLock *lp, SegProc *sdp, uint_t rw)
{
int err = 0;
ASSERT(MUTEX_HELD(&lp->mutex));
ASSERT(!((lp->owner == NULL) && (LOCK(lp) != 0)));
/* give_mapping is always called with sdp in current process context */
ASSERT(ID(sdp) == CURPROC_ID);
/* remap any old trash mappings */
if (sdp->flag & TRASHPAGE) {
/* current owner should not have a trash mapping */
ASSERT(sdp != lp->owner);
DEBUGF(4, (CE_CONT,
"new owner %d remapping old trash mapping\n",
ddi_get_pid()));
if ((err = devmap_umem_remap(sdp->unlockseg, winlock_dip,
lp->umem_cookie, 0, PAGESIZE, WINLOCK_PROT, 0, 0)) != 0) {
/*
* unable to remap old trash page,
* abort before changing owner
*/
DEBUGF(4, (CE_CONT,
"aborting: error in umem_remap %d\n", err));
return (err);
}
sdp->flag &= ~TRASHPAGE;
}
/* we have a new owner now */
lp->owner = sdp;
if ((err = devmap_load(sdp->lockseg, lp->cookie, PAGESIZE,
DEVMAP_ACCESS, rw)) != 0) {
return (err);
}
DEBUGF(4, (CE_CONT, "new owner %d gets lock mapping", ddi_get_pid()));
if (lp->sleepers) {
/* Force unload unlock mapping if there are waiters */
DEBUGF(4, (CE_CONT,
" lock has %d sleepers => remove unlock mapping\n",
lp->sleepers));
err = devmap_unload(sdp->unlockseg, lp->cookie, PAGESIZE);
} else {
/*
* while here, give new owner a valid mapping to unlock
* page so we don't get called again.
*/
DEBUGF(4, (CE_CONT, " and unlock mapping\n"));
err = devmap_load(sdp->unlockseg, lp->cookie, PAGESIZE,
DEVMAP_ACCESS, PROT_WRITE);
}
return (err);
}
/*
* Unload owner's mappings, release the lock and wakeup any sleepers
* If trash, then the old owner is given a trash mapping
* => old owner held lock too long and caused a timeout
*/
static int
lock_giveup(SegLock *lp, int trash)
{
SegProc *owner = lp->owner;
DEBUGF(4, (CE_CONT, "winlock_giveup: lp=%p, owner=%p, trash %d\n",
(void *)lp, (void *)ID(lp->owner), trash));
ASSERT(MUTEX_HELD(&lp->mutex));
ASSERT(owner != NULL);
/*
* owner loses lockpage/unlockpage mappings and gains a
* trashpage mapping, if needed.
*/
if (!trash) {
/*
* We do not handle errors in devmap_unload in the !trash case,
* as the process is attempting to unmap/exit or otherwise
* release the lock. Errors in unloading the mapping are not
* going to affect that (unmap does not take error return).
*/
(void) devmap_unload(owner->lockseg, lp->cookie, PAGESIZE);
(void) devmap_unload(owner->unlockseg, lp->cookie, PAGESIZE);
} else {
int err;
if (err = devmap_unload(owner->lockseg, lp->cookie, PAGESIZE)) {
/* error unloading lockseg mapping. abort giveup */
return (err);
}
/*
* old owner gets mapping to trash page so it can continue
* devmap_umem_remap does a hat_unload (and does it holding
* the right locks), so no need to devmap_unload on unlockseg
*/
if ((err = devmap_umem_remap(owner->unlockseg, winlock_dip,
trashpage_cookie, 0, PAGESIZE, WINLOCK_PROT, 0, 0)) != 0) {
/* error remapping to trash page, abort giveup */
return (err);
}
owner->flag |= TRASHPAGE;
/*
* Preload mapping to trash page by calling devmap_load
* However, devmap_load can only be called on the faulting
* process context and not on the owner's process context
* we preload only if we happen to be in owner process context
* Other processes will fault on the unlock mapping
* and be given a trash mapping at that time.
*/
if (ID(owner) == CURPROC_ID) {
(void) devmap_load(owner->unlockseg, lp->cookie,
PAGESIZE, DEVMAP_ACCESS, PROT_WRITE);
}
}
lp->owner = NULL;
/* Clear the lock value in underlying page so new owner can grab it */
LOCK(lp) = 0;
if (lp->sleepers) {
DEBUGF(4, (CE_CONT, " waking up, lp=%p\n", (void *)lp));
cv_broadcast(&lp->locksleep);
}
return (0);
}
/*
* destroy all allocated memory.
*/
static void
lock_destroyall(void)
{
SegLock *lp, *lpnext;
ASSERT(MUTEX_HELD(&winlock_mutex));
ASSERT(lock_list == NULL);
DEBUGF(1, (CE_CONT, "Lock list empty. Releasing free list\n"));
for (lp = lock_free_list; lp != NULL; lp = lpnext) {
mutex_enter(&lp->mutex);
lpnext = lp->next;
ASSERT(lp->clients == NULL);
ASSERT(lp->owner == NULL);
ASSERT(lp->alloccount == 0);
mutex_destroy(&lp->mutex);
cv_destroy(&lp->locksleep);
kmem_free(lp, sizeof (SegLock));
}
lock_free_list = NULL;
next_lock = 0;
}
/* RFE: create mdb walkers instead of dump routines? */
static void
seglock_dump_all(void)
{
SegLock *lp;
mutex_enter(&winlock_mutex);
cmn_err(CE_CONT, "ID\tKEY\tNALLOC\tATTCH\tOWNED\tLOCK\tWAITER\n");
cmn_err(CE_CONT, "Lock List:\n");
for (lp = lock_list; lp != NULL; lp = lp->next) {
mutex_enter(&lp->mutex);
cmn_err(CE_CONT, "%d\t%d\t%u\t%c\t%c\t%c\t%d\n",
lp->cookie, lp->key, lp->alloccount,
lp->clients ? 'Y' : 'N',
lp->owner ? 'Y' : 'N',
lp->lockptr != 0 && LOCK(lp) ? 'Y' : 'N',
lp->sleepers);
mutex_exit(&lp->mutex);
}
cmn_err(CE_CONT, "Free Lock List:\n");
for (lp = lock_free_list; lp != NULL; lp = lp->next) {
mutex_enter(&lp->mutex);
cmn_err(CE_CONT, "%d\t%d\t%u\t%c\t%c\t%c\t%d\n",
lp->cookie, lp->key, lp->alloccount,
lp->clients ? 'Y' : 'N',
lp->owner ? 'Y' : 'N',
lp->lockptr != 0 && LOCK(lp) ? 'Y' : 'N',
lp->sleepers);
mutex_exit(&lp->mutex);
}
#ifdef DEBUG
if (lock_debug < 3) {
mutex_exit(&winlock_mutex);
return;
}
for (lp = lock_list; lp != NULL; lp = lp->next) {
SegProc *sdp;
mutex_enter(&lp->mutex);
cmn_err(CE_CONT,
"lock %p, key=%d, cookie=%d, nalloc=%u, lock=%d, wait=%d\n",
(void *)lp, lp->key, lp->cookie, lp->alloccount,
lp->lockptr != 0 ? LOCK(lp) : -1, lp->sleepers);
cmn_err(CE_CONT,
"style=%d, lockptr=%p, timeout=%ld, clients=%p, owner=%p\n",
lp->style, (void *)lp->lockptr, lp->timeout,
(void *)lp->clients, (void *)lp->owner);
for (sdp = lp->clients; sdp != NULL; sdp = sdp->next) {
cmn_err(CE_CONT, " client %p%s, lp=%p, flag=%x, "
"process tag=%p, lockseg=%p, unlockseg=%p\n",
(void *)sdp, sdp == lp->owner ? " (owner)" : "",
(void *)sdp->lp, sdp->flag, (void *)ID(sdp),
(void *)sdp->lockseg, (void *)sdp->unlockseg);
}
mutex_exit(&lp->mutex);
}
#endif
mutex_exit(&winlock_mutex);
}
#include <sys/modctl.h>
static struct modldrv modldrv = {
&mod_driverops, /* Type of module. This one is a driver */
"Winlock Driver", /* Name of the module */
&winlock_ops, /* driver ops */
};
static struct modlinkage modlinkage = {
MODREV_1,
(void *)&modldrv,
0,
0,
0
};
int
_init(void)
{
int e;
mutex_init(&winlock_mutex, NULL, MUTEX_DEFAULT, NULL);
e = mod_install(&modlinkage);
if (e) {
mutex_destroy(&winlock_mutex);
}
return (e);
}
int
_info(struct modinfo *modinfop)
{
return (mod_info(&modlinkage, modinfop));
}
int
_fini(void)
{
int e;
e = mod_remove(&modlinkage);
if (e == 0) {
mutex_destroy(&winlock_mutex);
}
return (e);
}