physmem.c revision cd64d6e9f50d70599efa5c3e990d0f88efbe4351
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/sysmacros.h>
/*
* Linked list element hanging off physmem_proc_hash below, which holds all
* the information for a given segment which has been set up for this process.
* This is a simple linked list as we are assuming that for a given process
* the setup ioctl will only be called a handful of times. If this assumption
* changes in the future, a quicker-to-traverse data structure should be used.
*/
/*
* One node of a per-process singly linked list of segment records.
* NOTE(review): code later in this file dereferences ph_seg_len through
* pointers of this type, but no such member is declared in this view --
* confirm the full member list against the complete source.
*/
struct physmem_hash {
/* Next segment record in the same list. */
struct physmem_hash *ph_next;
};
/*
* Hash of all of the processes which have set up mappings with the driver,
* with pointers to per-process data.
*/
/*
* Per-process record: anchors that process's list of physmem_hash segment
* records and links to another per-process record.
*/
struct physmem_proc_hash {
/* Head of this process's list of segment records. */
struct physmem_hash *pph_hash;
/* Next per-process record (presumably in the same hash bucket -- confirm). */
struct physmem_proc_hash *pph_next;
};
/* Needs to be a power of two for simple hash algorithm */
#define PPH_SIZE 8
/*
* Lock which protects the pph hash above. To add an element (either a new
* process or a new segment) the WRITE lock must be held. To traverse the
* list, only a READ lock is needed.
*/
/*
* Need to keep a reference count of how many processes have the driver
* open to prevent it from disappearing.
*/
/*
* Table of fs_operation_def_t entries, presumably the vnode operations
* template for physmem vnodes.
* NOTE(review): no entries (and no terminator) are visible in this view;
* confirm the table contents against the complete source.
*/
const fs_operation_def_t physmem_vnodeops_template[] = {
};
/*
* Removes the current process from the hash if the process has no more
* physmem segments active.
*/
void
{
int index;
struct physmem_proc_hash **walker;
break;
}
}
}
/*
* Add a new entry to the hash for the given process to cache the
* address ranges that it is working on. If this is the first hash
* item to be added for this process, we will create the head pointer
* for this process.
* Returns 0 on success, ERANGE when the physical address is already in the
* hash.
*/
int
{
int index;
struct physmem_proc_hash *iterator;
struct physmem_hash *temp;
int ret = 0;
/*
* check to make sure a single process does not try to
* map the same region twice.
*/
temp->ph_seg_len) ||
php->ph_seg_len)) {
break;
}
}
if (ret == 0) {
}
/* Need to check for two threads in sync */
return (ret);
}
}
return (0);
}
/* Dropped the lock so we could use KM_SLEEP */
goto insert;
}
/*
* Will return the pointer to the physmem_hash struct if the setup routine
* has previously been called for this memory.
* Returns NULL on failure.
*/
struct physmem_hash *
{
int index;
struct physmem_proc_hash *proc_hp;
struct physmem_hash *php;
return (php);
}
}
}
}
return (NULL);
}
int
{
int index;
struct physmem_proc_hash *proc_hp;
struct physmem_hash *php;
return (1);
}
}
}
}
return (0);
}
/*
* Remove the given vnode from the pph hash. If it exists in the hash the
* process still has to be around as the vnode is obviously still around and
* since it's a physmem vnode, it must be in the hash.
* If it is not in the hash that must mean that the setup ioctl failed.
* Return 0 in this instance, 1 if it is in the hash.
*/
int
{
int index;
struct physmem_proc_hash *proc_hp;
struct physmem_hash **phpp;
struct physmem_hash *victim;
/* synchronize with the map routine */
return (1);
}
}
}
}
/* not found */
return (0);
}
int
{
int error;
char *name = "physmem";
if (physmem_vnodeops != NULL)
if (error != 0) {
}
return (error);
}
/*
* The guts of the PHYSMEM_SETUP ioctl.
* Create a segment in the address space with the specified parameters.
* If pspp->user_va is NULL, as_gap will be used to find an appropriate VA.
* We do not do bounds checking on the requested physical addresses; if they
* do not exist in the system, they will not be mappable.
* Returns 0 on success with the following error codes on failure:
* ENOMEM - The VA range requested was already mapped (if pspp->user_va is
* non-NULL), or the system was unable to find enough VA space for
* the desired length (if user_va was NULL).
* EINVAL - The requested PA, VA, or length was not PAGESIZE aligned.
*/
int
{
struct segvn_crargs vn_a;
int ret = 0;
struct physmem_hash *php;
/* Sanity checking */
return (EINVAL);
return (EINVAL);
return (EINVAL);
/* Need to bump vnode count so that the driver can not be unloaded */
fail:
return (ret);
}
} else {
/* We pick the address for the user */
goto fail;
}
}
if (ret == 0) {
if (ret == 0)
return (0);
/* Note that the call to as_unmap will free the vnode */
return (ret);
}
goto fail;
/*NOTREACHED*/
}
/*
* The guts of the PHYSMEM_MAP ioctl.
* Map the given PA to the appropriate VA if PHYSMEM_SETUP ioctl has already
* been called for this PA range.
* Returns 0 on success with the following error codes on failure:
* EPERM - The requested page is long term locked, and thus repeated
* requests to allocate this page will likely fail.
* EAGAIN - The requested page could not be allocated, but it is believed
* that future attempts could succeed.
* ENOMEM - There was not enough free memory in the system to safely
* map the requested page.
* EINVAL - The requested paddr was not PAGESIZE aligned or the
* PHYSMEM_SETUP ioctl was not called for this page.
* ENOENT - The requested page was inside the kernel cage, and the
* PHYSMEM_CAGE flag was not set.
* EBUSY - The requested page is retired and the PHYSMEM_RETIRE flag
* was not set.
*/
static int
{
int ret = 0;
struct physmem_hash *php;
return (EINVAL);
/* Find the vnode for this map request */
return (EINVAL);
}
return (EPERM);
}
/*
* Check to see if the page is already mapped correctly. This can happen
* when we failed to capture a page previously and it was captured
* asynchronously for us. Return success in this case.
*/
return (0);
}
/*
* physmem should be responsible for checking for cage
* and prom pages.
*/
if (ret != 0) {
return (ret);
} else {
return (0);
}
}
/*
* Map the given page into the process's address space if possible.
* We actually only hash the page in on the correct vnode as the page
* will be mapped via segvn_pagefault.
* returns 0 on success
* returns 1 if there is no need to map this page anymore (process exited)
* returns -1 if we failed to map the page.
*/
int
{
int ret;
struct physmem_hash *php;
/*
* Check against availrmem to make sure that we're not low on memory.
* We check again here as ASYNC requests do not do this check elsewhere.
* We return 1 as we don't want the page to have the PR_CAPTURE bit
* set or be on the page capture hash.
*/
return (1);
}
/*
* If this is an asynchronous request for the current process,
* we can not map the page as it's possible that we are also in the
* process of unmapping the page which could result in a deadlock
* with the as lock.
*/
return (-1);
}
/* only return zeroed out pages */
/*
* Free the page as there is no longer a valid outstanding
* request for this page.
*/
return (1);
}
/*
* We need to protect against a possible deadlock here where we own
* the vnode page hash mutex and want to acquire it again as there
* are locations in the code, where we unlock a page while holding
* the mutex which can lead to the page being captured and eventually
* end up here.
*/
return (-1);
}
if (ret == 0) {
return (-1);
}
availrmem--;
return (0);
}
/*
* The guts of the PHYSMEM_DESTROY ioctl.
* The cookie passed in will provide all of the information needed to
* free up the address space and physical memory associated with the
* corresponding PHYSMEM_SETUP ioctl.
* Returns 0 on success with the following error codes on failure:
* EINVAL - The cookie supplied is not valid.
*/
int
{
if (physmem_validate_cookie(p_cookie) == 0) {
return (EINVAL);
}
return (0);
}
/*
* If the page has been hashed into the physmem vnode, then just look it up
* and return it via pl, otherwise return ENOMEM as the map ioctl has not
* succeeded on the given page.
*/
/*ARGSUSED*/
static int
{
/*
* If the page is in the hash, then we successfully claimed this
* page earlier, so return it to the caller.
*/
return (0);
}
return (ENOMEM);
}
/*
* Returns EINVAL when we are not working on our own address space.
* Otherwise we return zero as this function is required for normal operation.
*/
/*ARGSUSED*/
static int
{
return (EINVAL);
}
return (0);
}
/* Will always get called for removing a whole segment. */
/*ARGSUSED*/
static int
{
/*
* Release our hold on the vnode so that the final VN_RELE will
* call physmem_inactive to clean things up.
*/
return (0);
}
/*
* Clean up all the pages belonging to this vnode and then free it.
*/
/*ARGSUSED*/
static void
{
/*
* Remove the vnode from the hash now, to prevent asynchronous
* attempts to map into this vnode. This avoids a deadlock
* where two threads try to get into this logic at the same
* time and try to map the pages they are destroying into the
* other's address space.
* If it's not in the hash, just free it.
*/
if (physmem_remove_vnode_hash(vp) == 0) {
return;
}
/*
* At this point in time, no other logic can be adding or removing
* pages from the vnode, otherwise the v_pages list could be inaccurate.
*/
if (page_tryupgrade(pp)) {
/*
* set lckcnt for page_destroy to do availrmem
* accounting
*/
page_destroy(pp, 0);
} else {
/* failure to lock should be transient */
continue;
}
}
}
}
/*ARGSUSED*/
static int
int *rvalp)
{
int ret;
switch (cmd) {
case PHYSMEM_SETUP:
{
struct physmem_setup_param psp;
sizeof (struct physmem_setup_param), 0))
return (EFAULT);
return (EFAULT);
}
break;
case PHYSMEM_MAP:
{
struct physmem_map_param pmp;
sizeof (struct physmem_map_param), 0))
return (EFAULT);
return (EFAULT);
}
break;
case PHYSMEM_DESTROY:
{
sizeof (uint64_t), 0))
return (EFAULT);
}
break;
default:
return (ENOTSUP);
}
return (ret);
}
/*ARGSUSED*/
static int
{
int ret;
static int msg_printed = 0;
return (EINVAL);
}
/* need to make sure we have the right privileges */
return (ret);
return (ret);
if (msg_printed == 0) {
"take out long term locks on pages which may impact "
"dynamic reconfiguration events");
msg_printed = 1;
}
return (0);
}
/*ARGSUSED*/
static int
{
return (0);
}
/*ARGSUSED*/
static int
{
switch (infocmd) {
case DDI_INFO_DEVT2DEVINFO:
*resultp = physmem_dip;
return (DDI_SUCCESS);
case DDI_INFO_DEVT2INSTANCE:
return (DDI_SUCCESS);
default:
return (DDI_FAILURE);
}
}
static int
{
int i;
if (cmd == DDI_RESUME) {
return (DDI_SUCCESS);
}
if (cmd != DDI_ATTACH)
return (DDI_FAILURE);
return (DDI_FAILURE);
physmem_dip = dip;
/* Initialize driver specific data */
if (physmem_setup_vnops()) {
return (DDI_FAILURE);
}
for (i = 0; i < PPH_SIZE; i++)
return (DDI_SUCCESS);
}
static int
{
int ret = DDI_SUCCESS;
if (cmd == DDI_SUSPEND) {
return (DDI_SUCCESS);
}
if (cmd != DDI_DETACH)
return (DDI_FAILURE);
if (physmem_vnodecnt == 0) {
if (physmem_vnodeops != NULL) {
}
} else {
}
if (ret == DDI_SUCCESS)
return (ret);
}
/*
* cb_ops entry-point table for the driver. Only open, close and ioctl are
* backed by physmem routines; every other slot shown here is filled with
* nodev, nochpoll, ddi_prop_op or NULL.
* NOTE(review): the final initializer is unlabeled and the list may be
* truncated in this view -- confirm member order against struct cb_ops.
*/
static struct cb_ops physmem_cb_ops = {
physmem_open, /* open */
physmem_close, /* close */
nodev, /* strategy */
nodev, /* print */
nodev, /* dump */
nodev, /* read */
nodev, /* write */
physmem_ioctl, /* ioctl */
nodev, /* devmap */
nodev, /* mmap */
nodev, /* segmap */
nochpoll, /* chpoll */
ddi_prop_op, /* prop_op */
NULL, /* cb_str */
NULL,
};
/*
* dev_ops structure for the driver.
* NOTE(review): only two positional initializers are visible in this view;
* confirm the full initializer list against the complete source before
* relying on which members they populate.
*/
static struct dev_ops physmem_ops = {
0,
NULL,
};
"physmem driver %I%",
};
/*
* Module linkage handed to mod_install() by _init() and to mod_remove() by
* _fini(); it references modldrv.
* NOTE(review): only the &modldrv initializer is visible in this view --
* confirm the remaining members against the complete source.
*/
static struct modlinkage modlinkage = {
&modldrv,
};
/*
 * Installs the module described by modlinkage via mod_install() and
 * returns that call's status to the caller unchanged.
 */
int
_init(void)
{
	int status;

	status = mod_install(&modlinkage);
	return (status);
}
int
{
}
/*
 * Removes the module described by modlinkage via mod_remove() and
 * returns that call's status to the caller unchanged.
 */
int
_fini(void)
{
	int status;

	status = mod_remove(&modlinkage);
	return (status);
}