/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* PMEM - Direct mapping physical memory pages to userland process
*
 * Provide functions for directly allocating and exporting physical memory
 * pages to userland, i.e., without occupying kernel virtual address space.
*/
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/sunddi.h>
#include <sys/ddidevmap.h>
#include <sys/sysmacros.h>
#include <sys/bitmap.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/seg_dev.h>
/*
 * The routines in this file allocate memory which will be accessed through
 * the AGP GART hardware. The GART is programmed with the PFNs for this
 * memory, and the only mechanism for removing these entries is by an
 * explicit process operation (ioctl/close of the driver, or process exit).
 * As such, the pages need to remain locked to ensure that they won't be
 * relocated or paged out.
*
* To prevent these locked pages from getting in the way of page
* coalescing, we try to allocate large pages from the system, and carve
* them up to satisfy pmem allocation requests. This will keep the locked
* pages within a constrained area of physical memory, limiting the number
* of large pages that would be pinned by our locked pages. This is, of
* course, another take on the infamous kernel cage, and it has many of the
* downsides of the original cage. It also interferes with system-wide
* resource management decisions, as it maintains its own pool of unused
* pages which can't be easily reclaimed and used during low-memory
* situations.
*
* The right solution is for pmem to register a callback that the VM system
* could call, which would temporarily remove any GART entries for pages
* that were being relocated. This would let us leave the pages unlocked,
* which would remove the need for using large pages, which would simplify
* this code a great deal. Unfortunately, the support for these callbacks
* only exists on some SPARC platforms right now.
*
* Note that this is the *only* reason that large pages are used here. The
* GART can't perform large-page translations, and the code appropriately
* falls back to using small pages if page_create_va_large() fails.
*/
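/*
 * Illustrative usage sketch (not part of this file): a GART-style driver
 * would typically allocate pmem once, export it from its devmap(9E) entry
 * point, and free it on last close. The xx_ names, the 16-page size and the
 * PMEM_SLEEP flag are assumptions made for this example.
 *
 *	devmap_pmem_cookie_t cookie;
 *
 *	if (devmap_pmem_alloc(ptob(16), PMEM_SLEEP, &cookie) != DDI_SUCCESS)
 *		return (ENOMEM);
 *	if (devmap_pmem_setup(dhp, xx_dip, &xx_callback_ctl, cookie, 0,
 *	    ptob(16), PROT_ALL, DEVMAP_DEFAULTS, &xx_acc_attr)
 *	    != DDI_SUCCESS) {
 *		devmap_pmem_free(cookie);
 *		return (EINVAL);
 *	}
 *	...
 *	devmap_pmem_free(cookie);	(on last close)
 */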
/*
* Structs and static variables used for pmem only.
*/
typedef struct pmem_lpg {
	page_t	*pl_pp;		/* start pp of this large page */
	ulong_t	*pl_bitmap;	/* allocation status of each small page */
	ushort_t pl_pfree;	/* this large page might be fully freed */
	struct pmem_lpg *pl_next;
	struct pmem_lpg *pl_prev;
} pmem_lpg_t;
/* The segment to be associated with all the allocated pages. */
static struct seg	pmem_seg;
/* Fully occupied large pages allocated for pmem. */
static pmem_lpg_t	*pmem_occ_lpgs;
/* Memory pool to store residual small pages from large pages. */
static page_t	*pmem_mpool = NULL;
/* Number of small pages residing in pmem_mpool currently. */
static pgcnt_t	pmem_nmpages = 0;
/* To protect pmem_nmpages, pmem_mpool and pmem_occ_lpgs. */
static kmutex_t	pmem_mutex;
/* Size code, byte size and small-page count of one pmem large page. */
static uint_t	pmem_lszc;
static size_t	pmem_lpgsize;
static pgcnt_t	pmem_pgcnt;

static int lpg_isfree(pmem_lpg_t *);
/*
 * Called by a driver's devmap routine to pass physical memory mapping info
 * to the seg_dev framework; used only for physical memory allocated via
 * devmap_pmem_alloc().
*/
/* ARGSUSED */
int
devmap_pmem_setup(devmap_cookie_t dhc, dev_info_t *dip,
    struct devmap_callback_ctl *callbackops, devmap_pmem_cookie_t cookie,
    offset_t off, size_t len, uint_t maxprot, uint_t flags,
    ddi_device_acc_attr_t *accattrp)
{
return (DDI_FAILURE);
/*
	 * First, check whether this function has already been called for
	 * this dhp.
*/
return (DDI_FAILURE);
return (DDI_FAILURE);
if (flags & DEVMAP_MAPPING_INVALID) {
/*
* If DEVMAP_MAPPING_INVALID is specified, we have to grant
* remap permission.
*/
if (!(flags & DEVMAP_ALLOW_REMAP))
return (DDI_FAILURE);
} else {
/* dh_roff is the offset inside the dh_pcookie. */
}
/*
* Only "No Cache" and "Write Combining" are supported. If any other
* cache type is specified, override with "No Cache".
*/
else
	if (callbackops != NULL) {
		bcopy(callbackops, &dhp->dh_callbackops,
		    sizeof (struct devmap_callback_ctl));
	}
/*
* Initialize dh_lock if we want to do remap.
*/
}
return (DDI_SUCCESS);
}
/*
 * Replace the existing mapping with a new cookie; mainly called when doing
 * fork(). Should be invoked from the associated devmap_dup(9E) handler.
*/
/* ARGSUSED */
int
devmap_pmem_remap(devmap_cookie_t dhc, dev_info_t *dip,
    devmap_pmem_cookie_t cookie, offset_t off, size_t len, uint_t maxprot,
    uint_t flags, ddi_device_acc_attr_t *accattrp)
{
/*
	 * Return failure if setup has not been done or if no remap permission
	 * was granted during the setup.
*/
return (DDI_FAILURE);
/* No flags supported for remap yet. */
if (flags != 0)
return (DDI_FAILURE);
return (DDI_FAILURE);
return (DDI_FAILURE);
/*
	 * Unload the old mapping of the pages related to this dhp, so the
	 * next fault will set up the new mappings. It is segdev_faultpage()
	 * that calls hat_devload() to establish the mapping. Do this while
	 * holding the dhp lock so other faults don't reestablish the
	 * mappings.
*/
/*
* Only "No Cache" and "Write Combining" are supported, if other cache
* type is specified, override with "No Cache".
*/
else
/* Clear the large page size flag. */
return (DDI_SUCCESS);
}
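/*
 * Illustrative sketch (not part of this file): a driver's devmap_dup(9E)
 * callback can allocate a fresh cookie for the child process and switch the
 * duplicated handle over to it with devmap_pmem_remap(). The xx_ names and
 * the length are assumptions made for this example.
 *
 *	static int
 *	xx_devmap_dup(devmap_cookie_t dhp, void *pvtp,
 *	    devmap_cookie_t new_dhp, void **new_pvtp)
 *	{
 *		devmap_pmem_cookie_t ncookie;
 *
 *		if (devmap_pmem_alloc(xx_len, PMEM_SLEEP, &ncookie)
 *		    != DDI_SUCCESS)
 *			return (ENOMEM);
 *		(flags must be 0 here; no remap flags are supported yet)
 *		if (devmap_pmem_remap(new_dhp, xx_dip, ncookie, 0, xx_len,
 *		    PROT_ALL, 0, &xx_acc_attr) != DDI_SUCCESS) {
 *			devmap_pmem_free(ncookie);
 *			return (EINVAL);
 *		}
 *		*new_pvtp = ncookie;
 *		return (0);
 *	}
 */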
/*
* Directly (i.e., without occupying kernel virtual address space) allocate
* 'npages' physical memory pages for exporting to user land. The allocated
* page_t pointer will be recorded in cookie.
*/
int
devmap_pmem_alloc(size_t size, uint_t flags, devmap_pmem_cookie_t *cookiep)
{
u_offset_t pmem_off = 0;
pgcnt_t i = 0;
struct devmap_pmem_cookie *pcp;
/*
	 * A number larger than this will cause page_create_va() to loop
	 * infinitely.
*/
return (DDI_FAILURE);
return (DDI_FAILURE);
/* Allocate pmem cookie. */
return (DDI_FAILURE);
/*
	 * See if the requested memory can be locked. Currently we do resource
	 * control on the project level only.
*/
goto alloc_fail;
locked = 1;
/*
	 * First, grab as many pages as possible from pmem_mpool. If the pages
	 * in pmem_mpool are enough for this request, we are done.
*/
/* IOlock and hashin them into the new offset. */
if (tpages)
goto done;
/* Quit now if memory cannot be reserved. */
goto alloc_fail;
reserved = 1;
/* Try to allocate large pages first to decrease fragmentation. */
kflags) == DDI_FAILURE)
goto alloc_fail;
/*
	 * If the pages in the large pages exceed the request, put the
	 * residual pages into pmem_mpool.
*/
goto done;
}
/* Allocate small pages if lpp+tlist cannot satisfy the request. */
goto alloc_fail;
done:
/* Set those small pages from large pages as allocated. */
/*
* Now tlist holds all the pages for this cookie. Record these pages in
* pmem cookie.
*/
}
return (DDI_SUCCESS);
alloc_fail:
	/* Free the large pages and the associated allocation records. */
if (lpp)
if (reserved == 1)
/* Put those pages in tlist back into pmem_mpool. */
if (tpages != 0) {
/* IOunlock, hashout and update the allocation records. */
}
if (locked == 1)
	/* Free the pmem cookie. */
return (DDI_FAILURE);
}
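/*
 * To summarize the allocation flow above (a sketch only; the helper names
 * refer to the static routines defined later in this file):
 *
 *	pmem_cookie_alloc(&pcp, npages, kflags);
 *	pmem_lock(npages, curproc);		(project rctl check)
 *	tpages = mpool_break(&tlist, npages);	(1. reuse pooled pages)
 *	if (tpages < npages) {
 *		page_resv(...);			(2. reserve the remainder)
 *		lpp_create(...);		(3. prefer large pages)
 *		lpp_break(...);			(4. pool the residue)
 *		page_create_va(...);		(5. small-page fallback)
 *	}
 *	tlist_in(tlist, npages, pcp->dp_vnp, &pmem_off);
 */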
/*
 * Free all small pages inside the cookie, and return pages from large pages
 * to mpool; if all the pages from one large page are in mpool, free it as a
 * whole.
*/
void
devmap_pmem_free(devmap_pmem_cookie_t cookie)
{
pgcnt_t i;
/* Free small pages and return them to memory pool. */
/*
* Remove the mapping of this single page, this mapping is
* created using hat_devload() in segdev_faultpage().
*/
/* Normal small page. */
page_unresv(1);
} else {
/* Small page from large pages. */
/*
			 * Move this record to the pf_lpgs list; this large
			 * page may then be freed as a whole.
*/
npls++;
} else {
/* Search in pf_lpgs list. */
}
/* Mark this page as free. */
/* Record this page in pmem_mpool. */
}
}
/*
	 * Find the large pages whose small pages have all been freed; remove
	 * them from the plp list, then free them and the associated pmem_lpg
	 * structs.
*/
if (lpg_isfree(pl1)) {
/*
* Get one free large page. Find all pages in this
* large page and remove them from pmem_mpool.
*/
/* Remove associated allocation records. */
tpages -= pmem_pgcnt;
} else
}
/* Update allocation records accordingly. */
}
/*
 * Extract the page frame numbers from a specified range in a cookie.
*/
int
devmap_pmem_getpfns(devmap_pmem_cookie_t cookie, uint_t start, pgcnt_t npages,
    pfn_t *pfnp)
{
pgcnt_t i;
return (DDI_FAILURE);
return (DDI_SUCCESS);
}
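/*
 * Illustrative sketch (not part of this file): the PFNs backing a cookie are
 * typically extracted with devmap_pmem_getpfns() and programmed into the
 * GART. xx_gart_write_entry() is a hypothetical device-specific helper, not
 * a real DDI routine.
 *
 *	pfn_t pfn[16];
 *	pgcnt_t i;
 *
 *	if (devmap_pmem_getpfns(cookie, 0, 16, pfn) != DDI_SUCCESS)
 *		return (EINVAL);
 *	for (i = 0; i < 16; i++)
 *		xx_gart_write_entry(xx_gart, i, pfn[i]);
 */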
/* Initialize pmem support; set up the large page size and pmem_seg. */
void
pmem_init()
{
	mutex_init(&pmem_mutex, NULL, MUTEX_DEFAULT, NULL);
	pmem_lszc = MIN(1, page_num_pagesizes() - 1);
	pmem_lpgsize = page_get_pagesize(pmem_lszc);
	pmem_pgcnt = pmem_lpgsize >> PAGESHIFT;
	bzero(&pmem_seg, sizeof (struct seg));
	pmem_seg.s_as = &kas;
}
/* Allocate kernel memory for one pmem cookie with n pages. */
static int
pmem_cookie_alloc(struct devmap_pmem_cookie **pcpp, pgcnt_t n, uint_t kflags)
{
	struct devmap_pmem_cookie *pcp;

	if ((pcp = kmem_zalloc(sizeof (struct devmap_pmem_cookie),
	    kflags)) == NULL)
		return (DDI_FAILURE);
	/* Allocate the vnode with which the pages will be associated. */
	if ((pcp->dp_vnp = vn_alloc(kflags)) == NULL) {
		kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
		return (DDI_FAILURE);
	}
	/* Allocate the page_t pointer array for n pages. */
	if ((pcp->dp_pparray = kmem_zalloc(n * sizeof (page_t *),
	    kflags)) == NULL) {
		vn_free(pcp->dp_vnp);
		kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
		return (DDI_FAILURE);
	}
	*pcpp = pcp;
	return (DDI_SUCCESS);
}
/* Try to lock down the resource of n pages for the current project. */
static int
pmem_lock(pgcnt_t n, proc_t *p)
{
	if (i_ddi_incr_locked_memory(p, NULL, NULL, NULL,
	    ptob(n)) != 0) {
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}
/* Check whether all the pages in a large page are freed. */
static int
lpg_isfree(pmem_lpg_t *plp)
{
	uint_t i;

	for (i = 0; i < BT_BITOUL(pmem_pgcnt); i++)
		if (plp->pl_bitmap[i] != BT_ULMAXMASK)
			return (0);
/* All 1 means all pages are freed. */
return (1);
}
/*
 * Use pp to find the associated large-page allocation record, searching the
 * splp linked list with *last as a heuristic starting pointer. Return NULL
 * if it is not found.
*/
static pmem_lpg_t *
pmem_lpg_get(pmem_lpg_t *splp, page_t *pp, pmem_lpg_t **last)
{
return (NULL);
/* Try last winner first. */
goto pl_found;
/* Else search the whole pmem_lpg list. */
break;
}
}
return (*last);
}
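/*
 * The search above is a "last winner" heuristic: successive lookups tend to
 * hit the same large page (e.g., when a cookie is freed page by page), so
 * the record that satisfied the previous lookup is probed before the whole
 * list is walked. A minimal sketch of the pattern, assuming membership is
 * tested by the page's position inside the large page:
 *
 *	if (*last != NULL && pp >= (*last)->pl_pp &&
 *	    pp < (*last)->pl_pp + pmem_pgcnt)
 *		return (*last);		(cache hit, no list walk)
 *	for (plp = splp; ; plp = plp->pl_next) {
 *		... full search of the circular list, updating *last on a
 *		    hit and stopping after wrapping back around to splp ...
 *	}
 */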
/*
* Remove one pmem_lpg plp from the oplpp list.
*/
static void
pmem_lpg_sub(pmem_lpg_t **oplpp, pmem_lpg_t *plp)
{
	if (*oplpp == plp)
		*oplpp = plp->pl_next;	/* go to the next pmem_lpg_t */
	if (*oplpp == plp)
		*oplpp = NULL;		/* this list is gone */
	else {
		plp->pl_prev->pl_next = plp->pl_next;
		plp->pl_next->pl_prev = plp->pl_prev;
	}
	plp->pl_prev = plp->pl_next = plp;	/* plp is a list of one now */
}
/*
 * Concatenate the pmem_lpg list nplpp onto the end of list plpp.
*/
static void
pmem_lpg_concat(pmem_lpg_t **plpp, pmem_lpg_t **nplpp)
{
	pmem_lpg_t *e1, *e2;

	if (*nplpp == NULL) {
		return;
	}
	if (*plpp == NULL) {
		*plpp = *nplpp;
		return;
	}
	/* Splice the two circular doubly linked lists together. */
	e1 = (*plpp)->pl_prev;
	e2 = (*nplpp)->pl_prev;
	e1->pl_next = *nplpp;
	(*nplpp)->pl_prev = e1;
	e2->pl_next = *plpp;
	(*plpp)->pl_prev = e2;
}
/*
 * Allocate and initialize the allocation record of one large page; the
 * initial status of all its pages is "allocated".
*/
static pmem_lpg_t *
pmem_lpg_alloc(uint_t kflags)
{
	pmem_lpg_t *plp;

	if ((plp = kmem_zalloc(sizeof (pmem_lpg_t), kflags)) == NULL)
		return (NULL);
	/* A zeroed bitmap marks every small page as "allocated". */
	if ((plp->pl_bitmap = kmem_zalloc(BT_SIZEOFMAP(pmem_pgcnt),
	    kflags)) == NULL) {
		kmem_free(plp, sizeof (*plp));
		return (NULL);
	}
	plp->pl_next = plp->pl_prev = plp;
	return (plp);
}
/* Free the allocation record pointed to by oplp. */
static void
pmem_lpg_free(pmem_lpg_t **headp, pmem_lpg_t *oplp)
{
	if (*headp == oplp && oplp->pl_next == oplp)
		*headp = NULL;		/* freeing the only record */
	else {
		pmem_lpg_sub(headp, oplp);
	}
	kmem_free(oplp->pl_bitmap, BT_SIZEOFMAP(pmem_pgcnt));
	kmem_free(oplp, sizeof (*oplp));
}
/* Free one large page headed by spp from pmem_mpool. */
static void
lpg_free(page_t *spp)
{
	page_t *pp1 = spp;
	uint_t i;

	ASSERT(MUTEX_HELD(&pmem_mutex));
	for (i = 0; i < pmem_pgcnt; i++) {
		/* Break pp1 from pmem_mpool. */
		page_sub(&pmem_mpool, pp1);
		pp1++;
	}
	/* Free pages in this large page. */
	page_free_pages(spp);
	page_unresv(pmem_pgcnt);
	pmem_nmpages -= pmem_pgcnt;
}
/* Put n pages in *ppp list back into pmem_mpool. */
static void
mpool_append(page_t **ppp, pgcnt_t n)
{
	ASSERT(MUTEX_HELD(&pmem_mutex));
	/* Put back pages. */
	page_list_concat(&pmem_mpool, ppp);
	pmem_nmpages += n;
}
/*
* Try to grab MIN(pmem_nmpages, n) pages from pmem_mpool, put them into *ppp
* list, and return the number of grabbed pages.
*/
static pgcnt_t
mpool_break(page_t **ppp, pgcnt_t n)
{
	pgcnt_t i;

	ASSERT(MUTEX_HELD(&pmem_mutex));
	/* Grab the pages. */
	i = MIN(pmem_nmpages, n);
	*ppp = pmem_mpool;
	page_list_break(ppp, &pmem_mpool, i);
	pmem_nmpages -= i;
return (i);
}
/*
 * Create n large pages; lpages and plpp return the total number of small
 * pages and the list of allocation records, respectively.
*/
static int
lpp_create(page_t **lppp, pgcnt_t n, pgcnt_t *lpages, pmem_lpg_t **plpp,
    vnode_t *vnp, u_offset_t *offp, uint_t kflags)
{
pgcnt_t i;
for (i = 0, *lpages = 0; i < n; i++) {
		/* Allocate one large page each time. */
break;
*offp += pmem_lpgsize;
*lpages += pmem_pgcnt;
/* Add one allocation record for this large page. */
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
/*
 * Break the last r small pages off the large-page list *lppp (which holds n
 * small pages in total) and put them into pmem_mpool.
*/
static void
{
pgcnt_t i;
if (r == 0)
return;
/* The residual should reside in the last large page. */
/* IOunlock and hashout the residual pages. */
/* Mark this page as free. */
}
/* Put these residual pages into memory pool. */
mpool_append(&pp, r);
}
/* Free the large pages in lpp and the associated allocation records in plp. */
static void
lpp_free(page_t *lpp, pgcnt_t lpgs, pmem_lpg_t **plpp, uint_t kflags)
{
pgcnt_t i, j;
for (i = 0; i < lpgs; i++) {
for (j = 0; j < pmem_pgcnt; j++) {
/* IO unlock and hashout this small page. */
}
/* Free one large page at one time. */
}
	/* Free the associated pmem large-page allocation records. */
}
}
/*
 * IOlock and hashin all pages in tlist, associating them with vnode *pvnp
 * and offsets starting at *poffp. Update the allocation records accordingly
 * at the same time.
*/
static void
tlist_in(page_t *tlist, pgcnt_t tpages, vnode_t *pvnp, u_offset_t *poffp)
{
pgcnt_t i = 0;
/* Mark this page as allocated. */
}
}
/*
 * IOunlock and hashout all pages in tlist, updating the allocation records
 * accordingly at the same time.
*/
static void
tlist_out(page_t *tlist, pgcnt_t tpages)
{
pgcnt_t i = 0;
/* Mark this page as free. */
}
}