memobj-r0drv-solaris.c revision ca017e3301fd7925fee12eb3c95edbe1555222f4
/* $Id$ */
/** @file
* IPRT - Ring-0 Memory Objects, Solaris.
*/
/*
* Copyright (C) 2006-2012 Oracle Corporation
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*
* The contents of this file may alternatively be used under the terms
* of the Common Development and Distribution License Version 1.0
* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
* VirtualBox OSE distribution, in which case the provisions of the
* CDDL are applicable instead of those of the GPL.
*
* You may elect to license modified versions of this file under the
* terms and conditions of either the GPL or the CDDL or both.
*/
/*******************************************************************************
* Header Files *
*******************************************************************************/
#include "the-solaris-kernel.h"
#include "memobj-r0drv-solaris.h"
/*******************************************************************************
* Defined Constants And Macros *
*******************************************************************************/
/*******************************************************************************
* Structures and Typedefs *
*******************************************************************************/
/**
* The Solaris version of the memory object structure.
*
* Instances are created through rtR0MemObjNew() and cast to PRTR0MEMOBJSOL by
* the allocation, locking and mapping routines in this file.
*/
typedef struct RTR0MEMOBJSOL
{
/** The core structure.
* NOTE(review): no member declaration follows this comment here - confirm it was not lost. */
/** Pointer to kernel memory cookie.
* NOTE(review): no member declaration follows this comment here - confirm it was not lost. */
/** Shadow locked pages. */
void *pvHandle;
/** Access during locking.
* NOTE(review): presumably the S_READ/S_WRITE/S_EXEC mask built by the lock routines - confirm. */
int fAccess;
/** Set if large pages are involved in an RTR0MEMOBJTYPE_PHYS
* allocation. The physical allocator sets this to true when it allocated one
* large page and to false for an ordinary contiguous allocation. */
bool fLargePage;
/** Whether we have individual pages or a kernel-mapped virtual memory block in
* an RTR0MEMOBJTYPE_PHYS_NC allocation. Set to true when the allocation had no
* upper physical address limit, false when the limited fallback path was used. */
bool fIndivPages;
/*******************************************************************************
* Global Variables *
*******************************************************************************/
/** Vnode that single pages are created against (passed to page_create_va()). */
static vnode_t g_PageVnode;
/** NOTE(review): presumably serialises updates of g_offPage - confirm against its users. */
static kmutex_t g_OffsetMtx;
/** NOTE(review): presumably the next unused page offset within g_PageVnode - confirm. */
static u_offset_t g_offPage;
/** NOTE(review): presumably the large-page counterpart of g_PageVnode - confirm. */
static vnode_t g_LargePageVnode;
/** NOTE(review): presumably serialises updates of g_offLargePage - confirm against its users. */
static kmutex_t g_LargePageOffsetMtx;
/** NOTE(review): presumably the next unused offset within g_LargePageVnode - confirm. */
static u_offset_t g_offLargePage;
/** Whether PG_NORELOC is used for large pages. Determined once, on the first
* large page allocation, and consulted again when a large page is freed. */
static bool g_fLargePageNoReloc;
/**
* Returns the physical address for a virtual address.
*
* @param pv The virtual address.
*
* @returns The physical address corresponding to @a pv.
*/
{
pfn_t PageFrameNum = 0;
if (SOL_IS_KRNL_ADDR(pv))
else
{
}
}
/**
* Returns the physical address for a page.
*
* @param pPage Pointer to the page.
*
* @returns The physical address for a page.
*/
{
}
/**
* Allocates one page.
*
* @param virtAddr The virtual address to which this page may be mapped in
* the future.
*
* @returns Pointer to the allocated page, NULL on failure.
*/
{
page_t *pPage = page_create_va(&g_PageVnode, offPage, PAGE_SIZE, PG_WAIT | PG_NORELOC, &KernelSeg, virtAddr);
{
/*
* Lock this page into memory "long term" to prevent this page from being paged out
* when we drop the page lock temporarily (during free).
*/
}
return pPage;
}
/**
* Destroys an allocated page.
*
* @param pPage Pointer to the page to be destroyed.
* @remarks This function expects page in @c pPage to be shared locked.
*/
{
/*
* We need to exclusive lock the pages before freeing them, if upgrading the shared lock to exclusive fails,
* drop the page lock and look it up from the hash. Record the page offset before we drop the page lock as
* we cannot touch any page_t members once the lock is dropped.
*/
if (!rc)
{
/*
* Since we allocated the pages as PG_NORELOC we should only get back the exact page always.
*/
}
}
/**
* Allocates physical, non-contiguous memory of pages.
*
* @param puPhys Where to store the physical address of first page. Optional,
* can be NULL.
* @param cb The size of the allocation.
*
* @return Array of allocated pages, NULL on failure.
*/
{
/*
* VM1:
* The page freelist and cachelist both hold pages that are not mapped into any address space.
* The cachelist is not really free pages but when memory is exhausted they'll be moved to the
* free lists, it's the total of the free+cache list that we see on the 'free' column in vmstat.
*
* VM2:
* @todo Document what happens behind the scenes in VM2 regarding the free and cachelist.
*/
/*
* Non-pageable memory reservation request for _4K pages, don't sleep.
*/
if (rc)
{
{
/*
* Get pages from kseg, the 'virtAddr' here is only for colouring but unfortunately
* we don't yet have the 'virtAddr' to which this memory may be mapped.
*/
{
/*
* Get a page from the free list locked exclusively. The page will be named (hashed in)
* and we rely on it during free. Downgrade the page to a shared lock to prevent the page
* from being relocated.
*/
if (RT_UNLIKELY(!pPage))
{
/*
* No page found, release whatever pages we grabbed so far.
*/
for (size_t k = 0; k < i; k++)
return NULL;
}
}
if (puPhys)
return ppPages;
}
}
return NULL;
}
/**
* Frees the allocated pages.
*
* @param ppPages Pointer to the page list.
* @param cbPages Size of the allocation.
*/
{
}
/**
* Allocates one large page.
*
* @param puPhys Where to store the physical address of the allocated
* page. Optional, can be NULL.
* @param cbLargePage Size of the large page.
*
* @returns Pointer to a list of pages that cover the large page, NULL on
* failure.
*/
{
/*
* Check PG_NORELOC support for large pages. Using this helps prevent _1G page
* fragmentation on systems that support it.
*/
static bool fPageNoRelocChecked = false;
if (fPageNoRelocChecked == false)
{
fPageNoRelocChecked = true;
g_fLargePageNoReloc = false;
{
g_fLargePageNoReloc = true;
}
}
/*
* Non-pageable memory reservation request for _4K pages, don't sleep.
*/
u_offset_t offPage = 0;
if (rc)
{
{
if (pRootPage)
{
/*
* Split it into sub-pages, downgrade each page to a shared lock to prevent page relocation.
*/
{
/*
* Lock the page into memory "long term". This prevents callers of page_try_demote_pages() (such as the
* pageout scanner) from demoting the large page into smaller pages while we temporarily release the
* exclusive lock (during free). We pass "0, 1" since we've already accounted for availrmem during
* page_resv().
*/
}
if (puPhys)
return ppPages;
}
/*
* Don't restore offPrev in case of failure (race condition), we have plenty of offset space.
* The offset must be unique (for the same vnode) or we'll encounter panics on page_create_va_large().
*/
}
}
return NULL;
}
/**
* Frees the large page.
*
* @param ppPages Pointer to the list of small pages that cover the
* large page.
* @param cbLargePage Size of the allocation (i.e. size of the large
* page).
*/
{
bool fDemoted = false;
{
/*
* We need the pages exclusively locked, try upgrading the shared lock.
* If it fails, drop the shared page lock (cannot access any page_t members once this is done)
* and lookup the page from the page hash locking it exclusively.
*/
if (!rc)
{
if (g_fLargePageNoReloc)
{
/*
* This can only be guaranteed if PG_NORELOC is used while allocating the pages.
*/
pFoundPage, pPage));
}
/*
* Check for page demotion (regardless of relocation). Some places in Solaris (e.g. VM1 page_retire())
* could possibly demote the large page to _4K pages between our call to page_unlock() and page_lookup().
*/
if (page_get_pagecnt(pFoundPage->p_szc) == 1) /* Base size of only _4K associated with this page. */
fDemoted = true;
pPage = pFoundPage;
}
}
if (fDemoted)
{
{
}
}
else
{
/*
* Although we shred the adjacent pages in the linked list, page_destroy_pages works on
* adjacent pages via array increments. So this does indeed free all the pages.
*/
}
}
/**
* Unmaps kernel/user-space mapped memory.
*
* @param pv Pointer to the mapped memory block.
* @param cb Size of the memory block.
*/
{
if (SOL_IS_KRNL_ADDR(pv))
{
}
else
{
}
}
/**
* Lock down memory mappings for a virtual address.
*
* Only non-kernel (user) addresses are soft-locked, using as_fault() with
* F_SOFTLOCK. For kernel addresses nothing is done and success is returned.
*
* @param pv Pointer to the memory to lock down.
* @param cb Size of the memory block.
* @param fAccess Page access rights (S_READ, S_WRITE, S_EXEC).
* NOTE(review): the body refers to this value as fPageAccess -
* confirm the parameter name against the signature.
*
* @returns IPRT status code: VINF_SUCCESS, or VERR_LOCK_FAILED when as_fault()
* returns a non-zero fault code.
*/
{
/*
* NOTE(review): pProc is used below but is not declared in the visible code -
* confirm where the process pointer is obtained.
*/
if (!SOL_IS_KRNL_ADDR(pv))
{
/* Fault the whole range in and soft-lock it; a non-zero fault code means failure. */
faultcode_t rc = as_fault(pProc->p_as->a_hat, pProc->p_as, (caddr_t)pv, cb, F_SOFTLOCK, fPageAccess);
if (rc)
{
/* NOTE(review): "%pv" looks like a typo for a plain pointer specifier - confirm against the IPRT format rules. */
LogRel(("rtR0MemObjSolLock failed for pv=%pv cb=%lx fPageAccess=%d rc=%d\n", pv, cb, fPageAccess, rc));
return VERR_LOCK_FAILED;
}
}
return VINF_SUCCESS;
}
/**
* Unlock memory mappings for a virtual address.
*
* @param pv Pointer to the locked memory.
* @param cb Size of the memory block.
* @param fPageAccess Page access rights (S_READ, S_WRITE, S_EXEC).
*/
{
if (!SOL_IS_KRNL_ADDR(pv))
{
}
}
/**
* Maps a list of physical pages into user address space.
*
* @param pVirtAddr Where to store the virtual address of the mapping.
* @param fPageAccess Page access rights (PROT_READ, PROT_WRITE,
* PROT_EXEC)
* @param paPhysAddrs Array of physical addresses to pages.
* @param cb Size of memory being mapped.
* @param cbPageSize NOTE(review): presumably the size of each page described
* by @a paPhysAddrs - confirm against the callers.
*
* @returns IPRT status code. On the failure path the errno-style value held in
* the local rc is converted with RTErrConvertFromErrno().
*/
static int rtR0MemObjSolUserMap(caddr_t *pVirtAddr, unsigned fPageAccess, uint64_t *paPhysAddrs, size_t cb, size_t cbPageSize)
{
/* Pessimistic default in case nothing assigns rc. */
int rc = VERR_INTERNAL_ERROR;
/* NOTE(review): this 'else' has no matching 'if' in the visible code - confirm the mapping call and success branch were not lost. */
else
return RTErrConvertFromErrno(rc);
}
/*
* NOTE(review): the function header is not present above this body; judging by
* the per-type handling this is presumably the native free/cleanup routine -
* confirm.
*
* Dispatches on the memory object type. Returns VINF_SUCCESS for every handled
* type and VERR_INTERNAL_ERROR for RTR0MEMOBJTYPE_CONT and unknown types.
*/
{
/* NOTE(review): the switch statement line itself is not visible here. */
{
case RTR0MEMOBJTYPE_LOW:
break;
case RTR0MEMOBJTYPE_PHYS:
{
/* Large-page allocations and ordinary contiguous ones take different paths. */
if (pMemSolaris->fLargePage)
else
}
break;
case RTR0MEMOBJTYPE_PHYS_NC:
/* Individually allocated pages and a kernel-mapped block take different paths. */
if (pMemSolaris->fIndivPages)
else
break;
case RTR0MEMOBJTYPE_PAGE:
break;
case RTR0MEMOBJTYPE_LOCK:
break;
case RTR0MEMOBJTYPE_MAPPING:
break;
case RTR0MEMOBJTYPE_RES_VIRT:
{
/* NOTE(review): the condition guarding this 'else' is not visible; the fallback only asserts. */
else
AssertFailed();
break;
}
case RTR0MEMOBJTYPE_CONT: /* we don't use this type here. */
default:
return VERR_INTERNAL_ERROR;
}
return VINF_SUCCESS;
}
/*
* NOTE(review): the function header is not present above this body; it creates
* an RTR0MEMOBJTYPE_PAGE object, so it is presumably the native page allocator -
* confirm.
*
* Returns VERR_NO_MEMORY if the memory object cannot be created,
* VERR_NO_PAGE_MEMORY if the backing memory is NULL, VINF_SUCCESS otherwise.
*/
{
/* Create the object. */
PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_PAGE, NULL, cb);
if (RT_UNLIKELY(!pMemSolaris))
return VERR_NO_MEMORY;
/* NOTE(review): pvMem is not declared in the visible code - confirm the allocation call was not lost. */
if (RT_UNLIKELY(!pvMem))
{
/* NOTE(review): nothing visible releases pMemSolaris on this path - confirm the object is deleted before returning. */
return VERR_NO_PAGE_MEMORY;
}
return VINF_SUCCESS;
}
/*
* NOTE(review): the function header is not present above this body; it creates
* an RTR0MEMOBJTYPE_LOW object, so it is presumably the native low-memory
* allocator - confirm.
*
* Returns VERR_NO_MEMORY if the memory object cannot be created,
* VERR_NO_LOW_MEMORY if the backing memory is NULL, VINF_SUCCESS otherwise.
*/
{
/* Create the object */
PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_LOW, NULL, cb);
if (!pMemSolaris)
return VERR_NO_MEMORY;
/* Allocate physically low page-aligned memory. */
/* NOTE(review): pvMem is not declared in the visible code - confirm the allocation call was not lost. */
if (RT_UNLIKELY(!pvMem))
{
/* NOTE(review): nothing visible releases pMemSolaris on this path - confirm the object is deleted before returning. */
return VERR_NO_LOW_MEMORY;
}
return VINF_SUCCESS;
}
{
}
/**
* Allocates physical, non-contiguous memory (RTR0MEMOBJTYPE_PHYS_NC).
*
* Only implemented for 64-bit hosts; other builds return VERR_NOT_SUPPORTED.
*
* @param ppMem NOTE(review): presumably where the new memory object is
* returned; no store to it is visible in this body - confirm.
* @param cb Size of the allocation in bytes.
* @param PhysHighest Highest acceptable physical address, or NIL_RTHCPHYS for
* no limit. Without a limit individual pages are used
* (fIndivPages = true); with a limit the fallback path is
* taken (fIndivPages = false).
*
* @returns VINF_SUCCESS, VERR_NO_MEMORY or VERR_NOT_SUPPORTED.
*/
DECLHIDDEN(int) rtR0MemObjNativeAllocPhysNC(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest)
{
#if HC_ARCH_BITS == 64
PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_PHYS_NC, NULL, cb);
if (RT_UNLIKELY(!pMemSolaris))
return VERR_NO_MEMORY;
if (PhysHighest == NIL_RTHCPHYS)
{
/* No upper limit: the allocation is tracked as individual pages. */
/* NOTE(review): pvPages is not declared in the visible code, and nothing visible releases pMemSolaris on failure - confirm. */
if (!pvPages)
{
return VERR_NO_MEMORY;
}
pMemSolaris->fIndivPages = true;
return VINF_SUCCESS;
}
else
{
/*
* If we must satisfy an upper limit constraint, it isn't feasible to grab individual pages.
* We fall back to using contig_alloc().
* NOTE(review): the log message below names rtR0SolMemAlloc rather than
* contig_alloc() - confirm which allocator is actually used.
*/
if (!pvMem)
{
/* NOTE(review): cb is a size_t but is formatted with %u in this 64-bit-only path - confirm the specifier. */
LogRel(("rtR0MemObjNativeAllocPhysNC: rtR0SolMemAlloc failed for cb=%u PhysHighest=%RHp.\n", cb, PhysHighest));
return VERR_NO_MEMORY;
}
pMemSolaris->fIndivPages = false;
return VINF_SUCCESS;
}
#else /* 32 bit: */
return VERR_NOT_SUPPORTED; /* see the RTR0MemObjAllocPhysNC specs */
#endif
}
/**
* Allocates physically contiguous memory (RTR0MEMOBJTYPE_PHYS).
*
* A request for exactly one large page (cb equals the large page size, the
* alignment equals cb, and there is no upper address limit) is served as a
* large page; every other request takes the aligned contiguous path.
*
* @param ppMem NOTE(review): presumably where the new memory object is
* returned; no store to it is visible in this body - confirm.
* @param cb Size of the allocation in bytes.
* @param PhysHighest Highest acceptable physical address, or NIL_RTHCPHYS for
* no limit.
* @param uAlignment Requested alignment in bytes.
*
* @returns VINF_SUCCESS, VERR_NO_MEMORY if the memory object cannot be created,
* or VERR_NO_CONT_MEMORY if neither path succeeds.
*/
DECLHIDDEN(int) rtR0MemObjNativeAllocPhys(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest, size_t uAlignment)
{
PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_PHYS, NULL, cb);
if (RT_UNLIKELY(!pMemSolaris))
return VERR_NO_MEMORY;
/*
* Allocating one large page gets special treatment.
*/
/* UINT32_MAX marks the large page size as not yet determined; it is looked up once.
* NOTE(review): s_cbLargePage is not declared in the visible code - confirm its declaration and initial value. */
if (s_cbLargePage == UINT32_MAX)
{
if (page_num_pagesizes() > 1)
ASMAtomicWriteU32(&s_cbLargePage, page_get_pagesize(1)); /* Page-size code 1 maps to _2M on Solaris x86/amd64. */
/* NOTE(review): this 'else' has no statement in the visible code - confirm the single-page-size fallback was not lost. */
else
}
if ( cb == s_cbLargePage
&& cb == uAlignment
&& PhysHighest == NIL_RTHCPHYS)
{
/*
* Allocate one large page (backed by physically contiguous memory).
*/
{
pMemSolaris->fLargePage = true;
return VINF_SUCCESS;
}
}
else
{
/*
* Allocate physically contiguous memory aligned as specified.
*/
{
pMemSolaris->fLargePage = false;
return VINF_SUCCESS;
}
}
/* NOTE(review): nothing visible releases pMemSolaris before this failure return - confirm. */
return VERR_NO_CONT_MEMORY;
}
/**
* Creates an RTR0MEMOBJTYPE_PHYS memory object for an existing physical range.
* Nothing is allocated here; the range has to be mapped before it can be used.
*
* @param ppMem NOTE(review): presumably where the new memory object is
* returned; no store to it is visible in this body - confirm.
* @param Phys Physical address of the range. NOTE(review): not referenced
* in the visible body - confirm where it is recorded.
* @param cb Size of the range in bytes.
* @param uCachePolicy Requested cache policy. NOTE(review): not referenced in
* the visible body - confirm whether it is validated.
*
* @returns VINF_SUCCESS, or VERR_NO_MEMORY if the memory object cannot be created.
*/
DECLHIDDEN(int) rtR0MemObjNativeEnterPhys(PPRTR0MEMOBJINTERNAL ppMem, RTHCPHYS Phys, size_t cb, uint32_t uCachePolicy)
{
/* Create the object. */
PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_PHYS, NULL, cb);
if (!pMemSolaris)
return VERR_NO_MEMORY;
/* There is no allocation here, it needs to be mapped somewhere first. */
return VINF_SUCCESS;
}
/**
* Locks down a range of user memory, creating an RTR0MEMOBJTYPE_LOCK object
* for it.
*
* @param ppMem NOTE(review): presumably where the new memory object is
* returned; no store to it is visible in this body - confirm.
* @param R3Ptr User-space address of the range; recorded as the object's address.
* @param cb Size of the range in bytes.
* @param fAccess RTMEM_PROT_* flags; translated into an S_* page access mask
* that always includes S_READ.
*
* @returns VINF_SUCCESS, VERR_NO_MEMORY if the memory object cannot be created,
* or the failure status of the locking step.
*
* NOTE(review): the signature line ends with a comma and its continuation is
* not visible - confirm the remaining parameter(s).
*/
DECLHIDDEN(int) rtR0MemObjNativeLockUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3Ptr, size_t cb, uint32_t fAccess,
{
/* Create the locking object */
PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_LOCK, (void *)R3Ptr, cb);
if (!pMemSolaris)
return VERR_NO_MEMORY;
/* Lock down user pages. */
int fPageAccess = S_READ;
/* NOTE(review): the statements guarded by the next two conditions and the call that sets rc are not visible - confirm. */
if (fAccess & RTMEM_PROT_WRITE)
if (fAccess & RTMEM_PROT_EXEC)
if (RT_FAILURE(rc))
{
/* NOTE(review): nothing visible releases pMemSolaris on this path - confirm. */
return rc;
}
/* Fill in the object attributes and return successfully. */
return VINF_SUCCESS;
}
/**
* Locks down a range of kernel memory, creating an RTR0MEMOBJTYPE_LOCK object
* for it.
*
* @param ppMem NOTE(review): presumably where the new memory object is
* returned; no store to it is visible in this body - confirm.
* @param pv Kernel address of the range; recorded as the object's address.
* @param cb Size of the range in bytes.
* @param fAccess RTMEM_PROT_* flags; translated into an S_* page access mask
* that always includes S_READ.
*
* @returns VINF_SUCCESS, VERR_NO_MEMORY if the memory object cannot be created,
* or the failure status of the locking step.
*/
DECLHIDDEN(int) rtR0MemObjNativeLockKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pv, size_t cb, uint32_t fAccess)
{
PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_LOCK, pv, cb);
if (!pMemSolaris)
return VERR_NO_MEMORY;
/* Lock down kernel pages. */
int fPageAccess = S_READ;
/* NOTE(review): the statements guarded by the next two conditions and the call that sets rc are not visible - confirm. */
if (fAccess & RTMEM_PROT_WRITE)
if (fAccess & RTMEM_PROT_EXEC)
if (RT_FAILURE(rc))
{
/* NOTE(review): nothing visible releases pMemSolaris on this path - confirm. */
return rc;
}
/* Fill in the object attributes and return successfully. */
return VINF_SUCCESS;
}
/**
* Reserves kernel virtual address space and wraps it in a memory object.
*
* @param ppMem NOTE(review): presumably where the new memory object is
* returned; no store to it is visible in this body - confirm.
* @param pvFixed Requested fixed address. NOTE(review): not referenced in the
* visible body - confirm how it is validated.
* @param cb Size of the reservation in bytes.
* @param uAlignment Requested alignment. NOTE(review): not referenced in the
* visible body - confirm how it is validated.
*
* @returns VINF_SUCCESS, or VERR_NO_MEMORY if either the address space or the
* memory object cannot be obtained.
*/
DECLHIDDEN(int) rtR0MemObjNativeReserveKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pvFixed, size_t cb, size_t uAlignment)
{
/*
* Use xalloc.
*/
/* NOTE(review): pv is not declared in the visible code - confirm the allocation call was not lost. */
if (RT_UNLIKELY(!pv))
return VERR_NO_MEMORY;
/* Create the object. */
/* NOTE(review): pMemSolaris is not declared in the visible code, and nothing visible frees pv on this failure path - confirm. */
if (!pMemSolaris)
{
LogRel(("rtR0MemObjNativeReserveKernel failed to alloc memory object.\n"));
return VERR_NO_MEMORY;
}
return VINF_SUCCESS;
}
/**
* Reserving user address space is not implemented on Solaris.
*
* @returns VERR_NOT_SUPPORTED, unconditionally.
*
* NOTE(review): the signature line ends with a comma and its continuation is
* not visible - confirm the remaining parameter(s).
*/
DECLHIDDEN(int) rtR0MemObjNativeReserveUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment,
{
return VERR_NOT_SUPPORTED;
}
/**
* Maps (part of) an existing memory object into kernel address space and
* creates an RTR0MEMOBJTYPE_MAPPING object for the new mapping.
*
* @param ppMem NOTE(review): presumably where the new memory object is
* returned; no store to it is visible in this body - confirm.
* @param pMemToMap The memory object to map.
* @param pvFixed Requested fixed address. NOTE(review): not referenced in the
* visible body - confirm how it is validated.
* @param uAlignment Requested alignment; values above PAGE_SIZE are rejected.
*
* @returns VINF_SUCCESS, VERR_NOT_SUPPORTED for an unsupported alignment,
* VERR_MAP_FAILED if no address space could be obtained,
* VERR_NO_MEMORY if the mapping object cannot be created, or the
* failure status of the page loading step.
*
* NOTE(review): the signature line ends with a comma and its continuation is
* not visible; fProt and cbSub used below are presumably among the missing
* parameters - confirm.
*/
DECLHIDDEN(int) rtR0MemObjNativeMapKernel(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap, void *pvFixed, size_t uAlignment,
{
/* Fail if requested to do something we can't. */
if (uAlignment > PAGE_SIZE)
return VERR_NOT_SUPPORTED;
/*
* Use xalloc to get address space.
*/
/* NOTE(review): the statement guarded by '!cbSub' and the declaration of pv are not visible - confirm. */
if (!cbSub)
if (RT_UNLIKELY(!pv))
return VERR_MAP_FAILED;
/*
* Load the pages from the other object into it.
*/
/* Translate the RTMEM_PROT_* request into the attribute mask for the mapping.
* NOTE(review): fAttr's declaration and the READ/EXEC statements are not visible - confirm. */
if (fProt & RTMEM_PROT_READ)
if (fProt & RTMEM_PROT_EXEC)
if (fProt & RTMEM_PROT_WRITE)
fAttr |= PROT_WRITE;
fAttr |= HAT_NOSYNC;
int rc = VINF_SUCCESS;
/* NOTE(review): the loop header and body that load each page are not visible - confirm. */
{
/* Advance. */
}
if (RT_SUCCESS(rc))
{
/*
* Create a memory object for the mapping.
*/
PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_MAPPING, pv, cbSub);
if (pMemSolaris)
{
return VINF_SUCCESS;
}
LogRel(("rtR0MemObjNativeMapKernel failed to alloc memory object.\n"));
rc = VERR_NO_MEMORY;
}
/* NOTE(review): the cleanup statement guarded by 'off' is not visible; as written the guard applies to the return - confirm. */
if (off)
return rc;
}
/**
* Maps an existing memory object into the address space of the calling process.
*
* Only the current process is supported and the alignment must be exactly
* PAGE_SIZE.
*
* @param ppMem NOTE(review): presumably where the new memory object is
* returned; no store to it is visible in this body - confirm.
* @param pMemToMap The memory object to map.
* @param R3PtrFixed Requested fixed user address. NOTE(review): not referenced
* in the visible body - confirm how it is validated.
*
* @returns VINF_SUCCESS, VERR_NOT_SUPPORTED for a foreign process or unsupported
* alignment, VERR_NO_MEMORY if the mapping object or the physical
* address array is unavailable, or the failure status of an
* intermediate step.
*
* NOTE(review): the signature line ends with a comma and its continuation is
* not visible; uAlignment, fProt and R0Process used below are presumably among
* the missing parameters - confirm.
*/
DECLHIDDEN(int) rtR0MemObjNativeMapUser(PPRTR0MEMOBJINTERNAL ppMem, PRTR0MEMOBJINTERNAL pMemToMap, RTR3PTR R3PtrFixed,
{
/*
* Fend off things we cannot do.
*/
AssertMsgReturn(R0Process == RTR0ProcHandleSelf(), ("%p != %p\n", R0Process, RTR0ProcHandleSelf()), VERR_NOT_SUPPORTED);
if (uAlignment != PAGE_SIZE)
return VERR_NOT_SUPPORTED;
/*
* Get parameters from the source object.
*/
/*
* Create the mapping object
*/
/* NOTE(review): the declaration and creation of pMemSolaris are not visible - confirm. */
if (RT_UNLIKELY(!pMemSolaris))
return VERR_NO_MEMORY;
int rc = VINF_SUCCESS;
/* NOTE(review): paPhysAddrs is not declared in the visible code - confirm where the array is allocated and freed. */
if (RT_LIKELY(paPhysAddrs))
{
/*
* Prepare the pages for mapping according to type.
*/
/* NOTE(review): the conditions selecting between the following branches are not visible - confirm. */
{
}
{
}
else
{
/*
* Have kernel mapping, just translate virtual to physical.
*/
rc = VINF_SUCCESS;
{
{
LogRel(("rtR0MemObjNativeMapUser: no page to map.\n"));
break;
}
}
}
if (RT_SUCCESS(rc))
{
/* Translate the RTMEM_PROT_* request into PROT_* flags; read access is always granted. */
unsigned fPageAccess = PROT_READ;
/* NOTE(review): the statement guarded by RTMEM_PROT_WRITE is not visible - confirm. */
if (fProt & RTMEM_PROT_WRITE)
if (fProt & RTMEM_PROT_EXEC)
fPageAccess |= PROT_EXEC;
/*
* Perform the actual mapping.
*/
if (RT_SUCCESS(rc))
{
return VINF_SUCCESS;
}
}
}
else
rc = VERR_NO_MEMORY;
/* NOTE(review): nothing visible releases pMemSolaris on the failure paths - confirm. */
return rc;
}
/**
* Changing the page protection of a memory object is not implemented on Solaris.
*
* @param pMem The memory object (unused).
* @param offSub Offset into the object (unused).
* @param cbSub Size of the sub-range (unused).
* @param fProt Requested protection (unused).
*
* @returns VERR_NOT_SUPPORTED, unconditionally.
*/
DECLHIDDEN(int) rtR0MemObjNativeProtect(PRTR0MEMOBJINTERNAL pMem, size_t offSub, size_t cbSub, uint32_t fProt)
{
return VERR_NOT_SUPPORTED;
}
/*
* NOTE(review): the function header is not present above this body; it returns
* a physical address or NIL_RTHCPHYS per object type, so it is presumably the
* native page-physical-address query - confirm.
*/
{
/* NOTE(review): the switch statement line itself is not visible here. */
{
case RTR0MEMOBJTYPE_PHYS_NC:
/* NOTE(review): the first half of this condition and the declaration of pb are not visible - confirm. */
|| !pMemSolaris->fIndivPages)
{
return rtR0MemObjSolVirtToPhys(pb);
}
case RTR0MEMOBJTYPE_PAGE:
case RTR0MEMOBJTYPE_LOW:
case RTR0MEMOBJTYPE_LOCK:
{
/* These types are resolved by translating the virtual address. */
return rtR0MemObjSolVirtToPhys(pb);
}
/*
* Although mapping can be handled by rtR0MemObjSolVirtToPhys(offset) like the above case,
* mappings are expected to be resolved by the caller (see the assertion below).
*/
case RTR0MEMOBJTYPE_MAPPING:
case RTR0MEMOBJTYPE_CONT:
case RTR0MEMOBJTYPE_PHYS:
AssertFailed(); /* handled by the caller */
/* Deliberate fall-through: after asserting, return NIL_RTHCPHYS like the cases below. */
case RTR0MEMOBJTYPE_RES_VIRT:
default:
return NIL_RTHCPHYS;
}
}