memobj-r0drv-solaris.c revision 62b8247e4970b835e263b4c18a2748bf651fabd0
/* $Id$ */
/** @file
* IPRT - Ring-0 Memory Objects, Solaris.
*/
/*
* Copyright (C) 2006-2012 Oracle Corporation
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*
* The contents of this file may alternatively be used under the terms
* of the Common Development and Distribution License Version 1.0
* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
* VirtualBox OSE distribution, in which case the provisions of the
* CDDL are applicable instead of those of the GPL.
*
* You may elect to license modified versions of this file under the
* terms and conditions of either the GPL or the CDDL or both.
*/
/*******************************************************************************
* Header Files *
*******************************************************************************/
#include "the-solaris-kernel.h"
#include "internal/iprt.h"
#include <iprt/memobj.h>
#include <iprt/asm.h>
#include <iprt/assert.h>
#include <iprt/err.h>
#include <iprt/log.h>
#include <iprt/mem.h>
#include <iprt/param.h>
#include <iprt/process.h>
#include "internal/memobj.h"
#include "memobj-r0drv-solaris.h"
/*******************************************************************************
* Defined Constants And Macros *
*******************************************************************************/
#define SOL_IS_KRNL_ADDR(vx) ((uintptr_t)(vx) >= kernelbase)
/*******************************************************************************
* Structures and Typedefs *
*******************************************************************************/
/**
* The Solaris version of the memory object structure.
*/
typedef struct RTR0MEMOBJSOL
{
/** The core structure. */
RTR0MEMOBJINTERNAL Core;
/** Pointer to kernel memory cookie. */
ddi_umem_cookie_t Cookie;
/** Shadow locked pages. */
void *pvHandle;
/** Access during locking. */
int fAccess;
/** Set if large pages are involved in an RTR0MEMOBJTYPE_PHYS
* allocation. */
bool fLargePage;
/** Whether we have individual pages or a kernel-mapped virtual memory block in
* an RTR0MEMOBJTYPE_PHYS_NC allocation. */
bool fIndivPages;
} RTR0MEMOBJSOL, *PRTR0MEMOBJSOL;
/*******************************************************************************
* Global Variables *
*******************************************************************************/
static vnode_t g_PageVnode;
static kmutex_t g_OffsetMtx;
static u_offset_t g_offPage;
static vnode_t g_LargePageVnode;
static kmutex_t g_LargePageOffsetMtx;
static u_offset_t g_offLargePage;
static bool g_fLargePageNoReloc;
/**
* Returns the physical address for a virtual address.
*
* @param pv The virtual address.
*
* @returns The physical address corresponding to @a pv.
*/
static uint64_t rtR0MemObjSolVirtToPhys(void *pv)
{
struct hat *pHat = NULL;
pfn_t PageFrameNum = 0;
uintptr_t uVirtAddr = (uintptr_t)pv;
if (SOL_IS_KRNL_ADDR(pv))
pHat = kas.a_hat;
else
{
proc_t *pProcess = (proc_t *)RTR0ProcHandleSelf();
AssertRelease(pProcess);
pHat = pProcess->p_as->a_hat;
}
PageFrameNum = hat_getpfnum(pHat, (caddr_t)(uVirtAddr & PAGEMASK));
AssertReleaseMsg(PageFrameNum != PFN_INVALID, ("rtR0MemObjSolVirtToPhys failed. pv=%p\n", pv));
return (((uint64_t)PageFrameNum << PAGE_SHIFT) | (uVirtAddr & PAGE_OFFSET_MASK));
}
/**
* Returns the physical address for a page.
*
* @param pPage Pointer to the page.
*
* @returns The physical address for a page.
*/
static inline uint64_t rtR0MemObjSolPagePhys(page_t *pPage)
{
AssertPtr(pPage);
pfn_t PageFrameNum = page_pptonum(pPage);
AssertReleaseMsg(PageFrameNum != PFN_INVALID, ("rtR0MemObjSolPagePhys failed pPage=%p\n"));
return (uint64_t)PageFrameNum << PAGE_SHIFT;
}
/**
* Allocates one page.
*
* @param virtAddr The virtual address to which this page maybe mapped in
* the future.
*
* @returns Pointer to the allocated page, NULL on failure.
*/
static page_t *rtR0MemObjSolPageAlloc(caddr_t virtAddr)
{
u_offset_t offPage;
seg_t KernelSeg;
/*
* 16777215 terabytes of total memory for all VMs or
* restart 8000 1GB VMs 2147483 times until wraparound!
*/
mutex_enter(&g_OffsetMtx);
AssertCompileSize(u_offset_t, sizeof(uint64_t)); NOREF(RTASSERTVAR);
g_offPage = RT_ALIGN_64(g_offPage, PAGE_SIZE) + PAGE_SIZE;
offPage = g_offPage;
mutex_exit(&g_OffsetMtx);
KernelSeg.s_as = &kas;
page_t *pPage = page_create_va(&g_PageVnode, offPage, PAGE_SIZE, PG_WAIT | PG_NORELOC, &KernelSeg, virtAddr);
if (RT_LIKELY(pPage))
{
/*
* Lock this page into memory "long term" to prevent this page from being paged out
* when we drop the page lock temporarily (during free). Downgrade to a shared lock
* to prevent page relocation.
*/
page_pp_lock(pPage, 0 /* COW */, 1 /* Kernel */);
page_io_unlock(pPage);
page_downgrade(pPage);
Assert(PAGE_LOCKED_SE(pPage, SE_SHARED));
}
return pPage;
}
/**
* Destroys an allocated page.
*
* @param pPage Pointer to the page to be destroyed.
* @remarks This function expects page in @c pPage to be shared locked.
*/
static void rtR0MemObjSolPageDestroy(page_t *pPage)
{
/*
* We need to exclusive lock the pages before freeing them, if upgrading the shared lock to exclusive fails,
* drop the page lock and look it up from the hash. Record the page offset before we drop the page lock as
* we cannot touch any page_t members once the lock is dropped.
*/
AssertPtr(pPage);
Assert(PAGE_LOCKED_SE(pPage, SE_SHARED));
u_offset_t offPage = pPage->p_offset;
int rc = page_tryupgrade(pPage);
if (!rc)
{
page_unlock(pPage);
page_t *pFoundPage = page_lookup(&g_PageVnode, offPage, SE_EXCL);
/*
* Since we allocated the pages as PG_NORELOC we should only get back the exact page always.
*/
AssertReleaseMsg(pFoundPage == pPage, ("Page lookup failed %p:%llx returned %p, expected %p\n",
&g_PageVnode, offPage, pFoundPage, pPage));
}
Assert(PAGE_LOCKED_SE(pPage, SE_EXCL));
page_pp_unlock(pPage, 0 /* COW */, 1 /* Kernel */);
page_destroy(pPage, 0 /* move it to the free list */);
}
/**
* Allocates physical, non-contiguous memory of pages.
*
* @param puPhys Where to store the physical address of first page. Optional,
* can be NULL.
* @param cb The size of the allocation.
*
* @return Array of allocated pages, NULL on failure.
*/
static page_t **rtR0MemObjSolPagesAlloc(uint64_t *puPhys, size_t cb)
{
/*
* VM1:
* The page freelist and cachelist both hold pages that are not mapped into any address space.
* The cachelist is not really free pages but when memory is exhausted they'll be moved to the
* free lists, it's the total of the free+cache list that we see on the 'free' column in vmstat.
*
* VM2:
* @todo Document what happens behind the scenes in VM2 regarding the free and cachelist.
*/
/*
* Non-pageable memory reservation request for _4K pages, don't sleep.
*/
size_t cPages = (cb + PAGE_SIZE - 1) >> PAGE_SHIFT;
int rc = page_resv(cPages, KM_NOSLEEP);
if (rc)
{
size_t cbPages = cPages * sizeof(page_t *);
page_t **ppPages = kmem_zalloc(cbPages, KM_SLEEP);
if (RT_LIKELY(ppPages))
{
/*
* Get pages from kseg, the 'virtAddr' here is only for colouring but unfortunately
* we don't yet have the 'virtAddr' to which this memory may be mapped.
*/
caddr_t virtAddr = 0;
for (size_t i = 0; i < cPages; i++, virtAddr += PAGE_SIZE)
{
/*
* Get a page from the free list locked exclusively. The page will be named (hashed in)
* and we rely on it during free. The page we get will be shared locked to prevent the page
* from being relocated.
*/
page_t *pPage = rtR0MemObjSolPageAlloc(virtAddr);
if (RT_UNLIKELY(!pPage))
{
/*
* No page found, release whatever pages we grabbed so far.
*/
for (size_t k = 0; k < i; k++)
rtR0MemObjSolPageDestroy(ppPages[k]);
kmem_free(ppPages, cbPages);
page_unresv(cPages);
return NULL;
}
ppPages[i] = pPage;
}
if (puPhys)
*puPhys = rtR0MemObjSolPagePhys(ppPages[0]);
return ppPages;
}
page_unresv(cPages);
}
return NULL;
}
/**
* Frees the allocates pages.
*
* @param ppPages Pointer to the page list.
* @param cbPages Size of the allocation.
*/
static void rtR0MemObjSolPagesFree(page_t **ppPages, size_t cb)
{
size_t cPages = (cb + PAGE_SIZE - 1) >> PAGE_SHIFT;
size_t cbPages = cPages * sizeof(page_t *);
for (size_t iPage = 0; iPage < cPages; iPage++)
rtR0MemObjSolPageDestroy(ppPages[iPage]);
kmem_free(ppPages, cbPages);
page_unresv(cPages);
}
/**
* Allocates one large page.
*
* @param puPhys Where to store the physical address of the allocated
* page. Optional, can be NULL.
* @param cbLargePage Size of the large page.
*
* @returns Pointer to a list of pages that cover the large page, NULL on
* failure.
*/
static page_t **rtR0MemObjSolLargePageAlloc(uint64_t *puPhys, size_t cbLargePage)
{
/*
* Check PG_NORELOC support for large pages. Using this helps prevent _1G page
* fragementation on systems that support it.
*/
static bool fPageNoRelocChecked = false;
if (fPageNoRelocChecked == false)
{
fPageNoRelocChecked = true;
g_fLargePageNoReloc = false;
if ( g_pfnrtR0Sol_page_noreloc_supported
&& g_pfnrtR0Sol_page_noreloc_supported(cbLargePage))
{
g_fLargePageNoReloc = true;
}
}
/*
* Non-pageable memory reservation request for _4K pages, don't sleep.
*/
size_t cPages = (cbLargePage + PAGE_SIZE - 1) >> PAGE_SHIFT;
size_t cbPages = cPages * sizeof(page_t *);
u_offset_t offPage = 0;
int rc = page_resv(cPages, KM_NOSLEEP);
if (rc)
{
page_t **ppPages = kmem_zalloc(cbPages, KM_SLEEP);
if (RT_LIKELY(ppPages))
{
mutex_enter(&g_LargePageOffsetMtx);
AssertCompileSize(u_offset_t, sizeof(uint64_t)); NOREF(RTASSERTVAR);
g_offLargePage = RT_ALIGN_64(g_offLargePage, cbLargePage) + cbLargePage;
offPage = g_offLargePage;
mutex_exit(&g_LargePageOffsetMtx);
seg_t KernelSeg;
KernelSeg.s_as = &kas;
page_t *pRootPage = page_create_va_large(&g_LargePageVnode, offPage, cbLargePage,
PG_EXCL | (g_fLargePageNoReloc ? PG_NORELOC : 0), &KernelSeg,
0 /* vaddr */,NULL /* locality group */);
if (pRootPage)
{
/*
* Split it into sub-pages, downgrade each page to a shared lock to prevent page relocation.
*/
page_t *pPageList = pRootPage;
for (size_t iPage = 0; iPage < cPages; iPage++)
{
page_t *pPage = pPageList;
AssertPtr(pPage);
AssertMsg(page_pptonum(pPage) == iPage + page_pptonum(pRootPage),
("%p:%lx %lx+%lx\n", pPage, page_pptonum(pPage), iPage, page_pptonum(pRootPage)));
AssertMsg(pPage->p_szc == pRootPage->p_szc, ("Size code mismatch %p %d %d\n", pPage,
(int)pPage->p_szc, (int)pRootPage->p_szc));
/*
* Lock the page into memory "long term". This prevents callers of page_try_demote_pages() (such as the
* pageout scanner) from demoting the large page into smaller pages while we temporarily release the
* exclusive lock (during free). We pass "0, 1" since we've already accounted for availrmem during
* page_resv().
*/
page_pp_lock(pPage, 0 /* COW */, 1 /* Kernel */);
page_sub(&pPageList, pPage);
page_io_unlock(pPage);
page_downgrade(pPage);
Assert(PAGE_LOCKED_SE(pPage, SE_SHARED));
ppPages[iPage] = pPage;
}
Assert(pPageList == NULL);
Assert(ppPages[0] == pRootPage);
uint64_t uPhys = rtR0MemObjSolPagePhys(pRootPage);
AssertMsg(!(uPhys & (cbLargePage - 1)), ("%llx %zx\n", uPhys, cbLargePage));
if (puPhys)
*puPhys = uPhys;
return ppPages;
}
/*
* Don't restore offPrev in case of failure (race condition), we have plenty of offset space.
* The offset must be unique (for the same vnode) or we'll encounter panics on page_create_va_large().
*/
kmem_free(ppPages, cbPages);
}
page_unresv(cPages);
}
return NULL;
}
/**
* Frees the large page.
*
* @param ppPages Pointer to the list of small pages that cover the
* large page.
* @param cbLargePage Size of the allocation (i.e. size of the large
* page).
*/
static void rtR0MemObjSolLargePageFree(page_t **ppPages, size_t cbLargePage)
{
Assert(ppPages);
Assert(cbLargePage > PAGE_SIZE);
bool fDemoted = false;
size_t cPages = (cbLargePage + PAGE_SIZE - 1) >> PAGE_SHIFT;
size_t cbPages = cPages * sizeof(page_t *);
page_t *pPageList = ppPages[0];
for (size_t iPage = 0; iPage < cPages; iPage++)
{
/*
* We need the pages exclusively locked, try upgrading the shared lock.
* If it fails, drop the shared page lock (cannot access any page_t members once this is done)
* and lookup the page from the page hash locking it exclusively.
*/
page_t *pPage = ppPages[iPage];
u_offset_t offPage = pPage->p_offset;
int rc = page_tryupgrade(pPage);
if (!rc)
{
page_unlock(pPage);
page_t *pFoundPage = page_lookup(&g_LargePageVnode, offPage, SE_EXCL);
AssertRelease(pFoundPage);
if (g_fLargePageNoReloc)
{
/*
* This can only be guaranteed if PG_NORELOC is used while allocating the pages.
*/
AssertReleaseMsg(pFoundPage == pPage,
("lookup failed %p:%llu returned %p, expected %p\n", &g_LargePageVnode, offPage,
pFoundPage, pPage));
}
/*
* Check for page demotion (regardless of relocation). Some places in Solaris (e.g. VM1 page_retire())
* could possibly demote the large page to _4K pages between our call to page_unlock() and page_lookup().
*/
if (page_get_pagecnt(pFoundPage->p_szc) == 1) /* Base size of only _4K associated with this page. */
fDemoted = true;
pPage = pFoundPage;
ppPages[iPage] = pFoundPage;
}
Assert(PAGE_LOCKED_SE(pPage, SE_EXCL));
page_pp_unlock(pPage, 0 /* COW */, 1 /* Kernel */);
}
if (fDemoted)
{
for (size_t iPage = 0; iPage < cPages; iPage++)
{
Assert(page_get_pagecnt(ppPages[iPage]->p_szc) == 1);
page_destroy(ppPages[iPage], 0 /* move it to the free list */);
}
}
else
{
/*
* Although we shred the adjacent pages in the linked list, page_destroy_pages works on
* adjacent pages via array increments. So this does indeed free all the pages.
*/
AssertPtr(pPageList);
page_destroy_pages(pPageList);
}
kmem_free(ppPages, cbPages);
page_unresv(cPages);
}
/**
* Unmaps kernel/user-space mapped memory.
*
* @param pv Pointer to the mapped memory block.
* @param cb Size of the memory block.
*/
static void rtR0MemObjSolUnmap(void *pv, size_t cb)
{
if (SOL_IS_KRNL_ADDR(pv))
{
hat_unload(kas.a_hat, pv, cb, HAT_UNLOAD | HAT_UNLOAD_UNLOCK);
vmem_free(heap_arena, pv, cb);
}
else
{
struct as *pAddrSpace = ((proc_t *)RTR0ProcHandleSelf())->p_as;
AssertPtr(pAddrSpace);
as_rangelock(pAddrSpace);
as_unmap(pAddrSpace, pv, cb);
as_rangeunlock(pAddrSpace);
}
}
/**
* Lock down memory mappings for a virtual address.
*
* @param pv Pointer to the memory to lock down.
* @param cb Size of the memory block.
* @param fAccess Page access rights (S_READ, S_WRITE, S_EXEC)
*
* @returns IPRT status code.
*/
static int rtR0MemObjSolLock(void *pv, size_t cb, int fPageAccess)
{
/*
* Kernel memory mappings on x86/amd64 are always locked, only handle user-space memory.
*/
if (!SOL_IS_KRNL_ADDR(pv))
{
proc_t *pProc = (proc_t *)RTR0ProcHandleSelf();
AssertPtr(pProc);
faultcode_t rc = as_fault(pProc->p_as->a_hat, pProc->p_as, (caddr_t)pv, cb, F_SOFTLOCK, fPageAccess);
if (rc)
{
LogRel(("rtR0MemObjSolLock failed for pv=%pv cb=%lx fPageAccess=%d rc=%d\n", pv, cb, fPageAccess, rc));
return VERR_LOCK_FAILED;
}
}
return VINF_SUCCESS;
}
/**
* Unlock memory mappings for a virtual address.
*
* @param pv Pointer to the locked memory.
* @param cb Size of the memory block.
* @param fPageAccess Page access rights (S_READ, S_WRITE, S_EXEC).
*/
static void rtR0MemObjSolUnlock(void *pv, size_t cb, int fPageAccess)
{
if (!SOL_IS_KRNL_ADDR(pv))
{
proc_t *pProcess = (proc_t *)RTR0ProcHandleSelf();
AssertPtr(pProcess);
as_fault(pProcess->p_as->a_hat, pProcess->p_as, (caddr_t)pv, cb, F_SOFTUNLOCK, fPageAccess);
}
}
/**
* Maps a list of physical pages into user address space.
*
* @param pVirtAddr Where to store the virtual address of the mapping.
* @param fPageAccess Page access rights (PROT_READ, PROT_WRITE,
* PROT_EXEC)
* @param paPhysAddrs Array of physical addresses to pages.
* @param cb Size of memory being mapped.
*
* @returns IPRT status code.
*/
static int rtR0MemObjSolUserMap(caddr_t *pVirtAddr, unsigned fPageAccess, uint64_t *paPhysAddrs, size_t cb, size_t cbPageSize)
{
struct as *pAddrSpace = ((proc_t *)RTR0ProcHandleSelf())->p_as;
int rc = VERR_INTERNAL_ERROR;
SEGVBOX_CRARGS Args;
Args.paPhysAddrs = paPhysAddrs;
Args.fPageAccess = fPageAccess;
Args.cbPageSize = cbPageSize;
as_rangelock(pAddrSpace);
map_addr(pVirtAddr, cb, 0 /* offset */, 0 /* vacalign */, MAP_SHARED);
if (*pVirtAddr != NULL)
rc = as_map(pAddrSpace, *pVirtAddr, cb, rtR0SegVBoxSolCreate, &Args);
else
rc = ENOMEM;
as_rangeunlock(pAddrSpace);
return RTErrConvertFromErrno(rc);
}
DECLHIDDEN(int) rtR0MemObjNativeFree(RTR0MEMOBJ pMem)
{
PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)pMem;
switch (pMemSolaris->Core.enmType)
{
case RTR0MEMOBJTYPE_LOW:
rtR0SolMemFree(pMemSolaris->Core.pv, pMemSolaris->Core.cb);
break;
case RTR0MEMOBJTYPE_PHYS:
if (pMemSolaris->Core.u.Phys.fAllocated)
{
if (pMemSolaris->fLargePage)
rtR0MemObjSolLargePageFree(pMemSolaris->pvHandle, pMemSolaris->Core.cb);
else
rtR0SolMemFree(pMemSolaris->Core.pv, pMemSolaris->Core.cb);
}
break;
case RTR0MEMOBJTYPE_PHYS_NC:
if (pMemSolaris->fIndivPages)
rtR0MemObjSolPagesFree(pMemSolaris->pvHandle, pMemSolaris->Core.cb);
else
rtR0SolMemFree(pMemSolaris->Core.pv, pMemSolaris->Core.cb);
break;
case RTR0MEMOBJTYPE_PAGE:
ddi_umem_free(pMemSolaris->Cookie);
break;
case RTR0MEMOBJTYPE_LOCK:
rtR0MemObjSolUnlock(pMemSolaris->Core.pv, pMemSolaris->Core.cb, pMemSolaris->fAccess);
break;
case RTR0MEMOBJTYPE_MAPPING:
rtR0MemObjSolUnmap(pMemSolaris->Core.pv, pMemSolaris->Core.cb);
break;
case RTR0MEMOBJTYPE_RES_VIRT:
{
if (pMemSolaris->Core.u.ResVirt.R0Process == NIL_RTR0PROCESS)
vmem_xfree(heap_arena, pMemSolaris->Core.pv, pMemSolaris->Core.cb);
else
AssertFailed();
break;
}
case RTR0MEMOBJTYPE_CONT: /* we don't use this type here. */
default:
AssertMsgFailed(("enmType=%d\n", pMemSolaris->Core.enmType));
return VERR_INTERNAL_ERROR;
}
return VINF_SUCCESS;
}
DECLHIDDEN(int) rtR0MemObjNativeAllocPage(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
{
/* Create the object. */
PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_PAGE, NULL, cb);
if (RT_UNLIKELY(!pMemSolaris))
return VERR_NO_MEMORY;
void *pvMem = ddi_umem_alloc(cb, DDI_UMEM_SLEEP, &pMemSolaris->Cookie);
if (RT_UNLIKELY(!pvMem))
{
rtR0MemObjDelete(&pMemSolaris->Core);
return VERR_NO_PAGE_MEMORY;
}
pMemSolaris->Core.pv = pvMem;
pMemSolaris->pvHandle = NULL;
*ppMem = &pMemSolaris->Core;
return VINF_SUCCESS;
}
DECLHIDDEN(int) rtR0MemObjNativeAllocLow(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
{
NOREF(fExecutable);
/* Create the object */
PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_LOW, NULL, cb);
if (!pMemSolaris)
return VERR_NO_MEMORY;
/* Allocate physically low page-aligned memory. */
uint64_t uPhysHi = _4G - 1;
void *pvMem = rtR0SolMemAlloc(uPhysHi, NULL /* puPhys */, cb, PAGE_SIZE, false /* fContig */);
if (RT_UNLIKELY(!pvMem))
{
rtR0MemObjDelete(&pMemSolaris->Core);
return VERR_NO_LOW_MEMORY;
}
pMemSolaris->Core.pv = pvMem;
pMemSolaris->pvHandle = NULL;
*ppMem = &pMemSolaris->Core;
return VINF_SUCCESS;
}
DECLHIDDEN(int) rtR0MemObjNativeAllocCont(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
{
NOREF(fExecutable);
return rtR0MemObjNativeAllocPhys(ppMem, cb, _4G - 1, PAGE_SIZE /* alignment */);
}
DECLHIDDEN(int) rtR0MemObjNativeAllocPhysNC(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest)
{
#if HC_ARCH_BITS == 64
PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_PHYS_NC, NULL, cb);
if (RT_UNLIKELY(!pMemSolaris))
return VERR_NO_MEMORY;
if (PhysHighest == NIL_RTHCPHYS)
{
uint64_t PhysAddr = UINT64_MAX;
void *pvPages = rtR0MemObjSolPagesAlloc(&PhysAddr, cb);
if (!pvPages)
{
LogRel(("rtR0MemObjNativeAllocPhysNC: rtR0MemObjSolPagesAlloc failed for cb=%u.\n", cb));
rtR0MemObjDelete(&pMemSolaris->Core);
return VERR_NO_MEMORY;
}
Assert(PhysAddr != UINT64_MAX);
Assert(!(PhysAddr & PAGE_OFFSET_MASK));
pMemSolaris->Core.pv = NULL;
pMemSolaris->pvHandle = pvPages;
pMemSolaris->fIndivPages = true;
*ppMem = &pMemSolaris->Core;
return VINF_SUCCESS;
}
else
{
/*
* If we must satisfy an upper limit constraint, it isn't feasible to grab individual pages.
* We fall back to using contig_alloc().
*/
uint64_t PhysAddr = UINT64_MAX;
void *pvMem = rtR0SolMemAlloc(PhysHighest, &PhysAddr, cb, PAGE_SIZE, false /* fContig */);
if (!pvMem)
{
LogRel(("rtR0MemObjNativeAllocPhysNC: rtR0SolMemAlloc failed for cb=%u PhysHighest=%RHp.\n", cb, PhysHighest));
rtR0MemObjDelete(&pMemSolaris->Core);
return VERR_NO_MEMORY;
}
Assert(PhysAddr != UINT64_MAX);
Assert(!(PhysAddr & PAGE_OFFSET_MASK));
pMemSolaris->Core.pv = pvMem;
pMemSolaris->pvHandle = NULL;
pMemSolaris->fIndivPages = false;
*ppMem = &pMemSolaris->Core;
return VINF_SUCCESS;
}
#else /* 32 bit: */
return VERR_NOT_SUPPORTED; /* see the RTR0MemObjAllocPhysNC specs */
#endif
}
DECLHIDDEN(int) rtR0MemObjNativeAllocPhys(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest, size_t uAlignment)
{
AssertMsgReturn(PhysHighest >= 16 *_1M, ("PhysHigest=%RHp\n", PhysHighest), VERR_NOT_SUPPORTED);
PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_PHYS, NULL, cb);
if (RT_UNLIKELY(!pMemSolaris))
return VERR_NO_MEMORY;
/*
* Allocating one large page gets special treatment.
*/
static uint32_t s_cbLargePage = UINT32_MAX;
if (s_cbLargePage == UINT32_MAX)
{
if (page_num_pagesizes() > 1)
ASMAtomicWriteU32(&s_cbLargePage, page_get_pagesize(1)); /* Page-size code 1 maps to _2M on Solaris x86/amd64. */
else
ASMAtomicWriteU32(&s_cbLargePage, 0);
}
uint64_t PhysAddr;
if ( cb == s_cbLargePage
&& cb == uAlignment
&& PhysHighest == NIL_RTHCPHYS)
{
/*
* Allocate one large page (backed by physically contiguous memory).
*/
void *pvPages = rtR0MemObjSolLargePageAlloc(&PhysAddr, cb);
if (RT_LIKELY(pvPages))
{
AssertMsg(!(PhysAddr & (cb - 1)), ("%RHp\n", PhysAddr));
pMemSolaris->Core.pv = NULL;
pMemSolaris->Core.u.Phys.PhysBase = PhysAddr;
pMemSolaris->Core.u.Phys.fAllocated = true;
pMemSolaris->pvHandle = pvPages;
pMemSolaris->fLargePage = true;
*ppMem = &pMemSolaris->Core;
return VINF_SUCCESS;
}
}
else
{
/*
* Allocate physically contiguous memory aligned as specified.
*/
AssertCompile(NIL_RTHCPHYS == UINT64_MAX); NOREF(RTASSERTVAR);
PhysAddr = PhysHighest;
void *pvMem = rtR0SolMemAlloc(PhysHighest, &PhysAddr, cb, uAlignment, true /* fContig */);
if (RT_LIKELY(pvMem))
{
Assert(!(PhysAddr & PAGE_OFFSET_MASK));
Assert(PhysAddr < PhysHighest);
Assert(PhysAddr + cb <= PhysHighest);
pMemSolaris->Core.pv = pvMem;
pMemSolaris->Core.u.Phys.PhysBase = PhysAddr;
pMemSolaris->Core.u.Phys.fAllocated = true;
pMemSolaris->pvHandle = NULL;
pMemSolaris->fLargePage = false;
*ppMem = &pMemSolaris->Core;
return VINF_SUCCESS;
}
}
rtR0MemObjDelete(&pMemSolaris->Core);
return VERR_NO_CONT_MEMORY;
}
DECLHIDDEN(int) rtR0MemObjNativeEnterPhys(PPRTR0MEMOBJINTERNAL ppMem, RTHCPHYS Phys, size_t cb, uint32_t uCachePolicy)
{
AssertReturn(uCachePolicy == RTMEM_CACHE_POLICY_DONT_CARE, VERR_NOT_SUPPORTED);
/* Create the object. */
PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_PHYS, NULL, cb);
if (!pMemSolaris)
return VERR_NO_MEMORY;
/* There is no allocation here, it needs to be mapped somewhere first. */
pMemSolaris->Core.u.Phys.fAllocated = false;
pMemSolaris->Core.u.Phys.PhysBase = Phys;
pMemSolaris->Core.u.Phys.uCachePolicy = uCachePolicy;
*ppMem = &pMemSolaris->Core;
return VINF_SUCCESS;
}
DECLHIDDEN(int) rtR0MemObjNativeLockUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3Ptr, size_t cb, uint32_t fAccess,
RTR0PROCESS R0Process)
{
AssertReturn(R0Process == RTR0ProcHandleSelf(), VERR_INVALID_PARAMETER);
NOREF(fAccess);
/* Create the locking object */
PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_LOCK, (void *)R3Ptr, cb);
if (!pMemSolaris)
return VERR_NO_MEMORY;
/* Lock down user pages. */
int fPageAccess = S_READ;
if (fAccess & RTMEM_PROT_WRITE)
fPageAccess = S_WRITE;
if (fAccess & RTMEM_PROT_EXEC)
fPageAccess = S_EXEC;
int rc = rtR0MemObjSolLock((void *)R3Ptr, cb, fPageAccess);
if (RT_FAILURE(rc))
{
LogRel(("rtR0MemObjNativeLockUser: rtR0MemObjSolLock failed rc=%d\n", rc));
rtR0MemObjDelete(&pMemSolaris->Core);
return rc;
}
/* Fill in the object attributes and return successfully. */
pMemSolaris->Core.u.Lock.R0Process = R0Process;
pMemSolaris->pvHandle = NULL;
pMemSolaris->fAccess = fPageAccess;
*ppMem = &pMemSolaris->Core;
return VINF_SUCCESS;
}
DECLHIDDEN(int) rtR0MemObjNativeLockKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pv, size_t cb, uint32_t fAccess)
{
NOREF(fAccess);
PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_LOCK, pv, cb);
if (!pMemSolaris)
return VERR_NO_MEMORY;
/* Lock down kernel pages. */
int fPageAccess = S_READ;
if (fAccess & RTMEM_PROT_WRITE)
fPageAccess = S_WRITE;
if (fAccess & RTMEM_PROT_EXEC)
fPageAccess = S_EXEC;
int rc = rtR0MemObjSolLock(pv, cb, fPageAccess);
if (RT_FAILURE(rc))
{
LogRel(("rtR0MemObjNativeLockKernel: rtR0MemObjSolLock failed rc=%d\n", rc));
rtR0MemObjDelete(&pMemSolaris->Core);
return rc;
}
/* Fill in the object attributes and return successfully. */
pMemSolaris->Core.u.Lock.R0Process = NIL_RTR0PROCESS;
pMemSolaris->pvHandle = NULL;
pMemSolaris->fAccess = fPageAccess;
*ppMem = &pMemSolaris->Core;
return VINF_SUCCESS;
}
DECLHIDDEN(int) rtR0MemObjNativeReserveKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pvFixed, size_t cb, size_t uAlignment)
{
PRTR0MEMOBJSOL pMemSolaris;
/*
* Use xalloc.
*/
void *pv = vmem_xalloc(heap_arena, cb, uAlignment, 0 /* phase */, 0 /* nocross */,
NULL /* minaddr */, NULL /* maxaddr */, VM_SLEEP);
if (RT_UNLIKELY(!pv))
return VERR_NO_MEMORY;
/* Create the object. */
pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_RES_VIRT, pv, cb);
if (!pMemSolaris)
{
LogRel(("rtR0MemObjNativeReserveKernel failed to alloc memory object.\n"));
vmem_xfree(heap_arena, pv, cb);
return VERR_NO_MEMORY;
}
pMemSolaris->Core.u.ResVirt.R0Process = NIL_RTR0PROCESS;
*ppMem = &pMemSolaris->Core;
return VINF_SUCCESS;
}
DECLHIDDEN(int) rtR0MemObjNativeReserveUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment,
RTR0PROCESS R0Process)
{
return VERR_NOT_SUPPORTED;
}
DECLHIDDEN(int) rtR0MemObjNativeMapKernel(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap, void *pvFixed, size_t uAlignment,
unsigned fProt, size_t offSub, size_t cbSub)
{
/* Fail if requested to do something we can't. */
AssertMsgReturn(pvFixed == (void *)-1, ("%p\n", pvFixed), VERR_NOT_SUPPORTED);
if (uAlignment > PAGE_SIZE)
return VERR_NOT_SUPPORTED;
/*
* Use xalloc to get address space.
*/
if (!cbSub)
cbSub = pMemToMap->cb;
void *pv = vmem_xalloc(heap_arena, cbSub, uAlignment, 0 /* phase */, 0 /* nocross */,
NULL /* minaddr */, NULL /* maxaddr */, VM_SLEEP);
if (RT_UNLIKELY(!pv))
return VERR_MAP_FAILED;
/*
* Load the pages from the other object into it.
*/
uint32_t fAttr = HAT_UNORDERED_OK | HAT_MERGING_OK | HAT_LOADCACHING_OK | HAT_STORECACHING_OK;
if (fProt & RTMEM_PROT_READ)
fAttr |= PROT_READ;
if (fProt & RTMEM_PROT_EXEC)
fAttr |= PROT_EXEC;
if (fProt & RTMEM_PROT_WRITE)
fAttr |= PROT_WRITE;
fAttr |= HAT_NOSYNC;
int rc = VINF_SUCCESS;
size_t off = 0;
while (off < cbSub)
{
RTHCPHYS HCPhys = rtR0MemObjNativeGetPagePhysAddr(pMemToMap, (offSub + offSub) >> PAGE_SHIFT);
AssertBreakStmt(HCPhys != NIL_RTHCPHYS, rc = VERR_INTERNAL_ERROR_2);
pfn_t pfn = HCPhys >> PAGE_SHIFT;
AssertBreakStmt(((RTHCPHYS)pfn << PAGE_SHIFT) == HCPhys, rc = VERR_INTERNAL_ERROR_3);
hat_devload(kas.a_hat, (uint8_t *)pv + off, PAGE_SIZE, pfn, fAttr, HAT_LOAD_LOCK);
/* Advance. */
off += PAGE_SIZE;
}
if (RT_SUCCESS(rc))
{
/*
* Create a memory object for the mapping.
*/
PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_MAPPING, pv, cbSub);
if (pMemSolaris)
{
pMemSolaris->Core.u.Mapping.R0Process = NIL_RTR0PROCESS;
*ppMem = &pMemSolaris->Core;
return VINF_SUCCESS;
}
LogRel(("rtR0MemObjNativeMapKernel failed to alloc memory object.\n"));
rc = VERR_NO_MEMORY;
}
if (off)
hat_unload(kas.a_hat, pv, off, HAT_UNLOAD | HAT_UNLOAD_UNLOCK);
vmem_xfree(heap_arena, pv, cbSub);
return rc;
}
DECLHIDDEN(int) rtR0MemObjNativeMapUser(PPRTR0MEMOBJINTERNAL ppMem, PRTR0MEMOBJINTERNAL pMemToMap, RTR3PTR R3PtrFixed,
size_t uAlignment, unsigned fProt, RTR0PROCESS R0Process)
{
/*
* Fend off things we cannot do.
*/
AssertMsgReturn(R3PtrFixed == (RTR3PTR)-1, ("%p\n", R3PtrFixed), VERR_NOT_SUPPORTED);
AssertMsgReturn(R0Process == RTR0ProcHandleSelf(), ("%p != %p\n", R0Process, RTR0ProcHandleSelf()), VERR_NOT_SUPPORTED);
if (uAlignment != PAGE_SIZE)
return VERR_NOT_SUPPORTED;
/*
* Get parameters from the source object.
*/
PRTR0MEMOBJSOL pMemToMapSolaris = (PRTR0MEMOBJSOL)pMemToMap;
void *pv = pMemToMapSolaris->Core.pv;
size_t cb = pMemToMapSolaris->Core.cb;
size_t cPages = (cb + PAGE_SIZE - 1) >> PAGE_SHIFT;
/*
* Create the mapping object
*/
PRTR0MEMOBJSOL pMemSolaris;
pMemSolaris = (PRTR0MEMOBJSOL)rtR0MemObjNew(sizeof(*pMemSolaris), RTR0MEMOBJTYPE_MAPPING, pv, cb);
if (RT_UNLIKELY(!pMemSolaris))
return VERR_NO_MEMORY;
int rc = VINF_SUCCESS;
uint64_t *paPhysAddrs = kmem_zalloc(sizeof(uint64_t) * cPages, KM_SLEEP);
if (RT_LIKELY(paPhysAddrs))
{
/*
* Prepare the pages for mapping according to type.
*/
if ( pMemToMapSolaris->Core.enmType == RTR0MEMOBJTYPE_PHYS_NC
&& pMemToMapSolaris->fIndivPages)
{
page_t **ppPages = pMemToMapSolaris->pvHandle;
AssertPtr(ppPages);
for (size_t iPage = 0; iPage < cPages; iPage++)
paPhysAddrs[iPage] = rtR0MemObjSolPagePhys(ppPages[iPage]);
}
else if ( pMemToMapSolaris->Core.enmType == RTR0MEMOBJTYPE_PHYS
&& pMemToMapSolaris->fLargePage)
{
RTHCPHYS Phys = pMemToMapSolaris->Core.u.Phys.PhysBase;
for (size_t iPage = 0; iPage < cPages; iPage++, Phys += PAGE_SIZE)
paPhysAddrs[iPage] = Phys;
}
else
{
/*
* Have kernel mapping, just translate virtual to physical.
*/
AssertPtr(pv);
rc = VINF_SUCCESS;
for (size_t iPage = 0; iPage < cPages; iPage++)
{
paPhysAddrs[iPage] = rtR0MemObjSolVirtToPhys(pv);
if (RT_UNLIKELY(paPhysAddrs[iPage] == -(uint64_t)1))
{
LogRel(("rtR0MemObjNativeMapUser: no page to map.\n"));
rc = VERR_MAP_FAILED;
break;
}
pv = (void *)((uintptr_t)pv + PAGE_SIZE);
}
}
if (RT_SUCCESS(rc))
{
unsigned fPageAccess = PROT_READ;
if (fProt & RTMEM_PROT_WRITE)
fPageAccess |= PROT_WRITE;
if (fProt & RTMEM_PROT_EXEC)
fPageAccess |= PROT_EXEC;
/*
* Perform the actual mapping.
*/
caddr_t UserAddr = NULL;
rc = rtR0MemObjSolUserMap(&UserAddr, fPageAccess, paPhysAddrs, cb, PAGE_SIZE);
if (RT_SUCCESS(rc))
{
pMemSolaris->Core.u.Mapping.R0Process = R0Process;
pMemSolaris->Core.pv = UserAddr;
*ppMem = &pMemSolaris->Core;
kmem_free(paPhysAddrs, sizeof(uint64_t) * cPages);
return VINF_SUCCESS;
}
LogRel(("rtR0MemObjNativeMapUser: rtR0MemObjSolUserMap failed rc=%d.\n", rc));
}
rc = VERR_MAP_FAILED;
kmem_free(paPhysAddrs, sizeof(uint64_t) * cPages);
}
else
rc = VERR_NO_MEMORY;
rtR0MemObjDelete(&pMemSolaris->Core);
return rc;
}
DECLHIDDEN(int) rtR0MemObjNativeProtect(PRTR0MEMOBJINTERNAL pMem, size_t offSub, size_t cbSub, uint32_t fProt)
{
NOREF(pMem);
NOREF(offSub);
NOREF(cbSub);
NOREF(fProt);
return VERR_NOT_SUPPORTED;
}
DECLHIDDEN(RTHCPHYS) rtR0MemObjNativeGetPagePhysAddr(PRTR0MEMOBJINTERNAL pMem, size_t iPage)
{
PRTR0MEMOBJSOL pMemSolaris = (PRTR0MEMOBJSOL)pMem;
switch (pMemSolaris->Core.enmType)
{
case RTR0MEMOBJTYPE_PHYS_NC:
if ( pMemSolaris->Core.u.Phys.fAllocated
|| !pMemSolaris->fIndivPages)
{
uint8_t *pb = (uint8_t *)pMemSolaris->Core.pv + ((size_t)iPage << PAGE_SHIFT);
return rtR0MemObjSolVirtToPhys(pb);
}
page_t **ppPages = pMemSolaris->pvHandle;
return rtR0MemObjSolPagePhys(ppPages[iPage]);
case RTR0MEMOBJTYPE_PAGE:
case RTR0MEMOBJTYPE_LOW:
case RTR0MEMOBJTYPE_LOCK:
{
uint8_t *pb = (uint8_t *)pMemSolaris->Core.pv + ((size_t)iPage << PAGE_SHIFT);
return rtR0MemObjSolVirtToPhys(pb);
}
/*
* Although mapping can be handled by rtR0MemObjSolVirtToPhys(offset) like the above case,
* request it from the parent so that we have a clear distinction between CONT/PHYS_NC.
*/
case RTR0MEMOBJTYPE_MAPPING:
return rtR0MemObjNativeGetPagePhysAddr(pMemSolaris->Core.uRel.Child.pParent, iPage);
case RTR0MEMOBJTYPE_CONT:
case RTR0MEMOBJTYPE_PHYS:
AssertFailed(); /* handled by the caller */
case RTR0MEMOBJTYPE_RES_VIRT:
default:
return NIL_RTHCPHYS;
}
}