memobj-r0drv-linux.c revision 10980b915d35fd2fada103463f764165662b8f0c
/* $Revision$ */
/** @file
* IPRT - Ring-0 Memory Objects, Linux.
*/
/*
* Copyright (C) 2006-2007 Sun Microsystems, Inc.
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*
* The contents of this file may alternatively be used under the terms
* of the Common Development and Distribution License Version 1.0
* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
* VirtualBox OSE distribution, in which case the provisions of the
* CDDL are applicable instead of those of the GPL.
*
* You may elect to license modified versions of this file under the
* terms and conditions of either the GPL or the CDDL or both.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
* Clara, CA 95054 USA or visit http://www.sun.com if you need
* additional information or have any questions.
*/
/*******************************************************************************
* Header Files *
*******************************************************************************/
#include "the-linux-kernel.h"
/* early 2.6 kernels */
#ifndef PAGE_SHARED_EXEC
# define PAGE_SHARED_EXEC PAGE_SHARED
#endif
#ifndef PAGE_READONLY_EXEC
# define PAGE_READONLY_EXEC PAGE_READONLY
#endif
/*******************************************************************************
* Structures and Typedefs *
*******************************************************************************/
/**
* The Linux version of the memory object structure.
*/
typedef struct RTR0MEMOBJLNX
{
/** The core structure. */
/** Set if the allocation is contiguous.
* This means it has to be given back as one chunk. */
bool fContiguous;
/** Set if we've vmap'ed the memory into ring-0. */
bool fMappedToRing0;
/** The number of pages in the apPages array. */
/** Array of struct page pointers. (variable size) */
/**
* Helper that converts from a RTR0PROCESS handle to a linux task.
*
* @returns The corresponding Linux task.
* @param R0Process IPRT ring-0 process handle.
*/
{
/** @todo fix rtR0ProcessToLinuxTask!! */
}
/**
* Compute order. Some functions allocate 2^order pages.
*
* @returns order.
* @param cPages Number of pages.
*/
{
int iOrder;
;
++iOrder;
return iOrder;
}
/**
* Converts from RTMEM_PROT_* to Linux PAGE_*.
*
* @returns Linux page protection constant.
* @param fProt The IPRT protection mask.
* @param fKernel Whether it applies to kernel or user space.
*/
{
switch (fProt)
{
default:
case RTMEM_PROT_NONE:
return PAGE_NONE;
case RTMEM_PROT_READ:
case RTMEM_PROT_WRITE:
case RTMEM_PROT_WRITE | RTMEM_PROT_READ:
case RTMEM_PROT_EXEC:
case RTMEM_PROT_EXEC | RTMEM_PROT_READ:
#if defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
if (fKernel)
{
return fPg;
}
return PAGE_READONLY_EXEC;
#else
#endif
case RTMEM_PROT_WRITE | RTMEM_PROT_EXEC:
}
}
/**
* Internal worker that allocates physical pages and creates the memory object for them.
*
* @returns IPRT status code.
* @param ppMemLnx Where to store the memory object pointer.
* @param enmType The object type.
* @param cb The number of bytes to allocate.
* @param fFlagsLnx The page allocation flags (GFPs).
* @param fContiguous Whether the allocation must be contiguous.
*/
static int rtR0MemObjLinuxAllocPages(PRTR0MEMOBJLNX *ppMemLnx, RTR0MEMOBJTYPE enmType, size_t cb, unsigned fFlagsLnx, bool fContiguous)
{
/*
* Allocate a memory object structure that's large enough to contain
* the page pointer array.
*/
/* The size passed is the offset of apPages[cPages] within RTR0MEMOBJLNX,
 * i.e. the fixed part plus cPages array entries.
 * NOTE(review): the computation of cPages is not visible in this block. */
PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), enmType, NULL, cb);
if (!pMemLnx)
return VERR_NO_MEMORY;
/*
* Allocate the pages.
* For small allocations we'll try contiguous first and then fall back on page by page.
*/
/* NOTE(review): the rest of this condition and the opening #if of the
 * version check appear to be missing here. */
if ( fContiguous
{
#ifdef VBOX_USE_INSERT_PAGE
#else
#endif
if (paPages)
{
/* The contiguous attempt worked; setting the flag makes the
 * page-by-page fallback below get skipped. */
fContiguous = true;
}
else if (fContiguous)
{
/* The caller required a contiguous chunk, so there is no fallback. */
return VERR_NO_MEMORY;
}
}
/* Page-by-page path: taken when no contiguous chunk was requested or obtained. */
if (!fContiguous)
{
{
{
/* NOTE(review): presumably undoes the pages allocated so far before
 * failing; the loop body is not visible here. */
while (iPage-- > 0)
return VERR_NO_MEMORY;
}
}
}
#else /* < 2.4.22 */
/** @todo figure out why we didn't allocate page-by-page on 2.4.21 and older... */
if (!paPages)
{
return VERR_NO_MEMORY;
}
{
/* NOTE(review): the condition guarding this BUG() is not visible here. */
BUG();
}
/* On this code path the allocation is always treated as contiguous. */
fContiguous = true;
#endif /* < 2.4.22 */
/*
* Reserve the pages.
*/
return VINF_SUCCESS;
}
/**
* Frees the physical pages allocated by the rtR0MemObjLinuxAllocPages() call.
*
* This method does NOT free the object.
*
* @param pMemLnx The object which physical pages should be freed.
*/
{
if (iPage > 0)
{
/*
* Restore the page flags.
*/
while (iPage-- > 0)
{
#else
#endif
}
/*
* Free the pages.
*/
if (!pMemLnx->fContiguous)
{
while (iPage-- > 0)
}
else
#endif
}
}
/**
* Maps the allocation into ring-0.
*
* This will update the RTR0MEMOBJLNX::Core.pv and RTR0MEMOBJLNX::fMappedToRing0 members.
*
* Contiguous mappings that aren't in 'high' memory will already be mapped into kernel
* space, so we'll use that mapping if possible. If execute access is required, we'll
* play safe and do our own mapping.
*
* @returns IPRT status code.
* @param pMemLnx The linux memory object to map.
* @param fExecutable Whether execute access is required.
*/
{
int rc = VINF_SUCCESS;
/*
* Choose mapping strategy.
*/
bool fMustMap = fExecutable
|| !pMemLnx->fContiguous;
if (!fMustMap)
{
while (iPage-- > 0)
{
fMustMap = true;
break;
}
}
if (fMustMap)
{
/*
* Use vmap - 2.4.22 and later.
*/
# ifdef _PAGE_NX
if (!fExecutable)
# endif
# ifdef VM_MAP
# else
# endif
pMemLnx->fMappedToRing0 = true;
else
#else /* < 2.4.22 */
#endif
}
else
{
/*
* Use the kernel RAM mapping.
*/
}
return rc;
}
/**
* Undoes what rtR0MemObjLinuxVMap() did.
*
* @param pMemLnx The linux memory object.
*/
{
if (pMemLnx->fMappedToRing0)
{
pMemLnx->fMappedToRing0 = false;
}
#else /* < 2.4.22 */
#endif
}
{
/*
* Release any memory that we've allocated or locked.
*/
{
case RTR0MEMOBJTYPE_LOW:
case RTR0MEMOBJTYPE_PAGE:
case RTR0MEMOBJTYPE_CONT:
case RTR0MEMOBJTYPE_PHYS:
case RTR0MEMOBJTYPE_PHYS_NC:
break;
case RTR0MEMOBJTYPE_LOCK:
{
while (iPage-- > 0)
{
}
}
else
AssertFailed(); /* not implemented for R0 */
break;
case RTR0MEMOBJTYPE_RES_VIRT:
{
{
}
}
else
{
}
break;
case RTR0MEMOBJTYPE_MAPPING:
{
{
}
}
else
break;
default:
return VERR_INTERNAL_ERROR;
}
return VINF_SUCCESS;
}
{
int rc;
rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, cb, GFP_HIGHUSER, false /* non-contiguous */);
#else
rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, cb, GFP_USER, false /* non-contiguous */);
#endif
if (RT_SUCCESS(rc))
{
if (RT_SUCCESS(rc))
{
return rc;
}
}
return rc;
}
{
int rc;
#ifdef RT_ARCH_AMD64
# ifdef GFP_DMA32
rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, GFP_DMA32, false /* non-contiguous */);
if (RT_FAILURE(rc))
# endif
rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, GFP_DMA, false /* non-contiguous */);
#else
rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, GFP_USER, false /* non-contiguous */);
#endif
if (RT_SUCCESS(rc))
{
if (RT_SUCCESS(rc))
{
return rc;
}
}
return rc;
}
{
int rc;
#ifdef RT_ARCH_AMD64
# ifdef GFP_DMA32
rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, GFP_DMA32, true /* contiguous */);
if (RT_FAILURE(rc))
# endif
#else
#endif
if (RT_SUCCESS(rc))
{
if (RT_SUCCESS(rc))
{
#if defined(RT_STRICT) && defined(RT_ARCH_AMD64)
while (iPage-- > 0)
#endif
return rc;
}
}
return rc;
}
/**
* Worker for rtR0MemObjLinuxAllocPhysSub that tries one allocation strategy.
*
* @returns IPRT status.
* @param ppMem Where to store the memory object pointer on success.
* @param enmType The object type.
* @param cb The size of the allocation.
* @param PhysHighest See rtR0MemObjNativeAllocPhys.
* @param fGfp The Linux GFP flags to use for the allocation.
*/
static int rtR0MemObjLinuxAllocPhysSub2(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJTYPE enmType, size_t cb, RTHCPHYS PhysHighest, unsigned fGfp)
{
int rc;
/* NOTE(review): the allocation call that assigns rc is not visible here;
 * on failure its status is returned to the caller unchanged. */
if (RT_FAILURE(rc))
return rc;
/*
* Check the addresses if necessary. (Can be optimized a bit for PHYS.)
*/
/* NIL_RTHCPHYS means the caller set no upper limit, so the check is skipped. */
if (PhysHighest != NIL_RTHCPHYS)
{
while (iPage-- > 0)
{
/* NOTE(review): presumably reached only for a page above PhysHighest;
 * the comparison and cleanup are not visible here. */
return VERR_NO_MEMORY;
}
}
/*
* Complete the object.
*/
/* Extra completion work applies to RTR0MEMOBJTYPE_PHYS only. */
if (enmType == RTR0MEMOBJTYPE_PHYS)
{
}
return rc;
}
/**
* Worker for rtR0MemObjNativeAllocPhys and rtR0MemObjNativeAllocPhysNC.
*
* @returns IPRT status.
* @param ppMem Where to store the memory object pointer on success.
* @param enmType The object type.
* @param cb The size of the allocation.
* @param PhysHighest See rtR0MemObjNativeAllocPhys.
*/
static int rtR0MemObjLinuxAllocPhysSub(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJTYPE enmType, size_t cb, RTHCPHYS PhysHighest)
{
int rc;
/*
* There are two clear cases and that's the <=16MB and anything-goes ones.
* When the physical address limit is somewhere in between those two we'll
* just have to try, starting with HIGHUSER and working our way thru the
* different types, hoping we'll get lucky.
*
* We should probably move this physical address restriction logic up to
* the page alloc function as it would be more efficient there. But since
* we don't expect this to be a performance issue just yet it can wait.
*/
/* NIL_RTHCPHYS is the "anything goes" case (no upper address limit). */
if (PhysHighest == NIL_RTHCPHYS)
else
{
/* Start out failed so the first RT_FAILURE() guard passes. Each later
 * guard passes only while rc still holds a failure status.
 * NOTE(review): the guarded allocation attempts are not visible here. */
rc = VERR_NO_MEMORY;
if (RT_FAILURE(rc))
if (RT_FAILURE(rc))
#ifdef GFP_DMA32
/* This guard is only compiled when the kernel defines GFP_DMA32. */
if (RT_FAILURE(rc))
#endif
if (RT_FAILURE(rc))
}
return rc;
}
{
}
{
}
{
/*
* All we need to do here is to validate that we can use
* ioremap on the specified address (32/64-bit dma_addr_t).
*/
if (!pMemLnx)
return VERR_NO_MEMORY;
return VINF_SUCCESS;
}
/**
 * Locks down a range of user memory and creates a RTR0MEMOBJTYPE_LOCK
 * memory object for it.
 *
 * @returns VINF_SUCCESS on success, VERR_NOT_SUPPORTED if there is no Linux
 *          task, VERR_OUT_OF_RANGE or VERR_NO_MEMORY on the other failure
 *          paths visible below.
 * @param ppMem Where to store the memory object pointer on success
 *              (presumably; the assignment is not visible here).
 * @param R3Ptr The user address where the range starts.
 * @param cb The size of the range in bytes.
 * @param R0Process The process the memory belongs to (presumably converted
 *                  to pTask; the conversion is not visible here).
 */
int rtR0MemObjNativeLockUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3Ptr, size_t cb, RTR0PROCESS R0Process)
{
struct vm_area_struct **papVMAs;
int rc = VERR_NO_MEMORY;
/*
* Check for valid task and size overflows.
*/
if (!pTask)
return VERR_NOT_SUPPORTED;
/* NOTE(review): the overflow condition guarding this return is not visible here. */
return VERR_OUT_OF_RANGE;
/*
* Allocate the memory object and a temporary buffer for the VMAs.
*/
/* The object is sized to hold cPages entries in its apPages array. */
pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), RTR0MEMOBJTYPE_LOCK, (void *)R3Ptr, cb);
if (!pMemLnx)
return VERR_NO_MEMORY;
if (papVMAs)
{
/*
* Get user pages.
*/
/* NOTE(review): the head of this call (function name and leading
 * arguments) is not visible here. */
R3Ptr, /* Where from. */
cPages, /* How many pages. */
1, /* Write to memory. */
0, /* force. */
papVMAs); /* vmas */
{
/*
* Flush dcache (required?) and protect against fork.
*/
/** @todo The Linux fork() protection will require more work if this API
* is to be used for anything but locking VM pages. */
/* rc is used as a countdown here, walking the pages from last to first. */
while (rc-- > 0)
{
}
return VINF_SUCCESS;
}
/*
* Failed - we need to unlock any pages that we succeeded to lock.
*/
/* rc is counted down to zero; the loop body is not visible here. */
while (rc-- > 0)
{
}
}
return rc;
}
{
return VERR_NOT_SUPPORTED;
}
/**
 * Reserves a range of kernel virtual address space and creates a
 * RTR0MEMOBJTYPE_RES_VIRT memory object for it.
 *
 * @returns VINF_SUCCESS on success, VERR_NO_MEMORY on allocation or mapping
 *          failure, VERR_NOT_SUPPORTED on kernels older than 2.4.22.
 * @param ppMem Where to store the memory object pointer on success
 *              (presumably; the assignment is not visible here).
 * @param pvFixed Requested address (its handling is not visible here).
 * @param cb The size of the reservation in bytes.
 * @param uAlignment Requested alignment (its handling is not visible here).
 */
int rtR0MemObjNativeReserveKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pvFixed, size_t cb, size_t uAlignment)
{
/* NOTE(review): the opening #if of the kernel version check that pairs
 * with the "#else" branch below is not visible here. */
struct page *pDummyPage;
/* check for unsupported stuff. */
/*
* Allocate a dummy page and create a page pointer array for vmap such that
* the dummy page is mapped all over the reserved area.
*/
if (!pDummyPage)
return VERR_NO_MEMORY;
if (papPages)
{
void *pv;
/* NOTE(review): presumably stores pDummyPage in every papPages slot;
 * the loop body and the vmap call are not visible here. */
while (iPage-- > 0)
# ifdef VM_MAP
# else
# endif
if (pv)
{
/* The memory object records the mapped address (pv) and the size (cb). */
PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_RES_VIRT, pv, cb);
if (pMemLnx)
{
return VINF_SUCCESS;
}
}
}
/* Every failure on this path is reported as out-of-memory. */
return VERR_NO_MEMORY;
#else /* < 2.4.22 */
/*
* Could probably use ioremap here, but the caller is in a better position than us
* to select some safe physical memory.
*/
return VERR_NOT_SUPPORTED;
#endif
}
/**
* Worker for rtR0MemObjNativeReserveUser and rtR0MemObjNativeMapUser that creates
* an empty user space mapping.
*
* The caller takes care of acquiring the mmap_sem of the task.
*
* @returns Pointer to the mapping.
* (void *)-1 on failure.
* @param R3PtrFixed (RTR3PTR)-1 if anywhere, otherwise a specific location.
* @param cb The size of the mapping.
* @param uAlignment The alignment of the mapping.
* @param pTask The Linux task to create this mapping in.
* @param fProt The RTMEM_PROT_* mask.
*/
static void *rtR0MemObjLinuxDoMmap(RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment, struct task_struct *pTask, unsigned fProt)
{
unsigned fLnxProt;
unsigned long ulAddr;
/*
* Convert from IPRT protection to mman.h PROT_ and call do_mmap.
*/
/* NOTE(review): the statements for the PROT_NONE, READ and EXEC cases are
 * not visible here; only the WRITE -> PROT_WRITE translation is. */
if (fProt == RTMEM_PROT_NONE)
else
{
/* Build the Linux mask bit by bit from the IPRT mask. */
fLnxProt = 0;
if (fProt & RTMEM_PROT_READ)
if (fProt & RTMEM_PROT_WRITE)
fLnxProt |= PROT_WRITE;
if (fProt & RTMEM_PROT_EXEC)
}
/* NOTE(review): the "if" that this "else" belongs to is not visible here. */
else
{
{
/** @todo implement uAlignment properly... We'll probably need to make some dummy mappings to fill
* up alignment gaps. This is of course complicated by fragmentation (which we might have caused
* ourselves) and further by there being two mmap strategies (top / bottom). */
/* For now, just ignore uAlignment requirements... */
}
}
/* (void *)-1 is the failure value.
 * NOTE(review): the condition guarding this return is not visible here. */
return (void *)-1;
return (void *)ulAddr;
}
/**
 * Reserves a range of user address space in the given process.
 *
 * @returns VINF_SUCCESS on success, VERR_NOT_SUPPORTED if there is no Linux
 *          task, VERR_NO_MEMORY if the mapping or the memory object could
 *          not be created.
 * @param ppMem Where to store the memory object pointer on success
 *              (presumably; the assignment is not visible here).
 * @param R3PtrFixed Requested user address.
 * @param cb The size of the reservation in bytes.
 * @param uAlignment Requested alignment.
 * @param R0Process The target process (presumably converted to pTask; the
 *                  conversion is not visible here).
 */
int rtR0MemObjNativeReserveUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment, RTR0PROCESS R0Process)
{
void *pv;
if (!pTask)
return VERR_NOT_SUPPORTED;
/*
* Let rtR0MemObjLinuxDoMmap do the difficult bits.
*/
/* (void *)-1 is the failure value of rtR0MemObjLinuxDoMmap.
 * NOTE(review): the call that assigns pv is not visible here. */
if (pv == (void *)-1)
return VERR_NO_MEMORY;
/* NOTE(review): the creation of pMemLnx, and any cleanup of the mapping
 * when it fails, is not visible here. */
if (!pMemLnx)
{
return VERR_NO_MEMORY;
}
return VINF_SUCCESS;
}
/**
 * Maps an existing memory object into kernel space and creates a
 * RTR0MEMOBJTYPE_MAPPING memory object for the mapping.
 *
 * @returns VINF_SUCCESS on success, otherwise the failure status left in rc
 *          (VERR_NO_MEMORY unless one of the paths below changes it).
 * @param ppMem Where to store the memory object pointer on success
 *              (presumably; the assignment is not visible here).
 * @param pMemToMap The memory object to map.
 * @param pvFixed Requested address (its handling is not visible here).
 * @param uAlignment Requested alignment (its handling is not visible here).
 * @param fProt The protection mask for the mapping.
 */
int rtR0MemObjNativeMapKernel(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap, void *pvFixed, size_t uAlignment, unsigned fProt)
{
/* Assume failure until one of the mapping strategies succeeds. */
int rc = VERR_NO_MEMORY;
/* Fail if requested to do something we can't. */
/*
* Create the IPRT memory object.
*/
/* The mapping object gets the same size as the object being mapped. */
pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_MAPPING, NULL, pMemLnxToMap->Core.cb);
if (pMemLnx)
{
/* A non-zero cPages takes the vmap path; zero takes the
 * MMIO / physical memory path further down. */
if (pMemLnxToMap->cPages)
{
/*
* Use vmap - 2.4.22 and later.
*/
/* NOTE(review): the version #if and the vmap call are not visible here. */
# ifdef VM_MAP
# else
# endif
{
pMemLnx->fMappedToRing0 = true;
rc = VINF_SUCCESS;
}
else
#else /* < 2.4.22 */
/*
* Only option here is to share mappings if possible and forget about fProt.
*/
if (rtR0MemObjIsRing3(pMemToMap))
else
{
rc = VINF_SUCCESS;
if (RT_SUCCESS(rc))
{
}
}
#endif
}
else
{
/*
* MMIO / physical memory.
*/
{
/** @todo fix protection. */
rc = VINF_SUCCESS;
}
}
if (RT_SUCCESS(rc))
{
return VINF_SUCCESS;
}
}
return rc;
}
/**
 * Maps an existing memory object into the address space of a user process
 * and creates a RTR0MEMOBJTYPE_MAPPING memory object for the mapping.
 *
 * @returns VINF_SUCCESS on success, VERR_NOT_SUPPORTED if there is no Linux
 *          task, VERR_INTERNAL_ERROR or VERR_NO_MEMORY on failure.
 * @param ppMem Where to store the memory object pointer on success
 *              (presumably; the assignment is not visible here).
 * @param pMemToMap The memory object to map.
 * @param R3PtrFixed Requested user address (its handling is not visible here).
 * @param uAlignment Requested alignment (its handling is not visible here).
 * @param fProt The protection mask for the mapping.
 * @param R0Process The target process (presumably converted to pTask; the
 *                  conversion is not visible here).
 */
int rtR0MemObjNativeMapUser(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap, RTR3PTR R3PtrFixed, size_t uAlignment, unsigned fProt, RTR0PROCESS R0Process)
{
int rc = VERR_NO_MEMORY;
/*
* Check for restrictions.
*/
if (!pTask)
return VERR_NOT_SUPPORTED;
/*
* Create the IPRT memory object.
*/
/* The mapping object gets the same size as the object being mapped. */
pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_MAPPING, NULL, pMemLnxToMap->Core.cb);
if (pMemLnx)
{
/*
* Allocate user space mapping.
*/
void *pv;
/* (void *)-1 is the failure value of rtR0MemObjLinuxDoMmap.
 * NOTE(review): the call that assigns pv is not visible here. */
if (pv != (void *)-1)
{
/*
* Map page by page into the mmap area.
* This is generic, paranoid and not very efficient.
*/
/* From here on rc is tested as zero / non-zero (0 = OK so far) and is
 * turned back into an IPRT status in the bail-out code below. */
rc = 0;
/* A non-zero cPages takes the page-array path; otherwise a physical
 * address (Phys) is used. */
if (pMemLnxToMap->cPages)
{
{
struct vm_area_struct *vma = find_vma(pTask->mm, ulAddrCur); /* this is probably the same for all the pages... */
#endif
vma->vm_flags |= VM_RESERVED; /* This flag helps making 100% sure some bad stuff won't happen (swap, core, ++). */
#else /* 2.4 */
#endif
/* Stop at the first page that fails. */
if (rc)
break;
}
}
else
{
else
{
/* No physical address could be determined for this object. */
Phys = NIL_RTHCPHYS;
}
if (Phys != NIL_RTHCPHYS)
{
{
struct vm_area_struct *vma = find_vma(pTask->mm, ulAddrCur); /* this is probably the same for all the pages... */
#endif
#else /* 2.4 */
#endif
/* Stop at the first page that fails. */
if (rc)
break;
}
}
}
if (!rc)
{
return VINF_SUCCESS;
}
/*
* Bail out.
*/
/* VERR_INTERNAL_ERROR is passed through; any other failure is reported
 * as VERR_NO_MEMORY. */
if (rc != VERR_INTERNAL_ERROR)
rc = VERR_NO_MEMORY;
}
}
return rc;
}
{
{
case RTR0MEMOBJTYPE_CONT:
case RTR0MEMOBJTYPE_PHYS:
/* the parent knows */
case RTR0MEMOBJTYPE_MAPPING:
/* cPages > 0 */
case RTR0MEMOBJTYPE_LOW:
case RTR0MEMOBJTYPE_LOCK:
case RTR0MEMOBJTYPE_PHYS_NC:
case RTR0MEMOBJTYPE_PAGE:
default:
/* fall thru */
case RTR0MEMOBJTYPE_RES_VIRT:
return NIL_RTHCPHYS;
}
}