alloc-r0drv-linux.c revision 097de894d6a9a01ff7763f8a12d2b8882b29cd69
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * IPRT - Memory Allocation, Ring-0 Driver, Linux.
e64031e20c39650a7bc902a3e1aba613b9415deevboxsync * Copyright (C) 2006-2010 Oracle Corporation
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * This file is part of VirtualBox Open Source Edition (OSE), as
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * available from http://www.virtualbox.org. This file is free software;
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * you can redistribute it and/or modify it under the terms of the GNU
a16eb14ad7a4b5ef91ddc22d3e8e92d930f736fcvboxsync * General Public License (GPL) as published by the Free Software
a16eb14ad7a4b5ef91ddc22d3e8e92d930f736fcvboxsync * Foundation, in version 2 as it comes in the "COPYING" file of the
a16eb14ad7a4b5ef91ddc22d3e8e92d930f736fcvboxsync * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
a16eb14ad7a4b5ef91ddc22d3e8e92d930f736fcvboxsync * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * The contents of this file may alternatively be used under the terms
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * of the Common Development and Distribution License Version 1.0
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * VirtualBox OSE distribution, in which case the provisions of the
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * CDDL are applicable instead of those of the GPL.
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * You may elect to license modified versions of this file under the
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * terms and conditions of either the GPL or the CDDL or both.
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync/*******************************************************************************
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync* Header Files *
d31ded334a29f575e23dc889b603b1a586759348vboxsync*******************************************************************************/
b74ca013e5f201a2dd371e6c438433ceac12af30vboxsync#if defined(RT_ARCH_AMD64) || defined(DOXYGEN_RUNNING)
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * Starting with 2.6.23 we can use __get_vm_area and map_vm_area to allocate
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * memory in the module range. This is preferable to the exec heap below.
223cf005b18af2c21352a70693ebaf0582f68ebcvboxsync * We need memory in the module range (~2GB to ~0); this can only be obtained
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * through APIs that are not exported (see module_alloc()).
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * So, we'll have to create a quick and dirty heap here using BSS memory.
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * Very annoying and it's going to restrict us!
7666082b743c5e146a8cee6cc794ff4bc3fd0ffdvboxsync/*******************************************************************************
7666082b743c5e146a8cee6cc794ff4bc3fd0ffdvboxsync* Structures and Typedefs *
590bfe12ce22cd3716448fbb9f4dc51664bfe5e2vboxsync*******************************************************************************/
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * Extended header used for headers marked with RTMEMHDR_FLAG_EXEC_VM_AREA.
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * This is used with allocating executable memory, for things like generated
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * code and loaded modules.
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync /** The VM area for this allocation. */
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync /** The header we present to the generic API. */
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync/** Pointer to an extended memory header. */
22e281e75ed636601178296c6daebda8f1d17c59vboxsync/*******************************************************************************
22e281e75ed636601178296c6daebda8f1d17c59vboxsync* Global Variables *
22e281e75ed636601178296c6daebda8f1d17c59vboxsync*******************************************************************************/
22e281e75ed636601178296c6daebda8f1d17c59vboxsync/** The heap. */
22e281e75ed636601178296c6daebda8f1d17c59vboxsync/** Spinlock protecting the heap. */
22e281e75ed636601178296c6daebda8f1d17c59vboxsyncstatic RTSPINLOCK g_HeapExecSpinlock = NIL_RTSPINLOCK;
22e281e75ed636601178296c6daebda8f1d17c59vboxsync * API for cleaning up the heap spinlock on IPRT termination.
22e281e75ed636601178296c6daebda8f1d17c59vboxsync * This is, like RTR0MemExecDonate, specific to AMD64 Linux/GNU.
22e281e75ed636601178296c6daebda8f1d17c59vboxsync * Donate read+write+execute memory to the exec heap.
22e281e75ed636601178296c6daebda8f1d17c59vboxsync * This API is specific to AMD64 and Linux/GNU. A kernel module that desires to
22e281e75ed636601178296c6daebda8f1d17c59vboxsync * use RTMemExecAlloc on AMD64 Linux/GNU will have to donate some statically
22e281e75ed636601178296c6daebda8f1d17c59vboxsync * allocated memory in the module if it wishes for GCC generated code to work.
22e281e75ed636601178296c6daebda8f1d17c59vboxsync * GCC can only generate modules that work in the address range ~2GB to ~0
22e281e75ed636601178296c6daebda8f1d17c59vboxsync * currently.
22e281e75ed636601178296c6daebda8f1d17c59vboxsync * The API only accepts a single donation.
22e281e75ed636601178296c6daebda8f1d17c59vboxsync * @returns IPRT status code.
22e281e75ed636601178296c6daebda8f1d17c59vboxsync * @param pvMemory Pointer to the memory block.
22e281e75ed636601178296c6daebda8f1d17c59vboxsync * @param cb The size of the memory block.
22e281e75ed636601178296c6daebda8f1d17c59vboxsyncRTR0DECL(int) RTR0MemExecDonate(void *pvMemory, size_t cb)
22e281e75ed636601178296c6daebda8f1d17c59vboxsync AssertReturn(g_HeapExec == NIL_RTHEAPSIMPLE, VERR_WRONG_ORDER);
d1cbbd799d8912978f5146960b6780f387bb414bvboxsync rc = RTSpinlockCreate(&g_HeapExecSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "RTR0MemExecDonate");
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync#endif /* RTMEMALLOC_EXEC_HEAP */
b1c3cdef473df2fbc621d5da81acc82dbfb8a11avboxsync * Allocate executable kernel memory in the module range.
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * @returns Pointer to the allocation header on success, NULL on failure.
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * @param cb The size the user requested.
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync size_t const cbAlloc = RT_ALIGN_Z(sizeof(RTMEMLNXHDREX) + cb, PAGE_SIZE);
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync pVmArea = __get_vm_area(cbAlloc, VM_ALLOC, MODULES_VADDR, MODULES_END);
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync papPages = (struct page **)kmalloc(cPages * sizeof(papPages[0]), GFP_KERNEL);
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync papPages[iPage] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * Map the pages. The API requires an iterator argument, which can be
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * used, in case of failure, to figure out how much was actually
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * mapped. Not sure how useful this really is, but whatever.
223cf005b18af2c21352a70693ebaf0582f68ebcvboxsync * Not entirely sure we really need to set nr_pages and pages here, but
b74ca013e5f201a2dd371e6c438433ceac12af30vboxsync * they provide a very convenient place for storing something we need
b74ca013e5f201a2dd371e6c438433ceac12af30vboxsync * in the free function, if nothing else...
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync if (!map_vm_area(pVmArea, PAGE_KERNEL_EXEC, &papPagesIterator))
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync PRTMEMLNXHDREX pHdrEx = (PRTMEMLNXHDREX)pVmArea->addr;
b74ca013e5f201a2dd371e6c438433ceac12af30vboxsync /* bail out */
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync while (iPage-- > 0)
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync#endif /* RTMEMALLOC_EXEC_VM_AREA */
9e4166cf5ed4940f506bc718ea6c89bf7ed252c8vboxsync * OS specific allocation function.
9e4166cf5ed4940f506bc718ea6c89bf7ed252c8vboxsyncDECLHIDDEN(int) rtR0MemAllocEx(size_t cb, uint32_t fFlags, PRTMEMHDR *ppHdr)
8a132edc1577cbe2a19cd778c1b2bea6ae5e8515vboxsync * Allocate.
f9147fe1eaa4e35287f8f39282c7f92f0d7de0b7vboxsync pHdr = (PRTMEMHDR)RTHeapSimpleAlloc(g_HeapExec, cb + sizeof(*pHdr), 0);
b978e5849454446957177fd47ee98609ab0457a6vboxsync# else /* !RTMEMALLOC_EXEC_HEAP */
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync pHdr = (PRTMEMHDR)__vmalloc(cb + sizeof(*pHdr), GFP_KERNEL | __GFP_HIGHMEM, MY_PAGE_KERNEL_EXEC);
247b55faa8d054157f2481e68caca36f4dc9542cvboxsync# endif /* !RTMEMALLOC_EXEC_HEAP */
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
b74ca013e5f201a2dd371e6c438433ceac12af30vboxsync pHdr = (PRTMEMHDR)__vmalloc(cb + sizeof(*pHdr), GFP_KERNEL | __GFP_HIGHMEM, MY_PAGE_KERNEL_EXEC);
82e90599291da476b2de7c8db33cfb0f2cbac774vboxsync#if 1 /* vmalloc has serious performance issues, avoid it. */
22e281e75ed636601178296c6daebda8f1d17c59vboxsync (fFlags & RTMEMHDR_FLAG_ANY_CTX_ALLOC) ? GFP_ATOMIC : GFP_KERNEL);
6ae4b1c72625a8e5c369effea7f018b578d733c4vboxsync * Initialize.
533ffcb943c4af2c5fe6385d816d0ba3eda9383bvboxsync * OS specific free function.
1843553dbdf4e46417158b4c6348c503adf10740vboxsync else if (pHdr->fFlags & RTMEMHDR_FLAG_EXEC_VM_AREA)
13ba5527caaa9b8c4fee29f22e374fa67c4c6f72vboxsync PRTMEMLNXHDREX pHdrEx = RT_FROM_MEMBER(pHdr, RTMEMLNXHDREX, Hdr);
1843553dbdf4e46417158b4c6348c503adf10740vboxsync while (iPage-- > 0)
1843553dbdf4e46417158b4c6348c503adf10740vboxsync * Compute order. Some functions allocate 2^order pages.
22e281e75ed636601178296c6daebda8f1d17c59vboxsync * @returns order.
e0b9d3c357adf9b7d05f55540e86f22943fc4b23vboxsync * @param cPages Number of pages.
13ba5527caaa9b8c4fee29f22e374fa67c4c6f72vboxsync unsigned long cTmp;
1843553dbdf4e46417158b4c6348c503adf10740vboxsync for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
533ffcb943c4af2c5fe6385d816d0ba3eda9383bvboxsync * Allocates physical contiguous memory (below 4GB).
533ffcb943c4af2c5fe6385d816d0ba3eda9383bvboxsync * The allocation is page aligned and the content is undefined.
533ffcb943c4af2c5fe6385d816d0ba3eda9383bvboxsync * @returns Pointer to the memory block. This is page aligned.
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * @param pPhys Where to store the physical address.
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * @param cb The allocation size in bytes. This is always
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync * rounded up to PAGE_SIZE.
e52f819639386db020b2a635b47a415248c7fbf9vboxsyncRTR0DECL(void *) RTMemContAlloc(PRTCCPHYS pPhys, size_t cb)
b74ca013e5f201a2dd371e6c438433ceac12af30vboxsync * validate input.
6ae4b1c72625a8e5c369effea7f018b578d733c4vboxsync * Allocate page pointer array.
9e4166cf5ed4940f506bc718ea6c89bf7ed252c8vboxsync#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
0e77737b0ba913683e614db11463b31ca67aacbevboxsync /* ZONE_DMA32: 0-4GB */
2d53f6e472561965d363674e17f48d3bdffc24d3vboxsync /* ZONE_DMA: 0-16MB */
22e281e75ed636601178296c6daebda8f1d17c59vboxsync /* ZONE_NORMAL: 0-896MB */
2d53f6e472561965d363674e17f48d3bdffc24d3vboxsync * Reserve the pages and mark them executable.
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync AssertMsg( (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage])) + PAGE_SIZE
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync == (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage + 1]))
22e281e75ed636601178296c6daebda8f1d17c59vboxsync ("iPage=%i cPages=%u [0]=%#llx,%p [1]=%#llx,%p\n", iPage, cPages,
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync (long long)page_to_phys(&paPages[iPage]), phys_to_virt(page_to_phys(&paPages[iPage])),
8e972b677df5ee27b99211fc7e456a5aa50f3e68vboxsync (long long)page_to_phys(&paPages[iPage + 1]), phys_to_virt(page_to_phys(&paPages[iPage + 1])) ));
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 4, 20) /** @todo find the exact kernel where change_page_attr was introduced. */
806d0b554daa555364af5f87bc96eccbe760db7avboxsync * Frees memory allocated using RTMemContAlloc().
22e281e75ed636601178296c6daebda8f1d17c59vboxsync * @param pv Pointer returned by RTMemContAlloc().
806d0b554daa555364af5f87bc96eccbe760db7avboxsync * @param cb The cb parameter passed to RTMemContAlloc().
d408b82da0773c7e8cd4b3a01cb8a065a2c73a2dvboxsync /* validate */
806d0b554daa555364af5f87bc96eccbe760db7avboxsync AssertMsg(!((uintptr_t)pv & PAGE_OFFSET_MASK), ("pv=%p\n", pv));
806d0b554daa555364af5f87bc96eccbe760db7avboxsync /* calc order and get pages */
806d0b554daa555364af5f87bc96eccbe760db7avboxsync * Restore page attributes and free the pages.
806d0b554daa555364af5f87bc96eccbe760db7avboxsync#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 4, 20) /** @todo find the exact kernel where change_page_attr was introduced. */