SUPDrv-linux.c revision cb064f7dec5293b9cb3b3b777c6372abe13e6b77
/* $Rev$ */
/** @file
* VBoxDrv - The VirtualBox Support Driver - Linux specifics.
*/
/*
* Copyright (C) 2006-2007 Sun Microsystems, Inc.
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*
* The contents of this file may alternatively be used under the terms
* of the Common Development and Distribution License Version 1.0
* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
* VirtualBox OSE distribution, in which case the provisions of the
* CDDL are applicable instead of those of the GPL.
*
* You may elect to license modified versions of this file under the
* terms and conditions of either the GPL or the CDDL or both.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
* Clara, CA 95054 USA or visit http://www.sun.com if you need
* additional information or have any questions.
* Some lines of code to disable the local APIC on x86_64 machines taken
* from a Mandriva patch by Gwenole Beauchesne <gbeauchesne@mandriva.com>.
*/
/*******************************************************************************
* Header Files *
*******************************************************************************/
#define LOG_GROUP LOG_GROUP_SUP_DRV
#include "../SUPDrvInternal.h"
#include "the-linux-kernel.h"
#include "version-generated.h"
#include <iprt/spinlock.h>
#include <iprt/semaphore.h>
#include <iprt/initterm.h>
/** @todo figure out the exact version number */
# define VBOX_WITH_SUSPEND_NOTIFICATION
#endif
#ifdef CONFIG_DEVFS_FS
# include <linux/devfs_fs_kernel.h>
#endif
#ifdef CONFIG_VBOXDRV_AS_MISC
# include <linux/miscdevice.h>
#endif
#ifdef CONFIG_X86_LOCAL_APIC
#endif
# include <linux/platform_device.h>
#endif
/* devfs defines */
#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
# ifdef VBOX_WITH_HARDENING
# else
# endif
#endif /* CONFIG_DEV_FS && !CONFIG_VBOXDEV_AS_MISC */
#ifdef CONFIG_X86_HIGH_ENTRY
# error "CONFIG_X86_HIGH_ENTRY is not supported by VBoxDrv at this time."
#endif
#ifdef CONFIG_X86_LOCAL_APIC
/* If an NMI occurs while we are inside the world switcher the machine will
* crash. The Linux NMI watchdog generates periodic NMIs increasing a counter
* which is compared with another counter increased in the timer interrupt
* handler. We disable the NMI watchdog.
*
* - Linux >= 2.6.21: The watchdog is disabled by default on i386 and x86_64.
* - Linux < 2.6.21: The watchdog is normally enabled by default on x86_64
* and disabled on i386.
*/
# if defined(RT_ARCH_AMD64)
# define DO_DISABLE_NMI 1
# endif
# endif
/* In this branch nmi_active is a plain int, so the accessors work on it
 * directly (the "# else" branch maps the same names onto the atomic_* API).
 * Every expansion is fully parenthesized so it acts as one expression no
 * matter which operators surround the macro call.
 * Note that nmi_atomic_dec() stores 0 here rather than decrementing. */
extern int nmi_active;
# define nmi_atomic_read(P) (*(P))
# define nmi_atomic_set(P, V) (*(P) = (V))
# define nmi_atomic_dec(P) nmi_atomic_set(P, 0)
# else
# define nmi_atomic_read(P) atomic_read(P)
# define nmi_atomic_set(P, V) atomic_set(P, V)
# define nmi_atomic_dec(P) atomic_dec(P)
# endif
# ifndef X86_FEATURE_ARCH_PERFMON
# endif
# ifndef MSR_ARCH_PERFMON_EVENTSEL0
# define MSR_ARCH_PERFMON_EVENTSEL0 0x186
# endif
# define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT (1 << 0)
# endif
#endif /* CONFIG_X86_LOCAL_APIC */
#define str(s) #s
/*******************************************************************************
* Global Variables *
*******************************************************************************/
/**
* Device extension & session data association structure.
*/
static SUPDRVDEVEXT g_DevExt;
#ifndef CONFIG_VBOXDRV_AS_MISC
/** Module major number */
#define DEVICE_MAJOR 234
/** Saved major device number */
static int g_iModuleMajor;
#endif /* !CONFIG_VBOXDRV_AS_MISC */
/** Module parameter.
* Not prefixed because the name is used by macros at the end of this file. */
static int force_async_tsc = 0;
/** The module name. */
#define DEVICE_NAME "vboxdrv"
#ifdef RT_ARCH_AMD64
/**
* Memory for the executable memory heap (in IPRT).
*
* Emitted with top-level assembly instead of a C array: the block is placed in
* its own "execmemory" section flagged "awx" (allocatable, writable and
* executable), is aligned on 32 bytes and consists of 1572864 zero bytes
* (1.5 MiB). It is exported as the global object symbol g_abExecMemory, and
* the closing ".text" directive switches back to the text section.
*/
__asm__(".section execmemory, \"awx\", @progbits\n\t"
".align 32\n\t"
".globl g_abExecMemory\n"
"g_abExecMemory:\n\t"
".zero 1572864\n\t"
".type g_abExecMemory, @object\n\t"
".size g_abExecMemory, 1572864\n\t"
".text\n\t");
#endif
/*******************************************************************************
* Internal Functions *
*******************************************************************************/
static int VBoxDrvLinuxInit(void);
static void VBoxDrvLinuxUnload(void);
#ifdef HAVE_UNLOCKED_IOCTL
#else
static int VBoxDrvLinuxIOCtl(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg);
#endif
static int VBoxDrvLinuxErr2LinuxErr(int);
#endif
/**
* The file_operations structure.
*
* NOTE(review): no member initializers are visible in either branch of the
* HAVE_UNLOCKED_IOCTL conditional below - confirm the callback table.
*/
static struct file_operations gFileOpsVBoxDrv =
{
#ifdef HAVE_UNLOCKED_IOCTL
#else
#endif
};
#ifdef CONFIG_VBOXDRV_AS_MISC
/**
* The miscdevice structure; only compiled when CONFIG_VBOXDRV_AS_MISC is
* defined.
*
* NOTE(review): the initializer shows a lone "# endif" and no members -
* confirm the conditional and the field list.
*/
static struct miscdevice gMiscDevice =
{
# endif
};
#endif
/**
* Platform driver registration data.
*
* Installs VBoxDrvProbe and VBoxDrvResume as the probe and resume callbacks
* and names the driver "vboxdrv", the same name gPlatformDevice carries.
*/
static struct platform_driver gPlatformDriver =
{
.probe = VBoxDrvProbe,
.resume = VBoxDrvResume,
/** @todo .shutdown? */
.driver =
{
.name = "vboxdrv"
}
};
/**
* Platform device registration data; named "vboxdrv" like gPlatformDriver.
*
* NOTE(review): the .dev initializer is empty as shown - confirm.
*/
static struct platform_device gPlatformDevice =
{
.name = "vboxdrv",
.dev =
{
}
};
#endif /* VBOX_WITH_SUSPEND_NOTIFICATION */
#ifdef CONFIG_X86_LOCAL_APIC
# ifdef DO_DISABLE_NMI
/**
 * Stops the AMD NMI watchdog (x86_64 only) by writing zero to the
 * MSR_K7_EVNTSEL0 register.
 *
 * @returns 1 unconditionally.
 */
static int vboxdrvStopK7Watchdog(void)
{
    wrmsr(MSR_K7_EVNTSEL0, 0, 0);

    return 1;
}
/**
 * Stops the Intel P4 NMI watchdog (x86_64 only).
 *
 * Zeroes, in this order, MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR1 and
 * MSR_P4_CRU_ESCR0.
 *
 * @returns 1 unconditionally.
 */
static int vboxdrvStopP4Watchdog(void)
{
    /* Registers to clear; the array order is the write order. */
    static const unsigned s_auMsrs[] =
    {
        MSR_P4_IQ_CCCR0,
        MSR_P4_IQ_CCCR1,
        MSR_P4_CRU_ESCR0
    };
    unsigned iMsr;

    for (iMsr = 0; iMsr < sizeof(s_auMsrs) / sizeof(s_auMsrs[0]); iMsr++)
        wrmsr(s_auMsrs[iMsr], 0, 0);

    return 1;
}
/**
 * Stops the Intel NMI watchdog using the new (architectural perfmon) method of
 * detecting the event counter.
 *
 * EBX of CPUID leaf 10 (0xA) flags architectural events that are NOT
 * available; bit 0 (ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT) covers the
 * unhalted-core-cycles event. When that bit is clear the event exists, so its
 * event-select MSR is zeroed.
 *
 * @returns 1 unconditionally.
 */
static int vboxdrvStopIntelArchWatchdog(void)
{
    /* The bit test below is only meaningful on a value read from CPUID;
     * testing an unassigned local would be undefined behaviour. */
    unsigned ebx = cpuid_ebx(10);

    if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
        wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
    return 1;
}
/**
* Stop NMI watchdog.
*
* Does nothing unless nmi_watchdog is NMI_LOCAL_APIC or NMI_IO_APIC. In local
* APIC mode the CPU vendor taken from boot_cpu_data selects the handling; the
* AMD and default cases visible here simply return.
*
* @param unused Not referenced in the visible body.
*/
static void vboxdrvStopApicNmiWatchdog(void *unused)
{
int stopped = 0;
/* only support LOCAL and IO APICs for now */
if ((nmi_watchdog != NMI_LOCAL_APIC) &&
(nmi_watchdog != NMI_IO_APIC))
return;
if (nmi_watchdog == NMI_LOCAL_APIC)
{
switch (boot_cpu_data.x86_vendor)
{
case X86_VENDOR_AMD:
return;
break;
case X86_VENDOR_INTEL:
{
break;
}
break;
default:
/* any other vendor: return without doing anything */
return;
}
}
/* NOTE(review): 'stopped' is never assigned after its initialization and the
* statement guarded by this test is not visible - confirm. */
if (stopped)
}
/**
* Disable LAPIC NMI watchdog.
*
* Returns immediately when nmi_active reads as zero or negative.
* NOTE(review): no statement follows the final comment in the visible body -
* confirm what is supposed to happen after the early-out.
*/
static void DisableLapicNmiWatchdog(void)
{
if (nmi_atomic_read(&nmi_active) <= 0)
return;
/* tell do_nmi() and others that we're not active any more */
}
/**
* Shutdown NMI.
*
* @param dummy Not referenced in the visible body.
*
* NOTE(review): only an empty nested block is visible here - confirm the body.
*/
static void vboxdrvNmiCpuShutdown(void * dummy)
{
{
}
}
/**
* NMI shutdown entry point.
*
* NOTE(review): the body is empty as shown; presumably it should drive
* vboxdrvNmiCpuShutdown - confirm.
*/
static void vboxdrvNmiShutdown(void)
{
}
# endif /* DO_DISABLE_NMI */
#endif /* CONFIG_X86_LOCAL_APIC */
{
#else
#endif
}
{
#else
#endif
}
{
#else
#endif
}
/**
* Initialize module.
*
* The visible steps are, in order: the NMI watchdog checks (only with
* CONFIG_X86_LOCAL_APIC), device registration (misc device or character
* device, depending on CONFIG_VBOXDRV_AS_MISC), and runtime / device extension
* initialization.
*
* @returns appropriate status code; the NMI watchdog checks visible here bail
* out with -EINVAL, the remaining paths return rc.
*/
static int __init VBoxDrvLinuxInit(void)
{
int rc;
dprintf(("VBoxDrv::ModuleInit\n"));
#ifdef CONFIG_X86_LOCAL_APIC
/*
* If an NMI occurs while we are inside the world switcher the machine will crash.
* The Linux NMI watchdog generates periodic NMIs increasing a counter which is
* compared with another counter increased in the timer interrupt handler. Therefore
* we do not allow an NMI watchdog to be set up.
*/
# if !defined(VBOX_REDHAT_KABI)
/*
* First test: NMI activated? Only works with Linux 2.6 -- 2.4 does not export
* the nmi_watchdog variable.
*/
# ifdef DO_DISABLE_NMI
if (nmi_atomic_read(&nmi_active) > 0)
{
switch (nmi_watchdog)
{
case NMI_LOCAL_APIC:
break;
case NMI_NONE:
break;
}
/* Re-read nmi_active to see whether the watchdog is now off. */
if (nmi_atomic_read(&nmi_active) == 0)
{
}
else
}
# endif /* DO_DISABLE_NMI */
/*
* Permanent IO_APIC mode active? No way to handle this!
*/
if (nmi_watchdog == NMI_IO_APIC)
{
": NMI watchdog in IO_APIC mode active -- refused to load the kernel module!\n"
": Please disable the NMI watchdog by specifying 'nmi_watchdog=0' at kernel\n"
": command line.\n");
return -EINVAL;
}
/*
*/
/*
* Now fall through and see if it actually was enabled before. If so, fail
* as we cannot deactivate it cleanly from here.
*/
# else /* < 2.6.19 */
/*
* Older 2.6 kernels: nmi_watchdog is not initialized by default
*/
/* NOTE(review): the nmi_activated label targeted here is not visible in this
* function as shown - confirm it exists. */
if (nmi_watchdog != NMI_NONE)
goto nmi_activated;
# endif
# endif /* >= 2.6.0 && !defined(VBOX_REDHAT_KABI) */
/*
* Second test: Interrupt generated by performance counter not masked and can
* generate an NMI. Also works with Linux 2.4.
*/
{
ver = GET_APIC_VERSION(v);
/* 82489DXs do not report # of LVT entries. */
if (maxlvt >= 4)
{
/* Read status of performance counter IRQ vector */
v = apic_read(APIC_LVTPC);
/* performance counter generates NMI and is not masked? */
{
": NMI watchdog either active or at least initialized. Please disable the NMI\n"
": watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
return -EINVAL;
# else /* < 2.6.19 */
# if !defined(VBOX_REDHAT_KABI)
# endif
": NMI watchdog active -- refused to load the kernel module! Please disable\n"
": the NMI watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
return -EINVAL;
# endif /* >= 2.6.19 */
}
}
}
# endif /* >= 2.6.19 */
#endif /* CONFIG_X86_LOCAL_APIC */
/*
* Check for synchronous/asynchronous TSC mode.
*/
#ifdef CONFIG_VBOXDRV_AS_MISC
/* Misc-device variant: any non-zero rc aborts the load. */
if (rc)
{
return rc;
}
#else /* !CONFIG_VBOXDRV_AS_MISC */
/*
* Register character device.
*/
/* Character-device variant: only a negative rc aborts the load. */
if (rc < 0)
{
return rc;
}
/*
* Save returned module major number
*/
if (DEVICE_MAJOR != 0)
else
g_iModuleMajor = rc;
rc = 0;
# ifdef CONFIG_DEVFS_FS
/*
* Register a device entry
*/
{
dprintf(("devfs_register failed!\n"));
}
# endif
#endif /* !CONFIG_VBOXDRV_AS_MISC */
if (!rc)
{
/*
* Initialize the runtime.
* On AMD64 we'll have to donate the high rwx memory block to the exec allocator.
*/
if (RT_SUCCESS(rc))
{
#ifdef RT_ARCH_AMD64
#endif
/*
* Initialize the device extension.
*/
if (RT_SUCCESS(rc))
if (RT_SUCCESS(rc))
{
if (rc == 0)
{
if (rc == 0)
#endif
{
#ifdef VBOX_HRTIMER
"'high-res'"
#else
"'normal'"
#endif
".\n",
return rc;
}
else
}
#endif
}
/* Reached when a later step failed: terminate the runtime again. */
RTR0Term();
}
else
/*
* Failed, cleanup and return the error code.
*/
#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
#endif
}
#ifdef CONFIG_VBOXDRV_AS_MISC
#else
#endif
return rc;
}
/**
* Unload the module.
*
* Logs the unload, handles the device unregistration variants selected by
* CONFIG_VBOXDRV_AS_MISC / CONFIG_DEVFS_FS, and finishes by calling
* RTR0Term().
*/
static void __exit VBoxDrvLinuxUnload(void)
{
int rc;
dprintf(("VBoxDrvLinuxUnload\n"));
#endif
/*
* I don't think it's possible to unload a driver which processes have
* opened, at least we'll blindly assume that here.
*/
#ifdef CONFIG_VBOXDRV_AS_MISC
/* NOTE(review): the call that assigns rc is not visible here - confirm. */
if (rc < 0)
{
}
#else /* !CONFIG_VBOXDRV_AS_MISC */
# ifdef CONFIG_DEVFS_FS
/*
* Unregister a device entry
*/
# endif /* devfs */
#endif /* !CONFIG_VBOXDRV_AS_MISC */
/*
* Destroy GIP, delete the device extension and terminate IPRT.
*/
RTR0Term();
}
/**
* Device open handler (logs itself as "VBoxDrvLinuxCreate").
*
* @returns -EPERM when VBOX_WITH_HARDENING is defined and the effective user
* ID is not 0; otherwise rc converted by VBoxDrvLinuxErr2LinuxErr().
* @param pInode Pointer to inode info structure.
* @param pFilp Associated file pointer.
*/
{
int rc;
Log(("VBoxDrvLinuxCreate: pFilp=%p pid=%d/%d %s\n", pFilp, RTProcSelf(), current->pid, current->comm));
#ifdef VBOX_WITH_HARDENING
/*
* Only root is allowed to access the device, enforce it!
*/
if (vboxdrvLinuxEuid() != 0 /* root */ )
{
return -EPERM;
}
#endif /* VBOX_WITH_HARDENING */
/*
* Call common code for the rest.
*/
/* NOTE(review): the common-code call that assigns rc is not visible here. */
if (!rc)
{
}
Log(("VBoxDrvLinuxCreate: g_DevExt=%p pSession=%p rc=%d/%d (pid=%d/%d %s)\n",
return VBoxDrvLinuxErr2LinuxErr(rc);
}
/**
* Close device.
*
* @returns 0 on every visible path.
* @param pInode Pointer to inode info structure.
* @param pFilp Associated file pointer.
*/
{
Log(("VBoxDrvLinuxClose: pFilp=%p pSession=%p pid=%d/%d %s\n",
return 0;
}
/**
* Dummy device release function. We have to provide this function,
* otherwise the kernel will complain.
*
* The body is empty: nothing is released.
*
* @param pDev Pointer to the platform device.
*/
{
}
/**
* Dummy probe function.
*
* @returns 0 always.
* @param pDev Pointer to the platform device.
*/
{
return 0;
}
/**
* Suspend callback.
*
* @returns 0 on the only path visible here.
* @param pDev Pointer to the platform device.
* @param State message type, see Documentation/power/devices.txt.
*/
{
return 0;
}
/**
* Resume callback.
*
* @returns 0 on the only path visible here.
* @param pDev Pointer to the platform device.
*/
{
return 0;
}
#endif /* VBOX_WITH_SUSPEND_NOTIFICATION */
/**
* Device I/O Control entry point.
*
* @returns rc in the !HAVE_UNLOCKED_IOCTL variant shown below.
* @param pInode Inode pointer; only part of the !HAVE_UNLOCKED_IOCTL
* signature.
* @param pFilp Associated file pointer.
* @param uCmd The function specified to ioctl().
* @param ulArg The argument specified to ioctl().
*/
#ifdef HAVE_UNLOCKED_IOCTL
#else
static int VBoxDrvLinuxIOCtl(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
#endif
{
/*
* Deal with the two high-speed IOCtls that take their arguments from
* the session and uCmd, and only return a VBox status code.
*/
#ifdef HAVE_UNLOCKED_IOCTL
|| uCmd == SUP_IOCTL_FAST_DO_NOP))
#else /* !HAVE_UNLOCKED_IOCTL */
int rc;
|| uCmd == SUP_IOCTL_FAST_DO_NOP))
else
/* NOTE(review): lock_kernel() is the only locking call visible on this
* path; the matching unlock and the slow-path call are not shown. */
lock_kernel();
return rc;
#endif /* !HAVE_UNLOCKED_IOCTL */
}
/**
* Device I/O Control entry point.
*
* Works in the stages marked by the comments below: read the request header,
* buffer the request, process it, and copy the result back to user space.
*
* @returns rc at the end; the early-outs visible here return -EFAULT,
* -EINVAL (bad header magic or size mismatch), -E2BIG or -ENOMEM
* (buffer allocation failed).
* @param pFilp Associated file pointer.
* @param uCmd The function specified to ioctl().
* @param ulArg The argument specified to ioctl().
*/
{
int rc;
Log6(("VBoxDrvLinuxIOCtl: pFilp=%p uCmd=%#x ulArg=%p pid=%d/%d\n", pFilp, uCmd, (void *)ulArg, RTProcSelf(), current->pid));
/*
* Read the header.
*/
{
return -EFAULT;
}
{
Log(("VBoxDrvLinuxIOCtl: bad header magic %#x; uCmd=%#x\n", Hdr.fFlags & SUPREQHDR_FLAGS_MAGIC_MASK, uCmd));
return -EINVAL;
}
/*
* Buffer the request.
*/
{
return -E2BIG;
}
{
Log(("VBoxDrvLinuxIOCtl: bad ioctl cbBuf=%#x _IOC_SIZE=%#x; uCmd=%#x.\n", cbBuf, _IOC_SIZE(uCmd), uCmd));
return -EINVAL;
}
/* No request buffer could be obtained. */
if (RT_UNLIKELY(!pHdr))
{
OSDBGPRINT(("VBoxDrvLinuxIOCtl: failed to allocate buffer of %d bytes for uCmd=%#x.\n", cbBuf, uCmd));
return -ENOMEM;
}
{
return -EFAULT;
}
/*
* Process the IOCtl.
*/
/*
* Copy ioctl data and output buffer back to user space.
*/
{
{
}
{
/* this is really bad! */
}
}
else
{
Log(("VBoxDrvLinuxIOCtl: pFilp=%p uCmd=%#x ulArg=%p failed, rc=%d\n", pFilp, uCmd, (void *)ulArg, rc));
}
return rc;
}
/**
* The SUPDRV IDC entry point.
*
* @returns VBox status code, see supdrvIDC. The validation paths visible here
* return VERR_INVALID_POINTER or VERR_INVALID_PARAMETER.
* @param iReq The request code.
* @param pReq The request.
*/
{
/*
* Some quick validations.
*/
/* NOTE(review): the conditions guarding the returns below are not visible
* here - confirm each check. */
return VERR_INVALID_POINTER;
if (pSession)
{
return VERR_INVALID_PARAMETER;
return VERR_INVALID_PARAMETER;
}
return VERR_INVALID_PARAMETER;
/*
* Do the job.
*/
}
/**
* Initializes any OS specific object creator fields.
*
* The body is empty, so no field is touched here.
*/
{
}
/**
 * OS specific access check hook for a support driver object.
 *
 * This implementation never decides anything itself: it always answers false,
 * which asks the caller to apply the default access policy, and it leaves
 * *prc untouched.
 *
 * @returns false always (true would mean a decision had been stored in *prc).
 *
 * @param   pObj        The object in question.
 * @param   pSession    The session wanting to access the object.
 * @param   pszObjName  The object name, can be NULL.
 * @param   prc         Where the result would go when returning true; unused here.
 */
bool VBOXCALL supdrvOSObjCanAccess(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession, const char *pszObjName, int *prc)
{
    /* None of the arguments takes part in the answer. */
    (void)pObj;
    (void)pSession;
    (void)pszObjName;
    (void)prc;

    return false;
}
/* Reports whether the force_async_tsc module parameter is set: yields true
* for any non-zero value and false for 0. */
{
return force_async_tsc != 0;
}
/**
 * Translates a supdrv status code into the matching Linux error code.
 *
 * @returns the negative Linux errno paired with @a rc in the table below,
 *          0 for 0, and -EPERM for any code that has no entry.
 * @param   rc  supdrv error code (SUPDRV_ERR_* defines).
 */
static int VBoxDrvLinuxErr2LinuxErr(int rc)
{
    /* supdrv status -> Linux error; every supdrv code appears exactly once. */
    static const struct
    {
        int iSupDrv;
        int iLinux;
    } s_aMap[] =
    {
        { 0,                            0       },
        { SUPDRV_ERR_GENERAL_FAILURE,   -EACCES },
        { SUPDRV_ERR_INVALID_PARAM,     -EINVAL },
        { SUPDRV_ERR_INVALID_MAGIC,     -EILSEQ },
        { SUPDRV_ERR_INVALID_HANDLE,    -ENXIO  },
        { SUPDRV_ERR_INVALID_POINTER,   -EFAULT },
        { SUPDRV_ERR_LOCK_FAILED,       -ENOLCK },
        { SUPDRV_ERR_ALREADY_LOADED,    -EEXIST },
        { SUPDRV_ERR_PERMISSION_DENIED, -EPERM  },
        { SUPDRV_ERR_VERSION_MISMATCH,  -ENOSYS },
        { SUPDRV_ERR_IDT_FAILED,        -1000   }
    };
    unsigned iEntry;

    for (iEntry = 0; iEntry < sizeof(s_aMap) / sizeof(s_aMap[0]); iEntry++)
    {
        if (s_aMap[iEntry].iSupDrv == rc)
            return s_aMap[iEntry].iLinux;
    }

    /* Codes without a table entry are reported as -EPERM. */
    return -EPERM;
}
/* Returns 0 on the visible path.
* NOTE(review): the signature of this function and the statements that use
* szMsg are not visible here - confirm. */
{
#if 1
/* Active branch: 512-byte local message buffer. */
char szMsg[512];
#else
/* forward to printf - needs some more GCC hacking to fix ebp... */
"jmp %1\n\t",
"m" (printk));
#endif
return 0;
}
MODULE_AUTHOR("Sun Microsystems, Inc.");
MODULE_DESCRIPTION("VirtualBox Support Driver");
MODULE_LICENSE("GPL");
#ifdef MODULE_VERSION
#endif