/* vbi.c - revision 74eea81909c0b042ff4e1a9c20f6bae43bbe947d */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010-2011 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Private interfaces for VirtualBox access to Solaris kernel internal
* facilities.
*
*/
#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/thread.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/disp.h>
#include <sys/schedctl.h>
#include <sys/time.h>
#include <sys/clock.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/vmsystm.h>
#include <sys/cyclic.h>
#include <sys/class.h>
#include <sys/cpuvar.h>
#include <sys/kobj.h>
#include <sys/x_call.h>
#include <sys/x86_archext.h>
#include <sys/bitmap.h>
#include <vm/as.h>
#include <vm/hat.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/seg_kmem.h>
#include <vm/page.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/machparam.h>
#include <sys/utsname.h>
#include <sys/ctf_api.h>
#include "vbi.h"
/*
* We have to use dl_lookup to find contig_free().
*/
extern void *contig_alloc(size_t size, ddi_dma_attr_t *attr, uintptr_t align, int cansleep);
extern void contig_free(void *addr, size_t size);
#pragma weak contig_free
static void (*p_contig_free)(void *, size_t) = contig_free;
/*
* We have to use dl_lookup to find kflt_init() and thereby use kernel pages from
* the freelists if we no longer get user pages from freelist and cachelists.
*/
/* Introduced in v9 */
static int use_kflt = 0;
static page_t *vbi_page_get_fromlist(uint_t freelist, caddr_t virtAddr, size_t pgsize);
/*
 * Workarounds for running on old versions of Solaris with different cross call
 * interfaces. If we find xc_init_cpu() in the kernel, then just use the defined
 * interfaces for xc_call() from the include file, where the xc_call()
 * interface just takes a pointer to a ulong_t array. The array must be long
 * enough to hold "ncpus" bits at runtime.
 * The reason for the hacks is that using the type "cpuset_t" is pretty much
 * impossible from code built outside the Solaris source repository that wants
 * to run on multiple releases of Solaris.
 *
 * For old style xc_call()s, 32 bit Solaris and older 64 bit versions use
 * "ulong_t" as cpuset_t.
 *
 * Later versions of 64 bit Solaris used: struct {ulong_t words[x];}
 * where "x" depends on NCPU.
 *
 * We detect the difference in 64 bit support by checking the kernel value of
 * max_cpuid, which always holds the compiled value of NCPU - 1.
 *
 * If Solaris increases NCPU to more than 256, this module will continue
 * to work on all versions of Solaris as long as the number of installed
 * CPUs in the machine is <= VBI_NCPU. If VBI_NCPU is increased, this code
 * has to be rewritten somewhat to provide compatibility with older Solaris,
 * which expects cpuset_t to be based on NCPU == 256 -- or we drop support
 * for the older releases.
 */
static int use_old = 0;
static int use_old_with_ulong = 0;
#define VBI_NCPU	256
#define VBI_SET_WORDS	(VBI_NCPU / (sizeof (ulong_t) * 8))
typedef struct vbi_cpuset {
	ulong_t words[VBI_SET_WORDS];
} vbi_cpuset_t;
#define X_CALL_HIPRI	(2)	/* for old Solaris interface */
static void (*p_xc_call)() = (void (*)())xc_call;
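/*
 * A minimal sketch of what the cpuset manipulation below amounts to: CPU "c"
 * occupies bit (c % (8 * sizeof (ulong_t))) of word (c / (8 * sizeof (ulong_t)))
 * in the words[] array, which is exactly what the BT_SET()/BT_CLEAR() macros
 * from <sys/bitmap.h> expand to. The helper name is illustrative only.
 */
#if 0
static void
vbi_cpuset_example_add(vbi_cpuset_t *set, int c)
{
	set->words[c / (8 * sizeof (ulong_t))] |= 1UL << (c % (8 * sizeof (ulong_t)));
}
#endif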
/*
* module linkage stuff
*/
#if 0
static struct modlmisc vbi_modlmisc = {
&mod_miscops, "VirtualBox Interfaces V8"
};
static struct modlinkage vbi_modlinkage = {
	MODREV_1, { (void *)&vbi_modlmisc, NULL }
};
#endif
extern uintptr_t kernelbase;
#define IS_KERNEL(v)	((uintptr_t)(v) >= kernelbase)
#if 0
static int vbi_verbose = 0;
#endif
/* Introduced in v8 */
static int vbi_is_initialized = 0;
/* Which offsets will be used */
static int off_cpu_runrun = -1;
static int off_cpu_kprunrun = -1;
static int off_t_preempt = -1;
#define VBI_T_PREEMPT		(*((char *)curthread + off_t_preempt))
#define VBI_CPU_KPRUNRUN	(*((char *)CPU + off_cpu_kprunrun))
#define VBI_CPU_RUNRUN		(*((char *)CPU + off_cpu_runrun))

#define VBI_PREEMPT_DISABLE()			\
	{					\
		VBI_T_PREEMPT++;		\
		ASSERT(VBI_T_PREEMPT >= 1);	\
	}
#define VBI_PREEMPT_ENABLE()			\
	{					\
		ASSERT(VBI_T_PREEMPT >= 1);	\
		if (--VBI_T_PREEMPT == 0 &&	\
		    VBI_CPU_RUNRUN)		\
			kpreempt(KPREEMPT_SYNC);\
	}
/* End of v6 intro */
#if 0
int
_init(void)
{
	int err = vbi_init();

	if (!err)
		err = mod_install(&vbi_modlinkage);
	return (err);
}
#endif
static int
vbi_get_ctf_member_offset(ctf_file_t *ctfp, const char *structname, const char *membername, int *offset)
{
	ctf_id_t typeident = ctf_lookup_by_name(ctfp, structname);
	if (typeident != CTF_ERR)
	{
		ctf_membinfo_t memberinfo;
		bzero(&memberinfo, sizeof (memberinfo));
		if (ctf_member_info(ctfp, typeident, membername, &memberinfo) != CTF_ERR)
		{
			*offset = (memberinfo.ctm_offset >> 3);	/* CTF offsets are in bits */
			return (0);
		}
		else
			cmn_err(CE_NOTE, "ctf_member_info failed for %s\n", membername);
	}
	else
		cmn_err(CE_NOTE, "ctf_lookup_by_name failed for %s\n", structname);
	return (CTF_ERR);
}
int
vbi_init(void)
{
	/*
	 * Check to see if this version of virtualbox interface module will work
	 * with the kernel.
	 */
	if (kobj_getsymvalue("xc_init_cpu", 1) != NULL) {
		/*
		 * Our bit vector storage needs to be large enough for the
		 * actual number of CPUs running in the system.
		 */
		if (ncpus > VBI_NCPU) {
			cmn_err(CE_NOTE, "cpu count mismatch.\n");
			return (EINVAL);
		}
	} else {
		use_old = 1;
		if (max_cpuid + 1 == sizeof (ulong_t) * 8)
			use_old_with_ulong = 1;
		else if (max_cpuid + 1 != VBI_NCPU)
		{
			cmn_err(CE_NOTE, "cpuset_t size mismatch. probably too old a kernel.\n");
			return (EINVAL); /* cpuset_t size mismatch */
		}
	}

	/*
	 * In older versions of Solaris contig_free() is a static routine.
	 */
	if (p_contig_free == NULL) {
		p_contig_free = (void (*)(void *, size_t))
		    kobj_getsymvalue("contig_free", 1);
		if (p_contig_free == NULL) {
			cmn_err(CE_NOTE, "contig_free() not found in kernel\n");
			return (EINVAL);
		}
	}

	/*
	 * Use kernel page freelist flags to get pages from kernel page freelists
	 * while allocating physical pages, once the userpages are exhausted.
	 * snv_161+, see @bugref{5632}.
	 */
	if (kobj_getsymvalue("kflt_init", 1) != NULL)
	{
		int *p_kflt_disable = (int *)kobj_getsymvalue("kflt_disable", 1); /* amd64 only, on 32-bit kflt's are disabled. */
		if (p_kflt_disable && *p_kflt_disable == 0)
		{
			use_kflt = 1;
		}
	}

	/*
	 * CTF probing for fluid, private members.
	 */
	int err = 0;
	modctl_t *genunix_modctl = mod_hold_by_name("genunix");
	if (genunix_modctl)
	{
		ctf_file_t *ctfp = ctf_modopen(genunix_modctl->mod_mp, &err);
		if (ctfp)
		{
			do {
				err = vbi_get_ctf_member_offset(ctfp, "kthread_t", "t_preempt", &off_t_preempt); AssertBreak(!err);
				err = vbi_get_ctf_member_offset(ctfp, "cpu_t", "cpu_runrun", &off_cpu_runrun); AssertBreak(!err);
				err = vbi_get_ctf_member_offset(ctfp, "cpu_t", "cpu_kprunrun", &off_cpu_kprunrun); AssertBreak(!err);
			} while (0);
			ctf_close(ctfp);
		}
		mod_release_mod(genunix_modctl);
	}
	else
	{
		cmn_err(CE_NOTE, "failed to open genunix. kernel version mismatch.\n");
		err = EINVAL;
	}

	if (err)
		return (EINVAL);

	vbi_is_initialized = 1;
	return (0);
}
#if 0
int
_fini(void)
{
	int err = mod_remove(&vbi_modlinkage);

	if (err != 0)
		return (err);
	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&vbi_modlinkage, modinfop));
}
#endif
static ddi_dma_attr_t base_attr = {
	DMA_ATTR_V0,		/* Version Number */
	(uint64_t)0,		/* lower limit */
	(uint64_t)0,		/* high limit */
	(uint64_t)0xffffffff,	/* counter limit */
	(uint64_t)PAGESIZE,	/* pagesize alignment */
	(uint64_t)PAGESIZE,	/* pagesize burst size */
	(uint64_t)PAGESIZE,	/* pagesize effective DMA size */
	(uint64_t)0xffffffff,	/* max DMA xfer size */
	(uint64_t)0xffffffff,	/* segment boundary */
	1,			/* list length (1 for contiguous) */
	1,			/* device granularity */
	0			/* bus-specific flags */
};
static void *
vbi_internal_alloc(uint64_t *phys, size_t size, uint64_t alignment, int contig)
{
	ddi_dma_attr_t attr;
	pfn_t pfn;
	void *ptr;
	uint_t npages;

	if ((size & PAGEOFFSET) != 0)
		return (NULL);
	npages = (size + PAGESIZE - 1) >> PAGESHIFT;
	if (npages == 0)
		return (NULL);

	attr = base_attr;
	attr.dma_attr_addr_hi = *phys;
	attr.dma_attr_align = alignment;
	if (!contig)
		attr.dma_attr_sgllen = npages;
	ptr = contig_alloc(size, &attr, PAGESIZE, 1);
	if (ptr == NULL)
		return (NULL);

	pfn = hat_getpfnum(kas.a_hat, (caddr_t)ptr);
	if (pfn == PFN_INVALID)
		panic("vbi_contig_alloc(): hat_getpfnum() failed\n");
	*phys = (uint64_t)pfn << PAGESHIFT;
	return (ptr);
}

void *
vbi_contig_alloc(uint64_t *phys, size_t size)
{
	/* Obsolete */
	return (vbi_internal_alloc(phys, size, PAGESIZE /* alignment */, 1 /* contiguous */));
}

void
vbi_contig_free(void *va, size_t size)
{
	/* Obsolete */
	vbi_lowmem_free(va, size);
}

void *
vbi_kernel_map(uint64_t pa, size_t size, uint_t prot)
{
	caddr_t va;

	if ((pa & PAGEOFFSET) || (size & PAGEOFFSET))
		return (NULL);
	va = vmem_alloc(heap_arena, size, VM_SLEEP);
	hat_devload(kas.a_hat, va, size, (pfn_t)(pa >> PAGESHIFT),
	    prot, HAT_LOAD | HAT_LOAD_LOCK | HAT_UNORDERED_OK);
	return (va);
}

void
vbi_unmap(void *va, size_t size)
{
	if (IS_KERNEL(va)) {
		hat_unload(kas.a_hat, va, size, HAT_UNLOAD | HAT_UNLOAD_UNLOCK);
		vmem_free(heap_arena, va, size);
	} else {
		struct as *as = curproc->p_as;

		as_rangelock(as);
		(void) as_unmap(as, va, size);
		as_rangeunlock(as);
	}
}
void *
vbi_curthread(void)
{
return (curthread);
}
int
vbi_yield(void)
{
	int rv = 0;

	vbi_preempt_disable();

	char tpr = VBI_T_PREEMPT;
	char kpr = VBI_CPU_KPRUNRUN;
	if (tpr == 1 && kpr)
		rv = 1;

	vbi_preempt_enable();
	return (rv);
}
uint64_t
vbi_timer_granularity(void)
{
return (nsec_per_tick);
}
typedef struct vbi_timer {
	cyc_handler_t	vbi_handler;
	cyclic_id_t	vbi_cyclic;
	uint64_t	vbi_interval;
	void		(*vbi_func)();
	void		*vbi_arg1;
	void		*vbi_arg2;
} vbi_timer_t;

static void
vbi_timer_callback(void *arg)
{
	vbi_timer_t *t = arg;

	if (t->vbi_interval == 0)
		vbi_timer_stop(arg);	/* one shot: stop after first expiry */
	t->vbi_func(t->vbi_arg1, t->vbi_arg2);
}

void *
vbi_timer_create(void *callback, void *arg1, void *arg2, uint64_t interval)
{
	vbi_timer_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP);

	t->vbi_func = (void (*)())callback;
	t->vbi_arg1 = arg1;
	t->vbi_arg2 = arg2;
	t->vbi_handler.cyh_func = vbi_timer_callback;
	t->vbi_handler.cyh_arg = (void *)t;
	t->vbi_handler.cyh_level = CY_LOCK_LEVEL;
	t->vbi_cyclic = CYCLIC_NONE;
	t->vbi_interval = interval;
	return (t);
}
void
vbi_timer_destroy(void *timer)
{
	vbi_timer_t *t = timer;
	if (t != NULL) {
		vbi_timer_stop(timer);
		kmem_free(t, sizeof (*t));
	}
}

void
vbi_timer_start(void *timer, uint64_t when)
{
	vbi_timer_t *t = timer;
	cyc_time_t fire_time;
	uint64_t interval = t->vbi_interval;

	mutex_enter(&cpu_lock);
	when += gethrtime();
	fire_time.cyt_when = when;
	if (interval == 0)
		fire_time.cyt_interval = CY_INFINITY;
	else
		fire_time.cyt_interval = interval;
	t->vbi_cyclic = cyclic_add(&t->vbi_handler, &fire_time);
	mutex_exit(&cpu_lock);
}

void
vbi_timer_stop(void *timer)
{
	vbi_timer_t *t = timer;

	if (t->vbi_cyclic == CYCLIC_NONE)
		return;
	mutex_enter(&cpu_lock);
	if (t->vbi_cyclic != CYCLIC_NONE) {
		cyclic_remove(t->vbi_cyclic);
		t->vbi_cyclic = CYCLIC_NONE;
	}
	mutex_exit(&cpu_lock);
}

uint64_t
vbi_tod(void)
{
	timestruc_t ts;

	mutex_enter(&tod_lock);
	ts = tod_get();
	mutex_exit(&tod_lock);
	return ((uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec);
}
void *
vbi_proc(void)
{
proc_t *p;
drv_getparm(UPROCP, &p);
return (p);
}
void
vbi_set_priority(void *thread, int priority)
{
	kthread_t *t = thread;

	thread_lock(t);
	(void) thread_change_pri(t, priority, 0);
	thread_unlock(t);
}

void *
vbi_thread_create(void *func, void *arg, size_t len, int priority)
{
	kthread_t *t;

	t = thread_create(NULL, 0, (void (*)())func, arg, len,
	    curproc, TS_RUN, priority);
	return (t);
}
void
vbi_thread_exit(void)
{
thread_exit();
}
void *
vbi_text_alloc(size_t size)
{
	return (segkmem_alloc(heaptext_arena, size, KM_SLEEP));
}

void
vbi_text_free(void *va, size_t size)
{
	segkmem_free(heaptext_arena, va, size);
}

int
vbi_cpu_id(void)
{
	return (CPU->cpu_id);
}
int
vbi_max_cpu_id(void)
{
return (max_cpuid);
}
int
vbi_cpu_maxcount(void)
{
return (max_cpuid + 1);
}
int
vbi_cpu_count(void)
{
return (ncpus);
}
int
vbi_cpu_online(int c)
{
	int x;

	mutex_enter(&cpu_lock);
	x = cpu_is_online(cpu[c]);
	mutex_exit(&cpu_lock);
	return (x);
}
void
vbi_preempt_disable(void)
{
	VBI_PREEMPT_DISABLE();
}

void
vbi_preempt_enable(void)
{
	VBI_PREEMPT_ENABLE();
}

void
vbi_execute_on_all(void *func, void *arg)
{
	vbi_cpuset_t set;
	int i;

	for (i = 0; i < VBI_SET_WORDS; ++i)
		set.words[i] = (ulong_t)-1L;
	if (use_old) {
		if (use_old_with_ulong) {
			p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
			    set.words[0], (xc_func_t)func);
		} else {
			p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
			    set, (xc_func_t)func);
		}
	} else {
		xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
	}
}

void
vbi_execute_on_others(void *func, void *arg)
{
	vbi_cpuset_t set;
	int i;

	for (i = 0; i < VBI_SET_WORDS; ++i)
		set.words[i] = (ulong_t)-1L;
	BT_CLEAR(set.words, vbi_cpu_id());
	if (use_old) {
		if (use_old_with_ulong) {
			p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
			    set.words[0], (xc_func_t)func);
		} else {
			p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
			    set, (xc_func_t)func);
		}
	} else {
		xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
	}
}

void
vbi_execute_on_one(void *func, void *arg, int c)
{
	vbi_cpuset_t set;
	int i;

	for (i = 0; i < VBI_SET_WORDS; ++i)
		set.words[i] = 0;
	BT_SET(set.words, c);
	if (use_old) {
		if (use_old_with_ulong) {
			p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
			    set.words[0], (xc_func_t)func);
		} else {
			p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
			    set, (xc_func_t)func);
		}
	} else {
		xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
	}
}
int
vbi_lock_va(void *addr, size_t len, int access, void **handle)
{
	faultcode_t err;

	/*
	 * kernel mappings on x86 are always locked, so only handle user.
	 */
	*handle = NULL;
	if (!IS_KERNEL(addr)) {
		err = as_fault(curproc->p_as->a_hat, curproc->p_as,
		    (caddr_t)addr, len, F_SOFTLOCK, access);
		if (err != 0) {
			return (-1);
		}
	}
	return (0);
}

/*ARGSUSED*/
void
vbi_unlock_va(void *addr, size_t len, int access, void *handle)
{
	if (!IS_KERNEL(addr))
		as_fault(curproc->p_as->a_hat, curproc->p_as,
		    (caddr_t)addr, len, F_SOFTUNLOCK, access);
}

uint64_t
vbi_va_to_pa(void *addr)
{
	struct hat *hat;
	pfn_t pfn;
	uintptr_t v = (uintptr_t)addr;

	if (IS_KERNEL(v))
		hat = kas.a_hat;
	else
		hat = curproc->p_as->a_hat;
	pfn = hat_getpfnum(hat, (caddr_t)(v & PAGEMASK));
	if (pfn == PFN_INVALID)
		return (-(uint64_t)1);
	return (((uint64_t)pfn << PAGESHIFT) | (v & PAGEOFFSET));
}
struct segvbi_crargs {
	uint64_t *palist;
	uint_t prot;
};

struct segvbi_data {
	uint_t prot;
};
static struct seg_ops segvbi_ops;
static int
segvbi_create(struct seg *seg, void *args)
{
	struct segvbi_crargs *a = args;
	struct segvbi_data *data;
	struct as *as = seg->s_as;
	caddr_t va;
	ulong_t pgcnt;
	ulong_t p;

	hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
	data = kmem_zalloc(sizeof (*data), KM_SLEEP);
	data->prot = a->prot | PROT_USER;

	seg->s_ops = &segvbi_ops;
	seg->s_data = data;

	/*
	 * now load locked mappings to the pages
	 */
	va = seg->s_base;
	pgcnt = (seg->s_size + PAGESIZE - 1) >> PAGESHIFT;
	for (p = 0; p < pgcnt; ++p, va += PAGESIZE) {
		hat_devload(as->a_hat, va, PAGESIZE,
		    (pfn_t)(a->palist[p] >> PAGESHIFT),
		    data->prot | HAT_UNORDERED_OK, HAT_LOAD | HAT_LOAD_LOCK);
	}
	return (0);
}
/*
* Duplicate a seg and return new segment in newseg.
*/
static int
segvbi_dup(struct seg *seg, struct seg *newseg)
{
	struct segvbi_data *data = seg->s_data;
	struct segvbi_data *ndata;

	ndata = kmem_zalloc(sizeof (*ndata), KM_SLEEP);
	ndata->prot = data->prot;
	newseg->s_ops = &segvbi_ops;
	newseg->s_data = ndata;
	return (0);
}
static int
segvbi_unmap(struct seg *seg, caddr_t addr, size_t len)
{
	if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
	    (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
		panic("segvbi_unmap");

	if (addr != seg->s_base || len != seg->s_size)
		return (ENOTSUP);

	hat_unload(seg->s_as->a_hat, addr, len,
	    HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK);
	seg_free(seg);
	return (0);
}

static void
segvbi_free(struct seg *seg)
{
	struct segvbi_data *data = seg->s_data;

	kmem_free(data, sizeof (*data));
}
/*
* We would demand fault if the (u)read() path would SEGOP_FAULT()
* on buffers mapped in via vbi_user_map() i.e. prefaults before DMA.
* Don't fail in such case where we're called directly, see #5047.
*/
static int
segvbi_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    enum fault_type type, enum seg_rw rw)
{
	return (0);
}

static int
segvbi_faulta(struct seg *seg, caddr_t addr)
{
	return (0);
}

static int
segvbi_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	return (EACCES);
}

static int
segvbi_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	return (EINVAL);
}

static int
segvbi_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
	return (-1);
}

static int
segvbi_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
{
	return (0);
}

static size_t
segvbi_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
{
	size_t v;

	for (v = 0; len > 0; len -= PAGESIZE, v += PAGESIZE)
		*vec++ = 1;
	return (v);
}

static int
segvbi_lockop(struct seg *seg, caddr_t addr, size_t len, int attr, int op,
    ulong_t *lockmap, size_t pos)
{
	return (0);
}

static int
segvbi_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	ulong_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
	if (pgno != 0)
	{
		do
		{
			pgno--;
			protv[pgno] = ((struct segvbi_data *)seg->s_data)->prot;
		} while (pgno != 0);
	}
	return (0);
}

static u_offset_t
segvbi_getoffset(struct seg *seg, caddr_t addr)
{
	return ((uintptr_t)addr - (uintptr_t)seg->s_base);
}

static int
segvbi_gettype(struct seg *seg, caddr_t addr)
{
	return (MAP_SHARED);
}

static vnode_t vbipage;

static int
segvbi_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	*vpp = &vbipage;
	return (0);
}

static int
segvbi_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
{
	return (0);
}

static void
segvbi_dump(struct seg *seg)
{}

static int
segvbi_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp,
    enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}

static int
segvbi_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
{
	return (ENOTSUP);
}

static int
segvbi_getmemid(struct seg *seg, caddr_t addr, memid_t *memid)
{
	return (ENODEV);
}

static lgrp_mem_policy_info_t *
segvbi_getpolicy(struct seg *seg, caddr_t addr)
{
	return (NULL);
}

static int
segvbi_capable(struct seg *seg, segcapability_t capability)
{
	return (0);
}

static struct seg_ops segvbi_ops = {
	segvbi_dup,
	segvbi_unmap,
	segvbi_free,
	(faultcode_t (*)())segvbi_fault,
	(faultcode_t (*)())segvbi_faulta,
	segvbi_setprot,
	segvbi_checkprot,
	(int (*)())segvbi_kluster,
	(size_t (*)(struct seg *))NULL, /* swapout */
	segvbi_sync,
	segvbi_incore,
	segvbi_lockop,
	segvbi_getprot,
	segvbi_getoffset,
	segvbi_gettype,
	segvbi_getvp,
	segvbi_advise,
	segvbi_dump,
	segvbi_pagelock,
	segvbi_setpagesize,
	segvbi_getmemid,
	segvbi_getpolicy,
	segvbi_capable
};
/*
* Interfaces to inject physical pages into user address space
* and later remove them.
*/
int
vbi_user_map(caddr_t *va, uint_t prot, uint64_t *palist, size_t len)
{
	struct as *as = curproc->p_as;
	struct segvbi_crargs args;
	int error = 0;

	args.palist = palist;
	args.prot = prot;
	as_rangelock(as);
	map_addr(va, len, 0, 0, MAP_SHARED);
	if (*va != NULL)
		error = as_map(as, *va, len, segvbi_create, &args);
	else
		error = ENOMEM;
	as_rangeunlock(as);
	return (error);
}
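/*
 * A usage sketch (illustrative names only, not part of the interface): a
 * caller holding per-page physical addresses for a 16 page buffer could
 * inject them into the current process like this.
 */
#if 0
static int
example_user_map(uint64_t palist[16])
{
	caddr_t uva = NULL;	/* let map_addr() pick a hole */

	/* palist holds one physical address per page of the buffer */
	return (vbi_user_map(&uva, PROT_READ | PROT_WRITE, palist,
	    16 * PAGESIZE));
}
#endif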
/*
* This is revision 2 of the interface.
*/
struct vbi_cpu_watch {
void (*vbi_cpu_func)(void *, int, int);
void *vbi_cpu_arg;
};
static int
vbi_watcher(cpu_setup_t state, int icpu, void *arg)
{
	vbi_cpu_watch_t *w = arg;
	int online;

	if (state == CPU_ON)
		online = 1;
	else if (state == CPU_OFF)
		online = 0;
	else
		return (0);
	w->vbi_cpu_func(w->vbi_cpu_arg, icpu, online);
	return (0);
}
vbi_cpu_watch_t *
vbi_watch_cpus(void (*func)(void *, int, int), void *arg, int current_too)
{
	int c;
	vbi_cpu_watch_t *w;

	w = kmem_alloc(sizeof (*w), KM_SLEEP);
	w->vbi_cpu_func = func;
	w->vbi_cpu_arg = arg;
	mutex_enter(&cpu_lock);
	register_cpu_setup_func(vbi_watcher, w);
	if (current_too) {
		for (c = 0; c < ncpus; ++c) {
			if (cpu_is_online(cpu[c]))
				func(arg, c, 1);
		}
	}
	mutex_exit(&cpu_lock);
	return (w);
}
void
vbi_ignore_cpus(vbi_cpu_watch_t *w)
{
	mutex_enter(&cpu_lock);
	unregister_cpu_setup_func(vbi_watcher, w);
	mutex_exit(&cpu_lock);
	kmem_free(w, sizeof (*w));
}
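/*
 * Usage sketch with an illustrative callback name: the callback fires with
 * online set to 1 or 0 as CPUs come and go; passing current_too != 0 also
 * reports every CPU already online at registration time.
 */
#if 0
static void
my_cpu_callback(void *arg, int icpu, int online)
{
	/* track per-CPU state for icpu here */
}

static void
example_cpu_watch(void)
{
	vbi_cpu_watch_t *w = vbi_watch_cpus(my_cpu_callback, NULL, 1);
	/* ... */
	vbi_ignore_cpus(w);
}
#endif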
/*
* Simple timers are pretty much a pass through to the cyclic subsystem.
*/
struct vbi_stimer {
	cyc_handler_t	s_handler;
	cyc_time_t	s_fire_time;
	cyclic_id_t	s_cyclic;
	uint64_t	s_tick;
	void		(*s_func)(void *, uint64_t);
	void		*s_arg;
};

static void
vbi_stimer_func(void *arg)
{
	vbi_stimer_t *t = arg;

	t->s_func(t->s_arg, ++t->s_tick);
}

extern vbi_stimer_t *
vbi_stimer_begin(
	void (*func)(void *, uint64_t),
	void *arg,
	uint64_t when,
	uint64_t interval,
	int on_cpu)
{
	vbi_stimer_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP);

	t->s_handler.cyh_func = vbi_stimer_func;
	t->s_handler.cyh_arg = t;
	t->s_handler.cyh_level = CY_LOCK_LEVEL;
	t->s_tick = 0;
	t->s_func = func;
	t->s_arg = arg;

	mutex_enter(&cpu_lock);
	if (on_cpu != VBI_ANY_CPU && !cpu_is_online(cpu[on_cpu])) {
		kmem_free(t, sizeof (*t));
		t = NULL;
		goto done;
	}

	when += gethrtime();
	t->s_fire_time.cyt_when = when;
	if (interval == 0)
		t->s_fire_time.cyt_interval = CY_INFINITY;
	else
		t->s_fire_time.cyt_interval = interval;
	t->s_cyclic = cyclic_add(&t->s_handler, &t->s_fire_time);
	if (on_cpu != VBI_ANY_CPU)
		cyclic_bind(t->s_cyclic, cpu[on_cpu], NULL);
done:
	mutex_exit(&cpu_lock);
	return (t);
}
extern void
vbi_stimer_end(vbi_stimer_t *t)
{
	mutex_enter(&cpu_lock);
	cyclic_remove(t->s_cyclic);
	mutex_exit(&cpu_lock);
	kmem_free(t, sizeof (*t));
}
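/*
 * Usage sketch (illustrative callback name): a 10ms periodic timer not bound
 * to any particular CPU; the tick argument increments by one per expiry.
 */
#if 0
static void
my_tick(void *arg, uint64_t tick)
{
}

static void
example_stimer(void)
{
	vbi_stimer_t *st = vbi_stimer_begin(my_tick, NULL, 0 /* when */,
	    10000000 /* interval, ns */, VBI_ANY_CPU);
	/* ... */
	vbi_stimer_end(st);
}
#endif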
/*
* Global timers are more complicated. They include a counter on the callback,
* that indicates the first call on a given cpu.
*/
struct vbi_gtimer {
	uint64_t	*g_counters;
	void		(*g_func)(void *, uint64_t);
	void		*g_arg;
	uint64_t	g_when;
	uint64_t	g_interval;
	cyclic_id_t	g_cyclic;
};

static void
vbi_gtimer_func(void *arg)
{
	vbi_gtimer_t *t = arg;

	t->g_func(t->g_arg, ++t->g_counters[vbi_cpu_id()]);
}

/*
 * Whenever a cpu is onlined, need to reset the g_counters[] for it to zero.
 */
static void
vbi_gtimer_online(void *arg, cpu_t *pcpu, cyc_handler_t *h, cyc_time_t *ct)
{
	vbi_gtimer_t *t = arg;
	hrtime_t now;

	t->g_counters[pcpu->cpu_id] = 0;
	h->cyh_func = vbi_gtimer_func;
	h->cyh_arg = t;
	h->cyh_level = CY_LOCK_LEVEL;
	now = gethrtime();
	if (t->g_when < now)
		ct->cyt_when = now + t->g_interval / 2;
	else
		ct->cyt_when = t->g_when;
	ct->cyt_interval = t->g_interval;
}

vbi_gtimer_t *
vbi_gtimer_begin(
	void (*func)(void *, uint64_t),
	void *arg,
	uint64_t when,
	uint64_t interval)
{
	vbi_gtimer_t *t;
	cyc_omni_handler_t omni;

	/*
	 * one shot global timer is not supported yet.
	 */
	if (interval == 0)
		return (NULL);

	t = kmem_zalloc(sizeof (*t), KM_SLEEP);
	t->g_counters = kmem_zalloc(ncpus * sizeof (uint64_t), KM_SLEEP);
	t->g_when = when + gethrtime();
	t->g_interval = interval;
	t->g_arg = arg;
	t->g_func = func;
	t->g_cyclic = CYCLIC_NONE;

	omni.cyo_online = vbi_gtimer_online;
	omni.cyo_offline = NULL;
	omni.cyo_arg = t;

	mutex_enter(&cpu_lock);
	t->g_cyclic = cyclic_add_omni(&omni);
	mutex_exit(&cpu_lock);

	return (t);
}
extern void
vbi_gtimer_end(vbi_gtimer_t *t)
{
	mutex_enter(&cpu_lock);
	cyclic_remove(t->g_cyclic);
	mutex_exit(&cpu_lock);
	kmem_free(t->g_counters, ncpus * sizeof (uint64_t));
	kmem_free(t, sizeof (*t));
}
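/*
 * Usage sketch, reusing the illustrative my_tick callback above: unlike the
 * simple timer, the global timer's callback runs on every online CPU, and
 * the counter argument is per-CPU, restarting from 1 the first time the
 * cyclic fires on a given (possibly newly onlined) CPU.
 */
#if 0
static void
example_gtimer(void)
{
	vbi_gtimer_t *gt = vbi_gtimer_begin(my_tick, NULL, 0 /* when */,
	    10000000 /* interval, ns */);
	/* ... */
	vbi_gtimer_end(gt);
}
#endif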
int
vbi_is_preempt_enabled(void)
{
if (vbi_is_initialized) {
char tpr = VBI_T_PREEMPT;
return (tpr == 0);
} else {
return 1;
}
}
void
vbi_poke_cpu(int c)
{
if (c < ncpus)
poke_cpu(c);
}
/*
* This is revision 5 of the interface.
*/
void *
vbi_lowmem_alloc(uint64_t phys, size_t size)
{
	return (vbi_internal_alloc(&phys, size, PAGESIZE /* alignment */, 0 /* non-contiguous */));
}

void
vbi_lowmem_free(void *va, size_t size)
{
	p_contig_free(va, size);
}
/*
* This is revision 6 of the interface.
*/
int
vbi_is_preempt_pending(void)
{
	char crr = VBI_CPU_RUNRUN;
	char krr = VBI_CPU_KPRUNRUN;

	return (crr != 0 || krr != 0);
}
/*
* This is revision 7 of the interface.
*/
void *
vbi_phys_alloc(uint64_t *phys, size_t size, uint64_t alignment, int contig)
{
	return (vbi_internal_alloc(phys, size, alignment, contig));
}

void
vbi_phys_free(void *va, size_t size)
{
	p_contig_free(va, size);
}
/*
* This is revision 8 of the interface.
*/
page_t **
vbi_pages_alloc(uint64_t *phys, size_t size)
{
	/*
	 * the page freelist and cachelist both hold pages that are not mapped into any address space.
	 * the cachelist is not really free pages but when memory is exhausted they'll be moved to the
	 * free lists.
	 * it's the total of the free+cache list that we see on the 'free' column in vmstat.
	 */
	page_t **pp_pages = NULL;
	pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;

	/* reserve available memory for pages */
	int rc = page_resv(npages, KM_NOSLEEP);
	if (rc)
	{
		/* create the pages */
		rc = page_create_wait(npages, 0 /* flags */);
		if (rc)
		{
			/* alloc space for page_t pointer array */
			size_t pp_size = npages * sizeof (page_t *);
			pp_pages = kmem_zalloc(pp_size, KM_SLEEP);
			if (pp_pages)
			{
				/*
				 * get pages from kseg, the 'virtAddr' here is only for colouring but unfortunately
				 * we don't have the 'virtAddr' to which this memory may be mapped.
				 */
				caddr_t virtAddr = NULL;
				for (int64_t i = 0; i < npages; i++, virtAddr += PAGESIZE)
				{
					/* get a page from the freelists */
					page_t *ppage = vbi_page_get_fromlist(1 /* freelist */,
					    virtAddr, PAGESIZE);
					if (!ppage)
					{
						/* try from the cachelists */
						ppage = vbi_page_get_fromlist(2 /* cachelist */,
						    virtAddr, PAGESIZE);
						if (!ppage)
						{
							/* damn */
							page_create_putback(npages - i);
							while (--i >= 0)
								page_free(pp_pages[i], 0 /* don't need, move to tail */);
							kmem_free(pp_pages, pp_size);
							page_unresv(npages);
							return NULL;
						}

						/* remove association with the vnode for pages from the cachelist */
						if (!PP_ISAGED(ppage))
							page_hashout(ppage, NULL /* mutex */);
					}

					PP_CLRFREE(ppage);	/* Page is not free anymore */
					PP_CLRAGED(ppage);	/* Page is not hashed in */
					pp_pages[i] = ppage;
				}

				/*
				 * we now have the pages locked exclusively, before they are mapped in
				 * we must downgrade the lock.
				 */
				*phys = (uint64_t)page_pptonum(pp_pages[0]) << PAGESHIFT;
				return pp_pages;
			}

			page_create_putback(npages);
		}

		page_unresv(npages);
	}

	return NULL;
}
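/*
 * Lifecycle sketch for the revision 8 page interfaces (illustrative names
 * and sizes): allocate the backing pages, downgrade the page locks and
 * collect the physical addresses via the premap step, then free.
 */
#if 0
static void
example_pages(void)
{
	uint64_t phys;
	uint64_t pas[4];
	page_t **pages = vbi_pages_alloc(&phys, 4 * PAGESIZE);

	if (pages && vbi_pages_premap(pages, 4 * PAGESIZE, pas) == 0) {
		/* map the pages using the physical addresses in pas[] */
	}
	if (pages)
		vbi_pages_free(pages, phys, 4 * PAGESIZE);
}
#endif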
void
vbi_pages_free(page_t **pp_pages, uint64_t phys, size_t size)
{
	pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;
	size_t pp_size = npages * sizeof (page_t *);

	for (pgcnt_t i = 0; i < npages; i++)
	{
		/* we need to exclusive lock the pages before freeing them */
		int rc = page_tryupgrade(pp_pages[i]);
		if (!rc)
		{
			page_unlock(pp_pages[i]);
			while (!page_lock(pp_pages[i], SE_EXCL, NULL /* mutex */, P_RECLAIM))
				;
		}

		page_free(pp_pages[i], 0 /* don't need, move to tail */);
	}

	kmem_free(pp_pages, pp_size);
	page_unresv(npages);
}
int
vbi_pages_premap(page_t **pp_pages, size_t size, uint64_t *pphysaddrs)
{
	if (!pphysaddrs)
		return -1;

	pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;
	for (pgcnt_t i = 0; i < npages; i++)
	{
		/*
		 * downgrade the exclusive page lock to a shared lock if the
		 * pages is locked exclusively.
		 */
		if (page_tryupgrade(pp_pages[i]) == 1)
			page_downgrade(pp_pages[i]);
		pphysaddrs[i] = vbi_page_to_pa(pp_pages, i);
	}

	return 0;
}
uint64_t
vbi_page_to_pa(page_t **pp_pages, pgcnt_t i)
{
	pfn_t pfn = page_pptonum(pp_pages[i]);
	if (pfn == PFN_INVALID)
		panic("vbi_page_to_pa: page_pptonum() failed\n");
	return (uint64_t)pfn << PAGESHIFT;
}
static vnode_t vbipagevp;

static page_t *
vbi_page_get_fromlist(uint_t freelist, caddr_t virtAddr, size_t pgsize)
{
	/* pgsize only applies when using the freelist */
	seg_t kernseg;
	kernseg.s_as = &kas;

	page_t *ppage = NULL;
	if (freelist == 1)
	{
		ppage = page_get_freelist(&vbipagevp, 0 /* offset */, &kernseg,
		    virtAddr, pgsize, 0 /* flags */, NULL /* local group */);
		if (!ppage && use_kflt)
		{
			ppage = page_get_freelist(&vbipagevp, 0 /* offset */, &kernseg,
			    virtAddr, pgsize, 0x0200 /* PG_KFLT */, NULL /* local group */);
		}
	}
	else
	{
		/* cachelist */
		ppage = page_get_cachelist(&vbipagevp, 0 /* offset */, &kernseg,
		    virtAddr, 0 /* flags */, NULL /* local group */);
		if (!ppage && use_kflt)
		{
			ppage = page_get_cachelist(&vbipagevp, 0 /* offset */, &kernseg,
			    virtAddr, 0x0200 /* PG_KFLT */, NULL /* local group */);
		}
	}
	return ppage;
}
/*
* Large page code.
*/
page_t *
vbi_large_page_alloc(uint64_t *pphys, size_t pgsize)
{
	pgcnt_t const npages = pgsize >> PAGESHIFT;
	page_t *pproot, *pp, *pplist;
	pgcnt_t ipage;
	caddr_t vaddr;
	int rc;

	/*
	 * Reserve available memory for a large page and create it.
	 */
	rc = page_resv(npages, KM_NOSLEEP);
	if (!rc)
		return NULL;

	rc = page_create_wait(npages, 0 /* flags */);
	if (!rc) {
		page_unresv(npages);
		return NULL;
	}

	/*
	 * Get a page off the free list. We set vaddr to 0 since we don't know
	 * where the memory is going to be mapped.
	 */
	vaddr = NULL;
	pproot = vbi_page_get_fromlist(1 /* freelist */, vaddr, pgsize);
	if (!pproot)
	{
		page_create_putback(npages);
		page_unresv(npages);
		return NULL;
	}
	AssertMsg(!(page_pptonum(pproot) & (npages - 1)), ("%p:%lx npages=%lx\n", pproot, page_pptonum(pproot), npages));

	/*
	 * Mark all the sub-pages as non-free and not-hashed-in.
	 * It is paramount that we destroy the list (before freeing it).
	 */
	pplist = pproot;
	for (ipage = 0; ipage < npages; ipage++) {
		pp = pplist;
		page_sub(&pplist, pp);

		PP_CLRFREE(pp);		/* Page is not free anymore */
		PP_CLRAGED(pp);		/* Page is not hashed in */
	}

	*pphys = (uint64_t)page_pptonum(pproot) << PAGESHIFT;
	return pproot;
}
void
vbi_large_page_free(page_t *pproot, size_t pgsize)
{
	pgcnt_t const npages = pgsize >> PAGESHIFT;
	pgcnt_t ipage;

	AssertMsg(!(page_pptonum(pproot) & (npages - 1)), ("%p:%lx npages=%lx\n", pproot, page_pptonum(pproot), npages));

	/*
	 * We need to exclusively lock the sub-pages before freeing
	 * the large one.
	 */
	for (ipage = 0; ipage < npages; ipage++) {
		page_t *pp = page_nextn(pproot, ipage);
		int rc = page_tryupgrade(pp);
		if (!rc) {
			page_unlock(pp);
			while (!page_lock(pp, SE_EXCL, NULL /* mutex */, P_RECLAIM)) {
				/*nothing*/;
			}
		}
	}

	/*
	 * Free the large page and unreserve the memory.
	 */
	page_free_pages(pproot);
	page_unresv(npages);
}
int
vbi_large_page_premap(page_t *pproot, size_t pgsize)
{
	pgcnt_t const npages = pgsize >> PAGESHIFT;
	pgcnt_t ipage;

	AssertMsg(!(page_pptonum(pproot) & (npages - 1)), ("%p:%lx npages=%lx\n", pproot, page_pptonum(pproot), npages));

	/*
	 * We need to downgrade the sub-pages from exclusive to shared locking
	 * because otherwise we cannot <you go figure>.
	 */
	for (ipage = 0; ipage < npages; ipage++) {
		page_t *pp = page_nextn(pproot, ipage);
		page_downgrade(pp);
	}

	return 0;
}
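/*
 * Large page lifecycle sketch, assuming 2MB large pages on amd64
 * (illustrative names only):
 */
#if 0
static void
example_large_page(void)
{
	size_t const pgsize = 2 * 1024 * 1024;
	uint64_t phys;
	page_t *lp = vbi_large_page_alloc(&phys, pgsize);

	if (lp) {
		vbi_large_page_premap(lp, pgsize);
		/* map it, use it, unmap it, then: */
		vbi_large_page_free(lp, pgsize);
	}
}
#endif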
/*
 * As more functions are added, each should start with a comment indicating
 * the revision it was introduced in, and the revision level above this point
 * in the file should be increased. Also change vbi_modlmisc at the top of
 * the file.
 *
 * NOTE! We'll start caring about this if anything in here ever makes it into
 * the solaris kernel proper.
 */