vboxfs_vnode.c revision 72cc0d1a8b7c8406808a92eb0b9eb58bf6f8c3aa
/** @file
* VirtualBox File System for Solaris Guests, vnode implementation.
* Portions contributed by: Ronald.
*/
/*
* Copyright (C) 2009-2010 Oracle Corporation
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*
* The contents of this file may alternatively be used under the terms
* of the Common Development and Distribution License Version 1.0
* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
* VirtualBox OSE distribution, in which case the provisions of the
* CDDL are applicable instead of those of the GPL.
*
* You may elect to license modified versions of this file under the
* terms and conditions of either the GPL or the CDDL or both.
*/
/*
* Shared Folder File System is used from Solaris when run as a guest operating
* system on VirtualBox, though is meant to be usable with any hypervisor that
* can provide similar functionality. The sffs code handles all the Solaris
* specific semantics and relies on a provider module to actually access
* directories, files, etc. The provider interfaces are described in
* "vboxfs_prov.h" and the module implementing them is shipped as part of the
* VirtualBox Guest Additions for Solaris.
*
* The shared folder file system is similar to a networked file system,
* but with some caveats. The sffs code caches minimal information and proxies
* out to the provider whenever possible. Here are some things that are
* handled in this code and not by the proxy:
*
* - a way to open ".." from any already open directory
* - st_ino numbers
* - detecting directory changes that happened on the host.
*
* The implementation builds a cache of information for every file/directory
* ever accessed in all mounted sffs filesystems using sf_node structures.
*
* This information for both open or closed files can become invalid if
* asynchronous changes are made on the host. Solaris should not panic() in
* this event, but some file system operations may return unexpected errors.
* Information for such directories or files while they have active vnodes
* is removed from the regular cache and stored in a "stale" bucket until
* the vnode becomes completely inactive.
*
* We support only read-only mmap (VBOXVFS_WITH_MMAP), i.e. MAP_SHARED or
* MAP_PRIVATE with PROT_READ. This data caching would not be coherent with
* normal simultaneous read()/write() operations, nor will it be coherent
* with data access on the host. Writable mmap(MAP_SHARED) access is not
* implemented, as guaranteeing any kind of coherency with concurrent
* activity on the host would be near impossible with the existing
* interfaces.
*
* A note about locking. sffs is not a high performance file system.
* No fine grained locking is done. The one sffs_lock protects just about
* everything.
*/
#include <unistd.h>
#include <sys/sysmacros.h>
#if !defined(VBOX_VFS_SOLARIS_10U6)
#include <sys/vfs_opreg.h>
#endif
#include <sys/pathname.h>
#include "vboxfs_prov.h"
#include "vboxfs_vnode.h"
#include "vboxfs_vfs.h"
#define VBOXVFS_WITH_MMAP
static avl_tree_t sfnodes;
static avl_tree_t stale_sfnodes;
/*
* For now we'll use an I/O buffer that doesn't page fault for VirtualBox
* to transfer data into.
*/
char *sffs_buffer;
/*
* sfnode_compare() is needed for AVL tree functionality.
* The nodes are sorted by mounted filesystem, then path. If the
* nodes are stale, the node pointer itself is used to force uniqueness.
*
* The ordering difference is collapsed to -1, 0 or 1 before it is returned.
* Both branches assert that the two nodes agree on staleness, so a stale
* node is never expected to be compared against a non-stale one.
*
* NOTE(review): in the text visible here x and y are never declared, diff is
* never assigned, and both "if (diff == 0)" statements have no body.
* Presumably x and y are a and b viewed as sfnode pointers and diff is
* computed from the mount, the path and (for stale nodes) the pointers -
* confirm against the complete file.
*/
static int
sfnode_compare(const void *a, const void *b)
{
int diff;
if (x->sf_is_stale) {
/* stale nodes may only be compared with other stale nodes */
ASSERT(y->sf_is_stale);
if (diff == 0)
} else {
/* likewise, live nodes are only compared with live nodes */
ASSERT(!y->sf_is_stale);
if (diff == 0)
}
/* normalize the difference to the -1 / 0 / 1 result returned to the caller */
if (diff < 0)
return (-1);
if (diff > 0)
return (1);
return (0);
}
/*
* Construct a new pathname given an sfnode plus an optional tail component.
* This handles ".." and "."
*/
static char *
{
char *p;
strcat(p, "/");
return (p);
}
/*
* Clears the (cached) directory listing for the node.
*/
static void
{
}
}
/*
* Open the provider file associated with a vnode. Holding the file open is
* the only way we have of trying to have a vnode continue to refer to the
* same host file in the host in light of the possibility of host side renames.
*/
static void
{
int error;
sfp_file_t *fp;
return;
if (error == 0)
}
/*
* get a new vnode reference for an sfnode
*/
vnode_t *
{
} else {
#ifndef VBOXVFS_WITH_MMAP
#endif
}
}
/*
* Allocate and initialize a new sfnode and assign it a vnode
*/
sfnode_t *
char *path,
sfp_file_t *fp,
{
/*
* build the sfnode
*/
node->sf_children = 0;
if (parent)
++parent->sf_children;
} else {
node->sf_stat_time = 0;
}
/*
* add the new node to our cache
*/
return (node);
}
/*
* destroy an sfnode
*/
static void
{
top:
if (node->sf_children != 0)
if (node->sf_is_stale)
tree = &stale_sfnodes;
else
if (parent->sf_children == 0)
--parent->sf_children;
if (parent->sf_children == 0 &&
parent->sf_is_stale &&
goto top;
}
}
}
/*
* Some sort of host operation on an sfnode has failed or it has been
* deleted. Mark this node and any children as stale, deleting knowledge
* about any which do not have active vnodes or children.
* This also handles deleting an inactive node that was already stale.
*
* NOTE(review): the signature line and several statements (loop headers,
* conditions, the lookups guarding each panic) are not visible in this
* text; the function name is taken from the panic messages below. Confirm
* details against the complete file.
*/
static void
{
sfnode_t *n;
int len;
/*
* First deal with any children of a directory node.
* If a directory becomes stale, anything below it becomes stale too.
*/
n = node;
/* a node reached by this walk must not already be marked stale */
ASSERT(!n->sf_is_stale);
/*
* quit when no longer seeing children of node
*/
break;
/*
* Either mark the child as stale or destroy it
*/
sfnode_destroy(n);
} else {
/*
* Keep the child but move it out of the regular cache: it is removed
* from sfnodes and flagged stale. The two panics report a child that
* is missing from sfnodes and a duplicate entry respectively.
* NOTE(review): the second message says "sffs_make_stale" while the
* others say "sfnode_make_stale".
*/
panic("sfnode_make_stale(%s)"
" not in sfnodes", n->sf_path);
avl_remove(&sfnodes, n);
n->sf_is_stale = 1;
panic("sffs_make_stale(%s) duplicates",
n->sf_path);
}
}
}
/*
* Now deal with the given node.
*/
} else if (!node->sf_is_stale) {
panic("sfnode_make_stale(%s) not in sfnodes",
}
}
/*
* Return a 64-bit timestamp; judging by the name it is the current time in
* microseconds.
*
* NOTE(review): "now" is neither declared nor computed in the text visible
* here - the statements that obtain the time are missing; confirm the clock
* source and unit against the complete file.
*/
static uint64_t
sfnode_cur_time_usec(void)
{
return now;
}
static int
{
}
static int
{
}
static void
{
node->sf_stat_time = 0;
}
static int
{
int error;
if (error == 0)
return (error);
}
/*
* Rename a file or a directory
*
* Any existing cached node that already has the new name is dealt with
* first, then the children of the renamed node, then the node itself, and
* finally the child count of the new parent is incremented.
*
* NOTE(review): the signature line and many statements (loop headers,
* allocations, AVL lookups and insertions) are not visible in this text; the
* function name is taken from the panic messages below. Confirm details
* against the complete file.
*/
static void
{
sfnode_t *n;
int old_len;
char *new_path;
char *tail;
/*
* Have to remove anything existing that had the new name.
*/
/* only the regular (non-stale) cache is searched for the new name */
template.sf_is_stale = 0;
if (n != NULL)
/*
* Do the renaming, deal with any children of this node first.
*/
/*
* quit when no longer seeing children of node
*/
break;
/*
* Rename the child:
* - build the new path name
* - unlink the AVL node
* - assign the new name
* - re-insert the AVL node
*/
KM_SLEEP);
/* a child that is not present in sfnodes is treated as fatal */
panic("sfnode_rename(%s) not in sfnodes",
n->sf_path);
avl_remove(&sfnodes, n);
/* so is a collision when the renamed child goes back into the tree */
panic("sfnode_rename(%s) duplicates",
n->sf_path);
}
}
/*
* Deal with the given node.
*/
/*
* change the parent
*/
++newparent->sf_children;
}
/*
* Look for a cached node, if not found either handle ".." or try looking
* via the provider. Create an entry in sfnodes if found but not cached yet.
* If the create flag is set, a file or directory is created. If the file
* already existed, an error is returned.
* Nodes returned from this routine always have a vnode with its ref count
* bumped by 1.
*/
static sfnode_t *
char *name,
int *err)
{
int error = 0;
int type;
char *fullpath;
sfp_file_t *fp;
if (err)
/*
* handle referencing myself
*/
return (dir);
/*
* deal with parent
*/
/*
* Look for an existing node.
*/
template.sf_is_stale = 0;
return (NULL);
return (node);
}
/*
* No entry for this path currently.
* Check if the file exists with the provider and get the type from
* there.
*/
} else {
mode_t m;
} else {
error = 0;
}
if (error != 0)
else if (S_ISDIR(m))
else if (S_ISREG(m))
}
if (err)
/*
* If no errors, make a new node and return it.
*/
if (error) {
return (NULL);
}
return (node);
}
/*
* uid and gid in sffs determine owner and group for all files.
*/
static int
{
mode_t m;
int shift = 0;
int error;
/*
* get the mode from the cache or provider
*/
if (sfnode_stat_cached(node))
error = 0;
else
/*
*/
shift += 3;
shift += 3;
}
if (mode == 0) {
error = 0;
} else {
}
return (error);
}
/*
*
* Everything below this point is the set of vnode operations used by the Solaris VFS.
*/
static int
int *eofp,
int flags)
{
int dummy_eof;
int error = 0;
return (EINVAL);
return (ENOTDIR);
*eofp = 0;
*eofp = 1;
return (0);
}
/*
* Get the directory entry names from the host. This gets all
* entries. These are stored in a linked list of sffs_dirents_t
* buffers, each of which contains a list of dirent64_t's.
*/
&dir->sf_dir_list);
if (error != 0)
goto done;
}
/*
* Validate and skip to the desired offset.
*/
offset = 0;
}
goto done;
}
int step;
break;
}
goto done;
}
}
/*
* Lookup each of the names, so that we have ino's, and copy to
* result buffer.
*/
offset = 0;
continue;
}
dirent = (struct sffs_dirent *)
break;
} else {
panic("sffs_readdir() lookup failed");
}
if (error != 0)
break;
}
*eofp = 1;
done:
if (error != 0)
return (error);
}
#if defined(VBOX_VFS_SOLARIS_10U6)
/*
* TODO (Joe): this may need more logic; check how other file systems handle it.
*/
static int
int cmd,
{
}
#else
/*
* TODO (Joe): this may need more logic; check how other file systems handle it.
*/
static int
int cmd,
{
}
#endif
static int
int flags,
{
int error = 0;
if (!sfnode_stat_cached(node)) {
if (error != 0)
goto done;
}
{
}
{
}
{
}
done:
return (error);
}
static int
int flags,
{
int error;
return (error);
}
static int
int cmd,
int flags,
{
int error;
/* we only support changing the length of the file */
return ENOSYS;
return (error);
}
/*ARGSUSED*/
static int
int ioflag,
{
int error = 0;
return (EISDIR);
return (EINVAL);
{
mutex_enter(&p->p_lock);
p, RCA_UNSAFE_SIGINFO);
mutex_exit(&p->p_lock);
return (EFBIG);
}
if (uio->uio_loffset < 0)
return (EINVAL);
if (total == 0)
return (0);
return (EINVAL);
}
do {
/*
* a partial read is never an error
*/
error = 0;
return (error);
}
/*ARGSUSED*/
static int
int ioflag,
{
int error = 0;
return (EISDIR);
return (EINVAL);
/*
* We have to hold this lock for a long time to keep
* multiple FAPPEND writes from intermixing
*/
return (EINVAL);
}
if (error != 0) {
return (error);
}
}
return (EINVAL);
}
limit = MAXOFFSET_T;
mutex_enter(&p->p_lock);
p, RCA_UNSAFE_SIGINFO);
mutex_exit(&p->p_lock);
return (EFBIG);
}
return (EFBIG);
}
if (total == 0) {
return (0);
}
do {
break;
}
}
if (error != 0)
break;
if (error == 0)
break;
}
/*
* A short write is never really an error.
*/
error = 0;
return (error);
}
/*ARGSUSED*/
static int
{
int error;
return (error);
}
/*
* Lookup an entry in a directory and create a new vnode if found.
*/
/* ARGSUSED3 */
static int
char *name, /* the name of the file or directory */
int flags,
int *direntflags,
{
int error;
/*
* dvp must be a directory
*/
return (ENOTDIR);
/*
* An empty component name or just "." means the directory itself.
* Don't do any further lookup or checking.
*/
return (0);
}
/*
* Check permission to look at this directory. We always allow "..".
*/
if (error) {
return (error);
}
}
/*
* Lookup the node.
*/
}
/*ARGSUSED*/
static int
char *name,
int mode,
int flag,
{
int error;
/*
* this is used for regular files, not mkdir
*/
return (EISDIR);
return (EINVAL);
/*
* is this a pre-existing file?
*/
else if (error != 0)
return (error);
/*
* Operation on a pre-existing file.
*/
return (EEXIST);
}
return (EISDIR);
}
if (error != 0) {
return (error);
}
/*
* handle truncating an existing file
*/
else
if (error) {
return (error);
}
}
return (0);
}
/*
* Create a new node. First check for a race creating it.
*/
return (EEXIST);
}
/*
* Doesn't exist yet and we have the lock, so create it.
*/
int lookuperr;
if (error)
}
return (lookuperr);
return (0);
}
/*ARGSUSED*/
static int
char *nm,
int flags,
{
int error;
/*
* These should never happen
*/
/*
* Do an unlocked look up first
*/
if (error == 0) {
return (EEXIST);
}
return (error);
/*
* Must be able to write in current directory
*/
if (error) {
return (error);
}
if (error)
}
return (lookuperr);
return (0);
}
/*ARGSUSED*/
static int
char *nm,
int flags)
{
int error;
/*
* Return error when removing . and ..
*/
return (EINVAL);
return (EEXIST);
if (error)
return (error);
return (ENOTDIR);
}
#ifdef VBOXVFS_WITH_MMAP
if (vn_vfswlock(vp)) {
return (EBUSY);
}
#endif
if (vn_mountedvfs(vp)) {
return (EBUSY);
}
if (error)
goto done;
/*
* If anything else is using this vnode, then fail the remove.
* Why? Windows hosts can't remove something that is open,
* so we have to sfprov_close() it first.
* There is no errno for this - since it's not a problem on UNIX,
* but EINVAL is the closest.
*/
goto done;
}
}
/*
* Remove the directory on the host and mark the node as stale.
*/
done:
#ifdef VBOXVFS_WITH_MMAP
#endif
return (error);
}
#ifdef VBOXVFS_WITH_MMAP
static caddr_t
{
/* Use seg_kpm driver if possible (64-bit) */
if (kpm_enable)
}
static void
{
if (kpm_enable)
else
}
/*
* Called when there's no page in the cache. This will create new page(s) and read
* the file data into them.
*/
static int
{
int error = 0;
if (pagelistsize == PAGESIZE)
{
}
else
/* If page already exists return success */
if (!ppages)
{
return (0);
}
/*
* Map & read page-by-page.
*/
{
/*
* If we reuse pages without zero'ing them, one process can mmap() and read-past the length
* to read previously mmap'd contents (from possibly other processes).
*/
if (error != 0)
{
/* Get rid of all kluster pages read & bail. */
return (error);
}
}
/*
* Fill in the pagelist from kluster at the requested offset.
*/
return (0);
}
/*ARGSUSED*/
static int
#if !defined(VBOX_VFS_SOLARIS_10U6)
#endif
)
{
int error = 0;
int is_recursive = 0;
return (ENOSYS); /* Will this ever happen? */
/* Don't bother about faultahead for now. */
return (0);
if (len > pagelistsize)
len = pagelistsize;
else
if (protp)
/*
* The buffer passed to sffs_write may be mmap'd so we may get a
* pagefault there, in which case we'll end up here with this thread
* already owning the mutex. Mutexes aren't recursive.
*/
is_recursive = 1;
else
/* Don't map pages past end of the file. */
{
return (EFAULT);
}
while (len > 0)
{
/*
* Look for pages in the requested offset range, or create them if we can't find any.
*/
{
while (pagelist > pageliststart)
page_unlock(*--pagelist);
if (!is_recursive)
return (error);
}
while (*pagelist)
{
if (len > 0)
{
}
pagelistsize -= PAGESIZE;
pagelist++;
}
}
/*
* Fill the page list array with any pages left in the cache.
*/
while ( pagelistsize > 0
{
pagelistsize -= PAGESIZE;
}
if (!is_recursive)
return (error);
}
/*ARGSUSED*/
static int
int flags,
#if !defined(VBOX_VFS_SOLARIS_10U6)
#endif
)
{
/*
* We don't support PROT_WRITE mmaps. For normal writes we do not map and IO via
* vop_putpage() either, therefore, afaik this shouldn't ever be called.
*/
return (ENOSYS);
}
/*ARGSUSED*/
static int
int flags,
{
/*
* This would not get invoked i.e. via pvn_vplist_dirty() since we don't support
* PROT_WRITE mmaps and therefore will not have dirty pages.
*/
return (0);
}
/*ARGSUSED*/
static int
#if !defined(VBOX_VFS_SOLARIS_10U6)
#endif
)
{
/*
* Invocation: mmap()->smmap_common()->VOP_MAP()->sffs_map(). Once the
* segment driver creates the new segment via segvn_create(), it'll
* invoke down the line VOP_ADDMAP()->sffs_addmap()
*/
int error = 0;
if ((prot & PROT_WRITE))
return (ENOTSUP);
return (ENXIO);
return (ENODEV);
return (ENOSYS);
return (EAGAIN);
#if defined(VBOX_VFS_SOLARIS_10U6)
{
}
else
#else
#endif
if (error)
{
return (error);
}
return (error);
}
/*ARGSUSED*/
static int
#if !defined(VBOX_VFS_SOLARIS_10U6)
#endif
)
{
return (ENOSYS);
return (0);
}
/*ARGSUSED*/
static int
#if !defined(VBOX_VFS_SOLARIS_10U6)
#endif
)
{
return (ENOSYS);
return (0);
}
#endif /* VBOXVFS_WITH_MMAP */
/*ARGSUSED*/
static int
char *name,
int flags)
{
int error;
/*
* These should never happen
*/
if (error)
return (error);
if (error)
goto done;
/*
* If anything else is using this vnode, then fail the remove.
* Why? Windows hosts can't sfprov_remove() a file that is open,
* so we have to sfprov_close() it first.
* There is no errno for this - since it's not a problem on UNIX,
* but ETXTBSY is the closest.
*/
goto done;
}
}
/*
* Remove the file on the host and mark the node as stale.
*/
done:
return (error);
}
/*ARGSUSED*/
static int
char *old_nm,
char *new_nm,
int flags)
{
char *newpath;
int error;
return (EINVAL);
/*
* make sure we have permission to do the rename
*/
if (error)
goto done;
goto done;
}
/*
* Rename the file on the host and in our caches.
*/
if (error == 0)
else {
}
done:
return (error);
}
/*ARGSUSED*/
static int
{
int error;
/*
* Ask the host to sync any data it may have cached for open files.
*/
else
error = 0;
return (error);
}
/*
* This may be the last reference, possibly time to close the file and
* destroy the vnode. If the sfnode is stale, we'll destroy that too.
*/
/*ARGSUSED*/
static void
#if defined(VBOX_VFS_SOLARIS_10U6)
#else
#endif
{
/*
* nothing to do if this isn't the last use
*/
return;
}
if (vn_has_cached_data(vp)) {
#ifdef VBOXVFS_WITH_MMAP
/* We're fine with releasing the vnode lock here as we should be covered by the sffs_lock */
/* We won't have any dirty pages, this will just invalidate (destroy) the pages and move it to the cachelist. */
#else
panic("sffs_inactive() found cached data");
#endif
}
/*
* destroy the vnode
*/
vn_invalid(vp);
/*
* Close the sf_file for the node.
*/
}
/*
* Free the directory entries for the node. This should normally
* have been taken care of in sffs_close(), but better safe than
* sorry.
*/
/*
* If the node is stale, we can also destroy it.
*/
return;
}
/*
* All the work for this is really done in lookup.
*/
/*ARGSUSED*/
static int
{
return (0);
}
/*
* All the work for this is really done in inactive.
*/
/*ARGSUSED*/
static int
int flag,
int count,
{
/*
* Free the directory entries for the node. We do this on this call
* here because the directory node may not become inactive for a long
* time after the readdir is over. Case in point, if somebody cd's into
* the directory then it won't become inactive until they cd away again.
* In such a case we would end up with the directory listing not getting
* updated (i.e. the result of 'ls' always being the same) until they
* change the working directory.
*/
return (0);
}
/* ARGSUSED */
static int
{
return (EINVAL);
{
return (0);
return (0);
}
return (EINVAL);
}
return (0);
}
/*
* By returning an error for this, we prevent anything in sffs from
* being re-exported by NFS
*/
/* ARGSUSED */
static int
{
return (ENOTSUP);
}
/*
* vnode operations for regular files
*/
/*
* Table of fs_operation_def_t entries describing the sffs vnode operations.
* Two variants are selected at compile time by VBOX_VFS_SOLARIS_10U6, and
* each variant has an additional section that is only present when
* VBOXVFS_WITH_MMAP is defined.
*
* NOTE(review): the individual table entries are not visible in this text;
* confirm the operation list and terminator against the complete file.
*/
const fs_operation_def_t sffs_ops_template[] = {
#if defined(VBOX_VFS_SOLARIS_10U6)
# ifdef VBOXVFS_WITH_MMAP
# endif
#else
# ifdef VBOXVFS_WITH_MMAP
# endif
#endif
};
/*
* Also, init and fini functions...
*/
/*
* Initialize the vnode layer.
*
* Returns 0 on success, or the non-zero error code held in err on failure.
*
* NOTE(review): the statement that assigns err (and any other setup work)
* is not visible in this text; confirm what is initialized against the
* complete file.
*/
int
sffs_vnode_init(void)
{
int err;
/* propagate a setup failure unchanged to the caller */
if (err)
return (err);
return (0);
}
/*
* Tear down the vnode layer; clears the global sffs_buffer pointer.
*
* NOTE(review): as the text stands here, "if (sffs_ops)" has no statement of
* its own and therefore guards the sffs_buffer check that follows.
* Presumably a call releasing sffs_ops and a call freeing the buffer are
* missing from this view - confirm against the complete file.
*/
void
sffs_vnode_fini(void)
{
if (sffs_ops)
if (sffs_buffer != NULL) {
/* forget the buffer so that a stale pointer is never reused */
sffs_buffer = NULL;
}
}
/*
* Utility at unmount to get all nodes in that mounted filesystem removed.
*/
int
{
/*
* Check that no vnodes are active.
*/
return (-1);
return (-1);
}
return (-1);
}
/*
* All clear to destroy all node information. Since there are no
* vnodes, the make stale will cause deletion.
*/
else
break;
panic("vboxfs: purge hit active vnode");
} else {
}
}
return (0);
}
#if 0
/* Debug helper functions */
static void
{
Log((" type=%s (%d)",
if (node->sf_children)
}
/*
* Debug helper (compiled out by the enclosing "#if 0"): prints nodes by
* calling sfnode_print() on each one, including a walk that advances through
* stale_sfnodes with AVL_NEXT().
*
* NOTE(review): the loop headers are incomplete in this text - only the
* tail of the stale_sfnodes loop is visible - so the exact set of trees
* that is walked should be confirmed against the complete file.
*/
static void
sfnode_list(void)
{
sfnode_t *n;
sfnode_print(n);
n = AVL_NEXT(&stale_sfnodes, n))
sfnode_print(n);
}
#endif