vboxfs_vnode.c revision 72cc0d1a8b7c8406808a92eb0b9eb58bf6f8c3aa
/** @file
* VirtualBox File System for Solaris Guests, vnode implementation.
* Portions contributed by: Ronald.
*/
/*
* Copyright (C) 2009-2010 Oracle Corporation
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*
* The contents of this file may alternatively be used under the terms
* of the Common Development and Distribution License Version 1.0
* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
* VirtualBox OSE distribution, in which case the provisions of the
* CDDL are applicable instead of those of the GPL.
*
* You may elect to license modified versions of this file under the
* terms and conditions of either the GPL or the CDDL or both.
*/
/*
* Shared Folder File System is used from Solaris when run as a guest operating
* system on VirtualBox, though is meant to be usable with any hypervisor that
* can provide similar functionality. The sffs code handles all the Solaris
* specific semantics and relies on a provider module to actually access
* directories, files, etc. The provider interfaces are described in
* "vboxfs_prov.h" and the module implementing them is shipped as part of the
* VirtualBox Guest Additions for Solaris.
*
* The shared folder file system is similar to a networked file system,
* but with some caveats. The sffs code caches minimal information and proxies
* out to the provider whenever possible. Here are some things that are
* handled in this code and not by the proxy:
*
* - a way to open ".." from any already open directory
* - st_ino numbers
* - detecting directory changes that happened on the host.
*
* The implementation builds a cache of information for every file/directory
* ever accessed in all mounted sffs filesystems using sf_node structures.
*
* This information for both open or closed files can become invalid if
* asynchronous changes are made on the host. Solaris should not panic() in
* this event, but some file system operations may return unexpected errors.
* Information for such directories or files while they have active vnodes
* is removed from the regular cache and stored in a "stale" bucket until
* the vnode becomes completely inactive.
*
* We support only read-only mmap (VBOXVFS_WITH_MMAP), i.e. MAP_SHARED or
* MAP_PRIVATE mappings with PROT_READ. This data caching is not coherent
* with normal simultaneous read()/write() operations, nor is it coherent
* with data access on the host. Writable mmap(MAP_SHARED) access is not
* implemented, as guaranteeing any kind of coherency with concurrent
* activity on the host would be near impossible with the existing
* interfaces.
*
* A note about locking. sffs is not a high performance file system.
* No fine grained locking is done. The one sffs_lock protects just about
* everything.
*/
#include <VBox/log.h>
#include <iprt/asm.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mntent.h>
#include <sys/param.h>
#include <sys/modctl.h>
#include <sys/mount.h>
#include <sys/policy.h>
#include <sys/atomic.h>
#include <sys/sysmacros.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/vfs.h>
#include <sys/vmsystm.h>
#include <vm/seg_kpm.h>
#include <vm/pvn.h>
#if !defined(VBOX_VFS_SOLARIS_10U6)
#include <sys/vfs_opreg.h>
#endif
#include <sys/pathname.h>
#include <sys/dirent.h>
#include <sys/fs_subr.h>
#include <sys/time.h>
#include "vboxfs_prov.h"
#include "vboxfs_vnode.h"
#include "vboxfs_vfs.h"
#define VBOXVFS_WITH_MMAP
/* vnode operations vector; attached to each new sffs vnode via vn_setops() */
static struct vnodeops *sffs_ops = NULL;
/* the one global lock; the sfnode helpers in this file assert it is held */
kmutex_t sffs_lock;
/* cache of all non-stale sfnodes (searched with avl_find() by sffs + path) */
static avl_tree_t sfnodes;
/* sfnodes that went stale but still have an active vnode or children */
static avl_tree_t stale_sfnodes;
/*
* For now we'll use an I/O buffer that doesn't page fault for VirtualBox
* to transfer data into. sffs_read() and sffs_write() stage at most
* PAGESIZE bytes at a time through it while holding sffs_lock.
* NOTE(review): the allocation is not visible in this part of the file;
* confirm the buffer is at least PAGESIZE bytes.
*/
char *sffs_buffer;
/*
 * sfnode_compare() is the ordering function needed for AVL tree
 * functionality.
 *
 * Non-stale nodes are sorted by mounted filesystem, then by path.
 * Stale nodes are sorted by path; if the paths are equal the node pointer
 * itself is used to force uniqueness.
 *
 * Pointer keys are compared directly rather than by subtracting them into
 * an int: on a 64-bit kernel the truncated difference could have the wrong
 * sign, or even be zero for two distinct pointers, which would corrupt the
 * tree ordering. The direction of the pointer ordering (larger address
 * sorts first) matches the historic "y - x" computation.
 *
 * Returns -1, 0 or 1 as required by the AVL code.
 */
static int
sfnode_compare(const void *a, const void *b)
{
	const sfnode_t *x = a;
	const sfnode_t *y = b;
	uintptr_t xkey;
	uintptr_t ykey;
	int diff;

	if (x->sf_is_stale) {
		ASSERT(y->sf_is_stale);
		diff = strcmp(x->sf_path, y->sf_path);
		if (diff != 0)
			return (diff < 0 ? -1 : 1);
		/* same path: fall back to the node addresses */
		xkey = (uintptr_t)x;
		ykey = (uintptr_t)y;
	} else {
		ASSERT(!y->sf_is_stale);
		xkey = (uintptr_t)x->sf_sffs;
		ykey = (uintptr_t)y->sf_sffs;
		if (xkey == ykey) {
			/* same mount: the path decides */
			diff = strcmp(x->sf_path, y->sf_path);
			if (diff < 0)
				return (-1);
			if (diff > 0)
				return (1);
			return (0);
		}
	}

	if (ykey < xkey)
		return (-1);
	if (ykey > xkey)
		return (1);
	return (0);
}
/*
 * Build "<node path>/<tail>" in a freshly kmem_alloc()ed buffer of exactly
 * strlen(result) + 1 bytes, which the caller owns.
 * "." and ".." are not accepted as tail; passing either one panics.
 */
static char *
sfnode_construct_path(sfnode_t *node, char *tail)
{
	size_t dirlen;
	char *result;

	if (strcmp(tail, ".") == 0 || strcmp(tail, "..") == 0)
		panic("construct path for %s", tail);

	dirlen = strlen(node->sf_path);
	result = kmem_alloc(dirlen + 1 + strlen(tail) + 1, KM_SLEEP);

	/* directory, separator, then the tail including its terminator */
	(void) strcpy(result, node->sf_path);
	result[dirlen] = '/';
	(void) strcpy(result + dirlen + 1, tail);
	return (result);
}
/*
 * Throw away the cached directory listing of a node, freeing every
 * sffs_dirents_t buffer on its list. The caller must hold sffs_lock.
 */
static void
sfnode_clear_dir_list(sfnode_t *node)
{
	sffs_dirents_t *cur;
	sffs_dirents_t *following;

	ASSERT(MUTEX_HELD(&sffs_lock));

	/* detach the whole chain first, then release it piece by piece */
	cur = node->sf_dir_list;
	node->sf_dir_list = NULL;
	while (cur != NULL) {
		following = cur->sf_next;
		kmem_free(cur, SFFS_DIRENTS_SIZE);
		cur = following;
	}
}
/*
 * Open the provider file associated with a node, unless it is already open.
 * Holding the file open is the only way we have of trying to have a vnode
 * continue to refer to the same host file in light of possible host side
 * renames. A failed open is not reported; sf_file simply stays NULL.
 */
static void
sfnode_open(sfnode_t *node)
{
	sfp_file_t *handle;

	if (node->sf_file == NULL &&
	    sfprov_open(node->sf_sffs->sf_handle, node->sf_path, &handle) == 0)
		node->sf_file = handle;
}
/*
 * Return a held vnode for an sfnode.
 *
 * If the node already has a vnode it gets one more hold; otherwise a new
 * vnode is allocated, initialized from the node and attached to it. In both
 * cases an attempt is made to open the provider file.
 */
vnode_t *
sfnode_get_vnode(sfnode_t *node)
{
	vnode_t *vp = node->sf_vnode;

	if (vp == NULL) {
		vp = vn_alloc(KM_SLEEP);
		LogFlowFunc((" %s gets vnode 0x%p\n", node->sf_path, vp));
		vp->v_type = node->sf_type;
		vp->v_vfsp = node->sf_sffs->sf_vfsp;
		vn_setops(vp, sffs_ops);
#ifdef VBOXVFS_WITH_MMAP
		vp->v_flag = VNOSWAP;
#else
		vp->v_flag = VNOSWAP | VNOMAP;
#endif
		vn_exists(vp);
		vp->v_data = node;
		node->sf_vnode = vp;
	} else {
		VN_HOLD(vp);
	}

	sfnode_open(node);
	return (vp);
}
/*
 * Allocate and initialize a new sfnode and enter it into the sfnodes cache.
 *
 * The node takes ownership of "path" and a hold on the mount's vfs. If a
 * parent is given its child count is bumped. When "stat" is supplied it is
 * copied in together with "stat_time"; otherwise the stat time is set to 0.
 * No vnode is created here. Panics if an equal node is already cached.
 * The caller must hold sffs_lock.
 */
sfnode_t *
sfnode_make(
	sffs_data_t *sffs,
	char *path,
	vtype_t type,
	sfp_file_t *fp,
	sfnode_t *parent,	/* can be NULL for root */
	sffs_stat_t *stat,
	uint64_t stat_time)
{
	avl_index_t slot;
	sfnode_t *fresh;

	ASSERT(MUTEX_HELD(&sffs_lock));
	ASSERT(path != NULL);

	LogFlowFunc(("sffs_make(%s)\n", path));
	fresh = kmem_alloc(sizeof (*fresh), KM_SLEEP);

	/* identity */
	fresh->sf_sffs = sffs;
	VFS_HOLD(sffs->sf_vfsp);
	fresh->sf_path = path;
	fresh->sf_ino = sffs->sf_ino++;
	fresh->sf_type = type;
	fresh->sf_is_stale = 0;	/* never stale at creation */

	/* open state; sf_vnode must be clear before any sfnode_get_vnode() */
	fresh->sf_file = fp;
	fresh->sf_vnode = NULL;

	/* position in the directory hierarchy */
	fresh->sf_children = 0;
	fresh->sf_parent = parent;
	if (parent != NULL)
		parent->sf_children++;
	fresh->sf_dir_list = NULL;

	/* attribute cache */
	if (stat == NULL) {
		fresh->sf_stat_time = 0;
	} else {
		fresh->sf_stat = *stat;
		fresh->sf_stat_time = stat_time;
	}

	/* finally publish the node in the cache */
	if (avl_find(&sfnodes, fresh, &slot) != NULL)
		panic("sffs_create_sfnode(%s): duplicate sfnode_t", path);
	avl_insert(&sfnodes, fresh, slot);
	return (fresh);
}
/*
 * Destroy an sfnode: remove it from whichever tree it lives in, drop its
 * vfs hold and free its directory list, path and the node itself.
 *
 * The parent's cached listing is discarded and its child count dropped.
 * If that leaves a stale parent with no children and no vnode, the parent
 * is destroyed too, and so on up the chain.
 *
 * Panics if the node still has children or a vnode, or cannot be found.
 * The caller must hold sffs_lock.
 */
static void
sfnode_destroy(sfnode_t *node)
{
	avl_index_t slot;
	avl_tree_t *home;
	sfnode_t *up;

	for (;;) {
		/* remember the parent now; the node is about to be freed */
		up = node->sf_parent;
		ASSERT(MUTEX_HELD(&sffs_lock));
		ASSERT(node->sf_path != NULL);
		LogFlowFunc(("sffs_destroy(%s)%s\n", node->sf_path, node->sf_is_stale ? " stale": ""));
		if (node->sf_children != 0)
			panic("sfnode_destroy(%s) has %d children", node->sf_path, node->sf_children);
		if (node->sf_vnode != NULL)
			panic("sfnode_destroy(%s) has active vnode", node->sf_path);

		home = node->sf_is_stale ? &stale_sfnodes : &sfnodes;
		if (avl_find(home, node, &slot) == NULL)
			panic("sfnode_destroy(%s) not found", node->sf_path);
		avl_remove(home, node);

		VFS_RELE(node->sf_sffs->sf_vfsp);
		sfnode_clear_dir_list(node);
		kmem_free(node->sf_path, strlen(node->sf_path) + 1);
		kmem_free(node, sizeof (*node));

		if (up == NULL)
			return;

		sfnode_clear_dir_list(up);
		if (up->sf_children == 0)
			panic("sfnode_destroy parent (%s) has no child", up->sf_path);
		--up->sf_children;

		/* only a stale, childless, vnode-less parent goes as well */
		if (up->sf_children != 0 || !up->sf_is_stale ||
		    up->sf_vnode != NULL)
			return;
		node = up;
	}
}
/*
 * Move one node from the regular cache into the stale tree, dropping its
 * cached directory listing. Panics if it is not in the regular cache or an
 * equal node is already in the stale tree.
 */
static void
sfnode_move_to_stale(sfnode_t *n)
{
	avl_index_t slot;

	sfnode_clear_dir_list(n);
	if (avl_find(&sfnodes, n, &slot) == NULL)
		panic("sfnode_make_stale(%s) not in sfnodes", n->sf_path);
	avl_remove(&sfnodes, n);
	n->sf_is_stale = 1;
	if (avl_find(&stale_sfnodes, n, &slot) != NULL)
		panic("sffs_make_stale(%s) duplicates", n->sf_path);
	avl_insert(&stale_sfnodes, n, slot);
}

/*
 * Some sort of host operation on an sfnode has failed or it has been
 * deleted. Mark this node and any children as stale, destroying the ones
 * that have neither an active vnode nor children. This also handles
 * deleting an inactive node that was already stale.
 * The caller must hold sffs_lock.
 */
static void
sfnode_make_stale(sfnode_t *node)
{
	sfnode_t *child;
	int prefix_len;

	ASSERT(MUTEX_HELD(&sffs_lock));

	/*
	 * If a directory becomes stale, anything below it becomes stale too.
	 * Every child handled here leaves the sfnodes tree, so asking for
	 * the successor of "node" again yields the next candidate.
	 */
	if (!node->sf_is_stale && node->sf_type == VDIR) {
		prefix_len = strlen(node->sf_path);
		for (;;) {
			child = AVL_NEXT(&sfnodes, node);
			if (child == NULL)
				break;
			ASSERT(!child->sf_is_stale);

			/* quit when no longer seeing children of node */
			if (child->sf_sffs != node->sf_sffs ||
			    strncmp(node->sf_path, child->sf_path, prefix_len) != 0 ||
			    child->sf_path[prefix_len] != '/')
				break;

			/* unused children go away, the rest turn stale */
			if (child->sf_vnode == NULL && child->sf_children == 0) {
				sfnode_destroy(child);
				continue;
			}
			LogFlowFunc(("sffs_make_stale(%s) sub\n", child->sf_path));
			sfnode_move_to_stale(child);
		}
	}

	/*
	 * Now deal with the given node.
	 */
	if (node->sf_vnode == NULL && node->sf_children == 0) {
		sfnode_destroy(node);
		return;
	}
	if (node->sf_is_stale)
		return;

	LogFlowFunc(("sffs_make_stale(%s)\n", node->sf_path));
	if (node->sf_parent)
		sfnode_clear_dir_list(node->sf_parent);
	sfnode_move_to_stale(node);
}
/*
 * Current time stamp for the stat cache: the lbolt tick counter converted
 * to microseconds.
 */
static uint64_t
sfnode_cur_time_usec(void)
{
	clock_t ticks = ddi_get_lbolt();
	clock_t usec = drv_hztousec(ticks);

	return (usec);
}
/*
 * Tell whether the node's cached attributes are still fresh, i.e. whether
 * less than sf_stat_ttl * 1000 microseconds passed since sf_stat_time.
 * Returns non-zero if the cache may be used.
 */
static int
sfnode_stat_cached(sfnode_t *node)
{
	uint64_t age = sfnode_cur_time_usec() - node->sf_stat_time;

	return (age < node->sf_sffs->sf_stat_ttl * 1000L);
}
/*
 * Fetch mode, size and the three time stamps of "path" from the provider
 * into *stat. Returns the provider's error code.
 */
static int
sfnode_get_stat(sfp_mount_t *mnt, char *path, sffs_stat_t *stat)
{
	int rc;

	rc = sfprov_get_attr(mnt, path,
	    &stat->sf_mode,
	    &stat->sf_size,
	    &stat->sf_atime,
	    &stat->sf_mtime,
	    &stat->sf_ctime);
	return (rc);
}
/*
 * Forget when the node's attributes were last fetched by resetting the
 * cache time stamp to 0.
 */
static void
sfnode_invalidate_stat_cache(sfnode_t *node)
{
	sfnode_t *target = node;

	target->sf_stat_time = 0;
}
/*
 * Refresh the node's cached attributes from the provider.
 *
 * On success the cache time stamp is set to the current time. If the
 * provider reports ENOENT the node is made stale. Returns the provider's
 * error code.
 */
static int
sfnode_update_stat_cache(sfnode_t *node)
{
	int rc = sfnode_get_stat(node->sf_sffs->sf_handle, node->sf_path,
	    &node->sf_stat);

	switch (rc) {
	case 0:
		node->sf_stat_time = sfnode_cur_time_usec();
		break;
	case ENOENT:
		/* gone on the host */
		sfnode_make_stale(node);
		break;
	default:
		break;
	}
	return (rc);
}
/*
* Rename a file or a directory in the sfnode cache.
*
* node      - the (non-stale) sfnode being renamed; must have a parent
* newparent - the sfnode of the directory that contains it afterwards
* path      - the new full path; the node takes ownership of this buffer
*             and later frees it with kmem_free(path, strlen(path) + 1)
*
* Any cached node that already uses the new path is made stale first.
* For a directory, the cached nodes following it in the tree that share its
* "path/" prefix get that prefix replaced as well. Every path change is done
* as remove / re-key / re-insert because the path is part of the AVL sort
* key. Finally the child counts and cached directory listings of the old
* and new parent are updated. The caller must hold sffs_lock.
* NOTE(review): only the cache is touched here; presumably the caller has
* already done the rename with the provider - confirm against the caller.
*/
static void
sfnode_rename(sfnode_t *node, sfnode_t *newparent, char *path)
{
sfnode_t *n;
sfnode_t template;
avl_index_t where;
int len = strlen(path);
int old_len;
char *new_path;
char *tail;
ASSERT(MUTEX_HELD(&sffs_lock));
ASSERT(!node->sf_is_stale);
/*
* Have to remove anything existing that had the new name.
* The template only carries the fields sfnode_compare() looks at.
*/
template.sf_sffs = node->sf_sffs;
template.sf_path = path;
template.sf_is_stale = 0;
n = avl_find(&sfnodes, &template, &where);
if (n != NULL)
sfnode_make_stale(n);
/*
* Do the renaming, deal with any children of this node first.
* node still carries its old path here, so its successor in the tree
* is the next not yet renamed child (if any).
*/
if (node->sf_type == VDIR) {
old_len = strlen(node->sf_path);
while ((n = AVL_NEXT(&sfnodes, node)) != NULL) {
/*
* quit when no longer seeing children of node
*/
if (n->sf_sffs != node->sf_sffs ||
strncmp(node->sf_path, n->sf_path, old_len) != 0 ||
n->sf_path[old_len] != '/')
break;
/*
* Rename the child:
* - build the new path name
* - unlink the AVL node
* - assign the new name
* - re-insert the AVL name
*/
ASSERT(strlen(n->sf_path) > old_len);
tail = n->sf_path + old_len; /* includes initial "/" */
new_path = kmem_alloc(len + strlen(tail) + 1,
KM_SLEEP);
strcpy(new_path, path);
strcat(new_path, tail);
if (avl_find(&sfnodes, n, &where) == NULL)
panic("sfnode_rename(%s) not in sfnodes",
n->sf_path);
avl_remove(&sfnodes, n);
LogFlowFunc(("sfnode_rname(%s to %s) sub\n", n->sf_path, new_path));
/* tail pointed into the old path, which is released only now */
kmem_free(n->sf_path, strlen(n->sf_path) + 1);
n->sf_path = new_path;
if (avl_find(&sfnodes, n, &where) != NULL)
panic("sfnode_rename(%s) duplicates",
n->sf_path);
avl_insert(&sfnodes, n, where);
}
}
/*
* Deal with the given node.
*/
if (avl_find(&sfnodes, node, &where) == NULL)
panic("sfnode_rename(%s) not in sfnodes", node->sf_path);
avl_remove(&sfnodes, node);
LogFlowFunc(("sfnode_rname(%s to %s)\n", node->sf_path, path));
kmem_free(node->sf_path, strlen(node->sf_path) + 1);
/* ownership of the caller's buffer passes to the node here */
node->sf_path = path;
if (avl_find(&sfnodes, node, &where) != NULL)
panic("sfnode_rename(%s) duplicates", node->sf_path);
avl_insert(&sfnodes, node, where);
/*
* change the parent
*/
if (node->sf_parent == NULL)
panic("sfnode_rename(%s) no parent", node->sf_path);
if (node->sf_parent->sf_children == 0)
panic("sfnode_rename(%s) parent has no child", node->sf_path);
/* both directories changed, so neither cached listing is valid */
sfnode_clear_dir_list(node->sf_parent);
sfnode_clear_dir_list(newparent);
--node->sf_parent->sf_children;
node->sf_parent = newparent;
++newparent->sf_children;
}
/*
 * Look for a cached node, if not found either handle ".." or try looking
 * via the provider. Create an entry in sfnodes if found but not cached yet.
 *
 * dir       - directory node to look in
 * name      - component name; "" and "." yield dir, ".." yields dir's
 *             parent (which is NULL for the root)
 * create    - VNON for a plain lookup; VREG or VDIR to create a file or
 *             directory with the provider
 * stat      - optional already known attributes (plain lookup only); when
 *             NULL they are fetched from the provider
 * stat_time - time stamp belonging to stat
 * err       - optional; receives 0 or the reason for a NULL return
 *
 * If the create flag is set and the name is already cached, NULL is
 * returned and *err is set to EEXIST, so that a caller which reports *err
 * on a NULL return never reports success by accident.
 *
 * This routine does not take a vnode hold on the returned node; callers
 * that need a vnode use sfnode_get_vnode(). The caller must hold sffs_lock.
 */
static sfnode_t *
sfnode_lookup(
	sfnode_t *dir,
	char *name,
	vtype_t create,
	sffs_stat_t *stat,
	uint64_t stat_time,
	int *err)
{
	avl_index_t where;
	sfnode_t template;
	sfnode_t *node;
	int error = 0;
	int type;
	char *fullpath;
	sfp_file_t *fp;
	sffs_stat_t tmp_stat;

	ASSERT(MUTEX_HELD(&sffs_lock));

	if (err)
		*err = error;

	/*
	 * handle referencing myself
	 */
	if (strcmp(name, "") == 0 || strcmp(name, ".") == 0)
		return (dir);

	/*
	 * deal with parent
	 */
	if (strcmp(name, "..") == 0)
		return (dir->sf_parent);

	/*
	 * Look for an existing node.
	 */
	fullpath = sfnode_construct_path(dir, name);
	template.sf_sffs = dir->sf_sffs;
	template.sf_path = fullpath;
	template.sf_is_stale = 0;
	node = avl_find(&sfnodes, &template, &where);
	if (node != NULL) {
		kmem_free(fullpath, strlen(fullpath) + 1);
		if (create != VNON) {
			/* cannot create what is already there */
			if (err)
				*err = EEXIST;
			return (NULL);
		}
		return (node);
	}

	/*
	 * No entry for this path currently.
	 * Check if the file exists with the provider and get the type from
	 * there.
	 */
	if (create == VREG) {
		type = VREG;
		error = sfprov_create(dir->sf_sffs->sf_handle, fullpath, &fp);
	} else if (create == VDIR) {
		type = VDIR;
		error = sfprov_mkdir(dir->sf_sffs->sf_handle, fullpath, &fp);
	} else {
		fp = NULL;
		type = VNON;
		if (stat == NULL) {
			stat = &tmp_stat;
			error = sfnode_get_stat(dir->sf_sffs->sf_handle,
			    fullpath, stat);
			stat_time = sfnode_cur_time_usec();
		}

		/* only look at the mode once we know the stat was filled in */
		if (error != 0)
			error = ENOENT;
		else if (S_ISDIR(stat->sf_mode))
			type = VDIR;
		else if (S_ISREG(stat->sf_mode))
			type = VREG;
	}

	if (err)
		*err = error;

	/*
	 * If no errors, make a new node and return it. The node takes over
	 * fullpath; on failure we have to release it ourselves.
	 */
	if (error) {
		kmem_free(fullpath, strlen(fullpath) + 1);
		return (NULL);
	}
	node = sfnode_make(dir->sf_sffs, fullpath, type, fp, dir, stat,
	    stat_time);
	return (node);
}
/*
 * Check whether "cr" may access the node in the way described by "mode".
 *
 * uid and gid in sffs determine owner and group for all files. The file's
 * permission bits come from the stat cache (refreshed when expired); if
 * they cannot be obtained no permission is assumed. Whatever the matching
 * owner/group/other bits do not grant is passed on to
 * secpolicy_vnode_access(). Returns 0 if access is allowed.
 * The caller must hold sffs_lock.
 */
static int
sfnode_access(sfnode_t *node, mode_t mode, cred_t *cr)
{
	sffs_data_t *sffs = node->sf_sffs;
	mode_t granted = 0;
	int shift = 0;
	int error = 0;
	vnode_t *vp;

	ASSERT(MUTEX_HELD(&sffs_lock));

	/*
	 * get the mode from the cache or provider
	 */
	if (!sfnode_stat_cached(node))
		error = sfnode_update_stat_cache(node);
	if (error == 0)
		granted = node->sf_stat.sf_mode;

	/*
	 * pick the owner (0), group (3) or other (6) permission bits
	 */
	if (crgetuid(cr) != sffs->sf_uid)
		shift = (groupmember(sffs->sf_gid, cr) == 0) ? 6 : 3;
	mode &= ~(granted << shift);

	/* everything asked for is granted by the file mode */
	if (mode == 0)
		return (0);

	vp = sfnode_get_vnode(node);
	error = secpolicy_vnode_access(cr, vp, sffs->sf_uid, mode);
	VN_RELE(vp);
	return (error);
}
/*
*
* Everything below this point are the vnode operations used by Solaris VFS
*/
/*
* Read directory entries of vp into the caller's buffer.
*
* uiop->uio_loffset selects where to continue; it has to be 0-based offset
* of an entry boundary inside the cached listing, otherwise EINVAL is
* returned. On return it holds the d_off of the last entry copied out.
* *eofp (if given) is set to 1 once the whole listing has been consumed.
* On any error the original offset is restored.
*
* Offsets count the cached representation, where every dirent is preceded
* by an sffs_stat_t; only the dirent part is copied to the caller.
*/
static int
sffs_readdir(
vnode_t *vp,
uio_t *uiop,
cred_t *cred,
int *eofp,
caller_context_t *ct,
int flags)
{
sfnode_t *dir = VN2SFN(vp);
sfnode_t *node;
struct sffs_dirent *dirent = NULL;
sffs_dirents_t *cur_buf;
offset_t offset = 0;
offset_t orig_off = uiop->uio_loffset;
int dummy_eof;
int error = 0;
if (uiop->uio_iovcnt != 1)
return (EINVAL);
if (vp->v_type != VDIR)
return (ENOTDIR);
/* let the rest of the code set *eofp unconditionally */
if (eofp == NULL)
eofp = &dummy_eof;
*eofp = 0;
if (uiop->uio_loffset >= MAXOFFSET_T) {
*eofp = 1;
return (0);
}
/*
* Get the directory entry names from the host. This gets all
* entries. These are stored in a linked list of sffs_dirents_t
* buffers, each of which contains a list of dirent64_t's.
* The list stays cached on the node until something clears it.
*/
mutex_enter(&sffs_lock);
if (dir->sf_dir_list == NULL) {
error = sfprov_readdir(dir->sf_sffs->sf_handle, dir->sf_path,
&dir->sf_dir_list);
if (error != 0)
goto done;
}
/*
* Validate and skip to the desired offset.
* First find the buffer containing the offset; afterwards "offset" is
* the offset at which that buffer starts.
*/
cur_buf = dir->sf_dir_list;
offset = 0;
while (cur_buf != NULL &&
offset + cur_buf->sf_len <= uiop->uio_loffset) {
offset += cur_buf->sf_len;
cur_buf = cur_buf->sf_next;
}
/* ran off the end: only the exact end of the listing is acceptable */
if (cur_buf == NULL && offset != uiop->uio_loffset) {
error = EINVAL;
goto done;
}
/*
* The offset lies inside cur_buf: walk its entries and accept the
* offset only if it equals the d_off of one of them.
*/
if (cur_buf != NULL && offset != uiop->uio_loffset) {
offset_t off = offset;
int step;
dirent = &cur_buf->sf_entries[0];
while (off < uiop->uio_loffset) {
if (dirent->sf_entry.d_off == uiop->uio_loffset)
break;
step = sizeof(sffs_stat_t) + dirent->sf_entry.d_reclen;
dirent = (struct sffs_dirent *) (((char *) dirent) + step);
off += step;
}
/* the loop ended without finding a matching d_off */
if (off >= uiop->uio_loffset) {
error = EINVAL;
goto done;
}
}
/* from here on "offset" is relative to the start of cur_buf */
offset = uiop->uio_loffset - offset;
/*
* Lookup each of the names, so that we have ino's, and copy to
* result buffer.
*/
while (cur_buf != NULL) {
if (offset >= cur_buf->sf_len) {
cur_buf = cur_buf->sf_next;
offset = 0;
continue;
}
dirent = (struct sffs_dirent *)
(((char *) &cur_buf->sf_entries[0]) + offset);
/* stop when the next entry no longer fits the caller's buffer */
if (dirent->sf_entry.d_reclen > uiop->uio_resid)
break;
if (strcmp(dirent->sf_entry.d_name, ".") == 0) {
node = dir;
} else if (strcmp(dirent->sf_entry.d_name, "..") == 0) {
node = dir->sf_parent;
/* the root has no parent, report itself instead */
if (node == NULL)
node = dir;
} else {
/* the stat delivered with the listing seeds the node's stat cache */
node = sfnode_lookup(dir, dirent->sf_entry.d_name, VNON,
&dirent->sf_stat, sfnode_cur_time_usec(), NULL);
if (node == NULL)
panic("sffs_readdir() lookup failed");
}
dirent->sf_entry.d_ino = node->sf_ino;
error = uiomove(&dirent->sf_entry, dirent->sf_entry.d_reclen, UIO_READ, uiop);
if (error != 0)
break;
/* uiomove() advanced the offset by d_reclen; replace it by our cookie */
uiop->uio_loffset= dirent->sf_entry.d_off;
offset += sizeof(sffs_stat_t) + dirent->sf_entry.d_reclen;
}
if (error == 0 && cur_buf == NULL)
*eofp = 1;
done:
mutex_exit(&sffs_lock);
if (error != 0)
uiop->uio_loffset = orig_off;
return (error);
}
/*
 * pathconf vnode operation: everything is delegated to the generic
 * fs_pathconf(). On Solaris 10U6 (VBOX_VFS_SOLARIS_10U6) the operation has
 * no caller context argument; everywhere else "ct" is passed through.
 * This may need more logic, need to look at other file systems.
 */
static int
sffs_pathconf(
	vnode_t *vp,
	int cmd,
	ulong_t *valp,
	cred_t *cr
#if !defined(VBOX_VFS_SOLARIS_10U6)
	, caller_context_t *ct
#endif
	)
{
#if defined(VBOX_VFS_SOLARIS_10U6)
	return (fs_pathconf(vp, cmd, valp, cr));
#else
	return (fs_pathconf(vp, cmd, valp, cr, ct));
#endif
}
/*
 * getattr vnode operation.
 *
 * Owner, group, device and node id come from the mount and the sfnode;
 * times, size and mode come from the (possibly refreshed) stat cache.
 * For directories, regular files and symbolic links the permission bits
 * can be overridden by the mount's sf_dmode/sf_fmode (unless ~0) and are
 * then reduced by sf_dmask/sf_fmask. If refreshing the stat cache fails
 * the error is returned with only the mount derived fields filled in.
 */
static int
sffs_getattr(
	vnode_t *vp,
	vattr_t *vap,
	int flags,
	cred_t *cred,
	caller_context_t *ct)
{
	sfnode_t *node = VN2SFN(vp);
	sffs_data_t *sffs = node->sf_sffs;
	mode_t mode;
	int error = 0;

	mutex_enter(&sffs_lock);

	/* the part that does not need the host */
	vap->va_type = vp->v_type;
	vap->va_uid = sffs->sf_uid;
	vap->va_gid = sffs->sf_gid;
	vap->va_fsid = sffs->sf_vfsp->vfs_dev;
	vap->va_nodeid = node->sf_ino;
	vap->va_nlink = 1;
	vap->va_rdev = sffs->sf_vfsp->vfs_dev;
	vap->va_seq = 0;

	if (!sfnode_stat_cached(node) &&
	    (error = sfnode_update_stat_cache(node)) != 0) {
		mutex_exit(&sffs_lock);
		return (error);
	}

	vap->va_atime = node->sf_stat.sf_atime;
	vap->va_mtime = node->sf_stat.sf_mtime;
	vap->va_ctime = node->sf_stat.sf_ctime;

	mode = node->sf_stat.sf_mode;
	vap->va_mode = mode & MODEMASK;

	if (S_ISDIR(mode)) {
		vap->va_type = VDIR;
		if (sffs->sf_dmode != ~0)
			vap->va_mode = sffs->sf_dmode & 0777;
		vap->va_mode &= ~sffs->sf_dmask;
		vap->va_mode |= S_IFDIR;
	} else if (S_ISREG(mode)) {
		vap->va_type = VREG;
		if (sffs->sf_fmode != ~0)
			vap->va_mode = sffs->sf_fmode & 0777;
		vap->va_mode &= ~sffs->sf_fmask;
		vap->va_mode |= S_IFREG;
	} else if (S_ISFIFO(mode)) {
		vap->va_type = VFIFO;
	} else if (S_ISCHR(mode)) {
		vap->va_type = VCHR;
	} else if (S_ISBLK(mode)) {
		vap->va_type = VBLK;
	} else if (S_ISLNK(mode)) {
		/* symbolic links use the file mode/mask settings */
		vap->va_type = VLNK;
		if (sffs->sf_fmode != ~0)
			vap->va_mode = sffs->sf_fmode & 0777;
		vap->va_mode &= ~sffs->sf_fmask;
		vap->va_mode |= S_IFLNK;
	} else if (S_ISSOCK(mode)) {
		vap->va_type = VSOCK;
	}

	/* size, reported in 512 byte blocks rounded up */
	vap->va_size = node->sf_stat.sf_size;
	vap->va_blksize = 512;
	vap->va_nblocks = (vap->va_size + 511) / 512;

	mutex_exit(&sffs_lock);
	return (error);
}
/*
 * setattr vnode operation.
 *
 * The requested mode is combined with the file type bits matching the
 * vnode's type and handed to the provider together with the attribute mask
 * and the three time stamps. The node's stat cache is invalidated first.
 * If the provider reports ENOENT the node is made stale.
 */
static int
sffs_setattr(
	vnode_t *vp,
	vattr_t *vap,
	int flags,
	cred_t *cred,
	caller_context_t *ct)
{
	sfnode_t *node = VN2SFN(vp);
	mode_t mode = vap->va_mode;
	int error;

	switch (vp->v_type) {
	case VREG:
		mode |= S_IFREG;
		break;
	case VDIR:
		mode |= S_IFDIR;
		break;
	case VBLK:
		mode |= S_IFBLK;
		break;
	case VCHR:
		mode |= S_IFCHR;
		break;
	case VLNK:
		mode |= S_IFLNK;
		break;
	case VFIFO:
		mode |= S_IFIFO;
		break;
	case VSOCK:
		mode |= S_IFSOCK;
		break;
	default:
		/* other types: no type bits are added */
		break;
	}

	mutex_enter(&sffs_lock);
	sfnode_invalidate_stat_cache(node);
	error = sfprov_set_attr(node->sf_sffs->sf_handle, node->sf_path,
	    vap->va_mask, mode, vap->va_atime, vap->va_mtime, vap->va_ctime);
	if (error == ENOENT)
		sfnode_make_stale(node);
	mutex_exit(&sffs_lock);

	return (error);
}
/*
 * space vnode operation. The only request supported is changing the length
 * of the file: l_whence must be SEEK_SET and l_len 0, and the file size is
 * then set to l_start. Anything else yields ENOSYS.
 * If the provider reports ENOENT the node is made stale.
 */
static int
sffs_space(
	vnode_t *vp,
	int cmd,
	struct flock64 *bfp,
	int flags,
	offset_t off,
	cred_t *cred,
	caller_context_t *ct)
{
	sfnode_t *node = VN2SFN(vp);
	int error;

	/* we only support changing the length of the file */
	if (!(bfp->l_whence == SEEK_SET && bfp->l_len == 0))
		return ENOSYS;

	mutex_enter(&sffs_lock);

	/* the size is about to change, so cached attributes are worthless */
	sfnode_invalidate_stat_cache(node);
	error = sfprov_set_size(node->sf_sffs->sf_handle, node->sf_path,
	    bfp->l_start);
	if (error == ENOENT)
		sfnode_make_stale(node);

	mutex_exit(&sffs_lock);
	return (error);
}
/*
 * read vnode operation for regular files.
 *
 * Data is fetched from the provider in pieces of at most PAGESIZE bytes
 * into sffs_buffer and copied out from there, all under sffs_lock. Reading
 * stops at the first error, when the request is satisfied or when the
 * provider delivers no more data. If any data was transferred the call
 * succeeds, whatever happened afterwards.
 */
/*ARGSUSED*/
static int
sffs_read(
	vnode_t *vp,
	struct uio *uio,
	int ioflag,
	cred_t *cred,
	caller_context_t *ct)
{
	sfnode_t *node = VN2SFN(vp);
	int error = 0;
	uint32_t done;
	ulong_t offset;
	ssize_t total;

	if (vp->v_type == VDIR)
		return (EISDIR);
	if (vp->v_type != VREG)
		return (EINVAL);

	if (uio->uio_loffset >= MAXOFFSET_T) {
		proc_t *p = ttoproc(curthread);

		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
		    p, RCA_UNSAFE_SIGINFO);
		mutex_exit(&p->p_lock);
		return (EFBIG);
	}
	if (uio->uio_loffset < 0)
		return (EINVAL);

	total = uio->uio_resid;
	if (total == 0)
		return (0);

	mutex_enter(&sffs_lock);
	sfnode_open(node);
	if (node->sf_file == NULL) {
		mutex_exit(&sffs_lock);
		return (EINVAL);
	}

	for (;;) {
		offset = uio->uio_offset;
		done = MIN(PAGESIZE, uio->uio_resid);
		error = sfprov_read(node->sf_file, sffs_buffer, offset, &done);
		if (error != 0 || done == 0)
			break;
		error = uiomove(sffs_buffer, done, UIO_READ, uio);
		if (error != 0 || uio->uio_resid <= 0)
			break;
	}
	mutex_exit(&sffs_lock);

	/*
	 * a partial read is never an error
	 */
	if (total != uio->uio_resid)
		error = 0;
	return (error);
}
/*
 * write vnode operation for regular files.
 *
 * Data is copied in pieces of at most PAGESIZE bytes into sffs_buffer and
 * written to the provider from there. With FAPPEND the write starts at the
 * current end of file as reported by the provider. Writing never goes
 * beyond the file size limit (uio_llimit, capped at MAXOFFSET_T); a write
 * starting at or beyond the limit triggers the RLIMIT_FSIZE action and
 * fails with EFBIG.
 *
 * "total" keeps the size of the original request, so that the final check
 * can really tell whether some data was written: if so the call succeeds
 * with a short count, whatever stopped the transfer. When the host accepts
 * only part of a piece (or fails, which is treated as nothing accepted)
 * both uio_resid and uio_loffset are moved back to match what was written.
 */
/*ARGSUSED*/
static int
sffs_write(
	vnode_t *vp,
	struct uio *uiop,
	int ioflag,
	cred_t *cred,
	caller_context_t *ct)
{
	sfnode_t *node = VN2SFN(vp);
	int error = 0;
	uint32_t bytes;
	uint32_t done;
	ulong_t offset;
	ssize_t total;
	rlim64_t limit = uiop->uio_llimit;

	if (vp->v_type == VDIR)
		return (EISDIR);
	if (vp->v_type != VREG)
		return (EINVAL);

	/*
	 * We have to hold this lock for a long time to keep
	 * multiple FAPPEND writes from intermixing
	 */
	mutex_enter(&sffs_lock);
	sfnode_open(node);
	if (node->sf_file == NULL) {
		mutex_exit(&sffs_lock);
		return (EINVAL);
	}

	sfnode_invalidate_stat_cache(node);

	if (ioflag & FAPPEND) {
		uint64_t endoffile;

		error = sfprov_get_size(node->sf_sffs->sf_handle,
		    node->sf_path, &endoffile);
		if (error == ENOENT)
			sfnode_make_stale(node);
		if (error != 0) {
			mutex_exit(&sffs_lock);
			return (error);
		}
		uiop->uio_loffset = endoffile;
	}

	if (uiop->uio_loffset < 0) {
		mutex_exit(&sffs_lock);
		return (EINVAL);
	}

	/* limit never exceeds MAXOFFSET_T, so one check covers both */
	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
		limit = MAXOFFSET_T;
	if (uiop->uio_loffset >= limit) {
		proc_t *p = ttoproc(curthread);

		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
		    p, RCA_UNSAFE_SIGINFO);
		mutex_exit(&p->p_lock);
		mutex_exit(&sffs_lock);
		return (EFBIG);
	}

	total = uiop->uio_resid;
	if (total == 0) {
		mutex_exit(&sffs_lock);
		return (0);
	}

	do {
		offset = uiop->uio_offset;
		bytes = MIN(PAGESIZE, uiop->uio_resid);

		/* clip the piece at the file size limit */
		if (offset + bytes >= limit) {
			if (offset >= limit) {
				error = EFBIG;
				break;
			}
			bytes = limit - offset;
		}

		error = uiomove(sffs_buffer, bytes, UIO_WRITE, uiop);
		if (error != 0)
			break;

		done = bytes;
		error = sfprov_write(node->sf_file, sffs_buffer,
		    offset, &done);
		if (error != 0)
			done = 0;	/* count nothing of a failed piece */

		if (done != bytes) {
			/* uiomove() consumed the whole piece, give back the rest */
			uiop->uio_resid += bytes - done;
			uiop->uio_loffset -= bytes - done;
			break;
		}
	} while (error == 0 && uiop->uio_resid > 0 && done > 0);

	mutex_exit(&sffs_lock);

	/*
	 * A short write is never really an error.
	 */
	if (total != uiop->uio_resid)
		error = 0;
	return (error);
}
/*
 * access vnode operation: a locked wrapper around sfnode_access().
 */
/*ARGSUSED*/
static int
sffs_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
{
	int rc;

	mutex_enter(&sffs_lock);
	rc = sfnode_access(VN2SFN(vp), mode, cr);
	mutex_exit(&sffs_lock);

	return (rc);
}
/*
 * lookup vnode operation: find "name" in directory dvp and hand back a
 * held vnode for it in *vpp.
 *
 * "" and "." return dvp itself without any permission check. For all other
 * names except ".." the caller needs execute permission on the directory.
 * Returns ENOTDIR if dvp is no directory and ENOENT if nothing was found;
 * *vpp is only written on success.
 */
/* ARGSUSED3 */
static int
sffs_lookup(
	vnode_t *dvp,		/* the directory vnode */
	char *name,		/* the name of the file or directory */
	vnode_t **vpp,		/* the vnode we found or NULL */
	struct pathname *pnp,
	int flags,
	vnode_t *rdir,
	cred_t *cred,
	caller_context_t *ct,
	int *direntflags,
	struct pathname *realpnp)
{
	sfnode_t *found;
	int error;

	/*
	 * dvp must be a directory
	 */
	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	/*
	 * An empty component name or just "." means the directory itself.
	 * Don't do any further lookup or checking.
	 */
	if (strcmp(name, "") == 0 || strcmp(name, ".") == 0) {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	mutex_enter(&sffs_lock);

	/*
	 * Check permission to look at this directory. We always allow "..".
	 */
	if (strcmp(name, "..") != 0) {
		error = sfnode_access(VN2SFN(dvp), VEXEC, cred);
		if (error) {
			mutex_exit(&sffs_lock);
			return (error);
		}
	}

	/*
	 * Lookup the node and turn it into a held vnode.
	 */
	found = sfnode_lookup(VN2SFN(dvp), name, VNON, NULL, 0, NULL);
	if (found == NULL) {
		mutex_exit(&sffs_lock);
		return (ENOENT);
	}
	*vpp = sfnode_get_vnode(found);
	mutex_exit(&sffs_lock);
	return (0);
}
/*
 * create vnode operation for regular files.
 *
 * If the file already exists it is returned (after an access check and an
 * optional truncation to size 0), unless an exclusive create was asked for.
 * Otherwise the file is created with the provider and, if AT_MODE is set,
 * its mode is applied; a failure to set the mode only produces a warning.
 *
 * The vnode for a newly created node is obtained while sffs_lock is still
 * held, as sfnode_get_vnode() modifies the node and the node must not
 * change under us once the lock is dropped.
 */
/*ARGSUSED*/
static int
sffs_create(
	vnode_t *dvp,
	char *name,
	struct vattr *vap,
	vcexcl_t exclusive,
	int mode,
	vnode_t **vpp,
	cred_t *cr,
	int flag,
	caller_context_t *ct,
	vsecattr_t *vsecp)
{
	vnode_t *vp;
	sfnode_t *node;
	int error;
	int lookuperr = 0;

	ASSERT(name != NULL);

	/*
	 * this is used for regular files, not mkdir
	 */
	if (vap->va_type == VDIR)
		return (EISDIR);
	if (vap->va_type != VREG)
		return (EINVAL);

	/*
	 * is this a pre-existing file?
	 */
	error = sffs_lookup(dvp, name, &vp,
	    NULL, 0, NULL, cr, ct, NULL, NULL);
	if (error == ENOENT)
		vp = NULL;
	else if (error != 0)
		return (error);

	/*
	 * Operation on a pre-existing file.
	 */
	if (vp != NULL) {
		if (exclusive == EXCL) {
			VN_RELE(vp);
			return (EEXIST);
		}
		if (vp->v_type == VDIR && (mode & VWRITE) == VWRITE) {
			VN_RELE(vp);
			return (EISDIR);
		}

		mutex_enter(&sffs_lock);
		node = VN2SFN(vp);
		error = sfnode_access(node, mode, cr);
		if (error != 0) {
			mutex_exit(&sffs_lock);
			VN_RELE(vp);
			return (error);
		}

		sfnode_invalidate_stat_cache(VN2SFN(dvp));

		/*
		 * handle truncating an existing file
		 */
		if (vp->v_type == VREG && (vap->va_mask & AT_SIZE) &&
		    vap->va_size == 0) {
			sfnode_open(node);
			if (node->sf_path == NULL)
				error = ENOENT;
			else
				error = sfprov_trunc(node->sf_sffs->sf_handle,
				    node->sf_path);
			if (error) {
				mutex_exit(&sffs_lock);
				VN_RELE(vp);
				return (error);
			}
		}
		mutex_exit(&sffs_lock);
		*vpp = vp;
		return (0);
	}

	/*
	 * Create a new node. First check for a race creating it.
	 */
	mutex_enter(&sffs_lock);
	node = sfnode_lookup(VN2SFN(dvp), name, VNON, NULL, 0, NULL);
	if (node != NULL) {
		mutex_exit(&sffs_lock);
		return (EEXIST);
	}

	/*
	 * Doesn't exist yet and we have the lock, so create it.
	 */
	sfnode_invalidate_stat_cache(VN2SFN(dvp));
	node = sfnode_lookup(VN2SFN(dvp), name, VREG, NULL, 0, &lookuperr);
	if (node == NULL) {
		mutex_exit(&sffs_lock);
		/* never report success without a vnode */
		return (lookuperr != 0 ? lookuperr : EEXIST);
	}

	if (vap->va_mask & AT_MODE) {
		/* only the mode is set; the times are placeholders */
		timestruc_t unused_time = { 0 };

		error = sfprov_set_attr(node->sf_sffs->sf_handle, node->sf_path,
		    AT_MODE, vap->va_mode, unused_time, unused_time,
		    unused_time);
		if (error)
			cmn_err(CE_WARN, "sffs_create: set_mode(%s, %o) failed"
			    " rc=%d", node->sf_path, vap->va_mode, error);
	}

	/* the parent's cached listing lacks the new entry */
	if (node->sf_parent)
		sfnode_clear_dir_list(node->sf_parent);

	*vpp = sfnode_get_vnode(node);
	mutex_exit(&sffs_lock);
	return (0);
}
/*
 * mkdir vnode operation.
 *
 * Fails with EEXIST if the name can already be looked up. Otherwise, given
 * write permission on the parent, the directory is created with the
 * provider and, if AT_MODE is set, its mode is applied; a failure to set
 * the mode only produces a warning.
 *
 * sffs_lock is not held between the initial lookup and the creation, so
 * another thread may enter the name into the cache in between. In that case
 * sfnode_lookup() returns NULL and this function fails with EEXIST instead
 * of returning success without a vnode. The vnode for the new node is
 * obtained while the lock is still held, as sfnode_get_vnode() modifies
 * the node.
 */
/*ARGSUSED*/
static int
sffs_mkdir(
	vnode_t *dvp,
	char *nm,
	vattr_t *va,
	vnode_t **vpp,
	cred_t *cred,
	caller_context_t *ct,
	int flags,
	vsecattr_t *vsecp)
{
	sfnode_t *node;
	vnode_t *vp;
	int error;
	int lookuperr = EACCES;

	/*
	 * These should never happen
	 */
	ASSERT(nm != NULL);
	ASSERT(strcmp(nm, "") != 0);
	ASSERT(strcmp(nm, ".") != 0);
	ASSERT(strcmp(nm, "..") != 0);

	/*
	 * Do an unlocked look up first
	 */
	error = sffs_lookup(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL);
	if (error == 0) {
		VN_RELE(vp);
		return (EEXIST);
	}
	if (error != ENOENT)
		return (error);

	/*
	 * Must be able to write in current directory
	 */
	mutex_enter(&sffs_lock);
	error = sfnode_access(VN2SFN(dvp), VWRITE, cred);
	if (error) {
		mutex_exit(&sffs_lock);
		return (error);
	}

	sfnode_invalidate_stat_cache(VN2SFN(dvp));
	node = sfnode_lookup(VN2SFN(dvp), nm, VDIR, NULL, 0, &lookuperr);
	if (node == NULL) {
		mutex_exit(&sffs_lock);
		/* a NULL node with no error means the name exists by now */
		return (lookuperr != 0 ? lookuperr : EEXIST);
	}

	if (va->va_mask & AT_MODE) {
		/* only the mode is set; the times are placeholders */
		timestruc_t unused_time = { 0 };

		error = sfprov_set_attr(node->sf_sffs->sf_handle, node->sf_path,
		    AT_MODE, va->va_mode, unused_time, unused_time,
		    unused_time);
		if (error)
			cmn_err(CE_WARN, "sffs_mkdir: set_mode(%s, %o) failed"
			    " rc=%d", node->sf_path, va->va_mode, error);
	}

	/* the parent's cached listing lacks the new entry */
	if (node->sf_parent)
		sfnode_clear_dir_list(node->sf_parent);

	*vpp = sfnode_get_vnode(node);
	mutex_exit(&sffs_lock);
	return (0);
}
/*
 * rmdir vnode operation.
 *
 * "." and "" are refused with EINVAL, ".." with EEXIST. The directory must
 * not be a mount point and the caller needs execute and write permission
 * on the parent. On success (or if the host says the directory is already
 * gone) the node is made stale.
 *
 * With VBOXVFS_WITH_MMAP the vnode's vfs write lock is taken for the
 * duration of the operation; every return path after that point, including
 * the mount point check, releases it again.
 */
/*ARGSUSED*/
static int
sffs_rmdir(
	struct vnode *dvp,
	char *nm,
	vnode_t *cdir,
	cred_t *cred,
	caller_context_t *ct,
	int flags)
{
	sfnode_t *node;
	vnode_t *vp;
	int error;

	/*
	 * Return error when removing . and ..
	 */
	if (strcmp(nm, ".") == 0 || strcmp(nm, "") == 0)
		return (EINVAL);
	if (strcmp(nm, "..") == 0)
		return (EEXIST);

	error = sffs_lookup(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL);
	if (error)
		return (error);
	if (vp->v_type != VDIR) {
		VN_RELE(vp);
		return (ENOTDIR);
	}

#ifdef VBOXVFS_WITH_MMAP
	if (vn_vfswlock(vp)) {
		VN_RELE(vp);
		return (EBUSY);
	}
#endif

	if (vn_mountedvfs(vp)) {
#ifdef VBOXVFS_WITH_MMAP
		/* don't leave the vnode locked behind us */
		vn_vfsunlock(vp);
#endif
		VN_RELE(vp);
		return (EBUSY);
	}

	node = VN2SFN(vp);

	mutex_enter(&sffs_lock);
	error = sfnode_access(VN2SFN(dvp), VEXEC | VWRITE, cred);
	if (error)
		goto done;

	/*
	 * If anything else is using this vnode, then fail the remove.
	 * Why? Windows hosts can't remove something that is open,
	 * so we have to sfprov_close() it first.
	 * There is no errno for this - since it's not a problem on UNIX,
	 * but EINVAL is the closest.
	 */
	if (node->sf_file != NULL) {
		if (vp->v_count > 1) {
			error = EINVAL;
			goto done;
		}
		(void)sfprov_close(node->sf_file);
		node->sf_file = NULL;
	}

	/*
	 * Remove the directory on the host and mark the node as stale.
	 * Our hold on vp keeps the node itself alive across this.
	 */
	sfnode_invalidate_stat_cache(VN2SFN(dvp));
	error = sfprov_rmdir(node->sf_sffs->sf_handle, node->sf_path);
	if (error == ENOENT || error == 0)
		sfnode_make_stale(node);

	if (node->sf_parent)
		sfnode_clear_dir_list(node->sf_parent);
done:
	mutex_exit(&sffs_lock);
#ifdef VBOXVFS_WITH_MMAP
	vn_vfsunlock(vp);
#endif
	VN_RELE(vp);
	return (error);
}
#ifdef VBOXVFS_WITH_MMAP
/*
 * Map a single page into kernel address space and return the mapping
 * address. When kpm_enable is set the mapping comes from hat_kpm_mapin(),
 * otherwise from ppmapin() with read access, plus write access when
 * segaccess is S_WRITE. Undo with sffs_page_unmap().
 */
static caddr_t
sffs_page_map(
	page_t		*ppage,
	enum seg_rw	segaccess)
{
	uint_t prot = PROT_READ;

	/* Prefer the seg_kpm driver when it is available (64-bit). */
	if (kpm_enable)
		return (hat_kpm_mapin(ppage, NULL));

	ASSERT(segaccess == S_READ || segaccess == S_WRITE);
	if (segaccess == S_WRITE)
		prot |= PROT_WRITE;

	return (ppmapin(ppage, prot, (caddr_t)-1));
}
/*
 * Release a mapping obtained from sffs_page_map(), using the counterpart
 * of whichever mechanism (kpm or ppmapin) created it.
 */
static void
sffs_page_unmap(
	page_t	*ppage,
	caddr_t	addr)
{
	if (!kpm_enable) {
		ppmapout(addr);
		return;
	}

	hat_kpm_mapout(ppage, NULL, addr);
}
/*
 * Called when there's no page in the cache. This will create new page(s) and read
 * the file data into it.
 *
 * The caller must hold sffs_lock (asserted below) and the node must have an
 * open sf_file.
 *
 * dvp          - vnode whose pages are being read
 * off          - file offset of the first requested page
 * pagelist     - output array, filled in by pvn_plist_init()
 * pagelistsize - size of the request in bytes; exactly PAGESIZE selects the
 *                single-page path, anything else goes through
 *                pvn_read_kluster()
 * segp, addr   - segment and address, passed through to the page creation
 *                routines
 * segaccess    - access type, used for the temporary mapping and passed to
 *                pvn_plist_init()
 *
 * Returns 0 on success. Also returns 0 with *pagelist set to NULL when no
 * page was created. On a read failure the error from sfprov_read() is
 * returned after the created pages are handed to pvn_read_done() with B_ERROR.
 */
static int
sffs_readpages(
vnode_t *dvp,
offset_t off,
page_t *pagelist[],
size_t pagelistsize,
struct seg *segp,
caddr_t addr,
enum seg_rw segaccess)
{
ASSERT(MUTEX_HELD(&sffs_lock));
int error = 0;
u_offset_t io_off, total;
size_t io_len;
page_t *ppages;
page_t *pcur;
sfnode_t *node = VN2SFN(dvp);
ASSERT(node);
ASSERT(node->sf_file);
/* Single page request: create exactly one page at the requested offset. */
if (pagelistsize == PAGESIZE)
{
io_off = off;
io_len = PAGESIZE;
ppages = page_create_va(dvp, io_off, io_len, PG_WAIT | PG_EXCL, segp, addr);
}
else
/* Larger request: pvn_read_kluster() sets io_off and io_len for the pages it created. */
ppages = pvn_read_kluster(dvp, off, segp, addr, &io_off, &io_len, off, pagelistsize, 0);
/* If page already exists return success */
if (!ppages)
{
*pagelist = NULL;
return (0);
}
/*
 * Map & read page-by-page.
 */
/* total is the end offset of the range; io_off advances one page per iteration. */
total = io_off + io_len;
pcur = ppages;
while (io_off < total)
{
ASSERT3U(io_off, ==, pcur->p_offset);
caddr_t virtaddr = sffs_page_map(pcur, segaccess);
/* In: number of bytes requested. Out: bytes actually read, as the zero-fill below relies on. */
uint32_t bytes = PAGESIZE;
error = sfprov_read(node->sf_file, virtaddr, io_off, &bytes);
/*
 * If we reuse pages without zero'ing them, one process can mmap() and read-past the length
 * to read previously mmap'd contents (from possibly other processes).
 */
if (error == 0 && bytes < PAGESIZE)
memset(virtaddr + bytes, 0, PAGESIZE - bytes);
/* The temporary mapping is dropped before the error check so it never leaks. */
sffs_page_unmap(pcur, virtaddr);
if (error != 0)
{
cmn_err(CE_WARN, "sffs_readpages: sfprov_read() failed. error=%d bytes=%u\n", error, bytes);
/* Get rid of all kluster pages read & bail. */
pvn_read_done(ppages, B_ERROR);
return (error);
}
pcur = pcur->p_next;
io_off += PAGESIZE;
}
/*
 * Fill in the pagelist from kluster at the requested offset.
 */
pvn_plist_init(ppages, pagelist, pagelistsize, off, io_len, segaccess);
ASSERT(pagelist == NULL || (*pagelist)->p_offset == off);
return (0);
}
/*
 * Return the pages backing [off, off + len) of the file in pagelist.
 *
 * dvp          - vnode of the (open) file
 * off, len     - requested range; len is clamped to pagelistsize or rounded
 *                up to a multiple of PAGESIZE
 * protp        - if non-NULL, set to PROT_ALL
 * pagelist     - output array, NULL terminated on return; a NULL pagelist
 *                (fault-ahead) is a no-op returning 0
 * pagelistsize - capacity of pagelist in bytes
 * segp, addr   - segment and address, passed through to sffs_readpages()
 * segaccess    - access type; S_WRITE is rejected with ENOSYS
 *
 * Returns 0 on success, ENOSYS for write faults, EFAULT when the range lies
 * beyond the end of the file (by the cached size), or the error from
 * sffs_readpages(), in which case any pages already collected are unlocked.
 *
 * Locking: this can be entered recursively from sffs_write() when the
 * user buffer is itself an mmap of an sffs file. In that case the current
 * thread already owns sffs_lock, which must be neither taken nor released
 * here.
 */
/*ARGSUSED*/
static int
sffs_getpage(
	vnode_t		*dvp,
	offset_t	off,
	size_t		len,
	uint_t		*protp,
	page_t		*pagelist[],
	size_t		pagelistsize,
	struct seg	*segp,
	caddr_t		addr,
	enum seg_rw	segaccess,
	cred_t		*credp
#if !defined(VBOX_VFS_SOLARIS_10U6)
	, caller_context_t *ct
#endif
	)
{
	int error = 0;
	int is_recursive = 0;
	page_t **pageliststart = pagelist;
	sfnode_t *node = VN2SFN(dvp);
	ASSERT(node);
	ASSERT(node->sf_file);

	if (segaccess == S_WRITE)
		return (ENOSYS);	/* Will this ever happen? */

	/* Don't bother about faultahead for now. */
	if (pagelist == NULL)
		return (0);

	if (len > pagelistsize)
		len = pagelistsize;
	else
		len = P2ROUNDUP(len, PAGESIZE);
	ASSERT(pagelistsize >= len);

	if (protp)
		*protp = PROT_ALL;

	/*
	 * The buffer passed to sffs_write may be mmap'd so we may get a
	 * pagefault there, in which case we'll end up here with this thread
	 * already owning the mutex. Mutexes aren't recursive.
	 */
	if (mutex_owner(&sffs_lock) == curthread)
		is_recursive = 1;
	else
		mutex_enter(&sffs_lock);

	/* Don't map pages past end of the file. */
	if (off + len > node->sf_stat.sf_size + PAGEOFFSET)
	{
		/*
		 * Only drop the lock if we took it above; in the recursive
		 * case it still belongs to our caller.
		 */
		if (!is_recursive)
			mutex_exit(&sffs_lock);
		return (EFAULT);
	}

	while (len > 0)
	{
		/*
		 * Look for pages in the requested offset range, or create them if we can't find any.
		 */
		if ((*pagelist = page_lookup(dvp, off, SE_SHARED)) != NULL)
			*(pagelist + 1) = NULL;
		else if ((error = sffs_readpages(dvp, off, pagelist, pagelistsize, segp, addr, segaccess)) != 0)
		{
			/* Undo: unlock every page collected so far. */
			while (pagelist > pageliststart)
				page_unlock(*--pagelist);

			*pagelist = NULL;
			if (!is_recursive)
				mutex_exit(&sffs_lock);
			return (error);
		}

		/* Step past every page just added to the list. */
		while (*pagelist)
		{
			ASSERT3U((*pagelist)->p_offset, ==, off);
			off += PAGESIZE;
			addr += PAGESIZE;
			if (len > 0)
			{
				ASSERT3U(len, >=, PAGESIZE);
				len -= PAGESIZE;
			}

			ASSERT3U(pagelistsize, >=, PAGESIZE);
			pagelistsize -= PAGESIZE;
			pagelist++;
		}
	}

	/*
	 * Fill the page list array with any pages left in the cache.
	 */
	while (   pagelistsize > 0
	       && (*pagelist++ = page_lookup_nowait(dvp, off, SE_SHARED)))
	{
		off += PAGESIZE;
		pagelistsize -= PAGESIZE;
	}

	*pagelist = NULL;
	if (!is_recursive)
		mutex_exit(&sffs_lock);
	return (error);
}
/*
 * Page write-back entry point. Writable mappings are refused in sffs_map()
 * and regular writes do not go through the page cache, so this is not
 * expected to be called; it always fails with ENOSYS.
 */
/*ARGSUSED*/
static int
sffs_putpage(
	vnode_t		*dvp,
	offset_t	off,
	size_t		len,
	int		flags,
	cred_t		*credp
#if !defined(VBOX_VFS_SOLARIS_10U6)
	, caller_context_t *ct
#endif
	)
{
	/* No PROT_WRITE mmaps and no paged writes: nothing to push out. */
	return (ENOSYS);
}
/*
 * Page callback handed to pvn_vplist_dirty() by sffs_inactive(). It simply
 * throws the page away via pvn_write_done() and reports success.
 */
/*ARGSUSED*/
static int
sffs_discardpage(
	vnode_t		*dvp,
	page_t		*ppage,
	u_offset_t	*poff,
	size_t		*plen,
	int		flags,
	cred_t		*pcred)
{
	/*
	 * PROT_WRITE mmaps are not supported, so there are never dirty
	 * pages that would need writing back before being discarded.
	 */
	const int done_flags = B_INVAL | B_ERROR | B_FORCE;

	pvn_write_done(ppage, done_flags);
	return (0);
}
/*
 * Set up a read-only memory mapping of a regular file by creating a segvn
 * segment for [off, off + len) in the address space asp.
 *
 * addrp is in/out: it holds the caller's requested address and is updated
 * with the address that was chosen.
 *
 * Returns 0 on success, or:
 *   ENOTSUP - PROT_WRITE was requested
 *   ENXIO   - off is negative or off + len would overflow MAXOFFSET_T
 *   ENODEV  - the vnode is not a regular file
 *   ENOSYS  - the vnode is flagged VNOMAP
 *   EAGAIN  - the file has mandatory locks
 *   other   - failure to pick an address, or the error from as_map()
 *
 * The checks run in the order listed, so the first one that applies decides
 * the error returned.
 */
/*ARGSUSED*/
static int
sffs_map(
vnode_t *dvp,
offset_t off,
struct as *asp,
caddr_t *addrp,
size_t len,
uchar_t prot,
uchar_t maxprot,
uint_t flags,
cred_t *credp
#if !defined(VBOX_VFS_SOLARIS_10U6)
, caller_context_t *ct
#endif
)
{
/*
 * Invocation: mmap()->smmap_common()->VOP_MAP()->sffs_map(). Once the
 * segment driver creates the new segment via segvn_create(), it'll
 * invoke down the line VOP_ADDMAP()->sffs_addmap()
 */
int error = 0;
sfnode_t *node = VN2SFN(dvp);
ASSERT(node);
/* Writable mappings are not supported. */
if ((prot & PROT_WRITE))
return (ENOTSUP);
if (off < 0 || len > MAXOFFSET_T - off)
return (ENXIO);
if (dvp->v_type != VREG)
return (ENODEV);
if (dvp->v_flag & VNOMAP)
return (ENOSYS);
if (vn_has_mandatory_locks(dvp, node->sf_stat.sf_mode))
return (EAGAIN);
/* sffs_lock and the address space range lock are both held until the function returns. */
mutex_enter(&sffs_lock);
as_rangelock(asp);
#if defined(VBOX_VFS_SOLARIS_10U6)
/* Solaris 10U6: pick the address by hand. */
if ((flags & MAP_FIXED) == 0)
{
map_addr(addrp, len, off, 1, flags);
if (*addrp == NULL)
error = ENOMEM;
}
else
as_unmap(asp, *addrp, len); /* User specified address, remove any previous mappings */
#else
error = choose_addr(asp, addrp, len, off, ADDR_VACALIGN, flags);
#endif
if (error)
{
as_rangeunlock(asp);
mutex_exit(&sffs_lock);
return (error);
}
/* Describe the mapping for segvn_create(); fields not set below stay zero. */
segvn_crargs_t vnodeargs;
memset(&vnodeargs, 0, sizeof(vnodeargs));
vnodeargs.vp = dvp;
vnodeargs.cred = credp;
vnodeargs.offset = off;
vnodeargs.type = flags & MAP_TYPE;
vnodeargs.prot = prot;
vnodeargs.maxprot = maxprot;
vnodeargs.flags = flags & ~MAP_TYPE;
vnodeargs.amp = NULL; /* anon. mapping */
vnodeargs.szc = 0; /* preferred page size code */
vnodeargs.lgrp_mem_policy_flags = 0;
error = as_map(asp, *addrp, len, segvn_create, &vnodeargs);
as_rangeunlock(asp);
mutex_exit(&sffs_lock);
return (error);
}
/*
 * Notification that a mapping of the vnode was added. Nothing is tracked
 * here; the call fails with ENOSYS only if the vnode is flagged VNOMAP.
 */
/*ARGSUSED*/
static int
sffs_addmap(
	vnode_t		*dvp,
	offset_t	off,
	struct as	*asp,
	caddr_t		addr,
	size_t		len,
	uchar_t		prot,
	uchar_t		maxprot,
	uint_t		flags,
	cred_t		*credp
#if !defined(VBOX_VFS_SOLARIS_10U6)
	, caller_context_t *ct
#endif
	)
{
	return ((dvp->v_flag & VNOMAP) ? ENOSYS : 0);
}
/*
 * Notification that a mapping of the vnode was removed. Nothing is tracked
 * here; the call fails with ENOSYS only if the vnode is flagged VNOMAP.
 */
/*ARGSUSED*/
static int
sffs_delmap(
	vnode_t		*dvp,
	offset_t	off,
	struct as	*asp,
	caddr_t		addr,
	size_t		len,
	uint_t		prot,
	uint_t		maxprot,
	uint_t		flags,
	cred_t		*credp
#if !defined(VBOX_VFS_SOLARIS_10U6)
	, caller_context_t *ct
#endif
	)
{
	return ((dvp->v_flag & VNOMAP) ? ENOSYS : 0);
}
#endif /* VBOXVFS_WITH_MMAP */
/*
 * Remove the file "name" from the directory vnode dvp.
 *
 * Returns 0 on success, the error from the lookup or from the access check
 * on dvp, ETXTBSY when the file is open and still referenced elsewhere, or
 * the error from sfprov_remove().
 */
/*ARGSUSED*/
static int
sffs_remove(
	vnode_t		*dvp,
	char		*name,
	cred_t		*cred,
	caller_context_t *ct,
	int		flags)
{
	vnode_t		*vp;
	sfnode_t	*node;
	int		error;

	/*
	 * Callers are not expected to pass these.
	 */
	ASSERT(name != NULL);
	ASSERT(strcmp(name, "..") != 0);

	error = sffs_lookup(dvp, name, &vp,
	    NULL, 0, NULL, cred, ct, NULL, NULL);
	if (error)
		return (error);
	node = VN2SFN(vp);

	mutex_enter(&sffs_lock);
	error = sfnode_access(VN2SFN(dvp), VEXEC | VWRITE, cred);

	/*
	 * A Windows host cannot sfprov_remove() a file that is still open,
	 * so the host handle has to be closed first. That is only possible
	 * when nobody else holds the vnode. UNIX has no errno for this
	 * situation; ETXTBSY is the closest match.
	 */
	if (error == 0 && node->sf_file != NULL) {
		if (vp->v_count > 1) {
			error = ETXTBSY;
		} else {
			(void)sfprov_close(node->sf_file);
			node->sf_file = NULL;
		}
	}

	if (error == 0) {
		/*
		 * Delete the file on the host, then retire the node. A host
		 * side ENOENT means the file is gone as well.
		 */
		sfnode_invalidate_stat_cache(VN2SFN(dvp));
		error = sfprov_remove(node->sf_sffs->sf_handle, node->sf_path);
		if (error == 0 || error == ENOENT)
			sfnode_make_stale(node);

		if (node->sf_parent)
			sfnode_clear_dir_list(node->sf_parent);
	}

	mutex_exit(&sffs_lock);
	VN_RELE(vp);
	return (error);
}
/*
 * Rename old_nm in old_dir to new_nm in new_dir, on the host and in the
 * node cache.
 *
 * Returns 0 on success, EINVAL when either name is "", "." or "..", the
 * error from the access check on either directory, ENOENT if the source
 * cannot be found, or the error from sfprov_rename().
 */
/*ARGSUSED*/
static int
sffs_rename(
	vnode_t		*old_dir,
	char		*old_nm,
	vnode_t		*new_dir,
	char		*new_nm,
	cred_t		*cred,
	caller_context_t *ct,
	int		flags)
{
	char		*newpath;
	int		error;
	sfnode_t	*node;

	/* Neither end of the rename may be an empty or special name. */
	if (strcmp(old_nm, "") == 0 || strcmp(new_nm, "") == 0)
		return (EINVAL);
	if (strcmp(old_nm, ".") == 0 || strcmp(new_nm, ".") == 0)
		return (EINVAL);
	if (strcmp(old_nm, "..") == 0 || strcmp(new_nm, "..") == 0)
		return (EINVAL);

	mutex_enter(&sffs_lock);

	/*
	 * Both directories (or the one directory, if they are the same)
	 * must be searchable and writable.
	 */
	error = sfnode_access(VN2SFN(old_dir), VEXEC | VWRITE, cred);
	if (error == 0 && new_dir != old_dir)
		error = sfnode_access(VN2SFN(new_dir), VEXEC | VWRITE, cred);
	if (error != 0) {
		mutex_exit(&sffs_lock);
		return (error);
	}

	node = sfnode_lookup(VN2SFN(old_dir), old_nm, VNON, NULL, 0, NULL);
	if (node == NULL) {
		mutex_exit(&sffs_lock);
		return (ENOENT);
	}

	/*
	 * Cached attributes of the node and of both directories are about
	 * to become outdated.
	 */
	sfnode_invalidate_stat_cache(node);
	sfnode_invalidate_stat_cache(VN2SFN(old_dir));
	sfnode_invalidate_stat_cache(VN2SFN(new_dir));

	newpath = sfnode_construct_path(VN2SFN(new_dir), new_nm);
	error = sfprov_rename(node->sf_sffs->sf_handle, node->sf_path, newpath,
	    node->sf_type == VDIR);
	if (error != 0) {
		/* The path was not handed to sfnode_rename(); free it here. */
		kmem_free(newpath, strlen(newpath) + 1);
		if (error == ENOENT)
			sfnode_make_stale(node);
	} else {
		sfnode_rename(node, VN2SFN(new_dir), newpath);
	}

	mutex_exit(&sffs_lock);
	return (error);
}
/*
 * Ask the host to flush whatever it has cached for an open file.
 *
 * Returns EBADF when the node has no open host file, 0 when fsync is not
 * enabled for this mount (sf_fsync is clear), otherwise the result of
 * sfprov_fsync().
 */
/*ARGSUSED*/
static int
sffs_fsync(vnode_t *vp, int flag, cred_t *cr, caller_context_t *ct)
{
	sfnode_t	*node;
	int		error = 0;

	mutex_enter(&sffs_lock);
	node = VN2SFN(vp);
	if (node->sf_file == NULL) {
		error = EBADF;
	} else if (node->sf_sffs->sf_fsync) {
		error = sfprov_fsync(node->sf_file);
	}
	mutex_exit(&sffs_lock);

	return (error);
}
/*
 * This may be the last reference, possibly time to close the file and
 * destroy the vnode. If the sfnode is stale, we'll destroy that too.
 *
 * If other references remain, only the vnode's reference count is dropped.
 * Otherwise any cached pages are discarded (mmap builds), the vnode is
 * detached from its sfnode and freed, the host file handle is closed and
 * the cached directory listing is released.
 *
 * sffs_lock is held for the whole function; vp->v_lock is taken inside it.
 */
/*ARGSUSED*/
static void
#if defined(VBOX_VFS_SOLARIS_10U6)
sffs_inactive(vnode_t *vp, cred_t *cr)
#else
sffs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
#endif
{
sfnode_t *node;
/*
 * nothing to do if this isn't the last use
 */
mutex_enter(&sffs_lock);
node = VN2SFN(vp);
mutex_enter(&vp->v_lock);
if (vp->v_count > 1) {
--vp->v_count;
mutex_exit(&vp->v_lock);
mutex_exit(&sffs_lock);
return;
}
if (vn_has_cached_data(vp)) {
#ifdef VBOXVFS_WITH_MMAP
/* We're fine with releasing the vnode lock here as we should be covered by the sffs_lock */
mutex_exit(&vp->v_lock);
/* We won't have any dirty pages, this will just invalidate (destroy) the pages and move it to the cachelist. */
pvn_vplist_dirty(vp, 0 /* offset */, sffs_discardpage, B_INVAL, cr);
mutex_enter(&vp->v_lock);
#else
/* Without mmap support nothing should ever have put pages on this vnode. */
panic("sffs_inactive() found cached data");
#endif
}
/*
 * destroy the vnode
 */
/* Detach the vnode from the sfnode first; from here on only node is used. */
node->sf_vnode = NULL;
mutex_exit(&vp->v_lock);
vn_invalid(vp);
vn_free(vp);
LogFlowFunc((" %s vnode cleared\n", node->sf_path));
/*
 * Close the sf_file for the node.
 */
if (node->sf_file != NULL) {
(void)sfprov_close(node->sf_file);
node->sf_file = NULL;
}
/*
 * Free the directory entries for the node. This should normally
 * have been taken care of in sffs_close(), but better safe than
 * sorry.
 */
sfnode_clear_dir_list(node);
/*
 * If the node is stale, we can also destroy it.
 */
/* A stale node that still has children is kept. */
if (node->sf_is_stale && node->sf_children == 0)
sfnode_destroy(node);
mutex_exit(&sffs_lock);
return;
}
/*
 * Open hook. There is nothing to do at this point because lookup has
 * already done the real work, so the call always succeeds.
 */
/*ARGSUSED*/
static int
sffs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
{
	/* Intentionally empty: see sffs_lookup(). */
	return (0);
}
/*
 * Close hook. Releasing the host file handle is left to sffs_inactive();
 * here only the cached directory listing and cached stat data are dropped.
 * Always returns 0.
 */
/*ARGSUSED*/
static int
sffs_close(
	vnode_t		*vp,
	int		flag,
	int		count,
	offset_t	offset,
	cred_t		*cr,
	caller_context_t *ct)
{
	sfnode_t *sfn;

	mutex_enter(&sffs_lock);
	sfn = VN2SFN(vp);

	/*
	 * The directory listing has to go now rather than when the node
	 * becomes inactive, because that can be a long way off. A process
	 * that has cd'ed into the directory keeps it active until it
	 * changes directory again, and until then every 'ls' would keep
	 * showing the same old contents.
	 */
	sfnode_clear_dir_list(sfn);
	sfnode_invalidate_stat_cache(sfn);

	mutex_exit(&sffs_lock);
	return (0);
}
/*
 * Validate a new file offset (*no).
 *
 * Negative offsets and offsets above MAXOFFSET_T are rejected with EINVAL.
 * For a directory that has a cached listing, the offset must additionally
 * fall inside one of the cached listing buffers; a directory without a
 * cached listing accepts any offset. Returns 0 when the offset is fine.
 */
/* ARGSUSED */
static int
sffs_seek(vnode_t *v, offset_t o, offset_t *no, caller_context_t *ct)
{
	sffs_dirents_t	*buf;
	off_t		offset = 0;

	if (*no < 0 || *no > MAXOFFSET_T)
		return (EINVAL);

	/* Only directories need the extra check. */
	if (v->v_type != VDIR)
		return (0);

	buf = VN2SFN(v)->sf_dir_list;
	if (buf == NULL)
		return (0);

	/* Walk the buffers, tracking the offset at which each one starts. */
	for (; buf != NULL; buf = buf->sf_next) {
		if (*no >= offset && *no <= offset + buf->sf_len)
			return (0);
		offset += buf->sf_len;
	}

	return (EINVAL);
}
/*
 * File identifiers are deliberately unsupported: failing this call keeps
 * anything in sffs from being re-exported by NFS. Always returns ENOTSUP.
 */
/* ARGSUSED */
static int
sffs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
{
	/* No fid, no NFS export. */
	return (ENOTSUP);
}
/*
 * vnode operations template for sffs.
 *
 * The table pairs each supported VOPNAME_* with its sffs implementation and
 * covers both file operations (read, write, ...) and directory operations
 * (lookup, mkdir, readdir, ...). sffs_vnode_init() hands it to vn_make_ops()
 * to build sffs_ops.
 *
 * The entry syntax differs between Solaris 10U6 (plain function pointers)
 * and later releases (designated union members). The mmap related entries
 * are only present when VBOXVFS_WITH_MMAP is defined. The table ends with a
 * NULL, NULL pair.
 */
const fs_operation_def_t sffs_ops_template[] = {
#if defined(VBOX_VFS_SOLARIS_10U6)
VOPNAME_ACCESS, sffs_access,
VOPNAME_CLOSE, sffs_close,
VOPNAME_CREATE, sffs_create,
VOPNAME_FID, sffs_fid,
VOPNAME_FSYNC, sffs_fsync,
VOPNAME_GETATTR, sffs_getattr,
VOPNAME_INACTIVE, sffs_inactive,
VOPNAME_LOOKUP, sffs_lookup,
VOPNAME_MKDIR, sffs_mkdir,
VOPNAME_OPEN, sffs_open,
VOPNAME_PATHCONF, sffs_pathconf,
VOPNAME_READ, sffs_read,
VOPNAME_READDIR, sffs_readdir,
VOPNAME_REMOVE, sffs_remove,
VOPNAME_RENAME, sffs_rename,
VOPNAME_RMDIR, sffs_rmdir,
VOPNAME_SEEK, sffs_seek,
VOPNAME_SETATTR, sffs_setattr,
VOPNAME_SPACE, sffs_space,
VOPNAME_WRITE, sffs_write,
# ifdef VBOXVFS_WITH_MMAP
VOPNAME_MAP, sffs_map,
VOPNAME_ADDMAP, sffs_addmap,
VOPNAME_DELMAP, sffs_delmap,
VOPNAME_GETPAGE, sffs_getpage,
VOPNAME_PUTPAGE, sffs_putpage,
# endif
NULL, NULL
#else
VOPNAME_ACCESS, { .vop_access = sffs_access },
VOPNAME_CLOSE, { .vop_close = sffs_close },
VOPNAME_CREATE, { .vop_create = sffs_create },
VOPNAME_FID, { .vop_fid = sffs_fid },
VOPNAME_FSYNC, { .vop_fsync = sffs_fsync },
VOPNAME_GETATTR, { .vop_getattr = sffs_getattr },
VOPNAME_INACTIVE, { .vop_inactive = sffs_inactive },
VOPNAME_LOOKUP, { .vop_lookup = sffs_lookup },
VOPNAME_MKDIR, { .vop_mkdir = sffs_mkdir },
VOPNAME_OPEN, { .vop_open = sffs_open },
VOPNAME_PATHCONF, { .vop_pathconf = sffs_pathconf },
VOPNAME_READ, { .vop_read = sffs_read },
VOPNAME_READDIR, { .vop_readdir = sffs_readdir },
VOPNAME_REMOVE, { .vop_remove = sffs_remove },
VOPNAME_RENAME, { .vop_rename = sffs_rename },
VOPNAME_RMDIR, { .vop_rmdir = sffs_rmdir },
VOPNAME_SEEK, { .vop_seek = sffs_seek },
VOPNAME_SETATTR, { .vop_setattr = sffs_setattr },
VOPNAME_SPACE, { .vop_space = sffs_space },
VOPNAME_WRITE, { .vop_write = sffs_write },
# ifdef VBOXVFS_WITH_MMAP
VOPNAME_MAP, { .vop_map = sffs_map },
VOPNAME_ADDMAP, { .vop_addmap = sffs_addmap },
VOPNAME_DELMAP, { .vop_delmap = sffs_delmap },
VOPNAME_GETPAGE, { .vop_getpage = sffs_getpage },
VOPNAME_PUTPAGE, { .vop_putpage = sffs_putpage },
# endif
NULL, NULL
#endif
};
/*
 * Module-level setup for the vnode layer: build the sffs vnode operations
 * from sffs_ops_template, create the two AVL trees that hold the live and
 * the stale sfnodes, and allocate the one-page scratch buffer sffs_buffer.
 *
 * Returns 0 on success or the error from vn_make_ops(), in which case
 * nothing else has been set up.
 */
int
sffs_vnode_init(void)
{
	int rc = vn_make_ops("sffs", sffs_ops_template, &sffs_ops);

	if (rc != 0)
		return (rc);

	avl_create(&sfnodes, sfnode_compare, sizeof (sfnode_t),
	    offsetof(sfnode_t, sf_linkage));
	avl_create(&stale_sfnodes, sfnode_compare, sizeof (sfnode_t),
	    offsetof(sfnode_t, sf_linkage));

	sffs_buffer = kmem_alloc(PAGESIZE, KM_SLEEP);

	return (0);
}
/*
 * Module-level teardown for the vnode layer: free the vnode operations,
 * destroy the sfnodes tree (which must be empty by now) and release the
 * scratch buffer.
 */
void
sffs_vnode_fini(void)
{
	if (sffs_ops)
		vn_freevnodeops(sffs_ops);

	/* Every mount must have been purged before we get here. */
	ASSERT(avl_first(&sfnodes) == NULL);
	avl_destroy(&sfnodes);

	if (sffs_buffer == NULL)
		return;

	kmem_free(sffs_buffer, PAGESIZE);
	sffs_buffer = NULL;
}
/*
 * Unmount helper: get rid of every sfnode that belongs to the given mount.
 *
 * Returns -1 without changing anything if the root vnode has extra
 * references or if any other node of this mount (live or stale) still has
 * a vnode. Otherwise releases the root vnode, makes every node of the
 * mount stale and returns 0.
 */
int
sffs_purge(struct sffs_data *sffs)
{
	sfnode_t *node;
	sfnode_t *prev;

	/*
	 * Refuse to purge while anything is still in use: first the root
	 * vnode, then the live tree, then the stale tree.
	 */
	if (sffs->sf_rootnode->v_count > 1)
		return (-1);

	node = avl_first(&sfnodes);
	while (node != NULL) {
		if (node->sf_sffs == sffs && node->sf_vnode != NULL &&
		    node->sf_vnode != sffs->sf_rootnode)
			return (-1);
		node = AVL_NEXT(&sfnodes, node);
	}

	node = avl_first(&stale_sfnodes);
	while (node != NULL) {
		if (node->sf_sffs == sffs && node->sf_vnode != NULL &&
		    node->sf_vnode != sffs->sf_rootnode)
			return (-1);
		node = AVL_NEXT(&stale_sfnodes, node);
	}

	/*
	 * Nothing is active. With no vnodes left, making a node stale
	 * results in its deletion.
	 */
	VN_RELE(sffs->sf_rootnode);
	mutex_enter(&sffs_lock);

	/*
	 * prev is the last node that was kept (it belongs to another mount).
	 * After a node of ours has been made stale, the walk resumes from
	 * prev, or from the start of the tree if nothing was kept yet.
	 */
	prev = NULL;
	for (;;) {
		if (prev != NULL)
			node = AVL_NEXT(&sfnodes, prev);
		else
			node = avl_first(&sfnodes);

		if (node == NULL)
			break;

		if (node->sf_sffs != sffs) {
			prev = node;
			continue;
		}

		if (node->sf_vnode != NULL)
			panic("vboxfs: purge hit active vnode");
		sfnode_make_stale(node);
	}

	mutex_exit(&sffs_lock);
	return (0);
}
#if 0
/* Debug helper functions */
/*
 * Debug helper: log a one-line description of an sfnode (address, type,
 * inode number, path, parent, and, when set, child count, vnode and the
 * stale marker).
 */
static void
sfnode_print(sfnode_t *node)
{
	const char *type_name;
	const char *stale_tag;

	if (node->sf_type == VDIR)
		type_name = "VDIR";
	else if (node->sf_type == VNON)
		type_name = "VNON";
	else if (node->sf_type == VREG)
		type_name = "VREG";
	else
		type_name = "other";

	stale_tag = node->sf_is_stale ? " STALE" : "";

	Log(("0x%p", node));
	Log((" type=%s (%d)", type_name, node->sf_type));
	Log((" ino=%d", (uint_t)node->sf_ino));
	Log((" path=%s", node->sf_path));
	Log((" parent=0x%p", node->sf_parent));
	if (node->sf_children)
		Log((" children=%d", node->sf_children));
	if (node->sf_vnode)
		Log((" vnode=0x%p", node->sf_vnode));
	Log(("%s\n", stale_tag));
}
/*
 * Debug helper: print every sfnode, first the live tree and then the
 * stale tree.
 */
static void
sfnode_list(void)
{
	sfnode_t *cur;

	cur = avl_first(&sfnodes);
	while (cur != NULL) {
		sfnode_print(cur);
		cur = AVL_NEXT(&sfnodes, cur);
	}

	cur = avl_first(&stale_sfnodes);
	while (cur != NULL) {
		sfnode_print(cur);
		cur = AVL_NEXT(&stale_sfnodes, cur);
	}
}
#endif