pc_vnops.c revision 3b862e9a9ce59d5dbf0177b9eb293109fde6bf36
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/sysmacros.h>
#include <sys/vfs_opreg.h>
#include <sys/pathname.h>
#include <vm/seg_kmem.h>
caller_context_t *);
caller_context_t *);
caller_context_t *);
caller_context_t *, int *, pathname_t *);
caller_context_t *, vsecattr_t *);
caller_context_t *, int);
struct cred *, caller_context_t *, int);
caller_context_t *, int);
caller_context_t *, int);
caller_context_t *);
caller_context_t *);
caller_context_t *);
caller_context_t *);
struct cred *);
extern krwlock_t pcnodes_lock;
/*
* vnode op vectors for files and directories.
*/
struct vnodeops *pcfs_fvnodeops;
struct vnodeops *pcfs_dvnodeops;
const fs_operation_def_t pcfs_fvnodeops_template[] = {
};
const fs_operation_def_t pcfs_dvnodeops_template[] = {
};
/*ARGSUSED*/
static int
int flag,
{
return (0);
}
/*
* files are sync'ed on close to keep floppy up to date
*/
/*ARGSUSED*/
static int
int flag,
int count,
{
return (0);
}
/*ARGSUSED*/
static int
int ioflag,
struct caller_context *ct)
{
int error;
return (error);
if (error)
return (error);
return (EIO);
}
}
if (error) {
}
return (error);
}
/*ARGSUSED*/
static int
int ioflag,
struct caller_context *ct)
{
int error;
return (error);
if (error)
return (error);
return (EIO);
}
/*
* in append mode start at end of file.
*/
}
(void) pc_nodeupdate(pcp);
if (error) {
}
return (error);
}
/*
* read or write a vnode
*/
static int
int ioflag)
{
int n;
int mapon, pagecreate;
int newpage;
int error = 0;
/*
* If the filesystem was umounted by force, return immediately.
*/
return (EIO);
return (0);
}
if (uio->uio_loffset < 0)
return (EINVAL);
limit = MAXOFFSET_T;
mutex_enter(&p->p_lock);
p, RCA_UNSAFE_SIGINFO);
mutex_exit(&p->p_lock);
return (EFBIG);
}
/* the following condition will occur only for write */
return (EFBIG);
return (0);
if (limit > UINT32_MAX)
limit = UINT32_MAX;
return (EIO);
do {
/*
* Assignments to "n" in this block may appear
* to overflow in some cases. However, after careful
* analysis it was determined that all assignments to
* "n" serve only to make "n" smaller. Since "n"
* starts out as no larger than MAXBSIZE, "int" is
* safe.
*/
if (diff <= 0)
return (0);
if (diff < n)
n = (int)diff;
}
/*
* Compare limit with the actual offset + n, not the
* rounded down offset "off" or we will overflow
* the maximum file size after all.
*/
break;
}
}
/*
* Touch the page and fault it in if it is not in
* core before segmap_getmapflt can lock it. This
* is to avoid the deadlock if the buffer is mapped
* to the same file through mmap which we want to
* write to.
*/
uio_prefaultpages((long)n, uio);
pagecreate = 0;
newpage = 0;
/*
* If PAGESIZE < MAXBSIZE, perhaps we ought to deal
* with one page at a time, instead of one MAXBSIZE
* at a time, so we can fully explore pagecreate
* optimization??
*/
fsp->pcfs_clsize);
fsp->pcfs_clsize)) {
/*
* Allocate and zerofill skipped
* clusters. This may not be worth the
* effort since a small lseek beyond
* eof but still within the cluster
* will not be zeroed out.
*/
1, &bn);
}
if (!error &&
fsp->pcfs_clsize))
/*
* allocate clusters w/o zerofill
*/
0, &bn);
if (error) {
int nerror;
/*
* figure out new file size from
* cluster chain length. If this
* is detected to loop, the chain
* is corrupted and we'd better
* keep our fingers off that file.
*/
if (nerror) {
PC_DPRINTF1(2,
"cluster chain "
"corruption, "
"scluster=%d\n",
pcp->pc_scluster);
(void) segmap_release(segkmap,
base, 0);
break;
}
> 0) {
"off=%lld n=%d size=%d\n",
uio->uio_loffset);
} else {
PC_DPRINTF1(1,
"rwpcp error1=%d\n", error);
(void) segmap_release(segkmap,
base, 0);
break;
}
} else {
}
if (mapon == 0) {
pagecreate = 1;
}
} else if (n == MAXBSIZE) {
(size_t)n, 0);
pagecreate = 1;
}
}
}
/*
* Unlock the pages which have been allocated by
* page_create_va() in segmap_pagecreate().
*/
if (newpage) {
}
if (error) {
/*
* If we failed on a write, we may have already
* allocated file blocks as well as pages. It's hard
* to undo the block allocation, but we must be sure
* to invalidate any pages that may have been
* allocated.
*/
else
} else {
flags = SM_DONTNEED;
}
}
error = 0;
return (error);
}
/*ARGSUSED*/
static int
int flags,
{
int error;
char attr;
if (error)
return (error);
/*
* Note that we don't check for "invalid node" (PC_INVAL) here
* only in order to make stat() succeed. We allow no I/O on such
* a node, but do allow to check for its existence.
*/
return (EIO);
}
/*
* Copy from pcnode.
*/
else if (attr & PCA_RDONLY)
} else {
}
else
} else {
}
vap->va_nblocks =
/*
* FAT root directories have no timestamps. In order not to return
* "time zero" (1/1/1970), we record the time of the mount and give
* that. This breaks fewer expectations.
*/
return (0);
}
get_udatamodel() == DATAMODEL_ILP32) {
return (EOVERFLOW);
}
/*
* FAT doesn't know about POSIX ctime.
* Best approximation is to always set it to mtime.
*/
/*
* FAT only stores "last access date". If that's the
* same as the date of last modification then the time
* of last access is known. Otherwise, use midnight.
*/
else
get_udatamodel() == DATAMODEL_ILP32) {
return (EOVERFLOW);
}
return (0);
}
/*ARGSUSED*/
static int
int flags,
{
int error;
/*
* cannot set these attributes
*/
return (EINVAL);
}
/*
* pcfs_setattr is now allowed on directories to avoid silly warnings
* from 'tar' when it tries to set times on a directory, and console
* printf's on the NFS server when it gets EINVAL back on such a
* request. One possible problem with that is that, since a directory
* entry identifies a file, '.' and all the '..' entries in
* subdirectories may get out of sync when the directory is updated,
* because they're treated like separate files. We could fix that by
* looking for '.' and giving it the same attributes, and then looking
* for all the subdirectories and updating '..', but that's pretty
* expensive for something that doesn't seem likely to matter.
*/
/* can't do some ops on directories anyway */
return (EINVAL);
}
if (error)
return (error);
return (EIO);
}
if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
return (EACCES);
}
}
/*
* Change file access modes.
* If nobody has write permission, file is marked readonly.
* Otherwise file is writable by anyone.
*/
else
}
/*
* Truncate file. Must have write permission.
*/
goto out;
}
goto out;
}
if (error)
goto out;
}
/*
* Change file modified times.
*/
/*
* If SysV-compatible option to set access and
* modified times if privileged, owner, or write access,
* use current time rather than va_mtime.
*
* XXX - va_mtime.tv_sec == -1 flags this.
*/
gethrestime(&now);
}
goto out;
}
if (error)
goto out;
}
/*
* Change file access times.
*/
/*
* If SysV-compatible option to set access and
* modified times if privileged, owner, or write access,
* use current time rather than va_atime.
*
* XXX - va_atime.tv_sec == -1 flags this.
*/
gethrestime(&now);
}
goto out;
}
if (error)
goto out;
}
out:
return (error);
}
/*ARGSUSED*/
static int
int mode,
int flags,
{
return (EIO);
return (EACCES);
/*
* If this is a boot partition, privileged users have full access while
* others have read-only access.
*/
return (EACCES);
}
return (0);
}
/*ARGSUSED*/
static int
int syncflag,
{
int error;
return (error);
if (error)
return (error);
return (EIO);
}
return (error);
}
/*ARGSUSED*/
static void
{
int error;
/*
* If the filesystem was umounted by force, all dirty
* pages associated with this vnode are invalidated
* and then the vnode will be freed.
*/
if (vn_has_cached_data(vp)) {
}
if (error == 0)
return;
}
return;
}
/*
* Check again to confirm that no intervening I/O error
* with a subsequent pc_diskchanged() call has released
* the pcnode. If it has then release the vnode as above.
*/
if (vn_has_cached_data(vp))
}
} else {
}
if (!error)
}
/*ARGSUSED*/
static int
char *nm,
int flags,
int *direntflags,
{
int error;
/*
* If the filesystem was umounted by force, return immediately.
*/
return (EIO);
/*
* verify that the dvp is still valid on the disk
*/
return (error);
if (error)
return (error);
return (EIO);
}
/*
* Null component name is a synonym for directory being searched.
*/
if (*nm == '\0') {
return (0);
}
if (!error) {
}
return (error);
}
/*ARGSUSED*/
static int
char *nm,
int mode,
int flag,
{
int error;
/*
* can't create directories. use pcfs_mkdir.
* can't create anything other than files.
*/
return (EISDIR);
return (EINVAL);
if (error)
return (error);
return (EIO);
}
if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
return (EACCES);
}
}
if (*nm == '\0') {
/*
* Null component name refers to the directory itself.
*/
} else {
}
/*
* if file exists and this is a nonexclusive create,
* check for access permissions
*/
} else if (mode) {
} else {
error = 0;
}
}
if (error) {
if (error) {
} else {
}
}
}
if (error) {
return (error);
}
return (error);
}
/*ARGSUSED*/
static int
char *nm,
int flags)
{
int error;
return (error);
if (error)
return (error);
return (EIO);
}
if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
return (EACCES);
}
}
return (error);
}
/*
* Rename a file or directory
* This rename is restricted to only rename files within a directory.
* XX should make rename more general
*/
/*ARGSUSED*/
static int
char *snm, /* old (source) entry name */
char *tnm, /* new (target) entry name */
int flags)
{
int error;
return (error);
/*
* make sure we can muck with this directory.
*/
if (error) {
return (error);
}
if (error)
return (error);
return (EIO);
}
return (error);
}
/*ARGSUSED*/
static int
char *nm,
int flags,
{
int error;
return (error);
if (error)
return (error);
return (EIO);
}
if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
return (EACCES);
}
}
if (!error) {
}
return (error);
}
/*ARGSUSED*/
static int
char *nm,
int flags)
{
int error;
return (error);
return (error);
return (EIO);
}
if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
return (EACCES);
}
}
return (error);
}
/*
* read entries in a directory.
* we must convert pc format to unix format
*/
/*ARGSUSED*/
static int
int *eofp,
int flags)
{
int boff;
int error;
/*
* If the filesystem was umounted by force, return immediately.
*/
return (EIO);
return (EINVAL);
}
/*
* verify that the dp is still valid on the disk
*/
return (error);
}
if (error)
return (error);
return (EIO);
}
*eofp = 0;
/*
* kludge up entries for "." and ".." in the root.
*/
if (offset == 0) {
return (ENOSPC);
}
}
return (ENOSPC);
}
sizeof (struct pcdir));
}
/* offset now has the real offset value into directory file */
}
for (;;) {
}
if (error) {
error = 0;
if (eofp)
*eofp = 1;
}
break;
}
}
if (eofp)
*eofp = 1;
break;
}
/*
* Don't display label because it may contain funny characters.
*/
ep++;
continue;
}
if (PCDL_IS_LFN(ep)) {
0)
break;
continue;
}
break;
}
if (bp)
return (error);
}
/*
* Called from pvn_getpages or pcfs_getpage to get a particular page.
* When we are called the pcfs is already locked.
*/
/*ARGSUSED*/
static int
{
int err;
/*
* If the filesystem was umounted by force, return immediately.
*/
return (EIO);
return (EIO);
/* pcfs doesn't do readaheads */
return (0);
err = 0;
/*
* If the accessed time on the pcnode has not already been
* set, we set the time now.
* This gives us approximate access times for mmap'ed files
* which are accessed via loads in the user address space.
*/
}
/*
* Need to really do disk IO to get the page(s).
*/
int err1;
/*
* XXX - If pcfs is made MT-hot, this should go
* back to reread.
*/
panic("pcfs_getapage pvn_read_kluster");
/*
* read as many contiguous blocks as possible to
* fill this page
*/
if (err1) {
goto out;
}
(void) bdev_strategy(bp);
if (err == 0)
else
if (err)
goto out;
}
}
}
out:
if (err) {
return (err);
}
if (pagefound) {
/*
* Page exists in the cache, acquire the "shared"
* lock. If this fails, go back to reread.
*/
goto reread;
}
}
return (err);
}
/*
* Return all the pages from [off..off+len] in given file
*/
/* ARGSUSED */
static int
{
int err;
return (err);
return (ENOSYS);
if (err)
return (err);
} else {
}
return (err);
}
/*
* Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
* If len == 0, do from off to EOF.
*
* The normal cases should be len == 0 & off == 0 (entire vp list),
* len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
* (from pageout).
*
*/
/*ARGSUSED*/
static int
int flags,
{
int err;
/*
* If the filesystem was umounted by force, return immediately.
*/
return (EIO);
return (ENOSYS);
return (err);
return (EIO);
}
return (EIO);
if (curproc == proc_pageout) {
/*
* XXX - This is a quick hack to avoid blocking
* pageout. Also to avoid pcfs_getapage deadlocking
* with putpage when memory is running out,
* since we only have one global lock and we don't
* support async putpage.
* It should be fixed someday.
*
* Interestingly, this used to be a test of NOMEMWAIT().
* We only ever got here once pcfs started supporting
* NFS sharing, and then only because the NFS server
* threads seem to do writes in sched's process context.
* Since everyone else seems to just care about pageout,
* the test was changed to look for pageout directly.
*/
return (ENOMEM);
}
if (err)
return (err);
return (0);
}
if (len == 0) {
/*
* Search the entire vp list for pages >= off
*/
} else {
/*
* If we are not invalidating, synchronously
* freeing or writing pages use the routine
* page_lookup_nowait() to prevent reclaiming
* them from the free list.
*/
} else {
}
else {
if (err != 0)
break;
/*
* "io_off" and "io_len" are returned as
* the range of pages we actually wrote.
* This allows us to skip ahead more quickly
* since several pages may've been dealt
* with by this iteration of the loop.
*/
}
}
}
/*
* If doing "invalidation", make sure that
* all pages on the vnode list are actually
* gone.
*/
"pcfs_putpage: B_INVAL, pages not gone");
} else if (err) {
}
return (err);
}
/*
* Write out a single page, possibly klustering adjacent dirty pages.
*/
/*ARGSUSED*/
int
int flags,
{
int err = 0;
/*
* If the modified time on the inode has not already been
* set, or this is a call from msync (B_FORCE), we set the time now.
* This gives us approximate modified times for mmap'ed files
* which are modified via stores in the user address space.
*/
}
goto out;
}
int err1;
/*
* write as many contiguous blocks as possible from this page
*/
if (err1) {
goto out;
}
(void) bdev_strategy(bp);
if (err == 0)
else
}
out:
}
if (offp)
if (lenp)
if (err) {
}
return (err);
}
/*ARGSUSED*/
static int
{
struct segvn_crargs vn_a;
int error;
return (ENOSYS);
return (ENXIO);
if (error != 0) {
return (error);
}
return (error);
}
/* ARGSUSED */
static int
{
if (*noffp < 0)
return (EINVAL);
else if (*noffp > MAXOFFSET_T)
return (EINVAL);
else
return (0);
}
/* ARGSUSED */
static int
{
return (ENOSYS);
return (0);
}
/*ARGSUSED*/
static int
{
return (ENOSYS);
return (0);
}
/*
* POSIX pathconf() support.
*/
/* ARGSUSED */
static int
int cmd,
{
switch (cmd) {
case _PC_LINK_MAX:
*valp = 1;
return (0);
case _PC_CASE_BEHAVIOR:
return (EINVAL);
case _PC_FILESIZEBITS:
/*
* Both FAT16 and FAT32 support 4GB - 1 byte for file size.
* FAT12 can only go up to the maximum filesystem capacity
* which is ~509MB.
*/
return (0);
case _PC_TIMESTAMP_RESOLUTION:
/*
* PCFS keeps track of modification times, in its own
* internal format, to a resolution of 2 seconds.
* Since 2000 million is representable in an int32_t
* without overflow (or becoming negative), we allow
* this value to be returned.
*/
*valp = 2000000000L;
return (0);
default:
}
}
/* ARGSUSED */
static int
int cmd,
int flag,
{
int error;
return (EINVAL);
return (EFBIG);
/*
* we only support the special case of l_len == 0,
* meaning free to end of file at this moment.
*/
return (EINVAL);
}
return (error);
}
/*
* Break up 'len' chars from 'buf' into a long file name chunk.
* Pad with '0xff' to make Norton Disk Doctor and Microsoft ScanDisk happy.
*/
void
{
int i;
for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2) {
if (len > 0) {
len -= 2;
} else {
}
}
for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2) {
if (len > 0) {
len -= 2;
} else {
}
}
for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2) {
if (len > 0) {
len -= 2;
} else {
}
}
}
/*
* Extract the characters from the long filename chunk into 'buf'.
* Return the number of characters extracted.
*/
static int
{
int i;
/* Copy all the names, no filtering now */
}
}
}
}
/*
* Checksum the passed in short filename.
* This is used to validate each component of the long name to make
* sure the long name is valid (it hasn't been "detached" from the
* short filename). This algorithm was found in FreeBSD.
* (sys/fs/msdosfs/msdosfs_conv.c:winChksum(), Wolfgang Solfrank)
*/
{
uchar_t c;
char b[11];
c = b[0];
c = ((c << 7) | (c >> 1)) + b[1];
c = ((c << 7) | (c >> 1)) + b[2];
c = ((c << 7) | (c >> 1)) + b[3];
c = ((c << 7) | (c >> 1)) + b[4];
c = ((c << 7) | (c >> 1)) + b[5];
c = ((c << 7) | (c >> 1)) + b[6];
c = ((c << 7) | (c >> 1)) + b[7];
c = ((c << 7) | (c >> 1)) + b[8];
c = ((c << 7) | (c >> 1)) + b[9];
c = ((c << 7) | (c >> 1)) + b[10];
return (c);
}
/*
* Read a chunk of long filename entries into 'namep'.
* Return with offset pointing to short entry (on success), or next
* entry to read (if this wasn't really a valid lfn).
* Uses the passed-in buffer if it can, otherwise kmem_allocs() room for
* a long filename.
*
* Can also be called with a NULL namep, in which case it just returns
* whether this was really a valid long filename and consumes it
* (used by pc_dirempty()).
*/
int
{
char *lfn;
char *lfn_base;
int boff;
int i, cs;
char *buf;
int detached = 0;
int error = 0;
int foldcase;
int count = 0;
char *outbuf;
*lfn = '\0';
/* read next block if necessary */
}
if (error) {
return (error);
}
}
/* can this happen? Bad fs? */
detached = 1;
break;
}
detached = 1;
/* process current entry */
/* see if we underflow */
else
detached = 1;
}
lep++;
}
/* read next block if necessary */
}
if (error) {
return (error);
}
}
/* should be on the short one */
detached = 1;
}
if (detached ||
!pc_valid_long_fn(lfn, 0)) {
/*
* process current entry again. This may turn out to be another
* lfn or a short name.
*/
return (EINVAL);
}
/*
* Don't display label because it may contain
* funny characters.
*/
ep++;
return (EINVAL);
}
if (namep) {
u8l = PCMAXNAMLEN;
/*
* uconv_u16tou8() will catch conversion errors, including
* the case where there is not enough room to write the
* converted result, and u8l will never exceed the given
* PCMAXNAMLEN.
*/
if (error != 0) {
return (EINVAL);
}
if (foldcase) {
&error);
if (ret == -1) {
return (EINVAL);
}
}
}
return (0);
}
/*
* Read a long filename into the pc_dirent structure and copy it out.
*/
int
{
int error = 0;
if (error) {
return (0);
} else
return (error);
}
return (ENOSPC);
}
ep++;
return (0);
}
/*
* Read a short filename into the pc_dirent structure and copy it out.
*/
int
{
int error;
int foldcase;
ep++;
return (0);
}
if (error == 0) {
return (ENOSPC);
}
sizeof (struct pcdir));
} else {
}
ep++;
return (0);
}
/* ARGSUSED */
static int
{
int error;
return (EIO);
if (error)
return (error);
return (EIO);
}
return (ENOSPC);
}
pcfid->pcfid_block = 0;
pcfid->pcfid_offset = 0;
pcfid->pcfid_ctime = 0;
} else {
}
return (0);
}