hsfs_vnops.c revision 9694f18abe33defb33e388545da0a2b3f4555ace
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Vnode operations for the High Sierra filesystem
*/
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/vfs_opreg.h>
#include <sys/pathname.h>
#include <vm/seg_kmem.h>
/*
* This tunable allows us to ignore inode numbers from rrip-1.12.
* In this case, we fall back to our default inode algorithm.
*
* A non-zero value means the inode numbers carried in the rrip data
* are trusted and used; zero means they are ignored.
* NOTE(review): the variable is only declared here (extern); confirm
* where it is defined and what its default value is.
*/
extern int use_rrip_inodes;
/* ARGSUSED */
static int
{
return (0);
}
/*ARGSUSED*/
static int
struct caller_context *ct)
{
int error;
/*
* If vp is of type VDIR, make sure dirent
* is filled in with all info (because of ptbl).
*/
}
/* Sanity checks. */
return (0);
do {
/*
* We want to ask for only the "right" amount of data.
* In this case that means:-
*
* We can't get data from beyond our EOF. If asked,
* we will give a short read.
*
* segmap_getmapflt returns buffers of MAXBSIZE bytes.
* These buffers are always MAXBSIZE aligned.
* If our starting offset is not MAXBSIZE aligned,
* we can only ask for less than MAXBSIZE bytes.
*
* If our requested offset and length are such that
* they belong in different MAXBSIZE aligned slots
* then we'll be making more than one call on
* segmap_getmapflt.
*
* This diagram shows the variables we use and their
* relationships.
*
* |<-----MAXBSIZE----->|
* +--------------------------...+
* |.....mapon->|<--n-->|....*...|EOF
* +--------------------------...+
* uio_loffset->|
* uio_resid....|<---------->|
* diff.........|<-------------->|
*
* So, in this case our offset is not aligned
* and our request takes us outside of the
* MAXBSIZE window. We will break this up into
* two segmap_getmapflt calls.
*/
size_t n;
if (n <= 0) {
/* EOF or request satisfied. */
return (0);
}
if (error == 0) {
/*
* If we read a whole block, or read to EOF,
* we won't need this buffer again soon.
*/
flags = SM_DONTNEED;
else
flags = 0;
} else
return (error);
}
/*ARGSUSED2*/
static int
int flags,
{
}
else
/* no. of blocks = no. of data blocks + no. of xar blocks */
return (0);
}
/*ARGSUSED*/
static int
{
return (EINVAL);
return (ENOENT);
}
/*ARGSUSED*/
static void
{
int nopage;
/*
* Note: acquiring and holding v_lock for quite a while
* here serializes on the vnode; this is unfortunate, but is
* unlikely to significantly impact performance, as the underlying
* device (CDROM drive) is quite slow.
*/
panic("hsfs_inactive: v_count < 1");
/*NOTREACHED*/
}
return;
}
/*
* Free the hsnode.
* If there are no pages associated with the
* hsnode, give it back to the kmem_cache,
* else put at the end of this file system's
* internal free list.
*/
/*
* exit these locks now, since hs_freenode may
* kmem_free the hsnode and embedded vnode
*/
} else {
}
}
/*ARGSUSED*/
static int
char *nm,
int flags,
{
int error;
if (*nm == '\0') {
return (0);
}
/*
* If we're looking for ourself, life is simple.
*/
return (error);
return (0);
}
}
/*ARGSUSED*/
static int
int *eofp)
{
struct hs_direntry hd;
int error;
int hdlen; /* length of hs directory entry */
long ndlen; /* length of dirent entry */
int bytes_wanted;
char *outbuf; /* ptr to dirent buffer */
char *dname;
int dnamelen;
if (eofp)
*eofp = 1;
return (0);
}
if (error)
goto done;
while (offset < last_offset) {
/*
* Very similar validation code is found in
* process_dirblock(), hsfs_node.c.
* For an explanation, see there.
* It may make sense for the future to
* "consolidate" the code in hs_parsedir(),
* process_dirblock() and hsfs_readdir() into
* a single utility function.
*/
if (hdlen < HDE_ROOT_DIR_REC_SIZE ||
/*
* advance to next sector boundary
*/
if (hdlen)
continue;
}
/*
* Just ignore invalid directory entries.
* XXX - maybe hs_parsedir() will detect EXISTENCE bit
*/
last_offset - offset)) {
/*
* Determine if there is enough room
*/
goto done; /* output buffer full */
}
/*
* If the media carries rrip-v1.12 or newer,
* and we trust the inodes from the rrip data
* (use_rrip_inodes != 0), use that data. If the
* media has been created by a recent mkisofs
* version, we may trust all numbers in the
* starting extent number; otherwise, we cannot
* do this for zero sized files. We use
* HS_DUMMY_INO in this case and make sure that
* we will not map all files to the same
* meta data.
*/
} else {
(fsp->hsfs_flags &
HSFSMNT_INODE) == 0) {
}
}
/* strncpy(9f) will zero uninitialized bytes */
/*
* free up space allocated for symlink
*/
}
}
}
}
/*
* Got here for one of the following reasons:
* 1) outbuf is full (error == 0)
* 2) end of directory reached (error == 0)
* 3) error reading directory sector (error != 0)
* 4) directory entry crosses sector boundary (error == 0)
*
* If any directory entries have been copied, don't report
* case 4. Instead, return the valid directory entries.
*
* If no entries have been copied, report the error.
* If case 4, this will be indistinguishable from EOF.
*/
done:
if (ndlen != 0) {
}
return (error);
}
static int
{
return (ENOSPC);
}
return (0);
}
/*ARGSUSED*/
static int
{
return (0);
}
/*ARGSUSED*/
static int
int flag,
int count,
{
return (0);
}
/*ARGSUSED2*/
static int
{
}
/*
* The seek time of a CD-ROM is very slow, and the data transfer
* rate is even worse (max. 150K per sec). The design
* decision is to reduce access to the CD-ROM as much as possible,
* and to transfer a sizable block (read-ahead) of data at a time.
* The UFS style of reading ahead one block at a time is not
* appropriate, and is not supported.
*/
/*
* KLUSTSIZE should be a multiple of PAGESIZE and <= MAXPHYS.
*/
/* we don't support read ahead */
int hsfs_lostpage; /* number of times we lost the original page */
/*
* Used to prevent biodone() from releasing buf resources that
* we didn't allocate in quite the usual way.
*/
/*ARGSUSED*/
int
{
return (0);
}
/*
* Each file may have a different interleaving on disk. This makes
* things somewhat interesting. The gist is that there are some
* number of contiguous data sectors, followed by some other number
* of contiguous skip sectors. The sum of those two sets of sectors
* defines the interleave size. Unfortunately, it means that we generally
* can't simply read N sectors starting at a given offset to satisfy
* any given request.
*
* What we do is get the relevant memory pages via pvn_read_kluster(),
* then stride through the interleaves, setting up a buf for each
* sector that needs to be brought in. Instead of kmem_alloc'ing
* space for the sectors, though, we just point at the appropriate
* spot in the relevant page for each of them. This saves us a bunch
* of copying.
*/
/*ARGSUSED*/
static int
{
int err;
int remainder; /* must be signed */
int chunk_lbn_count;
int chunk_data_bytes;
int xarsiz;
/*
* We don't support asynchronous operation at the moment, so
* just pretend we did it. If the pages are ever actually
* needed, they'll get brought in then.
*/
return (0);
/* file data size */
/* disk addr for start of file */
/* xarsiz byte must be skipped for data */
/* how many logical blocks in an interleave (data+skip) */
if (chunk_lbn_count == 0) {
chunk_lbn_count = 1;
}
/*
* Convert interleaving size into bytes. The zero case
* (no interleaving) optimization is handled as a side-
* effect of the read-ahead logic.
*/
} else {
}
err = 0;
pagefound = 0;
/*
* Do some read-ahead. This mostly saves us a bit of
* system cpu time more than anything else when doing
* sequential reads. At some point, could do the
* read-ahead asynchronously which might gain us something
* on wall time, but it seems unlikely....
*
* We do the easy case here, which is to read through
* the end of the chunk, minus whatever's at the end that
* won't exactly fill a page.
*/
} else {
}
/*
* Some cd writers don't write sectors that aren't used. Also,
* there's no point in reading sectors we'll never look at. So,
* if we're asked to go beyond the end of a file, truncate to the
* length of that file.
*
* Additionally, this behaviour is required by section 6.4.5 of
* ISO 9660:1988(E).
*/
}
/* A little paranoia. */
/*
* After all that, make sure we're asking for things in units
* that bdev_strategy() will understand (see bug 4202551).
*/
/* search for page in buffer */
/*
* Need to really do disk IO to get the page.
*/
goto again;
/* check for truncation */
/*
* xxx Clean up and return EIO instead?
* xxx Ought to go to u_offset_t for everything, but we
* xxx call lots of things that want uint_t arguments.
*/
/*
* get enough buffers for worst-case scenario
* (i.e., no coalescing possible).
*/
SEMA_DEFAULT, NULL);
SEMA_DEFAULT, NULL);
}
/*
* If our filesize is not an integer multiple of PAGESIZE,
* we zero that part of the last page that's between EOF and
* the PAGESIZE boundary.
*/
if (xlen != 0)
count++) {
/* Compute disk address for interleaving. */
/* considered without skips */
/* factor in skips */
/* convert to physical byte offset for lbn */
/* don't forget offset into lbn */
/* get virtual block number for driver */
+ offset_bytes + offset_extra);
/* this branch taken first time through loop */
(caddr_t)-1);
/* ppmapin() guarantees not to return NULL */
} else {
}
/*
* We specifically use the b_lblkno member here
* as even in the 32 bit world driver_block can
* get very large in line with the ISO9660 spec.
*/
- byte_offset;
/*
* remaining_bytes can't be zero, as we derived
* which_chunk_lbn directly from byte_offset.
*/
/* coalesce-read the rest of the chunk */
} else {
/* get the final bits */
}
/*
* It would be nice to do multiple pages'
* worth at once here when the opportunity
* arises, as that has been shown to improve
* our wall time. However, to do that
* requires that we use the pageio subsystem,
* which doesn't mix well with what we're
* already using here. We can't use pageio
* all the time, because that subsystem
* assumes that a page is stored in N
* contiguous blocks on the device.
* Interleaving violates that assumption.
*/
}
break;
}
}
}
/* Now wait for everything to come in */
if (err == 0) {
} else
}
/* Don't leak resources */
}
}
}
if (err) {
return (err);
}
/*
* Lock the requested page, and the one after it if possible.
* Don't bother if our caller hasn't given us a place to stash
* the page pointers, since otherwise we'd lock pages that would
* never get unlocked.
*/
if (pagefound) {
int index;
/*
* Make sure it's in memory before we say it's here.
*/
goto reread;
}
index = 1;
/*
* Try to lock the next page, if it exists, without
* blocking.
*/
/* LINTED (plsz is unsigned) */
break;
}
return (0);
}
}
return (err);
}
static int
{
int err;
/* does not support write */
panic("write attempt on READ ONLY HSFS");
/*NOTREACHED*/
}
return (ENOSYS);
}
/*
* Determine file data size for EOF check.
*/
return (EFAULT); /* beyond EOF */
else
return (err);
}
/*
* This function should never be called. We need to have it to pass
* it as an argument to other functions.
*/
/*ARGSUSED*/
int
int flags,
{
/* should never happen - just destroy it */
return (0);
}
/*
* The only flags we support are B_INVAL, B_FREE and B_DONTNEED.
* B_INVAL is set by:
*
* 1) the MC_SYNC command of memcntl(2) to support the MS_INVALIDATE flag.
* 2) the MC_ADVISE command of memcntl(2) with the MADV_DONTNEED advice
* which translates to an MC_SYNC with the MS_INVALIDATE flag.
*
* The B_FREE (as well as the B_DONTNEED) flag is set when the
* MADV_SEQUENTIAL advice has been used. VOP_PUTPAGE is invoked
* from SEGVN to release pages behind a pagefault.
*/
/*ARGSUSED*/
static int
int flags,
{
int error = 0;
panic("hsfs_putpage: bad v_count");
/*NOTREACHED*/
}
return (ENOSYS);
return (0);
if (len == 0) /* from 'off' to EOF */
else {
/*
* We insist on getting the page only if we are
* about to invalidate, free or write it and
* the B_ASYNC flag is not set.
*/
} else {
}
continue;
/*
* Normally pvn_getdirty() should return 0, which
* implies that it has done the job for us.
* The shouldn't-happen scenario is when it returns 1.
* This means that the page has been modified and
* needs to be put back.
* Since we can't write on a CD, we fake a failed
* I/O and force pvn_write_done() to destroy the page.
*/
"hsfs_putpage: dirty HSFS page");
}
}
}
return (error);
}
/*ARGSUSED*/
static int
{
struct segvn_crargs vn_a;
int error;
/* VFS_RECORD(vp->v_vfsp, VS_MAP, VS_CALL); */
return (ENOSYS);
return (ENXIO);
return (ENODEV);
}
/*
* If file is being locked, disallow mapping.
*/
return (EAGAIN);
return (ENOMEM);
}
} else {
/*
* User specified address - blow away any previous mappings
*/
}
return (error);
}
/* ARGSUSED */
static int
{
return (ENOSYS);
return (0);
}
/*ARGSUSED*/
static int
{
return (ENOSYS);
return (0);
}
/* ARGSUSED */
static int
{
}
/* ARGSUSED */
static int
int cmd,
int flag,
struct flk_callback *flk_cbp,
{
/*
* If the file is being mapped, disallow fs_frlock.
* We are not holding the hs_contents_lock while checking
* hs_mapcnt because the current locking strategy drops all
* locks before calling fs_frlock.
* So, hs_mapcnt could change before we enter fs_frlock making
* it meaningless to have held hs_contents_lock in the first place.
*/
return (EAGAIN);
}
/* ARGSUSED */
static int
{
int error = 0;
switch (cmd) {
case _PC_NAME_MAX:
break;
case _PC_FILESIZEBITS:
break;
default:
}
return (error);
}
/*
* Template describing the vnode operations of the High Sierra filesystem.
* NOTE(review): no entries appear between the braces here; presumably
* this table is what hsfs_vnodeops is built from - confirm.
*/
const fs_operation_def_t hsfs_vnodeops_template[] = {
};
/*
* Vnode operations vector for HSFS.
* NOTE(review): it is not initialized at its definition; presumably it is
* filled in during filesystem initialization - confirm.
*/
struct vnodeops *hsfs_vnodeops;