ufs_trans.c revision 1e13ea4b2899c8ca6672a89c6dfa92716efa5a88
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* Portions of this source code were derived from Berkeley 4.3 BSD
* under license from the Regents of the University of California.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/sysmacros.h>
extern kmutex_t ufsvfs_mutex;
extern struct ufsvfs *ufs_instances;
/*
*/
int
{
int error;
int retry = 0;
/*
 * find fs's that panicked or have errored logging devices
 */
/*
* not mounted; continue
*/
continue;
/*
* disallow unmounts (hlock occurs below)
*/
if (TRANS_ISERROR(ufsvfsp))
}
/*
 * hlock the fs's that panicked or have errored logging devices
 */
break;
return (retry);
/*
* hlock the file system
*/
if (!LOCKFS_IS_ELOCK(&lockfs)) {
/*
 * retry after a while; another app is currently doing a lockfs
 */
retry = 1;
} else {
if (ufsfx_get_failure_qlen() > 0) {
}
}
retry = 1;
}
/*
* allow unmounts
*/
goto again;
}
/*ARGSUSED*/
void
{
}
void
{
return;
} else {
return;
}
}
void
{
return;
} else {
return;
}
}
void
{
return;
} else {
return;
}
}
/*ARGSUSED*/
int
{
return (0);
}
/*ARGSUSED*/
int
{
return (ENOENT);
/*
 * Do not use brwrite() here since the buffer has already
 * been marked (or not) for retry by the code that called
 * TRANS_BUF().
 */
return (0);
}
/*
* If we did not find the real buf for this block above then
* clear the dev so the buf won't be found by mistake
* for this block later. We had to allocate at least a 1 byte
* buffer to keep brelse happy.
*/
}
return (ENOENT);
}
/*ARGSUSED*/
int
{
int error;
/*
* Grab the quota lock (if the file system has not been forcibly
* unmounted).
*/
if (ufsvfsp)
if (ufsvfsp)
if (error)
return (ENOENT);
return (0);
}
return (ENOENT);
}
#ifdef DEBUG
/*
* These routines maintain the metadata map (matamap)
*/
/*
* update the metadata map at mount
*/
static int
{
/*
* wrong file system; keep looking
*/
return (0);
/*
* load the metadata map
*/
return (0);
}
void
{
int i;
/*
* put static metadata into matamap
* superblock
* cylinder groups
* inode groups
* existing inodes
*/
}
}
/*
* clear the metadata map at umount
*/
void
{
}
/*
* summary info (may be extended during growfs test)
*/
void
{
}
/*
* scan an allocation block (either inode or true block)
*/
static void
unsigned int nblk)
{
int i;
} else {
*fragsp = 0;
}
}
}
/*
* scan an indirect allocation block (either inode or true block)
*/
static void
int level)
{
int i;
return;
}
if (level)
}
/*
* put appropriate metadata into matamap for this inode
*/
void
{
int i;
if (frags)
}
/*
* freeing possible metadata (block of user data)
*/
void
{
}
/*
* allocating metadata
*/
void
int indir)
{
}
#endif /* DEBUG */
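/*
 * A conceptual sketch of what the matamap above records, using purely
 * hypothetical names (illus_matamap, illus_mata_add, illus_mata_covers):
 * a set of device-offset ranges known to hold metadata, so that a DEBUG
 * kernel can assert that only metadata flows through the metadata paths.
 * This illustrates the idea only, not the real map implementation.
 */
#define	ILLUS_MAX_RANGES	64

struct illus_matamap {
	int	im_count;
	struct {
		long	ir_mof;		/* master device offset */
		long	ir_nb;		/* number of bytes */
	} im_range[ILLUS_MAX_RANGES];
};

static void
illus_mata_add(struct illus_matamap *mp, long mof, long nb)
{
	if (mp->im_count < ILLUS_MAX_RANGES) {
		mp->im_range[mp->im_count].ir_mof = mof;
		mp->im_range[mp->im_count].ir_nb = nb;
		mp->im_count++;
	}
}

static int
illus_mata_covers(const struct illus_matamap *mp, long mof, long nb)
{
	int i;

	for (i = 0; i < mp->im_count; i++)
		if (mof >= mp->im_range[i].ir_mof &&
		    mof + nb <= mp->im_range[i].ir_mof + mp->im_range[i].ir_nb)
			return (1);
	return (0);
}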
/*
* ufs_trans_dir is used to declare a directory delta
*/
int
{
if (error == 0) /* treat UFS_HOLE as an I/O error */
return (error);
}
return (error);
}
/*ARGSUSED*/
int
{
/*
* Lock the quota subsystem (ufsvfsp can be NULL
* if the DQ_ERROR is set).
*/
if (ufsvfsp)
/*
* If this transaction has been cancelled by closedq_scan_inode(),
* then bail out now. We don't call dqput() in this case because
* it has already been done.
*/
if (ufsvfsp)
return (0);
}
/*
* Paranoia to make sure that there is at least one
* reference to the dquot struct. We are done with
* the dquot (due to an error) so clear logging
* specific markers.
*/
if (ufsvfsp)
return (1);
}
/*
* Paranoia to make sure that there is at least one
* reference to the dquot struct. Clear the
* modification flag because the operation is now in
* the log. Also clear the logging specific markers
* that were set in ufs_trans_quota().
*/
}
/*
* At this point, the logging specific flag should be clear,
* but add paranoia just in case something has gone wrong.
*/
if (ufsvfsp)
return (0);
}
/*
 * ufs_trans_quota takes in a uid, allocates the disk space, places the
 * quota record into the metamap, then declares the delta.
 */
/*ARGSUSED*/
void
{
/*
* Mark this dquot to indicate that we are starting a logging
* file system operation for this dquot. Also increment the
* reference count so that the dquot does not get reused while
* it is on the mapentry_t list. DQ_TRANS is cleared and the
* reference count is decremented by ufs_trans_push_quota.
*
* If the file system is force-unmounted while there is a
* pending quota transaction, then closedq_scan_inode() will
* clear the DQ_TRANS flag and decrement the reference count.
*
* Since deltamap_add() drops multiple transactions to the
* same dq_mof and ufs_trans_push_quota() won't get called,
* we use DQ_TRANS to prevent repeat transactions from
* incrementing the reference count (or calling TRANS_DELTA()).
*/
}
}
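/*
 * A minimal sketch of the DQ_TRANS protocol described above, with
 * hypothetical names (illus_dquot, ILLUS_DQ_TRANS): the first delta for
 * a dquot sets the flag and takes a hold, repeat deltas are absorbed,
 * and the push (or a forced unmount) clears the flag and drops the
 * hold.  It illustrates only the reference-count discipline, not the
 * kernel implementation.
 */
#define	ILLUS_DQ_TRANS	0x01

struct illus_dquot {
	int	dq_flags;	/* ILLUS_DQ_TRANS while a delta is pending */
	int	dq_cnt;		/* reference count */
};

static void
illus_quota_delta(struct illus_dquot *dqp)
{
	if ((dqp->dq_flags & ILLUS_DQ_TRANS) == 0) {
		dqp->dq_flags |= ILLUS_DQ_TRANS;	/* first delta: mark it */
		dqp->dq_cnt++;				/* hold the dquot */
		/* the real code would declare the delta here */
	}
	/* later deltas to the same dquot are no-ops until the push runs */
}

static void
illus_quota_push(struct illus_dquot *dqp)
{
	if (dqp->dq_flags & ILLUS_DQ_TRANS) {
		dqp->dq_flags &= ~ILLUS_DQ_TRANS;	/* delta now in the log */
		dqp->dq_cnt--;				/* drop the hold */
	}
}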
void
{
}
long ufs_trans_avgbfree = 0; /* will be adjusted for testing */
/*
* Calculate the log reservation for the given write or truncate
*/
static ulong_t
{
long niblk = 0;
int resv;
long avgbfree;
/*
 * Assume that the request will fit in 1 or 2 cg's;
 * resv is the amount of log space to reserve (in bytes).
 */
/*
* get max position of write in fs blocks
*/
/*
* request size in fs blocks
*/
/*
* Adjust for sparse files
*/
if (trunc)
/*
* Adjust avgbfree (for testing)
*/
/*
* Calculate maximum number of blocks of triple indirect
* pointers to write.
*/
long nl2ptr;
long n3blk;
else
}
/*
* calculate maximum number of blocks of double indirect
* pointers to write.
*/
long n2blk;
else
}
/*
* Add in indirect pointer block write
*/
niblk += 1;
}
/*
* Calculate deltas for indirect pointer writes
*/
/*
* maximum number of cg's needed for request
*/
/*
* maximum amount of log space needed for request
*/
if (ncg > 2)
return (resv);
}
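/*
 * A rough, illustrative model of the estimate above, with hypothetical
 * names and constants (illus_log_amt, ILLUS_BSIZE, ILLUS_NINDIR); the
 * real code derives everything from the superblock.  The reservation
 * grows with the file-system blocks written, the indirect-pointer
 * blocks needed to map them, and the cylinder groups the allocation
 * may touch.
 */
#define	ILLUS_BSIZE	8192L			/* assumed fs block size */
#define	ILLUS_NINDIR	(ILLUS_BSIZE / 4)	/* assumed pointers per indirect block */

static long
illus_log_amt(long offset, long size, long blocks_per_cg, long per_cg_resv)
{
	long lastblk = (offset + size + ILLUS_BSIZE - 1) / ILLUS_BSIZE;
	long nblk = size / ILLUS_BSIZE + 1;	/* fs blocks in the request */
	long niblk = 1;				/* at least one indirect block */
	long ncg;

	if (lastblk > ILLUS_NINDIR)		/* request reaches indirect blocks */
		niblk += nblk / ILLUS_NINDIR + 1;

	ncg = nblk / blocks_per_cg + 2;		/* assume 1 or 2 cg's, plus spill */
	return ((nblk + niblk) * ILLUS_BSIZE + ncg * per_cg_resv);
}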
/*
 * Calculate the amount of log space that needs to be reserved for this
 * trunc request. If the amount of log space is too large, then
 * calculate the size that the request needs to be split into.
 */
void
int *resvp,
{
int is_sparse = 0;
/*
* *resvp is the amount of log space to reserve (in bytes).
* when nonzero, *residp is the number of bytes to truncate.
*/
*residp = 0;
} else {
/*
 * truncating up doesn't really use much space;
 * the default above should be sufficient.
 */
goto done;
}
/*
 * There is no need to split a sparse file truncation into
 * as many chunks as for regular files.
 */
nchunks = 1;
incr = 0;
do {
/*
* If this is the first iteration, set "incr".
*/
if (!incr) {
/*
 * If this request takes too much log space,
 * it will be split into "nchunks". If this split
 * is not enough, linearly increment nchunks in
 * the next iteration.
 */
} else {
incr = 1;
}
} else
} while (resv > ufs_trans_max_resv);
if (nchunks > 1) {
}
done:
}
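/*
 * Minimal sketch of the splitting policy described above, with a
 * hypothetical reservation estimator (illus_resv_for) standing in for
 * the real calculation: keep increasing the number of chunks until one
 * chunk's reservation fits under the cap, then report the chunk size as
 * the residual (0 means no split was needed).
 */
static long illus_max_resv = 262144;	/* assumed cap on a single reservation */

static long
illus_resv_for(long bytes)
{
	/* stand-in: pretend the reservation is proportional to the byte count */
	return (bytes / 32 + 4096);
}

static void
illus_split_request(long length, long *resvp, long *residp)
{
	long nchunks = 1;
	long incr, resv;

	do {
		incr = (length + nchunks - 1) / nchunks;	/* bytes per chunk */
		resv = illus_resv_for(incr);
		if (resv > illus_max_resv)
			nchunks++;			/* split finer and retry */
	} while (resv > illus_max_resv);

	*resvp = resv;
	*residp = (nchunks > 1) ? incr : 0;
}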
int
{
int do_block = 0;
/*
* Not logging; just do the trunc
*/
if (!TRANS_ISTRANS(ufsvfsp)) {
return (err);
}
/*
* within the lockfs protocol but *not* part of a transaction
*/
/*
* Trunc the file (in pieces, if necessary)
*/
if (resid) {
/*
* resid is only set if we have to truncate in chunks
*/
/*
* Partially trunc file down to desired size (length).
* Only retain I_FREE on the last partial trunc.
* Round up size to a block boundary, to ensure the truncate
* doesn't have to allocate blocks. This is done both for
* performance and to fix a bug where if the block can't be
* allocated then the inode delete fails, but the inode
* is still freed with attached blocks and non-zero size
* (bug 4348738).
*/
} else
if (!do_block)
goto again;
}
return (err);
}
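/*
 * Sketch of the piecewise truncation described above (illus_* names are
 * stand-ins, and resid is assumed to span many blocks): step the size
 * down by roughly "resid" bytes per transaction, rounding each
 * intermediate size up to a block boundary so the partial truncate
 * never has to allocate a block, until the requested length is reached.
 */
static long
illus_next_trunc_size(long cursize, long length, long resid, long bsize)
{
	long next;

	if (cursize - length <= resid)
		return (length);	/* final piece: cut to the exact length */
	next = cursize - resid;
	return ((next + bsize - 1) / bsize * bsize);	/* round up to a block */
}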
/*
 * Fault in the pages of the first n bytes specified by the uio structure.
 * One byte in each page is touched and the uio struct is unmodified.
 * Any error terminates the routine, as this is only a best-effort
 * attempt to get the pages resident.
 */
static void
{
caddr_t p;
while (n) {
if (cnt == 0) {
/* empty iov entry */
iov++;
continue;
}
n -= cnt;
/*
* touch each page in this segment.
*/
while (cnt) {
switch (uio->uio_segflg) {
case UIO_USERSPACE:
case UIO_USERISPACE:
return;
break;
case UIO_SYSSPACE:
return;
break;
}
p += incr;
}
/*
* touch the last byte in case it straddles a page.
*/
p--;
switch (uio->uio_segflg) {
case UIO_USERSPACE:
case UIO_USERISPACE:
return;
break;
case UIO_SYSSPACE:
return;
break;
}
iov++;
}
}
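/*
 * Illustrative, userland-flavored sketch of the touch technique above
 * (illus_touch and ILLUS_PAGESIZE are hypothetical names): read one byte
 * per page, plus the final byte in case it straddles a page, so that a
 * later copy finds the pages resident.  Errors are ignored, matching the
 * best-effort intent described in the comment.
 */
#define	ILLUS_PAGESIZE	4096

static void
illus_touch(const char *base, unsigned long n)
{
	volatile char tmp;
	unsigned long off;

	for (off = 0; off < n; off += ILLUS_PAGESIZE)
		tmp = base[off];		/* fault in each page */
	if (n != 0)
		tmp = base[n - 1];		/* last byte may straddle a page */
	(void) tmp;
}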
/*
 * Calculate the amount of log space that needs to be reserved for this
 * write request. If the amount of log space is too large, then
 * calculate the size that the request needs to be split into.
 * First try fixed chunks of size ufs_trans_max_resid. If that
 * is too big, iterate down to the largest size that will fit.
 * Pagein the pages in the first chunk here, so that the pagein is
 * avoided later when the transaction is open.
 */
void
int *resvp,
int *residp)
{
int nchunks;
*residp = 0;
if (resv <= ufs_trans_max_resv) {
return;
}
nchunks = 1;
nchunks++;
}
/*
* If this request takes too much log space, it will be split
*/
if (nchunks > 1)
}
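/*
 * Sketch of the chunk-size search described above, reusing the
 * illus_resv_for stand-in from the earlier sketch (all illus_* names
 * are hypothetical): start from a preferred fixed chunk size and walk
 * it down until the per-chunk reservation fits under the cap.
 */
static long illus_max_resid = 256 * 1024;	/* preferred chunk size (assumed) */

static long
illus_pick_chunk(long request, long cap)
{
	long chunk = (request < illus_max_resid) ? request : illus_max_resid;

	while (chunk > 1 && illus_resv_for(chunk) > cap)
		chunk /= 2;		/* shrink until the reservation fits */
	return (chunk);
}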
/*
* Issue write request.
*
* Split a large request into smaller chunks.
*/
int
int ioflag,
int resv,
long resid)
{
long realresid;
int err;
/*
 * Since the write is too big and would "hog the log", it needs to
 * be broken up and done in pieces. NOTE: the caller will
 * issue the EOT after the request has been completed.
 */
/*
* Perform partial request (uiomove will update uio for us)
* Request is split up into "resid" size chunks until
* "realresid" bytes have been transferred.
*/
/*
* Error or request is done; caller issues final EOT
*/
return (err);
}
/*
* Generate EOT for this part of the request
*/
} else {
}
/*
* Make sure the input buffer is resident before starting
* the next transaction.
*/
/*
* Generate BOT for next part of the request
*/
int error;
} else {
}
/*
* Error during EOT (probably device error while writing commit rec)
*/
if (err)
return (err);
goto again;
}
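/*
 * Skeleton of the split-write loop described above, with hypothetical
 * stand-ins passed as callbacks (bot/eot open and commit a transaction,
 * wrip performs one partial write that decrements *remaining): move one
 * chunk per transaction, closing the current transaction and opening
 * the next one between chunks, and leave the final commit to the
 * caller, as the comment above states.
 */
static int
illus_split_write(long total, long chunk, long resv,
    int (*bot)(long), int (*eot)(void), int (*wrip)(long *, long))
{
	long remaining = total;
	int err;

	for (;;) {
		err = wrip(&remaining, chunk);	/* write up to one chunk */
		if (err != 0 || remaining == 0)
			return (err);		/* done; caller issues final EOT */
		err = eot();			/* commit this chunk's deltas */
		if (err != 0)
			return (err);		/* e.g. error writing commit record */
		err = bot(resv);		/* open the next transaction */
		if (err != 0)
			return (err);
	}
}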