lufs.c revision 4f3979a52917aaa51d62e0b7a20028ab903c50ae
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/fssnap_if.h>
#include <sys/sysmacros.h>
#include <sys/inttypes.h>
extern kmutex_t ufs_scan_lock;
struct kmem_cache *lufs_sv;
struct kmem_cache *lufs_bp;
/* Tunables */
/* Generation of header ids */
/*
* Logging delta and roll statistics
*
* dkstats is the initializer table for the named statistics: ten
* "*_deltas" entries followed by ten "*_rolled" entries.  Both halves list
* the same categories in the same order (superblock, bitmap, suminfo,
* allocblk, ab0, dir, inode, fbiwrite, quota, shadow) and every entry is
* typed KSTAT_DATA_UINT64.
*
* NOTE(review): the member declarations of struct delta_kstats are not
* visible here; presumably each initializer row pairs with one member, so
* a new counter needs a row in both halves -- confirm against the full
* declaration.
*/
struct delta_kstats {
} dkstats = {
{ "superblock_deltas", KSTAT_DATA_UINT64 },
{ "bitmap_deltas", KSTAT_DATA_UINT64 },
{ "suminfo_deltas", KSTAT_DATA_UINT64 },
{ "allocblk_deltas", KSTAT_DATA_UINT64 },
{ "ab0_deltas", KSTAT_DATA_UINT64 },
{ "dir_deltas", KSTAT_DATA_UINT64 },
{ "inode_deltas", KSTAT_DATA_UINT64 },
{ "fbiwrite_deltas", KSTAT_DATA_UINT64 },
{ "quota_deltas", KSTAT_DATA_UINT64 },
{ "shadow_deltas", KSTAT_DATA_UINT64 },
/* the "rolled" half mirrors the "deltas" half, category for category */
{ "superblock_rolled", KSTAT_DATA_UINT64 },
{ "bitmap_rolled", KSTAT_DATA_UINT64 },
{ "suminfo_rolled", KSTAT_DATA_UINT64 },
{ "allocblk_rolled", KSTAT_DATA_UINT64 },
{ "ab0_rolled", KSTAT_DATA_UINT64 },
{ "dir_rolled", KSTAT_DATA_UINT64 },
{ "inode_rolled", KSTAT_DATA_UINT64 },
{ "fbiwrite_rolled", KSTAT_DATA_UINT64 },
{ "quota_rolled", KSTAT_DATA_UINT64 },
{ "shadow_rolled", KSTAT_DATA_UINT64 }
};
/*
* General logging kstats
*/
{ "master_reads", KSTAT_DATA_UINT64 },
{ "master_writes", KSTAT_DATA_UINT64 },
{ "log_reads_inmem", KSTAT_DATA_UINT64 },
{ "log_reads", KSTAT_DATA_UINT64 },
{ "log_writes", KSTAT_DATA_UINT64 },
{ "log_master_reads", KSTAT_DATA_UINT64 },
{ "log_roll_reads", KSTAT_DATA_UINT64 },
{ "log_roll_writes", KSTAT_DATA_UINT64 }
};
int
{
return (0);
}
static void
{
drv_usecwait(10);
}
int
{
/*
* In case of panic, busy wait for completion
*/
if (panicstr)
else
}
int
{
/*
* In case of panic, busy wait for completion and run md daemon queues
*/
if (panicstr)
}
static void
{
*sp = 0;
while (nb--)
}
static int
{
return (0);
}
return (1);
}
void
{
return;
/*
* Wait for a pending top_issue_sync which is
* dispatched (via taskq_dispatch()) but hasn't completed yet.
*/
while (mtm->mtm_taskq_sync_count != 0) {
}
/* Roll committed transactions */
/* Kill the roll thread */
/* release saved allocation info */
/* release circular bufs */
/* release maps */
if (ul->un_deltamap)
if (ul->un_matamap)
/* release state buffer MUST BE LAST!! (contains our ondisk data) */
}
int
{
int i;
/* LINTED: warning: logical expression always true: op "||" */
/*
* Get the allocation table
* During a remount the superblock pointed to by the ufsvfsp
* is out of date. Hence the need for the ``new'' superblock
* pointer, fs, passed in as a parameter.
*/
return (EIO);
}
return (ENODEV);
}
/*
* It is possible to get log blocks with all zeros.
* We should also check for nextents to be zero in such case.
*/
return (EDOM);
}
/*
* Put allocation into memory. This requires conversion between
* the ondisk format of the extent (type extent_t) and the
* in-core format of the extent (type ic_extent_t). The
* difference is the in-core form of the extent block stores
* the physical offset of the extent in disk blocks, which
* can require more than a 32-bit field.
*/
}
/*
* Get the log state
*/
return (EIO);
}
}
/*
* Put ondisk struct into an anonymous buffer
* This buffer will contain the memory for the ml_odunit struct
*/
/*
* Verify the log state
*
* eventually roll the bad log until the first IO error.
* fsck will then repair the file system.
*
*
*/
return (EIO);
}
/*
* Initialize the incore-only fields
*/
if (ronly)
/*
* Acquire the ufs_scan_lock before linking the mtm data
* structure so that we keep ufs_sync() and ufs_update() away
* when they execute the ufs_scan_inodes() run while we're in
*/
/* remember the state of the log before the log scan */
/*
* Error during scan
*
*
*/
if (!ronly) {
/*
* Acquire the ufs_scan_lock before de-linking
* the mtm data structure so that we keep ufs_sync()
* and ufs_update() away when they execute the
* ufs_scan_inodes() run while we're in progress of
*/
return (EIO);
}
}
if (!ronly)
return (0);
}
{
/*
* The formula below implements an exponential, modular sequence.
*
* ID(N) = (SEED * (BASE^N)) % PRIME
*
* The numbers will be pseudo random. They depend on SEED, BASE, PRIME,
* but will sweep through almost all of the range 1....PRIME-1.
* Most importantly they will not repeat for PRIME-2 (4294967289)
* repetitions. If they would repeat that could possibly cause hangs,
*/
/* Checking if new identity used already */
/*
* The following preserves the algorithm for the fix for
* "panic: free: freeing free frag, dev:0x2000000018, blk:34605,
* cg:26, ino:148071,".
* If the header identities un_head_ident are equal to the
* present element in the sequence, the next element of the
* sequence is returned instead.
*/
}
return (id);
}
/*
* Seed the generation of log header identities.
*
* Keeps re-reading the current time with gethrestime() and only leaves
* the loop once last_loghead_ident is non-zero.
*
* NOTE(review): the declaration of tv and the statement that derives
* last_loghead_ident from it are not visible in this view -- confirm
* against the full file before relying on how the seed is computed.
*/
static void
lufs_genid_init(void)
{
/* Initialization */
/* Seed the algorithm */
do {
gethrestime(&tv);
/* loop again while the identity is still zero */
} while (last_loghead_ident == UINT32_C(0));
}
static int
{
/* LINTED: warning: logical expression always true: op "||" */
return (EIO);
}
return (0);
}
/*
* Free log space
* Assumes the file system is write locked and is not logging
*/
static int
{
int error = 0, i, j;
long nfno;
char clean;
/*
* Nothing to free
*/
return (0);
/*
* Mark the file system as FSACTIVE and no log but honor the
* current value of fs_reclaim. The reclaim thread could have
* been active when lufs_disable() was called and if fs_reclaim
* is reset to zero here it could lead to lost inodes.
*/
goto errout;
}
/*
* fetch the allocation block
* superblock -> one block of extents -> log data
*/
goto errout;
}
/*
* Free up the allocated space (dummy inode needed for free())
*/
}
/*
* Push the metadata dirtied during the allocations
*/
if (error)
goto errout;
/*
* Free the dummy inode
*/
return (0);
/*
* Free up all resources
*/
if (bp)
if (ip)
return (error);
}
/*
* Allocate log space
* Assumes the file system is write locked and is not logging
*/
static int
{
int error = 0;
/*
* Mark the file system as FSACTIVE
*/
/*
* Allocate the allocation block (need dummy shadow inode;
* we use a shadow inode so the quota sub-system ignores
* the block allocations.)
* superblock -> one block of extents -> log data
*/
if (error)
goto errout;
goto errout;
}
else
/*
* Initialize the first extent
*/
if (error)
goto errout;
else
while (nb) {
if (error) {
if (tb < ldl_minlogsize)
goto errout;
error = 0;
break;
}
else {
break;
}
else
}
}
goto errout;
}
/*
* Initialize the first two sectors of the log
*/
if (error)
goto errout;
/*
* We are done initializing the allocation block and the log
*/
/*
* Update the superblock and push the dirty metadata
*/
if (error)
goto errout;
goto errout;
}
/*
* Everything is safely on disk; update log space pointer in sb
*/
/*
* Free the dummy inode
*/
/* inform user of real log size */
return (0);
/*
* Free all resources
*/
if (bp)
if (logbno) {
}
if (ip) {
}
return (error);
}
/*
* Disable logging
*/
int
{
int error = 0;
/*
* Logging is already disabled; done
*/
return (0);
/*
* Readonly file system
*/
return (0);
}
/*
* File system must be write locked to disable logging
*/
if (error) {
return (error);
}
if (!LOCKFS_IS_ULOCK(&lf)) {
return (0);
}
if (error) {
return (0);
}
goto errout;
/*
* WE ARE COMMITTED TO DISABLING LOGGING PAST THIS POINT
*/
/*
* Disable logging:
* Suspend the reclaim thread and force the delete thread to exit.
* When a nologging mount has completed there may still be
* work for reclaim to do so just suspend this thread until
* it's [deadlock-] safe for it to continue. The delete
* thread won't be needed as ufs_iinactive() calls
* ufs_delete() when logging is disabled.
* Freeze and drain reader ops.
* Commit any outstanding reader transactions (ufs_flush).
* Set the ``unmounted'' bit in the ufstrans struct.
* If debug, remove metadata from matamap.
* Disable matamap processing.
* NULL the trans ops table.
* Free all of the incore structs related to logging.
* Allow reader ops.
*/
(void) ufs_quiesce(ulp);
ufsvfsp->vfs_domatamap = 0;
/*
* Free all of the incore structs
* Acquire the ufs_scan_lock before de-linking the mtm data
* structure so that we keep ufs_sync() and ufs_update() away
* when they execute the ufs_scan_inodes() run while we're in
*/
(void) lufs_unsnarf(ufsvfsp);
ufsvfsp->vfs_nolog_si = 0;
/*
* Free the log space and mark the superblock as FSACTIVE
*/
/*
* Allow the reclaim thread to continue.
*/
/*
* Unlock the file system
*/
if (error)
return (0);
return (error);
}
/*
* Enable logging
*/
int
{
int error;
int reclaim;
/*
* Check if logging is already enabled
*/
/* for root ensure logging option is set */
return (0);
}
/*
* Come back here to recheck if we had to disable the log.
*/
error = 0;
reclaim = 0;
/*
* Adjust requested log size
*/
if (flp->nbytes_actual == 0) {
}
/*
* logging is enabled and the log is the right size; done
*/
return (0);
/*
* Readonly file system
*/
return (0);
}
/*
* File system must be write locked to enable logging
*/
if (error) {
return (error);
}
if (!LOCKFS_IS_ULOCK(&lf)) {
return (0);
}
if (error) {
return (0);
}
/*
* Grab appropriate locks to synchronize with the rest
* of the system
*/
/*
* File system must be fairly consistent to enable logging
*/
goto unlockout;
}
/*
* A write-locked file system is only active if there are
* open deleted files; so remember to set FS_RECLAIM later.
*/
/*
* Logging is already enabled; must be changing the log's size
*/
/*
* Before we can disable logging, we must give up our
* lock. As a consequence of unlocking and disabling the
* log, the fs structure may change. Because of this, when
* disabling is complete, we will go back to recheck to
* repeat all of the checks that we performed to get to
* this point. Disabling sets fs->fs_logbno to 0, so this
* will not put us into an infinite loop.
*/
if (error) {
return (0);
}
return (0);
goto recheck;
}
if (error)
goto errout;
/*
* Create all of the incore structs
*/
if (error)
goto errout;
/*
* DON'T ``GOTO ERROUT'' PAST THIS POINT
*/
/*
* Pretend we were just mounted with logging enabled
* Get the ops vector
* If debug, record metadata locations with log subsystem
* Start the delete thread
* Start the reclaim thread, if necessary
*/
} else
/*
* Unlock the file system
*/
if (error) {
return (0);
}
/*
* There's nothing in the log yet (we've just allocated it)
* so directly write out the super block.
* Note, we have to force this sb out to disk
* (not just to the log) so that if we crash we know we are logging
*/
return (0);
/*
* Acquire the ufs_scan_lock before de-linking the mtm data
* structure so that we keep ufs_sync() and ufs_update() away
* when they execute the ufs_scan_inodes() run while we're in
*/
(void) lufs_unsnarf(ufsvfsp);
return (error);
}
void
{
char *va;
int (*saviodone)();
int entire_range;
/*
* get a linked list of overlapping deltas
* returns with &mtm->mtm_rwlock held
*/
/*
* no overlapping deltas were found; read master
*/
} else {
(void) bdev_strategy(bp);
}
return;
}
/*
* if necessary, sync read the data from master
* errors are returned in bp
*/
if (!entire_range) {
(void) bdev_strategy(bp);
if (trans_not_wait(bp))
}
/*
* sync read the data from the log
* errors are returned inline
*/
}
/*
* unlist the deltas
*/
/*
* all done
*/
}
}
void
{
char *va;
mapentry_t *me;
/*
* if there are deltas, move into log
*/
if (me) {
/*
* move to logmap
*/
if (ufs_crb_enable) {
} else {
}
}
return;
}
return;
}
/*
* Check that we are not updating metadata, or if so then via B_PHYS.
*/
/* If snapshots are enabled, write through the snapshot driver */
else
(void) bdev_strategy(bp);
}
void
{
else
}
/* ARGSUSED */
static int
{
if (rw == KSTAT_WRITE) {
} else {
}
return (0);
}
extern size_t ufs_crb_limit;
extern int ufs_max_crb_divisor;
/*
* Initialize the logging subsystem.
*
* The steps, in the order given by the comments in the body: create the
* kmem caches, call _init_top(), act on bio_lufs_strategy when its
* address is non-NULL, initialise the general logging and delta kstats,
* initialize generation of logging ids, and set the maximum amount of
* kmem that the crbs may use system wide.
*
* NOTE(review): most of the statements that carry out these steps are not
* visible in this view; this summary follows the in-body comments only.
*/
void
lufs_init(void)
{
/* Create kmem caches */
_init_top();
/*
* NOTE(review): testing the address of bio_lufs_strategy presumably
* guards against the symbol being absent -- confirm how it is declared.
*/
if (&bio_lufs_strategy != NULL)
/*
* Initialise general logging and delta kstats
*/
/* each kstat is only set up when ksp is non-NULL */
if (ksp) {
}
if (ksp) {
}
/* Initialize generation of logging ids */
/*
* Set up the maximum amount of kmem that the crbs (system wide)
* can use.
*/
}