/* dsl_dataset.c revision 2734506673e02ba01f6ed716f8db376305a78bad */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>
#include <sys/zfs_ioctl.h>
#include <sys/spa.h>
#include <sys/sunddi.h>
/*
* We use weighted reference counts to express the various forms of exclusion
* between different open modes. A STANDARD open is 1 point, an EXCLUSIVE open
* is DS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE.
* This makes the exclusion logic simple: the total refcnt for all opens cannot
* exceed DS_REF_MAX. For example, EXCLUSIVE opens are exclusive because their
* weight (DS_REF_MAX) consumes the entire refcnt space. PRIMARY opens consume
* just over half of the refcnt space, so there can't be more than one, but it
* can peacefully coexist with any number of STANDARD opens.
*/
static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
	0,			/* DS_MODE_NONE - invalid */
	1,			/* DS_MODE_STANDARD - unlimited number */
	(DS_REF_MAX >> 1) + 1,	/* DS_MODE_PRIMARY - one per fs */
	DS_REF_MAX		/* DS_MODE_EXCLUSIVE - no other opens */
};
/*
 * Figure out how much of this delta should be propagated to the dsl_dir
* layer. If there's a refreservation, that space has already been
* partially accounted for in our ancestors.
*/
static int64_t
parent_delta(dsl_dataset_t *ds, int64_t delta)
{
	uint64_t old_bytes, new_bytes;

	if (ds->ds_reserved == 0)
		return (delta);
	old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
	new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);
	return (new_bytes - old_bytes);
}
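/*
 * Worked example for parent_delta() above (illustrative numbers): with
 * ds_reserved == 100 and ds_unique_bytes == 60, the parent is already
 * being charged for the full 100-byte reservation.  A delta of +30
 * leaves unique (90) within the reservation, so parent_delta() returns
 * 0; a delta of +70 raises unique to 130, so only the 30 bytes beyond
 * the reservation (130 - 100) propagate to the dsl_dir layer.
 */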
void
{
/* It could have been compressed away to nothing */
if (BP_IS_HOLE(bp))
return;
/*
* Account for the meta-objset space in its placeholder
* dsl_dir.
*/
return;
}
}
void
{
/* No block pointer => nothing to free */
if (BP_IS_HOLE(bp))
return;
int err;
/*
* Account for the meta-objset space in its placeholder
* dataset.
*/
return;
}
int err;
} else {
/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
used;
}
}
}
}
uint64_t
dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
{
	uint64_t trysnap = 0;

	if (ds == NULL)
		return (0);
	/*
	 * The snapshot creation could fail, but that would cause an
	 * incorrect FALSE return, which would only result in an
	 * overestimation of the amount of space that an operation would
	 * consume, which is OK.
	 *
	 * There's also a small window where we could miss a pending
	 * snapshot, because we could set the sync task in the quiescing
	 * phase.  So this should only be used as a guess.
	 */
	if (ds->ds_trysnap_txg >
	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
		trysnap = ds->ds_trysnap_txg;
	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
}
int
dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
{
	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
}
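/*
 * Example for dsl_dataset_block_freeable() above (illustrative txgs):
 * with the most recent snapshot at txg 8, a block born in txg 10 is
 * referenced only by the live dataset and may be freed immediately,
 * while a block born in txg 5 is still referenced by the snapshot and
 * must be kept on the deadlist instead.
 */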
/* ARGSUSED */
static void
{
/* open_refcount == DS_REF_MAX when deleting */
}
}
static int
{
int err;
if (ds->ds_snapname[0])
return (0);
return (0);
if (err)
return (err);
return (err);
}
int
{
int err;
if (err)
return (err);
NULL);
if (err == 0) {
}
if (err) {
/*
* we don't really need to close the blist if we
* just opened it.
*/
return (err);
}
}
} else {
if (snapname) {
#ifdef ZFS_DEBUG
if (err == 0) {
&foundobj);
}
#endif
} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
}
}
if (!dsl_dataset_is_snapshot(ds)) {
/*
* In sync context, we're called with either no lock
* or with the write lock. If we're not syncing,
* we're always called with the read lock held.
*/
if (need_lock)
if (err == 0) {
}
if (need_lock)
} else {
}
if (err == 0) {
}
DS_MODE_NONE, ds);
}
if (err) {
return (err);
}
} else {
ds->ds_fsid_guid =
}
}
!DS_MODE_IS_INCONSISTENT(mode)) ||
return (EBUSY);
}
return (0);
}
int
{
dsl_pool_t *dp;
const char *tail;
int err = 0;
if (err)
return (err);
if (obj == 0) {
/* A dataset with no associated objset */
goto out;
}
if (err)
goto out;
if (tail[0] != '@') {
goto out;
}
tail++;
/* Look for a snapshot */
if (!DS_MODE_IS_READONLY(mode)) {
goto out;
}
if (err)
goto out;
}
out:
/* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */
return (err);
}
int
dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp)
{
	return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp));
}
void
{
} else {
if (ds->ds_snapname[0]) {
/*
* We use a "recursive" mutex so that we
* can call dprintf_ds() with ds_lock held.
*/
} else {
}
}
}
}
static int
{
int result;
} else {
if (ds->ds_snapname[0]) {
++result; /* adding one for the @-sign */
/* see dsl_dataset_name */
} else {
}
}
}
return (result);
}
void
{
}
void
{
}
{
} else {
}
return (rv);
}
void
{
VERIFY(0 ==
}
{
if (origin) {
}
return (dsobj);
}
{
return (dsobj);
}
struct destroyarg {
char *snapname;
char *failed;
};
static int
{
char *cp;
int err;
*cp = '\0';
return (0);
if (err) {
return (err);
}
return (0);
}
/*
* Destroy 'snapname' in all descendants of 'fsname'.
*/
int
{
int err;
struct destroyarg da;
if (err)
return (err);
if (err == 0)
}
/*
* If it was successful, destroy_sync would have
* closed the ds
*/
if (err)
}
return (err);
}
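/*
 * Sketch of the recursion used above (assumed from context; the elided
 * body wires the struct destroyarg through dmu_objset_find() so that
 * each descendant filesystem destroys its own copy of the snapshot):
 */
#if 0
	da.snapname = snapname;
	da.failed = fsname;
	err = dmu_objset_find(fsname,
	    dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);
#endif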
/*
 * ds must be opened EXCLUSIVE or PRIMARY.  On return (whether
 * successful or not), ds will be closed and the caller can no longer
* dereference it.
*/
int
{
int err;
DS_MODE_EXCLUSIVE) == 0) {
return (EBUSY);
}
}
if (dsl_dataset_is_snapshot(ds)) {
/* Destroying a snapshot is simpler */
goto out;
}
/*
* Check for errors and mark this ds as inconsistent, in
* case we crash while freeing the objects.
*/
if (err)
goto out;
if (err)
goto out;
/*
* remove the objects in open context, so that we won't
* have too much to do in syncing context.
*/
if (err) {
/*
* Perhaps there is not enough disk
* space. Just deal with it from
* dsl_dataset_destroy_sync().
*/
continue;
}
}
/* Make sure it's not dirty before we finish destroying it. */
goto out;
if (ds->ds_user_ptr) {
}
if (err)
goto out;
/*
* Blow away the dsl_dir + head dataset.
*/
/* if it is successful, *destroy_sync will close the ds+dd */
if (err)
out:
if (err)
return (err);
}
int
{
}
void *
dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
    void *p, dsl_dataset_evict_func_t func)
{
	void *old;

	mutex_enter(&ds->ds_lock);
	old = ds->ds_user_ptr;
	if (old == NULL) {
		ds->ds_user_ptr = p;
		ds->ds_user_evict_func = func;
	}
	mutex_exit(&ds->ds_lock);
	return (old);
}

void *
dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
{
	return (ds->ds_user_ptr);
}
blkptr_t *
dsl_dataset_get_blkptr(dsl_dataset_t *ds)
{
	return (&ds->ds_phys->ds_bp);
}
void
{
/* If it's the meta-objset, set dp_meta_rootbp */
} else {
}
}
spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
	return (ds->ds_dir->dd_pool->dp_spa);
}
void
{
dsl_pool_t *dp;
return;
panic("dirtying snapshot!");
/* up the hold count until we can be written out */
}
}
/*
* The unique space in the head dataset can be calculated by subtracting
* the space used in the most recent snapshot, that is still being used
* in this file system, from the space currently in use. To figure out
* the space in the most recent snapshot still in use, we need to take
* the total space used in the snapshot and subtract out the space that
* has been freed up since the snapshot was taken.
*/
static void
dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
{
else
mrs_used = 0;
&dluncomp));
if (!DS_UNIQUE_IS_ACCURATE(ds) &&
}
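/*
 * Worked example of the computation described above (illustrative
 * numbers): if the head currently uses 500 bytes, the most recent
 * snapshot used 300 bytes, and 120 of those bytes have since been
 * freed (so they sit on the head's deadlist), the snapshot still
 * shares 300 - 120 = 180 bytes with the head, giving
 * unique_bytes = 500 - 180 = 320.
 */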
static uint64_t
dsl_dataset_unique(dsl_dataset_t *ds)
{
	if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds))
		dsl_dataset_recalc_head_uniq(ds);
	return (ds->ds_phys->ds_unique_bytes);
}
struct killarg {
};
static int
{
/*
* Since this callback is not called concurrently, no lock is
* needed on the accounting values.
*/
/* XXX check for EIO? */
return (0);
}
/* ARGSUSED */
static int
{
/*
 * We can only roll back to emptiness if it is a ZPL objset.
*/
return (EINVAL);
/*
* This must not be a snapshot.
*/
return (EINVAL);
/*
* If we made changes this txg, traverse_dsl_dataset won't find
* them. Try again.
*/
return (EAGAIN);
return (0);
}
/* ARGSUSED */
static void
{
/*
* Before the roll back destroy the zil.
*/
/*
* We need to make sure that the objset_impl_t is reopened after
* we do the rollback, otherwise it will have the wrong
* objset_phys_t. Normally this would happen when this
* DS_MODE_EXCLUSIVE dataset-open is closed, thus causing the
* dataset to be immediately evicted. But when doing "zfs recv
* -F", we reopen the objset before that, so that there is no
* window where the dataset is closed and inconsistent.
*/
}
/* Zero out the deadlist. */
{
/* Free blkptrs that we gave birth to */
/* only deduct space beyond any refreservation */
}
/* Change our contents to that of the prev snapshot */
}
} else {
/* Zero out our contents, recreate objset */
}
}
/* ARGSUSED */
static int
{
int err;
/*
* Can't delete a head dataset if there are snapshots of it.
* (Except if the only snapshots are from the branch we cloned
* from.)
*/
return (EINVAL);
/*
* This is really a dsl_dir thing, but check it here so that
* we'll be less likely to leave this dataset inconsistent &
* nearly destroyed.
*/
if (err)
return (err);
if (count != 0)
return (EEXIST);
return (0);
}
/* ARGSUSED */
static void
{
/* Mark it as inconsistent on-disk, in case we crash */
}
/* ARGSUSED */
int
{
/* Can't delete a branch point. */
return (EEXIST);
/*
* Can't delete a head dataset if there are snapshots of it.
* (Except if the only snapshots are from the branch we cloned
* from.)
*/
return (EINVAL);
/*
* If we made changes this txg, traverse_dsl_dataset won't find
* them. Try again.
*/
return (EAGAIN);
/* XXX we should do some i/o error checking... */
return (0);
}
void
{
int err;
int after_branch_point = FALSE;
/* Remove our reservation */
if (ds->ds_reserved != 0) {
}
} else {
}
if (after_branch_point &&
/* This clone is toast. */
} else if (!after_branch_point) {
}
}
/*
* Transfer to our deadlist (which will become next's
* new deadlist) any entries from next's current
* deadlist which were born before prev, and free the
* other entries.
*
* XXX we're doing this long task with the config lock held
*/
&bp) == 0) {
if (ds_prev && !after_branch_point &&
}
} else {
/* XXX check return value? */
}
}
/* free next's deadlist */
/* set next's deadlist to our deadlist */
/*
* Update next's unique to include blocks which
* were previously shared by only this snapshot
* and it. Those blocks will be born after the
* prev snap and before this snap, and will have
* died after the next snap and before the one
* after that (ie. be on the snap after next's
* deadlist).
*
* XXX we're doing this long task with the
* config lock held
*/
itor = 0;
}
}
} else {
ds_next);
if (ds_prev) {
} else {
}
/*
 * Reduce the amount of our unconsumed refreservation
* being charged to our parent by the amount of
* new unique data we have gained.
*/
0, 0, tx);
}
}
/*
* NB: unique_bytes might not be accurate for the head objset.
* Before SPA_VERSION 9, we didn't update its value when we
* deleted the most recent snapshot.
*/
} else {
/*
* There's no next snapshot, so this is a head dataset.
* Destroy the deadlist. Unless it's a clone, the
* deadlist should be empty. (If it's a clone, it's
* safe to ignore the deadlist contents.)
*/
/*
* Free everything that we point to (that's born after
* the previous snapshot, if we are a clone)
*
* XXX we're doing this long task with the config lock held
*/
}
}
/* Erase the link in the dataset */
/*
* dsl_dir_sync_destroy() called us, they'll destroy
* the dataset.
*/
} else {
/* remove from snapshot namespace */
#ifdef ZFS_DEBUG
{
}
#endif
}
}
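/*
 * Sketch of the deadlist-transfer rule applied above when destroying a
 * snapshot with neighbors "prev" and "next" (hypothetical locals and a
 * hypothetical free_func(); only the birth-time comparison is taken
 * from the comments above):
 */
#if 0
	while (bplist_iterate(&next_deadlist, &itor, &bp) == 0) {
		if (bp.blk_birth <= prev_snap_txg) {
			/* born before prev: a snapshot still */
			/* references it, so it stays dead */
			VERIFY(0 == bplist_enqueue(&our_deadlist, &bp, tx));
		} else {
			/* born between prev and us: no snapshot */
			/* references it anymore; free it for real */
			(void) free_func(&bp, tx);
		}
	}
#endif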
static int
dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	uint64_t asize;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	/*
	 * If there's an fs-only reservation, any blocks that might become
	 * owned by the snapshot dataset must be accommodated by space
	 * outside of the reservation.
	 */
	asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE))
		return (ENOSPC);

	/*
	 * Propagate any reserved space for this snapshot to other
	 * snapshot checks in this sync group.
	 */
	if (asize > 0)
		dsl_dir_willuse_space(ds->ds_dir, asize, tx);

	return (0);
}
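/*
 * Example (illustrative numbers): with refreservation == 1G and 400M of
 * unique data, taking a snapshot converts those 400M from "covered by
 * the reservation" to "owned by the snapshot", so 400M of space outside
 * the reservation must be available for the snapshot to succeed.
 */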
/* ARGSUSED */
int
{
int err;
/*
* We don't allow multiple snapshots of the same txg. If there
* is already one, try again.
*/
return (EAGAIN);
/*
 * Check for a conflicting snapshot name.
*/
if (err == 0)
return (EEXIST);
return (err);
/*
 * Check that the dataset's name is not too long.  Name consists
 * of the dataset name's length + 1 for the @-sign + the snapshot
 * name's length.
*/
return (ENAMETOOLONG);
if (err)
return (err);
return (0);
}
void
{
int err;
}
}
/*
* If we have a reference-reservation on this dataset, we will
* need to increase the amount of refreservation being charged
* since our unique space is going to zero.
*/
if (ds->ds_reserved) {
}
"dataset = %llu", dsobj);
}
void
{
/*
* in case we had to change ds_fsid_guid when we opened it,
* sync it out now.
*/
}
void
{
ds->ds_reserved);
/*
* This is a snapshot; override the dd's space used with
* our unique space and compression ratio.
*/
}
}
void
{
}
/* clone origin is really a dsl_dir thing... */
}
}
uint64_t
dsl_dataset_fsid_guid(dsl_dataset_t *ds)
{
	return (ds->ds_fsid_guid);
}
void
dsl_dataset_space(dsl_dataset_t *ds,
    uint64_t *refdbytesp, uint64_t *availbytesp,
    uint64_t *usedobjsp, uint64_t *availobjsp)
{
	*refdbytesp = ds->ds_phys->ds_used_bytes;
	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
	if (ds->ds_quota != 0) {
		/*
		 * Adjust available bytes according to refquota
		 */
		if (*refdbytesp < ds->ds_quota)
			*availbytesp = MIN(*availbytesp,
			    ds->ds_quota - *refdbytesp);
		else
			*availbytesp = 0;
	}
	*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
}
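/*
 * Example (illustrative numbers): with refquota == 10G and 9G currently
 * referenced, at most 1G is reported available, even if the pool could
 * supply more; once referenced reaches the refquota, available is 0.
 */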
boolean_t
dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
{
	if (ds->ds_prev == NULL)
		return (B_FALSE);
	if (ds->ds_phys->ds_bp.blk_birth >
	    ds->ds_prev->ds_phys->ds_creation_txg)
		return (B_TRUE);
	return (B_FALSE);
}
/* ARGSUSED */
static int
{
char *newsnapname = arg2;
int err;
if (err)
return (err);
/* new name better not be in use */
if (err == 0)
err = 0;
/* dataset name + 1 for the "@" + the new snapshot name must fit */
err = ENAMETOOLONG;
return (err);
}
static void
{
const char *newsnapname = arg2;
int err;
}
struct renamesnaparg {
char failed[MAXPATHLEN];
char *oldsnap;
char *newsnap;
};
static int
{
char *cp;
int err;
*cp = '@';
/*
* For recursive snapshot renames the parent won't be changing
*/
return (err);
}
*cp = '\0';
return (0);
}
if (err) {
*cp = '\0';
return (err);
}
#ifdef _KERNEL
/* for each filesystem undergoing rename, we'll need to unmount it */
#endif
*cp = '\0';
return (0);
}
static int
{
int err;
struct renamesnaparg *ra;
/* truncate the snapshot name to get the fsname */
*cp = '\0';
if (err) {
return (err);
}
if (err == 0) {
}
}
}
if (err)
return (err);
}
static int
{
return (ENAMETOOLONG);
return (0);
}
int
{
const char *tail;
int err;
if (err)
return (err);
/* if we're growing, validate child size lengths */
if (delta > 0)
if (!err)
return (err);
}
if (tail[0] != '@') {
/* the name ended in a nonexistent component */
return (ENOENT);
}
/* new name must be snapshot in same filesystem */
return (EINVAL);
tail++;
return (EXDEV);
if (recursive) {
} else {
if (err)
return (err);
}
return (err);
}
struct promotearg {
};
/* ARGSUSED */
static int
{
int err;
/* Check that it is a clone */
return (EINVAL);
/* Since this is so expensive, don't do the preliminary check */
if (!dmu_tx_is_syncing(tx))
return (0);
goto out;
{
goto out;
}
goto out;
}
/* find origin's new next ds */
goto out;
newnext_ds = prev;
}
/* compute origin's new unique space */
}
goto out;
/* Walk the snapshots that we are moving */
/* CONSTCOND */
while (TRUE) {
/* Check that the snapshot name does not conflict */
if (err == 0)
goto out;
}
/*
* compute space to transfer. Each snapshot gave birth to:
* (my used) - (prev's used) + (deadlist's used)
*/
/* If we reach the first snapshot, we're done. */
break;
goto out;
goto out;
/*
* We could be a clone of a clone. If we reach our
* parent's branch point, we're done.
*/
break;
}
}
/* Check that there is enough space here */
out:
if (origin_ds)
if (newnext_ds)
if (name)
return (err);
}
static void
{
char *name;
/*
* We need to explicitly open odd, since origin_ds's dd will be
* changing.
*/
/* move snapshots to this dir */
/* CONSTCOND */
while (TRUE) {
/* move snap name entry */
/* change containing dsl_dir */
break;
break;
}
}
/* change origin's next snap */
/* change origin */
/* change space accounting */
/* log history record */
}
int
dsl_dataset_promote(const char *name)
{
int err;
struct promotearg pa;
if (err)
return (err);
if (err) {
return (err);
}
/*
* Add in 128x the snapnames zapobj size, since we will be moving
* a bunch of snapnames to the promoted ds, and dirtying their
* bonus buffers.
*/
return (err);
}
struct cloneswaparg {
};
/* ARGSUSED */
static int
{
/* they should both be heads */
return (EINVAL);
/* the branch point should be just before them */
return (EINVAL);
/* cds should be the clone */
return (EINVAL);
/* the clone should be a child of the origin */
return (EINVAL);
/* ohds shouldn't be modified unless 'force' */
return (ETXTBSY);
/* adjust amount of any unconsumed refreservation */
if (csa->unused_refres_delta > 0 &&
return (ENOSPC);
return (0);
}
/* ARGSUSED */
static void
{
int err;
}
}
/* compute unique space */
}
/* reset origin's unique bytes */
/* swap blkptrs */
{
}
/* set dd_*_bytes */
{
&cdl_comp, &cdl_uncomp));
&odl_comp, &odl_uncomp));
}
#define	SWITCH64(x, y) \
	{ \
		uint64_t __tmp = (x); \
		(x) = (y); \
		(y) = __tmp; \
	}
	/* swap ds_*_bytes */
	SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
	    csa->cds->ds_phys->ds_used_bytes);
	/* apply any parent delta for change in unconsumed refreservation */
	dsl_dir_diduse_space(csa->ohds->ds_dir, csa->unused_refres_delta,
	    0, 0, tx);
/* swap deadlists */
}
/*
* Swap 'clone' with its origin head file system.
*/
int
{
struct cloneswaparg csa;
}
/*
* Given a pool name and a dataset object number in that pool,
* return the name of that dataset.
*/
int
{
dsl_pool_t *dp;
int error;
return (error);
return (error);
}
return (0);
}
int
{
int error = 0;
/*
* Make a space adjustment for reserved bytes.
*/
}
return (0);
}
/*
* If they are requesting more space, and our current estimate
* is over quota, they get to try again unless the actual
* on-disk is over quota and there are no pending changes (which
* may free up space for us).
*/
else
}
return (error);
}
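/*
 * Sketch of the retry rule described in the comment above (hypothetical
 * locals; the structure follows the comment, not the elided code):
 */
#if 0
	if (used_on_disk + est_inflight >= quota) {
		if (est_inflight > 0 || used_on_disk < quota)
			error = ERESTART;	/* a pending sync may free space */
		else
			error = EDQUOT;		/* definitively over quota */
	}
#endif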
/* ARGSUSED */
static int
{
return (ENOTSUP);
if (new_quota == 0)
return (0);
return (ENOSPC);
return (0);
}
/* ARGSUSED */
void
{
}
int
{
int err;
if (err)
return (err);
/*
* If someone removes a file, then tries to set the quota, we
* want to make sure the file freeing takes effect.
*/
}
return (err);
}
static int
{
if (new_reservation > INT64_MAX)
return (EOVERFLOW);
return (ENOTSUP);
if (dsl_dataset_is_snapshot(ds))
return (EINVAL);
/*
* If we are doing the preliminary check in open context, the
* space estimates may be inaccurate.
*/
if (!dmu_tx_is_syncing(tx))
return (0);
if (delta > 0 &&
return (ENOSPC);
return (ENOSPC);
return (0);
}
/* ARGSUSED */
static void
{
}
int
{
int err;
if (err)
return (err);
return (err);
}