dmu_objset.c revision c166b69d29138aed7a415fe7cef698e54c6ae945
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2015, STRATO AG, Inc. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
*/
/* Portions Copyright 2010 Robert Milkowski */
#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_synctask.h>
#include <sys/dsl_deleg.h>
#include <sys/dmu_impl.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_onexit.h>
#include <sys/dsl_destroy.h>
/*
* Needed to close a window in dnode_move() that allows the objset to be freed
* before it can be safely accessed.
*/
/*
 * Tunable to override the maximum number of threads used to parallelize
 * dmu_objset_find_dp, needed to speed up the import of pools with many
 * datasets.
 * Default is 4 times the number of leaf vdevs.
 */
int dmu_find_threads = 0;
/* Forward declaration: taskq callback used by the dmu_objset_find_dp path. */
static void dmu_objset_find_dp_cb(void *arg);
/*
 * Subsystem initialization hook for the objset layer.  In this revision it is
 * an empty stub, kept for interface symmetry with dmu_objset_fini() so
 * callers have a stable init/fini pair.
 */
void
dmu_objset_init(void)
{
}
/*
 * Subsystem teardown hook for the objset layer.  Currently a no-op; it exists
 * as the counterpart to dmu_objset_init() so module load/unload paths can
 * call a matched pair unconditionally.
 */
void
dmu_objset_fini(void)
{
}
spa_t *
{
}
zilog_t *
{
}
{
else
}
{
return (os->os_dsl_dataset);
}
{
}
void
{
}
{
}
{
}
{
return (os->os_logbias);
}
static void
{
/*
* Inheritance should have been done by now.
*/
}
static void
{
/*
* Inheritance and range checking should have been done by now.
*/
}
static void
{
/*
* Inheritance and range checking should have been done by now.
*/
}
static void
{
enum zio_checksum checksum;
/*
* Inheritance should have been done by now.
*/
}
static void
{
/*
* Inheritance and range checking should have been done by now.
*/
newval == ZFS_CACHE_METADATA);
}
static void
{
/*
* Inheritance and range checking should have been done by now.
*/
newval == ZFS_CACHE_METADATA);
}
static void
{
/*
* Inheritance and range checking should have been done by now.
*/
newval == ZFS_SYNC_DISABLED);
}
static void
{
/*
* Inheritance and range checking should have been done by now.
*/
}
static void
{
}
static void
{
}
void
{
if (size == sizeof (objset_phys_t)) {
}
}
int
{
int i, err;
if (DMU_OS_IS_L2CACHEABLE(os))
if (err != 0) {
/* convert checksum errors into IO errors */
return (err);
}
/* Increase the blocksize if we are permitted. */
}
} else {
sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
}
/*
* Note: the changed_cb will be called once before the register
* func returns, thus changing the checksum/compression from the
*/
/*
* Note: it's valid to open the objset if the dataset is
* long-held, in which case the pool_config lock will not
* be held.
*/
}
if (err == 0) {
}
if (!ds->ds_is_snapshot) {
if (err == 0) {
}
if (err == 0) {
}
if (err == 0) {
}
if (err == 0) {
}
if (err == 0) {
}
if (err == 0) {
}
if (err == 0) {
}
if (err == 0) {
}
}
if (needlock)
if (err != 0) {
return (err);
}
} else {
/* It's the meta-objset. */
}
for (i = 0; i < TXG_SIZE; i++) {
}
}
return (0);
}
int
{
int err = 0;
/*
* We shouldn't be doing anything with dsl_dataset_t's unless the
* pool_config lock is held, or the dataset is long-held.
*/
if (err == 0) {
}
}
return (err);
}
/*
* Holds the pool while the objset is held. Therefore only one objset
* can be held at a time.
*/
int
{
dsl_pool_t *dp;
int err;
if (err != 0)
return (err);
if (err != 0) {
return (err);
}
if (err != 0) {
}
return (err);
}
static int
{
int err;
if (err != 0) {
}
return (err);
}
/*
* dsl_pool must not be held when this is called.
* Upon successful return, there will be a longhold on the dataset,
* and the dsl_pool will not be held.
*/
int
{
dsl_pool_t *dp;
int err;
if (err != 0)
return (err);
if (err != 0) {
return (err);
}
return (err);
}
int
{
int err;
if (err != 0)
return (err);
}
void
{
}
/*
* When we are called, os MUST refer to an objset associated with a dataset
* that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner
* == tag. We will then release and reacquire ownership of the dataset while
* holding the pool config_rwlock so that no intervening namespace or
* ownership changes can occur.
*
* This exists solely to accommodate zfs_ioc_userspace_upgrade()'s desire to
* release the hold on its dataset and acquire a new one on the dataset of the
* same name so that it can be partially torn down and reconstructed.
*/
void
{
dsl_pool_t *dp;
char name[ZFS_MAX_DATASET_NAME_LEN];
}
void
{
}
void
{
/*
* Skip dnodes without holds. We have to do this dance
* because dnode_add_ref() only works if there is already a
* hold. If the dnode has no holds, then it has no dbufs.
*/
} else {
}
}
}
}
/*
* Objset eviction processing is split into two pieces.
* The first marks the objset as evicting, evicts any dbufs that
* have a refcount of zero, and then queues up the objset for the
* second phase of eviction. Once os->os_dnodes has been cleared by
* dnode_buf_pageout()->dnode_destroy(), the second phase is executed.
* The second phase closes the special dnodes, dequeues the objset from
* the list of those undergoing eviction, and finally frees the objset.
*
* NOTE: Due to asynchronous eviction processing (invocation of
* dnode_buf_pageout()), it is possible for the meta dnode for the
* objset to have no holds even though os->os_dnodes is not empty.
*/
void
{
for (int t = 0; t < TXG_SIZE; t++)
if (ds)
} else {
}
}
void
{
if (DMU_USERUSED_DNODE(os)) {
}
/*
* This is a barrier to prevent the objset from going away in
* dnode_move() until we can safely ensure that the objset is still in
* use. We consider the objset valid before the barrier and invalid
* after the barrier.
*/
}
{
}
/* called from dsl for meta-objset */
objset_t *
{
else
/*
* We don't want to have to increase the meta-dnode's nlevels
* later, because then we could do it in quiescing context while
* we are also accessing it in open context.
*
* This precaution is not necessary for the MOS (ds == NULL),
* because the MOS is only updated in syncing context.
* This is most fortunate: the MOS is the only objset that
* needs to be synced multiple times as spa_sync() iterates
* to convergence, so minimizing its dn_nlevels matters.
*/
int levels = 1;
/*
* Determine the number of levels necessary for the meta-dnode
* to contain DN_MAX_OBJECT dnodes. Note that in order to
* ensure that we do not overflow 64 bits, there has to be
* a nlevels that gives us a number of blocks > DN_MAX_OBJECT
* but < 2^64. Therefore,
* (mdn->dn_indblkshift - SPA_BLKPTRSHIFT) (10) must be
* less than (64 - log2(DN_MAX_OBJECT)) (16).
*/
levels++;
}
if (dmu_objset_userused_enabled(os)) {
}
return (os);
}
typedef struct dmu_objset_create_arg {
const char *doca_name;
void *doca_userarg;
/*ARGSUSED*/
static int
{
const char *tail;
int error;
return (SET_ERROR(ENAMETOOLONG));
if (error != 0)
return (error);
}
return (error);
}
static void
{
const char *tail;
}
}
int
{
return (dsl_sync_task(name,
5, ZFS_SPACE_CHECK_NORMAL));
}
typedef struct dmu_objset_clone_arg {
const char *doca_clone;
const char *doca_origin;
/*ARGSUSED*/
static int
{
const char *tail;
int error;
return (SET_ERROR(ENAMETOOLONG));
if (error != 0)
return (error);
}
if (error != 0) {
}
if (error != 0)
return (error);
/* You can only clone snapshots, not the head datasets. */
if (!origin->ds_is_snapshot) {
}
return (0);
}
static void
{
const char *tail;
char namebuf[ZFS_MAX_DATASET_NAME_LEN];
}
int
{
return (dsl_sync_task(clone,
5, ZFS_SPACE_CHECK_NORMAL));
}
int
{
int err;
return (err);
}
static void
{
/*
* Initialize dn_zio outside dnode_sync() because the
* meta-dnode needs to set it outside dnode_sync().
*/
if (newlist) {
}
}
}
/* ARGSUSED */
static void
{
/*
* Update rootbp fill count: it should be the number of objects
* allocated in the object set (not counting the "special"
* objects that are stored in the objset_phys_t -- the meta
*/
for (int i = 0; i < dnp->dn_nblkptr; i++)
}
/* ARGSUSED */
static void
{
} else {
}
}
/* called from dsl */
void
{
int txgoff;
/* XXX the write_done callback should really give us the tx... */
/*
* This is the MOS. If we have upgraded,
* spa_max_replication() could change, so reset
* os_copies here.
*/
}
/*
* Create the root block IO
*/
/*
* Sync special dnodes - the parent IO for the sync is the root block
*/
if (DMU_USERUSED_DNODE(os) &&
}
if (dmu_objset_userused_enabled(os)) {
/*
* We must create the list here because it uses the
* dn_dirty_link[] of this txg.
*/
}
}
/*
* Free intent log blocks up to this tx.
*/
}
{
}
void
{
}
{
}
static void
{
if ((flags & DNODE_FLAG_USERUSED_ACCOUNTED)) {
if (subtract)
}
}
void
{
int flags;
}
/*
* We intentionally modify the zap object even if the
* net delta is zero. Otherwise
* the block of the zap obj could be shared between
* datasets but need to be different between them after
* a bprewrite.
*/
if (flags & DN_ID_OLD_EXIST) {
}
if (flags & DN_ID_NEW_EXIST) {
}
dn->dn_oldused = 0;
dn->dn_oldflags = 0;
if (dn->dn_bonuslen == 0)
else
}
}
}
/*
*
* If a dirty record for transaction group that is syncing can't
* be found then NULL is returned. In the NULL case it is assumed
*/
static void *
{
void *data;
if (db->db_dirtycnt == 0)
break;
} else {
if (dn->dn_bonuslen == 0 &&
else
}
return (data);
}
void
{
int error;
return;
return;
} else {
}
int rf = 0;
rf |= DB_RF_HAVESTRUCT;
have_spill = B_TRUE;
} else {
return;
}
if (before) {
} else if (data) {
}
/*
* Must always call the callback in case the object
* type has changed and that type isn't an object type to track
*/
/*
* If we don't know what the old values are then just assign
* them to 0, since that is a new file being created.
*/
if (flags & DN_ID_OLD_EXIST) {
} else {
}
error = 0;
}
if (db)
if (have_spill) {
} else {
}
if (have_spill)
}
{
}
int
{
int err = 0;
return (0);
if (!dmu_objset_userused_enabled(os))
if (dmu_objset_is_snapshot(os))
/*
* We simply need to mark every object dirty, so that it will be
* synced out and now accounted. If this is called
* concurrently, or if we already did some work before crashing,
* that's fine, since we track each object's accounted state
* independently.
*/
int objerr;
if (objerr != 0)
continue;
if (objerr != 0) {
continue;
}
}
return (0);
}
void
{
}
{
}
void
{
if (os->os_dsl_dataset)
}
void
{
}
int
{
else
return (B_FALSE);
}
int
{
}
int
{
}
return (SET_ERROR(ENAMETOOLONG));
}
if (idp)
if (case_conflict)
return (0);
}
int
{
/* there is no next dir on a snapshot! */
}
return (SET_ERROR(ENAMETOOLONG));
}
if (idp)
return (0);
}
typedef struct dmu_objset_find_ctx {
void *dc_arg;
int dc_flags;
int *dc_error;
static void
{
int err = 0;
/* don't process if there already was an error */
goto out;
if (err != 0)
goto out;
/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
goto out;
}
/*
* Iterate over all children.
*/
(void) zap_cursor_advance(&zc)) {
sizeof (uint64_t));
else
}
}
/*
* Iterate over all snapshots.
*/
if (err == 0) {
(void) zap_cursor_advance(&zc)) {
sizeof (uint64_t));
if (err != 0)
break;
if (err != 0)
break;
}
}
}
if (err != 0)
goto out;
/*
* Apply to self.
*/
if (err != 0)
goto out;
out:
if (err != 0) {
/* only keep first error */
}
}
static void
dmu_objset_find_dp_cb(void *arg)
{
/*
 * NOTE(review): only this explanatory comment is visible in this excerpt;
 * the executable body of the callback appears to have been stripped.
 *
 * We need to get a pool_config_lock here, as there are several
 * assert(pool_config_held) checks down the stack. Getting a lock via
 * dsl_pool_config_enter is risky, as it might be stalled by a
 * pending writer. This would deadlock, as the write lock can
 * only be granted when our parent thread gives up the lock.
 * The _prio interface gives us priority over a pending writer.
 */
}
/*
* Find objsets under and including ddobj, call func(ds) on each.
* The order for the enumeration is completely undefined.
* func is called with dsl_pool_config held.
*/
int
{
int error = 0;
int ntasks;
/*
* In case a write lock is held we can't make use of
* parallelism, as down the stack of the worker threads
* the lock is asserted via dsl_pool_config_held.
* In case of a read lock this is solved by getting a read
* lock in each worker thread, which isn't possible in case
* of a writer lock. So we fall back to the synchronous path
* here.
* In the future it might be possible to get some magic into
* dsl_pool_config_held in a way that it returns true for
* the worker threads so that a single lock held from this
* thread suffices. For now, stay single threaded.
*/
return (error);
}
if (ntasks == 0)
INT_MAX, 0);
}
/* dcp will be freed by task */
/*
* PORTING: this code relies on the property of taskq_wait to wait
* until no more tasks are queued and no more tasks are active. As
* we always queue new tasks from within other tasks, task_wait
* reliably waits for the full recursion to finish, even though we
* enqueue new tasks after taskq_wait has been called.
* On platforms other than illumos, taskq_wait may not have this
* property.
*/
taskq_wait(tq);
return (error);
}
/*
* Find all objsets under name, and for each, call 'func(child_name, arg)'.
* The dp_config_rwlock must not be held when this is called, and it
* will not be held when the callback is called.
* Therefore this function should only be used when the pool is not changing
* (e.g. in syncing context), or the callback can deal with the possible races.
*/
static int
{
char *child;
int err;
if (err != 0) {
return (err);
}
/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
return (0);
}
/*
* Iterate over all children.
*/
if (flags & DS_FIND_CHILDREN) {
(void) zap_cursor_advance(&zc)) {
sizeof (uint64_t));
if (err != 0)
break;
}
if (err != 0) {
return (err);
}
}
/*
* Iterate over all snapshots.
*/
if (flags & DS_FIND_SNAPSHOTS) {
if (err == 0) {
(void) zap_cursor_advance(&zc)) {
sizeof (uint64_t));
if (err != 0)
break;
}
}
}
if (err != 0)
return (err);
/* Apply to self. */
}
/*
* See comment above dmu_objset_find_impl().
*/
int
int flags)
{
int error;
if (error != 0)
return (error);
return (error);
}
void
{
}
void *
{
return (os->os_user_ptr);
}
/*
* Determine name of filesystem, given name of snapshot.
* buf must be at least ZFS_MAX_DATASET_NAME_LEN bytes
*/
int
{
return (SET_ERROR(ENAMETOOLONG));
return (0);
}