dmu.c revision e19302335c33c8c6e0b0b5e426fc1f6352c84b5d
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/dmu_impl.h>
#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_pool.h>
#include <sys/dmu_zfetch.h>
#include <sys/zfs_ioctl.h>
#include <sys/zio_checksum.h>
};
int
{
int err;
/* dataset_verify(dd); */
if (err)
return (err);
} else {
if (err) {
}
}
return (err);
}
/*
 * Report the bonus-length limit, i.e. the value of DN_MAX_BONUSLEN,
 * to callers that cannot see the constant directly.
 */
int
dmu_bonus_max(void)
{
	int max_bonuslen = DN_MAX_BONUSLEN;

	return (max_bonuslen);
}
/*
* returns ENOENT, EIO, or 0.
*/
int
{
if (err)
return (err);
}
if (count == 1)
return (0);
}
int
{
int err;
if (length == 0) {
if (numbufsp)
*numbufsp = 0;
return (0);
}
if (length > zfetch_array_rd_sz)
if (err)
return (err);
if (dn->dn_datablkshift) {
} else {
nblks = 1;
}
for (i = 0; i < nblks; i++) {
return (EIO);
}
/* initiate async i/o */
}
}
/* wait for async i/o */
if (err) {
return (err);
}
/* wait for other io to complete */
if (read) {
for (i = 0; i < nblks; i++) {
if (err) {
return (err);
}
}
}
return (0);
}
void
{
int i;
if (numbufs == 0)
return;
for (i = 0; i < numbufs; i++) {
if (dbp[i])
}
}
void
{
if (len == 0) { /* they're interested in the bonus buffer */
return;
return;
}
/*
* XXX - Note, if the dnode for the requested object is not
* already cached, we will do a *synchronous* read in the
* dnode_hold() call. The same is true for any indirects.
*/
if (err != 0)
return;
if (dn->dn_datablkshift) {
} else {
}
if (nblks != 0) {
for (i = 0; i < nblks; i++)
}
}
int
{
if (err)
return (err);
return (0);
}
int
void *buf)
{
/*
* Deal with odd block sizes, where there can't be data past the
* first block.
*/
if (err)
return (err);
if (dn->dn_datablkshift == 0) {
}
while (size > 0) {
int err;
/*
* NB: we could do this block-at-a-time, but it's nice
* to be reading in parallel.
*/
if (err)
return (err);
for (i = 0; i < numbufs; i++) {
int tocpy;
int bufoff;
}
}
return (0);
}
void
{
int numbufs, i;
for (i = 0; i < numbufs; i++) {
int tocpy;
int bufoff;
else
}
}
#ifdef _KERNEL
int
{
int numbufs, i;
int err = 0;
if (err)
return (err);
for (i = 0; i < numbufs; i++) {
int tocpy;
int bufoff;
else
/*
* XXX uiomove could block forever (eg. nfs-backed
* pages). There needs to be a uiolockdown() function
* to lock the pages in memory, so that uiomove won't
* block.
*/
if (err)
break;
}
return (err);
}
#endif
/*
 * Argument block named for the backup code.
 * NOTE(review): only the err member is visible in this view of the file;
 * confirm the full layout and its users before relying on this comment.
 */
struct backuparg {
int err; /* NOTE(review): presumably the error recorded for the backup; confirm */
};
static int
{
}
static int
{
/* write a FREE record */
return (EINTR);
return (0);
}
static int
{
/* write a DATA record */
return (EINTR);
return (EINTR);
return (0);
}
static int
{
/* write a FREEOBJECTS record */
return (EINTR);
return (0);
}
static int
{
/* write an OBJECT record */
return (EINTR);
return (EINTR);
/* free anything past the end of the file */
return (EINTR);
return (EINTR);
return (0);
}
static int
{
int err = 0;
return (EINTR);
int i;
for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
if (err)
break;
}
} else if (level == 0 &&
if (abuf) {
}
} else {
}
}
return (err);
}
int
{
int err;
/* tosnap must be a snapshot */
return (EINVAL);
/* fromsnap must be an earlier snapshot from the same fs as tosnap */
return (EXDEV);
if (fromds)
}
if (err) {
return (err);
}
return (0);
}
/*
 * Argument block named for the restore code.  buf, buflen, bufoff and
 * bufsize together describe a single buffer: how much memory it has, how
 * many of its bytes are valid, and where the next read starts.
 */
struct restorearg {
int err; /* NOTE(review): presumably the error recorded for the restore; confirm */
int byteswap; /* NOTE(review): presumably nonzero when records need byte swapping; confirm */
char *buf; /* the buffer described by buflen, bufoff and bufsize */
int buflen; /* number of valid bytes in buf */
int bufoff; /* next offset to read */
int bufsize; /* amount of memory allocated for buf */
};
static int
{
const char *snapname;
/* this must be a filesystem */
goto die;
if (err)
goto die;
goto die;
}
/* must already be a snapshot of this fs */
goto die;
}
/* most recent snapshot must match fromguid */
if (err)
goto die;
goto die;
}
/* must not have any changes since most recent snapshot */
goto die;
}
/* new snapshot name must not exist */
goto die;
}
snapname++;
if (err == 0)
return (err);
}
/* The point of no (unsuccessful) return. */
return (0);
die:
if (ds_prev)
if (ds)
return (err);
}
static int
{
int err;
return (EINVAL);
}
*cp = '\0';
if (fslastname == NULL) {
return (EINVAL);
}
fslastname++;
if (err) {
return (err);
}
/* the point of no (unsuccessful) return */
return (0);
}
static int
{
int err;
char *snapname;
/* XXX verify that drr_toname is in dd */
return (EINVAL);
snapname++;
/* create snapshot */
if (err)
return (err);
/* set snapshot's creation time and guid */
return (0);
}
void *
{
void *rv;
/* some things will require 8-byte alignment, so everything must */
return (NULL);
/* Could compute checksum here? */
}
else
return (rv);
}
static void
{
case DRR_BEGIN:
break;
case DRR_OBJECT:
/* DO64(drr_object.drr_allocation_txg); */
break;
case DRR_FREEOBJECTS:
break;
case DRR_WRITE:
break;
case DRR_FREE:
break;
case DRR_END:
break;
}
}
static int
{
int err;
return (EINVAL);
return (EINVAL);
}
/* currently free, want to be allocated */
if (err) {
return (err);
}
} else {
/* currently allocated, want to be allocated */
/*
* We may change blocksize, so need to
* hold_write
*/
if (err) {
return (err);
}
}
if (err) {
return (EINVAL);
}
if (drro->drr_bonuslen) {
void *data;
}
drro->drr_bonuslen);
}
}
return (0);
}
/* ARGSUSED */
static int
struct drr_freeobjects *drrfo)
{
return (EINVAL);
int err;
continue;
if (err) {
return (err);
}
return (EINVAL);
}
return (0);
}
static int
{
void *data;
int err;
return (EINVAL);
return (EINVAL);
if (err) {
return (err);
}
return (0);
}
/* ARGSUSED */
static int
{
int err;
return (EINVAL);
return (EINVAL);
if (err) {
return (err);
}
return (err);
}
int
{
struct restorearg ra;
char *cp;
} else {
goto out;
}
/*
* NB: this assumes that struct drr_begin will be the largest in
* dmu_replay_record_t's drr_u, and thus we don't need to pad it
* with zeros to make it the same length as we wrote out.
*/
} else {
}
}
goto out;
}
/*
* Process the begin in syncing context.
*/
if (drrb->drr_fromguid) {
/* incremental backup */
*cp = '\0';
*cp = '@';
goto out;
} else {
/* full backup */
const char *tail;
*cp = '\0';
*cp = '@';
goto out;
goto out;
}
}
goto out;
/*
* Open the objset we are modifying.
*/
*cp = '\0';
*cp = '@';
/*
* Read records and process them.
*/
goto out;
}
case DRR_OBJECT:
{
/*
* We need to make a copy of the record header,
* because restore_{object,write} may need to
* restore_read(), which will invalidate drr.
*/
break;
}
case DRR_FREEOBJECTS:
{
struct drr_freeobjects drrfo =
break;
}
case DRR_WRITE:
{
break;
}
case DRR_FREE:
{
break;
}
case DRR_END:
{
/*
* We compare against the *previous* checksum
* value, because the stored checksum is of
* everything before the DRR_END record.
*/
goto out;
}
/*
* dd may be the parent of the dd we are
* restoring into (eg. if it's a full backup).
*/
goto out;
}
default:
goto out;
}
}
out:
if (os)
/*
* processed the begin properly. 'os' will only be set if this
* is the case.
*/
/*
* rollback or destroy what we created, so we don't
* leave it in the restoring state.
*/
if (drrb->drr_fromguid) {
/* incremental: rollback to most recent snapshot */
(void) dsl_dir_sync_task(dd,
} else {
/* full: destroy whole fs */
*cp = '\0';
if (cp) {
(void) dsl_dir_sync_task(dd,
}
*cp = '@';
}
}
if (dd)
if (sizep)
}
/*
* Intent log support: sync the block at <os, object, offset> to disk.
* N.B. and XXX: the caller is responsible for serializing dmu_sync()s
* of the same block, and for making sure that the data isn't changing
* while dmu_sync() is writing it.
*
* Return values:
*
* EALREADY: this txg has already been synced, so there's nothing to do.
* The caller should not log the write.
*
* ENOENT: the block was dbuf_free_range()'d, so there's nothing to do.
* The caller should not log the write.
*
* EINPROGRESS: the block is in the process of being synced by the
* usual mechanism (spa_sync()), so we can't sync it here.
* The caller should txg_wait_synced() and not log the write.
*
* EBUSY: another thread is trying to dmu_sync() the same dbuf.
* (This case cannot arise under the current locking rules.)
* The caller should txg_wait_synced() and not log the write.
*
* ESTALE: the block was dirtied or freed while we were writing it,
* so the data is no longer valid.
* The caller should txg_wait_synced() and not log the write.
*
* 0: success. Sets *bp to the blkptr just written, and sets
* *blkoff to the data's offset within that block.
*/
int
{
int err;
dprintf("dmu_sync txg=%llu, s,o,q %llu %llu %llu\n",
/*
* XXX why is this routine using dmu_buf_*() and casting between
* dmu_buf_impl_t and dmu_buf_t?
*/
/*
* If this txg already synced, there's nothing to do.
*/
/*
* If we're running ziltest, we need the blkptr regardless.
*/
if (err)
return (err);
/* if db_blkptr == NULL, this was an empty write */
else
return (0);
}
return (EALREADY);
}
/*
* If this txg is in the middle of syncing, just wait for it.
*/
return (EINPROGRESS);
}
if (err)
return (err);
/*
* If this dbuf isn't dirty, must have been free_range'd.
* There's no need to log writes to freed blocks, so we're done.
*/
return (ENOENT);
}
/*
* If we already did a dmu_sync() of this dbuf in this txg,
* free the old block before writing the new one.
*/
if (blk == IN_DMU_SYNC) {
return (EBUSY);
}
if (!BP_IS_HOLE(blk)) {
}
}
if (!BP_IS_HOLE(blk)) {
BP_SET_LEVEL(blk, 0);
}
/* copy the block pointer back to caller */
/* Note that this block does not free on disk until txg syncs */
/*
* XXX can we use ARC_NOWAIT here?
* XXX should we be ignoring the return code?
*/
if (!BP_IS_HOLE(blk)) {
}
return (ESTALE);
}
return (0);
}
{
/* XXX assumes dnode_hold will not get an i/o error */
return (rv);
}
int
{
int err;
if (err)
return (err);
return (err);
}
void
{
/* XXX assumes dnode_hold will not get an i/o error */
}
void
{
/* XXX assumes dnode_hold will not get an i/o error */
}
int
{
int i, err;
if (err)
return (err);
/*
* Sync any current changes before
* we go trundling through the block pointers.
*/
for (i = 0; i < TXG_SIZE; i++) {
break;
}
if (i != TXG_SIZE) {
if (err)
return (err);
}
return (err);
}
void
{
}
/*
* Get information on a DMU object.
* If doi is NULL, just indicates whether the object exists.
*/
int
{
if (err)
return (err);
return (0);
}
/*
* As above, but faster; can be used when you have a held dbuf in hand.
*/
void
{
}
/*
* Faster still when you only care about the size.
* This is specifically optimized for zfs_getattr().
*/
void
{
}
/*
* Given a bookmark, return the name of the dataset, object, and range in
* human-readable format.
*/
int
{
dsl_pool_t *dp;
return (ENOSPC);
if (err) {
return (err);
}
if (err)
goto out;
} else {
}
} else {
}
if (err)
goto out;
out:
if (dn)
return (err);
}
void
{
int i;
for (i = 0; i < count; i++)
}
void
{
int i;
for (i = 0; i < count; i++)
}
void
{
int i;
for (i = 0; i < count; i++)
}
/* ARGSUSED */
void
{
}
/*
 * Initialize the DMU by calling dbuf_init(), dnode_init() and arc_init(),
 * in that order.  dmu_fini() undoes these in the opposite order.
 */
void
dmu_init(void)
{
dbuf_init();
dnode_init();
arc_init();
}
/*
 * Tear down the DMU by calling arc_fini(), dnode_fini() and dbuf_fini(),
 * the reverse of the order used by dmu_init().
 */
void
dmu_fini(void)
{
arc_fini();
dnode_fini();
dbuf_fini();
}