/* dmu_traverse.c revision 5dabedee088a5b1080fff5e8b615fff203828fad */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_pool.h>
#include <sys/dmu_impl.h>
/*
* Compare two bookmarks.
*
* For ADVANCE_PRE, the visitation order is:
*
* objset 0, 1, 2, ..., ZB_MAXOBJSET.
* object 0, 1, 2, ..., ZB_MAXOBJECT.
* blkoff 0, 1, 2, ...
* level ZB_MAXLEVEL, ..., 2, 1, 0.
*
* where blkoff = blkid << BP_SPAN_SHIFT(level, width), and thus a valid
* ordering vector is:
*
* < objset, object, blkoff, -level >
*
* For ADVANCE_POST, the starting offsets aren't sequential but ending
* offsets [blkoff = (blkid + 1) << BP_SPAN_SHIFT(level, width)] are.
* The visitation order is:
*
* objset 1, 2, ..., ZB_MAXOBJSET, 0.
* object 1, 2, ..., ZB_MAXOBJECT, 0.
* blkoff 1, 2, ...
* level 0, 1, 2, ..., ZB_MAXLEVEL.
*
* and thus a valid ordering vector is:
*
* < objset - 1, object - 1, blkoff, level >
*
* Both orderings can be expressed as:
*
* < objset + bias, object + bias, blkoff, level ^ bias >
*
* where 'bias' is either 0 or -1 (for ADVANCE_PRE or ADVANCE_POST)
* and 'blkoff' is (blkid - bias) << BP_SPAN_SHIFT(level, width).
*
* Special case: an objset's osphys is represented as level -1 of object 0.
* It is always either the very first or very last block we visit in an objset.
* Therefore, if either bookmark's level is -1, level alone determines order.
*/
static int
int advance)
{
return (-1);
return (1);
return (-1);
return (1);
return (0);
return (-1);
return (1);
}
{ \
}
{ \
}
static int
{
if (advance & ADVANCE_PRE) {
if (objset >= ZB_MAXOBJSET)
return (ERANGE);
} else {
if (objset >= ZB_MAXOBJSET)
objset = 0;
}
return (ERANGE);
return (EAGAIN);
}
static int
{
if (advance & ADVANCE_PRE) {
if (object >= ZB_MAXOBJECT) {
} else {
}
} else {
} else {
if (object >= ZB_MAXOBJECT)
object = 0;
}
}
return (ERANGE);
return (EAGAIN);
}
static int
{
if (advance & ADVANCE_PRE) {
} else {
return (ERANGE);
}
return (ERANGE);
return (EAGAIN);
}
static int
{
if (advance & ADVANCE_PRE) {
level--;
} else {
blkid++;
return (ERANGE);
break;
level++;
}
}
} else {
level = 0;
} else {
level++;
}
dnp->dn_maxblkid) {
return (ERANGE);
level++;
}
}
return (ERANGE);
return (EAGAIN);
}
static int
{
/*
* Before we issue the callback, prune against maxtxg.
*
* We prune against mintxg before we get here because it's a big win.
* If a given block was born in txg 37, then we know that the entire
* subtree below that block must have been born in txg 37 or earlier.
* We can therefore lop off huge branches of the tree as we go.
*
* There's no corresponding optimization for maxtxg because knowing
* that bp->blk_birth >= maxtxg doesn't imply anything about the bp's
* children. In fact, the copy-on-write design of ZFS ensures that
* top-level blocks will pretty much always be new.
*
* Therefore, in the name of simplicity we don't prune against
* maxtxg until the last possible moment -- that being right now.
*/
return (0);
/*
* Debugging: verify that the order we visit things
* agrees with the order defined by compare_bookmark().
*/
}
th->th_callbacks++;
}
static int
{
int error;
return (0);
return (0);
if (BP_IS_HOLE(bp)) {
return (0);
}
error = 0;
th->th_arc_hits++;
} else {
BP_GET_LSIZE(bp));
}
if (error) {
}
dprintf("cache %02x error %d <%llu, %llu, %d, %llx>\n",
return (error);
}
static int
{
int rc;
return (ERANGE);
break;
if (i != first) {
i--;
return (ENOTBLK);
}
}
return (rc);
}
if (BP_IS_HOLE(&bp[i])) {
return (0);
}
}
return (0);
}
static int
{
int i, rc;
for (;;) {
break;
for (i = 0; i < DNODES_PER_BLOCK; i++) {
return (0);
}
}
}
break;
}
*objectp = ZB_MAXOBJECT;
return (rc);
}
/* ARGSUSED */
static void
{
return;
}
}
/* ARGSUSED */
static void
{
return;
}
}
}
static void
{
/*
* We only want to visit blocks that have been claimed but not yet
* replayed (or, in read-only mode, blocks that *would* be claimed).
*/
return;
}
static int
{
int worklimit = 100;
int rc;
dprintf("<%llu, %llu, %d, %llx>\n",
if (rc) /* If we get ERESTART, we've got nowhere left to go */
if (rc != 0)
return (rc);
if (rc != 0) {
th->th_advance);
return (rc);
}
zseg->seg_mintxg =
}
if (rc) {
return (rc);
}
}
}
if (rc != 0)
return (rc);
}
for (;;) {
break;
if (rc == 0) {
if (rc) {
return (rc);
}
}
}
break;
/*
* Give spa_sync() a chance to run.
*/
return (EAGAIN);
}
if (--worklimit == 0)
return (EAGAIN);
}
return (rc);
}
/*
* It is the caller's responsibility to ensure that the dsl_dataset_t
* doesn't go away during traversal.
*/
int
{
int err;
continue;
return (err);
}
int
{
int rc;
return (0);
th->th_restarts++;
return (EAGAIN);
}
return (rc);
}
/*
* Note: (mintxg, maxtxg) is an open interval; mintxg and maxtxg themselves
* are not included. The blocks covered by this segment will all have
* mintxg < birth < maxtxg.
*/
static void
{
}
void
{
else
}
void
{
objset, 0, -1, 0,
else
objset, 1, 0, 0,
objset, 0, -1, 0);
}
void
{
0, 0, -1, 0,
else
1, 1, 0, 0,
0, 0, -1, 0);
}
int zio_flags)
{
int d, l;
for (d = 0; d < ZB_DEPTH; d++) {
for (l = 0; l < ZB_MAXLEVEL; l++) {
if ((advance & ADVANCE_DATA) ||
l != 0 || d != ZB_DN_CACHE)
}
}
return (th);
}
void
{
int d, l;
for (d = 0; d < ZB_DEPTH; d++)
for (l = 0; l < ZB_MAXLEVEL; l++)
}
dprintf("%llu hit, %llu ARC, %llu IO, %llu cb, %llu sync, %llu again\n",
}