zdb.c revision 3f84190c280ddf5917fb4682f34fdaa328ee195c
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
*/
#include <stdio.h>
#include <unistd.h>
#include <stdio_ext.h>
#include <stdlib.h>
#include <ctype.h>
#include <sys/zfs_context.h>
#include <sys/spa_impl.h>
#include <sys/zfs_znode.h>
#include <sys/vdev_impl.h>
#include <sys/metaslab_impl.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_pool.h>
#include <sys/zil_impl.h>
#include <sys/resource.h>
#include <sys/dmu_traverse.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/zfs_fuid.h>
#include <sys/zfeature.h>
#include <zfs_comutil.h>
#include <libzfs.h>
#ifndef lint
extern boolean_t zfs_recover;
extern int zfs_vdev_async_read_max_active;
#else
#endif
/*
 * Name of this program.
 * NOTE(review): presumably consumed by shared error/usage reporting code;
 * confirm against the users of cmdname.
 */
const char cmdname[] = "zdb";
/* Intent-log dumper; only declared here, its definition is not in this file view. */
extern void dump_intent_log(zilog_t *);
/*
 * Number of numeric arguments that follow the target on the command line.
 * It starts at 0 and is set to the remaining argc once the target has been
 * consumed; the dump routines compare it against 0 to decide whether to
 * restrict output to the listed items.
 */
int zopt_objects = 0;
/*
* These libumem hooks provide a reasonable set of defaults for the allocator's
* debugging facilities.
*/
const char *
{
return ("default,verbose"); /* $UMEM_DEBUG setting */
}
/*
 * libumem hook supplying the default for the allocator's logging facility.
 * The returned text is what would otherwise be provided through the
 * $UMEM_LOGGING setting.
 */
const char *
_umem_logging_init(void)
{
	static const char umem_logging_default[] = "fail,contents";

	return (umem_logging_default);
}
static void
usage(void)
{
"Usage: %s [-CumMdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
" %s [-divPA] [-e -p path...] [-U config] dataset "
"[object...]\n"
" %s -mM [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
"poolname [vdev [metaslab...]]\n"
" %s -R [-A] [-e [-p path...]] poolname "
"vdev:offset:size[:flags]\n"
" %s -S [-PA] [-e [-p path...]] [-U config] poolname\n"
" %s -l [-uA] device\n"
" %s -C [-A] [-U config]\n\n",
"separator character '/' or '@'\n");
"dataset is dumped\n");
"those objects are dumped\n\n");
"all data) blocks\n");
"load spacemaps)\n");
"device\n\n");
"with other options:\n");
"panic recovery (-AA) or both (-AAA)\n");
"safe range of transaction groups\n");
"cachefile\n");
"work with dataset)\n");
"-e to specify path to vdev dir\n");
"dump all read blocks into specified directory\n");
"searching for uberblocks\n");
"specify the maximum number of "
"to make only that option verbose\n");
exit(1);
}
/*
* Called for usage errors that are discovered after a call to spa_open(),
* dmu_bonus_hold(), or pool_match(). abort() is called for other errors.
*/
static void
{
exit(1);
}
/* ARGSUSED */
static void
{
}
/* ARGSUSED */
static void
{
return;
(void) printf("\t\tpool_create_len = %llu\n",
(void) printf("\t\tphys_max_off = %llu\n",
(void) printf("\t\tbof = %llu\n",
(void) printf("\t\teof = %llu\n",
(void) printf("\t\trecords_lost = %llu\n",
}
static void
{
if (dump_opt['P'])
else
}
/*
 * A fixed run of '*' characters.
 * NOTE(review): presumably the source of the bar printed next to each
 * histogram bucket, with its length acting as the maximum bar width;
 * confirm against the histogram printing routine.
 */
const char histo_stars[] = "****************************************";
static void
{
int i;
int maxidx = 0;
for (i = 0; i < size; i++) {
maxidx = i;
minidx = i;
}
if (max < histo_width)
max = histo_width;
(void) printf("\t\t\t%3u: %6llu %s\n",
}
}
static void
{
int error;
if (error)
return;
if (zs.zs_ptrtbl_len == 0) {
(void) printf("\tmicrozap: %llu bytes, %llu entries\n",
return;
}
(void) printf("\tFat ZAP stats:\n");
(void) printf("\t\tPointer table:\n");
(void) printf("\t\t\t%llu elements\n",
(void) printf("\t\t\tzt_blk: %llu\n",
(void) printf("\t\t\tzt_numblks: %llu\n",
(void) printf("\t\t\tzt_shift: %llu\n",
(void) printf("\t\t\tzt_blks_copied: %llu\n",
(void) printf("\t\t\tzt_nextblk: %llu\n",
(void) printf("\t\tZAP entries: %llu\n",
(void) printf("\t\tLeaf blocks: %llu\n",
(void) printf("\t\tTotal blocks: %llu\n",
(void) printf("\t\tzap_block_type: 0x%llx\n",
(void) printf("\t\tzap_magic: 0x%llx\n",
(void) printf("\t\tzap_salt: 0x%llx\n",
(void) printf("\t\tLeafs with 2^n pointers:\n");
(void) printf("\t\tBlocks with n*5 entries:\n");
(void) printf("\t\tBlocks n/10 full:\n");
(void) printf("\t\tEntries with n chunks:\n");
(void) printf("\t\tBuckets with n entries:\n");
}
/*ARGSUSED*/
static void
{
}
/*ARGSUSED*/
static void
{
(void) printf("\tUNKNOWN OBJECT TYPE\n");
}
/*ARGSUSED*/
void
{
}
/*ARGSUSED*/
static void
{
}
/*ARGSUSED*/
static void
{
void *prop;
int i;
(void) printf("\n");
zap_cursor_advance(&zc)) {
if (attr.za_num_integers == 0) {
(void) printf("\n");
continue;
}
} else {
for (i = 0; i < attr.za_num_integers; i++) {
switch (attr.za_integer_length) {
case 2:
(void) printf("%u ",
break;
case 4:
(void) printf("%u ",
break;
case 8:
(void) printf("%lld ",
break;
}
}
}
(void) printf("\n");
}
}
static void
{
return;
(void) printf("\t\tnum_blkptrs = %llu\n",
if (size >= BPOBJ_SIZE_V1) {
}
(void) printf("\t\tsubobjs = %llu\n",
(void) printf("\t\tnum_subobjs = %llu\n",
}
return;
char blkbuf[BP_SPRINTF_LEN];
if (err != 0) {
break;
}
}
}
/* ARGSUSED */
static void
{
if (err != 0) {
return;
}
if (subobjs[i] != 0)
last_nonzero = i;
}
for (int64_t i = 0; i <= last_nonzero; i++) {
}
}
/*ARGSUSED*/
static void
{
/* contents are printed elsewhere, properly decoded */
}
/*ARGSUSED*/
static void
{
(void) printf("\n");
zap_cursor_advance(&zc)) {
if (attr.za_num_integers == 0) {
(void) printf("\n");
continue;
}
(void) printf(" %llx : [%d:%d:%d]\n",
}
}
/*ARGSUSED*/
static void
{
int i;
(void) printf("\n");
zap_cursor_advance(&zc)) {
if (attr.za_num_integers == 0) {
(void) printf("\n");
continue;
}
for (i = 0; i != attr.za_num_integers; i++)
(void) printf("]\n");
}
}
/*ARGSUSED*/
static void
{
const char *typenames[] = {
/* 0 */ "not specified",
/* 1 */ "FIFO",
/* 2 */ "Character Device",
/* 3 */ "3 (invalid)",
/* 4 */ "Directory",
/* 5 */ "5 (invalid)",
/* 6 */ "Block Device",
/* 7 */ "7 (invalid)",
/* 8 */ "Regular File",
/* 9 */ "9 (invalid)",
/* 10 */ "Symbolic Link",
/* 11 */ "11 (invalid)",
/* 12 */ "Socket",
/* 13 */ "Door",
/* 14 */ "Event Port",
/* 15 */ "15 (invalid)",
};
(void) printf("\n");
zap_cursor_advance(&zc)) {
(void) printf("\t\t%s = %lld (type: %s)\n",
}
}
int
{
int refcount = 0;
return (1);
return (0);
}
for (int c = 0; c < vd->vdev_children; c++)
return (refcount);
}
int
{
int refcount = 0;
for (int m = 0; m < vd->vdev_ms_count; m++) {
refcount++;
}
}
for (int c = 0; c < vd->vdev_children; c++)
return (refcount);
}
static int
{
(void) feature_get_refcount(spa,
if (expected_refcount != actual_refcount) {
(void) printf("space map refcount mismatch: expected %lld != "
"actual %lld\n",
return (2);
}
return (0);
}
static void
{
"INVALID", "INVALID", "INVALID", "INVALID" };
return;
/*
* Print out the freelist entries in both encoded and decoded form.
*/
alloc = 0;
if (SM_DEBUG_DECODE(entry)) {
(void) printf("\t [%6llu] %s: txg %llu, pass %llu\n",
} else {
(void) printf("\t [%6llu] %c range:"
" %010llx-%010llx size: %06llx\n",
else
}
}
(void) printf("space_map_object alloc (%llu) INCONSISTENT "
"with space map summary (%llu)\n",
}
}
static void
{
char maxbuf[32];
(void) printf("\t %25s %10lu %7s %6s %4s %4d%%\n",
"freepct", free_pct);
(void) printf("\tIn-memory histogram:\n");
}
static void
{
char freebuf[32];
(void) printf(
"\tmetaslab %6llu offset %12llx spacemap %6llu free %5s\n",
}
}
/*
* The space map histogram represents free space in chunks
* of sm_shift (i.e. bucket 0 refers to 2^sm_shift).
*/
(void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n",
}
}
}
static void
{
(void) printf("\tvdev %10llu\n\t%-10s%5llu %-19s %-15s %-10s\n",
"offset", "spacemap", "free");
(void) printf("\t%15s %19s %15s %10s\n",
"---------------", "-------------------",
"---------------", "-------------");
}
static void
{
for (int c = 0; c < rvd->vdev_children; c++) {
continue;
(void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t"
"fragmentation",
} else {
(void) printf("%3llu%%\n",
}
}
if (fragmentation == ZFS_FRAG_INVALID)
else
}
static void
{
(void) printf("\nMetaslabs:\n");
c = zopt_object[0];
if (c >= children)
if (zopt_objects > 1) {
for (m = 1; m < zopt_objects; m++) {
else
"number %llu\n",
(u_longlong_t)zopt_object[m]);
}
(void) printf("\n");
return;
}
children = c + 1;
}
for (; c < children; c++) {
for (m = 0; m < vd->vdev_ms_count; m++)
(void) printf("\n");
}
}
static void
{
char blkbuf[BP_SPRINTF_LEN];
for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
if (ddp->ddp_phys_birth == 0)
continue;
(void) printf("index %llx refcnt %llu %s %s\n",
}
}
static void
{
if (dds->dds_blocks == 0)
return;
(void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
"dedup * compress / copies = %.2f\n\n",
}
static void
{
char name[DDT_NAMELEN];
int error;
return;
return;
(void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
name,
return;
return;
return;
(void) printf("\n");
}
static void
{
ddt_histogram_t ddh_total = { 0 };
ddt_stat_t dds_total = { 0 };
for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
class++) {
}
}
}
if (dds_total.dds_blocks == 0) {
(void) printf("All DDTs are empty\n");
return;
}
(void) printf("\n");
(void) printf("DDT histogram (aggregated over all DDTs):\n");
}
}
static void
{
(void) printf("%s [%llu,%llu) length %llu\n",
(u_longlong_t)(size));
}
static void
{
char prefix[256];
if (indent == 0)
(void) printf("\nDirty time logs:\n\n");
for (int t = 0; t < DTL_TYPES; t++) {
if (range_tree_space(rt) == 0)
continue;
}
for (int c = 0; c < vd->vdev_children; c++)
}
static void
{
int error;
struct tm t;
char tbuf[30];
char internalstr[MAXPATHLEN];
do {
"error %d\n", error);
return;
}
break;
} while (len != 0);
(void) printf("\nHistory:\n");
for (int i = 0; i < num; i++) {
&time) != 0)
goto next;
&cmd) != 0) {
if (nvlist_lookup_uint64(events[i],
ZPOOL_HIST_INT_EVENT, &ievent) != 0)
goto next;
ZPOOL_HIST_TXG, &txg) == 0);
ZPOOL_HIST_INT_STR, &intstr) == 0);
if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS)
goto next;
(void) snprintf(internalstr,
sizeof (internalstr),
"[internal %s txg:%lld] %s",
intstr);
cmd = internalstr;
}
(void) localtime_r(&tsec, &t);
next:
if (!printed)
(void) printf("unrecognized record:\n");
}
}
}
/*ARGSUSED*/
static void
{
}
static uint64_t
const zbookmark_phys_t *zb)
{
}
}
static void
{
return;
}
if (BP_IS_EMBEDDED(bp)) {
"EMBEDDED et=%u %llxL/%llxP B=%llu",
(int)BPE_GET_ETYPE(bp),
return;
}
blkbuf[0] = '\0';
for (int i = 0; i < ndvas; i++)
if (BP_IS_HOLE(bp)) {
"%llxL B=%llu",
} else {
"%llxL/%llxP F=%llu B=%llu/%llu",
}
}
static void
const dnode_phys_t *dnp)
{
char blkbuf[BP_SPRINTF_LEN];
int l;
if (!BP_IS_EMBEDDED(bp)) {
}
} else {
(void) printf(" ");
}
}
}
static int
{
int err = 0;
return (0);
int i;
if (err)
return (err);
/* recursively visit blocks below this */
if (err)
break;
}
if (!err)
}
return (err);
}
/*ARGSUSED*/
static void
{
int j;
(void) printf("Indirect blocks:\n");
for (j = 0; j < dnp->dn_nblkptr; j++) {
}
(void) printf("\n");
}
/*ARGSUSED*/
static void
{
char nice[32];
return;
(void) printf("\t\thead_dataset_obj = %llu\n",
(void) printf("\t\tparent_dir_obj = %llu\n",
(void) printf("\t\torigin_obj = %llu\n",
(void) printf("\t\tchild_dir_zapobj = %llu\n",
(void) printf("\t\tprops_zapobj = %llu\n",
(void) printf("\t\tdeleg_zapobj = %llu\n",
(void) printf("\t\tflags = %llx\n",
DO(CHILD_RSRV);
}
/*ARGSUSED*/
static void
{
char blkbuf[BP_SPRINTF_LEN];
return;
(void) printf("\t\tdir_obj = %llu\n",
(void) printf("\t\tprev_snap_obj = %llu\n",
(void) printf("\t\tprev_snap_txg = %llu\n",
(void) printf("\t\tnext_snap_obj = %llu\n",
(void) printf("\t\tsnapnames_zapobj = %llu\n",
(void) printf("\t\tnum_children = %llu\n",
(void) printf("\t\tuserrefs_obj = %llu\n",
(void) printf("\t\tcreation_txg = %llu\n",
(void) printf("\t\tdeadlist_obj = %llu\n",
(void) printf("\t\tfsid_guid = %llu\n",
(void) printf("\t\tguid = %llu\n",
(void) printf("\t\tflags = %llx\n",
(void) printf("\t\tnext_clones_obj = %llu\n",
(void) printf("\t\tprops_obj = %llu\n",
}
/* ARGSUSED */
static int
{
char blkbuf[BP_SPRINTF_LEN];
}
return (0);
}
static void
{
char bytes[32];
return;
(void) printf("\n %s: %llu datasets, %s\n",
return;
(void) printf("\n");
}
/* ARGSUSED */
static int
{
char blkbuf[BP_SPRINTF_LEN];
return (0);
}
static void
{
char bytes[32];
char comp[32];
char uncomp[32];
return;
(void) printf(" %*s: object %llu, %llu local blkptrs, "
"%llu subobjs in object %llu, %s (%s/%s comp)\n",
int error;
if (error != 0) {
(void) printf("ERROR %u while trying to open "
"subobj id %llu\n",
continue;
}
}
} else {
(void) printf(" %*s: object %llu, %llu blkptrs, %s\n",
bytes);
}
return;
if (indent == 0) {
(void) printf("\n");
}
}
static void
{
char bytes[32];
char comp[32];
char uncomp[32];
return;
return;
}
(void) printf("\n Deadlist: %s (%s/%s comp)\n",
return;
(void) printf("\n");
/* force the tree to be loaded */
char buf[128];
} else {
(void) printf("mintxg %llu -> obj %llu\n",
}
}
}
/*
 * File-scope state for translating FUIDs when printing uid/gid values.
 * idx_tree and domain_tree are handed together to the FUID table loader,
 * and fuid_table_loaded is checked before the tables are used or torn down.
 * NOTE(review): presumably fuid_table_loaded is set once the load succeeds
 * so the table is read only once per run; confirm in the loader.
 */
static avl_tree_t idx_tree;
static avl_tree_t domain_tree;
static boolean_t fuid_table_loaded;
static void
{
if (fuid_table_loaded) {
}
}
/*
* print uid or gid information.
* For normal POSIX id just the id is printed in decimal format.
* For CIFS files with FUID the fuid is printed in hex followed by
* the domain-rid string.
*/
static void
{
if (FUID_INDEX(id)) {
char *domain;
} else {
}
}
static void
{
/* Load domain table, if not already loaded */
/* first find the fuid object. It lives in the master node */
&idx_tree, &domain_tree);
}
}
/*ARGSUSED*/
static void
{
int idx = 0;
int error;
if (!sa_loaded) {
if (version >= ZPL_VERSION_SA) {
}
ZPL_END, &sa_attr_table)) != 0) {
(void) printf("sa_setup failed errno %d, can't "
"display znode contents\n", error);
return;
}
}
(void) printf("Failed to get handle for SA znode\n");
return;
}
&links, 8);
&mode, 8);
&fsize, 8);
acctm, 16);
modtm, 16);
crtm, 16);
chgtm, 16);
&pflags, 8);
(void) sa_handle_destroy(hdl);
return;
}
if (error != 0) {
}
(void) sa_handle_destroy(hdl);
return;
}
sizeof (uint64_t)) == 0)
sizeof (uint64_t)) == 0)
}
/*ARGSUSED*/
static void
{
}
/*ARGSUSED*/
static void
{
}
dump_none, /* unallocated */
dump_zap, /* object directory */
dump_uint64, /* object array */
dump_none, /* packed nvlist */
dump_packed_nvlist, /* packed nvlist size */
dump_none, /* bpobj */
dump_bpobj, /* bpobj header */
dump_none, /* SPA space map header */
dump_none, /* SPA space map */
dump_none, /* ZIL intent log */
dump_dnode, /* DMU dnode */
dump_dmu_objset, /* DMU objset */
dump_dsl_dir, /* DSL directory */
dump_zap, /* DSL directory child map */
dump_zap, /* DSL dataset snap map */
dump_zap, /* DSL props */
dump_dsl_dataset, /* DSL dataset */
dump_znode, /* ZFS znode */
dump_acl, /* ZFS V0 ACL */
dump_uint8, /* ZFS plain file */
dump_zpldir, /* ZFS directory */
dump_zap, /* ZFS master node */
dump_zap, /* ZFS delete queue */
dump_uint8, /* zvol object */
dump_zap, /* zvol prop */
dump_uint8, /* other uint8[] */
dump_uint64, /* other uint64[] */
dump_zap, /* other ZAP */
dump_zap, /* persistent error log */
dump_uint8, /* SPA history */
dump_history_offsets, /* SPA history offsets */
dump_zap, /* Pool properties */
dump_zap, /* DSL permissions */
dump_acl, /* ZFS ACL */
dump_uint8, /* ZFS SYSACL */
dump_none, /* FUID nvlist */
dump_packed_nvlist, /* FUID nvlist size */
dump_zap, /* DSL dataset next clones */
dump_zap, /* DSL scrub queue */
dump_zap, /* snapshot refcount tags */
dump_ddt_zap, /* DDT ZAP object */
dump_zap, /* DDT statistics */
dump_znode, /* SA object */
dump_zap, /* SA Master Node */
dump_sa_attrs, /* SA attribute registration */
dump_sa_layouts, /* SA attribute layouts */
dump_zap, /* DSL scrub translations */
dump_none, /* fake dedup BP */
dump_zap, /* deadlist */
dump_none, /* deadlist hdr */
dump_zap, /* dsl clones */
dump_bpobj_subobjs, /* bpobj subobjs */
dump_unknown, /* Unknown type, must be last */
};
static void
{
char bonus_size[32];
char aux[50];
int error;
if (*print_header) {
(void) printf("\n%10s %3s %5s %5s %5s %5s %6s %s\n",
"Object", "lvl", "iblk", "dblk", "dsize", "lsize",
"%full", "type");
*print_header = 0;
}
if (object == 0) {
} else {
if (error)
fatal("dmu_bonus_hold(%llu) failed, errno %u",
}
aux[0] = '\0';
}
}
(void) printf("%10lld %3u %5s %5s %5s %5s %6s %s%s\n",
(void) printf("%10s %3s %5s %5s %5s %5s %6s %s\n",
}
if (verbosity >= 4) {
(void) printf("\tdnode flags: %s%s%s\n",
"USED_BYTES " : "",
"USERUSED_ACCOUNTED " : "",
"SPILL_BLKPTR" : "");
(void) printf("\tdnode maxblkid: %llu\n",
*print_header = 1;
}
if (verbosity >= 5)
if (verbosity >= 5) {
/*
* Report the list of segments that comprise the object.
*/
int minlvl = 1;
minlvl = 0;
}
for (;;) {
char segsize[32];
if (error)
break;
(void) printf("\t\tsegment [%016llx, %016llx)"
if (error)
break;
}
}
}
/*
 * Printable names for objset types, listed in slot order.
 * The array is sized by DMU_OST_NUMTYPES.
 * NOTE(review): presumably indexed directly by the DMU_OST_* value of an
 * objset; confirm the order against that enum.
 */
static char *objset_types[DMU_OST_NUMTYPES] = {
	"NONE",		/* slot 0 */
	"META",		/* slot 1 */
	"ZPL",		/* slot 2 */
	"ZVOL",		/* slot 3 */
	"OTHER",	/* slot 4 */
	"ANY"		/* slot 5 */
};
static void
{
char numbuf[32];
char osname[MAXNAMELEN];
char *type = "UNKNOWN";
int print_header = 1;
int i, error;
} else {
}
if (verbosity >= 4) {
} else {
blkbuf[0] = '\0';
}
(void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
"%s, %llu objects%s\n",
if (zopt_objects != 0) {
for (i = 0; i < zopt_objects; i++)
&print_header);
(void) printf("\n");
return;
}
if (verbosity < 2)
return;
return;
object_count = 0;
}
object = 0;
object_count++;
}
(void) printf("\n");
abort();
}
}
static void
{
(void) printf("\ttimestamp = %llu UTC = %s",
char blkbuf[BP_SPRINTF_LEN];
}
}
static void
{
int error = 0;
if (error == 0) {
(void) printf("\nMOS Configuration:\n");
} else {
}
}
static void
dump_cachefile(const char *cachefile)
{
int fd;
char *buf;
exit(1);
}
exit(1);
}
exit(1);
}
exit(1);
}
exit(1);
}
dump_nvlist(config, 0);
}
/*
 * Size, in bytes, of the on-stack `header` buffer declared by the uberblock
 * dumping routine that follows.
 */
#define ZDB_MAX_UB_HEADER_SIZE 32
static void
{
char header[ZDB_MAX_UB_HEADER_SIZE];
for (int i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) {
if (uberblock_verify(ub))
continue;
"Uberblock[%d]\n", i);
}
}
static void
dump_label(const char *dev)
{
int fd;
len++;
} else {
}
exit(1);
}
exit(1);
}
(void) printf("cannot use '%s': character device required\n",
path);
exit(1);
}
for (int l = 0; l < VDEV_LABELS; l++) {
(void) printf("--------------------------------------------\n");
(void) printf("LABEL %d\n", l);
(void) printf("--------------------------------------------\n");
(void) printf("failed to read label %d\n", l);
continue;
}
(void) printf("failed to unpack label %d\n", l);
} else {
if ((nvlist_lookup_nvlist(config,
ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
}
if (dump_opt['u'])
}
}
/*ARGSUSED*/
static int
{
int error;
if (error) {
return (0);
}
for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
continue;
dataset_feature_count[f]++;
}
return (0);
}
/*
* Block statistics.
*/
typedef struct zdb_blkstats {
/*
* Extended object types to report deferred frees and dedup auto-ditto blocks.
*/
#define ZDB_OT_DEFERRED (DMU_OT_NUMTYPES + 0)
/*
 * Display names for the extended object types that zdb reports in addition
 * to the regular DMU object types. Slot 0 lines up with ZDB_OT_DEFERRED,
 * which is defined as DMU_OT_NUMTYPES + 0.
 * NOTE(review): the remaining slots presumably follow the other ZDB_OT_*
 * offsets in the same order; confirm against those definitions.
 */
static char *zdb_ot_extname[] = {
	[0] = "deferred free",
	[1] = "dedup ditto",
	[2] = "other",
	[3] = "Total",
};
#define ZB_TOTAL DN_MAX_LEVELS
typedef struct zdb_cb {
int zcb_readfails;
int zcb_haderrors;
} zdb_cb_t;
static void
{
return;
for (int i = 0; i < 4; i++) {
int equal;
/*
* The histogram is only big enough to record blocks up to
* SPA_OLD_MAXBLOCKSIZE; larger blocks go into the last,
* "other", bucket.
*/
switch (BP_GET_NDVAS(bp)) {
case 2:
zb->zb_ditto_samevdev++;
break;
case 3:
if (equal != 0)
zb->zb_ditto_samevdev++;
break;
}
}
if (BP_IS_EMBEDDED(bp)) {
[BPE_GET_PSIZE(bp)]++;
return;
}
if (dump_opt['L'])
return;
if (BP_GET_DEDUP(bp)) {
refcnt = 0;
} else {
if (ddt_phys_total_refcnt(dde) == 0)
}
}
}
static void
{
char blkbuf[BP_SPRINTF_LEN];
else
blkbuf[0] = '\0';
(void) printf("zdb_blkptr_cb: "
"Got error %d reading "
"<%llu, %llu, %lld, %llx> %s -- skipping\n",
blkbuf);
}
}
static int
{
return (0);
char blkbuf[BP_SPRINTF_LEN];
(void) printf("objset %llu object %llu "
"level %lld offset 0x%llx %s\n",
blkbuf);
}
if (BP_IS_HOLE(bp))
return (0);
if (!BP_IS_EMBEDDED(bp) &&
/* If it's an intent log block, failure is expected. */
}
zcb->zcb_readfails = 0;
/* only call gethrtime() every 100 blocks */
static int iters;
if (++iters > 100)
iters = 0;
else
return (0);
char buf[10];
int kb_per_sec =
int sec_remaining =
"\r%5s completed (%4dMB/s) "
"estimated time remaining: %uhr %02umin %02usec ",
sec_remaining % 60);
}
return (0);
}
static void
{
(void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
}
/*
 * Metaslab ops vector private to zdb; its alloc slot is deliberately NULL.
 * NOTE(review): presumably installed on the pool's metaslab classes while
 * space maps are loaded for leak detection, where nothing should ever be
 * allocated; confirm at the point of use.
 */
static metaslab_ops_t zdb_metaslab_ops = {
NULL /* alloc */
};
static void
{
ddt_bookmark_t ddb = { 0 };
int error;
return;
for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
if (ddp->ddp_phys_birth == 0)
continue;
if (p == DDT_PHYS_DITTO) {
} else {
zcb->zcb_dedup_asize +=
zcb->zcb_dedup_blocks++;
}
}
if (!dump_opt['L']) {
}
}
}
static void
{
if (!dump_opt['L']) {
/*
* For leak detection, we overload the metaslab
* ms_tree to contain allocated segments
* instead of free segments. As a result,
* we can't use the normal metaslab_load/unload
* interfaces.
*/
"\rloading space map for "
"vdev %llu of %llu, "
"metaslab %llu of %llu ...",
(longlong_t)c,
(longlong_t)m,
/*
* We don't want to spend the CPU
* manipulating the size-ordered
* tree, so clear the range_tree
* ops.
*/
}
}
}
}
}
static void
{
if (!dump_opt['L']) {
for (int c = 0; c < rvd->vdev_children; c++) {
for (int m = 0; m < vd->vdev_ms_count; m++) {
/*
* The ms_tree has been overloaded to
* contain allocated segments. Now that we
* finished traversing all blocks, any
* block that remains in the ms_tree
* represents an allocated block that we
* did not claim during the traversal.
* Claimed blocks would have been removed
* from the ms_tree.
*/
}
}
}
}
/* ARGSUSED */
static int
{
char blkbuf[BP_SPRINTF_LEN];
(void) printf("[%s] %s\n",
"deferred free", blkbuf);
}
return (0);
}
static int
{
(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
/*
* Load all space maps as SM_ALLOC maps, then traverse the pool
* claiming each block we discover. If the pool is perfectly
* consistent, the space maps will be empty when we're done.
* Anything left over is a leak; any block we can't claim (because
* it's not part of any space map) is a double allocation,
* reference to a freed block, or an unclaimed log block.
*/
/*
* If there's a deferred-free bplist, process that first.
*/
}
}
/*
* If we've traversed the data blocks then we need to wait for those
* I/Os to complete.
*/
if (dump_opt['c']) {
for (int i = 0; i < max_ncpus; i++) {
}
}
if (zcb.zcb_haderrors) {
(void) printf("\nError counts:\n\n");
for (int e = 0; e < 256; e++) {
if (zcb.zcb_errors[e] != 0) {
(void) printf("\t%5d %llu\n",
}
}
}
/*
* Report any leaked segments.
*/
if (total_found == total_alloc) {
if (!dump_opt['L'])
(void) printf("\n\tNo leaks (block sum matches space"
" maps exactly)\n");
} else {
(void) printf("block traversal size %llu != alloc %llu "
"(%s %lld)\n",
}
return (2);
(void) printf("\n");
(void) printf("\tbp count: %10llu\n",
(void) printf("\tganged count: %10llu\n",
(void) printf("\tbp logical: %10llu avg: %6llu\n",
(void) printf("\tbp physical: %10llu avg:"
" %6llu compression: %6.2f\n",
(void) printf("\tbp allocated: %10llu avg:"
" %6llu compression: %6.2f\n",
(void) printf("\tbp deduped: %10llu ref>1:"
" %6llu deduplication: %6.2f\n",
(void) printf("\tSPA allocated: %10llu used: %5.2f%%\n",
for (bp_embedded_type_t i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
if (zcb.zcb_embedded_blocks[i] == 0)
continue;
(void) printf("\n");
(void) printf("\tadditional, non-pointer bps of type %u: "
"%10llu\n",
(void) printf("\t number of (compressed) bytes: "
"number of bps\n");
sizeof (zcb.zcb_embedded_histogram[i]) /
sizeof (zcb.zcb_embedded_histogram[i][0]), 0);
}
}
if (tzb->zb_ditto_samevdev != 0) {
(void) printf("\tDittoed blocks on same vdev: %llu\n",
}
int l, t, level;
(void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
"\t avg\t comp\t%%Total\tType\n");
for (t = 0; t <= ZDB_OT_TOTAL; t++) {
char *typename;
if (t < DMU_OT_NUMTYPES)
else
(void) printf("%6s\t%5s\t%5s\t%5s"
"\t%5s\t%5s\t%6s\t%s\n",
"-",
"-",
"-",
"-",
"-",
"-",
"-",
typename);
continue;
}
continue;
continue;
continue;
(void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
"\t%5.2f\t%6.2f\t",
else
(void) printf(" L%d %s\n",
(void) printf("\t number of ganged "
"blocks: %s\n", gang);
}
(void) printf("psize "
"(in 512-byte sectors): "
"number of blocks\n");
PSIZE_HISTO_SIZE, 0);
}
}
}
}
(void) printf("\n");
if (leaks)
return (2);
if (zcb.zcb_haderrors)
return (3);
return (0);
}
typedef struct zdb_ddt_entry {
/* ARGSUSED */
static int
{
avl_tree_t *t = arg;
return (0);
(void) printf("traversing objset %llu, %llu objects, "
"%lu blocks so far\n",
avl_numnodes(t));
}
return (0);
}
return (0);
}
static void
{
avl_tree_t t;
ddt_histogram_t ddh_total = { 0 };
ddt_stat_t dds_total = { 0 };
zdb_ddt_add_cb, &t);
&dds, 0);
}
avl_destroy(&t);
(void) printf("Simulated DDT histogram:\n");
}
static void
{
int rc = 0;
if (dump_opt['S']) {
return;
}
(void) printf("\nCached configuration:\n");
}
if (dump_opt['C'])
if (dump_opt['u'])
if (dump_opt['D'])
if (dump_opt['M'])
"Deferred frees", 0);
"Pool snapshot frees", 0);
}
if (spa_feature_is_active(spa,
"Pool dataset frees");
}
}
for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
if (!(spa_feature_table[f].fi_flags &
continue;
}
(void) feature_get_refcount(spa,
&spa_feature_table[f], &refcount);
if (dataset_feature_count[f] != refcount) {
(void) printf("%s feature refcount mismatch: "
"%lld datasets != %lld refcount\n",
rc = 2;
} else {
(void) printf("Verified %s feature refcount "
"of %llu is correct\n",
}
}
}
if (rc == 0)
if (dump_opt['s'])
if (dump_opt['h'])
if (rc != 0)
}
/*
 * Option bits for the raw block reader. Each bit is distinct so several
 * may be combined in one `flags` word and tested with `flags & ZDB_FLAG_*`.
 * NOTE(review): the names appear to line up with the flag letters listed in
 * the block-descriptor comment (b, c, d, e, g, i, p, r); the letter-to-bit
 * assignment itself is not visible here, so confirm where flagbits[] is set.
 */
#define ZDB_FLAG_CHECKSUM 0x0001
#define ZDB_FLAG_DECOMPRESS 0x0002
#define ZDB_FLAG_BSWAP 0x0004
#define ZDB_FLAG_GBH 0x0008
#define ZDB_FLAG_INDIRECT 0x0010
#define ZDB_FLAG_PHYS 0x0020
#define ZDB_FLAG_RAW 0x0040
#define ZDB_FLAG_PRINT_BLKPTR 0x0080
/*
 * One slot per possible character value.
 * NOTE(review): presumably maps a flag character to its ZDB_FLAG_* bit, with
 * 0 meaning the character is not a valid flag; confirm against the code
 * that fills and reads this table.
 */
int flagbits[256];
static void
{
char blkbuf[BP_SPRINTF_LEN];
if (flags & ZDB_FLAG_BSWAP)
}
static void
{
int i;
for (i = 0; i < nbps; i++)
}
static void
{
}
static void
{
if (flags & ZDB_FLAG_BSWAP)
}
static void
{
int i, j;
char *hdr, *c;
if (do_bswap)
hdr = " 7 6 5 4 3 2 1 0 f e d c b a 9 8";
else
hdr = " 0 1 2 3 4 5 6 7 8 9 a b c d e f";
for (i = 0; i < nwords; i += 2) {
(void) printf("%06llx: %016llx %016llx ",
(u_longlong_t)(i * sizeof (uint64_t)),
c = (char *)&d[i];
for (j = 0; j < 2 * sizeof (uint64_t); j++)
(void) printf("\n");
}
}
/*
* There are two acceptable formats:
* leaf_name - For example: c1t0d0 or /tmp/ztest.0a
* child[.child]* - For example: 0.1.1
*
* The second form can be used to specify arbitrary vdevs anywhere
* in the hierarchy. For example, in a pool with a mirror of
* RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1.
*/
static vdev_t *
{
char *s, *p, *q;
int i;
return (NULL);
/* First, assume the x.x.x.x format */
goto name;
if (i < 0 || i >= vdev->vdev_children)
return (NULL);
if (*s == '\0')
return (vdev);
name:
for (i = 0; i < vdev->vdev_children; i++) {
continue;
else
return (vc);
}
return (vc);
return (vc);
return (vc);
}
return (NULL);
}
/*
* Read a block from a pool and print it out. The syntax of the
* block descriptor is:
*
* pool:vdev_specifier:offset:size[:flags]
*
* pool - The name of the pool you wish to read from
* vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
* offset - offset, in hex, in bytes
* size - Amount of data to read, in hex, in bytes
* flags - A string of characters specifying options
* b: Decode a blkptr at given offset within block
* *c: Calculate and display checksums
* d: Decompress data before dumping
* e: Byteswap data before dumping
* g: Display data as a gang block header
* i: Display as an indirect block
* p: Do I/O to physical offset
* r: Dump raw data to stdout
*
* * = not yet implemented
*/
static void
{
int flags = 0;
int i, error;
vdev = s ? s : "";
flagstr = s ? s : "";
s = NULL;
if (size == 0)
s = "size must not be zero";
s = "size must be a multiple of sector size";
s = "offset must be a multiple of sector size";
if (s) {
return;
}
for (i = 0; flagstr[i]; i++) {
if (bit == 0) {
(void) printf("***Invalid flag: %c\n",
flagstr[i]);
continue;
}
/* If it's not something with an argument, keep going */
if ((bit & (ZDB_FLAG_CHECKSUM |
ZDB_FLAG_PRINT_BLKPTR)) == 0)
continue;
p = &flagstr[i + 1];
if (bit == ZDB_FLAG_PRINT_BLKPTR)
if (*p != ':' && *p != '\0') {
(void) printf("***Invalid flag arg: '%s'\n", s);
return;
}
}
}
return;
} else {
else
}
BP_SET_LEVEL(bp, 0);
BP_SET_DEDUP(bp, 0);
/*
* Treat this as a normal block read.
*/
} else {
/*
* Treat this as a vdev child I/O.
*/
}
if (error) {
goto out;
}
if (flags & ZDB_FLAG_DECOMPRESS) {
/*
* We don't know how the data was compressed, so just try
* every decompress function at every inflated blocksize.
*/
enum zio_compress c;
SPA_MAXBLOCKSIZE - psize) == 0);
SPA_MAXBLOCKSIZE - psize) == 0);
lsize -= SPA_MINBLOCKSIZE) {
for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
break;
}
if (c != ZIO_COMPRESS_FUNCTIONS)
break;
}
goto out;
}
} else {
}
if (flags & ZDB_FLAG_PRINT_BLKPTR)
zdb_print_blkptr((blkptr_t *)(void *)
else if (flags & ZDB_FLAG_RAW)
else if (flags & ZDB_FLAG_INDIRECT)
flags);
else if (flags & ZDB_FLAG_GBH)
else
out:
}
static boolean_t
{
char *s;
if (guid != 0) {
return (v == guid);
} else {
}
return (B_FALSE);
}
static char *
{
char sep;
int count = 0;
importargs_t args = { 0 };
*sepp = '\0';
}
count++;
/* print previously found config */
}
(void) printf("%s\n",
nvpair_name(elem));
} else {
}
}
}
}
if (count > 1)
(void) fatal("\tMatched %d pools - use pool GUID "
"instead of pool name or \n"
"\tpool name part of a dataset name to select pool", count);
if (sepp)
/*
* If pool GUID was specified for pool id, replace it with pool name
*/
}
return (name);
}
int
{
int i, c;
int dump_all = 1;
int verbose = 0;
int error = 0;
char **searchdirs = NULL;
int nsearch = 0;
char *target;
int rewind = ZPOOL_NEVER_REWIND;
"bcdhilmMI:suCDRSAFLXx:evp:t:U:P")) != -1) {
switch (c) {
case 'b':
case 'c':
case 'd':
case 'h':
case 'i':
case 'l':
case 'm':
case 's':
case 'u':
case 'C':
case 'D':
case 'M':
case 'R':
case 'S':
dump_opt[c]++;
dump_all = 0;
break;
case 'A':
case 'F':
case 'L':
case 'X':
case 'e':
case 'P':
dump_opt[c]++;
break;
case 'I':
if (max_inflight == 0) {
"than 0\n");
usage();
}
break;
case 'p':
if (searchdirs == NULL) {
searchdirs = umem_alloc(sizeof (char *),
} else {
sizeof (char *), UMEM_NOFAIL);
sizeof (char *));
nsearch * sizeof (char *));
searchdirs = tmp;
}
break;
case 't':
if (max_txg < TXG_INITIAL) {
"specified: %s\n", optarg);
usage();
}
break;
case 'U':
break;
case 'v':
verbose++;
break;
case 'x':
vn_dumpdir = optarg;
break;
default:
usage();
break;
}
}
usage();
}
/*
* ZDB does not typically re-read blocks; therefore limit the ARC
* to 256 MB, which can be used entirely for metadata.
*/
/*
* "zdb -b" uses traversal prefetch which uses async reads.
* For good performance, let several of them be active at once.
*/
g_zfs = libzfs_init();
if (dump_all)
for (c = 0; c < 256; c++) {
dump_opt[c] = 1;
if (dump_opt[c])
}
usage();
if (argc < 1) {
return (0);
}
usage();
}
if (dump_opt['l']) {
dump_label(argv[0]);
return (0);
}
error = 0;
if (dump_opt['e']) {
if (name) {
(void) printf("\nConfiguration for import:\n");
}
if (nvlist_add_nvlist(cfg,
ZPOOL_REWIND_POLICY, policy) != 0) {
fatal("can't open '%s': %s",
}
ZFS_IMPORT_MISSING_LOG)) != 0) {
}
}
}
if (error == 0) {
NULL);
if (error) {
/*
* If we're missing the log device then
* try opening the pool after clearing the
* log state.
*/
error = 0;
}
if (!error) {
}
}
} else {
}
}
if (error)
argv++;
argc--;
if (!dump_opt['R']) {
if (argc > 0) {
zopt_objects = argc;
for (i = 0; i < zopt_objects; i++) {
errno = 0;
if (zopt_object[i] == 0 && errno != 0)
fatal("bad number %s: %s",
}
}
} else {
}
} else {
for (i = 0; i < argc; i++)
}
kernel_fini();
return (0);
}