dsl_scan.c revision 3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * CDDL HEADER START
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * The contents of this file are subject to the terms of the
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * Common Development and Distribution License (the "License").
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * You may not use this file except in compliance with the License.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * See the License for the specific language governing permissions
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * and limitations under the License.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * When distributing Covered Code, include this CDDL HEADER in each
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * If applicable, add the following below this CDDL HEADER, with the
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * fields enclosed by brackets "[]" replaced with your own identifying
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * information: Portions Copyright [yyyy] [name of copyright owner]
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * CDDL HEADER END
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingtypedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingstatic void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingint zfs_scan_min_time_ms = 1000; /* min millisecs to scrub per txg */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingint zfs_resilver_min_time_ms = 3000; /* min millisecs to resilver per txg */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingboolean_t zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingboolean_t zfs_no_scrub_prefetch = B_FALSE; /* set to disable scrub prefetching */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingenum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingint dsl_scan_delay_completion = B_FALSE; /* set to delay scan completion */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling/* the order has to match pool_scan_type */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scn = dp->dp_scan = kmem_zalloc(sizeof (dsl_scan_t), KM_SLEEP);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (err == 0) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * There was an old-style scrub in progress. Restart a
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * new-style scrub from the beginning.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "restarting new-style scrub in txg %llu",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * Load the queue obj from the old location so that it
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * can be freed by dsl_scan_done().
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling (void) zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling return (0);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling spa_prev_software_version(dp->dp_spa) < SPA_VERSION_SCAN) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * A new-type scrub was in progress on an old
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * pool, and the pool was accessed by old
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * software. Restart from the beginning, since
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * the old software may have changed the pool in
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * the meantime.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "by old software; restarting in txg %llu",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling return (0);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling/* ARGSUSED */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_scan_setup_check(void *arg1, void *arg2, dmu_tx_t *tx)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling return (0);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling/* ARGSUSED */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling ASSERT(*funcp > POOL_SCAN_NONE && *funcp < POOL_SCAN_FUNCS);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scn->scn_phys.scn_ddt_class_max = DDT_CLASSES - 1; /* the entire DDT */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scn->scn_phys.scn_to_examine = spa->spa_root_vdev->vdev_stat.vs_alloc;
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max;
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling /* rewrite all disk labels */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling &scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * If this is an incremental scrub, limit the DDT scrub phase
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * to just the auto-ditto class (for correctness); the rest
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * of the scrub should go faster using top-down pruning.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling /* back to the generic stuff */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling bzero(dp->dp_blkstats, sizeof (zfs_all_blkstats_t));
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scn->scn_phys.scn_queue_obj = zap_create(dp->dp_meta_objset,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "func=%u mintxg=%llu maxtxg=%llu",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling *funcp, scn->scn_phys.scn_min_txg, scn->scn_phys.scn_max_txg);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling/* ARGSUSED */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling static const char *old_names[] = {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "scrub_bookmark",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "scrub_ddt_bookmark",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "scrub_ddt_class_max",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "scrub_queue",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "scrub_min_txg",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "scrub_max_txg",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "scrub_func",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "scrub_errors",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling /* Remove any remnants of an old-style scrub. */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling for (i = 0; old_names[i]; i++) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * If we were "restarted" from a stopped state, don't bother
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * with anything else.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling spa_history_log_internal(LOG_POOL_SCAN_DONE, spa, tx,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * If the scrub/resilver completed, update all DTLs to
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * reflect this. Whether it succeeded or not, vacate
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * all temporary scrub DTLs.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling spa_event_notify(spa, NULL, scn->scn_phys.scn_min_txg ?
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * We may have finished replacing a device.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * Let the async thread assess this and handle the detach.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling/* ARGSUSED */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_scan_cancel_check(void *arg1, void *arg2, dmu_tx_t *tx)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling return (0);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling/* ARGSUSED */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_scan_cancel_sync(void *arg1, void *arg2, dmu_tx_t *tx)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling const zbookmark_t *zb, dnode_phys_t *dnp, arc_buf_t *pbuf,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling dsl_dataset_t *ds, dsl_scan_t *scn, dmu_objset_type_t ostype,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingstatic void dsl_scan_visitdnode(dsl_scan_t *, dsl_dataset_t *ds,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling dnode_phys_t *dnp, arc_buf_t *buf, uint64_t object, dmu_tx_t *tx);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bp)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling zio_nowait(zio_free_sync(pio, dp->dp_spa, txg, bpp, pio->io_flags));
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_read(zio_t *pio, spa_t *spa, const blkptr_t *bpp, arc_buf_t *pbuf,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling arc_done_func_t *done, void *private, int priority, int zio_flags,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling return (arc_read(pio, spa, bpp, pbuf, done, private,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling arc_done_func_t *done, void *private, int priority, int zio_flags,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling return (arc_read_nolock(pio, spa, bpp, done, private,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling return (zb->zb_objset == 0 && zb->zb_object == 0 &&
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling/* dnp is the dnode for zb1->zb_object */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingbookmark_is_before(const dnode_phys_t *dnp, const zbookmark_t *zb1,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * A bookmark in the deadlist is considered to be after
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * everything else.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling /* The objset_phys_t isn't before anything. */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling ((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT;
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling uint64_t smt = ds->ds_dir->dd_pool->dp_scan->scn_phys.scn_max_txg;
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling VERIFY(0 == zap_update(scn->scn_dp->dp_meta_objset,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_t *zb)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling /* we never skip user/group accounting objects */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (!bookmark_is_zero(&scn->scn_phys.scn_bookmark))
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling /* We only know how to resume from level-0 blocks. */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ?
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling dprintf("pausing at bookmark %llx/%llx/%llx/%llx\n",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling dprintf("pausing at DDT bookmark %llx/%llx/%llx/%llx\n",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_class,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingtypedef struct zil_scan_arg {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling/* ARGSUSED */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_scan_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling return (0);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * One block ("stubby") can be allocated a long time ago; we
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * want to visit that one because it has been allocated
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * (on-disk) even if it hasn't been claimed (even though for
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * scrub there's nothing to do to it).
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(dp->dp_spa))
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling return (0);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET],
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling VERIFY(0 == scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb));
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling return (0);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling/* ARGSUSED */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_scan_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling return (0);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * birth can be < claim_txg if this record's txg is
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * already txg sync'ed (but this log block contains
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * other records that are not synced)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling return (0);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET],
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling VERIFY(0 == scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb));
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling return (0);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * We only want to visit blocks that have been claimed but not yet
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * replayed (or, in read-only mode, blocks that *would* be claimed).
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling (void) zil_parse(zilog, dsl_scan_zil_block, dsl_scan_zil_record, &zsa,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling/* ARGSUSED */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_scan_prefetch(dsl_scan_t *scn, arc_buf_t *buf, blkptr_t *bp,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (BP_IS_HOLE(bp) || bp->blk_birth <= scn->scn_phys.scn_min_txg ||
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE))
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling SET_BOOKMARK(&czb, objset, object, BP_GET_LEVEL(bp), blkid);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * XXX need to make sure all of these arc_read() prefetches are
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * done before setting xlateall (similar to dsl_read())
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling (void) arc_read(scn->scn_prefetch_zio_root, scn->scn_dp->dp_spa, bp,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling buf, NULL, NULL, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * We never skip over user/group accounting objects (obj<0)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (!bookmark_is_zero(&scn->scn_phys.scn_bookmark) &&
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * If we already visited this bp & everything below (in
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * a prior txg sync), don't bother doing it again.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (bookmark_is_before(dnp, zb, &scn->scn_phys.scn_bookmark))
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * If we found the block we're trying to resume from, or
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * we went past it to a different object, zero it out to
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * indicate that it's OK to start checking for pausing
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (bcmp(zb, &scn->scn_phys.scn_bookmark, sizeof (*zb)) == 0 ||
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling zb->zb_object > scn->scn_phys.scn_bookmark.zb_object) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * Return nonzero on i/o error.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * Return new buf to write out in *bufp.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling const zbookmark_t *zb, dmu_tx_t *tx, arc_buf_t **bufp)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling for (i = 0, cbp = (*bufp)->b_data; i < epb; i++, cbp++) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling for (i = 0, cbp = (*bufp)->b_data; i < epb; i++, cbp++) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling } else if (BP_GET_TYPE(bp) == DMU_OT_USERGROUP_USED) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling for (i = 0, cdnp = (*bufp)->b_data; i < epb; i++, cdnp++) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling for (i = 0, cdnp = (*bufp)->b_data; i < epb; i++, cdnp++) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling &osp->os_meta_dnode, *bufp, DMU_META_DNODE_OBJECT, tx);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * We also always visit user/group accounting
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * objects, and never skip them, even if we are
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * pausing. This is necessary so that the space
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * deltas from this txg get integrated.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling return (0);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_scan_visitdnode(dsl_scan_t *scn, dsl_dataset_t *ds,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling dmu_objset_type_t ostype, dnode_phys_t *dnp, arc_buf_t *buf,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * The arguments are in this order because mdb can only print the
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * first 5; we want them to be useful.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling dsl_dataset_t *ds, dsl_scan_t *scn, dmu_objset_type_t ostype,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling /* ASSERT(pbuf == NULL || arc_released(pbuf)); */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "visiting ds=%p/%llu zb=%llx/%llx/%llx/%llx buf=%p bp=%p",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling zb->zb_objset, zb->zb_object, zb->zb_level, zb->zb_blkid,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * For non-user-accounting blocks, we need to read the
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * new bp (from a deleted snapshot, found in
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * check_existing_xlation). If we used the old bp,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * pointers inside this block from before we resumed
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * would be untranslated.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * For user-accounting blocks, we need to read the old
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * bp, because we will apply the entire space delta to
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * it (original untranslated -> translations from
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * deleted snap -> now).
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (dsl_scan_recurse(scn, ds, ostype, dnp, &bp_toread, zb, tx,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * If dsl_scan_ddt() has already visited this block, it will have
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * already done any translations or scrubbing, so don't call the
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * callback again.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * If this block is from the future (after cur_max_txg), then we
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * are doing this on behalf of a deleted snapshot, and we will
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * revisit the future block on the next pass of this dataset.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * Don't scan it now unless we need to because something
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * under it was modified.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (bp->blk_birth <= scn->scn_phys.scn_cur_max_txg) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_scan_visit_rootbp(dsl_scan_t *scn, dsl_dataset_t *ds, blkptr_t *bp,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_scan_ds_destroyed(dsl_dataset_t *ds, dmu_tx_t *tx)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (scn->scn_phys.scn_bookmark.zb_objset == ds->ds_object) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling /* Note, scn_cur_{min,max}_txg stays the same. */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling zfs_dbgmsg("destroying ds %llu; currently traversing; "
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "reset zb_objset to %llu",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling zfs_dbgmsg("destroying ds %llu; currently traversing; "
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "reset bookmark to -1,0,0,0",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scn->scn_phys.scn_queue_obj, ds->ds_object, &mintxg) == 0) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * We keep the same mintxg; it could be >
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * ds_creation_txg if the previous snapshot was
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * deleted too.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "replacing with %llu",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling zfs_dbgmsg("destroying ds %llu; in queue; removing",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * dsl_scan_sync() should be called after this, and should sync
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * out our changed state, but just to be safe, do it here.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_scan_ds_snapshotted(dsl_dataset_t *ds, dmu_tx_t *tx)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (scn->scn_phys.scn_bookmark.zb_objset == ds->ds_object) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling zfs_dbgmsg("snapshotting ds %llu; currently traversing; "
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "reset zb_objset to %llu",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scn->scn_phys.scn_queue_obj, ds->ds_object, &mintxg) == 0) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "replacing with %llu",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_scan_ds_clone_swapped(dsl_dataset_t *ds1, dsl_dataset_t *ds2, dmu_tx_t *tx)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (scn->scn_phys.scn_bookmark.zb_objset == ds1->ds_object) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scn->scn_phys.scn_bookmark.zb_objset = ds2->ds_object;
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling zfs_dbgmsg("clone_swap ds %llu; currently traversing; "
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "reset zb_objset to %llu",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling } else if (scn->scn_phys.scn_bookmark.zb_objset == ds2->ds_object) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scn->scn_phys.scn_bookmark.zb_objset = ds1->ds_object;
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling zfs_dbgmsg("clone_swap ds %llu; currently traversing; "
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "reset zb_objset to %llu",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling ASSERT3U(mintxg, ==, ds1->ds_phys->ds_prev_snap_txg);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling ASSERT3U(mintxg, ==, ds2->ds_phys->ds_prev_snap_txg);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scn->scn_phys.scn_queue_obj, ds2->ds_object, mintxg, tx);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling /* Both were there to begin with */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "replacing with %llu",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scn->scn_phys.scn_queue_obj, ds2->ds_object, &mintxg) == 0) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling ASSERT3U(mintxg, ==, ds1->ds_phys->ds_prev_snap_txg);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling ASSERT3U(mintxg, ==, ds2->ds_phys->ds_prev_snap_txg);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scn->scn_phys.scn_queue_obj, ds1->ds_object, mintxg, tx));
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "replacing with %llu",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling/* ARGSUSED */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingenqueue_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (ds->ds_dir->dd_phys->dd_origin_obj == eca->originobj) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling return (0);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * Iterate over the bps in this ds.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling dsl_scan_visit_rootbp(scn, ds, &ds->ds_phys->ds_bp, tx);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling char *dsname = kmem_alloc(ZFS_MAXNAMELEN, KM_SLEEP);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; "
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "pausing=%u",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * We've finished this pass over this dataset.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * If we did not completely visit this dataset, do another pass.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (scn->scn_phys.scn_flags & DSF_VISIT_DS_AGAIN) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * Add descendent datasets to work queue.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scn->scn_phys.scn_queue_obj, ds->ds_phys->ds_next_snap_obj,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * A bug in a previous version of the code could
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * cause upgrade_clones_cb() to not set
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * ds_next_snap_obj when it should, leading to a
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * missing entry. Therefore we can only use the
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * next_clones_obj when its count is correct.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (err == 0 &&
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling (void) dmu_objset_find_spa(ds->ds_dir->dd_pool->dp_spa,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling/* ARGSUSED */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingenqueue_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * If this is a clone, we don't need to worry about it for now.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling return (0);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling VERIFY(zap_add_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling ds->ds_object, ds->ds_phys->ds_prev_snap_txg, tx) == 0);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling return (0);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * Scrub/dedup interaction.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * If there are N references to a deduped block, we don't want to scrub it
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * N times -- ideally, we should scrub it exactly once.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * We leverage the fact that the dde's replication class (enum ddt_class)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * is ordered from highest replication class (DDT_CLASS_DITTO) to lowest
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * (DDT_CLASS_UNIQUE) so that we may walk the DDT in that order.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * To prevent excess scrubbing, the scrub begins by walking the DDT
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * to find all blocks with refcnt > 1, and scrubs each of these once.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * Since there are two replication classes which contain blocks with
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * refcnt > 1, we scrub the highest replication class (DDT_CLASS_DITTO) first.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * Finally the top-down scrub begins, only visiting blocks with refcnt == 1.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * There would be nothing more to say if a block's refcnt couldn't change
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * during a scrub, but of course it can so we must account for changes
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * in a block's replication class.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * Here's an example of what can occur:
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * If a block has refcnt > 1 during the DDT scrub phase, but has refcnt == 1
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * when visited during the top-down scrub phase, it will be scrubbed twice.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * This negates our scrub optimization, but is otherwise harmless.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * If a block has refcnt == 1 during the DDT scrub phase, but has refcnt > 1
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * on each visit during the top-down scrub phase, it will never be scrubbed.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * To catch this, ddt_sync_entry() notifies the scrub code whenever a block's
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * replication class transitions to a higher level (e.g., DDT_CLASS_UNIQUE to
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * DDT_CLASS_DUPLICATE); if it transitions from refcnt == 1 to refcnt > 1
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * while a scrub is in progress, it scrubs the block right then.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling ddt_bookmark_t *ddb = &scn->scn_phys.scn_ddt_bookmark;
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling while ((error = ddt_walk(scn->scn_dp->dp_spa, ddb, &dde)) == 0) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (ddb->ddb_class > scn->scn_phys.scn_ddt_class_max)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling /* There should be no pending changes to the dedup table */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling ddt = scn->scn_dp->dp_spa->spa_ddt[ddb->ddb_checksum];
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &dde, tx);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling zfs_dbgmsg("scanned %llu ddt entries with class_max = %u; pausing=%u",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling (longlong_t)n, (int)scn->scn_phys.scn_ddt_class_max,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling/* ARGSUSED */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingdsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling ddp->ddp_phys_birth > scn->scn_phys.scn_cur_max_txg)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scan_funcs[scn->scn_phys.scn_func](scn->scn_dp, &bp, &zb);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg;
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg;
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (scn->scn_phys.scn_bookmark.zb_objset == DMU_META_OBJSET) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling /* First do the MOS & ORIGIN */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg;
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg;
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * If we were paused, continue from here. Note if the
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * ds we were paused on was deleted, the zb_objset may
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * be -1, so we will skip this and find a new objset
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling dsl_scan_visitds(scn, scn->scn_phys.scn_bookmark.zb_objset, tx);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * In case we were paused right at the end of the ds, zero the
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * bookmark so we don't think that we're still trying to resume.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling bzero(&scn->scn_phys.scn_bookmark, sizeof (zbookmark_t));
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling /* keep pulling things out of the zap-object-as-queue */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling /* Set up min/max txg */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scn->scn_phys.scn_cur_max_txg = dsl_scan_ds_maxtxg(ds);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * Check for scn_restart_txg before checking spa_load_state, so
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * that we can restart an old-style scan while the pool is being
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * imported (see dsl_scan_init).
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL))
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling "ddt bm=%llu/%llu/%llu/%llx",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_class,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling zfs_dbgmsg("doing scan sync txg %llu; bm=%llu/%llu/%llu/%llu",
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scn->scn_prefetch_zio_root = zio_root(dp->dp_spa, NULL,
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling (longlong_t)(gethrtime() - scn->scn_sync_start_time) / MICROSEC);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling /* finished with scan. */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling zfs_dbgmsg("finished scan txg %llu", (longlong_t)tx->tx_txg);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * This will start a new scan, or restart an existing one.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling if (txg == 0) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling return (dp->dp_scan->scn_phys.scn_state == DSS_SCANNING &&
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling dp->dp_scan->scn_phys.scn_func == POOL_SCAN_RESILVER);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * scrub consumers
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Lingcount_block(zfs_all_blkstats_t *zab, const blkptr_t *bp)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * If we resume after a reboot, zab will be NULL; don't record
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * incomplete stats in that case.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling for (i = 0; i < 4; i++) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling int l = (i < 2) ? BP_GET_LEVEL(bp) : DN_MAX_LEVELS;
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling int zio_flags = ZIO_FLAG_SCRUB_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL;
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling return (0);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling } else if (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) {
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling /* If it's an intent log block, failure is expected. */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * Keep track of how much data we've examined so that
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * zpool(1M) status can make useful progress reports.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling scn->scn_phys.scn_examined += DVA_GET_ASIZE(&bp->blk_dva[d]);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling spa->spa_scan_pass_exam += DVA_GET_ASIZE(&bp->blk_dva[d]);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling /* if it's a resilver, this may not be in the target range */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * Gang members may be spread across multiple
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * vdevs, so the best estimate we have is the
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * scrub range, which has already been checked.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * XXX -- it would be better to change our
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * allocation policy to ensure that all
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * gang members reside on the same vdev.
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling while (spa->spa_scrub_inflight >= spa->spa_scrub_maxinflight)
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling /* do not relocate this block */
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling return (0);
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * Purge all vdev caches and probe all devices. We do this here
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * rather than in sync context because this requires a writer lock
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * on the spa_config lock, which we can't do from sync context. The
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * spa_scrub_reopen flag indicates that vdev_open() should not
3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5Lin Ling * attempt to start another scrub.