bpobj.c revision d04756377ddd1cf28ebcf652541094e17b03c889
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * CDDL HEADER START
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * The contents of this file are subject to the terms of the
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * Common Development and Distribution License (the "License").
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * You may not use this file except in compliance with the License.
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * or http://www.opensolaris.org/os/licensing.
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * See the License for the specific language governing permissions
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * and limitations under the License.
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * When distributing Covered Code, include this CDDL HEADER in each
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * If applicable, add the following below this CDDL HEADER, with the
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * fields enclosed by brackets "[]" replaced with your own identifying
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * information: Portions Copyright [yyyy] [name of copyright owner]
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * CDDL HEADER END
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
d04756377ddd1cf28ebcf652541094e17b03c889Matthew Ahrens * Copyright (c) 2013 by Delphix. All rights reserved.
f17457368189aa911f774c38c1f21875a568bdcaMatthew Ahrens * Return an empty bpobj, preferably the empty dummy one (dp_empty_bpobj).
f17457368189aa911f774c38c1f21875a568bdcaMatthew Ahrensbpobj_alloc_empty(objset_t *os, int blocksize, dmu_tx_t *tx)
f17457368189aa911f774c38c1f21875a568bdcaMatthew Ahrens if (spa_feature_is_enabled(spa, empty_bpobj_feat)) {
f17457368189aa911f774c38c1f21875a568bdcaMatthew Ahrens if (!spa_feature_is_active(spa, empty_bpobj_feat)) {
f17457368189aa911f774c38c1f21875a568bdcaMatthew Ahrens spa_feature_decr(dmu_objset_spa(os), empty_bpobj_feat, tx);
f17457368189aa911f774c38c1f21875a568bdcaMatthew Ahrens if (!spa_feature_is_active(dmu_objset_spa(os), empty_bpobj_feat)) {
f17457368189aa911f774c38c1f21875a568bdcaMatthew Ahrens VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset,
f17457368189aa911f774c38c1f21875a568bdcaMatthew Ahrens VERIFY3U(0, ==, dmu_object_free(os, dp->dp_empty_bpobj, tx));
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrensbpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx)
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_BPOBJ_ACCOUNT)
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens else if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS)
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens return (dmu_object_alloc(os, DMU_OT_BPOBJ, blocksize,
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrensbpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx)
f17457368189aa911f774c38c1f21875a568bdcaMatthew Ahrens ASSERT(obj != dmu_objset_pool(os)->dp_empty_bpobj);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens if (!bpo.bpo_havesubobj || bpo.bpo_phys->bpo_subobjs == 0)
b420f3adeb349714478d1a7813d2c0e069d41555Richard Lowe VERIFY3U(0, ==, dmu_object_info(os, bpo.bpo_phys->bpo_subobjs, &doi));
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens epb = doi.doi_data_block_size / sizeof (uint64_t);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens for (i = bpo.bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) {
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens if (dbuf == NULL || dbuf->db_offset > offset) {
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens bpo.bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0));
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);
b420f3adeb349714478d1a7813d2c0e069d41555Richard Lowe VERIFY3U(0, ==, dmu_object_free(os, bpo.bpo_phys->bpo_subobjs, tx));
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrensbpobj_open(bpobj_t *bpo, objset_t *os, uint64_t object)
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens mutex_init(&bpo->bpo_lock, NULL, MUTEX_DEFAULT, NULL);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPOBJ_HDR);
837b568b3a2559f8c9b9403f95104271a85d129eGeorge Wilson err = dmu_bonus_hold(os, object, bpo, &bpo->bpo_dbuf);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens bpo->bpo_epb = doi.doi_data_block_size >> SPA_BLKPTRSHIFT;
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens bpo->bpo_havecomp = (doi.doi_bonus_size > BPOBJ_SIZE_V0);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens bpo->bpo_havesubobj = (doi.doi_bonus_size > BPOBJ_SIZE_V1);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens /* Lame workaround for closing a bpobj that was never opened. */
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrensbpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens for (i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= 0; i--) {
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens if (dbuf == NULL || dbuf->db_offset > offset) {
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, offset,
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens bpo->bpo_phys->bpo_uncomp -= BP_GET_UCSIZE(bp);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens ASSERT3S(bpo->bpo_phys->bpo_num_blkptrs, >=, 0);
b420f3adeb349714478d1a7813d2c0e069d41555Richard Lowe VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, bpo->bpo_object,
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens if (err || !bpo->bpo_havesubobj || bpo->bpo_phys->bpo_subobjs == 0)
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens err = dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens epb = doi.doi_data_block_size / sizeof (uint64_t);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens for (i = bpo->bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) {
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens uint64_t used_before, comp_before, uncomp_before;
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens uint64_t used_after, comp_after, uncomp_after;
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens if (dbuf == NULL || dbuf->db_offset > offset) {
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens bpo->bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens err = bpobj_open(&sublist, bpo->bpo_os, objarray[blkoff]);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens err = bpobj_iterate_impl(&sublist, func, arg, tx, free);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens bpo->bpo_phys->bpo_bytes -= used_before - used_after;
4bc4cb79b47ef763700720f01652c6ed7c56b95dMatthew Ahrens bpo->bpo_phys->bpo_comp -= comp_before - comp_after;
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens ASSERT3S(bpo->bpo_phys->bpo_num_subobjs, >=, 0);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens /* If there are no entries, there should be no bytes. */
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_num_subobjs > 0) ||
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * Iterate and remove the entries. If func returns nonzero, iteration
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * will stop and that entry will not be removed.
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrensbpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx)
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens return (bpobj_iterate_impl(bpo, func, arg, tx, B_TRUE));
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * Iterate the entries. If func returns nonzero, iteration will stop.
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrensbpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx)
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens return (bpobj_iterate_impl(bpo, func, arg, tx, B_FALSE));
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrensbpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
f17457368189aa911f774c38c1f21875a568bdcaMatthew Ahrens ASSERT(bpo->bpo_object != dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj);
f17457368189aa911f774c38c1f21875a568bdcaMatthew Ahrens if (subobj == dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj) {
b420f3adeb349714478d1a7813d2c0e069d41555Richard Lowe VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj));
b420f3adeb349714478d1a7813d2c0e069d41555Richard Lowe VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens /* No point in having an empty subobj. */
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os,
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens DMU_OT_BPOBJ_SUBOBJ, SPA_MAXBLOCKSIZE, DMU_OT_NONE, 0, tx);
3b2aab18808792cbd248a12f1edf139b89833c13Matthew Ahrens ASSERT0(dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi));
3b2aab18808792cbd248a12f1edf139b89833c13Matthew Ahrens ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
4bc4cb79b47ef763700720f01652c6ed7c56b95dMatthew Ahrens * If subobj has only one block of subobjs, then move subobj's
4bc4cb79b47ef763700720f01652c6ed7c56b95dMatthew Ahrens * subobjs to bpo's subobj list directly. This reduces
4bc4cb79b47ef763700720f01652c6ed7c56b95dMatthew Ahrens * recursion in bpobj_iterate due to nested subobjs.
b420f3adeb349714478d1a7813d2c0e069d41555Richard Lowe VERIFY3U(0, ==, dmu_object_info(bpo->bpo_os, subsubobjs, &doi));
4bc4cb79b47ef763700720f01652c6ed7c56b95dMatthew Ahrens if (doi.doi_max_offset == doi.doi_data_block_size) {
4bc4cb79b47ef763700720f01652c6ed7c56b95dMatthew Ahrens uint64_t numsubsub = subbpo.bpo_phys->bpo_num_subobjs;
b420f3adeb349714478d1a7813d2c0e069d41555Richard Lowe VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, subsubobjs,
d04756377ddd1cf28ebcf652541094e17b03c889Matthew Ahrens * Make sure that we are not asking dmu_write()
d04756377ddd1cf28ebcf652541094e17b03c889Matthew Ahrens * to write more data than we have in our buffer.
4bc4cb79b47ef763700720f01652c6ed7c56b95dMatthew Ahrens dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
4bc4cb79b47ef763700720f01652c6ed7c56b95dMatthew Ahrens bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
4bc4cb79b47ef763700720f01652c6ed7c56b95dMatthew Ahrens numsubsub * sizeof (subobj), subdb->db_data, tx);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrensbpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx)
f17457368189aa911f774c38c1f21875a568bdcaMatthew Ahrens ASSERT(bpo->bpo_object != dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens /* We never need the fill count. */
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens /* The bpobj will compress better if we can leave off the checksum */
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens bzero(&stored_bp.blk_cksum, sizeof (stored_bp.blk_cksum));
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens offset = bpo->bpo_phys->bpo_num_blkptrs * sizeof (stored_bp);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens blkoff = P2PHASE(bpo->bpo_phys->bpo_num_blkptrs, bpo->bpo_epb);
b420f3adeb349714478d1a7813d2c0e069d41555Richard Lowe VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, bpo->bpo_object,
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens dmu_buf_will_dirty(bpo->bpo_cached_dbuf, tx);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens bpo->bpo_phys->bpo_uncomp += BP_GET_UCSIZE(bp);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens/* ARGSUSED */
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrensspace_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) {
19b94df933188a15d4f0d6c568f0bab3f127892eMatthew Ahrens if (dsl_pool_sync_context(spa_get_dsl(sra->spa)))
19b94df933188a15d4f0d6c568f0bab3f127892eMatthew Ahrens sra->used += bp_get_dsize_sync(sra->spa, bp);
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrensbpobj_space(bpobj_t *bpo, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens return (bpobj_space_range(bpo, 0, UINT64_MAX,
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * Return the amount of space in the bpobj which is:
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * mintxg < blk_birth <= maxtxg
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrensbpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg,
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * As an optimization, if they want the whole txg range, just
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens * get bpo_bytes rather than iterating over the bps.
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX && bpo->bpo_havecomp)
cde58dbc6a23d4d38db7c8866312be83221c765fMatthew Ahrens return (bpobj_space(bpo, usedp, compp, uncompp));