fa9e4066f08beec538e775443c5be79dd423fcabahrens * CDDL HEADER START
fa9e4066f08beec538e775443c5be79dd423fcabahrens * The contents of this file are subject to the terms of the
f65e61c04bc28ffd6bda04619c84330b420450b5ahrens * Common Development and Distribution License (the "License").
f65e61c04bc28ffd6bda04619c84330b420450b5ahrens * You may not use this file except in compliance with the License.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
fa9e4066f08beec538e775443c5be79dd423fcabahrens * See the License for the specific language governing permissions
fa9e4066f08beec538e775443c5be79dd423fcabahrens * and limitations under the License.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * When distributing Covered Code, include this CDDL HEADER in each
fa9e4066f08beec538e775443c5be79dd423fcabahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * If applicable, add the following below this CDDL HEADER, with the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * fields enclosed by brackets "[]" replaced with your own identifying
fa9e4066f08beec538e775443c5be79dd423fcabahrens * information: Portions Copyright [yyyy] [name of copyright owner]
fa9e4066f08beec538e775443c5be79dd423fcabahrens * CDDL HEADER END
06e0070d70ba2ee95f5aa2645423eb2cf1546788Mark Shellenbaum * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
653af1b809998570c7e89fe7a0d3f90992bf0216Stephen Blinick * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
bc9014e6a81272073b9854d9f65dd59e18d18c35Justin Gibbs * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
c3d26abc9ee97b4f60233556aadeb57e0bd30bb9Matthew Ahrens * Copyright (c) 2014 Integros [integros.com]
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * Define DNODE_STATS to turn on statistic gathering. By default, it is only
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * turned on when DEBUG is also defined.
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson#endif /* DEBUG */
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson#endif /* DNODE_STATS */
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Ericksonstatic kmem_cbrc_t dnode_move(void *, void *, size_t, void *);
0f6d88aded0d165f5954688a9b13bac76c38da84Alex Reece return (-1);
0f6d88aded0d165f5954688a9b13bac76c38da84Alex Reece return (-1);
0f6d88aded0d165f5954688a9b13bac76c38da84Alex Reece return (-1);
86bb58aec7165f8a0303564575c65e5a2ad58bf1Alex Reece return (-1);
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* ARGSUSED */
fa9e4066f08beec538e775443c5be79dd423fcabahrens rw_init(&dn->dn_struct_rwlock, NULL, RW_DEFAULT, NULL);
fa9e4066f08beec538e775443c5be79dd423fcabahrens mutex_init(&dn->dn_dbufs_mtx, NULL, MUTEX_DEFAULT, NULL);
b5e70f9763532dbf8f22010666d457240881c038Ricardo M. Correia cv_init(&dn->dn_notxholds, NULL, CV_DEFAULT, NULL);
3b2aab18808792cbd248a12f1edf139b89833c13Matthew Ahrens * Every dbuf has a reference, and dropping a tracked reference is
3b2aab18808792cbd248a12f1edf139b89833c13Matthew Ahrens * O(number of references), so don't track dn_holds.
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson bzero(&dn->dn_next_nblkptr[0], sizeof (dn->dn_next_nblkptr));
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson bzero(&dn->dn_next_nlevels[0], sizeof (dn->dn_next_nlevels));
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson bzero(&dn->dn_next_indblkshift[0], sizeof (dn->dn_next_indblkshift));
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson bzero(&dn->dn_next_bonustype[0], sizeof (dn->dn_next_bonustype));
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson bzero(&dn->dn_rm_spillblk[0], sizeof (dn->dn_rm_spillblk));
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson bzero(&dn->dn_next_bonuslen[0], sizeof (dn->dn_next_bonuslen));
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson bzero(&dn->dn_next_blksz[0], sizeof (dn->dn_next_blksz));
fa9e4066f08beec538e775443c5be79dd423fcabahrens for (i = 0; i < TXG_SIZE; i++) {
0f6d88aded0d165f5954688a9b13bac76c38da84Alex Reece avl_create(&dn->dn_dbufs, dbuf_compare, sizeof (dmu_buf_impl_t),
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* ARGSUSED */
fa9e4066f08beec538e775443c5be79dd423fcabahrens for (i = 0; i < TXG_SIZE; i++) {
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
ad135b5d644628e791c3188a6ecbd9c257961ef8Christopher Siden ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (dn->dn_phys->dn_type != DMU_OT_NONE || dn->dn_allocated_txg != 0) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT3U(dn->dn_datablkshift, >=, SPA_MINBLOCKSHIFT);
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT3U(dn->dn_datablkshift, <=, SPA_MAXBLOCKSHIFT);
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT3U(1<<dn->dn_datablkshift, ==, dn->dn_datablksz);
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT3U(ISP2(dn->dn_datablksz), ==, dn->dn_datablkshift != 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens for (i = 0; i < TXG_SIZE; i++) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT3U(dn->dn_next_nlevels[i], <=, dn->dn_nlevels);
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT3U(dn->dn_phys->dn_nlevels, <=, dn->dn_nlevels);
148434217c040ea38dc844384f6ba68d9b325906Matthew Ahrens ASSERT(DMU_OBJECT_IS_SPECIAL(dn->dn_object) || dn->dn_dbuf != NULL);
fa9e4066f08beec538e775443c5be79dd423fcabahrens (dn->dn_object % (dn->dn_dbuf->db.db_size >> DNODE_SHIFT)));
fa9e4066f08beec538e775443c5be79dd423fcabahrens dnp->dn_datablkszsec = BSWAP_16(dnp->dn_datablkszsec);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * dn_nblkptr is only one byte, so it's OK to read it in either
fa9e4066f08beec538e775443c5be79dd423fcabahrens * byte order. We can't read dn_bouslen.
fa9e4066f08beec538e775443c5be79dd423fcabahrens for (i = 0; i < dnp->dn_nblkptr * sizeof (blkptr_t)/8; i++)
fa9e4066f08beec538e775443c5be79dd423fcabahrens * OK to check dn_bonuslen for zero, because it won't matter if
fa9e4066f08beec538e775443c5be79dd423fcabahrens * we have the wrong byte order. This is necessary because the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * dnode dnode is smaller than a regular dnode.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Note that the bonus length calculated here may be
fa9e4066f08beec538e775443c5be79dd423fcabahrens * longer than the actual bonus buffer. This is because
fa9e4066f08beec538e775443c5be79dd423fcabahrens * we always put the bonus buffer after the last block
fa9e4066f08beec538e775443c5be79dd423fcabahrens * pointer (instead of packing it against the end of the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * dnode buffer).
ad135b5d644628e791c3188a6ecbd9c257961ef8Christopher Siden ASSERT(DMU_OT_IS_VALID(dnp->dn_bonustype));
ad135b5d644628e791c3188a6ecbd9c257961ef8Christopher Siden dmu_ot_byteswap[byteswap].ob_func(dnp->dn_bonus + off, len);
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum /* Swap SPILL block if we have one */
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum byteswap_uint64_array(&dnp->dn_spill, sizeof (blkptr_t));
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT3U(sizeof (dnode_phys_t), ==, (1<<DNODE_SHIFT));
fa9e4066f08beec538e775443c5be79dd423fcabahrens for (i = 0; i < size; i++) {
1934e92fc930c49429ad71a8ca97340f33227e78maybeednode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx)
1934e92fc930c49429ad71a8ca97340f33227e78maybee dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = DN_ZERO_BONUSLEN;
1934e92fc930c49429ad71a8ca97340f33227e78maybee dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = dn->dn_bonuslen;
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaumdnode_setbonus_type(dnode_t *dn, dmu_object_type_t newtype, dmu_tx_t *tx)
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum ASSERT3U(refcount_count(&dn->dn_holds), >=, 1);
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = dn->dn_bonustype;
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum ASSERT3U(refcount_count(&dn->dn_holds), >=, 1);
06e0070d70ba2ee95f5aa2645423eb2cf1546788Mark Shellenbaum ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum dn->dn_rm_spillblk[tx->tx_txg&TXG_MASK] = DN_KILL_SPILLBLK;
bf16b11e8deb633dd6c4296d46e92399d1582df4Matthew Ahrens dn->dn_datablkshift = ISP2(size) ? highbit64(size - 1) : 0;
503ad85c168c7992ccc310af845a581cff3c72b5Matthew Ahrensdnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * Defer setting dn_objset until the dnode is ready to be a candidate
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * for the dnode_move() callback.
fa9e4066f08beec538e775443c5be79dd423fcabahrens dnode_setdblksz(dn, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum dn->dn_have_spill = ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0);
ad135b5d644628e791c3188a6ecbd9c257961ef8Christopher Siden ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
bc9014e6a81272073b9854d9f65dd59e18d18c35Justin Gibbs /* Lost the allocation race. */
bc9014e6a81272073b9854d9f65dd59e18d18c35Justin Gibbs * Exclude special dnodes from os_dnodes so an empty os_dnodes
bc9014e6a81272073b9854d9f65dd59e18d18c35Justin Gibbs * signifies that the special dnodes have no references from
bc9014e6a81272073b9854d9f65dd59e18d18c35Justin Gibbs * their children (the entries in os_dnodes). This allows
bc9014e6a81272073b9854d9f65dd59e18d18c35Justin Gibbs * dnode_destroy() to easily determine if the last child has
bc9014e6a81272073b9854d9f65dd59e18d18c35Justin Gibbs * been removed and then complete eviction of the objset.
bc9014e6a81272073b9854d9f65dd59e18d18c35Justin Gibbs * Everything else must be valid before assigning dn_objset
bc9014e6a81272073b9854d9f65dd59e18d18c35Justin Gibbs * makes the dnode eligible for dnode_move().
5a98e54b3632348add05cdbf50bbf52e1b839c10Brendan Gregg - Sun Microsystems arc_space_consume(sizeof (dnode_t), ARC_SPACE_OTHER);
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * Caller must be holding the dnode handle, which is released upon return.
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0);
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson /* the dnode can no longer move, so we can release the handle */
5a98e54b3632348add05cdbf50bbf52e1b839c10Brendan Gregg - Sun Microsystems arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER);
fa9e4066f08beec538e775443c5be79dd423fcabahrensdnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
f676ed3419ae468da6c6831c143b0b54173e08d2ahrens dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
b515258426fed6c7311fd3f1dea697cfbd4085c6Matthew Ahrens spa_maxblocksize(dmu_objset_spa(dn->dn_objset)));
fa9e4066f08beec538e775443c5be79dd423fcabahrens ibs = MIN(MAX(ibs, DN_MIN_INDBLKSHIFT), DN_MAX_INDBLKSHIFT);
fa9e4066f08beec538e775443c5be79dd423fcabahrens dprintf("os=%p obj=%llu txg=%llu blocksize=%d ibs=%d\n", dn->dn_objset,
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(bcmp(dn->dn_phys, &dnode_phys_zero, sizeof (dnode_phys_t)) == 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum (bonustype == DMU_OT_SA && bonuslen == 0) ||
fa9e4066f08beec538e775443c5be79dd423fcabahrens for (i = 0; i < TXG_SIZE; i++) {
c717a56157ae0e6fca6a1e3689ae1edc385716a3maybee ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL);
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum if (bonustype == DMU_OT_SA) /* Maximize bonus space for SA */
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
f676ed3419ae468da6c6831c143b0b54173e08d2ahrens dn->dn_next_indblkshift[tx->tx_txg & TXG_MASK] = ibs;
1934e92fc930c49429ad71a8ca97340f33227e78maybee dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = dn->dn_bonuslen;
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = dn->dn_bonustype;
f676ed3419ae468da6c6831c143b0b54173e08d2ahrens dn->dn_next_blksz[tx->tx_txg & TXG_MASK] = dn->dn_datablksz;
fa9e4066f08beec538e775443c5be79dd423fcabahrensdnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
fa9e4066f08beec538e775443c5be79dd423fcabahrens dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
b515258426fed6c7311fd3f1dea697cfbd4085c6Matthew Ahrens spa_maxblocksize(dmu_objset_spa(dn->dn_objset)));
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx));
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
06e0070d70ba2ee95f5aa2645423eb2cf1546788Mark Shellenbaum (bonustype != DMU_OT_NONE && bonuslen != 0) ||
06e0070d70ba2ee95f5aa2645423eb2cf1546788Mark Shellenbaum (bonustype == DMU_OT_SA && bonuslen == 0));
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock /* clean up any unreferenced dbufs */
2bf405a25eb25f79638fc951ff8d8857ad384417Mark Maybee /* change blocksize */
2bf405a25eb25f79638fc951ff8d8857ad384417Mark Maybee dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = blocksize;
2bf405a25eb25f79638fc951ff8d8857ad384417Mark Maybee dn->dn_next_bonuslen[tx->tx_txg&TXG_MASK] = bonuslen;
06e0070d70ba2ee95f5aa2645423eb2cf1546788Mark Shellenbaum if (bonustype == DMU_OT_SA) /* Maximize bonus space for SA */
06e0070d70ba2ee95f5aa2645423eb2cf1546788Mark Shellenbaum nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum dn->dn_next_bonustype[tx->tx_txg&TXG_MASK] = bonustype;
da03de9920a5a87150a121e9851479c6b3364d8aMark Maybee dn->dn_next_nblkptr[tx->tx_txg&TXG_MASK] = nblkptr;
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* change type */
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* change bonus size and type */
da03de9920a5a87150a121e9851479c6b3364d8aMark Maybee /* fix up the bonus db_size */
1934e92fc930c49429ad71a8ca97340f33227e78maybee DN_MAX_BONUSLEN - (dn->dn_nblkptr-1) * sizeof (blkptr_t);
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Ericksonstatic struct {
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson#endif /* DNODE_STATS */
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson ASSERT(!RW_LOCK_HELD(&odn->dn_zfetch.zf_rwlock));
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson /* Copy fields. */
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson bcopy(&odn->dn_next_nblkptr[0], &ndn->dn_next_nblkptr[0],
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson bcopy(&odn->dn_next_nlevels[0], &ndn->dn_next_nlevels[0],
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson bcopy(&odn->dn_next_indblkshift[0], &ndn->dn_next_indblkshift[0],
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson bcopy(&odn->dn_next_bonustype[0], &ndn->dn_next_bonustype[0],
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson bcopy(&odn->dn_rm_spillblk[0], &ndn->dn_rm_spillblk[0],
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson bcopy(&odn->dn_next_bonuslen[0], &ndn->dn_next_bonuslen[0],
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson bcopy(&odn->dn_next_blksz[0], &ndn->dn_next_blksz[0],
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson for (i = 0; i < TXG_SIZE; i++) {
bf16b11e8deb633dd6c4296d46e92399d1582df4Matthew Ahrens bcopy(&odn->dn_free_ranges[0], &ndn->dn_free_ranges[0],
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset;
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson ASSERT(refcount_count(&odn->dn_tx_holds) == 0);
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson refcount_transfer(&ndn->dn_holds, &odn->dn_holds);
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson list_move_tail(&ndn->dn_zfetch.zf_stream, &odn->dn_zfetch.zf_stream);
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson ndn->dn_zfetch.zf_dnode = odn->dn_zfetch.zf_dnode;
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * Update back pointers. Updating the handle fixes the back pointer of
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * every descendant dbuf as well as the bonus dbuf.
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * Invalidate the original dnode by clearing all of its back pointers.
0f6d88aded0d165f5954688a9b13bac76c38da84Alex Reece avl_create(&odn->dn_dbufs, dbuf_compare, sizeof (dmu_buf_impl_t),
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * Set the low bit of the objset pointer to ensure that dnode_move()
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * recognizes the dnode as invalid in any subsequent callback.
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * Satisfy the destructor.
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson for (i = 0; i < TXG_SIZE; i++) {
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * Mark the dnode.
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Ericksondnode_move(void *buf, void *newbuf, size_t size, void *arg)
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * The dnode is on the objset's list of known dnodes if the objset
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * pointer is valid. We set the low bit of the objset pointer when
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * freeing the dnode to invalidate it, and the memory patterns written
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * by kmem (baddcafe and deadbeef) set at least one of the two low bits.
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * A newly created dnode sets the objset pointer last of all to indicate
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * that the dnode is known and in a valid state to be moved by this
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson DNODE_STAT_ADD(dnode_move_stats.dms_dnode_invalid);
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * Ensure that the objset does not go away during the move.
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson DNODE_STAT_ADD(dnode_move_stats.dms_dnode_recheck1);
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * If the dnode is still valid, then so is the objset. We know that no
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * valid objset can be freed while we hold os_lock, so we can safely
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * ensure that the objset remains in use.
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * Recheck the objset pointer in case the dnode was removed just before
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * acquiring the lock.
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson DNODE_STAT_ADD(dnode_move_stats.dms_dnode_recheck2);
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * At this point we know that as long as we hold os->os_lock, the dnode
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * cannot be freed and fields within the dnode can be safely accessed.
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * The objset listing this dnode cannot go away as long as this dnode is
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * on its list.
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson DNODE_STAT_ADD(dnode_move_stats.dms_dnode_special);
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson ASSERT(odn->dn_dbuf != NULL); /* only "special" dnodes have no parent */
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * Lock the dnode handle to prevent the dnode from obtaining any new
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * holds. This also prevents the descendant dbufs and the bonus dbuf
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * from accessing the dnode, so that we can discount their holds. The
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * handle is safe to access because we know that while the dnode cannot
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * go away, neither can its handle. Once we hold dnh_zrlock, we can
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * safely move any dnode referenced only by dbufs.
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson if (!zrl_tryenter(&odn->dn_handle->dnh_zrlock)) {
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson DNODE_STAT_ADD(dnode_move_stats.dms_dnode_handle);
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * Ensure a consistent view of the dnode's holds and the dnode's dbufs.
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * We need to guarantee that there is a hold for every dbuf in order to
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * determine whether the dnode is actively referenced. Falsely matching
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * a dbuf to an active hold would lead to an unsafe move. It's possible
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * that a thread already having an active dnode hold is about to add a
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * dbuf, and we can't compare hold and dbuf counts while the add is in
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson if (!rw_tryenter(&odn->dn_struct_rwlock, RW_WRITER)) {
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson DNODE_STAT_ADD(dnode_move_stats.dms_dnode_rwlock);
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * A dbuf may be removed (evicted) without an active dnode hold. In that
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * case, the dbuf count is decremented under the handle lock before the
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * dbuf's hold is released. This order ensures that if we count the hold
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * after the dbuf is removed but before its hold is released, we will
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * treat the unmatched hold as active and exit safely. If we count the
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * hold before the dbuf is removed, the hold is discounted, and the
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * removal is blocked until the move completes.
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson /* We can't have more dbufs than dnode holds. */
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson DTRACE_PROBE3(dnode__move, dnode_t *, odn, int64_t, refcount,
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson DNODE_STAT_ADD(dnode_move_stats.dms_dnode_active);
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * At this point we know that anyone with a hold on the dnode is not
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * actively referencing it. The dnode is known and in a valid state to
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * move. We're holding the locks needed to execute the critical section.
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson list_link_replace(&odn->dn_link, &ndn->dn_link);
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson /* If the dnode was safe to move, the refcount cannot have changed. */
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson ASSERT(refcount == refcount_count(&ndn->dn_holds));
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson zrl_exit(&ndn->dn_handle->dnh_zrlock); /* handle has moved */
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson#endif /* _KERNEL */
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock * Wait for final references to the dnode to clear. This can
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock * only happen if the arc is asyncronously evicting state that
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock * has a hold on this dnode while we are trying to evict this
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Ericksondnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object,
bc9014e6a81272073b9854d9f65dd59e18d18c35Justin Gibbs for (i = 0; i < children_dnodes->dnc_count; i++) {
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson dnode_handle_t *dnh = &children_dnodes->dnc_children[i];
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * The dnode handle lock guards against the dnode moving to
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * another valid address, so there is no need here to guard
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * against changes to or from NULL.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * If there are holds on this dnode, then there should
fa9e4066f08beec538e775443c5be79dd423fcabahrens * be holds on the dnode's containing dbuf as well; thus
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * it wouldn't be eligible for eviction and this function
fa9e4066f08beec538e775443c5be79dd423fcabahrens * would not have been called.
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson kmem_free(children_dnodes, sizeof (dnode_children_t) +
bc9014e6a81272073b9854d9f65dd59e18d18c35Justin Gibbs children_dnodes->dnc_count * sizeof (dnode_handle_t));
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock * EINVAL - invalid object number.
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock * EIO - i/o error.
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock * succeeds even for free dnodes.
503ad85c168c7992ccc310af845a581cff3c72b5Matthew Ahrensdnode_hold_impl(objset_t *os, uint64_t object, int flag,
e14bb3258d05c1b1077e2db7cf77088924e56919Jeff Bonwick * If you are holding the spa config lock as writer, you shouldn't
dcba9f3fbefe06ad19972b4de0351924601e5767George Wilson * be asking the DMU to do *anything* unless it's the root pool
dcba9f3fbefe06ad19972b4de0351924601e5767George Wilson * which may require us to read from the root filesystem while
dcba9f3fbefe06ad19972b4de0351924601e5767George Wilson * holding some (not all) of the locks as writer.
dcba9f3fbefe06ad19972b4de0351924601e5767George Wilson ASSERT(spa_config_held(os->os_spa, SCL_ALL, RW_WRITER) == 0 ||
44ecc5327ab4ce0750dcca2a17e05566bf2812e2George Wilson spa_config_held(os->os_spa, SCL_STATE, RW_WRITER)));
148434217c040ea38dc844384f6ba68d9b325906Matthew Ahrens if (object == DMU_USERUSED_OBJECT || object == DMU_GROUPUSED_OBJECT) {
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson DMU_USERUSED_DNODE(os) : DMU_GROUPUSED_DNODE(os);
148434217c040ea38dc844384f6ba68d9b325906Matthew Ahrens if ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE)
148434217c040ea38dc844384f6ba68d9b325906Matthew Ahrens if ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE)
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson ASSERT(mdn->dn_object == DMU_META_DNODE_OBJECT);
a2cdcdd260232b58202b11a9bfc0103c9449ed52Paul Dagnelie blk = dbuf_whichblock(mdn, 0, object * sizeof (dnode_phys_t));
bc9014e6a81272073b9854d9f65dd59e18d18c35Justin Gibbs children_dnodes = kmem_zalloc(sizeof (dnode_children_t) +
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson for (i = 0; i < epb; i++) {
40510e8eba18690b9a9843b26393725eeb0f1dacJosef 'Jeff' Sipek dmu_buf_init_user(&children_dnodes->dnc_dbu, NULL,
bc9014e6a81272073b9854d9f65dd59e18d18c35Justin Gibbs winner = dmu_buf_set_user(&db->db, &children_dnodes->dnc_dbu);
d2b3cbbd7f3a37bc7c01b526d3eb312acd070423Jorgen Lundman for (i = 0; i < epb; i++) {
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson kmem_free(children_dnodes, sizeof (dnode_children_t) +
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson dnode_phys_t *phys = (dnode_phys_t *)db->db.db_data+idx;
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE) ||
28d97a71b8094bcc695c914ba67d41bee3cd3a8aMark Shellenbaum (type != DMU_OT_NONE || !refcount_is_zero(&dn->dn_holds)))) {
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson /* Now we can rely on the hold to prevent the dnode from moving. */
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Return held dnode if the object is allocated, NULL if not.
503ad85c168c7992ccc310af845a581cff3c72b5Matthew Ahrensdnode_hold(objset_t *os, uint64_t object, void *tag, dnode_t **dnp)
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock return (dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, tag, dnp));
1934e92fc930c49429ad71a8ca97340f33227e78maybee * Can only add a reference if there is already at least one
1934e92fc930c49429ad71a8ca97340f33227e78maybee * reference on the dnode. Returns FALSE if unable to add a
1934e92fc930c49429ad71a8ca97340f33227e78maybee * new reference.
cd485b49201b16c079663125308af274b6299e96Justin T. Gibbsdnode_rele_and_unlock(dnode_t *dn, void *tag)
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson /* Get while the hold prevents the dnode from moving. */
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * It's unsafe to release the last hold on a dnode by dnode_rele() or
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * indirectly by dbuf_rele() while relying on the dnode handle to
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * prevent the dnode from moving, since releasing the last hold could
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * result in the dnode's parent dbuf evicting its dnode handles. For
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * that reason anyone calling dnode_rele() or dbuf_rele() without some
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * other direct or indirect hold on the dnode must first drop the dnode
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson ASSERT(refs > 0 || dnh->dnh_zrlock.zr_owner != curthread);
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* NOTE: the DNODE_DNODE does not have a dn_dbuf */
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * Another thread could add a hold to the dnode handle in
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * dnode_hold_impl() while holding the parent dbuf. Since the
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * hold on the parent dbuf prevents the handle from being
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * destroyed, the hold on the handle is OK. We can't yet assert
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * that the handle has zero references, but that will be
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * asserted anyway when the handle gets destroyed.
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(dn->dn_phys->dn_type || dn->dn_allocated_txg);
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson ASSERT(dn->dn_free_txg == 0 || dn->dn_free_txg >= txg);
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum * Determine old uid/gid when necessary
06e0070d70ba2ee95f5aa2645423eb2cf1546788Mark Shellenbaum dmu_objset_userquota_get_ids(dn, B_TRUE, tx);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * If we are already marked dirty, we're done.
c543ec060d1359f6c8a9507242521f344a2ac3efahrens if (list_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens dprintf_ds(os->os_dsl_dataset, "obj=%llu txg=%llu\n",
fa9e4066f08beec538e775443c5be79dd423fcabahrens list_insert_tail(&os->os_free_dnodes[txg&TXG_MASK], dn);
fa9e4066f08beec538e775443c5be79dd423fcabahrens list_insert_tail(&os->os_dirty_dnodes[txg&TXG_MASK], dn);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * The dnode maintains a hold on its containing dbuf as
fa9e4066f08beec538e775443c5be79dd423fcabahrens * long as there are holds on it. Each instantiated child
744947dc83c634d985ed3ad79ac9c5e28d1865fdTom Erickson * dbuf maintains a hold on the dnode. When the last child
fa9e4066f08beec538e775443c5be79dd423fcabahrens * drops its hold, the dnode will drop its hold on the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * containing dbuf. We add a "dirty hold" here so that the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * dnode will hang around after we finish processing its
fa9e4066f08beec538e775443c5be79dd423fcabahrens * children.
1934e92fc930c49429ad71a8ca97340f33227e78maybee VERIFY(dnode_add_ref(dn, (void *)(uintptr_t)tx->tx_txg));
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* we should be the only holder... hopefully */
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* ASSERT3U(refcount_count(&dn->dn_holds), ==, 1); */
fa9e4066f08beec538e775443c5be79dd423fcabahrens * If the dnode is already dirty, it needs to be moved from
fa9e4066f08beec538e775443c5be79dd423fcabahrens * the dirty list to the free list.
c543ec060d1359f6c8a9507242521f344a2ac3efahrens list_remove(&dn->dn_objset->os_dirty_dnodes[txgoff], dn);
c543ec060d1359f6c8a9507242521f344a2ac3efahrens list_insert_tail(&dn->dn_objset->os_free_dnodes[txgoff], dn);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Try to change the block size for the indicated dnode. This can only
fa9e4066f08beec538e775443c5be79dd423fcabahrens * succeed if there are no blocks allocated or dirty beyond first block
fa9e4066f08beec538e775443c5be79dd423fcabahrensdnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx)
b515258426fed6c7311fd3f1dea697cfbd4085c6Matthew Ahrens ASSERT3U(size, <=, spa_maxblocksize(dmu_objset_spa(dn->dn_objset)));
b143e04be7f04fe2274dda9b7004bc95e860f761ahrens if (size >> SPA_MINBLOCKSHIFT == dn->dn_datablkszsec && ibs == 0)
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* Check for any allocated blocks beyond the first */
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum if (db->db_blkid != 0 && db->db_blkid != DMU_BONUS_BLKID &&
cdb0ab79ea1af7b8fc339a04d4bf7426dc77ec4emaybee /* resize the old block */
a2cdcdd260232b58202b11a9bfc0103c9449ed52Paul Dagnelie err = dbuf_hold_impl(dn, 0, 0, TRUE, FALSE, FTAG, &db);
cdb0ab79ea1af7b8fc339a04d4bf7426dc77ec4emaybee /* rele after we have fixed the blocksize in the dnode */
b143e04be7f04fe2274dda9b7004bc95e860f761ahrens return (0);
8346f03fb3b709a113891e0aec5f1f8d45f8e758Jonathan W Adams/* read-holding callers must not rely on the lock being continuously held */
8346f03fb3b709a113891e0aec5f1f8d45f8e758Jonathan W Adamsdnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
8346f03fb3b709a113891e0aec5f1f8d45f8e758Jonathan W Adams * if we have a read-lock, check to see if we need to do any work
8346f03fb3b709a113891e0aec5f1f8d45f8e758Jonathan W Adams * before upgrading to a write-lock.
8346f03fb3b709a113891e0aec5f1f8d45f8e758Jonathan W Adams if (!rw_tryupgrade(&dn->dn_struct_rwlock)) {
8346f03fb3b709a113891e0aec5f1f8d45f8e758Jonathan W Adams rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
c543ec060d1359f6c8a9507242521f344a2ac3efahrens * Compute the number of levels necessary to support the new maxblkid.
c543ec060d1359f6c8a9507242521f344a2ac3efahrens ASSERT3U(new_nlevels, >, dn->dn_next_nlevels[txgoff]);
c717a56157ae0e6fca6a1e3689ae1edc385716a3maybee /* dirty the left indirects */
c717a56157ae0e6fca6a1e3689ae1edc385716a3maybee /* transfer the dirty records to the new indirect */
c717a56157ae0e6fca6a1e3689ae1edc385716a3maybee dr_next = list_next(&dn->dn_dirty_records[txgoff], dr);
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID &&
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) {
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrensdnode_dirty_l1(dnode_t *dn, uint64_t l1blkid, dmu_tx_t *tx)
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens dmu_buf_impl_t *db = dbuf_hold_level(dn, 1, l1blkid, FTAG);
fa9e4066f08beec538e775443c5be79dd423fcabahrensdnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
fa9e4066f08beec538e775443c5be79dd423fcabahrens * First, block align the region to free:
43466aae47bfcd2ad9bf501faec8e75c08095e4fMax Grossman * Freeing the whole block; fast-track this request.
43466aae47bfcd2ad9bf501faec8e75c08095e4fMax Grossman * Note that we won't dirty any indirect blocks,
43466aae47bfcd2ad9bf501faec8e75c08095e4fMax Grossman * which is fine because we will be freeing the entire
43466aae47bfcd2ad9bf501faec8e75c08095e4fMax Grossman * file and thus all indirect blocks will be freed
43466aae47bfcd2ad9bf501faec8e75c08095e4fMax Grossman * by free_children().
cdb0ab79ea1af7b8fc339a04d4bf7426dc77ec4emaybee /* Freeing past end-of-data */
b143e04be7f04fe2274dda9b7004bc95e860f761ahrens /* Freeing part of the block. */
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* zero out any partial block data at the start of the range */
a2cdcdd260232b58202b11a9bfc0103c9449ed52Paul Dagnelie if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off),
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* don't dirty if it isn't on disk and isn't dirty */
b143e04be7f04fe2274dda9b7004bc95e860f761ahrens /* If the range was less than one block, we're done */
cdb0ab79ea1af7b8fc339a04d4bf7426dc77ec4emaybee /* If the remaining range is past end of file, we're done */
cdb0ab79ea1af7b8fc339a04d4bf7426dc77ec4emaybee /* zero out any partial block data at the end of the range */
a2cdcdd260232b58202b11a9bfc0103c9449ed52Paul Dagnelie if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off+len),
cdb0ab79ea1af7b8fc339a04d4bf7426dc77ec4emaybee /* don't dirty if not on disk and not dirty */
cdb0ab79ea1af7b8fc339a04d4bf7426dc77ec4emaybee /* If the range did not include a full block, we are done */
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * Dirty all the indirect blocks in this range. Note that only
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * the first and last indirect blocks can actually be written
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * (if they were partially freed) -- they must be dirtied, even if
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * they do not exist on disk yet. The interior blocks will
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * be freed by free_children(), so they will not actually be written.
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * Even though these interior blocks will not be written, we
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * dirty them for two reasons:
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * - It ensures that the indirect blocks remain in memory until
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * syncing context. (They have already been prefetched by
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * dmu_tx_hold_free(), so we don't have to worry about reading
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * them serially here.)
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * - The dirty space accounting will put pressure on the txg sync
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * mechanism to begin syncing, and to delay transactions if there
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * is a large amount of freeing. Even though these indirect
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * blocks will not be written, we could need to write the same
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * amount of space if we copy the freed BPs into deadlists.
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens int shift = dn->dn_datablkshift + dn->dn_indblkshift -
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens for (uint64_t i = first + 1; i < last; i++) {
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * Set i to the blockid of the next non-hole
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * level-1 indirect block at or after i. Note
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * that dnode_next_offset() operates in terms of
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * level-0-equivalent bytes.
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens int err = dnode_next_offset(dn, DNODE_FIND_HAVELOCK,
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * Normally we should not see an error, either
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * from dnode_next_offset() or dbuf_hold_level()
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * (except for ESRCH from dnode_next_offset).
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * If there is an i/o error, then when we read
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * this block in syncing context, it will use
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * ZIO_FLAG_MUSTSUCCEED, and thus hang/panic according
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * to the "failmode" property. dnode_next_offset()
46e1baa6cf6d5432f5fd231bb588df8f9570c858Matthew Ahrens * doesn't have a flag to indicate MUSTSUCCEED.
cdb0ab79ea1af7b8fc339a04d4bf7426dc77ec4emaybee * Add this range to the dnode range list.
cdb0ab79ea1af7b8fc339a04d4bf7426dc77ec4emaybee * We will finish up this free operation in the syncing phase.
bf16b11e8deb633dd6c4296d46e92399d1582df4Matthew Ahrens range_tree_clear(dn->dn_free_ranges[txgoff], blkid, nblks);
bf16b11e8deb633dd6c4296d46e92399d1582df4Matthew Ahrens range_tree_add(dn->dn_free_ranges[txgoff], blkid, nblks);
bf16b11e8deb633dd6c4296d46e92399d1582df4Matthew Ahrens dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n",
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum for (i = 0; i < TXG_SIZE; i++) {
0a586cea3ceec7e5e50e7e54c745082a7a333ac2Mark Shellenbaum if (dn->dn_rm_spillblk[i] == DN_KILL_SPILLBLK)
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* return TRUE if this blkid was freed in a recent txg, or FALSE if it wasn't */
fa9e4066f08beec538e775443c5be79dd423fcabahrens * If we're in the process of opening the pool, dp will not be
fa9e4066f08beec538e775443c5be79dd423fcabahrens * set yet, but there shouldn't be anything dirty.
fa9e4066f08beec538e775443c5be79dd423fcabahrens for (i = 0; i < TXG_SIZE; i++) {
bf16b11e8deb633dd6c4296d46e92399d1582df4Matthew Ahrens range_tree_contains(dn->dn_free_ranges[i], blkid, 1))
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (i < TXG_SIZE);
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* call from syncing context when we actually write/free space for this dnode */
99653d4ee642c6528e88224f12409a5f23060994eschrock dprintf_dnode(dn, "dn=%p dnp=%p used=%llu delta=%lld\n",
99653d4ee642c6528e88224f12409a5f23060994eschrock ASSERT3U(space + delta, >=, space); /* no overflow */
e7437265dc2a4920c197ed4337665539d358b22cahrens if (spa_version(dn->dn_objset->os_spa) < SPA_VERSION_DNODE_BYTES) {
99653d4ee642c6528e88224f12409a5f23060994eschrock ASSERT((dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) == 0);
69962b5647e4a8b9b14998733b765925381b727eMatthew Ahrens * Call when we think we're going to write/free space in open context to track
69962b5647e4a8b9b14998733b765925381b727eMatthew Ahrens * the amount of memory in use by the currently open txg.
fa9e4066f08beec538e775443c5be79dd423fcabahrensdnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx)
69962b5647e4a8b9b14998733b765925381b727eMatthew Ahrens int64_t aspace = spa_get_asize(os->os_spa, space);
69962b5647e4a8b9b14998733b765925381b727eMatthew Ahrens dsl_dir_willuse_space(ds->ds_dir, aspace, tx);
69962b5647e4a8b9b14998733b765925381b727eMatthew Ahrens dsl_pool_dirty_space(dmu_tx_pool(tx), space, tx);
f7170741490edba9d1d9c697c177c887172bc741Will Andrews * Scans a block at the indicated "level" looking for a hole or data,
f7170741490edba9d1d9c697c177c887172bc741Will Andrews * depending on 'flags'.
f7170741490edba9d1d9c697c177c887172bc741Will Andrews * If level > 0, then we are scanning an indirect block looking at its
f7170741490edba9d1d9c697c177c887172bc741Will Andrews * pointers. If level == 0, then we are looking at a block of dnodes.
f7170741490edba9d1d9c697c177c887172bc741Will Andrews * If we don't find what we are looking for in the block, we return ESRCH.
f7170741490edba9d1d9c697c177c887172bc741Will Andrews * Otherwise, return with *offset pointing to the beginning (if searching
f7170741490edba9d1d9c697c177c887172bc741Will Andrews * forwards) or end (if searching backwards) of the range covered by the
f7170741490edba9d1d9c697c177c887172bc741Will Andrews * block pointer we matched on (or dnode).
762562058c49df3abfc6555f5b8174f3900e5c78Mark Maybee * The basic search algorithm used below by dnode_next_offset() is to
762562058c49df3abfc6555f5b8174f3900e5c78Mark Maybee * use this function to search up the block tree (widen the search) until
762562058c49df3abfc6555f5b8174f3900e5c78Mark Maybee * we find something (i.e., we don't return ESRCH) and then search back
762562058c49df3abfc6555f5b8174f3900e5c78Mark Maybee * down the tree (narrow the search) until we reach our original search
cdb0ab79ea1af7b8fc339a04d4bf7426dc77ec4emaybeednode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
fa9e4066f08beec538e775443c5be79dd423fcabahrens uint64_t epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
fa9e4066f08beec538e775443c5be79dd423fcabahrens dprintf("probing object %llu offset %llx level %d of %u\n",
fa9e4066f08beec538e775443c5be79dd423fcabahrens dn->dn_object, *offset, lvl, dn->dn_phys->dn_nlevels);
a2cdcdd260232b58202b11a9bfc0103c9449ed52Paul Dagnelie uint64_t blkid = dbuf_whichblock(dn, lvl, *offset);
a2cdcdd260232b58202b11a9bfc0103c9449ed52Paul Dagnelie error = dbuf_hold_impl(dn, lvl, blkid, TRUE, FALSE, FTAG, &db);
1c8564a7573482b45fcc6f9bc0c2de70f92c193cMark Maybee * This can only happen when we are searching up
1c8564a7573482b45fcc6f9bc0c2de70f92c193cMark Maybee * the block tree for data. We don't really need to
1c8564a7573482b45fcc6f9bc0c2de70f92c193cMark Maybee * adjust the offset, as we will just end up looking
1c8564a7573482b45fcc6f9bc0c2de70f92c193cMark Maybee * at the pointer to this block in its parent, and it's
1c8564a7573482b45fcc6f9bc0c2de70f92c193cMark Maybee * going to be unallocated, so we will skip over it.
98572ac1e6b4d1bdba6ab2df94e0a0c37df56337ahrens error = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_HAVESTRUCT);
43466aae47bfcd2ad9bf501faec8e75c08095e4fMax Grossman if (db != NULL && txg != 0 && (db->db_blkptr == NULL ||
1c8564a7573482b45fcc6f9bc0c2de70f92c193cMark Maybee * This can only happen when we are searching up the tree
1c8564a7573482b45fcc6f9bc0c2de70f92c193cMark Maybee * and these conditions mean that we need to keep climbing.
6754306ec9a89fd28806908d10c76141e8fbba3fahrens } else if (lvl == 0) {
08f3f137579a3563791f39cd2aff588a251a723cJonathan W Adams if ((dnp[i].dn_type == DMU_OT_NONE) == hole)
cdb0ab79ea1af7b8fc339a04d4bf7426dc77ec4emaybee if (i < 0 || i == blkfill)
762562058c49df3abfc6555f5b8174f3900e5c78Mark Maybee /* traversing backwards; position offset at the end */
762562058c49df3abfc6555f5b8174f3900e5c78Mark Maybee *offset = MIN(*offset + (1ULL << span) - 1, start);
762562058c49df3abfc6555f5b8174f3900e5c78Mark Maybee if (i < 0 || i >= epb)
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Find the next hole, data, or sparse region at or after *offset.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * The value 'blkfill' tells us how many items we expect to find
fa9e4066f08beec538e775443c5be79dd423fcabahrens * in an L0 data block; this value is 1 for normal objects,
fa9e4066f08beec538e775443c5be79dd423fcabahrens * DNODES_PER_BLOCK for the meta dnode, and some fraction of
fa9e4066f08beec538e775443c5be79dd423fcabahrens * DNODES_PER_BLOCK when searching for sparse regions thereof.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Examples:
cdb0ab79ea1af7b8fc339a04d4bf7426dc77ec4emaybee * dnode_next_offset(dn, flags, offset, 1, 1, 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Used in dmu_offset_next().
cdb0ab79ea1af7b8fc339a04d4bf7426dc77ec4emaybee * dnode_next_offset(mdn, flags, offset, 0, DNODES_PER_BLOCK, txg);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Finds the next free/allocated dnode in an objset's meta-dnode.
6754306ec9a89fd28806908d10c76141e8fbba3fahrens * Only finds objects that have new contents since txg (ie.
6754306ec9a89fd28806908d10c76141e8fbba3fahrens * bonus buffer changes and content removal are ignored).
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Used in dmu_object_next().
cdb0ab79ea1af7b8fc339a04d4bf7426dc77ec4emaybee * dnode_next_offset(mdn, DNODE_FIND_HOLE, offset, 2, DNODES_PER_BLOCK >> 2, 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Finds the next L2 meta-dnode bp that's at most 1/4 full.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Used in dmu_object_alloc().
cdb0ab79ea1af7b8fc339a04d4bf7426dc77ec4emaybeednode_next_offset(dnode_t *dn, int flags, uint64_t *offset,
0fbc0cd0e52a11f6c4397a1714f94412cbf98b60Matthew Ahrens * There's always a "virtual hole" at the end of the object, even
0fbc0cd0e52a11f6c4397a1714f94412cbf98b60Matthew Ahrens * if all BP's which physically exist are non-holes.
0fbc0cd0e52a11f6c4397a1714f94412cbf98b60Matthew Ahrens if ((flags & DNODE_FIND_HOLE) && error == ESRCH && txg == 0 &&
0fbc0cd0e52a11f6c4397a1714f94412cbf98b60Matthew Ahrens minlvl == 1 && blkfill == 1 && !(flags & DNODE_FIND_BACKWARDS)) {