fa9e4066f08beec538e775443c5be79dd423fcabahrens * CDDL HEADER START
fa9e4066f08beec538e775443c5be79dd423fcabahrens * The contents of this file are subject to the terms of the
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock * Common Development and Distribution License (the "License").
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock * You may not use this file except in compliance with the License.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
fa9e4066f08beec538e775443c5be79dd423fcabahrens * See the License for the specific language governing permissions
fa9e4066f08beec538e775443c5be79dd423fcabahrens * and limitations under the License.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * When distributing Covered Code, include this CDDL HEADER in each
fa9e4066f08beec538e775443c5be79dd423fcabahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * If applicable, add the following below this CDDL HEADER, with the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * fields enclosed by brackets "[]" replaced with your own identifying
fa9e4066f08beec538e775443c5be79dd423fcabahrens * information: Portions Copyright [yyyy] [name of copyright owner]
fa9e4066f08beec538e775443c5be79dd423fcabahrens * CDDL HEADER END
d6e555bdd793b8bc8fe57d5f12c3d69c813d0661George Wilson * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Use is subject to license terms.
bf16b11e8deb633dd6c4296d46e92399d1582df4Matthew Ahrens * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * The data for a given space map can be kept on blocks of any size.
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * Larger blocks entail fewer i/o operations, but they also cause the
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * DMU to keep more data in-core, and also to waste more i/o bandwidth
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * when only a few blocks have changed since the last transaction group.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Load the on-disk space map into the specified range tree. Segments of maptype
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * are added to the range tree; segments of any other type are removed.
ecc2d604e885a75cc75e647b5641af99d5a6f4a6bonwick * Note: space_map_load() will drop sm_lock across dmu_read() calls.
ecc2d604e885a75cc75e647b5641af99d5a6f4a6bonwick * The caller must be OK with this.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilsonspace_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype)
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson range_tree_add(rt, sm->sm_start, sm->sm_size);
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson bufsize = MAX(sm->sm_blksz, SPA_MINBLOCKSIZE);
a2cdcdd260232b58202b11a9bfc0103c9449ed52Paul Dagnelie dmu_prefetch(sm->sm_os, space_map_object(sm), 0, bufsize,
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson error = dmu_read(sm->sm_os, space_map_object(sm), offset, size,
fa9e4066f08beec538e775443c5be79dd423fcabahrens entry_map_end = entry_map + (size / sizeof (uint64_t));
fa9e4066f08beec538e775443c5be79dd423fcabahrens for (entry = entry_map; entry < entry_map_end; entry++) {
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson offset = (SM_OFFSET_DECODE(e) << sm->sm_shift) +
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson VERIFY0(P2PHASE(offset, 1ULL << sm->sm_shift));
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson VERIFY3U(offset + size, <=, sm->sm_start + sm->sm_size);
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson if (sm->sm_dbuf->db_size != sizeof (space_map_phys_t))
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson bzero(sm->sm_phys->smp_histogram, sizeof (sm->sm_phys->smp_histogram));
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilsonspace_map_histogram_verify(space_map_t *sm, range_tree_t *rt)
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Verify that the in-core range tree does not have any
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * ranges smaller than our sm_shift size.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilsonspace_map_histogram_add(space_map_t *sm, range_tree_t *rt, dmu_tx_t *tx)
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson if (sm->sm_dbuf->db_size != sizeof (space_map_phys_t))
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Transfer the content of the range tree histogram to the space
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * map histogram. The space map histogram contains 32 buckets ranging
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * from 2^sm_shift to 2^(32+sm_shift-1). The range tree,
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * however, can represent ranges from 2^0 to 2^63. Since the space
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * map only cares about allocatable blocks (minimum of 2^sm_shift), we
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * can safely ignore all ranges in the range tree smaller than 2^sm_shift.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson for (int i = sm->sm_shift; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Since the largest histogram bucket in the space map is
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * 2^(32+sm_shift-1), we need to normalize the values in
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * the range tree for any bucket larger than that size. For
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * example, given an sm_shift of 9, ranges larger than 2^40
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * would get normalized as if they were 1TB ranges. If
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * the range tree had a count of 5 in the 2^44 (16TB) bucket,
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * the calculation below would normalize this to 5 * 2^4 (80).
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson rt->rt_histogram[i] << (i - idx - sm->sm_shift);
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Increment the space map's index as long as we haven't
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * reached the maximum bucket size. Accumulate all ranges
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * larger than the max bucket size into the last bucket.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilsonspace_map_entries(space_map_t *sm, range_tree_t *rt)
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * All space_maps always have a debug entry so account for it here.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Traverse the range tree and calculate the number of space map
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * entries that would be required to write out the range tree.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson for (rs = avl_first(t); rs != NULL; rs = AVL_NEXT(t, rs)) {
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson size = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Note: space_map_write() will drop sm_lock across dmu_write() calls.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilsonspace_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens uint64_t expected_entries, actual_entries = 1;
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * This field is no longer necessary since the in-core space map
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * now contains the object number but is maintained for backwards
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * compatibility.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson VERIFY3U(sm->sm_object, ==, sm->sm_phys->smp_object);
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson sm->sm_phys->smp_alloc += range_tree_space(rt);
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson sm->sm_phys->smp_alloc -= range_tree_space(rt);
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson entry_map_end = entry_map + (sm->sm_blksz / sizeof (uint64_t));
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson for (rs = avl_first(t); rs != NULL; rs = AVL_NEXT(t, rs)) {
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson size = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson start = (rs->rs_start - sm->sm_start) >> sm->sm_shift;
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson while (size != 0) {
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson dmu_write(os, space_map_object(sm), sm->sm_phys->smp_objsize,
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson ASSERT3U(expected_entries, ==, actual_entries);
01f55e48fb4d524eaf70687728aa51b7762e2e97George Wilson * Ensure that the space_map's accounting wasn't changed
01f55e48fb4d524eaf70687728aa51b7762e2e97George Wilson * while we were in the middle of writing it out.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson VERIFY3U(nodes, ==, avl_numnodes(&rt->rt_root));
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson error = dmu_bonus_hold(sm->sm_os, sm->sm_object, sm, &sm->sm_dbuf);
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson dmu_object_size_from_db(sm->sm_dbuf, &sm->sm_blksz, &blocks);
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilsonspace_map_open(space_map_t **smp, objset_t *os, uint64_t object,
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson uint64_t start, uint64_t size, uint8_t shift, kmutex_t *lp)
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson sm = kmem_zalloc(sizeof (space_map_t), KM_SLEEP);
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilsonspace_map_truncate(space_map_t *sm, dmu_tx_t *tx)
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * If the space map has the wrong bonus size (because
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * SPA_FEATURE_SPACEMAP_HISTOGRAM has recently been enabled), or
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * the wrong block size (because space_map_blksz has changed),
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * free and re-allocate its object with the updated sizes.
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * Otherwise, just truncate the current object.
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens if ((spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM) &&
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens doi.doi_bonus_size != sizeof (space_map_phys_t)) ||
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens doi.doi_data_block_size != space_map_blksz) {
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson "old bonus %u, old blocksz %u", dmu_tx_get_txg(tx),
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson spa_name(spa), doi.doi_bonus_size, doi.doi_data_block_size);
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens sm->sm_object = space_map_alloc(sm->sm_os, tx);
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens VERIFY0(dmu_free_range(os, space_map_object(sm), 0, -1ULL, tx));
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * If the spacemap is reallocated, its histogram
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * will be reset. Do the same in the common case so that
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * bugs related to the uncommon case do not go unnoticed.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Update the in-core space_map allocation and length values.
2acef22db7808606888f8f92715629ff3ba555b9Matthew Ahrens if (spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
2acef22db7808606888f8f92715629ff3ba555b9Matthew Ahrens spa_feature_incr(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM, tx);
2acef22db7808606888f8f92715629ff3ba555b9Matthew Ahrens if (spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson if (doi.doi_bonus_size != SPACE_MAP_SIZE_V0) {
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson VERIFY3U(dmu_object_free(sm->sm_os, space_map_object(sm), tx), ==, 0);
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Returns the already synced, on-disk allocated space.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Returns the already synced, on-disk length.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Returns the allocated space that is currently syncing.