metaslab.c revision ea8dc4b6d2251b437950c0056bc626b311c73c27
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * CDDL HEADER START
fa9e4066f08beec538e775443c5be79dd423fcabahrens *
fa9e4066f08beec538e775443c5be79dd423fcabahrens * The contents of this file are subject to the terms of the
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock * Common Development and Distribution License (the "License").
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock * You may not use this file except in compliance with the License.
fa9e4066f08beec538e775443c5be79dd423fcabahrens *
fa9e4066f08beec538e775443c5be79dd423fcabahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
fa9e4066f08beec538e775443c5be79dd423fcabahrens * or http://www.opensolaris.org/os/licensing.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * See the License for the specific language governing permissions
fa9e4066f08beec538e775443c5be79dd423fcabahrens * and limitations under the License.
fa9e4066f08beec538e775443c5be79dd423fcabahrens *
fa9e4066f08beec538e775443c5be79dd423fcabahrens * When distributing Covered Code, include this CDDL HEADER in each
fa9e4066f08beec538e775443c5be79dd423fcabahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * If applicable, add the following below this CDDL HEADER, with the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * fields enclosed by brackets "[]" replaced with your own identifying
fa9e4066f08beec538e775443c5be79dd423fcabahrens * information: Portions Copyright [yyyy] [name of copyright owner]
fa9e4066f08beec538e775443c5be79dd423fcabahrens *
fa9e4066f08beec538e775443c5be79dd423fcabahrens * CDDL HEADER END
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Use is subject to license terms.
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens#pragma ident "%Z%%M% %I% %E% SMI"
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/zfs_context.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/spa_impl.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/dmu.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/dmu_tx.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/space_map.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/metaslab_impl.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/vdev_impl.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens#include <sys/zio.h>
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * ==========================================================================
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Metaslab classes
fa9e4066f08beec538e775443c5be79dd423fcabahrens * ==========================================================================
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrensmetaslab_class_t *
fa9e4066f08beec538e775443c5be79dd423fcabahrensmetaslab_class_create(void)
fa9e4066f08beec538e775443c5be79dd423fcabahrens{
fa9e4066f08beec538e775443c5be79dd423fcabahrens metaslab_class_t *mc;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens mc = kmem_zalloc(sizeof (metaslab_class_t), KM_SLEEP);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens mc->mc_rotor = NULL;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (mc);
fa9e4066f08beec538e775443c5be79dd423fcabahrens}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
/*
 * Destroy a metaslab class.  Every group still reachable from the rotor is
 * detached from the class and destroyed, then the class itself is freed.
 */
void
metaslab_class_destroy(metaslab_class_t *mc)
{
	for (;;) {
		metaslab_group_t *victim = mc->mc_rotor;

		if (victim == NULL)
			break;

		/*
		 * Removing the group moves mc_rotor on to the next group,
		 * or sets it to NULL once the last group is gone.
		 */
		metaslab_class_remove(mc, victim);
		metaslab_group_destroy(victim);
	}

	kmem_free(mc, sizeof (*mc));
}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
/*
 * Attach group 'mg' to class 'mc'.
 *
 * The class's groups form a circular doubly-linked list threaded through
 * mg_prev/mg_next.  The new group is spliced in immediately after the
 * current rotor and then becomes the rotor itself.  The group must not
 * already belong to a class.
 */
void
metaslab_class_add(metaslab_class_t *mc, metaslab_group_t *mg)
{
	metaslab_group_t *before = mc->mc_rotor;
	metaslab_group_t *after;

	ASSERT(mg->mg_class == NULL);

	if (before != NULL) {
		/* Splice between the old rotor and its successor. */
		after = before->mg_next;
		mg->mg_prev = before;
		mg->mg_next = after;
		before->mg_next = mg;
		after->mg_prev = mg;
	} else {
		/* First group in the class: a ring of one. */
		mg->mg_prev = mg;
		mg->mg_next = mg;
	}

	mc->mc_rotor = mg;
	mg->mg_class = mc;
}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
/*
 * Detach group 'mg' from class 'mc'.
 *
 * The group is unlinked from the class's circular list and the rotor is
 * moved to the group that followed it (or cleared if 'mg' was the only
 * group).  On return the group's link and class pointers are all NULL.
 * The group must currently belong to 'mc'.
 */
void
metaslab_class_remove(metaslab_class_t *mc, metaslab_group_t *mg)
{
	metaslab_group_t *before, *after;

	ASSERT(mg->mg_class == mc);

	before = mg->mg_prev;
	after = mg->mg_next;

	if (after != mg) {
		/* Other groups remain: close the ring around 'mg'. */
		mc->mc_rotor = after;
		before->mg_next = after;
		after->mg_prev = before;
	} else {
		/* 'mg' pointed at itself, so it was the last group. */
		mc->mc_rotor = NULL;
	}

	mg->mg_prev = NULL;
	mg->mg_next = NULL;
	mg->mg_class = NULL;
}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * ==========================================================================
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Metaslab groups
fa9e4066f08beec538e775443c5be79dd423fcabahrens * ==========================================================================
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrensstatic int
fa9e4066f08beec538e775443c5be79dd423fcabahrensmetaslab_compare(const void *x1, const void *x2)
fa9e4066f08beec538e775443c5be79dd423fcabahrens{
fa9e4066f08beec538e775443c5be79dd423fcabahrens const metaslab_t *m1 = x1;
fa9e4066f08beec538e775443c5be79dd423fcabahrens const metaslab_t *m2 = x2;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (m1->ms_weight < m2->ms_weight)
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (1);
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (m1->ms_weight > m2->ms_weight)
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (-1);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens /*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * If the weights are identical, use the offset to force uniqueness.
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (m1->ms_map.sm_start < m2->ms_map.sm_start)
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (-1);
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (m1->ms_map.sm_start > m2->ms_map.sm_start)
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (1);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT3P(m1, ==, m2);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrensmetaslab_group_t *
fa9e4066f08beec538e775443c5be79dd423fcabahrensmetaslab_group_create(metaslab_class_t *mc, vdev_t *vd)
fa9e4066f08beec538e775443c5be79dd423fcabahrens{
fa9e4066f08beec538e775443c5be79dd423fcabahrens metaslab_group_t *mg;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens mg = kmem_zalloc(sizeof (metaslab_group_t), KM_SLEEP);
fa9e4066f08beec538e775443c5be79dd423fcabahrens mutex_init(&mg->mg_lock, NULL, MUTEX_DEFAULT, NULL);
fa9e4066f08beec538e775443c5be79dd423fcabahrens avl_create(&mg->mg_metaslab_tree, metaslab_compare,
fa9e4066f08beec538e775443c5be79dd423fcabahrens sizeof (metaslab_t), offsetof(struct metaslab, ms_group_node));
fa9e4066f08beec538e775443c5be79dd423fcabahrens mg->mg_aliquot = 2ULL << 20; /* XXX -- tweak me */
fa9e4066f08beec538e775443c5be79dd423fcabahrens mg->mg_vd = vd;
fa9e4066f08beec538e775443c5be79dd423fcabahrens metaslab_class_add(mc, mg);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (mg);
fa9e4066f08beec538e775443c5be79dd423fcabahrens}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
/*
 * Destroy a metaslab group: tear down its metaslab tree and its lock,
 * then free the group structure.  This function does not unlink the
 * group from its class; metaslab_class_destroy() does that before
 * calling here.
 */
void
metaslab_group_destroy(metaslab_group_t *mg)
{
	avl_destroy(&mg->mg_metaslab_tree);
	mutex_destroy(&mg->mg_lock);

	kmem_free(mg, sizeof (*mg));
}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
/*
 * Insert metaslab 'msp' into group 'mg' with the given initial weight.
 * The metaslab must not already belong to a group.  The group lock is
 * held across the update.
 */
void
metaslab_group_add(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight)
{
	mutex_enter(&mg->mg_lock);

	ASSERT(msp->ms_group == NULL);

	/* The weight is the sort key, so set it before inserting. */
	msp->ms_weight = weight;
	msp->ms_group = mg;
	avl_add(&mg->mg_metaslab_tree, msp);

	mutex_exit(&mg->mg_lock);
}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
/*
 * Remove metaslab 'msp' from group 'mg' and clear its group pointer.
 * The metaslab must currently belong to 'mg'.  The group lock is held
 * across the update.
 */
void
metaslab_group_remove(metaslab_group_t *mg, metaslab_t *msp)
{
	mutex_enter(&mg->mg_lock);

	ASSERT(msp->ms_group == mg);

	avl_remove(&mg->mg_metaslab_tree, msp);
	msp->ms_group = NULL;

	mutex_exit(&mg->mg_lock);
}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
/*
 * Give metaslab 'msp' a new weight and reposition it within group 'mg'.
 *
 * The weight is the primary key of the group's AVL tree (see
 * metaslab_compare()), so the metaslab is taken out of the tree before
 * the weight changes and re-inserted afterwards.  The group lock is held
 * across the whole operation.
 */
void
metaslab_group_sort(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight)
{
	mutex_enter(&mg->mg_lock);

	ASSERT(msp->ms_group == mg);

	/* Remove under the old key ... */
	avl_remove(&mg->mg_metaslab_tree, msp);

	/* ... and add back under the new one. */
	msp->ms_weight = weight;
	avl_add(&mg->mg_metaslab_tree, msp);

	mutex_exit(&mg->mg_lock);
}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * ==========================================================================
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Metaslabs
fa9e4066f08beec538e775443c5be79dd423fcabahrens * ==========================================================================
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
/*
 * Create the in-core metaslab described by 'start' and 'size' on the vdev
 * behind group 'mg', add it to the group, and return it through '*mspp'.
 *
 *	mg	group (and therefore vdev) the metaslab belongs to
 *	smo	space map object descriptor for this metaslab
 *	mspp	out: the newly created metaslab
 *	start	starting offset handed to the metaslab's space maps
 *	size	size handed to the metaslab's space maps
 *	txg	0 when opening an existing pool; otherwise the txg in
 *		which this metaslab is being created
 */
void
metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo, metaslab_t **mspp,
    uint64_t start, uint64_t size, uint64_t txg)
{
	vdev_t *vd = mg->mg_vd;
	metaslab_t *msp = kmem_zalloc(sizeof (*msp), KM_SLEEP);
	uint64_t initial_weight;
	int t;

	msp->ms_smo = smo;

	/* The free-space map plus one alloc/free map pair per txg slot. */
	space_map_create(&msp->ms_map, start, size, vd->vdev_ashift,
	    &msp->ms_lock);

	for (t = 0; t < TXG_SIZE; t++) {
		space_map_create(&msp->ms_allocmap[t], start, size,
		    vd->vdev_ashift, &msp->ms_lock);
		space_map_create(&msp->ms_freemap[t], start, size,
		    vd->vdev_ashift, &msp->ms_lock);
	}

	/*
	 * If we're opening an existing pool (txg == 0) or creating
	 * a new one (txg == TXG_INITIAL), all space is available now.
	 * If we're adding space to an existing pool, the new space
	 * does not become available until after this txg has synced.
	 * We enforce this by assigning an initial weight of 0 to new space.
	 *
	 * (Transactional allocations for this txg would actually be OK;
	 * it's intent log allocations that cause trouble.  If we wrote
	 * a log block in this txg and lost power, the log replay would be
	 * based on the DVA translations that had been synced in txg - 1.
	 * Those translations would not include this metaslab's vdev.)
	 */
	initial_weight = (txg > TXG_INITIAL) ? 0 : size;
	metaslab_group_add(mg, msp, initial_weight);

	if (txg != 0) {
		/*
		 * The metaslab is new as of 'txg' (pool creation or space
		 * being added to an already-open pool).
		 * Declare all of the metaslab's space to be free.
		 *
		 * Note that older transaction groups cannot allocate
		 * from this metaslab until its existence is committed,
		 * because we set ms_last_alloc to the current txg.
		 */
		smo->smo_alloc = 0;
		msp->ms_usable_space = size;
		mutex_enter(&msp->ms_lock);
		space_map_add(&msp->ms_map, start, size);
		msp->ms_map_incore = 1;
		mutex_exit(&msp->ms_lock);

		/* XXX -- we'll need a call to picker_init here */
		msp->ms_dirty[txg & TXG_MASK] |= MSD_ADD;
		msp->ms_last_alloc = txg;
		vdev_dirty(vd, VDD_ADD, txg);
		(void) txg_list_add(&vd->vdev_ms_list, msp, txg);
	} else {
		/*
		 * We're opening the pool.  Make the metaslab's
		 * free space available immediately.
		 */
		vdev_space_update(vd, size, smo->smo_alloc);
		metaslab_sync_done(msp, 0);
	}

	*mspp = msp;
}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
/*
 * Tear down a metaslab.
 *
 * The metaslab's size and allocated byte count are subtracted from its
 * vdev's space accounting, the metaslab is removed from its group, all
 * of its space maps are destroyed, and the structure is freed.
 */
void
metaslab_fini(metaslab_t *msp)
{
	metaslab_group_t *group = msp->ms_group;
	int t;

	/* Negated values back this metaslab out of the vdev totals. */
	vdev_space_update(group->mg_vd, -msp->ms_map.sm_size,
	    -msp->ms_smo->smo_alloc);

	metaslab_group_remove(group, msp);

	/* XXX -- we'll need a call to picker_fini here */

	mutex_enter(&msp->ms_lock);

	/* Empty the free-space map before destroying it. */
	space_map_vacate(&msp->ms_map, NULL, NULL);
	msp->ms_map_incore = 0;
	space_map_destroy(&msp->ms_map);

	for (t = 0; t < TXG_SIZE; t++) {
		space_map_destroy(&msp->ms_allocmap[t]);
		space_map_destroy(&msp->ms_freemap[t]);
	}

	mutex_exit(&msp->ms_lock);

	kmem_free(msp, sizeof (*msp));
}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Write a metaslab to disk in the context of the specified transaction group.
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrensvoid
fa9e4066f08beec538e775443c5be79dd423fcabahrensmetaslab_sync(metaslab_t *msp, uint64_t txg)
fa9e4066f08beec538e775443c5be79dd423fcabahrens{
fa9e4066f08beec538e775443c5be79dd423fcabahrens vdev_t *vd = msp->ms_group->mg_vd;
fa9e4066f08beec538e775443c5be79dd423fcabahrens spa_t *spa = vd->vdev_spa;
fa9e4066f08beec538e775443c5be79dd423fcabahrens objset_t *os = spa->spa_meta_objset;
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_t *allocmap = &msp->ms_allocmap[txg & TXG_MASK];
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_t *freemap = &msp->ms_freemap[txg & TXG_MASK];
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_t *freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_obj_t *smo = msp->ms_smo;
fa9e4066f08beec538e775443c5be79dd423fcabahrens uint8_t *dirty = &msp->ms_dirty[txg & TXG_MASK];
fa9e4066f08beec538e775443c5be79dd423fcabahrens uint64_t alloc_delta;
fa9e4066f08beec538e775443c5be79dd423fcabahrens dmu_buf_t *db;
fa9e4066f08beec538e775443c5be79dd423fcabahrens dmu_tx_t *tx;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens dprintf("%s offset %llx\n", vdev_description(vd), msp->ms_map.sm_start);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens mutex_enter(&msp->ms_lock);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (*dirty & MSD_ADD)
fa9e4066f08beec538e775443c5be79dd423fcabahrens vdev_space_update(vd, msp->ms_map.sm_size, 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (*dirty & (MSD_ALLOC | MSD_FREE)) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (smo->smo_object == 0) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(smo->smo_objsize == 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(smo->smo_alloc == 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens smo->smo_object = dmu_object_alloc(os,
fa9e4066f08beec538e775443c5be79dd423fcabahrens DMU_OT_SPACE_MAP, 1 << SPACE_MAP_BLOCKSHIFT,
fa9e4066f08beec538e775443c5be79dd423fcabahrens DMU_OT_SPACE_MAP_HEADER, sizeof (*smo), tx);
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(smo->smo_object != 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens dmu_write(os, vd->vdev_ms_array, sizeof (uint64_t) *
fa9e4066f08beec538e775443c5be79dd423fcabahrens (msp->ms_map.sm_start >> vd->vdev_ms_shift),
fa9e4066f08beec538e775443c5be79dd423fcabahrens sizeof (uint64_t), &smo->smo_object, tx);
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens alloc_delta = allocmap->sm_space - freemap->sm_space;
fa9e4066f08beec538e775443c5be79dd423fcabahrens vdev_space_update(vd, 0, alloc_delta);
fa9e4066f08beec538e775443c5be79dd423fcabahrens smo->smo_alloc += alloc_delta;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (msp->ms_last_alloc == txg && msp->ms_map.sm_space == 0 &&
fa9e4066f08beec538e775443c5be79dd423fcabahrens (*dirty & MSD_CONDENSE) == 0) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_t *sm = &msp->ms_map;
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_t *tsm;
fa9e4066f08beec538e775443c5be79dd423fcabahrens int i;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(msp->ms_map_incore);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_merge(freemap, freed_map);
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_vacate(allocmap, NULL, NULL);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens /*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Write out the current state of the allocation
fa9e4066f08beec538e775443c5be79dd423fcabahrens * world. The current metaslab is full, minus
fa9e4066f08beec538e775443c5be79dd423fcabahrens * stuff that's been freed this txg (freed_map),
fa9e4066f08beec538e775443c5be79dd423fcabahrens * minus allocations from txgs in the future.
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_add(sm, sm->sm_start, sm->sm_size);
fa9e4066f08beec538e775443c5be79dd423fcabahrens for (i = 1; i < TXG_CONCURRENT_STATES; i++) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens tsm = &msp->ms_allocmap[(txg + i) & TXG_MASK];
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_iterate(tsm, space_map_remove, sm);
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_iterate(freed_map, space_map_remove, sm);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_write(sm, smo, os, tx);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(sm->sm_space == 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(freemap->sm_space == 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(allocmap->sm_space == 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens *dirty |= MSD_CONDENSE;
fa9e4066f08beec538e775443c5be79dd423fcabahrens } else {
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_sync(allocmap, NULL, smo, SM_ALLOC, os, tx);
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_sync(freemap, freed_map, smo, SM_FREE,
fa9e4066f08beec538e775443c5be79dd423fcabahrens os, tx);
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock VERIFY(0 == dmu_bonus_hold(os, smo->smo_object, FTAG, &db));
fa9e4066f08beec538e775443c5be79dd423fcabahrens dmu_buf_will_dirty(db, tx);
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT3U(db->db_size, ==, sizeof (*smo));
fa9e4066f08beec538e775443c5be79dd423fcabahrens bcopy(smo, db->db_data, db->db_size);
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock dmu_buf_rele(db, FTAG);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens dmu_tx_commit(tx);
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens *dirty &= ~(MSD_ALLOC | MSD_FREE | MSD_ADD);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens mutex_exit(&msp->ms_lock);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens (void) txg_list_add(&vd->vdev_ms_list, msp, TXG_CLEAN(txg));
fa9e4066f08beec538e775443c5be79dd423fcabahrens}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Called after a transaction group has completely synced to mark
fa9e4066f08beec538e775443c5be79dd423fcabahrens * all of the metaslab's free space as usable.
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrensvoid
fa9e4066f08beec538e775443c5be79dd423fcabahrensmetaslab_sync_done(metaslab_t *msp, uint64_t txg)
fa9e4066f08beec538e775443c5be79dd423fcabahrens{
fa9e4066f08beec538e775443c5be79dd423fcabahrens uint64_t weight;
fa9e4066f08beec538e775443c5be79dd423fcabahrens uint8_t *dirty = &msp->ms_dirty[txg & TXG_MASK];
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_obj_t *smo = msp->ms_smo;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens dprintf("%s offset %llx txg %llu\n",
fa9e4066f08beec538e775443c5be79dd423fcabahrens vdev_description(msp->ms_group->mg_vd), msp->ms_map.sm_start, txg);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens mutex_enter(&msp->ms_lock);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT3U((*dirty & (MSD_ALLOC | MSD_FREE | MSD_ADD)), ==, 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens msp->ms_usable_space = msp->ms_map.sm_size - smo->smo_alloc;
fa9e4066f08beec538e775443c5be79dd423fcabahrens msp->ms_usable_end = smo->smo_objsize;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens weight = msp->ms_usable_space;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (txg != 0) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_t *freed_map =
fa9e4066f08beec538e775443c5be79dd423fcabahrens &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* XXX -- we'll need a call to picker_fini here */
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* If we're empty, don't bother sticking around */
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (msp->ms_usable_space == 0) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_vacate(&msp->ms_map, NULL, NULL);
fa9e4066f08beec538e775443c5be79dd423fcabahrens msp->ms_map_incore = 0;
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT3U(freed_map->sm_space, ==, 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens weight = 0;
fa9e4066f08beec538e775443c5be79dd423fcabahrens } else {
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* Add the freed blocks to the available space map */
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (msp->ms_map_incore)
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_merge(freed_map, &msp->ms_map);
fa9e4066f08beec538e775443c5be79dd423fcabahrens else
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_vacate(freed_map, NULL, NULL);
fa9e4066f08beec538e775443c5be79dd423fcabahrens weight += msp->ms_map.sm_size;
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (msp->ms_last_alloc == txg)
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* Safe to use for allocation now */
fa9e4066f08beec538e775443c5be79dd423fcabahrens msp->ms_last_alloc = 0;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens *dirty = 0;
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens mutex_exit(&msp->ms_lock);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens metaslab_group_sort(msp->ms_group, msp, weight);
fa9e4066f08beec538e775443c5be79dd423fcabahrens}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
/*
 * The first-fit block picker.  No picker_init or picker_fini,
 * this is just an experiment to see how it feels to separate out
 * the block selection policy from the map updates.
 *
 * Starting from *cursor, visit the segments of 'sm' via AVL_NEXT() and
 * return the first offset, rounded up to the lowest set bit of 'size',
 * at which 'size' bytes end at or before the segment's end.  On success
 * *cursor is advanced to just past the chosen range.  If nothing fits,
 * the cursor is reset to 0 and the map is searched once more from the
 * beginning; if that fails as well, -1ULL is returned and *cursor is
 * left at 0.  The map itself is never modified here.
 *
 * Note: the 'cursor' argument is a form of PPD.
 */
static uint64_t
metaslab_pick_block(space_map_t *sm, uint64_t size, uint64_t *cursor)
{
	avl_tree_t *t = &sm->sm_root;
	/* Lowest set bit of size: the alignment applied to candidates. */
	uint64_t align = size & -size;
	space_seg_t *ss, ssearch;
	avl_index_t where;

	for (;;) {
		/*
		 * Probe with the range [cursor, cursor + size); whether an
		 * existing segment counts as a match is decided by the
		 * tree's comparator.  With no match, start from the nearest
		 * segment after the insertion point.
		 */
		ssearch.ss_start = *cursor;
		ssearch.ss_end = *cursor + size;

		ss = avl_find(t, &ssearch, &where);
		if (ss == NULL)
			ss = avl_nearest(t, where, AVL_AFTER);

		for (; ss != NULL; ss = AVL_NEXT(t, ss)) {
			uint64_t offset = P2ROUNDUP(ss->ss_start, align);

			if (offset + size <= ss->ss_end) {
				*cursor = offset + size;
				return (offset);
			}
		}

		/*
		 * A failed pass that started at cursor 0 has already covered
		 * the whole map, so repeating it cannot succeed.  Otherwise
		 * rewind and search the part before the old cursor as well.
		 */
		if (*cursor == 0)
			return (-1ULL);
		*cursor = 0;
	}
}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrensstatic uint64_t
fa9e4066f08beec538e775443c5be79dd423fcabahrensmetaslab_getblock(metaslab_t *msp, uint64_t size, uint64_t txg)
fa9e4066f08beec538e775443c5be79dd423fcabahrens{
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_t *sm = &msp->ms_map;
fa9e4066f08beec538e775443c5be79dd423fcabahrens vdev_t *vd = msp->ms_group->mg_vd;
fa9e4066f08beec538e775443c5be79dd423fcabahrens uint64_t offset;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(MUTEX_HELD(&msp->ms_lock));
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(msp->ms_map_incore);
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(sm->sm_space != 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(P2PHASE(size, 1ULL << vd->vdev_ashift) == 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens offset = metaslab_pick_block(sm, size,
fa9e4066f08beec538e775443c5be79dd423fcabahrens &msp->ms_map_cursor[highbit(size & -size) - vd->vdev_ashift - 1]);
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (offset != -1ULL) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_remove(sm, offset, size);
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size);
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (offset);
fa9e4066f08beec538e775443c5be79dd423fcabahrens}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Intent log support: upon opening the pool after a crash, notify the SPA
fa9e4066f08beec538e775443c5be79dd423fcabahrens * of blocks that the intent log has allocated for immediate write, but
fa9e4066f08beec538e775443c5be79dd423fcabahrens * which are still considered free by the SPA because the last transaction
fa9e4066f08beec538e775443c5be79dd423fcabahrens * group didn't commit yet.
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrensint
fa9e4066f08beec538e775443c5be79dd423fcabahrensmetaslab_claim(spa_t *spa, dva_t *dva, uint64_t txg)
fa9e4066f08beec538e775443c5be79dd423fcabahrens{
fa9e4066f08beec538e775443c5be79dd423fcabahrens uint64_t vdev = DVA_GET_VDEV(dva);
fa9e4066f08beec538e775443c5be79dd423fcabahrens uint64_t offset = DVA_GET_OFFSET(dva);
fa9e4066f08beec538e775443c5be79dd423fcabahrens uint64_t size = DVA_GET_ASIZE(dva);
fa9e4066f08beec538e775443c5be79dd423fcabahrens objset_t *os = spa->spa_meta_objset;
fa9e4066f08beec538e775443c5be79dd423fcabahrens vdev_t *vd;
fa9e4066f08beec538e775443c5be79dd423fcabahrens metaslab_t *msp;
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_t *sm;
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_obj_t *smo;
fa9e4066f08beec538e775443c5be79dd423fcabahrens int error;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens if ((vd = vdev_lookup_top(spa, vdev)) == NULL)
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (ENXIO);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens if ((offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count)
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (ENXIO);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
fa9e4066f08beec538e775443c5be79dd423fcabahrens sm = &msp->ms_map;
fa9e4066f08beec538e775443c5be79dd423fcabahrens smo = msp->ms_smo;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (DVA_GET_GANG(dva))
fa9e4066f08beec538e775443c5be79dd423fcabahrens size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens mutex_enter(&msp->ms_lock);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (msp->ms_map_incore == 0) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens error = space_map_load(sm, smo, SM_FREE, os,
fa9e4066f08beec538e775443c5be79dd423fcabahrens msp->ms_usable_end, sm->sm_size - msp->ms_usable_space);
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(error == 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (error) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens mutex_exit(&msp->ms_lock);
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (error);
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens msp->ms_map_incore = 1;
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* XXX -- we'll need a call to picker_init here */
fa9e4066f08beec538e775443c5be79dd423fcabahrens bzero(msp->ms_map_cursor, sizeof (msp->ms_map_cursor));
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_remove(sm, offset, size);
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens if ((msp->ms_dirty[txg & TXG_MASK] & MSD_ALLOC) == 0) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens msp->ms_dirty[txg & TXG_MASK] |= MSD_ALLOC;
fa9e4066f08beec538e775443c5be79dd423fcabahrens msp->ms_last_alloc = txg;
fa9e4066f08beec538e775443c5be79dd423fcabahrens vdev_dirty(vd, VDD_ALLOC, txg);
fa9e4066f08beec538e775443c5be79dd423fcabahrens (void) txg_list_add(&vd->vdev_ms_list, msp, txg);
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens mutex_exit(&msp->ms_lock);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
/*
 * Decide whether 'msp' may be used to allocate 'size' bytes in 'txg'.
 * Returns nonzero if so, 0 otherwise.
 */
static int
metaslab_usable(metaslab_t *msp, uint64_t size, uint64_t txg)
{
	/*
	 * Every accepting path below also requires the weight to cover
	 * the request, so test that once up front.
	 */
	/* XXX -- We should probably not assume we know what ms_weight means */
	/* XXX -- the weight test should be in terms of MINFREE */
	if (msp->ms_weight < size)
		return (0);

	/*
	 * Enforce segregation across transaction groups: a metaslab that
	 * last allocated in this txg is judged on its in-core free space
	 * alone, and one that last allocated in some other txg is refused.
	 */
	if (msp->ms_last_alloc == txg)
		return (msp->ms_map.sm_space >= size);

	if (msp->ms_last_alloc != 0)
		return (0);

	/* No recorded allocation: either space measure may qualify it. */
	return (msp->ms_map.sm_space >= size ||
	    msp->ms_usable_space >= size);
}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrensstatic metaslab_t *
fa9e4066f08beec538e775443c5be79dd423fcabahrensmetaslab_pick(metaslab_group_t *mg, uint64_t size, uint64_t txg)
fa9e4066f08beec538e775443c5be79dd423fcabahrens{
fa9e4066f08beec538e775443c5be79dd423fcabahrens metaslab_t *msp;
fa9e4066f08beec538e775443c5be79dd423fcabahrens avl_tree_t *t = &mg->mg_metaslab_tree;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens mutex_enter(&mg->mg_lock);
fa9e4066f08beec538e775443c5be79dd423fcabahrens for (msp = avl_first(t); msp != NULL; msp = AVL_NEXT(t, msp))
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (metaslab_usable(msp, size, txg))
fa9e4066f08beec538e775443c5be79dd423fcabahrens break;
fa9e4066f08beec538e775443c5be79dd423fcabahrens mutex_exit(&mg->mg_lock);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (msp);
fa9e4066f08beec538e775443c5be79dd423fcabahrens}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrensstatic metaslab_t *
fa9e4066f08beec538e775443c5be79dd423fcabahrensmetaslab_group_alloc(spa_t *spa, metaslab_group_t *mg, uint64_t size,
fa9e4066f08beec538e775443c5be79dd423fcabahrens uint64_t *offp, uint64_t txg)
fa9e4066f08beec538e775443c5be79dd423fcabahrens{
fa9e4066f08beec538e775443c5be79dd423fcabahrens metaslab_t *msp;
fa9e4066f08beec538e775443c5be79dd423fcabahrens int error;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens while ((msp = metaslab_pick(mg, size, txg)) != NULL) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_obj_t *smo = msp->ms_smo;
fa9e4066f08beec538e775443c5be79dd423fcabahrens mutex_enter(&msp->ms_lock);
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (!metaslab_usable(msp, size, txg)) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens mutex_exit(&msp->ms_lock);
fa9e4066f08beec538e775443c5be79dd423fcabahrens continue;
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (msp->ms_map_incore == 0) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens error = space_map_load(&msp->ms_map, smo, SM_FREE,
fa9e4066f08beec538e775443c5be79dd423fcabahrens spa->spa_meta_objset, msp->ms_usable_end,
fa9e4066f08beec538e775443c5be79dd423fcabahrens msp->ms_map.sm_size - msp->ms_usable_space);
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(error == 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (error) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens mutex_exit(&msp->ms_lock);
fa9e4066f08beec538e775443c5be79dd423fcabahrens metaslab_group_sort(mg, msp, 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens continue;
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens msp->ms_map_incore = 1;
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* XXX -- we'll need a call to picker_init here */
fa9e4066f08beec538e775443c5be79dd423fcabahrens bzero(msp->ms_map_cursor, sizeof (msp->ms_map_cursor));
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens *offp = metaslab_getblock(msp, size, txg);
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (*offp != -1ULL) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens if ((msp->ms_dirty[txg & TXG_MASK] & MSD_ALLOC) == 0) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens vdev_t *vd = mg->mg_vd;
fa9e4066f08beec538e775443c5be79dd423fcabahrens msp->ms_dirty[txg & TXG_MASK] |= MSD_ALLOC;
fa9e4066f08beec538e775443c5be79dd423fcabahrens msp->ms_last_alloc = txg;
fa9e4066f08beec538e775443c5be79dd423fcabahrens vdev_dirty(vd, VDD_ALLOC, txg);
fa9e4066f08beec538e775443c5be79dd423fcabahrens (void) txg_list_add(&vd->vdev_ms_list,
fa9e4066f08beec538e775443c5be79dd423fcabahrens msp, txg);
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens mutex_exit(&msp->ms_lock);
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (msp);
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens mutex_exit(&msp->ms_lock);
fa9e4066f08beec538e775443c5be79dd423fcabahrens metaslab_group_sort(msp->ms_group, msp, size - 1);
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (NULL);
fa9e4066f08beec538e775443c5be79dd423fcabahrens}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Allocate a block for the specified i/o.
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrensint
fa9e4066f08beec538e775443c5be79dd423fcabahrensmetaslab_alloc(spa_t *spa, uint64_t psize, dva_t *dva, uint64_t txg)
fa9e4066f08beec538e775443c5be79dd423fcabahrens{
fa9e4066f08beec538e775443c5be79dd423fcabahrens metaslab_t *msp;
fa9e4066f08beec538e775443c5be79dd423fcabahrens metaslab_group_t *mg, *rotor;
fa9e4066f08beec538e775443c5be79dd423fcabahrens metaslab_class_t *mc;
fa9e4066f08beec538e775443c5be79dd423fcabahrens vdev_t *vd;
fa9e4066f08beec538e775443c5be79dd423fcabahrens uint64_t offset = -1ULL;
fa9e4066f08beec538e775443c5be79dd423fcabahrens uint64_t asize;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens mc = spa_metaslab_class_select(spa);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens /*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Start at the rotor and loop through all mgs until we find something.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Note that there's no locking on mc_rotor or mc_allocated because
fa9e4066f08beec538e775443c5be79dd423fcabahrens * nothing actually breaks if we miss a few updates -- we just won't
fa9e4066f08beec538e775443c5be79dd423fcabahrens * allocate quite as evenly. It all balances out over time.
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrens mg = rotor = mc->mc_rotor;
fa9e4066f08beec538e775443c5be79dd423fcabahrens do {
fa9e4066f08beec538e775443c5be79dd423fcabahrens vd = mg->mg_vd;
fa9e4066f08beec538e775443c5be79dd423fcabahrens asize = vdev_psize_to_asize(vd, psize);
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens msp = metaslab_group_alloc(spa, mg, asize, &offset, txg);
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (msp != NULL) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(offset != -1ULL);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens /*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * If we've just selected this metaslab group,
fa9e4066f08beec538e775443c5be79dd423fcabahrens * figure out whether the corresponding vdev is
fa9e4066f08beec538e775443c5be79dd423fcabahrens * over- or under-used relative to the pool,
fa9e4066f08beec538e775443c5be79dd423fcabahrens * and set an allocation bias to even it out.
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (mc->mc_allocated == 0) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens vdev_stat_t *vs = &vd->vdev_stat;
fa9e4066f08beec538e775443c5be79dd423fcabahrens uint64_t alloc, space;
fa9e4066f08beec538e775443c5be79dd423fcabahrens int64_t vu, su;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens alloc = spa_get_alloc(spa);
fa9e4066f08beec538e775443c5be79dd423fcabahrens space = spa_get_space(spa);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens /*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Determine percent used in units of 0..1024.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * (This is just to avoid floating point.)
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrens vu = (vs->vs_alloc << 10) / (vs->vs_space + 1);
fa9e4066f08beec538e775443c5be79dd423fcabahrens su = (alloc << 10) / (space + 1);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens /*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Bias by at most +/- 25% of the aliquot.
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrens mg->mg_bias = ((su - vu) *
fa9e4066f08beec538e775443c5be79dd423fcabahrens (int64_t)mg->mg_aliquot) / (1024 * 4);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens dprintf("bias = %lld\n", mg->mg_bias);
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (atomic_add_64_nv(&mc->mc_allocated, asize) >=
fa9e4066f08beec538e775443c5be79dd423fcabahrens mg->mg_aliquot + mg->mg_bias) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens mc->mc_rotor = mg->mg_next;
fa9e4066f08beec538e775443c5be79dd423fcabahrens mc->mc_allocated = 0;
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens DVA_SET_VDEV(dva, vd->vdev_id);
fa9e4066f08beec538e775443c5be79dd423fcabahrens DVA_SET_OFFSET(dva, offset);
fa9e4066f08beec538e775443c5be79dd423fcabahrens DVA_SET_GANG(dva, 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens DVA_SET_ASIZE(dva, asize);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens mc->mc_rotor = mg->mg_next;
fa9e4066f08beec538e775443c5be79dd423fcabahrens mc->mc_allocated = 0;
fa9e4066f08beec538e775443c5be79dd423fcabahrens } while ((mg = mg->mg_next) != rotor);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens dprintf("spa=%p, psize=%llu, txg=%llu: no\n", spa, psize, txg);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens DVA_SET_VDEV(dva, 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens DVA_SET_OFFSET(dva, 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens DVA_SET_GANG(dva, 0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (ENOSPC);
fa9e4066f08beec538e775443c5be79dd423fcabahrens}
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Free the block represented by DVA in the context of the specified
fa9e4066f08beec538e775443c5be79dd423fcabahrens * transaction group.
fa9e4066f08beec538e775443c5be79dd423fcabahrens */
fa9e4066f08beec538e775443c5be79dd423fcabahrensvoid
fa9e4066f08beec538e775443c5be79dd423fcabahrensmetaslab_free(spa_t *spa, dva_t *dva, uint64_t txg)
fa9e4066f08beec538e775443c5be79dd423fcabahrens{
fa9e4066f08beec538e775443c5be79dd423fcabahrens uint64_t vdev = DVA_GET_VDEV(dva);
fa9e4066f08beec538e775443c5be79dd423fcabahrens uint64_t offset = DVA_GET_OFFSET(dva);
fa9e4066f08beec538e775443c5be79dd423fcabahrens uint64_t size = DVA_GET_ASIZE(dva);
fa9e4066f08beec538e775443c5be79dd423fcabahrens vdev_t *vd;
fa9e4066f08beec538e775443c5be79dd423fcabahrens metaslab_t *msp;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (txg > spa_freeze_txg(spa))
fa9e4066f08beec538e775443c5be79dd423fcabahrens return;
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens if ((vd = vdev_lookup_top(spa, vdev)) == NULL) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens cmn_err(CE_WARN, "metaslab_free(): bad vdev %llu",
fa9e4066f08beec538e775443c5be79dd423fcabahrens (u_longlong_t)vdev);
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens return;
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens if ((offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens cmn_err(CE_WARN, "metaslab_free(): bad offset %llu",
fa9e4066f08beec538e775443c5be79dd423fcabahrens (u_longlong_t)offset);
fa9e4066f08beec538e775443c5be79dd423fcabahrens ASSERT(0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens return;
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (DVA_GET_GANG(dva))
fa9e4066f08beec538e775443c5be79dd423fcabahrens size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens mutex_enter(&msp->ms_lock);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens if ((msp->ms_dirty[txg & TXG_MASK] & MSD_FREE) == 0) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens msp->ms_dirty[txg & TXG_MASK] |= MSD_FREE;
fa9e4066f08beec538e775443c5be79dd423fcabahrens vdev_dirty(vd, VDD_FREE, txg);
fa9e4066f08beec538e775443c5be79dd423fcabahrens (void) txg_list_add(&vd->vdev_ms_list, msp, txg);
fa9e4066f08beec538e775443c5be79dd423fcabahrens }
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens space_map_add(&msp->ms_freemap[txg & TXG_MASK], offset, size);
fa9e4066f08beec538e775443c5be79dd423fcabahrens
fa9e4066f08beec538e775443c5be79dd423fcabahrens mutex_exit(&msp->ms_lock);
fa9e4066f08beec538e775443c5be79dd423fcabahrens}