/* metaslab.c revision bf16b11e8deb633dd6c4296d46e92399d1582df4 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/space_map.h>
#include <sys/metaslab_impl.h>
#include <sys/vdev_impl.h>
#include <sys/spa_impl.h>
/*
* Allow allocations to switch to gang blocks quickly. We do this to
* avoid having to load lots of space_maps in a given txg. There are,
* however, some cases where we want to avoid "fast" ganging and instead
* we want to do an exhaustive search of all metaslabs on this device.
* Currently we don't allow any gang, zil, or dump device related allocations
* to "fast" gang.
*/
/*
 * NOTE(review): both macro bodies were lost in this extraction -- each
 * #define ended in a dangling backslash that swallowed the following
 * line, leaving CAN_FASTGANG expanding into the next #define and
 * METASLAB_ACTIVE_MASK expanding into a comment opener.  The bodies
 * below are restored from the contemporaneous upstream metaslab.c;
 * verify them against the original revision before relying on them.
 */
#define	CAN_FASTGANG(flags) \
	(!((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER | \
	METASLAB_GANG_AVOID)))

#define	METASLAB_WEIGHT_PRIMARY		(1ULL << 63)
#define	METASLAB_WEIGHT_SECONDARY	(1ULL << 62)
#define	METASLAB_ACTIVE_MASK		\
	(METASLAB_WEIGHT_PRIMARY | METASLAB_WEIGHT_SECONDARY)
/*
 * The in-core space map representation is more compact than its on-disk form.
 * The zfs_condense_pct determines how much more compact the in-core
 * space_map representation must be before we compact it on-disk.
 * Values should be greater than or equal to 100.
 *
 * Tunable: expressed as a percentage; the default of 200 means the
 * on-disk form must be at least twice the size of the in-core form
 * before a condense is triggered.
 */
int zfs_condense_pct = 200;
/*
 * This value defines the number of allowed allocation failures per vdev.
 * If a device reaches this threshold in a given txg then we consider skipping
 * allocations on that device. The value of zfs_mg_alloc_failures is computed
 * at pool initialization (presumably in zio_init(), scaled by the number of
 * CPUs -- the original sentence was truncated in this copy; confirm against
 * the upstream source). A value of 0 here means "not yet computed".
 */
int zfs_mg_alloc_failures = 0;
/*
 * The zfs_mg_noalloc_threshold defines which metaslab groups should
 * be eligible for allocation. The value is defined as a percentage of
 * a free space. Metaslab groups that have more free space than
 * zfs_mg_noalloc_threshold are always eligible for allocations. Once
 * a metaslab group's free space is less than or equal to the
 * zfs_mg_noalloc_threshold the allocator will avoid allocating to that
 * group unless all groups in the pool have reached zfs_mg_noalloc_threshold.
 * Once all groups in the pool reach zfs_mg_noalloc_threshold then all
 * groups are allowed to accept allocations. Gang blocks are always
 * eligible to allocate on any metaslab group. The default value of 0 means
 * no metaslab group will be excluded based on this criterion.
 *
 * Tunable: percentage of free space, in the range [0, 100].
 */
int zfs_mg_noalloc_threshold = 0;
/*
 * When set will load all metaslabs when pool is first opened.
 * Debug tunable: boolean (0 = off). Loading every space map up front
 * trades pool-open latency for the ability to verify frees immediately.
 */
int metaslab_debug_load = 0;
/*
 * When set will prevent metaslabs from being unloaded.
 * Debug tunable: boolean (0 = off). Keeping metaslabs loaded pins their
 * in-core range trees in memory.
 */
int metaslab_debug_unload = 0;
/*
* Minimum size which forces the dynamic allocator to change
* it's allocation strategy. Once the space map cannot satisfy
* an allocation of this size then it switches to using more
* aggressive strategy (i.e search by size rather than offset).
*/
/*
 * The minimum free space, in percent, which must be available
 * in a space map to continue allocations in a first-fit fashion.
 * Once the space_map's free space drops below this level we dynamically
 * switch to using best-fit allocations.
 *
 * Tunable for the dynamic (df) allocator; percentage in [0, 100].
 */
int metaslab_df_free_pct = 4;
/*
* A metaslab is considered "free" if it contains a contiguous
* segment which is greater than metaslab_min_alloc_size.
*/
/*
 * Percentage of all cpus that can be used by the metaslab taskq.
 * Tunable: the taskq performs background metaslab loads (see
 * metaslab_preload()); 50 means up to half the CPUs may service it.
 */
int metaslab_load_pct = 50;
/*
* Determines how many txgs a metaslab may remain loaded without having any
* allocations from it. As long as a metaslab continues to be used we will
* keep it loaded.
*/
/*
* Should we be willing to write data to degraded vdevs?
*/
/*
* Max number of metaslabs per group to preload.
*/
/*
*/
/*
*/
/*
* ==========================================================================
* Metaslab classes
* ==========================================================================
*/
{
return (mc);
}
void
{
}
int
{
/*
* Must hold one of the spa_config locks.
*/
return (0);
do {
return (0);
}
void
{
}
{
}
{
return (mc->mc_deferred);
}
{
}
{
}
/*
* ==========================================================================
* Metaslab groups
* ==========================================================================
*/
static int
{
return (1);
return (-1);
/*
* If the weights are identical, use the offset to force uniqueness.
*/
return (-1);
return (1);
return (0);
}
/*
* Update the allocatable flag and the metaslab group's capacity.
* The allocatable flag is set to true if the capacity is below
* the zfs_mg_noalloc_threshold. If a metaslab group transitions
* from allocatable to non-allocatable or vice versa then the metaslab
* group's class is updated to reflect the transition.
*/
static void
{
/*
* The mc_alloc_groups maintains a count of the number of
* groups in this metaslab class that are still above the
* zfs_mg_noalloc_threshold. This is used by the allocating
* threads to determine if they should avoid allocations to
* a given group. The allocator will avoid allocations to a group
* if that group has reached or is below the zfs_mg_noalloc_threshold
* and there are still other groups that are above the threshold.
* When a group transitions from allocatable to non-allocatable or
* vice versa we update the metaslab class to reflect that change.
* When the mc_alloc_groups value drops to 0 that means that all
* groups have reached the zfs_mg_noalloc_threshold making all groups
* eligible for allocations. This effectively means that all devices
* are balanced again.
*/
mc->mc_alloc_groups--;
mc->mc_alloc_groups++;
}
{
mg->mg_activation_count = 0;
return (mg);
}
void
{
/*
* We may have gone below zero with the activation count
* either because we never activated in the first place or
* because we're done, and possibly removing the vdev.
*/
}
void
{
if (++mg->mg_activation_count <= 0)
return;
} else {
}
}
void
{
if (--mg->mg_activation_count != 0) {
return;
}
} else {
}
}
static void
{
}
static void
{
}
static void
{
/*
* Although in principle the weight can be any value, in
* practice we do not use values in the range [1, 510].
*/
}
/*
* Determine if a given metaslab group should skip allocations. A metaslab
* group should avoid allocations if its used capacity has crossed the
* zfs_mg_noalloc_threshold and there is at least one metaslab group
* that can still handle allocations.
*/
static boolean_t
{
/*
* A metaslab group is considered allocatable if its free capacity
* is greater than the set value of zfs_mg_noalloc_threshold, it's
* associated with a slog, or there are no other metaslab groups
* with free capacity greater than zfs_mg_noalloc_threshold.
*/
}
/*
* ==========================================================================
* Range tree callbacks
* ==========================================================================
*/
/*
* Comparison function for the private size-ordered tree. Tree is sorted
* by size, larger sizes at the end of the tree.
*/
static int
{
return (-1);
return (1);
return (-1);
return (1);
return (0);
}
/*
* Create any block allocator specific components. The current allocators
* rely on using both a size-ordered range_tree_t and an array of uint64_t's.
*/
static void
{
}
/*
* Destroy the block allocator specific components.
*/
static void
{
}
static void
{
}
static void
{
}
static void
{
/*
* Normally one would walk the tree freeing nodes along the way.
* Since the nodes are shared with the range trees we can avoid
* walking all nodes and just reinitialize the avl tree. The nodes
* will be freed by the range tree, so we don't want to free them here.
*/
}
/*
 * Callback vector the range tree uses to keep the metaslab's private
 * size-ordered AVL tree in sync with the offset-ordered range tree.
 *
 * NOTE(review): the member initializers appear to have been stripped in
 * this extraction -- upstream populates create/destroy/add/remove/vacate
 * callbacks here.  Confirm against the original revision before use.
 */
static range_tree_ops_t metaslab_rt_ops = {
};
/*
* ==========================================================================
* Metaslab block operations
* ==========================================================================
*/
/*
* Return the maximum contiguous segment within the metaslab.
*/
{
return (0ULL);
}
{
if (start != -1ULL) {
}
return (start);
}
/*
* ==========================================================================
* Common allocator routines
* ==========================================================================
*/
/*
* This is a helper function that can be used by the allocator to find
* a suitable block to allocate. This will search the specified AVL
* tree looking for a block that matches the specified criteria.
*/
static uint64_t
{
return (offset);
}
}
/*
* If we know we've searched the whole map (*cursor == 0), give up.
* Otherwise, reset the cursor to the beginning and try again.
*/
if (*cursor == 0)
return (-1ULL);
*cursor = 0;
}
/*
* ==========================================================================
* The first-fit block allocator
* ==========================================================================
*/
static uint64_t
{
/*
* Find the largest power of 2 block size that evenly divides the
* requested size. This is used to try to allocate blocks with similar
* alignment from the same area of the metaslab (i.e. same cursor
* bucket) but it does not guarantee that other allocations sizes
* may exist in the same region.
*/
}
/* ARGSUSED */
static boolean_t
{
return (B_TRUE);
}
static metaslab_ops_t metaslab_ff_ops = {
};
/*
* ==========================================================================
* Dynamic block allocator -
* Uses the first fit allocation scheme until space get low and then
* adjusts to a best fit allocation method. Uses metaslab_df_alloc_threshold
* and metaslab_df_free_pct to determine when to switch the allocation scheme.
* ==========================================================================
*/
static uint64_t
{
/*
* Find the largest power of 2 block size that evenly divides the
* requested size. This is used to try to allocate blocks with similar
* alignment from the same area of the metaslab (i.e. same cursor
* bucket) but it does not guarantee that other allocations sizes
* may exist in the same region.
*/
return (-1ULL);
/*
* If we're running low on space switch to using the size
* sorted AVL tree (best-fit).
*/
if (max_size < metaslab_df_alloc_threshold ||
t = &msp->ms_size_tree;
*cursor = 0;
}
}
static boolean_t
{
if (max_size >= metaslab_df_alloc_threshold &&
return (B_FALSE);
return (B_TRUE);
}
static metaslab_ops_t metaslab_df_ops = {
};
/*
* ==========================================================================
* Cursor fit block allocator -
* Select the largest region in the metaslab, set the cursor to the beginning
* of the range and the cursor_end to the end of the range. As allocations
* are made advance the cursor. Continue allocating from the cursor until
* the range is exhausted and then find a new range.
* ==========================================================================
*/
static uint64_t
{
return (-1ULL);
}
return (offset);
}
static boolean_t
{
}
static metaslab_ops_t metaslab_cf_ops = {
};
/*
* ==========================================================================
* New dynamic fit allocator -
* Select a region that is large enough to allocate 2^metaslab_ndf_clump_shift
* contiguous blocks. If no region is found then just use the largest segment
* that remains.
* ==========================================================================
*/
/*
* Determines desired number of contiguous blocks (2^metaslab_ndf_clump_shift)
* to request from the allocator.
*/
static uint64_t
{
return (-1ULL);
t = &msp->ms_size_tree;
}
}
return (-1ULL);
}
static boolean_t
{
return (metaslab_block_maxsize(msp) <=
}
static metaslab_ops_t metaslab_ndf_ops = {
};
/*
* ==========================================================================
* Metaslabs
* ==========================================================================
*/
/*
* Wait for any in-progress metaslab loads to complete.
*/
void
{
while (msp->ms_loading) {
}
}
int
{
int error = 0;
/*
* If the space map has not been allocated yet, then treat
* all the space in the metaslab as free and add it to the
* ms_tree.
*/
else
for (int t = 0; t < TXG_DEFER_SIZE; t++) {
}
}
return (error);
}
void
{
}
{
/*
* We only open space map objects that already exist. All others
* will be opened when we finally allocate an object for it.
*/
if (object != 0) {
}
/*
* We create the main range tree here, but we don't create the
* alloctree and freetree until metaslab_sync_done(). This serves
* two purposes: it allows metaslab_sync_done() to detect the
* addition of new space; and for debugging, it ensures that we'd
* data fault on any attempt to use this metaslab before it's ready.
*/
/*
* If we're opening an existing pool (txg == 0) or creating
* a new one (txg == TXG_INITIAL), all space is available now.
* If we're adding space to an existing pool, the new space
* does not become available until after this txg has synced.
*/
if (txg <= TXG_INITIAL)
metaslab_sync_done(msp, 0);
/*
* If metaslab_debug_load is set and we're initializing a metaslab
* that has an allocated space_map object then load the its space
* map so that can verify frees.
*/
}
if (txg != 0) {
}
return (msp);
}
void
{
for (int t = 0; t < TXG_SIZE; t++) {
}
for (int t = 0; t < TXG_DEFER_SIZE; t++) {
}
}
/*
* Apply a weighting factor based on the histogram information for this
* metaslab. The current weighting factor is somewhat arbitrary and requires
* additional investigation. The implementation provides a measure of
* "weighted" free space and gives a higher weighting for larger contiguous
* regions. The weighting factor is determined by counting the number of
* sm_shift sectors that exist in each region represented by the histogram.
* That value is then multiplied by the power of 2 exponent and the sm_shift
* value.
*
* For example, assume the 2^21 histogram bucket has 4 2MB regions and the
* metaslab has an sm_shift value of 9 (512B):
*
* 1) calculate the number of sm_shift sectors in the region:
* 2^21 / 2^9 = 2^12 = 4096 * 4 (number of regions) = 16384
* 2) multiply by the power of 2 exponent and the sm_shift value:
* 16384 * 21 * 9 = 3096576
* This value will be added to the weighting of the metaslab.
*/
static uint64_t
{
int i;
/*
* A null space map means that the entire metaslab is free,
* calculate a weight factor that spans the entire size of the
* metaslab.
*/
}
return (0);
continue;
/*
* Determine the number of sm_shift sectors in the region
* indicated by the histogram. For example, given an
* sm_shift value of 9 (512 bytes) and i = 4 then we know
* that we're looking at an 8K region in the histogram
* (i.e. 9 + 4 = 13, 2^13 = 8192). To figure out the
* number of sm_shift sectors (512 bytes in this example),
* we would take 8192 / 512 = 16. Since the histogram
* is offset by sm_shift we can simply use the value of
* of i to calculate this (i.e. 2^i = 16 where i = 4).
*/
}
}
static uint64_t
{
/*
* This vdev is in the process of being removed so there is nothing
* for us to do here.
*/
if (vd->vdev_removing) {
return (0);
}
/*
* The baseline weight is the metaslab's free space.
*/
/*
* Modern disks have uniform bit density and constant angular velocity.
* Therefore, the outer recording zones are faster (higher bandwidth)
* than the inner zones by the ratio of outer to inner track diameter,
* which is typically around 2:1. We account for this by assigning
* higher weight to lower metaslabs (multiplier ranging from 2x to 1x).
* In effect, this means that we'll select the metaslab with the most
* free bandwidth rather than simply the one with the most free space.
*/
/*
* If this metaslab is one we're actively using, adjust its
* weight to make it preferable to any inactive metaslab so
* we'll polish it off.
*/
}
return (weight);
}
static int
{
if (error) {
return (error);
}
}
}
return (0);
}
static void
{
/*
* If size < SPA_MINBLOCKSIZE, then we will not allocate from
* this metaslab again. In that case, it had better be empty,
* or we would be leaving space on the table.
*/
}
/*
 * Taskq callback: load a metaslab's space map in the background so it is
 * resident before the allocator needs it.
 *
 * NOTE(review): this copy appears truncated -- the declaration of `msp`
 * (presumably `metaslab_t *msp = arg;`), the lock acquisition/release,
 * and the ms_access_txg update are missing.  Confirm against upstream.
 */
static void
metaslab_preload(void *arg)
{
(void) metaslab_load(msp);	/* msp: declaration missing, see NOTE above */
/*
 * Set the ms_access_txg value so that we don't unload it right away.
 * (The assignment itself is absent from this copy.)
 */
}
static void
{
int m = 0;
return;
}
/*
* Prefetch the next potential metaslabs
*/
/* If we have reached our preload limit then we're done */
if (++m > metaslab_preload_limit)
break;
}
}
/*
* Determine if the space map's on-disk footprint is past our tolerance
* for inefficiency. We would like to use the following criteria to make
* our decision:
*
* 1. The size of the space map object should not dramatically increase as a
* result of writing out the free space range tree.
*
* 2. The minimal on-disk space map representation is zfs_condense_pct/100
* times the size than the free space range tree representation
* (i.e. zfs_condense_pct = 110 and in-core = 1MB, minimal = 1.1.MB).
*
* Checking the first condition is tricky since we don't want to walk
* the entire AVL tree calculating the estimated on-disk size. Instead we
* use the size-ordered range tree in the metaslab and calculate the
* size required to write out the largest segment in our free tree. If the
* size required to represent that segment on disk is larger than the space
* map object then we avoid condensing this map.
*
* To determine the second criterion we use a best-case estimate and assume
* each segment can be represented on-disk as a single 64-bit entry. We refer
* to this best-case estimate as the space map's minimal form.
*/
static boolean_t
{
/*
* Use the ms_size_tree range tree, which is ordered by size, to
* obtain the largest segment in the free tree. If the tree is empty
* then we should condense the map.
*/
return (B_TRUE);
/*
* Calculate the number of 64-bit entries this segment would
* require when written to disk. If this single segment would be
* larger on-disk than the entire current on-disk structure, then
* clearly condensing will increase the on-disk structure size.
*/
}
/*
* Condense the on-disk space map representation to its minimized form.
* The minimized form consists of a small number of allocations followed by
* the entries of the free range tree.
*/
static void
{
/*
* Create an range tree that is 100% allocated. We remove segments
* that have been freed in this txg, any deferred frees that exist,
* and any allocation in the future. Removing segments should be
* a relatively inexpensive operation since we expect these trees to
* have a small number of nodes.
*/
/*
* Remove what's been freed in this txg from the condense_tree.
* Since we're in sync_pass 1, we know that all the frees from
* this txg are in the freetree.
*/
for (int t = 0; t < TXG_DEFER_SIZE; t++) {
}
for (int t = 1; t < TXG_CONCURRENT_STATES; t++) {
}
/*
* We're about to drop the metaslab's lock thus allowing
* other consumers to change it's content. Set the
* metaslab's ms_condensing flag to ensure that
* allocations on this metaslab do not occur while we're
* in the middle of committing it to disk. This is only critical
* for the ms_tree as all other range trees use per txg
* views of their content.
*/
/*
* While we would ideally like to create a space_map representation
* that consists only of allocation records, doing so can be
* prohibitively expensive because the in-core free tree can be
* large, and therefore computationally expensive to subtract
* from the condense_tree. Instead we sync out two trees, a cheap
* allocation only tree followed by the in-core free tree. While not
* optimal, this is typically close to optimal, and much cheaper to
* compute.
*/
}
/*
* Write a metaslab to disk in the context of the specified transaction group.
*/
void
{
/*
* This metaslab has just been added so there's no work to do now.
*/
return;
}
if (range_tree_space(alloctree) == 0 &&
range_tree_space(*freetree) == 0)
return;
/*
* The only state that can actually be changing concurrently with
* metaslab_sync() is the metaslab's ms_tree. No other thread can
* be modifying this txg's alloctree, freetree, freed_tree, or
* space_map_phys_t. Therefore, we only hold ms_lock to satify
* space_map ASSERTs. We drop it whenever we call into the DMU,
* because the DMU can call down to us (e.g. via zio_free()) at
* any time.
*/
VERIFY3U(new_object, !=, 0);
}
} else {
}
/*
* When the space map is loaded, we have an accruate
* histogram in the range tree. This gives us an opportunity
* to bring the space map's histogram up-to-date so we clear
* it first before updating it.
*/
} else {
/*
* Since the space map is not loaded we simply update the
* exisiting histogram with what was freed in this txg. This
* means that the on-disk histogram may not have an accurate
* view of the free space but it's close enough to allow
* us to make allocation decisions.
*/
}
/*
* For sync pass 1, we avoid traversing this txg's free range tree
* and instead will just swap the pointers for freetree and
* freed_tree. We can safely do this since the freed_tree is
* guaranteed to be empty on the initial pass.
*/
} else {
}
}
}
/*
* Called after a transaction group has completely synced to mark
* all of the metaslab's free space as usable.
*/
void
{
/*
* If this metaslab is just becoming available, initialize its
* alloctrees, freetrees, and defertree and add its capacity to
* the vdev.
*/
for (int t = 0; t < TXG_SIZE; t++) {
}
for (int t = 0; t < TXG_DEFER_SIZE; t++) {
}
}
/*
* If there's a metaslab_load() in progress, wait for it to complete
* so that we have a consistent view of the in-core space map.
*/
/*
* Move the frees from the defer_tree back to the free
* range tree (if it's loaded). Swap the freed_tree and the
* defer_tree -- this is safe to do because we've just emptied out
* the defer_tree.
*/
if (msp->ms_deferspace != 0) {
/*
* Keep syncing this metaslab until all deferred frees
* are back in circulation.
*/
}
for (int t = 1; t < TXG_CONCURRENT_STATES; t++) {
}
if (!metaslab_debug_unload)
}
}
void
{
/*
* Preload the next potential metaslabs
*/
}
static uint64_t
{
return (1ULL << 63);
return (0);
}
static uint64_t
{
int i;
for (i = 0; i < d; i++) {
break;
}
}
for (;;) {
"requirement: vdev %llu, txg %llu, mg %p, "
"msp %p, psize %llu, asize %llu, "
"failures %llu, weight %llu",
return (-1ULL);
}
/*
* If the selected metaslab is condensing, skip it.
*/
if (msp->ms_condensing)
continue;
break;
min_distance >> 1);
for (i = 0; i < d; i++)
break;
if (i == d)
break;
}
return (-1ULL);
/*
* If we've already reached the allowable number of failed
* allocation attempts on this metaslab group then we
* consider skipping it. We skip it only if we're allowed
* to "fast" gang, the physical size is larger than
* a gang block, and we're attempting to allocate from
* the primary metaslab.
*/
"vdev %llu, txg %llu, mg %p, msp[%llu] %p, "
"psize %llu, asize %llu, failures %llu",
return (-1ULL);
}
/*
* Ensure that the metaslab we have selected is still
* capable of handling our request. It's possible that
* another thread may have changed the weight while we
* were blocked on the metaslab lock.
*/
continue;
}
continue;
}
continue;
}
/*
* If this metaslab is currently condensing then pick again as
* we can't manipulate this metaslab until it's committed
* to disk.
*/
if (msp->ms_condensing) {
continue;
}
break;
}
return (offset);
}
/*
* Allocate a block for the specified i/o.
*/
static int
{
int dshift = 3;
int all_zero;
/*
* For testing, make some blocks above a certain size be gang blocks.
*/
/*
* Start at the rotor and loop through all mgs until we find something.
* Note that there's no locking on mc_rotor or mc_aliquot because
* nothing actually breaks if we miss a few updates -- we just won't
* allocate quite as evenly. It all balances out over time.
*
* If we are doing ditto or log blocks, try to spread them across
* consecutive vdevs. If we're forced to reuse a vdev before we've
* allocated all of our ditto blocks, then try and spread them out on
* that vdev as much as possible. If it turns out to not be possible,
* gradually lower our standards until anything becomes acceptable.
* Also, allocating on consecutive vdevs (as opposed to random vdevs)
* gives us hope of containing our fault domains to something we're
* able to reason about. Otherwise, any two top-level vdev failures
* will guarantee the loss of data. With consecutive allocation,
* only two adjacent top-level vdev failures will result in data loss.
*
* If we are doing gang blocks (hintdva is non-NULL), try to keep
* ourselves on the same vdev as our gang block header. That
* way, we can hope for locality in vdev_cache, plus it makes our
* fault domains something tractable.
*/
if (hintdva) {
/*
* It's possible the vdev we're using as the hint no
* longer exists (i.e. removed). Consult the rotor when
* all else fails.
*/
if (flags & METASLAB_HINTBP_AVOID &&
} else {
}
} else if (d != 0) {
} else {
}
/*
* If the hint put us into the wrong metaslab class, or into a
* metaslab group that has been passivated, just follow the rotor.
*/
top:
do {
/*
* Don't allocate from faulted devices.
*/
if (zio_lock) {
} else {
}
/*
* Determine if the selected metaslab group is eligible
* for allocations. If we're ganging or have requested
* an allocation for the smallest gang block size
* then we don't want to avoid allocating to the this
* metaslab group. If we're in this condition we should
* try to allocate from any device possible so that we
* don't inadvertently return ENOSPC and suspend the pool
* even though space is still available.
*/
if (!allocatable)
goto next;
/*
* Avoid writing single-copy data to a failing vdev
* unless the user instructs us that it is okay.
*/
d == 0 && dshift == 3 &&
goto next;
}
distance = 0;
else
if (offset != -1ULL) {
/*
* If we've just selected this metaslab group,
* figure out whether the corresponding vdev is
* over- or under-used relative to the pool,
* and set an allocation bias to even it out.
*/
if (mc->mc_aliquot == 0) {
/*
* Calculate how much more or less we should
* try to allocate from this device during
* this iteration around the rotor.
* For example, if a device is 80% full
* and the pool is 20% full then we should
* reduce allocations by 60% on this device.
*
* mg_bias = (20 - 80) * 512K / 100 = -307K
*
* This reduces allocations by 307K for this
* iteration.
*/
}
mc->mc_aliquot = 0;
}
return (0);
}
next:
mc->mc_aliquot = 0;
if (!all_zero) {
dshift++;
goto top;
}
if (!allocatable && !zio_lock) {
dshift = 3;
goto top;
}
}
/*
* Free the block represented by DVA in the context of the specified
* transaction group.
*/
static void
{
return;
ASSERT(0);
return;
}
if (DVA_GET_GANG(dva))
if (now) {
} else {
}
}
/*
* Intent log support: upon opening the pool after a crash, notify the SPA
* of blocks that the intent log has allocated for immediate write, but
* which are still considered free by the SPA because the last transaction
* group didn't commit yet.
*/
static int
{
int error = 0;
if (DVA_GET_GANG(dva))
return (error);
}
}
return (0);
}
int
{
int error = 0;
}
for (int d = 0; d < ndvas; d++) {
if (error != 0) {
for (d--; d >= 0; d--) {
}
return (error);
}
}
return (0);
}
void
{
for (int d = 0; d < ndvas; d++)
}
int
{
int error = 0;
if (txg != 0) {
/*
* First do a dry run to make sure all DVAs are claimable,
* so we don't have to unwind from partial failures below.
*/
return (error);
}
for (int d = 0; d < ndvas; d++)
break;
return (error);
}
void
{
if ((zfs_flags & ZFS_DEBUG_ZIO_FREE) == 0)
return;
for (int i = 0; i < BP_GET_NDVAS(bp); i++) {
for (int j = 0; j < TXG_SIZE; j++)
for (int j = 0; j < TXG_DEFER_SIZE; j++)
}
}