metaslab.c revision fa9e4066f08beec538e775443c5be79dd423fcab
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/zfs_context.h>
#include <sys/spa_impl.h>
#include <sys/space_map.h>
#include <sys/metaslab_impl.h>
#include <sys/vdev_impl.h>
/*
* ==========================================================================
* Metaslab classes
* ==========================================================================
*/
/*
* Takes no arguments and hands `mc` back to the caller.
* NOTE(review): presumably allocates and initializes a new metaslab class;
* the return type and the statements that set up `mc` are not visible
* here -- confirm against the complete source.
*/
metaslab_class_create(void)
{
return (mc);
}
void
{
}
}
void
{
} else {
}
}
void
{
} else {
}
}
/*
* ==========================================================================
* Metaslab groups
* ==========================================================================
*/
static int
{
return (1);
return (-1);
/*
* If the weights are identical, use the offset to force uniqueness.
*/
return (-1);
return (1);
return (0);
}
{
return (mg);
}
void
{
}
void
{
}
void
{
}
void
{
}
/*
* ==========================================================================
* Metaslabs
* ==========================================================================
*/
void
{
int fm;
}
/*
* If we're opening an existing pool (txg == 0) or creating
* a new one (txg == TXG_INITIAL), all space is available now.
* If we're adding space to an existing pool, the new space
* does not become available until after this txg has synced.
* We enforce this by assigning an initial weight of 0 to new space.
*
* (Transactional allocations for this txg would actually be OK;
* it's intent log allocations that cause trouble. If we wrote
* a log block in this txg and lost power, the log replay would be
* based on the DVA translations that had been synced in txg - 1.
* Those translations would not include this metaslab's vdev.)
*/
if (txg == 0) {
/*
* We're opening the pool. Make the metaslab's
* free space available immediately.
*/
metaslab_sync_done(msp, 0);
} else {
/*
* We're adding a new metaslab to an already-open pool.
* Declare all of the metaslab's space to be free.
*
* Note that older transaction groups cannot allocate
* from this metaslab until its existence is committed,
* because we set ms_last_alloc to the current txg.
*/
/* XXX -- we'll need a call to picker_init here */
}
}
void
{
int fm;
/* XXX -- we'll need a call to picker_fini here */
msp->ms_map_incore = 0;
}
}
/*
* Write a metaslab to disk in the context of the specified transaction group.
*/
void
{
if (smo->smo_object == 0) {
}
(*dirty & MSD_CONDENSE) == 0) {
int i;
/*
* Write out the current state of the allocation
* world. The current metaslab is full, minus
* stuff that's been freed this txg (freed_map),
* minus allocations from txgs in the future.
*/
for (i = 1; i < TXG_CONCURRENT_STATES; i++) {
}
*dirty |= MSD_CONDENSE;
} else {
}
}
}
/*
* Called after a transaction group has completely synced to mark
* all of the metaslab's free space as usable.
*/
void
{
dprintf("%s offset %llx txg %llu\n",
if (txg != 0) {
/* XXX -- we'll need a call to picker_fini here */
/* If we're empty, don't bother sticking around */
if (msp->ms_usable_space == 0) {
msp->ms_map_incore = 0;
weight = 0;
} else {
/* Add the freed blocks to the available space map */
if (msp->ms_map_incore)
else
}
/* Safe to use for allocation now */
msp->ms_last_alloc = 0;
*dirty = 0;
}
}
/*
* The first-fit block picker. No picker_init or picker_fini,
* this is just an experiment to see how it feels to separate out
* the block selection policy from the map updates.
* Note: the 'cursor' argument is a form of PPD.
*/
static uint64_t
{
int tried_once = 0;
return (offset);
}
}
/* If we couldn't find a block after cursor, search again */
if (tried_once == 0) {
tried_once = 1;
*cursor = 0;
goto again;
}
return (-1ULL);
}
static uint64_t
{
if (offset != -1ULL) {
}
return (offset);
}
/*
* Intent log support: upon opening the pool after a crash, notify the SPA
* of blocks that the intent log has allocated for immediate write, but
* which are still considered free by the SPA because the last transaction
* group didn't commit yet.
*/
int
{
int error;
return (ENXIO);
return (ENXIO);
if (DVA_GET_GANG(dva))
if (msp->ms_map_incore == 0) {
if (error) {
return (error);
}
/* XXX -- we'll need a call to picker_init here */
}
}
return (0);
}
static int
{
/*
* Enforce segregation across transaction groups.
*/
/* XXX -- We should probably not assume we know what ms_weight means */
if (msp->ms_last_alloc != 0)
return (0);
return (1);
/* XXX -- the weight test should be in terms of MINFREE */
}
static metaslab_t *
{
break;
return (msp);
}
static metaslab_t *
{
int error;
continue;
}
if (msp->ms_map_incore == 0) {
if (error) {
continue;
}
/* XXX -- we'll need a call to picker_init here */
}
if (*offp != -1ULL) {
}
return (msp);
}
}
return (NULL);
}
/*
* Allocate a block for the specified i/o.
*/
int
{
/*
* Start at the rotor and loop through all mgs until we find something.
* Note that there's no locking on mc_rotor or mc_allocated because
* nothing actually breaks if we miss a few updates -- we just won't
* allocate quite as evenly. It all balances out over time.
*/
do {
/*
* If we've just selected this metaslab group,
* figure out whether the corresponding vdev is
* over- or under-used relative to the pool,
* and set an allocation bias to even it out.
*/
if (mc->mc_allocated == 0) {
/*
* Determine percent used in units of 0..1024.
* (This is just to avoid floating point.)
*/
/*
* Bias by at most +/- 25% of the aliquot.
*/
}
mc->mc_allocated = 0;
}
DVA_SET_GANG(dva, 0);
return (0);
}
mc->mc_allocated = 0;
DVA_SET_VDEV(dva, 0);
DVA_SET_OFFSET(dva, 0);
DVA_SET_GANG(dva, 0);
return (ENOSPC);
}
/*
* Free the block represented by DVA in the context of the specified
* transaction group.
*/
void
{
return;
(u_longlong_t)vdev);
ASSERT(0);
return;
}
ASSERT(0);
return;
}
if (DVA_GET_GANG(dva))
}
}