spa.h revision 43466aae47bfcd2ad9bf501faec8e75c08095e4f
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _SYS_SPA_H
#define _SYS_SPA_H
#include <sys/zfs_context.h>
#include <sys/sysmacros.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Forward references that lots of things need.
*/
typedef struct metaslab metaslab_t;
typedef struct metaslab_group metaslab_group_t;
typedef struct metaslab_class metaslab_class_t;
typedef struct spa_aux_vdev spa_aux_vdev_t;
typedef struct ddt_entry ddt_entry_t;
struct dsl_pool;
struct dsl_dataset;
/*
* General-purpose 32-bit and 64-bit bitfield encodings.
*/
/*
* We currently support nine block sizes, from 512 bytes to 128K.
* We could go higher, but the benefits are near-zero and the cost
* of COWing a giant block to modify one byte would become excessive.
*/
#define SPA_MINBLOCKSHIFT 9
#define SPA_MAXBLOCKSHIFT 17
/*
* Size of block to hold the configuration data (a packed nvlist)
*/
/*
* The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
* The ASIZE encoding should be at least 64 times larger (6 more bits)
* to support up to 4-way RAID-Z mirror mode with worst-case gang block
* overhead, three DVAs per bp, plus one more bit in case we do anything
* else that expands the ASIZE.
*/
/*
* All SPA data is represented by 128-bit data virtual addresses (DVAs).
* The members of the dva_t should be considered opaque outside the SPA.
*/
typedef struct dva {
} dva_t;
/*
* Each block has a 256-bit checksum -- strong enough for cryptographic hashes.
*/
typedef struct zio_cksum {
} zio_cksum_t;
/*
* Each block is described by its DVAs, time of birth, checksum, etc.
* The word-by-word, bit-by-bit layout of the blkptr is as follows:
*
* 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 0 | vdev1 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 1 |G| offset1 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 2 | vdev2 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 3 |G| offset2 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 4 | vdev3 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 5 |G| offset3 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 6 |BDX|lvl| type | cksum | comp | PSIZE | LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 7 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 8 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 9 | physical birth txg |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* a | logical birth txg |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* b | fill count |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* c | checksum[0] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* d | checksum[1] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* e | checksum[2] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* f | checksum[3] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
*
* Legend:
*
* vdev virtual device ID
* offset offset into virtual device
* LSIZE logical size
* PSIZE physical size (after compression)
* ASIZE allocated size (including RAID-Z parity and gang block headers)
* GRID RAID-Z layout information (reserved for future use)
* cksum checksum function
* comp compression function
* G gang block indicator
* B byteorder (endianness)
* D dedup
* X unused
* lvl level of indirection
* type DMU object type
* phys birth txg of block allocation; zero if same as logical birth txg
* log. birth transaction group in which the block was logically born
* fill count number of non-zero blocks under this bp
* checksum[4] 256-bit checksum of the data this bp describes
*/
/*
* A block is a hole when it has either 1) never been written to, or
* 2) is zero-filled. In both cases, ZFS can return all zeroes for all reads
* without physically allocating disk space. Holes are represented in the
* blkptr_t structure by zeroed blk_dva. Correct checking for holes is
* done through the BP_IS_HOLE macro. For holes, the logical size, level,
* DMU object type, and birth times are all also stored for holes that
* were written to at some point (i.e. were punched after having been filled).
*/
typedef struct blkptr {
} blkptr_t;
/*
* Macros to get and set fields in a bp or DVA.
*/
#define DVA_GET_ASIZE(dva) \
#define DVA_SET_ASIZE(dva, x) \
SPA_MINBLOCKSHIFT, 0, x)
#define DVA_GET_OFFSET(dva) \
#define DVA_SET_OFFSET(dva, x) \
#define BP_GET_LSIZE(bp) \
#define BP_SET_LSIZE(bp, x) \
#define BP_GET_PSIZE(bp) \
#define BP_SET_PSIZE(bp, x) \
#define BP_PHYSICAL_BIRTH(bp) \
{ \
}
#define BP_GET_ASIZE(bp) \
#define BP_GET_UCSIZE(bp) \
#define BP_GET_NDVAS(bp) \
#define BP_COUNT_GANG(bp) \
{ \
}
/* BP_IS_RAIDZ(bp) assumes no block compression */
{ \
(bp)->blk_phys_birth = 0; \
}
#ifdef _BIG_ENDIAN
#define ZFS_HOST_BYTEORDER (0ULL)
#else
#define ZFS_HOST_BYTEORDER (1ULL)
#endif
#define BP_SPRINTF_LEN 320
/*
* This macro allows code sharing between zfs, libzpool, and mdb.
* 'func' is either snprintf() or mdb_snprintf().
* 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line.
*/
{ \
static const char *copyname[] = \
{ "zero", "single", "double", "triple" }; \
int len = 0; \
int copies = 0; \
\
} else if (BP_IS_HOLE(bp)) { \
" birth=%lluL", \
} \
} else { \
for (int d = 0; d < BP_GET_NDVAS(bp); d++) { \
if (DVA_IS_VALID(dva)) \
copies++; \
"DVA[%d]=<%llu:%llx:%llx>%c", d, \
ws); \
} \
if (BP_IS_GANG(bp) && \
copies--; \
"[L%llu %s] %s %s %s %s %s %s%c" \
"size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \
"cksum=%llx:%llx:%llx:%llx", \
type, \
checksum, \
compress, \
ws, \
ws, \
} \
}
#define BP_GET_BUFC_TYPE(bp) \
typedef enum spa_import_type {
/* state manipulation functions */
extern int spa_destroy(char *pool);
#define SPA_ASYNC_CONFIG_UPDATE 0x01
#define SPA_ASYNC_REMOVE 0x02
#define SPA_ASYNC_PROBE 0x04
#define SPA_ASYNC_RESILVER_DONE 0x08
#define SPA_ASYNC_RESILVER 0x10
#define SPA_ASYNC_AUTOEXPAND 0x20
#define SPA_ASYNC_REMOVE_DONE 0x40
#define SPA_ASYNC_REMOVE_STOP 0x80
/*
* Controls the behavior of spa_vdev_remove().
*/
#define SPA_REMOVE_UNSPARE 0x01
#define SPA_REMOVE_DONE 0x02
/* device manipulation */
int replacing);
int replace_done);
/* spare state (which is global across all pools) */
/* L2ARC state (which is global across all pools) */
/* scanning */
/* spa syncing */
extern void spa_sync_allpools(void);
/* spa namespace global mutex */
extern kmutex_t spa_namespace_lock;
/*
* SPA configuration functions in spa_config.c
*/
#define SPA_CONFIG_UPDATE_POOL 0
#define SPA_CONFIG_UPDATE_VDEVS 1
extern void spa_config_load(void);
int getstats);
/*
* Miscellaneous SPA routines in spa_misc.c
*/
/* Namespace manipulation */
/* Refcount functions */
#define SCL_NONE 0x00
#define SCL_CONFIG 0x01
#define SCL_STATE 0x02
#define SCL_ALLOC 0x08
#define SCL_ZIO 0x10
#define SCL_FREE 0x20
#define SCL_VDEV 0x40
#define SCL_LOCKS 7
/* Pool configuration locks */
/* Pool vdev state change lock */
/* Log state */
typedef enum spa_log_state {
SPA_LOG_UNKNOWN = 0, /* unknown log state */
SPA_LOG_MISSING, /* missing log(s) */
SPA_LOG_CLEAR, /* clear the log(s) */
SPA_LOG_GOOD, /* log(s) are good */
/* Log claim callback */
/* Accessor functions */
extern int spa_busy(void);
/* Miscellaneous support routines */
extern char *spa_strdup(const char *);
extern void spa_strfree(char *);
extern void spa_evict_all(void);
extern char *spa_his_ievent_table[];
char *his_buf);
/* error handling */
struct zbookmark;
/* vdev cache */
extern void vdev_cache_stat_init(void);
extern void vdev_cache_stat_fini(void);
/* Initialization and termination */
extern void spa_fini(void);
extern void spa_boot_init();
/* properties */
/* asynchronous event notification */
#ifdef ZFS_DEBUG
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
} \
#else
#endif
#define spa_dbgmsg(spa, ...) \
{ \
if (spa_debug_enabled(spa)) \
}
extern int spa_mode_global; /* mode, e.g. FREAD | FWRITE */
#ifdef __cplusplus
}
#endif
#endif /* _SYS_SPA_H */