zfsimpl.h revision 199767f8919635c4928607450d9e0abb932109ce
/*-
* Copyright (c) 2002 McAfee, Inc.
* All rights reserved.
*
* This software was developed for the FreeBSD Project by Marshall
* Kirk McKusick and McAfee Research,, the Security Research Division of
* part of the DARPA CHATS research program
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright 2013 by Saso Kiselkov. All rights reserved.
*/
/*
* Copyright (c) 2013 by Delphix. All rights reserved.
*/
#define MAXNAMELEN 256
#define _NOTE(s)
/* CRC64 table */
/*
* Macros for various sorts of alignment and rounding when the alignment
* is known to be a power of 2.
*/
/*
* General-purpose 32-bit and 64-bit bitfield encodings.
*/
/*
* Macros to reverse byte order
*/
#define BSWAP_8(x) ((x) & 0xff)
#define SPA_MINBLOCKSHIFT 9
#define SPA_OLDMAXBLOCKSHIFT 17
#define SPA_MAXBLOCKSHIFT 24
/*
* The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
* The ASIZE encoding should be at least 64 times larger (6 more bits)
* to support up to 4-way RAID-Z mirror mode with worst-case gang block
* overhead, three DVAs per bp, plus one more bit in case we do anything
* else that expands the ASIZE.
*/
/*
* All SPA data is represented by 128-bit data virtual addresses (DVAs).
* The members of the dva_t should be considered opaque outside the SPA.
*/
typedef struct dva {
} dva_t;
/*
* Each block has a 256-bit checksum -- strong enough for cryptographic hashes.
*/
typedef struct zio_cksum {
} zio_cksum_t;
/*
* secret and is suitable for use in MAC algorithms as the key.
*/
typedef struct zio_cksum_salt {
/*
* Each block is described by its DVAs, time of birth, checksum, etc.
* The word-by-word, bit-by-bit layout of the blkptr is as follows:
*
* 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 0 | vdev1 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 1 |G| offset1 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 2 | vdev2 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 3 |G| offset2 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 4 | vdev3 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 5 |G| offset3 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 7 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 8 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 9 | physical birth txg |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* a | logical birth txg |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* b | fill count |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* c | checksum[0] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* d | checksum[1] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* e | checksum[2] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* f | checksum[3] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
*
* Legend:
*
* vdev virtual device ID
* offset offset into virtual device
* LSIZE logical size
* PSIZE physical size (after compression)
* ASIZE allocated size (including RAID-Z parity and gang block headers)
* GRID RAID-Z layout information (reserved for future use)
* cksum checksum function
* comp compression function
* G gang block indicator
* B byteorder (endianness)
* D dedup
* X encryption (on version 30, which is not supported)
* E blkptr_t contains embedded data (see below)
* lvl level of indirection
* type DMU object type
* phys birth txg of block allocation; zero if same as logical birth txg
* log. birth transaction group in which the block was logically born
* fill count number of non-zero blocks under this bp
* checksum[4] 256-bit checksum of the data this bp describes
*/
/*
* "Embedded" blkptr_t's don't actually point to a block, instead they
* have a data payload embedded in the blkptr_t itself. See the comment
* in blkptr.c for more details.
*
* The blkptr_t is laid out as follows:
*
* 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 0 | payload |
* 1 | payload |
* 2 | payload |
* 3 | payload |
* 4 | payload |
* 5 | payload |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 6 |BDX|lvl| type | etype |E| comp| PSIZE| LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 7 | payload |
* 8 | payload |
* 9 | payload |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* a | logical birth txg |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* b | payload |
* c | payload |
* d | payload |
* e | payload |
* f | payload |
* +-------+-------+-------+-------+-------+-------+-------+-------+
*
* Legend:
*
* payload contains the embedded data
* B (byteorder) byteorder (endianness)
* D (dedup) padding (set to zero)
* X encryption (set to zero; see above)
* E (embedded) set to one
* lvl indirection level
* type DMU object type
* etype how to interpret embedded data (BP_EMBEDDED_TYPE_*)
* comp compression function of payload
* PSIZE size of payload after compression, in bytes
* LSIZE logical size of payload, in bytes
* note that 25 bits is enough to store the largest
* "normal" BP's LSIZE (2^16 * 2^9) in bytes
* log. birth transaction group in which the block was logically born
*
* Note that LSIZE and PSIZE are stored in bytes, whereas for non-embedded
* bp's they are stored in units of SPA_MINBLOCKSHIFT.
* Generally, the generic BP_GET_*() macros can be used on embedded BP's.
* The B, D, X, lvl, type, and comp fields are stored the same as with normal
* BP's so the BP_SET_* macros can be used with them. etype, PSIZE, LSIZE must
* be set with the BPE_SET_* macros. BP_SET_EMBEDDED() should be called before
* other macros, as they assert that they are only used on BP's of the correct
* "embedded-ness".
*/
#define BPE_GET_ETYPE(bp) \
#define BPE_SET_ETYPE(bp, t) do { \
#define BPE_GET_LSIZE(bp) \
#define BPE_SET_LSIZE(bp, x) do { \
#define BPE_GET_PSIZE(bp) \
#define BPE_SET_PSIZE(bp, x) do { \
typedef enum bp_embedded_type {
BP_EMBEDDED_TYPE_RESERVED, /* Reserved for an unintegrated feature. */
#define BPE_NUM_WORDS 14
typedef struct blkptr {
} blkptr_t;
/*
* Macros to get and set fields in a bp or DVA.
*/
#define DVA_GET_ASIZE(dva) \
#define DVA_SET_ASIZE(dva, x) \
SPA_MINBLOCKSHIFT, 0, x)
#define DVA_GET_OFFSET(dva) \
#define DVA_SET_OFFSET(dva, x) \
#define BP_GET_LSIZE(bp) \
(BP_IS_EMBEDDED(bp) ? \
#define BP_SET_LSIZE(bp, x) do { \
#define BP_GET_PSIZE(bp) \
#define BP_SET_PSIZE(bp, x) \
#define BP_PHYSICAL_BIRTH(bp) \
#define BP_GET_ASIZE(bp) \
#define BP_GET_UCSIZE(bp) \
#define BP_GET_NDVAS(bp) \
{ \
}
{ \
(bp)->blk_phys_birth = 0; \
}
#define BPE_NUM_WORDS 14
/*
* Embedded checksum
*/
#define ZEC_MAGIC 0x210da7ab10c7a11ULL
typedef struct zio_eck {
} zio_eck_t;
/*
* Gang block headers are self-checksumming and contain an array
* of block pointers.
*/
#define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE
#define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \
#define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \
sizeof (zio_eck_t) - \
(SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\
sizeof (uint64_t))
typedef struct zio_gbh {
#define VDEV_RAIDZ_MAXPARITY 3
/* 2 padding areas (vl_pad1 and vl_pad2) to skip */
#define VDEV_UBERBLOCK_SHIFT(vd) \
#define VDEV_UBERBLOCK_COUNT(vd) \
#define VDEV_UBERBLOCK_OFFSET(vd, n) \
typedef struct vdev_phys {
} vdev_phys_t;
typedef struct vdev_label {
} vdev_label_t; /* 256K total */
/*
* vdev_dirty() flags
*/
#define VDD_METASLAB 0x01
#define VDD_DTL 0x02
/*
* Size and offset of embedded boot loader region on each label.
* The total size of the first two labels plus the boot area is 4MB.
*/
/*
* Size of label regions at the start and end of each leaf device.
*/
#define VDEV_LABELS 4
enum zio_checksum {
ZIO_CHECKSUM_INHERIT = 0,
};
#define ZIO_CHECKSUM_DEFAULT ZIO_CHECKSUM_ON
enum zio_compress {
ZIO_COMPRESS_INHERIT = 0,
};
#define ZIO_COMPRESS_DEFAULT ZIO_COMPRESS_OFF
/* nvlist pack encoding */
#define NV_ENCODE_NATIVE 0
#define NV_ENCODE_XDR 1
typedef enum {
DATA_TYPE_UNKNOWN = 0,
} data_type_t;
/*
* On-disk version number.
*/
#define SPA_VERSION_1 1ULL
#define SPA_VERSION_2 2ULL
#define SPA_VERSION_3 3ULL
#define SPA_VERSION_4 4ULL
#define SPA_VERSION_5 5ULL
#define SPA_VERSION_6 6ULL
#define SPA_VERSION_7 7ULL
#define SPA_VERSION_8 8ULL
#define SPA_VERSION_9 9ULL
#define SPA_VERSION_10 10ULL
#define SPA_VERSION_11 11ULL
#define SPA_VERSION_12 12ULL
#define SPA_VERSION_13 13ULL
#define SPA_VERSION_14 14ULL
#define SPA_VERSION_15 15ULL
#define SPA_VERSION_16 16ULL
#define SPA_VERSION_17 17ULL
#define SPA_VERSION_18 18ULL
#define SPA_VERSION_19 19ULL
#define SPA_VERSION_20 20ULL
#define SPA_VERSION_21 21ULL
#define SPA_VERSION_22 22ULL
#define SPA_VERSION_23 23ULL
#define SPA_VERSION_24 24ULL
#define SPA_VERSION_25 25ULL
#define SPA_VERSION_26 26ULL
#define SPA_VERSION_27 27ULL
#define SPA_VERSION_28 28ULL
#define SPA_VERSION_5000 5000ULL
/*
* When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
* and do the appropriate changes. Also bump the version number in
* usr/src/grub/capability.
*/
#define SPA_VERSION SPA_VERSION_5000
#define SPA_VERSION_STRING "5000"
/*
* Symbolic names for the changes that caused a SPA_VERSION switch.
* Used in the code when checking for presence or absence of a feature.
* Feel free to define multiple symbolic names for each version if there
* were multiple changes to on-disk structures during that version.
*
* NOTE: When checking the current SPA_VERSION in your code, be sure
* to use spa_version() since it reports the version of the
* last synced uberblock. Checking the in-flight version can
* be dangerous in some cases.
*/
#define SPA_VERSION_INITIAL SPA_VERSION_1
#define SPA_VERSION_SPARES SPA_VERSION_3
#define SPA_VERSION_RAID6 SPA_VERSION_3
#define SPA_VERSION_DNODE_BYTES SPA_VERSION_3
#define SPA_VERSION_BOOTFS SPA_VERSION_6
#define SPA_VERSION_SLOGS SPA_VERSION_7
#define SPA_VERSION_FUID SPA_VERSION_9
#define SPA_VERSION_REFQUOTA SPA_VERSION_9
#define SPA_VERSION_L2CACHE SPA_VERSION_10
#define SPA_VERSION_ORIGIN SPA_VERSION_11
#define SPA_VERSION_DSL_SCRUB SPA_VERSION_11
#define SPA_VERSION_SNAP_PROPS SPA_VERSION_12
#define SPA_VERSION_USERSPACE SPA_VERSION_15
#define SPA_VERSION_STMF_PROP SPA_VERSION_16
#define SPA_VERSION_RAIDZ3 SPA_VERSION_17
#define SPA_VERSION_USERREFS SPA_VERSION_18
#define SPA_VERSION_HOLES SPA_VERSION_19
#define SPA_VERSION_DEDUP SPA_VERSION_21
#define SPA_VERSION_SLIM_ZIL SPA_VERSION_23
#define SPA_VERSION_SA SPA_VERSION_24
#define SPA_VERSION_SCAN SPA_VERSION_25
#define SPA_VERSION_DIR_CLONES SPA_VERSION_26
#define SPA_VERSION_DEADLISTS SPA_VERSION_26
#define SPA_VERSION_FAST_SNAP SPA_VERSION_27
#define SPA_VERSION_FEATURES SPA_VERSION_5000
#define SPA_VERSION_IS_SUPPORTED(v) \
(((v) >= SPA_VERSION_INITIAL && (v) <= SPA_VERSION_BEFORE_FEATURES) || \
((v) >= SPA_VERSION_FEATURES && (v) <= SPA_VERSION))
/*
* The following are configuration names used in the nvlist describing a pool's
* configuration.
*/
#define ZPOOL_CONFIG_VERSION "version"
#define ZPOOL_CONFIG_POOL_NAME "name"
#define ZPOOL_CONFIG_POOL_STATE "state"
#define ZPOOL_CONFIG_POOL_TXG "txg"
#define ZPOOL_CONFIG_POOL_GUID "pool_guid"
#define ZPOOL_CONFIG_CREATE_TXG "create_txg"
#define ZPOOL_CONFIG_TOP_GUID "top_guid"
#define ZPOOL_CONFIG_VDEV_TREE "vdev_tree"
#define ZPOOL_CONFIG_TYPE "type"
#define ZPOOL_CONFIG_CHILDREN "children"
#define ZPOOL_CONFIG_ID "id"
#define ZPOOL_CONFIG_GUID "guid"
#define ZPOOL_CONFIG_PATH "path"
#define ZPOOL_CONFIG_DEVID "devid"
#define ZPOOL_CONFIG_PHYS_PATH "phys_path"
#define ZPOOL_CONFIG_METASLAB_ARRAY "metaslab_array"
#define ZPOOL_CONFIG_METASLAB_SHIFT "metaslab_shift"
#define ZPOOL_CONFIG_ASHIFT "ashift"
#define ZPOOL_CONFIG_ASIZE "asize"
#define ZPOOL_CONFIG_DTL "DTL"
#define ZPOOL_CONFIG_STATS "stats"
#define ZPOOL_CONFIG_WHOLE_DISK "whole_disk"
#define ZPOOL_CONFIG_ERRCOUNT "error_count"
#define ZPOOL_CONFIG_NOT_PRESENT "not_present"
#define ZPOOL_CONFIG_SPARES "spares"
#define ZPOOL_CONFIG_IS_SPARE "is_spare"
#define ZPOOL_CONFIG_NPARITY "nparity"
#define ZPOOL_CONFIG_HOSTID "hostid"
#define ZPOOL_CONFIG_HOSTNAME "hostname"
#define ZPOOL_CONFIG_IS_LOG "is_log"
#define ZPOOL_CONFIG_FEATURES_FOR_READ "features_for_read"
/*
* The persistent vdev state is stored as separate values rather than a single
* 'vdev_state' entry. This is because a device can be in multiple states, such
* as offline and degraded.
*/
#define ZPOOL_CONFIG_OFFLINE "offline"
#define ZPOOL_CONFIG_FAULTED "faulted"
#define ZPOOL_CONFIG_DEGRADED "degraded"
#define ZPOOL_CONFIG_REMOVED "removed"
#define ZPOOL_CONFIG_FRU "fru"
#define ZPOOL_CONFIG_AUX_STATE "aux_state"
#define VDEV_TYPE_ROOT "root"
#define VDEV_TYPE_MIRROR "mirror"
#define VDEV_TYPE_REPLACING "replacing"
#define VDEV_TYPE_RAIDZ "raidz"
#define VDEV_TYPE_DISK "disk"
#define VDEV_TYPE_FILE "file"
#define VDEV_TYPE_MISSING "missing"
#define VDEV_TYPE_HOLE "hole"
#define VDEV_TYPE_SPARE "spare"
#define VDEV_TYPE_LOG "log"
#define VDEV_TYPE_L2CACHE "l2cache"
/*
* This is needed in userland to report the minimum necessary device size.
*/
/*
* The location of the pool configuration repository, shared between kernel and
* userland.
*/
#define ZPOOL_CACHE "/boot/zfs/zpool.cache"
/*
* vdev states are ordered from least to most healthy.
* A vdev that's CANT_OPEN or below is considered unusable.
*/
typedef enum vdev_state {
VDEV_STATE_UNKNOWN = 0, /* Uninitialized vdev */
VDEV_STATE_CLOSED, /* Not currently open */
VDEV_STATE_OFFLINE, /* Not allowed to open */
VDEV_STATE_REMOVED, /* Explicitly removed from system */
VDEV_STATE_CANT_OPEN, /* Tried to open, but failed */
VDEV_STATE_FAULTED, /* External request to fault device */
VDEV_STATE_DEGRADED, /* Replicated vdev with unhealthy kids */
VDEV_STATE_HEALTHY /* Presumed good */
} vdev_state_t;
/*
* vdev aux states. When a vdev is in the CANT_OPEN state, the aux field
* of the vdev stats structure uses these constants to distinguish why.
*/
typedef enum vdev_aux {
VDEV_AUX_NONE, /* no error */
VDEV_AUX_OPEN_FAILED, /* ldi_open_*() or vn_open() failed */
VDEV_AUX_CORRUPT_DATA, /* bad label or disk contents */
VDEV_AUX_NO_REPLICAS, /* insufficient number of replicas */
VDEV_AUX_BAD_GUID_SUM, /* vdev guid sum doesn't match */
VDEV_AUX_TOO_SMALL, /* vdev size is too small */
VDEV_AUX_BAD_LABEL, /* the label is OK but invalid */
VDEV_AUX_VERSION_NEWER, /* on-disk version is too new */
VDEV_AUX_VERSION_OLDER, /* on-disk version is too old */
VDEV_AUX_SPARED /* hot spare used in another pool */
} vdev_aux_t;
/*
* pool state. The following states are written to disk as part of the normal
* SPA lifecycle: ACTIVE, EXPORTED, DESTROYED, SPARE. The remaining states are
* software abstractions used at various levels to communicate pool state.
*/
typedef enum pool_state {
POOL_STATE_ACTIVE = 0, /* In active use */
POOL_STATE_EXPORTED, /* Explicitly exported */
POOL_STATE_DESTROYED, /* Explicitly destroyed */
POOL_STATE_SPARE, /* Reserved for hot spare use */
POOL_STATE_UNINITIALIZED, /* Internal spa_t state */
POOL_STATE_UNAVAIL, /* Internal libzfs state */
POOL_STATE_POTENTIALLY_ACTIVE /* Internal libzfs state */
} pool_state_t;
/*
* The uberblock version is incremented whenever an incompatible on-disk
* format change is made to the SPA, DMU, or ZAP.
*
* Note: the first two fields should never be moved. When a storage pool
* is opened, the uberblock must be read off the disk before the version
* can be checked. If the ub_version field is moved, we may not detect
* version mismatch. If the ub_magic field is moved, applications that
* expect the magic number in the first word won't work.
*/
struct uberblock {
};
/*
* Flags.
*/
#define DNODE_MUST_BE_ALLOCATED 1
#define DNODE_MUST_BE_FREE 2
/*
* Fixed constants.
*/
/*
* Derived constants.
*/
/* The +2 here is a cheesy way to round up */
/* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */
#define DNODE_FLAG_USED_BYTES (1<<0)
/* Does dnode have a SA spill blkptr in bonus? */
typedef struct dnode_phys {
/* accounting is protected by dn_dirty_mtx */
} dnode_phys_t;
typedef enum dmu_object_byteswap {
/*
* Allocating a new byteswap type number makes the on-disk format
* incompatible with any other format that uses the same number.
*
* Data can usually be structured to work with one of the
* DMU_BSWAP_UINT* or DMU_BSWAP_ZAP types.
*/
#define DMU_OT_NEWTYPE 0x80
#define DMU_OT_METADATA 0x40
#define DMU_OT_BYTESWAP_MASK 0x3f
/*
* Defines a uint8_t object type. Object types specify if the data
* in the object is metadata (boolean) and how to byteswap the data
* (dmu_object_byteswap_t).
*/
(DMU_OT_NEWTYPE | \
((metadata) ? DMU_OT_METADATA : 0) | \
((byteswap) & DMU_OT_BYTESWAP_MASK))
typedef enum dmu_object_type {
/* general: */
DMU_OT_OBJECT_DIRECTORY, /* ZAP */
DMU_OT_OBJECT_ARRAY, /* UINT64 */
DMU_OT_PACKED_NVLIST, /* UINT8 (XDR by nvlist_pack/unpack) */
DMU_OT_PACKED_NVLIST_SIZE, /* UINT64 */
DMU_OT_BPLIST, /* UINT64 */
DMU_OT_BPLIST_HDR, /* UINT64 */
/* spa: */
DMU_OT_SPACE_MAP_HEADER, /* UINT64 */
DMU_OT_SPACE_MAP, /* UINT64 */
/* zil: */
DMU_OT_INTENT_LOG, /* UINT64 */
/* dmu: */
DMU_OT_DNODE, /* DNODE */
DMU_OT_OBJSET, /* OBJSET */
/* dsl: */
DMU_OT_DSL_DIR, /* UINT64 */
DMU_OT_DSL_DIR_CHILD_MAP, /* ZAP */
DMU_OT_DSL_DS_SNAP_MAP, /* ZAP */
DMU_OT_DSL_PROPS, /* ZAP */
DMU_OT_DSL_DATASET, /* UINT64 */
/* zpl: */
DMU_OT_ZNODE, /* ZNODE */
DMU_OT_OLDACL, /* Old ACL */
DMU_OT_PLAIN_FILE_CONTENTS, /* UINT8 */
DMU_OT_DIRECTORY_CONTENTS, /* ZAP */
DMU_OT_MASTER_NODE, /* ZAP */
DMU_OT_UNLINKED_SET, /* ZAP */
/* zvol: */
DMU_OT_ZVOL, /* UINT8 */
DMU_OT_ZVOL_PROP, /* ZAP */
/* other; for testing only! */
DMU_OT_PLAIN_OTHER, /* UINT8 */
DMU_OT_UINT64_OTHER, /* UINT64 */
DMU_OT_ZAP_OTHER, /* ZAP */
/* new object types: */
DMU_OT_ERROR_LOG, /* ZAP */
DMU_OT_SPA_HISTORY, /* UINT8 */
DMU_OT_SPA_HISTORY_OFFSETS, /* spa_his_phys_t */
DMU_OT_POOL_PROPS, /* ZAP */
DMU_OT_DSL_PERMS, /* ZAP */
DMU_OT_ACL, /* ACL */
DMU_OT_SYSACL, /* SYSACL */
DMU_OT_FUID, /* FUID table (Packed NVLIST UINT8) */
DMU_OT_FUID_SIZE, /* FUID table size UINT64 */
DMU_OT_NEXT_CLONES, /* ZAP */
DMU_OT_SCAN_QUEUE, /* ZAP */
DMU_OT_USERGROUP_USED, /* ZAP */
DMU_OT_USERGROUP_QUOTA, /* ZAP */
DMU_OT_USERREFS, /* ZAP */
DMU_OT_DDT_ZAP, /* ZAP */
DMU_OT_DDT_STATS, /* ZAP */
DMU_OT_SA, /* System attr */
DMU_OT_SA_MASTER_NODE, /* ZAP */
DMU_OT_SA_ATTR_REGISTRATION, /* ZAP */
DMU_OT_SA_ATTR_LAYOUTS, /* ZAP */
DMU_OT_SCAN_XLATE, /* ZAP */
DMU_OT_DEDUP, /* fake dedup BP from ddt_bp_create() */
/*
* Names for valid types declared with DMU_OT().
*/
typedef enum dmu_objset_type {
DMU_OST_OTHER, /* For testing only! */
DMU_OST_ANY, /* Be careful! */
/*
* header for all bonus and spill buffers.
* The header has a fixed portion with a variable number
* of "lengths" depending on the number of variable sized
* attribues which are determined by the "layout number"
*/
typedef struct sa_hdr_phys {
/* ... Data follows the lengths. */
/*
* sa_hdr_phys -> sa_layout_info
*
* 16 10 0
* +--------+-------+
* | hdrsz |layout |
* +--------+-------+
*
* Bits 0-10 are the layout number
* Bits 11-16 are the size of the header.
* The hdrsize is the number * 8
*
* For example.
* hdrsz of 1 ==> 8 byte header
* 2 ==> 16 byte header
*
*/
{ \
}
#define SA_MODE_OFFSET 0
#define SA_SIZE_OFFSET 8
#define SA_GEN_OFFSET 16
#define SA_UID_OFFSET 24
#define SA_GID_OFFSET 32
#define SA_PARENT_OFFSET 40
/*
* Intent log header - this on disk structure holds fields to manage
* the log. All fields are 64 bit to easily handle cross architectures.
*/
typedef struct zil_header {
} zil_header_t;
#define OBJSET_PHYS_SIZE 2048
typedef struct objset_phys {
typedef struct dsl_dir_phys {
/*
* how much space our children are accounting for; for leaf
* datasets, == physical space used by fs + snaps
*/
/* Administrative quota setting */
/* Administrative reservation setting */
typedef struct dsl_dataset_phys {
/*
* The ds_fsid_guid is a 56-bit ID that can change to avoid
* collisions. The ds_guid is a 64-bit ID that will never
* change, so there is a small probability that it will collide.
*/
/*
* The names of zap entries in the DIRECTORY_OBJECT of the MOS.
*/
#define DMU_POOL_DIRECTORY_OBJECT 1
#define DMU_POOL_CONFIG "config"
#define DMU_POOL_FEATURES_FOR_READ "features_for_read"
#define DMU_POOL_ROOT_DATASET "root_dataset"
#define DMU_POOL_SYNC_BPLIST "sync_bplist"
#define DMU_POOL_ERRLOG_SCRUB "errlog_scrub"
#define DMU_POOL_ERRLOG_LAST "errlog_last"
#define DMU_POOL_SPARES "spares"
#define DMU_POOL_DEFLATE "deflate"
#define DMU_POOL_HISTORY "history"
#define DMU_POOL_PROPS "pool_props"
#define ZAP_MAGIC 0x2F52AB2ABULL
#define ZAP_HASHBITS 28
#define MZAP_ENT_LEN 64
#define MZAP_MAX_BLKSHIFT SPA_MAXBLOCKSHIFT
typedef struct mzap_ent_phys {
char mze_name[MZAP_NAME_LEN];
typedef struct mzap_phys {
/* actually variable size depending on block size */
} mzap_phys_t;
/*
* The (fat) zap is stored in one object. It is an array of
* 1<<FZAP_BLOCK_SHIFT byte blocks. The layout looks like one of:
*
* ptrtbl fits in first block:
* [zap_phys_t zap_ptrtbl_shift < 6] [zap_leaf_t] ...
*
* ptrtbl too big for first block:
* [zap_phys_t zap_ptrtbl_shift >= 6] [zap_leaf_t] [ptrtbl] ...
*
*/
/* any other values are ptrtbl blocks */
/*
* the embedded pointer table takes up half a block:
* block size / entry size (2^3) / 2
*/
/*
* The embedded pointer table starts half-way through the block. Since
* the pointer table itself is half the block, it starts at (64-bit)
* word number (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)).
*/
/*
* TAKE NOTE:
* If zap_phys_t is modified, zap_byteswap() must be modified.
*/
typedef struct zap_phys {
struct zap_table_phys {
} zap_ptrtbl;
/*
* This structure is followed by padding, and then the embedded
* pointer table. The embedded pointer table takes up second
* half of the block. It is accessed using the
* ZAP_EMBEDDED_PTRTBL_ENT() macro.
*/
} zap_phys_t;
typedef struct zap_table_phys zap_table_phys_t;
typedef struct fat_zap {
int zap_block_shift; /* block size shift */
} fat_zap_t;
#define ZAP_LEAF_MAGIC 0x2AB1EAF
/* chunk size = 24 bytes */
#define ZAP_LEAF_CHUNKSIZE 24
/*
* The amount of space available for chunks is:
* block size (1<<l->l_bs) - hash entry size (2) * number of hash
* entries - header space (2*chunksize)
*/
#define ZAP_LEAF_NUMCHUNKS(l) \
ZAP_LEAF_CHUNKSIZE - 2)
/*
* The amount of space within the chunk available for the array is:
* chunk size - space for type (1) - space for next pointer (2)
*/
#define ZAP_LEAF_ARRAY_NCHUNKS(bytes) \
/*
* Low water mark: when there are only this many chunks free, start
* growing the ptrtbl. Ideally, this should be larger than a
* "reasonably-sized" entry. 20 chunks is more than enough for the
* largest directory entry (MAXNAMELEN (256) byte name, 8-byte value),
* while still being only around 3% for 16k blocks.
*/
#define ZAP_LEAF_LOW_WATER (20)
/*
* The leaf hash table has block size / 2^5 (32) number of entries,
* which should be more than enough for the maximum number of entries,
* which is less than block size / CHUNKSIZE (24) / minimum number of
* chunks per entry (3).
*/
/*
* The chunks start immediately after the hash table. The end of the
* hash table is at l_hash + HASH_NUMENTRIES, which we simply cast to a
* chunk_t.
*/
#define ZAP_LEAF_CHUNK(l, idx) \
((zap_leaf_chunk_t *) \
typedef enum zap_chunk_type {
ZAP_CHUNK_FREE = 253,
ZAP_CHUNK_ENTRY = 252,
ZAP_CHUNK_ARRAY = 251,
ZAP_CHUNK_TYPE_MAX = 250
/*
* TAKE NOTE:
* If zap_leaf_phys_t is modified, zap_leaf_byteswap() must be modified.
*/
typedef struct zap_leaf_phys {
struct zap_leaf_header {
/* above is accessable to zap, below is zap_leaf private */
} l_hdr; /* 2 24-byte chunks */
/*
* The header is followed by a hash table with
* ZAP_LEAF_HASH_NUMENTRIES(zap) entries. The hash table is
* followed by an array of ZAP_LEAF_NUMCHUNKS(zap)
* zap_leaf_chunk structures. These structures are accessed
* with the ZAP_LEAF_CHUNK() macro.
*/
typedef union zap_leaf_chunk {
struct zap_leaf_entry {
} l_entry;
struct zap_leaf_array {
} l_array;
struct zap_leaf_free {
} l_free;
typedef struct zap_leaf {
int l_bs; /* block size shift */
} zap_leaf_t;
/*
* Define special zfs pflags
*/
#define MASTER_NODE_OBJ 1
/*
* special attributes for master node.
*/
#define ZFS_FSID "FSID"
#define ZFS_UNLINKED_SET "DELETE_QUEUE"
#define ZFS_ROOT_OBJ "ROOT"
#define ZPL_VERSION_OBJ "VERSION"
#define ZFS_PROP_BLOCKPERPAGE "BLOCKPERPAGE"
#define ZFS_PROP_NOGROWBLOCKS "NOGROWBLOCKS"
#define ZFS_FLAG_BLOCKPERPAGE 0x1
#define ZFS_FLAG_NOGROWBLOCKS 0x2
/*
* ZPL version - rev'd whenever an incompatible on-disk format change
*/
#define ZPL_VERSION 1ULL
/*
* The directory entry has the type (currently unused on Solaris) in the
* top 4 bits, and the object number in the low 48 bits. The "middle"
* 12 bits are unused.
*/
typedef struct ace {
} ace_t;
#define ACE_SLOT_CNT 6
typedef struct zfs_znode_acl {
/*
* This is the persistent portion of the znode. It is stored
* in the "bonus buffer" of the file. Short symbolic links
* are also stored in the bonus buffer.
*/
typedef struct znode_phys {
/*
* Data may pad out any remaining bytes in the znode buffer, eg:
*
* |<---------------------- dnode_phys (512) ------------------------>|
* |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->|
* |<---- znode (264) ---->|<---- data (56) ---->|
*
* At present, we only use this space to store symbolic links.
*/
} znode_phys_t;
/*
* In-core vdev representation.
*/
struct vdev;
typedef struct vdev {
const char *v_name; /* vdev name */
const char *v_phys_path; /* vdev bootpath */
const char *v_devid; /* vdev devid */
int v_id; /* index in parent */
int v_ashift; /* offset to block shift */
int v_nparity; /* # parity for raidz */
int v_nchildren; /* # children */
void *v_read_priv; /* private data for read function */
} vdev_t;
/*
* In-core pool representation.
*/
typedef struct spa {
char *spa_name; /* pool name */
int spa_inited; /* initialized */
} spa_t;
static void decode_embedded_bp_compressed(const blkptr_t *, void *);