/*-
* Copyright (c) 2002 McAfee, Inc.
* All rights reserved.
*
* This software was developed for the FreeBSD Project by Marshall
* Kirk McKusick and McAfee Research,, the Security Research Division of
* part of the DARPA CHATS research program
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright 2013 by Saso Kiselkov. All rights reserved.
*/
/*
* Copyright (c) 2013 by Delphix. All rights reserved.
*/
#define _NOTE(s)
/* CRC64 table */
/*
* Macros for various sorts of alignment and rounding when the alignment
* is known to be a power of 2.
*/
/*
* General-purpose 32-bit and 64-bit bitfield encodings.
*/
/*
* Macros to reverse byte order
*/
/*
* The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
* The ASIZE encoding should be at least 64 times larger (6 more bits)
* to support up to 4-way RAID-Z mirror mode with worst-case gang block
* overhead, three DVAs per bp, plus one more bit in case we do anything
* else that expands the ASIZE.
*/
/*
* All SPA data is represented by 128-bit data virtual addresses (DVAs).
* The members of the dva_t should be considered opaque outside the SPA.
*/
typedef struct dva {
} dva_t;
/*
* Each block has a 256-bit checksum -- strong enough for cryptographic hashes.
*/
typedef struct zio_cksum {
} zio_cksum_t;
/*
* secret and is suitable for use in MAC algorithms as the key.
*/
typedef struct zio_cksum_salt {
/*
* Each block is described by its DVAs, time of birth, checksum, etc.
* The word-by-word, bit-by-bit layout of the blkptr is as follows:
*
* 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 0 | vdev1 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 1 |G| offset1 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 2 | vdev2 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 3 |G| offset2 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 4 | vdev3 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 5 |G| offset3 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 7 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 8 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 9 | physical birth txg |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* a | logical birth txg |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* b | fill count |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* c | checksum[0] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* d | checksum[1] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* e | checksum[2] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* f | checksum[3] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
*
* Legend:
*
* vdev virtual device ID
* offset offset into virtual device
* LSIZE logical size
* PSIZE physical size (after compression)
* ASIZE allocated size (including RAID-Z parity and gang block headers)
* GRID RAID-Z layout information (reserved for future use)
* cksum checksum function
* comp compression function
* G gang block indicator
* B byteorder (endianness)
* D dedup
* X encryption (on version 30, which is not supported)
* E blkptr_t contains embedded data (see below)
* lvl level of indirection
* type DMU object type
* phys birth txg of block allocation; zero if same as logical birth txg
* log. birth transaction group in which the block was logically born
* fill count number of non-zero blocks under this bp
* checksum[4] 256-bit checksum of the data this bp describes
*/
/*
* "Embedded" blkptr_t's don't actually point to a block, instead they
* have a data payload embedded in the blkptr_t itself. See the comment
* in blkptr.c for more details.
*
* The blkptr_t is laid out as follows:
*
* 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 0 | payload |
* 1 | payload |
* 2 | payload |
* 3 | payload |
* 4 | payload |
* 5 | payload |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 6 |BDX|lvl| type | etype |E| comp| PSIZE| LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 7 | payload |
* 8 | payload |
* 9 | payload |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* a | logical birth txg |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* b | payload |
* c | payload |
* d | payload |
* e | payload |
* f | payload |
* +-------+-------+-------+-------+-------+-------+-------+-------+
*
* Legend:
*
* payload contains the embedded data
* B (byteorder) byteorder (endianness)
* D (dedup) padding (set to zero)
* X encryption (set to zero; see above)
* E (embedded) set to one
* lvl indirection level
* type DMU object type
* etype how to interpret embedded data (BP_EMBEDDED_TYPE_*)
* comp compression function of payload
* PSIZE size of payload after compression, in bytes
* LSIZE logical size of payload, in bytes
* note that 25 bits is enough to store the largest
* "normal" BP's LSIZE (2^16 * 2^9) in bytes
* log. birth transaction group in which the block was logically born
*
* Note that LSIZE and PSIZE are stored in bytes, whereas for non-embedded
* bp's they are stored in units of SPA_MINBLOCKSHIFT.
* Generally, the generic BP_GET_*() macros can be used on embedded BP's.
* The B, D, X, lvl, type, and comp fields are stored the same as with normal
* BP's so the BP_SET_* macros can be used with them. etype, PSIZE, LSIZE must
* be set with the BPE_SET_* macros. BP_SET_EMBEDDED() should be called before
* other macros, as they assert that they are only used on BP's of the correct
* "embedded-ness".
*/
typedef enum bp_embedded_type {
typedef struct blkptr {
} blkptr_t;
/*
* Macros to get and set fields in a bp or DVA.
*/
SPA_MINBLOCKSHIFT, 0, x)
(BP_IS_EMBEDDED(bp) ? \
{ \
}
{ \
(bp)->blk_phys_birth = 0; \
}
/*
* Embedded checksum
*/
typedef struct zio_eck {
} zio_eck_t;
/*
* Gang block headers are self-checksumming and contain an array
* of block pointers.
*/
sizeof (zio_eck_t) - \
(SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\
sizeof (uint64_t))
typedef struct zio_gbh {
/* 2 padding areas (vl_pad1 and vl_pad2) to skip */
typedef struct vdev_phys {
} vdev_phys_t;
typedef struct vdev_label {
/*
* vdev_dirty() flags
*/
/*
* Size and offset of embedded boot loader region on each label.
* The total size of the first two labels plus the boot area is 4MB.
*/
/*
* Size of label regions at the start and end of each leaf device.
*/
enum zio_checksum {
ZIO_CHECKSUM_INHERIT = 0,
};
enum zio_compress {
ZIO_COMPRESS_INHERIT = 0,
};
/* nvlist pack encoding */
#define NV_ENCODE_NATIVE 0
typedef enum {
DATA_TYPE_UNKNOWN = 0,
} data_type_t;
/*
* On-disk version number.
*/
/*
* When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
* and do the appropriate changes. Also bump the version number in
* usr/src/grub/capability.
*/
/*
* Symbolic names for the changes that caused a SPA_VERSION switch.
* Used in the code when checking for presence or absence of a feature.
* Feel free to define multiple symbolic names for each version if there
* were multiple changes to on-disk structures during that version.
*
* NOTE: When checking the current SPA_VERSION in your code, be sure
* to use spa_version() since it reports the version of the
* last synced uberblock. Checking the in-flight version can
* be dangerous in some cases.
*/
#define SPA_VERSION_IS_SUPPORTED(v) \
(((v) >= SPA_VERSION_INITIAL && (v) <= SPA_VERSION_BEFORE_FEATURES) || \
((v) >= SPA_VERSION_FEATURES && (v) <= SPA_VERSION))
/*
* The following are configuration names used in the nvlist describing a pool's
* configuration.
*/
/*
* The persistent vdev state is stored as separate values rather than a single
* 'vdev_state' entry. This is because a device can be in multiple states, such
* as offline and degraded.
*/
/*
* This is needed in userland to report the minimum necessary device size.
*/
/*
* The location of the pool configuration repository, shared between kernel and
* userland.
*/
/*
* vdev states are ordered from least to most healthy.
* A vdev that's CANT_OPEN or below is considered unusable.
*/
typedef enum vdev_state {
} vdev_state_t;
/*
* vdev aux states. When a vdev is in the CANT_OPEN state, the aux field
* of the vdev stats structure uses these constants to distinguish why.
*/
typedef enum vdev_aux {
} vdev_aux_t;
/*
* pool state. The following states are written to disk as part of the normal
* SPA lifecycle: ACTIVE, EXPORTED, DESTROYED, SPARE. The remaining states are
* software abstractions used at various levels to communicate pool state.
*/
typedef enum pool_state {
} pool_state_t;
/*
* The uberblock version is incremented whenever an incompatible on-disk
* format change is made to the SPA, DMU, or ZAP.
*
* Note: the first two fields should never be moved. When a storage pool
* is opened, the uberblock must be read off the disk before the version
* can be checked. If the ub_version field is moved, we may not detect
* version mismatch. If the ub_magic field is moved, applications that
* expect the magic number in the first word won't work.
*/
struct uberblock {
};
/*
* Flags.
*/
/*
* Fixed constants.
*/
/*
* Derived constants.
*/
/* The +2 here is a cheesy way to round up */
/* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */
/* Does dnode have a SA spill blkptr in bonus? */
typedef struct dnode_phys {
/* accounting is protected by dn_dirty_mtx */
} dnode_phys_t;
typedef enum dmu_object_byteswap {
/*
* Allocating a new byteswap type number makes the on-disk format
* incompatible with any other format that uses the same number.
*
* Data can usually be structured to work with one of the
* DMU_BSWAP_UINT* or DMU_BSWAP_ZAP types.
*/
/*
* Defines a uint8_t object type. Object types specify if the data
* in the object is metadata (boolean) and how to byteswap the data
* (dmu_object_byteswap_t).
*/
(DMU_OT_NEWTYPE | \
((metadata) ? DMU_OT_METADATA : 0) | \
((byteswap) & DMU_OT_BYTESWAP_MASK))
typedef enum dmu_object_type {
/* general: */
/* spa: */
/* zil: */
/* dmu: */
/* dsl: */
/* zpl: */
/* zvol: */
/* other; for testing only! */
/* new object types: */
/*
* Names for valid types declared with DMU_OT().
*/
typedef enum dmu_objset_type {
/*
* header for all bonus and spill buffers.
* The header has a fixed portion with a variable number
* of "lengths" depending on the number of variable sized
* attribues which are determined by the "layout number"
*/
typedef struct sa_hdr_phys {
/* ... Data follows the lengths. */
/*
* sa_hdr_phys -> sa_layout_info
*
* 16 10 0
* +--------+-------+
* | hdrsz |layout |
* +--------+-------+
*
* Bits 0-10 are the layout number
* Bits 11-16 are the size of the header.
* The hdrsize is the number * 8
*
* For example.
* hdrsz of 1 ==> 8 byte header
* 2 ==> 16 byte header
*
*/
{ \
}
#define SA_MODE_OFFSET 0
/*
* Intent log header - this on disk structure holds fields to manage
* the log. All fields are 64 bit to easily handle cross architectures.
*/
typedef struct zil_header {
} zil_header_t;
typedef struct objset_phys {
typedef struct dsl_dir_phys {
/*
* how much space our children are accounting for; for leaf
* datasets, == physical space used by fs + snaps
*/
/* Administrative quota setting */
/* Administrative reservation setting */
typedef struct dsl_dataset_phys {
/*
* The ds_fsid_guid is a 56-bit ID that can change to avoid
* collisions. The ds_guid is a 64-bit ID that will never
* change, so there is a small probability that it will collide.
*/
/*
* The names of zap entries in the DIRECTORY_OBJECT of the MOS.
*/
typedef struct mzap_ent_phys {
typedef struct mzap_phys {
/* actually variable size depending on block size */
} mzap_phys_t;
/*
* The (fat) zap is stored in one object. It is an array of
* 1<<FZAP_BLOCK_SHIFT byte blocks. The layout looks like one of:
*
* ptrtbl fits in first block:
* [zap_phys_t zap_ptrtbl_shift < 6] [zap_leaf_t] ...
*
* ptrtbl too big for first block:
* [zap_phys_t zap_ptrtbl_shift >= 6] [zap_leaf_t] [ptrtbl] ...
*
*/
/* any other values are ptrtbl blocks */
/*
* the embedded pointer table takes up half a block:
* block size / entry size (2^3) / 2
*/
/*
* The embedded pointer table starts half-way through the block. Since
* the pointer table itself is half the block, it starts at (64-bit)
* word number (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)).
*/
/*
* TAKE NOTE:
* If zap_phys_t is modified, zap_byteswap() must be modified.
*/
typedef struct zap_phys {
struct zap_table_phys {
} zap_ptrtbl;
/*
* This structure is followed by padding, and then the embedded
* pointer table. The embedded pointer table takes up second
* half of the block. It is accessed using the
* ZAP_EMBEDDED_PTRTBL_ENT() macro.
*/
} zap_phys_t;
typedef struct fat_zap {
} fat_zap_t;
/* chunk size = 24 bytes */
/*
* The amount of space available for chunks is:
* block size (1<<l->l_bs) - hash entry size (2) * number of hash
* entries - header space (2*chunksize)
*/
#define ZAP_LEAF_NUMCHUNKS(l) \
ZAP_LEAF_CHUNKSIZE - 2)
/*
* The amount of space within the chunk available for the array is:
* chunk size - space for type (1) - space for next pointer (2)
*/
/*
* Low water mark: when there are only this many chunks free, start
* growing the ptrtbl. Ideally, this should be larger than a
* "reasonably-sized" entry. 20 chunks is more than enough for the
* largest directory entry (MAXNAMELEN (256) byte name, 8-byte value),
* while still being only around 3% for 16k blocks.
*/
/*
* The leaf hash table has block size / 2^5 (32) number of entries,
* which should be more than enough for the maximum number of entries,
* which is less than block size / CHUNKSIZE (24) / minimum number of
* chunks per entry (3).
*/
/*
* The chunks start immediately after the hash table. The end of the
* hash table is at l_hash + HASH_NUMENTRIES, which we simply cast to a
* chunk_t.
*/
((zap_leaf_chunk_t *) \
typedef enum zap_chunk_type {
/*
* TAKE NOTE:
* If zap_leaf_phys_t is modified, zap_leaf_byteswap() must be modified.
*/
typedef struct zap_leaf_phys {
struct zap_leaf_header {
/* above is accessable to zap, below is zap_leaf private */
/*
* The header is followed by a hash table with
* ZAP_LEAF_HASH_NUMENTRIES(zap) entries. The hash table is
* followed by an array of ZAP_LEAF_NUMCHUNKS(zap)
* zap_leaf_chunk structures. These structures are accessed
* with the ZAP_LEAF_CHUNK() macro.
*/
typedef union zap_leaf_chunk {
struct zap_leaf_entry {
} l_entry;
struct zap_leaf_array {
} l_array;
struct zap_leaf_free {
} l_free;
typedef struct zap_leaf {
} zap_leaf_t;
/*
* Define special zfs pflags
*/
/*
* special attributes for master node.
*/
/*
* ZPL version - rev'd whenever an incompatible on-disk format change
*/
/*
* The directory entry has the type (currently unused on Solaris) in the
* top 4 bits, and the object number in the low 48 bits. The "middle"
* 12 bits are unused.
*/
typedef struct ace {
} ace_t;
typedef struct zfs_znode_acl {
/*
* This is the persistent portion of the znode. It is stored
* in the "bonus buffer" of the file. Short symbolic links
* are also stored in the bonus buffer.
*/
typedef struct znode_phys {
/*
* Data may pad out any remaining bytes in the znode buffer, eg:
*
* |<---------------------- dnode_phys (512) ------------------------>|
* |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->|
* |<---- znode (264) ---->|<---- data (56) ---->|
*
* At present, we only use this space to store symbolic links.
*/
} znode_phys_t;
/*
* In-core vdev representation.
*/
struct vdev;
typedef struct vdev {
} vdev_t;
/*
* In-core pool representation.
*/
typedef struct spa {
} spa_t;
static void decode_embedded_bp_compressed(const blkptr_t *, void *);