/* md_mddb.h revision 7c478bd95313f5f23a4c958a745db2134aa03244 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_MD_MDDB_H
#define _SYS_MD_MDDB_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/types.h>
#include <sys/buf.h>
#ifdef __cplusplus
extern "C" {
#endif
#if 0 /* DRP FOR DEBUGGING */
#define MDDB_FAKE
#endif
/*
 * Private flags.
 * Each MD_PRV_PEND* value is MD_PRV_GOTIT combined with the matching
 * "pending" bit (delete, commit or cleanup).
 */
#define MD_PRV_GOTIT 0x0001 /* Been snarfed */
#define MD_PRV_DELETE 0x0002 /* Record pending to be deleted */
#define MD_PRV_COMMIT 0x0004 /* Record pending to be committed */
#define MD_PRV_CLEANUP 0x0008 /* Record pending to be cleaned up */
#define MD_PRV_CONVD 0x0010 /* Record has been converted (32->64) */
#define MD_PRV_PENDDEL (MD_PRV_GOTIT | MD_PRV_DELETE)
#define MD_PRV_PENDCOM (MD_PRV_GOTIT | MD_PRV_COMMIT)
#define MD_PRV_PENDCLEAN (MD_PRV_GOTIT | MD_PRV_CLEANUP)
/*
 * MDDB_E_* error codes.  Every code is a distinct negative value,
 * running from -1 (MDDB_E_INVALID) down to -21 (MDDB_E_NODEVID).
 */
#define MDDB_E_INVALID (-1) /* an invalid argument was passed */
#define MDDB_E_EXISTS (-2) /* doing an operation a 2nd time which can */
/* only be done once */
#define MDDB_E_MASTER (-3) /* problem occurred accessing master block */
/* returned from NEW_DEV */
#define MDDB_E_TOOSMALL (-4) /* device is not large enough */
#define MDDB_E_NORECORD (-5) /* record does not exist */
/*
 * returned from: mddb_getnextrec
 * mddb_getrecsize
 * mddb_commitrec
 * mddb_commitrecs
 * mddb_deleterec
 */
#define MDDB_E_NOSPACE (-6) /* no space to create record */
#define MDDB_E_NOTNOW (-7) /* do not presently have enough resources */
/* to perform requested operation */
#define MDDB_E_NODB (-8) /* no database exists */
#define MDDB_E_NOTOWNER (-9) /* have not been told to grab this set */
#define MDDB_E_STALE (-10) /* database is stale */
#define MDDB_E_TOOFEW (-11) /* not enough replicas available */
#define MDDB_E_TAGDATA (-12) /* tagged data detected */
#define MDDB_E_ACCOK (-13) /* 50/50 mode */
#define MDDB_E_NTAGDATA (-14) /* tagop try, no tag data */
#define MDDB_E_ACCNOTOK (-15) /* accop try, no accept possible */
#define MDDB_E_NOLOCBLK (-16) /* No valid locators found */
#define MDDB_E_NOLOCNMS (-17) /* No valid locator name information */
#define MDDB_E_NODIRBLK (-18) /* No directory blocks found */
#define MDDB_E_NOTAGREC (-19) /* No tag record blocks found */
#define MDDB_E_NOTAG (-20) /* No matching tag record found */
#define MDDB_E_NODEVID (-21) /* No device id found */
/*
 * Replica size limits, expressed as block counts.  The 16MB and 256MB
 * figures quoted for the multinode (MN) limits correspond to 512-byte
 * blocks (32768 * 512 and 524288 * 512 respectively).
 */
#define MDDB_MINBLKS 16 /* enough for a few metadevices */
#define MDDB_MAXBLKS 8192 /* size of free bit map (must be / 8) */
#define MDDB_MN_MINBLKS 32768 /* Multinode metadb minimum size */
/* 16MB */
#define MDDB_MN_MAXBLKS 524288 /* size of free bit map (must be / 8) */
/* 256MB */
/*
 * MDDB_C_* flags.  Each value is a distinct single bit, so they can be
 * OR-ed together.
 * NOTE(review): the field that carries these bits is not visible in this
 * header - confirm against the users of MDDB_C_*.
 */
#define MDDB_C_STALE 0x0001
#define MDDB_C_TOOFEW 0x0002
#define MDDB_C_NOTOWNER 0x0004
#define MDDB_C_SET_MN_STALE 0x0008 /* Set MN set to stale */
#define MDDB_C_IMPORT 0x0010
/*
 * Defines used to set/reset/get the new master flag in the set structure.
 * Used during the reconfig cycle to determine quickly whether there is
 * a new master for the set.
 */
#define MDDB_NM_SET 0x0001
#define MDDB_NM_RESET 0x0002
#define MDDB_NM_GET 0x0004
/* Definitions of flags in Locator Block Device ID data area - mddb_did_info */
#define MDDB_DID_EXISTS 0x0001 /* Device ID exists */
#define MDDB_DID_VALID 0x0002 /* Device ID valid on current system */
#define MDDB_DID_UPDATED 0x0004 /* locator/sidelocator info updated */
/* Definitions of flags in Locator Block - mddb_lb (see lb_flags) */
#define MDDB_DEVID_STYLE 0x0001 /* Locator Block in Device ID format */
#define MDDB_MNSET 0x0002 /* MDDB is for a multi-node set */
#define MDDB_MAX_PATCH 25 /* number of locations that */
/* can be patched in etc/system */
/*
 * Set struct used by all parts of the driver, to store anchor pointers.
 * Most anchors are untyped (void * / void **); this header does not say
 * which concrete types they point at.
 */
typedef struct md_set {
uint_t s_status; /* set status */
void **s_ui; /* set unit incore anchor */
void **s_un; /* set unit anchor */
void *s_hsp; /* set Hot Spare Pool anchor */
void *s_hs; /* set Hot Spare anchor */
void *s_db; /* set MDDB anchor */
kmutex_t s_dbmx; /* set MDDB mutex */
void *s_nm; /* set namespace anchor */
mddb_recid_t s_nmid; /* set namespace anchor record */
void *s_did_nm; /* set device id namespace anchor */
mddb_recid_t s_did_nmid; /* set device id namespace anchor rec */
void *s_dtp; /* set data tag rec */
int s_am_i_master; /* incore master flag for this node */
md_mn_nodeid_t s_nodeid; /* nodeid of this node - for MN sets */
uint_t s_rcnt; /* incore resync count for set */
} md_set_t;
/*
 * Magic numbers.  Each value spells a four character ASCII tag when read
 * as big-endian bytes: "mdmb", "mddb", "mdrb", "mdlb", "mdln", "mddt",
 * "mddi", "mddu" and "mdde" respectively.
 */
#define MDDB_MAGIC_MB 0x6d646d62 /* magic number for master blocks */
#define MDDB_MAGIC_DB 0x6d646462 /* magic number for directory blocks */
#define MDDB_MAGIC_RB 0x6d647262 /* magic number for record blocks */
#define MDDB_MAGIC_LB 0x6d646c62 /* magic number for locator blocks */
#define MDDB_MAGIC_LN 0x6d646c6e /* magic number for locator names */
#define MDDB_MAGIC_DT 0x6d646474 /* magic number for data tag */
#define MDDB_MAGIC_DI 0x6d646469 /* magic number for device ID block */
#define MDDB_MAGIC_DU 0x6d646475 /* magic num for dummy mb */
#define MDDB_MAGIC_DE 0x6d646465 /* magic num for mb devid */
/* NOTE(review): the use of MDDB_GLOBAL_XOR is not shown in this header. */
#define MDDB_GLOBAL_XOR 1234567890
/*
 * A revision is a 16-bit value: the high byte is the major number and
 * the low byte is the minor number.  These two masks select each part.
 */
#define MDDB_REV_MAJOR (uint_t)0xff00
#define MDDB_REV_MINOR (uint_t)0x00ff
/*
 * MDDB_REV_MNMB:
 * If a MN diskset, master block revision is set to MDDB_REV_MNMB.
 * Even though the master block structure is no different
 * for a MN set, setting the revision field to a different
 * number keeps any pre-MN_diskset code from accessing
 * this diskset. It also allows for an early determination
 * of a MN diskset when reading in from disk so that the
 * proper size locator block and locator names structure
 * can be read in, thus saving time on diskset startup.
 * Since there is no change in master block structure, only the
 * MDDB_REV_MINOR portion of the revision was incremented.
 *
 * MDDB_REV_MNLB:
 * If a MN diskset, the locator block structure is a different size in
 * order to accommodate up to MD_MNMAXSIDES nodes in a diskset
 * with any nodeid (sideno) allowed.
 * The revision is set to MDDB_REV_MNLB which is a change of the
 * MDDB_REV_MAJOR portion of the revision.
 *
 * MDDB_REV_MNLN:
 * If a MN diskset, the locator names structure is a different size in
 * order to accommodate up to MD_MNMAXSIDES nodes in a diskset
 * with any nodeid (sideno) allowed.
 * The revision is set to MDDB_REV_MNLN which is a change of the
 * MDDB_REV_MAJOR portion of the revision.
 */
#define MDDB_REV_MB (uint_t)0x0201
#define MDDB_REV_MNMB (uint_t)0x0202
#define MDDB_REV_DB (uint_t)0x0201
#define MDDB_REV_LB (uint_t)0x0500
#define MDDB_REV_MNLB (uint_t)0x0600
#define MDDB_REV_LN (uint_t)0x0100
#define MDDB_REV_MNLN (uint_t)0x0300
#define MDDB_REV_RB (uint_t)0x0200
#define MDDB_REV_RB64 (uint_t)0x0201
#define MDDB_REV_DT (uint_t)0x0100
#define MDDB_REV_DI (uint_t)0x0100
/* Database block size in bytes: the system DEV_BSIZE as a uint_t. */
#define MDDB_BSIZE (uint_t)DEV_BSIZE
/* Array sizes: ln_prefixes[] in the locator names, lb_drvnm[] in the LB. */
#define MDDB_PREFIXCNT 10
#define MDDB_DRVNMCNT 10
/* Block number / block count type used throughout the database structures. */
typedef int mddb_block_t;
/*
 * Where the native long long alignment is 8 but the 32-bit alignment is 4,
 * pack the structures that follow on 4-byte boundaries.  The matching
 * "#pragma pack()" that ends this region appears further down the file.
 * NOTE(review): presumably this keeps these layouts identical to the
 * 32-bit ones - confirm.
 */
#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
#pragma pack(4)
#endif
/*
 * Name suffix entry for a MN diskset: a regular name suffix plus the
 * side number it belongs to.
 */
typedef struct md_mnname_suffix {
md_name_suffix mn_ln_suffix;
uint_t mn_ln_sideno;
} md_mnname_suffix_t;
/*
 * Locator names structure for traditional and local disksets.
 * The header (magic, revision, checksum, timestamp) is followed by the
 * name prefixes and a two dimensional suffix array whose first dimension
 * is MD_MAXSIDES and whose second is MDDB_NLB.
 */
typedef struct mddb_ln {
int ln_magic;
uint_t ln_revision;
uint_t ln_checksum;
struct timeval32 ln_timestamp;
md_name_prefix ln_prefixes[MDDB_PREFIXCNT];
/* Don't change array sizes without changing RNDUP_BLKCNT */
md_name_suffix ln_suffixes[MD_MAXSIDES][MDDB_NLB];
} mddb_ln_t;
/*
 * Locator names structure for a MN diskset. Same as for traditional
 * and local disksets except that more sides are supported and the
 * side number can be any number, since the side number is stored
 * in the ln_mnsuffixes structure instead of being used as an index
 * into that array. This means that the whole array may need to be
 * searched in order to find the correct information for a given side
 * number.
 */
typedef struct mddb_mnln {
int ln_magic;
uint_t ln_revision;
uint_t ln_checksum;
struct timeval32 ln_timestamp;
md_name_prefix ln_prefixes[MDDB_PREFIXCNT];
/* Don't change array sizes without changing MDDB_MNLNCNT */
md_mnname_suffix_t ln_mnsuffixes[MD_MNMAXSIDES][MDDB_NLB];
} mddb_mnln_t;
/*
 * RNDUP_BLKCNT(sz, delta)
 *	Number of MDDB_BSIZE blocks needed to hold sz bytes after
 *	(MD_MAXSIDES - 1) * MDDB_NLB elements of delta bytes each have been
 *	subtracted from sz.  The result is rounded up to a whole block.
 *	With delta == 0 this is simply sz rounded up to blocks.
 */
#define	RNDUP_BLKCNT(sz, delta)	\
	(((sz) - ((delta) * ((MD_MAXSIDES - 1) * MDDB_NLB)) + \
	(MDDB_BSIZE - 1)) / MDDB_BSIZE)

/* Block count of a full mddb_ln_t (all MD_MAXSIDES sides present). */
#define	MDDB_LNCNT	RNDUP_BLKCNT(sizeof (mddb_ln_t), 0)

/* Block count of an mddb_ln_t trimmed to a single side of suffixes. */
#define	MDDB_LOCAL_LNCNT	\
	RNDUP_BLKCNT(sizeof (mddb_ln_t), sizeof (md_name_suffix))

/* Block count of a full mddb_mnln_t, rounded up. */
#define	MDDB_MNLNCNT	\
	((sizeof (mddb_mnln_t) + (MDDB_BSIZE - 1)) / MDDB_BSIZE)
/*
 * Data tag block: magic, revision and checksum followed by the data tag
 * itself.
 */
typedef struct mddb_dt {
uint_t dt_mag;
uint_t dt_rev;
uint_t dt_cks;
mddb_dtag_t dt_dtag;
} mddb_dt_t;
/* Size of a data tag block rounded up to whole blocks: bytes, then blocks. */
#define MDDB_DT_BYTES (roundup(sizeof (mddb_dt_t), MDDB_BSIZE))
#define MDDB_DT_BLOCKS (btodb(MDDB_DT_BYTES))
/*
 * Identifier, held either as a serial number string or as a 32-bit
 * creation timestamp.  Used as lb_ident in the locator block and as
 * s_ident in mddb_set.
 */
typedef union identifier {
char serial[MDDB_SN_LEN];
struct timeval32 createtime;
} identifier_t;
/*
 * Locator entry (one element of lb_locators[]): a 32-bit device number,
 * a 32-bit block number and flags.
 */
typedef struct mddb_locator {
dev32_t l_dev;
daddr32_t l_blkno;
int l_flags;
} mddb_locator_t;
/*
 * Side locator entry (one element of lb_sidelocators[][]): a driver name
 * index and a minor number.
 * NOTE(review): l_drvnm_index presumably indexes lb_drvnm[] - confirm.
 */
typedef struct mddb_sidelocator {
uchar_t l_drvnm_index;
minor_t l_mnum;
} mddb_sidelocator_t;
/*
 * Side locator entry for a MN diskset (one element of
 * lb_mnsidelocators[][]).  Same content as mddb_sidelocator_t plus the
 * side number, which is stored in the entry itself.
 */
typedef struct mddb_mnsidelocator {
uchar_t mnl_drvnm_index;
minor_t mnl_mnum;
uint_t mnl_sideno;
} mddb_mnsidelocator_t;
/*
 * Driver name entry (one element of lb_drvnm[]): a length byte followed
 * by a fixed MD_MAXDRVNM byte name buffer.
 */
typedef struct mddb_drvnm {
uchar_t dn_len;
char dn_data[MD_MAXDRVNM];
} mddb_drvnm_t;
/*
 * Locator Block Device ID Information
 * Several device ids may share one disk block in an effort to
 * conserve used replica space; info_firstblk/info_blkcnt locate the
 * block(s) and info_offset/info_length locate the id within them.
 * info_flags takes the MDDB_DID_* values.
 */
typedef struct mddb_did_info {
uint_t info_flags; /* MDDB Device ID flags */
uint_t info_firstblk; /* Device ID Start Block */
uint_t info_blkcnt; /* Device ID Block Count */
uint_t info_offset; /* Device ID offset w/i Block */
uint_t info_length; /* Device ID Length */
uint_t info_checksum; /* Device ID Checksum */
char info_minor_name[32]; /* Minor name of lb dev */
} mddb_did_info_t;
/*
 * Device ID block: a verification header followed by MDDB_NLB device id
 * information entries.
 */
typedef struct mddb_did_blk {
int blk_magic; /* used for verification */
uint_t blk_revision; /* used for verification */
int blk_checksum; /* used for verification */
uint_t blk_commitcnt; /* matches LB's commitcnt */
mddb_did_info_t blk_info[MDDB_NLB];
} mddb_did_blk_t;
/* End of the 4-byte packing region opened by the earlier #pragma pack(4). */
#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
#pragma pack()
#endif
/* Size of a device ID block rounded up to whole blocks: bytes, then blocks. */
#define MDDB_DID_BYTES (roundup(sizeof (mddb_did_blk_t), MDDB_BSIZE))
#define MDDB_DID_BLOCKS (btodb(MDDB_DID_BYTES))
/*
 * Device ID Disk Blocks.
 * Incore singly linked list (via db_next) of disk blocks containing
 * device IDs.
 * The list is built when reading in the mddb_did_blk structure and
 * when reading in the actual disk blocks containing device ids.
 * This list is used to easily write out all disk blocks containing
 * device ids.
 */
typedef struct mddb_did_db {
uint_t db_firstblk; /* Disk Block's logical addr */
uint_t db_blkcnt; /* Contig Disk Block Count */
caddr_t db_ptr; /* Ptr to incore Block(s) */
struct mddb_did_db *db_next; /* Ptr to next in list */
} mddb_did_db_t;
/*
 * Device ID Free List.
 * Incore singly linked list (via free_next) of free space in disk blocks
 * containing device IDs.
 * Used to manage placement of device IDs in disk blocks.
 * All disk blocks on the free list are also in the linked list of disk
 * blocks containing device IDs (mddb_did_db_t).
 */
typedef struct mddb_did_free {
uint_t free_blk; /* Disk Block's logical addr */
uint_t free_offset; /* offset of free space */
uint_t free_length; /* length of free space */
struct mddb_did_free *free_next; /* Ptr to next in list */
} mddb_did_free_t;
/*
 * Device ID Incore Area
 * Contains pointers to the device ID block (did_ic_blkp), the Device ID
 * Disk Block list (did_ic_dbp) and the Device ID Free List (did_ic_freep).
 * Also contains an incore array of pointers to device IDs. The pointers
 * point into the device ID Disk Block list and are used as a
 * shortcut to find incore device IDs.
 */
typedef struct mddb_did_ic {
mddb_did_blk_t *did_ic_blkp;
mddb_did_db_t *did_ic_dbp;
mddb_did_free_t *did_ic_freep;
ddi_devid_t did_ic_devid[MDDB_NLB]; /* Ptr to device IDs */
} mddb_did_ic_t;
/*
 * Locator Block (LB):
 * - Are fixed size, but the size is different
 *   for local/shared set db replicas.
 * - All LB's start at logical block 0.
 * - After a replica quorum is found, there is
 *   only one incore copy of the LB.
 * - LB's are only written when replicas are added, deleted, or errored.
 * - LB's provide information about other replicas and their state.
 *
 * The structure is declared inside a conditional #pragma pack(4) region
 * (opened and closed around this definition).
 */
#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
#pragma pack(4)
#endif
typedef struct mddb_lb {
int lb_magic; /* used for verification */
uint_t lb_revision; /* used for verification */
int lb_checksum; /* used for verification */
uint_t lb_commitcnt; /* IMPORTANT */
struct timeval32 lb_timestamp; /* informative only */
int lb_loccnt; /* used for verification */
identifier_t lb_ident; /* used for verification */
uint_t lb_flags; /* flags describing LB */
uint_t lb_spare[8]; /* Spare/Pad */
mddb_block_t lb_didfirstblk; /* Devid Array Start Block */
mddb_block_t lb_didblkcnt; /* Devid Array Number Blocks */
mddb_block_t lb_dtfirstblk; /* Data Tag Start Block */
mddb_block_t lb_dtblkcnt; /* Data Tag Number Block(s) */
struct timeval32 lb_inittime; /* creation of database */
set_t lb_setno; /* used for verification */
mddb_block_t lb_blkcnt; /* used for verification */
mddb_block_t lb_lnfirstblk;
mddb_block_t lb_lnblkcnt;
mddb_block_t lb_dbfirstblk;
mddb_drvnm_t lb_drvnm[MDDB_DRVNMCNT];
mddb_locator_t lb_locators[MDDB_NLB];
/* Don't change array sizes without changing RNDUP_BLKCNT */
mddb_sidelocator_t lb_sidelocators[MD_MAXSIDES][MDDB_NLB];
} mddb_lb_t;
#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
#pragma pack()
#endif
/*
 * Locator block structure for a MN diskset. Same as for traditional
 * and local disksets except that more sides are supported and the
 * side number can be any number, since the side number is stored
 * in the lb_mnsidelocators structure instead of being used as an index
 * into that array. This means that the whole array may need to be
 * searched in order to find the correct information for a given side
 * number.
 *
 * All members up to and including lb_locators[] are declared exactly as
 * in mddb_lb_t; only the trailing side locator array differs.
 * NOTE(review): unlike mddb_lb_t, this definition is not enclosed in the
 * conditional #pragma pack(4) region - confirm that this is intended.
 */
typedef struct mddb_mnlb {
int lb_magic; /* used for verification */
uint_t lb_revision; /* used for verification */
int lb_checksum; /* used for verification */
uint_t lb_commitcnt; /* IMPORTANT */
struct timeval32 lb_timestamp; /* informative only */
int lb_loccnt; /* used for verification */
identifier_t lb_ident; /* used for verification */
uint_t lb_flags; /* flags describing LB */
uint_t lb_spare[8]; /* Spare/Pad */
mddb_block_t lb_didfirstblk; /* Devid Array Start Block */
mddb_block_t lb_didblkcnt; /* Devid Array Number Blocks */
mddb_block_t lb_dtfirstblk; /* Data Tag Start Block */
mddb_block_t lb_dtblkcnt; /* Data Tag Number Block(s) */
struct timeval32 lb_inittime; /* creation of database */
set_t lb_setno; /* used for verification */
mddb_block_t lb_blkcnt; /* used for verification */
mddb_block_t lb_lnfirstblk;
mddb_block_t lb_lnblkcnt;
mddb_block_t lb_dbfirstblk;
mddb_drvnm_t lb_drvnm[MDDB_DRVNMCNT];
mddb_locator_t lb_locators[MDDB_NLB];
/* Don't change array sizes without changing MDDB_MNLBCNT */
mddb_mnsidelocator_t lb_mnsidelocators[MD_MNMAXSIDES][MDDB_NLB];
} mddb_mnlb_t;
/*
 * Locator block sizes, in MDDB_BSIZE blocks (rounded up):
 *   MDDB_LBCNT        - a full mddb_lb_t
 *   MDDB_LOCAL_LBCNT  - an mddb_lb_t with (MD_MAXSIDES - 1) * MDDB_NLB
 *                       side locator entries subtracted, i.e. one side
 *   MDDB_MNLBCNT      - a full mddb_mnlb_t
 */
#define MDDB_LBCNT RNDUP_BLKCNT(sizeof (mddb_lb_t), 0)
#define MDDB_LOCAL_LBCNT RNDUP_BLKCNT(sizeof (mddb_lb_t), \
sizeof (mddb_sidelocator_t))
#define MDDB_MNLBCNT ((sizeof (mddb_mnlb_t) + (MDDB_BSIZE - 1)) \
/ MDDB_BSIZE)
/*
 * Block map entry, used as mb_blkmap in the master block for the
 * logical->physical block translation.
 * NOTE(review): presumably m_firstblk is the first physical block of a
 * run and m_consecutive the run length - confirm.
 */
typedef struct mddb_map {
daddr32_t m_consecutive;
daddr32_t m_firstblk;
} mddb_map_t;
/*
 * Master block(s) (MB)
 * - Are written by userland; Never by the driver!
 * - Each replica has its own master blocks,
 *   the master block(s) are not shared.
 * - MB's are not in the logical block address space of the database.
 * - MB's are a fixed size record (MDDB_BSIZE)
 * - MB's provide the logical to physical block translation,
 *   for their replica.
 *
 * mb_next is declared as a 32-bit integer under _LP64 and as a pointer
 * otherwise, so the member occupies 32 bits in either case.
 */
typedef struct mddb_mb {
int mb_magic; /* used for verification */
uint_t mb_revision; /* used for verification */
uint_t mb_checksum; /* used for verification */
#ifdef _LP64
uint32_t mb_next; /* incore to next mb */
#else
struct mddb_mb *mb_next; /* incore to next mb */
#endif /* _LP64 */
daddr32_t mb_nextblk; /* block # for next mb */
md_timeval32_t mb_timestamp; /* timestamp */
daddr32_t mb_blkcnt; /* size of blkmap */
daddr32_t mb_blkno; /* physical loc. for this MB */
set_t mb_setno; /* used for verification */
struct timeval32 mb_setcreatetime; /* set creation timestamp */
int spares[7];
mddb_map_t mb_blkmap; /* logical->physical blk map */
int mb_devid_magic; /* verify devid in mb */
short mb_devid_len; /* len of following devid */
char mb_devid[1]; /* devid byte array */
} mddb_mb_t;
/*
 * In-core version of mddb_mb: the master block prefixed by an in-core
 * "next" pointer.  It is known that the mddb_mb is really 512 bytes on
 * disk, and so the in-core size is 512 + sizeof (struct mddb_mb_ic *),
 * which is what MDDB_IC_BSIZE evaluates to.
 */
#define MDDB_IC_BSIZE (MDDB_BSIZE + sizeof (struct mddb_mb_ic *))
typedef struct mddb_mb_ic {
struct mddb_mb_ic *mbi_next;
struct mddb_mb mbi_mddb_mb;
} mddb_mb_ic_t;
/*
 * There can be no address in a record block. The checksum must
 * stay the same wherever the record is in memory. Many
 * things depend on this. Also the timestamp is the time the
 * record was committed, not the time it was written to a particular
 * device.
 *
 * Old definition of mddb_rb, for 32-bit apps and libraries.
 * It differs from mddb_rb32 only in rb_userdata, which is a pointer here.
 */
typedef struct mddb_rb {
uint_t rb_magic;
uint_t rb_revision;
uint_t rb_checksum;
uint_t rb_checksum_fiddle;
uint_t rb_private;
void *rb_userdata;
uint_t rb_commitcnt;
uint_t rb_spare[1];
struct timeval32 rb_timestamp;
int rb_data[1];
} mddb_rb_t;
/*
 * This is, and always will be, the on-disk version of mddb_rb.
 * rb_userdata is a fixed 32-bit value here rather than a pointer.
 */
typedef struct mddb_rb32 {
uint_t rb_magic;
uint_t rb_revision;
uint_t rb_checksum;
uint_t rb_checksum_fiddle;
uint_t rb_private;
uint32_t rb_userdata;
uint_t rb_commitcnt;
uint_t rb_spare[1];
struct timeval32 rb_timestamp;
int rb_data[1];
} mddb_rb32_t;
/*
 * directory entries
 *
 * mddb_optinfo is the element type of the two-entry de_optinfo[] array
 * in each directory entry variant.
 * NOTE(review): o_li looks like a locator index - confirm.
 */
typedef struct mddb_optinfo {
int o_li;
int o_flags;
} mddb_optinfo_t;
/*
 * Old definition of mddb_de, for 32-bit apps and libraries.
 * Same members as the on-disk mddb_de32, except that de_next and de_rb
 * are real pointers.
 */
typedef struct mddb_de {
struct mddb_de *de_next;
mddb_rb_t *de_rb;
mddb_recid_t de_recid;
mddb_type_t de_type1;
uint_t de_type2;
uint_t de_reqsize;
uint_t de_recsize;
mddb_block_t de_blkcount;
uint_t de_flags;
mddb_optinfo_t de_optinfo[2];
mddb_block_t de_blks[1];
} mddb_de_t;
/*
 * In core version of mddb_de; includes pointers for the mddb_rb32_t user
 * data (mddb_rb32_t is used incore).
 * Compared with the on-disk mddb_de32 this adds de_rb_userdata,
 * de_rb_userdata_ic, de_owner_nodeid and de_icreqsize, and the size
 * members are size_t.
 */
typedef struct mddb_de_ic {
void *de_rb_userdata;
void *de_rb_userdata_ic;
uint_t de_owner_nodeid;
struct mddb_de_ic *de_next;
mddb_rb32_t *de_rb;
mddb_recid_t de_recid;
mddb_type_t de_type1;
uint_t de_type2;
size_t de_reqsize;
size_t de_icreqsize;
size_t de_recsize;
uint_t de_blkcount;
uint_t de_flags;
mddb_optinfo_t de_optinfo[2];
mddb_block_t de_blks[1];
} mddb_de_ic_t;
/*
 * Directory block, in-memory form (db_next and db_firstentry are real
 * pointers; the on-disk form is mddb_db32).
 * db_firstentry points at in-core entries (mddb_de_ic_t) when compiled
 * with _KERNEL and at mddb_de_t entries otherwise.
 */
typedef struct mddb_db {
uint_t db_magic;
uint_t db_revision;
uint_t db_checksum;
mddb_block_t db_blknum;
struct mddb_db *db_next;
mddb_block_t db_nextblk;
struct timeval32 db_timestamp;
uint_t db_recsum;
#ifdef _KERNEL
mddb_de_ic_t *db_firstentry;
#else
mddb_de_t *db_firstentry;
#endif
} mddb_db_t;
/*
 * This is, and always will be, the on-disk version of mddb_de.
 * When mddb_de32 is read in it is converted into mddb_de_ic
 * (see de32tode/detode32).  The link members de32_next and de32_rb are
 * fixed 32-bit values rather than pointers.
 */
typedef struct mddb_de32 {
uint32_t de32_next;
uint32_t de32_rb;
mddb_recid_t de32_recid;
mddb_type_t de32_type1;
uint_t de32_type2;
uint_t de32_reqsize;
uint_t de32_recsize;
mddb_block_t de32_blkcount;
uint_t de32_flags;
mddb_optinfo_t de32_optinfo[2];
mddb_block_t de32_blks[1];
} mddb_de32_t;
/*
 * This is, and always will be, the on-disk version of mddb_db.
 * When mddb_db32 is read in it is converted into mddb_db
 * (see db32todb/dbtodb32).
 * To minimize impact on the mddb format, the mddb_db fields remain
 * intact; db32_next and db32_firstentry are fixed 32-bit values rather
 * than pointers.
 */
typedef struct mddb_db32 {
uint_t db32_magic;
uint_t db32_revision;
uint_t db32_checksum;
mddb_block_t db32_blknum;
uint32_t db32_next;
mddb_block_t db32_nextblk;
struct timeval32 db32_timestamp;
uint_t db32_recsum;
uint32_t db32_firstentry;
} mddb_db32_t;
/*
 * de32tode(from, to)
 *	Fill the in-core directory entry *to (mddb_de_ic_t) from the on-disk
 *	entry *from (mddb_de32_t).
 *
 *	- de_rb_userdata is cleared and de_owner_nodeid is set to
 *	  MD_MN_INVALID_NID.
 *	- The 32-bit de32_next/de32_rb values are widened to pointers.
 *	- de32_blkcount entries of de32_blks[] are copied, so the caller
 *	  must have room for that many de_blks[] entries in *to.
 *	- de_rb_userdata_ic and de_icreqsize are not touched.
 *
 *	Both arguments are evaluated several times; pass side-effect free
 *	pointer expressions.  The arguments are parenthesized and the loop
 *	index has a macro-specific name, so expressions such as "&arr[i]"
 *	are safe to pass.  The expansion is a brace-enclosed block.
 */
#define	de32tode(from, to) \
{ \
	int de32tode_idx; \
	(to)->de_rb_userdata = NULL; \
	(to)->de_owner_nodeid = MD_MN_INVALID_NID; \
	(to)->de_next = \
	    (struct mddb_de_ic *)(uintptr_t)(from)->de32_next; \
	(to)->de_rb = (mddb_rb32_t *)(uintptr_t)(from)->de32_rb; \
	(to)->de_recid = (from)->de32_recid; \
	(to)->de_type1 = (from)->de32_type1; \
	(to)->de_type2 = (from)->de32_type2; \
	(to)->de_reqsize = (from)->de32_reqsize; \
	(to)->de_recsize = (from)->de32_recsize; \
	(to)->de_blkcount = (from)->de32_blkcount; \
	(to)->de_flags = (from)->de32_flags; \
	(to)->de_optinfo[0] = (from)->de32_optinfo[0]; \
	(to)->de_optinfo[1] = (from)->de32_optinfo[1]; \
	for (de32tode_idx = 0; \
	    de32tode_idx < (from)->de32_blkcount; de32tode_idx++) \
		(to)->de_blks[de32tode_idx] = \
		    (from)->de32_blks[de32tode_idx]; \
}
/*
 * detode32(from, to)
 *	Fill the on-disk directory entry *to (mddb_de32_t) from the in-core
 *	entry *from (mddb_de_ic_t); the inverse of de32tode.
 *
 *	- The de_next/de_rb pointers are narrowed to 32-bit values.
 *	- The size_t sizes are stored into the 32-bit on-disk members.
 *	- de_blkcount entries of de_blks[] are copied, so the caller must
 *	  have room for that many de32_blks[] entries in *to.
 *	- The in-core only members (de_rb_userdata, de_rb_userdata_ic,
 *	  de_owner_nodeid, de_icreqsize) have no on-disk counterpart and
 *	  are not written.
 *
 *	Both arguments are evaluated several times; pass side-effect free
 *	pointer expressions.  The arguments are parenthesized and the loop
 *	index has a macro-specific name, so expressions such as "&arr[i]"
 *	are safe to pass.  The expansion is a brace-enclosed block.
 */
#define	detode32(from, to) \
{ \
	int detode32_idx; \
	(to)->de32_next = (uint32_t)(uintptr_t)(from)->de_next; \
	(to)->de32_rb = (uint32_t)(uintptr_t)(from)->de_rb; \
	(to)->de32_recid = (from)->de_recid; \
	(to)->de32_type1 = (from)->de_type1; \
	(to)->de32_type2 = (from)->de_type2; \
	(to)->de32_reqsize = (from)->de_reqsize; \
	(to)->de32_recsize = (from)->de_recsize; \
	(to)->de32_blkcount = (from)->de_blkcount; \
	(to)->de32_flags = (from)->de_flags; \
	(to)->de32_optinfo[0] = (from)->de_optinfo[0]; \
	(to)->de32_optinfo[1] = (from)->de_optinfo[1]; \
	for (detode32_idx = 0; \
	    detode32_idx < (from)->de_blkcount; detode32_idx++) \
		(to)->de32_blks[detode32_idx] = \
		    (from)->de_blks[detode32_idx]; \
}
/*
 * db32todb(from, to)
 *	Fill the in-memory directory block *to (mddb_db_t) from the on-disk
 *	block *from (mddb_db32_t).  The 32-bit db32_next/db32_firstentry
 *	values are widened to pointers; every other member is copied as is.
 *
 *	The expansion is a single brace-enclosed block (like de32tode), so
 *	the whole conversion is governed by an enclosing "if" or loop, not
 *	just its first assignment.  Both arguments are parenthesized and
 *	evaluated several times; pass side-effect free pointer expressions.
 *
 *	NOTE(review): db_firstentry is cast to mddb_de_ic_t *, which is the
 *	member's type only when _KERNEL is defined (see mddb_db_t).
 */
#define	db32todb(from, to) \
{ \
	(to)->db_magic = (from)->db32_magic; \
	(to)->db_revision = (from)->db32_revision; \
	(to)->db_checksum = (from)->db32_checksum; \
	(to)->db_blknum = (from)->db32_blknum; \
	(to)->db_next = (struct mddb_db *)(uintptr_t)(from)->db32_next; \
	(to)->db_nextblk = (from)->db32_nextblk; \
	(to)->db_timestamp = (from)->db32_timestamp; \
	(to)->db_recsum = (from)->db32_recsum; \
	(to)->db_firstentry = \
	    (mddb_de_ic_t *)(uintptr_t)(from)->db32_firstentry; \
}
/*
 * dbtodb32(from, to)
 *	Fill the on-disk directory block *to (mddb_db32_t) from the
 *	in-memory block *from (mddb_db_t); the inverse of db32todb.  The
 *	db_next/db_firstentry pointers are narrowed to 32-bit values; every
 *	other member is copied as is.
 *
 *	The expansion is a single brace-enclosed block (like detode32), so
 *	the whole conversion is governed by an enclosing "if" or loop, not
 *	just its first assignment.  Both arguments are parenthesized and
 *	evaluated several times; pass side-effect free pointer expressions.
 */
#define	dbtodb32(from, to) \
{ \
	(to)->db32_magic = (from)->db_magic; \
	(to)->db32_revision = (from)->db_revision; \
	(to)->db32_checksum = (from)->db_checksum; \
	(to)->db32_blknum = (from)->db_blknum; \
	(to)->db32_next = (uint32_t)(uintptr_t)(from)->db_next; \
	(to)->db32_nextblk = (from)->db_nextblk; \
	(to)->db32_timestamp = (from)->db_timestamp; \
	(to)->db32_recsum = (from)->db_recsum; \
	(to)->db32_firstentry = \
	    (uint32_t)(uintptr_t)(from)->db_firstentry; \
}
/*
 * Information about a replica of the data base.
 * Entries are chained through ri_next; mddb_set.s_rip holds the
 * "replicas incore list".  Each entry carries the replica's device and
 * block number together with pointers to its in-core master block,
 * locator block, data tag and device ID area.
 */
typedef struct mddb_ri {
struct mddb_ri *ri_next;
uint_t ri_flags;
uint_t ri_commitcnt;
int ri_transplant;
md_dev64_t ri_dev;
daddr32_t ri_blkno;
char ri_driver[16];
mddb_mb_ic_t *ri_mbip;
mddb_lb_t *ri_lbp;
mddb_dt_t *ri_dtp;
mddb_did_ic_t *ri_did_icp;
ddi_devid_t ri_devid;
ddi_devid_t ri_old_devid;
char ri_minor_name[MDDB_MINOR_NAME_MAX];
char ri_devname[MAXPATHLEN];
} mddb_ri_t;
/*
 * Buffer list element: an embedded buf_t together with a pointer to a
 * locator, chained through bf_next.  mddb_set.s_freebufhead is a pointer
 * to this type.
 */
typedef struct mddb_bf {
struct mddb_bf *bf_next;
mddb_locator_t *bf_locator;
buf_t bf_buf;
} mddb_bf_t;
/*
 * Information for sets of databases (which include replicas)
 *
 * A record id packs two values into one integer: the set number in the
 * bits from MDDB_SETSHIFT upwards (selected by MDDB_SETMASK) and the
 * per-set id in the bits below MDDB_SETSHIFT (selected by
 * MDDB_RECIDMASK).
 *   DBSET(id)       - extract the set number
 *   DBID(id)        - extract the per-set id
 *   MAKERECID(s, i) - combine set number s and id i; each is masked to
 *                     its own bit range
 */
#define MDDB_BITSRECID 31
#define MDDB_SETSHIFT (MDDB_BITSRECID - MD_BITSSET)
#define MDDB_SETMASK (MD_SETMASK << MDDB_SETSHIFT)
#define MDDB_RECIDMASK ((1 << MDDB_SETSHIFT) - 1)
#define DBSET(id) (((id) & MDDB_SETMASK) >> MDDB_SETSHIFT)
#define DBID(id) ((id) & MDDB_RECIDMASK)
#define MAKERECID(s, i) ((((s) << MDDB_SETSHIFT) & MDDB_SETMASK) | \
((i) & MDDB_RECIDMASK))
/*
 * MDDB_PARSE_* bit flags; MDDB_PARSE_MASK covers the low four bits.
 * NOTE(review): presumably the values kept in mddb_set.s_mn_parseflags
 * and s_mn_parseflags_sending - confirm.
 */
#define MDDB_PARSE_LOCBLK 0x00000001
#define MDDB_PARSE_LOCNM 0x00000002
#define MDDB_PARSE_OPTRECS 0x00000004
#define MDDB_PARSE_MASK 0x0000000F
/* Requests to block / unblock the sending of parse messages. */
#define MDDB_BLOCK_PARSE 0x00000001 /* Block sending parse msgs */
#define MDDB_UNBLOCK_PARSE 0x00000002 /* Unblock sending parse msgs */
/*
 * Per-set database state.
 *
 * We need to keep s_ident and s_inittime 32 bit. They are used in mddb_lb
 * (compare lb_ident and lb_inittime, which have the same types).
 */
typedef struct mddb_set {
uint_t s_setno; /* set number */
uint_t s_sideno; /* side number */
identifier_t s_ident; /* set identifier */
char *s_setname; /* set name */
mddb_mb_ic_t **s_mbiarray; /* master blocks array */
mddb_db_t *s_dbp; /* directory block */
mddb_lb_t *s_lbp; /* locator block */
/* May be cast to mddb_mnlb_t */
/* if accessing sidenames in */
/* MN diskset */
mddb_ln_t *s_lnp; /* locator names block */
/* May be cast to mddb_mnln_t */
/* if accessing sidenames in */
/* MN diskset */
mddb_dtag_lst_t *s_dtlp; /* List of data tags found */
mddb_did_ic_t *s_did_icp; /* Device ID incore area */
mddb_ri_t *s_rip; /* replicas incore list */
int s_freeblkcnt; /* visible for test code */
int s_totalblkcnt; /* visible for test code */
int s_mn_parseflags; /* mddb parse flags for MNset */
int s_mn_parseflags_sending; /* parse flgs sent to slaves */
uchar_t *s_freebitmap; /* free blocks bitmap */
uint_t s_freebitmapsize; /* size of bitmap */
struct timeval32 s_inittime; /* timestamp set created */
mddb_recid_t s_zombie; /* zombie record - createrec */
int s_staledeletes; /* number of stale deleterec */
int s_optcmtcnt; /* Following are opt. record */
int s_opthavelck; /* bookkeeping records ... */
int s_optwantlck;
kcondvar_t s_optwantlck_cv;
int s_optwaiterr;
int s_opthungerr;
kcondvar_t s_opthungerr_cv;
int s_opthavequeuinglck;
int s_optwantqueuinglck;
kcondvar_t s_optqueuing_cv;
ulong_t s_bufmisses;
mddb_bf_t *s_freebufhead;
int s_bufwakeup;
kcondvar_t s_buf_cv;
size_t s_databuffer_size;
void *s_databuffer;
int s_singlelockgotten;
int s_singlelockwanted;
kcondvar_t s_single_thread_cv;
md_hi_arr_t s_med;
} mddb_set_t;
/*
 * Database entry points.
 *
 * Normally (MDDB_FAKE not defined) the kernel build gets the prototypes
 * of the routines implemented in md_mddb.c.  When MDDB_FAKE is defined
 * (a debugging aid) no prototypes are declared; instead a subset of the
 * entry points is replaced by stub macros that evaluate to a constant,
 * and mddb_getrecaddr() yields the buffer most recently allocated by the
 * stub mddb_createrec().
 */
#ifndef MDDB_FAKE
#ifdef _KERNEL
/* md_mddb.c */
extern uint_t		mddb_lb_did_convert(mddb_set_t *,
			    uint_t, uint_t *);
extern void		mddb_locatorblock2splitname(mddb_ln_t *,
			    int, side_t, md_splitname *);
extern int		mddb_configure(mddb_cfgcmd_t,
			    struct mddb_config *);
extern mddb_recid_t	mddb_getnextrec(mddb_recid_t,
			    mddb_type_t, uint_t);
extern int		mddb_getoptloc(mddb_optloc_t *);
extern void		*mddb_getrecaddr(mddb_recid_t);
extern void		*mddb_getrecaddr_resize(mddb_recid_t, size_t,
			    off_t);
extern int		mddb_getrecprivate(mddb_recid_t);
extern void		mddb_setrecprivate(mddb_recid_t, uint_t);
extern mddb_de_ic_t	*mddb_getrecdep(mddb_recid_t);
extern mddb_type_t	mddb_getrectype1(mddb_recid_t);
extern int		mddb_getrectype2(mddb_recid_t);
extern int		mddb_getrecsize(mddb_recid_t);
extern int		mddb_commitrec(mddb_recid_t);
extern int		mddb_commitrecs(mddb_recid_t *);
extern int		mddb_deleterec(mddb_recid_t);
extern mddb_recstatus_t	mddb_getrecstatus(mddb_recid_t);
extern mddb_recid_t	mddb_createrec(size_t usersize,
			    mddb_type_t type, uint_t type2,
			    md_create_rec_option_t option, set_t setno);
extern void		mddb_init(void);
extern void		mddb_unload(void);
extern void		mddb_unload_set(set_t setno);
extern mddb_recid_t	mddb_makerecid(set_t setno, mddb_recid_t id);
extern set_t		mddb_getsetnum(mddb_recid_t id);
extern char		*mddb_getsetname(set_t setno);
extern side_t		mddb_getsidenum(set_t setno);
extern int		mddb_ownset(set_t setno);
extern int		getmed_ioctl(mddb_med_parm_t *medpp, int mode);
extern int		setmed_ioctl(mddb_med_parm_t *medpp, int mode);
extern int		updmed_ioctl(mddb_med_upd_parm_t *medpp,
			    int mode);
extern int		take_set(mddb_config_t *cp, int mode);
extern int		release_set(mddb_config_t *cp, int mode);
extern int		gettag_ioctl(mddb_dtag_get_parm_t *dtgpp,
			    int mode);
extern int		usetag_ioctl(mddb_dtag_use_parm_t *dtupp,
			    int mode);
extern int		accept_ioctl(mddb_accept_parm_t *medpp,
			    int mode);
extern int		md_update_locator_namespace(set_t setno,
			    side_t side, char *dname, char *pname,
			    md_dev64_t devt);
extern int		mddb_validate_lb(set_t setno, int *rmaxsz);
extern int		mddb_getinvlb_devid(set_t setno, int count,
			    int size, char **ctdptr);
extern int		md_update_minor(set_t, side_t, mdkey_t);
#ifdef DEBUG
extern void		mddb_check(void);
#endif /* DEBUG */
#endif /* _KERNEL */
#else
/*
 * MDDB_FAKE stubs.
 *
 * NOTE(review): mddb_fakeit is a variable definition in a header, so
 * every file that includes this header with MDDB_FAKE set defines it.
 * NOTE(review): the stub mddb_createrec() takes three arguments while
 * the real routine takes five; left as is for existing users.
 */
caddr_t mddb_fakeit;
/*
 * The real routine is named mddb_lb_did_convert(); the historical stub
 * name md_lb_did_convert is kept so existing references still expand.
 */
#define	md_lb_did_convert(a, b, c)	(0)
#define	mddb_lb_did_convert(a, b, c)	(0)
#define	mddb_configure(a, b)	(0)
#define	mddb_getnextrec(a, b, c)	((mddb_recid_t)0)
#define	mddb_getrecaddr(a)	(mddb_fakeit)
#define	mddb_getrecprivate(a)	(0)
#define	mddb_setrecprivate(a, b)	(0)
#define	mddb_getrectype1(a)	(0)
#define	mddb_getrectype2(a)	(0)
#define	mddb_getrecsize(a)	(0)
#define	mddb_commitrec(a)	(0)
#define	mddb_commitrecs(a)	(0)
#define	mddb_deleterec(a)	(0)
#define	mddb_getrecstatus(a)	(MDDB_OK)
#define	mddb_createrec(s, a, b)	(0xffff & (int)(mddb_fakeit = \
	(caddr_t)kmem_zalloc(s, KM_SLEEP)))
#define	mddb_unload()	(0)
#endif
/* Sleep policy selectors (plain values, not bits). */
#define MDDB_NOSLEEP 1
#define MDDB_SLEEPOK 0
/*
 * Option bit flags (distinct single bits).
 * NOTE(review): the routines that accept these flags are not visible in
 * this header.
 */
#define MDDB_NOOLDOK 0x1
#define MDDB_MUSTEXIST 0x2
#define MDDB_NOINIT 0x4
#define MDDB_MULTINODE 0x8
#define MDDB_MN_STALE 0x10 /* MN set is stale */
/* Flags passed to selectreplicas - not a bit mask (values 0, 1 and 2) */
#define MDDB_SCANALL 1
#define MDDB_RETRYSCAN 0
#define MDDB_SCANALLSYNC 2 /* During reconfig, sync up incore */
/* and ondisk mddb by writing incore */
/* values to disk. Don't write */
/* change log records. */
/* Flags passed to writestart and writecopy */
#define MDDB_WRITECOPY_ALL 1 /* Write all incore mddb to disk */
#define MDDB_WRITECOPY_SYNC 2 /* Write incore mddb to disk except */
/* - change log records */
/* - optimized resync records */
/* Probe selectors (plain values). */
#define MDDB_PROBE 1
#define MDDB_NOPROBE 0
/*
 * MN diskset definitions used to determine if a slave can write
 * directly to the mddb. ONLY_MASTER allows only the master node
 * to write to the mddb. ANY_NODE allows any node to write
 * to the mddb.
 */
#define MDDB_WR_ONLY_MASTER 0
#define MDDB_WR_ANY_NODE 1
/* Record lock state bits. */
#define MDDB_L_LOCKED 0x0001 /* this record is locked */
#define MDDB_L_WANTED 0x0002
#ifdef __cplusplus
}
#endif
#endif /* _SYS_MD_MDDB_H */