fsys_zfs.c revision 1
/*
* GRUB -- GRand Unified Bootloader
* Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
*/
/*
* The zfs plug-in routines for GRUB are:
*
* zfs_mount() - locates a valid uberblock of the root pool and reads
* in its MOS at the memory address MOS.
*
* zfs_open() - locates a plain file object by following the MOS
* and places its dnode at the memory address DNODE.
*
* zfs_read() - reads in the data blocks pointed to by the DNODE.
*
* ZFS_SCRATCH is used as a working area.
*
* (memory addr)   MOS      DNODE      ZFS_SCRATCH
*                  |         |          |
*          +-------V---------V----------V---------------+
*   memory |       | dnode   | dnode    |  scratch      |
*          |       | 512B    | 512B     |  area         |
*          +--------------------------------------------+
*/
#ifdef FSYS_ZFS
#include "shared.h"
#include "filesys.h"
#include "fsys_zfs.h"
/* cache for a file block of the currently zfs_open()-ed file */
static uint64_t file_start = 0;
/* cache for a dnode block */
static uint64_t dnode_start = 0;
static uberblock_t current_uberblock;
static char *stackbase;
{
{"inherit", 0}, /* ZIO_COMPRESS_INHERIT */
{"off", 0}, /* ZIO_COMPRESS_OFF */
{"empty", 0} /* ZIO_COMPRESS_EMPTY */
};
/*
* Our own version of bcmp().
*/
static int
{
do {
return (1);
} while (--n != 0);
}
return (0);
}
/*
* Our own version of log2(). Same thing as highbit()-1.
*/
static int
{
int i = 0;
while (num > 1) {
i++;
}
return (i);
}
/* Checksum Functions */
static void
{
ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
}
/* Checksum Table and Values */
};
/*
* zio_checksum_verify: Provides support for checksum verification.
*
* Fletcher2, Fletcher4, and SHA256 are supported.
*
* Return:
* -1 = Failure
* 0 = Success
*/
static int
{
/* byteswap is not supported */
if (byteswap)
return (-1);
return (-1);
zc = expected_cksum;
} else {
}
return (-1);
return (0);
}
/*
* vdev_label_start returns the physical disk offset (in bytes) of
* label "l".
*/
static uint64_t
{
}
/*
* vdev_uberblock_compare takes two uberblock structures and returns an integer
* indicating the more recent of the two.
* Return Value = 1 if ub2 is more recent
* Return Value = -1 if ub1 is more recent
* The most recent uberblock is determined using its transaction number and
* timestamp. The uberblock with the highest transaction number is
* considered "newer". If the transaction numbers of the two blocks match, the
* timestamps are compared to determine the "newer" of the two.
*/
static int
{
return (-1);
return (1);
return (-1);
return (1);
return (0);
}
/*
* Three pieces of information are needed to verify an uberblock: the magic
* number, the version number, and the checksum.
*
* Currently Implemented: version number, magic number
* Need to Implement: checksum
*
* Return:
* 0 - Success
* -1 - Failure
*/
static int
{
return (-1);
return (0);
return (-1);
}
/*
* Find the best uberblock.
* Return:
* Success - Pointer to the best uberblock.
* Failure - NULL
*/
static uberblock_phys_t *
{
int i;
for (i = 0; i < (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT); i++) {
} else if (vdev_uberblock_compare(
&(ub_array[i].ubp_uberblock),
&(ubbest->ubp_uberblock)) > 0) {
}
}
}
return (ubbest);
}
/*
* Read a block of data based on the gang block address dva,
* and put its data in buf.
*
* Return:
* 0 - success
* 1 - failure
*/
static int
{
int i;
/* pick a good dva from the block pointer */
for (i = 0; i < BP_GET_NDVAS(bp); i++) {
return (0);
}
return (1);
}
/*
* Read gang block header, verify its checksum, loop through all gang blocks
* to collect its data based on the gang block address dva and put it in buf.
*
* Return:
* 0 - success
* 1 - failure
*/
static int
{
int i;
/* read in the gang block header */
grub_printf("failed to read in a gang block header\n");
return (1);
}
/* self-checksumming the gang block header */
grub_printf("failed to checksum a gang block header\n");
return (1);
}
for (i = 0; i < SPA_GBH_NBLKPTRS; i++) {
continue;
return (1);
}
return (0);
}
/*
* Read in a block of raw data to buf.
*
* Return:
* 0 - success
* 1 - failure
*/
static int
{
/* Only one top-level vdev is supported in the root pool */
if (DVA_GET_VDEV(dva) != 0)
return (1);
if (DVA_GET_GANG(dva)) {
return (0);
} else {
/* read in a data block */
return (0);
}
return (1);
}
/*
* Loop through DVAs to read in a block of raw data to buf and verify the
* checksum.
*
* Return:
* 0 - success
* 1 - failure
*/
static int
{
int i;
/* pick a good dva from the block pointer */
for (i = 0; i < BP_GET_NDVAS(bp); i++) {
continue;
grub_printf("checksum verification failed\n");
continue;
}
/* if no errors, return from here */
return (0);
}
return (1);
}
/*
* Read in a block of data, verify its checksum, decompress if needed,
* and put the uncompressed data in buf.
*
* Return:
* 0 - success
* errnum - failure
*/
static int
{
char *retbuf;
if ((unsigned int)comp >= ZIO_COMPRESS_FUNCTIONS ||
(comp != ZIO_COMPRESS_OFF &&
grub_printf("compression algorithm not supported\n");
return (ERR_FSYS_CORRUPT);
}
grub_printf("not enough memory allocated\n");
return (ERR_WONT_FIT);
}
if (comp != ZIO_COMPRESS_OFF) {
}
grub_printf("zio_read_data failed\n");
return (ERR_FSYS_CORRUPT);
}
if (comp != ZIO_COMPRESS_OFF)
return (0);
}
/*
* Get the block from a block id.
* Push the block onto the stack.
*
* Return:
* 0 - success
* errnum - failure
*/
static int
{
if (level == 0)
if (BP_IS_HOLE(bp)) {
grub_memset(buf, 0,
break;
return (errnum);
}
}
return (0);
}
/*
* mzap_lookup: Looks up property described by "name" and returns the value
* in "value".
*
* Return:
* 0 - success
* errnum - failure
*/
static int
{
int i, chunks;
for (i = 0; i < chunks; i++) {
return (0);
}
}
return (ERR_FSYS_CORRUPT);
}
static uint64_t
{
uint8_t c;
if (table[128] == 0) {
int i, j;
for (i = 0; i < 256; i++) {
}
}
return (0);
}
/*
* Only use 28 bits, since we need 4 bits in the cookie for the
* collision differentiator. We MUST use the high bits, since
* those are the ones that we first pay attention to when
* choosing the bucket.
*/
return (crc);
}
/*
* Only to be used on 8-bit arrays.
* array_len is actual len in bytes (not encoded le_value_length).
* buf is null-terminated.
*/
static int
{
int bseen = 0;
struct zap_leaf_array *la =
return (0);
break;
}
}
/*
* Given a zap_leaf_phys_t, walk through the zap leaf chunks to get the
* value for the property "name".
*
* Return:
* 0 - success
* errnum - failure
*/
static int
{
struct zap_leaf_entry *le;
/* Verify if this is a valid leaf block */
return (ERR_FSYS_CORRUPT);
return (ERR_FSYS_CORRUPT);
return (ERR_FSYS_CORRUPT);
/* Verify the chunk entry */
return (ERR_FSYS_CORRUPT);
continue;
struct zap_leaf_array *la;
return (ERR_FSYS_CORRUPT);
/* get the uint64_t property value */
return (0);
}
}
return (ERR_FSYS_CORRUPT);
}
/*
* Fat ZAP lookup
*
* Return:
* 0 - success
* errnum - failure
*/
static int
{
zap_leaf_phys_t *l;
/* Verify if this is a fat zap header block */
return (ERR_FSYS_CORRUPT);
if (errnum)
return (errnum);
/* get block id from index */
/* external pointer tables not supported */
return (ERR_FSYS_CORRUPT);
}
/* Get the leaf block */
l = (zap_leaf_phys_t *)stack;
return (ERR_FSYS_CORRUPT);
return (errnum);
}
/*
* Read in the data of a zap object and find the value for a matching
* property name.
*
* Return:
* 0 - success
* errnum - failure
*/
static int
{
int size;
void *zapbuf;
/* Read in the first block of the zap object data. */
return (errnum);
if (block_type == ZBT_MICRO) {
} else if (block_type == ZBT_HEADER) {
/* this is a fat zap */
}
return (ERR_FSYS_CORRUPT);
}
/*
* Get the dnode of an object number from the metadnode of an object set.
*
* Input
* mdn - metadnode to get the object dnode
* objnum - object number for the object dnode
* buf - data buffer that holds the returning dnode
* stack - scratch area
*
* Return:
* 0 - success
* errnum - failure
*/
static int
char *stack)
{
int epbs; /* shift of number of dnodes in a block */
int idx; /* index within a block */
return (0);
}
} else {
}
return (errnum);
return (0);
}
/*
* Check if this is a special file that resides at the top
* dataset of the pool. Currently this is the GRUB menu,
* boot signature and boot signature backup.
* str starts with '/'.
*/
static int
is_top_dataset_file(char *str)
{
char *tptr;
return (1);
return (1);
return (1);
return (0);
}
/*
* Get the file dnode for a given file name where mdn is the meta dnode
* for this ZFS object set. When found, place the file dnode in dn.
* The 'path' argument will be mangled.
*
* Return:
* 0 - success
* errnum - failure
*/
static int
char *stack)
{
return (errnum);
return (errnum);
if (version > ZPL_VERSION)
return (-1);
return (errnum);
return (errnum);
/* skip leading slashes */
while (*path == '/')
path++;
/* get the next component name */
path++;
*path = 0; /* ensure null termination */
return (errnum);
return (errnum);
while (*path == '/')
path++;
}
/* We found the dnode for this file. Verify if it is a plain file. */
return (0);
}
/*
* Get the default 'bootfs' property value from the rootpool.
*
* Return:
* 0 - success
* errnum - failure
*/
static int
{
stack += DNODE_SIZE;
return (errnum);
/*
* find the object number for 'pool_props', and get the dnode
* of the 'pool_props'.
*/
return (ERR_FILESYSTEM_NOT_FOUND);
return (errnum);
return (ERR_FILESYSTEM_NOT_FOUND);
if (!objnum)
return (ERR_FILESYSTEM_NOT_FOUND);
return (0);
}
/*
* Given a MOS metadnode, get the metadnode of a given filesystem name (fsname).
*
* If no fsname and no obj are given, return the DSL_DIR metadnode.
* If fsname is given, return its metadnode and its matching object number.
* If only obj is given, return the metadnode for this object number.
*
* Return:
* 0 - success
* errnum - failure
*/
static int
{
int issnapshot = 0;
char *snapname;
goto skip;
}
return (errnum);
stack))
return (errnum);
return (errnum);
headobj =
goto skip;
}
/* take out the pool name */
fsname++;
while (*fsname == '/')
fsname++;
fsname++;
*fsname = 0;
snapname++;
if (*snapname == '@') {
issnapshot = 1;
*snapname = 0;
}
childobj =
if (childobj == 0)
return (ERR_FILESYSTEM_NOT_FOUND);
return (errnum);
return (ERR_FILESYSTEM_NOT_FOUND);
return (errnum);
if (issnapshot)
*snapname = '@';
}
if (obj)
skip:
return (errnum);
if (issnapshot) {
return (errnum);
return (ERR_FILESYSTEM_NOT_FOUND);
return (errnum);
if (obj)
}
stack += sizeof (objset_phys_t);
return (errnum);
return (0);
}
/*
* For a given XDR packed nvlist, verify the first 4 bytes and move on.
*
* An XDR packed nvlist is encoded as (comments from nvs_xdr_create) :
*
* nvl_version (4 bytes)
* nvl_nvflag (4 bytes)
* encoded nvpairs:
* encoded size of the nvpair (4 bytes)
* decoded size of the nvpair (4 bytes)
* name string size (4 bytes)
* name string data (sizeof(NV_ALIGN4(string)))
* data type (4 bytes)
* # of elements in the nvpair (4 bytes)
* data
* 2 zero's for the last nvpair
* (end of the entire list) (8 bytes)
*
* Return:
* 0 - success
* 1 - failure
*/
static int
{
/* Verify if the 1st and 2nd byte in the nvlist are valid. */
return (1);
nvlist += 4;
return (0);
}
static char *
{
int i, encode_size;
for (i = 0; i < index; i++) {
/* skip the header, nvl_version, and nvl_nvflag */
}
return (nvlist);
}
static int
int *nelmp)
{
/* skip the header, nvl_version, and nvl_nvflag */
/*
* Loop through the nvpair list.
* The XDR representation of an integer is in big-endian byte order.
*/
nvpair += 4;
nvpair += 4;
int nelm;
return (1);
nvpair += 4;
switch (valtype) {
case DATA_TYPE_STRING:
nvpair += 4;
return (0);
case DATA_TYPE_UINT64:
return (0);
case DATA_TYPE_NVLIST:
return (0);
case DATA_TYPE_NVLIST_ARRAY:
if (nelmp)
return (0);
}
}
}
return (1);
}
/*
* Check if this vdev is online and is in a good state.
*/
static int
vdev_validate(char *nv)
{
DATA_TYPE_UINT64, NULL) == 0 ||
DATA_TYPE_UINT64, NULL) == 0 ||
DATA_TYPE_UINT64, NULL) == 0)
return (ERR_DEV_VALUES);
return (0);
}
/*
* The caller should have already allocated MAXPATHLEN memory for bootpath and
* devid.
*/
static int
int is_spare)
{
char type[16];
NULL))
return (ERR_FSYS_CORRUPT);
if (vdev_validate(nv) != 0)
return (ERR_NO_BOOTPATH);
return (ERR_NO_BOOTPATH);
return (ERR_NO_BOOTPATH);
/* for a spare vdev, pick the disk labeled with "is_spare" */
if (is_spare) {
if (!spare)
return (ERR_NO_BOOTPATH);
}
bootpath[0] = '\0';
devid[0] = '\0';
return (ERR_WONT_FIT);
return (0);
int nelm, i;
char *child;
return (ERR_FSYS_CORRUPT);
for (i = 0; i < nelm; i++) {
char *child_i;
return (0);
}
}
return (ERR_NO_BOOTPATH);
}
/*
* Check the disk label information and retrieve needed vdev name-value pairs.
*
* Return:
* 0 - success
* ERR_* - failure
*/
int
{
/* Read in the vdev name-value pair list (112K). */
return (ERR_READ);
stack += sizeof (vdev_phys_t);
return (ERR_FSYS_CORRUPT);
return (ERR_FSYS_CORRUPT);
if (pool_state == POOL_STATE_DESTROYED)
return (ERR_FILESYSTEM_NOT_FOUND);
return (ERR_FSYS_CORRUPT);
return (ERR_FSYS_CORRUPT);
/* not an active device */
if (txg == 0)
return (ERR_NO_BOOTPATH);
return (ERR_FSYS_CORRUPT);
if (version > SPA_VERSION)
return (ERR_NEWER_VERSION);
return (ERR_FSYS_CORRUPT);
return (ERR_FSYS_CORRUPT);
return (ERR_NO_BOOTPATH);
return (ERR_FSYS_CORRUPT);
return (0);
}
/*
* zfs_mount() locates a valid uberblock of the root pool and reads in its MOS
* to the memory address MOS.
*
* Return:
* 1 - success
* 0 - failure
*/
int
zfs_mount(void)
{
char *stack;
int label = 0;
char tmp_bootpath[MAXNAMELEN];
char tmp_devid[MAXNAMELEN];
/* if it's our first time here, zero the best uberblock out */
pool_guid = 0;
}
stack += sizeof (objset_phys_t);
/*
* Some eltorito stacks don't give us a size and
* we end up setting the size to MAXUINT. Further,
* some of these devices stop working once a single
* read past the end has been issued. Checking
* for a maximum part_length and skipping the backup
* avoids breaking down on such devices.
*/
break;
label) >> SPA_MINBLOCKSHIFT;
/* Read in the uberblock ring (128K). */
((VDEV_SKIP_SIZE + VDEV_PHYS_SIZE) >>
(char *)ub_array) == 0)
continue;
== 0) {
tmp_bootpath, &tmp_guid))
continue;
if (pool_guid == 0)
&(current_uberblock)) <= 0))
continue;
/* Got the MOS. Save it at the memory addr MOS. */
is_zfs_mount = 1;
return (1);
}
}
/*
* While some fs impls. (tftp) rely on setting and keeping
* global errnums set, others won't reset it and will break
* when issuing rawreads. The goal here is to simply not
* have zfs mount attempts impact the previous state.
*/
return (0);
}
/*
* zfs_open() locates a file in the rootpool by following the
* MOS and places the dnode of the file in the memory address DNODE.
*
* Return:
* 1 - success
* 0 - failure
*/
int
{
char *stack;
stack += sizeof (dnode_phys_t);
/*
* menu.lst is placed at the root pool filesystem level;
* do not go to 'current_bootfs'.
*/
if (is_top_dataset_file(filename)) {
return (0);
current_bootfs_obj = 0;
} else {
if (current_bootfs[0] == '\0') {
/* Get the default root filesystem object number */
return (0);
return (0);
} else {
return (0);
}
}
}
return (0);
}
/* get the file size and set the file position to 0 */
/*
* For DMU_OT_SA we will need to locate the SIZE
* attribute, which could be either in the bonus buffer
* or the "spill" block.
*/
int hdrsize;
if (DNODE->dn_bonuslen != 0) {
} else {
void *buf;
/* reset errnum to rawread() failure */
errnum = 0;
return (0);
}
} else {
return (0);
}
}
} else {
}
filepos = 0;
return (1);
}
/*
* zfs_read() reads in the data blocks pointed to by the DNODE.
*
* Return:
* len - the length successfully read in to the buffer
* 0 - failure
*/
int
{
char *stack;
char *tmpbuf;
file_start = file_end = 0;
}
/*
* If offset is in memory, move it into the buffer provided and return.
*/
return (len);
}
/*
* Entire Dnode is too big to fit into the space available. We
* will need to read it in chunks. This could be optimized to
* read in as large a chunk as there is space available, but for
* now, this only reads in one data block at a time.
*/
while (length) {
/*
* Find requested blkid and the offset within that block.
*/
return (0);
movesize);
}
return (len);
}
/*
* No-Op
*/
int
{
return (1);
}
#endif /* FSYS_ZFS */