ztest.c revision a6e57bd4c7a2bf9cc33be939d674d4c7d3e67cce
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* ztest is a stress test that runs entirely in userland, is easy to use,
* and easy to extend.
*
* The overall design of the ztest program is as follows:
*
* (1) For each major functional area (e.g. adding vdevs to a pool,
* creating and destroying datasets, reading and writing objects, etc)
* we have a simple routine to test that functionality. These
* individual routines do not have to do anything "stressful".
*
* (2) We turn these simple functionality tests into a stress test by
* running them all in parallel, with as many threads as desired,
* and spread across as many datasets, objects, and vdevs as desired.
*
* (3) While all this is happening, we inject faults into the pool to
* verify that self-healing data really works.
*
* (4) Every time we open a dataset, we change its checksum and compression
* functions. Thus even individual objects vary from block to block
* in which checksum they use and whether they're compressed.
*
* (5) To verify that we never lose on-disk consistency after a crash,
* we run the entire test in a child of the main process.
* At random times, the child self-immolates with a SIGKILL.
* This is the software equivalent of pulling the power cord.
* The parent then runs the test again, using the existing
* storage pool, as many times as desired.
*
* (6) To verify that we don't have future leaks or temporal incursions,
* many of the functional tests record the transaction group number
* as part of their data. When reading old data, they verify that
* the transaction group number is less than the current, open txg.
* If you add a new test, please do this if applicable.
*
* When run with no arguments, ztest runs for about five minutes and
* produces no output if successful. To get a little bit of information,
* specify -V. To get more information, specify -VV, and so on.
*
* To turn this into an overnight stress test, use -T to specify run time.
*
* You can ask for more vdevs [-v], datasets [-d], or threads [-t]
* to increase the pool capacity, fanout, and overall stress level.
*
* The -N(okill) option will suppress kills, so each child runs to completion.
* This can be useful when you're trying to distinguish temporal incursions
* from plain old race conditions.
*/
#include <sys/zfs_context.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_objset.h>
#include <sys/resource.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/vdev_impl.h>
#include <sys/spa_impl.h>
#include <sys/dsl_prop.h>
#include <sys/refcount.h>
#include <stdio.h>
#include <stdio_ext.h>
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <umem.h>
#include <dlfcn.h>
#include <ctype.h>
#include <math.h>
static char cmdname[] = "ztest";
static uint64_t zopt_vdevtime;
static int zopt_ashift = SPA_MINBLOCKSHIFT;
static int zopt_mirrors = 2;
static int zopt_raidz = 4;
static int zopt_raidz_parity = 1;
static int zopt_datasets = 7;
static int zopt_threads = 23;
static int zopt_verbose = 0;
static int zopt_init = 1;
static char *zopt_dir = "/tmp";
static int zopt_maxfaults;
typedef struct ztest_block_tag {
typedef struct ztest_args {
char za_pool[MAXNAMELEN];
/*
* Thread-local variables can go here to aid debugging.
*/
} ztest_args_t;
typedef void ztest_func_t(ztest_args_t *);
/*
* Note: these aren't static because we want dladdr() to work.
*/
typedef struct ztest_info {
} ztest_info_t;
ztest_info_t ztest_info[] = {
};
#define ZTEST_SYNC_LOCKS 16
/*
* Stuff we need to share writably between parent and child.
*/
typedef struct ztest_shared {
static char ztest_dev_template[] = "%s/%s.%llua";
static ztest_shared_t *ztest_shared;
static int ztest_random_fd;
static int ztest_dump_core = 1;
extern uint64_t metaslab_gang_bang;
extern uint16_t zio_zil_fail_shift;
extern uint16_t zio_io_fail_shift;
#define ZTEST_DIROBJ 1
#define ZTEST_MICROZAP_OBJ 2
#define ZTEST_FATZAP_OBJ 3
#define ZTEST_DIRSIZE 256
/*
* These libumem hooks provide a reasonable set of defaults for the allocator's
* debugging facilities.
*/
const char *
{
return ("default,verbose"); /* $UMEM_DEBUG setting */
}
/*
 * libumem hook: hands back the string to be used as the $UMEM_LOGGING
 * setting.  The returned string is a constant owned by this function;
 * callers must not modify or free it.
 */
const char *
_umem_logging_init(void)
{
	static const char umem_logging_setting[] = "fail,contents";

	return (umem_logging_setting);
}
#define FATAL_MSG_SZ 1024
char *fatal_msg;
static void
{
int save_errno = errno;
char buf[FATAL_MSG_SZ];
/* LINTED */
if (do_perror) {
}
if (ztest_dump_core)
abort();
exit(3);
}
static int
{
const char *ends = "BKMGTPEZ";
int i;
if (buf[0] == '\0')
return (0);
break;
}
buf);
}
return (10*i);
}
/* NOTREACHED */
}
static uint64_t
nicenumtoull(const char *buf)
{
char *end;
} else if (end[0] == '.') {
if (fval > UINT64_MAX) {
buf);
}
} else {
buf);
}
}
return (val);
}
static void
{
char nice_vdev_size[10];
char nice_gang_bang[10];
"\t[-v vdevs (default: %llu)]\n"
"\t[-s size_of_each_vdev (default: %s)]\n"
"\t[-a alignment_shift (default: %d) (use 0 for random)]\n"
"\t[-m mirror_copies (default: %d)]\n"
"\t[-r raidz_disks (default: %d)]\n"
"\t[-R raidz_parity (default: %d)]\n"
"\t[-d datasets (default: %d)]\n"
"\t[-t threads (default: %d)]\n"
"\t[-g gang_block_threshold (default: %s)]\n"
"\t[-i initialize pool i times (default: %d)]\n"
"\t[-k kill percentage (default: %llu%%)]\n"
"\t[-p pool_name (default: %s)]\n"
"\t[-f file directory for vdev files (default: %s)]\n"
"\t[-V(erbose)] (use multiple times for ever more blather)\n"
"\t[-E(xisting)] (use existing pool instead of creating new one)\n"
"\t[-T time] total run time (default: %llu sec)\n"
"\t[-P passtime] time per pass (default: %llu sec)\n"
"\t[-z zil failure rate (default: fail every 2^%llu allocs)]\n"
"\t[-w write failure rate (default: fail every 2^%llu allocs)]\n"
"\t[-h] (print help)\n"
"",
nice_vdev_size, /* -s */
zopt_ashift, /* -a */
zopt_mirrors, /* -m */
zopt_raidz, /* -r */
zopt_raidz_parity, /* -R */
zopt_datasets, /* -d */
zopt_threads, /* -t */
nice_gang_bang, /* -g */
zopt_init, /* -i */
zopt_pool, /* -p */
zopt_dir, /* -f */
}
static uint64_t
{
uint64_t r;
if (range == 0)
return (0);
if (read(ztest_random_fd, &r, sizeof (r)) != sizeof (r))
return (r % range);
}
/*
 * Note that an operation ran out of space.  Callers in this file invoke
 * this from their error paths and pass a short label naming the operation
 * that failed (e.g. "dmu_objset_create", "spa_vdev_add").
 *
 * NOTE(review): the body is empty in this view, so 's' is unused here.
 * Presumably the full implementation reports the label and/or bumps an
 * ENOSPC counter -- confirm against the complete source.
 */
static void
ztest_record_enospc(char *s)
{
}
static void
{
int opt;
/* By default, test gang blocks for blocks 32K and greater */
/* Default value, fail every 32nd allocation */
zio_zil_fail_shift = 5;
"v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:z:w:h")) != EOF) {
value = 0;
switch (opt) {
case 'v':
case 's':
case 'a':
case 'm':
case 'r':
case 'R':
case 'd':
case 't':
case 'g':
case 'i':
case 'k':
case 'T':
case 'P':
case 'z':
case 'w':
}
switch (opt) {
case 'v':
zopt_vdevs = value;
break;
case 's':
break;
case 'a':
zopt_ashift = value;
break;
case 'm':
break;
case 'r':
break;
case 'R':
break;
case 'd':
break;
case 't':
break;
case 'g':
break;
case 'i':
break;
case 'k':
break;
case 'p':
break;
case 'f':
break;
case 'V':
zopt_verbose++;
break;
case 'E':
zopt_init = 0;
break;
case 'T':
break;
case 'P':
break;
case 'z':
break;
case 'w':
break;
case 'h':
break;
case '?':
default:
break;
}
}
}
/*
 * Return the alignment shift to use, derived from the -a option value
 * held in zopt_ashift.
 *
 * NOTE(review): the usage text documents "-a 0" as "use 0 for random",
 * but as written here the zero case simply returns zopt_ashift (i.e. 0),
 * and the non-zero case reaches the end of the function without returning
 * a value.  It looks like a statement is missing between the test and the
 * return -- confirm against the complete source before relying on this.
 */
static uint64_t
ztest_get_ashift(void)
{
/* zopt_ashift == 0 is the "random" request per the usage text. */
if (zopt_ashift == 0)
return (zopt_ashift);
}
static nvlist_t *
{
char dev_name[MAXPATHLEN];
int fd;
if (size == 0) {
} else {
if (fd == -1)
}
return (file);
}
static nvlist_t *
{
int c;
if (r < 2)
return (make_vdev_file(size));
for (c = 0; c < r; c++)
VDEV_TYPE_RAIDZ) == 0);
zopt_raidz_parity) == 0);
child, r) == 0);
for (c = 0; c < r; c++)
nvlist_free(child[c]);
return (raidz);
}
static nvlist_t *
{
int c;
if (m < 1)
return (make_vdev_raidz(size, r));
for (c = 0; c < m; c++)
VDEV_TYPE_MIRROR) == 0);
child, m) == 0);
for (c = 0; c < m; c++)
nvlist_free(child[c]);
return (mirror);
}
static nvlist_t *
{
int c;
ASSERT(t > 0);
for (c = 0; c < t; c++)
child, t) == 0);
for (c = 0; c < t; c++)
nvlist_free(child[c]);
return (root);
}
static void
{
int bs = SPA_MINBLOCKSHIFT +
int ibs = DN_MIN_INDBLKSHIFT +
int error;
if (error) {
char osname[300];
fatal(0, "dmu_object_set_blocksize('%s', %llu, %d, %d) = %d",
}
}
static uint8_t
ztest_random_checksum(void)
{
do {
if (checksum == ZIO_CHECKSUM_OFF)
return (checksum);
}
/*
 * Presumably selects a compression function at random; callers in this
 * file pass the result straight into calls that also take a tx.
 *
 * NOTE(review): the body is empty in this view, so no value is returned
 * despite the uint8_t return type -- confirm against the complete source.
 */
static uint8_t
ztest_random_compress(void)
{
}
typedef struct ztest_replay {
static int
{
int error;
if (byteswap)
if (error) {
return (error);
}
DMU_OT_NONE, 0, tx);
if (zopt_verbose >= 5) {
char osname[MAXNAMELEN];
(void) printf("replay create of %s object %llu"
" in txg %llu = %d\n",
}
return (error);
}
static int
{
int error;
if (byteswap)
if (error) {
return (error);
}
return (error);
}
NULL, /* 0 no such transaction type */
ztest_replay_create, /* TX_CREATE */
NULL, /* TX_MKDIR */
NULL, /* TX_MKXATTR */
NULL, /* TX_SYMLINK */
ztest_replay_remove, /* TX_REMOVE */
NULL, /* TX_RMDIR */
NULL, /* TX_LINK */
NULL, /* TX_RENAME */
NULL, /* TX_WRITE */
NULL, /* TX_TRUNCATE */
NULL, /* TX_SETATTR */
NULL, /* TX_ACL */
};
/*
* Verify that we can't destroy an active pool, create an existing pool,
* or create a pool with a bad vdev spec.
*/
void
{
int error;
/*
* Attempt to create using a bad file.
*/
/*
* Attempt to create using a bad mirror.
*/
/*
* Attempt to create an existing pool. It shouldn't matter
* what's in the nvroot; we should fail with EEXIST.
*/
if (error)
}
/*
* Verify that vdev_add() works as expected.
*/
void
{
int error;
if (zopt_verbose >= 6)
(void) printf("adding vdev\n");
/*
* Make 1/4 of the devices be log devices.
*/
ztest_record_enospc("spa_vdev_add");
else if (error != 0)
if (zopt_verbose >= 6)
}
static vdev_t *
{
int c;
/*
* For whole disks, the internal path has 's0', but the
* path passed in by the user doesn't.
*/
return (vd);
return (vd);
}
}
for (c = 0; c < vd->vdev_children; c++)
NULL)
return (mvd);
return (NULL);
}
/*
* Verify that we can attach and detach devices.
*/
void
{
int replacing;
int error, expected_error;
int fd;
/*
* Decide whether to do an attach or a replace.
*/
/*
* Pick a random top-level vdev.
*/
/*
* Pick a random leaf within it.
*/
/*
* Generate the path to this leaf. The filename will end with 'a'.
* We'll alternate replacements with a filename that ends with 'b'.
*/
/*
* If the 'a' file isn't part of the pool, the 'b' file must be.
*/
else
/*
* Now oldpath represents something that's already in the pool,
* and newpath is the thing we'll try to attach.
*/
/*
* Make newsize a little bigger or smaller than oldsize.
* If it's smaller, the attach should fail.
* If it's larger, and we're doing a replace,
* we should get dynamic LUN growth when we're done.
*/
/*
* If pvd is not a mirror or root, the attach should fail with ENOTSUP,
* unless it's a replace; in that case any non-replacing parent is OK.
*
* If newvd is already part of the pool, it should fail with EBUSY.
*
* If newvd is too small, it should fail with EOVERFLOW.
*/
else
expected_error = 0;
/*
* If newvd isn't already part of the pool, create it.
*/
if (fd == -1)
}
/*
* Build the nvlist describing newpath.
*/
&file, 1) == 0);
/*
* If our parent was the replacing vdev, but the replace completed,
* then instead of failing with ENOTSUP we may either succeed,
* fail with ENODEV, or fail with EOVERFLOW.
*/
if (expected_error == ENOTSUP &&
/*
* If someone grew the LUN, the replacement may be too small.
*/
/* XXX workaround 6690467 */
fatal(0, "attach (%s %llu, %s %llu, %d) "
"returned %d, expected %d",
}
}
/*
* Verify that dynamic LUN growth works as expected.
*/
/* ARGSUSED */
void
{
char dev_name[MAXPATHLEN];
int fd;
/*
* Pick a random leaf vdev.
*/
/*
* Determine the size.
*/
/*
* If it's less than 2x the original size, grow by around 3%.
*/
if (zopt_verbose >= 6) {
(void) printf("%s grew from %lu to %lu bytes\n",
}
}
}
}
/* ARGSUSED */
static void
{
/*
* Create the directory object.
*/
}
static int
{
int error;
/*
* Verify that the dataset contains a directory object.
*/
/* We could have crashed in the middle of destroying it */
}
/*
* Destroy the dataset.
*/
if (error) {
}
return (0);
}
/*
* Verify that dmu_objset_{create,destroy,open,close} work as expected.
*/
static uint64_t
{
char name[24];
}
void
{
int error;
char name[100];
int basemode, expected_error;
/*
* If this dataset exists from a previous run, process its replay log
* half of the time. If we don't replay it, then dmu_objset_destroy()
* (invoked from ztest_destroy_cb() below) should just throw it away.
*/
if (ztest_random(2) == 0 &&
}
/*
* There may be an old instance of the dataset we're about to
* create lying around from a previous run. If so, destroy it
* and all of its snapshots.
*/
/*
* Verify that the destroyed dataset is no longer in the namespace.
*/
/*
* Verify that we can create a new dataset.
*/
if (error) {
ztest_record_enospc("dmu_objset_create");
return;
}
}
if (error) {
}
/*
* Open the intent log for it.
*/
/*
* Put a random number of objects in there.
*/
seq = 0;
while (objects-- != 0) {
if (error) {
} else {
DMU_OT_NONE, 0, tx);
}
if (ztest_random(5) == 0) {
}
if (ztest_random(100) == 0) {
if (error == 0) {
}
}
}
/*
* Verify that we cannot create an existing dataset.
*/
/*
* Verify that multiple dataset holds are allowed, but only when
* the new access mode is compatible with the base mode.
*/
if (basemode == DS_MODE_OWNER) {
&os2);
if (error)
else
}
if (error != expected_error)
fatal(0, "dmu_objset_open('%s') = %d, expected %d",
if (error == 0)
if (error)
}
/*
* Verify that dmu_snapshot_{create,destroy,open,close} work as expected.
*/
void
{
int error;
char snapname[100];
char osname[MAXNAMELEN];
ztest_record_enospc("dmu_take_snapshot");
}
#define ZTEST_TRAVERSE_BLOCKS 1000
static int
{
/*
* Level -1 indicates the objset_phys_t or something in its intent log.
*/
za->za_zil_seq = 0;
} else {
}
return (0);
}
return (ERESTART);
/*
* Once in a while, abort the traverse. We only do this to odd
* instance numbers to ensure that even ones can run to completion.
*/
return (EINTR);
return (0);
}
return (0);
}
/*
* This is an expensive question, so don't ask it too often.
*/
}
}
return (0);
}
return (0);
}
/*
* Verify that live pool traversal works.
*/
void
{
advance = 0;
if (ztest_random(2) == 0)
advance |= ADVANCE_PRE;
if (ztest_random(2) == 0)
advance |= ADVANCE_PRUNE;
if (ztest_random(2) == 0)
advance |= ADVANCE_DATA;
if (ztest_random(2) == 0)
advance |= ADVANCE_HOLES;
if (ztest_random(2) == 0)
advance |= ADVANCE_ZIL;
}
continue;
if (zopt_verbose >= 5)
(void) printf("traverse %s%s%s%s %llu blocks to "
"<%llu, %llu, %lld, %llx>%s\n",
rc == 0 ? " [done]" :
}
}
/*
* Verify that dmu_object_{alloc,free} work as expected.
*/
void
{
char osname[MAXNAMELEN];
endoff = -8ULL;
batchsize = 2;
/*
* Create a batch object if necessary, and record it in the directory.
*/
if (batchobj == 0) {
sizeof (uint64_t));
if (error) {
ztest_record_enospc("create a batch object");
return;
}
DMU_OT_NONE, 0, tx);
}
/*
* Destroy the previous batch of objects.
*/
for (b = 0; b < batchsize; b++) {
if (object == 0)
continue;
/*
* Read and validate contents.
* We expect the nth byte of the bonus buffer to be n.
*/
for (c = 0; c < bonuslen; c++) {
fatal(0,
"bad bonus: %s, obj %llu, off %d: %u != %u",
}
}
/*
* We expect the word at endoff to be our object number.
*/
fatal(0, "bad data in %s, got %llu, expected %llu",
}
/*
* Destroy old object and clear batch entry.
*/
if (error) {
ztest_record_enospc("free object");
return;
}
if (error) {
fatal(0, "dmu_object_free('%s', %llu) = %d",
}
object = 0;
ztest_random_checksum(), tx);
ztest_random_compress(), tx);
}
/*
* Before creating the new batch of objects, generate a bunch of churn.
*/
for (b = ztest_random(100); b > 0; b--) {
if (error) {
ztest_record_enospc("churn objects");
return;
}
DMU_OT_NONE, 0, tx);
if (error) {
fatal(0, "dmu_object_free('%s', %llu) = %d",
}
}
/*
* Create a new batch of objects with randomly chosen
* blocksizes and record them in the batch directory.
*/
for (b = 0; b < batchsize; b++) {
sizeof (uint64_t));
sizeof (uint64_t));
if (error) {
ztest_record_enospc("create batchobj");
return;
}
ztest_random_checksum(), tx);
ztest_random_compress(), tx);
/*
* Write to both the bonus buffer and the regular data.
*/
ASSERT3S(va_nblocks, >=, 0);
/*
* See comments above regarding the contents of
* the bonus buffer and the word at endoff.
*/
for (c = 0; c < bonuslen; c++)
/*
* Write to a large offset to increase indirection.
*/
}
}
/*
* Verify that dmu_{read,write} work as expected.
*/
typedef struct bufwad {
} bufwad_t;
typedef struct dmu_read_write_dir {
void
{
int free_percent = 5;
/*
* This test uses two objects, packobj and bigobj, that are always
* updated together (i.e. in the same tx) so that their contents are
* in sync and can be compared. Their contents relate to each other
* in a simple way: packobj is a dense array of 'bufwad' structures,
* while bigobj is a sparse array of the same bufwads. Specifically,
* for any index n, there are three bufwads that should be identical:
*
* packobj, at offset n * sizeof (bufwad_t)
* bigobj, at the head of the nth chunk
* bigobj, at the tail of the nth chunk
*
* The chunk size is arbitrary. It doesn't have to be a power of two,
* and it doesn't have any relation to the object blocksize.
* The only requirement is that it can hold at least two bufwads.
*
* Normally, we write the bufwad to each of these locations.
* However, free_percent of the time we instead write zeroes to
* packobj and perform a dmu_free_range() on bigobj. By comparing
* bigobj to packobj, we can verify that the DMU is correctly
* tracking which parts of an object are allocated and free,
* and that the contents of the allocated blocks are correct.
*/
/*
* Read the directory info. If it's the first time, set things up.
*/
if (error) {
ztest_record_enospc("create r/w directory");
return;
}
DMU_OT_NONE, 0, tx);
DMU_OT_NONE, 0, tx);
tx);
}
/*
* Prefetch a random chunk of the big object.
* Our aim here is to get some async reads in flight
* for blocks that we may free below; the DMU should
* handle this race correctly.
*/
/*
* Pick a random index and compute the offsets into packobj and bigobj.
*/
/*
* free_percent of the time, free a range of bigobj rather than
* overwriting it.
*/
/*
* Read the current contents of our objects.
*/
/*
* Get a tx for the mods to both packobj and bigobj.
*/
if (freeit)
else
if (error) {
ztest_record_enospc("dmu r/w range");
return;
}
/*
* For each index from n to n + s, verify that the existing bufwad
* in packobj matches the bufwads at the head and tail of the
* corresponding chunk in bigobj. Then update all three bufwads
* with the new values we want to write out.
*/
for (i = 0; i < s; i++) {
/* LINTED */
/* LINTED */
/* LINTED */
fatal(0, "future leak: got %llx, open txg is %llx",
fatal(0, "wrong index: got %llx, wanted %llx+%llx",
if (freeit) {
} else {
}
}
/*
* We've verified all the old bufwads, and made new ones.
* Now write them out.
*/
if (freeit) {
if (zopt_verbose >= 6) {
(void) printf("freeing offset %llx size %llx"
" txg %llx\n",
(u_longlong_t)txg);
}
} else {
if (zopt_verbose >= 6) {
(void) printf("writing offset %llx size %llx"
" txg %llx\n",
(u_longlong_t)txg);
}
}
/*
* Sanity check the stuff we just wrote.
*/
{
}
}
void
{
/*
* Make sure that, if there is a write record in the bonus buffer
* of the ZTEST_DIROBJ, the txg for this record is <= the
* last synced txg of the pool.
*/
}
}
void
{
int b, error;
int bs = ZTEST_DIROBJ_BLOCKSIZE;
int do_free = 0;
char osname[MAXNAMELEN];
char iobuf[SPA_MAXBLOCKSIZE];
/*
* Have multiple threads write to large offsets in ZTEST_DIROBJ
* to verify that having multiple threads writing to the same object
* in parallel doesn't cause any trouble.
*/
if (ztest_random(4) == 0) {
/*
* Do the bonus buffer instead of a regular block.
* We need a lock to serialize resize vs. others,
* so we hash on the objset ID.
*/
off = -1ULL;
} else {
b = ztest_random(ZTEST_SYNC_LOCKS);
if (ztest_random(4) == 0) {
do_free = 1;
} else {
}
}
if (error) {
} else {
ztest_record_enospc("dmu write parallel");
}
return;
}
(void) mutex_lock(lp);
if (off == -1ULL) {
char *dboff;
}
if (ztest_random(10) == 0) {
}
} else if (do_free) {
} else {
}
(void) mutex_unlock(lp);
if (ztest_random(1000) == 0)
if (ztest_random(10000) == 0)
return;
if (ztest_random(2) != 0)
return;
/*
* dmu_sync() the block we just wrote.
*/
(void) mutex_lock(lp);
if (error) {
dprintf("dmu_buf_hold(%s, %d, %llx) = %d\n",
(void) mutex_unlock(lp);
return;
}
(void) mutex_unlock(lp);
if (error) {
dprintf("dmu_sync(%s, %d, %llx) = %d\n",
return;
}
return;
/*
* Read the block that dmu_sync() returned to make sure its contents
* match what we wrote. We do this while still txg_suspend()ed
* to ensure that the block can't be reused before we read it.
*/
return;
/*
* The semantic of dmu_sync() is that we always push the most recent
* version of the data, so in the face of concurrent updates we may
* see a newer version of the block. That's OK.
*/
else
}
/*
* Verify that zap_{create,destroy,add,remove,update} work as expected.
*/
#define ZTEST_ZAP_MIN_INTS 1
#define ZTEST_ZAP_MAX_INTS 4
#define ZTEST_ZAP_MAX_PROPS 1000
void
{
int i, ints;
int error;
char osname[MAXNAMELEN];
/*
* Create a new object if necessary, and record it in the directory.
*/
if (object == 0) {
sizeof (uint64_t));
if (error) {
ztest_record_enospc("create zap test obj");
return;
}
if (error) {
fatal(0, "zap_create('%s', %llu) = %d",
}
/*
* Generate a known hash collision, and verify that
* we can lookup and remove both entries.
*/
for (i = 0; i < 2; i++) {
value[i] = i;
}
for (i = 0; i < 2; i++) {
&zl_intsize, &zl_ints);
}
for (i = 0; i < 2; i++) {
}
}
last_txg = 0;
/*
* If these zap entries already exist, validate their contents.
*/
if (error == 0) {
&zl_ints) == 0);
for (i = 0; i < ints; i++) {
}
} else {
}
/*
* Atomically update two entries in our zap object.
* The first is named txg_%llu, and contains the txg
* in which the property was last updated. The second
* is named prop_%llu, and the nth element of its value
* should be txg + object + n.
*/
if (error) {
ztest_record_enospc("create zap entry");
return;
}
for (i = 0; i < ints; i++)
if (error)
fatal(0, "zap_update('%s', %llu, '%s') = %d",
if (error)
fatal(0, "zap_update('%s', %llu, '%s') = %d",
/*
* Remove a random pair of entries.
*/
return;
if (error) {
ztest_record_enospc("remove zap entry");
return;
}
if (error)
fatal(0, "zap_remove('%s', %llu, '%s') = %d",
if (error)
fatal(0, "zap_remove('%s', %llu, '%s') = %d",
/*
* Once in a while, destroy the object.
*/
if (ztest_random(1000) != 0)
return;
if (error) {
ztest_record_enospc("destroy zap object");
return;
}
if (error)
fatal(0, "zap_destroy('%s', %llu) = %d",
object = 0;
}
void
{
void *data;
/*
* Generate a random name of the form 'xxx.....' where each
* x is a random printable character and the dots are dots.
* There are 94 such characters, and the name length goes from
* 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names.
*/
for (i = 0; i < 3; i++)
for (; i < namelen - 1; i++)
name[i] = '.';
name[i] = '\0';
if (ztest_random(2) == 0)
else
wc = 1;
} else {
wsize = 1;
data = string_value;
}
count = -1ULL;
/*
* Select an operation: length, lookup, add, update, remove.
*/
i = ztest_random(5);
if (i >= 2) {
if (error) {
ztest_record_enospc("zap parallel");
return;
}
} else {
txg = 0;
}
switch (i) {
case 0:
if (error == 0) {
} else {
}
break;
case 1:
if (error == 0) {
if (data == string_value &&
fatal(0, "name '%s' != val '%s' len %d",
} else {
}
break;
case 2:
break;
case 3:
break;
case 4:
break;
}
}
void
{
int i, inherit;
char setpoint[MAXPATHLEN];
char osname[MAXNAMELEN];
int error;
for (i = 0; i < 2; i++) {
if (i == 0) {
prop = "checksum";
} else {
prop = "compression";
}
ztest_record_enospc("dsl_prop_set");
break;
}
if (i == 0)
else
if (zopt_verbose >= 6) {
(void) printf("%s %s = %s for '%s'\n",
}
}
}
static void
{
int c;
for (c = 0; c < vd->vdev_children; c++)
}
}
/*
* Inject random faults into the on-disk data.
*/
void
{
int fd;
char path0[MAXPATHLEN];
char pathrand[MAXPATHLEN];
int iters = 1000;
/*
* We can't inject faults when we have no fault tolerance.
*/
if (zopt_maxfaults == 0)
return;
/*
* Pick a random top-level vdev.
*/
/*
* Pick a random leaf.
*/
/*
* Generate paths to the first two leaves in this top-level vdev,
* and to the random leaf we selected. We'll induce transient failures,
* and we'll write random garbage to the randomly chosen leaf.
*/
/*
* If we can tolerate two or more faults, make vd0 fail randomly.
*/
}
/*
*/
else
}
/*
* We have at least single-fault tolerance, so inject data corruption.
*/
return;
while (--iters != 0) {
continue;
if (zopt_verbose >= 6)
(void) printf("injecting bad word into %s,"
}
}
/*
* Scrub the pool.
*/
void
{
}
/*
* Rename the pool to a different name and then rename it back.
*/
void
{
int error;
/*
* Do the rename
*/
if (error)
/*
* Try to open it under the old name, which shouldn't exist
*/
/*
* Open it under the new name and make sure it's still the same spa_t.
*/
if (error != 0)
/*
* Rename it back to the original
*/
if (error)
/*
* Make sure it can still be opened
*/
if (error != 0)
}
/*
* Completely obliterate one disk.
*/
static void
{
int fd;
if (zopt_maxfaults < 2)
return;
if (fd == -1)
/*
* Determine the size.
*/
/*
* Rename the old device to dev_name.old (useful for debugging).
*/
/*
* Create a new one.
*/
}
static void
{
char dev_name[MAXPATHLEN];
int error;
/*
* Build the nvlist describing dev_name.
*/
&file, 1) == 0);
guid = 0;
else
if (error != 0 &&
}
static void
ztest_verify_blocks(char *pool)
{
int status;
char zbuf[1024];
char *bin;
char *ztest;
char *isa;
int isalen;
/* LINTED */
isa,
if (zopt_verbose >= 5)
if (zopt_verbose >= 3)
if (status == 0)
return;
ztest_dump_core = 0;
else
}
static void
ztest_walk_pool_directory(char *header)
{
if (zopt_verbose >= 6)
if (zopt_verbose >= 6)
}
static void
{
int error;
if (zopt_verbose >= 4) {
}
/*
* Clean up from previous runs.
*/
(void) spa_destroy(newname);
/*
* Get the pool's configuration and guid.
*/
if (error)
ztest_walk_pool_directory("pools before export");
/*
* Export it.
*/
if (error)
ztest_walk_pool_directory("pools after export");
/*
* Import it under the new name.
*/
if (error)
ztest_walk_pool_directory("pools after import");
/*
* Try to import it again -- should fail with EEXIST.
*/
/*
* Try to import it under a different name -- should fail with EEXIST.
*/
/*
* Verify that the pool is no longer visible under the old name.
*/
/*
* Verify that we can open and close the pool using the new name.
*/
if (error)
}
/* ARGSUSED */
static void *
ztest_suspend_monitor(void *arg)
{
int error;
if (error) {
return (NULL);
}
while (!ztest_exiting) {
(void) sleep(3);
/*
* We don't hold the spa_config_lock since the pool is in
* complete failure mode and there is no way for us to
* change the vdev config when we're in this state.
*/
(void) sleep(1);
}
}
return (NULL);
}
static void *
ztest_thread(void *arg)
{
int f, i;
/*
* See if it's time to force a crash.
*/
}
/*
* Pick a random function.
*/
f = ztest_random(ZTEST_FUNCS);
/*
* Decide whether to call it, based on the requested frequency.
*/
if (zi->zi_call_target == 0 ||
continue;
if (zopt_verbose >= 4) {
(void) printf("%6.2f sec in %s\n",
}
/*
* If we're getting ENOSPC with some regularity, stop.
*/
break;
}
return (NULL);
}
/*
* Kick off threads to run tests on all datasets in parallel.
*/
static void
{
int t, d, error;
char name[100];
for (t = 0; t < ZTEST_SYNC_LOCKS; t++)
/*
* Destroy one disk before we even start.
* It's mirrored, so everything should work just fine.
* This makes us exercise fault handling very early in spa_load().
*/
/*
* Verify that the sum of the sizes of all blocks in the pool
* equals the SPA's allocated space total.
*/
/*
* Kick off a replacement of the disk we just obliterated.
*/
if (error)
if (zopt_verbose >= 5)
kernel_fini();
/*
* Verify that we can export the pool and reimport it under a
* different name.
*/
if (ztest_random(2) == 0) {
}
/*
* Verify that we can loop over all pools.
*/
if (zopt_verbose > 3) {
}
}
/*
* Create a thread to handle complete pool failures. Start the
* thread before setting zio_io_fail_shift, which will indicate
* our failure rate.
*/
if (error) {
fatal(0, "can't create suspend monitor thread: error %d",
t, error);
}
/*
* Open our pool.
*/
if (error)
/*
* Verify that we can safely inquire about any object,
* whether it's allocated or not. To make it interesting,
* we probe a 5-wide window around each power of two.
* This hits all edge cases, including zero and the max.
*/
for (t = 0; t < 64; t++) {
for (d = -5; d <= 5; d++) {
(1ULL << t) + d, NULL);
}
}
/*
* Now kick off all the tests that run in parallel.
*/
zs->zs_enospc_count = 0;
if (zopt_verbose >= 4)
(void) printf("starting main threads...\n");
/* Let failures begin */
for (t = 0; t < zopt_threads; t++) {
d = t % zopt_datasets;
za[t].za_instance = t;
if (t < zopt_datasets) {
int test_future = FALSE;
test_future = TRUE;
zs->zs_enospc_count++;
break;
} else if (error != 0) {
fatal(0, "dmu_objset_create(%s) = %d",
}
if (error)
fatal(0, "dmu_objset_open('%s') = %d",
if (test_future)
}
if (error)
fatal(0, "can't create thread %d: error %d",
t, error);
}
while (--t >= 0) {
if (error)
if (t < zopt_datasets) {
}
}
if (zopt_verbose >= 3)
/*
* If we had out-of-space errors, destroy a random objset.
*/
if (zs->zs_enospc_count != 0) {
d = (int)ztest_random(zopt_datasets);
if (zopt_verbose >= 3)
}
/*
* Right before closing the pool, kick off a bunch of async I/O;
* spa_close() should wait for it to complete.
*/
for (t = 1; t < 50; t++)
/* Shutdown the suspend monitor thread */
zio_io_fail_shift = 0;
if (error)
kernel_fini();
}
void
{
hrtime_t m = s / 60;
hrtime_t h = m / 60;
hrtime_t d = h / 24;
s -= m * 60;
m -= h * 60;
h -= d * 24;
timebuf[0] = '\0';
if (d)
"%llud%02lluh%02llum%02llus", d, h, m, s);
else if (h)
else if (m)
else
}
/*
* Create a storage pool with the given name and initial vdev size.
* Then create the specified number of datasets in the pool.
*/
static void
ztest_init(char *pool)
{
int error;
/*
* Create the storage pool.
*/
(void) spa_destroy(pool);
if (error)
if (error)
if (zopt_verbose >= 3)
kernel_fini();
}
int
{
int kills = 0;
int iters = 0;
int i, f;
char timebuf[100];
char numbuf[6];
/* Override location of zpool.cache */
spa_config_path = "/tmp/zpool.cache";
/*
* Blow away any existing copy of zpool.cache
*/
if (zopt_init != 0)
(void) remove("/tmp/zpool.cache");
if (zopt_verbose >= 1) {
(void) printf("%llu vdevs, %d datasets, %d threads,"
" %llu seconds...\n",
}
/*
* Create and initialize our storage pool.
*/
for (i = 1; i <= zopt_init; i++) {
(void) printf("ztest_init(), pass %d\n", i);
}
/*
* Initialize the call targets for each function.
*/
for (f = 0; f < ZTEST_FUNCS; f++) {
*zi = ztest_info[f];
if (*zi->zi_interval == 0)
else
}
/*
* Run the tests in a loop. These tests include fault injection
* to verify that self-healing data works, and forced crashes
* to verify that we never lose on-disk consistency.
*/
int status;
char *tmp;
/*
* Initialize the workload counters for each function.
*/
for (f = 0; f < ZTEST_FUNCS; f++) {
zi->zi_call_time = 0;
}
if (pid == -1)
if (pid == 0) { /* child */
exit(0);
}
continue;
if (WEXITSTATUS(status) != 0) {
"child exited with code %d\n",
exit(2);
}
} else if (WIFSIGNALED(status)) {
"child died with signal %d\n",
exit(3);
}
kills++;
} else {
"to child\n");
exit(4);
}
iters++;
if (zopt_verbose >= 1) {
(void) printf("Pass %3d, %8s, %3llu ENOSPC, "
"%4.1f%% of %5s used, %3.0f%% done, %8s to go\n",
}
if (zopt_verbose >= 2) {
(void) printf("\nWorkload summary:\n\n");
(void) printf("%7s %9s %s\n",
"Calls", "Time", "Function");
(void) printf("%7s %9s %s\n",
"-----", "----", "--------");
for (f = 0; f < ZTEST_FUNCS; f++) {
(void) printf("%7llu %9s %s\n",
}
(void) printf("\n");
}
/*
* It's possible that we killed a child during a rename test, in
* which case we'll have a 'ztest_tmp' pool lying around instead
* of 'ztest'. Do a blind rename in case this happened.
*/
kernel_fini();
}
if (zopt_verbose >= 1) {
(void) printf("%d killed, %d completed, %.0f%% kill rate\n",
}
return (0);
}