/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
*/
/*
 * The objective of this program is to provide a DMU/ZAP/SPA stress test
 * that runs entirely in userland, is easy to use, and easy to extend.
*
* The overall design of the ztest program is as follows:
*
* (1) For each major functional area (e.g. adding vdevs to a pool,
* creating and destroying datasets, reading and writing objects, etc)
* we have a simple routine to test that functionality. These
* individual routines do not have to do anything "stressful".
*
* (2) We turn these simple functionality tests into a stress test by
* running them all in parallel, with as many threads as desired,
* and spread across as many datasets, objects, and vdevs as desired.
*
* (3) While all this is happening, we inject faults into the pool to
* verify that self-healing data really works.
*
* (4) Every time we open a dataset, we change its checksum and compression
* functions. Thus even individual objects vary from block to block
* in which checksum they use and whether they're compressed.
*
* (5) To verify that we never lose on-disk consistency after a crash,
* we run the entire test in a child of the main process.
* At random times, the child self-immolates with a SIGKILL.
* This is the software equivalent of pulling the power cord.
* The parent then runs the test again, using the existing
* storage pool, as many times as desired. If backwards compatibility
* testing is enabled ztest will sometimes run the "older" version
* of ztest after a SIGKILL.
*
* (6) To verify that we don't have future leaks or temporal incursions,
* many of the functional tests record the transaction group number
* as part of their data. When reading old data, they verify that
* the transaction group number is less than the current, open txg.
* If you add a new test, please do this if applicable.
*
* When run with no arguments, ztest runs for about five minutes and
* produces no output if successful. To get a little bit of information,
* specify -V. To get more information, specify -VV, and so on.
*
* To turn this into an overnight stress test, use -T to specify run time.
*
* You can ask for more vdevs [-v], datasets [-d], or threads [-t]
* to increase the pool capacity, fanout, and overall stress level.
*
* Use the -k option to set the desired frequency of kills.
*
* When ztest invokes itself it passes all relevant information through a
* temporary file which is mmap-ed in the child process. This allows shared
* memory to survive the exec syscall. The ztest_shared_hdr_t struct is always
* stored at offset 0 of this file and contains information on the size and
* number of shared structures in the file. The information stored in this file
* must remain backwards compatible with older versions of ztest so that
* ztest can invoke them during backwards compatibility testing (-B).
*/
#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
#include <sys/resource.h>
#include <sys/zil_impl.h>
#include <sys/vdev_impl.h>
#include <sys/vdev_file.h>
#include <sys/spa_impl.h>
#include <sys/metaslab_impl.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_scan.h>
#include <sys/zio_checksum.h>
#include <sys/refcount.h>
#include <sys/zfeature.h>
#include <sys/dsl_userhold.h>
#include <stdio.h>
#include <stdio_ext.h>
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <umem.h>
#include <dlfcn.h>
#include <ctype.h>
#include <math.h>
#include <libnvpair.h>
typedef struct ztest_shared_hdr {
typedef struct ztest_shared_opts {
int zo_ashift;
int zo_mirrors;
int zo_raidz;
int zo_raidz_parity;
int zo_datasets;
int zo_threads;
int zo_verbose;
int zo_init;
.zo_alt_ztest = { '\0' },
.zo_alt_libpath = { '\0' },
.zo_vdevs = 5,
.zo_mirrors = 2,
.zo_raidz = 4,
.zo_raidz_parity = 1,
.zo_datasets = 7,
.zo_threads = 23,
.zo_verbose = 0,
.zo_init = 1,
};
extern uint64_t metaslab_gang_bang;
extern uint64_t metaslab_df_alloc_threshold;
extern uint64_t zfs_deadman_synctime_ms;
extern int metaslab_preload_limit;
extern boolean_t zfs_compressed_arc_enabled;
typedef struct ztest_shared_ds {
#define MAXFAULTS() \
enum ztest_io_type {
};
typedef struct ztest_block_tag {
typedef struct bufwad {
} bufwad_t;
/*
* XXX -- fix zfs range locks to be generic so we can use them here.
*/
typedef enum {
} rl_type_t;
typedef struct rll {
void *rll_writer;
int rll_readers;
} rll_t;
typedef struct rl {
} rl_t;
/*
*/
typedef struct ztest_od {
} ztest_od_t;
/*
* Per-dataset state.
*/
typedef struct ztest_ds {
} ztest_ds_t;
/*
* Per-iteration state.
*/
typedef struct ztest_info {
} ztest_info_t;
typedef struct ztest_shared_callstate {
/*
* Note: these aren't static because we want dladdr() to work.
*/
#if 0
#endif
{ ztest_vdev_add_remove, 1,
};
/*
* The following struct is used to hold a list of uncalled commit callbacks.
* The callbacks are ordered by txg number.
*/
typedef struct ztest_cb_list {
/*
* Stuff we need to share writably between parent and child.
*/
typedef struct ztest_shared {
/*
* The ztest_name_lock protects the pool and dataset namespace used by
* the individual tests. To modify the namespace, consumers must grab
* this lock as writer. Grabbing the lock as reader will ensure that the
* namespace does not change while the lock is held.
*/
/* Global commit callback list */
enum ztest_object {
ZTEST_META_DNODE = 0,
};
/*
* These libumem hooks provide a reasonable set of defaults for the allocator's
* debugging facilities.
*/
const char *
{
return ("default,verbose"); /* $UMEM_DEBUG setting */
}
/*
 * libumem hook (see umem_debug(3MALLOC)): supplies the default
 * $UMEM_LOGGING setting, enabling the transaction-failure ("fail")
 * and buffer-contents ("contents") logs for allocator debugging.
 */
const char *
_umem_logging_init(void)
{
return ("fail,contents"); /* $UMEM_LOGGING setting */
}
char *fatal_msg;
static void
{
/* LINTED */
if (do_perror) {
}
if (ztest_dump_core)
abort();
exit(3);
}
static int
{
int i;
if (buf[0] == '\0')
return (0);
break;
}
buf);
}
return (10*i);
}
/* NOTREACHED */
}
static uint64_t
{
char *end;
} else if (end[0] == '.') {
if (fval > UINT64_MAX) {
buf);
}
} else {
buf);
}
}
return (val);
}
static void
{
"\t[-v vdevs (default: %llu)]\n"
"\t[-s size_of_each_vdev (default: %s)]\n"
"\t[-a alignment_shift (default: %d)] use 0 for random\n"
"\t[-m mirror_copies (default: %d)]\n"
"\t[-r raidz_disks (default: %d)]\n"
"\t[-R raidz_parity (default: %d)]\n"
"\t[-d datasets (default: %d)]\n"
"\t[-t threads (default: %d)]\n"
"\t[-g gang_block_threshold (default: %s)]\n"
"\t[-i init_count (default: %d)] initialize pool i times\n"
"\t[-k kill_percentage (default: %llu%%)]\n"
"\t[-p pool_name (default: %s)]\n"
"\t[-f dir (default: %s)] file directory for vdev files\n"
"\t[-V] verbose (use multiple times for ever more blather)\n"
"\t[-E] use existing pool instead of creating new one\n"
"\t[-T time (default: %llu sec)] total run time\n"
"\t[-F freezeloops (default: %llu)] max loops in spa_freeze()\n"
"\t[-P passtime (default: %llu sec)] time per pass\n"
"\t[-B alt_ztest (default: <none>)] alternate ztest path\n"
"\t[-o variable=value] ... set global variable to an unsigned\n"
"\t 32-bit integer value\n"
"\t[-h] (print help)\n"
"",
nice_vdev_size, /* -s */
nice_gang_bang, /* -g */
}
static void
{
char *path;
int opt;
"v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:hF:B:o:")) != EOF) {
value = 0;
switch (opt) {
case 'v':
case 's':
case 'a':
case 'm':
case 'r':
case 'R':
case 'd':
case 't':
case 'g':
case 'i':
case 'k':
case 'T':
case 'P':
case 'F':
}
switch (opt) {
case 'v':
break;
case 's':
break;
case 'a':
break;
case 'm':
break;
case 'r':
break;
case 'R':
break;
case 'd':
break;
case 't':
break;
case 'g':
value);
break;
case 'i':
break;
case 'k':
break;
case 'p':
break;
case 'f':
} else {
}
break;
case 'V':
zo->zo_verbose++;
break;
case 'E':
break;
case 'T':
break;
case 'P':
break;
case 'F':
break;
case 'B':
break;
case 'o':
if (set_global_var(optarg) != 0)
break;
case 'h':
break;
case '?':
default:
break;
}
}
zo->zo_vdevtime =
UINT64_MAX >> 2);
char *cmd;
char *realaltdir;
char *bin;
char *ztest;
char *isa;
int isalen;
altdir);
}
/*
* We want to extract <isa> to determine if we should use
* 32 or 64 bit binaries.
*/
zo->zo_alt_ztest);
zo->zo_alt_libpath);
}
}
}
static void
{
/*
* Before we kill off ztest, make sure that the config is updated.
* See comment above spa_config_sync().
*/
}
static uint64_t
{
uint64_t r;
ASSERT3S(ztest_fd_rand, >=, 0);
if (range == 0)
return (0);
if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r))
return (r % range);
}
/* ARGSUSED */
static void
ztest_record_enospc(const char *s)
{
/*
 * Called by tests when an operation fails with ENOSPC; 's' names the
 * failing operation (unused here, hence ARGSUSED).
 *
 * NOTE(review): the body is empty as seen in this chunk -- presumably
 * it should record the out-of-space event (e.g. bump a shared ENOSPC
 * counter); confirm against the complete source before relying on it.
 */
}
static uint64_t
ztest_get_ashift(void)
{
if (ztest_opts.zo_ashift == 0)
return (ztest_opts.zo_ashift);
}
static nvlist_t *
{
if (ashift == 0)
ashift = ztest_get_ashift();
} else {
}
}
if (size != 0) {
if (fd == -1)
}
return (file);
}
static nvlist_t *
{
int c;
if (r < 2)
for (c = 0; c < r; c++)
VDEV_TYPE_RAIDZ) == 0);
ztest_opts.zo_raidz_parity) == 0);
child, r) == 0);
for (c = 0; c < r; c++)
nvlist_free(child[c]);
return (raidz);
}
static nvlist_t *
{
int c;
if (m < 1)
for (c = 0; c < m; c++)
VDEV_TYPE_MIRROR) == 0);
child, m) == 0);
for (c = 0; c < m; c++)
nvlist_free(child[c]);
return (mirror);
}
static nvlist_t *
int log, int r, int m, int t)
{
int c;
ASSERT(t > 0);
for (c = 0; c < t; c++) {
r, m);
log) == 0);
}
child, t) == 0);
for (c = 0; c < t; c++)
nvlist_free(child[c]);
return (root);
}
/*
* Find a random spa version. Returns back a random spa version in the
* range [initial_version, SPA_VERSION_FEATURES].
*/
static uint64_t
{
if (version <= SPA_VERSION_BEFORE_FEATURES) {
}
return (version);
}
static int
ztest_random_blocksize(void)
{
/*
* Choose a block size >= the ashift.
* If the SPA supports new MAXBLOCKSIZE, test up to 1MB blocks.
*/
maxbs = 20;
}
static int
ztest_random_ibshift(void)
{
return (DN_MIN_INDBLKSHIFT +
}
static uint64_t
{
do {
return (top);
}
static uint64_t
{
do {
return (value);
}
static int
{
const char *valname;
int error;
return (error);
}
(void) printf("%s %s = %s at '%s'\n",
}
return (error);
}
static int
{
int error;
return (error);
}
return (error);
}
static void
{
rll->rll_readers = 0;
}
static void
{
}
static void
{
rll->rll_readers++;
} else {
}
}
static void
{
if (rll->rll_writer) {
} else {
rll->rll_readers--;
}
}
static void
{
}
static void
{
}
static rl_t *
{
return (rl);
}
static void
{
}
static void
{
for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++)
for (int l = 0; l < ZTEST_RANGE_LOCKS; l++)
}
static void
{
for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++)
for (int l = 0; l < ZTEST_RANGE_LOCKS; l++)
}
static uint64_t
{
int error;
/*
* Attempt to assign tx to some transaction group.
*/
if (error) {
} else {
}
return (0);
}
return (txg);
}
static void
{
}
static boolean_t
{
return (diff == 0);
}
static void
{
}
static void
{
}
static ztest_block_tag_t *
{
return (bt);
}
/*
* ZIL logging ops
*/
static void
{
return;
}
static void
{
return;
}
static void
{
return;
if (write_state == WR_COPIED &&
}
}
static void
{
return;
}
static void
{
return;
}
/*
* ZIL replay ops
*/
static int
{
int error = 0;
if (byteswap)
} else {
}
if (txg == 0)
return (ENOSPC);
} else {
}
} else {
} else {
}
}
if (error) {
return (error);
}
return (0);
}
static int
{
if (byteswap)
VERIFY3U(0, ==,
if (txg == 0) {
return (ENOSPC);
}
} else {
}
return (0);
}
static int
{
if (byteswap)
/* If it's a dmu_sync() block, write the whole block */
}
}
if (txg == 0) {
return (ENOSPC);
}
/*
* Usually, verify the old data before writing new data --
* but not always, because we also want to verify correct
* behavior when the data was not recently read into cache.
*/
if (ztest_random(4) != 0) {
}
}
/*
* Writes can appear to be newer than the bonus buffer because
* the ztest_get_data() callback does a dmu_read() of the
* open-context data, which may be different than the data
* as it was when the write was generated.
*/
}
/*
* so that all of the usual ASSERTs will work.
*/
}
} else {
}
return (0);
}
static int
{
if (byteswap)
if (txg == 0) {
return (ENOSPC);
}
return (0);
}
static int
{
if (byteswap)
if (txg == 0) {
return (ENOSPC);
}
} else {
/*
* Randomly change the size and increment the generation.
*/
sizeof (*bbt);
}
/*
* Verify that the current bonus buffer is not newer than our txg.
*/
return (0);
}
NULL, /* 0 no such transaction type */
ztest_replay_create, /* TX_CREATE */
NULL, /* TX_MKDIR */
NULL, /* TX_MKXATTR */
NULL, /* TX_SYMLINK */
ztest_replay_remove, /* TX_REMOVE */
NULL, /* TX_RMDIR */
NULL, /* TX_LINK */
NULL, /* TX_RENAME */
ztest_replay_write, /* TX_WRITE */
ztest_replay_truncate, /* TX_TRUNCATE */
ztest_replay_setattr, /* TX_SETATTR */
NULL, /* TX_ACL */
NULL, /* TX_CREATE_ACL */
NULL, /* TX_CREATE_ATTR */
NULL, /* TX_CREATE_ACL_ATTR */
NULL, /* TX_MKDIR_ACL */
NULL, /* TX_MKDIR_ATTR */
NULL, /* TX_MKDIR_ACL_ATTR */
NULL, /* TX_WRITE2 */
};
/*
* ZIL get_data callbacks
*/
static void
{
}
static int
{
int error;
if (error) {
return (error);
}
return (ENOENT);
}
} else {
} else {
offset = 0;
}
if (error == 0) {
if (obp) {
}
if (error == 0)
return (0);
}
}
return (error);
}
static void *
{
char *lr;
if (name)
return (lr);
}
void
{
}
/*
* Lookup a bunch of objects. Returns the number of objects not found.
*/
static int
{
int missing = 0;
int error;
if (error) {
missing++;
} else {
}
}
return (missing);
}
static int
{
int missing = 0;
if (missing) {
missing++;
continue;
}
missing++;
} else {
}
}
return (missing);
}
static int
{
int missing = 0;
int error;
if (missing) {
missing++;
continue;
}
/*
* No object was found.
*/
continue;
missing++;
} else {
}
}
return (missing);
}
static int
void *data)
{
int error;
return (error);
}
static int
{
int error;
return (error);
}
static int
{
int error;
return (error);
}
static void
{
if (txg != 0) {
} else {
}
}
static void
{
int err;
void *data;
/*
* Pick an i/o type at random, biased toward writing block tags.
*/
if (ztest_random(2) == 0)
switch (io_type) {
case ZTEST_IO_WRITE_TAG:
break;
case ZTEST_IO_WRITE_PATTERN:
if (ztest_random(2) == 0) {
/*
* Induce fletcher2 collisions to ensure that
* zio_ddt_collision() detects and resolves them
* when using fletcher2-verify for deduplication.
*/
}
break;
case ZTEST_IO_WRITE_ZEROES:
break;
case ZTEST_IO_TRUNCATE:
break;
case ZTEST_IO_SETATTR:
break;
case ZTEST_IO_REWRITE:
(void) rw_rdlock(&ztest_name_lock);
B_FALSE);
B_FALSE);
(void) rw_unlock(&ztest_name_lock);
break;
}
}
/*
* Initialize an object description template.
*/
static void
{
od->od_blocksize = 0;
}
/*
* Lookup or create the objects for a test using the od template.
* If the objects do not all exist, or if 'remove' is specified,
* remove any existing objects and create new ones. Otherwise,
* use the existing objects.
*/
static int
{
int rv = 0;
rv = -1;
return (rv);
}
/* ARGSUSED */
void
{
/*
* shared memory. If we die, the next iteration of ztest_run()
* will verify that the log really does contain this record.
*/
}
/*
 * This function is designed to simulate the operations that occur during a
 * mount/unmount operation.  We hold the dataset across these operations in an
 * attempt to expose any implicit assumptions about ZIL management.
 */
/* ARGSUSED */
void
{
/*
* We grab the zd_dirobj_lock to ensure that no other thread is
* updating the zil (i.e. adding in-memory log records) and the
* zd_zilog_lock to block any I/O.
*/
/* zfsvfs_teardown() */
/* zfsvfs_setup() */
}
/*
* Verify that we can't destroy an active pool, create an existing pool,
* or create a pool with a bad vdev spec.
*/
/* ARGSUSED */
void
{
/*
* Attempt to create using a bad file.
*/
/*
* Attempt to create using a bad mirror.
*/
/*
* Attempt to create an existing pool. It shouldn't matter
* what's in the nvroot; we should fail with EEXIST.
*/
(void) rw_rdlock(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
}
/* ARGSUSED */
void
{
char *name;
/*
* Clean up from previous runs.
*/
(void) spa_destroy(name);
/*
 * If we're configuring a RAIDZ device then make sure that the
 * initial version is capable of supporting that feature.
 */
switch (ztest_opts.zo_raidz_parity) {
case 0:
case 1:
break;
case 2:
break;
case 3:
break;
}
/*
* Create a pool with a spa version that can be upgraded. Pick
* a value between initial_version and SPA_VERSION_BEFORE_FEATURES.
*/
do {
} while (version > SPA_VERSION_BEFORE_FEATURES);
props = fnvlist_alloc();
(void) printf("upgrading spa version from %llu to %llu\n",
}
}
static vdev_t *
{
return (vd);
for (int c = 0; c < vd->vdev_children; c++)
NULL)
return (mvd);
return (NULL);
}
/*
* Find the first available hole which can be used as a top-level.
*/
int
{
int c;
for (c = 0; c < rvd->vdev_children; c++) {
if (cvd->vdev_ishole)
break;
}
return (c);
}
/*
* Verify that vdev_add() works as expected.
*/
/* ARGSUSED */
void
{
int error;
/*
* If we have slogs then remove them 1/4 of the time.
*/
/*
* Grab the guid from the head of the log class rotor.
*/
/*
* We have to grab the zs_name_lock as writer to
* prevent a race between removing a slog (dmu_objset_find)
* and destroying a dataset. Removing the slog will
* grab a reference on the dataset which may cause
* dmu_objset_destroy() to fail with EBUSY thus
* leaving the dataset in an inconsistent state.
*/
} else {
/*
* Make 1/4 of the devices be log devices.
*/
ztest_record_enospc("spa_vdev_add");
else if (error != 0)
}
}
/*
*/
/* ARGSUSED */
void
{
char *aux;
int error;
if (ztest_random(2) == 0) {
} else {
}
/*
* Pick a random device to remove.
*/
} else {
/*
* Find an unused device we can add.
*/
zs->zs_vdev_aux = 0;
for (;;) {
int c;
zs->zs_vdev_aux);
path) == 0)
break;
break;
zs->zs_vdev_aux++;
}
}
if (guid == 0) {
/*
* Add a new device.
*/
if (error != 0)
} else {
/*
* Remove an existing device. Sometimes, dirty its
* vdev state first to make sure we handle removal
* of devices that have pending state changes.
*/
if (ztest_random(2) == 0)
}
}
/*
* split a pool if it has mirror tlvdevs
*/
/* ARGSUSED */
void
{
int error = 0;
/* ensure we have a useable config; mirrors of raidz aren't supported */
return;
}
/* clean up the old pool, if any */
(void) spa_destroy("splitp");
/* generate a config from the existing config */
&tree) == 0);
&children) == 0);
for (c = 0; c < children; c++) {
0) == 0);
ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) == 0);
ZPOOL_CONFIG_IS_HOLE, 1) == 0);
if (lastlogid == 0)
++schildren;
continue;
}
lastlogid = 0;
}
/* OK, create a config that can be used to split */
VDEV_TYPE_ROOT) == 0);
for (c = 0; c < schildren; c++)
nvlist_free(schild[c]);
(void) rw_wrlock(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
if (error == 0) {
(void) printf("successful split - results:\n");
--zs->zs_mirrors;
}
}
/*
* Verify that we can attach and detach devices.
*/
/* ARGSUSED */
void
{
int replacing;
int oldvd_is_log;
/*
* Decide whether to do an attach or a replace.
*/
/*
* Pick a random top-level vdev.
*/
/*
* Pick a random leaf within it.
*/
/*
* Locate this vdev.
*/
}
}
/*
* If we're already doing an attach or replace, oldvd may be a
* mirror vdev -- in which case, pick a random child.
*/
while (oldvd->vdev_children != 0) {
}
/*
* If oldvd has siblings, then half of the time, detach it.
*/
return;
}
/*
* For the new vdev, choose with equal probability between the two
* standard paths (ending in either 'a' or 'b') or a random hot spare.
*/
} else {
if (ztest_random(2) == 0)
}
if (newvd) {
} else {
/*
* Make newsize a little bigger or smaller than oldsize.
* If it's smaller, the attach should fail.
* If it's larger, and we're doing a replace,
* we should get dynamic LUN growth when we're done.
*/
}
/*
* If pvd is not a mirror or root, the attach should fail with ENOTSUP,
* unless it's a replace; in that case any non-replacing parent is OK.
*
* If newvd is already part of the pool, it should fail with EBUSY.
*
* If newvd is too small, it should fail with EOVERFLOW.
*/
else
expected_error = 0;
/*
* Build the nvlist describing newpath.
*/
ashift, 0, 0, 0, 1);
/*
* If our parent was the replacing vdev, but the replace completed,
* then instead of failing with ENOTSUP we may either succeed,
* fail with ENODEV, or fail with EOVERFLOW.
*/
if (expected_error == ENOTSUP &&
/*
* If someone grew the LUN, the replacement may be too small.
*/
/* XXX workaround 6690467 */
fatal(0, "attach (%s %llu, %s %llu, %d) "
"returned %d, expected %d",
}
}
/*
* Callback function which expands the physical size of the vdev.
*/
vdev_t *
{
int fd;
return (vd);
(void) printf("%s grew from %lu to %lu bytes\n",
}
return (NULL);
}
/*
* Callback function which expands a given vdev by calling vdev_online().
*/
/* ARGSUSED */
vdev_t *
{
int error;
/* Calling vdev_online will initialize the new metaslabs */
/*
* If vdev_online returned an error or the underlying vdev_open
* failed then we abort the expand. The only way to know that
* vdev_open fails is by checking the returned newstate.
*/
(void) printf("Unable to expand vdev, state %llu, "
}
return (vd);
}
/*
* Since we dropped the lock we need to ensure that we're
* still talking to the original vdev. It's possible this
* trying to online it.
*/
(void) printf("vdev configuration has changed, "
"guid %llu, state %llu, expected gen %llu, "
"got gen %llu\n",
}
return (vd);
}
return (NULL);
}
/*
* Traverse the vdev tree calling the supplied function.
* We continue to walk the tree until we either have walked all
* children or we receive a non-NULL return from the callback.
* If a NULL callback is passed, then we just return back the first
* leaf vdev we encounter.
*/
vdev_t *
{
return (vd);
else
}
return (cvd);
}
return (NULL);
}
/*
* Verify that dynamic LUN growth works as expected.
*/
/* ARGSUSED */
void
{
/*
* Determine the size of the first leaf vdev associated with
* our top-level device.
*/
/*
* We only try to expand the vdev if it's healthy, less than 4x its
* original size, and it has a valid psize.
*/
return;
}
(void) printf("Expanding LUN %s from %lu to %lu\n",
}
/*
* Growing the vdev is a two step process:
* 1). expand the physical size (i.e. relabel)
* 2). online the vdev to create the new metaslabs
*/
(void) printf("Could not expand LUN because "
"the vdev configuration changed.\n");
}
return;
}
/*
* Expanding the LUN will update the config asynchronously,
* thus we must wait for the async thread to complete any
* pending tasks before proceeding.
*/
for (;;) {
if (done)
break;
}
(void) printf("Could not verify LUN expansion due to "
"intervening vdev offline or remove.\n");
}
return;
}
/*
* Make sure we were able to grow the vdev.
*/
if (new_ms_count <= old_ms_count)
fatal(0, "LUN expansion failed: ms_count %llu <= %llu\n",
/*
* Make sure we were able to grow the pool.
*/
if (new_class_space <= old_class_space)
fatal(0, "LUN expansion failed: class_space %llu <= %llu\n",
(void) printf("%s grew from %s to %s\n",
}
}
/*
* Verify that dmu_objset_{create,destroy,open,close} work as expected.
*/
/* ARGSUSED */
static void
{
/*
* Create the objects common to all ztest datasets.
*/
}
static int
{
return (err);
}
/* ARGSUSED */
static int
{
int error;
/*
* Verify that the dataset contains a directory object.
*/
/* We could have crashed in the middle of destroying it */
}
/*
* Destroy the dataset.
*/
} else {
/* There could be a hold on this dataset */
}
return (0);
}
static boolean_t
{
int error;
return (B_FALSE);
}
}
return (B_TRUE);
}
static boolean_t
{
int error;
(u_longlong_t)id);
return (B_TRUE);
}
/* ARGSUSED */
void
{
int iters;
int error;
(void) rw_rdlock(&ztest_name_lock);
/*
* If this dataset exists from a previous run, process its replay log
* half of the time. If we don't replay it, then dmu_objset_destroy()
* (invoked from ztest_objset_destroy_cb()) should just throw it away.
*/
if (ztest_random(2) == 0 &&
}
/*
* There may be an old instance of the dataset we're about to
* create lying around from a previous run. If so, destroy it
* and all of its snapshots.
*/
/*
* Verify that the destroyed dataset is no longer in the namespace.
*/
/*
* Verify that we can create a new dataset.
*/
if (error) {
(void) rw_unlock(&ztest_name_lock);
return;
}
}
/*
* Open the intent log for it.
*/
/*
* Put some objects in there, do a little I/O to them,
* and randomly take a couple of snapshots along the way.
*/
for (int i = 0; i < iters; i++) {
if (ztest_random(iters) == 0)
(void) ztest_snapshot_create(name, i);
}
/*
* Verify that we cannot create an existing dataset.
*/
/*
* Verify that we can hold an objset that is also owned.
*/
/*
* Verify that we cannot own an objset that is already owned.
*/
(void) rw_unlock(&ztest_name_lock);
}
/*
* Verify that dmu_snapshot_{create,destroy,open,close} work as expected.
*/
void
{
(void) rw_rdlock(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
}
/*
* Cleanup non-standard snapshots and clones.
*/
void
{
int error;
}
/*
* Verify dsl_dataset_promote handles EBUSY
*/
void
{
int error;
(void) rw_rdlock(&ztest_name_lock);
goto out;
}
}
if (error) {
goto out;
}
}
goto out;
}
}
goto out;
}
}
if (error) {
goto out;
}
}
if (error)
goto out;
}
error);
out:
(void) rw_unlock(&ztest_name_lock);
}
/*
* Verify that dmu_object_{alloc,free} work as expected.
*/
void
{
for (int b = 0; b < batchsize; b++)
/*
* Destroy the previous batch of objects, create a new batch,
* and do some I/O on the new objects.
*/
return;
}
/*
* Verify that dmu_{read,write} work as expected.
*/
void
{
/*
* This test uses two objects, packobj and bigobj, that are always
* updated together (i.e. in the same tx) so that their contents are
* in sync and can be compared. Their contents relate to each other
* in a simple way: packobj is a dense array of 'bufwad' structures,
* while bigobj is a sparse array of the same bufwads. Specifically,
* for any index n, there are three bufwads that should be identical:
*
* packobj, at offset n * sizeof (bufwad_t)
* bigobj, at the head of the nth chunk
* bigobj, at the tail of the nth chunk
*
* The chunk size is arbitrary. It doesn't have to be a power of two,
* and it doesn't have any relation to the object blocksize.
* The only requirement is that it can hold at least two bufwads.
*
* Normally, we write the bufwad to each of these locations.
* However, free_percent of the time we instead write zeroes to
* packobj and perform a dmu_free_range() on bigobj. By comparing
* bigobj to packobj, we can verify that the DMU is correctly
* tracking which parts of an object are allocated and free,
* and that the contents of the allocated blocks are correct.
*/
/*
* Read the directory info. If it's the first time, set things up.
*/
return;
/*
* Prefetch a random chunk of the big object.
* Our aim here is to get some async reads in flight
* for blocks that we may free below; the DMU should
* handle this race correctly.
*/
/*
* Pick a random index and compute the offsets into packobj and bigobj.
*/
/*
* free_percent of the time, free a range of bigobj rather than
* overwriting it.
*/
/*
* Read the current contents of our objects.
*/
/*
* Get a tx for the mods to both packobj and bigobj.
*/
if (freeit)
else
/* This accounts for setting the checksum/compression. */
if (txg == 0) {
return;
}
do {
cksum = (enum zio_checksum)
} while (cksum >= ZIO_CHECKSUM_LEGACY_FUNCTIONS);
do {
comp = (enum zio_compress)
} while (comp >= ZIO_COMPRESS_LEGACY_FUNCTIONS);
/*
* For each index from n to n + s, verify that the existing bufwad
* in packobj matches the bufwads at the head and tail of the
* corresponding chunk in bigobj. Then update all three bufwads
* with the new values we want to write out.
*/
for (i = 0; i < s; i++) {
/* LINTED */
/* LINTED */
/* LINTED */
fatal(0, "future leak: got %llx, open txg is %llx",
fatal(0, "wrong index: got %llx, wanted %llx+%llx",
if (freeit) {
} else {
}
}
/*
* We've verified all the old bufwads, and made new ones.
* Now write them out.
*/
if (freeit) {
(void) printf("freeing offset %llx size %llx"
" txg %llx\n",
(u_longlong_t)txg);
}
} else {
(void) printf("writing offset %llx size %llx"
" txg %llx\n",
(u_longlong_t)txg);
}
}
/*
* Sanity check the stuff we just wrote.
*/
{
}
}
void
{
uint64_t i;
/*
* For each index from n to n + s, verify that the existing bufwad
* in packobj matches the bufwads at the head and tail of the
* corresponding chunk in bigobj. Then update all three bufwads
* with the new values we want to write out.
*/
for (i = 0; i < s; i++) {
/* LINTED */
/* LINTED */
/* LINTED */
fatal(0, "future leak: got %llx, open txg is %llx",
fatal(0, "wrong index: got %llx, wanted %llx+%llx",
}
}
void
{
uint64_t i;
int error;
/*
* This test uses two objects, packobj and bigobj, that are always
* updated together (i.e. in the same tx) so that their contents are
* in sync and can be compared. Their contents relate to each other
* in a simple way: packobj is a dense array of 'bufwad' structures,
* while bigobj is a sparse array of the same bufwads. Specifically,
* for any index n, there are three bufwads that should be identical:
*
* packobj, at offset n * sizeof (bufwad_t)
* bigobj, at the head of the nth chunk
* bigobj, at the tail of the nth chunk
*
* The chunk size is set equal to bigobj block size so that
* dmu_assign_arcbuf() can be tested for object updates.
*/
/*
* Read the directory info. If it's the first time, set things up.
*/
return;
/*
* Pick a random index and compute the offsets into packobj and bigobj.
*/
/*
* Iteration 0 test zcopy for DB_UNCACHED dbufs.
* Iteration 1 test zcopy to already referenced dbufs.
* Iteration 2 test zcopy to dirty dbuf in the same txg.
* Iteration 3 test zcopy to dbuf dirty in previous txg.
* Iteration 4 test zcopy when dbuf is no longer dirty.
* Iteration 5 test zcopy when it can't be done.
* Iteration 6 one more zcopy write.
*/
for (i = 0; i < 7; i++) {
uint64_t j;
/*
* In iteration 5 (i == 5) use arcbufs
* that don't match bigobj blksz to test
* dmu_assign_arcbuf() when it can't directly
* assign an arcbuf to a dbuf.
*/
for (j = 0; j < s; j++) {
bigbuf_arcbufs[j] =
} else {
bigbuf_arcbufs[2 * j] =
}
}
/*
* Get a tx for the mods to both packobj and bigobj.
*/
if (txg == 0) {
for (j = 0; j < s; j++) {
if (i != 5 ||
} else {
bigbuf_arcbufs[2 * j]);
}
}
return;
}
/*
* 50% of the time don't read objects in the 1st iteration to
* test dmu_assign_arcbuf() for the case when there're no
* existing dbufs for the specified offsets.
*/
if (i != 0 || ztest_random(2) != 0) {
}
/*
* We've verified all the old bufwads, and made new ones.
* Now write them out.
*/
(void) printf("writing offset %llx size %llx"
" txg %llx\n",
(u_longlong_t)txg);
}
} else {
chunksize / 2);
chunksize / 2,
chunksize / 2);
}
if (i == 1) {
}
bigbuf_arcbufs[j], tx);
} else {
}
if (i == 1) {
}
}
/*
* Sanity check the stuff we just wrote.
*/
{
}
if (i == 2) {
} else if (i == 3) {
}
}
}
/* ARGSUSED */
void
{
/*
* Have multiple threads write to large offsets in an object
* to verify that parallel writes to an object -- even to the
* same blocks within the object -- doesn't cause any trouble.
*/
return;
while (ztest_random(10) != 0)
}
void
{
void *data;
return;
return;
while (ztest_random(count) != 0) {
data) != 0)
break;
while (ztest_random(4) != 0)
}
}
/*
* Verify that zap_{create,destroy,add,remove,update} work as expected.
*/
void
{
int i, ints;
int error;
return;
/*
* Generate a known hash collision, and verify that
* we can lookup and remove both entries.
*/
if (txg == 0)
return;
for (i = 0; i < 2; i++) {
value[i] = i;
}
for (i = 0; i < 2; i++) {
VERIFY3U(0, ==,
}
for (i = 0; i < 2; i++) {
}
/*
* Generate a bunch of random entries.
*/
last_txg = 0;
/*
* If these zap entries already exist, validate their contents.
*/
if (error == 0) {
&zl_ints) == 0);
for (i = 0; i < ints; i++) {
}
} else {
}
/*
* Atomically update two entries in our zap object.
* The first is named txg_%llu, and contains the txg
* in which the property was last updated. The second
* is named prop_%llu, and the nth element of its value
* should be txg + object + n.
*/
if (txg == 0)
return;
for (i = 0; i < ints; i++)
/*
* Remove a random pair of entries.
*/
return;
if (txg == 0)
return;
}
/*
* Testcase to test the upgrading of a microzap to fatzap.
*/
void
{
return;
/*
* Add entries to this ZAP and make sure it spills over
* and gets upgraded to a fatzap. Also, since we are adding
* 2050 entries we should see ptrtbl growth and leaf-block split.
*/
for (int i = 0; i < 2050; i++) {
int error;
if (txg == 0)
return;
}
}
/* ARGSUSED */
void
{
void *data;
return;
/*
* Generate a random name of the form 'xxx.....' where each
* x is a random printable character and the dots are dots.
* There are 94 such characters, and the name length goes from
* 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names.
*/
for (i = 0; i < 3; i++)
for (; i < namelen - 1; i++)
name[i] = '.';
name[i] = '\0';
wc = 1;
} else {
wsize = 1;
data = string_value;
}
count = -1ULL;
/*
* Select an operation: length, lookup, add, update, remove.
*/
i = ztest_random(5);
if (i >= 2) {
if (txg == 0)
return;
} else {
txg = 0;
}
switch (i) {
case 0:
if (error == 0) {
} else {
}
break;
case 1:
if (error == 0) {
if (data == string_value &&
fatal(0, "name '%s' != val '%s' len %d",
} else {
}
break;
case 2:
break;
case 3:
break;
case 4:
break;
}
}
/*
* Commit callback data.
*/
typedef struct ztest_cb_data {
int zcd_expected_err;
/* This is the actual commit callback function */
static void
{
/*
* The private callback data should be destroyed here, but
* since we are going to check the zcd_called field after
* dmu_tx_abort(), we will destroy it there.
*/
return;
}
/* Was this callback added to the global callback list? */
goto out;
/* Remove our callback from the list */
out:
}
/* Allocate and initialize callback data structure */
static ztest_cb_data_t *
{
return (cb_data);
}
/*
* If a number of txgs equal to this threshold have been created after a commit
* callback has been registered but not called, then we assume there is an
* implementation bug.
*/
/*
* Commit callback test.
*/
void
{
int i, error;
return;
/* Every once in a while, abort the transaction on purpose */
if (ztest_random(100) == 0)
error = -1;
if (!error)
if (error) {
/*
* It's not a strict requirement to call the registered
* callbacks from inside dmu_tx_abort(), but that's what
* is supposed to happen in the current implementation
* so we will check for that.
*/
for (i = 0; i < 2; i++) {
}
for (i = 0; i < 2; i++) {
}
return;
}
/*
* Read existing data to make sure there isn't a future leak.
*/
&old_txg, DMU_READ_PREFETCH));
/*
* Since commit callbacks don't have any ordering requirement and since
* it is theoretically possible for a commit callback to be called
* after an arbitrary amount of time has elapsed since its txg has been
* synced, it is difficult to reliably determine whether a commit
* callback hasn't been called due to high load or due to a flawed
* implementation.
*
* In practice, we will assume that if after a certain number of txgs a
* commit callback hasn't been called, then most likely there's an
* implementation bug.
*/
fatal(0, "Commit callback threshold exceeded, oldest txg: %"
}
/*
* Let's find the place to insert our callbacks.
*
* Even though the list is ordered by txg, it is possible for the
* insertion point to not be the end because our txg may already be
* quiescing at this point and other callbacks in the open txg
* (from other objsets) may have sneaked in.
*/
/* Add the 3 callbacks to the list */
for (i = 0; i < 3; i++) {
else
cb_data[i]);
}
}
/* ARGSUSED */
void
{
};
(void) rw_rdlock(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
}
/* ARGSUSED */
void
{
(void) rw_rdlock(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
}
static int
{
int error;
snaps = fnvlist_alloc();
holds = fnvlist_alloc();
return (error);
}
/*
*/
void
{
int error;
(void) rw_rdlock(&ztest_name_lock);
/*
* Clean up from any previous run.
*/
/*
* Create snapshot, clone it, mark snap for deferred destroy,
* destroy clone, verify snap was also destroyed.
*/
if (error) {
ztest_record_enospc("dmu_objset_snapshot");
goto out;
}
}
if (error) {
ztest_record_enospc("dmu_objset_clone");
goto out;
}
}
if (error) {
fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d",
}
if (error)
/*
* Create snapshot, add temporary hold, verify that we can't
* destroy a held snapshot, mark for deferred destroy,
* release hold, verify snapshot was destroyed.
*/
if (error) {
ztest_record_enospc("dmu_objset_snapshot");
goto out;
}
}
holds = fnvlist_alloc();
ztest_record_enospc("dsl_dataset_user_hold");
goto out;
} else if (error) {
fatal(0, "dsl_dataset_user_hold(%s, %s) = %u",
}
fatal(0, "dsl_destroy_snapshot(%s, B_FALSE) = %d",
}
if (error) {
fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d",
}
if (error)
out:
(void) rw_unlock(&ztest_name_lock);
}
/*
* Inject random faults into the on-disk data.
*/
/* ARGSUSED */
void
{
int fd;
int maxfaults;
int mirror_save;
/*
* Grab the name lock as reader. There are some operations
* which don't like to have their vdevs changed while
* they are in progress (i.e. spa_change_guid). Those
* operations will have grabbed the name lock as writer.
*/
(void) rw_rdlock(&ztest_name_lock);
/*
* We need SCL_STATE here because we're going to look at vd0->vdev_tsd.
*/
if (ztest_random(2) == 0) {
/*
* Inject errors on a normal data device or slog device.
*/
/*
* Generate paths to the first leaf in this top-level vdev,
* and to the random leaf we selected. We'll induce transient
* write failures and random online/offline activity on leaf 0,
* and we'll write random garbage to the randomly chosen leaf.
*/
/*
* If the top-level vdev needs to be resilvered
* then we only allow faults on the device that is
* resilvering.
*/
vd0->vdev_resilver_txg != 0)) {
/*
* Make vd0 explicitly claim to be unreadable,
* or unwriteable, or reach behind its back
* and close the underlying fd. We can do this if
* maxfaults == 0 because we'll fail and reexecute,
* and we can do it if maxfaults >= 2 because we'll
* have enough redundancy. If maxfaults == 1, the
* combination of this with injection of random data
* corruption below exceeds the pool's fault tolerance.
*/
} else if (ztest_random(2) == 0) {
} else {
}
}
} else {
/*
* Inject errors on an l2cache device.
*/
(void) rw_unlock(&ztest_name_lock);
return;
}
leaf = 0;
leaves = 1;
}
(void) rw_unlock(&ztest_name_lock);
/*
* If we can tolerate two or more faults, or we're dealing
*/
ZFS_OFFLINE_TEMPORARY : 0);
/*
* We have to grab the zs_name_lock as writer to
* prevent a race between offlining a slog and
* destroying a dataset. Offlining the slog will
* grab a reference on the dataset which may cause
* dmu_objset_destroy() to fail with EBUSY thus
* leaving the dataset in an inconsistent state.
*/
if (islog)
(void) rw_wrlock(&ztest_name_lock);
if (islog)
(void) rw_unlock(&ztest_name_lock);
} else {
/*
* Ideally we would like to be able to randomly
* call vdev_[on|off]line without holding locks
* to force unpredictable failures but the side
* effects of vdev_[on|off]line prevent us from
* doing so. We grab the ztest_vdev_lock here to
* prevent a race between injection testing and
* aux_vdev removal.
*/
}
}
if (maxfaults == 0)
return;
/*
* We have at least single-fault tolerance, so inject data corruption.
*/
return;
while (--iters != 0) {
/*
* The offset must be chosen carefully to ensure that
* we do not inject a given logical block with errors
* on two different leaf devices, because ZFS can not
* tolerate that (if maxfaults==1).
*
* We divide each leaf into chunks of size
* (# leaves * SPA_MAXBLOCKSIZE * 4). Within each chunk
* there is a series of ranges to which we can inject errors.
* Each range can accept errors on only a single leaf vdev.
* The error injection ranges are separated by ranges
* which we will not inject errors on any device (DMZs).
* Each DMZ must be large enough such that a single block
* can not straddle it, so that a single block can not be
* a target in two different injection ranges (on different
* leaf vdevs).
*
* For example, with 3 leaves, each chunk looks like:
* 0 to 32M: injection range for leaf 0
* 32M to 64M: DMZ - no injection allowed
* 64M to 96M: injection range for leaf 1
* 96M to 128M: DMZ - no injection allowed
* 128M to 160M: injection range for leaf 2
* 160M to 192M: DMZ - no injection allowed
*/
/*
* Only allow damage to the labels at one end of the vdev.
*
* If all labels are damaged, the device will be totally
* inaccessible, which will result in loss of data,
* because we also damage (parts of) the other side of
*
* Additionally, we will always have both an even and an
* odd label, so that we can handle crashes in the
* middle of vdev_config_sync().
*/
continue;
/*
* The two end labels are stored at the "end" of the disk, but
* the end of the disk (vdev_psize) is aligned to
* sizeof (vdev_label_t).
*/
continue;
return;
}
(void) printf("injected bad word into %s,"
}
}
/*
* Verify that DDT repair works as expected.
*/
void
{
void *buf;
return;
/*
* Take the name lock as writer to prevent anyone else from changing
* the pool and dataset properties we need to maintain during this test.
*/
(void) rw_wrlock(&ztest_name_lock);
B_FALSE) != 0 ||
B_FALSE) != 0) {
(void) rw_unlock(&ztest_name_lock);
return;
}
if (txg == 0) {
(void) rw_unlock(&ztest_name_lock);
return;
}
/*
* Write all the copies of our block.
*/
for (int i = 0; i < copies; i++) {
if (error != 0) {
}
}
/*
* Find out what block we got.
*/
/*
* Damage the block. Dedup-ditto will save us when we read it later.
*/
(void) rw_unlock(&ztest_name_lock);
}
/*
* Scrub the pool.
*/
/* ARGSUSED */
void
{
}
/*
* Change the guid for the pool.
*/
/* ARGSUSED */
void
{
int error;
(void) rw_wrlock(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
if (error != 0)
return;
(void) printf("Changed guid old %llu -> %llu\n",
}
}
/*
* Rename the pool to a different name and then rename it back.
*/
/* ARGSUSED */
void
{
(void) rw_wrlock(&ztest_name_lock);
/*
* Do the rename
*/
/*
* Try to open it under the old name, which shouldn't exist
*/
/*
* Open it under the new name and make sure it's still the same spa_t.
*/
/*
* Rename it back to the original
*/
/*
* Make sure it can still be opened
*/
(void) rw_unlock(&ztest_name_lock);
}
/*
* Verify pool integrity by running zdb.
*/
static void
{
int status;
char *bin;
char *ztest;
char *isa;
int isalen;
/* LINTED */
isa,
pool);
if (status == 0)
return;
ztest_dump_core = 0;
else
}
static void
{
}
static void
{
int error;
}
/*
* Clean up from previous runs.
*/
(void) spa_destroy(newname);
/*
* Get the pool's configuration and guid.
*/
/*
*/
if (ztest_random(2) == 0)
ztest_walk_pool_directory("pools before export");
/*
* Export it.
*/
ztest_walk_pool_directory("pools after export");
/*
* Try to import it.
*/
/*
* Import it under the new name.
*/
if (error != 0) {
dump_nvlist(config, 0);
}
ztest_walk_pool_directory("pools after import");
/*
* Try to import it again -- should fail with EEXIST.
*/
/*
* Try to import it under a different name -- should fail with EEXIST.
*/
/*
* Verify that the pool is no longer visible under the old name.
*/
/*
* Verify that we can open and close the pool using the new name.
*/
}
static void
{
(void) printf("resuming from suspended state\n");
(void) zio_resume(spa);
}
static void *
{
while (!ztest_exiting) {
if (spa_suspended(spa))
/*
* Periodically change the zfs_compressed_arc_enabled setting.
*/
if (ztest_random(10) == 0)
}
return (NULL);
}
static void *
{
for (;;) {
/*
* If the pool is suspended then fail immediately. Otherwise,
* check to see if the pool is making any progress. If
* vdev_deadman() discovers that there hasn't been any recent
*/
fatal(0, "aborting test after %llu seconds because "
"pool has transitioned to a suspended state.",
zfs_deadman_synctime_ms / 1000);
return (NULL);
}
(void) printf("ztest has been running for %lld seconds\n",
total);
}
}
static void
{
(void) printf("%6.2f sec in %s\n",
}
}
static void *
{
int rand;
/*
* See if it's time to force a crash.
*/
ztest_kill(zs);
/*
* If we're getting ENOSPC with some regularity, stop.
*/
break;
/*
* Pick a random function to execute.
*/
}
}
return (NULL);
}
static void
{
}
/*
 * Destroy the dataset(s) owned by ztest thread slot 'd'.
 * NOTE(review): the loop body appears elided in this extraction;
 * upstream it destroys the dataset for each thread slot visited --
 * confirm against the full source before relying on this.
 */
static void
ztest_dataset_destroy(int d)
{
	/*
	 * Cleanup any non-standard clones and snapshots. In general,
	 * ztest thread t operates on dataset (t % zopt_datasets),
	 * so there may be more than one thing to clean up.
	 */
	/* Visit every thread slot that maps onto dataset 'd'. */
	for (int t = d; t < ztest_opts.zo_threads;
	    t += ztest_opts.zo_datasets) {
	}
}
static void
{
/*
* ZTEST_DIROBJ is the object directory for the entire dataset.
* Therefore, the number of objects in use should equal the
* number of ZTEST_DIROBJ entries, +1 for ZTEST_DIROBJ itself.
* If not, we have an object leak.
*
* Note that we can only check this in ztest_dataset_open(),
* when the open-context and syncing-context values agree.
* That's because zap_count() returns the open-context value,
* while dmu_objset_space() returns the rootbp fill count.
*/
}
static int
ztest_dataset_open(int d)
{
int error;
(void) rw_rdlock(&ztest_name_lock);
(void) rw_unlock(&ztest_name_lock);
return (error);
}
(void) rw_unlock(&ztest_name_lock);
fatal(0, "missing log records: claimed %llu < committed %llu",
(void) printf("%s replay %llu blocks, %llu records, seq %llu\n",
if (zilog->zl_replaying_seq != 0 &&
fatal(0, "missing log records: replayed %llu < committed %llu",
return (0);
}
/*
 * Close the dataset previously opened for ztest thread slot 'd'.
 * NOTE(review): the body is empty in this extraction -- upstream
 * closes the per-slot ZIL and releases the objset hold; confirm
 * against the full source.
 */
static void
ztest_dataset_close(int d)
{
}
/*
* Kick off threads to run tests on all datasets in parallel.
*/
static void
{
int error;
/*
*/
zs->zs_thread_stop =
zs->zs_thread_kill -=
}
/*
* Open our pool.
*/
/*
* We don't expect the pool to suspend unless maxfaults == 0,
* in which case ztest_fault_inject() temporarily takes away
* the only valid replica.
*/
if (MAXFAULTS() == 0)
else
/*
* Create a thread to periodically resume suspended I/O.
*/
&resume_tid) == 0);
/*
* Create a deadman thread to abort() if we hang.
*/
NULL) == 0);
/*
* Verify that we can safely inquire about any object,
* whether it's allocated or not. To make it interesting,
* we probe a 5-wide window around each power of two.
* This hits all edge cases, including zero and the max.
*/
for (int t = 0; t < 64; t++) {
for (int d = -5; d <= 5; d++) {
(1ULL << t) + d, NULL);
}
}
/*
* If we got any ENOSPC errors on the previous run, destroy something.
*/
if (zs->zs_enospc_count != 0) {
}
zs->zs_enospc_count = 0;
(void) printf("starting main threads...\n");
/*
* Kick off all the tests that run in parallel.
*/
for (int t = 0; t < ztest_opts.zo_threads; t++) {
if (t < ztest_opts.zo_datasets &&
ztest_dataset_open(t) != 0)
return;
}
/*
* Wait for all of the tests to complete. We go in reverse order
* so we don't close datasets while threads are still using them.
*/
if (t < ztest_opts.zo_datasets)
}
/* Kill the resume thread */
/*
* Right before closing the pool, kick off a bunch of async I/O;
* spa_close() should wait for it to complete.
*/
}
/*
* Verify that we can loop over all pools.
*/
/*
* Verify that we can export the pool and reimport it under a
* different name.
*/
if (ztest_random(2) == 0) {
}
kernel_fini();
(void) rwlock_destroy(&ztest_name_lock);
(void) _mutex_destroy(&ztest_vdev_lock);
}
static void
ztest_freeze(void)
{
int numloops = 0;
(void) printf("testing spa_freeze()...\n");
VERIFY3U(0, ==, ztest_dataset_open(0));
/*
* Force the first log block to be transactionally allocated.
* We have to do this before we freeze the pool -- otherwise
* the log chain won't be anchored.
*/
}
/*
* Freeze the pool. This stops spa_sync() from doing anything,
* so that the only way to record changes from now on is the ZIL.
*/
/*
* Because it is hard to predict how much space a write will actually
* require beforehand, we leave ourselves some fudge space to write over
* capacity.
*/
/*
* Run tests that generate log records but don't alter the pool config
* We do a txg_wait_synced() after each iteration to force the txg
* to increase well beyond the last synced value in the uberblock.
* The ZIL should be OK with that.
*
* Run a random number of times less than zo_maxloops and ensure we do
* not run out of space on the pool.
*/
while (ztest_random(10) != 0 &&
}
/*
* Commit all of the changes we just generated.
*/
/*
* Close our dataset and close the pool.
*/
kernel_fini();
/*
* Open and close the pool and dataset to induce log replay.
*/
VERIFY3U(0, ==, ztest_dataset_open(0));
ztest_reguid(NULL, 0);
kernel_fini();
}
void
{
hrtime_t m = s / 60;
hrtime_t h = m / 60;
hrtime_t d = h / 24;
s -= m * 60;
m -= h * 60;
h -= d * 24;
timebuf[0] = '\0';
if (d)
"%llud%02lluh%02llum%02llus", d, h, m, s);
else if (h)
else if (m)
else
}
static nvlist_t *
{
if (ztest_random(2) == 0)
return (props);
return (props);
}
/*
* Create a storage pool with the given name and initial vdev size.
* Then test spa_freeze() functionality.
*/
static void
{
/*
* Create the storage pool.
*/
props = make_random_props();
for (int i = 0; i < SPA_FEATURES; i++) {
}
zs->zs_metaslab_sz =
kernel_fini();
ztest_freeze();
(void) rwlock_destroy(&ztest_name_lock);
(void) _mutex_destroy(&ztest_vdev_lock);
}
static void
setup_data_fd(void)
{
ASSERT3S(ztest_fd_data, >=, 0);
(void) unlink(ztest_name_data);
}
static int
{
int size;
return (size);
}
static void
setup_hdr(void)
{
int size;
}
static void
setup_data(void)
{
}
static boolean_t
{
int status;
}
if (pid == -1)
if (pid == 0) { /* child */
(void) close(ztest_fd_rand);
VERIFY3U(11, >=,
}
}
continue;
if (WEXITSTATUS(status) != 0) {
exit(2);
}
return (B_FALSE);
} else if (WIFSIGNALED(status)) {
exit(3);
}
return (B_TRUE);
} else {
exit(4);
/* NOTREACHED */
}
}
static void
ztest_run_init(void)
{
/*
* Blow away any existing copy of zpool.cache
*/
(void) remove(spa_config_path);
/*
* Create and initialize our storage pool.
*/
(void) printf("ztest_init(), pass %d\n", i);
}
ztest_init(zs);
}
}
int
{
int kills = 0;
int iters = 0;
int older = 0;
int newer = 0;
char *cmd;
zfs_deadman_synctime_ms = 300000;
ASSERT3S(ztest_fd_rand, >=, 0);
if (!fd_data_str) {
setup_hdr();
setup_data();
sizeof (*ztest_shared_opts));
} else {
setup_data();
}
/* Override location of zpool.cache */
zs = ztest_shared;
if (fd_data_str) {
if (zs->zs_do_init)
else
exit(0);
}
(void) printf("%llu vdevs, %d datasets, %d threads,"
" %llu seconds...\n",
}
(void) printf("Executing older ztest for "
}
} else {
}
for (int f = 0; f < ZTEST_FUNCS; f++) {
zi = &ztest_info[f];
zc = ZTEST_GET_SHARED_CALLSTATE(f);
else
}
/*
* Run the tests in a loop. These tests include fault injection
* to verify that self-healing data works, and forced crashes
* to verify that we never lose on-disk consistency.
*/
int status;
/*
* Initialize the workload counters for each function.
*/
for (int f = 0; f < ZTEST_FUNCS; f++) {
zc = ZTEST_GET_SHARED_CALLSTATE(f);
}
/* Set the allocation switch size */
(void) printf("Executing newer ztest: %s\n",
cmd);
}
newer++;
} else {
(void) printf("Executing older ztest: %s\n",
}
older++;
}
if (killed)
kills++;
iters++;
(void) printf("Pass %3d, %8s, %3llu ENOSPC, "
"%4.1f%% of %5s used, %3.0f%% done, %8s to go\n",
}
(void) printf("\nWorkload summary:\n\n");
(void) printf("%7s %9s %s\n",
"Calls", "Time", "Function");
(void) printf("%7s %9s %s\n",
"-----", "----", "--------");
for (int f = 0; f < ZTEST_FUNCS; f++) {
zi = &ztest_info[f];
zc = ZTEST_GET_SHARED_CALLSTATE(f);
(void) printf("%7llu %9s %s\n",
}
(void) printf("\n");
}
/*
* It's possible that we killed a child during a rename test,
* in which case we'll have a 'ztest_tmp' pool lying around
* instead of 'ztest'. Do a blind rename in case this happened.
*/
} else {
kernel_fini();
}
kernel_fini();
}
if (hasalt) {
cmd);
}
(void) printf("%d killed, %d completed, %.0f%% kill rate\n",
}
return (0);
}