spa_history.c revision 088f389458728c464569a5506b58070254fa4f7d
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * CDDL HEADER START
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * The contents of this file are subject to the terms of the
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * Common Development and Distribution License (the "License").
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * You may not use this file except in compliance with the License.
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * See the License for the specific language governing permissions
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * and limitations under the License.
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * When distributing Covered Code, include this CDDL HEADER in each
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov * If applicable, add the following below this CDDL HEADER, with the
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * fields enclosed by brackets "[]" replaced with your own identifying
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov * information: Portions Copyright [yyyy] [name of copyright owner]
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov * CDDL HEADER END
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * Use is subject to license terms.
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe#pragma ident "%Z%%M% %I% %E% SMI"
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * Routines to manage the on-disk history log.
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * The history log is stored as a dmu object containing
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * <packed record length, record nvlist> tuples.
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * Where "record nvlist" is an nvlist containing uint64_ts and strings, and
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * "packed record length" is the packed length of the "record nvlist" stored
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * as a little endian uint64_t.
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * The log is implemented as a ring buffer, though the original creation
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * of the pool ('zpool create') is never overwritten.
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * The history log is tracked as object 'spa_t::spa_history'. The bonus buffer
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * of 'spa_history' stores the offsets for logging/retrieving history as
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * 'spa_history_phys_t'. 'sh_pool_create_len' is the ending offset in bytes of
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * where the 'zpool create' record is stored. This allows us to never
efbf89fbc68a0864d303fe237fc420cf018d52f7Robert Mustacchi * overwrite the original creation of the pool. 'sh_phys_max_off' is the
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * physical ending offset in bytes of the log. This tells you the length of
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * the buffer. 'sh_eof' is the logical EOF (in bytes). Whenever a record
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * is added, 'sh_eof' is incremented by the size of the record.
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * 'sh_eof' is never decremented. 'sh_bof' is the logical BOF (in bytes).
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * This is where the consumer should start reading from after reading in
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * the 'zpool create' portion of the log.
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * 'sh_records_lost' keeps track of how many records have been overwritten
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * and permanently lost.
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe/* convert a logical offset to physical */
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowespa_history_log_to_phys(uint64_t log_off, spa_history_phys_t *shpp)
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe phys_len = shpp->sh_phys_max_off - shpp->sh_pool_create_len;
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe return ((log_off - shpp->sh_pool_create_len) % phys_len
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowespa_history_create_obj(spa_t *spa, dmu_tx_t *tx)
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe spa->spa_history = dmu_object_alloc(mos, DMU_OT_SPA_HISTORY,
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe ASSERT(dbp->db_size >= sizeof (spa_history_phys_t));
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * Figure out maximum size of history log. We set it at
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * 1% of pool size, with a max of 32MB and min of 128KB.
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe shpp->sh_phys_max_off = spa_get_dspace(spa) / 100;
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 32<<20);
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe shpp->sh_phys_max_off = MAX(shpp->sh_phys_max_off, 128<<10);
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe * Change 'sh_bof' to the beginning of the next record.
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowespa_history_advance_bof(spa_t *spa, spa_history_phys_t *shpp)
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov phys_bof = spa_history_log_to_phys(shpp->sh_bof, shpp);
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov firstread = MIN(sizeof (reclen), shpp->sh_phys_max_off - phys_bof);
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov if ((err = dmu_read(mos, spa->spa_history, phys_bof, firstread,
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov shpp->sh_pool_create_len, sizeof (reclen) - firstread,
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankovspa_history_write(spa_t *spa, void *buf, uint64_t len, spa_history_phys_t *shpp,
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov /* see if we need to reset logical BOF */
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov while (shpp->sh_phys_max_off - shpp->sh_pool_create_len -
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov if ((err = spa_history_advance_bof(spa, shpp)) != 0) {
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp);
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov firstwrite = MIN(len, shpp->sh_phys_max_off - phys_eof);
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov dmu_write(mos, spa->spa_history, phys_eof, firstwrite, buf, tx);
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov /* write out the rest at the beginning of physical file */
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov dmu_write(mos, spa->spa_history, shpp->sh_pool_create_len,
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankovstatic char *
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov return ("global");
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov * Write out a history event.
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankovspa_history_log_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov * If we have an older pool that doesn't have a command
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov * history object, create it now.
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov * Get the offset of where we need to write via the bonus buffer.
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov * Update the offset when the write completes.
a9478106a12424322498e53cf7cd75bd8a4d6004Yuri Pankov VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
c10c16dec587a0662068f6e2991c29ed3a9db943Richard Lowe ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS);
gethrestime_sec()) == 0);
#ifdef _KERNEL
history_str) == 0);
history_str) == 0);
if (!ret)
int err;
return (ENOENT);
return (err);
#ifdef ZFS_DEBUG
if (read_len == 0) {
return (err);
char *str;