zfs_log.c revision da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0
fa9e4066f08beec538e775443c5be79dd423fcabahrens * CDDL HEADER START
fa9e4066f08beec538e775443c5be79dd423fcabahrens * The contents of this file are subject to the terms of the
104e2ed78d9ef0a0f89f320108b8ca29ca3850d5perrin * Common Development and Distribution License (the "License").
104e2ed78d9ef0a0f89f320108b8ca29ca3850d5perrin * You may not use this file except in compliance with the License.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
fa9e4066f08beec538e775443c5be79dd423fcabahrens * See the License for the specific language governing permissions
fa9e4066f08beec538e775443c5be79dd423fcabahrens * and limitations under the License.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * When distributing Covered Code, include this CDDL HEADER in each
fa9e4066f08beec538e775443c5be79dd423fcabahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * If applicable, add the following below this CDDL HEADER, with the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * fields enclosed by brackets "[]" replaced with your own identifying
fa9e4066f08beec538e775443c5be79dd423fcabahrens * information: Portions Copyright [yyyy] [name of copyright owner]
fa9e4066f08beec538e775443c5be79dd423fcabahrens * CDDL HEADER END
893a6d32980d24be1349478f44169009d4801c25ahrens * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Use is subject to license terms.
fa9e4066f08beec538e775443c5be79dd423fcabahrens#pragma ident "%Z%%M% %I% %E% SMI"
fa9e4066f08beec538e775443c5be79dd423fcabahrens * All the functions in this file are used to construct the log entries
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * to record transactions. They allocate an intent log transaction
fa9e4066f08beec538e775443c5be79dd423fcabahrens * structure (itx_t) and save within it all the information necessary to
fa9e4066f08beec538e775443c5be79dd423fcabahrens * possibly replay the transaction. The itx is then assigned a sequence
fa9e4066f08beec538e775443c5be79dd423fcabahrens * number and inserted in the in-memory list anchored in the zilog.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwzfs_log_create_txtype(zil_create_t type, vsecattr_t *vsecp, vattr_t *vap)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw switch (type) {
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw /*NOTREACHED*/
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * build up the log data necessary for logging xvattr_t
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * First lr_attr_t is initialized. Following the lr_attr_t
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * is the mapsize and attribute bitmap copied from the xvattr_t.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * Following the bitmap and bitmapsize two 64 bit words are reserved
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * for the create time which may be set. Following the create time is
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * a single 64 bit integer which has the bits to set on
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * replay for the xvattr.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwstatic void
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw /* Now pack the attributes up in a single uint64_t */
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw bcopy(xoap->xoa_av_scanstamp, scanstamp, AV_SCANSTAMP_SZ);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwstatic void *
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw /* First copy in the ACE FUIDs */
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwstatic void *
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwzfs_log_fuid_domains(zfs_fuid_info_t *fuidp, void *start)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw /* now copy in the domain info, if any */
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * zfs_log_create() is used to handle TX_CREATE, TX_CREATE_ATTR, TX_MKDIR,
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * TX_MKDIR_ATTR and TX_MKXATTR
fa9e4066f08beec538e775443c5be79dd423fcabahrens * transactions.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * TX_CREATE and TX_MKDIR are standard creates, but they may have FUID
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * domain information appended prior to the name. In this case the
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * uid/gid in the log record will be a log centric FUID.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * TX_CREATE_ACL_ATTR and TX_MKDIR_ACL_ATTR handle special creates that
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * may contain attributes, ACL and optional fuid information.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * TX_CREATE_ACL and TX_MKDIR_ACL handle special creates that specify
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * an ACL and normal users/groups in the ACEs.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * There may be optional xvattr attribute information similar
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * to zfs_log_setattr.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * Also, after the file name "domain" strings may be appended.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwzfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw znode_t *dzp, znode_t *zp, char *name, vsecattr_t *vsecp,
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * If we have FUIDs present then add in space for
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * domains and ACE FUIDs, if any.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw if ((int)txtype == TX_CREATE_ATTR || (int)txtype == TX_MKDIR_ATTR ||
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * Fill in xvattr info if any
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw zfs_log_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), xvap);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw /* Now fill in any ACL info */
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw /* drop in FUID info */
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * Now place file name in log record
fa9e4066f08beec538e775443c5be79dd423fcabahrens * zfs_log_remove() handles both TX_REMOVE and TX_RMDIR transactions.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwzfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
fa9e4066f08beec538e775443c5be79dd423fcabahrens itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * zfs_log_link() handles TX_LINK transactions.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwzfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
fa9e4066f08beec538e775443c5be79dd423fcabahrens itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * zfs_log_symlink() handles TX_SYMLINK transactions.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwzfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
fa9e4066f08beec538e775443c5be79dd423fcabahrens itx = zil_itx_create(txtype, sizeof (*lr) + namesize + linksize);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * zfs_log_rename() handles TX_RENAME transactions.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwzfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
fa9e4066f08beec538e775443c5be79dd423fcabahrens znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp)
fa9e4066f08beec538e775443c5be79dd423fcabahrens itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize);
fa9e4066f08beec538e775443c5be79dd423fcabahrens bcopy(dname, (char *)(lr + 1) + snamesize, dnamesize);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * zfs_log_write() handles TX_WRITE transactions.
6ce0521ac291be36119f359237066c4fb8088683perrin#define ZIL_MAX_LOG_DATA (SPA_MAXBLOCKSIZE - sizeof (zil_trailer_t) - \
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
6ce0521ac291be36119f359237066c4fb8088683perrin znode_t *zp, offset_t off, ssize_t resid, int ioflag)
104e2ed78d9ef0a0f89f320108b8ca29ca3850d5perrin * Writes are handled in three different ways:
104e2ed78d9ef0a0f89f320108b8ca29ca3850d5perrin * WR_INDIRECT:
6ce0521ac291be36119f359237066c4fb8088683perrin * If the write is greater than zfs_immediate_write_sz and there are
6ce0521ac291be36119f359237066c4fb8088683perrin * no separate logs in this pool then later *if* we need to log the
6ce0521ac291be36119f359237066c4fb8088683perrin * write then dmu_sync() is used to immediately write the block and
6ce0521ac291be36119f359237066c4fb8088683perrin * its block pointer is put in the log record.
104e2ed78d9ef0a0f89f320108b8ca29ca3850d5perrin * WR_COPIED:
104e2ed78d9ef0a0f89f320108b8ca29ca3850d5perrin * If we know we'll immediately be committing the
104e2ed78d9ef0a0f89f320108b8ca29ca3850d5perrin * transaction (FDSYNC (O_DSYNC)), then we allocate a larger
104e2ed78d9ef0a0f89f320108b8ca29ca3850d5perrin * log record here for the data and copy the data in.
104e2ed78d9ef0a0f89f320108b8ca29ca3850d5perrin * WR_NEED_COPY:
104e2ed78d9ef0a0f89f320108b8ca29ca3850d5perrin * Otherwise we don't allocate a buffer, and *if* we need to
104e2ed78d9ef0a0f89f320108b8ca29ca3850d5perrin * flush the write later then a buffer is allocated and
104e2ed78d9ef0a0f89f320108b8ca29ca3850d5perrin * we retrieve the data using the dmu.
ec533521f091387e2911769818ae4be26219fca2fr if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) {
ec533521f091387e2911769818ae4be26219fca2fr (void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
6ce0521ac291be36119f359237066c4fb8088683perrin * If there are slogs and the write would overflow the largest
6ce0521ac291be36119f359237066c4fb8088683perrin * block, then because we don't want to use the main pool
6ce0521ac291be36119f359237066c4fb8088683perrin * to dmu_sync, we have to split the write.
6ce0521ac291be36119f359237066c4fb8088683perrin if (write_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os,
fa9e4066f08beec538e775443c5be79dd423fcabahrens * zfs_log_truncate() handles TX_TRUNCATE transactions.
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
fa9e4066f08beec538e775443c5be79dd423fcabahrens * zfs_log_setattr() handles TX_SETATTR transactions.
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * If XVATTR set, then log record size needs to allow
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * for lr_attr_t + xvattr mask, mapsize and create time
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * plus actual attribute values
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw recsize = sizeof (*lr) + ZIL_XVAT_SIZE(xvap->xva_mapsize);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw if ((mask_applied & AT_UID) && IS_EPHEMERAL(vap->va_uid))
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw if ((mask_applied & AT_GID) && IS_EPHEMERAL(vap->va_gid))
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw start = (caddr_t)start + ZIL_XVAT_SIZE(xvap->xva_mapsize);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * Now append domain information, if any, to the end
fa9e4066f08beec538e775443c5be79dd423fcabahrens * zfs_log_acl() handles TX_ACL transactions.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw txtype = (zp->z_zfsvfs->z_version == ZPL_VERSION_INITIAL) ?
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw txsize = lrsize + aclbytes + (fuidp ? fuidp->z_domain_str_sz : 0) +