zfs_replay.c revision 7a286c471efbab8562f7655a82931904703fffe0
fa9e4066f08beec538e775443c5be79dd423fcabahrens * CDDL HEADER START
fa9e4066f08beec538e775443c5be79dd423fcabahrens * The contents of this file are subject to the terms of the
72fc53bc90bd3b199d29d03ee68adb4a5a17d35bmarks * Common Development and Distribution License (the "License").
72fc53bc90bd3b199d29d03ee68adb4a5a17d35bmarks * You may not use this file except in compliance with the License.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
fa9e4066f08beec538e775443c5be79dd423fcabahrens * See the License for the specific language governing permissions
fa9e4066f08beec538e775443c5be79dd423fcabahrens * and limitations under the License.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * When distributing Covered Code, include this CDDL HEADER in each
fa9e4066f08beec538e775443c5be79dd423fcabahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * If applicable, add the following below this CDDL HEADER, with the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * fields enclosed by brackets "[]" replaced with your own identifying
fa9e4066f08beec538e775443c5be79dd423fcabahrens * information: Portions Copyright [yyyy] [name of copyright owner]
fa9e4066f08beec538e775443c5be79dd423fcabahrens * CDDL HEADER END
7a286c471efbab8562f7655a82931904703fffe0Dai Ngo * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Use is subject to license terms.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Functions to replay ZFS intent log (ZIL) records
fa9e4066f08beec538e775443c5be79dd423fcabahrens * The functions are called through a function vector (zfs_replay_vector)
fa9e4066f08beec538e775443c5be79dd423fcabahrens * which is indexed by the transaction type.
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfs_init_vattr(vattr_t *vap, uint64_t mask, uint64_t mode,
fa9e4066f08beec538e775443c5be79dd423fcabahrens uint64_t uid, uint64_t gid, uint64_t rdev, uint64_t nodeid)
fa9e4066f08beec538e775443c5be79dd423fcabahrens/* ARGSUSED */
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfs_replay_error(zfsvfs_t *zfsvfs, lr_t *lr, boolean_t byteswap)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwstatic void
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw xvap->xva_vattr.va_mask &= ~AT_XVATTR; /* shouldn't happen */
569e6c63191416b7413c148fd5a6194a0b820b2cmarks for (i = 0; i != lrattr->lr_attr_masksize; i++, bitmap++)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw attrs = (uint64_t *)(lrattr + lrattr->lr_attr_masksize - 1);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw xoap->xoa_appendonly = ((*attrs & XAT0_APPENDONLY) != 0);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw xoap->xoa_av_modified = ((*attrs & XAT0_AV_MODIFIED) != 0);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw bcopy(scanstamp, xoap->xoa_av_scanstamp, AV_SCANSTAMP_SZ);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwstatic void *
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwzfs_replay_fuid_domain_common(zfs_fuid_info_t *fuid_infop, void *start,
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw for (i = 0; i != domcnt; i++) {
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * Set the uid/gid in the fuid_info structure.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwstatic void
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwzfs_replay_fuid_ugid(zfs_fuid_info_t *fuid_infop, uint64_t uid, uint64_t gid)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * If owner or group are log specific FUIDs then slurp up
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * domain information and build zfs_fuid_info_t
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * Load fuid domains into fuid_info_t
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwzfs_replay_fuid_domain(void *buf, void **end, uint64_t uid, uint64_t gid)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw *end = zfs_replay_fuid_domain_common(fuid_infop, buf, domcnt);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * load zfs_fuid_t's and fuid_domains into fuid_info_t
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwzfs_replay_fuids(void *start, void **end, int idcnt, int domcnt, uint64_t uid,
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw for (i = 0; i != idcnt; i++) {
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw *end = zfs_replay_fuid_domain_common(fuid_infop, log_fuid, domcnt);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwstatic void
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw /* swap the lr_attr structure */
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw /* swap the bitmap */
569e6c63191416b7413c148fd5a6194a0b820b2cmarks byteswap_uint32_array(lrattr + 1, (lrattr->lr_attr_masksize - 1) *
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw /* swap the attributes, create time + 64 bit word for attributes */
569e6c63191416b7413c148fd5a6194a0b820b2cmarks byteswap_uint64_array((caddr_t)(lrattr + 1) + (sizeof (uint32_t) *
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw (lrattr->lr_attr_masksize - 1)), 3 * sizeof (uint64_t));
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * Replay file create with optional ACL, xvattr information as well
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * as optional FUID information.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw zfs_ace_byteswap(aclstart, lracl->lr_acl_bytes, B_FALSE);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw /* swap fuids */
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw zfs_init_vattr(&xva.xva_vattr, AT_TYPE | AT_MODE | AT_UID | AT_GID,
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * All forms of zfs create (create, mkdir, mkxattrdir, symlink)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * eventually end up in zfs_mknode(), which assigns the object's
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * creation time and generation number. The generic VOP_CREATE()
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * doesn't have either concept, so we smuggle the values inside
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * the vattr's otherwise unused va_ctime and va_nblocks fields.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw /*FALLTHROUGH*/
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw /*FALLTHROUGH*/
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw if (txtype == TX_CREATE_ATTR || txtype == TX_MKDIR_ATTR)
fa9e4066f08beec538e775443c5be79dd423fcabahrens if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw zfs_init_vattr(&xva.xva_vattr, AT_TYPE | AT_MODE | AT_UID | AT_GID,
fa9e4066f08beec538e775443c5be79dd423fcabahrens lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid);
fa9e4066f08beec538e775443c5be79dd423fcabahrens * All forms of zfs create (create, mkdir, mkxattrdir, symlink)
fa9e4066f08beec538e775443c5be79dd423fcabahrens * eventually end up in zfs_mknode(), which assigns the object's
fa9e4066f08beec538e775443c5be79dd423fcabahrens * creation time and generation number. The generic VOP_CREATE()
fa9e4066f08beec538e775443c5be79dd423fcabahrens * doesn't have either concept, so we smuggle the values inside
fa9e4066f08beec538e775443c5be79dd423fcabahrens * the vattr's otherwise unused va_ctime and va_nblocks fields.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime);
758f6e0b258f20dcb5b772642e2a18b998ee7927gw error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * Symlinks don't have fuid info, and CIFS never creates
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * symlinks.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * The _ATTR versions will grab the fuid info in their subcases.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw /*FALLTHROUGH*/
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw /*FALLTHROUGH*/
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &vp, kcred);
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfs_replay_remove(zfsvfs_t *zfsvfs, lr_remove_t *lr, boolean_t byteswap)
fa9e4066f08beec538e775443c5be79dd423fcabahrens char *name = (char *)(lr + 1); /* name follows lr_remove_t */
fa9e4066f08beec538e775443c5be79dd423fcabahrens if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw error = VOP_RMDIR(ZTOV(dzp), name, NULL, kcred, NULL, vflg);
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfs_replay_link(zfsvfs_t *zfsvfs, lr_link_t *lr, boolean_t byteswap)
fa9e4066f08beec538e775443c5be79dd423fcabahrens char *name = (char *)(lr + 1); /* name follows lr_link_t */
fa9e4066f08beec538e775443c5be79dd423fcabahrens if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
fa9e4066f08beec538e775443c5be79dd423fcabahrens if ((error = zfs_zget(zfsvfs, lr->lr_link_obj, &zp)) != 0) {
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw error = VOP_LINK(ZTOV(dzp), ZTOV(zp), name, kcred, NULL, vflg);
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, boolean_t byteswap)
fa9e4066f08beec538e775443c5be79dd423fcabahrens char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
fa9e4066f08beec538e775443c5be79dd423fcabahrens if ((error = zfs_zget(zfsvfs, lr->lr_sdoid, &sdzp)) != 0)
fa9e4066f08beec538e775443c5be79dd423fcabahrens if ((error = zfs_zget(zfsvfs, lr->lr_tdoid, &tdzp)) != 0) {
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw error = VOP_RENAME(ZTOV(sdzp), sname, ZTOV(tdzp), tname, kcred,
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap)
fa9e4066f08beec538e775443c5be79dd423fcabahrens char *data = (char *)(lr + 1); /* data follows lr_write_t */
b19a79ec1a527828a60c4d325ccd8dcbeb2b2e8bperrin if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) {
b19a79ec1a527828a60c4d325ccd8dcbeb2b2e8bperrin * As we can log writes out of order, it's possible the
b19a79ec1a527828a60c4d325ccd8dcbeb2b2e8bperrin * file has been removed. In this case just drop the write
b19a79ec1a527828a60c4d325ccd8dcbeb2b2e8bperrin * and return success.
fa9e4066f08beec538e775443c5be79dd423fcabahrens error = vn_rdwr(UIO_WRITE, ZTOV(zp), data, lr->lr_length,
fa9e4066f08beec538e775443c5be79dd423fcabahrens lr->lr_offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfs_replay_truncate(zfsvfs_t *zfsvfs, lr_truncate_t *lr, boolean_t byteswap)
b19a79ec1a527828a60c4d325ccd8dcbeb2b2e8bperrin if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) {
b19a79ec1a527828a60c4d325ccd8dcbeb2b2e8bperrin * As we can log truncates out of order, it's possible the
b19a79ec1a527828a60c4d325ccd8dcbeb2b2e8bperrin * file has been removed. In this case just drop the truncate
b19a79ec1a527828a60c4d325ccd8dcbeb2b2e8bperrin * and return success.
fa9e4066f08beec538e775443c5be79dd423fcabahrens error = VOP_SPACE(ZTOV(zp), F_FREESP, &fl, FWRITE | FOFFMAX,
fa9e4066f08beec538e775443c5be79dd423fcabahrenszfs_replay_setattr(zfsvfs_t *zfsvfs, lr_setattr_t *lr, boolean_t byteswap)
b19a79ec1a527828a60c4d325ccd8dcbeb2b2e8bperrin if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) {
b19a79ec1a527828a60c4d325ccd8dcbeb2b2e8bperrin * As we can log setattrs out of order, it's possible the
b19a79ec1a527828a60c4d325ccd8dcbeb2b2e8bperrin * file has been removed. In this case just drop the setattr
b19a79ec1a527828a60c4d325ccd8dcbeb2b2e8bperrin * and return success.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * Fill in xvattr_t portions if necessary.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start,
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwzfs_replay_acl_v0(zfsvfs_t *zfsvfs, lr_acl_v0_t *lr, boolean_t byteswap)
fa9e4066f08beec538e775443c5be79dd423fcabahrens ace_t *ace = (ace_t *)(lr + 1); /* ace array follows lr_acl_t */
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) {
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * As we can log acls out of order, it's possible the
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * file has been removed. In this case just drop the acl
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * and return success.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * Replaying ACLs is complicated by FUID support.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * The log record may contain some optional data
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * to be used for replaying FUID's. These pieces
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * are the actual FUIDs that were created initially.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * The FUID table index may no longer be valid and
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * during zfs_create() a new index may be assigned.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * Because of this the log will contain the original
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * domain+rid in order to create a new FUID.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * The individual ACEs may contain an ephemeral uid/gid which is no
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw * longer valid and will need to be replaced with an actual FUID.
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwzfs_replay_acl(zfsvfs_t *zfsvfs, lr_acl_t *lr, boolean_t byteswap)
b19a79ec1a527828a60c4d325ccd8dcbeb2b2e8bperrin if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) {
b19a79ec1a527828a60c4d325ccd8dcbeb2b2e8bperrin * As we can log acls out of order, it's possible the
b19a79ec1a527828a60c4d325ccd8dcbeb2b2e8bperrin * file has been removed. In this case just drop the acl
b19a79ec1a527828a60c4d325ccd8dcbeb2b2e8bperrin * and return success.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Callback vectors for replaying records