zfs_znode.c revision 67bd71c6cc629bab3aa0d595c624a667f1574254
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * CDDL HEADER START
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * The contents of this file are subject to the terms of the
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * Common Development and Distribution License (the "License").
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * You may not use this file except in compliance with the License.
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * See the License for the specific language governing permissions
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * and limitations under the License.
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * When distributing Covered Code, include this CDDL HEADER in each
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * If applicable, add the following below this CDDL HEADER, with the
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * fields enclosed by brackets "[]" replaced with your own identifying
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * information: Portions Copyright [yyyy] [name of copyright owner]
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * CDDL HEADER END
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
43b9c05035ac59f7f7a8e7827598db5a15f30ed3jacobs * Use is subject to license terms.
355b4669e025ff377602b6fc7caaf30dbc218371jacobs#pragma ident "%Z%%M% %I% %E% SMI"
355b4669e025ff377602b6fc7caaf30dbc218371jacobs/*ARGSUSED*/
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs /* signal force unmount that this znode can be freed */
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs/*ARGSUSED*/
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobszfs_znode_cache_constructor(void *buf, void *cdrarg, int kmflags)
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL);
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs return (0);
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs/*ARGSUSED*/
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * Initialize zcache
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * Cleanup vfs & vnode ops
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * Cleanup zcache
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * Remove vfs ops
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * Remove vnode ops
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobsextern const fs_operation_def_t zfs_dvnodeops_template[];
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobsextern const fs_operation_def_t zfs_fvnodeops_template[];
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobsextern const fs_operation_def_t zfs_xdvnodeops_template[];
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobsextern const fs_operation_def_t zfs_symvnodeops_template[];
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobsextern const fs_operation_def_t zfs_evnodeops_template[];
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * zfs_dvnodeops can be set if mod_remove() calls mod_installfs()
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * due to a failure to remove the 2nd modlinkage (zfs_modldrv).
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * In this case we just return as the ops vectors are already set up.
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs return (0);
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template,
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template,
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template,
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template,
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template,
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * zfs_init_fs - Initialize the zfsvfs struct and the file system
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * incore "master" object. Verify version compatibility.
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobszfs_init_fs(zfsvfs_t *zfsvfs, znode_t **zpp, cred_t *cr)
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * XXX - hack to auto-create the pool root filesystem at
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * the first attempted mount.
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs if (dmu_object_info(os, MASTER_NODE_OBJ, &doi) == ENOENT) {
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL); /* master */
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL); /* del queue */
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); /* root node */
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_OBJ, 8, 1,
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs "is version %lld on-disk format, which is "
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs "incompatible with this software version %lld!",
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * The fsid is 64 bits, composed of an 8-bit fs type, which
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * separates our fsid from any other filesystem types, and a
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * 56-bit objset unique ID. The objset unique ID is unique to
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * all objsets open on this system, provided by unique_create().
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * The 8-bit fs type must be put in the low bits of fsid[1]
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * because that's where other Solaris filesystems put it.
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs zfsvfs->z_vfs->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) |
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &zoid);
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * Create the per mount vop tables.
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * Initialize zget mutexes
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_DELETE_QUEUE, 8, 1, &zoid);
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * Initialize delete head structure
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * Thread(s) will be started/stopped via
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * readonly_changed_cb() depending
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * on whether this is rw/ro mount.
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs /* Mutex never destroyed. */
43b9c05035ac59f7f7a8e7827598db5a15f30ed3jacobs mutex_init(&zfsvfs->z_delete_head.z_mutex, NULL, MUTEX_DEFAULT, NULL);
43b9c05035ac59f7f7a8e7827598db5a15f30ed3jacobs return (0);
43b9c05035ac59f7f7a8e7827598db5a15f30ed3jacobs * define a couple of values we need available
43b9c05035ac59f7f7a8e7827598db5a15f30ed3jacobs * for both 64 and 32 bit environments.
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * Create special expldev for ZFS private use.
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * Can't use standard expldev since it doesn't do
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * what we want. The standard expldev() takes a
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * dev32_t in LP64 and expands it to a long dev_t.
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * We need an interface that takes a dev32_t in ILP32
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * and expands it to a long dev_t.
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs major_t major = (major_t)dev >> NBITSMINOR32 & MAXMAJ32;
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * Special cmpldev for ZFS private use.
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * Can't use standard cmpldev since it takes
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * a long dev_t and compresses it to dev32_t in
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * LP64. We need to do a compaction of a long dev_t
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * to a dev32_t in ILP32.
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs major_t major = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64;
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * Construct a new znode/vnode and initialize it.
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * This does not call dmu_set_user(); that is
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * up to the caller to do, in case you don't want to
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * return the znode.
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobszfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, uint64_t obj_num, int blksz)
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs /*FALLTHROUGH*/
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs nzp = dmu_buf_set_user(db, zp, &zp->z_phys, znode_pageout_func);
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * there should be no
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * concurrent zgets on this object.
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * Slap on VROOT if we are the root znode
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * Create a new DMU object to hold a zfs znode.
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * IN: dzp - parent directory for new znode
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * vap - file attributes for new znode
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * tx - dmu transaction id for zap operations
d978af5cd9e717f0cb0742b6e940c8fdd836504djacobs * cr - credentials of caller
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * flag - flags:
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * IS_ROOT_NODE - new object will be root
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * IS_XATTR - new object is an attribute
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * IS_REPLAY - intent log replay
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * OUT: oid - ID of created object
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobszfs_mknode(znode_t *dzp, vattr_t *vap, uint64_t *oid, dmu_tx_t *tx, cred_t *cr,
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs if (zfsvfs->z_assign >= TXG_INITIAL) { /* ZIL replay */
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * Create a new DMU object.
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * There's currently no mechanism for pre-reading the blocks that will
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * be needed to allocate a new object, so we accept the small chance
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * that there will be an i/o error and we will fail one of the
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * assertions below.
355b4669e025ff377602b6fc7caaf30dbc218371jacobs VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, *oid, NULL, &dbp));
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * Initialize the znode physical data to zero.
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * If this is the root, fix up the half-initialized parent pointer
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * to reference the just-allocated physical data area.
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * If parent is an xattr, so am I.
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs pzp->zp_links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs pzp->zp_mode = MAKEIMODE(vap->va_type, vap->va_mode);
355b4669e025ff377602b6fc7caaf30dbc218371jacobszfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
355b4669e025ff377602b6fc7caaf30dbc218371jacobs err = dmu_bonus_hold(zfsvfs->z_os, obj_num, NULL, &db);
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs return (0);
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * Not found; create a new znode/vnode
355b4669e025ff377602b6fc7caaf30dbc218371jacobs zp = zfs_znode_alloc(zfsvfs, db, obj_num, doi.doi_data_block_size);
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs return (0);
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * Don't allow a zfs_zget() while we're trying to release this znode
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * If the hold count is greater than zero, somebody has
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * obtained a new reference on this znode while we were
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * processing it here, so we are done. If we still have
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * mapped pages then we are also done, since we don't
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs * want to inactivate the znode until the pages get pushed.
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * XXX - if vn_has_cached_data(vp) is true, but count == 0,
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * this seems like it would leave the znode hanging with
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * no chance to go inactive...
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * If this was the last reference to a file with no links,
355b4669e025ff377602b6fc7caaf30dbc218371jacobs * remove the file from the file system.
0a44ef6d9afbfe052a7e975f55ea0d2954b62a82jacobs /* XATTR files are not put on the delete queue */
355b4669e025ff377602b6fc7caaf30dbc218371jacobs list_insert_tail(&zfsvfs->z_delete_head.z_znodes, zp);
if (tx) {
int error;
ASSERT(0);
int error;
} else if (len == 0) {
return (error);
new_blksz = 0;
if (error) {
return (error);
if (new_blksz)
if (len == 0)
if (log) {
int error;