sa.c revision d2b3cbbd7f3a37bc7c01b526d3eb312acd070423
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Portions Copyright 2011 iXsystems, Inc
* Copyright (c) 2013 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/sysmacros.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_objset.h>
#include <sys/zfs_context.h>
/*
* ZFS System attributes:
*
* A generic mechanism to allow for arbitrary attributes
* to be stored in a dnode. The data will be stored in the bonus buffer of
* the dnode and if necessary a special "spill" block will be used to handle
* overflow situations. The spill block will be sized to fit the data
* from 512 - 128K. When a spill block is used the BP (blkptr_t) for the
* spill block is stored at the end of the current bonus buffer. Any
* attributes that would be in the way of the blkptr_t will be relocated
* into the spill block.
*
* Attribute registration:
*
* Stored persistently on a per dataset basis
* a mapping between attribute "string" names and their actual attribute
* numeric values, length, and byteswap function. The names are only used
* during registration. All attributes are known by their unique attribute
* id value. If an attribute can have a variable size then the value
* 0 will be used to indicate this.
*
* Attribute Layout:
*
* Attribute layouts are a way to compactly store multiple attributes, but
* without taking the overhead associated with managing each attribute
* individually. Since you will typically have the same set of attributes
* stored in the same order a single table will be used to represent that
* layout. The ZPL for example will usually have only about 10 different
* layouts (regular files, device files, symlinks,
* regular files + scanstamp, files/dir with extended attributes, and then
* you have the possibility of all of those minus ACL, because it would
* be kicked out into the spill block)
*
* Layouts are simply an array of the attributes and their
* ordering i.e. [0, 1, 4, 5, 2]
*
* Each distinct layout is given a unique layout number and that is what's
* stored in the header at the beginning of the SA data buffer.
*
* A layout only covers a single dbuf (bonus or spill). If a set of
* attributes is split up between the bonus buffer and a spill buffer then
* two different layouts will be used. This allows us to byteswap the
* spill without looking at the bonus buffer and keeps the on disk format of
* the bonus and spill buffer the same.
*
* Adding a single attribute will cause the entire set of attributes to
* be rewritten and could result in a new layout number being constructed
* as part of the rewrite if no such layout exists for the new set of
* attributes. The new attribute will be appended to the end of the already
* existing attributes.
*
* Both the attribute registration and attribute layout information are
* stored in normal ZAP attributes. There should be a small number of
* known layouts and the set of attributes is assumed to typically be quite
* small.
*
* The registered attributes and layout "table" information is maintained
* in core and a special "sa_os_t" is attached to the objset_t.
*
* A special interface is provided to allow for quickly applying
* a large set of attributes at once. sa_replace_all_by_template() is
* used to set an array of attributes. This is used by the ZPL when
* creating a brand new file. The template that is passed into the function
* specifies the attribute, size for variable length attributes, location of
* data and special "data locator" function if the data isn't in a contiguous
* location.
*
* Byteswap implications:
*
* Since the SA attributes are not entirely self describing we can't do
* the normal byteswap processing. The special ZAP layout attribute and
* attribute registration attributes define the byteswap function and the
* size of the attributes, unless it is variable sized.
* The normal ZFS byteswapping infrastructure assumes you don't need
* to read any objects in order to do the necessary byteswapping. Whereas
* SA attributes can only be properly byteswapped if the dataset is opened
* and the layout/attribute ZAP attributes are available. Because of this
* the SA attributes will be byteswapped when they are first accessed by
* the SA code that will read the SA data.
*/
void *data);
int buflen);
};
/*
* SA_COPY_DATA(f, s, t, l): copy an attribute value of l bytes from s to t.
*
* f - data locator callback, or NULL when the source is one contiguous buffer
* s - source address
* t - target address
* l - length in bytes
*
* With a NULL locator the copy is done inline: lengths of 8 and 16 bytes are
* moved as one or two 64-bit words, anything else goes through bcopy().
* With a non-NULL locator the work is delegated to sa_copy_data().
*
* NOTE(review): the 8/16 byte fast paths access s and t through uint64_t
* pointers, so both are assumed to be suitably aligned - confirm at callers.
* Arguments are evaluated more than once; do not pass expressions with
* side effects.
*/
#define SA_COPY_DATA(f, s, t, l) \
	{ \
		if ((f) == NULL) { \
			if ((l) == 8) { \
				*(uint64_t *)(t) = *(uint64_t *)(s); \
			} else if ((l) == 16) { \
				*(uint64_t *)(t) = *(uint64_t *)(s); \
				*(uint64_t *)((uintptr_t)(t) + 8) = \
				    *(uint64_t *)((uintptr_t)(s) + 8); \
			} else { \
				bcopy((s), (t), (l)); \
			} \
		} else \
			sa_copy_data((f), (s), (t), (l)); \
	}
/*
* This table is fixed and cannot be changed. Its purpose is to
* allow the SA code to work with both old/new ZPL file systems.
* It contains the list of legacy attributes. These attributes aren't
* stored in the "attribute" registry zap objects, since older ZPL file systems
* won't have the registry. Only objsets of type ZFS_TYPE_FILESYSTEM will
* use this static table.
*/
/*
* NOTE(review): no initializer entries are visible between the braces in
* this view, yet other code in this file indexes sa_legacy_attrs[j] for
* every j below sa_legacy_attr_count (16). Confirm that the table contents
* are intact before relying on this definition.
*/
sa_attr_reg_t sa_legacy_attrs[] = {
};
/*
* This is only used for objects of type DMU_OT_ZNODE
*/
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
/*
* Special dummy layout used for buffers with no attributes.
* The array holds a single element with the value 0.
*/
sa_attr_type_t sa_dummy_zpl_layout[] = { 0 };
/*
* Upper bound used by the loops in this file that walk sa_legacy_attrs[];
* it must therefore never exceed the number of entries in that table.
*/
static int sa_legacy_attr_count = 16;
/*ARGSUSED*/
static int
{
return (0);
}
/*ARGSUSED*/
static void
{
}
/*
* Initialisation entry point for the SA cache; takes no arguments and
* returns nothing.
*
* NOTE(review): only the tail of an argument list is visible in this view
* (an sa_handle_t-sized value, 0, and sa_cache_constructor). Presumably
* these are arguments to the call that creates sa_cache - confirm against
* the complete source.
*/
void
sa_cache_init(void)
{
sizeof (sa_handle_t), 0, sa_cache_constructor,
}
/*
* Tear-down entry point for the SA cache; takes no arguments and returns
* nothing. It only acts when sa_cache is non-NULL.
*
* NOTE(review): the statement guarded by the check below is not visible in
* this view; presumably it releases sa_cache - confirm against the complete
* source.
*/
void
sa_cache_fini(void)
{
if (sa_cache)
}
static int
{
return (1);
return (-1);
return (0);
}
static int
{
return (1);
return (-1);
return (1);
return (-1);
return (0);
}
{
int i;
return (1);
for (i = 0; i != count; i++) {
return (1);
}
return (0);
}
static uint64_t
{
int i;
for (i = 0; i != attr_count; i++)
return (crc);
}
static int
{
int rc;
} else {
rc = 0;
}
return (rc);
}
/*
* returns 0 for success or non zero for failures
*
* Operates on bulk array, first failure will abort further processing
*/
int
{
int i;
int error = 0;
buftypes = 0;
for (i = 0; i != count; i++) {
/* First check the bonus buffer */
}
}
if (TOC_ATTR_PRESENT(
}
}
}
}
switch (data_op) {
case SA_LOOKUP:
}
continue;
case SA_UPDATE:
/* existing rewrite of attr */
continue;
} else { /* adding new attribute */
}
if (error)
return (error);
break;
}
}
return (error);
}
static sa_lot_t *
{
int i;
KM_SLEEP);
tb->lot_instance = 0;
if (zapadd) {
char attr_name[8];
if (sa->sa_layout_attr_obj == 0) {
}
"%d", (int)lot_num);
}
for (i = 0; i != attr_count; i++) {
tb->lot_var_sizes++;
}
/* verify we don't have a hash collision */
break;
tb->lot_instance++;
}
}
return (tb);
}
static void
{
tbsearch.lot_instance = 0;
if (tb) {
break;
}
}
}
if (!found) {
}
}
static int
{
int error;
if (size == 0) {
} else if (size > SPA_MAXBLOCKSIZE) {
ASSERT(0);
} else {
}
return (error);
}
static void
{
} else {
int bytes;
void *dataptr;
bytes = 0;
}
}
}
/*
* Determine several different sizes
* first the sa header size
* the number of bytes to be stored
* if spill would occur the index in the attribute array is returned
*
* the boolean will_spill will be set when spilling is necessary. It
* is only set when the buftype is SA_BONUS
*/
static int
{
int var_size = 0;
int i;
int j = -1;
int full_space;
int hdrsize;
*total = 0;
*index = 0;
*will_spill = B_TRUE;
return (0);
}
*index = -1;
*total = 0;
*will_spill = B_FALSE;
sizeof (sa_hdr_phys_t);
for (i = 0; i != attr_count; i++) {
if (done)
goto next;
if (is_var_sz) {
var_size++;
}
*total < full_space) {
/*
* Account for header space used by array of
* optional sizes of variable-length attributes.
* Record the index in case this increase needs
* to be reversed due to spill-over.
*/
j = i;
} else {
*index = i;
*will_spill = B_TRUE;
continue;
}
}
/*
* find index of where spill *could* occur.
* Then continue to count of remainder attribute
* space. The sum is used later for sizing bonus
* and spill buffer.
*/
(full_space - sizeof (blkptr_t))) {
*index = i;
}
next:
*will_spill = B_TRUE;
}
/*
* j holds the index of the last variable-sized attribute for
* which hdrsize was increased. Reverse the increase if that
* attribute will be relocated to the spill block.
*/
if (*will_spill && j == *index)
return (hdrsize);
}
/*
* Find layout that corresponds to ordering of attributes
* If not found a new layout number is created and added to
* persistent layout tables.
*/
static int
{
void *data_start;
int buf_space;
int i, lot_count;
int hdrsize;
int spillhdrsize = 0;
int used;
int len_idx;
int spill_used;
/* first determine bonus header size and sum of all attributes */
if (used > SPA_MAXBLOCKSIZE)
/* setup and size spill buffer when needed */
if (spilling) {
}
&spill_used, &dummy);
if (spill_used > SPA_MAXBLOCKSIZE)
}
/* setup starting pointers to lay down data */
if (spilling)
0 : SA_BLKPTR_SPACE - hdrsize;
else
KM_SLEEP);
lot_count = 0;
if (length == 0)
}
hash = -1ULL;
len_idx = 0;
attrs_start = &attrs[i];
lot_count = 0;
}
data_start, length);
}
length), 8);
lot_count++;
}
/*
* Verify that old znodes always have layout number 0.
* Must be DMU_OT_SA for arbitrary layouts
*/
}
if (hdl->sa_bonus_tab) {
}
if (!sa->sa_force_spill)
if (!spilling) {
/*
* remove spill block that is no longer needed.
*/
} else {
}
}
return (0);
}
static void
{
int i;
return;
for (i = 0; i != sa->sa_num_attrs; i++) {
}
}
static int
{
uint64_t sa_attr_count = 0;
uint64_t sa_reg_count = 0;
int error = 0;
int registered_count = 0;
int i;
sa->sa_user_table =
if (sa->sa_reg_attr_obj != 0) {
/*
* Make sure we retrieved a count and that it isn't zero
*/
if (error == 0)
goto bail;
}
}
/* Allocate attribute numbers for attributes that aren't registered */
for (i = 0; i != count; i++) {
int j;
if (ostype == DMU_OST_ZFS) {
for (j = 0; j != sa_legacy_attr_count; j++) {
sa_legacy_attrs[j].sa_name) == 0) {
sa->sa_user_table[i] =
}
}
}
if (found)
continue;
if (sa->sa_reg_attr_obj)
else
switch (error) {
case ENOENT:
break;
case 0:
break;
default:
goto bail;
}
}
/*
* Attribute table is constructed from requested attribute list,
* previously foreign registered attributes, and also the legacy
* ZPL set of attributes.
*/
if (sa->sa_reg_attr_obj) {
zap_cursor_advance(&zc)) {
continue;
}
}
/*
* Make sure we processed the correct number of registered
* attributes
*/
if (registered_count != sa_reg_count) {
goto bail;
}
}
if (ostype == DMU_OST_ZFS) {
for (i = 0; i != sa_legacy_attr_count; i++) {
continue;
KM_SLEEP);
}
}
for (i = 0; i != count; i++) {
continue;
}
return (0);
bail:
}
int
{
int error;
*user_table = tb;
return (0);
}
if (sa_obj) {
goto fail;
goto fail;
}
goto fail;
if (sa->sa_layout_attr_obj != 0) {
&layout_count);
/*
* Layout number count should be > 0
*/
if (error == 0)
goto fail;
}
zap_cursor_advance(&zc)) {
lot_attrs))) != 0) {
break;
}
(unsigned long long *)&lot_num) == 0);
}
/*
* Make sure layout count matches number of entries added
* to AVL tree
*/
goto fail;
}
}
/* Add special layout number for old ZNODES */
if (ostype == DMU_OST_ZFS) {
}
return (0);
fail:
if (sa->sa_user_table)
}
void
{
void *cookie;
/* Free up attr table */
}
}
}
}
void
{
if (var_length) {
}
}
static void
{
void *data_start;
int i;
uint8_t length_idx = 0;
}
if (IS_SA_BONUSTYPE(type)) {
} else {
data_start = hdr;
}
for (i = 0; i != tb->lot_attr_count; i++) {
int attr_length, reg_length;
if (reg_length) {
idx_len = 0;
} else {
idx_len = length_idx++;
}
attr_length), 8);
}
}
/*ARGSUSED*/
void
{
}
void
{
int num_lengths = 1;
int i;
return;
}
/*
* Determine number of variable lengths in header
* The standard 8 byte header has one for free and a
* 16 byte header would have 4 + 1;
*/
for (i = 0; i != num_lengths; i++)
sa_hdr_phys->sa_lengths[i] =
}
static int
{
/* Do we need to byteswap? */
/* only check if not old znode */
sa_hdr_phys->sa_magic != 0) {
}
else
return (0);
}
/*ARGSUSED*/
void
{
}
static void
{
return;
if (idx_tab->sa_variable_lengths)
sizeof (uint16_t) *
}
}
static void
{
}
void
{
if (hdl->sa_bonus_tab) {
}
if (hdl->sa_spill_tab) {
}
}
int
{
int error = 0;
#ifdef ZFS_DEBUG
#endif
/* find handle, if it exists */
/* if one doesn't exist then create a new one, and initialize it */
}
}
return (error);
}
int
{
int error;
return (error);
handlepp));
}
int
{
}
void
{
}
int
{
}
int
{
int error;
return (error);
}
#ifdef _KERNEL
int
{
int error;
}
return (error);
}
#endif
void *
{
/*
* Determine layout number. If SA node and header == 0 then
* force the index table to the dummy "1" empty layout.
*
* The layout number would only be zero for a newly created file
* that has not added any attributes yet, or with crypto enabled which
* doesn't write any attributes to the bonus buffer.
*/
/* Verify header size is consistent with layout information */
/*
* See if any of the already existing TOC entries can be reused?
*/
int i;
if (tb->lot_var_sizes != 0 &&
for (i = 0; i != tb->lot_var_sizes; i++) {
if (hdr->sa_lengths[i] !=
idx_tab->sa_variable_lengths[i]) {
break;
}
}
}
if (valid_idx) {
return (idx_tab);
}
}
/* No such luck, create a new entry */
if (tb->lot_var_sizes)
return (idx_tab);
}
void
{
}
static void
{
uint64_t attr_value = 0;
int i;
return;
}
}
for (i = 0; i != sa->sa_num_attrs; i++) {
continue;
tb[i].sa_byteswap);
}
}
/*
* Replace all attributes with attributes specified in template.
* If dnode had a spill buffer then those attributes will
* also be replaced, possibly with just an empty spill block
*
* This interface is intended to only be used for bulk adding of
* attributes for a new file. It will also be used by the ZPL
* when converting an old formatted znode to native SA support.
*/
int
{
if (sa->sa_need_attr_registration)
}
int
{
int error;
attr_count, tx);
return (error);
}
/*
* add/remove/replace a single attribute and then rewrite the entire set
* of attributes.
*/
static int
{
void *old_data[2];
int bonus_attr_count = 0;
int bonus_data_size = 0;
int spill_data_size = 0;
int spill_attr_count = 0;
int error;
int i, j, k, length_idx;
int attr_count;
int count;
/* First make a copy of the old data */
if (dn->dn_bonuslen != 0) {
} else {
}
/* Bring spill buffer online if it isn't currently */
if (old_data[0])
return (error);
} else {
}
/* build descriptor of all attributes */
attr_count++;
attr_count--;
/*
* loop through bonus and spill buffer if it exists, and
* build up new attr_descriptor to reset the attributes
*/
k = j = 0;
for (; k != 2; k++) {
/* iterate over each attribute in layout */
for (i = 0, length_idx = 0; i != count; i++) {
j++;
continue;
}
} else {
if (length == 0) {
}
NULL, (void *)
}
}
} else {
break;
}
}
if (length == 0) {
}
}
if (old_data[0])
if (old_data[1])
return (error);
}
static int
{
int error;
/* sync out registration table if necessary */
if (sa->sa_need_attr_registration)
return (error);
}
/*
* update or add new attribute
*/
int
{
int error;
return (error);
}
int
{
int error;
return (error);
}
/*
* Return size of an attribute
*/
int
{
int error;
return (error);
}
return (0);
}
int
{
}
int
{
int error;
return (error);
}
int
{
int error;
return (error);
}
int
{
int error;
return (error);
}
void
{
}
void
{
}
void
{
}
void
{
}
{
}
void *
{
}
void
{
}
void
{
}
{
}
{
}
int
{
if (sa->sa_master_obj)
return (1);
return (0);
}
/*
* Return the SA header size, as computed by SA_HDR_SIZE().
*
* arg - opaque pointer supplied by the caller; presumably it points at the
*       SA header whose size is wanted (confirm against callers).
*
* NOTE(review): `hdr` is not declared anywhere in the visible body and `arg`
* is otherwise unused. Presumably `hdr` is meant to be `arg` viewed as an SA
* header pointer - confirm against the complete source.
*/
int
sa_hdrsize(void *arg)
{
return (SA_HDR_SIZE(hdr));
}
void
{
}
void
{
}