/* lufs_map.c -- revision b97d6ca7333c353b6ca20c20c99fb1be8d32a8de */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright 2012 Milan Jurik. All rights reserved.
*/
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/proc.h>
#include <sys/cmn_err.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_filio.h>
#include <sys/fs/ufs_log.h>
#include <sys/inttypes.h>
#include <sys/atomic.h>
#include <sys/tuneable.h>
/*
* externs
*/
extern pri_t minclsyspri;
extern struct kmem_cache *lufs_bp;
extern int ufs_trans_push_quota(ufsvfs_t *, delta_t, struct dquot *);
/*
* globals
*/
kmem_cache_t *mapentry_cache;
/*
* logmap tuning constants
*/
long logmap_maxnme_commit = 2048;
long logmap_maxnme_async = 4096;
long logmap_maxnme_sync = 6144;
long logmap_maxcfrag_commit = 4; /* Max canceled fragments per moby */
uint64_t ufs_crb_size = 0; /* current size of all crb buffers */
uint64_t ufs_crb_max_size = 0; /* highest crb buffer use so far */
size_t ufs_crb_limit; /* max allowable size for crbs */
uint64_t ufs_crb_alloc_fails = 0; /* crb allocation failures stat */
#define UFS_MAX_CRB_DEFAULT_DIVISOR 10 /* max 1/10 kmem_maxavail() */
int ufs_max_crb_divisor = UFS_MAX_CRB_DEFAULT_DIVISOR; /* tunable */
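/*
 * NB: ufs_crb_limit is initialized outside this file (presumably at
 * log initialization, from kmem_maxavail() / ufs_max_crb_divisor), so
 * tuning ufs_max_crb_divisor only takes effect when that calculation
 * is redone.
 */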
void handle_dquot(mapentry_t *);
/*
* GENERIC MAP ROUTINES
*/
#define CRB_FREE(crb, me) \
kmem_free(crb->c_buf, crb->c_nb); \
atomic_add_64(&ufs_crb_size, -(uint64_t)crb->c_nb); \
kmem_free(crb, sizeof (crb_t)); \
(me)->me_crb = NULL;
#define CRB_RELE(me) { \
crb_t *crb = (me)->me_crb; \
if (crb && (--crb->c_refcnt == 0)) { \
CRB_FREE(crb, me) \
} \
}
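/*
 * A crb (cached roll buffer) is shared by the mapentries created from
 * a single logmap_add_buf() call; c_refcnt counts those mapentries and
 * the buffer is freed when the last reference is released. Note that
 * CRB_FREE expands to multiple statements, so it and CRB_RELE may only
 * be used where a compound statement is legal.
 */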
/*
* Check that the old delta has an argument and a push function of
* ufs_trans_push_quota(), then check that the old and new deltas differ.
* If so we clean up with handle_dquot() before replacing the old delta.
*/
#define HANDLE_DQUOT(me, melist) { \
if ((me->me_arg) && \
(me->me_func == ufs_trans_push_quota)) { \
if (!((me->me_dt == melist->me_dt) && \
(me->me_arg == melist->me_arg) && \
(me->me_func == melist->me_func))) { \
handle_dquot(me); \
} \
} \
}
/*
* free up all the mapentries for a map
*/
void
map_free_entries(mt_map_t *mtm)
{
int i;
mapentry_t *me;
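/*
 * The map header doubles as the list sentinel: mtm_next/mtm_prev
 * overlay me_next/me_prev, so (mapentry_t *)mtm marks the end of
 * the doubly linked list of mapentries.
 */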
while ((me = mtm->mtm_next) != (mapentry_t *)mtm) {
me->me_next->me_prev = me->me_prev;
me->me_prev->me_next = me->me_next;
CRB_RELE(me);
kmem_cache_free(mapentry_cache, me);
}
for (i = 0; i < mtm->mtm_nhash; i++)
mtm->mtm_hash[i] = NULL;
mtm->mtm_nme = 0;
mtm->mtm_nmet = 0;
}
/*
* done with map; free if necessary
*/
mt_map_t *
map_put(mt_map_t *mtm)
{
/*
* free up the map's memory
*/
map_free_entries(mtm);
ASSERT(map_put_debug(mtm));
kmem_free(mtm->mtm_hash,
(size_t) (sizeof (mapentry_t *) * mtm->mtm_nhash));
mutex_destroy(&mtm->mtm_mutex);
mutex_destroy(&mtm->mtm_scan_mutex);
cv_destroy(&mtm->mtm_to_roll_cv);
cv_destroy(&mtm->mtm_from_roll_cv);
rw_destroy(&mtm->mtm_rwlock);
mutex_destroy(&mtm->mtm_lock);
cv_destroy(&mtm->mtm_cv_commit);
cv_destroy(&mtm->mtm_cv_next);
cv_destroy(&mtm->mtm_cv_eot);
cv_destroy(&mtm->mtm_cv);
kmem_free(mtm, sizeof (mt_map_t));
return (NULL);
}
/*
* Allocate a map.
*/
mt_map_t *
map_get(ml_unit_t *ul, enum maptypes maptype, int nh)
{
mt_map_t *mtm;
/*
* assume the map is not here and allocate the necessary structs
*/
mtm = kmem_zalloc(sizeof (mt_map_t), KM_SLEEP);
mutex_init(&mtm->mtm_mutex, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&mtm->mtm_scan_mutex, NULL, MUTEX_DEFAULT, NULL);
cv_init(&mtm->mtm_to_roll_cv, NULL, CV_DEFAULT, NULL);
cv_init(&mtm->mtm_from_roll_cv, NULL, CV_DEFAULT, NULL);
rw_init(&mtm->mtm_rwlock, NULL, RW_DEFAULT, NULL);
mtm->mtm_next = (mapentry_t *)mtm;
mtm->mtm_prev = (mapentry_t *)mtm;
mtm->mtm_hash = kmem_zalloc((size_t) (sizeof (mapentry_t *) * nh),
KM_SLEEP);
mtm->mtm_nhash = nh;
mtm->mtm_debug = ul->un_debug;
mtm->mtm_type = maptype;
mtm->mtm_cfrags = 0;
mtm->mtm_cfragmax = logmap_maxcfrag_commit;
/*
* for scan test
*/
mtm->mtm_ul = ul;
/*
* Initialize locks
*/
mutex_init(&mtm->mtm_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&mtm->mtm_cv_commit, NULL, CV_DEFAULT, NULL);
cv_init(&mtm->mtm_cv_next, NULL, CV_DEFAULT, NULL);
cv_init(&mtm->mtm_cv_eot, NULL, CV_DEFAULT, NULL);
cv_init(&mtm->mtm_cv, NULL, CV_DEFAULT, NULL);
ASSERT(map_get_debug(ul, mtm));
return (mtm);
}
/*
* DELTAMAP ROUTINES
*/
/*
* deltamap tuning constants
*/
long deltamap_maxnme = 1024; /* global so it can be set */
int
deltamap_need_commit(mt_map_t *mtm)
{
return (mtm->mtm_nme > deltamap_maxnme);
}
/*
* put a delta into a deltamap; may sleep on memory
*/
void
deltamap_add(
mt_map_t *mtm,
offset_t mof,
off_t nb,
delta_t dtyp,
int (*func)(),
ulong_t arg,
threadtrans_t *tp)
{
int32_t hnb;
mapentry_t *me;
mapentry_t **mep;
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
mutex_enter(&mtm->mtm_mutex);
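/*
 * Split (mof, nb) into chunks that do not cross a MAPBLOCKSIZE
 * boundary; hnb is the size of the current chunk, i.e. the distance
 * from mof to the next block boundary, capped at the remaining nb.
 */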
for (hnb = 0; nb; nb -= hnb, mof += hnb) {
hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
if (hnb > nb)
hnb = nb;
/*
* Search for dup entry. We need to ensure that we don't
* replace a map entry which carries quota information
* with a map entry which doesn't. In that case we lose the
* reference to the dquot structure, which will then never be
* cleaned up by the push function me->me_func because that
* function is never called.
* The stray dquot would be found later by invalidatedq()
* causing a panic when the filesystem is unmounted.
*/
mep = MAP_HASH(mof, mtm);
for (me = *mep; me; me = me->me_hash) {
if (DATAwithinME(mof, hnb, me)) {
/*
* Don't remove quota entries which have
* incremented the ref count (those with a
* ufs_trans_push_quota push function).
* Let logmap_add[_buf] clean them up.
*/
if (me->me_func == ufs_trans_push_quota) {
continue;
}
break;
}
ASSERT((dtyp == DT_CANCEL) ||
(!DATAoverlapME(mof, hnb, me)) ||
MEwithinDATA(me, mof, hnb));
}
if (me) {
/* already in map */
continue;
}
/*
* Add up all the delta map deltas so we can compute
* an upper bound on the log size used.
* Note that some deltas get removed from the deltamap
* by lufs_write_strategy before the deltamap_push, and
* so multiple deltas to the same mof offset get
* cancelled in the logmap rather than here. Thus we
* can't easily get an accurate count of the log space
* used - only an upper bound.
*/
if (tp && (mtm->mtm_ul->un_deltamap == mtm)) {
ASSERT(dtyp != DT_CANCEL);
if (dtyp == DT_ABZERO) {
tp->deltas_size += sizeof (struct delta);
} else {
tp->deltas_size +=
(hnb + sizeof (struct delta));
}
}
delta_stats[dtyp]++;
/*
* get a mapentry
* May need to drop & re-grab the mtm_mutex
* and then recheck for a duplicate
*/
me = kmem_cache_alloc(mapentry_cache, KM_NOSLEEP);
if (me == NULL) {
mutex_exit(&mtm->mtm_mutex);
me = kmem_cache_alloc(mapentry_cache, KM_SLEEP);
mutex_enter(&mtm->mtm_mutex);
}
bzero(me, sizeof (mapentry_t));
/*
* initialize and put in deltamap
*/
me->me_mof = mof;
me->me_nb = hnb;
me->me_func = func;
me->me_arg = arg;
me->me_dt = dtyp;
me->me_flags = ME_HASH;
me->me_tid = mtm->mtm_tid;
me->me_hash = *mep;
*mep = me;
me->me_next = (mapentry_t *)mtm;
me->me_prev = mtm->mtm_prev;
mtm->mtm_prev->me_next = me;
mtm->mtm_prev = me;
mtm->mtm_nme++;
}
mutex_exit(&mtm->mtm_mutex);
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
}
/*
* remove deltas within (mof, nb) and return as linked list
*/
mapentry_t *
deltamap_remove(mt_map_t *mtm, offset_t mof, off_t nb)
{
off_t hnb;
mapentry_t *me;
mapentry_t **mep;
mapentry_t *mer;
if (mtm == NULL)
return (NULL);
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
mutex_enter(&mtm->mtm_mutex);
for (mer = NULL, hnb = 0; nb; nb -= hnb, mof += hnb) {
hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
if (hnb > nb)
hnb = nb;
/*
* remove entries from hash and return as an age-ordered linked list
*/
mep = MAP_HASH(mof, mtm);
while ((me = *mep) != 0) {
if (MEwithinDATA(me, mof, hnb)) {
*mep = me->me_hash;
me->me_next->me_prev = me->me_prev;
me->me_prev->me_next = me->me_next;
me->me_hash = mer;
mer = me;
me->me_flags |= ME_LIST;
me->me_flags &= ~ME_HASH;
mtm->mtm_nme--;
} else
mep = &me->me_hash;
}
}
mutex_exit(&mtm->mtm_mutex);
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
return (mer);
}
/*
* delete entries within (mof, nb)
*/
void
deltamap_del(mt_map_t *mtm, offset_t mof, off_t nb)
{
mapentry_t *me;
mapentry_t *menext;
menext = deltamap_remove(mtm, mof, nb);
while ((me = menext) != 0) {
menext = me->me_hash;
kmem_cache_free(mapentry_cache, me);
}
}
/*
* Call the indicated function to cause deltas to move to the logmap.
* top_end_sync() is the only caller of this function and
* it has waited for the completion of all threads, so there can
* be no other activity in the deltamap. Therefore we don't need to
* hold the deltamap lock.
*/
void
deltamap_push(ml_unit_t *ul)
{
delta_t dtyp;
int (*func)();
ulong_t arg;
mapentry_t *me;
offset_t mof;
off_t nb;
mt_map_t *mtm = ul->un_deltamap;
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
/*
* for every entry in the deltamap
*/
while ((me = mtm->mtm_next) != (mapentry_t *)mtm) {
ASSERT(me->me_func);
func = me->me_func;
dtyp = me->me_dt;
arg = me->me_arg;
mof = me->me_mof;
nb = me->me_nb;
if ((ul->un_flags & LDL_ERROR) ||
(*func)(ul->un_ufsvfs, dtyp, arg))
deltamap_del(mtm, mof, nb);
}
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
}
/*
* LOGMAP ROUTINES
*/
int
logmap_need_commit(mt_map_t *mtm)
{
return ((mtm->mtm_nmet > logmap_maxnme_commit) ||
(mtm->mtm_cfrags >= mtm->mtm_cfragmax));
}
int
logmap_need_roll_async(mt_map_t *mtm)
{
return (mtm->mtm_nme > logmap_maxnme_async);
}
int
logmap_need_roll_sync(mt_map_t *mtm)
{
return (mtm->mtm_nme > logmap_maxnme_sync);
}
void
logmap_start_roll(ml_unit_t *ul)
{
mt_map_t *logmap = ul->un_logmap;
logmap_settail(logmap, ul);
ASSERT(!(ul->un_flags & LDL_NOROLL));
mutex_enter(&logmap->mtm_mutex);
if ((logmap->mtm_flags & MTM_ROLL_RUNNING) == 0) {
logmap->mtm_flags |= MTM_ROLL_RUNNING;
logmap->mtm_flags &= ~(MTM_FORCE_ROLL | MTM_ROLL_EXIT);
(void) thread_create(NULL, 0, trans_roll, ul, 0, &p0,
TS_RUN, minclsyspri);
}
mutex_exit(&logmap->mtm_mutex);
}
void
logmap_kill_roll(ml_unit_t *ul)
{
mt_map_t *mtm = ul->un_logmap;
if (mtm == NULL)
return;
mutex_enter(&mtm->mtm_mutex);
while (mtm->mtm_flags & MTM_ROLL_RUNNING) {
mtm->mtm_flags |= MTM_ROLL_EXIT;
cv_signal(&mtm->mtm_to_roll_cv);
cv_wait(&mtm->mtm_from_roll_cv, &mtm->mtm_mutex);
}
mutex_exit(&mtm->mtm_mutex);
}
/*
* kick the roll thread if it's not doing anything
*/
void
logmap_forceroll_nowait(mt_map_t *logmap)
{
/*
* Don't need to lock mtm_mutex to read mtm_flags here as we
* don't care in the rare case when we get a transitional value
* of mtm_flags. Simply signalling the thread will wake it
* up and it will notice it has too many logmap entries.
*/
ASSERT(!(logmap->mtm_ul->un_flags & LDL_NOROLL));
if ((logmap->mtm_flags & MTM_ROLLING) == 0) {
cv_signal(&logmap->mtm_to_roll_cv);
}
}
/*
* kick the roll thread and wait for it to finish a cycle
*/
void
logmap_forceroll(mt_map_t *mtm)
{
mutex_enter(&mtm->mtm_mutex);
if ((mtm->mtm_flags & MTM_FORCE_ROLL) == 0) {
mtm->mtm_flags |= MTM_FORCE_ROLL;
cv_signal(&mtm->mtm_to_roll_cv);
}
do {
if ((mtm->mtm_flags & MTM_ROLL_RUNNING) == 0) {
mtm->mtm_flags &= ~MTM_FORCE_ROLL;
goto out;
}
cv_wait(&mtm->mtm_from_roll_cv, &mtm->mtm_mutex);
} while (mtm->mtm_flags & MTM_FORCE_ROLL);
out:
mutex_exit(&mtm->mtm_mutex);
}
/*
* remove rolled deltas within (mof, nb) and free them
*/
void
logmap_remove_roll(mt_map_t *mtm, offset_t mof, off_t nb)
{
int dolock = 0;
off_t hnb;
mapentry_t *me;
mapentry_t **mep;
offset_t savmof = mof;
off_t savnb = nb;
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
again:
if (dolock)
rw_enter(&mtm->mtm_rwlock, RW_WRITER);
mutex_enter(&mtm->mtm_mutex);
for (hnb = 0; nb; nb -= hnb, mof += hnb) {
hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
if (hnb > nb)
hnb = nb;
/*
* remove and free the rolled entries
*/
mep = MAP_HASH(mof, mtm);
while ((me = *mep) != 0) {
if ((me->me_flags & ME_ROLL) &&
(MEwithinDATA(me, mof, hnb))) {
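/*
 * ME_AGE means the entry is on someone's age list (a reader
 * or the roll thread still references it). Retry with
 * mtm_rwlock held as writer to wait out those users before
 * freeing the entry.
 */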
if (me->me_flags & ME_AGE) {
ASSERT(dolock == 0);
dolock = 1;
mutex_exit(&mtm->mtm_mutex);
mof = savmof;
nb = savnb;
goto again;
}
*mep = me->me_hash;
me->me_next->me_prev = me->me_prev;
me->me_prev->me_next = me->me_next;
me->me_flags &= ~(ME_HASH|ME_ROLL);
ASSERT(!(me->me_flags & ME_USER));
mtm->mtm_nme--;
/*
* cancelled entries are handled by someone else
*/
if ((me->me_flags & ME_CANCEL) == 0) {
roll_stats[me->me_dt]++;
CRB_RELE(me);
kmem_cache_free(mapentry_cache, me);
}
} else
mep = &me->me_hash;
}
}
mutex_exit(&mtm->mtm_mutex);
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
if (dolock)
rw_exit(&mtm->mtm_rwlock);
}
/*
* Find the disk offset of the next delta to roll.
* Returns 0: no more deltas to roll or a transaction is being committed
* 1: a delta to roll has been found and *mofp points
* to the master file disk offset
*/
int
logmap_next_roll(mt_map_t *logmap, offset_t *mofp)
{
mapentry_t *me;
ASSERT(((logmap->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(logmap));
mutex_enter(&logmap->mtm_mutex);
for (me = logmap->mtm_next; me != (mapentry_t *)logmap;
me = me->me_next) {
/* already rolled */
if (me->me_flags & ME_ROLL) {
continue;
}
/* part of currently busy transaction; stop */
if (me->me_tid == logmap->mtm_tid) {
break;
}
/* part of commit-in-progress transaction; stop */
if (me->me_tid == logmap->mtm_committid) {
break;
}
/*
* We shouldn't see a DT_CANCEL mapentry whose tid is
* neither mtm_tid nor mtm_committid, since cancel
* entries are removed at the end of each committed
* transaction.
*/
ASSERT(!(me->me_dt == DT_CANCEL));
*mofp = me->me_mof;
mutex_exit(&logmap->mtm_mutex);
return (1);
}
mutex_exit(&logmap->mtm_mutex);
return (0);
}
/*
* put mapentry on sorted age list
*/
static void
logmap_list_age(mapentry_t **age, mapentry_t *meadd)
{
mapentry_t *me;
ASSERT(!(meadd->me_flags & (ME_AGE|ME_LIST)));
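/* keep the list sorted by ascending me_age, oldest entry first */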
for (me = *age; me; age = &me->me_agenext, me = *age) {
if (me->me_age > meadd->me_age)
break;
}
meadd->me_agenext = me;
meadd->me_flags |= ME_AGE;
*age = meadd;
}
/*
* get a list of deltas within <mof, mof+nb>
* returns with mtm_rwlock held
* return value says whether the entire mof range is covered by deltas
*/
int
logmap_list_get(
mt_map_t *mtm,
offset_t mof,
off_t nb,
mapentry_t **age)
{
off_t hnb;
mapentry_t *me;
mapentry_t **mep;
int rwtype = RW_READER;
offset_t savmof = mof;
off_t savnb = nb;
int entire = 0;
crb_t *crb;
mtm->mtm_ref = 1;
again:
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
rw_enter(&mtm->mtm_rwlock, rwtype);
*age = NULL;
mutex_enter(&mtm->mtm_mutex);
for (hnb = 0; nb; nb -= hnb, mof += hnb) {
hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
if (hnb > nb)
hnb = nb;
/*
* find overlapping entries
*/
mep = MAP_HASH(mof, mtm);
for (me = *mep; me; me = me->me_hash) {
if (me->me_dt == DT_CANCEL)
continue;
if (!DATAoverlapME(mof, hnb, me))
continue;
/*
* check if map entry is in use
* (about to be rolled).
*/
if (me->me_flags & ME_AGE) {
/*
* reset the age bit in the list,
* upgrade the lock, and try again
*/
for (me = *age; me; me = *age) {
*age = me->me_agenext;
me->me_flags &= ~ME_AGE;
}
mutex_exit(&mtm->mtm_mutex);
rw_exit(&mtm->mtm_rwlock);
rwtype = RW_WRITER;
mof = savmof;
nb = savnb;
entire = 0;
goto again;
} else {
/* add mapentry to age ordered list */
logmap_list_age(age, me);
crb = me->me_crb;
if (crb) {
if (DATAwithinCRB(savmof, savnb, crb)) {
entire = 1;
}
} else {
if (DATAwithinME(savmof, savnb, me)) {
entire = 1;
}
}
}
}
}
mutex_exit(&mtm->mtm_mutex);
ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock));
return (entire);
}
/*
* Get a list of deltas for rolling - returns success or failure.
* Also return the cached roll buffer if all deltas point to it.
*/
int
logmap_list_get_roll(mt_map_t *logmap, offset_t mof, rollbuf_t *rbp)
{
mapentry_t *me, **mep, *age = NULL;
crb_t *crb = NULL;
ASSERT(RW_LOCK_HELD(&logmap->mtm_rwlock));
ASSERT(((logmap->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(logmap));
ASSERT((mof & MAPBLOCKOFF) == 0);
rbp->rb_crb = NULL;
/*
* find overlapping entries
*/
mutex_enter(&logmap->mtm_mutex);
mep = MAP_HASH(mof, logmap);
for (me = *mep; me; me = me->me_hash) {
if (!DATAoverlapME(mof, MAPBLOCKSIZE, me))
continue;
if (me->me_tid == logmap->mtm_tid)
continue;
if (me->me_tid == logmap->mtm_committid)
continue;
if (me->me_dt == DT_CANCEL)
continue;
/*
* Check if map entry is in use (by lufs_read_strategy())
* and if so reset the age bit in the list,
* upgrade the lock, and try again
*/
if (me->me_flags & ME_AGE) {
for (me = age; me; me = age) {
age = me->me_agenext;
me->me_flags &= ~ME_AGE;
}
mutex_exit(&logmap->mtm_mutex);
return (1); /* failure */
} else {
/* add mapentry to age ordered list */
logmap_list_age(&age, me);
}
}
if (!age) {
goto out;
}
/*
* Mark the deltas as being rolled.
*/
for (me = age; me; me = me->me_agenext) {
me->me_flags |= ME_ROLL;
}
/*
* Test if all deltas are covered by one valid roll buffer
*/
crb = age->me_crb;
if (crb && !(crb->c_invalid)) {
for (me = age; me; me = me->me_agenext) {
if (me->me_crb != crb) {
crb = NULL;
break;
}
}
rbp->rb_crb = crb;
}
out:
rbp->rb_age = age;
mutex_exit(&logmap->mtm_mutex);
ASSERT(((logmap->mtm_debug & MT_SCAN) == 0) ||
logmap_logscan_debug(logmap, age));
ASSERT(RW_LOCK_HELD(&logmap->mtm_rwlock));
return (0); /* success */
}
void
logmap_list_put_roll(mt_map_t *mtm, mapentry_t *age)
{
mapentry_t *me;
ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock));
mutex_enter(&mtm->mtm_mutex);
for (me = age; me; me = age) {
age = me->me_agenext;
me->me_flags &= ~ME_AGE;
}
mutex_exit(&mtm->mtm_mutex);
}
void
logmap_list_put(mt_map_t *mtm, mapentry_t *age)
{
mapentry_t *me;
ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock));
mutex_enter(&mtm->mtm_mutex);
for (me = age; me; me = age) {
age = me->me_agenext;
me->me_flags &= ~ME_AGE;
}
mutex_exit(&mtm->mtm_mutex);
rw_exit(&mtm->mtm_rwlock);
}
#define UFS_RW_BALANCE 2
int ufs_rw_balance = UFS_RW_BALANCE;
/*
* Check if we need to read the master.
* The master does not need to be read if the log deltas to the
* block are for one contiguous set of full disk sectors.
* Cylinder group bit maps DT_CG (8K), directory entries (512B),
* and possibly others should not require master disk reads.
* Calculate the sector map for writing later.
*/
int
logmap_setup_read(mapentry_t *age, rollbuf_t *rbp)
{
offset_t mof;
crb_t *crb;
mapentry_t *me;
int32_t nb;
int i;
int start_sec, end_sec;
int read_needed = 0;
int all_inodes = 1;
int first_sec = INT_MAX;
int last_sec = -1;
rbsecmap_t secmap = 0;
/* LINTED: warning: logical expression always true: op "||" */
ASSERT((MAPBLOCKSIZE / DEV_BSIZE) == (sizeof (secmap) * NBBY));
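/*
 * secmap has one bit per DEV_BSIZE sector of the MAPBLOCKSIZE block
 * (the ASSERT above checks that the widths agree). For example, a
 * 1KB delta starting 1KB into the block occupies sectors 2-3 and
 * sets secmap to 0x000c.
 */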
for (me = age; me; me = me->me_agenext) {
crb = me->me_crb;
if (crb) {
nb = crb->c_nb;
mof = crb->c_mof;
} else {
nb = me->me_nb;
mof = me->me_mof;
}
/*
* If the delta is not sector aligned then
* read the whole block.
*/
if ((nb & DEV_BMASK) || (mof & DEV_BMASK)) {
read_needed = 1;
}
/* Set sector map used in the MAPBLOCKSIZE block. */
start_sec = (mof & MAPBLOCKOFF) >> DEV_BSHIFT;
end_sec = start_sec + ((nb - 1) >> DEV_BSHIFT);
for (i = start_sec; i <= end_sec; i++) {
secmap |= UINT16_C(1) << i;
}
if (me->me_dt != DT_INODE) {
all_inodes = 0;
}
if (start_sec < first_sec) {
first_sec = start_sec;
}
if (end_sec > last_sec) {
last_sec = end_sec;
}
}
ASSERT(secmap);
ASSERT(first_sec != INT_MAX);
ASSERT(last_sec != -1);
if (all_inodes) {
/*
* Here we have a tradeoff choice. It must be better to
* do 2 writes in the same MAPBLOCKSIZE chunk than a
* read and a write. But what about 3 or more writes,
* versus a read+write? Where is the cut over? It will
* depend on the track caching, scsi driver and other
* activity.
* An unpublished tunable is defined (ufs_rw_balance)
* that currently defaults to 2.
*/
if (!read_needed) {
int count = 0, gap = 0;
int sector_set; /* write needed to this sector */
/* Count the gaps (every 1 to 0 transition) */
for (i = first_sec + 1; i < last_sec; i++) {
sector_set = secmap & (UINT16_C(1) << i);
if (!gap && !sector_set) {
gap = 1;
count++;
if (count > ufs_rw_balance) {
read_needed = 1;
break;
}
} else if (gap && sector_set) {
gap = 0;
}
}
}
/*
* Inodes commonly make up the majority (~85%) of deltas.
* They cannot contain embedded user data, so it's safe to
* read and write them all in one IO.
* But for directory entries, shadow inode data, and
* quota record data, the user data fragments can be embedded
* between those metadata, and so it's not safe to read, modify,
* then write the entire range, as asynchronous user data
* writes could get overwritten with old data.
* Thus we have to create a segment map of the metadata that
* needs to get written.
*
* If user data was logged then this issue would go away.
*/
if (read_needed) {
for (i = first_sec + 1; i < last_sec; i++) {
secmap |= (UINT16_C(1) << i);
}
}
}
rbp->rb_secmap = secmap;
return (read_needed);
}
/*
* Abort the load of a set of log map deltas;
* i.e.,
* Clear out all mapentries on this unit's log map
* which have a tid (transaction id) equal to the
* parameter tid. Walk the cancel list, taking everything
* off it, too.
*/
static void
logmap_abort(ml_unit_t *ul, uint32_t tid)
{
struct mt_map *mtm = ul->un_logmap; /* Log map */
mapentry_t *me, **mep;
int i;
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
/*
* wait for any outstanding reads to finish; lock out future reads
*/
rw_enter(&mtm->mtm_rwlock, RW_WRITER);
mutex_enter(&mtm->mtm_mutex);
/* Take everything off cancel list */
while ((me = mtm->mtm_cancel) != NULL) {
mtm->mtm_cancel = me->me_cancel;
me->me_flags &= ~ME_CANCEL;
me->me_cancel = NULL;
}
/*
* Now take out all mapentries with the current tid and committid,
* as this function is called from logmap_logscan and logmap_commit.
* When it is called from logmap_logscan, mtm_tid == mtm_committid.
* But when logmap_abort is called from logmap_commit it is
* because the log errored when trying to write the commit record,
* after the async ops have been allowed to start in top_end_sync.
* So we also need to remove all mapentries from the transaction whose
* commit failed.
*/
for (i = 0; i < mtm->mtm_nhash; i++) {
mep = &mtm->mtm_hash[i];
while ((me = *mep) != NULL) {
if (me->me_tid == tid ||
me->me_tid == mtm->mtm_committid) {
*mep = me->me_hash;
me->me_next->me_prev = me->me_prev;
me->me_prev->me_next = me->me_next;
if (!(me->me_flags & ME_USER)) {
mtm->mtm_nme--;
}
CRB_RELE(me);
kmem_cache_free(mapentry_cache, me);
continue;
}
mep = &me->me_hash;
}
}
if (!(ul->un_flags & LDL_SCAN))
mtm->mtm_flags |= MTM_CANCELED;
mutex_exit(&mtm->mtm_mutex);
mtm->mtm_dirty = 0;
mtm->mtm_nmet = 0;
rw_exit(&mtm->mtm_rwlock);
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
}
static void
logmap_wait_space(mt_map_t *mtm, ml_unit_t *ul, mapentry_t *me)
{
ASSERT(MUTEX_HELD(&ul->un_log_mutex));
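/*
 * While the log lacks space for this entry, drop un_log_mutex and
 * force a roll cycle to free up log space; give up on log error.
 */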
while (!ldl_has_space(ul, me)) {
ASSERT(!(ul->un_flags & LDL_NOROLL));
mutex_exit(&ul->un_log_mutex);
logmap_forceroll(mtm);
mutex_enter(&ul->un_log_mutex);
if (ul->un_flags & LDL_ERROR)
break;
}
ASSERT(MUTEX_HELD(&ul->un_log_mutex));
}
/*
* put a list of deltas into a logmap
* If va == NULL, don't write to the log.
*/
void
logmap_add(
ml_unit_t *ul,
char *va, /* Ptr to buf w/deltas & data */
offset_t vamof, /* Offset on master of buf start */
mapentry_t *melist) /* Entries to add */
{
offset_t mof;
off_t nb;
mapentry_t *me;
mapentry_t **mep;
mapentry_t **savmep;
uint32_t tid;
mt_map_t *mtm = ul->un_logmap;
mutex_enter(&ul->un_log_mutex);
if (va)
logmap_wait_space(mtm, ul, melist);
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
mtm->mtm_ref = 1;
mtm->mtm_dirty++;
tid = mtm->mtm_tid;
while (melist) {
mof = melist->me_mof;
nb = melist->me_nb;
/*
* search for overlapping entries
*/
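/*
 * savmep remembers the head of this hash bucket so the new entry
 * can be linked in under mtm_mutex after the log write below.
 */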
savmep = mep = MAP_HASH(mof, mtm);
mutex_enter(&mtm->mtm_mutex);
while ((me = *mep) != 0) {
/*
* Data consumes old map entry; cancel map entry.
* Take care when we replace an old map entry
* which carries quota information with a newer entry
* which does not. In that case the push function
* would not be called to clean up the dquot structure.
* This would be found later by invalidatedq() causing
* a panic when the filesystem is unmounted.
* We clean up the dquot manually and then replace
* the map entry.
*/
if (MEwithinDATA(me, mof, nb) &&
((me->me_flags & (ME_ROLL|ME_CANCEL)) == 0)) {
if (tid == me->me_tid &&
((me->me_flags & ME_AGE) == 0)) {
*mep = me->me_hash;
me->me_next->me_prev = me->me_prev;
me->me_prev->me_next = me->me_next;
ASSERT(!(me->me_flags & ME_USER));
mtm->mtm_nme--;
/*
* Special case if the mapentry
* carries a dquot and a push function.
* We have to clean up the quota info
* before replacing the mapentry.
*/
if (me->me_dt == DT_QR)
HANDLE_DQUOT(me, melist);
kmem_cache_free(mapentry_cache, me);
continue;
}
me->me_cancel = mtm->mtm_cancel;
mtm->mtm_cancel = me;
me->me_flags |= ME_CANCEL;
}
mep = &(*mep)->me_hash;
}
mutex_exit(&mtm->mtm_mutex);
/*
* remove from list
*/
me = melist;
melist = melist->me_hash;
me->me_flags &= ~ME_LIST;
/*
* If va != NULL, put in the log.
*/
if (va)
ldl_write(ul, va, vamof, me);
if (ul->un_flags & LDL_ERROR) {
kmem_cache_free(mapentry_cache, me);
continue;
}
ASSERT((va == NULL) ||
((mtm->mtm_debug & MT_LOG_WRITE_CHECK) == 0) ||
map_check_ldl_write(ul, va, vamof, me));
/*
* put on hash
*/
mutex_enter(&mtm->mtm_mutex);
me->me_hash = *savmep;
*savmep = me;
me->me_next = (mapentry_t *)mtm;
me->me_prev = mtm->mtm_prev;
mtm->mtm_prev->me_next = me;
mtm->mtm_prev = me;
me->me_flags |= ME_HASH;
me->me_tid = tid;
me->me_age = mtm->mtm_age++;
mtm->mtm_nme++;
mtm->mtm_nmet++;
mutex_exit(&mtm->mtm_mutex);
}
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
mutex_exit(&ul->un_log_mutex);
}
/*
* Add the delta(s) into the log.
* Create one cached roll buffer logmap entry, and reference count the
* number of mapentries referring to it.
* Cancel previous logmap entries.
* logmap_add_buf() is tolerant of failure to allocate a cached roll buffer.
*/
void
logmap_add_buf(
ml_unit_t *ul,
char *va, /* Ptr to buf w/deltas & data */
offset_t bufmof, /* Offset on master of buf start */
mapentry_t *melist, /* Entries to add */
caddr_t buf, /* Buffer containing delta(s) */
uint32_t bufsz) /* Size of buf */
{
offset_t mof;
offset_t vamof = bufmof + (va - buf);
off_t nb;
mapentry_t *me;
mapentry_t **mep;
mapentry_t **savmep;
uint32_t tid;
mt_map_t *mtm = ul->un_logmap;
crb_t *crb;
crb_t *crbsav = NULL;
ASSERT((bufsz & DEV_BMASK) == 0);
mutex_enter(&ul->un_log_mutex);
logmap_wait_space(mtm, ul, melist);
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
mtm->mtm_ref = 1;
mtm->mtm_dirty++;
tid = mtm->mtm_tid;
while (melist) {
mof = melist->me_mof;
nb = melist->me_nb;
/*
* search for overlapping entries
*/
savmep = mep = MAP_HASH(mof, mtm);
mutex_enter(&mtm->mtm_mutex);
while ((me = *mep) != 0) {
/*
* Data consumes old map entry; cancel map entry.
* Take care when we replace an old map entry
* which carries quota information with a newer entry
* which does not. In that case the push function
* would not be called to clean up the dquot structure.
* This would be found later by invalidatedq() causing
* a panic when the filesystem is unmounted.
* We clean up the dquot manually and then replace
* the map entry.
*/
crb = me->me_crb;
if (MEwithinDATA(me, mof, nb) &&
((me->me_flags & (ME_ROLL|ME_CANCEL)) == 0)) {
if (tid == me->me_tid &&
((me->me_flags & ME_AGE) == 0)) {
*mep = me->me_hash;
me->me_next->me_prev = me->me_prev;
me->me_prev->me_next = me->me_next;
ASSERT(!(me->me_flags & ME_USER));
mtm->mtm_nme--;
/*
* Special case if the mapentry
* carries a dquot and a push function.
* We have to clean up the quota info
* before replacing the mapentry.
*/
if (me->me_dt == DT_QR)
HANDLE_DQUOT(me, melist);
/*
* If this soon-to-be-deleted mapentry
* has a suitable roll buffer then
* re-use it.
*/
if (crb && (--crb->c_refcnt == 0)) {
if (crbsav ||
(crb->c_nb != bufsz)) {
CRB_FREE(crb, me);
} else {
bcopy(buf, crb->c_buf,
bufsz);
crb->c_invalid = 0;
crb->c_mof = bufmof;
crbsav = crb;
me->me_crb = NULL;
}
}
kmem_cache_free(mapentry_cache, me);
continue;
}
me->me_cancel = mtm->mtm_cancel;
mtm->mtm_cancel = me;
me->me_flags |= ME_CANCEL;
}
/*
* Inode deltas within the same fs block come
* in individually as separate calls to logmap_add().
* All others come in as one call. So check for an
* existing entry where we can re-use the crb.
*/
if ((me->me_dt == DT_INODE) && (tid == me->me_tid) &&
!crbsav && crb &&
WITHIN(mof, nb, crb->c_mof, crb->c_nb)) {
ASSERT(crb->c_mof == bufmof);
ASSERT(crb->c_nb == bufsz);
bcopy(buf, crb->c_buf, bufsz);
crbsav = crb;
}
mep = &(*mep)->me_hash;
}
mutex_exit(&mtm->mtm_mutex);
/*
* If we don't already have a crb then allocate one
* and copy the incoming buffer. Only do this once
* for all the incoming deltas.
*/
if ((crbsav == NULL) && (melist->me_dt != DT_ABZERO)) {
/*
* Only use a cached roll buffer if we
* have enough memory, and check for failures.
*/
if (((ufs_crb_size + bufsz) < ufs_crb_limit) &&
(kmem_avail() > bufsz)) {
crbsav = kmem_alloc(sizeof (crb_t), KM_NOSLEEP);
} else {
ufs_crb_alloc_fails++;
}
if (crbsav) {
crbsav->c_buf = kmem_alloc(bufsz, KM_NOSLEEP);
if (crbsav->c_buf) {
atomic_add_64(&ufs_crb_size,
(uint64_t)bufsz);
if (ufs_crb_size > ufs_crb_max_size) {
ufs_crb_max_size = ufs_crb_size;
}
bcopy(buf, crbsav->c_buf, bufsz);
crbsav->c_nb = bufsz;
crbsav->c_refcnt = 0;
crbsav->c_invalid = 0;
ASSERT((bufmof & DEV_BMASK) == 0);
crbsav->c_mof = bufmof;
} else {
kmem_free(crbsav, sizeof (crb_t));
crbsav = NULL;
}
}
}
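/*
 * If the allocation failed we simply proceed without a crb: the
 * deltas are still logged, and rolling will have to read the data
 * back rather than use a cached copy.
 */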
/*
* remove from list
*/
me = melist;
melist = melist->me_hash;
me->me_flags &= ~ME_LIST;
me->me_crb = crbsav;
if (crbsav) {
crbsav->c_refcnt++;
}
crbsav = NULL;
ASSERT(va);
ldl_write(ul, va, vamof, me); /* add to on-disk log */
if (ul->un_flags & LDL_ERROR) {
CRB_RELE(me);
kmem_cache_free(mapentry_cache, me);
continue;
}
ASSERT(((mtm->mtm_debug & MT_LOG_WRITE_CHECK) == 0) ||
map_check_ldl_write(ul, va, vamof, me));
/*
* put on hash
*/
mutex_enter(&mtm->mtm_mutex);
me->me_hash = *savmep;
*savmep = me;
me->me_next = (mapentry_t *)mtm;
me->me_prev = mtm->mtm_prev;
mtm->mtm_prev->me_next = me;
mtm->mtm_prev = me;
me->me_flags |= ME_HASH;
me->me_tid = tid;
me->me_age = mtm->mtm_age++;
mtm->mtm_nme++;
mtm->mtm_nmet++;
mutex_exit(&mtm->mtm_mutex);
}
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
mutex_exit(&ul->un_log_mutex);
}
/*
* free up any cancelled deltas
*/
void
logmap_free_cancel(mt_map_t *mtm, mapentry_t **cancelhead)
{
int dolock = 0;
mapentry_t *me;
mapentry_t **mep;
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
again:
if (dolock)
rw_enter(&mtm->mtm_rwlock, RW_WRITER);
/*
* At EOT, cancel the indicated deltas
*/
mutex_enter(&mtm->mtm_mutex);
if (mtm->mtm_flags & MTM_CANCELED) {
mtm->mtm_flags &= ~MTM_CANCELED;
ASSERT(dolock == 0);
mutex_exit(&mtm->mtm_mutex);
return;
}
while ((me = *cancelhead) != NULL) {
/*
* roll forward or read collision; wait and try again
*/
if (me->me_flags & ME_AGE) {
ASSERT(dolock == 0);
mutex_exit(&mtm->mtm_mutex);
dolock = 1;
goto again;
}
/*
* remove from cancel list
*/
*cancelhead = me->me_cancel;
me->me_cancel = NULL;
me->me_flags &= ~(ME_CANCEL);
/*
* logmap_remove_roll handles ME_ROLL entries later
* we leave them around for logmap_iscancel
* XXX is this necessary?
*/
if (me->me_flags & ME_ROLL)
continue;
/*
* remove from hash (if necessary)
*/
if (me->me_flags & ME_HASH) {
mep = MAP_HASH(me->me_mof, mtm);
while (*mep) {
if (*mep == me) {
*mep = me->me_hash;
me->me_next->me_prev = me->me_prev;
me->me_prev->me_next = me->me_next;
me->me_flags &= ~(ME_HASH);
if (!(me->me_flags & ME_USER)) {
mtm->mtm_nme--;
}
break;
} else
mep = &(*mep)->me_hash;
}
}
/*
* put the entry on the free list
*/
CRB_RELE(me);
kmem_cache_free(mapentry_cache, me);
}
mutex_exit(&mtm->mtm_mutex);
if (dolock)
rw_exit(&mtm->mtm_rwlock);
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
}
void
logmap_commit(ml_unit_t *ul, uint32_t tid)
{
mapentry_t me;
mt_map_t *mtm = ul->un_logmap;
ASSERT(MUTEX_HELD(&ul->un_log_mutex));
/*
* async'ly write a commit rec into the log
*/
if (mtm->mtm_dirty) {
/*
* put commit record into log
*/
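/* the commit record carries the tid in me_mof and has no data */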
me.me_mof = mtm->mtm_tid;
me.me_dt = DT_COMMIT;
me.me_nb = 0;
me.me_hash = NULL;
logmap_wait_space(mtm, ul, &me);
ldl_write(ul, NULL, (offset_t)0, &me);
ldl_round_commit(ul);
/*
* abort on error; else reset dirty flag
*/
if (ul->un_flags & LDL_ERROR)
logmap_abort(ul, tid);
else {
mtm->mtm_dirty = 0;
mtm->mtm_nmet = 0;
mtm->mtm_cfrags = 0;
}
/* push commit */
ldl_push_commit(ul);
}
}
void
logmap_sethead(mt_map_t *mtm, ml_unit_t *ul)
{
off_t lof;
uint32_t tid;
mapentry_t *me;
/*
* move the head forward so the log knows how full it is
* Make sure to skip any mapentry whose me_lof is 0; these
* are just placeholders for DT_CANCEL freed user blocks
* for the current moby.
*/
mutex_enter(&ul->un_log_mutex);
mutex_enter(&mtm->mtm_mutex);
me = mtm->mtm_next;
while (me != (mapentry_t *)mtm && me->me_lof == 0) {
me = me->me_next;
}
if (me == (mapentry_t *)mtm)
lof = -1;
else {
lof = me->me_lof;
tid = me->me_tid;
}
mutex_exit(&mtm->mtm_mutex);
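/*
 * NB: tid is only assigned when a mapentry with a non-zero me_lof
 * was found; when lof == -1 (empty map) ldl_sethead() is presumed
 * to ignore it.
 */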
ldl_sethead(ul, lof, tid);
if (lof == -1)
mtm->mtm_age = 0;
mutex_exit(&ul->un_log_mutex);
}
void
logmap_settail(mt_map_t *mtm, ml_unit_t *ul)
{
off_t lof;
size_t nb;
/*
* set the tail after the logmap_abort
*/
mutex_enter(&ul->un_log_mutex);
mutex_enter(&mtm->mtm_mutex);
if (mtm->mtm_prev == (mapentry_t *)mtm)
lof = -1;
else {
/*
* set the tail to the end of the last commit
*/
lof = mtm->mtm_tail_lof;
nb = mtm->mtm_tail_nb;
}
mutex_exit(&mtm->mtm_mutex);
ldl_settail(ul, lof, nb);
mutex_exit(&ul->un_log_mutex);
}
/*
* When resetting a device, roll the log until every
* delta has been rolled forward.
*/
void
logmap_roll_dev(ml_unit_t *ul)
{
mt_map_t *mtm = ul->un_logmap;
mapentry_t *me;
ufsvfs_t *ufsvfsp = ul->un_ufsvfs;
again:
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
if (ul->un_flags & (LDL_ERROR|LDL_NOROLL))
return;
/*
* look for deltas
*/
mutex_enter(&mtm->mtm_mutex);
for (me = mtm->mtm_next; me != (mapentry_t *)mtm; me = me->me_next) {
if (me->me_flags & ME_ROLL)
break;
if (me->me_tid == mtm->mtm_tid)
continue;
if (me->me_tid == mtm->mtm_committid)
continue;
break;
}
/*
* found a delta; kick the roll thread
* but only if the thread is running... (jmh)
*/
if (me != (mapentry_t *)mtm) {
mutex_exit(&mtm->mtm_mutex);
logmap_forceroll(mtm);
goto again;
}
/*
* no more deltas, return
*/
mutex_exit(&mtm->mtm_mutex);
(void) ufs_putsummaryinfo(ul->un_dev, ufsvfsp, ufsvfsp->vfs_fs);
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
}
static void
logmap_cancel_delta(ml_unit_t *ul, offset_t mof, int32_t nb, int metadata)
{
mapentry_t *me;
mapentry_t **mep;
mt_map_t *mtm = ul->un_logmap;
int frags;
/*
* map has been referenced and is dirty
*/
mtm->mtm_ref = 1;
mtm->mtm_dirty++;
/*
* get a mapentry
*/
me = kmem_cache_alloc(mapentry_cache, KM_SLEEP);
bzero(me, sizeof (mapentry_t));
/*
* initialize cancel record and put in logmap
*/
me->me_mof = mof;
me->me_nb = nb;
me->me_dt = DT_CANCEL;
me->me_tid = mtm->mtm_tid;
me->me_hash = NULL;
/*
* Write the delta to the log if it is for metadata. If it is not
* metadata, it is user data and we are just putting a cancel
* mapentry into the hash for a freed user block that must not be
* reallocated within this moby. The cancel entry prevents the
* block from being allocated within the moby, and so prevents
* user data corruption if we happen to crash before the moby
* is committed.
*/
mutex_enter(&ul->un_log_mutex);
if (metadata) {
logmap_wait_space(mtm, ul, me);
ldl_write(ul, NULL, (offset_t)0, me);
if (ul->un_flags & LDL_ERROR) {
kmem_cache_free(mapentry_cache, me);
mutex_exit(&ul->un_log_mutex);
return;
}
}
/*
* put in hash and on cancel list
*/
mep = MAP_HASH(mof, mtm);
mutex_enter(&mtm->mtm_mutex);
me->me_age = mtm->mtm_age++;
me->me_hash = *mep;
*mep = me;
me->me_next = (mapentry_t *)mtm;
me->me_prev = mtm->mtm_prev;
mtm->mtm_prev->me_next = me;
mtm->mtm_prev = me;
me->me_cancel = mtm->mtm_cancel;
mtm->mtm_cancel = me;
if (metadata) {
mtm->mtm_nme++;
mtm->mtm_nmet++;
} else {
me->me_flags = ME_USER;
}
me->me_flags |= (ME_HASH|ME_CANCEL);
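/*
 * For user data cancels, count any partial-block fragments so that
 * logmap_need_commit() can force a commit once mtm_cfragmax
 * canceled fragments accumulate in this moby.
 */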
if (!(metadata)) {
frags = blkoff(ul->un_ufsvfs->vfs_fs, nb);
if (frags)
mtm->mtm_cfrags +=
numfrags(ul->un_ufsvfs->vfs_fs, frags);
}
mutex_exit(&mtm->mtm_mutex);
mutex_exit(&ul->un_log_mutex);
}
/*
* cancel entries in a logmap (entries are freed at EOT)
*/
void
logmap_cancel(ml_unit_t *ul, offset_t mof, off_t nb, int metadata)
{
int32_t hnb;
mapentry_t *me;
mapentry_t **mep;
mt_map_t *mtm = ul->un_logmap;
crb_t *crb;
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
for (hnb = 0; nb; nb -= hnb, mof += hnb) {
hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
if (hnb > nb)
hnb = nb;
/*
* Find overlapping metadata entries. Don't search through
* the hash chains if this is user data because it is only
* possible to have overlapping map entries for metadata,
* and the search can become expensive for large files.
*/
if (metadata) {
mep = MAP_HASH(mof, mtm);
mutex_enter(&mtm->mtm_mutex);
for (me = *mep; me; me = me->me_hash) {
if (!DATAoverlapME(mof, hnb, me))
continue;
ASSERT(MEwithinDATA(me, mof, hnb));
if ((me->me_flags & ME_CANCEL) == 0) {
me->me_cancel = mtm->mtm_cancel;
mtm->mtm_cancel = me;
me->me_flags |= ME_CANCEL;
crb = me->me_crb;
if (crb) {
crb->c_invalid = 1;
}
}
}
mutex_exit(&mtm->mtm_mutex);
}
/*
* put a cancel record into the log
*/
logmap_cancel_delta(ul, mof, hnb, metadata);
}
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
}
/*
* check for overlap w/cancel delta
*/
int
logmap_iscancel(mt_map_t *mtm, offset_t mof, off_t nb)
{
off_t hnb;
mapentry_t *me;
mapentry_t **mep;
mutex_enter(&mtm->mtm_mutex);
for (hnb = 0; nb; nb -= hnb, mof += hnb) {
hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
if (hnb > nb)
hnb = nb;
/*
* search for dup entry
*/
mep = MAP_HASH(mof, mtm);
for (me = *mep; me; me = me->me_hash) {
if (((me->me_flags & ME_ROLL) == 0) &&
(me->me_dt != DT_CANCEL))
continue;
if (DATAoverlapME(mof, hnb, me))
break;
}
/*
* overlap detected
*/
if (me) {
mutex_exit(&mtm->mtm_mutex);
return (1);
}
}
mutex_exit(&mtm->mtm_mutex);
return (0);
}
static int
logmap_logscan_add(ml_unit_t *ul, struct delta *dp, off_t lof, size_t *nbp)
{
mapentry_t *me;
int error;
mt_map_t *mtm = ul->un_logmap;
/*
* verify delta header; failure == mediafail
*/
error = 0;
/* delta type */
if ((dp->d_typ <= DT_NONE) || (dp->d_typ >= DT_MAX))
error = EINVAL;
if (dp->d_typ == DT_COMMIT) {
if (dp->d_nb != INT32_C(0) && dp->d_nb != INT32_C(-1))
error = EINVAL;
} else {
/* length of delta */
if ((dp->d_nb < INT32_C(0)) ||
(dp->d_nb > INT32_C(MAPBLOCKSIZE)))
error = EINVAL;
/* offset on master device */
if (dp->d_mof < INT64_C(0))
error = EINVAL;
}
if (error) {
ldl_seterror(ul, "Error processing ufs log data during scan");
return (error);
}
/*
* process commit record
*/
if (dp->d_typ == DT_COMMIT) {
if (mtm->mtm_dirty) {
ASSERT(dp->d_nb == INT32_C(0));
logmap_free_cancel(mtm, &mtm->mtm_cancel);
mtm->mtm_dirty = 0;
mtm->mtm_nmet = 0;
mtm->mtm_tid++;
mtm->mtm_committid = mtm->mtm_tid;
ASSERT(((mtm->mtm_debug & MT_SCAN) == 0) ||
logmap_logscan_commit_debug(lof, mtm));
}
/*
* return #bytes to next sector (next delta header)
*/
*nbp = ldl_logscan_nbcommit(lof);
mtm->mtm_tail_lof = lof;
mtm->mtm_tail_nb = *nbp;
return (0);
}
/*
* add delta to logmap
*/
me = kmem_cache_alloc(mapentry_cache, KM_SLEEP);
bzero(me, sizeof (mapentry_t));
me->me_lof = lof;
me->me_mof = dp->d_mof;
me->me_nb = dp->d_nb;
me->me_tid = mtm->mtm_tid;
me->me_dt = dp->d_typ;
me->me_hash = NULL;
me->me_flags = (ME_LIST | ME_SCAN);
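/* va == NULL: the delta already exists in the log; just rebuild the map */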
logmap_add(ul, NULL, 0, me);
switch (dp->d_typ) {
case DT_CANCEL:
me->me_flags |= ME_CANCEL;
me->me_cancel = mtm->mtm_cancel;
mtm->mtm_cancel = me;
break;
default:
ASSERT(((mtm->mtm_debug & MT_SCAN) == 0) ||
logmap_logscan_add_debug(dp, mtm));
break;
}
/*
* return #bytes till next delta header
*/
if ((dp->d_typ == DT_CANCEL) || (dp->d_typ == DT_ABZERO))
*nbp = 0;
else
*nbp = dp->d_nb;
return (0);
}
void
logmap_logscan(ml_unit_t *ul)
{
size_t nb, nbd;
off_t lof;
struct delta delta;
mt_map_t *logmap = ul->un_logmap;
ASSERT(ul->un_deltamap->mtm_next == (mapentry_t *)ul->un_deltamap);
/*
* prepare the log for a logscan
*/
ldl_logscan_begin(ul);
/*
* prepare the logmap for a logscan
*/
(void) map_free_entries(logmap);
logmap->mtm_tid = 0;
logmap->mtm_committid = UINT32_C(0);
logmap->mtm_age = 0;
logmap->mtm_dirty = 0;
logmap->mtm_ref = 0;
/*
* while not at end of log
* read delta header
* add to logmap
* seek to beginning of next delta
*/
lof = ul->un_head_lof;
nbd = sizeof (delta);
while (lof != ul->un_tail_lof) {
/* read delta header */
if (ldl_logscan_read(ul, &lof, nbd, (caddr_t)&delta))
break;
/* add to logmap */
if (logmap_logscan_add(ul, &delta, lof, &nb))
break;
/* seek to next header (skip data) */
if (ldl_logscan_read(ul, &lof, nb, NULL))
break;
}
/*
* remove the last partial transaction from the logmap
*/
logmap_abort(ul, logmap->mtm_tid);
ldl_logscan_end(ul);
}
void
_init_map(void)
{
/*
* Initialize the mapentry cache. No constructor or destructor
* is needed. Also no reclaim function is supplied, as reclaiming
* current entries is not possible.
*/
mapentry_cache = kmem_cache_create("lufs_mapentry_cache",
sizeof (mapentry_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
}
/*
* Special case when we replace an old map entry which carries quota
* information with a newer entry which does not.
* In that case the push function would not be called to clean up the
* dquot structure. This would be found later by invalidatedq() causing
* a panic when the filesystem is unmounted.
* We clean up the dquot manually before replacing the map entry.
*/
void
handle_dquot(mapentry_t *me)
{
int dolock = 0;
int domutex = 0;
struct dquot *dqp;
dqp = (struct dquot *)me->me_arg;
/*
* We need vfs_dqrwlock to call dqput()
*/
dolock = (!RW_LOCK_HELD(&dqp->dq_ufsvfsp->vfs_dqrwlock));
if (dolock)
rw_enter(&dqp->dq_ufsvfsp->vfs_dqrwlock, RW_READER);
domutex = (!MUTEX_HELD(&dqp->dq_lock));
if (domutex)
mutex_enter(&dqp->dq_lock);
/*
* Only clean up if the dquot is referenced
*/
if (dqp->dq_cnt == 0) {
if (domutex)
mutex_exit(&dqp->dq_lock);
if (dolock)
rw_exit(&dqp->dq_ufsvfsp->vfs_dqrwlock);
return;
}
dqp->dq_flags &= ~(DQ_MOD|DQ_TRANS);
dqput(dqp);
if (domutex)
mutex_exit(&dqp->dq_lock);
if (dolock)
rw_exit(&dqp->dq_ufsvfsp->vfs_dqrwlock);
}