lufs_map.c revision 20a1ae8aa548e5c0874f0cb213a5f242fe315a59
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/sysmacros.h>
#include <sys/inttypes.h>
#include <sys/tuneable.h>
/*
* externs
*/
extern pri_t minclsyspri;
extern struct kmem_cache *lufs_bp;
extern int ufs_trans_push_quota();
/*
* globals
*/
/*
* logmap tuning constants
*/
long logmap_maxnme_commit = 2048;
long logmap_maxnme_async = 4096;
long logmap_maxnme_sync = 6144;
void handle_dquot(mapentry_t *);
/*
* GENERIC MAP ROUTINES
*/
} \
}
/*
* Check that the old delta has an argument and a push function of
* ufs_trans_push_quota(), then check that the old and new deltas differ.
* If so we clean up with handle_dquot() before replacing the old delta.
*/
handle_dquot(me); \
} \
} \
}
/*
* free up all the mapentries for a map
*/
void
{
int i;
mapentry_t *me;
}
}
/*
* done with map; free if necessary
*/
mt_map_t *
{
/*
* free up the map's memory
*/
return (NULL);
}
/*
* Allocate a map;
*/
mt_map_t *
{
/*
* assume the map is not here and allocate the necessary structs
*/
KM_SLEEP);
mtm->mtm_cfrags = 0;
/*
* for scan test
*/
/*
* Initialize locks
*/
return (mtm);
}
/*
* DELTAMAP ROUTINES
*/
/*
* deltamap tuning constants
*/
int
{
}
/*
* put a delta into a deltamap; may sleep on memory
*/
void
int (*func)(),
{
mapentry_t *me;
mapentry_t **mep;
/*
* Search for dup entry. We need to ensure that we don't
* replace a map entry which carries quota information
* with a map entry which doesn't. In that case we lose
* the reference to the dquot structure which will not be
* cleaned up by the push function me->me_func as this will
* never be called.
* The stray dquot would be found later by invalidatedq()
* causing a panic when the filesystem is unmounted.
*/
/*
* Don't remove quota entries which have
* incremented the ref count (those with a
* ufs_trans_push_quota push function).
* Let logmap_add[_buf] clean them up.
*/
continue;
}
break;
}
}
if (me) {
/* already in map */
continue;
}
/*
* Add up all the delta map deltas so we can compute
* an upper bound on the log size used.
* Note, some deltas get removed from the deltamap
* before the deltamap_push by lufs_write_strategy
* and so multiple deltas to the same mof offset
* don't get cancelled here but in the logmap.
* Thus we can't easily get an accurate count of
* the log space used - only an upper bound.
*/
} else {
tp->deltas_size +=
}
}
delta_stats[dtyp]++;
/*
* get a mapentry
* May need to drop & re-grab the mtm_mutex
* and then recheck for a duplicate
*/
}
/*
* initialize and put in deltamap
*/
}
}
/*
* remove deltas within (mof, nb) and return as linked list
*/
{
mapentry_t *me;
mapentry_t **mep;
return (NULL);
/*
* remove entries from hash and return as an aged linked list
*/
} else
}
}
return (mer);
}
/*
* delete entries within (mof, nb)
*/
void
{
mapentry_t *me;
}
}
/*
* Call the indicated function to cause deltas to move to the logmap.
* top_end_sync() is the only caller of this function and
* it has waited for the completion of all threads, so there can
* be no other activity in the deltamap. Therefore we don't need to
* hold the deltamap lock.
*/
void
{
int (*func)();
mapentry_t *me;
/*
* for every entry in the deltamap
*/
}
}
/*
* LOGMAP ROUTINES
*/
int
{
}
int
{
}
int
{
}
void
{
}
}
void
{
return;
}
}
/*
* kick the roll thread if it's not doing anything
*/
void
{
/*
* Don't need to lock mtm_mutex to read mtm_flags here as we
* don't care in the rare case when we get a transitional value
* of mtm_flags. Just by signalling the thread it will wakeup
* and notice it has too many logmap entries.
*/
}
}
/*
* kick the roll thread and wait for it to finish a cycle
*/
void
{
}
do {
goto out;
}
out:
}
/*
* remove rolled deltas within (mof, nb) and free them
*/
void
{
int dolock = 0;
mapentry_t *me;
mapentry_t **mep;
if (dolock)
/*
* remove and free the rolled entries
*/
dolock = 1;
goto again;
}
/*
* cancelled entries are handled by someone else
*/
}
} else
}
}
if (dolock)
}
/*
* Find the disk offset of the next delta to roll.
* Returns 0: no more deltas to roll or a transaction is being committed
* 1: a delta to roll has been found and *mofp points
* to the master file disk offset
*/
int
{
mapentry_t *me;
/* already rolled */
continue;
}
/* part of currently busy transaction; stop */
break;
}
/* part of commit-in-progress transaction; stop */
break;
}
/*
* We shouldn't see a DT_CANCEL mapentry whose
* tid != mtm_committid, or != mtm_tid since
* these are removed at the end of each committed
* transaction.
*/
return (1);
}
return (0);
}
/*
* put mapentry on sorted age list
*/
static void
{
mapentry_t *me;
break;
}
}
/*
* get a list of deltas within <mof, mof+nb>
* returns with mtm_rwlock held
* return value says whether the entire mof range is covered by deltas
*/
int
mapentry_t **age)
{
mapentry_t *me;
mapentry_t **mep;
int entire = 0;
/*
* find overlapping entries
*/
continue;
continue;
/*
* check if map entry is in use
* (about to be rolled).
*/
/*
* reset the age bit in the list,
* upgrade the lock, and try again
*/
}
entire = 0;
goto again;
} else {
/* add mapentry to age ordered list */
if (crb) {
entire = 1;
}
} else {
entire = 1;
}
}
}
}
}
return (entire);
}
/*
* Get a list of deltas for rolling - returns success or failure.
* Also return the cached roll buffer if all deltas point to it.
*/
int
{
/*
* find overlapping entries
*/
continue;
continue;
continue;
continue;
/*
* Check if map entry is in use (by lufs_read_strategy())
* and if so reset the age bit in the list,
* upgrade the lock, and try again
*/
}
return (1); /* failure */
} else {
/* add mapentry to age ordered list */
}
}
if (!age) {
goto out;
}
/*
* Mark the deltas as being rolled.
*/
}
/*
* Test if all deltas are covered by one valid roll buffer
*/
break;
}
}
}
out:
return (0); /* success */
}
void
{
mapentry_t *me;
}
}
void
{
mapentry_t *me;
}
}
#define UFS_RW_BALANCE 2
int ufs_rw_balance = UFS_RW_BALANCE;
/*
* Check if we need to read the master.
* The master does not need to be read if the log deltas to the
* block are for one contiguous set of full disk sectors.
* Both cylinder group bit maps DT_CG (8K); directory entries (512B);
* and possibly others should not require master disk reads.
* Calculate the sector map for writing later.
*/
int
{
mapentry_t *me;
int i;
int read_needed = 0;
int all_inodes = 1;
int last_sec = -1;
rbsecmap_t secmap = 0;
/* LINTED: warning: logical expression always true: op "||" */
if (crb) {
} else {
}
/*
* If the delta is not sector aligned then
* read the whole block.
*/
read_needed = 1;
}
/* Set sector map used in the MAPBLOCKSIZE block. */
}
all_inodes = 0;
}
}
}
}
if (all_inodes) {
/*
* Here we have a tradeoff choice. It must be better to
* do 2 writes in the same MAPBLOCKSIZE chunk, than a
* read and a write. But what about 3 or more writes, versus
* a read+write? Where is the cut over? It will depend on
* the track caching, scsi driver and other activity.
* An unpublished tunable is defined (ufs_rw_balance) that
* currently defaults to 2.
*/
if (!read_needed) {
int sector_set; /* write needed to this sector */
/* Count the gaps (every 1 to 0 transition) */
if (!gap && !sector_set) {
gap = 1;
count++;
if (count > ufs_rw_balance) {
read_needed = 1;
break;
}
} else if (gap && sector_set) {
gap = 0;
}
}
}
/*
* Inodes commonly make up the majority (~85%) of deltas.
* They cannot contain embedded user data, so it is safe to
* read and write them all in one IO.
* But for directory entries, shadow inode data, and
* quota record data the user data fragments can be embedded
* between those metadata, and so it is not safe to read, modify
* then write the entire range as asynchronous user data
* writes could get overwritten with old data.
* Thus we have to create a segment map of meta data that
* needs to get written.
*
* If user data was logged then this issue would go away.
*/
if (read_needed) {
}
}
}
return (read_needed);
}
/*
* Abort the load of a set of log map delta's.
* ie,
* Clear out all mapentries on this unit's log map
* which have a tid (transaction id) equal to the
* parameter tid. Walk the cancel list, taking everything
* off it, too.
*/
static void
{
mapentry_t *me,
**mep;
int i;
/*
* wait for any outstanding reads to finish; lock out future reads
*/
/* Take everything off cancel list */
}
/*
* Now take out all mapentries with current tid, and committid
* as this function is called from logmap_logscan and logmap_commit
* When it is called from logmap_logscan mtm_tid == mtm_committid
* But when logmap_abort is called from logmap_commit it is
* because the log errored when trying to write the commit record,
* after the async ops have been allowed to start in top_end_sync.
* So we also need to remove all mapentries from the transaction whose
* commit failed.
*/
}
continue;
}
}
}
}
static void
{
break;
}
}
/*
* put a list of deltas into a logmap
* If va == NULL, don't write to the log.
*/
void
{
mapentry_t *me;
mapentry_t **mep;
mapentry_t **savmep;
if (va)
while (melist) {
/*
* search for overlaping entries
*/
/*
* Data consumes old map entry; cancel map entry.
* Take care when we replace an old map entry
* which carries quota information with a newer entry
* which does not. In that case the push function
* would not be called to clean up the dquot structure.
* This would be found later by invalidatedq() causing
* a panic when the filesystem is unmounted.
* We clean up the dquot manually and then replace
* the map entry.
*/
/*
* Special case if the mapentry
* carries a dquot and a push function.
* We have to clean up the quota info
* before replacing the mapentry.
*/
continue;
}
}
}
/*
* remove from list
*/
/*
* If va != NULL, put in the log.
*/
if (va)
continue;
}
/*
* put on hash
*/
}
}
/*
* Add the delta(s) into the log.
* Create one cached roll buffer logmap entry, and reference count the
* number of mapentries referring to it.
* Cancel previous logmap entries.
* logmap_add is tolerant of failure to allocate a cached roll buffer.
*/
void
{
mapentry_t *me;
mapentry_t **mep;
mapentry_t **savmep;
while (melist) {
/*
* search for overlapping entries
*/
/*
* Data consumes old map entry; cancel map entry.
* Take care when we replace an old map entry
* which carries quota information with a newer entry
* which does not. In that case the push function
* would not be called to clean up the dquot structure.
* This would be found later by invalidatedq() causing
* a panic when the filesystem is unmounted.
* We clean up the dquot manually and then replace
* the map entry.
*/
/*
* Special case if the mapentry
* carries a dquot and a push function.
* We have to clean up the quota info
* before replacing the mapentry.
*/
/*
* If this soon to be deleted mapentry
* has a suitable roll buffer then
* re-use it.
*/
if (crbsav ||
} else {
bufsz);
}
}
continue;
}
}
/*
* Inode deltas within the same fs block come
* in individually as separate calls to logmap_add().
* All others come in as one call. So check for an
* existing entry where we can re-use the crb.
*/
}
}
/*
* If we don't already have a crb then allocate one
* and copy the incoming buffer. Only do this once
* for all the incoming deltas.
*/
/*
* Only use a cached roll buffer if we
* have enough memory, and check for failures.
*/
(kmem_avail() > bufsz)) {
} else {
}
if (crbsav) {
if (ufs_crb_size > ufs_crb_max_size) {
}
} else {
}
}
}
/*
* remove from list
*/
if (crbsav) {
}
continue;
}
/*
* put on hash
*/
}
}
/*
* free up any cancelled deltas
*/
void
{
int dolock = 0;
mapentry_t *me;
mapentry_t **mep;
if (dolock)
/*
* At EOT, cancel the indicated deltas
*/
return;
}
/*
* roll forward or read collision; wait and try again
*/
dolock = 1;
goto again;
}
/*
* remove from cancel list
*/
/*
* logmap_remove_roll handles ME_ROLL entries later
* we leave them around for logmap_iscancel
* XXX is this necessary?
*/
continue;
/*
* remove from hash (if necessary)
*/
while (*mep) {
}
break;
} else
}
}
/*
* put the entry on the free list
*/
}
if (dolock)
}
void
{
/*
* async'ly write a commit rec into the log
*/
/*
* put commit record into log
*/
/*
* abort on error; else reset dirty flag
*/
else {
mtm->mtm_cfrags = 0;
}
/* push commit */
}
}
void
{
mapentry_t *me;
/*
* move the head forward so the log knows how full it is
* Make sure to skip any mapentry whose me_lof is 0, these
* are just place holders for DT_CANCELED freed user blocks
* for the current moby.
*/
}
lof = -1;
else {
}
if (lof == -1)
}
void
{
/*
* set the tail after the logmap_abort
*/
lof = -1;
else {
/*
* set the tail to the end of the last commit
*/
}
}
/*
* when resetting a device; roll the log until every
* delta has been rolled forward
*/
void
{
mapentry_t *me;
return;
/*
* look for deltas
*/
break;
continue;
continue;
break;
}
/*
* found a delta; kick the roll thread
* but only if the thread is running... (jmh)
*/
goto again;
}
/*
* no more deltas, return
*/
}
static void
{
mapentry_t *me;
mapentry_t **mep;
int frags;
/*
* map has been referenced and is dirty
*/
/*
* get a mapentry
*/
/*
* initialize cancel record and put in logmap
*/
/*
* Write delta to log if this delta is for metadata. If this is not
* metadata it is user data and we are just putting a cancel
* mapentry into the hash to cancel a user block deletion
* in which we do not want the block to be allocated
* within this moby. This cancel entry will prevent the block from
* being allocated within the moby and prevent user data corruption
* if we happen to crash before this moby is committed.
*/
if (metadata) {
return;
}
}
/*
* put in hash and on cancel list
*/
if (metadata) {
} else {
}
if (!(metadata)) {
if (frags)
frags);
}
}
/*
* cancel entries in a logmap (entries are freed at EOT)
*/
void
{
mapentry_t *me;
mapentry_t **mep;
/*
* Find overlapping metadata entries. Don't search through
* the hash chains if this is user data because it is only
* possible to have overlapping map entries for metadata,
* and the search can become expensive for large files.
*/
if (metadata) {
continue;
if (crb) {
}
}
}
}
/*
* put a cancel record into the log
*/
}
}
/*
*/
int
{
mapentry_t *me;
mapentry_t **mep;
/*
* search for dup entry
*/
continue;
break;
}
/*
* overlap detected
*/
if (me) {
return (1);
}
}
return (0);
}
static int
{
mapentry_t *me;
int error;
/*
* verify delta header; failure == mediafail
*/
error = 0;
/* delta type */
} else {
/* length of delta */
/* offset on master device */
}
if (error) {
return (error);
}
/*
* process commit record
*/
}
/*
* return #bytes to next sector (next delta header)
*/
return (0);
}
/*
* add delta to logmap
*/
case DT_CANCEL:
break;
default:
break;
}
/*
* return #bytes till next delta header
*/
*nbp = 0;
else
return (0);
}
void
{
/*
* prepare the log for a logscan
*/
/*
* prepare the logmap for a logscan
*/
(void) map_free_entries(logmap);
/*
* while not at end of log
* read delta header
* add to logmap
* seek to beginning of next delta
*/
/* read delta header */
break;
/* add to logmap */
break;
/* seek to next header (skip data) */
break;
}
/*
* remove the last partial transaction from the logmap
*/
}
void
_init_map(void)
{
/*
* Initialize the mapentry cache. No constructor or destructor
* is needed. Also no reclaim function is supplied as reclaiming
* current entries is not possible.
*/
}
/*
* Special case when we replace an old map entry which carries quota
* information with a newer entry which does not.
* In that case the push function would not be called to clean up the
* dquot structure. This would be found later by invalidatedq() causing
* a panic when the filesystem is unmounted.
* We clean up the dquot manually before replacing the map entry.
*/
void
{
int dolock = 0;
int domutex = 0;
/*
* We need vfs_dqrwlock to call dqput()
*/
if (dolock)
if (domutex)
/*
* Only clean up if the dquot is referenced
*/
if (domutex)
if (dolock)
return;
}
if (domutex)
if (dolock)
}