lufs_log.c revision 80d3443290aca22ad7fb6c18568d19d37517ebbf
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/sysmacros.h>
#include <sys/fssnap_if.h>
extern int maxphys;
extern uint_t bypass_snapshot_throttle_key;
extern struct kmem_cache *lufs_sv;
extern struct kmem_cache *lufs_bp;
static void
{
return;
else
}
static int
{
else
/* wakeup the thread waiting on this buf */
return (0);
}
static int
{
/*
* Compute address of the ``save'' struct
*/
/*
* If this is the last request, release the resources and
* ``done'' the original buffer header.
*/
return (1);
}
/* Propagate any errors back to the original buffer header */
return (0);
}
/*
* Map the log logical block number to a physical disk block number
*/
static int
{
uint32_t s = 0;
uint32_t i = e >> 1;
/* FOUND IT */
return (0);
} else
s = i;
} else
e = i;
i = s + ((e - s) >> 1);
if (i == lasti) {
return (ENOENT);
}
lasti = i;
goto again;
}
/*
* The log is a set of extents (which typically will be only one, but
* may be more if the disk was close to full when the log was created)
* and hence the logical offsets into the log
* have to be translated into their real device locations before
* calling the device's strategy routine. The translation may result
* in several IO requests if this request spans extents.
*/
void
{
int error;
/*
* Allocate and initialise the save structure,
*/
offset = 0;
do {
if (error) {
} else {
if (read) {
} else {
}
/*
* write through the snapshot driver if necessary
* We do not want this write to be throttled because
* we are holding the un_log mutex here. If we
* are throttled in fssnap_translate, the fssnap_taskq
* thread which can wake us up can get blocked on
* the un_log mutex resulting in a deadlock.
*/
if (ufsvfsp->vfs_snapshot) {
(void) tsd_set(bypass_snapshot_throttle_key,
(void *)1);
(void) tsd_set(bypass_snapshot_throttle_key,
(void *)0);
} else {
(void) bdev_strategy(cb);
}
}
} while (nb_left);
}
static void
{
/*
* This is really a B_ASYNC write but we want Presto to
* cache this write. The iodone routine, logdone, processes
* the buf correctly.
*/
/*
* return EIO for every IO if in hard error state
*/
return;
}
}
static void
{
/* all IO returns errors when in error state */
(void) trans_wait(bp);
return;
}
if (trans_wait(bp))
}
/*
* NOTE: writers are single threaded thru the log layer.
* This means we can safely reference and change the cb and bp fields
* that ldl_read does not reference w/o holding the cb_rwlock or
* the bp makebusy lock.
*/
static void
{
/*
* async write the buf
*/
/*
* no longer filling any buf
*/
/*
* no extra buffer space; all done
*/
return;
/*
* give extra buffer space to a new bp
* try to take buf off of free list
*/
} else {
}
/*
* lock out readers and put new buf at LRU position
*/
}
static void
{
/*
* discard all bufs that overlap the range (lof, lof + nb)
*/
do {
continue;
}
continue;
}
}
/*
* NOTE: writers are single threaded thru the log layer.
* This means we can safely reference and change the cb and bp fields
* that ldl_read does not reference w/o holding the cb_rwlock or
* the bp makebusy lock.
*/
static buf_t *
{
/*
* cb_dirty is the buffer we are currently filling; if any
*/
return (bp);
}
/*
* discard any bp that overlaps the current tail since we are
* about to overwrite it.
*/
/*
* steal LRU buf
*/
/*
* NOTE:
* 1. un_tail_lof never addresses >= un_eol_lof
* 2. b_blkno + btodb(b_bufsize) may > un_eol_lof
* this case is handled in storebuf
*/
return (bp);
}
void
{
int i;
/*
* Clear previous allocation
*/
/*
* preallocate 3 bp's and put them on the free list.
*/
for (i = 0; i < 3; ++i) {
}
/*
* first bp claims entire write buffer
*/
}
void
{
/*
* Clear previous allocation
*/
/*
* preallocate N bufs that are hard-sized to blksize
* in other words, the read buffer pool is a linked list
* of statically sized bufs.
*/
} else
}
}
void
{
return;
/*
* free the active bufs
*/
else
}
/*
* free the free bufs
*/
}
}
static int
{
}
static buf_t *
{
/*
* find a buf that contains the offset lof
*/
do {
return (bp);
}
return (NULL);
}
static off_t
{
/*
* we mustn't:
* o read past eol
* o read past the tail
* o read data that may be being written.
*/
do {
break;
}
/* lof is prior to the range represented by the write buf */
return (rlof);
else
/* lof follows the range represented by the write buf */
}
static buf_t *
{
/*
* retrieve as much data as possible from the incore buffers
*/
return (bp);
}
return (bp);
}
/*
* steal the LRU buf
*/
/*
* don't read past the tail or the end-of-log
*/
return (bp);
}
/*
* NOTE: writers are single threaded thru the log layer.
* This means we can safely reference and change the cb and bp fields
* that ldl_read does not reference w/o holding the cb_rwlock or
* the bp makebusy lock.
*/
static int
{
/*
* there is no `next' bp; do nothing
*/
return (0);
/*
* buffer space is not adjacent; do nothing
*/
return (0);
/*
* locking protocol requires giving up any bp locks before
* acquiring cb_rwlock. This is okay because we hold
* un_log_mutex.
*/
/*
* lock out ldl_read
*/
/*
*/
/*
* free the next bp and steal its space
*/
return (1);
}
static size_t
{
/*
* log wrapped; async write this bp
*/
}
/*
* out of bp space; get more or async write buf
*/
}
}
}
if (nb_left)
goto again;
return (nb);
}
static void
{
} else {
}
}
/*
* dst_va == NULL means don't copy anything
*/
static ulong_t
{
/*
* copy from bp to dst_va
*/
while (dst_nb) {
/*
* compute address within bp
*/
/*
* adjust copy size to amount of data in bp
*/
/*
* adjust copy size to amount of data in sector
*/
/*
* dst_va == NULL means don't do copy (see logseek())
*/
if (dst_va) {
}
/*
* advance over sector trailer
*/
if (nb_sec == 0)
dst_lof += sizeof (sect_trailer_t);
/*
* exhausted buffer
* return current lof for next read
*/
return (sav_dst_nb - dst_nb);
}
}
/*
* copy complete - return current lof
*/
return (sav_dst_nb);
}
void
{
int wrapped;
/*
* if nothing to write; then do nothing
*/
return;
/*
* round up to sector boundary and set new tail
* don't readjust st_ident if buf is already rounded
*/
return;
}
wrapped = 0;
++wrapped;
}
/*
* fix up the sector trailer
*/
/* LINTED */
st = (sect_trailer_t *)
/*
* if tail wrapped or we have exhausted this buffer
* async write the buffer
*/
else
}
void
{
/*
* if nothing to write; then do nothing
*/
return;
}
int
{
}
int
{
/*
* Add up the size used by the deltas
* round nb up to a sector length plus an extra sector
* w/o the extra sector we couldn't distinguish
* a full log (head == tail) from an empty log (head == tail)
*/
}
else
}
void
{
/* Write the delta */
while (nb) {
return;
}
if (nb)
}
/* If a commit, cancel, or 0's; we're almost done */
case DT_COMMIT:
case DT_CANCEL:
case DT_ABZERO:
/* roll needs to know where the next delta will go */
return;
default:
break;
}
/* Now write the data */
/* Save where we will put the data */
while (nb) {
return;
}
if (nb)
}
}
void
{
/*
* wait on them
*/
do {
}
}
/*
* seek nb bytes from location lof
*/
static int
{
while (nb) {
return (EIO);
}
}
return (0);
}
int
{
int error;
/*
* check for a cached roll buffer
*/
if (crb) {
/*
* This mapentry overlaps with the beginning of
* the supplied buffer
*/
} else {
/*
* This mapentry starts at or after
* the supplied buffer.
*/
}
continue;
}
/*
* check for a delta full of zeroes - there's no log data
*/
continue;
}
if (error)
return (EIO);
} else {
}
while (rnb) {
return (EIO);
}
}
}
return (0);
}
void
{
int error;
#if DEBUG
/*
* Scan test is running; don't update intermediate state
*/
return;
#endif /* DEBUG */
/* If a snapshot is enabled write through the snapshot driver. */
else
if (error)
}
/*
* The head will be set to (new_lof - header) since ldl_sethead is
* called with the new_lof of the data portion of a delta.
*/
void
{
if (data_lof == -1) {
/* log is empty */
}
} else {
/* compute header's lof */
/* whoops, header spans sectors; subtract out sector trailer */
new_lof -= sizeof (sect_trailer_t);
/* whoops, header wrapped the log; go to last sector */
/* sector offset */
/* add to last sector's lof */
}
}
/*
* check for nop
*/
return;
/*
* invalidate the affected bufs and calculate new ident
*/
} else {
}
/*
* don't update the head if there has been an error
*/
return;
/* Fix up the head and ident */
if (data_lof == -1) {
}
/* Commit to the database */
}
/*
* The tail will be set to the sector following lof+nb
* lof + nb == size of the last delta + commit record
* this function is called once after the log scan has completed.
*/
void
{
if (lof == -1) {
}
/* Commit to the database */
return;
}
/*
* new_lof is the offset of the sector following the last commit
*/
/*
* calculate new ident
*/
} else {
}
/* Fix up the tail and ident */
/* Commit to the database */
}
/*
* LOGSCAN STUFF
*/
static int
{
ulong_t ident;
/*
* compute ident for first sector in the buffer
*/
ident = ul->un_head_ident;
} else {
}
/*
* truncate the buffer down to the last valid sector
*/
/* LINTED */
for (i = 0; i < nblk; ++i) {
break;
/* remember last valid tid for ldl_logscan_error() */
/* LINTED */
++ident;
}
/*
* make sure that lof is still within range
*/
}
{
/*
* lof is the offset following the commit header. However,
* if the commit header fell on the end-of-sector, then lof
* has already been advanced to the beginning of the next
* sector. So do nothing. Otherwise, return the remaining
* bytes in the sector.
*/
return (0);
return (NB_LEFT_IN_SECTOR(lof));
}
int
{
/*
* Check the log data doesn't go out of bounds
*/
return (EIO);
}
} else {
return (EIO);
}
}
while (nb) {
return (EIO);
}
/*
* out-of-seq idents means partial transaction
* panic, non-corrupting powerfail, ...
*/
return (EIO);
}
/*
* copy the header into the caller's buf
*/
if (va)
}
return (0);
}
void
{
/*
* logscan has begun
*/
/*
* reset the circular bufs
*/
/*
* set the tail to reflect a full log
*/
/*
* un_tid is used during error processing; it is initialized to
* the tid of the delta at un_head_lof;
*/
}
void
{
/*
* reset the circular bufs
*/
/*
*/
}
int
{
/*
* snapshot the log state
*/
/*
* compute number of busy (inuse) bytes
*/
else
/*
* return TRUE if > 75% full
*/
}
void
{
/*
* already in error state; do nothing
*/
return;
/*
* Commit to state sectors
*/
/* Pretty print */
/*
* If we aren't in the middle of scan (aka snarf); tell ufs
* to hard lock itself.
*/
}
{
extern uint32_t ldl_minbufsize;
/*
* initial guess is the maxtransfer value for this log device
* increase if too small
* decrease if too large
*/
if (bufsize < ldl_minbufsize)
return (bufsize);
}