fsys_reiserfs.c revision 677833bc953b6cb418c701facbdcf4aa18d6c44e
/* fsys_reiserfs.c - an implementation for the ReiserFS filesystem */
/*
* GRUB -- GRand Unified Bootloader
* Copyright (C) 2000, 2001 Free Software Foundation, Inc.
*
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifdef FSYS_REISERFS
#include "shared.h"
#include "filesys.h"
#include <lib.h>
#include "string.h"
/* Some parts of this code (mainly the structures and defines) are
* from the original reiser fs code, as found in the linux kernel.
*/
typedef __signed__ char __s8;
typedef unsigned char __u8;
typedef __signed__ short __s16;
typedef unsigned short __u16;
typedef __signed__ int __s32;
typedef unsigned int __u32;
typedef unsigned long long __u64;
/* linux/posix_type.h */
typedef long linux_off_t;
/* linux/little_endian.h */
#define __cpu_to_le64(x) ((__u64) (x))
#define __le64_to_cpu(x) ((__u64) (x))
#define __cpu_to_le32(x) ((__u32) (x))
#define __le32_to_cpu(x) ((__u32) (x))
#define __cpu_to_le16(x) ((__u16) (x))
#define __le16_to_cpu(x) ((__u16) (x))
/* include/linux/reiser_fs.h */
/* This is the new super block of a journaling reiserfs system */
struct reiserfs_super_block
{
__u32 s_journal_size; /* size of the journal on FS creation. used to make sure they don't overflow it */
};
#define REISERFS_MAX_SUPPORTED_VERSION 2
#define REISERFS_SUPER_MAGIC_STRING "ReIsErFs"
#define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs"
#define MAX_HEIGHT 7
/* must be correct to keep the desc and commit structs at 4k */
#define JOURNAL_TRANS_HALF 1018
/* first block written in a commit. */
struct reiserfs_journal_desc {
char j_magic[12];
};
/* last block written in a commit */
struct reiserfs_journal_commit {
char j_digest[16]; /* md5 sum of all the blocks involved, including desc and commit. not used, kill it */
};
/* this header block gets written whenever a transaction is considered
fully flushed, and is more recent than the last fully flushed
transaction.
fully flushed means all the log blocks and all the real blocks are
on disk, and this transaction does not need to be replayed.
*/
struct reiserfs_journal_header {
/* id of last fully flushed transaction */
/* offset in the log of where to start replay after a crash */
/* mount id to detect very old transactions */
};
/* magic string to find desc blocks in the journal */
#define JOURNAL_DESC_MAGIC "ReIsErLB"
/*
* directories use this key as well as old files
*/
struct offset_v1
{
/*
* for regular files this is the offset to the first byte of the
* body, contained in the object-item, as measured from the start of
* the entire body of the object.
*
* for directory entries, k_offset consists of hash derived from
* hashing the name and using few bits (23 or more) of the resulting
* hash, and generation number that allows distinguishing names with
* hash collisions. If number of collisions overflows generation
* number, we return EEXIST. High order bit is 0 always
*/
};
struct offset_v2
{
/*
* for regular files this is the offset to the first byte of the
* body, contained in the object-item, as measured from the start of
* the entire body of the object.
*
* for directory entries, k_offset consists of hash derived from
* hashing the name and using few bits (23 or more) of the resulting
* hash, and generation number that allows distinguishing names with
* hash collisions. If number of collisions overflows generation
* number, we return EEXIST. High order bit is 0 always
*/
};
struct key
{
/* packing locality: by default parent directory object id */
/* object identifier */
/* the offset and node type (old and new form) */
union
{
}
u;
};
/* Header of a disk block. More precisely, header of a formatted leaf
or internal node, and not the header of an unformatted node. */
struct block_head
{
struct key blk_right_delim_key; /* Right delimiting key for this block (supported for leaf level nodes
only) */
};
#define BLKH_SIZE (sizeof (struct block_head))
struct item_head
{
union
{
is an indirect item. This equals 0xFFFF iff this is a direct item or
stat data item. Note that the key, not this field, is used to determine
the item type, and thus which field this union contains. */
entries in the directory item. */
}
u;
ITEM_VERSION_2 for new ones.
Highest bit is set by fsck
temporary, cleaned after all done */
};
/* size of item header */
#define ITEM_VERSION_1 0
#define ITEM_VERSION_2 1
struct disk_child
{
unsigned long dc_block_number; /* Disk child's block number. */
unsigned short dc_size; /* Disk child's used space. */
};
#define DC_SIZE (sizeof (struct disk_child))
/* Stat Data on disk.
*
* Note that reiserfs has two different forms of stat data. Luckily
* the fields needed by grub are at the same position.
*/
struct stat_data
{
};
struct reiserfs_de_head
{
object, that is referenced by directory entry */
directory entry */
future), and 2) whether entry is hidden
(unlinked) */
};
#define DEH_SIZE (sizeof (struct reiserfs_de_head))
#define SD_OFFSET 0
#define SD_UNIQUENESS 0
#define DOT_OFFSET 1
#define DOT_DOT_OFFSET 2
#define DIRENTRY_UNIQUENESS 500
#define V1_TYPE_STAT_DATA 0x0
#define V1_TYPE_DIRECT 0xffffffff
#define V1_TYPE_INDIRECT 0xfffffffe
#define V1_TYPE_DIRECTORY_MAX 0xfffffffd
#define V2_TYPE_STAT_DATA 0
#define V2_TYPE_INDIRECT 1
#define V2_TYPE_DIRECT 2
#define V2_TYPE_DIRENTRY 3
#define REISERFS_ROOT_OBJECTID 2
#define REISERFS_ROOT_PARENT_OBJECTID 1
/* the spot for the super in versions 3.5 - 3.5.11 (inclusive) */
#define REISERFS_OLD_BLOCKSIZE 4096
/* The size of the node cache */
#define FSYSREISER_MIN_BLOCKSIZE SECTOR_SIZE
/* Info about currently opened file */
struct fsys_reiser_fileinfo
{
};
/* In memory info about the currently mounted filesystem */
struct fsys_reiser_info
{
/* The last read item head */
struct item_head *current_ih;
/* The last read item */
char *current_item;
/* The information for the currently opened file */
struct fsys_reiser_fileinfo fileinfo;
/* The start of the journal */
/* The size of the journal */
/* The first valid descriptor block in journal
(relative to journal_block) */
/* The ReiserFS version. */
/* The current depth of the reiser tree. */
/* SECTOR_SIZE << blocksize_shift == blocksize. */
/* 1 << full_blocksize_shift == blocksize. */
/* The reiserfs block size (must be a power of 2) */
/* The number of cached tree nodes */
/* The number of valid transactions in journal */
unsigned int blocks[MAX_HEIGHT];
unsigned int next_key_nr[MAX_HEIGHT];
};
/* The cached s+tree blocks in FSYS_BUF, see below
* for a more detailed description.
*/
/* The fsys_reiser_info block.
*/
#define INFO \
/*
* The journal cache. For each transaction it contains the number of
* blocks followed by the real block numbers of this transaction.
*
* If the block numbers of some transaction won't fit in this space,
* this list is stopped with a 0xffffffff marker and the remaining
* uncommitted transactions aren't cached.
*/
static __inline__ unsigned long
{
__asm__ ("bsfl %1,%0"
: "=r" (word)
: "r" (word));
return word;
}
static __inline__ int
is_power_of_two (unsigned long word)
{
}
static int
{
}
/* Read a block from ReiserFS file system, taking the journal into
* account. If the block nr is in the journal, the block from the
* journal taken.
*/
static int
{
int translatedNr = blockNr;
while (transactions-- > 0)
{
int i = 0;
int j_len;
if (*journal_table != 0xffffffff)
{
/* Search for the blockNr in cached journal */
j_len = *journal_table++;
while (i++ < j_len)
{
if (*journal_table++ == blockNr)
{
journal_table += j_len - i;
goto found;
}
}
}
else
{
/* This is the end of cached journal marker. The remaining
* transactions are still on disk.
*/
struct reiserfs_journal_desc desc;
struct reiserfs_journal_commit commit;
return 0;
while (i < j_len && i < JOURNAL_TRANS_HALF)
goto found;
if (j_len >= JOURNAL_TRANS_HALF)
{
if (! journal_read (commit_block,
return 0;
while (i < j_len)
goto found;
}
}
goto not_found;
#ifdef REISERDEBUG
printf ("block_read: block %d is mapped to journal block %d.\n",
#endif
/* We must continue the search, as this block may be overwritten
* in later transactions.
*/
}
}
/* Init the journal data structure. We try to cache as much as
* possible in the JOURNAL_START-JOURNAL_END space, but if it is full
* we can still read the rest from the disk on demand.
*
* The first number of valid transactions and the descriptor block of the
* first valid transaction are held in INFO. The transactions are all
* adjacent, but we must take care of the journal wrap around.
*/
static int
journal_init (void)
{
unsigned int desc_block;
unsigned int commit_block;
unsigned int next_trans_id;
struct reiserfs_journal_header header;
struct reiserfs_journal_desc desc;
struct reiserfs_journal_commit commit;
if (desc_block >= block_count)
return 0;
#ifdef REISERDEBUG
printf ("journal_init: last flushed %d\n",
#endif
while (1)
{
/* no more valid transactions */
break;
/* no more valid transactions */
break;
#ifdef REISERDEBUG
printf ("Found valid transaction %d/%d at %d.\n",
#endif
if (journal_table < JOURNAL_END)
{
{
/* The table is almost full; mark the end of the cached
* journal.*/
*journal_table = 0xffffffff;
}
else
{
int i;
/* Cache the length and the realblock numbers in the table.
* The block number of descriptor can easily be computed.
* and need not to be stored here.
*/
{
#ifdef REISERDEBUG
printf ("block %d is in journal %d.\n",
#endif
}
{
#ifdef REISERDEBUG
printf ("block %d is in journal %d.\n",
#endif
}
}
}
}
#ifdef REISERDEBUG
printf ("Transaction %d/%d at %d isn't valid.\n",
#endif
return errnum == 0;
}
/* check filesystem types and read superblock into memory buffer */
int
reiserfs_mount (void)
{
struct reiserfs_super_block super;
(char *) &super)
|| (/* check that this is not a copy inside the journal log */
{
/* Try old super block position */
(char *) &super))
return 0;
{
/* pre journaling super block ? */
(char*) ((int) &super + 20)) > 0)
return 0;
super.s_journal_block = 0;
}
}
/* check the version number. */
return 0;
INFO->cached_slots =
#ifdef REISERDEBUG
printf ("reiserfs_mount: version=%d, blocksize=%d\n",
#endif /* REISERDEBUG */
/* Clear node cache. */
return 0;
/* Initialize journal code. If something fails we end with zero
* journal_transactions, so we don't access the journal at all.
*/
INFO->journal_transactions = 0;
{
journal_init ();
/* Read in super block again, maybe it is in the journal */
0, sizeof (struct reiserfs_super_block), (char *) &super);
}
return 0;
#ifdef REISERDEBUG
printf ("root read_in: block=%d, depth=%d\n",
#endif /* REISERDEBUG */
return 0;
{
/* There is only one node in the whole filesystem,
* which is simultanously leaf and root */
}
return 1;
}
/***************** TREE ACCESSING METHODS *****************************/
/* I assume you are familiar with the ReiserFS tree, if not go to
*
* My tree node cache is organized as following
* 0 ROOT node
* 1 LEAF node (if the ROOT is also a LEAF it is copied here
* 2-n other nodes on current path from bottom to top.
* if there is not enough space in the cache, the top most are
* omitted.
*
* I have only two methods to find a key in the tree:
* search_stat(dir_id, objectid) searches for the stat entry (always
* the first entry) of an object.
* next_key() gets the next key in tree order.
*
* This means, that I can only sequential reads of files are
* efficient, but this really doesn't hurt for grub.
*/
/* Read in the node at the current path and depth into the node cache.
* You must set INFO->blocks[depth] before.
*/
static char *
{
if (depth < num_cached)
{
/* This is the cached part of the path. Check if same block is
* needed.
*/
return cache;
}
else
#ifdef REISERDEBUG
printf (" next read_in: block=%d (depth=%d)\n",
#endif /* REISERDEBUG */
return 0;
/* Make sure it has the right node level */
{
return 0;
}
return cache;
}
/* Get the next key, i.e. the key following the last retrieved key in
* tree order. INFO->current_ih and
* INFO->current_info are adapted accordingly. */
static int
next_key (void)
{
int depth;
char *cache;
#ifdef REISERDEBUG
printf ("next_key:\n old ih: key %d:%d:%d:%d version:%d\n",
#endif /* REISERDEBUG */
{
/* The last item, was the last in the leaf node.
* Read in the next block
*/
do
{
{
/* There are no more keys at all.
* Return a dummy item with MAX_KEY */
goto found;
}
depth++;
#ifdef REISERDEBUG
#endif /* REISERDEBUG */
}
else
{
if (! cache)
return 0;
}
do
{
#ifdef REISERDEBUG
#endif /* REISERDEBUG */
/* This is the last item in this block, set the next_key_nr to 0 */
if (! cache)
return 0;
}
while (depth > DISK_LEAF_NODE_LEVEL);
}
#ifdef REISERDEBUG
printf (" new ih: key %d:%d:%d:%d version:%d\n",
#endif /* REISERDEBUG */
return 1;
}
/* preconditions: reiserfs_mount already executed, therefore
* INFO block is valid
* returns: 0 if error (errnum is set),
* nonzero iff we were able to find the key successfully.
* postconditions: on a nonzero return, the current_ih and
* current_item fields describe the key that equals the
* searched key. INFO->next_key contains the next key after
* the searched key.
* side effects: messes around with the cache.
*/
static int
{
char *cache;
int depth;
int nr_item;
int i;
#ifdef REISERDEBUG
#endif /* REISERDEBUG */
while (depth > DISK_LEAF_NODE_LEVEL)
{
for (i = 0; i < nr_item; i++)
{
break;
key++;
}
#ifdef REISERDEBUG
#endif /* REISERDEBUG */
if (! cache)
return 0;
}
/* cache == LEAF */
for (i = 0; i < nr_item; i++)
{
{
#ifdef REISERDEBUG
#endif /* REISERDEBUG */
return 1;
}
ih++;
}
return 0;
}
int
{
unsigned int blocksize;
unsigned int offset;
unsigned int to_read;
#ifdef REISERDEBUG
printf ("reiserfs_read: filepos=%d len=%d, offset=%x:%x\n",
#endif /* REISERDEBUG */
{
goto get_next_key;
}
while (! errnum)
{
break;
#ifdef REISERDEBUG
printf (" loop: filepos=%d len=%d, offset=%d blocksize=%d\n",
#endif /* REISERDEBUG */
{
#ifdef REISERDEBUG
printf ("direct_read: offset=%d, blocksize=%d\n",
#endif /* REISERDEBUG */
if (disk_read_hook != NULL)
{
}
else
goto update_buf_len;
}
{
#ifdef REISERDEBUG
printf ("indirect_read: offset=%d, blocksize=%d\n",
#endif /* REISERDEBUG */
{
/* Journal is only for meta data. Data blocks can be read
* directly without using block_read
*/
if (len == 0)
goto done;
}
}
next_key ();
}
done:
}
/* preconditions: reiserfs_mount already executed, therefore
* INFO block is valid
* returns: 0 if error, nonzero iff we were able to find the file successfully
* postconditions: on a nonzero return, INFO->fileinfo contains the info
* of the file we were trying to look up, filepos is 0 and filemax is
* the size of the file.
*/
int
reiserfs_dir (char *dirname)
{
struct reiserfs_de_head *de_head;
#ifndef STAGE1_5
int do_possibilities = 0;
#endif /* ! STAGE1_5 */
int link_count = 0;
int mode;
while (1)
{
#ifdef REISERDEBUG
#endif /* REISERDEBUG */
/* Search for the stat info first. */
return 0;
#ifdef REISERDEBUG
printf ("sd_mode=%x sd_size=%d\n",
#endif /* REISERDEBUG */
/* If we've got a symbolic link, then chase it. */
{
int len;
if (++link_count > MAX_LINK_COUNT)
{
return 0;
}
/* Get the symlink size. */
/* Find out how long our remaining name is. */
len = 0;
len++;
{
return 0;
}
/* Copy the remaining name to the end of the symlink data.
Note that DIRNAME and LINKBUF may overlap! */
filepos = 0;
if (! next_key ()
{
if (! errnum)
return 0;
}
#ifdef REISERDEBUG
#endif /* REISERDEBUG */
if (*dirname == '/')
{
/* It's an absolute link, so look it up in root. */
}
else
{
/* Relative, so look it up in our parent directory. */
}
/* Now lookup the new name. */
continue;
}
/* if we have a real file (and we're not just printing possibilities),
then this is where we want to exit */
{
{
return 0;
}
filepos = 0;
/* If this is a new stat data and size is > 4GB set filemax to
* maximum
*/
filemax = 0xffffffff;
return next_key ();
}
while (*dirname == '/')
dirname++;
{
return 0;
}
*rest = 0;
# ifndef STAGE1_5
do_possibilities = 1;
# endif /* ! STAGE1_5 */
while (1)
{
char *name_end;
int num_entries;
if (! next_key ())
return 0;
#ifdef REISERDEBUG
printf ("ih: key %d:%d:%d:%d version:%d\n",
#endif /* REISERDEBUG */
break;
while (num_entries > 0)
{
{
int cmp;
/* Directory names in ReiserFS are not null
* terminated. We write a temporary 0 behind it.
* NOTE: that this may overwrite the first block in
* the tree cache. That doesn't hurt as long as we
* don't call next_key () in between.
*/
*name_end = 0;
# ifndef STAGE1_5
if (do_possibilities)
{
if (cmp <= 0)
{
if (print_possibilities > 0)
*name_end = 0;
}
}
else
# endif /* ! STAGE1_5 */
if (cmp == 0)
goto found;
}
/* The beginning of this name marks the end of the next name.
*/
de_head++;
num_entries--;
}
}
# ifndef STAGE1_5
if (print_possibilities < 0)
return 1;
# endif /* ! STAGE1_5 */
return 0;
}
return 1;
}
int
{
struct reiserfs_super_block super;
int num_sectors;
sizeof (struct reiserfs_super_block), (char *) &super))
return 0;
&& (/* check that this is not a super block copy inside
* the journal log */
else
return (needed_sectors <= num_sectors);
}
#endif /* FSYS_REISERFS */