lcnalloc.c revision 1
/**
* lcnalloc.c - Cluster (de)allocation code. Part of the Linux-NTFS project.
*
* Copyright (c) 2002-2004 Anton Altaparmakov
* Copyright (c) 2004 Yura Pakhuchiy
*
* This program/library is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
* by the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program/library is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program (in the main directory of the Linux-NTFS
* distribution in the file COPYING); if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STDIO_H
#include <stdio.h>
#endif
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#include "compat.h"
#include "types.h"
#include "attrib.h"
#include "bitmap.h"
#include "debug.h"
#include "runlist.h"
#include "volume.h"
#include "lcnalloc.h"
#include "logging.h"
/**
* ntfs_cluster_alloc - allocate clusters on an ntfs volume
* @vol: mounted ntfs volume on which to allocate the clusters
* @start_vcn: vcn to use for the first allocated cluster
* @count: number of clusters to allocate
* @start_lcn: starting lcn at which to allocate the clusters (or -1 if none)
* @zone: zone from which to allocate the clusters
*
* Allocate @count clusters preferably starting at cluster @start_lcn or at the
* current allocator position if @start_lcn is -1, on the mounted ntfs volume
* @vol. @zone is either DATA_ZONE for allocation of normal clusters or
* MFT_ZONE for allocation of clusters for the master file table, i.e. the
* $MFT/$DATA attribute.
*
* On success return a runlist describing the allocated cluster(s).
*
* On error return NULL with errno set to the error code.
*
* Notes on the allocation algorithm
* =================================
*
* There are two data zones. First is the area between the end of the mft zone
* and the end of the volume, and second is the area between the start of the
* volume and the start of the mft zone. On unmodified/standard NTFS 1.x
* volumes, the second data zone doesn't exist due to the mft zone being
* expanded to cover the start of the volume in order to reserve space for the
* mft bitmap attribute.
*
* This is not the prettiest function but the complexity stems from the need of
* implementing the mft vs data zoned approach and from the fact that we have
* access to the lcn bitmap in portions of up to 8192 bytes at a time, so we
* need to cope with crossing over boundaries of two buffers. Further, the fact
* that the allocator allows for caller supplied hints as to the location of
* where allocation should begin and the fact that the allocator keeps track of
* where in the data zones the next natural allocation should occur, contribute
* to the complexity of the function. But it should all be worthwhile, because
* this allocator should: 1) be a full implementation of the MFT zone approach
* used by Windows, 2) cause reduction in fragmentation as much as possible,
* and 3) be speedy in allocations (the code is not optimized for speed, but
* the algorithm is, so further speed improvements are probably possible).
*
* FIXME: We should be monitoring cluster allocation and increment the MFT zone
* size dynamically but this is something for the future. We will just cause
* heavier fragmentation by not doing it and I am not even sure Windows would
* grow the MFT zone dynamically, so it might even be correct not to do this.
* The overhead in doing dynamic MFT zone expansion would be very large and
* unlikely worth the effort. (AIA)
*
* TODO: I have added in double the required zone position pointer wrap around
* logic which can be optimized to having only one of the two logic sets.
* However, having the double logic will work fine, but if we have only one of
* the sets and we get it wrong somewhere, then we get into trouble, so
* removing the duplicate logic requires _very_ careful consideration of _all_
* possible code paths. So at least for now, I am leaving the double logic -
* better safe than sorry... (AIA)
*/
{
ntfs_log_trace("Entering with count = 0x%llx, start_lcn = 0x%llx, zone = "
ntfs_log_trace("Invalid arguments!\n");
return NULL;
}
/* Return empty runlist if @count == 0 */
if (!count) {
if (!rl)
return NULL;
return rl;
}
/* Allocate memory. */
if (!buf)
return NULL;
/*
* If no specific @start_lcn was requested, use the current data zone
* position, otherwise use the requested @start_lcn but make sure it
* lies outside the mft zone. Also set done_zones to 0 (no zones done)
* and pass depending on whether we are starting inside a zone (1) or
* at the beginning of a zone (2). If requesting from the MFT_ZONE,
* we either start at the current position within the mft zone or at
* the specified position. If the latter is out of bounds then we start
* at the beginning of the MFT_ZONE.
*/
done_zones = 0;
pass = 1;
/*
* zone_start and zone_end are the current search range. search_zone
* is 1 for mft zone, 2 for data zone 1 (end of mft zone till end of
* volume) and 4 for data zone 2 (start of volume till start of mft
* zone).
*/
if (zone_start < 0) {
else
if (!zone_start) {
/*
* Zone starts at beginning of volume which means a
* single pass is sufficient.
*/
pass = 2;
}
/*
* Starting at beginning of data1_zone which means a single
* pass in this zone is sufficient.
*/
pass = 2;
if (!vol->mft_zone_end)
zone_start = 0;
/*
* Starting at beginning of volume which means a single pass
* is sufficient.
*/
pass = 2;
}
search_zone = 1;
} else /* if (zone == DATA_ZONE) */ {
/* Skip searching the mft zone. */
done_zones |= 1;
search_zone = 2;
} else {
search_zone = 4;
}
}
/*
* bmp_pos is the current bit position inside the bitmap. We use
* bmp_initial_pos to determine whether or not to do a zone switch.
*/
/* Loop until all clusters are allocated, i.e. clusters == 0. */
while (1) {
ntfs_log_trace("Start of outer while loop: done_zones = 0x%x, "
"search_zone = %i, pass = %i, zone_start = "
"0x%llx, zone_end = 0x%llx, bmp_initial_pos = "
"0x%llx, bmp_pos = 0x%llx, rlpos = %i, rlsize = "
(long long)zone_start, (long long)zone_end,
(long long)bmp_initial_pos, (long long)bmp_pos,
/* Loop until we run out of free clusters. */
if (br <= 0) {
if (!br) {
/* Reached end of attribute. */
ntfs_log_trace("End of attribute reached. Skipping "
"to zone_pass_done.\n");
goto zone_pass_done;
}
ntfs_log_trace("ntfs_attr_pread() failed. Aborting.\n");
goto err_ret;
}
/*
* We might have read less than 8192 bytes if we are close to
* the end of the attribute.
*/
bmp_pos &= ~7;
need_writeback = 0;
ntfs_log_trace("Before inner while loop: buf_size = %i, lcn = "
"0x%llx, bmp_pos = 0x%llx, need_writeback = %i.\n",
ntfs_log_trace("In inner while loop: buf_size = %i, lcn = "
"0x%llx, bmp_pos = 0x%llx, "
"need_writeback = %i, byte ofs = 0x%x, "
"*byte = 0x%x.\n", buf_size,
(unsigned int)*byte);
/* Skip full bytes. */
if (*byte == 0xff) {
ntfs_log_trace("continuing while loop 1.\n");
continue;
}
/* If the bit is already set, go onto the next one. */
lcn++;
ntfs_log_trace("continuing while loop 2.\n");
continue;
}
/* Reallocate memory if necessary. */
ntfs_log_trace("Reallocating space.\n");
if (!rl)
ntfs_log_trace("First free bit is at LCN = "
rlsize += 4096;
if (!trl) {
ntfs_log_trace("Failed to allocate memory, "
"going to wb_err_ret.\n");
goto wb_err_ret;
}
ntfs_log_trace("Reallocated memory, rlsize = "
"0x%x.\n", rlsize);
}
/* Allocate the bitmap bit. */
vol->nr_free_clusters--;
/* We need to write this bitmap buffer back to disk! */
need_writeback = 1;
ntfs_log_trace("*byte = 0x%x, need_writeback is set.\n",
(unsigned int)*byte);
/*
* Coalesce with previous run if adjacent LCNs.
* Otherwise, append a new run.
*/
ntfs_log_trace("Adding run (lcn 0x%llx, len 0x%llx), "
"prev_lcn = 0x%llx, lcn = 0x%llx, "
"bmp_pos = 0x%llx, prev_run_len = "
"0x%llx, rlpos = %i.\n",
(long long)bmp_pos,
(long long)prev_run_len, rlpos);
ntfs_log_trace("Coalescing to run (lcn 0x%llx, len "
"0x%llx).\n",
ntfs_log_trace("Run now (lcn 0x%llx, len 0x%llx), "
"prev_run_len = 0x%llx.\n",
(long long)prev_run_len);
} else {
if (rlpos) {
ntfs_log_trace("Adding new run, (previous "
"run lcn 0x%llx, len 0x%llx).\n",
} else {
ntfs_log_trace("Adding new run, is first run.\n");
}
rlpos++;
}
/* Done? */
if (!--clusters) {
/*
* Update the current zone position. Positions
* of already scanned zones have been updated
* during the respective zone switches.
*/
ntfs_log_trace("Done. Updating current zone "
"position, tc = 0x%llx, search_zone = %i.\n",
(long long)tc, search_zone);
switch (search_zone) {
case 1:
ntfs_log_trace("Before checks, vol->mft_zone_pos = 0x%llx.\n",
(long long) vol->mft_zone_pos);
vol->mft_zone_pos =
if (!vol->mft_zone_end)
vol->mft_zone_pos = 0;
} else if ((bmp_initial_pos >=
vol->mft_zone_pos ||
ntfs_log_trace("After checks, vol->mft_zone_pos = 0x%llx.\n",
(long long) vol->mft_zone_pos);
break;
case 2:
ntfs_log_trace("Before checks, vol->data1_zone_pos = 0x%llx.\n",
(long long) vol->data1_zone_pos);
else if ((bmp_initial_pos >=
vol->data1_zone_pos ||
ntfs_log_trace("After checks, vol->data1_zone_pos = 0x%llx.\n",
(long long) vol->data1_zone_pos);
break;
case 4:
ntfs_log_trace("Before checks, vol->data2_zone_pos = 0x%llx.\n",
(long long) vol->data2_zone_pos);
vol->data2_zone_pos = 0;
else if (bmp_initial_pos >=
vol->data2_zone_pos ||
ntfs_log_trace("After checks, vol->data2_zone_pos = 0x%llx.\n",
(long long) vol->data2_zone_pos);
break;
default:
NTFS_BUG("switch (search_zone) 1");
return NULL;
}
ntfs_log_trace("Going to done_ret.\n");
goto done_ret;
}
lcn++;
}
ntfs_log_trace("After inner while loop: buf_size = 0x%x, lcn = "
"0x%llx, bmp_pos = 0x%llx, need_writeback = %i.\n",
(long long)bmp_pos, need_writeback);
if (need_writeback) {
ntfs_log_trace("Writing back.\n");
need_writeback = 0;
if (bw == -1)
else
ntfs_log_trace("Bitmap writeback failed in read next "
"buffer code path with error code %i.\n", err);
goto err_ret;
}
}
ntfs_log_trace("Continuing outer while loop, bmp_pos = "
"0x%llx, zone_end = 0x%llx.\n",
(long long)bmp_pos,
(long long)zone_end);
continue;
}
zone_pass_done: /* Finished with the current zone pass. */
if (pass == 1) {
/*
* Now do pass 2, scanning the first part of the zone
* we omitted in pass 1.
*/
pass = 2;
switch (search_zone) {
case 1: /* mft_zone */
break;
case 2: /* data1_zone */
break;
case 4: /* data2_zone */
zone_start = 0;
break;
default:
NTFS_BUG("switch (search_zone) 2");
}
/* Sanity check. */
if (zone_end < zone_start)
ntfs_log_trace("Continuing outer while loop, pass = 2, "
"zone_start = 0x%llx, zone_end = "
"0x%llx, bmp_pos = 0x%llx.\n",
continue;
} /* pass == 2 */
ntfs_log_trace("At done_zones_check, search_zone = %i, done_zones "
"before = 0x%x, done_zones after = 0x%x.\n",
if (done_zones < 7) {
ntfs_log_trace("Switching zone.\n");
/* Now switch to the next zone we haven't done yet. */
pass = 1;
switch (search_zone) {
case 1:
ntfs_log_trace("Switching from mft zone to data1 "
"zone.\n");
/* Update mft zone position. */
if (rlpos) {
ntfs_log_trace("Before checks, vol->mft_zone_pos = 0x%llx.\n",
(long long) vol->mft_zone_pos);
vol->mft_zone_pos =
if (!vol->mft_zone_end)
vol->mft_zone_pos = 0;
} else if ((bmp_initial_pos >=
vol->mft_zone_pos ||
ntfs_log_trace("After checks, vol->mft_zone_pos = 0x%llx.\n",
(long long) vol->mft_zone_pos);
}
/* Switch from mft zone to data1 zone. */
pass = 2;
if (zone_start >= zone_end) {
pass = 2;
}
break;
case 2:
ntfs_log_trace("Switching from data1 zone to data2 "
"zone.\n");
/* Update data1 zone position. */
if (rlpos) {
ntfs_log_trace("Before checks, vol->data1_zone_pos = 0x%llx.\n",
(long long) vol->data1_zone_pos);
else if ((bmp_initial_pos >=
vol->data1_zone_pos ||
ntfs_log_trace("After checks, vol->data1_zone_pos = 0x%llx.\n",
(long long) vol->data1_zone_pos);
}
/* Switch from data1 zone to data2 zone. */
search_zone = 4;
if (!zone_start)
pass = 2;
if (zone_start >= zone_end) {
bmp_initial_pos = 0;
pass = 2;
}
break;
case 4:
ntfs_log_debug("Switching from data2 zone to data1 "
"zone.\n");
/* Update data2 zone position. */
if (rlpos) {
ntfs_log_trace("Before checks, vol->data2_zone_pos = 0x%llx.\n",
(long long) vol->data2_zone_pos);
vol->data2_zone_pos = 0;
else if (bmp_initial_pos >=
vol->data2_zone_pos ||
ntfs_log_trace("After checks, vol->data2_zone_pos = 0x%llx.\n",
(long long) vol->data2_zone_pos);
}
/* Switch from data2 zone to data1 zone. */
goto switch_to_data1_zone; /* See above. */
default:
NTFS_BUG("switch (search_zone) 3");
}
ntfs_log_trace("After zone switch, search_zone = %i, pass = "
"%i, bmp_initial_pos = 0x%llx, "
"zone_start = 0x%llx, zone_end = "
(long long)bmp_initial_pos,
(long long)zone_start,
(long long)zone_end);
if (zone_start == zone_end) {
ntfs_log_trace("Empty zone, going to "
"done_zones_check.\n");
/* Empty zone. Don't bother searching it. */
goto done_zones_check;
}
ntfs_log_trace("Continuing outer while loop.\n");
continue;
} /* done_zones == 7 */
ntfs_log_trace("All zones are finished.\n");
/*
* All zones are finished! If DATA_ZONE, shrink mft zone. If
* MFT_ZONE, we have really run out of space.
*/
ntfs_log_trace("vol->mft_zone_start = 0x%llx, vol->mft_zone_end = "
"0x%llx, mft_zone_size = 0x%llx.\n",
(long long)vol->mft_zone_start,
(long long)vol->mft_zone_end,
(long long)mft_zone_size);
ntfs_log_trace("No free clusters left, going to err_ret.\n");
/* Really no more space left on device. */
goto err_ret;
} /* zone == DATA_ZONE && mft_zone_size > 0 */
ntfs_log_trace("Shrinking mft zone.\n");
mft_zone_size >>= 1;
if (mft_zone_size > 0)
else /* mft zone and data2 zone no longer exist. */
vol->mft_zone_end = 0;
if (!vol->mft_zone_end)
vol->mft_zone_pos = 0;
}
search_zone = 2;
pass = 2;
done_zones &= ~2;
ntfs_log_trace("After shrinking mft zone, mft_zone_size = 0x%llx, "
"vol->mft_zone_start = 0x%llx, "
"vol->mft_zone_end = 0x%llx, vol->mft_zone_pos "
"= 0x%llx, search_zone = 2, pass = 2, "
"dones_zones = 0x%x, zone_start = 0x%llx, "
"zone_end = 0x%llx, vol->data1_zone_pos = "
"0x%llx, continuing outer while loop.\n",
(long long)mft_zone_size,
(long long)vol->mft_zone_start,
(long long)vol->mft_zone_end,
(long long)vol->mft_zone_pos,
(long long)zone_start,
(long long)zone_end,
(long long)vol->data1_zone_pos);
}
ntfs_log_debug("After outer while loop.\n");
ntfs_log_debug("At done_ret.\n");
/* Add runlist terminator element. */
if (need_writeback) {
ntfs_log_trace("Writing back.\n");
need_writeback = 0;
if (bw < 0)
else
ntfs_log_trace("Bitmap writeback failed in done code path "
"with error code %i.\n", err);
goto err_ret;
}
}
ntfs_log_debug("At done_err_ret (follows done_ret).\n");
/* Done! */
if (!err)
return rl;
ntfs_log_trace("Failed to allocate clusters. Returning with error code "
"%i.\n", err);
return NULL;
ntfs_log_trace("At wb_err_ret.\n");
if (need_writeback) {
ntfs_log_trace("Writing back.\n");
need_writeback = 0;
if (bw < 0)
else
ntfs_log_trace("Bitmap writeback failed in error code path "
"with error code %i.\n", err);
}
}
ntfs_log_trace("At err_ret.\n");
if (rl) {
ntfs_log_trace("err = ENOSPC, first free lcn = 0x%llx, could "
"allocate up to = 0x%llx clusters.\n",
}
/* Add runlist terminator element. */
/* Deallocate all allocated clusters. */
ntfs_log_trace("Deallocating allocated clusters.\n");
/* Free the runlist. */
} else {
ntfs_log_trace("No space left at all, err = ENOSPC, first "
"free lcn = 0x%llx.\n",
(long long)vol->data1_zone_pos);
}
}
ntfs_log_trace("rl = NULL, going to done_err_ret.\n");
goto done_err_ret;
}
/**
* ntfs_cluster_free_from_rl - free clusters from runlist
* @vol: mounted ntfs volume on which to free the clusters
* @rl: runlist from which to deallocate clusters
*
* On success return 0 and on error return -1 with errno set to the error code.
*/
{
ntfs_log_trace("Entering.\n");
ntfs_log_trace("Dealloc lcn 0x%llx, len 0x%llx.\n",
ntfs_log_trace("Eeek! Deallocation of clusters failed.\n");
return -1;
}
}
return 0;
}
/**
* ntfs_cluster_free - free clusters on an ntfs volume
* @vol: mounted ntfs volume on which to free the clusters
* @na: attribute whose runlist describes the clusters to free
* @start_vcn: vcn in the runlist of @na at which to start freeing clusters
* @count: number of clusters to free or -1 for all clusters
*
* Free @count clusters starting at the cluster @start_vcn in the runlist
* described by the attribute @na from the mounted ntfs volume @vol.
*
* If @count is -1, all clusters from @start_vcn to the end of the runlist
* are deallocated.
*
* On success return the number of deallocated clusters (not counting sparse
* clusters) and on error return -1 with errno set to the error code.
*/
{
ntfs_log_trace("Invalid arguments!\n");
return -1;
}
ntfs_log_trace("Entering for inode 0x%llx, attr 0x%x, count 0x%llx, "
if (!rl) {
return 0;
else
return -1;
}
return -1;
}
/* Find the starting cluster inside the run that needs freeing. */
/* The number of clusters in this run that need freeing. */
/* Do the actual freeing of the clusters in this run. */
to_free))
return -1;
/* We have freed @to_free real clusters. */
} else {
/* No real clusters were freed. */
nr_freed = 0;
}
/* Go to the next run and adjust the number of clusters left to free. */
++rl;
if (count >= 0)
/*
* Loop over the remaining runs, using @count as a capping value, and
* free them.
*/
// FIXME: Need to try ntfs_attr_map_runlist() for attribute
// list support! (AIA)
// FIXME: Eeek! We need rollback! (AIA)
ntfs_log_trace("Eeek! invalid lcn (= %lli). Should attempt "
"to map runlist! Leaving inconsistent "
return -1;
}
/* The number of clusters in this run that need freeing. */
/* Do the actual freeing of the clusters in the run. */
to_free)) {
// FIXME: Eeek! We need rollback! (AIA)
ntfs_log_trace("Eeek! bitmap clear run failed. "
"Leaving inconsistent metadata!\n");
return -1;
}
/* We have freed @to_free real clusters. */
}
if (count >= 0)
}
// FIXME: Eeek! BUG()
ntfs_log_trace("Eeek! count still not zero (= %lli). Leaving "
"inconsistent metadata!\n", (long long)count);
return -1;
}
/* Done. Return the number of actual clusters that were freed. */
return nr_freed;
}