1N/A/**
1N/A * compress.c - Compressed attribute handling code. Part of the Linux-NTFS
1N/A * project.
1N/A *
1N/A * Copyright (c) 2004-2005 Anton Altaparmakov
1N/A * Copyright (c) 2005 Yura Pakhuchiy
1N/A *
1N/A * This program/include file is free software; you can redistribute it and/or
1N/A * modify it under the terms of the GNU General Public License as published
1N/A * by the Free Software Foundation; either version 2 of the License, or
1N/A * (at your option) any later version.
1N/A *
1N/A * This program/include file is distributed in the hope that it will be
1N/A * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
1N/A * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1N/A * GNU General Public License for more details.
1N/A *
1N/A * You should have received a copy of the GNU General Public License
1N/A * along with this program (in the main directory of the Linux-NTFS
1N/A * distribution in the file COPYING); if not, write to the Free Software
1N/A * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1N/A */
1N/A
1N/A#ifdef HAVE_CONFIG_H
1N/A#include "config.h"
1N/A#endif
1N/A
1N/A#ifdef HAVE_STDIO_H
1N/A#include <stdio.h>
1N/A#endif
1N/A#ifdef HAVE_STRING_H
1N/A#include <string.h>
1N/A#endif
1N/A#ifdef HAVE_STDLIB_H
1N/A#include <stdlib.h>
1N/A#endif
1N/A#ifdef HAVE_ERRNO_H
1N/A#include <errno.h>
1N/A#endif
1N/A
1N/A#include "compat.h"
1N/A#include "attrib.h"
1N/A#include "debug.h"
1N/A#include "volume.h"
1N/A#include "types.h"
1N/A#include "layout.h"
1N/A#include "runlist.h"
1N/A#include "compress.h"
1N/A#include "logging.h"
1N/A
1N/A/**
1N/A * enum ntfs_compression_constants - constants used in the compression code
1N/A */
1N/Atypedef enum {
1N/A /* Token types and access mask. */
1N/A NTFS_SYMBOL_TOKEN = 0,
1N/A NTFS_PHRASE_TOKEN = 1,
1N/A NTFS_TOKEN_MASK = 1,
1N/A
1N/A /* Compression sub-block constants. */
1N/A NTFS_SB_SIZE_MASK = 0x0fff,
1N/A NTFS_SB_SIZE = 0x1000,
1N/A NTFS_SB_IS_COMPRESSED = 0x8000,
1N/A} ntfs_compression_constants;
1N/A
1N/A/**
1N/A * ntfs_decompress - decompress a compression block into an array of pages
1N/A * @dest: buffer to which to write the decompressed data
1N/A * @dest_size: size of buffer @dest in bytes
1N/A * @cb_start: compression block to decompress
1N/A * @cb_size: size of compression block @cb_start in bytes
1N/A *
1N/A * This decompresses the compression block @cb_start into the destination
1N/A * buffer @dest.
1N/A *
1N/A * @cb_start is a pointer to the compression block which needs decompressing
1N/A * and @cb_size is the size of @cb_start in bytes (8-64kiB).
1N/A *
1N/A * Return 0 if success or -EOVERFLOW on error in the compressed stream.
1N/A */
1N/Astatic int ntfs_decompress(u8 *dest, const u32 dest_size,
1N/A u8 *const cb_start, const u32 cb_size)
1N/A{
1N/A /*
1N/A * Pointers into the compressed data, i.e. the compression block (cb),
1N/A * and the therein contained sub-blocks (sb).
1N/A */
1N/A u8 *cb_end = cb_start + cb_size; /* End of cb. */
1N/A u8 *cb = cb_start; /* Current position in cb. */
1N/A u8 *cb_sb_start = cb; /* Beginning of the current sb in the cb. */
1N/A u8 *cb_sb_end; /* End of current sb / beginning of next sb. */
1N/A /* Variables for uncompressed data / destination. */
1N/A u8 *dest_end = dest + dest_size; /* End of dest buffer. */
1N/A u8 *dest_sb_start; /* Start of current sub-block in dest. */
1N/A u8 *dest_sb_end; /* End of current sb in dest. */
1N/A /* Variables for tag and token parsing. */
1N/A u8 tag; /* Current tag. */
1N/A int token; /* Loop counter for the eight tokens in tag. */
1N/A
1N/A ntfs_log_trace("Entering, cb_size = 0x%x.\n", (unsigned)cb_size);
1N/Ado_next_sb:
1N/A ntfs_log_debug("Beginning sub-block at offset = 0x%x in the cb.\n",
1N/A cb - cb_start);
1N/A /*
1N/A * Have we reached the end of the compression block or the end of the
1N/A * decompressed data? The latter can happen for example if the current
1N/A * position in the compression block is one byte before its end so the
1N/A * first two checks do not detect it.
1N/A */
1N/A if (cb == cb_end || !le16_to_cpup((u16*)cb) || dest == dest_end) {
1N/A ntfs_log_debug("Completed. Returning success (0).\n");
1N/A return 0;
1N/A }
1N/A /* Setup offset for the current sub-block destination. */
1N/A dest_sb_start = dest;
1N/A dest_sb_end = dest + NTFS_SB_SIZE;
1N/A /* Check that we are still within allowed boundaries. */
1N/A if (dest_sb_end > dest_end)
1N/A goto return_overflow;
1N/A /* Does the minimum size of a compressed sb overflow valid range? */
1N/A if (cb + 6 > cb_end)
1N/A goto return_overflow;
1N/A /* Setup the current sub-block source pointers and validate range. */
1N/A cb_sb_start = cb;
1N/A cb_sb_end = cb_sb_start + (le16_to_cpup((u16*)cb) & NTFS_SB_SIZE_MASK)
1N/A + 3;
1N/A if (cb_sb_end > cb_end)
1N/A goto return_overflow;
1N/A /* Now, we are ready to process the current sub-block (sb). */
1N/A if (!(le16_to_cpup((u16*)cb) & NTFS_SB_IS_COMPRESSED)) {
1N/A ntfs_log_debug("Found uncompressed sub-block.\n");
1N/A /* This sb is not compressed, just copy it into destination. */
1N/A /* Advance source position to first data byte. */
1N/A cb += 2;
1N/A /* An uncompressed sb must be full size. */
1N/A if (cb_sb_end - cb != NTFS_SB_SIZE)
1N/A goto return_overflow;
1N/A /* Copy the block and advance the source position. */
1N/A memcpy(dest, cb, NTFS_SB_SIZE);
1N/A cb += NTFS_SB_SIZE;
1N/A /* Advance destination position to next sub-block. */
1N/A dest += NTFS_SB_SIZE;
1N/A goto do_next_sb;
1N/A }
1N/A ntfs_log_debug("Found compressed sub-block.\n");
1N/A /* This sb is compressed, decompress it into destination. */
1N/A /* Forward to the first tag in the sub-block. */
1N/A cb += 2;
1N/Ado_next_tag:
1N/A if (cb == cb_sb_end) {
1N/A /* Check if the decompressed sub-block was not full-length. */
1N/A if (dest < dest_sb_end) {
1N/A int nr_bytes = dest_sb_end - dest;
1N/A
1N/A ntfs_log_debug("Filling incomplete sub-block with zeroes.\n");
1N/A /* Zero remainder and update destination position. */
1N/A memset(dest, 0, nr_bytes);
1N/A dest += nr_bytes;
1N/A }
1N/A /* We have finished the current sub-block. */
1N/A goto do_next_sb;
1N/A }
1N/A /* Check we are still in range. */
1N/A if (cb > cb_sb_end || dest > dest_sb_end)
1N/A goto return_overflow;
1N/A /* Get the next tag and advance to first token. */
1N/A tag = *cb++;
1N/A /* Parse the eight tokens described by the tag. */
1N/A for (token = 0; token < 8; token++, tag >>= 1) {
1N/A u16 lg, pt, length, max_non_overlap;
1N/A register u16 i;
1N/A u8 *dest_back_addr;
1N/A
1N/A /* Check if we are done / still in range. */
1N/A if (cb >= cb_sb_end || dest > dest_sb_end)
1N/A break;
1N/A /* Determine token type and parse appropriately.*/
1N/A if ((tag & NTFS_TOKEN_MASK) == NTFS_SYMBOL_TOKEN) {
1N/A /*
1N/A * We have a symbol token, copy the symbol across, and
1N/A * advance the source and destination positions.
1N/A */
1N/A *dest++ = *cb++;
1N/A /* Continue with the next token. */
1N/A continue;
1N/A }
1N/A /*
1N/A * We have a phrase token. Make sure it is not the first tag in
1N/A * the sb as this is illegal and would confuse the code below.
1N/A */
1N/A if (dest == dest_sb_start)
1N/A goto return_overflow;
1N/A /*
1N/A * Determine the number of bytes to go back (p) and the number
1N/A * of bytes to copy (l). We use an optimized algorithm in which
1N/A * we first calculate log2(current destination position in sb),
1N/A * which allows determination of l and p in O(1) rather than
1N/A * O(n). We just need an arch-optimized log2() function now.
1N/A */
1N/A lg = 0;
1N/A for (i = dest - dest_sb_start - 1; i >= 0x10; i >>= 1)
1N/A lg++;
1N/A /* Get the phrase token into i. */
1N/A pt = le16_to_cpup((u16*)cb);
1N/A /*
1N/A * Calculate starting position of the byte sequence in
1N/A * the destination using the fact that p = (pt >> (12 - lg)) + 1
1N/A * and make sure we don't go too far back.
1N/A */
1N/A dest_back_addr = dest - (pt >> (12 - lg)) - 1;
1N/A if (dest_back_addr < dest_sb_start)
1N/A goto return_overflow;
1N/A /* Now calculate the length of the byte sequence. */
1N/A length = (pt & (0xfff >> lg)) + 3;
1N/A /* Verify destination is in range. */
1N/A if (dest + length > dest_sb_end)
1N/A goto return_overflow;
1N/A /* The number of non-overlapping bytes. */
1N/A max_non_overlap = dest - dest_back_addr;
1N/A if (length <= max_non_overlap) {
1N/A /* The byte sequence doesn't overlap, just copy it. */
1N/A memcpy(dest, dest_back_addr, length);
1N/A /* Advance destination pointer. */
1N/A dest += length;
1N/A } else {
1N/A /*
1N/A * The byte sequence does overlap, copy non-overlapping
1N/A * part and then do a slow byte by byte copy for the
1N/A * overlapping part. Also, advance the destination
1N/A * pointer.
1N/A */
1N/A memcpy(dest, dest_back_addr, max_non_overlap);
1N/A dest += max_non_overlap;
1N/A dest_back_addr += max_non_overlap;
1N/A length -= max_non_overlap;
1N/A while (length--)
1N/A *dest++ = *dest_back_addr++;
1N/A }
1N/A /* Advance source position and continue with the next token. */
1N/A cb += 2;
1N/A }
1N/A /* No tokens left in the current tag. Continue with the next tag. */
1N/A goto do_next_tag;
1N/Areturn_overflow:
1N/A ntfs_log_debug("Failed. Returning -EOVERFLOW.\n");
1N/A errno = EOVERFLOW;
1N/A return -1;
1N/A}
1N/A
1N/A/**
1N/A * ntfs_is_cb_compressed - internal function, do not use
1N/A *
1N/A * This is a very specialised function determining if a cb is compressed or
1N/A * uncompressed. It is assumed that checking for a sparse cb has already been
1N/A * performed and that the cb is not sparse. It makes all sorts of other
1N/A * assumptions as well and hence it is not useful anywhere other than where it
1N/A * is used at the moment. Please, do not make this function available for use
1N/A * outside of compress.c as it is bound to confuse people and not do what they
1N/A * want.
1N/A *
1N/A * Return TRUE on errors so that the error will be detected later on in the
1N/A * code. Might be a bit confusing to debug but there really should never be
1N/A * errors coming from here.
1N/A */
1N/Astatic BOOL ntfs_is_cb_compressed(ntfs_attr *na,
1N/A runlist_element *rl, VCN cb_start_vcn, int cb_clusters)
1N/A{
1N/A /*
1N/A * The simplest case: the run starting at @cb_start_vcn contains
1N/A * @cb_clusters clusters which are all not sparse, thus the cb is not
1N/A * compressed.
1N/A */
1N/Arestart:
1N/A cb_clusters -= rl->length - (cb_start_vcn - rl->vcn);
1N/A while (cb_clusters > 0) {
1N/A /* Go to the next run. */
1N/A rl++;
1N/A /* Map the next runlist fragment if it is not mapped. */
1N/A if (rl->lcn < LCN_HOLE || !rl->length) {
1N/A cb_start_vcn = rl->vcn;
1N/A rl = ntfs_attr_find_vcn(na, rl->vcn);
1N/A if (!rl || rl->lcn < LCN_HOLE || !rl->length)
1N/A return TRUE;
1N/A /*
1N/A * If the runs were merged need to deal with the
1N/A * resulting partial run so simply restart.
1N/A */
1N/A if (rl->vcn < cb_start_vcn)
1N/A goto restart;
1N/A }
1N/A /* If the current run is sparse, the cb is compressed. */
1N/A if (rl->lcn == LCN_HOLE)
1N/A return TRUE;
1N/A /* If the whole cb is not sparse, it is not compressed. */
1N/A if (rl->length >= cb_clusters)
1N/A return FALSE;
1N/A cb_clusters -= rl->length;
1N/A };
1N/A /* All cb_clusters were not sparse thus the cb is not compressed. */
1N/A return FALSE;
1N/A}
1N/A
1N/A/**
1N/A * ntfs_compressed_attr_pread - read from a compressed attribute
1N/A * @na: ntfs attribute to read from
1N/A * @pos: byte position in the attribute to begin reading from
1N/A * @count: number of bytes to read
1N/A * @b: output data buffer
1N/A *
1N/A * NOTE: You probably want to be using attrib.c::ntfs_attr_pread() instead.
1N/A *
1N/A * This function will read @count bytes starting at offset @pos from the
1N/A * compressed ntfs attribute @na into the data buffer @b.
1N/A *
1N/A * On success, return the number of successfully read bytes. If this number
1N/A * is lower than @count this means that the read reached end of file or that
1N/A * an error was encountered during the read so that the read is partial.
1N/A * 0 means end of file or nothing was read (also return 0 when @count is 0).
1N/A *
1N/A * On error and nothing has been read, return -1 with errno set appropriately
1N/A * to the return code of ntfs_pread(), or to EINVAL in case of invalid
1N/A * arguments.
1N/A */
1N/As64 ntfs_compressed_attr_pread(ntfs_attr *na, s64 pos, s64 count, void *b)
1N/A{
1N/A s64 br, to_read, ofs, total, total2;
1N/A u64 cb_size_mask;
1N/A VCN start_vcn, vcn, end_vcn;
1N/A ntfs_volume *vol;
1N/A runlist_element *rl;
1N/A u8 *dest, *cb, *cb_pos, *cb_end;
1N/A u32 cb_size;
1N/A int err;
1N/A unsigned int nr_cbs, cb_clusters;
1N/A
1N/A ntfs_log_trace("Entering for inode 0x%llx, attr 0x%x, pos 0x%llx, count 0x%llx.\n",
1N/A (unsigned long long)na->ni->mft_no, na->type,
1N/A (long long)pos, (long long)count);
1N/A if (!na || !NAttrCompressed(na) || !na->ni || !na->ni->vol || !b ||
1N/A pos < 0 || count < 0) {
1N/A errno = EINVAL;
1N/A return -1;
1N/A }
1N/A /*
1N/A * Encrypted attributes are not supported. We return access denied,
1N/A * which is what Windows NT4 does, too.
1N/A */
1N/A if (NAttrEncrypted(na)) {
1N/A errno = EACCES;
1N/A return -1;
1N/A }
1N/A if (!count)
1N/A return 0;
1N/A /* Truncate reads beyond end of attribute. */
1N/A if (pos + count > na->data_size) {
1N/A if (pos >= na->data_size) {
1N/A return 0;
1N/A }
1N/A count = na->data_size - pos;
1N/A }
1N/A /* If it is a resident attribute, simply use ntfs_attr_pread(). */
1N/A if (!NAttrNonResident(na))
1N/A return ntfs_attr_pread(na, pos, count, b);
1N/A total = total2 = 0;
1N/A /* Zero out reads beyond initialized size. */
1N/A if (pos + count > na->initialized_size) {
1N/A if (pos >= na->initialized_size) {
1N/A memset(b, 0, count);
1N/A return count;
1N/A }
1N/A total2 = pos + count - na->initialized_size;
1N/A count -= total2;
1N/A memset((u8*)b + count, 0, total2);
1N/A }
1N/A vol = na->ni->vol;
1N/A cb_size = na->compression_block_size;
1N/A cb_size_mask = cb_size - 1UL;
1N/A cb_clusters = na->compression_block_clusters;
1N/A
1N/A /* Need a temporary buffer for each loaded compression block. */
1N/A cb = ntfs_malloc(cb_size);
1N/A if (!cb)
1N/A return -1;
1N/A
1N/A /* Need a temporary buffer for each uncompressed block. */
1N/A dest = ntfs_malloc(cb_size);
1N/A if (!dest) {
1N/A err = errno;
1N/A free(cb);
1N/A errno = err;
1N/A return -1;
1N/A }
1N/A /*
1N/A * The first vcn in the first compression block (cb) which we need to
1N/A * decompress.
1N/A */
1N/A start_vcn = (pos & ~cb_size_mask) >> vol->cluster_size_bits;
1N/A /* Offset in the uncompressed cb at which to start reading data. */
1N/A ofs = pos & cb_size_mask;
1N/A /*
1N/A * The first vcn in the cb after the last cb which we need to
1N/A * decompress.
1N/A */
1N/A end_vcn = ((pos + count + cb_size - 1) & ~cb_size_mask) >>
1N/A vol->cluster_size_bits;
1N/A /* Number of compression blocks (cbs) in the wanted vcn range. */
1N/A nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits >>
1N/A na->compression_block_size_bits;
1N/A cb_end = cb + cb_size;
1N/Ado_next_cb:
1N/A nr_cbs--;
1N/A cb_pos = cb;
1N/A vcn = start_vcn;
1N/A start_vcn += cb_clusters;
1N/A
1N/A /* Check whether the compression block is sparse. */
1N/A rl = ntfs_attr_find_vcn(na, vcn);
1N/A if (!rl || rl->lcn < LCN_HOLE) {
1N/A free(cb);
1N/A free(dest);
1N/A if (total)
1N/A return total;
1N/A /* FIXME: Do we want EIO or the error code? (AIA) */
1N/A errno = EIO;
1N/A return -1;
1N/A }
1N/A if (rl->lcn == LCN_HOLE) {
1N/A /* Sparse cb, zero out destination range overlapping the cb. */
1N/A ntfs_log_debug("Found sparse compression block.\n");
1N/A to_read = min(count, cb_size - ofs);
1N/A memset(b, 0, to_read);
1N/A ofs = 0;
1N/A total += to_read;
1N/A count -= to_read;
1N/A b = (u8*)b + to_read;
1N/A } else if (!ntfs_is_cb_compressed(na, rl, vcn, cb_clusters)) {
1N/A s64 tdata_size, tinitialized_size;
1N/A /*
1N/A * Uncompressed cb, read it straight into the destination range
1N/A * overlapping the cb.
1N/A */
1N/A ntfs_log_debug("Found uncompressed compression block.\n");
1N/A /*
1N/A * Read the uncompressed data into the destination buffer.
1N/A * NOTE: We cheat a little bit here by marking the attribute as
1N/A * not compressed in the ntfs_attr structure so that we can
1N/A * read the data by simply using ntfs_attr_pread(). (-8
1N/A * NOTE: we have to modify data_size and initialized_size
1N/A * temporarily as well...
1N/A */
1N/A to_read = min(count, cb_size - ofs);
1N/A ofs += vcn << vol->cluster_size_bits;
1N/A NAttrClearCompressed(na);
1N/A tdata_size = na->data_size;
1N/A tinitialized_size = na->initialized_size;
1N/A na->data_size = na->initialized_size = na->allocated_size;
1N/A do {
1N/A br = ntfs_attr_pread(na, ofs, to_read, b);
1N/A if (br < 0) {
1N/A err = errno;
1N/A na->data_size = tdata_size;
1N/A na->initialized_size = tinitialized_size;
1N/A NAttrSetCompressed(na);
1N/A free(cb);
1N/A free(dest);
1N/A if (total)
1N/A return total;
1N/A errno = err;
1N/A return br;
1N/A }
1N/A total += br;
1N/A count -= br;
1N/A b = (u8*)b + br;
1N/A to_read -= br;
1N/A ofs += br;
1N/A } while (to_read > 0);
1N/A na->data_size = tdata_size;
1N/A na->initialized_size = tinitialized_size;
1N/A NAttrSetCompressed(na);
1N/A ofs = 0;
1N/A } else {
1N/A s64 tdata_size, tinitialized_size;
1N/A
1N/A /*
1N/A * Compressed cb, decompress it into the temporary buffer, then
1N/A * copy the data to the destination range overlapping the cb.
1N/A */
1N/A ntfs_log_debug("Found compressed compression block.\n");
1N/A /*
1N/A * Read the compressed data into the temporary buffer.
1N/A * NOTE: We cheat a little bit here by marking the attribute as
1N/A * not compressed in the ntfs_attr structure so that we can
1N/A * read the raw, compressed data by simply using
1N/A * ntfs_attr_pread(). (-8
1N/A * NOTE: We have to modify data_size and initialized_size
1N/A * temporarily as well...
1N/A */
1N/A to_read = cb_size;
1N/A NAttrClearCompressed(na);
1N/A tdata_size = na->data_size;
1N/A tinitialized_size = na->initialized_size;
1N/A na->data_size = na->initialized_size = na->allocated_size;
1N/A do {
1N/A br = ntfs_attr_pread(na,
1N/A (vcn << vol->cluster_size_bits) +
1N/A (cb_pos - cb), to_read, cb_pos);
1N/A if (br < 0) {
1N/A err = errno;
1N/A na->data_size = tdata_size;
1N/A na->initialized_size = tinitialized_size;
1N/A NAttrSetCompressed(na);
1N/A free(cb);
1N/A free(dest);
1N/A if (total)
1N/A return total;
1N/A errno = err;
1N/A return br;
1N/A }
1N/A cb_pos += br;
1N/A to_read -= br;
1N/A } while (to_read > 0);
1N/A na->data_size = tdata_size;
1N/A na->initialized_size = tinitialized_size;
1N/A NAttrSetCompressed(na);
1N/A /* Just a precaution. */
1N/A if (cb_pos + 2 <= cb_end)
1N/A *(u16*)cb_pos = 0;
1N/A ntfs_log_debug("Successfully read the compression block.\n");
1N/A if (ntfs_decompress(dest, cb_size, cb, cb_size) < 0) {
1N/A err = errno;
1N/A free(cb);
1N/A free(dest);
1N/A if (total)
1N/A return total;
1N/A errno = err;
1N/A return -1;
1N/A }
1N/A to_read = min(count, cb_size - ofs);
1N/A memcpy(b, dest + ofs, to_read);
1N/A total += to_read;
1N/A count -= to_read;
1N/A b = (u8*)b + to_read;
1N/A ofs = 0;
1N/A }
1N/A /* Do we have more work to do? */
1N/A if (nr_cbs)
1N/A goto do_next_cb;
1N/A /* We no longer need the buffers. */
1N/A free(cb);
1N/A free(dest);
1N/A /* Return number of bytes read. */
1N/A return total + total2;
1N/A}