tarvfs.cpp revision 1c434bf12564eb5a885b3f7e230b7638955004ce
/* $Id$ */
/** @file
* IPRT - TAR Virtual Filesystem.
*/
/*
* Copyright (C) 2010 Oracle Corporation
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*
* The contents of this file may alternatively be used under the terms
* of the Common Development and Distribution License Version 1.0
* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
* VirtualBox OSE distribution, in which case the provisions of the
* CDDL are applicable instead of those of the GPL.
*
* You may elect to license modified versions of this file under the
* terms and conditions of either the GPL or the CDDL or both.
*/
/******************************************************************************
* Header Files *
******************************************************************************/
#include <iprt/vfslowlevel.h>
#include "tar.h"
/*******************************************************************************
* Structures and Typedefs *
*******************************************************************************/
/**
* Tar directory, character device, block device, fifo socket or symbolic link.
*/
typedef struct RTZIPTARBASEOBJ
{
/** The stream offset of the (first) header. */
/** The tar header. */
/** The object info with unix attributes. */
/** Pointer to a tar filesystem stream base object. */
typedef RTZIPTARBASEOBJ *PRTZIPTARBASEOBJ;
/**
* Tar file represented as a VFS I/O stream.
*/
typedef struct RTZIPTARIOSTREAM
{
/** The basic tar object data. */
/** The number of bytes in the file. */
/** The current file position. */
/** The number of padding bytes following the file. */
/** Set if we've reached the end of the file. */
bool fEndOfStream;
/** The input I/O stream. */
/** Pointer to the private data of a tar file I/O stream. */
typedef RTZIPTARIOSTREAM *PRTZIPTARIOSTREAM;
/**
* Tar filesystem stream private data.
*/
typedef struct RTZIPTARFSSTREAM
{
/** The input I/O stream. */
/** The current object (referenced). */
/** Pointer to the private data if hVfsCurObj is representing a file. */
/** The start offset. */
/** The offset of the next header. */
/** Set if we've reached the end of the stream. */
bool fEndOfStream;
/** Set if we've encountered a fatal error. */
int rcFatal;
/** Pointer to the private data of a tar filesystem stream. */
typedef RTZIPTARFSSTREAM *PRTZIPTARFSSTREAM;
/**
* Checks if the TAR header is in the ustar format.
*
* @returns true / false.
* @param pTar The TAR header.
*/
{
}
/**
* Checks if the TAR header is in the ustar format and has a regular file type.
*
* @returns true / false.
* @param pTar The TAR header.
*/
{
return rtZipTarHdrIsUstar(pTar)
}
/**
* Checks if the TAR header includes a posix user name field.
*
* @returns true / false.
* @param pTar The TAR header.
*/
{
&& rtZipTarHdrIsUstar(pTar);
}
/**
* Checks if the TAR header includes a posix group name field.
*
* @returns true / false.
* @param pTar The TAR header.
*/
{
&& rtZipTarHdrIsUstar(pTar);
}
/**
* Checks if the TAR header includes a posix compatible path prefix field.
*
* @returns true / false.
* @param pTar The TAR header.
*/
{
&& rtZipTarHdrIsUstar(pTar);
}
/**
* Converts a numeric header field to the C native type.
*
* Only octal fields are handled.  A field whose first byte has bit 7 set is
* taken to be base-256 encoded and is rejected, unless @a fOctalOnly forces
* the octal path.
*
* @returns IPRT status code.
* @retval VERR_TAR_BASE_256_NOT_SUPPORTED if @a fOctalOnly is clear and the
* first byte of the field has bit 7 set.
*
* @param pszField The TAR header field.
* @param cchField The length of the field.
* @param fOctalOnly Must be octal.
* @param pi64 Where to store the value.
*/
static int rtZipTarHdrFieldToNum(const char *pszField, size_t cchField, bool fOctalOnly, int64_t *pi64)
{
/* NOTE(review): the declarations of cchFieldOrg, i64 and uDigit, and the
store to *pi64, are not visible in this excerpt - verify against the
full source. */
/* Take the octal path when the caller insists on octal, or when the first
byte of the field does not have bit 7 set. */
if ( fOctalOnly
|| !(*(unsigned char *)pszField & 0x80))
{
/*
* Skip leading zeros, saving a few slower loops below.
*/
/*
* Convert octal digits.
*/
while (cchField > 0)
{
/* A digit value of 8 or more ends the conversion. */
if (uDigit >= 8)
break;
/* Shift the accumulator by three bits, i.e. one octal digit. */
i64 <<= 3;
pszField++;
cchField--;
}
/*
* Was it terminated correctly?
*/
while (cchField > 0)
{
/* NOTE(review): the return expression below is incomplete as shown
(it has no terminator); it compares the remaining length with
the original one - confirm the intended status codes. */
return cchField < cchFieldOrg
cchField--;
}
}
else
{
/** @todo implement base-256 encoded fields. */
return VERR_TAR_BASE_256_NOT_SUPPORTED;
}
return VINF_SUCCESS;
}
/**
* Calculates the tar header checksums and detects if it's all zeros.
*
* @returns true if all zeros, false if not.
* @param pHdr The header to checksum.
* @param pi32Unsigned Where to store the checksum calculated using
* unsigned chars. This is the one POSIX
* specifies.
* @param pi32Signed Where to store the checksum calculated using
* signed chars.
*
* @remarks The reason why we calculate the checksum as both signed and unsigned
* is that the C char type is signed on some hosts and unsigned on
* others.
*/
{
int32_t i32Unsigned = 0;
/*
* Sum up the entire header.
*/
do
{
i32Unsigned += *(unsigned char *)pch;
/*
* Check if it's all zeros and replace the chksum field with spaces.
*/
bool const fZeroHdr = i32Unsigned == 0;
do
{
i32Unsigned -= *(unsigned char *)pch;
if (pi32Signed)
*pi32Signed = i32Signed;
return fZeroHdr;
}
/**
* Validates the TAR header.
*
* @returns VINF_SUCCESS if valid, appropriate VERR_TAR_XXX if not.
* @param pTar The TAR header.
*/
{
/*
* Calc the checksum first since this enables us to detect zero headers.
*/
return VERR_TAR_ZERO_HEADER;
/*
* Read the checksum field and match the checksums.
*/
int rc = rtZipTarHdrFieldToNum(pTar->Posix.chksum, sizeof(pTar->Posix.chksum), true /*fOctalOnly*/, &i64HdrChkSum);
if (RT_FAILURE(rc))
return VERR_TAR_BAD_CHKSUM_FIELD;
if ( i32ChkSum != i64HdrChkSum
return VERR_TAR_CHKSUM_MISMATCH;
/*
* Perform some basic checks.
*/
if (!rtZipTarHdrIsUstar(pTar))
{
return VERR_TAR_NOT_USTAR_V00;
}
{
case RTZIPTAR_TF_OLDNORMAL:
case RTZIPTAR_TF_NORMAL:
case RTZIPTAR_TF_CONTIG:
case RTZIPTAR_TF_LINK:
case RTZIPTAR_TF_SYMLINK:
case RTZIPTAR_TF_CHR:
case RTZIPTAR_TF_BLK:
case RTZIPTAR_TF_FIFO:
{
return VERR_TAR_EMPTY_NAME;
if (*pchEnd == '/')
return VERR_TAR_NON_DIR_ENDS_WITH_SLASH;
break;
}
case RTZIPTAR_TF_DIR:
return VERR_TAR_EMPTY_NAME;
break;
case RTZIPTAR_TF_X_HDR:
case RTZIPTAR_TF_X_GLOBAL:
return VERR_TAR_UNSUPPORTED_PAX_TYPE;
case RTZIPTAR_TF_SOLARIS_XHDR:
case RTZIPTAR_TF_GNU_DUMPDIR:
case RTZIPTAR_TF_GNU_LONGLINK:
case RTZIPTAR_TF_GNU_LONGNAME:
case RTZIPTAR_TF_GNU_MULTIVOL:
case RTZIPTAR_TF_GNU_SPARSE:
case RTZIPTAR_TF_GNU_VOLDHR:
}
return VINF_SUCCESS;
}
/**
* Translate a TAR header to an IPRT object info structure with additional UNIX
* attributes.
*
* This completes the validation done by rtZipTarHdrValidate.
*
* @returns VINF_SUCCESS if valid, appropriate VERR_TAR_XXX if not.
* @param pTar The TAR header (input).
* @param pObjInfo The object info structure (output).
*/
{
/*
* Zap the whole structure, this takes care of unused space in the union.
*/
/*
* Convert the tar field in RTFSOBJINFO order.
*/
int rc;
do { \
if (RT_FAILURE(rc)) \
return rc; \
return VERR_TAR_NUM_VALUE_TOO_LARGE; \
} while (0)
return VERR_TAR_NUM_VALUE_TOO_LARGE;
{
return VERR_TAR_DEV_VALUE_TOO_LARGE;
}
/*
* Massage the result a little bit.
* Also validate some more now that we've got the numbers to work with.
*/
return VERR_TAR_BAD_MODE_FIELD;
{
case RTZIPTAR_TF_OLDNORMAL:
case RTZIPTAR_TF_NORMAL:
case RTZIPTAR_TF_CONTIG:
break;
case RTZIPTAR_TF_LINK:
return VERR_TAR_SIZE_NOT_ZERO;
break;
case RTZIPTAR_TF_SYMLINK:
break;
case RTZIPTAR_TF_CHR:
break;
case RTZIPTAR_TF_BLK:
break;
case RTZIPTAR_TF_DIR:
break;
case RTZIPTAR_TF_FIFO:
break;
default:
return VERR_TAR_UNKNOWN_TYPE_FLAG; /* Should've been caught in validate. */
}
return VERR_TAR_MODE_WITH_TYPE;
{
case RTZIPTAR_TF_CHR:
case RTZIPTAR_TF_BLK:
case RTZIPTAR_TF_DIR:
case RTZIPTAR_TF_FIFO:
pObjInfo->cbAllocated = 0;
break;
}
return VINF_SUCCESS;
}
/*
*
* T h e V F S F i l e s y s t e m S t r e a m B i t s.
* T h e V F S F i l e s y s t e m S t r e a m B i t s.
* T h e V F S F i l e s y s t e m S t r e a m B i t s.
*
*/
/**
* @interface_method_impl{RTVFSOBJOPS,pfnClose}
*/
{
/* Currently there is nothing we really have to do here. */
return VINF_SUCCESS;
}
/**
* @interface_method_impl{RTVFSOBJOPS,pfnQueryInfo}
*
* Fills in @a pObjInfo according to the additional attribute set selected by
* @a enmAddAttr.  Attribute sets not listed in the switch yield
* VERR_NOT_SUPPORTED; all handled ones return VINF_SUCCESS.
*/
static DECLCALLBACK(int) rtZipTarFssBaseObj_QueryInfo(void *pvThis, PRTFSOBJINFO pObjInfo, RTFSOBJATTRADD enmAddAttr)
{
/* NOTE(review): the declaration of pThis is not visible in this excerpt. */
/*
* Copy the desired data.
*/
switch (enmAddAttr)
{
case RTFSOBJATTRADD_NOTHING:
case RTFSOBJATTRADD_UNIX:
break;
/* NOTE(review): no case label precedes the two copies below in the visible
code, so they look unreachable as written.  Presumably the owner and
group attribute cases belong here - confirm against the full source. */
/* Copy the header's user name into the fixed-size owner name buffer. */
RTStrCopy(pObjInfo->Attr.u.UnixOwner.szName, sizeof(pObjInfo->Attr.u.UnixOwner.szName), pThis->Hdr.Posix.uname);
break;
/* Copy the header's group name into the fixed-size group name buffer. */
RTStrCopy(pObjInfo->Attr.u.UnixGroup.szName, sizeof(pObjInfo->Attr.u.UnixGroup.szName), pThis->Hdr.Posix.gname);
break;
case RTFSOBJATTRADD_EASIZE:
break;
default:
/* Any other attribute set is rejected. */
return VERR_NOT_SUPPORTED;
}
return VINF_SUCCESS;
}
/**
* Tar filesystem base object operations.
*/
static const RTVFSOBJOPS g_rtZipTarFssBaseObjOps =
{
"TarFsStream::Obj",
};
/**
* @interface_method_impl{RTVFSOBJOPS,pfnClose}
*/
{
}
/**
* @interface_method_impl{RTVFSOBJOPS,pfnQueryInfo}
*/
static DECLCALLBACK(int) rtZipTarFssIos_QueryInfo(void *pvThis, PRTFSOBJINFO pObjInfo, RTFSOBJATTRADD enmAddAttr)
{
}
/**
* Reads one segment.
*
* Once the end-of-stream flag is set, callers that passed no @a pcbRead get
* VERR_EOF.
*
* @returns IPRT status code.
* @param pThis The instance data.
* @param pvBuf Where to put the read bytes.
* @param cbToRead The number of bytes to read.
* @param fBlocking Whether to block or not.
* @param pcbRead Where to store the number of bytes actually read.
* Optional; a local counter is substituted when NULL.
*/
static int rtZipTarFssIos_ReadOneSeg(PRTZIPTARIOSTREAM pThis, void *pvBuf, size_t cbToRead, bool fBlocking, size_t *pcbRead)
{
/*
* Fend off reads beyond the end of the stream here.
*/
if (pThis->fEndOfStream)
{
/* No byte count was requested, so the end of the stream is an error. */
if (!pcbRead)
return VERR_EOF;
/* NOTE(review): the handling for callers that did pass pcbRead is not
visible in this excerpt. */
}
/*
* Do the reading.
*/
size_t cbReadStack = 0;
/* Fall back to a local counter when the caller passed no pcbRead. */
if (!pcbRead)
pcbRead = &cbReadStack;
/* NOTE(review): the read call, the declaration of rc and the condition
guarding the block below are not visible in this excerpt. */
{
pThis->fEndOfStream = true;
}
return rc;
}
/**
* @interface_method_impl{RTVFSIOSTREAMOPS,pfnRead}
*
* Reads into the scatter/gather buffer by handing its segments to
* rtZipTarFssIos_ReadOneSeg and returns the resulting status code.
*/
static DECLCALLBACK(int) rtZipTarFssIos_Read(void *pvThis, RTFOFF off, PCRTSGBUF pSgBuf, bool fBlocking, size_t *pcbRead)
{
int rc;
/* NOTE(review): the declaration of pThis and the condition selecting
between the two paths below are not visible in this excerpt. */
/* First path: pass segment 0 and the caller's pcbRead straight through. */
rc = rtZipTarFssIos_ReadOneSeg(pThis, pSgBuf->paSegs[0].pvSeg, pSgBuf->paSegs[0].cbSeg, fBlocking, pcbRead);
else
{
rc = VINF_SUCCESS;
/* NOTE(review): the loop header and the declarations of iSeg, cbReadSeg
and pcbReadSeg are not visible in this excerpt. */
{
cbReadSeg = 0;
/* Read segment iSeg on its own. */
rc = rtZipTarFssIos_ReadOneSeg(pThis, pSgBuf->paSegs[iSeg].pvSeg, pSgBuf->paSegs[iSeg].cbSeg, fBlocking, pcbReadSeg);
/* Stop at the first failing segment. */
if (RT_FAILURE(rc))
break;
if (pcbRead)
{
break;
}
}
if (pcbRead)
}
return rc;
}
/**
* @interface_method_impl{RTVFSIOSTREAMOPS,pfnWrite}
*/
static DECLCALLBACK(int) rtZipTarFssIos_Write(void *pvThis, RTFOFF off, PCRTSGBUF pSgBuf, bool fBlocking, size_t *pcbWritten)
{
/* The tar I/O stream is read-only: every write request is refused and
none of the arguments are examined. */
(void)pvThis;
(void)off;
(void)pSgBuf;
(void)fBlocking;
(void)pcbWritten;
return VERR_ACCESS_DENIED;
}
/**
* @interface_method_impl{RTVFSIOSTREAMOPS,pfnFlush}
*/
{
/* It's a read only stream, nothing dirty to flush. */
return VINF_SUCCESS;
}
/**
* @interface_method_impl{RTVFSIOSTREAMOPS,pfnPollOne}
*/
static DECLCALLBACK(int) rtZipTarFssIos_PollOne(void *pvThis, uint32_t fEvents, RTMSINTERVAL cMillies, bool fIntr,
{
/* When we've reached the end, read will be set to indicate it. */
if ( (fEvents & RTPOLL_EVT_READ)
&& pThis->fEndOfStream)
{
if (RT_SUCCESS(rc))
else
return VINF_SUCCESS;
}
}
/**
* @interface_method_impl{RTVFSIOSTREAMOPS,pfnTell}
*/
{
}
/**
* Tar I/O stream operations.
*/
static const RTVFSIOSTREAMOPS g_rtZipTarFssIosOps =
{
{ /* Obj */
"TarFsStream::IoStream",
},
0,
NULL /*Skip*/,
NULL /*ZeroFill*/,
};
/**
* @interface_method_impl{RTVFSOBJOPS,pfnClose}
*/
{
return rtZipTarFssBaseObj_Close(pThis);
}
/**
* @interface_method_impl{RTVFSOBJOPS,pfnQueryInfo}
*/
static DECLCALLBACK(int) rtZipTarFssSym_QueryInfo(void *pvThis, PRTFSOBJINFO pObjInfo, RTFSOBJATTRADD enmAddAttr)
{
}
/**
* @interface_method_impl{RTVFSOBJSETOPS,pfnMode}
*/
{
return VERR_ACCESS_DENIED;
}
/**
* @interface_method_impl{RTVFSOBJSETOPS,pfnSetTimes}
*/
static DECLCALLBACK(int) rtZipTarFssSym_SetTimes(void *pvThis, PCRTTIMESPEC pAccessTime, PCRTTIMESPEC pModificationTime,
{
return VERR_ACCESS_DENIED;
}
/**
* @interface_method_impl{RTVFSOBJSETOPS,pfnSetOwner}
*/
{
return VERR_ACCESS_DENIED;
}
/**
* @interface_method_impl{RTVFSSYMLINKOPS,pfnRead}
*/
{
}
/**
* Tar symbolic (and hardlink) operations.
*/
static const RTVFSSYMLINKOPS g_rtZipTarFssSymOps =
{
{ /* Obj */
"TarFsStream::Symlink",
},
0,
{ /* ObjSet */
},
};
/**
* @interface_method_impl{RTVFSOBJOPS,pfnClose}
*/
{
return VINF_SUCCESS;
}
/**
* @interface_method_impl{RTVFSOBJOPS,pfnQueryInfo}
*/
static DECLCALLBACK(int) rtZipTarFss_QueryInfo(void *pvThis, PRTFSOBJINFO pObjInfo, RTFSOBJATTRADD enmAddAttr)
{
/* Take the lazy approach here, with the side effect of providing some info
that is actually kind of useful. */
/* NOTE(review): no statements of this function are visible in this
excerpt, including its return - verify against the full source. */
}
/**
* @interface_method_impl{RTVFSFSSTREAMOPS,pfnNext}
*
* Advances to the next tar entry: drops the current object, reads and
* validates the next header, creates an object matching its type flag and
* hands back the pieces the caller asked for.
*
* @a ppszName, @a phVfsObj and @a penmType are each optional; they are only
* used when non-NULL.  VERR_EOF is returned once the end of the stream has
* been reached.
*
* NOTE(review): many statements of this function are not visible in this
* excerpt (declarations, calls and conditions); the comments below describe
* only what is visible.
*/
static DECLCALLBACK(int) rtZipTarFss_Next(void *pvThis, char **ppszName, RTVFSOBJTYPE *penmType, PRTVFSOBJ phVfsObj)
{
/*
* Dispense with the current object.
*/
{
if (pThis->pCurIosData)
{
}
}
/*
* Check if we've already reached the end in some way.
*/
if (pThis->fEndOfStream)
return VERR_EOF;
/*
* Make sure the input stream is in the right place.
*/
while ( off >= 0
{
if (RT_FAILURE(rc))
{
/** @todo Ignore if we're at the end of the stream? */
}
}
if (off < 0)
/*
* Read the next header.
*/
/* A failure here ends the stream: flag it and report VERR_EOF. */
if (RT_FAILURE(rc))
{
pThis->fEndOfStream = true;
return VERR_EOF;
}
/*
* Validate the header and convert to binary object info.
* We pick up the start of the zero headers here in the failure path.
*/
if (RT_FAILURE_NP(rc))
{
/* The zero-header status is singled out from the other failures. */
if (rc == VERR_TAR_ZERO_HEADER)
{
if (RT_FAILURE(rc2))
{
pThis->fEndOfStream = true;
return VERR_EOF;
/* Just drain the stream because blocksize may dictate that
there is a whole bunch of stuff coming up. */
{
return VERR_EOF;
if (RT_FAILURE(rc))
break;
}
}
}
}
if (RT_FAILURE(rc))
/*
* Create an object of the appropriate type.
*/
{
/*
* Files are represented by a VFS I/O stream.
*/
case RTZIPTAR_TF_NORMAL:
case RTZIPTAR_TF_OLDNORMAL:
case RTZIPTAR_TF_CONTIG:
{
sizeof(*pIosData),
&hVfsIos,
(void **)&pIosData);
if (RT_FAILURE(rc))
pIosData->fEndOfStream = false;
break;
}
/*
* We represent hard links using a symbolic link object. This fits
* best with the way TAR stores it and there is currently no better
* fitting VFS type alternative.
*/
case RTZIPTAR_TF_LINK:
case RTZIPTAR_TF_SYMLINK:
{
sizeof(*pBaseObjData),
&hVfsSym,
(void **)&pBaseObjData);
if (RT_FAILURE(rc))
break;
}
/*
* All other objects are represented using a VFS base object since they
* carry no data streams (unless some tar extension implements extended
* attributes / alternative streams).
*/
case RTZIPTAR_TF_CHR:
case RTZIPTAR_TF_BLK:
case RTZIPTAR_TF_DIR:
case RTZIPTAR_TF_FIFO:
{
sizeof(*pBaseObjData),
&hVfsObj,
(void **)&pBaseObjData);
if (RT_FAILURE(rc))
break;
}
/* Any other type flag trips an assertion. */
default:
AssertFailed();
}
/*
* Set the return data and we're done.
*/
if (ppszName)
{
/* NOTE(review): presumably the prefix and the name are joined with a
'/' when the header carries a prefix - the calls themselves are not
visible in this excerpt. */
if (rtZipTarHdrHasPrefix(&Hdr))
{
"/", 1,
}
else
{
}
if (RT_FAILURE(rc))
return rc;
}
if (phVfsObj)
{
}
if (penmType)
return VINF_SUCCESS;
}
/**
* Tar filesystem stream operations.
*/
static const RTVFSFSSTREAMOPS rtZipTarFssOps =
{
{ /* Obj */
"TarFsStream",
},
0,
};
/**
* Creates a tar filesystem stream on top of an input I/O stream.
*
* The stream contents are not checked here; that is left to
* rtZipTarFss_Next.
*
* @returns VINF_SUCCESS if RTVfsNewFsStream succeeded, otherwise the status
* code it returned.
* @param hVfsIosIn The input I/O stream.
* @param fFlags Flags.  Not referenced by the code visible here.
* @param phVfsFss Presumably where the new filesystem stream handle is
* returned - the store is not visible in this excerpt,
* confirm against the full source.
*/
RTDECL(int) RTZipTarFsStreamFromIoStream(RTVFSIOSTREAM hVfsIosIn, uint32_t fFlags, PRTVFSFSSTREAM phVfsFss)
{
/*
* Input validation.
*/
/* NOTE(review): the validation statements and the declarations of pThis
and hVfsFss are not visible in this excerpt. */
/*
* Retain the input stream and create a new filesystem stream handle.
*/
int rc = RTVfsNewFsStream(&rtZipTarFssOps, sizeof(*pThis), NIL_RTVFS, NIL_RTVFSLOCK, &hVfsFss, (void **)&pThis);
if (RT_SUCCESS(rc))
{
/* A fresh stream has not reached its end yet. */
pThis->fEndOfStream = false;
/* Don't check if it's a TAR stream here, do that in the
rtZipTarFss_Next. */
return VINF_SUCCESS;
}
return rc;
}