diff --git a/contrib/seekable_format/examples/seekable_decompression.c b/contrib/seekable_format/examples/seekable_decompression.c index b134b87b6..d18def7cd 100644 --- a/contrib/seekable_format/examples/seekable_decompression.c +++ b/contrib/seekable_format/examples/seekable_decompression.c @@ -17,6 +17,7 @@ #include "zstd_seekable.h" +#define MIN(a, b) ((a) < (b) ? (a) : (b)) static void* malloc_orDie(size_t size) { @@ -85,74 +86,31 @@ static void fseek_orDie(FILE* file, long int offset, int origin) { static void decompressFile_orDie(const char* fname, unsigned startOffset, unsigned endOffset) { FILE* const fin = fopen_orDie(fname, "rb"); - size_t const buffInSize = ZSTD_DStreamInSize(); - void* const buffIn = malloc_orDie(buffInSize); FILE* const fout = stdout; size_t const buffOutSize = ZSTD_DStreamOutSize(); /* Guarantee to successfully flush at least one complete compressed block in all circumstances. */ void* const buffOut = malloc_orDie(buffOutSize); - ZSTD_seekable_DStream* const dstream = ZSTD_seekable_createDStream(); - if (dstream==NULL) { fprintf(stderr, "ZSTD_seekable_createDStream() error \n"); exit(10); } + ZSTD_seekable* const seekable = ZSTD_seekable_create(); + if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); } - { size_t sizeNeeded = 0; - void* buffSeekTable = NULL; + size_t const initResult = ZSTD_seekable_initFile(seekable, fin); + if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); } - do { - sizeNeeded = ZSTD_seekable_loadSeekTable(dstream, buffSeekTable, sizeNeeded); - if (!sizeNeeded) break; + while (startOffset < endOffset) { + size_t const result = ZSTD_seekable_decompress(seekable, buffOut, MIN(endOffset - startOffset, buffOutSize), startOffset); - if (ZSTD_isError(sizeNeeded)) { - fprintf(stderr, "ZSTD_seekable_loadSeekTable() error : %s \n", - ZSTD_getErrorName(sizeNeeded)); - exit(11); - } - - fseek_orDie(fin, -(long) 
sizeNeeded, SEEK_END); - buffSeekTable = realloc_orDie(buffSeekTable, sizeNeeded); - fread_orDie(buffSeekTable, sizeNeeded, fin); - } while (sizeNeeded > 0); - - free(buffSeekTable); + if (ZSTD_isError(result)) { + fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n", + ZSTD_getErrorName(result)); + exit(12); + } + fwrite_orDie(buffOut, result, fout); + startOffset += result; } - /* In more complex scenarios, a file may consist of multiple appended frames (ex : pzstd). - * The following example decompresses only the first frame. - * It is compatible with other provided streaming examples */ - size_t const initResult = ZSTD_seekable_initDStream(dstream, startOffset, endOffset); - if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_initDStream() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); } - - size_t result, read, toRead = 0; - - do { - read = fread_orDie(buffIn, toRead, fin); - { ZSTD_inBuffer input = { buffIn, read, 0 }; - ZSTD_outBuffer output = { buffOut, buffOutSize, 0 }; - result = ZSTD_seekable_decompressStream(dstream, &output, &input); - - if (ZSTD_isError(result)) { - if (ZSTD_getErrorCode(result) == ZSTD_error_needSeek) { - unsigned long long const offset = ZSTD_seekable_getSeekOffset(dstream); - fseek_orDie(fin, offset, SEEK_SET); - ZSTD_seekable_updateOffset(dstream, offset); - toRead = 0; - } else { - fprintf(stderr, - "ZSTD_seekable_decompressStream() error : %s \n", - ZSTD_getErrorName(result)); - exit(12); - } - } else { - toRead = result; - } - fwrite_orDie(buffOut, output.pos, fout); - if (toRead > buffInSize) toRead = buffInSize; - } - } while (result > 0); - - ZSTD_seekable_freeDStream(dstream); + ZSTD_seekable_free(seekable); fclose_orDie(fin); fclose_orDie(fout); - free(buffIn); free(buffOut); } diff --git a/contrib/seekable_format/zstd_seekable.h b/contrib/seekable_format/zstd_seekable.h index 3ab4f185e..54b50fa50 100644 --- a/contrib/seekable_format/zstd_seekable.h +++ b/contrib/seekable_format/zstd_seekable.h 
@@ -5,6 +5,8 @@ extern "C" { #endif +#include <stdio.h> + static const unsigned ZSTD_seekTableFooterSize = 9; #define ZSTD_SEEKABLE_MAGICNUMBER 0x8F92EAB1 @@ -14,6 +16,8 @@ static const unsigned ZSTD_seekTableFooterSize = 9; /* Limit the maximum size to avoid any potential issues storing the compressed size */ #define ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE 0x80000000U +#define ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE (0ULL-2) + /*-**************************************************************************** * Seekable Format * @@ -24,7 +28,7 @@ static const unsigned ZSTD_seekTableFooterSize = 9; ******************************************************************************/ typedef struct ZSTD_seekable_CStream_s ZSTD_seekable_CStream; -typedef struct ZSTD_seekable_DStream_s ZSTD_seekable_DStream; +typedef struct ZSTD_seekable_s ZSTD_seekable; /*-**************************************************************************** * Seekable compression - HowTo @@ -82,55 +86,76 @@ ZSTDLIB_API size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outB /*-**************************************************************************** * Seekable decompression - HowTo -* A ZSTD_seekable_DStream object is required to tracking streaming operation. -* Use ZSTD_seekable_createDStream() and ZSTD_seekable_freeDStream() to create/ -* release resources. +* A ZSTD_seekable object is required to track the seek table. * -* Streaming objects are reusable to avoid allocation and deallocation, -* to start a new compression operation call ZSTD_seekable_initDStream() on the -* compressor. +* Call ZSTD_seekable_init* to initialize a ZSTD_seekable object with the +* seek table provided in the input. +* There are three modes for ZSTD_seekable_init: +* - ZSTD_seekable_initBuff() : An in-memory API. The data contained in +* `src` should be the entire seekable file, including the seek table. +* `src` should be kept alive and unmodified until the ZSTD_seekable object +* is freed or reset.
+* - ZSTD_seekable_initFile() : A simplified file API using stdio. fread and +* fseek will be used to access the required data for building the seek +* table and doing decompression operations. `src` should not be closed +* or modified until the ZSTD_seekable object is freed or reset. +* - ZSTD_seekable_initAdvanced() : A general API allowing the client to +* provide its own read and seek callbacks. +* + ZSTD_seekable_read() : read exactly `n` bytes into `buffer`. +* Premature EOF should be treated as an error. +* + ZSTD_seekable_seek() : seek the read head to `offset` from `origin`, +* where origin is either SEEK_SET (beginning of +* file), or SEEK_END (end of file). +* Both functions should return a non-negative value in case of success, and a +* negative value in case of failure. If implementing using this API and +* stdio, be careful with files larger than 4GB and fseek. All of these +* functions return an error code checkable with ZSTD_isError(). * -* Use ZSTD_seekable_loadSeekTable() to load the seek table from a file. -* `src` should point to a block of data read from the end of the file, -* i.e. `src + srcSize` should always be the end of the file. -* @return : 0 if the table was loaded successfully, or if `srcSize` was too -* small, a size hint for how much data to provide. -* An error code may also be returned, checkable with ZSTD_isError() +* Call ZSTD_seekable_decompress to decompress `dstSize` bytes at decompressed +* offset `offset`. ZSTD_seekable_decompress may have to decompress the entire +* prefix of the frame before the desired data if it has not already processed +* this section. If ZSTD_seekable_decompress is called multiple times for a +* consecutive range of data, it will efficiently retain the decompressor object +* and avoid redecompressing frame prefixes. The return value is the number of +* bytes decompressed, or an error code checkable with ZSTD_isError(). 
* -* Use ZSTD_seekable_initDStream to prepare for a new decompression operation -* using the seektable loaded with ZSTD_seekable_loadSeekTable(). -* Data in the range [rangeStart, rangeEnd) will be decompressed. -* -* Call ZSTD_seekable_decompressStream() repetitively to consume input stream. -* @return : There are a number of possible return codes for this function -* - 0, the decompression operation has completed. -* - An error code checkable with ZSTD_isError -* + If this error code is ZSTD_error_needSeek, the user should seek -* to the file position provided by ZSTD_seekable_getSeekOffset() -* and indicate this to the stream with -* ZSTD_seekable_updateOffset(), before resuming decompression -* + Otherwise, this is a regular decompression error and the input -* file is likely corrupted or the API was incorrectly used. -* - A size hint, the preferred nb of bytes to provide as input to the -* next function call to improve latency. -* -* ZSTD_seekable_getSeekOffset() and ZSTD_seekable_updateOffset() are helper -* functions to indicate where the user should seek their file stream to, when -* a different position is required to continue decompression. -* Note that ZSTD_seekable_updateOffset will error if given an offset other -* than the one requested from ZSTD_seekable_getSeekOffset(). +* The seek table access functions can be used to obtain the data contained +* in the seek table. If frameIndex is greater than or equal to the value returned by +* ZSTD_seekable_getNumFrames(), they will return error codes checkable with +* ZSTD_isError(). Note that since the offset access functions return +* unsigned long long instead of size_t, in this case they will instead return +* the value ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE.
******************************************************************************/ /*===== Seekable decompressor management =====*/ -ZSTDLIB_API ZSTD_seekable_DStream* ZSTD_seekable_createDStream(void); -ZSTDLIB_API size_t ZSTD_seekable_freeDStream(ZSTD_seekable_DStream* zds); +ZSTDLIB_API ZSTD_seekable* ZSTD_seekable_create(void); +ZSTDLIB_API size_t ZSTD_seekable_free(ZSTD_seekable* zs); /*===== Seekable decompression functions =====*/ -ZSTDLIB_API size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable_DStream* zds, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_seekable_initDStream(ZSTD_seekable_DStream* zds, unsigned long long rangeStart, unsigned long long rangeEnd); -ZSTDLIB_API size_t ZSTD_seekable_decompressStream(ZSTD_seekable_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -ZSTDLIB_API unsigned long long ZSTD_seekable_getSeekOffset(ZSTD_seekable_DStream* zds); -ZSTDLIB_API size_t ZSTD_seekable_updateOffset(ZSTD_seekable_DStream* zds, unsigned long long offset); +ZSTDLIB_API size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src); +ZSTDLIB_API size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned long long offset); +ZSTDLIB_API size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex); + +/*===== Seek Table access functions =====*/ +ZSTDLIB_API unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs); +ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex); +ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex); +ZSTDLIB_API size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, unsigned frameIndex); +ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex); + +ZSTDLIB_API 
unsigned ZSTD_seekable_offsetToFrame(ZSTD_seekable* const zs, unsigned long long offset); + +/*===== Seekable advanced I/O API =====*/ +typedef int(ZSTD_seekable_read)(void* opaque, void* buffer, size_t n); +typedef int(ZSTD_seekable_seek)(void* opaque, long long offset, int origin); +typedef struct { + void* opaque; + ZSTD_seekable_read* read; + ZSTD_seekable_seek* seek; +} ZSTD_seekable_customFile; + +ZSTDLIB_API size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src); #if defined (__cplusplus) } diff --git a/contrib/seekable_format/zstdseek_decompress.c b/contrib/seekable_format/zstdseek_decompress.c index 87a140c02..9bcfea91b 100644 --- a/contrib/seekable_format/zstdseek_decompress.c +++ b/contrib/seekable_format/zstdseek_decompress.c @@ -7,7 +7,54 @@ * of patent rights can be found in the PATENTS file in the same directory. */ +/* ********************************************************* +* Turn on Large Files support (>4GB) for 32-bit Linux/Unix +***********************************************************/ +#if !defined(__64BIT__) || defined(__MINGW32__) /* No point defining Large file for 64 bit but MinGW-w64 requires it */ +# if !defined(_FILE_OFFSET_BITS) +# define _FILE_OFFSET_BITS 64 /* turn off_t into a 64-bit type for ftello, fseeko */ +# endif +# if !defined(_LARGEFILE_SOURCE) /* obsolete macro, replaced with _FILE_OFFSET_BITS */ +# define _LARGEFILE_SOURCE 1 /* Large File Support extension (LFS) - fseeko, ftello */ +# endif +# if defined(_AIX) || defined(__hpux) +# define _LARGE_FILES /* Large file support on 32-bits AIX and HP-UX */ +# endif +#endif + +/* ************************************************************ +* Avoid fseek()'s 2GiB barrier with MSVC, MacOS, *BSD, MinGW +***************************************************************/ +#if defined(_MSC_VER) && _MSC_VER >= 1400 +# define LONG_SEEK _fseeki64 +#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */ 
+# define LONG_SEEK fseeko +#elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__) +# define LONG_SEEK fseeko64 +#elif defined(_WIN32) && !defined(__DJGPP__) +# include <windows.h> + static int LONG_SEEK(FILE* file, __int64 offset, int origin) { + LARGE_INTEGER off; + DWORD method; + off.QuadPart = offset; + if (origin == SEEK_END) + method = FILE_END; + else if (origin == SEEK_CUR) + method = FILE_CURRENT; + else + method = FILE_BEGIN; + + if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method)) + return 0; + else + return -1; + } +#else +# define LONG_SEEK fseek +#endif + #include <stdlib.h> /* malloc, free */ +#include <stdio.h> /* FILE* */ #define XXH_STATIC_LINKING_ONLY #define XXH_NAMESPACE ZSTD_ @@ -16,17 +63,74 @@ #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" #include "zstd_errors.h" -#include "mem.h" /* includes zstd.h */ +#include "mem.h" #include "zstd_seekable.h" #undef ERROR #define ERROR(name) ((size_t)-ZSTD_error_##name) +#define CHECK_IO(f) { int const errcod = (f); if (errcod < 0) return ERROR(seekableIO); } + #undef MIN #undef MAX #define MIN(a, b) ((a) < (b) ? (a) : (b)) #define MAX(a, b) ((a) > (b) ?
(a) : (b)) +/* Special-case callbacks for FILE* and in-memory modes, so that we can treat + * them the same way as the advanced API */ +static int ZSTD_seekable_read_FILE(void* opaque, void* buffer, size_t n) +{ + size_t const result = fread(buffer, 1, n, (FILE*)opaque); + if (result != n) { + return -1; + } + return 0; +} + +static int ZSTD_seekable_seek_FILE(void* opaque, S64 offset, int origin) +{ + int const ret = LONG_SEEK((FILE*)opaque, offset, origin); + if (ret) return ret; + return fflush((FILE*)opaque); +} + +typedef struct { + const void *ptr; + size_t size; + size_t pos; +} buffWrapper_t; + +static int ZSTD_seekable_read_buff(void* opaque, void* buffer, size_t n) +{ + buffWrapper_t* buff = (buffWrapper_t*) opaque; + if (buff->size + n > buff->pos) return -1; + memcpy(buffer, (const BYTE*)buff->ptr + buff->pos, n); + buff->pos += n; + return 0; +} + +static int ZSTD_seekable_seek_buff(void* opaque, S64 offset, int origin) +{ + buffWrapper_t* buff = (buffWrapper_t*) opaque; + unsigned long long newOffset; + switch (origin) { + case SEEK_SET: + newOffset = offset; + break; + case SEEK_CUR: + newOffset = (unsigned long long)buff->pos + offset; + break; + case SEEK_END: + newOffset = (unsigned long long)buff->size - offset; + break; + } + if (newOffset < 0 || newOffset > buff->size) { + return -1; + } + buff->pos = newOffset; + return 0; +} + typedef struct { U64 cOffset; U64 dOffset; @@ -40,18 +144,70 @@ typedef struct { int checksumFlag; } seekTable_t; +#define SEEKABLE_BUFF_SIZE ZSTD_BLOCKSIZE_ABSOLUTEMAX + +struct ZSTD_seekable_s { + ZSTD_DStream* dstream; + seekTable_t seekTable; + ZSTD_seekable_customFile src; + + U64 decompressedOffset; + U32 curFrame; + + BYTE inBuff[SEEKABLE_BUFF_SIZE]; /* need to do our own input buffering */ + BYTE outBuff[SEEKABLE_BUFF_SIZE]; /* so we can efficiently decompress the + starts of chunks before we get to the + desired section */ + ZSTD_inBuffer in; /* maintain continuity across ZSTD_seekable_decompress operations */ 
+ buffWrapper_t buffWrapper; /* for `src.opaque` in in-memory mode */ + + XXH64_state_t xxhState; +}; + +ZSTD_seekable* ZSTD_seekable_create(void) +{ + ZSTD_seekable* zs = malloc(sizeof(ZSTD_seekable)); + + if (zs == NULL) return NULL; + + /* also initializes stage to zsds_init */ + memset(zs, 0, sizeof(*zs)); + + zs->dstream = ZSTD_createDStream(); + if (zs->dstream == NULL) { + free(zs); + return NULL; + } + + return zs; +} + +size_t ZSTD_seekable_free(ZSTD_seekable* zs) +{ + if (zs == NULL) return 0; /* support free on null */ + ZSTD_freeDStream(zs->dstream); + free(zs->seekTable.entries); + free(zs); + + return 0; +} + /** ZSTD_seekable_offsetToFrame() : * Performs a binary search to find the last frame with a decompressed offset * <= pos * @return : the frame's index */ -static U32 ZSTD_seekable_offsetToFrame(const seekTable_t* table, U64 pos) +U32 ZSTD_seekable_offsetToFrame(ZSTD_seekable* const zs, U64 pos) { U32 lo = 0; - U32 hi = table->tableLen; + U32 hi = zs->seekTable.tableLen; + + if (pos >= zs->seekTable.entries[zs->seekTable.tableLen].dOffset) { + return zs->seekTable.tableLen; + } while (lo + 1 < hi) { U32 const mid = lo + ((hi - lo) >> 1); - if (table->entries[mid].dOffset <= pos) { + if (zs->seekTable.entries[mid].dOffset <= pos) { lo = mid; } else { hi = mid; @@ -60,75 +216,50 @@ static U32 ZSTD_seekable_offsetToFrame(const seekTable_t* table, U64 pos) return lo; } -/* Stream decompressor state machine stages */ -enum ZSTD_seekable_DStream_stage { - zsds_init = 0, - zsds_seek, - zsds_decompress, - zsds_done, -}; - -struct ZSTD_seekable_DStream_s { - ZSTD_DStream* dstream; - seekTable_t seekTable; - - U32 curFrame; - U64 compressedOffset; - U64 decompressedOffset; - - U64 targetStart; - U64 targetEnd; - - U64 nextSeek; - - enum ZSTD_seekable_DStream_stage stage; - - XXH64_state_t xxhState; -}; - -ZSTD_seekable_DStream* ZSTD_seekable_createDStream(void) +U32 ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs) { - ZSTD_seekable_DStream* zds = 
malloc(sizeof(ZSTD_seekable_DStream)); - - if (zds == NULL) return NULL; - - /* also initializes stage to zsds_init */ - memset(zds, 0, sizeof(*zds)); - - zds->dstream = ZSTD_createDStream(); - if (zds->dstream == NULL) { - free(zds); - return NULL; - } - - return zds; + return zs->seekTable.tableLen; } -size_t ZSTD_seekable_freeDStream(ZSTD_seekable_DStream* zds) +U64 ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, U32 frameIndex) { - if (zds == NULL) return 0; /* support free on null */ - ZSTD_freeDStream(zds->dstream); - free(zds->seekTable.entries); - free(zds); - - return 0; + if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE; + return zs->seekTable.entries[frameIndex].cOffset; } -size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable_DStream* zds, const void* src, size_t srcSize) +U64 ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, U32 frameIndex) { - const BYTE* ip = (const BYTE*)src + srcSize; + if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE; + return zs->seekTable.entries[frameIndex].dOffset; +} +size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, U32 frameIndex) +{ + if (frameIndex >= zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge); + return zs->seekTable.entries[frameIndex + 1].cOffset - + zs->seekTable.entries[frameIndex].cOffset; +} + +size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, U32 frameIndex) +{ + if (frameIndex > zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge); + return zs->seekTable.entries[frameIndex + 1].dOffset - + zs->seekTable.entries[frameIndex].dOffset; +} + +static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs) +{ int checksumFlag; + ZSTD_seekable_customFile src = zs->src; + /* read the footer, fixed size */ + CHECK_IO(src.seek(src.opaque, -(int)ZSTD_seekTableFooterSize, SEEK_END)); + CHECK_IO(src.read(src.opaque, zs->inBuff, ZSTD_seekTableFooterSize)); - /* footer is fixed 
size */ - if (srcSize < ZSTD_seekTableFooterSize) - return ZSTD_seekTableFooterSize; - - if (MEM_readLE32(ip - 4) != ZSTD_SEEKABLE_MAGICNUMBER) { + if (MEM_readLE32(zs->inBuff + 5) != ZSTD_SEEKABLE_MAGICNUMBER) { return ERROR(prefix_unknown); } - { BYTE const sfd = ip[-5]; + { BYTE const sfd = zs->inBuff[4]; checksumFlag = sfd >> 7; /* check reserved bits */ @@ -137,30 +268,36 @@ size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable_DStream* zds, const void* src, } } - { U32 const numFrames = MEM_readLE32(ip-9); + { U32 const numFrames = MEM_readLE32(zs->inBuff); U32 const sizePerEntry = 8 + (checksumFlag?4:0); U32 const tableSize = sizePerEntry * numFrames; U32 const frameSize = tableSize + ZSTD_seekTableFooterSize + ZSTD_skippableHeaderSize; - const BYTE* base = ip - frameSize; + U32 remaining = frameSize - ZSTD_seekTableFooterSize; /* don't need to re-read footer */ + { + U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE); - if (srcSize < frameSize) return frameSize; + CHECK_IO(src.seek(src.opaque, -(S64)frameSize, SEEK_END)); + CHECK_IO(src.read(src.opaque, zs->inBuff, toRead)); - if (MEM_readLE32(base) != (ZSTD_MAGIC_SKIPPABLE_START | 0xE)) { + remaining -= toRead; + } + + if (MEM_readLE32(zs->inBuff) != (ZSTD_MAGIC_SKIPPABLE_START | 0xE)) { return ERROR(prefix_unknown); } - if (MEM_readLE32(base+4) + ZSTD_skippableHeaderSize != frameSize) { + if (MEM_readLE32(zs->inBuff+4) + ZSTD_skippableHeaderSize != frameSize) { return ERROR(prefix_unknown); } { /* Allocate an extra entry at the end so that we can do size * computations on the last element without special case */ - seekEntry_t* entries = - (seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1)); - const BYTE* tableBase = base + ZSTD_skippableHeaderSize; + seekEntry_t* entries = (seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1)); + const BYTE* tableBase = zs->inBuff + ZSTD_skippableHeaderSize; + + U32 idx = 0; + U32 pos = 8; - U32 idx; - size_t pos; U64 cOffset = 0; U64 dOffset = 0; @@ -171,202 
+308,153 @@ size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable_DStream* zds, const void* src, } /* compute cumulative positions */ - for (idx = 0, pos = 0; idx < numFrames; idx++) { + for (; idx < numFrames; idx++) { + if (pos + sizePerEntry > SEEKABLE_BUFF_SIZE) { + U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE); + U32 const offset = SEEKABLE_BUFF_SIZE - pos; + memmove(zs->inBuff, zs->inBuff + pos, offset); /* move any data we haven't read yet */ + CHECK_IO(src.read(src.opaque, zs->inBuff+offset, toRead)); + remaining -= toRead; + pos = 0; + } entries[idx].cOffset = cOffset; entries[idx].dOffset = dOffset; - cOffset += MEM_readLE32(tableBase + pos); + cOffset += MEM_readLE32(zs->inBuff + pos); pos += 4; - dOffset += MEM_readLE32(tableBase + pos); + dOffset += MEM_readLE32(zs->inBuff + pos); pos += 4; if (checksumFlag) { - entries[idx].checksum = MEM_readLE32(tableBase + pos); + entries[idx].checksum = MEM_readLE32(zs->inBuff + pos); pos += 4; } } entries[numFrames].cOffset = cOffset; entries[numFrames].dOffset = dOffset; - zds->seekTable.entries = entries; - zds->seekTable.tableLen = numFrames; - zds->seekTable.checksumFlag = checksumFlag; + zs->seekTable.entries = entries; + zs->seekTable.tableLen = numFrames; + zs->seekTable.checksumFlag = checksumFlag; return 0; } } } -size_t ZSTD_seekable_initDStream(ZSTD_seekable_DStream* zds, U64 rangeStart, U64 rangeEnd) +size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize) { - /* restrict range to the end of the file, of non-negative size */ - rangeEnd = MIN(rangeEnd, zds->seekTable.entries[zds->seekTable.tableLen].dOffset); - rangeStart = MIN(rangeStart, rangeEnd); + zs->buffWrapper = (buffWrapper_t){src, srcSize, 0}; + { ZSTD_seekable_customFile srcFile = {&zs->buffWrapper, + &ZSTD_seekable_read_buff, + &ZSTD_seekable_seek_buff}; + return ZSTD_seekable_initAdvanced(zs, srcFile); } +} - zds->targetStart = rangeStart; - zds->targetEnd = rangeEnd; - zds->stage = zsds_seek; +size_t 
ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src) +{ + ZSTD_seekable_customFile srcFile = {src, &ZSTD_seekable_read_FILE, + &ZSTD_seekable_seek_FILE}; + return ZSTD_seekable_initAdvanced(zs, srcFile); +} - /* force a seek first */ - zds->curFrame = (U32)-1; - zds->compressedOffset = (U64)-1; - zds->decompressedOffset = (U64)-1; +size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src) +{ + zs->src = src; - if (zds->seekTable.checksumFlag) { - XXH64_reset(&zds->xxhState, 0); - } + { const size_t seekTableInit = ZSTD_seekable_loadSeekTable(zs); + if (ZSTD_isError(seekTableInit)) return seekTableInit; } - if (rangeStart == rangeEnd) zds->stage = zsds_done; + zs->decompressedOffset = (U64)-1; + zs->curFrame = (U32)-1; - { const size_t ret = ZSTD_initDStream(zds->dstream); - if (ZSTD_isError(ret)) return ret; } + { const size_t dstreamInit = ZSTD_initDStream(zs->dstream); + if (ZSTD_isError(dstreamInit)) return dstreamInit; } return 0; } -U64 ZSTD_seekable_getSeekOffset(ZSTD_seekable_DStream* zds) +size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t len, U64 offset) { - return zds->nextSeek; -} + U32 targetFrame = ZSTD_seekable_offsetToFrame(zs, offset); + do { + /* check if we can continue from a previous decompress job */ + if (targetFrame != zs->curFrame || offset != zs->decompressedOffset) { + zs->decompressedOffset = zs->seekTable.entries[targetFrame].dOffset; + zs->curFrame = targetFrame; -size_t ZSTD_seekable_updateOffset(ZSTD_seekable_DStream* zds, U64 offset) -{ - if (zds->stage != zsds_seek) { - return ERROR(stage_wrong); - } - if (offset != zds->nextSeek) { - return ERROR(needSeek); - } + CHECK_IO(zs->src.seek(zs->src.opaque, + zs->seekTable.entries[targetFrame].cOffset, + SEEK_SET)); + zs->in = (ZSTD_inBuffer){zs->inBuff, 0, 0}; + XXH64_reset(&zs->xxhState, 0); + ZSTD_resetDStream(zs->dstream); + } - zds->stage = zsds_decompress; - zds->compressedOffset = offset; - return 0; -} + while (zs->decompressedOffset < 
offset + len) { + size_t toRead; + ZSTD_outBuffer outTmp; + size_t prevOutPos; + if (zs->decompressedOffset < offset) { + /* dummy decompressions until we get to the target offset */ + outTmp = (ZSTD_outBuffer){zs->outBuff, MIN(SEEKABLE_BUFF_SIZE, offset - zs->decompressedOffset), 0}; + } else { + outTmp = (ZSTD_outBuffer){dst, len, zs->decompressedOffset - offset}; + } -size_t ZSTD_seekable_decompressStream(ZSTD_seekable_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) -{ - const seekTable_t* const jt = &zds->seekTable; - while (1) { - switch (zds->stage) { - case zsds_init: - return ERROR(init_missing); /* ZSTD_seekable_initDStream should be called first */ - case zsds_decompress: { - BYTE* const outBase = (BYTE*)output->dst + output->pos; - size_t const outLen = output->size - output->pos; - while (zds->decompressedOffset < zds->targetStart) { - U64 const toDecompress = - zds->targetStart - zds->decompressedOffset; - size_t const prevInputPos = input->pos; + prevOutPos = outTmp.pos; + toRead = ZSTD_decompressStream(zs->dstream, &outTmp, &zs->in); + if (ZSTD_isError(toRead)) { + return toRead; + } - ZSTD_outBuffer outTmp = { - outBase, (size_t)MIN((U64)outLen, toDecompress), 0}; + if (zs->seekTable.checksumFlag) { + XXH64_update(&zs->xxhState, outTmp.dst, outTmp.pos); + } + zs->decompressedOffset += outTmp.pos - prevOutPos; - size_t const ret = - ZSTD_decompressStream(zds->dstream, &outTmp, input); + if (toRead == 0) { + /* frame complete */ - if (ZSTD_isError(ret)) return ret; - if (ret == 0) { - /* should not happen at this stage */ + /* verify checksum */ + if (zs->seekTable.checksumFlag && + (XXH64_digest(&zs->xxhState) & 0xFFFFFFFFU) != + zs->seekTable.entries[targetFrame].checksum) { return ERROR(corruption_detected); } - zds->compressedOffset += input->pos - prevInputPos; - zds->decompressedOffset += outTmp.pos; - - if (jt->checksumFlag) { - XXH64_update(&zds->xxhState, outTmp.dst, outTmp.pos); + if (zs->decompressedOffset < offset + len) { + /* 
go back to the start and force a reset of the stream */ + targetFrame = ZSTD_seekable_offsetToFrame(zs, zs->decompressedOffset); } - - if (input->pos == input->size) { - /* need more input */ - return MIN( - ZSTD_DStreamInSize(), - (size_t)(jt->entries[zds->curFrame + 1] - .cOffset - - zds->compressedOffset)); - } - } - - /* do actual decompression */ - { - U64 const toDecompress = - MIN(zds->targetEnd, - jt->entries[zds->curFrame + 1].dOffset) - - zds->decompressedOffset; - size_t const prevInputPos = input->pos; - - ZSTD_outBuffer outTmp = { - outBase, (size_t)MIN((U64)outLen, toDecompress), 0}; - - size_t const ret = - ZSTD_decompressStream(zds->dstream, &outTmp, input); - - if (ZSTD_isError(ret)) return ret; - - zds->compressedOffset += input->pos - prevInputPos; - zds->decompressedOffset += outTmp.pos; - - output->pos += outTmp.pos; - - if (jt->checksumFlag) { - XXH64_update(&zds->xxhState, outTmp.dst, outTmp.pos); - if (ret == 0) { - /* verify the checksum */ - U32 const digest = XXH64_digest(&zds->xxhState) & 0xFFFFFFFFU; - if (digest != jt->entries[zds->curFrame].checksum) { - return ERROR(checksum_wrong); - } - - XXH64_reset(&zds->xxhState, 0); - } - } - - if (zds->decompressedOffset == zds->targetEnd) { - /* done */ - zds->stage = zsds_done; - return 0; - } - - if (ret == 0) { - /* frame is done */ - /* make sure this lines up with the expected frame border */ - if (zds->decompressedOffset != - jt->entries[zds->curFrame + 1].dOffset || - zds->compressedOffset != - jt->entries[zds->curFrame + 1].cOffset) - return ERROR(corruption_detected); - ZSTD_resetDStream(zds->dstream); - zds->stage = zsds_seek; - break; - } - - /* need more input */ - return MIN(ZSTD_DStreamInSize(), (size_t)( - jt->entries[zds->curFrame + 1].cOffset - - zds->compressedOffset)); - } - } - case zsds_seek: { - U32 targetFrame; - if (zds->decompressedOffset < zds->targetStart || - zds->decompressedOffset >= zds->targetEnd) { - /* haven't started yet */ - targetFrame = 
ZSTD_seekable_offsetToFrame(jt, zds->targetStart); - } else { - targetFrame = ZSTD_seekable_offsetToFrame(jt, zds->decompressedOffset); - } - - zds->curFrame = targetFrame; - - if (zds->compressedOffset == jt->entries[targetFrame].cOffset) { - zds->stage = zsds_decompress; break; } - zds->nextSeek = jt->entries[targetFrame].cOffset; - zds->decompressedOffset = jt->entries[targetFrame].dOffset; - /* signal to user that a seek is required */ - return ERROR(needSeek); + /* read in more data if we're done with this buffer */ + if (zs->in.pos == zs->in.size) { + toRead = MIN(toRead, SEEKABLE_BUFF_SIZE); + CHECK_IO(zs->src.read(zs->src.opaque, zs->inBuff, toRead)); + zs->in.size = toRead; + zs->in.pos = 0; + } } - case zsds_done: - return 0; + } while (zs->decompressedOffset != offset + len); + + return len; +} + +size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, U32 frameIndex) +{ + if (frameIndex >= zs->seekTable.tableLen) { + return ERROR(frameIndex_tooLarge); + } + + { + size_t const decompressedSize = + zs->seekTable.entries[frameIndex + 1].dOffset - + zs->seekTable.entries[frameIndex].dOffset; + if (dstSize < decompressedSize) { + return ERROR(dstSize_tooSmall); } + return ZSTD_seekable_decompress( + zs, dst, zs->seekTable.entries[frameIndex].dOffset, + decompressedSize); } } diff --git a/lib/common/error_private.c b/lib/common/error_private.c index f32c6abda..c94ea181c 100644 --- a/lib/common/error_private.c +++ b/lib/common/error_private.c @@ -39,7 +39,7 @@ const char* ERR_getErrorString(ERR_enum code) case PREFIX(dictionary_wrong): return "Dictionary mismatch"; case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; - case PREFIX(needSeek): return "Wrong file position, a seek is required to continue"; + case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; case PREFIX(maxCode): default: return 
notErrorCode; } diff --git a/lib/common/zstd_errors.h b/lib/common/zstd_errors.h index d11c1ba21..de0fc8984 100644 --- a/lib/common/zstd_errors.h +++ b/lib/common/zstd_errors.h @@ -59,7 +59,7 @@ typedef enum { ZSTD_error_dictionary_wrong, ZSTD_error_dictionaryCreation_failed, ZSTD_error_frameIndex_tooLarge, - ZSTD_error_needSeek, + ZSTD_error_seekableIO, ZSTD_error_maxCode } ZSTD_ErrorCode;