From e47c4e5f8ee18bf7b7ba442ea1a6b6f0543b08e0 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 5 Dec 2015 09:23:53 +0100 Subject: [PATCH] strengthened bufferless streaming decompression --- lib/zstd_compress.c | 22 +++++++++++----------- programs/Makefile | 2 +- programs/fuzzer.c | 41 ++++++++++++++++++++++++++++++++++++----- programs/zstdcli.c | 2 +- 4 files changed, 49 insertions(+), 18 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 6d2fd2cb5..c18d8f86c 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -769,12 +769,12 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, /* init */ ZSTD_resetSeqStore(seqStorePtr); - if (ip < base+4) + if (ip < lowest+4) { - hashTable[ZSTD_hashPtr(base+1, hBits, mls)] = 1; - hashTable[ZSTD_hashPtr(base+2, hBits, mls)] = 2; - hashTable[ZSTD_hashPtr(base+3, hBits, mls)] = 3; - ip = base+4; + hashTable[ZSTD_hashPtr(lowest+1, hBits, mls)] = zc->dictLimit+1; + hashTable[ZSTD_hashPtr(lowest+2, hBits, mls)] = zc->dictLimit+2; + hashTable[ZSTD_hashPtr(lowest+3, hBits, mls)] = zc->dictLimit+3; + ip = lowest+4; } /* Main Search Loop */ @@ -1535,6 +1535,7 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, const BYTE* anchor = istart; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base + ctx->dictLimit; size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; const U32 maxSearches = 1 << ctx->params.searchLog; @@ -1547,7 +1548,7 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, /* init */ ZSTD_resetSeqStore(seqStorePtr); - if (((ip-ctx->base) - ctx->dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; + if ((ip-base) < REPCODE_STARTVALUE) ip = base + REPCODE_STARTVALUE; /* Match Loop */ while (ip < ilimit) @@ -1572,7 +1573,7 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, matchLength = ml2, start = ip, offset=offsetFound; } - if (matchLength < MINMATCH) + if (matchLength < MINMATCH) { ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ continue; @@ -1633,7 +1634,7 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, /* catch up */ if (offset) { - while ((start>anchor) && (start>ctx->base+offset) && (start[-1] == start[-1-offset])) /* only search for offset within prefix */ + while ((start>anchor) && (start>base+offset) && (start[-1] == start[-1-offset])) /* only search for offset within prefix */ { start--; matchLength++; } offset_2 = offset_1; offset_1 = offset; } @@ -1990,7 +1991,6 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* zc, void* dst, size_t dstSize, const void* src, size_t srcSize) { - U32 adressOverflow = 0; const BYTE* const ip = (const BYTE*) src; /* Check if blocks follow each other */ @@ -2001,13 +2001,13 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* zc, zc->lowLimit = zc->dictLimit; zc->dictLimit = (U32)(zc->nextSrc - zc->base); zc->dictBase = zc->base; - if ((size_t)zc->base < delta) adressOverflow = zc->lowLimit; zc->base -= delta; zc->nextToUpdate = zc->dictLimit; + if (zc->dictLimit - zc->lowLimit < 8) zc->lowLimit = zc->dictLimit; /* too small extDict */ } /* preemptive overflow correction */ - if (adressOverflow || (zc->lowLimit > (1<<30) )) + if ((zc->base > ip) || (zc->lowLimit > (1<<30) )) { U32 correction = zc->lowLimit-1; ZSTD_reduceIndex(zc, correction); diff --git a/programs/Makefile b/programs/Makefile index c2a1ca267..6e9d6b56f 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -281,7 +281,7 @@ valgrindTest: zstd datagen fuzzer fullbench zbufftest ./datagen -g64MB > tmp valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp $(VOID) @rm tmp - valgrind --leak-check=yes --error-exitcode=1 ./fuzzer -i1000 -t1 + valgrind --leak-check=yes --error-exitcode=1 ./fuzzer -T1mn -t1 valgrind --leak-check=yes --error-exitcode=1 ./fullbench -i1 valgrind --leak-check=yes --error-exitcode=1 ./zbufftest -T1mn diff --git a/programs/fuzzer.c b/programs/fuzzer.c index 0079eebb9..152d1c149 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -261,6 +261,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit BYTE* srcBuffer; BYTE* cBuffer; BYTE* dstBuffer; + BYTE* mirrorBuffer; size_t srcBufferSize = (size_t)1<= dstBufferSize) maxTestSize = dstBufferSize-1; totalTestSize = 0; cSize = ZSTD_compressBegin(ctx, cBuffer, cBufferSize, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1); for (n=0; n maxTestSize) break; errorCode = ZSTD_compressContinue(ctx, cBuffer+cSize, cBufferSize-cSize, srcBuffer+sampleStart, sampleSize); CHECK (ZSTD_isError(errorCode), "multi-segments compression error : %s", ZSTD_getErrorName(errorCode)); cSize += errorCode; XXH64_update(&crc64, srcBuffer+sampleStart, sampleSize); + memcpy(mirrorBuffer + totalTestSize, srcBuffer+sampleStart, sampleSize); totalTestSize += sampleSize; - - if (totalTestSize > maxTestSize) break; } errorCode = ZSTD_compressEnd(ctx, cBuffer+cSize, cBufferSize-cSize); CHECK (ZSTD_isError(errorCode), "multi-segments epilogue error : %s", ZSTD_getErrorName(errorCode)); cSize += errorCode; crcOrig = XXH64_digest(&crc64); + + /* streaming decompression test */ + errorCode = ZSTD_resetDCtx(dctx); + CHECK (ZSTD_isError(errorCode), "cannot init DCtx : %s", ZSTD_getErrorName(errorCode)); + totalCSize = 0; + totalGenSize = 0; + while (totalCSize < cSize) + { + size_t inSize = ZSTD_nextSrcSizeToDecompress(dctx); + size_t genSize = ZSTD_decompressContinue(dctx, dstBuffer+totalGenSize, dstBufferSize-totalGenSize, cBuffer+totalCSize, inSize); + CHECK (ZSTD_isError(genSize), "streaming decompression error : %s", ZSTD_getErrorName(genSize)); + totalGenSize += genSize; + totalCSize += inSize; + } + CHECK (ZSTD_nextSrcSizeToDecompress(dctx) != 0, "frame not fully decoded"); + CHECK (totalGenSize != totalTestSize, "decompressed data : wrong size") + CHECK (totalCSize != cSize, "compressed data should be fully read") + crcDest = XXH64(dstBuffer, totalTestSize, 0); + if (crcDest!=crcOrig) + errorCode = findDiff(mirrorBuffer, dstBuffer, totalTestSize); + CHECK (crcDest!=crcOrig, "streaming decompressed data corrupted : byte %u / %u (%02X!=%02X)", + (U32)errorCode, (U32)totalTestSize, dstBuffer[errorCode], mirrorBuffer[errorCode]); + } DISPLAY("\r%u fuzzer tests completed \n", testNb-1); _cleanup: ZSTD_freeCCtx(ctx); + ZSTD_freeDCtx(dctx); free(cNoiseBuffer[0]); free(cNoiseBuffer[1]); free(cNoiseBuffer[2]); @@ -483,6 +513,7 @@ _cleanup: free(cNoiseBuffer[4]); free(cBuffer); free(dstBuffer); + free(mirrorBuffer); return result; _output_error: diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 8c80b1573..6f23c09a8 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -119,7 +119,7 @@ static int usage(const char* programName) DISPLAY( " with no FILE, or when FILE is - , read standard input\n"); DISPLAY( "Arguments :\n"); DISPLAY( " -1 : Fast compression (default) \n"); - DISPLAY( " -9 : High compression \n"); + DISPLAY( " -19 : High compression \n"); DISPLAY( " -d : decompression (default for %s extension)\n", ZSTD_EXTENSION); //DISPLAY( " -z : force compression\n"); DISPLAY( " -f : overwrite output without prompting \n");