From 531a4273c0c8536c9fd0d914e4c4d7f9c1030a1c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 15 Jun 2016 19:02:11 +0200 Subject: [PATCH 01/20] stronger dictionary compression tests --- programs/playTests.sh | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/programs/playTests.sh b/programs/playTests.sh index 3be4c7775..6dafb630a 100755 --- a/programs/playTests.sh +++ b/programs/playTests.sh @@ -139,22 +139,32 @@ $ECHO "\n**** dictionary tests **** " ./datagen -g1M | $MD5SUM > tmp1 ./datagen -g1M | $ZSTD -D tmpDict | $ZSTD -D tmpDict -dvq | $MD5SUM > tmp2 diff -q tmp1 tmp2 -$ECHO "Create first dictionary" +$ECHO "- Create first dictionary" $ZSTD --train *.c -o tmpDict cp zstdcli.c tmp $ZSTD -f tmp -D tmpDict $ZSTD -d tmp.zst -D tmpDict -of result diff zstdcli.c result -$ECHO "Create second (different) dictionary" +$ECHO "- Create second (different) dictionary" $ZSTD --train *.c *.h -o tmpDictC $ZSTD -d tmp.zst -D tmpDictC -of result && die "wrong dictionary not detected!" -$ECHO "Create dictionary with short dictID" +$ECHO "- Create dictionary with short dictID" $ZSTD --train *.c --dictID 1 -o tmpDict1 cmp tmpDict tmpDict1 && die "dictionaries should have different ID !" 
-$ECHO "Compress without dictID" +$ECHO "- Compress without dictID" $ZSTD -f tmp -D tmpDict1 --no-dictID $ZSTD -d tmp.zst -D tmpDict -of result diff zstdcli.c result +$ECHO "- Compress multiple files with dictionary" +cat *.c *.h | $MD5SUM > tmp1 +rm -rf dirTestDict +mkdir dirTestDict +cp *.c dirTestDict +cp *.h dirTestDict +$ZSTD -f dirTestDict/* -D tmpDictC +$ZSTD -d dirTestDict/*.zst -D tmpDictC -c | $MD5SUM > tmp2 +diff -q tmp1 tmp2 +rm -rf dirTestDict rm tmp* From 3915545605c679b67b6707726b66e06c29f530af Mon Sep 17 00:00:00 2001 From: Tobias Ibounig Date: Wed, 15 Jun 2016 22:20:46 +0200 Subject: [PATCH 02/20] Fix Max Compression Level in zstd.1 --- programs/zstd.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/zstd.1 b/programs/zstd.1 index cc62eb30f..d7760f78f 100644 --- a/programs/zstd.1 +++ b/programs/zstd.1 @@ -43,7 +43,7 @@ It also features a very fast decoder, with speed > 500 MB/s per core. .SH OPTIONS .TP .B \-# - # compression level [1-21] (default:1) + # compression level [1-22] (default:1) .TP .BR \-d ", " --decompress decompression From 9b998e4d0846ac702153caaa36bfc2fb4654a038 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 15 Jun 2016 23:11:20 +0200 Subject: [PATCH 03/20] Fixed decompression of literals in dictionary mode --- lib/decompress/huf_decompress.c | 2 +- programs/playTests.sh | 46 ++++++++++++++++----------------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index 5a998ee2d..1580b3750 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -625,7 +625,7 @@ size_t HUF_decompress1X4_usingDTable( const HUF_DTable* DTable) { DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 0) return ERROR(GENERIC); + if (dtd.tableType != 1) return ERROR(GENERIC); return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); } diff --git a/programs/playTests.sh 
b/programs/playTests.sh index 6dafb630a..60d413c99 100755 --- a/programs/playTests.sh +++ b/programs/playTests.sh @@ -133,6 +133,29 @@ diff tmpSparse2M tmpSparseRegenerated rm tmpSparse* +$ECHO "\n**** multiple files tests **** " + +./datagen -s1 > tmp1 2> /dev/null +./datagen -s2 -g100K > tmp2 2> /dev/null +./datagen -s3 -g1M > tmp3 2> /dev/null +$ZSTD -f tmp* +$ECHO "compress tmp* : " +ls -ls tmp* +rm tmp1 tmp2 tmp3 +$ECHO "decompress tmp* : " +$ZSTD -df *.zst +ls -ls tmp* +$ECHO "compress tmp* into stdout > tmpall : " +$ZSTD -c tmp1 tmp2 tmp3 > tmpall +ls -ls tmp* +$ECHO "decompress tmpall* into stdout > tmpdec : " +cp tmpall tmpall2 +$ZSTD -dc tmpall* > tmpdec +ls -ls tmp* +$ECHO "compress multiple files including a missing one (notHere) : " +$ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!" + + $ECHO "\n**** dictionary tests **** " ./datagen > tmpDict @@ -168,29 +191,6 @@ rm -rf dirTestDict rm tmp* -$ECHO "\n**** multiple files tests **** " - -./datagen -s1 > tmp1 2> /dev/null -./datagen -s2 -g100K > tmp2 2> /dev/null -./datagen -s3 -g1M > tmp3 2> /dev/null -$ZSTD -f tmp* -$ECHO "compress tmp* : " -ls -ls tmp* -rm tmp1 tmp2 tmp3 -$ECHO "decompress tmp* : " -$ZSTD -df *.zst -ls -ls tmp* -$ECHO "compress tmp* into stdout > tmpall : " -$ZSTD -c tmp1 tmp2 tmp3 > tmpall -ls -ls tmp* -$ECHO "decompress tmpall* into stdout > tmpdec : " -cp tmpall tmpall2 -$ZSTD -dc tmpall* > tmpdec -ls -ls tmp* -$ECHO "compress multiple files including a missing one (notHere) : " -$ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!" 
- - $ECHO "\n**** integrity tests **** " $ECHO "test one file (tmp1.zst) " From 1a7b8fbc24e855d33474b783cd453568dc2dc686 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 15 Jun 2016 23:33:38 +0200 Subject: [PATCH 04/20] fixed dictionary tests --- programs/playTests.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/programs/playTests.sh b/programs/playTests.sh index 60d413c99..a7edbeb94 100755 --- a/programs/playTests.sh +++ b/programs/playTests.sh @@ -179,14 +179,14 @@ $ZSTD -f tmp -D tmpDict1 --no-dictID $ZSTD -d tmp.zst -D tmpDict -of result diff zstdcli.c result $ECHO "- Compress multiple files with dictionary" -cat *.c *.h | $MD5SUM > tmp1 rm -rf dirTestDict mkdir dirTestDict cp *.c dirTestDict cp *.h dirTestDict +cat dirTestDict/* | $MD5SUM > tmph1 # note : we expect same file order to generate same hash $ZSTD -f dirTestDict/* -D tmpDictC -$ZSTD -d dirTestDict/*.zst -D tmpDictC -c | $MD5SUM > tmp2 -diff -q tmp1 tmp2 +$ZSTD -d dirTestDict/*.zst -D tmpDictC -c | $MD5SUM > tmph2 +diff -q tmph1 tmph2 rm -rf dirTestDict rm tmp* @@ -194,6 +194,8 @@ rm tmp* $ECHO "\n**** integrity tests **** " $ECHO "test one file (tmp1.zst) " +./datagen > tmp1 +$ZSTD tmp1 $ZSTD -t tmp1.zst $ZSTD --test tmp1.zst $ECHO "test multiple files (*.zst) " From efd0b4993a44ed09e7b7cc3629a2def8cf6eaf8e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 16 Jun 2016 00:53:56 +0200 Subject: [PATCH 05/20] fixed fuzzer error (inter-block repeated offsets) --- lib/compress/zstd_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index b8f7b32a5..220fadc65 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1847,7 +1847,7 @@ _storeSequence: /* Save reps for next block */ { int i; for (i=0; ibase); /* in case some zero are left */ ctx->savedRep[i] = rep[i]; } } From 52a0622beb1812dcc39da7c085e4793569fa07e6 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: 
Wed, 15 Jun 2016 13:53:34 +0200 Subject: [PATCH 06/20] RepsCodes are saved into Dict (uncomplete : need decompression to regenerate them) --- lib/common/zstd_internal.h | 2 +- lib/compress/fse_compress.c | 2 +- lib/compress/zstd_compress.c | 28 ++++---- lib/dictBuilder/zdict.c | 126 +++++++++++++++++++++++++---------- lib/dictBuilder/zdict.h | 2 +- 5 files changed, 111 insertions(+), 49 deletions(-) diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 17ae1a77a..0909955a9 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -64,7 +64,7 @@ #endif #define ZSTD_OPT_NUM (1<<12) -#define ZSTD_DICT_MAGIC 0xEC30A437 +#define ZSTD_DICT_MAGIC 0xEC30A437 /* v0.7 */ #define ZSTD_REP_NUM 3 #define ZSTD_REP_INIT ZSTD_REP_NUM diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c index 5c804dcaf..192d55026 100644 --- a/lib/compress/fse_compress.c +++ b/lib/compress/fse_compress.c @@ -256,7 +256,7 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, bitStream += count << bitCount; bitCount += nbBits; bitCount -= (count>=1; } if (bitCount>16) { diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 220fadc65..b1edaff3b 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2342,45 +2342,49 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t dictSize) { /* note : magic number already checked */ - size_t const dictSizeStart = dictSize; + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; { size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize); if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted); - dict = (const char*)dict + hufHeaderSize; - dictSize -= hufHeaderSize; + dictPtr += hufHeaderSize; } { short offcodeNCount[MaxOff+1]; unsigned offcodeMaxValue = 
MaxOff, offcodeLog = OffFSELog; - size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize); + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); { size_t const errorCode = FSE_buildCTable(zc->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog); if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); } - dict = (const char*)dict + offcodeHeaderSize; - dictSize -= offcodeHeaderSize; + dictPtr += offcodeHeaderSize; } { short matchlengthNCount[MaxML+1]; unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog; - size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize); + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); { size_t const errorCode = FSE_buildCTable(zc->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog); if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); } - dict = (const char*)dict + matchlengthHeaderSize; - dictSize -= matchlengthHeaderSize; + dictPtr += matchlengthHeaderSize; } { short litlengthNCount[MaxLL+1]; unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog; - size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize); + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); { size_t const errorCode = FSE_buildCTable(zc->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog); if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); } - 
dictSize -= litlengthHeaderSize; + dictPtr += litlengthHeaderSize; } + if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted); + zc->rep[0] = MEM_readLE32(dictPtr+0); if (zc->rep[0] >= dictSize) return ERROR(dictionary_corrupted); + zc->rep[1] = MEM_readLE32(dictPtr+4); if (zc->rep[1] >= dictSize) return ERROR(dictionary_corrupted); + zc->rep[2] = MEM_readLE32(dictPtr+8); if (zc->rep[2] >= dictSize) return ERROR(dictionary_corrupted); + dictPtr += 12; + zc->flagStaticTables = 1; - return (dictSizeStart-dictSize); + return dictPtr - (const BYTE*)dict; } /** ZSTD_compress_insertDictionary() : diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index 2e15cbbf8..ace0fc154 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -578,9 +578,10 @@ typedef struct void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */ } EStats_ress_t; +#define MAXREPOFFSET 1024 static void ZDICT_countEStats(EStats_ress_t esr, - U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, + U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets, const void* src, size_t srcSize) { const seqStore_t* seqStorePtr; @@ -614,6 +615,17 @@ static void ZDICT_countEStats(EStats_ress_t esr, size_t u; for (u=0; uoffsetStart; + U32 offset1 = offsetPtr[0] - 3; + U32 offset2 = offsetPtr[1] - 3; + if (offset1 >= MAXREPOFFSET) offset1 = 0; + if (offset2 >= MAXREPOFFSET) offset2 = 0; + repOffsets[offset1] += 3; + repOffsets[offset2] += 1; + } + } /* @@ -629,12 +641,29 @@ static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles) static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles) { - size_t total; + size_t total=0; unsigned u; - for (u=0, total=0; u0; u--) { + offsetCount_t tmp; + if (table[u-1].count >= table[u].count) break; + tmp = table[u-1]; + table[u-1] = table[u]; + table[u] = tmp; + } +} + + #define OFFCODE_MAX 18 /* only applicable to first block */ static 
size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, unsigned compressionLevel, @@ -649,6 +678,8 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, short matchLengthNCount[MaxML+1]; U32 litLengthCount[MaxLL+1]; short litLengthNCount[MaxLL+1]; + U32 repOffset[MAXREPOFFSET] = { 0 }; + offsetCount_t bestRepOffset[ZSTD_REP_NUM+1]; EStats_ress_t esr; ZSTD_parameters params; U32 u, huffLog = 12, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total; @@ -656,12 +687,15 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, size_t eSize = 0; size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles); size_t const averageSampleSize = totalSrcSize / nbFiles; + BYTE* dstPtr = (BYTE*)dstBuffer; /* init */ for (u=0; u<256; u++) countLit[u]=1; /* any character must be described */ for (u=0; u<=OFFCODE_MAX; u++) offcodeCount[u]=1; for (u=0; u<=MaxML; u++) matchLengthCount[u]=1; for (u=0; u<=MaxLL; u++) litLengthCount[u]=1; + repOffset[1] = repOffset[4] = repOffset[8] = 1; + memset(bestRepOffset, 0, sizeof(bestRepOffset)); esr.ref = ZSTD_createCCtx(); esr.zc = ZSTD_createCCtx(); esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX); @@ -679,7 +713,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, /* collect stats on all files */ for (u=0; u Date: Wed, 15 Jun 2016 14:05:07 +0200 Subject: [PATCH 07/20] decoder restores repOffsets from dictionary --- lib/decompress/zstd_decompress.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 82d54fb5c..6d0903830 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1186,47 +1186,50 @@ static void ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSi dctx->previousDstEnd = (const char*)dict + dictSize; } -static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t const 
dictSizeStart) +static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t const dictSize) { - size_t dictSize = dictSizeStart; + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dict + dictSize; { size_t const hSize = HUF_readDTableX4(dctx->hufTable, dict, dictSize); if (HUF_isError(hSize)) return ERROR(dictionary_corrupted); - dict = (const char*)dict + hSize; - dictSize -= hSize; + dictPtr += hSize; } { short offcodeNCount[MaxOff+1]; U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSELog; - size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize); + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); { size_t const errorCode = FSE_buildDTable(dctx->OffTable, offcodeNCount, offcodeMaxValue, offcodeLog); if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); } - dict = (const char*)dict + offcodeHeaderSize; - dictSize -= offcodeHeaderSize; + dictPtr += offcodeHeaderSize; } { short matchlengthNCount[MaxML+1]; unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog; - size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize); + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); { size_t const errorCode = FSE_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog); if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); } - dict = (const char*)dict + matchlengthHeaderSize; - dictSize -= matchlengthHeaderSize; + dictPtr += matchlengthHeaderSize; } { short litlengthNCount[MaxLL+1]; unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog; - size_t const 
litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize); + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); { size_t const errorCode = FSE_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog); if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); } - dictSize -= litlengthHeaderSize; + dictPtr += litlengthHeaderSize; } + dctx->rep[0] = MEM_readLE32(dictPtr+0); + dctx->rep[1] = MEM_readLE32(dictPtr+4); + dctx->rep[2] = MEM_readLE32(dictPtr+8); + dictPtr += 12; + dctx->litEntropy = dctx->fseEntropy = 1; - return dictSizeStart - dictSize; + return dictPtr - (const BYTE*)dict; } static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) From 736d419289591260f51a515b9e46384d8cc99322 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 15 Jun 2016 18:48:51 +0200 Subject: [PATCH 08/20] strengthened dict loading on decompresson side --- lib/compress/zstd_compress.c | 26 +++++++++++++++----------- lib/decompress/zstd_decompress.c | 9 +++++---- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index b1edaff3b..1ae321838 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2335,17 +2335,21 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t /* Dictionary format : Magic == ZSTD_DICT_MAGIC (4 bytes) HUF_writeCTable(256) + FSE_writeNCount(ml) + FSE_writeNCount(off) + FSE_writeNCount(ll) + RepOffsets Dictionary content */ /*! 
ZSTD_loadDictEntropyStats() : - @return : size read from dictionary */ -static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t dictSize) + @return : size read from dictionary + note : magic number supposed already checked */ +static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) { - /* note : magic number already checked */ const BYTE* dictPtr = (const BYTE*)dict; const BYTE* const dictEnd = dictPtr + dictSize; - { size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize); + { size_t const hufHeaderSize = HUF_readCTable(cctx->hufTable, 255, dict, dictSize); if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted); dictPtr += hufHeaderSize; } @@ -2354,7 +2358,7 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog; size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); - { size_t const errorCode = FSE_buildCTable(zc->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog); + { size_t const errorCode = FSE_buildCTable(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog); if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); } dictPtr += offcodeHeaderSize; } @@ -2363,7 +2367,7 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog; size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); - { size_t const errorCode = FSE_buildCTable(zc->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog); + { size_t const errorCode = FSE_buildCTable(cctx->matchlengthCTable, 
matchlengthNCount, matchlengthMaxValue, matchlengthLog); if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); } dictPtr += matchlengthHeaderSize; } @@ -2372,18 +2376,18 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog; size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); - { size_t const errorCode = FSE_buildCTable(zc->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog); + { size_t const errorCode = FSE_buildCTable(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog); if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); } dictPtr += litlengthHeaderSize; } if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted); - zc->rep[0] = MEM_readLE32(dictPtr+0); if (zc->rep[0] >= dictSize) return ERROR(dictionary_corrupted); - zc->rep[1] = MEM_readLE32(dictPtr+4); if (zc->rep[1] >= dictSize) return ERROR(dictionary_corrupted); - zc->rep[2] = MEM_readLE32(dictPtr+8); if (zc->rep[2] >= dictSize) return ERROR(dictionary_corrupted); + cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted); + cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted); + cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted); dictPtr += 12; - zc->flagStaticTables = 1; + cctx->flagStaticTables = 1; return dictPtr - (const BYTE*)dict; } diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 6d0903830..9bc888961 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1186,7 +1186,7 @@ static void ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSi dctx->previousDstEnd = (const 
char*)dict + dictSize; } -static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t const dictSize) +static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* const dict, size_t const dictSize) { const BYTE* dictPtr = (const BYTE*)dict; const BYTE* const dictEnd = dict + dictSize; @@ -1223,9 +1223,10 @@ static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t const d dictPtr += litlengthHeaderSize; } - dctx->rep[0] = MEM_readLE32(dictPtr+0); - dctx->rep[1] = MEM_readLE32(dictPtr+4); - dctx->rep[2] = MEM_readLE32(dictPtr+8); + if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted); + dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted); + dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted); + dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted); dictPtr += 12; dctx->litEntropy = dctx->fseEntropy = 1; From ad39b7a7189c43a19af63aac11396ac38c1825ae Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 16 Jun 2016 01:14:41 +0200 Subject: [PATCH 09/20] zdict stores standard rep-offset. It can use custom ones, but the proper formula and impact on statistics is not done yet. 
--- lib/dictBuilder/zdict.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index ace0fc154..0814581fe 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -727,6 +727,13 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, } huffLog = (U32)errorCode; + /* looking for most common first offsets */ + { U32 offset; + for (offset=1; offset Date: Thu, 16 Jun 2016 01:41:50 +0200 Subject: [PATCH 10/20] fixed ptr arithmetic warning --- lib/decompress/zstd_decompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 9bc888961..1763499d1 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1189,7 +1189,7 @@ static void ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSi static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* const dict, size_t const dictSize) { const BYTE* dictPtr = (const BYTE*)dict; - const BYTE* const dictEnd = dict + dictSize; + const BYTE* const dictEnd = dictPtr + dictSize; { size_t const hSize = HUF_readDTableX4(dctx->hufTable, dict, dictSize); if (HUF_isError(hSize)) return ERROR(dictionary_corrupted); From 803c05ec7e9890d4d4d4dc87e734fc9279825b12 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 16 Jun 2016 11:32:57 +0200 Subject: [PATCH 11/20] fuzzer : tests with high id are run without need to change finalTestNb --- programs/fuzzer.c | 2 ++ tests/test-zstd-versions.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/programs/fuzzer.c b/programs/fuzzer.c index 480fd3072..42d3640da 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -851,6 +851,8 @@ int main(int argc, const char** argv) DISPLAY("Seed = %u\n", seed); if (proba!=FUZ_compressibility_default) DISPLAY("Compressibility : %u%%\n", proba); + if (nbTests < testNb) nbTests = testNb; + if 
(testNb==0) result = basicUnitTests(0, ((double)proba) / 100); /* constant seed for predictability */ if (!result) diff --git a/tests/test-zstd-versions.py b/tests/test-zstd-versions.py index 437cd4c01..34b584087 100755 --- a/tests/test-zstd-versions.py +++ b/tests/test-zstd-versions.py @@ -130,7 +130,7 @@ if __name__ == '__main__': # Build all release zstd for tag in tags: os.chdir(base_dir) - dst_zstd = '{}/zstd.{}' .format(tmp_dir, tag) # /path/to/zstd/tests/versionsTest/zstd. + dst_zstd = '{}/zstd.{}'.format(tmp_dir, tag) # /path/to/zstd/tests/versionsTest/zstd. if not os.path.isfile(dst_zstd) or tag == head: if tag != head: r_dir = '{}/{}'.format(tmp_dir, tag) # /path/to/zstd/tests/versionsTest/ From 23ba41533a9d6592f5b71f8dd3b7655f61f35f9d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 16 Jun 2016 13:20:46 +0200 Subject: [PATCH 12/20] Fixed zstd_opt encoding error with repeat-offsets --- lib/compress/zstd_opt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h index 703b568e2..8b15bf6ad 100644 --- a/lib/compress/zstd_opt.h +++ b/lib/compress/zstd_opt.h @@ -574,7 +574,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, best_mlen = minMatch; { U32 i; for (i=0; i Date: Thu, 16 Jun 2016 13:38:10 +0200 Subject: [PATCH 13/20] better seed randomization for systems with poor clock() resolutation --- programs/fuzzer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/programs/fuzzer.c b/programs/fuzzer.c index 42d3640da..d1dfe51e8 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -847,7 +847,12 @@ int main(int argc, const char** argv) /* Get Seed */ DISPLAY("Starting zstd tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), ZSTD_VERSION_STRING); - if (!seedset) seed = (U32)(clock() % 10000); + if (!seedset) { + time_t const t = time(NULL); + U32 const h = XXH32(&t, sizeof(t), 1); + seed = h % 10000; + } + DISPLAY("Seed = %u\n", seed); if 
(proba!=FUZ_compressibility_default) DISPLAY("Compressibility : %u%%\n", proba); From ec2031e2a741e3ecb5b6ffd7a035663ea2ceeb17 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 16 Jun 2016 14:08:48 +0200 Subject: [PATCH 14/20] update readme for 0.7 --- README.md | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 5173c9f9b..7b58e5e72 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ - **Zstd**, short for Zstandard, is a fast lossless compression algorithm, targeting real-time compression scenarios at zlib-level compression ratio. + **Zstd**, short for Zstandard, is a fast lossless compression algorithm, targeting real-time compression scenarios at zlib-level and better compression ratios. It is provided as a BSD-license package, hosted on Github. @@ -7,7 +7,7 @@ It is provided as a BSD-license package, hosted on Github. |master | [![Build Status](https://travis-ci.org/Cyan4973/zstd.svg?branch=master)](https://travis-ci.org/Cyan4973/zstd) | |dev | [![Build Status](https://travis-ci.org/Cyan4973/zstd.svg?branch=dev)](https://travis-ci.org/Cyan4973/zstd) | -As a reference, several fast compression algorithms were tested and compared to [zlib] on a Core i7-3930K CPU @ 4.5GHz, using [lzbench], an open-source in-memory benchmark by @inikep compiled with gcc 5.2.1, on the [Silesia compression corpus]. +As a reference, several fast compression algorithms were tested and compared on a Core i7-3930K CPU @ 4.5GHz, using [lzbench], an open-source in-memory benchmark by @inikep compiled with gcc 5.2.1, with the [Silesia compression corpus]. 
[lzbench]: https://github.com/inikep/lzbench [Silesia compression corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia @@ -16,7 +16,7 @@ As a reference, several fast compression algorithms were tested and compared to |Name | Ratio | C.speed | D.speed | |-----------------|-------|--------:|--------:| | | | MB/s | MB/s | -|**zstd 0.6.0 -1**|**2.877**|**330**| **915** | +|**zstd 0.7.0 -1**|**2.877**|**325**| **930** | | [zlib] 1.2.8 -1 | 2.730 | 95 | 360 | | brotli -0 | 2.708 | 220 | 430 | | QuickLZ 1.5 | 2.237 | 510 | 605 | @@ -28,16 +28,16 @@ As a reference, several fast compression algorithms were tested and compared to [zlib]:http://www.zlib.net/ [LZ4]: http://www.lz4.org/ -Zstd can also offer stronger compression ratio at the cost of compression speed. -Speed vs Compression trade-off is configurable by small increment. Decompression speed is preserved and remain roughly the same at all settings, a property shared by most LZ compression algorithms, such as [zlib]. +Zstd can also offer stronger compression ratios at the cost of compression speed. +Speed vs Compression trade-off is configurable by small increment. Decompression speed is preserved and remain roughly the same at all settings, a property shared by most LZ compression algorithms, such as [zlib] or lzma. -The following test is run on a Core i7-3930K CPU @ 4.5GHz, using [lzbench], an open-source in-memory benchmark by @inikep compiled with gcc 5.2.1, on the [Silesia compression corpus]. +The following tests were run on a Core i7-3930K CPU @ 4.5GHz, using [lzbench], an open-source in-memory benchmark by @inikep compiled with gcc 5.2.1, on the [Silesia compression corpus]. 
Compression Speed vs Ratio | Decompression Speed ---------------------------|-------------------- ![Compression Speed vs Ratio](images/Cspeed4.png "Compression Speed vs Ratio") | ![Decompression Speed](images/Dspeed4.png "Decompression Speed") -Several algorithms can produce higher compression ratio at slower speed, falling outside of the graph. +Several algorithms can produce higher compression ratio but at slower speed, falling outside of the graph. For a larger picture including very slow modes, [click on this link](images/DCspeed5.png) . @@ -74,8 +74,10 @@ Hence, deploying one dictionary per type of data will provide the greater benefi ### Status -Zstd is in development. The internal format evolves to reach better performance. "Final Format" is projected H1 2016, and will be tagged `v1.0`. Zstd offers legacy support, meaning any data compressed by any version >= 0.1 (therefore including current one) remain decodable in the future. -The library is also quite robust, able to withstand hazards situations, including invalid inputs. Library reliability has been tested using [Fuzz Testing](https://en.wikipedia.org/wiki/Fuzz_testing), with both [internal tools](programs/fuzzer.c) and [external ones](http://lcamtuf.coredump.cx/afl). Therefore, Zstandard is considered safe for production environments. +Zstd compression format has reached "Final status". It means it is planned to become the official stable zstd format and be tagged `v1.0`. The reason it's not yet tagged `v1.0` is that it currently performs its "validation period", making sure the format holds all its promises and nothing was missed. +Zstd library also offers legacy decoder support. Any data compressed by any version >= `v0.1` (hence including current one) remains decodable now and in the future. +The library has been validated using strong [fuzzer tests](https://en.wikipedia.org/wiki/Fuzz_testing), including both [internal tools](programs/fuzzer.c) and [external ones](http://lcamtuf.coredump.cx/afl). 
It's able to withstand hazard situations, including invalid inputs. +As a consequence, Zstandard is considered safe for, and is currently used in, production environments. ### Branch Policy From 4948f270b36e81f753e4f8d9c8dd4388103e8b76 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 16 Jun 2016 15:38:51 +0200 Subject: [PATCH 15/20] make room for reserved "information bit" in frame header --- NEWS | 5 +++-- lib/decompress/zstd_decompress.c | 8 +++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/NEWS b/NEWS index c7aeae98d..dc2d66bc1 100644 --- a/NEWS +++ b/NEWS @@ -5,8 +5,9 @@ New : Visual build scripts, by Christophe Chevalier New : Support for Sparse File-systems (do not use space for zero-filled sectors) New : Frame checksum support New : Support pass-through mode (when using `-df`) -New : API : dictionary files from custom content, by Giuseppe Ottaviano -New : API support for custom malloc/free functions +API : more efficient Dictionary API : `ZSTD_compress_usingCDict()`, `ZSTD_decompress_usingDDict()` +API : create dictionary files from custom content, by Giuseppe Ottaviano +API : support for custom malloc/free functions New : controllable Dictionary ID New : Support for skippable frames diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 1763499d1..b22021ed7 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -223,8 +223,10 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) // new 1 byte - FrameHeaderDescription : bit 0-1 : dictID (0, 1, 2 or 4 bytes) - bit 2-4 : reserved (must be zero) - bit 5 : SkippedWindowLog (if 1, WindowLog byte is not present) + bit 2 : checksumFlag + bit 3 : reserved (must be zero) + bit 4 : reserved (unused, can be any value) + bit 5 : Single Segment (if 1, WindowLog byte is not present) bit 6-7 : FrameContentFieldSize (0, 2, 4, or 8) if (SkippedWindowLog && !FrameContentFieldsize) FrameContentFieldsize=1; @@ -365,7 +367,7 
@@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t U32 windowSize = 0; U32 dictID = 0; U64 frameContentSize = 0; - if ((fhdByte & 0x18) != 0) return ERROR(frameParameter_unsupported); /* reserved bits */ + if ((fhdByte & 0x08) != 0) return ERROR(frameParameter_unsupported); /* reserved bits, which must be zero */ if (!directMode) { BYTE const wlByte = ip[pos++]; U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; From 61cc4f207eda14582acccdf5d97ed879d7b30e25 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 16 Jun 2016 15:44:30 +0200 Subject: [PATCH 16/20] Added build/README from @KrzysFR (#201) --- build/README.md | 51 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 build/README.md diff --git a/build/README.md b/build/README.md new file mode 100644 index 000000000..e1658493d --- /dev/null +++ b/build/README.md @@ -0,0 +1,51 @@ +Here are a few command lines for reference : + +### Build with Visual Studio 2013 for msvcr120.dll + +Running the following command will build both the `Release Win32` and `Release x64` versions: +```batch +build\build.VS2013.cmd +``` +The result of each build will be in the corresponding `build\bin\Release\{ARCH}\` folder. + +If you only need one architecture: +- Win32: `build\build.generic.cmd VS2013 Win32 Release v120` +- x64: `build\build.generic.cmd VS2013 x64 Release v120` + +If you want a Debug build: +- Win32: `build\build.generic.cmd VS2013 Win32 Debug v120` +- x64: `build\build.generic.cmd VS2013 x64 Debug v120` + +### Build with Visual Studio 2015 for msvcr140.dll + +Running the following command will build both the `Release Win32` and `Release x64` versions: +```batch +build\build.VS2015.cmd +``` +The result of each build will be in the corresponding `build\bin\Release\{ARCH}\` folder.
+ +If you only need one architecture: +- Win32: `build\build.generic.cmd VS2015 Win32 Release v140` +- x64: `build\build.generic.cmd VS2015 x64 Release v140` + +If you want a Debug build: +- Win32: `build\build.generic.cmd VS2015 Win32 Debug v140` +- x64: `build\build.generic.cmd VS2015 x64 Debug v140` + +### Build with Visual Studio 2015 for msvcr120.dll + +You need to invoke `build\build.generic.cmd` with the proper arguments: + +**For Win32** +```batch +build\build.generic.cmd VS2015 Win32 Release v120 +``` +The result of the build will be in the `build\bin\Release\Win32\` folder. + +**For x64** +```batch +build\build.generic.cmd VS2015 x64 Release v120 +``` +The result of the build will be in the `build\bin\Release\x64\` folder. + +If you want Debug builds, replace `Release` with `Debug`. From 201d82f5d06278deb43b3fcdefe518866dc5a7e0 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 16 Jun 2016 15:53:02 +0200 Subject: [PATCH 17/20] `.cmd` files use windows-style eol --- .gitattributes | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitattributes b/.gitattributes index da0f7a530..387080198 100644 --- a/.gitattributes +++ b/.gitattributes @@ -18,3 +18,4 @@ # Windows *.bat text eol=crlf +*.cmd text eol=crlf From 12d881e810ead2577b795942d9c37f031db27fa4 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 16 Jun 2016 16:08:40 +0200 Subject: [PATCH 18/20] Move `build` into `projects/build` --- {build => projects/build}/README.md | 0 {build => projects/build}/build.VS2010.cmd | 0 {build => projects/build}/build.VS2012.cmd | 0 {build => projects/build}/build.VS2013.cmd | 0 {build => projects/build}/build.VS2015.cmd | 0 {build => projects/build}/build.generic.cmd | 3 +-- 6 files changed, 1 insertion(+), 2 deletions(-) rename {build => projects/build}/README.md (100%) rename {build => projects/build}/build.VS2010.cmd (100%) rename {build => projects/build}/build.VS2012.cmd (100%) rename {build => projects/build}/build.VS2013.cmd (100%) rename {build =>
projects/build}/build.VS2015.cmd (100%) rename {build => projects/build}/build.generic.cmd (97%) diff --git a/build/README.md b/projects/build/README.md similarity index 100% rename from build/README.md rename to projects/build/README.md diff --git a/build/build.VS2010.cmd b/projects/build/build.VS2010.cmd similarity index 100% rename from build/build.VS2010.cmd rename to projects/build/build.VS2010.cmd diff --git a/build/build.VS2012.cmd b/projects/build/build.VS2012.cmd similarity index 100% rename from build/build.VS2012.cmd rename to projects/build/build.VS2012.cmd diff --git a/build/build.VS2013.cmd b/projects/build/build.VS2013.cmd similarity index 100% rename from build/build.VS2013.cmd rename to projects/build/build.VS2013.cmd diff --git a/build/build.VS2015.cmd b/projects/build/build.VS2015.cmd similarity index 100% rename from build/build.VS2015.cmd rename to projects/build/build.VS2015.cmd diff --git a/build/build.generic.cmd b/projects/build/build.generic.cmd similarity index 97% rename from build/build.generic.cmd rename to projects/build/build.generic.cmd index ed46c922f..362952340 100644 --- a/build/build.generic.cmd +++ b/projects/build/build.generic.cmd @@ -33,7 +33,7 @@ IF %msbuild_version% == VS2013 SET msbuild="%programfiles(x86)%\MSBuild\12.0\Bin IF %msbuild_version% == VS2015 SET msbuild="%programfiles(x86)%\MSBuild\14.0\Bin\MSBuild.exe" rem TODO: Visual Studio "15" (vNext) will use MSBuild 15.0 ? 
-SET project="%~p0\..\projects\VS2010\zstd.sln" +SET project="%~p0\..\VS2010\zstd.sln" SET msbuild_params=/verbosity:minimal /nologo /t:Clean,Build /p:Platform=%msbuild_platform% /p:Configuration=%msbuild_configuration% IF NOT "%msbuild_toolset%" == "" SET msbuild_params=%msbuild_params% /p:PlatformToolset=%msbuild_toolset% @@ -50,4 +50,3 @@ IF ERRORLEVEL 1 EXIT /B 1 echo # Success echo # OutDir: %output% echo # - From 510cff3570862eba04a09dd72c23298346902177 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 16 Jun 2016 16:39:55 +0200 Subject: [PATCH 19/20] minor comment change --- lib/common/fse.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/common/fse.h b/lib/common/fse.h index 6be3e5aa0..e711d0135 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -132,8 +132,8 @@ size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, /*! FSE_optimalTableLog(): dynamically downsize 'tableLog' when conditions are met. It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. - @return : recommended tableLog (necessarily <= initial 'tableLog') */ -unsigned FSE_optimalTableLog(unsigned tableLog, size_t srcSize, unsigned maxSymbolValue); + @return : recommended tableLog (necessarily <= 'maxTableLog') */ +unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); /*! 
FSE_normalizeCount(): normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) From 19cab46f2f2ae3e512c00df98043083101410810 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 17 Jun 2016 12:54:52 +0200 Subject: [PATCH 20/20] Joined `seqStore` initialization at dispatch point --- .gitignore | 1 + Makefile | 1 + lib/compress/zstd_compress.c | 11 ++--------- lib/compress/zstd_opt.h | 2 -- programs/.gitignore | 1 + 5 files changed, 5 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index a06c2afa2..181652401 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ *.dylib # Executables +zstd *.exe *.out *.app diff --git a/Makefile b/Makefile index 18db04245..77a67a231 100644 --- a/Makefile +++ b/Makefile @@ -51,6 +51,7 @@ all: zstdprogram: $(MAKE) -C $(PRGDIR) + mv $(PRGDIR)/zstd . zlibwrapper: $(MAKE) -C $(ZSTDDIR) all diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 1ae321838..b8d1d2c0a 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1128,7 +1128,6 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, size_t offset_1=cctx->rep[0], offset_2=cctx->rep[1]; /* init */ - ZSTD_resetSeqStore(seqStorePtr); ip += (ip==lowest); { U32 const maxRep = (U32)(ip-lowest); if (offset_1 > maxRep) offset_1 = 0; @@ -1239,7 +1238,6 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1]; /* init */ - ZSTD_resetSeqStore(seqStorePtr); /* skip first position to avoid read overflow during repcode match check */ hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); ip++; @@ -1743,7 +1741,6 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, /* init */ ip += (ip==base); ctx->nextToUpdate3 = ctx->nextToUpdate; - ZSTD_resetSeqStore(seqStorePtr); { U32 i; U32 const maxRep = (U32)(ip-base); for (i=0; irep[i]; } ctx->nextToUpdate3 = ctx->nextToUpdate; - ZSTD_resetSeqStore(seqStorePtr); ip += (ip == prefixStart); /* Match Loop */ @@ 
-2097,11 +2093,7 @@ typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t sr static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) { static const ZSTD_blockCompressor blockCompressor[2][6] = { -#if 1 { ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt }, -#else - { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict }, -#endif { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict } }; @@ -2111,8 +2103,9 @@ static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit); + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit); if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0; /* don't even attempt compression below a certain srcSize */ + ZSTD_resetSeqStore(&(zc->seqStore)); blockCompressor(zc, src, srcSize); return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize); } diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h index 8b15bf6ad..97b1623ba 100644 --- a/lib/compress/zstd_opt.h +++ b/lib/compress/zstd_opt.h @@ -465,7 +465,6 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, /* init */ ctx->nextToUpdate3 = ctx->nextToUpdate; - ZSTD_resetSeqStore(seqStorePtr); ZSTD_rescaleFreqs(seqStorePtr); ip += (ip==prefixStart); { U32 i; for (i=0; 
i<ZSTD_REP_INIT; i++) rep[i]=ctx->rep[i]; } @@ -757,7 +756,6 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=ctx->rep[i]; } ctx->nextToUpdate3 = ctx->nextToUpdate; - ZSTD_resetSeqStore(seqStorePtr); ZSTD_rescaleFreqs(seqStorePtr); ip += (ip==prefixStart); diff --git a/programs/.gitignore b/programs/.gitignore index 5f50de0dd..cbe39dcdf 100644 --- a/programs/.gitignore +++ b/programs/.gitignore @@ -50,3 +50,4 @@ afl # Misc files *.bat fileTests.sh +dirTest*