diff --git a/.travis.yml b/.travis.yml index b7faed3eb..f583a803d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,10 +13,13 @@ env: - ZSTD_TRAVIS_CI_ENV=clangtest - ZSTD_TRAVIS_CI_ENV=gpptest - ZSTD_TRAVIS_CI_ENV=gnu90test + - ZSTD_TRAVIS_CI_ENV=c99test + - ZSTD_TRAVIS_CI_ENV=gnu99test - ZSTD_TRAVIS_CI_ENV=armtest-w-install - ZSTD_TRAVIS_CI_ENV=test - ZSTD_TRAVIS_CI_ENV="-C programs test32" - ZSTD_TRAVIS_CI_ENV="-C programs test-zstd_nolegacy" + - ZSTD_TRAVIS_CI_ENV="-C versionsTest" - ZSTD_TRAVIS_CI_ENV=usan - ZSTD_TRAVIS_CI_ENV=asan - ZSTD_TRAVIS_CI_ENV=asan32 diff --git a/Makefile b/Makefile index 7d1ae713f..95ecc007c 100644 --- a/Makefile +++ b/Makefile @@ -30,9 +30,6 @@ # - zstd homepage : http://www.zstd.net/ # ################################################################ -# force a version number : uncomment below export (otherwise, default to the one declared into zstd.h) -#export VERSION := 0.6.1 - PRGDIR = programs ZSTDDIR = lib @@ -88,11 +85,20 @@ clangtest: clean gpptest: clean $(MAKE) all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror" -gnu90test: clean - $(MAKE) all CFLAGS="-std=gnu90 -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wstrict-prototypes -Wundef -Wdeclaration-after-statement -Werror" - c90test: clean - $(MAKE) all CFLAGS="-std=c90 -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wstrict-prototypes -Wundef -Werror" # will fail, due to // and long long + CFLAGS="-std=c90" $(MAKE) all # will fail, due to // and long long + +gnu90test: clean + CFLAGS="-std=gnu90" $(MAKE) all + +c99test: clean + CFLAGS="-std=c99" $(MAKE) all + +gnu99test: clean + CFLAGS="-std=gnu99" $(MAKE) all + +c11test: clean + CFLAGS="-std=c11" $(MAKE) all bmix64test: clean CFLAGS="-O3 -mbmi -Werror" $(MAKE) -C $(PRGDIR) test @@ -138,6 +144,9 @@ usan: clean asan: clean $(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=address" +msan: clean + $(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=memory" # datagen.c fails this test, for no obvious reason + asan32: clean $(MAKE) -C $(PRGDIR) test32 CC=clang MOREFLAGS="-g -fsanitize=address" diff --git a/NEWS b/NEWS index 736a36ea3..c100700aa 100644 --- a/NEWS +++ b/NEWS @@ -1,10 +1,16 @@ +v0.6.2 +New : Support for Sparse File-systems (do not use space for zero-filled sectors) +New : Support pass-through mode, when using `-df` + v0.6.1 New : zlib wrapper API, thanks to Przemyslaw Skibinski +New : Ability to compile compressor / decompressor separately +Changed : new lib directory structure Fixed : Legacy codec v0.5 compatible with dictionary decompression Fixed : Decoder corruption error (#173) Fixed : null-string roundtrip (#176) -New : midipix support New : benchmark mode can select directory as input +Experimental : midipix support, VMS support v0.6.0 Stronger high compression modes, thanks to Przemyslaw Skibinski diff --git a/lib/Makefile b/lib/Makefile index 5ea014dd1..904366959 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -45,7 +45,7 @@ DESTDIR?= PREFIX ?= /usr/local CPPFLAGS= -I./common CFLAGS ?= -O3 -CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-qual -Wcast-align -Wstrict-prototypes -Wstrict-aliasing=1 +CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wstrict-prototypes -Wundef FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS) LIBDIR ?= $(PREFIX)/lib diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h index 48021dfaf..694df4e3b 100644 --- a/lib/common/bitstream.h +++ b/lib/common/bitstream.h @@ -162,14 +162,12 @@ MEM_STATIC unsigned BIT_highbit32 (register U32 val) # else /* Software version */ static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; U32 v = val; - unsigned r; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; - r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; - return r; + return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; # endif } @@ -375,7 +373,7 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) if status == unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) { - if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should not happen => corruption detected */ return BIT_DStream_overflow; if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) { diff --git a/lib/common/fse_static.h b/lib/common/fse_static.h index 0661dbd3e..797258f75 100644 --- a/lib/common/fse_static.h +++ b/lib/common/fse_static.h @@ -64,19 +64,22 @@ extern "C" { * FSE advanced API *******************************************/ size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); -/* same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr */ +/**< same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr */ + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); +/**< same as FSE_optimalTableLog(), which used `minus==2` */ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); -/* build a fake FSE_CTable, designed to not compress an input, where each symbol uses nbBits */ +/**< build a fake FSE_CTable, designed to not compress an input, where each symbol uses nbBits */ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); -/* build a fake FSE_CTable, designed to compress always the same symbolValue */ +/**< build a fake FSE_CTable, designed to compress always the same symbolValue */ size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits); -/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */ +/**< build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */ size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); -/* build a fake FSE_DTable, designed to always generate the same symbolValue */ +/**< build a fake FSE_DTable, designed to always generate the same symbolValue */ /* ***************************************** @@ -103,7 +106,7 @@ static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsig static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr); -/*! +/**< These functions are inner components of FSE_compress_usingCTable(). They allow the creation of custom streams, mixing multiple tables and bit sources. @@ -163,7 +166,7 @@ static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bi static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr); -/*! +/**< Let's now decompose FSE_decompress_usingDTable() into its unitary components. You will decode FSE-encoded symbols from the bitStream, and also any other bitFields you put in, **in reverse order**. diff --git a/lib/common/huf.h b/lib/common/huf.h index d07080b15..a06fd3e18 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -77,6 +77,7 @@ HUF_decompress() : /* **************************************** * Tool functions ******************************************/ +#define HUF_BLOCKSIZE_MAX (128 * 1024) size_t HUF_compressBound(size_t size); /**< maximum compressed size */ /* Error Management */ diff --git a/lib/common/huf_static.h b/lib/common/huf_static.h index e68ec33dd..ea3eb62b8 100644 --- a/lib/common/huf_static.h +++ b/lib/common/huf_static.h @@ -85,15 +85,17 @@ size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cS /*! HUF_compress() does the following: 1. count symbol occurrence from source[] into table count[] using FSE_count() -2. build Huffman table from count using HUF_buildCTable() -3. save Huffman table to memory buffer using HUF_writeCTable() -4. encode the data stream using HUF_compress4X_usingCTable() +2. (optional) refine tableLog using HUF_optimalTableLog() +3. build Huffman table from count using HUF_buildCTable() +4. save Huffman table to memory buffer using HUF_writeCTable() +5. encode the data stream using HUF_compress4X_usingCTable() The following API allows targeting specific sub-functions for advanced tasks. For example, it's possible to compress several blocks using the same 'CTable', or to save and regenerate 'CTable' using external methods. */ /* FSE_count() : find it within "fse.h" */ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); @@ -137,16 +139,19 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned maxSymbolValue, const void* sr /* ************************************************************** * Constants ****************************************************************/ -#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ -#define HUF_MAX_TABLELOG 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ -#define HUF_DEFAULT_TABLELOG HUF_MAX_TABLELOG /* tableLog by default, when not specified */ -#define HUF_MAX_SYMBOL_VALUE 255 -#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG) -# error "HUF_MAX_TABLELOG is too large !" +#define HUF_TABLELOG_ABSOLUTEMAX 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#define HUF_TABLELOG_MAX 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ +#define HUF_TABLELOG_DEFAULT HUF_TABLELOG_MAX /* tableLog by default, when not specified */ +#define HUF_SYMBOLVALUE_MAX 255 +#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) +# error "HUF_TABLELOG_MAX is too large !" #endif +/* ************************************************************** +* Needed by zstd in both compression and decompression +****************************************************************/ /*! HUF_readStats() : Read compact Huffman tree, saved by HUF_writeCTable(). `huffWeight` is destination buffer. @@ -188,17 +193,17 @@ MEM_STATIC size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, } /* collect weight stats */ - memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32)); + memset(rankStats, 0, (HUF_TABLELOG_ABSOLUTEMAX + 1) * sizeof(U32)); weightTotal = 0; { U32 n; for (n=0; n= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected); + if (huffWeight[n] >= HUF_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected); rankStats[huffWeight[n]]++; weightTotal += (1 << huffWeight[n]) >> 1; } } /* get last non-null symbol weight (implied, total must be 2^n) */ { U32 const tableLog = BIT_highbit32(weightTotal) + 1; - if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected); + if (tableLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected); *tableLogPtr = tableLog; /* determine last weight */ { U32 const total = 1 << tableLog; diff --git a/lib/common/mem.h b/lib/common/mem.h index 7f5a2ef7c..9156bfda9 100644 --- a/lib/common/mem.h +++ b/lib/common/mem.h @@ -52,6 +52,9 @@ extern "C" { /*-**************************************** * Compiler specifics ******************************************/ +#if defined(_MSC_VER) +# include /* _byteswap_ */ +#endif #if defined(__GNUC__) # define MEM_STATIC static __attribute__((unused)) #elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) @@ -66,7 +69,7 @@ extern "C" { /*-************************************************************** * Basic Types *****************************************************************/ -#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) # include typedef uint8_t BYTE; typedef uint16_t U16; diff --git a/lib/common/zstd.h b/lib/common/zstd.h index 7b5708c42..dbcf62ba4 100644 --- a/lib/common/zstd.h +++ b/lib/common/zstd.h @@ -61,7 +61,7 @@ extern "C" { ***************************************/ #define ZSTD_VERSION_MAJOR 0 #define ZSTD_VERSION_MINOR 6 -#define ZSTD_VERSION_RELEASE 1 +#define ZSTD_VERSION_RELEASE 2 #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE #define ZSTD_QUOTE(str) #str diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c index dad0be794..f1068433a 100644 --- a/lib/compress/fse_compress.c +++ b/lib/compress/fse_compress.c @@ -111,8 +111,6 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned U32 highThreshold = tableSize-1; /* CTable header */ - - tableU16[-2] = (U16) tableLog; tableU16[-1] = (U16) maxSymbolValue; @@ -147,10 +145,10 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned } /* Build table */ - { U32 u; for (u=0; u /* printf (debug) */ #include "huf_static.h" #include "bitstream.h" -#include "fse.h" /* header compression */ +#include "fse_static.h" /* header compression */ /* ************************************************************** @@ -73,6 +73,15 @@ #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ +/* ************************************************************** +* Utils +****************************************************************/ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); +} + + /* ******************************************************* * HUF : Huffman block compression *********************************************************/ @@ -94,14 +103,14 @@ typedef struct nodeElt_s { size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog) { - BYTE bitsToWeight[HUF_MAX_TABLELOG + 1]; - BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; + BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; U32 n; BYTE* op = (BYTE*)dst; size_t size; /* check conditions */ - if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE + 1) + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX + 1) return ERROR(GENERIC); /* convert to weight */ @@ -116,7 +125,7 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize, if (size >= 128) return ERROR(GENERIC); /* should never happen, since maxSymbolValue <= 255 */ if ((size <= 1) || (size >= maxSymbolValue/2)) { if (size==1) { /* RLE */ - /* only possible case : serie of 1 (because there are at least 2) */ + /* only possible case : series of 1 (because there are at least 2) */ /* can only be 2^n or (2^n-1), otherwise not an huffman tree */ BYTE code; switch(maxSymbolValue) @@ -159,19 +168,19 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize, size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, size_t srcSize) { - BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; - U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ U32 tableLog = 0; size_t readSize; U32 nbSymbols = 0; //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... */ /* get symbol weights */ - readSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE+1, rankVal, &nbSymbols, &tableLog, src, srcSize); + readSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize); if (HUF_isError(readSize)) return readSize; /* check result */ - if (tableLog > HUF_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); if (nbSymbols > maxSymbolValue+1) return ERROR(maxSymbolValue_tooSmall); /* Prepare base value per rank */ @@ -189,12 +198,12 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, si }} /* fill val */ - { U16 nbPerRank[HUF_MAX_TABLELOG+1] = {0}; - U16 valPerRank[HUF_MAX_TABLELOG+1] = {0}; + { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; + U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; { U32 n; for (n=0; n0; n--) { + U32 n; for (n=HUF_TABLELOG_MAX; n>0; n--) { valPerRank[n] = min; /* get starting value within each rank */ min += nbPerRank[n]; min >>= 1; @@ -229,7 +238,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) /* repay normalized cost */ { U32 const noSymbol = 0xF0F0F0F0; - U32 rankLast[HUF_MAX_TABLELOG+1]; + U32 rankLast[HUF_TABLELOG_MAX+1]; int pos; /* Get pos of last (smallest) symbol per rank */ @@ -253,7 +262,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) if (highTotal <= lowTotal) break; } } /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */ - while ((nBitsToDecrease<=HUF_MAX_TABLELOG) && (rankLast[nBitsToDecrease] == noSymbol)) /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ + while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ nBitsToDecrease ++; totalCost -= 1 << (nBitsToDecrease-1); if (rankLast[nBitsToDecrease-1] == noSymbol) @@ -312,10 +321,10 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue) } -#define STARTNODE (HUF_MAX_SYMBOL_VALUE+1) +#define STARTNODE (HUF_SYMBOLVALUE_MAX+1) size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits) { - nodeElt huffNode0[2*HUF_MAX_SYMBOL_VALUE+1 +1]; + nodeElt huffNode0[2*HUF_SYMBOLVALUE_MAX+1 +1]; nodeElt* huffNode = huffNode0 + 1; U32 n, nonNullRank; int lowS, lowN; @@ -323,8 +332,8 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3 U32 nodeRoot; /* safety checks */ - if (maxNbBits == 0) maxNbBits = HUF_DEFAULT_TABLELOG; - if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE) return ERROR(GENERIC); + if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC); memset(huffNode0, 0, sizeof(huffNode0)); /* sort, decreasing order */ @@ -360,9 +369,9 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3 maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); /* fill result into tree (val, nbBits) */ - { U16 nbPerRank[HUF_MAX_TABLELOG+1] = {0}; - U16 valPerRank[HUF_MAX_TABLELOG+1] = {0}; - if (maxNbBits > HUF_MAX_TABLELOG) return ERROR(GENERIC); /* check fit into table */ + { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; + U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; + if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ for (n=0; n<=nonNullRank; n++) nbPerRank[huffNode[n].nbBits]++; /* determine stating value per rank */ @@ -391,10 +400,10 @@ size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } #define HUF_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) #define HUF_FLUSHBITS_1(stream) \ - if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*2+7) HUF_FLUSHBITS(stream) + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) #define HUF_FLUSHBITS_2(stream) \ - if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*4+7) HUF_FLUSHBITS(stream) + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) { @@ -441,44 +450,47 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) { - size_t segmentSize = (srcSize+3)/4; /* first 3 segments */ + size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ const BYTE* ip = (const BYTE*) src; const BYTE* const iend = ip + srcSize; BYTE* const ostart = (BYTE*) dst; BYTE* const oend = ostart + dstSize; BYTE* op = ostart; - size_t errorCode; if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ if (srcSize < 12) return 0; /* no saving possible : too small input */ op += 6; /* jumpTable */ - errorCode = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); - if (HUF_isError(errorCode)) return errorCode; - if (errorCode==0) return 0; - MEM_writeLE16(ostart, (U16)errorCode); + { size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); + if (HUF_isError(cSize)) return cSize; + if (cSize==0) return 0; + MEM_writeLE16(ostart, (U16)cSize); + op += cSize; + } ip += segmentSize; - op += errorCode; - errorCode = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); - if (HUF_isError(errorCode)) return errorCode; - if (errorCode==0) return 0; - MEM_writeLE16(ostart+2, (U16)errorCode); + { size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); + if (HUF_isError(cSize)) return cSize; + if (cSize==0) return 0; + MEM_writeLE16(ostart+2, (U16)cSize); + op += cSize; + } ip += segmentSize; - op += errorCode; - errorCode = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); - if (HUF_isError(errorCode)) return errorCode; - if (errorCode==0) return 0; - MEM_writeLE16(ostart+4, (U16)errorCode); + { size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); + if (HUF_isError(cSize)) return cSize; + if (cSize==0) return 0; + MEM_writeLE16(ostart+4, (U16)cSize); + op += cSize; + } ip += segmentSize; - op += errorCode; - errorCode = HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable); - if (HUF_isError(errorCode)) return errorCode; - if (errorCode==0) return 0; + { size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable); + if (HUF_isError(cSize)) return cSize; + if (cSize==0) return 0; + op += cSize; + } - op += errorCode; return op-ostart; } @@ -493,43 +505,46 @@ static size_t HUF_compress_internal ( BYTE* const oend = ostart + dstSize; BYTE* op = ostart; - U32 count[HUF_MAX_SYMBOL_VALUE+1]; - HUF_CElt CTable[HUF_MAX_SYMBOL_VALUE+1]; - size_t errorCode; + U32 count[HUF_SYMBOLVALUE_MAX+1]; + HUF_CElt CTable[HUF_SYMBOLVALUE_MAX+1]; /* checks & inits */ - if (srcSize < 1) return 0; /* Uncompressed - note : 1 means rle, so first byte must be correct */ - if (dstSize < 1) return 0; /* not compressible within dst budget */ - if (srcSize > 128 * 1024) return ERROR(srcSize_wrong); /* current block size limit */ - if (huffLog > HUF_MAX_TABLELOG) return ERROR(tableLog_tooLarge); - if (!maxSymbolValue) maxSymbolValue = HUF_MAX_SYMBOL_VALUE; - if (!huffLog) huffLog = HUF_DEFAULT_TABLELOG; + if (!srcSize) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */ + if (!dstSize) return 0; /* cannot fit within dst budget */ + if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ + if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; + if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; /* Scan input and build symbol stats */ - errorCode = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize); - if (HUF_isError(errorCode)) return errorCode; - if (errorCode == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } - if (errorCode <= (srcSize >> 7)+1) return 0; /* Heuristic : not compressible enough */ + { size_t const largest = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize); + if (HUF_isError(largest)) return largest; + if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* rle */ + if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */ + } /* Build Huffman Tree */ - errorCode = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog); - if (HUF_isError(errorCode)) return errorCode; - huffLog = (U32)errorCode; + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog); + if (HUF_isError(maxBits)) return maxBits; + huffLog = (U32)maxBits; + } /* Write table description header */ - errorCode = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog); - if (HUF_isError(errorCode)) return errorCode; - if (errorCode + 12 >= srcSize) return 0; /* not useful to try compression */ - op += errorCode; + { size_t const hSize = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog); + if (HUF_isError(hSize)) return hSize; + if (hSize + 12 >= srcSize) return 0; /* not useful to try compression */ + op += hSize; + } /* Compress */ - if (singleStream) - errorCode = HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable); /* single segment */ - else - errorCode = HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable); - if (HUF_isError(errorCode)) return errorCode; - if (errorCode==0) return 0; - op += errorCode; + { size_t const cSize = (singleStream) ? + HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) : /* single segment */ + HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable); + if (HUF_isError(cSize)) return cSize; + if (cSize==0) return 0; /* uncompressible */ + op += cSize; + } /* check compressibility */ if ((size_t)(op-ostart) >= srcSize-1) @@ -556,5 +571,5 @@ size_t HUF_compress2 (void* dst, size_t dstSize, size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) { - return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_DEFAULT_TABLELOG); + return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_TABLELOG_DEFAULT); } diff --git a/lib/compress/zbuff_compress.c b/lib/compress/zbuff_compress.c index cabf447a4..e078d7eb2 100644 --- a/lib/compress/zbuff_compress.c +++ b/lib/compress/zbuff_compress.c @@ -81,7 +81,7 @@ static size_t const ZBUFF_endFrameSize = ZSTD_BLOCKHEADERSIZE; typedef enum { ZBUFFcs_init, ZBUFFcs_load, ZBUFFcs_flush } ZBUFF_cStage; -/* *** Ressources *** */ +/* *** Resources *** */ struct ZBUFF_CCtx_s { ZSTD_CCtx* zc; char* inBuff; diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 01d276fe3..91f6bb791 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -973,7 +973,7 @@ static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLim const BYTE* const pStart = pIn; while ((pIn iEnd) vEnd = iEnd; - matchLength = ZSTD_count(ip, match, vEnd); + const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd); + size_t matchLength = ZSTD_count(ip, match, vEnd); if (match + matchLength == mEnd) matchLength += ZSTD_count(ip+matchLength, iStart, iEnd); return matchLength; @@ -2464,7 +2462,7 @@ size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcS /*-===== Pre-defined compression levels =====-*/ -#define ZSTD_DEFAULT_CLEVEL 5 +#define ZSTD_DEFAULT_CLEVEL 1 #define ZSTD_MAX_CLEVEL 22 unsigned ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } @@ -2575,17 +2573,16 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV }, }; -/*! ZSTD_getParams() : -* @return ZSTD_parameters structure for a selected compression level and srcSize. -* `srcSize` value is optional, select 0 if not known */ +/*! ZSTD_getCParams() : +* @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`. +* Size values are optional, provide 0 if not known or unused */ ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, U64 srcSize, size_t dictSize) { ZSTD_compressionParameters cp; size_t const addedSize = srcSize ? 0 : 500; U64 const rSize = srcSize+dictSize ? srcSize+dictSize+addedSize : (U64)-1; U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */ - if (compressionLevel < 0) compressionLevel = ZSTD_DEFAULT_CLEVEL; - if (compressionLevel==0) compressionLevel = 1; + if (compressionLevel <= 0) compressionLevel = ZSTD_DEFAULT_CLEVEL; /* 0 == default; no negative compressionLevel yet */ if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL; cp = ZSTD_defaultCParameters[tableID][compressionLevel]; if (MEM_32bits()) { /* auto-correction, for 32-bits mode */ diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index e42f76300..01e9c07b2 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -92,8 +92,8 @@ typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize) { - BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; - U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ U32 tableLog = 0; size_t iSize; U32 nbSymbols = 0; @@ -105,7 +105,7 @@ size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize) HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */ //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... */ - iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); + iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); if (HUF_isError(iSize)) return iSize; /* check result */ @@ -148,7 +148,7 @@ static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, con *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog) #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ @@ -203,7 +203,7 @@ size_t HUF_decompress1X2_usingDTable( size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG); + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); const BYTE* ip = (const BYTE*) cSrc; size_t const errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize); @@ -312,7 +312,7 @@ size_t HUF_decompress4X2_usingDTable( size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG); + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); const BYTE* ip = (const BYTE*) cSrc; size_t const errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize); @@ -335,7 +335,7 @@ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 co U32 nbBitsBaseline, U16 baseSeq) { HUF_DEltX4 DElt; - U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* get pre-calculated rankVal */ memcpy(rankVal, rankValOrigin, sizeof(rankVal)); @@ -369,14 +369,14 @@ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 co }} } -typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1]; +typedef U32 rankVal_t[HUF_TABLELOG_ABSOLUTEMAX][HUF_TABLELOG_ABSOLUTEMAX + 1]; static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog, const sortedSymbol_t* sortedList, const U32 sortedListSize, const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, const U32 nbBitsBaseline) { - U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ const U32 minBits = nbBitsBaseline - maxWeight; U32 s; @@ -415,10 +415,10 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog, size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize) { - BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1]; - sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1]; - U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 }; - U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 }; + BYTE weightList[HUF_SYMBOLVALUE_MAX + 1]; + sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1]; + U32 rankStats[HUF_TABLELOG_ABSOLUTEMAX + 1] = { 0 }; + U32 rankStart0[HUF_TABLELOG_ABSOLUTEMAX + 2] = { 0 }; U32* const rankStart = rankStart0+1; rankVal_t rankVal; U32 tableLog, maxW, sizeOfSort, nbSymbols; @@ -428,10 +428,10 @@ size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize) HUF_DEltX4* const dt = ((HUF_DEltX4*)dtPtr) + 1; HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */ - if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge); + if (memLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge); //memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... */ - iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); + iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); if (HUF_isError(iSize)) return iSize; /* check result */ @@ -517,7 +517,7 @@ static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DE ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) #define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) #define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \ @@ -580,7 +580,7 @@ size_t HUF_decompress1X4_usingDTable( size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG); + HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); const BYTE* ip = (const BYTE*) cSrc; size_t const hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize); @@ -688,7 +688,7 @@ size_t HUF_decompress4X4_usingDTable( size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG); + HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); const BYTE* ip = (const BYTE*) cSrc; size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize); @@ -716,7 +716,7 @@ static void HUF_fillDTableX6LevelN(HUF_DDescX6* DDescription, HUF_DSeqX6* DSeque const int scaleLog = nbBitsBaseline - sizeLog; /* note : targetLog >= (nbBitsBaseline-1), hence scaleLog <= 1 */ const int minBits = nbBitsBaseline - maxWeight; const U32 level = DDesc.nbBytes; - U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; U32 symbolStartPos, s; /* local rankVal, will be modified */ @@ -766,20 +766,20 @@ static void HUF_fillDTableX6LevelN(HUF_DDescX6* DDescription, HUF_DSeqX6* DSeque /* note : same preparation as X4 */ size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize) { - BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1]; - sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1]; - U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 }; - U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 }; + BYTE weightList[HUF_SYMBOLVALUE_MAX + 1]; + sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1]; + U32 rankStats[HUF_TABLELOG_ABSOLUTEMAX + 1] = { 0 }; + U32 rankStart0[HUF_TABLELOG_ABSOLUTEMAX + 2] = { 0 }; U32* const rankStart = rankStart0+1; U32 tableLog, maxW, sizeOfSort, nbSymbols; rankVal_t rankVal; const U32 memLog = DTable[0]; size_t iSize; - if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge); + if (memLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge); //memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... */ - iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); + iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); if (HUF_isError(iSize)) return iSize; /* check result */ @@ -838,7 +838,7 @@ size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize) DDesc.nbBits = 0; DDesc.nbBytes = 0; HUF_fillDTableX6LevelN(DDescription, DSequence, memLog, - (const U32 (*)[HUF_ABSOLUTEMAX_TABLELOG + 1])rankVal, 0, 1, maxW, + (const U32 (*)[HUF_TABLELOG_ABSOLUTEMAX + 1])rankVal, 0, 1, maxW, sortedSymbol, sizeOfSort, rankStart0, tableLog+1, DSeq, DDesc); } @@ -879,7 +879,7 @@ static U32 HUF_decodeLastSymbolsX6(void* op, U32 const maxL, BIT_DStream_t* DStr ptr += HUF_decodeSymbolX6(ptr, DStreamPtr, dd, ds, dtLog) #define HUF_DECODE_SYMBOLX6_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ HUF_DECODE_SYMBOLX6_0(ptr, DStreamPtr) #define HUF_DECODE_SYMBOLX6_2(ptr, DStreamPtr) \ @@ -939,7 +939,7 @@ size_t HUF_decompress1X6_usingDTable( size_t HUF_decompress1X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_MAX_TABLELOG); + HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_TABLELOG_MAX); const BYTE* ip = (const BYTE*) cSrc; size_t const hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize); @@ -1068,7 +1068,7 @@ size_t HUF_decompress4X6_usingDTable( size_t HUF_decompress4X6 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_MAX_TABLELOG); + HUF_CREATE_STATIC_DTABLEX6(DTable, HUF_TABLELOG_MAX); const BYTE* ip = (const BYTE*) cSrc; size_t const hSize = HUF_readDTableX6 (DTable, cSrc, cSrcSize); diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c index 42e925c42..9c57d18fc 100644 --- a/lib/legacy/zstd_v05.c +++ b/lib/legacy/zstd_v05.c @@ -436,21 +436,6 @@ extern "C" { ***************************************/ #define ZSTDv05_WINDOWLOG_ABSOLUTEMIN 11 -/* from faster to stronger */ -typedef enum { ZSTDv05_fast, ZSTDv05_greedy, ZSTDv05_lazy, ZSTDv05_lazy2, ZSTDv05_btlazy2, ZSTDv05_opt, ZSTDv05_btopt } ZSTDv05_strategy; - -typedef struct -{ - U64 srcSize; /* optional : tells how much bytes are present in the frame. Use 0 if not known. */ - U32 windowLog; /* largest match distance : larger == more compression, more memory needed during decompression */ - U32 contentLog; /* full search segment : larger == more compression, slower, more memory (useless for fast) */ - U32 hashLog; /* dispatch table : larger == faster, more memory */ - U32 searchLog; /* nb of searches : larger == more compression, slower */ - U32 searchLength; /* match length searched : larger == faster decompression, sometimes less compression */ - U32 targetLength; /* acceptable match size for optimal parser (only) : larger == more compression, slower */ - ZSTDv05_strategy strategy; -} ZSTDv05_parameters; - /*-************************************* * Advanced functions diff --git a/lib/legacy/zstd_v05.h b/lib/legacy/zstd_v05.h index 8ef6bb25a..6dc17a647 100644 --- a/lib/legacy/zstd_v05.h +++ b/lib/legacy/zstd_v05.h @@ -40,7 +40,7 @@ extern "C" { * Dependencies ***************************************/ #include /* size_t */ - +#include "mem.h" /* U64, U32 */ /* ************************************* @@ -91,6 +91,14 @@ size_t ZSTDv05_decompress_usingDict(ZSTDv05_DCtx* dctx, /*-************************ * Advanced Streaming API ***************************/ +typedef enum { ZSTDv05_fast, ZSTDv05_greedy, ZSTDv05_lazy, ZSTDv05_lazy2, ZSTDv05_btlazy2, ZSTDv05_opt, ZSTDv05_btopt } ZSTDv05_strategy; +typedef struct { + U64 srcSize; + U32 windowLog; /* the only useful information to retrieve */ + U32 contentLog; U32 hashLog; U32 searchLog; U32 searchLength; U32 targetLength; ZSTDv05_strategy strategy; +} ZSTDv05_parameters; +size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize); + size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize); void ZSTDv05_copyDCtx(ZSTDv05_DCtx* dstDCtx, const ZSTDv05_DCtx* srcDCtx); size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx); diff --git a/programs/Makefile b/programs/Makefile index 339427e2d..11709300a 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -31,22 +31,11 @@ # fullbench32: Same as fullbench, but forced to compile in 32-bits mode # ########################################################################## -# Version numbers -LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../lib/common/zstd.h` -LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../lib/common/zstd.h` -LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../lib/common/zstd.h` -LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT) -LIBVER_MAJOR := $(shell echo $(LIBVER_MAJOR_SCRIPT)) -LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT)) -LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT)) -LIBVER := $(shell echo $(LIBVER_SCRIPT)) -VERSION?= $(LIBVER) - DESTDIR?= PREFIX ?= /usr/local -CPPFLAGS= -I../lib/common -DZSTD_VERSION=\"$(VERSION)\" +CPPFLAGS= -I../lib/common CFLAGS ?= -O3 # -falign-loops=32 # not always beneficial -CFLAGS += -std=c99 -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wstrict-prototypes -Wundef +CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wstrict-prototypes -Wundef FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS) BINDIR = $(PREFIX)/bin diff --git a/programs/bench.c b/programs/bench.c index bed7d16bc..a2721552e 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -188,7 +188,6 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, /* Bench */ { U64 fastestC = (U64)(-1LL), fastestD = (U64)(-1LL); U64 const crcOrig = XXH64(srcBuffer, srcSize, 0); - U64 crcCheck = 0; UTIL_time_t coolTime; U32 testNb; size_t cSize = 0; @@ -282,7 +281,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, (double)srcSize / fastestD ); /* CRC Checking */ - { crcCheck = XXH64(resultBuffer, srcSize, 0); + { U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); if (crcOrig!=crcCheck) { size_t u; DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); @@ -308,12 +307,10 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, #endif } /* for (testNb = 1; testNb <= (g_nbIterations + !g_nbIterations); testNb++) */ - if (crcOrig == crcCheck) { - result->ratio = ratio; - result->cSize = cSize; - result->cSpeed = (double)srcSize / fastestC; - result->dSpeed = (double)srcSize / fastestD; - } + result->ratio = ratio; + result->cSize = cSize; + result->cSpeed = (double)srcSize / fastestC; + result->dSpeed = (double)srcSize / fastestD; DISPLAYLEVEL(2, "%2i#\n", cLevel); } /* Bench */ @@ -402,9 +399,9 @@ static void BMK_loadFiles(void* buffer, size_t bufferSize, const char** fileNamesTable, unsigned nbFiles) { size_t pos = 0, totalSize = 0; - FILE* f; unsigned n; for (n=0; n 1) displayName = mfName; - else displayName = fileNamesTable[0]; - - BMK_benchCLevel(srcBuffer, benchedSize, - displayName, cLevel, cLevelLast, - fileSizes, nbFiles, - dictBuffer, dictBufferSize); + { const char* displayName = (nbFiles > 1) ? mfName : fileNamesTable[0]; + BMK_benchCLevel(srcBuffer, benchedSize, + displayName, cLevel, cLevelLast, + fileSizes, nbFiles, + dictBuffer, dictBufferSize); + } /* clean up */ free(srcBuffer); @@ -482,7 +477,7 @@ static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility { char name[20] = {0}; size_t benchedSize = 10000000; - void* srcBuffer = malloc(benchedSize); + void* const srcBuffer = malloc(benchedSize); /* Memory allocation */ if (!srcBuffer) EXM_THROW(21, "not enough memory"); diff --git a/programs/datagen.c b/programs/datagen.c index ecc783e1b..fd9b2316f 100644 --- a/programs/datagen.c +++ b/programs/datagen.c @@ -48,22 +48,25 @@ * Macros **************************************/ #define KB *(1 <<10) +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +#define RDG_DEBUG 0 +#define TRACE(...) if (RDG_DEBUG) fprintf(stderr, __VA_ARGS__ ) /*-************************************ -* Local types +* Local constants **************************************/ #define LTLOG 13 #define LTSIZE (1<> (32 - r))) -static unsigned int RDG_rand(U32* src) +static U32 RDG_rand(U32* src) { static const U32 prime1 = 2654435761U; static const U32 prime2 = 2246822519U; @@ -72,39 +75,42 @@ static unsigned int RDG_rand(U32* src) rand32 ^= prime2; rand32 = RDG_rotl32(rand32, 13); *src = rand32; - return rand32; + return rand32 >> 5; } -static void RDG_fillLiteralDistrib(litDistribTable lt, double ld) +static void RDG_fillLiteralDistrib(BYTE* ldt, double ld) { - U32 i = 0; + BYTE const firstChar = (ld<=0.0) ? 0 : '('; + BYTE const lastChar = (ld<=0.0) ? 255 : '}'; BYTE character = (ld<=0.0) ? 0 : '0'; - BYTE const firstChar = (ld<=0.0) ? 0 : '('; - BYTE const lastChar = (ld<=0.0) ?255: '}'; + U32 u; - while (i LTSIZE) weight = LTSIZE-i; - end = i + weight; - while (i < end) lt[i++] = character; + if (ld<=0.0) ld = 0.0; + //TRACE(" percent:%5.2f%% \n", ld*100.); + //TRACE(" start:(%c)[%02X] ", character, character); + for (u=0; u lastChar) character = firstChar; } } -static BYTE RDG_genChar(U32* seed, const litDistribTable lt) +static BYTE RDG_genChar(U32* seed, const BYTE* ldt) { U32 const id = RDG_rand(seed) & LTMASK; - return (lt[id]); + //TRACE(" %u : \n", id); + //TRACE(" %4u [%4u] ; val : %4u \n", id, id&255, ldt[id]); + return (ldt[id]); /* memory-sanitizer fails here, stating "uninitialized value" when table initialized with 0.0. Checked : table is fully initialized */ } -#define RDG_RAND15BITS ((RDG_rand(seed) >> 3) & 0x7FFF) -#define RDG_RANDLENGTH ( ((RDG_rand(seed) >> 7) & 7) ? (RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15) -void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, litDistribTable lt, unsigned* seedPtr) +#define RDG_RAND15BITS ( RDG_rand(seed) & 0x7FFF ) +#define RDG_RANDLENGTH ( (RDG_rand(seed) & 7) ? (RDG_rand(seed) & 0xF) : (RDG_rand(seed) & 0x1FF) + 0xF) +void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, const BYTE* ldt, unsigned* seedPtr) { BYTE* buffPtr = (BYTE*)buffer; const U32 matchProba32 = (U32)(32768 * matchProba); @@ -123,75 +129,73 @@ void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double match } memset(buffPtr+pos, 0, size0); pos += size0; - buffPtr[pos-1] = RDG_genChar(seed, lt); + buffPtr[pos-1] = RDG_genChar(seed, ldt); continue; } /* init */ - if (pos==0) buffPtr[0] = RDG_genChar(seed, lt), pos=1; + if (pos==0) buffPtr[0] = RDG_genChar(seed, ldt), pos=1; /* Generate compressible data */ while (pos < buffSize) { /* Select : Literal (char) or Match (within 32K) */ if (RDG_RAND15BITS < matchProba32) { /* Copy (within 32K) */ - size_t match; - size_t d; - size_t const length = RDG_RANDLENGTH + 4; - U32 offset = RDG_RAND15BITS + 1; - U32 repeatOffset = (RDG_rand(seed) & 15) == 2; - if (repeatOffset) offset = prevOffset; - if (offset > pos) offset = (U32)pos; - prevOffset = offset; - match = pos - offset; - d = pos + length; - if (d > buffSize) d = buffSize; + U32 const length = RDG_RANDLENGTH + 4; + U32 const d = (U32) MIN(pos + length , buffSize); + U32 const repeatOffset = (RDG_rand(seed) & 15) == 2; + U32 const randOffset = RDG_RAND15BITS + 1; + U32 const offset = repeatOffset ? prevOffset : (U32) MIN(randOffset , pos); + size_t match = pos - offset; + //TRACE("pos : %u; offset: %u ; length : %u \n", (U32)pos, offset, length); while (pos < d) buffPtr[pos++] = buffPtr[match++]; /* correctly manages overlaps */ + prevOffset = offset; } else { /* Literal (noise) */ - size_t const length = RDG_RANDLENGTH; - size_t d = pos + length; - if (d > buffSize) d = buffSize; - while (pos < d) buffPtr[pos++] = RDG_genChar(seed, lt); + U32 const length = RDG_RANDLENGTH; + U32 const d = (U32) MIN(pos + length, buffSize); + while (pos < d) buffPtr[pos++] = RDG_genChar(seed, ldt); } } } void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed) { - litDistribTable lt; - if (litProba==0.0) litProba = matchProba / 4.5; - RDG_fillLiteralDistrib(lt, litProba); - RDG_genBlock(buffer, size, 0, matchProba, lt, &seed); + BYTE ldt[LTSIZE]; + memset(ldt, '0', sizeof(ldt)); + if (litProba<=0.0) litProba = matchProba / 4.5; + //TRACE(" percent:%5.2f%% \n", litProba*100.); + RDG_fillLiteralDistrib(ldt, litProba); + RDG_genBlock(buffer, size, 0, matchProba, ldt, &seed); } -#define RDG_DICTSIZE (32 KB) -#define RDG_BLOCKSIZE (128 KB) -#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) void RDG_genStdout(unsigned long long size, double matchProba, double litProba, unsigned seed) { - BYTE* buff = (BYTE*)malloc(RDG_DICTSIZE + RDG_BLOCKSIZE); + size_t const stdBlockSize = 128 KB; + size_t const stdDictSize = 32 KB; + BYTE* buff = (BYTE*)malloc(stdDictSize + stdBlockSize); U64 total = 0; - litDistribTable ldt; + BYTE ldt[LTSIZE]; /* init */ if (buff==NULL) { fprintf(stdout, "not enough memory\n"); exit(1); } if (litProba<=0.0) litProba = matchProba / 4.5; + memset(ldt, '0', sizeof(ldt)); RDG_fillLiteralDistrib(ldt, litProba); SET_BINARY_MODE(stdout); /* Generate initial dict */ - RDG_genBlock(buff, RDG_DICTSIZE, 0, matchProba, ldt, &seed); + RDG_genBlock(buff, stdDictSize, 0, matchProba, ldt, &seed); /* Generate compressible data */ while (total < size) { - size_t const genBlockSize = (size_t) (MIN (RDG_BLOCKSIZE, size-total)); - RDG_genBlock(buff, RDG_DICTSIZE+RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, ldt, &seed); + size_t const genBlockSize = (size_t) (MIN (stdBlockSize, size-total)); + RDG_genBlock(buff, stdDictSize+stdBlockSize, stdDictSize, matchProba, ldt, &seed); total += genBlockSize; { size_t const unused = fwrite(buff, 1, genBlockSize, stdout); (void)unused; } /* update dict */ - memcpy(buff, buff + RDG_BLOCKSIZE, RDG_DICTSIZE); + memcpy(buff, buff + stdBlockSize, stdDictSize); } /* cleanup */ diff --git a/programs/fileio.c b/programs/fileio.c index 638e63b57..51707e8b7 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -130,6 +130,8 @@ static U32 g_overwrite = 0; void FIO_overwriteMode(void) { g_overwrite=1; } static U32 g_maxWLog = 23; void FIO_setMaxWLog(unsigned maxWLog) { g_maxWLog = maxWLog; } +static U32 g_sparseFileSupport = 1; /* 0 : no sparse allowed; 1: auto (file yes, stdout no); 2: force sparse */ +void FIO_setSparseWrite(unsigned sparse) { g_sparseFileSupport=sparse; } /*-************************************* @@ -178,6 +180,10 @@ static FILE* FIO_openDstFile(const char* dstFileName) DISPLAYLEVEL(4,"Using stdout for output\n"); f = stdout; SET_BINARY_MODE(stdout); + if (g_sparseFileSupport==1) { + g_sparseFileSupport = 0; + DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n"); + } } else { if (!g_overwrite) { /* Check if destination file already exists */ f = fopen( dstFileName, "rb" ); @@ -189,8 +195,7 @@ static FILE* FIO_openDstFile(const char* dstFileName) return 0; } DISPLAY("zstd: %s already exists; do you wish to overwrite (y/N) ? ", dstFileName); - { - int ch = getchar(); + { int ch = getchar(); if ((ch!='Y') && (ch!='y')) { DISPLAY(" not overwritten \n"); return 0; @@ -513,6 +518,81 @@ static void FIO_freeDResources(dRess_t ress) } +/** FIO_fwriteSparse() : +* @return : storedSkips, to be provided to next call to FIO_fwriteSparse() of LZ4IO_fwriteSparseEnd() */ +static unsigned FIO_fwriteSparse(FILE* file, const void* buffer, size_t bufferSize, unsigned storedSkips) +{ + const size_t* const bufferT = (const size_t*)buffer; /* Buffer is supposed malloc'ed, hence aligned on size_t */ + size_t bufferSizeT = bufferSize / sizeof(size_t); + const size_t* const bufferTEnd = bufferT + bufferSizeT; + const size_t* ptrT = bufferT; + static const size_t segmentSizeT = (32 KB) / sizeof(size_t); /* 0-test re-attempted every 32 KB */ + + if (!g_sparseFileSupport) { /* normal write */ + size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file); + if (sizeCheck != bufferSize) EXM_THROW(70, "Write error : cannot write decoded block"); + return 0; + } + + /* avoid int overflow */ + if (storedSkips > 1 GB) { + int const seekResult = fseek(file, 1 GB, SEEK_CUR); + if (seekResult != 0) EXM_THROW(71, "1 GB skip error (sparse file support)"); + storedSkips -= 1 GB; + } + + while (ptrT < bufferTEnd) { + size_t seg0SizeT = segmentSizeT; + size_t nb0T; + + /* count leading zeros */ + if (seg0SizeT > bufferSizeT) seg0SizeT = bufferSizeT; + bufferSizeT -= seg0SizeT; + for (nb0T=0; (nb0T < seg0SizeT) && (ptrT[nb0T] == 0); nb0T++) ; + storedSkips += (unsigned)(nb0T * sizeof(size_t)); + + if (nb0T != seg0SizeT) { /* not all 0s */ + int const seekResult = fseek(file, storedSkips, SEEK_CUR); + if (seekResult) EXM_THROW(72, "Sparse skip error ; try --no-sparse"); + storedSkips = 0; + seg0SizeT -= nb0T; + ptrT += nb0T; + { size_t const sizeCheck = fwrite(ptrT, sizeof(size_t), seg0SizeT, file); + if (sizeCheck != seg0SizeT) EXM_THROW(73, "Write error : cannot write decoded block"); + } } + ptrT += seg0SizeT; + } + + { static size_t const maskT = sizeof(size_t)-1; + if (bufferSize & maskT) { /* size not multiple of sizeof(size_t) : implies end of block */ + const char* const restStart = (const char*)bufferTEnd; + const char* restPtr = restStart; + size_t restSize = bufferSize & maskT; + const char* const restEnd = restStart + restSize; + for ( ; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ; + storedSkips += (unsigned) (restPtr - restStart); + if (restPtr != restEnd) { + int seekResult = fseek(file, storedSkips, SEEK_CUR); + if (seekResult) EXM_THROW(74, "Sparse skip error ; try --no-sparse"); + storedSkips = 0; + { size_t const sizeCheck = fwrite(restPtr, 1, restEnd - restPtr, file); + if (sizeCheck != (size_t)(restEnd - restPtr)) EXM_THROW(75, "Write error : cannot write decoded end of block"); + } } } } + + return storedSkips; +} + +static void FIO_fwriteSparseEnd(FILE* file, unsigned storedSkips) +{ + if (storedSkips-->0) { /* implies g_sparseFileSupport>0 */ + int const seekResult = fseek(file, storedSkips, SEEK_CUR); + if (seekResult != 0) EXM_THROW(69, "Final skip error (sparse file)\n"); + { const char lastZeroByte[1] = { 0 }; + size_t const sizeCheck = fwrite(lastZeroByte, 1, 1, file); + if (sizeCheck != 1) EXM_THROW(69, "Write error : cannot write last zero\n"); + } } +} + /** FIO_decompressFrame() : @return : size of decoded frame */ @@ -521,6 +601,7 @@ unsigned long long FIO_decompressFrame(dRess_t ress, { U64 frameSize = 0; size_t readSize; + U32 storedSkips = 0; ZBUFF_decompressInitDictionary(ress.dctx, ress.dictBuffer, ress.dictBufferSize); @@ -539,8 +620,7 @@ unsigned long long FIO_decompressFrame(dRess_t ress, readSize -= inSize; /* Write block */ - { size_t const sizeCheck = fwrite(ress.dstBuffer, 1, decodedSize, foutput); - if (sizeCheck != decodedSize) EXM_THROW(37, "Write error : unable to write data block into destination"); } + storedSkips = FIO_fwriteSparse(foutput, ress.dstBuffer, decodedSize, storedSkips); frameSize += decodedSize; DISPLAYUPDATE(2, "\rDecoded : %u MB... ", (U32)(frameSize>>20) ); @@ -554,10 +634,34 @@ unsigned long long FIO_decompressFrame(dRess_t ress, EXM_THROW(35, "Read error"); } + FIO_fwriteSparseEnd(foutput, storedSkips); + return frameSize; } +/** FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode + @return : 0 (no error) */ +static unsigned FIO_passThrough(FILE* foutput, FILE* finput, void* buffer, size_t bufferSize) +{ + size_t const blockSize = MIN (64 KB, bufferSize); + size_t readFromInput = 1; + unsigned storedSkips = 0; + + /* assumption : first 4 bytes already loaded (magic number detection), and stored within buffer */ + { size_t const sizeCheck = fwrite(buffer, 1, 4, foutput); + if (sizeCheck != 4) EXM_THROW(50, "Pass-through write error"); } + + while (readFromInput) { + readFromInput = fread(buffer, 1, blockSize, finput); + storedSkips = FIO_fwriteSparse(foutput, buffer, readFromInput, storedSkips); + } + + FIO_fwriteSparseEnd(foutput, storedSkips); + return 0; +} + + /** FIO_decompressSrcFile() : Decompression `srcFileName` into `ress.dstFile` @return : 0 : OK @@ -585,9 +689,12 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* srcFileName) } #endif if (magic != ZSTD_MAGICNUMBER) { - DISPLAYLEVEL(1, "zstd: %s: not in zstd format \n", srcFileName); - return 1; - } } + if (g_overwrite) /* -df : pass-through mode */ + return FIO_passThrough(dstFile, srcFile, ress.srcBuffer, ress.srcBufferSize); + else { + DISPLAYLEVEL(1, "zstd: %s: not in zstd format \n", srcFileName); + return 1; + } } } filesize += FIO_decompressFrame(ress, dstFile, srcFile, toRead); } diff --git a/programs/fileio.h b/programs/fileio.h index d5aae449b..6e7912380 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -46,7 +46,8 @@ extern "C" { ***************************************/ void FIO_overwriteMode(void); void FIO_setNotificationLevel(unsigned level); -void FIO_setMaxWLog(unsigned maxWLog); /**< if `maxWLog` == 0, no max enforced */ +void FIO_setMaxWLog(unsigned maxWLog); /**< if `maxWLog` == 0, no max enforced */ +void FIO_setSparseWrite(unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */ /*-************************************* diff --git a/programs/fullbench.c b/programs/fullbench.c index 6f5d6a782..06796c25a 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -32,7 +32,7 @@ #include /* clock_t, clock, CLOCKS_PER_SEC */ #include "mem.h" -#include "zstd_static.h" +#include "zstd_static.h" /* ZSTD_VERSION_STRING */ #include "fse_static.h" #include "zbuff.h" #include "datagen.h" @@ -42,11 +42,8 @@ * Constants **************************************/ #define PROGRAM_DESCRIPTION "Zstandard speed analyzer" -#ifndef ZSTD_VERSION -# define ZSTD_VERSION "" -#endif #define AUTHOR "Yann Collet" -#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s (%s) ***\n", PROGRAM_DESCRIPTION, ZSTD_VERSION, (int)(sizeof(void*)*8), AUTHOR, __DATE__ +#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s (%s) ***\n", PROGRAM_DESCRIPTION, ZSTD_VERSION_STRING, (int)(sizeof(void*)*8), AUTHOR, __DATE__ #define KB *(1<<10) diff --git a/programs/fuzzer.c b/programs/fuzzer.c index 5bf532e1a..e664bf433 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -40,7 +40,7 @@ #include /* timeb */ #include /* strcmp */ #include /* clock_t */ -#include "zstd_static.h" +#include "zstd_static.h" /* ZSTD_VERSION_STRING */ #include "datagen.h" /* RDG_genBuffer */ #include "xxhash.h" /* XXH64 */ #include "mem.h" @@ -49,10 +49,6 @@ /*-************************************ * Constants **************************************/ -#ifndef ZSTD_VERSION -# define ZSTD_VERSION "" -#endif - #define KB *(1U<<10) #define MB *(1U<<20) #define GB *(1U<<30) @@ -805,7 +801,7 @@ int main(int argc, const char** argv) } } } } /* for (argNb=1; argNb tmp $ZSTD -f tmp # trivial compression case, creates tmp.zst $ZSTD -df tmp.zst # trivial decompression case (overwrites tmp) @@ -47,9 +48,12 @@ $ZSTD -q tmp && die "overwrite check failed!" $ZSTD -q -f tmp $ZSTD -q --force tmp $ZSTD -df tmp && die "should have refused : wrong extension" -cp tmp tmp2.zst -$ZSTD -df tmp2.zst && die "should have failed : wrong format" -rm tmp2.zst + + +echo "\n**** Pass-Through mode **** " +echo "Hello world !" | $ZSTD -df +echo "Hello world !" | $ZSTD -dcf + echo "\n**** frame concatenation **** " @@ -63,8 +67,7 @@ $ZSTD -dc helloworld.zstd > result.tmp cat result.tmp sdiff helloworld.tmp result.tmp rm ./*.tmp ./*.zstd - -echo frame concatenation test completed +echo "frame concatenation tests completed" echo "\n**** flush write error test **** " @@ -75,6 +78,33 @@ echo "echo foo | $ZSTD | $ZSTD -d > /dev/full" echo foo | $ZSTD | $ZSTD -d > /dev/full && die "write error not detected!" +echo "\n**** test sparse file support **** " + +./datagen -g5M -P100 > tmpSparse +$ZSTD tmpSparse -c | $ZSTD -dv -o tmpSparseRegen +diff -s tmpSparse tmpSparseRegen +$ZSTD tmpSparse -c | $ZSTD -dv --sparse -c > tmpOutSparse +diff -s tmpSparse tmpOutSparse +$ZSTD tmpSparse -c | $ZSTD -dv --no-sparse -c > tmpOutNoSparse +diff -s tmpSparse tmpOutNoSparse +ls -ls tmpSparse* +./datagen -s1 -g1200007 -P100 | $ZSTD | $ZSTD -dv --sparse -c > tmpSparseOdd # Odd size file (to not finish on an exact nb of blocks) +./datagen -s1 -g1200007 -P100 | diff -s - tmpSparseOdd +ls -ls tmpSparseOdd +echo "\n Sparse Compatibility with Console :" +echo "Hello World 1 !" | $ZSTD | $ZSTD -d -c +echo "Hello World 2 !" | $ZSTD | $ZSTD -d | cat +echo "\n Sparse Compatibility with Append :" +./datagen -P100 -g1M > tmpSparse1M +cat tmpSparse1M tmpSparse1M > tmpSparse2M +$ZSTD -v -f tmpSparse1M -o tmpSparseCompressed +$ZSTD -d -v -f tmpSparseCompressed -o tmpSparseRegenerated +$ZSTD -d -v -f tmpSparseCompressed -c >> tmpSparseRegenerated +ls -ls tmpSparse* +diff tmpSparse2M tmpSparseRegenerated +# rm tmpSparse* + + echo "\n**** dictionary tests **** " ./datagen > tmpDict @@ -109,7 +139,9 @@ ls -ls tmp* echo "compress multiple files including a missing one (notHere) : " $ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!" + echo "\n**** integrity tests **** " + echo "test one file (tmp1.zst) " $ZSTD -t tmp1.zst $ZSTD --test tmp1.zst @@ -118,6 +150,7 @@ $ZSTD -t *.zst echo "test good and bad files (*) " $ZSTD -t * && die "bad files not detected !" + echo "\n**** zstd round-trip tests **** " roundTripTest diff --git a/programs/util.h b/programs/util.h index e6eb5f41e..f8b4dcb6c 100644 --- a/programs/util.h +++ b/programs/util.h @@ -37,7 +37,6 @@ extern "C" { # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ #if _MSC_VER <= 1800 /* (1800 = Visual Studio 2013) */ #define snprintf sprintf_s /* snprintf unsupported by Visual <= 2013 */ - //#define snprintf _snprintf #endif #endif @@ -47,7 +46,7 @@ extern "C" { # define _FILE_OFFSET_BITS 64 /* turn off_t into a 64-bit type for ftello, fseeko */ # if defined(__sun__) /* Sun Solaris 32-bits requires specific definitions */ # define _LARGEFILE_SOURCE /* fseeko, ftello */ -# else +# else # define _LARGEFILE64_SOURCE /* off64_t, fseeko64, ftello64 */ # endif #endif @@ -91,11 +90,15 @@ extern "C" { # define SET_HIGH_PRIORITY SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS) # define UTIL_sleep(s) Sleep(1000*s) # define UTIL_sleepMilli(milli) Sleep(milli) -#elif (defined(__unix__) || defined(__unix) || defined(__midipix__) || (defined(__APPLE__) && defined(__MACH__))) +#elif (defined(__unix__) || defined(__unix) || defined(__VMS) || defined(__midipix__) || (defined(__APPLE__) && defined(__MACH__))) # include # include /* setpriority */ # include /* clock_t, nanosleep, clock, CLOCKS_PER_SEC */ -# define SET_HIGH_PRIORITY setpriority(PRIO_PROCESS, 0, -20) +# if defined(PRIO_PROCESS) +# define SET_HIGH_PRIORITY setpriority(PRIO_PROCESS, 0, -20) +# else +# define SET_HIGH_PRIORITY /* disabled */ +# endif # define UTIL_sleep(s) sleep(s) # if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 199309L) # define UTIL_sleepMilli(milli) { struct timespec t; t.tv_sec=0; t.tv_nsec=milli*1000000ULL; nanosleep(&t, NULL); } @@ -140,8 +143,8 @@ UTIL_STATIC void UTIL_waitForNextTick(UTIL_time_t ticksPerSecond) { UTIL_time_t clockStart, clockEnd; UTIL_getTime(&clockStart); - do { - UTIL_getTime(&clockEnd); + do { + UTIL_getTime(&clockEnd); } while (UTIL_getSpanTimeNano(ticksPerSecond, clockStart, clockEnd) == 0); } @@ -280,7 +283,7 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_ fprintf(stderr, "Cannot open directory '%s': %s\n", dirName, strerror(errno)); return 0; } - + while ((entry = readdir(dir)) != NULL) { if (strcmp (entry->d_name, "..") == 0 || strcmp (entry->d_name, ".") == 0) continue; @@ -324,8 +327,8 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_ #endif // #ifdef _WIN32 -/* - * UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories, +/* + * UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories, * and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb). * After finishing usage of the list the structures should be freed with UTIL_freeFileList(params: return value, allocatedBuffer) * In case of error UTIL_createFileList returns NULL and UTIL_freeFileList should not be called. diff --git a/programs/xxhash.c b/programs/xxhash.c index 2a66e251a..92df90295 100644 --- a/programs/xxhash.c +++ b/programs/xxhash.c @@ -65,7 +65,7 @@ You can contact the author at : /* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ /*!XXH_FORCE_NATIVE_FORMAT : - * By default, xxHash library provides endian-independant Hash values, based on little-endian convention. + * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. * Results are therefore identical for little-endian and big-endian CPU. * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. * Should endian-independance be of no importance for your application, you may set the #define below to 1, diff --git a/programs/zbufftest.c b/programs/zbufftest.c index d7f7ac00d..e9e54586c 100644 --- a/programs/zbufftest.c +++ b/programs/zbufftest.c @@ -49,10 +49,6 @@ /*-************************************ * Constants **************************************/ -#ifndef ZSTD_VERSION -# define ZSTD_VERSION "" -#endif - #define KB *(1U<<10) #define MB *(1U<<20) #define GB *(1U<<30) @@ -153,7 +149,6 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo void* compressedBuffer = malloc(compressedBufferSize); size_t const decodedBufferSize = CNBufferSize; void* decodedBuffer = malloc(decodedBufferSize); - U32 randState = seed; size_t result, cSize, readSize, genSize; U32 testNb=0; ZBUFF_CCtx* zc = ZBUFF_createCCtx_advanced(customMem); @@ -164,7 +159,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo DISPLAY("Not enough memory, aborting\n"); goto _output_error; } - RDG_genBuffer(CNBuffer, CNBufferSize, compressibility, 0., randState); + RDG_genBuffer(CNBuffer, CNBufferSize, compressibility, 0., seed); /* Basic compression test */ DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); @@ -247,19 +242,13 @@ static size_t findDiff(const void* buf1, const void* buf2, size_t max) { const BYTE* b1 = (const BYTE*)buf1; const BYTE* b2 = (const BYTE*)buf2; - size_t i; - for (i=0; i "); DISPLAY(__VA_ARGS__); \ - DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); goto _output_error; } - - static size_t FUZ_rLogLength(U32* seed, U32 logLength) { size_t const lengthMask = ((size_t)1 << logLength) - 1; @@ -272,6 +261,11 @@ static size_t FUZ_randomLength(U32* seed, U32 maxLog) return FUZ_rLogLength(seed, logLength); } +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +#define CHECK(cond, ...) if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \ + DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); goto _output_error; } + static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility) { static const U32 maxSrcLog = 24; @@ -594,7 +588,7 @@ int main(int argc, const char** argv) } } } /* for(argNb=1; argNb 0; i--) { if (programName[i] == '/') { i++; break; } } - programName += i; + { size_t pos; + for (pos = (int)strlen(programName); pos > 0; pos--) { if (programName[pos] == '/') { pos++; break; } } + programName += pos; + } /* preset behaviors */ if (!strcmp(programName, ZSTD_UNZSTD)) decode=1; if (!strcmp(programName, ZSTD_CAT)) { decode=1; forceStdout=1; displayLevel=1; outFileName=stdoutmark; } /* command switches */ - for(i=1; i use stdin and stdout */ - if(!filenameIdx) filenameIdx=1, filenameTable[0]=stdinmark, outFileName=stdoutmark; + filenameIdx += !filenameIdx; /*< default input is stdin */ + if (!strcmp(filenameTable[0], stdinmark) && !outFileName ) outFileName = stdoutmark; /*< when input is stdin, default output is stdout */ /* Check if input/output defined as console; trigger an error in this case */ if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) ) CLEAN_RETURN(badusage(programName)); - if (outFileName && !strcmp(outFileName, stdoutmark) && IS_CONSOLE(stdout) && !forceStdout) CLEAN_RETURN(badusage(programName)); + if (outFileName && !strcmp(outFileName, stdoutmark) && IS_CONSOLE(stdout) && !(forceStdout && decode)) + CLEAN_RETURN(badusage(programName)); /* user-selected output filename, only possible with a single file */ if (outFileName && strcmp(outFileName,stdoutmark) && strcmp(outFileName,nulmark) && (filenameIdx>1)) { @@ -435,9 +445,9 @@ int main(int argCount, const char** argv) { /* decompression */ #ifndef ZSTD_NODECOMPRESS if (filenameIdx==1 && outFileName) - operationResult = FIO_decompressFilename(outFileName, filenameTable[0], dictFileName); + operationResult = FIO_decompressFilename(outFileName, filenameTable[0], dictFileName); else - operationResult = FIO_decompressMultipleFilenames(filenameTable, filenameIdx, outFileName ? outFileName : ZSTD_EXTENSION, dictFileName); + operationResult = FIO_decompressMultipleFilenames(filenameTable, filenameIdx, outFileName ? outFileName : ZSTD_EXTENSION, dictFileName); #else DISPLAY("Decompression not supported\n"); #endif diff --git a/versionsTest/.gitignore b/versionsTest/.gitignore new file mode 100644 index 000000000..bdb2cbdff --- /dev/null +++ b/versionsTest/.gitignore @@ -0,0 +1,3 @@ +# Tmp test directory +zstdtest + diff --git a/versionsTest/Makefile b/versionsTest/Makefile new file mode 100644 index 000000000..420b81e9f --- /dev/null +++ b/versionsTest/Makefile @@ -0,0 +1,39 @@ +# ########################################################################## +# Zstd tests - Makefile +# Based on LZ4 version test, by Takayuki Matsuoka - 2015-2016 +# +# GPL v2 License +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# You can contact the author at : +# - ZSTD homepage : http://www.zstd.net/ +# ########################################################################## +# versionstest : Compatibility test between zstd versions stored on Github (v0.1+) +# ########################################################################## + +PYTHON?= python3 +TESTDIR := zstdtest + +default: all + +all: versionsTest + +versionsTest: + $(PYTHON) test-zstd-versions.py + +clean: + @rm -fR $(TESTDIR) + @echo Cleaning completed diff --git a/versionsTest/test-zstd-versions.py b/versionsTest/test-zstd-versions.py new file mode 100644 index 000000000..b18c3d3c2 --- /dev/null +++ b/versionsTest/test-zstd-versions.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python3 + +# Based on LZ4 version test script, by Takayuki Matsuoka + +import glob +import subprocess +import filecmp +import os +import shutil +import sys +import hashlib + +repo_url = 'https://github.com/Cyan4973/zstd.git' +tmp_dir_name = 'versionsTest/zstdtest' +make_cmd = 'make' +git_cmd = 'git' +test_dat_src = 'README.md' +test_dat = 'test_dat' +head = 'vdevel' + +def proc(cmd_args, pipe=True, dummy=False): + if dummy: + return + if pipe: + subproc = subprocess.Popen(cmd_args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + else: + subproc = subprocess.Popen(cmd_args) + return subproc.communicate() + +def make(args, pipe=True): + return proc([make_cmd] + args, pipe) + +def git(args, pipe=True): + return proc([git_cmd] + args, pipe) + +def get_git_tags(): + stdout, stderr = git(['tag', '-l', 'v[0-9].[0-9].[0-9]']) + tags = stdout.decode('utf-8').split() + return tags + +def compress_sample(tag, sample): + try: + from subprocess import DEVNULL # py3k + except ImportError: + DEVNULL = open(os.devnull, 'wb') + if subprocess.call(['./zstd.' + tag, '-f' , sample], stderr=DEVNULL)==0: + os.rename(sample + '.zst', sample + '_01_64_' + tag + '.zst') + if subprocess.call(['./zstd.' + tag, '-5f' , sample], stderr=DEVNULL)==0: + os.rename(sample + '.zst', sample + '_05_64_' + tag + '.zst') + if subprocess.call(['./zstd.' + tag, '-9f' , sample], stderr=DEVNULL)==0 : + os.rename(sample + '.zst', sample + '_09_64_' + tag + '.zst') + if subprocess.call(['./zstd.' + tag, '-15f', sample], stderr=DEVNULL)==0 : + os.rename(sample + '.zst', sample + '_15_64_' + tag + '.zst') + if subprocess.call(['./zstd.' + tag, '-18f', sample], stderr=DEVNULL)==0: + os.rename(sample + '.zst', sample + '_18_64_' + tag + '.zst') + # zstdFiles = glob.glob("*.zst*") + # print(zstdFiles) + +# http://stackoverflow.com/a/19711609/2132223 +def sha1_of_file(filepath): + with open(filepath, 'rb') as f: + return hashlib.sha1(f.read()).hexdigest() + +def remove_duplicates(): + list_of_zst = sorted(glob.glob('*.zst')) + for i, ref_zst in enumerate(list_of_zst): + if not os.path.isfile(ref_zst): + continue + for j in range(i+1, len(list_of_zst)): + compared_zst = list_of_zst[j] + if not os.path.isfile(compared_zst): + continue + if filecmp.cmp(ref_zst, compared_zst): + os.remove(compared_zst) + print('duplicated : {} == {}'.format(ref_zst, compared_zst)) + +def decompress_zst(tag): + dec_error = 0 + list_zst = sorted(glob.glob('*.zst')) + try: + from subprocess import DEVNULL # py3k + except ImportError: + DEVNULL = open(os.devnull, 'wb') + for file_zst in list_zst: + print(file_zst, end=" ") + print(tag, end=" ") + file_dec = file_zst + '_d64_' + tag + '.dec' + if subprocess.call(['./zstd.' + tag, '-df', file_zst, '-o', file_dec], stderr=DEVNULL)==0: + if not filecmp.cmp(file_dec, test_dat): + print('ERR !! ') + dec_error = 1 + else: + print('OK ') + else: + print('command does not work') + return dec_error + + +if __name__ == '__main__': + error_code = 0 + base_dir = os.getcwd() + '/..' # /path/to/zstd + tmp_dir = base_dir + '/' + tmp_dir_name # /path/to/zstd/versionsTest/zstdtest + clone_dir = tmp_dir + '/' + 'zstd' # /path/to/zstd/versionsTest/zstdtest/zstd + programs_dir = base_dir + '/programs' # /path/to/zstd/programs + os.makedirs(tmp_dir, exist_ok=True) + + # since Travis clones limited depth, we should clone full repository + if not os.path.isdir(clone_dir): + git(['clone', repo_url, clone_dir]) + + shutil.copy2(base_dir + '/' + test_dat_src, tmp_dir + '/' + test_dat) + + # Retrieve all release tags + print('Retrieve all release tags :') + os.chdir(clone_dir) + tags = get_git_tags() + [head] + print(tags); + + # Build all release zstd + for tag in tags: + os.chdir(base_dir) + dst_zstd = '{}/zstd.{}' .format(tmp_dir, tag) # /path/to/zstd/test/zstdtest/zstd. + if not os.path.isfile(dst_zstd) or tag == head: + if tag != head: + r_dir = '{}/{}'.format(tmp_dir, tag) # /path/to/zstd/test/zstdtest/ + os.makedirs(r_dir, exist_ok=True) + os.chdir(clone_dir) + git(['--work-tree=' + r_dir, 'checkout', tag, '--', '.'], False) + os.chdir(r_dir + '/programs') # /path/to/zstd/zstdtest//programs + make(['clean', 'zstd'], False) + else: + os.chdir(programs_dir) + make(['zstd'], False) + shutil.copy2('zstd', dst_zstd) + + # remove any remaining *.zst and *.dec from previous test + os.chdir(tmp_dir) + for compressed in glob.glob("*.zst"): + os.remove(compressed) + for dec in glob.glob("*.dec"): + os.remove(dec) + + print('Compress test.dat by all released zstd') + + error_code = 0; + for tag in tags: + print(tag) + compress_sample(tag, test_dat) + remove_duplicates() + error_code += decompress_zst(tag) + + print('') + print('Enumerate different compressed files') + zstds = sorted(glob.glob('*.zst')) + for zstd in zstds: + print(zstd + ' : ' + repr(os.path.getsize(zstd)) + ', ' + sha1_of_file(zstd)) + + if error_code != 0: + print('==== ERROR !!! =====') + + sys.exit(error_code)