1
0
mirror of https://github.com/facebook/zstd.git synced 2025-08-08 17:22:10 +03:00

Fix merge conflicts

This commit is contained in:
Nick Magerko
2019-08-22 11:51:41 -07:00
17 changed files with 120 additions and 65 deletions

View File

@@ -16,7 +16,7 @@ Distribution of this document is unlimited.
### Version ### Version
0.3.2 (17/07/19) 0.3.3 (16/08/19)
Introduction Introduction
@@ -358,6 +358,7 @@ It may be followed by an optional `Content_Checksum`
__`Block_Type`__ __`Block_Type`__
The next 2 bits represent the `Block_Type`. The next 2 bits represent the `Block_Type`.
`Block_Type` influences the meaning of `Block_Size`.
There are 4 block types : There are 4 block types :
| Value | 0 | 1 | 2 | 3 | | Value | 0 | 1 | 2 | 3 |
@@ -384,9 +385,12 @@ There are 4 block types :
__`Block_Size`__ __`Block_Size`__
The upper 21 bits of `Block_Header` represent the `Block_Size`. The upper 21 bits of `Block_Header` represent the `Block_Size`.
`Block_Size` is the size of the block excluding the header. When `Block_Type` is `Compressed_Block` or `Raw_Block`,
A block can contain any number of bytes (even zero), up to `Block_Size` is the size of `Block_Content`, hence excluding `Block_Header`.
`Block_Maximum_Decompressed_Size`, which is the smallest of: When `Block_Type` is `RLE_Block`, `Block_Content`'s size is always 1,
and `Block_Size` represents the number of times this byte must be repeated.
A block can contain and decompress into any number of bytes (even zero),
up to `Block_Maximum_Decompressed_Size`, which is the smallest of:
- Window_Size - Window_Size
- 128 KB - 128 KB
@@ -1653,6 +1657,7 @@ or at least provide a meaningful error code explaining for which reason it canno
Version changes Version changes
--------------- ---------------
- 0.3.3 : clarifications for field Block_Size
- 0.3.2 : remove additional block size restriction on compressed blocks - 0.3.2 : remove additional block size restriction on compressed blocks
- 0.3.1 : minor clarification regarding offset history update rules - 0.3.1 : minor clarification regarding offset history update rules
- 0.3.0 : minor edits to match RFC8478 - 0.3.0 : minor edits to match RFC8478

View File

@@ -2001,12 +2001,17 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
/* Sequences Header */ /* Sequences Header */
RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
dstSize_tooSmall); dstSize_tooSmall);
if (nbSeq < 0x7F) if (nbSeq < 128) {
*op++ = (BYTE)nbSeq; *op++ = (BYTE)nbSeq;
else if (nbSeq < LONGNBSEQ) } else if (nbSeq < LONGNBSEQ) {
op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; op[0] = (BYTE)((nbSeq>>8) + 0x80);
else op[1] = (BYTE)nbSeq;
op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; op+=2;
} else {
op[0]=0xFF;
MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
op+=3;
}
assert(op <= oend); assert(op <= oend);
if (nbSeq==0) { if (nbSeq==0) {
/* Copy the old tables over as if we repeated them */ /* Copy the old tables over as if we repeated them */

View File

@@ -571,7 +571,7 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
unsigned const prime1 = 2654435761U; unsigned const prime1 = 2654435761U;
unsigned const prime2 = 2246822519U; unsigned const prime2 = 2246822519U;
unsigned acc = prime1; unsigned acc = prime1;
size_t p=0;; size_t p=0;
for (p=0; p<length; p++) { for (p=0; p<length; p++) {
acc *= prime2; acc *= prime2;
((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21); ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);

View File

@@ -2889,6 +2889,7 @@ static size_t ZSTD_decodeLiteralsBlock(void* ctx,
const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */ const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
if (litSize > srcSize-11) /* risk of reading too far with wildcopy */ if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
{ {
if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
if (litSize > srcSize-3) return ERROR(corruption_detected); if (litSize > srcSize-3) return ERROR(corruption_detected);
memcpy(dctx->litBuffer, istart, litSize); memcpy(dctx->litBuffer, istart, litSize);
dctx->litPtr = dctx->litBuffer; dctx->litPtr = dctx->litBuffer;

View File

@@ -2655,6 +2655,7 @@ static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */ const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
if (litSize > srcSize-11) /* risk of reading too far with wildcopy */ if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
{ {
if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
if (litSize > srcSize-3) return ERROR(corruption_detected); if (litSize > srcSize-3) return ERROR(corruption_detected);
memcpy(dctx->litBuffer, istart, litSize); memcpy(dctx->litBuffer, istart, litSize);
dctx->litPtr = dctx->litBuffer; dctx->litPtr = dctx->litBuffer;
@@ -3034,9 +3035,12 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
{ {
/* blockType == blockCompressed */ /* blockType == blockCompressed */
const BYTE* ip = (const BYTE*)src; const BYTE* ip = (const BYTE*)src;
size_t litCSize;
if (srcSize > BLOCKSIZE) return ERROR(corruption_detected);
/* Decode literals sub-block */ /* Decode literals sub-block */
size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
if (ZSTD_isError(litCSize)) return litCSize; if (ZSTD_isError(litCSize)) return litCSize;
ip += litCSize; ip += litCSize;
srcSize -= litCSize; srcSize -= litCSize;

View File

@@ -201,7 +201,7 @@ static void DiB_fillNoise(void* buffer, size_t length)
unsigned const prime1 = 2654435761U; unsigned const prime1 = 2654435761U;
unsigned const prime2 = 2246822519U; unsigned const prime2 = 2246822519U;
unsigned acc = prime1; unsigned acc = prime1;
size_t p=0;; size_t p=0;
for (p=0; p<length; p++) { for (p=0; p<length; p++) {
acc *= prime2; acc *= prime2;

View File

@@ -305,6 +305,7 @@ struct FIO_prefs_s {
int ldmMinMatch; int ldmMinMatch;
int ldmBucketSizeLog; int ldmBucketSizeLog;
int ldmHashRateLog; int ldmHashRateLog;
size_t streamSrcSize;
size_t targetCBlockSize; size_t targetCBlockSize;
int srcSizeHint; int srcSizeHint;
ZSTD_literalCompressionMode_e literalCompressionMode; ZSTD_literalCompressionMode_e literalCompressionMode;
@@ -351,6 +352,7 @@ FIO_prefs_t* FIO_createPreferences(void)
ret->ldmMinMatch = 0; ret->ldmMinMatch = 0;
ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET; ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET; ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
ret->streamSrcSize = 0;
ret->targetCBlockSize = 0; ret->targetCBlockSize = 0;
ret->srcSizeHint = 0; ret->srcSizeHint = 0;
ret->literalCompressionMode = ZSTD_lcm_auto; ret->literalCompressionMode = ZSTD_lcm_auto;
@@ -421,6 +423,10 @@ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) {
prefs->rsyncable = rsyncable; prefs->rsyncable = rsyncable;
} }
void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize) {
prefs->streamSrcSize = streamSrcSize;
}
void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) { void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) {
prefs->targetCBlockSize = targetCBlockSize; prefs->targetCBlockSize = targetCBlockSize;
} }
@@ -640,7 +646,6 @@ typedef struct {
static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
const char* dictFileName, int cLevel, const char* dictFileName, int cLevel,
U64 srcSize,
ZSTD_compressionParameters comprParams) { ZSTD_compressionParameters comprParams) {
cRess_t ress; cRess_t ress;
memset(&ress, 0, sizeof(ress)); memset(&ress, 0, sizeof(ress));
@@ -707,10 +712,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
#endif #endif
/* dictionary */ /* dictionary */
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) ); /* set the value temporarily for dictionary loading, to adapt compression parameters */
CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) ); CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) );
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, ZSTD_CONTENTSIZE_UNKNOWN) ); /* reset */
free(dictBuffer); free(dictBuffer);
} }
@@ -1012,6 +1014,9 @@ FIO_compressZstdFrame(FIO_prefs_t* const prefs,
/* init */ /* init */
if (fileSize != UTIL_FILESIZE_UNKNOWN) { if (fileSize != UTIL_FILESIZE_UNKNOWN) {
CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize)); CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize));
} else if (prefs->streamSrcSize > 0) {
/* unknown source size; use the declared stream size */
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) );
} }
(void)srcFileName; (void)srcFileName;
@@ -1370,10 +1375,7 @@ int FIO_compressFilename(FIO_prefs_t* const prefs,
const char* dictFileName, int compressionLevel, const char* dictFileName, int compressionLevel,
ZSTD_compressionParameters comprParams) ZSTD_compressionParameters comprParams)
{ {
U64 const fileSize = UTIL_getFileSize(srcFileName); cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);
U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize;
cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, srcSize, comprParams);
int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel); int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
@@ -1424,10 +1426,7 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
ZSTD_compressionParameters comprParams) ZSTD_compressionParameters comprParams)
{ {
int error = 0; int error = 0;
U64 const firstFileSize = UTIL_getFileSize(inFileNamesTable[0]); cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);
U64 const firstSrcSize = (firstFileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : firstFileSize;
U64 const srcSize = (nbFiles != 1) ? ZSTD_CONTENTSIZE_UNKNOWN : firstSrcSize ;
cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, srcSize, comprParams);
/* init */ /* init */
assert(outFileName != NULL || suffix != NULL); assert(outFileName != NULL || suffix != NULL);

View File

@@ -71,6 +71,7 @@ void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog);
void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag); void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag);
void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */ void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */
void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable); void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable);
void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize);
void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize); void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize);
void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint); void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint);
void FIO_setLiteralCompressionMode( void FIO_setLiteralCompressionMode(

View File

@@ -144,6 +144,11 @@ the last one takes effect.
Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible.
_note_ : at the time of this writing, `--adapt` can remain stuck at low speed _note_ : at the time of this writing, `--adapt` can remain stuck at low speed
when combined with multiple worker threads (>=2). when combined with multiple worker threads (>=2).
* `--stream-size=#` :
Sets the pledged source size of input coming from a stream. This value must be exact, as it
will be included in the produced frame header. Incorrect stream sizes will cause an error.
This information will be used to better optimize compression parameters, resulting in
better and potentially faster compression, especially for smaller source sizes.
* `--size-hint=#`: * `--size-hint=#`:
When handling input from a stream, `zstd` must guess how large the source size When handling input from a stream, `zstd` must guess how large the source size
will be when optimizing compression parameters. If the stream size is relatively will be when optimizing compression parameters. If the stream size is relatively

View File

@@ -141,6 +141,7 @@ static int usage_advanced(const char* programName)
DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog); DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1); DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1);
DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n"); DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n");
DISPLAY( "--stream-size=# : optimize compression parameters for streaming input of given number of bytes \n");
DISPLAY( "--size-hint=# optimize compression parameters for streaming input of approximately this size\n"); DISPLAY( "--size-hint=# optimize compression parameters for streaming input of approximately this size\n");
DISPLAY( "--target-compressed-block-size=# : make compressed block near targeted size \n"); DISPLAY( "--target-compressed-block-size=# : make compressed block near targeted size \n");
#ifdef ZSTD_MULTITHREAD #ifdef ZSTD_MULTITHREAD
@@ -589,6 +590,7 @@ int main(int argCount, const char* argv[])
const char* suffix = ZSTD_EXTENSION; const char* suffix = ZSTD_EXTENSION;
unsigned maxDictSize = g_defaultMaxDictSize; unsigned maxDictSize = g_defaultMaxDictSize;
unsigned dictID = 0; unsigned dictID = 0;
size_t streamSrcSize = 0;
size_t targetCBlockSize = 0; size_t targetCBlockSize = 0;
size_t srcSizeHint = 0; size_t srcSizeHint = 0;
int dictCLevel = g_defaultDictCLevel; int dictCLevel = g_defaultDictCLevel;
@@ -747,6 +749,7 @@ int main(int argCount, const char* argv[])
if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; } if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; }
if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--long")) { if (longCommandWArg(&argument, "--long")) {
@@ -1153,6 +1156,7 @@ int main(int argCount, const char* argv[])
FIO_setAdaptMin(prefs, adaptMin); FIO_setAdaptMin(prefs, adaptMin);
FIO_setAdaptMax(prefs, adaptMax); FIO_setAdaptMax(prefs, adaptMax);
FIO_setRsyncable(prefs, rsyncable); FIO_setRsyncable(prefs, rsyncable);
FIO_setStreamSrcSize(prefs, streamSrcSize);
FIO_setTargetCBlockSize(prefs, targetCBlockSize); FIO_setTargetCBlockSize(prefs, targetCBlockSize);
FIO_setSrcSizeHint(prefs, srcSizeHint); FIO_setSrcSizeHint(prefs, srcSizeHint);
FIO_setLiteralCompressionMode(prefs, literalCompressionMode); FIO_setLiteralCompressionMode(prefs, literalCompressionMode);
@@ -1164,7 +1168,7 @@ int main(int argCount, const char* argv[])
else else
operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams); operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams);
#else #else
(void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)srcSizeHint; /* not used when ZSTD_NOCOMPRESS set */ (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; /* not used when ZSTD_NOCOMPRESS set */
DISPLAY("Compression not supported \n"); DISPLAY("Compression not supported \n");
#endif #endif
} else { /* decompression or test */ } else { /* decompression or test */

View File

@@ -113,15 +113,6 @@ zstd_frame_info: $(FUZZ_HEADERS) $(FUZZ_OBJ) zstd_frame_info.o
libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c regression_driver.o libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c regression_driver.o
$(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o $(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o
# Install libfuzzer (not usable for MSAN testing)
# Provided for convenience. To use this library run make libFuzzer and
# set LDFLAGS=-L.
.PHONY: libFuzzer
libFuzzer:
@$(RM) -rf Fuzzer
@git clone https://chromium.googlesource.com/chromium/llvm-project/compiler-rt/lib/fuzzer Fuzzer
@cd Fuzzer && ./build.sh
corpora/%_seed_corpus.zip: corpora/%_seed_corpus.zip:
@mkdir -p corpora @mkdir -p corpora
$(DOWNLOAD) $@ $(CORPORA_URL_PREFIX)$*_seed_corpus.zip $(DOWNLOAD) $@ $(CORPORA_URL_PREFIX)$*_seed_corpus.zip

View File

@@ -35,6 +35,8 @@ The environment variables can be overridden with the corresponding flags
`--cc`, `--cflags`, etc. `--cc`, `--cflags`, etc.
The specific fuzzing engine is selected with `LIB_FUZZING_ENGINE` or The specific fuzzing engine is selected with `LIB_FUZZING_ENGINE` or
`--lib-fuzzing-engine`, the default is `libregression.a`. `--lib-fuzzing-engine`, the default is `libregression.a`.
Alternatively, you can use Clang's built-in fuzzing engine with
`--enable-fuzzer`.
It has flags that can easily set up sanitizers `--enable-{a,ub,m}san`, and It has flags that can easily set up sanitizers `--enable-{a,ub,m}san`, and
coverage instrumentation `--enable-coverage`. coverage instrumentation `--enable-coverage`.
It sets sane defaults which can be overridden with flags `--debug`, It sets sane defaults which can be overridden with flags `--debug`,
@@ -51,22 +53,25 @@ The command used to run the fuzzer is printed for debugging.
## LibFuzzer ## LibFuzzer
``` ```
# Build libfuzzer if necessary
make libFuzzer
# Build the fuzz targets # Build the fuzz targets
./fuzz.py build all --enable-coverage --enable-asan --enable-ubsan --lib-fuzzing-engine Fuzzer/libFuzzer.a --cc clang --cxx clang++ ./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan --cc clang --cxx clang++
# OR equivalently # OR equivalently
CC=clang CXX=clang++ LIB_FUZZING_ENGINE=Fuzzer/libFuzzer.a ./fuzz.py build all --enable-coverage --enable-asan --enable-ubsan CC=clang CXX=clang++ ./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan
# Run the fuzzer # Run the fuzzer
./fuzz.py libfuzzer TARGET -max_len=8192 -jobs=4 ./fuzz.py libfuzzer TARGET <libfuzzer args like -jobs=4>
``` ```
where `TARGET` could be `simple_decompress`, `stream_round_trip`, etc. where `TARGET` could be `simple_decompress`, `stream_round_trip`, etc.
### MSAN ### MSAN
Fuzzing with `libFuzzer` and `MSAN` will require building a C++ standard library Fuzzing with `libFuzzer` and `MSAN` is as easy as:
and libFuzzer with MSAN.
```
CC=clang CXX=clang++ ./fuzz.py build all --enable-fuzzer --enable-msan
./fuzz.py libfuzzer TARGET <libfuzzer args>
```
`fuzz.py` respects the environment variables / flags `MSAN_EXTRA_CPPFLAGS`, `fuzz.py` respects the environment variables / flags `MSAN_EXTRA_CPPFLAGS`,
`MSAN_EXTRA_CFLAGS`, `MSAN_EXTRA_CXXFLAGS`, `MSAN_EXTRA_LDFLAGS` to easily pass `MSAN_EXTRA_CFLAGS`, `MSAN_EXTRA_CXXFLAGS`, `MSAN_EXTRA_LDFLAGS` to easily pass
the extra parameters only for MSAN. the extra parameters only for MSAN.

View File

@@ -24,21 +24,38 @@ def abs_join(a, *p):
return os.path.abspath(os.path.join(a, *p)) return os.path.abspath(os.path.join(a, *p))
class InputType(object):
RAW_DATA = 1
COMPRESSED_DATA = 2
class FrameType(object):
ZSTD = 1
BLOCK = 2
class TargetInfo(object):
def __init__(self, input_type, frame_type=FrameType.ZSTD):
self.input_type = input_type
self.frame_type = frame_type
# Constants # Constants
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__)) FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora') CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
TARGETS = [ TARGET_INFO = {
'simple_round_trip', 'simple_round_trip': TargetInfo(InputType.RAW_DATA),
'stream_round_trip', 'stream_round_trip': TargetInfo(InputType.RAW_DATA),
'block_round_trip', 'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
'simple_decompress', 'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
'stream_decompress', 'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
'block_decompress', 'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
'dictionary_round_trip', 'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
'dictionary_decompress', 'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
'zstd_frame_info', 'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
'simple_compress', 'simple_compress': TargetInfo(InputType.RAW_DATA),
] }
TARGETS = list(TARGET_INFO.keys())
ALL_TARGETS = TARGETS + ['all'] ALL_TARGETS = TARGETS + ['all']
FUZZ_RNG_SEED_SIZE = 4 FUZZ_RNG_SEED_SIZE = 4
@@ -67,7 +84,7 @@ MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')
def create(r): def create(r):
d = os.path.abspath(r) d = os.path.abspath(r)
if not os.path.isdir(d): if not os.path.isdir(d):
os.mkdir(d) os.makedirs(d)
return d return d
@@ -158,7 +175,7 @@ def compiler_version(cc, cxx):
assert(b'clang' in cxx_version_bytes) assert(b'clang' in cxx_version_bytes)
compiler = 'clang' compiler = 'clang'
elif b'gcc' in cc_version_bytes: elif b'gcc' in cc_version_bytes:
assert(b'gcc' in cxx_version_bytes) assert(b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes)
compiler = 'gcc' compiler = 'gcc'
if compiler is not None: if compiler is not None:
version_regex = b'([0-9])+\.([0-9])+\.([0-9])+' version_regex = b'([0-9])+\.([0-9])+\.([0-9])+'
@@ -699,7 +716,8 @@ def gen(args):
'-o{}'.format(decompressed), '-o{}'.format(decompressed),
] ]
if 'block_' in args.TARGET: info = TARGET_INFO[args.TARGET]
if info.frame_type == FrameType.BLOCK:
cmd += [ cmd += [
'--gen-blocks', '--gen-blocks',
'--max-block-size-log={}'.format(args.max_size_log) '--max-block-size-log={}'.format(args.max_size_log)
@@ -710,10 +728,11 @@ def gen(args):
print(' '.join(cmd)) print(' '.join(cmd))
subprocess.check_call(cmd) subprocess.check_call(cmd)
if '_round_trip' in args.TARGET: if info.input_type == InputType.RAW_DATA:
print('using decompressed data in {}'.format(decompressed)) print('using decompressed data in {}'.format(decompressed))
samples = decompressed samples = decompressed
elif '_decompress' in args.TARGET: else:
assert info.input_type == InputType.COMPRESSED_DATA
print('using compressed data in {}'.format(compressed)) print('using compressed data in {}'.format(compressed))
samples = compressed samples = compressed

View File

@@ -401,7 +401,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++); DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++);
{ size_t u; { size_t u;
for (u=0; u<CNBuffSize; u++) { for (u=0; u<CNBuffSize; u++) {
if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u]) goto _output_error;; if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u]) goto _output_error;
} } } }
DISPLAYLEVEL(3, "OK \n"); DISPLAYLEVEL(3, "OK \n");
@@ -758,7 +758,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
{ size_t u; { size_t u;
for (u=0; u<CNBuffSize; u++) { for (u=0; u<CNBuffSize; u++) {
if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u]) if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u])
goto _output_error;; goto _output_error;
} } } }
DISPLAYLEVEL(3, "OK \n"); DISPLAYLEVEL(3, "OK \n");
@@ -839,7 +839,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++); DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++);
{ size_t u; { size_t u;
for (u=0; u<CNBuffSize; u++) { for (u=0; u<CNBuffSize; u++) {
if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u]) goto _output_error;; if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u]) goto _output_error;
} } } }
DISPLAYLEVEL(3, "OK \n"); DISPLAYLEVEL(3, "OK \n");

View File

@@ -108,7 +108,6 @@ else
fi fi
println "\n===> simple tests " println "\n===> simple tests "
./datagen > tmp ./datagen > tmp
@@ -409,6 +408,23 @@ println "compress multiple files including a missing one (notHere) : "
$ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!" $ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
println "\n===> stream-size mode"
./datagen -g11000 > tmp
println "test : basic file compression vs sized streaming compression"
file_size=$($ZSTD -14 -f tmp -o tmp.zst && wc -c < tmp.zst)
stream_size=$(cat tmp | $ZSTD -14 --stream-size=11000 | wc -c)
if [ "$stream_size" -gt "$file_size" ]; then
die "hinted compression larger than expected"
fi
println "test : sized streaming compression and decompression"
cat tmp | $ZSTD -14 -f tmp -o --stream-size=11000 tmp.zst
$ZSTD -df tmp.zst -o tmp_decompress
cmp tmp tmp_decompress || die "difference between original and decompressed file"
println "test : incorrect stream size"
cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size"
println "\n===> size-hint mode" println "\n===> size-hint mode"
./datagen -g11000 > tmp ./datagen -g11000 > tmp

View File

@@ -184,7 +184,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++); DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++);
{ size_t i; { size_t i;
for (i=0; i<CNBufferSize; i++) { for (i=0; i<CNBufferSize; i++) {
if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;; if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;
} } } }
DISPLAYLEVEL(4, "OK \n"); DISPLAYLEVEL(4, "OK \n");
@@ -213,7 +213,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++); DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++);
{ size_t i; { size_t i;
for (i=0; i<CNBufferSize; i++) { for (i=0; i<CNBufferSize; i++) {
if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;; if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;
} } } }
DISPLAYLEVEL(4, "OK \n"); DISPLAYLEVEL(4, "OK \n");

View File

@@ -481,7 +481,7 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++); DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++);
{ size_t i; { size_t i;
for (i=0; i<CNBufferSize; i++) { for (i=0; i<CNBufferSize; i++) {
if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;; if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;
} } } }
DISPLAYLEVEL(3, "OK \n"); DISPLAYLEVEL(3, "OK \n");