From bb85fe064d4d42e1f9d40a7c026f1cc550856cc3 Mon Sep 17 00:00:00 2001 From: Christophe Chevalier Date: Fri, 23 Sep 2016 21:47:27 +0200 Subject: [PATCH 1/9] Update .gitignore for new location of msbuild projects It seems that when the projects folder was moved to the new path in cfe5fe45819804b6ef148dc8524fcec1fcd1fc43, the `build/bin` entry was changed to `build/` instead of `bin/`, so building makes a lot of build output show up in git. --- build/.gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/.gitignore b/build/.gitignore index 7ceb958ea..86ed710bd 100644 --- a/build/.gitignore +++ b/build/.gitignore @@ -1,7 +1,7 @@ *Copy # Visual C++ -build/ +bin/ VS2005/ VS2008/ VS2010/ From e5b60e859b5c5f9c34893053f02f5952431a6522 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 23 Sep 2016 13:07:54 -0700 Subject: [PATCH 2/9] [pzstd] Update README to reflect new CLI --- contrib/pzstd/README.md | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/contrib/pzstd/README.md b/contrib/pzstd/README.md index eba64085a..05ceb5599 100644 --- a/contrib/pzstd/README.md +++ b/contrib/pzstd/README.md @@ -4,24 +4,31 @@ Parallel Zstandard is a Pigz-like tool for Zstandard. It provides Zstandard format compatible compression and decompression that is able to utilize multiple cores. It breaks the input up into equal sized chunks and compresses each chunk independently into a Zstandard frame. It then concatenates the frames together to produce the final compressed output. -Optionally, with the `-p` option, PZstandard will write a 12 byte header for each frame that is a skippable frame in the Zstandard format, which tells PZstandard the size of the next compressed frame. -When `-p` is specified for compression, PZstandard can decompress the output in parallel. +PZstandard will write a 12-byte header for each frame that is a skippable frame in the Zstandard format, which tells PZstandard the size of the next compressed frame. +PZstandard supports parallel decompression of files compressed with PZstandard. +When decompressing files compressed with Zstandard, PZstandard does IO in one thread and decompression in another. ## Usage +PZstandard supports the same command line interface as Zstandard, but also provides the `-p` option to specify the number of threads. +Dictionary mode is not currently supported. + Basic usage - pzstd input-file -o output-file -n num-threads [ -p ] -# # Compression - pzstd -d input-file -o output-file -n num-threads # Decompression + pzstd input-file -o output-file -p num-threads -# # Compression + pzstd -d input-file -o output-file -p num-threads # Decompression PZstandard also supports piping and fifo pipes - cat input-file | pzstd -n num-threads [ -p ] -# -c > /dev/null + cat input-file | pzstd -p num-threads -# -c > /dev/null For more options pzstd --help +PZstandard tries to pick a smart default number of threads if not specified (displayed in `pzstd --help`). +If this number is not suitable, you can define `PZSTD_NUM_THREADS` at compile time to the number of threads you prefer. + ## Benchmarks As a reference, PZstandard and Pigz were compared on an Intel Core i7 @ 3.1 GHz, each using 4 threads, with the [Silesia compression corpus](http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia). @@ -32,8 +39,8 @@ Compression Speed vs Ratio with 4 Threads | Decompression Speed with 4 Threads The test procedure was to run each of the following commands 2 times for each compression level, and take the minimum time.
- time pzstd -# -n 4 -p -c silesia.tar > silesia.tar.zst - time pzstd -d -n 4 -c silesia.tar.zst > /dev/null + time pzstd -# -p 4 -c silesia.tar > silesia.tar.zst + time pzstd -d -p 4 -c silesia.tar.zst > /dev/null time pigz -# -p 4 -k -c silesia.tar > silesia.tar.gz time pigz -d -p 4 -k -c silesia.tar.gz > /dev/null From d249889b9ff484f25c6085a8e190fcc16ca15ff5 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 23 Sep 2016 12:55:21 -0700 Subject: [PATCH 3/9] [pzstd] Print (de)compression results --- contrib/pzstd/Pzstd.cpp | 55 +++++++++++++++++++++++--------- contrib/pzstd/Pzstd.h | 8 +++-- contrib/pzstd/test/PzstdTest.cpp | 2 ++ 3 files changed, 47 insertions(+), 18 deletions(-) diff --git a/contrib/pzstd/Pzstd.cpp b/contrib/pzstd/Pzstd.cpp index ccd4f6266..5de90e8b6 100644 --- a/contrib/pzstd/Pzstd.cpp +++ b/contrib/pzstd/Pzstd.cpp @@ -52,16 +52,18 @@ static std::uintmax_t fileSizeOrZero(const std::string &file) { return size; } -static size_t handleOneInput(const Options &options, +static std::uint64_t handleOneInput(const Options &options, const std::string &inputFile, FILE* inputFd, + const std::string &outputFile, FILE* outputFd, ErrorHolder &errorHolder) { auto inputSize = fileSizeOrZero(inputFile); // WorkQueue outlives ThreadPool so in the case of error we are certain // we don't accidently try to call push() on it after it is destroyed. WorkQueue> outs{options.numThreads + 1}; - size_t bytesWritten; + std::uint64_t bytesRead; + std::uint64_t bytesWritten; { // Initialize the thread pool with numThreads + 1 // We add one because the read thread spends most of its time waiting. @@ -71,8 +73,9 @@ static size_t handleOneInput(const Options &options, if (!options.decompress) { // Add a job that reads the input and starts all the compression jobs executor.add( - [&errorHolder, &outs, &executor, inputFd, inputSize, &options] { - asyncCompressChunks( + [&errorHolder, &outs, &executor, inputFd, inputSize, &options, + &bytesRead] { + bytesRead = asyncCompressChunks( errorHolder, outs, executor, @@ -85,13 +88,27 @@ static size_t handleOneInput(const Options &options, bytesWritten = writeFile(errorHolder, outs, outputFd, options.decompress); } else { // Add a job that reads the input and starts all the decompression jobs - executor.add([&errorHolder, &outs, &executor, inputFd] { - asyncDecompressFrames(errorHolder, outs, executor, inputFd); + executor.add([&errorHolder, &outs, &executor, inputFd, &bytesRead] { + bytesRead = asyncDecompressFrames(errorHolder, outs, executor, inputFd); }); // Start writing bytesWritten = writeFile(errorHolder, outs, outputFd, options.decompress); } } + if (options.verbosity > 1 && !errorHolder.hasError()) { + std::string inputFileName = inputFile == "-" ? "stdin" : inputFile; + std::string outputFileName = outputFile == "-" ? 
"stdout" : outputFile; + if (!options.decompress) { + double ratio = static_cast(bytesWritten) / + static_cast(bytesRead + !bytesRead); + std::fprintf(stderr, "%-20s :%6.2f%% (%6llu => %6llu bytes, %s)\n", + inputFileName.c_str(), ratio * 100, bytesRead, bytesWritten, + outputFileName.c_str()); + } else { + std::fprintf(stderr, "%-20s: %llu bytes \n", + inputFileName.c_str(),bytesWritten); + } + } return bytesWritten; } @@ -185,7 +202,7 @@ int pzstdMain(const Options &options) { } auto closeOutputGuard = makeScopeGuard([&] { std::fclose(outputFd); }); // (de)compress the file - handleOneInput(options, input, inputFd, outputFd, errorHolder); + handleOneInput(options, input, inputFd, outputFile, outputFd, errorHolder); if (errorHolder.hasError()) { continue; } @@ -359,11 +376,13 @@ FileStatus fileStatus(FILE* fd) { * Returns the status of the file after all of the reads have occurred. */ static FileStatus -readData(BufferWorkQueue& queue, size_t chunkSize, size_t size, FILE* fd) { +readData(BufferWorkQueue& queue, size_t chunkSize, size_t size, FILE* fd, + std::uint64_t *totalBytesRead) { Buffer buffer(size); while (!buffer.empty()) { auto bytesRead = std::fread(buffer.data(), 1, std::min(chunkSize, buffer.size()), fd); + *totalBytesRead += bytesRead; queue.push(buffer.splitAt(bytesRead)); auto status = fileStatus(fd); if (status != FileStatus::Continue) { @@ -373,7 +392,7 @@ readData(BufferWorkQueue& queue, size_t chunkSize, size_t size, FILE* fd) { return FileStatus::Continue; } -void asyncCompressChunks( +std::uint64_t asyncCompressChunks( ErrorHolder& errorHolder, WorkQueue>& chunks, ThreadPool& executor, @@ -382,6 +401,7 @@ void asyncCompressChunks( size_t numThreads, ZSTD_parameters params) { auto chunksGuard = makeScopeGuard([&] { chunks.finish(); }); + std::uint64_t bytesRead = 0; // Break the input up into chunks of size `step` and compress each chunk // independently. @@ -401,9 +421,10 @@ void asyncCompressChunks( // Pass the output queue to the writer thread. chunks.push(std::move(out)); // Fill the input queue for the compression job we just started - status = readData(*in, ZSTD_CStreamInSize(), step, fd); + status = readData(*in, ZSTD_CStreamInSize(), step, fd, &bytesRead); } errorHolder.check(status != FileStatus::Error, "Error reading input"); + return bytesRead; } /** @@ -484,12 +505,14 @@ static void decompress( } } -void asyncDecompressFrames( +std::uint64_t asyncDecompressFrames( ErrorHolder& errorHolder, WorkQueue>& frames, ThreadPool& executor, FILE* fd) { auto framesGuard = makeScopeGuard([&] { frames.finish(); }); + std::uint64_t totalBytesRead = 0; + // Split the source up into its component frames. // If we find our recognized skippable frame we know the next frames size // which means that we can decompress each standard frame in independently. @@ -509,6 +532,7 @@ void asyncDecompressFrames( // frameSize is 0 if the frame info can't be decoded. 
Buffer buffer(SkippableFrame::kSize); auto bytesRead = std::fread(buffer.data(), 1, buffer.size(), fd); + totalBytesRead += bytesRead; status = fileStatus(fd); if (bytesRead == 0 && status != FileStatus::Continue) { break; @@ -533,14 +557,15 @@ void asyncDecompressFrames( // We hit a non SkippableFrame ==> not compressed by pzstd or corrupted // Pass the rest of the source to this decompression task while (status == FileStatus::Continue && !errorHolder.hasError()) { - status = readData(*in, chunkSize, chunkSize, fd); + status = readData(*in, chunkSize, chunkSize, fd, &totalBytesRead); } break; } // Fill the input queue for the decompression job we just started - status = readData(*in, chunkSize, frameSize, fd); + status = readData(*in, chunkSize, frameSize, fd, &totalBytesRead); } errorHolder.check(status != FileStatus::Error, "Error reading input"); + return totalBytesRead; } /// Write `data` to `fd`, returns true iff success. @@ -554,12 +579,12 @@ static bool writeData(ByteRange data, FILE* fd) { return true; } -size_t writeFile( +std::uint64_t writeFile( ErrorHolder& errorHolder, WorkQueue>& outs, FILE* outputFd, bool decompress) { - size_t bytesWritten = 0; + std::uint64_t bytesWritten = 0; std::shared_ptr out; // Grab the output queue for each decompression job (in order). while (outs.pop(out) && !errorHolder.hasError()) { diff --git a/contrib/pzstd/Pzstd.h b/contrib/pzstd/Pzstd.h index 0c21d1352..c3b2926b6 100644 --- a/contrib/pzstd/Pzstd.h +++ b/contrib/pzstd/Pzstd.h @@ -45,8 +45,9 @@ int pzstdMain(const Options& options); * @param size The size of the input file if known, 0 otherwise * @param numThreads The number of threads in the thread pool * @param parameters The zstd parameters to use for compression + * @returns The number of bytes read from the file */ -void asyncCompressChunks( +std::uint64_t asyncCompressChunks( ErrorHolder& errorHolder, WorkQueue>& chunks, ThreadPool& executor, @@ -66,8 +67,9 @@ void asyncCompressChunks( * as soon as it is available * @param executor The thread pool to run compression jobs in * @param fd The input file descriptor + * @returns The number of bytes read from the file */ -void asyncDecompressFrames( +std::uint64_t asyncDecompressFrames( ErrorHolder& errorHolder, WorkQueue>& frames, ThreadPool& executor, @@ -84,7 +86,7 @@ void asyncDecompressFrames( * @param decompress Are we decompressing? 
* @returns The number of bytes written */ -std::size_t writeFile( +std::uint64_t writeFile( ErrorHolder& errorHolder, WorkQueue>& outs, FILE* outputFd, diff --git a/contrib/pzstd/test/PzstdTest.cpp b/contrib/pzstd/test/PzstdTest.cpp index 64bcf9cab..c85f73a39 100644 --- a/contrib/pzstd/test/PzstdTest.cpp +++ b/contrib/pzstd/test/PzstdTest.cpp @@ -54,6 +54,7 @@ TEST(Pzstd, SmallSizes) { options.inputFiles = {inputFile}; options.numThreads = numThreads; options.compressionLevel = level; + options.verbosity = 1; ASSERT_TRUE(roundTrip(options)); errorGuard.dismiss(); } @@ -91,6 +92,7 @@ TEST(Pzstd, LargeSizes) { options.inputFiles = {inputFile}; options.numThreads = std::min(numThreads, options.numThreads); options.compressionLevel = level; + options.verbosity = 1; ASSERT_TRUE(roundTrip(options)); errorGuard.dismiss(); } From dac03769082a895dafae6bc629db085c655faa8f Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 23 Sep 2016 14:38:25 -0700 Subject: [PATCH 4/9] [pzstd] Add header required for Visual Studios --- contrib/pzstd/Options.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/contrib/pzstd/Options.cpp b/contrib/pzstd/Options.cpp index 5562ee18f..2d8d32203 100644 --- a/contrib/pzstd/Options.cpp +++ b/contrib/pzstd/Options.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include From 39801674881e4d18aaf70990bcd038c3d398c6d1 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 23 Sep 2016 15:47:26 -0700 Subject: [PATCH 5/9] [pzstd] Add status update for MB written --- contrib/pzstd/Pzstd.cpp | 32 +++++++++++++++++++++++++++++--- contrib/pzstd/Pzstd.h | 4 +++- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/contrib/pzstd/Pzstd.cpp b/contrib/pzstd/Pzstd.cpp index 5de90e8b6..e0826b9d8 100644 --- a/contrib/pzstd/Pzstd.cpp +++ b/contrib/pzstd/Pzstd.cpp @@ -14,6 +14,7 @@ #include "utils/ThreadPool.h" #include "utils/WorkQueue.h" +#include #include #include #include @@ -85,14 +86,16 @@ static std::uint64_t handleOneInput(const Options &options, options.determineParameters()); }); // Start writing - bytesWritten = writeFile(errorHolder, outs, outputFd, options.decompress); + bytesWritten = writeFile(errorHolder, outs, outputFd, options.decompress, + options.verbosity); } else { // Add a job that reads the input and starts all the decompression jobs executor.add([&errorHolder, &outs, &executor, inputFd, &bytesRead] { bytesRead = asyncDecompressFrames(errorHolder, outs, executor, inputFd); }); // Start writing - bytesWritten = writeFile(errorHolder, outs, outputFd, options.decompress); + bytesWritten = writeFile(errorHolder, outs, outputFd, options.decompress, + options.verbosity); } } if (options.verbosity > 1 && !errorHolder.hasError()) { @@ -579,11 +582,33 @@ static bool writeData(ByteRange data, FILE* fd) { return true; } +void updateWritten(int verbosity, std::uint64_t bytesWritten) { + if (verbosity <= 1) { + return; + } + using Clock = std::chrono::system_clock; + static Clock::time_point then; + constexpr std::chrono::milliseconds refreshRate{150}; + + auto now = Clock::now(); + if (now - then > refreshRate) { + then = now; + std::fprintf(stderr, "\rWritten: %u MB ", + static_cast(bytesWritten >> 20)); + } +} + std::uint64_t writeFile( ErrorHolder& errorHolder, WorkQueue>& outs, FILE* outputFd, - bool decompress) { + bool decompress, + int verbosity) { + auto lineClearGuard = makeScopeGuard([verbosity] { + if (verbosity > 1) { + std::fprintf(stderr, "\r%79s\r", ""); + } + }); std::uint64_t bytesWritten = 0; std::shared_ptr out; // Grab 
the output queue for each decompression job (in order). @@ -608,6 +633,7 @@ std::uint64_t writeFile( return bytesWritten; } bytesWritten += buffer.size(); + updateWritten(verbosity, bytesWritten); } } return bytesWritten; diff --git a/contrib/pzstd/Pzstd.h b/contrib/pzstd/Pzstd.h index c3b2926b6..fe44ccfde 100644 --- a/contrib/pzstd/Pzstd.h +++ b/contrib/pzstd/Pzstd.h @@ -84,11 +84,13 @@ std::uint64_t asyncDecompressFrames( * (de)compression job. * @param outputFd The file descriptor to write to * @param decompress Are we decompressing? + * @param verbosity The verbosity level to log at * @returns The number of bytes written */ std::uint64_t writeFile( ErrorHolder& errorHolder, WorkQueue>& outs, FILE* outputFd, - bool decompress); + bool decompress, + int verbosity); } From 58d5dfea5468998c83ed75afbadb0b1bc4146af7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 25 Sep 2016 01:34:03 +0200 Subject: [PATCH 6/9] zstreamtest uses ZSTD_reset?Stream --- tests/zstreamtest.c | 63 ++++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index d10d4f125..085de8139 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -358,34 +358,32 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres { static const U32 maxSrcLog = 24; static const U32 maxSampleLog = 19; + size_t const srcBufferSize = (size_t)1< Date: Mon, 26 Sep 2016 14:06:08 +0200 Subject: [PATCH 7/9] zstreamtest can fuzztest pledgedSrcSize --- lib/decompress/zstd_decompress.c | 1 + tests/zstreamtest.c | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 3410bbc0a..47b5f42c7 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1554,6 +1554,7 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds) size_t ZSTD_resetDStream(ZSTD_DStream* zds) { + if (zds->ddict == NULL) return ERROR(stage_wrong); /* must be init at least once */ zds->stage = zdss_loadHeader; zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; zds->legacyVersion = 0; diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 085de8139..7dcd8ea07 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -436,7 +436,8 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres /* compression init */ if (maxTestSize /* at least one test happened */ && resetAllowed && (FUZ_rand(&lseed)&1)) { - ZSTD_resetCStream(zc, 0); + U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? 0 : maxTestSize; + ZSTD_resetCStream(zc, pledgedSrcSize); } else { U32 const testLog = FUZ_rand(&lseed) % maxSrcLog; U32 const cLevel = (FUZ_rand(&lseed) % (ZSTD_maxCLevel() - (testLog/3))) + 1; @@ -449,22 +450,23 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres { ZSTD_parameters params = ZSTD_getParams(cLevel, 0, dictSize); params.fParams.checksumFlag = FUZ_rand(&lseed) & 1; params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1; - { size_t const initError = ZSTD_initCStream_advanced(zc, dict, dictSize, params, 0); + { U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? 
0 : maxTestSize; + size_t const initError = ZSTD_initCStream_advanced(zc, dict, dictSize, params, pledgedSrcSize); CHECK (ZSTD_isError(initError),"ZSTD_initCStream_advanced error : %s", ZSTD_getErrorName(initError)); } } } /* multi-segments compression test */ XXH64_reset(&xxhState, 0); - { U32 const maxNbChunks = (FUZ_rand(&lseed) & 127) + 2; - ZSTD_outBuffer outBuff = { cBuffer, cBufferSize, 0 } ; + { ZSTD_outBuffer outBuff = { cBuffer, cBufferSize, 0 } ; U32 n; - for (n=0, cSize=0, totalTestSize=0 ; (n Date: Mon, 26 Sep 2016 16:41:05 +0200 Subject: [PATCH 8/9] fixed : init*_advanced() followed by reset() with different pledgedSrcSiz --- lib/compress/zstd_compress.c | 26 ++++++-------------------- lib/zstd.h | 8 ++++---- tests/zstreamtest.c | 22 +++++++++++++++------- 3 files changed, 25 insertions(+), 31 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 298278c99..94f4b5a25 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -142,21 +142,8 @@ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) } -/** ZSTD_checkCParams_advanced() : - temporary work-around, while the compressor compatibility remains limited regarding windowLog < 18 */ -size_t ZSTD_checkCParams_advanced(ZSTD_compressionParameters cParams, U64 srcSize) -{ - if (srcSize > (1ULL << ZSTD_WINDOWLOG_MIN)) return ZSTD_checkCParams(cParams); - if (cParams.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) return ERROR(compressionParameter_unsupported); - if (srcSize <= (1ULL << cParams.windowLog)) cParams.windowLog = ZSTD_WINDOWLOG_MIN; /* fake value - temporary work around */ - if (srcSize <= (1ULL << cParams.chainLog)) cParams.chainLog = ZSTD_CHAINLOG_MIN; /* fake value - temporary work around */ - if ((srcSize <= (1ULL << cParams.hashLog)) & ((U32)cParams.strategy < (U32)ZSTD_btlazy2)) cParams.hashLog = ZSTD_HASHLOG_MIN; /* fake value - temporary work around */ - return ZSTD_checkCParams(cParams); -} - - /** ZSTD_adjustCParams() : - optimize cPar for a given input (`srcSize` and `dictSize`). + optimize `cPar` for a given input (`srcSize` and `dictSize`). mostly downsizing to reduce memory consumption and initialization. Both `srcSize` and `dictSize` are optional (use 0 if unknown), but if both are 0, no optimization can be done. @@ -169,7 +156,7 @@ ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, u { U32 const minSrcSize = (srcSize==0) ? 
500 : 0; U64 const rSize = srcSize + dictSize + minSrcSize; if (rSize < ((U64)1< srcLog) cPar.windowLog = srcLog; } } if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog; @@ -178,7 +165,6 @@ ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, u if (cPar.chainLog > maxChainLog) cPar.chainLog = maxChainLog; } /* <= ZSTD_CHAINLOG_MAX */ if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ - if ((cPar.hashLog < ZSTD_HASHLOG_MIN) & ((U32)cPar.strategy >= (U32)ZSTD_btlazy2)) cPar.hashLog = ZSTD_HASHLOG_MIN; /* required to ensure collision resistance in bt */ return cPar; } @@ -2556,7 +2542,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, ZSTD_parameters params, unsigned long long pledgedSrcSize) { /* compression parameters verification and optimization */ - CHECK_F(ZSTD_checkCParams_advanced(params.cParams, pledgedSrcSize)); + CHECK_F(ZSTD_checkCParams(params.cParams)); return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, pledgedSrcSize); } @@ -2644,7 +2630,7 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, const void* dict,size_t dictSize, ZSTD_parameters params) { - CHECK_F(ZSTD_checkCParams_advanced(params.cParams, srcSize)); + CHECK_F(ZSTD_checkCParams(params.cParams)); return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); } @@ -2851,7 +2837,7 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, { size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog; if (zcs->inBuffSize < neededInBuffSize) { zcs->inBuffSize = neededInBuffSize; - ZSTD_free(zcs->inBuff, zcs->customMem); /* should not be necessary */ + ZSTD_free(zcs->inBuff, zcs->customMem); zcs->inBuff = (char*) ZSTD_malloc(neededInBuffSize, zcs->customMem); if (zcs->inBuff == NULL) return ERROR(memory_allocation); } @@ -2859,7 +2845,7 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, } if (zcs->outBuffSize < ZSTD_compressBound(zcs->blockSize)+1) { zcs->outBuffSize = ZSTD_compressBound(zcs->blockSize)+1; - ZSTD_free(zcs->outBuff, zcs->customMem); /* should not be necessary */ + ZSTD_free(zcs->outBuff, zcs->customMem); zcs->outBuff = (char*) ZSTD_malloc(zcs->outBuffSize, zcs->customMem); if (zcs->outBuff == NULL) return ERROR(memory_allocation); } diff --git a/lib/zstd.h b/lib/zstd.h index d7eb9c01f..dd3f5df4c 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -290,11 +290,11 @@ ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* outp #define ZSTD_WINDOWLOG_MAX_32 25 #define ZSTD_WINDOWLOG_MAX_64 27 #define ZSTD_WINDOWLOG_MAX ((U32)(MEM_32bits() ? 
ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) -#define ZSTD_WINDOWLOG_MIN 18 -#define ZSTD_CHAINLOG_MAX (ZSTD_WINDOWLOG_MAX+1) -#define ZSTD_CHAINLOG_MIN 4 +#define ZSTD_WINDOWLOG_MIN 10 #define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX -#define ZSTD_HASHLOG_MIN 12 +#define ZSTD_HASHLOG_MIN 6 +#define ZSTD_CHAINLOG_MAX (ZSTD_WINDOWLOG_MAX+1) +#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN #define ZSTD_HASHLOG3_MAX 17 #define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) #define ZSTD_SEARCHLOG_MIN 1 diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 7dcd8ea07..8486013c2 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -374,7 +374,8 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres ZSTD_DStream* const zd_noise = ZSTD_createDStream(); clock_t const startClock = clock(); const BYTE* dict=NULL; /* can keep same dict on 2 consecutive tests */ - size_t dictSize=0, maxTestSize=0; + size_t dictSize = 0; + U32 oldTestLog = 0; /* allocations */ cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize); @@ -407,6 +408,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres XXH64_state_t xxhState; U64 crcOrig; U32 resetAllowed = 1; + size_t maxTestSize; /* init */ DISPLAYUPDATE(2, "\r%6u", testNb); @@ -435,23 +437,29 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres } /* compression init */ - if (maxTestSize /* at least one test happened */ && resetAllowed && (FUZ_rand(&lseed)&1)) { - U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? 0 : maxTestSize; - ZSTD_resetCStream(zc, pledgedSrcSize); + if ((FUZ_rand(&lseed)&1) /* at beginning, to keep same nb of rand */ + && oldTestLog /* at least one test happened */ && resetAllowed) { + maxTestSize = FUZ_randomLength(&lseed, oldTestLog+2); + if (maxTestSize >= srcBufferSize) maxTestSize = srcBufferSize-1; + { U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? 0 : maxTestSize; + size_t const resetError = ZSTD_resetCStream(zc, pledgedSrcSize); + CHECK(ZSTD_isError(resetError), "ZSTD_resetCStream error : %s", ZSTD_getErrorName(resetError)); + } } else { U32 const testLog = FUZ_rand(&lseed) % maxSrcLog; U32 const cLevel = (FUZ_rand(&lseed) % (ZSTD_maxCLevel() - (testLog/3))) + 1; maxTestSize = FUZ_rLogLength(&lseed, testLog); + oldTestLog = testLog; /* random dictionary selection */ dictSize = ((FUZ_rand(&lseed)&63)==1) ? FUZ_randomLength(&lseed, maxSampleLog) : 0; { size_t const dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize); dict = srcBuffer + dictStart; } - { ZSTD_parameters params = ZSTD_getParams(cLevel, 0, dictSize); + { U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? 0 : maxTestSize; + ZSTD_parameters params = ZSTD_getParams(cLevel, pledgedSrcSize, dictSize); params.fParams.checksumFlag = FUZ_rand(&lseed) & 1; params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1; - { U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? 
0 : maxTestSize; - size_t const initError = ZSTD_initCStream_advanced(zc, dict, dictSize, params, pledgedSrcSize); + { size_t const initError = ZSTD_initCStream_advanced(zc, dict, dictSize, params, pledgedSrcSize); CHECK (ZSTD_isError(initError),"ZSTD_initCStream_advanced error : %s", ZSTD_getErrorName(initError)); } } } From 47094ea66b449cbf0c3947d573c407275d25c8e2 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 26 Sep 2016 18:03:33 +0200 Subject: [PATCH 9/9] added comment on filePos --- lib/dictBuilder/zdict.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index 8a38aadeb..874351ebf 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -505,7 +505,8 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize, { size_t pos; for (pos=0; pos < bufferSize; pos++) reverseSuffix[suffix[pos]] = (U32)pos; - /* build file pos */ + /* note: filePos tracks borders between samples. + It's not used at this stage, but is planned to become useful in a later update. */ filePos[0] = 0; for (pos=1; pos