From 1b5d80d633827bdb4d934b54833d5bfbebc000bf Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 18 Jan 2018 11:03:27 -0800 Subject: [PATCH 01/30] zstdmt: added ability to flush current job before it's completed however, zstdmt may still wait on next available worker, so it's not smooth yet. --- lib/compress/zstdmt_compress.c | 66 +++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 5624b5e3d..812bc89b1 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -315,7 +315,7 @@ typedef struct { unsigned firstChunk; unsigned lastChunk; unsigned jobCompleted; - unsigned jobScanned; + unsigned checksumWritten; ZSTD_pthread_mutex_t* jobCompleted_mutex; ZSTD_pthread_cond_t* jobCompleted_cond; ZSTD_CCtx_params params; @@ -389,7 +389,7 @@ void ZSTDMT_compressChunk(void* jobDescription) BYTE* op = ostart; BYTE* oend = op + dstBuff.size; int blockNb; - DEBUGLOG(5, "ZSTDMT_compressChunk: compress %u bytes in %i blocks", (U32)job->srcSize, nbBlocks); + DEBUGLOG(2, "ZSTDMT_compressChunk: compress %u bytes in %i blocks", (U32)job->srcSize, nbBlocks); assert(job->cSize == 0); for (blockNb = 1; blockNb < nbBlocks; blockNb++) { size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, ZSTD_BLOCKSIZE_MAX); @@ -400,10 +400,13 @@ void ZSTDMT_compressChunk(void* jobDescription) ZSTD_PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex); /* note : it's a mtctx mutex */ job->cSize += cSize; job->consumed = ZSTD_BLOCKSIZE_MAX * blockNb; + DEBUGLOG(2, "ZSTDMT_compressChunk: compress new block : cSize==%u bytes (total: %u)", + (U32)cSize, (U32)job->cSize); + ZSTD_pthread_cond_signal(job->jobCompleted_cond); ZSTD_pthread_mutex_unlock(job->jobCompleted_mutex); } /* last block */ - if ((nbBlocks > 0) | job->lastChunk /*need to output a "last block" flag*/ ) { + if ((nbBlocks > 0) | job->lastChunk /*must output a "last block" flag*/ ) { size_t const lastBlockSize1 = job->srcSize & (ZSTD_BLOCKSIZE_MAX-1); size_t const lastBlockSize = ((lastBlockSize1==0) & (job->srcSize>=ZSTD_BLOCKSIZE_MAX)) ? ZSTD_BLOCKSIZE_MAX : lastBlockSize1; size_t const cSize = (job->lastChunk) ? @@ -428,7 +431,6 @@ _endJob: ZSTD_PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex); job->consumed = job->srcSize; job->jobCompleted = 1; - job->jobScanned = 0; ZSTD_pthread_cond_signal(job->jobCompleted_cond); ZSTD_pthread_mutex_unlock(job->jobCompleted_mutex); } @@ -1017,6 +1019,7 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0); zcs->jobs[jobID].lastChunk = endFrame; zcs->jobs[jobID].jobCompleted = 0; + zcs->jobs[jobID].checksumWritten = 0; zcs->jobs[jobID].dstFlushed = 0; zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex; zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond; @@ -1069,43 +1072,48 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush) { unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask; - DEBUGLOG(5, "ZSTDMT_flushNextJob (blocking:%u)", blockToFlush); + DEBUGLOG(2, "ZSTDMT_flushNextJob (blocking:%u)", blockToFlush); if (zcs->doneJobID == zcs->nextJobID) return 0; /* all flushed ! */ ZSTD_PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex); - while (zcs->jobs[wJobID].jobCompleted==0) { - DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID); + while (zcs->jobs[wJobID].dstFlushed == zcs->jobs[wJobID].cSize) { + DEBUGLOG(2, "waiting for something to flush from job %u (currently flushed: %u bytes)", + zcs->doneJobID, (U32)zcs->jobs[wJobID].dstFlushed); + assert(zcs->jobs[wJobID].jobCompleted==0); if (!blockToFlush) { ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; } /* nothing ready to be flushed => skip */ ZSTD_pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); /* block when nothing available to flush */ } - ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex); - /* compression job completed : output can be flushed */ + + /* some output is available to be flushed */ { ZSTDMT_jobDescription job = zcs->jobs[wJobID]; - if (!job.jobScanned) { - if (ZSTD_isError(job.cSize)) { - DEBUGLOG(5, "ZSTDMT_flushNextJob: job %u : compression error detected : %s", - zcs->doneJobID, ZSTD_getErrorName(job.cSize)); - ZSTDMT_waitForAllJobsCompleted(zcs); - ZSTDMT_releaseAllJobResources(zcs); - return job.cSize; - } - DEBUGLOG(5, "ZSTDMT_flushNextJob: zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag); - if (zcs->params.fParams.checksumFlag) { - if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) { /* write checksum at end of last section */ - U32 const checksum = (U32)XXH64_digest(&zcs->xxhState); - DEBUGLOG(5, "ZSTDMT_flushNextJob: writing checksum : %08X \n", checksum); - MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum); - job.cSize += 4; - zcs->jobs[wJobID].cSize += 4; - } } - zcs->jobs[wJobID].jobScanned = 1; + ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex); + if (ZSTD_isError(job.cSize)) { + DEBUGLOG(5, "ZSTDMT_flushNextJob: job %u : compression error detected : %s", + zcs->doneJobID, ZSTD_getErrorName(job.cSize)); + ZSTDMT_waitForAllJobsCompleted(zcs); + ZSTDMT_releaseAllJobResources(zcs); + return job.cSize; } + /* add frame checksum if necessary */ + if ( zcs->frameEnded + && (zcs->doneJobID+1 == zcs->nextJobID) + && (zcs->params.fParams.checksumFlag) + && (!job.checksumWritten) ) { + U32 const checksum = (U32)XXH64_digest(&zcs->xxhState); + DEBUGLOG(5, "ZSTDMT_flushNextJob: writing checksum : %08X \n", checksum); + MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum); + job.cSize += 4; + zcs->jobs[wJobID].cSize += 4; + zcs->jobs[wJobID].checksumWritten = 1; + } + assert(job.cSize >= job.dstFlushed); { size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); - DEBUGLOG(5, "ZSTDMT_flushNextJob: Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID); + DEBUGLOG(2, "ZSTDMT_flushNextJob: Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID); memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite); output->pos += toWrite; job.dstFlushed += toWrite; } - if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => move to next one */ + if ( job.jobCompleted + && (job.dstFlushed == job.cSize) ) { /* output buffer fully flushed => move to next one */ ZSTDMT_releaseBuffer(zcs->bufPool, job.dstBuff); zcs->jobs[wJobID].dstBuff = g_nullBuffer; zcs->jobs[wJobID].jobCompleted = 0; From 997e4d0ccd4d4122949ab6726569542285f6b85b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 18 Jan 2018 14:39:51 -0800 Subject: [PATCH 02/30] added POOL_tryAdd() --- doc/zstd_manual.html | 8 ++++- lib/common/pool.c | 71 +++++++++++++++++++++++++++++++------------- lib/common/pool.h | 27 +++++++++++------ 3 files changed, 75 insertions(+), 31 deletions(-) diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 473d5be8b..dc3f8be1b 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -582,10 +582,16 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, - but it may change to mean "empty" in some future version, so prefer using macro ZSTD_CONTENTSIZE_UNKNOWN. + but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. @return : 0, or an error code (which can be tested using ZSTD_isError())


+
typedef struct {
+    unsigned long long ingested;
+    unsigned long long consumed;
+    unsigned long long produced;
+} ZSTD_frameProgression;
+

Advanced Streaming decompression functions

typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e;
 size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);   /* obsolete : this API will be removed in a future version */
 size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: no dictionary will be used if dict == NULL or dictSize < 8 */
diff --git a/lib/common/pool.c b/lib/common/pool.c
index 98b109e72..773488b07 100644
--- a/lib/common/pool.c
+++ b/lib/common/pool.c
@@ -12,6 +12,7 @@
 /* ======   Dependencies   ======= */
 #include   /* size_t */
 #include "pool.h"
+#include "zstd_internal.h"  /* ZSTD_malloc, ZSTD_free */
 
 /* ======   Compiler specifics   ====== */
 #if defined(_MSC_VER)
@@ -193,32 +194,54 @@ static int isQueueFull(POOL_ctx const* ctx) {
     }
 }
 
-void POOL_add(void* ctxVoid, POOL_function function, void *opaque) {
-    POOL_ctx* const ctx = (POOL_ctx*)ctxVoid;
-    if (!ctx) { return; }
 
-    ZSTD_pthread_mutex_lock(&ctx->queueMutex);
-    {   POOL_job const job = {function, opaque};
+static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque)
+{
+    POOL_job const job = {function, opaque};
+    assert(ctx != NULL);
+    if (ctx->shutdown) return;
 
-        /* Wait until there is space in the queue for the new job */
-        while (isQueueFull(ctx) && !ctx->shutdown) {
-          ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
-        }
-        /* The queue is still going => there is space */
-        if (!ctx->shutdown) {
-            ctx->queueEmpty = 0;
-            ctx->queue[ctx->queueTail] = job;
-            ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize;
-        }
-    }
-    ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+    ctx->queueEmpty = 0;
+    ctx->queue[ctx->queueTail] = job;
+    ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize;
     ZSTD_pthread_cond_signal(&ctx->queuePopCond);
 }
 
-#else  /* ZSTD_MULTITHREAD  not defined */
-/* No multi-threading support */
+void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque)
+{
+    assert(ctx != NULL);
+    ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+    /* Wait until there is space in the queue for the new job */
+    while (isQueueFull(ctx) && (!ctx->shutdown)) {
+        ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
+    }
+    POOL_add_internal(ctx, function, opaque);
+    ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+}
 
-/* We don't need any data, but if it is empty malloc() might return NULL. */
+
+int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque)
+{
+    assert(ctx != NULL);
+    ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+    if (isQueueFull(ctx)) {
+        ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+        return 0;
+    }
+    POOL_add_internal(ctx, function, opaque);
+    ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+    return 1;
+}
+
+
+#else  /* ZSTD_MULTITHREAD  not defined */
+
+/* ========================== */
+/* No multi-threading support */
+/* ========================== */
+
+
+/* We don't need any data, but if it is empty, malloc() might return NULL. */
 struct POOL_ctx_s {
     int dummy;
 };
@@ -240,11 +263,17 @@ void POOL_free(POOL_ctx* ctx) {
     (void)ctx;
 }
 
-void POOL_add(void* ctx, POOL_function function, void* opaque) {
+void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) {
     (void)ctx;
     function(opaque);
 }
 
+int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) {
+    (void)ctx;
+    function(opaque);
+    return 1;
+}
+
 size_t POOL_sizeof(POOL_ctx* ctx) {
     if (ctx==NULL) return 0;  /* supports sizeof NULL */
     assert(ctx == &g_ctx);
diff --git a/lib/common/pool.h b/lib/common/pool.h
index 08c63715a..a57e9b4fa 100644
--- a/lib/common/pool.h
+++ b/lib/common/pool.h
@@ -17,7 +17,8 @@ extern "C" {
 
 
 #include    /* size_t */
-#include "zstd_internal.h"   /* ZSTD_customMem */
+#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_customMem */
+#include "zstd.h"
 
 typedef struct POOL_ctx_s POOL_ctx;
 
@@ -27,35 +28,43 @@ typedef struct POOL_ctx_s POOL_ctx;
  *  The maximum number of queued jobs before blocking is `queueSize`.
  * @return : POOL_ctx pointer on success, else NULL.
 */
-POOL_ctx *POOL_create(size_t numThreads, size_t queueSize);
+POOL_ctx* POOL_create(size_t numThreads, size_t queueSize);
 
-POOL_ctx *POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem);
+POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem);
 
 /*! POOL_free() :
     Free a thread pool returned by POOL_create().
 */
-void POOL_free(POOL_ctx *ctx);
+void POOL_free(POOL_ctx* ctx);
 
 /*! POOL_sizeof() :
     return memory usage of pool returned by POOL_create().
 */
-size_t POOL_sizeof(POOL_ctx *ctx);
+size_t POOL_sizeof(POOL_ctx* ctx);
 
 /*! POOL_function :
     The function type that can be added to a thread pool.
 */
-typedef void (*POOL_function)(void *);
+typedef void (*POOL_function)(void*);
 /*! POOL_add_function :
     The function type for a generic thread pool add function.
 */
-typedef void (*POOL_add_function)(void *, POOL_function, void *);
+typedef void (*POOL_add_function)(void*, POOL_function, void*);
 
 /*! POOL_add() :
-    Add the job `function(opaque)` to the thread pool.
+    Add the job `function(opaque)` to the thread pool. `ctx` must be valid.
     Possibly blocks until there is room in the queue.
     Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
 */
-void POOL_add(void *ctx, POOL_function function, void *opaque);
+void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque);
+
+
+/*! POOL_tryAdd() :
+    Add the job `function(opaque)` to the thread pool if a worker is available.
+    return immediately otherwise.
+   @return : 1 if successful, 0 if not.
+*/
+int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque);
 
 
 #if defined (__cplusplus)

From 6f7280fb33f5717f2f30e2c19b57512a96325bd8 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Thu, 18 Jan 2018 16:20:26 -0800
Subject: [PATCH 03/30] fixed frame checksum issue

and race conditions
---
 lib/compress/zstd_compress.c   |  3 ++-
 lib/compress/zstdmt_compress.c | 40 ++++++++++++++++++----------------
 tests/zstreamtest.c            |  6 ++++-
 3 files changed, 28 insertions(+), 21 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index e8840be97..a87c368ea 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -2396,7 +2396,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
     BYTE* op = ostart;
     size_t fhSize = 0;
 
-    DEBUGLOG(5, "ZSTD_writeEpilogue");
+    DEBUGLOG(4, "ZSTD_writeEpilogue");
     if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong);  /* init missing */
 
     /* special case : empty frame */
@@ -2420,6 +2420,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
     if (cctx->appliedParams.fParams.checksumFlag) {
         U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
         if (dstCapacity<4) return ERROR(dstSize_tooSmall);
+        DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", checksum);
         MEM_writeLE32(op, checksum);
         op += 4;
     }
diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index ffcbdf5f8..1aa6b866f 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -315,7 +315,7 @@ typedef struct {
     unsigned firstChunk;
     unsigned lastChunk;
     unsigned jobCompleted;
-    unsigned checksumWritten;
+    unsigned frameChecksumNeeded;
     ZSTD_pthread_mutex_t* jobCompleted_mutex;
     ZSTD_pthread_cond_t* jobCompleted_cond;
     ZSTD_CCtx_params params;
@@ -1019,7 +1019,7 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
     zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0);
     zcs->jobs[jobID].lastChunk = endFrame;
     zcs->jobs[jobID].jobCompleted = 0;
-    zcs->jobs[jobID].checksumWritten = 0;
+    zcs->jobs[jobID].frameChecksumNeeded = endFrame && (zcs->nextJobID>0) && zcs->params.fParams.checksumFlag;
     zcs->jobs[jobID].dstFlushed = 0;
     zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex;
     zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond;
@@ -1065,22 +1065,25 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
 }
 
 
-/* ZSTDMT_flushNextJob() :
- * output : will be updated with amount of data flushed .
- * blockToFlush : if >0, the function will block and wait if there is no data available to flush .
- * @return : amount of data remaining within internal buffer, 1 if unknown but > 0, 0 if no more, or an error code */
+/*! ZSTDMT_flushNextJob() :
+ * `output` : will be updated with amount of data flushed .
+ * `blockToFlush` : if >0, the function will block and wait if there is no data available to flush .
+ * @return : amount of data remaining within internal buffer, 0 if no more, 1 if unknown but > 0, or an error code */
 static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush)
 {
     unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask;
     DEBUGLOG(2, "ZSTDMT_flushNextJob (blocking:%u)", blockToFlush);
-    if (zcs->doneJobID == zcs->nextJobID) return 0;   /* all flushed ! */
+    if (zcs->doneJobID == zcs->nextJobID) {
+        DEBUGLOG(2, "ZSTDMT_flushNextJob: doneJobID==nextJobID : nothing to flush !")
+        return 0;   /* all flushed ! */
+    }
     ZSTD_PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
     while (zcs->jobs[wJobID].dstFlushed == zcs->jobs[wJobID].cSize) {
+        if (!blockToFlush) { ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; }  /* nothing ready to be flushed => skip */
+        if (zcs->jobs[wJobID].jobCompleted==1) break;
         DEBUGLOG(2, "waiting for something to flush from job %u (currently flushed: %u bytes)",
                     zcs->doneJobID, (U32)zcs->jobs[wJobID].dstFlushed);
-        assert(zcs->jobs[wJobID].jobCompleted==0);
-        if (!blockToFlush) { ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; }  /* nothing ready to be flushed => skip */
-        ZSTD_pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);  /* block when nothing available to flush */
+        ZSTD_pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);  /* block when nothing available to flush but more to come */
     }
 
     /* some output is available to be flushed */
@@ -1094,16 +1097,14 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
             return job.cSize;
         }
         /* add frame checksum if necessary */
-        if ( zcs->frameEnded
-          && (zcs->doneJobID+1 == zcs->nextJobID)
-          && (zcs->params.fParams.checksumFlag)
-          && (!job.checksumWritten) ) {
+        if ( job.jobCompleted
+          && job.frameChecksumNeeded ) {
             U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
             DEBUGLOG(5, "ZSTDMT_flushNextJob: writing checksum : %08X \n", checksum);
             MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);
             job.cSize += 4;
             zcs->jobs[wJobID].cSize += 4;
-            zcs->jobs[wJobID].checksumWritten = 1;
+            zcs->jobs[wJobID].frameChecksumNeeded = 0;
         }
         assert(job.cSize >= job.dstFlushed);
         {   size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
@@ -1114,19 +1115,20 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
         }
         if ( job.jobCompleted
           && (job.dstFlushed == job.cSize) ) {   /* output buffer fully flushed => move to next one */
+            DEBUGLOG(2, "Job %u completed, moving to next one", zcs->doneJobID);
             ZSTDMT_releaseBuffer(zcs->bufPool, job.dstBuff);
             zcs->jobs[wJobID].dstBuff = g_nullBuffer;
             zcs->jobs[wJobID].jobCompleted = 0;
-            zcs->doneJobID++;
             zcs->consumed += job.srcSize;
             zcs->produced += job.cSize;
+            zcs->doneJobID++;
         } else {
             zcs->jobs[wJobID].dstFlushed = job.dstFlushed;
         }
-        /* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */
+        /* return value : how many bytes left in buffer ; fake it to 1 when unknown but >0 */
         if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed);
-        if (zcs->doneJobID < zcs->nextJobID) return 1;   /* still some buffer to flush */
-        zcs->allJobsCompleted = zcs->frameEnded;   /* frame completed and entirely flushed */
+        if (zcs->doneJobID < zcs->nextJobID) return 1;   /* still some more buffer to flush */
+        zcs->allJobsCompleted = zcs->frameEnded;   /* last frame entirely flushed */
         return 0;   /* everything flushed */
 }   }
 
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 5cd1ea0fb..c26cb3295 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -1346,8 +1346,12 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
                 outBuff.size = outBuff.pos + dstBuffSize;
                 DISPLAYLEVEL(6, "ZSTD_decompressStream input %u bytes \n", (U32)readCSrcSize);
                 decompressionResult = ZSTD_decompressStream(zd, &outBuff, &inBuff);
+                if (ZSTD_getErrorCode(decompressionResult) == ZSTD_error_corruption_detected) {
+                    DISPLAY("ZSTD_decompressStream: checksum error : \n");
+                    findDiff(copyBuffer, dstBuffer, totalTestSize);
+                }
                 CHECK (ZSTD_isError(decompressionResult), "decompression error : %s", ZSTD_getErrorName(decompressionResult));
-                DISPLAYLEVEL(6, "inBuff.pos = %u \n", (U32)readCSrcSize);
+                DISPLAYLEVEL(6, "total ingested (inBuff.pos) = %u \n", (U32)inBuff.pos);
             }
             CHECK (outBuff.pos != totalTestSize, "decompressed data : wrong size (%u != %u)", (U32)outBuff.pos, (U32)totalTestSize);
             CHECK (inBuff.pos != cSize, "compressed data should be fully read (%u != %u)", (U32)inBuff.pos, (U32)cSize);

From 70f81d603037242124d7ae446429462f996f16ab Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 19 Jan 2018 10:01:40 -0800
Subject: [PATCH 04/30] zstdmt uses POOL_tryAdd() to call a new worker

so that it's no longer a blocking call.
This makes it possible to stream out data gradually,
while waiting for a worker to become available.
---
 lib/compress/zstdmt_compress.c | 177 ++++++++++++++++++---------------
 programs/fileio.c              |  30 ++----
 2 files changed, 108 insertions(+), 99 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 1aa6b866f..68f974d7b 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -389,7 +389,7 @@ void ZSTDMT_compressChunk(void* jobDescription)
         BYTE* op = ostart;
         BYTE* oend = op + dstBuff.size;
         int blockNb;
-        DEBUGLOG(2, "ZSTDMT_compressChunk: compress %u bytes in %i blocks", (U32)job->srcSize, nbBlocks);
+        DEBUGLOG(5, "ZSTDMT_compressChunk: compress %u bytes in %i blocks", (U32)job->srcSize, nbBlocks);
         assert(job->cSize == 0);
         for (blockNb = 1; blockNb < nbBlocks; blockNb++) {
             size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, ZSTD_BLOCKSIZE_MAX);
@@ -400,7 +400,7 @@ void ZSTDMT_compressChunk(void* jobDescription)
             ZSTD_PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);   /* note : it's a mtctx mutex */
             job->cSize += cSize;
             job->consumed = ZSTD_BLOCKSIZE_MAX * blockNb;
-            DEBUGLOG(2, "ZSTDMT_compressChunk: compress new block : cSize==%u bytes (total: %u)",
+            DEBUGLOG(5, "ZSTDMT_compressChunk: compress new block : cSize==%u bytes (total: %u)",
                         (U32)cSize, (U32)job->cSize);
             ZSTD_pthread_cond_signal(job->jobCompleted_cond);
             ZSTD_pthread_mutex_unlock(job->jobCompleted_mutex);
@@ -458,6 +458,7 @@ struct ZSTDMT_CCtx_s {
     size_t prefixSize;
     size_t targetPrefixSize;
     inBuff_t inBuff;
+    int jobReady;        /* 1 => one job is already prepared, but pool has shortage of workers. Don't create another one. */
     XXH64_state_t xxhState;
     unsigned singleBlockingThread;
     unsigned jobIDMask;
@@ -668,14 +669,17 @@ unsigned ZSTDMT_getNbThreads(const ZSTDMT_CCtx* mtctx)
 ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
 {
     ZSTD_frameProgression fs;
-    DEBUGLOG(5, "ZSTDMT_getFrameProgression");
+    DEBUGLOG(6, "ZSTDMT_getFrameProgression");
     ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
     fs.consumed = mtctx->consumed;
     fs.produced = mtctx->produced;
     assert(mtctx->inBuff.filled >= mtctx->prefixSize);
     fs.ingested = mtctx->consumed + (mtctx->inBuff.filled - mtctx->prefixSize);
     {   unsigned jobNb;
-        for (jobNb = mtctx->doneJobID ; jobNb < mtctx->nextJobID ; jobNb++) {
+        unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1);
+        DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
+                    mtctx->doneJobID, lastJobNb, mtctx->jobReady)
+        for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
             unsigned const wJobID = jobNb & mtctx->jobIDMask;
             size_t const cResult = mtctx->jobs[wJobID].cSize;
             size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
@@ -999,59 +1003,60 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
 {
     unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;
 
-    DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ",
-                zcs->nextJobID, (U32)srcSize, (U32)zcs->prefixSize);
-    zcs->jobs[jobID].src = zcs->inBuff.buffer;
-    zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start;
-    zcs->jobs[jobID].srcSize = srcSize;
-    zcs->jobs[jobID].consumed = 0;
-    zcs->jobs[jobID].cSize = 0;
-    zcs->jobs[jobID].prefixSize = zcs->prefixSize;
-    assert(zcs->inBuff.filled >= srcSize + zcs->prefixSize);
-    zcs->jobs[jobID].params = zcs->params;
-    /* do not calculate checksum within sections, but write it in header for first section */
-    if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;
-    zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;
-    zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
-    zcs->jobs[jobID].dstBuff = g_nullBuffer;
-    zcs->jobs[jobID].cctxPool = zcs->cctxPool;
-    zcs->jobs[jobID].bufPool = zcs->bufPool;
-    zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0);
-    zcs->jobs[jobID].lastChunk = endFrame;
-    zcs->jobs[jobID].jobCompleted = 0;
-    zcs->jobs[jobID].frameChecksumNeeded = endFrame && (zcs->nextJobID>0) && zcs->params.fParams.checksumFlag;
-    zcs->jobs[jobID].dstFlushed = 0;
-    zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex;
-    zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond;
+    if (!zcs->jobReady) {
+        DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ",
+                    zcs->nextJobID, (U32)srcSize, (U32)zcs->prefixSize);
+        zcs->jobs[jobID].src = zcs->inBuff.buffer;
+        zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start;
+        zcs->jobs[jobID].srcSize = srcSize;
+        zcs->jobs[jobID].consumed = 0;
+        zcs->jobs[jobID].cSize = 0;
+        zcs->jobs[jobID].prefixSize = zcs->prefixSize;
+        assert(zcs->inBuff.filled >= srcSize + zcs->prefixSize);
+        zcs->jobs[jobID].params = zcs->params;
+        /* do not calculate checksum within sections, but write it in header for first section */
+        if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;
+        zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;
+        zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
+        zcs->jobs[jobID].dstBuff = g_nullBuffer;
+        zcs->jobs[jobID].cctxPool = zcs->cctxPool;
+        zcs->jobs[jobID].bufPool = zcs->bufPool;
+        zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0);
+        zcs->jobs[jobID].lastChunk = endFrame;
+        zcs->jobs[jobID].jobCompleted = 0;
+        zcs->jobs[jobID].frameChecksumNeeded = endFrame && (zcs->nextJobID>0) && zcs->params.fParams.checksumFlag;
+        zcs->jobs[jobID].dstFlushed = 0;
+        zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex;
+        zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond;
 
-    if (zcs->params.fParams.checksumFlag)
-        XXH64_update(&zcs->xxhState, (const char*)zcs->inBuff.buffer.start + zcs->prefixSize, srcSize);
+        if (zcs->params.fParams.checksumFlag)
+            XXH64_update(&zcs->xxhState, (const char*)zcs->inBuff.buffer.start + zcs->prefixSize, srcSize);
 
-    /* get a new buffer for next input */
-    if (!endFrame) {
-        size_t const newPrefixSize = MIN(srcSize + zcs->prefixSize, zcs->targetPrefixSize);
-        zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->bufPool);
-        if (zcs->inBuff.buffer.start == NULL) {   /* not enough memory to allocate next input buffer */
-            zcs->jobs[jobID].jobCompleted = 1;
-            zcs->nextJobID++;
-            ZSTDMT_waitForAllJobsCompleted(zcs);
-            ZSTDMT_releaseAllJobResources(zcs);
-            return ERROR(memory_allocation);
-        }
-        zcs->inBuff.filled -= srcSize + zcs->prefixSize - newPrefixSize;
-        memmove(zcs->inBuff.buffer.start,
-            (const char*)zcs->jobs[jobID].srcStart + zcs->prefixSize + srcSize - newPrefixSize,
-            zcs->inBuff.filled);
-        zcs->prefixSize = newPrefixSize;
-    } else {   /* if (endFrame==1) */
-        zcs->inBuff.buffer = g_nullBuffer;
-        zcs->inBuff.filled = 0;
-        zcs->prefixSize = 0;
-        zcs->frameEnded = 1;
-        if (zcs->nextJobID == 0) {
-            /* single chunk exception : checksum is calculated directly within worker thread */
-            zcs->params.fParams.checksumFlag = 0;
-    }   }
+        /* get a new buffer for next input */
+        if (!endFrame) {
+            size_t const newPrefixSize = MIN(srcSize + zcs->prefixSize, zcs->targetPrefixSize);
+            zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->bufPool);
+            if (zcs->inBuff.buffer.start == NULL) {   /* not enough memory to allocate next input buffer */
+                zcs->jobs[jobID].jobCompleted = 1;
+                zcs->nextJobID++;
+                ZSTDMT_waitForAllJobsCompleted(zcs);
+                ZSTDMT_releaseAllJobResources(zcs);
+                return ERROR(memory_allocation);
+            }
+            zcs->inBuff.filled -= srcSize + zcs->prefixSize - newPrefixSize;
+            memmove(zcs->inBuff.buffer.start,   /* copy end of current job into next job, as "prefix" */
+                (const char*)zcs->jobs[jobID].srcStart + zcs->prefixSize + srcSize - newPrefixSize,
+                zcs->inBuff.filled);
+            zcs->prefixSize = newPrefixSize;
+        } else {   /* endFrame==1 => no need for another input buffer */
+            zcs->inBuff.buffer = g_nullBuffer;
+            zcs->inBuff.filled = 0;
+            zcs->prefixSize = 0;
+            zcs->frameEnded = 1;
+            if (zcs->nextJobID == 0) {
+                /* single chunk exception : checksum is calculated directly within worker thread */
+                zcs->params.fParams.checksumFlag = 0;
+    }   }   }
 
     DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes  (end:%u) (note : doneJob = %u=>%u)",
                 zcs->nextJobID,
@@ -1059,8 +1064,13 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
                 zcs->jobs[jobID].lastChunk,
                 zcs->doneJobID,
                 zcs->doneJobID & zcs->jobIDMask);
-    POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]);   /* this call is blocking when thread worker pool is exhausted */
-    zcs->nextJobID++;
+    if (POOL_tryAdd(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID])) {
+        zcs->nextJobID++;
+        zcs->jobReady = 0;
+    } else {
+        DEBUGLOG(5, "ZSTDMT_createCompressionJob: no worker available for job %u", zcs->nextJobID);
+        zcs->jobReady = 1;
+    }
     return 0;
 }
 
@@ -1072,16 +1082,17 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
 static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush)
 {
     unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask;
-    DEBUGLOG(2, "ZSTDMT_flushNextJob (blocking:%u)", blockToFlush);
+    DEBUGLOG(5, "ZSTDMT_flushNextJob (blocking:%u)", blockToFlush);
     if (zcs->doneJobID == zcs->nextJobID) {
-        DEBUGLOG(2, "ZSTDMT_flushNextJob: doneJobID==nextJobID : nothing to flush !")
+        DEBUGLOG(5, "ZSTDMT_flushNextJob: doneJobID(%u)==(%u)nextJobID : nothing to flush !",
+                    zcs->doneJobID, zcs->nextJobID)
         return 0;   /* all flushed ! */
     }
     ZSTD_PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
     while (zcs->jobs[wJobID].dstFlushed == zcs->jobs[wJobID].cSize) {
         if (!blockToFlush) { ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; }  /* nothing ready to be flushed => skip */
         if (zcs->jobs[wJobID].jobCompleted==1) break;
-        DEBUGLOG(2, "waiting for something to flush from job %u (currently flushed: %u bytes)",
+        DEBUGLOG(5, "waiting for something to flush from job %u (currently flushed: %u bytes)",
                     zcs->doneJobID, (U32)zcs->jobs[wJobID].dstFlushed);
         ZSTD_pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);  /* block when nothing available to flush but more to come */
     }
@@ -1108,14 +1119,16 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
         }
         assert(job.cSize >= job.dstFlushed);
         {   size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
-            DEBUGLOG(2, "ZSTDMT_flushNextJob: Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID);
+            DEBUGLOG(5, "ZSTDMT_flushNextJob: Flushing %u bytes from job %u (completion:%.1f%%)",
+                        (U32)toWrite, zcs->doneJobID, (double)job.consumed / job.srcSize * 100);
             memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite);
             output->pos += toWrite;
             job.dstFlushed += toWrite;
         }
         if ( job.jobCompleted
           && (job.dstFlushed == job.cSize) ) {   /* output buffer fully flushed => move to next one */
-            DEBUGLOG(2, "Job %u completed, moving to next one", zcs->doneJobID);
+            DEBUGLOG(5, "Job %u completed (%u bytes), moving to next one",
+                    zcs->doneJobID, (U32)job.dstFlushed);
             ZSTDMT_releaseBuffer(zcs->bufPool, job.dstBuff);
             zcs->jobs[wJobID].dstBuff = g_nullBuffer;
             zcs->jobs[wJobID].jobCompleted = 0;
@@ -1128,6 +1141,7 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi
         /* return value : how many bytes left in buffer ; fake it to 1 when unknown but >0 */
         if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed);
         if (zcs->doneJobID < zcs->nextJobID) return 1;   /* still some more buffer to flush */
+        if (zcs->jobReady) return 1;   /* some more work to do ! */
         zcs->allJobsCompleted = zcs->frameEnded;   /* last frame entirely flushed */
         return 0;   /* everything flushed */
 }   }
@@ -1176,7 +1190,8 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
     }
 
     /* fill input buffer */
-    if (input->size > input->pos) {   /* support NULL input */
+    if ( (!mtctx->jobReady)
+      && (input->size > input->pos) ) {   /* support NULL input */
         if (mtctx->inBuff.buffer.start == NULL) {
             mtctx->inBuff.buffer = ZSTDMT_getBuffer(mtctx->bufPool);  /* note : allocation can fail, in which case, no forward input progress */
             mtctx->inBuff.filled = 0;
@@ -1186,34 +1201,36 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
         }   }
         if (mtctx->inBuff.buffer.start != NULL) {
             size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled);
-            DEBUGLOG(5, "inBuff:%08X;  inBuffSize=%u;  ToCopy=%u", (U32)(size_t)mtctx->inBuff.buffer.start, (U32)mtctx->inBuffSize, (U32)toLoad);
+            DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u",
+                        (U32)toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->inBuffSize);
             memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
             input->pos += toLoad;
             mtctx->inBuff.filled += toLoad;
             forwardInputProgress = toLoad>0;
     }   }
 
-    if ( (mtctx->inBuff.filled >= newJobThreshold)  /* filled enough : let's compress */
-      && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) {   /* avoid overwriting job round buffer */
+    if ( (mtctx->jobReady)
+      || ( (mtctx->inBuff.filled >= newJobThreshold)  /* filled enough : let's compress */
+        && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) ) {   /* avoid overwriting job round buffer */
         CHECK_F( ZSTDMT_createCompressionJob(mtctx, mtctx->targetSectionSize, 0 /* endFrame */) );
     }
 
     /* check for potential compressed data ready to be flushed */
-    CHECK_F( ZSTDMT_flushNextJob(mtctx, output, !forwardInputProgress /* blockToFlush */) ); /* block if there was no forward input progress */
+    {   size_t const remainingToFlush = ZSTDMT_flushNextJob(mtctx, output, !forwardInputProgress); /* block if there was no forward input progress */
+        if (input->pos < input->size) return MAX(remainingToFlush, 1);  /* input not consumed : do not flush yet */
+        if (mtctx->jobReady) return remainingToFlush;   /* some more input ready to be compressed */
 
-    if (input->pos < input->size)  /* input not consumed : do not flush yet */
-        endOp = ZSTD_e_continue;
-
-    switch(endOp)
-    {
-        case ZSTD_e_flush:
-            return ZSTDMT_flushStream(mtctx, output);
-        case ZSTD_e_end:
-            return ZSTDMT_endStream(mtctx, output);
-        case ZSTD_e_continue:
-            return 1;
-        default:
-            return ERROR(GENERIC);   /* invalid endDirective */
+        switch(endOp)
+        {
+            case ZSTD_e_flush:
+                return ZSTDMT_flushStream(mtctx, output);
+            case ZSTD_e_end:
+                return ZSTDMT_endStream(mtctx, output);
+            case ZSTD_e_continue:
+                return 1;
+            default:
+                return ERROR(GENERIC);   /* invalid endDirective */
+        }
     }
 }
 
diff --git a/programs/fileio.c b/programs/fileio.c
index 7045a5323..6039a75a2 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -792,6 +792,7 @@ static int FIO_compressFilename_internal(cRess_t ress,
         /* Fill input Buffer */
         size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile);
         ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 };
+        DISPLAYLEVEL(6, "fread %u bytes from source \n", (U32)inSize);
         readsize += inSize;
 
         if (inSize == 0 || (fileSize != UTIL_FILESIZE_UNKNOWN && readsize == fileSize))
@@ -803,32 +804,23 @@ static int FIO_compressFilename_internal(cRess_t ress,
             CHECK_V(result, ZSTD_compress_generic(ress.cctx, &outBuff, &inBuff, directive));
 
             /* Write compressed stream */
-            DISPLAYLEVEL(6, "ZSTD_compress_generic,ZSTD_e_continue: generated %u bytes \n",
-                            (U32)outBuff.pos);
+            DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => intput pos(%u)<=(%u)size ; output generated %u bytes \n",
+                            (U32)directive, (U32)inBuff.pos, (U32)inBuff.size, (U32)outBuff.pos);
             if (outBuff.pos) {
                 size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile);
                 if (sizeCheck!=outBuff.pos)
                     EXM_THROW(25, "Write error : cannot write compressed block into %s", dstFileName);
                 compressedfilesize += outBuff.pos;
             }
+            if (READY_FOR_UPDATE()) {
+                ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);
+                DISPLAYUPDATE(2, "\rRead :%6u MB - Consumed :%6u MB - Compressed :%6u MB => %.2f%%",
+                                (U32)(zfp.ingested >> 20),
+                                (U32)(zfp.consumed >> 20),
+                                (U32)(zfp.produced >> 20),
+                                (double)zfp.produced / (zfp.consumed + !zfp.consumed/*avoid div0*/) * 100 );
+            }
         }
-#if 1
-    if (READY_FOR_UPDATE()) {
-        ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);
-        DISPLAYUPDATE(2, "\rRead :%6u MB - Consumed :%6u MB - Compressed :%6u MB => %.2f%%",
-                        (U32)(zfp.ingested >> 20),
-                        (U32)(zfp.consumed >> 20),
-                        (U32)(zfp.produced >> 20),
-                        (double)zfp.produced / (zfp.consumed + !zfp.consumed/*avoid div0*/) * 100 );
-    }
-#else
-        if (fileSize == UTIL_FILESIZE_UNKNOWN) {
-            DISPLAYUPDATE(2, "\rRead : %u MB", (U32)(readsize>>20));
-        } else {
-            DISPLAYUPDATE(2, "\rRead : %u / %u MB",
-                                (U32)(readsize>>20), (U32)(fileSize>>20));
-        }
-#endif
     } while (directive != ZSTD_e_end);
 
 finish:

From dc696234539e5741ced8a1c663c3343ddaf59e10 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 19 Jan 2018 12:41:56 -0800
Subject: [PATCH 05/30] zstdmt: fixed corruption issue in ZSTDMT_endStream()

when invoked directly.
---
 lib/compress/zstdmt_compress.c |  2 +-
 tests/zstreamtest.c            | 14 ++++++++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 68f974d7b..303bc1f71 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -1249,7 +1249,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* ou
     size_t const srcSize = mtctx->inBuff.filled - mtctx->prefixSize;
     DEBUGLOG(5, "ZSTDMT_flushStream_internal");
 
-    if ( ((srcSize > 0) || (endFrame && !mtctx->frameEnded))
+    if ( (mtctx->jobReady || (srcSize > 0) || (endFrame && !mtctx->frameEnded))
        && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) {
            DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job");
         CHECK_F( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index c26cb3295..b21611abc 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -864,10 +864,16 @@ static size_t findDiff(const void* buf1, const void* buf2, size_t max)
         if (b1[u] != b2[u]) break;
     }
     DISPLAY("Error at position %u / %u \n", (U32)u, (U32)max);
-    DISPLAY(" %02X %02X %02X  :%02X:  %02X %02X %02X %02X %02X \n",
-            b1[u-3], b1[u-2], b1[u-1], b1[u-0], b1[u+1], b1[u+2], b1[u+3], b1[u+4], b1[u+5]);
-    DISPLAY(" %02X %02X %02X  :%02X:  %02X %02X %02X %02X %02X \n",
-            b2[u-3], b2[u-2], b2[u-1], b2[u-0], b2[u+1], b2[u+2], b2[u+3], b2[u+4], b2[u+5]);
+    if (u>=3)
+        DISPLAY(" %02X %02X %02X ",
+                b1[u-3], b1[u-2], b1[u-1]);
+    DISPLAY(" :%02X:  %02X %02X %02X %02X %02X \n",
+            b1[u], b1[u+1], b1[u+2], b1[u+3], b1[u+4], b1[u+5]);
+    if (u>=3)
+        DISPLAY(" %02X %02X %02X ",
+                b2[u-3], b2[u-2], b2[u-1]);
+    DISPLAY(" :%02X:  %02X %02X %02X %02X %02X \n",
+            b2[u], b2[u+1], b2[u+2], b2[u+3], b2[u+4], b2[u+5]);
     return u;
 }
 

From 940634a610bc108d42571e959a51886a34f30c46 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 19 Jan 2018 13:19:59 -0800
Subject: [PATCH 06/30] zstdmt : simplify job creation

job will not be created when not enough room within job Table
---
 lib/compress/zstdmt_compress.c | 27 ++++++++++-----------------
 tests/zstreamtest.c            |  8 ++++++--
 2 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 303bc1f71..5b253f162 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -1003,6 +1003,10 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
 {
     unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;
 
+    unsigned const limitID = zcs->doneJobID & zcs->jobIDMask;
+    if ((zcs->doneJobID < zcs->nextJobID) & (jobID == limitID))
+        return 0;  /* new job would overwrite unflushed older job */
+
     if (!zcs->jobReady) {
         DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ",
                     zcs->nextJobID, (U32)srcSize, (U32)zcs->prefixSize);
@@ -1210,27 +1214,15 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
     }   }
 
     if ( (mtctx->jobReady)
-      || ( (mtctx->inBuff.filled >= newJobThreshold)  /* filled enough : let's compress */
-        && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) ) {   /* avoid overwriting job round buffer */
+      || (mtctx->inBuff.filled >= newJobThreshold)  /* filled enough : let's compress */
+      || ( (endOp != ZSTD_e_continue) && (mtctx->inBuff.filled > 0) ) ) {   /* avoid overwriting job round buffer */
         CHECK_F( ZSTDMT_createCompressionJob(mtctx, mtctx->targetSectionSize, 0 /* endFrame */) );
     }
 
     /* check for potential compressed data ready to be flushed */
     {   size_t const remainingToFlush = ZSTDMT_flushNextJob(mtctx, output, !forwardInputProgress); /* block if there was no forward input progress */
         if (input->pos < input->size) return MAX(remainingToFlush, 1);  /* input not consumed : do not flush yet */
-        if (mtctx->jobReady) return remainingToFlush;   /* some more input ready to be compressed */
-
-        switch(endOp)
-        {
-            case ZSTD_e_flush:
-                return ZSTDMT_flushStream(mtctx, output);
-            case ZSTD_e_end:
-                return ZSTDMT_endStream(mtctx, output);
-            case ZSTD_e_continue:
-                return 1;
-            default:
-                return ERROR(GENERIC);   /* invalid endDirective */
-        }
+        return remainingToFlush;
     }
 }
 
@@ -1249,8 +1241,9 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* ou
     size_t const srcSize = mtctx->inBuff.filled - mtctx->prefixSize;
     DEBUGLOG(5, "ZSTDMT_flushStream_internal");
 
-    if ( (mtctx->jobReady || (srcSize > 0) || (endFrame && !mtctx->frameEnded))
-       && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) {
+    if ( mtctx->jobReady     /* one job ready for a worker to pick up */
+      || (srcSize > 0)       /* still some data within input buffer */
+      || (endFrame && !mtctx->frameEnded)) {  /* need a last 0-size block to end frame */
            DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job");
         CHECK_F( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
     }
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index b21611abc..6ec14bb8e 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -863,6 +863,10 @@ static size_t findDiff(const void* buf1, const void* buf2, size_t max)
     for (u=0; u No difference detected within %u bytes \n", (U32)max);
+        return u;
+    }
     DISPLAY("Error at position %u / %u \n", (U32)u, (U32)max);
     if (u>=3)
         DISPLAY(" %02X %02X %02X ",
@@ -1352,8 +1356,8 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
                 outBuff.size = outBuff.pos + dstBuffSize;
                 DISPLAYLEVEL(6, "ZSTD_decompressStream input %u bytes \n", (U32)readCSrcSize);
                 decompressionResult = ZSTD_decompressStream(zd, &outBuff, &inBuff);
-                if (ZSTD_getErrorCode(decompressionResult) == ZSTD_error_corruption_detected) {
-                    DISPLAY("ZSTD_decompressStream: checksum error : \n");
+                if (ZSTD_isError(decompressionResult)) {
+                    DISPLAY("ZSTD_decompressStream error : %s \n", ZSTD_getErrorName(decompressionResult));
                     findDiff(copyBuffer, dstBuffer, totalTestSize);
                 }
                 CHECK (ZSTD_isError(decompressionResult), "decompression error : %s", ZSTD_getErrorName(decompressionResult));

From 3ad7d4951c9c54b9709935ad54c9fb4e81b940f2 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 19 Jan 2018 17:35:08 -0800
Subject: [PATCH 07/30] zstdmt : finally vanquished an elusive and rare race
 condition

---
 lib/compress/zstdmt_compress.c | 97 ++++++++++++++++++----------------
 tests/zstreamtest.c            | 88 +++++++++++++++---------------
 2 files changed, 97 insertions(+), 88 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 5b253f162..3e665f574 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -1003,9 +1003,11 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
 {
     unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;
 
-    unsigned const limitID = zcs->doneJobID & zcs->jobIDMask;
-    if ((zcs->doneJobID < zcs->nextJobID) & (jobID == limitID))
-        return 0;  /* new job would overwrite unflushed older job */
+    if (zcs->nextJobID > zcs->doneJobID + zcs->jobIDMask) {
+        DEBUGLOG(5, "ZSTDMT_createCompressionJob: will not create new job : table is full");
+        assert((zcs->nextJobID & zcs->jobIDMask) == (zcs->doneJobID & zcs->jobIDMask));
+        return 0;
+    }
 
     if (!zcs->jobReady) {
         DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ",
@@ -1079,76 +1081,78 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
 }
 
 
-/*! ZSTDMT_flushNextJob() :
- * `output` : will be updated with amount of data flushed .
+/*! ZSTDMT_flushProduced() :
+ * `output` : `pos` will be updated with amount of data flushed .
  * `blockToFlush` : if >0, the function will block and wait if there is no data available to flush .
  * @return : amount of data remaining within internal buffer, 0 if no more, 1 if unknown but > 0, or an error code */
-static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush)
+static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush)
 {
     unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask;
-    DEBUGLOG(5, "ZSTDMT_flushNextJob (blocking:%u)", blockToFlush);
-    if (zcs->doneJobID == zcs->nextJobID) {
-        DEBUGLOG(5, "ZSTDMT_flushNextJob: doneJobID(%u)==(%u)nextJobID : nothing to flush !",
-                    zcs->doneJobID, zcs->nextJobID)
-        return 0;   /* all flushed ! */
-    }
+    DEBUGLOG(5, "ZSTDMT_flushProduced (blocking:%u)", blockToFlush);
+    assert(output->size >= output->pos);
+
     ZSTD_PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
-    while (zcs->jobs[wJobID].dstFlushed == zcs->jobs[wJobID].cSize) {
-        if (!blockToFlush) { ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; }  /* nothing ready to be flushed => skip */
-        if (zcs->jobs[wJobID].jobCompleted==1) break;
-        DEBUGLOG(5, "waiting for something to flush from job %u (currently flushed: %u bytes)",
-                    zcs->doneJobID, (U32)zcs->jobs[wJobID].dstFlushed);
-        ZSTD_pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);  /* block when nothing available to flush but more to come */
-    }
+    if (blockToFlush && (zcs->doneJobID < zcs->nextJobID)) {
+        while (zcs->jobs[wJobID].dstFlushed == zcs->jobs[wJobID].cSize) {
+            if (zcs->jobs[wJobID].jobCompleted==1) break;
+            DEBUGLOG(5, "waiting for something to flush from job %u (currently flushed: %u bytes)",
+                        zcs->doneJobID, (U32)zcs->jobs[wJobID].dstFlushed);
+            ZSTD_pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);  /* block when nothing available to flush but more to come */
+    }   }
 
     /* some output is available to be flushed */
     {   ZSTDMT_jobDescription job = zcs->jobs[wJobID];
         ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex);
         if (ZSTD_isError(job.cSize)) {
-            DEBUGLOG(5, "ZSTDMT_flushNextJob: job %u : compression error detected : %s",
+            DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s",
                         zcs->doneJobID, ZSTD_getErrorName(job.cSize));
             ZSTDMT_waitForAllJobsCompleted(zcs);
             ZSTDMT_releaseAllJobResources(zcs);
             return job.cSize;
         }
-        /* add frame checksum if necessary */
+        /* add frame checksum if necessary (can only happen once) */
         if ( job.jobCompleted
           && job.frameChecksumNeeded ) {
             U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
-            DEBUGLOG(5, "ZSTDMT_flushNextJob: writing checksum : %08X \n", checksum);
+            DEBUGLOG(5, "ZSTDMT_flushProduced: writing checksum : %08X \n", checksum);
             MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);
             job.cSize += 4;
             zcs->jobs[wJobID].cSize += 4;
             zcs->jobs[wJobID].frameChecksumNeeded = 0;
         }
         assert(job.cSize >= job.dstFlushed);
-        {   size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
-            DEBUGLOG(5, "ZSTDMT_flushNextJob: Flushing %u bytes from job %u (completion:%.1f%%)",
+        if (job.dstBuff.start != NULL) {  /* one buffer present : some job is ongoing */
+            size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
+            DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%.1f%%)",
                         (U32)toWrite, zcs->doneJobID, (double)job.consumed / job.srcSize * 100);
             memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite);
             output->pos += toWrite;
             job.dstFlushed += toWrite;
-        }
-        if ( job.jobCompleted
-          && (job.dstFlushed == job.cSize) ) {   /* output buffer fully flushed => move to next one */
-            DEBUGLOG(5, "Job %u completed (%u bytes), moving to next one",
-                    zcs->doneJobID, (U32)job.dstFlushed);
-            ZSTDMT_releaseBuffer(zcs->bufPool, job.dstBuff);
-            zcs->jobs[wJobID].dstBuff = g_nullBuffer;
-            zcs->jobs[wJobID].jobCompleted = 0;
-            zcs->consumed += job.srcSize;
-            zcs->produced += job.cSize;
-            zcs->doneJobID++;
-        } else {
-            zcs->jobs[wJobID].dstFlushed = job.dstFlushed;
-        }
+
+            if ( job.jobCompleted
+              && (job.dstFlushed == job.cSize) ) {   /* output buffer fully flushed => move to next one */
+                DEBUGLOG(5, "Job %u completed (%u bytes), moving to next one",
+                        zcs->doneJobID, (U32)job.dstFlushed);
+                ZSTDMT_releaseBuffer(zcs->bufPool, job.dstBuff);
+                zcs->jobs[wJobID].dstBuff = g_nullBuffer;
+                zcs->jobs[wJobID].jobCompleted = 0;
+                zcs->consumed += job.srcSize;
+                zcs->produced += job.cSize;
+                zcs->doneJobID++;
+            } else {
+                zcs->jobs[wJobID].dstFlushed = job.dstFlushed;   /* remember how much was flushed for next attempt */
+        }   }
+
         /* return value : how many bytes left in buffer ; fake it to 1 when unknown but >0 */
         if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed);
-        if (zcs->doneJobID < zcs->nextJobID) return 1;   /* still some more buffer to flush */
-        if (zcs->jobReady) return 1;   /* some more work to do ! */
-        zcs->allJobsCompleted = zcs->frameEnded;   /* last frame entirely flushed */
-        return 0;   /* everything flushed */
-}   }
+        if (job.srcSize > job.consumed) return 1;   /* current job not completely compressed */
+    }
+    if (zcs->doneJobID < zcs->nextJobID) return 1;   /* some more jobs to flush */
+    if (zcs->jobReady) return 1;   /* at least one more job to do ! */
+    if (zcs->inBuff.filled > 0) return 1;   /* input not empty */
+    zcs->allJobsCompleted = zcs->frameEnded;   /* last frame entirely flushed */
+    return 0;   /* everything flushed */
+}
 
 
 /** ZSTDMT_compressStream_generic() :
@@ -1220,7 +1224,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
     }
 
     /* check for potential compressed data ready to be flushed */
-    {   size_t const remainingToFlush = ZSTDMT_flushNextJob(mtctx, output, !forwardInputProgress); /* block if there was no forward input progress */
+    {   size_t const remainingToFlush = ZSTDMT_flushProduced(mtctx, output, !forwardInputProgress); /* block if there was no forward input progress */
         if (input->pos < input->size) return MAX(remainingToFlush, 1);  /* input not consumed : do not flush yet */
         return remainingToFlush;
     }
@@ -1244,12 +1248,13 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* ou
     if ( mtctx->jobReady     /* one job ready for a worker to pick up */
       || (srcSize > 0)       /* still some data within input buffer */
       || (endFrame && !mtctx->frameEnded)) {  /* need a last 0-size block to end frame */
-           DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job");
+           DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)",
+                        (U32)srcSize, endFrame);
         CHECK_F( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
     }
 
     /* check if there is any data available to flush */
-    return ZSTDMT_flushNextJob(mtctx, output, 1 /* blockToFlush */);
+    return ZSTDMT_flushProduced(mtctx, output, 1 /* blockToFlush */);
 }
 
 
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 6ec14bb8e..8c66c53bd 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -99,8 +99,8 @@ unsigned int FUZ_rand(unsigned int* seedPtr)
     if (cond) {                                              \
         DISPLAY("Error => ");                                \
         DISPLAY(__VA_ARGS__);                                \
-        DISPLAY(" (seed %u, test nb %u, line %u)  \n",       \
-                seed, testNb, __LINE__);                     \
+        DISPLAY(" (seed %u, test nb %u, line %u (sig %08X) \n", \
+                seed, testNb, __LINE__, coreSeed);           \
         goto _output_error;                                  \
 }   }
 
@@ -219,6 +219,7 @@ static int basicUnitTests(U32 seed, double compressibility)
     size_t cSize;
     int testResult = 0;
     U32 testNb = 1;
+    U32 coreSeed = 0;  /* just to conform with CHECK_Z macro display */
     ZSTD_CStream* zc = ZSTD_createCStream();
     ZSTD_DStream* zd = ZSTD_createDStream();
     ZSTDMT_CCtx* mtctx = ZSTDMT_createCCtx(2);
@@ -958,10 +959,13 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres
         size_t maxTestSize;
 
         /* init */
-        if (nbTests >= testNb) { DISPLAYUPDATE(2, "\r%6u/%6u    ", testNb, nbTests); }
-        else { DISPLAYUPDATE(2, "\r%6u          ", testNb); }
         FUZ_rand(&coreSeed);
         lseed = coreSeed ^ prime32;
+        if (nbTests >= testNb) {
+            DISPLAYUPDATE(2, "\r%6u/%6u (%08X)   ", testNb, nbTests, lseed);
+        } else {
+            DISPLAYUPDATE(2, "\r%6u  (%08X)        ", testNb, lseed);
+        }
 
         /* states full reset (deliberately not synchronized) */
         /* some issues can only happen when reusing states */
@@ -1171,7 +1175,6 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
     UTIL_time_t const startClock = UTIL_getTime();
     const BYTE* dict=NULL;   /* can keep same dict on 2 consecutive tests */
     size_t dictSize = 0;
-    U32 oldTestLog = 0;
     int const cLevelMax = bigTests ? (U32)ZSTD_maxCLevel()-1 : g_cLevelMax_smallTests;
     U32 const nbThreadsMax = bigTests ? 4 : 2;
 
@@ -1193,6 +1196,7 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
     RDG_genBuffer(cNoiseBuffer[4], srcBufferSize, 1.00, 0., coreSeed);    /* sparse content */
     memset(copyBuffer, 0x65, copyBufferSize);                             /* make copyBuffer considered initialized */
     ZSTD_initDStream_usingDict(zd, NULL, 0);  /* ensure at least one init */
+    DISPLAYLEVEL(6, "Creating initial context with %u threads \n", nbThreads);
 
     /* catch up testNb */
     for (testNb=1; testNb < startTest; testNb++)
@@ -1205,14 +1209,14 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
         size_t totalTestSize, totalGenSize, cSize;
         XXH64_state_t xxhState;
         U64 crcOrig;
-        U32 resetAllowed = 1;
         size_t maxTestSize;
 
-        /* init */
-        if (testNb < nbTests) {
-            DISPLAYUPDATE(2, "\r%6u/%6u    ", testNb, nbTests);
-        } else { DISPLAYUPDATE(2, "\r%6u          ", testNb); }
         FUZ_rand(&coreSeed);
+        if (nbTests >= testNb) {
+            DISPLAYUPDATE(2, "\r%6u/%6u (%08X)   ", testNb, nbTests, coreSeed);
+        } else {
+            DISPLAYUPDATE(2, "\r%6u  (%08X)        ", testNb, coreSeed);
+        }
         lseed = coreSeed ^ prime32;
 
         /* states full reset (deliberately not synchronized) */
@@ -1223,7 +1227,6 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
             ZSTDMT_freeCCtx(zc);
             zc = ZSTDMT_createCCtx(nbThreads);
             CHECK(zc==NULL, "ZSTDMT_createCCtx allocation error")
-            resetAllowed=0;
         }
         if ((FUZ_rand(&lseed) & 0xFF) == 132) {
             ZSTD_freeDStream(zd);
@@ -1248,16 +1251,7 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
         }
 
         /* compression init */
-        if ((FUZ_rand(&lseed)&1) /* at beginning, to keep same nb of rand */
-            && oldTestLog /* at least one test happened */ && resetAllowed) {
-            maxTestSize = FUZ_randomLength(&lseed, oldTestLog+2);
-            if (maxTestSize >= srcBufferSize) maxTestSize = srcBufferSize-1;
-            {   int const compressionLevel = (FUZ_rand(&lseed) % 5) + 1;
-                DISPLAYLEVEL(5, "Init with compression level = %i \n", compressionLevel);
-                CHECK_Z( ZSTDMT_initCStream(zc, compressionLevel) );
-            }
-        } else {
-            U32 const testLog = FUZ_rand(&lseed) % maxSrcLog;
+        {   U32 const testLog = FUZ_rand(&lseed) % maxSrcLog;
             U32 const dictLog = FUZ_rand(&lseed) % maxSrcLog;
             int const cLevelCandidate = ( FUZ_rand(&lseed)
                             % (ZSTD_maxCLevel() - (MAX(testLog, dictLog) / 2)) )
@@ -1266,24 +1260,29 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
             int const cLevelMin = MAX(cLevelThreadAdjusted, 1);  /* no negative cLevel yet */
             int const cLevel = MIN(cLevelMin, cLevelMax);
             maxTestSize = FUZ_rLogLength(&lseed, testLog);
-            oldTestLog = testLog;
-            /* random dictionary selection */
-            dictSize  = ((FUZ_rand(&lseed)&63)==1) ? FUZ_rLogLength(&lseed, dictLog) : 0;
-            {   size_t const dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize);
-                dict = srcBuffer + dictStart;
-            }
-            {   U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? ZSTD_CONTENTSIZE_UNKNOWN : maxTestSize;
-                ZSTD_parameters params = ZSTD_getParams(cLevel, pledgedSrcSize, dictSize);
-                DISPLAYLEVEL(5, "Init with windowLog = %u, pledgedSrcSize = %u, dictSize = %u \n",
-                    params.cParams.windowLog, (U32)pledgedSrcSize, (U32)dictSize);
-                params.fParams.checksumFlag = FUZ_rand(&lseed) & 1;
-                params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1;
-                params.fParams.contentSizeFlag = FUZ_rand(&lseed) & 1;
-                DISPLAYLEVEL(5, "checksumFlag : %u \n", params.fParams.checksumFlag);
-                CHECK_Z( ZSTDMT_setMTCtxParameter(zc, ZSTDMT_p_overlapSectionLog, FUZ_rand(&lseed) % 12) );
-                CHECK_Z( ZSTDMT_setMTCtxParameter(zc, ZSTDMT_p_jobSize, FUZ_rand(&lseed) % (2*maxTestSize+1)) );   /* custome job size */
-                CHECK_Z( ZSTDMT_initCStream_advanced(zc, dict, dictSize, params, pledgedSrcSize) );
-        }   }
+
+            if (FUZ_rand(&lseed)&1) {   /* simple init */
+                int const compressionLevel = (FUZ_rand(&lseed) % 5) + 1;
+                DISPLAYLEVEL(5, "Init with compression level = %i \n", compressionLevel);
+                CHECK_Z( ZSTDMT_initCStream(zc, compressionLevel) );
+            } else {   /* advanced init */
+                /* random dictionary selection */
+                dictSize  = ((FUZ_rand(&lseed)&63)==1) ? FUZ_rLogLength(&lseed, dictLog) : 0;
+                {   size_t const dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize);
+                    dict = srcBuffer + dictStart;
+                }
+                {   U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? ZSTD_CONTENTSIZE_UNKNOWN : maxTestSize;
+                    ZSTD_parameters params = ZSTD_getParams(cLevel, pledgedSrcSize, dictSize);
+                    DISPLAYLEVEL(5, "Init with windowLog = %u, pledgedSrcSize = %u, dictSize = %u \n",
+                        params.cParams.windowLog, (U32)pledgedSrcSize, (U32)dictSize);
+                    params.fParams.checksumFlag = FUZ_rand(&lseed) & 1;
+                    params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1;
+                    params.fParams.contentSizeFlag = FUZ_rand(&lseed) & 1;
+                    DISPLAYLEVEL(5, "checksumFlag : %u \n", params.fParams.checksumFlag);
+                    CHECK_Z( ZSTDMT_setMTCtxParameter(zc, ZSTDMT_p_overlapSectionLog, FUZ_rand(&lseed) % 12) );
+                    CHECK_Z( ZSTDMT_setMTCtxParameter(zc, ZSTDMT_p_jobSize, FUZ_rand(&lseed) % (2*maxTestSize+1)) );   /* custome job size */
+                    CHECK_Z( ZSTDMT_initCStream_advanced(zc, dict, dictSize, params, pledgedSrcSize) );
+        }   }   }
 
         /* multi-segments compression test */
         XXH64_reset(&xxhState, 0);
@@ -1336,10 +1335,13 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
             }   }
             crcOrig = XXH64_digest(&xxhState);
             cSize = outBuff.pos;
-            DISPLAYLEVEL(5, "Frame completed : %u bytes \n", (U32)cSize);
+            DISPLAYLEVEL(5, "Frame completed : %u bytes compressed into %u bytes \n",
+                            (U32)totalTestSize, (U32)cSize);
         }
 
         /* multi - fragments decompression test */
+        assert(totalTestSize < dstBufferSize);
+        memset(dstBuffer, 170, totalTestSize);   /* init dest area */
         if (!dictSize /* don't reset if dictionary : could be different */ && (FUZ_rand(&lseed) & 1)) {
             CHECK_Z( ZSTD_resetDStream(zd) );
         } else {
@@ -1354,14 +1356,16 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
                 size_t const dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize);
                 inBuff.size = inBuff.pos + readCSrcSize;
                 outBuff.size = outBuff.pos + dstBuffSize;
-                DISPLAYLEVEL(6, "ZSTD_decompressStream input %u bytes \n", (U32)readCSrcSize);
+                DISPLAYLEVEL(6, "ZSTD_decompressStream input %u bytes into outBuff %u bytes \n",
+                                (U32)readCSrcSize, (U32)dstBuffSize);
                 decompressionResult = ZSTD_decompressStream(zd, &outBuff, &inBuff);
                 if (ZSTD_isError(decompressionResult)) {
                     DISPLAY("ZSTD_decompressStream error : %s \n", ZSTD_getErrorName(decompressionResult));
                     findDiff(copyBuffer, dstBuffer, totalTestSize);
                 }
                 CHECK (ZSTD_isError(decompressionResult), "decompression error : %s", ZSTD_getErrorName(decompressionResult));
-                DISPLAYLEVEL(6, "total ingested (inBuff.pos) = %u \n", (U32)inBuff.pos);
+                DISPLAYLEVEL(6, "total ingested (inBuff.pos) = %u and produced (outBuff.pos) = %u \n",
+                                (U32)inBuff.pos, (U32)outBuff.pos);
             }
             CHECK (outBuff.pos != totalTestSize, "decompressed data : wrong size (%u != %u)", (U32)outBuff.pos, (U32)totalTestSize);
             CHECK (inBuff.pos != cSize, "compressed data should be fully read (%u != %u)", (U32)inBuff.pos, (U32)cSize);

From a7ef3a219c1981617adfcd9c04086e67b41a123b Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 19 Jan 2018 18:19:09 -0800
Subject: [PATCH 08/30] zstdmt : fixed last job size

---
 lib/compress/zstdmt_compress.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 3e665f574..c204fb84b 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -1220,7 +1220,8 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
     if ( (mtctx->jobReady)
       || (mtctx->inBuff.filled >= newJobThreshold)  /* filled enough : let's compress */
       || ( (endOp != ZSTD_e_continue) && (mtctx->inBuff.filled > 0) ) ) {   /* avoid overwriting job round buffer */
-        CHECK_F( ZSTDMT_createCompressionJob(mtctx, mtctx->targetSectionSize, 0 /* endFrame */) );
+        size_t const jobSize = MIN(mtctx->inBuff.filled - mtctx->prefixSize, mtctx->targetSectionSize);
+        CHECK_F( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp==ZSTD_e_end) );
     }
 
     /* check for potential compressed data ready to be flushed */

From 6711396d97f631f8513fb3b8867ccd5acc8421dd Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 19 Jan 2018 22:11:11 -0800
Subject: [PATCH 09/30] zstreamtest : fixed test 32 : multi-thread compression

using ZSTD_compress_generic(,,ZSTD_e_end)
Since it already provides ZSTD_e_end as directive,
it should not be followed by ZSTDMT_endStream().
---
 lib/compress/zstdmt_compress.h |  5 +++--
 tests/zstreamtest.c            | 25 ++++++++++++++++---------
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h
index 7c1e7e27b..34dfc488f 100644
--- a/lib/compress/zstdmt_compress.h
+++ b/lib/compress/zstdmt_compress.h
@@ -97,11 +97,12 @@ ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter
 
 
 /*! ZSTDMT_compressStream_generic() :
- *  Combines ZSTDMT_compressStream() with ZSTDMT_flushStream() or ZSTDMT_endStream()
+ *  Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
  *  depending on flush directive.
  * @return : minimum amount of data still to be flushed
  *           0 if fully flushed
- *           or an error code */
+ *           or an error code
+ *  note : needs to be init using any ZSTD_initCStream*() variant */
 ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
                                                 ZSTD_outBuffer* output,
                                                 ZSTD_inBuffer* input,
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 8c66c53bd..250857120 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -219,7 +219,7 @@ static int basicUnitTests(U32 seed, double compressibility)
     size_t cSize;
     int testResult = 0;
     U32 testNb = 1;
-    U32 coreSeed = 0;  /* just to conform with CHECK_Z macro display */
+    U32 coreSeed = 0;  /* this name to conform with CHECK_Z macro display */
     ZSTD_CStream* zc = ZSTD_createCStream();
     ZSTD_DStream* zd = ZSTD_createDStream();
     ZSTDMT_CCtx* mtctx = ZSTDMT_createCCtx(2);
@@ -373,7 +373,7 @@ static int basicUnitTests(U32 seed, double compressibility)
       DISPLAYLEVEL(3, "OK (%u bytes) \n", (U32)s);
     }
 
-    /* Byte-by-byte decompression test */
+    /* Decompression by small increment */
     DISPLAYLEVEL(3, "test%3i : decompress byte-by-byte : ", testNb++);
     {   /* skippable frame */
         size_t r = 1;
@@ -383,8 +383,10 @@ static int basicUnitTests(U32 seed, double compressibility)
         inBuff.pos = 0;
         outBuff.pos = 0;
         while (r) {   /* skippable frame */
-            inBuff.size = inBuff.pos + 1;
-            outBuff.size = outBuff.pos + 1;
+            size_t const inSize = FUZ_rand(&coreSeed) & 15;
+            size_t const outSize = FUZ_rand(&coreSeed) & 15;
+            inBuff.size = inBuff.pos + inSize;
+            outBuff.size = outBuff.pos + outSize;
             r = ZSTD_decompressStream(zd, &outBuff, &inBuff);
             if (ZSTD_isError(r)) goto _output_error;
         }
@@ -392,8 +394,10 @@ static int basicUnitTests(U32 seed, double compressibility)
         ZSTD_initDStream_usingDict(zd, CNBuffer, dictSize);
         r=1;
         while (r) {
-            inBuff.size = inBuff.pos + 1;
-            outBuff.size = outBuff.pos + 1;
+            size_t const inSize = FUZ_rand(&coreSeed) & 15;
+            size_t const outSize = FUZ_rand(&coreSeed) & 15;
+            inBuff.size = inBuff.pos + inSize;
+            outBuff.size = outBuff.pos + outSize;
             r = ZSTD_decompressStream(zd, &outBuff, &inBuff);
             if (ZSTD_isError(r)) goto _output_error;
         }
@@ -695,10 +699,13 @@ static int basicUnitTests(U32 seed, double compressibility)
     inBuff.src = CNBuffer;
     inBuff.size = CNBufferSize;
     inBuff.pos = 0;
-    CHECK_Z( ZSTDMT_compressStream_generic(mtctx, &outBuff, &inBuff, ZSTD_e_end) );
+    {   size_t const compressResult = ZSTDMT_compressStream_generic(mtctx, &outBuff, &inBuff, ZSTD_e_end);
+        if (compressResult != 0) goto _output_error;  /* compression must be completed in a single round */
+    }
     if (inBuff.pos != inBuff.size) goto _output_error;   /* entire input should be consumed */
-    { size_t const r = ZSTDMT_endStream(mtctx, &outBuff);
-      if (r != 0) goto _output_error; }  /* error, or some data not flushed */
+    {   size_t const compressedSize = ZSTD_findFrameCompressedSize(compressedBuffer, outBuff.pos);
+        if (compressedSize != outBuff.pos) goto _output_error;  /* must be a full valid frame */
+    }
     DISPLAYLEVEL(3, "OK \n");
 
     /* Complex multithreading + dictionary test */

From ebd955e26a86cf5dc317126a5bbed2b5738840f5 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 23 Jan 2018 13:12:40 -0800
Subject: [PATCH 10/30] zstdmt : fixed ending frame with 0-size block

---
 lib/compress/zstdmt_compress.c | 10 +++++++---
 tests/zstreamtest.c            | 22 +++++++++++++++-------
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index c204fb84b..be683e83f 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -1166,7 +1166,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
 {
     size_t const newJobThreshold = mtctx->prefixSize + mtctx->targetSectionSize;
     unsigned forwardInputProgress = 0;
-    DEBUGLOG(5, "ZSTDMT_compressStream_generic ");
+    DEBUGLOG(5, "ZSTDMT_compressStream_generic (endOp=%u)", (U32)endOp);
     assert(output->pos <= output->size);
     assert(input->pos  <= input->size);
 
@@ -1215,11 +1215,15 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
             input->pos += toLoad;
             mtctx->inBuff.filled += toLoad;
             forwardInputProgress = toLoad>0;
-    }   }
+        }
+        if ((input->pos < input->size) && (endOp == ZSTD_e_end))
+            endOp = ZSTD_e_flush;   /* can't end now : not all input consumed */
+    }
 
     if ( (mtctx->jobReady)
       || (mtctx->inBuff.filled >= newJobThreshold)  /* filled enough : let's compress */
-      || ( (endOp != ZSTD_e_continue) && (mtctx->inBuff.filled > 0) ) ) {   /* avoid overwriting job round buffer */
+      || ((endOp != ZSTD_e_continue) && (mtctx->inBuff.filled > 0))  /* something to flush : let's go */
+      || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) {   /* must finish the frame with a zero-size block */
         size_t const jobSize = MIN(mtctx->inBuff.filled - mtctx->prefixSize, mtctx->targetSectionSize);
         CHECK_F( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp==ZSTD_e_end) );
     }
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 250857120..3a738c2fc 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -99,8 +99,8 @@ unsigned int FUZ_rand(unsigned int* seedPtr)
     if (cond) {                                              \
         DISPLAY("Error => ");                                \
         DISPLAY(__VA_ARGS__);                                \
-        DISPLAY(" (seed %u, test nb %u, line %u (sig %08X) \n", \
-                seed, testNb, __LINE__, coreSeed);           \
+        DISPLAY(" (seed %u, test nb %u, line %u) \n",        \
+                seed, testNb, __LINE__);                     \
         goto _output_error;                                  \
 }   }
 
@@ -1611,7 +1611,11 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double
                 }
 
                 /* mess with frame parameters */
-                if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_checksumFlag, FUZ_rand(&lseed) & 1, useOpaqueAPI) );
+                if (FUZ_rand(&lseed) & 1) {
+                    U32 const checksumFlag = FUZ_rand(&lseed) & 1;
+                    DISPLAYLEVEL(5, "t%u: frame checksum : %u \n", testNb, checksumFlag);
+                    CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_checksumFlag, checksumFlag, useOpaqueAPI) );
+                }
                 if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_dictIDFlag, FUZ_rand(&lseed) & 1, useOpaqueAPI) );
                 if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_contentSizeFlag, FUZ_rand(&lseed) & 1, useOpaqueAPI) );
                 if (FUZ_rand(&lseed) & 1) {
@@ -1676,8 +1680,8 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double
                 outBuff.size = outBuff.pos + dstBuffSize;
 
                 CHECK_Z( ZSTD_compress_generic(zc, &outBuff, &inBuff, flush) );
-                DISPLAYLEVEL(6, "t%u: compress consumed %u bytes (total : %u) \n",
-                    testNb, (U32)inBuff.pos, (U32)(totalTestSize + inBuff.pos));
+                DISPLAYLEVEL(6, "t%u: compress consumed %u bytes (total : %u) ; flush: %u (total : %u) \n",
+                    testNb, (U32)inBuff.pos, (U32)(totalTestSize + inBuff.pos), (U32)flush, (U32)outBuff.pos);
 
                 XXH64_update(&xxhState, srcBuffer+srcStart, inBuff.pos);
                 memcpy(copyBuffer+totalTestSize, srcBuffer+srcStart, inBuff.pos);
@@ -1685,7 +1689,7 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double
             }
 
             /* final frame epilogue */
-            {   size_t remainingToFlush = (size_t)(-1);
+            {   size_t remainingToFlush = 1;
                 while (remainingToFlush) {
                     ZSTD_inBuffer inBuff = { NULL, 0, 0 };
                     size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog+1);
@@ -1722,8 +1726,12 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double
                 DISPLAYLEVEL(6, "ZSTD_decompressStream input %u bytes (pos:%u/%u)\n",
                             (U32)readCSrcSize, (U32)inBuff.pos, (U32)cSize);
                 decompressionResult = ZSTD_decompressStream(zd, &outBuff, &inBuff);
+                if (ZSTD_isError(decompressionResult)) {
+                    DISPLAY("ZSTD_decompressStream error : %s \n", ZSTD_getErrorName(decompressionResult));
+                    findDiff(copyBuffer, dstBuffer, totalTestSize);
+                }
                 CHECK (ZSTD_isError(decompressionResult), "decompression error : %s", ZSTD_getErrorName(decompressionResult));
-                DISPLAYLEVEL(6, "inBuff.pos = %u \n", (U32)readCSrcSize);
+                CHECK (inBuff.pos > cSize, "ZSTD_decompressStream consumes too much input : %u > %u ", (U32)inBuff.pos, (U32)cSize);
             }
             CHECK (outBuff.pos != totalTestSize, "decompressed data : wrong size (%u != %u)", (U32)outBuff.pos, (U32)totalTestSize);
             CHECK (inBuff.pos != cSize, "compressed data should be fully read (%u != %u)", (U32)inBuff.pos, (U32)cSize);

From de5e38a7a60ef80f4e71838df87cea15c5e15047 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 23 Jan 2018 14:03:07 -0800
Subject: [PATCH 11/30] zstdmt: fixed minor race condition

no real consequence, but pollute tsan tests :

job->dstBuff is being modified inside worker,
while main thread might read it accidentally
because it copies whole job.
But since it doesn't used dstBuff, there is no real consequence.

Other potential solution : only copy useful data, instead of whole job
---
 lib/compress/zstdmt_compress.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index be683e83f..922839810 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -344,7 +344,9 @@ void ZSTDMT_compressChunk(void* jobDescription)
             job->cSize = ERROR(memory_allocation);
             goto _endJob;
         }
+        ZSTD_PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);   /* note : it's a mtctx mutex */
         job->dstBuff = dstBuff;
+        ZSTD_pthread_mutex_unlock(job->jobCompleted_mutex);
     }
 
     /* init */

From c1cc57f270e07caf0a8356be921511fedf3a6c8f Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 23 Jan 2018 15:19:11 -0800
Subject: [PATCH 12/30] zstdmt : fix end condition (ZSTD_e_end)

When ZSTD_e_end directive is provided,
the question is not only "are internal buffers completely flushed",
it is also "is current frame completed".

In some rare cases,
it was possible for internal buffers to be completely flushed,
triggering a @return == 0,
but frame was not completed as it needed a last null-size block to mark the end,
resulting in an unfinished frame.
---
 lib/compress/zstdmt_compress.c | 21 +++++++++++----------
 lib/zstd.h                     |  5 +++--
 tests/zstreamtest.c            |  3 ++-
 3 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 922839810..448a3e73f 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -1087,7 +1087,7 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
  * `output` : `pos` will be updated with amount of data flushed .
  * `blockToFlush` : if >0, the function will block and wait if there is no data available to flush .
  * @return : amount of data remaining within internal buffer, 0 if no more, 1 if unknown but > 0, or an error code */
-static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush)
+static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush, ZSTD_EndDirective end)
 {
     unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask;
     DEBUGLOG(5, "ZSTDMT_flushProduced (blocking:%u)", blockToFlush);
@@ -1116,7 +1116,7 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, uns
         if ( job.jobCompleted
           && job.frameChecksumNeeded ) {
             U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
-            DEBUGLOG(5, "ZSTDMT_flushProduced: writing checksum : %08X \n", checksum);
+            DEBUGLOG(4, "ZSTDMT_flushProduced: writing checksum : %08X \n", checksum);
             MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);
             job.cSize += 4;
             zcs->jobs[wJobID].cSize += 4;
@@ -1150,9 +1150,10 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, uns
         if (job.srcSize > job.consumed) return 1;   /* current job not completely compressed */
     }
     if (zcs->doneJobID < zcs->nextJobID) return 1;   /* some more jobs to flush */
-    if (zcs->jobReady) return 1;   /* at least one more job to do ! */
+    if (zcs->jobReady) return 1;   /* one job is ready and queued! */
     if (zcs->inBuff.filled > 0) return 1;   /* input not empty */
     zcs->allJobsCompleted = zcs->frameEnded;   /* last frame entirely flushed */
+    if (end == ZSTD_e_end) return !zcs->frameEnded;  /* for ZSTD_e_end, question becomes : is frame completed ? It may need a last null-block */
     return 0;   /* everything flushed */
 }
 
@@ -1231,7 +1232,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
     }
 
     /* check for potential compressed data ready to be flushed */
-    {   size_t const remainingToFlush = ZSTDMT_flushProduced(mtctx, output, !forwardInputProgress); /* block if there was no forward input progress */
+    {   size_t const remainingToFlush = ZSTDMT_flushProduced(mtctx, output, !forwardInputProgress, endOp); /* block if there was no forward input progress */
         if (input->pos < input->size) return MAX(remainingToFlush, 1);  /* input not consumed : do not flush yet */
         return remainingToFlush;
     }
@@ -1247,21 +1248,21 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu
 }
 
 
-static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned endFrame)
+static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_EndDirective endFrame)
 {
     size_t const srcSize = mtctx->inBuff.filled - mtctx->prefixSize;
     DEBUGLOG(5, "ZSTDMT_flushStream_internal");
 
     if ( mtctx->jobReady     /* one job ready for a worker to pick up */
       || (srcSize > 0)       /* still some data within input buffer */
-      || (endFrame && !mtctx->frameEnded)) {  /* need a last 0-size block to end frame */
+      || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) {  /* need a last 0-size block to end frame */
            DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)",
-                        (U32)srcSize, endFrame);
+                        (U32)srcSize, (U32)endFrame);
         CHECK_F( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
     }
 
     /* check if there is any data available to flush */
-    return ZSTDMT_flushProduced(mtctx, output, 1 /* blockToFlush */);
+    return ZSTDMT_flushProduced(mtctx, output, 1 /* blockToFlush */, endFrame);
 }
 
 
@@ -1270,7 +1271,7 @@ size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
     DEBUGLOG(5, "ZSTDMT_flushStream");
     if (mtctx->singleBlockingThread)
         return ZSTD_flushStream(mtctx->cctxPool->cctx[0], output);
-    return ZSTDMT_flushStream_internal(mtctx, output, 0 /* endFrame */);
+    return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_flush);
 }
 
 size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
@@ -1278,5 +1279,5 @@ size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
     DEBUGLOG(4, "ZSTDMT_endStream");
     if (mtctx->singleBlockingThread)
         return ZSTD_endStream(mtctx->cctxPool->cctx[0], output);
-    return ZSTDMT_flushStream_internal(mtctx, output, 1 /* endFrame */);
+    return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_end);
 }
diff --git a/lib/zstd.h b/lib/zstd.h
index aab6be3b0..da9d295cd 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -1136,10 +1136,11 @@ typedef enum {
  *                                                     and then immediately returns, just indicating that there is some data remaining to be flushed.
  *                                                     The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte.
  *  - Exception : in multi-threading mode, if the first call requests a ZSTD_e_end directive, it is blocking : it will complete compression before giving back control to caller.
- *  - @return provides the minimum amount of data remaining to be flushed from internal buffers
+ *  - @return provides a minimum amount of data remaining to be flushed from internal buffers
  *            or an error code, which can be tested using ZSTD_isError().
  *            if @return != 0, flush is not fully completed, there is still some data left within internal buffers.
- *            This is useful to determine if a ZSTD_e_flush or ZSTD_e_end directive is completed.
+ *            This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers.
+ *            For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed.
  *  - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0),
  *            only ZSTD_e_end or ZSTD_e_flush operations are allowed.
  *            Before starting a new compression job, or changing compression parameters,
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 3a738c2fc..762224d53 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -1695,8 +1695,9 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double
                     size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog+1);
                     size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize);
                     outBuff.size = outBuff.pos + adjustedDstSize;
-                    DISPLAYLEVEL(6, "End-flush into dst buffer of size %u \n", (U32)adjustedDstSize);
+                    DISPLAYLEVEL(6, "t%u: End-flush into dst buffer of size %u \n", testNb, (U32)adjustedDstSize);
                     remainingToFlush = ZSTD_compress_generic(zc, &outBuff, &inBuff, ZSTD_e_end);
+                    DISPLAYLEVEL(6, "t%u: Total flushed so far : %u bytes \n", testNb, (U32)outBuff.pos);
                     CHECK( ZSTD_isError(remainingToFlush),
                           "ZSTD_compress_generic w/ ZSTD_e_end error : %s",
                            ZSTD_getErrorName(remainingToFlush) );

From 5f349b129c118b65fdaf60f3f87f84669040680c Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 23 Jan 2018 15:52:40 -0800
Subject: [PATCH 13/30] zstdmt : correctly set end of frame

---
 lib/compress/zstdmt_compress.c | 7 ++++---
 lib/zstd.h                     | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 448a3e73f..0207920dc 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -1001,9 +1001,10 @@ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
 }
 
 
-static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame)
+static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, ZSTD_EndDirective endOp)
 {
     unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;
+    int const endFrame = (endOp == ZSTD_e_end);
 
     if (zcs->nextJobID > zcs->doneJobID + zcs->jobIDMask) {
         DEBUGLOG(5, "ZSTDMT_createCompressionJob: will not create new job : table is full");
@@ -1060,7 +1061,7 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi
             zcs->inBuff.buffer = g_nullBuffer;
             zcs->inBuff.filled = 0;
             zcs->prefixSize = 0;
-            zcs->frameEnded = 1;
+            zcs->frameEnded = endFrame;
             if (zcs->nextJobID == 0) {
                 /* single chunk exception : checksum is calculated directly within worker thread */
                 zcs->params.fParams.checksumFlag = 0;
@@ -1228,7 +1229,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
       || ((endOp != ZSTD_e_continue) && (mtctx->inBuff.filled > 0))  /* something to flush : let's go */
       || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) {   /* must finish the frame with a zero-size block */
         size_t const jobSize = MIN(mtctx->inBuff.filled - mtctx->prefixSize, mtctx->targetSectionSize);
-        CHECK_F( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp==ZSTD_e_end) );
+        CHECK_F( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) );
     }
 
     /* check for potential compressed data ready to be flushed */
diff --git a/lib/zstd.h b/lib/zstd.h
index da9d295cd..e573daf5b 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -1120,7 +1120,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
 
 
 typedef enum {
-    ZSTD_e_continue=0, /* collect more data, encoder transparently decides when to output result, for optimal conditions */
+    ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal conditions */
     ZSTD_e_flush,      /* flush any data provided so far - frame will continue, future data can still reference previous data for better compression */
     ZSTD_e_end         /* flush any remaining data and close current frame. Any additional data starts a new frame. */
 } ZSTD_EndDirective;

From 4f7c896113e39557257c73cd38bf9362fc095e72 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 23 Jan 2018 18:00:51 -0800
Subject: [PATCH 14/30] zstdmt : fixed complex sequencing bug

zstdmt would shortcut to single-thread blocking mode
in some rare cases where data is sent to be compressed but is not yet ready.
---
 tests/zstreamtest.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 762224d53..ef5cf65fb 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -1724,9 +1724,11 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double
                 size_t const dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize);
                 inBuff.size = inBuff.pos + readCSrcSize;
                 outBuff.size = outBuff.pos + dstBuffSize;
-                DISPLAYLEVEL(6, "ZSTD_decompressStream input %u bytes (pos:%u/%u)\n",
-                            (U32)readCSrcSize, (U32)inBuff.pos, (U32)cSize);
+                DISPLAYLEVEL(6, "decompression presented %u new bytes (pos:%u/%u)\n",
+                                (U32)readCSrcSize, (U32)inBuff.pos, (U32)cSize);
                 decompressionResult = ZSTD_decompressStream(zd, &outBuff, &inBuff);
+                DISPLAYLEVEL(6, "so far: consumed = %u, produced = %u \n",
+                                (U32)inBuff.pos, (U32)outBuff.pos);
                 if (ZSTD_isError(decompressionResult)) {
                     DISPLAY("ZSTD_decompressStream error : %s \n", ZSTD_getErrorName(decompressionResult));
                     findDiff(copyBuffer, dstBuffer, totalTestSize);
@@ -1734,8 +1736,8 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double
                 CHECK (ZSTD_isError(decompressionResult), "decompression error : %s", ZSTD_getErrorName(decompressionResult));
                 CHECK (inBuff.pos > cSize, "ZSTD_decompressStream consumes too much input : %u > %u ", (U32)inBuff.pos, (U32)cSize);
             }
-            CHECK (outBuff.pos != totalTestSize, "decompressed data : wrong size (%u != %u)", (U32)outBuff.pos, (U32)totalTestSize);
             CHECK (inBuff.pos != cSize, "compressed data should be fully read (%u != %u)", (U32)inBuff.pos, (U32)cSize);
+            CHECK (outBuff.pos != totalTestSize, "decompressed data : wrong size (%u != %u)", (U32)outBuff.pos, (U32)totalTestSize);
             {   U64 const crcDest = XXH64(dstBuffer, totalTestSize, 0);
                 if (crcDest!=crcOrig) findDiff(copyBuffer, dstBuffer, totalTestSize);
                 CHECK (crcDest!=crcOrig, "decompressed data corrupted");

From 1272d8e760eb8d6ad42cbc2fe1ec898a26b815f5 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Thu, 25 Jan 2018 14:52:34 -0800
Subject: [PATCH 15/30] zstdmt:: renamed mutex and cond to underline they are
 context-global

---
 lib/compress/zstdmt_compress.c | 73 +++++++++++++++++-----------------
 1 file changed, 37 insertions(+), 36 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 0207920dc..bc9209d5b 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -316,8 +316,8 @@ typedef struct {
     unsigned lastChunk;
     unsigned jobCompleted;
     unsigned frameChecksumNeeded;
-    ZSTD_pthread_mutex_t* jobCompleted_mutex;
-    ZSTD_pthread_cond_t* jobCompleted_cond;
+    ZSTD_pthread_mutex_t* mtctx_mutex;
+    ZSTD_pthread_cond_t* mtctx_cond;
     ZSTD_CCtx_params params;
     const ZSTD_CDict* cdict;
     ZSTDMT_CCtxPool* cctxPool;
@@ -344,9 +344,9 @@ void ZSTDMT_compressChunk(void* jobDescription)
             job->cSize = ERROR(memory_allocation);
             goto _endJob;
         }
-        ZSTD_PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);   /* note : it's a mtctx mutex */
+        ZSTD_PTHREAD_MUTEX_LOCK(job->mtctx_mutex);
         job->dstBuff = dstBuff;
-        ZSTD_pthread_mutex_unlock(job->jobCompleted_mutex);
+        ZSTD_pthread_mutex_unlock(job->mtctx_mutex);
     }
 
     /* init */
@@ -399,13 +399,13 @@ void ZSTDMT_compressChunk(void* jobDescription)
             ip += ZSTD_BLOCKSIZE_MAX;
             op += cSize; assert(op < oend);
             /* stats */
-            ZSTD_PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);   /* note : it's a mtctx mutex */
+            ZSTD_PTHREAD_MUTEX_LOCK(job->mtctx_mutex);   /* note : it's a mtctx mutex */
             job->cSize += cSize;
             job->consumed = ZSTD_BLOCKSIZE_MAX * blockNb;
             DEBUGLOG(5, "ZSTDMT_compressChunk: compress new block : cSize==%u bytes (total: %u)",
                         (U32)cSize, (U32)job->cSize);
-            ZSTD_pthread_cond_signal(job->jobCompleted_cond);
-            ZSTD_pthread_mutex_unlock(job->jobCompleted_mutex);
+            ZSTD_pthread_cond_signal(job->mtctx_cond);
+            ZSTD_pthread_mutex_unlock(job->mtctx_mutex);
         }
         /* last block */
         if ((nbBlocks > 0) | job->lastChunk /*must output a "last block" flag*/ ) {
@@ -416,10 +416,10 @@ void ZSTDMT_compressChunk(void* jobDescription)
                  ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
             if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; }
             /* stats */
-            ZSTD_PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);   /* note : it's a mtctx mutex */
+            ZSTD_PTHREAD_MUTEX_LOCK(job->mtctx_mutex);   /* note : it's a mtctx mutex */
             job->cSize += cSize;
             job->consumed = job->srcSize;
-            ZSTD_pthread_mutex_unlock(job->jobCompleted_mutex);
+            ZSTD_pthread_mutex_unlock(job->mtctx_mutex);
         }
     }
 #endif
@@ -430,11 +430,11 @@ _endJob:
     ZSTDMT_releaseBuffer(job->bufPool, job->src);
     job->src = g_nullBuffer; job->srcStart = NULL;
     /* report */
-    ZSTD_PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);
+    ZSTD_PTHREAD_MUTEX_LOCK(job->mtctx_mutex);
     job->consumed = job->srcSize;
     job->jobCompleted = 1;
-    ZSTD_pthread_cond_signal(job->jobCompleted_cond);
-    ZSTD_pthread_mutex_unlock(job->jobCompleted_mutex);
+    ZSTD_pthread_cond_signal(job->mtctx_cond);
+    ZSTD_pthread_mutex_unlock(job->mtctx_mutex);
 }
 
 
@@ -452,8 +452,8 @@ struct ZSTDMT_CCtx_s {
     ZSTDMT_jobDescription* jobs;
     ZSTDMT_bufferPool* bufPool;
     ZSTDMT_CCtxPool* cctxPool;
-    ZSTD_pthread_mutex_t jobCompleted_mutex;
-    ZSTD_pthread_cond_t jobCompleted_cond;
+    ZSTD_pthread_mutex_t mtctx_mutex;
+    ZSTD_pthread_cond_t mtctx_cond;
     ZSTD_CCtx_params params;
     size_t targetSectionSize;
     size_t inBuffSize;
@@ -538,11 +538,11 @@ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
         ZSTDMT_freeCCtx(mtctx);
         return NULL;
     }
-    if (ZSTD_pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL)) {
+    if (ZSTD_pthread_mutex_init(&mtctx->mtctx_mutex, NULL)) {
         ZSTDMT_freeCCtx(mtctx);
         return NULL;
     }
-    if (ZSTD_pthread_cond_init(&mtctx->jobCompleted_cond, NULL)) {
+    if (ZSTD_pthread_cond_init(&mtctx->mtctx_cond, NULL)) {
         ZSTDMT_freeCCtx(mtctx);
         return NULL;
     }
@@ -582,12 +582,12 @@ static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs)
     DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted");
     while (zcs->doneJobID < zcs->nextJobID) {
         unsigned const jobID = zcs->doneJobID & zcs->jobIDMask;
-        ZSTD_PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
+        ZSTD_PTHREAD_MUTEX_LOCK(&zcs->mtctx_mutex);
         while (zcs->jobs[jobID].jobCompleted==0) {
             DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID);   /* we want to block when waiting for data to flush */
-            ZSTD_pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);
+            ZSTD_pthread_cond_wait(&zcs->mtctx_cond, &zcs->mtctx_mutex);
         }
-        ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex);
+        ZSTD_pthread_mutex_unlock(&zcs->mtctx_mutex);
         zcs->doneJobID++;
     }
 }
@@ -601,8 +601,8 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
     ZSTDMT_freeBufferPool(mtctx->bufPool);
     ZSTDMT_freeCCtxPool(mtctx->cctxPool);
     ZSTD_freeCDict(mtctx->cdictLocal);
-    ZSTD_pthread_mutex_destroy(&mtctx->jobCompleted_mutex);
-    ZSTD_pthread_cond_destroy(&mtctx->jobCompleted_cond);
+    ZSTD_pthread_mutex_destroy(&mtctx->mtctx_mutex);
+    ZSTD_pthread_cond_destroy(&mtctx->mtctx_cond);
     ZSTD_free(mtctx, mtctx->cMem);
     return 0;
 }
@@ -672,7 +672,7 @@ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
 {
     ZSTD_frameProgression fs;
     DEBUGLOG(6, "ZSTDMT_getFrameProgression");
-    ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
+    ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->mtctx_mutex);
     fs.consumed = mtctx->consumed;
     fs.produced = mtctx->produced;
     assert(mtctx->inBuff.filled >= mtctx->prefixSize);
@@ -690,7 +690,7 @@ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
             fs.produced += produced;
         }
     }
-    ZSTD_pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
+    ZSTD_pthread_mutex_unlock(&mtctx->mtctx_mutex);
     return fs;
 }
 
@@ -783,8 +783,8 @@ static size_t ZSTDMT_compress_advanced_internal(
             mtctx->jobs[u].firstChunk = (u==0);
             mtctx->jobs[u].lastChunk = (u==nbChunks-1);
             mtctx->jobs[u].jobCompleted = 0;
-            mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
-            mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;
+            mtctx->jobs[u].mtctx_mutex = &mtctx->mtctx_mutex;
+            mtctx->jobs[u].mtctx_cond = &mtctx->mtctx_cond;
 
             if (params.fParams.checksumFlag) {
                 XXH64_update(&xxh64, srcStart + frameStartPos, chunkSize);
@@ -804,12 +804,12 @@ static size_t ZSTDMT_compress_advanced_internal(
         unsigned chunkID;
         for (chunkID=0; chunkIDjobCompleted_mutex);
+            ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->mtctx_mutex);
             while (mtctx->jobs[chunkID].jobCompleted==0) {
                 DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", chunkID);
-                ZSTD_pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex);
+                ZSTD_pthread_cond_wait(&mtctx->mtctx_cond, &mtctx->mtctx_mutex);
             }
-            ZSTD_pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
+            ZSTD_pthread_mutex_unlock(&mtctx->mtctx_mutex);
             DEBUGLOG(5, "ready to write chunk %u ", chunkID);
 
             mtctx->jobs[chunkID].srcStart = NULL;
@@ -1035,8 +1035,8 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, ZSTD
         zcs->jobs[jobID].jobCompleted = 0;
         zcs->jobs[jobID].frameChecksumNeeded = endFrame && (zcs->nextJobID>0) && zcs->params.fParams.checksumFlag;
         zcs->jobs[jobID].dstFlushed = 0;
-        zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex;
-        zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond;
+        zcs->jobs[jobID].mtctx_mutex = &zcs->mtctx_mutex;
+        zcs->jobs[jobID].mtctx_cond = &zcs->mtctx_cond;
 
         if (zcs->params.fParams.checksumFlag)
             XXH64_update(&zcs->xxhState, (const char*)zcs->inBuff.buffer.start + zcs->prefixSize, srcSize);
@@ -1067,7 +1067,7 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, ZSTD
                 zcs->params.fParams.checksumFlag = 0;
     }   }   }
 
-    DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes  (end:%u) (note : doneJob = %u=>%u)",
+    DEBUGLOG(2, "ZSTDMT_createCompressionJob: posting job %u : %u bytes  (end:%u) (note : doneJob = %u=>%u)",
                 zcs->nextJobID,
                 (U32)zcs->jobs[jobID].srcSize,
                 zcs->jobs[jobID].lastChunk,
@@ -1094,18 +1094,18 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, uns
     DEBUGLOG(5, "ZSTDMT_flushProduced (blocking:%u)", blockToFlush);
     assert(output->size >= output->pos);
 
-    ZSTD_PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
+    ZSTD_PTHREAD_MUTEX_LOCK(&zcs->mtctx_mutex);
     if (blockToFlush && (zcs->doneJobID < zcs->nextJobID)) {
         while (zcs->jobs[wJobID].dstFlushed == zcs->jobs[wJobID].cSize) {
             if (zcs->jobs[wJobID].jobCompleted==1) break;
             DEBUGLOG(5, "waiting for something to flush from job %u (currently flushed: %u bytes)",
                         zcs->doneJobID, (U32)zcs->jobs[wJobID].dstFlushed);
-            ZSTD_pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);  /* block when nothing available to flush but more to come */
+            ZSTD_pthread_cond_wait(&zcs->mtctx_cond, &zcs->mtctx_mutex);  /* block when nothing available to flush but more to come */
     }   }
 
     /* some output is available to be flushed */
     {   ZSTDMT_jobDescription job = zcs->jobs[wJobID];
-        ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex);
+        ZSTD_pthread_mutex_unlock(&zcs->mtctx_mutex);
         if (ZSTD_isError(job.cSize)) {
             DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s",
                         zcs->doneJobID, ZSTD_getErrorName(job.cSize));
@@ -1186,8 +1186,9 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
     /* single-pass shortcut (note : synchronous-mode) */
     if ( (mtctx->nextJobID == 0)     /* just started */
       && (mtctx->inBuff.filled == 0) /* nothing buffered */
+      && (!mtctx->jobReady)          /* no job already created */
       && (endOp == ZSTD_e_end)       /* end order */
-      && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough room */
+      && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough space in dst */
         size_t const cSize = ZSTDMT_compress_advanced_internal(mtctx,
                 (char*)output->dst + output->pos, output->size - output->pos,
                 (const char*)input->src + input->pos, input->size - input->pos,
@@ -1234,7 +1235,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
 
     /* check for potential compressed data ready to be flushed */
     {   size_t const remainingToFlush = ZSTDMT_flushProduced(mtctx, output, !forwardInputProgress, endOp); /* block if there was no forward input progress */
-        if (input->pos < input->size) return MAX(remainingToFlush, 1);  /* input not consumed : do not flush yet */
+        if (input->pos < input->size) return MAX(remainingToFlush, 1);  /* input not consumed : do not end flush yet */
         return remainingToFlush;
     }
 }

From a1d4041e69601b84c08082f866665a6a2eaaa8d4 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Thu, 25 Jan 2018 17:35:49 -0800
Subject: [PATCH 16/30] zstdmt: removed job->jobCompleted

replaced by equivalent signal job->consumer == job->srcSize.

created additional functions
ZSTD_writeLastEmptyBlock()
and
ZSTDMT_writeLastEmptyBlock()
required when it's necessary to finish a frame with a last empty job, to create an "end of frame" marker.

It avoids creating a job with srcSize==0.
---
 lib/compress/zstd_compress.c          |  45 ++--
 lib/compress/zstd_compress_internal.h |   9 +
 lib/compress/zstdmt_compress.c        | 366 ++++++++++++++------------
 3 files changed, 231 insertions(+), 189 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index a87c368ea..7b16d7364 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -1824,7 +1824,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
                                         void* dst, size_t dstCapacity,
                                         const void* src, size_t srcSize)
 {
-    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u) (dictLimit=%u, nextToUpdate=%u)",
+    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
                 (U32)dstCapacity, zc->blockState.matchState.dictLimit, zc->blockState.matchState.nextToUpdate);
     if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1)
         return 0;   /* don't even attempt compression below a certain srcSize */
@@ -1837,9 +1837,9 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
         if (current > zc->blockState.matchState.nextToUpdate + 384)
             zc->blockState.matchState.nextToUpdate = current - MIN(192, (U32)(current - zc->blockState.matchState.nextToUpdate - 384));
     }
-    /* find and store sequences */
-    {
-        U32 const extDict = zc->blockState.matchState.lowLimit < zc->blockState.matchState.dictLimit;
+
+    /* select and store sequences */
+    {   U32 const extDict = zc->blockState.matchState.lowLimit < zc->blockState.matchState.dictLimit;
         size_t lastLLSize;
         { int i; for (i = 0; i < ZSTD_REP_NUM; ++i) zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; }
         if (zc->appliedParams.ldmParams.enableLdm) {
@@ -1848,26 +1848,20 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
                     U32 rep[ZSTD_REP_NUM], ZSTD_CCtx_params const* params,
                     void const* src, size_t srcSize);
             ZSTD_ldmBlockCompressor const ldmBlockCompressor = extDict ? ZSTD_compressBlock_ldm_extDict : ZSTD_compressBlock_ldm;
-
             lastLLSize = ldmBlockCompressor(&zc->ldmState, &zc->blockState.matchState, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams, src, srcSize);
-        } else {
+        } else {   /* not long range mode */
             ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict);
-
             lastLLSize = blockCompressor(&zc->blockState.matchState, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize);
         }
-        {
-            const BYTE* const anchor = (const BYTE*)src + srcSize - lastLLSize;
-            ZSTD_storeLastLiterals(&zc->seqStore, anchor, lastLLSize);
-        }
-    }
-    /* encode */
-    {
-        size_t const cSize = ZSTD_compressSequences(&zc->seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, &zc->appliedParams.cParams, dst, dstCapacity, srcSize, zc->entropyWorkspace);
-        if (ZSTD_isError(cSize) || cSize == 0)
-            return cSize;
+        {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
+            ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
+    }   }
+
+    /* encode sequences and literals */
+    {   size_t const cSize = ZSTD_compressSequences(&zc->seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, &zc->appliedParams.cParams, dst, dstCapacity, srcSize, zc->entropyWorkspace);
+        if (ZSTD_isError(cSize) || cSize == 0) return cSize;
         /* confirm repcodes and entropy tables */
-        {
-            ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
+        {   ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
             zc->blockState.prevCBlock = zc->blockState.nextCBlock;
             zc->blockState.nextCBlock = tmp;
         }
@@ -2030,6 +2024,19 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
     return pos;
 }
 
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ *           or an error code if `dstCapcity` is too small (mtctx_mutex);
-        job->dstBuff = dstBuff;
+        job->dstBuff = dstBuff;   /* this value can be read in ZSTDMT_flush, when it copies the whole job */
         ZSTD_pthread_mutex_unlock(job->mtctx_mutex);
     }
 
@@ -369,8 +368,7 @@ void ZSTDMT_compressChunk(void* jobDescription)
             if (ZSTD_isError(initError)) {
                 job->cSize = initError;
                 goto _endJob;
-        }   }
-    }
+    }   }   }
     if (!job->firstChunk) {  /* flush and overwrite frame header when it's not first job */
         size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, 0);
         if (ZSTD_isError(hSize)) { job->cSize = hSize; /* save error code */ goto _endJob; }
@@ -379,12 +377,9 @@ void ZSTDMT_compressChunk(void* jobDescription)
     }
 
     /* compress */
-#if 0
-    job->cSize = (job->lastChunk) ?
-                 ZSTD_compressEnd     (cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
-                 ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
-#else
-    if (sizeof(size_t) > sizeof(int)) assert(job->srcSize < ((size_t)INT_MAX) * ZSTD_BLOCKSIZE_MAX);   /* check overflow */
+    if (sizeof(size_t) > sizeof(int))
+        assert(job->srcSize < ((size_t)INT_MAX) * ZSTD_BLOCKSIZE_MAX);   /* check overflow */
+
     {   int const nbBlocks = (int)((job->srcSize + (ZSTD_BLOCKSIZE_MAX-1)) / ZSTD_BLOCKSIZE_MAX);
         const BYTE* ip = (const BYTE*) src;
         BYTE* const ostart = (BYTE*)dstBuff.start;
@@ -404,7 +399,7 @@ void ZSTDMT_compressChunk(void* jobDescription)
             job->consumed = ZSTD_BLOCKSIZE_MAX * blockNb;
             DEBUGLOG(5, "ZSTDMT_compressChunk: compress new block : cSize==%u bytes (total: %u)",
                         (U32)cSize, (U32)job->cSize);
-            ZSTD_pthread_cond_signal(job->mtctx_cond);
+            ZSTD_pthread_cond_signal(job->mtctx_cond);   /* warns some more data is ready to be flushed */
             ZSTD_pthread_mutex_unlock(job->mtctx_mutex);
         }
         /* last block */
@@ -416,23 +411,19 @@ void ZSTDMT_compressChunk(void* jobDescription)
                  ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
             if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; }
             /* stats */
-            ZSTD_PTHREAD_MUTEX_LOCK(job->mtctx_mutex);   /* note : it's a mtctx mutex */
+            ZSTD_PTHREAD_MUTEX_LOCK(job->mtctx_mutex);
             job->cSize += cSize;
-            job->consumed = job->srcSize;
             ZSTD_pthread_mutex_unlock(job->mtctx_mutex);
-        }
-    }
-#endif
+    }   }
 
 _endJob:
-    /* release */
+    /* release resources */
     ZSTDMT_releaseCCtx(job->cctxPool, cctx);
     ZSTDMT_releaseBuffer(job->bufPool, job->src);
     job->src = g_nullBuffer; job->srcStart = NULL;
     /* report */
     ZSTD_PTHREAD_MUTEX_LOCK(job->mtctx_mutex);
     job->consumed = job->srcSize;
-    job->jobCompleted = 1;
     ZSTD_pthread_cond_signal(job->mtctx_cond);
     ZSTD_pthread_mutex_unlock(job->mtctx_mutex);
 }
@@ -577,18 +568,18 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
     mtctx->allJobsCompleted = 1;
 }
 
-static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs)
+static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx)
 {
     DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted");
-    while (zcs->doneJobID < zcs->nextJobID) {
-        unsigned const jobID = zcs->doneJobID & zcs->jobIDMask;
-        ZSTD_PTHREAD_MUTEX_LOCK(&zcs->mtctx_mutex);
-        while (zcs->jobs[jobID].jobCompleted==0) {
-            DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID);   /* we want to block when waiting for data to flush */
-            ZSTD_pthread_cond_wait(&zcs->mtctx_cond, &zcs->mtctx_mutex);
+    while (mtctx->doneJobID < mtctx->nextJobID) {
+        unsigned const jobID = mtctx->doneJobID & mtctx->jobIDMask;
+        ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->mtctx_mutex);
+        while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].srcSize) {
+            DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", mtctx->doneJobID);   /* we want to block when waiting for data to flush */
+            ZSTD_pthread_cond_wait(&mtctx->mtctx_cond, &mtctx->mtctx_mutex);
         }
-        ZSTD_pthread_mutex_unlock(&zcs->mtctx_mutex);
-        zcs->doneJobID++;
+        ZSTD_pthread_mutex_unlock(&mtctx->mtctx_mutex);
+        mtctx->doneJobID++;
     }
 }
 
@@ -769,7 +760,7 @@ static size_t ZSTDMT_compress_advanced_internal(
             mtctx->jobs[u].src = g_nullBuffer;
             mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize;
             mtctx->jobs[u].prefixSize = dictSize;
-            mtctx->jobs[u].srcSize = chunkSize;
+            mtctx->jobs[u].srcSize = chunkSize; assert(chunkSize > 0);  /* avoid job.srcSize == 0 */
             mtctx->jobs[u].consumed = 0;
             mtctx->jobs[u].cSize = 0;
             mtctx->jobs[u].cdict = (u==0) ? cdict : NULL;
@@ -782,7 +773,6 @@ static size_t ZSTDMT_compress_advanced_internal(
             mtctx->jobs[u].bufPool = mtctx->bufPool;
             mtctx->jobs[u].firstChunk = (u==0);
             mtctx->jobs[u].lastChunk = (u==nbChunks-1);
-            mtctx->jobs[u].jobCompleted = 0;
             mtctx->jobs[u].mtctx_mutex = &mtctx->mtctx_mutex;
             mtctx->jobs[u].mtctx_cond = &mtctx->mtctx_cond;
 
@@ -805,7 +795,7 @@ static size_t ZSTDMT_compress_advanced_internal(
         for (chunkID=0; chunkIDmtctx_mutex);
-            while (mtctx->jobs[chunkID].jobCompleted==0) {
+            while (mtctx->jobs[chunkID].consumed < mtctx->jobs[chunkID].srcSize) {
                 DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", chunkID);
                 ZSTD_pthread_cond_wait(&mtctx->mtctx_cond, &mtctx->mtctx_mutex);
             }
@@ -879,7 +869,7 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
 /* ====================================== */
 
 size_t ZSTDMT_initCStream_internal(
-        ZSTDMT_CCtx* zcs,
+        ZSTDMT_CCtx* mtctx,
         const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode,
         const ZSTD_CDict* cdict, ZSTD_CCtx_params params,
         unsigned long long pledgedSrcSize)
@@ -888,8 +878,8 @@ size_t ZSTDMT_initCStream_internal(
     /* params are supposed to be fully validated at this point */
     assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
     assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
-    assert(zcs->cctxPool->totalCCtx == params.nbThreads);
-    zcs->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN);  /* do not trigger multi-threading when srcSize is too small */
+    assert(mtctx->cctxPool->totalCCtx == params.nbThreads);
+    mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN);  /* do not trigger multi-threading when srcSize is too small */
     if (params.jobSize == 0) {
         if (params.cParams.windowLog >= 29)
             params.jobSize = ZSTDMT_JOBSIZE_MAX;
@@ -898,56 +888,56 @@ size_t ZSTDMT_initCStream_internal(
     }
     if (params.jobSize > ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
 
-    if (zcs->singleBlockingThread) {
+    if (mtctx->singleBlockingThread) {
         ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
         DEBUGLOG(4, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
         assert(singleThreadParams.nbThreads == 0);
-        return ZSTD_initCStream_internal(zcs->cctxPool->cctx[0],
+        return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
                                          dict, dictSize, cdict,
                                          singleThreadParams, pledgedSrcSize);
     }
     DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u threads", params.nbThreads);
 
-    if (zcs->allJobsCompleted == 0) {   /* previous compression not correctly finished */
-        ZSTDMT_waitForAllJobsCompleted(zcs);
-        ZSTDMT_releaseAllJobResources(zcs);
-        zcs->allJobsCompleted = 1;
+    if (mtctx->allJobsCompleted == 0) {   /* previous compression not correctly finished */
+        ZSTDMT_waitForAllJobsCompleted(mtctx);
+        ZSTDMT_releaseAllJobResources(mtctx);
+        mtctx->allJobsCompleted = 1;
     }
 
-    zcs->params = params;
-    zcs->frameContentSize = pledgedSrcSize;
+    mtctx->params = params;
+    mtctx->frameContentSize = pledgedSrcSize;
     if (dict) {
-        ZSTD_freeCDict(zcs->cdictLocal);
-        zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
+        ZSTD_freeCDict(mtctx->cdictLocal);
+        mtctx->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
                                                     ZSTD_dlm_byCopy, dictMode, /* note : a loadPrefix becomes an internal CDict */
-                                                    params.cParams, zcs->cMem);
-        zcs->cdict = zcs->cdictLocal;
-        if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
+                                                    params.cParams, mtctx->cMem);
+        mtctx->cdict = mtctx->cdictLocal;
+        if (mtctx->cdictLocal == NULL) return ERROR(memory_allocation);
     } else {
-        ZSTD_freeCDict(zcs->cdictLocal);
-        zcs->cdictLocal = NULL;
-        zcs->cdict = cdict;
+        ZSTD_freeCDict(mtctx->cdictLocal);
+        mtctx->cdictLocal = NULL;
+        mtctx->cdict = cdict;
     }
 
     assert(params.overlapSizeLog <= 9);
-    zcs->targetPrefixSize = (params.overlapSizeLog==0) ? 0 : (size_t)1 << (params.cParams.windowLog - (9 - params.overlapSizeLog));
-    DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(zcs->targetPrefixSize>>10));
-    zcs->targetSectionSize = params.jobSize;
-    if (zcs->targetSectionSize < ZSTDMT_JOBSIZE_MIN) zcs->targetSectionSize = ZSTDMT_JOBSIZE_MIN;
-    if (zcs->targetSectionSize < zcs->targetPrefixSize) zcs->targetSectionSize = zcs->targetPrefixSize;  /* job size must be >= overlap size */
-    DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(zcs->targetSectionSize>>10), params.jobSize);
-    zcs->inBuffSize = zcs->targetPrefixSize + zcs->targetSectionSize;
-    DEBUGLOG(4, "inBuff Size : %u KB", (U32)(zcs->inBuffSize>>10));
-    ZSTDMT_setBufferSize(zcs->bufPool, MAX(zcs->inBuffSize, ZSTD_compressBound(zcs->targetSectionSize)) );
-    zcs->inBuff.buffer = g_nullBuffer;
-    zcs->prefixSize = 0;
-    zcs->doneJobID = 0;
-    zcs->nextJobID = 0;
-    zcs->frameEnded = 0;
-    zcs->allJobsCompleted = 0;
-    zcs->consumed = 0;
-    zcs->produced = 0;
-    if (params.fParams.checksumFlag) XXH64_reset(&zcs->xxhState, 0);
+    mtctx->targetPrefixSize = (params.overlapSizeLog==0) ? 0 : (size_t)1 << (params.cParams.windowLog - (9 - params.overlapSizeLog));
+    DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(mtctx->targetPrefixSize>>10));
+    mtctx->targetSectionSize = params.jobSize;
+    if (mtctx->targetSectionSize < ZSTDMT_JOBSIZE_MIN) mtctx->targetSectionSize = ZSTDMT_JOBSIZE_MIN;
+    if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize;  /* job size must be >= overlap size */
+    DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), params.jobSize);
+    mtctx->inBuffSize = mtctx->targetPrefixSize + mtctx->targetSectionSize;
+    DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->inBuffSize>>10));
+    ZSTDMT_setBufferSize(mtctx->bufPool, MAX(mtctx->inBuffSize, ZSTD_compressBound(mtctx->targetSectionSize)) );
+    mtctx->inBuff.buffer = g_nullBuffer;
+    mtctx->prefixSize = 0;
+    mtctx->doneJobID = 0;
+    mtctx->nextJobID = 0;
+    mtctx->frameEnded = 0;
+    mtctx->allJobsCompleted = 0;
+    mtctx->consumed = 0;
+    mtctx->produced = 0;
+    if (params.fParams.checksumFlag) XXH64_reset(&mtctx->xxhState, 0);
     return 0;
 }
 
@@ -982,103 +972,134 @@ size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
  * pledgedSrcSize can be zero == unknown (for the time being)
  * prefer using ZSTD_CONTENTSIZE_UNKNOWN,
  * as `0` might mean "empty" in the future */
-size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize)
+size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize)
 {
     if (!pledgedSrcSize) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
-    if (zcs->params.nbThreads==1)
-        return ZSTD_resetCStream(zcs->cctxPool->cctx[0], pledgedSrcSize);
-    return ZSTDMT_initCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, 0, zcs->params,
+    if (mtctx->params.nbThreads==1)
+        return ZSTD_resetCStream(mtctx->cctxPool->cctx[0], pledgedSrcSize);
+    return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dm_auto, 0, mtctx->params,
                                        pledgedSrcSize);
 }
 
-size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
+size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel) {
     ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
-    ZSTD_CCtx_params cctxParams = zcs->params;   /* retrieve sticky params */
+    ZSTD_CCtx_params cctxParams = mtctx->params;   /* retrieve sticky params */
     DEBUGLOG(4, "ZSTDMT_initCStream (cLevel=%i)", compressionLevel);
     cctxParams.cParams = params.cParams;
     cctxParams.fParams = params.fParams;
-    return ZSTDMT_initCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN);
+    return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dm_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN);
 }
 
 
-static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, ZSTD_EndDirective endOp)
+/* ZSTDMT_writeLastEmptyBlock()
+ * Write a single empty block with an end-of-frame
+ * to finish a frame.
+ * Completed synchronously.
+ * @return : 0, or an error code (can fail due to memory allocation)
+ */
+static size_t ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job)
 {
-    unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;
+    assert(job->srcSize == 0);
+    assert(job->lastChunk == 1);
+    assert(job->firstChunk == 0);   /* first chunk needs to create frame header too */
+    assert(job->dstBuff.start == NULL);  /* invoked from streaming variant only */
+    {   buffer_t const dstBuff = ZSTDMT_getBuffer(job->bufPool);
+        if (dstBuff.start==NULL) return ERROR(memory_allocation);
+        job->dstBuff = dstBuff;   /* will be released by ZSTDMT_flushProduced() */
+        assert(dstBuff.size >= ZSTD_blockHeaderSize);
+        job->cSize = ZSTD_writeLastEmptyBlock(dstBuff.start, dstBuff.size);
+        assert(!ZSTD_isError(job->cSize));
+        assert(job->consumed == 0);
+    }
+    return 0;
+}
+
+static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZSTD_EndDirective endOp)
+{
+    unsigned const jobID = mtctx->nextJobID & mtctx->jobIDMask;
     int const endFrame = (endOp == ZSTD_e_end);
 
-    if (zcs->nextJobID > zcs->doneJobID + zcs->jobIDMask) {
+    if (mtctx->nextJobID > mtctx->doneJobID + mtctx->jobIDMask) {
         DEBUGLOG(5, "ZSTDMT_createCompressionJob: will not create new job : table is full");
-        assert((zcs->nextJobID & zcs->jobIDMask) == (zcs->doneJobID & zcs->jobIDMask));
+        assert((mtctx->nextJobID & mtctx->jobIDMask) == (mtctx->doneJobID & mtctx->jobIDMask));
         return 0;
     }
 
-    if (!zcs->jobReady) {
+    if (!mtctx->jobReady) {
         DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ",
-                    zcs->nextJobID, (U32)srcSize, (U32)zcs->prefixSize);
-        zcs->jobs[jobID].src = zcs->inBuff.buffer;
-        zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start;
-        zcs->jobs[jobID].srcSize = srcSize;
-        zcs->jobs[jobID].consumed = 0;
-        zcs->jobs[jobID].cSize = 0;
-        zcs->jobs[jobID].prefixSize = zcs->prefixSize;
-        assert(zcs->inBuff.filled >= srcSize + zcs->prefixSize);
-        zcs->jobs[jobID].params = zcs->params;
+                    mtctx->nextJobID, (U32)srcSize, (U32)mtctx->prefixSize);
+        mtctx->jobs[jobID].src = mtctx->inBuff.buffer;
+        mtctx->jobs[jobID].srcStart = mtctx->inBuff.buffer.start;
+        mtctx->jobs[jobID].srcSize = srcSize;
+        mtctx->jobs[jobID].consumed = 0;
+        mtctx->jobs[jobID].cSize = 0;
+        mtctx->jobs[jobID].prefixSize = mtctx->prefixSize;
+        assert(mtctx->inBuff.filled >= srcSize + mtctx->prefixSize);
+        mtctx->jobs[jobID].params = mtctx->params;
         /* do not calculate checksum within sections, but write it in header for first section */
-        if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;
-        zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;
-        zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
-        zcs->jobs[jobID].dstBuff = g_nullBuffer;
-        zcs->jobs[jobID].cctxPool = zcs->cctxPool;
-        zcs->jobs[jobID].bufPool = zcs->bufPool;
-        zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0);
-        zcs->jobs[jobID].lastChunk = endFrame;
-        zcs->jobs[jobID].jobCompleted = 0;
-        zcs->jobs[jobID].frameChecksumNeeded = endFrame && (zcs->nextJobID>0) && zcs->params.fParams.checksumFlag;
-        zcs->jobs[jobID].dstFlushed = 0;
-        zcs->jobs[jobID].mtctx_mutex = &zcs->mtctx_mutex;
-        zcs->jobs[jobID].mtctx_cond = &zcs->mtctx_cond;
+        if (mtctx->nextJobID) mtctx->jobs[jobID].params.fParams.checksumFlag = 0;
+        mtctx->jobs[jobID].cdict = mtctx->nextJobID==0 ? mtctx->cdict : NULL;
+        mtctx->jobs[jobID].fullFrameSize = mtctx->frameContentSize;
+        mtctx->jobs[jobID].dstBuff = g_nullBuffer;
+        mtctx->jobs[jobID].cctxPool = mtctx->cctxPool;
+        mtctx->jobs[jobID].bufPool = mtctx->bufPool;
+        mtctx->jobs[jobID].firstChunk = (mtctx->nextJobID==0);
+        mtctx->jobs[jobID].lastChunk = endFrame;
+        mtctx->jobs[jobID].frameChecksumNeeded = endFrame && (mtctx->nextJobID>0) && mtctx->params.fParams.checksumFlag;
+        mtctx->jobs[jobID].dstFlushed = 0;
+        mtctx->jobs[jobID].mtctx_mutex = &mtctx->mtctx_mutex;
+        mtctx->jobs[jobID].mtctx_cond = &mtctx->mtctx_cond;
 
-        if (zcs->params.fParams.checksumFlag)
-            XXH64_update(&zcs->xxhState, (const char*)zcs->inBuff.buffer.start + zcs->prefixSize, srcSize);
+        if (mtctx->params.fParams.checksumFlag)
+            XXH64_update(&mtctx->xxhState, (const char*)mtctx->inBuff.buffer.start + mtctx->prefixSize, srcSize);
 
         /* get a new buffer for next input */
         if (!endFrame) {
-            size_t const newPrefixSize = MIN(srcSize + zcs->prefixSize, zcs->targetPrefixSize);
-            zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->bufPool);
-            if (zcs->inBuff.buffer.start == NULL) {   /* not enough memory to allocate next input buffer */
-                zcs->jobs[jobID].jobCompleted = 1;
-                zcs->nextJobID++;
-                ZSTDMT_waitForAllJobsCompleted(zcs);
-                ZSTDMT_releaseAllJobResources(zcs);
+            size_t const newPrefixSize = MIN(srcSize + mtctx->prefixSize, mtctx->targetPrefixSize);
+            mtctx->inBuff.buffer = ZSTDMT_getBuffer(mtctx->bufPool);
+            if (mtctx->inBuff.buffer.start == NULL) {   /* not enough memory to allocate next input buffer */
+                mtctx->jobs[jobID].srcSize = mtctx->jobs[jobID].consumed = 0;
+                mtctx->nextJobID++;
+                ZSTDMT_waitForAllJobsCompleted(mtctx);
+                ZSTDMT_releaseAllJobResources(mtctx);
                 return ERROR(memory_allocation);
             }
-            zcs->inBuff.filled -= srcSize + zcs->prefixSize - newPrefixSize;
-            memmove(zcs->inBuff.buffer.start,   /* copy end of current job into next job, as "prefix" */
-                (const char*)zcs->jobs[jobID].srcStart + zcs->prefixSize + srcSize - newPrefixSize,
-                zcs->inBuff.filled);
-            zcs->prefixSize = newPrefixSize;
+            mtctx->inBuff.filled -= srcSize + mtctx->prefixSize - newPrefixSize;
+            memmove(mtctx->inBuff.buffer.start,   /* copy end of current job into next job, as "prefix" */
+                (const char*)mtctx->jobs[jobID].srcStart + mtctx->prefixSize + srcSize - newPrefixSize,
+                mtctx->inBuff.filled);
+            mtctx->prefixSize = newPrefixSize;
         } else {   /* endFrame==1 => no need for another input buffer */
-            zcs->inBuff.buffer = g_nullBuffer;
-            zcs->inBuff.filled = 0;
-            zcs->prefixSize = 0;
-            zcs->frameEnded = endFrame;
-            if (zcs->nextJobID == 0) {
-                /* single chunk exception : checksum is calculated directly within worker thread */
-                zcs->params.fParams.checksumFlag = 0;
-    }   }   }
+            mtctx->inBuff.buffer = g_nullBuffer;
+            mtctx->inBuff.filled = 0;
+            mtctx->prefixSize = 0;
+            mtctx->frameEnded = endFrame;
+            if (mtctx->nextJobID == 0) {
+                /* single chunk exception : checksum is already calculated directly within worker thread */
+                mtctx->params.fParams.checksumFlag = 0;
+        }   }
 
-    DEBUGLOG(2, "ZSTDMT_createCompressionJob: posting job %u : %u bytes  (end:%u) (note : doneJob = %u=>%u)",
-                zcs->nextJobID,
-                (U32)zcs->jobs[jobID].srcSize,
-                zcs->jobs[jobID].lastChunk,
-                zcs->doneJobID,
-                zcs->doneJobID & zcs->jobIDMask);
-    if (POOL_tryAdd(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID])) {
-        zcs->nextJobID++;
-        zcs->jobReady = 0;
+        if ( (srcSize == 0)
+          && (mtctx->nextJobID>0)/*single chunk must also write frame header*/ ) {
+            assert(endOp == ZSTD_e_end);  /* only possible case : need to end the frame with an empty last block */
+            CHECK_F( ZSTDMT_writeLastEmptyBlock(mtctx->jobs + jobID) );
+            mtctx->nextJobID++;
+            return 0;
+        }
+    }
+
+    DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes  (end:%u) (note : doneJob = %u=>%u)",
+                mtctx->nextJobID,
+                (U32)mtctx->jobs[jobID].srcSize,
+                mtctx->jobs[jobID].lastChunk,
+                mtctx->doneJobID,
+                mtctx->doneJobID & mtctx->jobIDMask);
+    if (POOL_tryAdd(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[jobID])) {
+        mtctx->nextJobID++;
+        mtctx->jobReady = 0;
     } else {
-        DEBUGLOG(5, "ZSTDMT_createCompressionJob: no worker available for job %u", zcs->nextJobID);
-        zcs->jobReady = 1;
+        DEBUGLOG(5, "ZSTDMT_createCompressionJob: no worker available for job %u", mtctx->nextJobID);
+        mtctx->jobReady = 1;
     }
     return 0;
 }
@@ -1088,73 +1109,78 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, ZSTD
  * `output` : `pos` will be updated with amount of data flushed .
  * `blockToFlush` : if >0, the function will block and wait if there is no data available to flush .
  * @return : amount of data remaining within internal buffer, 0 if no more, 1 if unknown but > 0, or an error code */
-static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush, ZSTD_EndDirective end)
+static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned blockToFlush, ZSTD_EndDirective end)
 {
-    unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask;
+    unsigned const wJobID = mtctx->doneJobID & mtctx->jobIDMask;
     DEBUGLOG(5, "ZSTDMT_flushProduced (blocking:%u)", blockToFlush);
     assert(output->size >= output->pos);
 
-    ZSTD_PTHREAD_MUTEX_LOCK(&zcs->mtctx_mutex);
-    if (blockToFlush && (zcs->doneJobID < zcs->nextJobID)) {
-        while (zcs->jobs[wJobID].dstFlushed == zcs->jobs[wJobID].cSize) {
-            if (zcs->jobs[wJobID].jobCompleted==1) break;
+    ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->mtctx_mutex);
+    if (blockToFlush && (mtctx->doneJobID < mtctx->nextJobID)) {
+        assert(mtctx->jobs[wJobID].dstFlushed <= mtctx->jobs[wJobID].cSize);
+        while (mtctx->jobs[wJobID].dstFlushed == mtctx->jobs[wJobID].cSize) {
+            if (mtctx->jobs[wJobID].consumed == mtctx->jobs[wJobID].srcSize) {
+                DEBUGLOG(5, "job %u is completely consumed (%u == %u) => don't wait for cond",
+                            mtctx->doneJobID, (U32)mtctx->jobs[wJobID].consumed, (U32)mtctx->jobs[wJobID].srcSize);
+                break;
+            }
             DEBUGLOG(5, "waiting for something to flush from job %u (currently flushed: %u bytes)",
-                        zcs->doneJobID, (U32)zcs->jobs[wJobID].dstFlushed);
-            ZSTD_pthread_cond_wait(&zcs->mtctx_cond, &zcs->mtctx_mutex);  /* block when nothing available to flush but more to come */
+                        mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed);
+            ZSTD_pthread_cond_wait(&mtctx->mtctx_cond, &mtctx->mtctx_mutex);  /* block when nothing available to flush but more to come */
     }   }
 
     /* some output is available to be flushed */
-    {   ZSTDMT_jobDescription job = zcs->jobs[wJobID];
-        ZSTD_pthread_mutex_unlock(&zcs->mtctx_mutex);
+    {   ZSTDMT_jobDescription job = mtctx->jobs[wJobID];
+        ZSTD_pthread_mutex_unlock(&mtctx->mtctx_mutex);
         if (ZSTD_isError(job.cSize)) {
             DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s",
-                        zcs->doneJobID, ZSTD_getErrorName(job.cSize));
-            ZSTDMT_waitForAllJobsCompleted(zcs);
-            ZSTDMT_releaseAllJobResources(zcs);
+                        mtctx->doneJobID, ZSTD_getErrorName(job.cSize));
+            ZSTDMT_waitForAllJobsCompleted(mtctx);
+            ZSTDMT_releaseAllJobResources(mtctx);
             return job.cSize;
         }
         /* add frame checksum if necessary (can only happen once) */
-        if ( job.jobCompleted
+        assert(job.consumed <= job.srcSize);
+        if ( (job.consumed == job.srcSize)
           && job.frameChecksumNeeded ) {
-            U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
+            U32 const checksum = (U32)XXH64_digest(&mtctx->xxhState);
             DEBUGLOG(4, "ZSTDMT_flushProduced: writing checksum : %08X \n", checksum);
             MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);
             job.cSize += 4;
-            zcs->jobs[wJobID].cSize += 4;
-            zcs->jobs[wJobID].frameChecksumNeeded = 0;
+            mtctx->jobs[wJobID].cSize += 4;
+            mtctx->jobs[wJobID].frameChecksumNeeded = 0;
         }
         assert(job.cSize >= job.dstFlushed);
-        if (job.dstBuff.start != NULL) {  /* one buffer present : some job is ongoing */
+        if (job.dstBuff.start != NULL) {  /* dst buffer present : some work is ongoing or completed */
             size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
             DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%.1f%%)",
-                        (U32)toWrite, zcs->doneJobID, (double)job.consumed / job.srcSize * 100);
+                        (U32)toWrite, mtctx->doneJobID, (double)job.consumed / (job.srcSize + !job.srcSize /*avoid div0*/) * 100);
             memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite);
             output->pos += toWrite;
             job.dstFlushed += toWrite;
 
-            if ( job.jobCompleted
+            if ( (job.consumed == job.srcSize)
               && (job.dstFlushed == job.cSize) ) {   /* output buffer fully flushed => move to next one */
                 DEBUGLOG(5, "Job %u completed (%u bytes), moving to next one",
-                        zcs->doneJobID, (U32)job.dstFlushed);
-                ZSTDMT_releaseBuffer(zcs->bufPool, job.dstBuff);
-                zcs->jobs[wJobID].dstBuff = g_nullBuffer;
-                zcs->jobs[wJobID].jobCompleted = 0;
-                zcs->consumed += job.srcSize;
-                zcs->produced += job.cSize;
-                zcs->doneJobID++;
+                        mtctx->doneJobID, (U32)job.dstFlushed);
+                ZSTDMT_releaseBuffer(mtctx->bufPool, job.dstBuff);
+                mtctx->jobs[wJobID].dstBuff = g_nullBuffer;
+                mtctx->consumed += job.srcSize;
+                mtctx->produced += job.cSize;
+                mtctx->doneJobID++;
             } else {
-                zcs->jobs[wJobID].dstFlushed = job.dstFlushed;   /* remember how much was flushed for next attempt */
+                mtctx->jobs[wJobID].dstFlushed = job.dstFlushed;   /* remember how much was flushed for next attempt */
         }   }
 
         /* return value : how many bytes left in buffer ; fake it to 1 when unknown but >0 */
         if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed);
         if (job.srcSize > job.consumed) return 1;   /* current job not completely compressed */
     }
-    if (zcs->doneJobID < zcs->nextJobID) return 1;   /* some more jobs to flush */
-    if (zcs->jobReady) return 1;   /* one job is ready and queued! */
-    if (zcs->inBuff.filled > 0) return 1;   /* input not empty */
-    zcs->allJobsCompleted = zcs->frameEnded;   /* last frame entirely flushed */
-    if (end == ZSTD_e_end) return !zcs->frameEnded;  /* for ZSTD_e_end, question becomes : is frame completed ? It may need a last null-block */
+    if (mtctx->doneJobID < mtctx->nextJobID) return 1;   /* some more jobs to flush */
+    if (mtctx->jobReady) return 1;   /* one job is ready and queued! */
+    if (mtctx->inBuff.filled > 0) return 1;   /* input not empty */
+    mtctx->allJobsCompleted = mtctx->frameEnded;   /* last frame entirely flushed */
+    if (end == ZSTD_e_end) return !mtctx->frameEnded;  /* for ZSTD_e_end, question becomes : is frame completed ? It may need a last null-block */
     return 0;   /* everything flushed */
 }
 
@@ -1241,12 +1267,12 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
 }
 
 
-size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
+size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
 {
-    CHECK_F( ZSTDMT_compressStream_generic(zcs, output, input, ZSTD_e_continue) );
+    CHECK_F( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) );
 
     /* recommended next input size : fill current input buffer */
-    return zcs->inBuffSize - zcs->inBuff.filled;   /* note : could be zero when input buffer is fully filled and no more availability to create new job */
+    return mtctx->inBuffSize - mtctx->inBuff.filled;   /* note : could be zero when input buffer is fully filled and no more availability to create new job */
 }
 
 

From 777d3c15593295457d8ecf2c60209e3bee910311 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Thu, 25 Jan 2018 17:45:18 -0800
Subject: [PATCH 17/30] fixed minor declaration-after-statement warning

---
 lib/compress/zstd_compress.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 7b16d7364..2904cc1da 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -2032,9 +2032,10 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
 size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
 {
     if (dstCapacity < ZSTD_blockHeaderSize) return ERROR(dstSize_tooSmall);
-    U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1);  /* 0 size */
-    MEM_writeLE24(dst, cBlockHeader24);
-    return ZSTD_blockHeaderSize;
+    {   U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1);  /* 0 size */
+        MEM_writeLE24(dst, cBlockHeader24);
+        return ZSTD_blockHeaderSize;
+    }
 }
 
 

From 8e128eaf058dfc9e70d19fa59ab21053845926b3 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 26 Jan 2018 10:20:38 -0800
Subject: [PATCH 18/30] zstdmt : refactor job members

grouped by sharing properties
---
 doc/zstd_manual.html           |  7 ++++---
 lib/compress/zstdmt_compress.c | 38 +++++++++++++++++-----------------
 tests/zstreamtest.c            |  4 ++--
 3 files changed, 25 insertions(+), 24 deletions(-)

diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index dc3f8be1b..b4aed91e5 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -926,7 +926,7 @@ size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t
 


typedef enum {
-    ZSTD_e_continue=0, /* collect more data, encoder transparently decides when to output result, for optimal conditions */
+    ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal conditions */
     ZSTD_e_flush,      /* flush any data provided so far - frame will continue, future data can still reference previous data for better compression */
     ZSTD_e_end         /* flush any remaining data and close current frame. Any additional data starts a new frame. */
 } ZSTD_EndDirective;
@@ -945,10 +945,11 @@ size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t
                                                      and then immediately returns, just indicating that there is some data remaining to be flushed.
                                                      The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte.
   - Exception : in multi-threading mode, if the first call requests a ZSTD_e_end directive, it is blocking : it will complete compression before giving back control to caller.
-  - @return provides the minimum amount of data remaining to be flushed from internal buffers
+  - @return provides a minimum amount of data remaining to be flushed from internal buffers
             or an error code, which can be tested using ZSTD_isError().
             if @return != 0, flush is not fully completed, there is still some data left within internal buffers.
-            This is useful to determine if a ZSTD_e_flush or ZSTD_e_end directive is completed.
+            This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers.
+            For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed.
   - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0),
             only ZSTD_e_end or ZSTD_e_flush operations are allowed.
             Before starting a new compression job, or changing compression parameters,
diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 451bb5bf6..4e53367fd 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -300,28 +300,28 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
 
 
 /* ------------------------------------------ */
-/* =====          Thread worker         ===== */
+/* =====          Worker thread         ===== */
 /* ------------------------------------------ */
 
 typedef struct {
-    buffer_t src;
-    const void* srcStart;
-    size_t   prefixSize;
-    size_t   srcSize;
-    size_t   consumed;
-    buffer_t dstBuff;
-    size_t   cSize;
-    size_t   dstFlushed;
-    unsigned firstChunk;
-    unsigned lastChunk;
-    unsigned frameChecksumNeeded;
-    ZSTD_pthread_mutex_t* mtctx_mutex;
-    ZSTD_pthread_cond_t* mtctx_cond;
-    ZSTD_CCtx_params params;
-    const ZSTD_CDict* cdict;
-    ZSTDMT_CCtxPool* cctxPool;
-    ZSTDMT_bufferPool* bufPool;
-    unsigned long long fullFrameSize;
+    size_t   consumed;       /* SHARED - init0 by mtctx, then modified by worker AND read by mtctx */
+    size_t   cSize;          /* SHARED - init0 by mtctx, then modified by worker AND read by mtctx */
+    ZSTD_pthread_mutex_t* mtctx_mutex;   /* Thread-safe - used by mtctx and (all) worker */
+    ZSTD_pthread_cond_t* mtctx_cond;     /* Thread-safe - used by mtctx and (all) worker */
+    ZSTDMT_CCtxPool* cctxPool;           /* Thread-safe - used by mtctx and (all) worker */
+    ZSTDMT_bufferPool* bufPool;          /* Thread-safe - used by mtctx and (all) worker */
+    buffer_t dstBuff;        /* set by worker (or mtctx), then read by worker, then modified by mtctx => no barrier */
+    buffer_t src;            /* set by mtctx, then modified by worker => no barrier */
+    const void* srcStart;    /* set by mtctx, then read by worker => no barrier */
+    size_t   prefixSize;     /* set by mtctx, then read by worker => no barrier */
+    size_t   srcSize;        /* set by mtctx, then read by worker => no barrier */
+    unsigned firstChunk;     /* set by mtctx, then read by worker => no barrier */
+    unsigned lastChunk;      /* set by mtctx, then read by worker => no barrier */
+    ZSTD_CCtx_params params;             /* set by mtctx, then read by worker => no barrier */
+    const ZSTD_CDict* cdict;             /* set by mtctx, then read by worker => no barrier */
+    unsigned long long fullFrameSize;    /* set by mtctx, then read by worker => no barrier */
+    size_t   dstFlushed;                 /* used only by mtctx */
+    unsigned frameChecksumNeeded;        /* used only by mtctx */
 } ZSTDMT_jobDescription;
 
 /* ZSTDMT_compressChunk() is a POOL_function type */
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index ef5cf65fb..16dcba73a 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -1220,9 +1220,9 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
 
         FUZ_rand(&coreSeed);
         if (nbTests >= testNb) {
-            DISPLAYUPDATE(2, "\r%6u/%6u (%08X)   ", testNb, nbTests, coreSeed);
+            DISPLAYUPDATE(2, "\r%6u/%6u    ", testNb, nbTests);
         } else {
-            DISPLAYUPDATE(2, "\r%6u  (%08X)        ", testNb, coreSeed);
+            DISPLAYUPDATE(2, "\r%6u         ", testNb);
         }
         lseed = coreSeed ^ prime32;
 

From fca13c6855d28d6ba8628794465372cb6088442d Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 26 Jan 2018 10:44:09 -0800
Subject: [PATCH 19/30] zstdmt : fixed memory leak

writeLastEmptyBlock() must release srcBuffer
as mtctx assumes it's done by job worker.

minor : changed 2 job member names (src->srcBuffer, srcStart->prefixStart) for clarity
---
 lib/compress/zstdmt_compress.c | 73 ++++++++++++++++++----------------
 1 file changed, 38 insertions(+), 35 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 4e53367fd..9fea4969f 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -304,19 +304,19 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
 /* ------------------------------------------ */
 
 typedef struct {
-    size_t   consumed;       /* SHARED - init0 by mtctx, then modified by worker AND read by mtctx */
-    size_t   cSize;          /* SHARED - init0 by mtctx, then modified by worker AND read by mtctx */
+    size_t   consumed;                   /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx */
+    size_t   cSize;                      /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx */
     ZSTD_pthread_mutex_t* mtctx_mutex;   /* Thread-safe - used by mtctx and (all) worker */
     ZSTD_pthread_cond_t* mtctx_cond;     /* Thread-safe - used by mtctx and (all) worker */
     ZSTDMT_CCtxPool* cctxPool;           /* Thread-safe - used by mtctx and (all) worker */
     ZSTDMT_bufferPool* bufPool;          /* Thread-safe - used by mtctx and (all) worker */
-    buffer_t dstBuff;        /* set by worker (or mtctx), then read by worker, then modified by mtctx => no barrier */
-    buffer_t src;            /* set by mtctx, then modified by worker => no barrier */
-    const void* srcStart;    /* set by mtctx, then read by worker => no barrier */
-    size_t   prefixSize;     /* set by mtctx, then read by worker => no barrier */
-    size_t   srcSize;        /* set by mtctx, then read by worker => no barrier */
-    unsigned firstChunk;     /* set by mtctx, then read by worker => no barrier */
-    unsigned lastChunk;      /* set by mtctx, then read by worker => no barrier */
+    buffer_t dstBuff;                    /* set by worker (or mtctx), then read by worker, then modified by mtctx => no barrier */
+    buffer_t srcBuff;                    /* set by mtctx, then released by worker => no barrier */
+    const void* prefixStart;             /* set by mtctx, then read by worker => no barrier */
+    size_t   prefixSize;                 /* set by mtctx, then read by worker => no barrier */
+    size_t   srcSize;                    /* set by mtctx, then read by worker => no barrier */
+    unsigned firstChunk;                 /* set by mtctx, then read by worker => no barrier */
+    unsigned lastChunk;                  /* set by mtctx, then read by worker => no barrier */
     ZSTD_CCtx_params params;             /* set by mtctx, then read by worker => no barrier */
     const ZSTD_CDict* cdict;             /* set by mtctx, then read by worker => no barrier */
     unsigned long long fullFrameSize;    /* set by mtctx, then read by worker => no barrier */
@@ -329,7 +329,7 @@ void ZSTDMT_compressChunk(void* jobDescription)
 {
     ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
     ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool);
-    const void* const src = (const char*)job->srcStart + job->prefixSize;
+    const void* const src = (const char*)job->prefixStart + job->prefixSize;
     buffer_t dstBuff = job->dstBuff;
 
     /* ressources */
@@ -362,7 +362,7 @@ void ZSTDMT_compressChunk(void* jobDescription)
                 goto _endJob;
         }   }
         {   size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
-                                        job->srcStart, job->prefixSize, ZSTD_dm_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
+                                        job->prefixStart, job->prefixSize, ZSTD_dm_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
                                         NULL, /*cdict*/
                                         jobParams, pledgedSrcSize);
             if (ZSTD_isError(initError)) {
@@ -419,8 +419,8 @@ void ZSTDMT_compressChunk(void* jobDescription)
 _endJob:
     /* release resources */
     ZSTDMT_releaseCCtx(job->cctxPool, cctx);
-    ZSTDMT_releaseBuffer(job->bufPool, job->src);
-    job->src = g_nullBuffer; job->srcStart = NULL;
+    ZSTDMT_releaseBuffer(job->bufPool, job->srcBuff);
+    job->srcBuff = g_nullBuffer; job->prefixStart = NULL;
     /* report */
     ZSTD_PTHREAD_MUTEX_LOCK(job->mtctx_mutex);
     job->consumed = job->srcSize;
@@ -557,9 +557,9 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
         DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
         ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
         mtctx->jobs[jobID].dstBuff = g_nullBuffer;
-        DEBUGLOG(4, "job%02u: release src address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].src.start);
-        ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].src);
-        mtctx->jobs[jobID].src = g_nullBuffer;
+        DEBUGLOG(4, "job%02u: release src address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].srcBuff.start);
+        ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].srcBuff);
+        mtctx->jobs[jobID].srcBuff = g_nullBuffer;
     }
     memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
     DEBUGLOG(4, "input: release address %08X", (U32)(size_t)mtctx->inBuff.buffer.start);
@@ -757,8 +757,8 @@ static size_t ZSTDMT_compress_advanced_internal(
             buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : g_nullBuffer;
             size_t dictSize = u ? overlapSize : 0;
 
-            mtctx->jobs[u].src = g_nullBuffer;
-            mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize;
+            mtctx->jobs[u].srcBuff = g_nullBuffer;
+            mtctx->jobs[u].prefixStart = srcStart + frameStartPos - dictSize;
             mtctx->jobs[u].prefixSize = dictSize;
             mtctx->jobs[u].srcSize = chunkSize; assert(chunkSize > 0);  /* avoid job.srcSize == 0 */
             mtctx->jobs[u].consumed = 0;
@@ -781,7 +781,7 @@ static size_t ZSTDMT_compress_advanced_internal(
             }
 
             DEBUGLOG(5, "ZSTDMT_compress_advanced_internal: posting job %u  (%u bytes)", u, (U32)chunkSize);
-            DEBUG_PRINTHEX(6, mtctx->jobs[u].srcStart, 12);
+            DEBUG_PRINTHEX(6, mtctx->jobs[u].prefixStart, 12);
             POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);
 
             frameStartPos += chunkSize;
@@ -802,7 +802,7 @@ static size_t ZSTDMT_compress_advanced_internal(
             ZSTD_pthread_mutex_unlock(&mtctx->mtctx_mutex);
             DEBUGLOG(5, "ready to write chunk %u ", chunkID);
 
-            mtctx->jobs[chunkID].srcStart = NULL;
+            mtctx->jobs[chunkID].prefixStart = NULL;
             {   size_t const cSize = mtctx->jobs[chunkID].cSize;
                 if (ZSTD_isError(cSize)) error = cSize;
                 if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
@@ -999,18 +999,21 @@ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel) {
  */
 static size_t ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job)
 {
-    assert(job->srcSize == 0);
     assert(job->lastChunk == 1);
-    assert(job->firstChunk == 0);   /* first chunk needs to create frame header too */
-    assert(job->dstBuff.start == NULL);  /* invoked from streaming variant only */
-    {   buffer_t const dstBuff = ZSTDMT_getBuffer(job->bufPool);
-        if (dstBuff.start==NULL) return ERROR(memory_allocation);
-        job->dstBuff = dstBuff;   /* will be released by ZSTDMT_flushProduced() */
-        assert(dstBuff.size >= ZSTD_blockHeaderSize);
-        job->cSize = ZSTD_writeLastEmptyBlock(dstBuff.start, dstBuff.size);
-        assert(!ZSTD_isError(job->cSize));
-        assert(job->consumed == 0);
-    }
+    assert(job->srcSize == 0);      /* last chunk is empty -> will be simplified into a last empty block */
+    assert(job->firstChunk == 0);   /* cannot be first chunk, as it also needs to create frame header */
+    /* A job created by streaming variant starts with a src buffer, but no dst buffer.
+     * It summons a dstBuffer itself, compresses into it, then releases srcBuffer, and gives result to mtctx.
+     * When done, srcBuffer is empty, while dstBuffer is filled, and will be released by mtctx.
+     * This shortcut will simply switch srcBuffer for dstBuffer, providing same outcome as a normal job */
+    assert(job->dstBuff.start == NULL);  /* invoked from streaming variant only (otherwise, dstBuff might be user's output) */
+    assert(job->srcBuff.start != NULL);      /* invoked from streaming variant only (otherwise, srcBuff might be user's input) */
+    assert(job->srcBuff.size >= ZSTD_blockHeaderSize);
+    job->dstBuff = job->srcBuff;
+    job->srcBuff = g_nullBuffer;
+    job->cSize = ZSTD_writeLastEmptyBlock(job->dstBuff.start, job->dstBuff.size);
+    assert(!ZSTD_isError(job->cSize));
+    assert(job->consumed == 0);
     return 0;
 }
 
@@ -1028,12 +1031,12 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZS
     if (!mtctx->jobReady) {
         DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ",
                     mtctx->nextJobID, (U32)srcSize, (U32)mtctx->prefixSize);
-        mtctx->jobs[jobID].src = mtctx->inBuff.buffer;
-        mtctx->jobs[jobID].srcStart = mtctx->inBuff.buffer.start;
+        mtctx->jobs[jobID].srcBuff = mtctx->inBuff.buffer;
+        mtctx->jobs[jobID].prefixStart = mtctx->inBuff.buffer.start;
+        mtctx->jobs[jobID].prefixSize = mtctx->prefixSize;
         mtctx->jobs[jobID].srcSize = srcSize;
         mtctx->jobs[jobID].consumed = 0;
         mtctx->jobs[jobID].cSize = 0;
-        mtctx->jobs[jobID].prefixSize = mtctx->prefixSize;
         assert(mtctx->inBuff.filled >= srcSize + mtctx->prefixSize);
         mtctx->jobs[jobID].params = mtctx->params;
         /* do not calculate checksum within sections, but write it in header for first section */
@@ -1066,7 +1069,7 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZS
             }
             mtctx->inBuff.filled -= srcSize + mtctx->prefixSize - newPrefixSize;
             memmove(mtctx->inBuff.buffer.start,   /* copy end of current job into next job, as "prefix" */
-                (const char*)mtctx->jobs[jobID].srcStart + mtctx->prefixSize + srcSize - newPrefixSize,
+                (const char*)mtctx->jobs[jobID].prefixStart + mtctx->prefixSize + srcSize - newPrefixSize,
                 mtctx->inBuff.filled);
             mtctx->prefixSize = newPrefixSize;
         } else {   /* endFrame==1 => no need for another input buffer */

From d2b62b6fa5a752e8312a6c8b9fbedcf53bfa60ff Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 26 Jan 2018 11:06:34 -0800
Subject: [PATCH 20/30] minor : ZSTDMT_writeLastEmptyBlock() is a void function

because it cannot fail
---
 lib/compress/zstdmt_compress.c | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 9fea4969f..7b37c5b33 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -142,6 +142,10 @@ static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
     return poolSize + totalBufferSize;
 }
 
+/* ZSTDMT_setBufferSize() :
+ * all future buffers provided by this buffer pool will have _at least_ this size
+ * note : it's better for all buffers to have same size,
+ * as they become freely interchangeable, reducing malloc/free usages and memory fragmentation */
 static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const bSize)
 {
     ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
@@ -992,12 +996,11 @@ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel) {
 
 
 /* ZSTDMT_writeLastEmptyBlock()
- * Write a single empty block with an end-of-frame
- * to finish a frame.
- * Completed synchronously.
- * @return : 0, or an error code (can fail due to memory allocation)
+ * Write a single empty block with an end-of-frame to finish a frame.
+ * Job must be created from streaming variant.
+ * This function is always successfull if expected conditions are fulfilled.
  */
-static size_t ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job)
+static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job)
 {
     assert(job->lastChunk == 1);
     assert(job->srcSize == 0);      /* last chunk is empty -> will be simplified into a last empty block */
@@ -1006,15 +1009,14 @@ static size_t ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job)
      * It summons a dstBuffer itself, compresses into it, then releases srcBuffer, and gives result to mtctx.
      * When done, srcBuffer is empty, while dstBuffer is filled, and will be released by mtctx.
      * This shortcut will simply switch srcBuffer for dstBuffer, providing same outcome as a normal job */
-    assert(job->dstBuff.start == NULL);  /* invoked from streaming variant only (otherwise, dstBuff might be user's output) */
-    assert(job->srcBuff.start != NULL);      /* invoked from streaming variant only (otherwise, srcBuff might be user's input) */
-    assert(job->srcBuff.size >= ZSTD_blockHeaderSize);
+    assert(job->dstBuff.start == NULL);   /* invoked from streaming variant only (otherwise, dstBuff might be user's output) */
+    assert(job->srcBuff.start != NULL);   /* invoked from streaming variant only (otherwise, srcBuff might be user's input) */
+    assert(job->srcBuff.size >= ZSTD_blockHeaderSize);   /* no buffer should ever be that small */
     job->dstBuff = job->srcBuff;
     job->srcBuff = g_nullBuffer;
     job->cSize = ZSTD_writeLastEmptyBlock(job->dstBuff.start, job->dstBuff.size);
     assert(!ZSTD_isError(job->cSize));
     assert(job->consumed == 0);
-    return 0;
 }
 
 static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZSTD_EndDirective endOp)
@@ -1031,6 +1033,7 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZS
     if (!mtctx->jobReady) {
         DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ",
                     mtctx->nextJobID, (U32)srcSize, (U32)mtctx->prefixSize);
+        assert(mtctx->jobs[jobID].srcBuff.start == NULL);   /* no buffer left : supposed already released */
         mtctx->jobs[jobID].srcBuff = mtctx->inBuff.buffer;
         mtctx->jobs[jobID].prefixStart = mtctx->inBuff.buffer.start;
         mtctx->jobs[jobID].prefixSize = mtctx->prefixSize;
@@ -1085,7 +1088,7 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZS
         if ( (srcSize == 0)
           && (mtctx->nextJobID>0)/*single chunk must also write frame header*/ ) {
             assert(endOp == ZSTD_e_end);  /* only possible case : need to end the frame with an empty last block */
-            CHECK_F( ZSTDMT_writeLastEmptyBlock(mtctx->jobs + jobID) );
+            ZSTDMT_writeLastEmptyBlock(mtctx->jobs + jobID);
             mtctx->nextJobID++;
             return 0;
         }
@@ -1162,10 +1165,11 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
             output->pos += toWrite;
             job.dstFlushed += toWrite;
 
-            if ( (job.consumed == job.srcSize)
-              && (job.dstFlushed == job.cSize) ) {   /* output buffer fully flushed => move to next one */
+            if ( (job.consumed == job.srcSize)      /* job completed */
+              && (job.dstFlushed == job.cSize) ) {  /* output buffer fully flushed => free this job position */
                 DEBUGLOG(5, "Job %u completed (%u bytes), moving to next one",
                         mtctx->doneJobID, (U32)job.dstFlushed);
+                assert(job.srcBuff.start == NULL);  /* srcBuff supposed already released */
                 ZSTDMT_releaseBuffer(mtctx->bufPool, job.dstBuff);
                 mtctx->jobs[wJobID].dstBuff = g_nullBuffer;
                 mtctx->consumed += job.srcSize;

From 79b6e28b0a168bb076dbaf653b1a7d71bbd1a201 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 26 Jan 2018 12:15:43 -0800
Subject: [PATCH 21/30] zstdmt : flush() only lock to read shared job members

Other job members are accessed directly.
This avoids a full job copy, which would access everything,
including a few members that are supposed to be used by worker only,
uselessly requiring additional locks to avoid race conditions.
---
 lib/compress/zstdmt_compress.c | 115 +++++++++++++++++----------------
 1 file changed, 58 insertions(+), 57 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 7b37c5b33..c7df32d37 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -309,16 +309,16 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
 
 typedef struct {
     size_t   consumed;                   /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx */
-    size_t   cSize;                      /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx */
-    ZSTD_pthread_mutex_t* mtctx_mutex;   /* Thread-safe - used by mtctx and (all) worker */
-    ZSTD_pthread_cond_t* mtctx_cond;     /* Thread-safe - used by mtctx and (all) worker */
-    ZSTDMT_CCtxPool* cctxPool;           /* Thread-safe - used by mtctx and (all) worker */
-    ZSTDMT_bufferPool* bufPool;          /* Thread-safe - used by mtctx and (all) worker */
-    buffer_t dstBuff;                    /* set by worker (or mtctx), then read by worker, then modified by mtctx => no barrier */
+    size_t   cSize;                      /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx, then set0 by mtctx */
+    ZSTD_pthread_mutex_t* mtctx_mutex;   /* Thread-safe - used by mtctx and (all) workers */
+    ZSTD_pthread_cond_t* mtctx_cond;     /* Thread-safe - used by mtctx and (all) workers */
+    ZSTDMT_CCtxPool* cctxPool;           /* Thread-safe - used by mtctx and (all) workers */
+    ZSTDMT_bufferPool* bufPool;          /* Thread-safe - used by mtctx and (all) workers */
+    buffer_t dstBuff;                    /* set by worker (or mtctx), then read by worker & mtctx, then modified by mtctx => no barrier */
     buffer_t srcBuff;                    /* set by mtctx, then released by worker => no barrier */
-    const void* prefixStart;             /* set by mtctx, then read by worker => no barrier */
+    const void* prefixStart;             /* set by mtctx, then read and set0 by worker => no barrier */
     size_t   prefixSize;                 /* set by mtctx, then read by worker => no barrier */
-    size_t   srcSize;                    /* set by mtctx, then read by worker => no barrier */
+    size_t   srcSize;                    /* set by mtctx, then read by worker & mtctx => no barrier */
     unsigned firstChunk;                 /* set by mtctx, then read by worker => no barrier */
     unsigned lastChunk;                  /* set by mtctx, then read by worker => no barrier */
     ZSTD_CCtx_params params;             /* set by mtctx, then read by worker => no barrier */
@@ -341,15 +341,13 @@ void ZSTDMT_compressChunk(void* jobDescription)
         job->cSize = ERROR(memory_allocation);
         goto _endJob;
     }
-    if (dstBuff.start == NULL) {
+    if (dstBuff.start == NULL) {   /* streaming job : doesn't provide a dstBuffer */
         dstBuff = ZSTDMT_getBuffer(job->bufPool);
         if (dstBuff.start==NULL) {
             job->cSize = ERROR(memory_allocation);
             goto _endJob;
         }
-        ZSTD_PTHREAD_MUTEX_LOCK(job->mtctx_mutex);
         job->dstBuff = dstBuff;   /* this value can be read in ZSTDMT_flush, when it copies the whole job */
-        ZSTD_pthread_mutex_unlock(job->mtctx_mutex);
     }
 
     /* init */
@@ -1087,6 +1085,7 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZS
 
         if ( (srcSize == 0)
           && (mtctx->nextJobID>0)/*single chunk must also write frame header*/ ) {
+            DEBUGLOG(5, "ZSTDMT_createCompressionJob: creating a last empty block to end frame");
             assert(endOp == ZSTD_e_end);  /* only possible case : need to end the frame with an empty last block */
             ZSTDMT_writeLastEmptyBlock(mtctx->jobs + jobID);
             mtctx->nextJobID++;
@@ -1094,12 +1093,12 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZS
         }
     }
 
-    DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes  (end:%u) (note : doneJob = %u=>%u)",
+    DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes  (end:%u, jobNb == %u (mod:%u))",
                 mtctx->nextJobID,
                 (U32)mtctx->jobs[jobID].srcSize,
                 mtctx->jobs[jobID].lastChunk,
-                mtctx->doneJobID,
-                mtctx->doneJobID & mtctx->jobIDMask);
+                mtctx->nextJobID,
+                jobID);
     if (POOL_tryAdd(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[jobID])) {
         mtctx->nextJobID++;
         mtctx->jobReady = 0;
@@ -1118,15 +1117,17 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZS
 static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned blockToFlush, ZSTD_EndDirective end)
 {
     unsigned const wJobID = mtctx->doneJobID & mtctx->jobIDMask;
-    DEBUGLOG(5, "ZSTDMT_flushProduced (blocking:%u)", blockToFlush);
+    DEBUGLOG(5, "ZSTDMT_flushProduced (blocking:%u , job %u <= %u)",
+                blockToFlush, mtctx->doneJobID, mtctx->nextJobID);
     assert(output->size >= output->pos);
 
     ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->mtctx_mutex);
-    if (blockToFlush && (mtctx->doneJobID < mtctx->nextJobID)) {
+    if (  blockToFlush
+      && (mtctx->doneJobID < mtctx->nextJobID) ) {
         assert(mtctx->jobs[wJobID].dstFlushed <= mtctx->jobs[wJobID].cSize);
-        while (mtctx->jobs[wJobID].dstFlushed == mtctx->jobs[wJobID].cSize) {
+        while (mtctx->jobs[wJobID].dstFlushed == mtctx->jobs[wJobID].cSize) {  /* nothing to flush */
             if (mtctx->jobs[wJobID].consumed == mtctx->jobs[wJobID].srcSize) {
-                DEBUGLOG(5, "job %u is completely consumed (%u == %u) => don't wait for cond",
+                DEBUGLOG(5, "job %u is completely consumed (%u == %u) => don't wait for cond, there will be none",
                             mtctx->doneJobID, (U32)mtctx->jobs[wJobID].consumed, (U32)mtctx->jobs[wJobID].srcSize);
                 break;
             }
@@ -1135,60 +1136,60 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
             ZSTD_pthread_cond_wait(&mtctx->mtctx_cond, &mtctx->mtctx_mutex);  /* block when nothing available to flush but more to come */
     }   }
 
-    /* some output is available to be flushed */
-    {   ZSTDMT_jobDescription job = mtctx->jobs[wJobID];
+    /* try to flush something */
+    {   size_t cSize = mtctx->jobs[wJobID].cSize;
+        size_t const srcConsumed = mtctx->jobs[wJobID].consumed;
         ZSTD_pthread_mutex_unlock(&mtctx->mtctx_mutex);
-        if (ZSTD_isError(job.cSize)) {
+        if (ZSTD_isError(cSize)) {
             DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s",
-                        mtctx->doneJobID, ZSTD_getErrorName(job.cSize));
+                        mtctx->doneJobID, ZSTD_getErrorName(cSize));
             ZSTDMT_waitForAllJobsCompleted(mtctx);
             ZSTDMT_releaseAllJobResources(mtctx);
-            return job.cSize;
+            return cSize;
         }
         /* add frame checksum if necessary (can only happen once) */
-        assert(job.consumed <= job.srcSize);
-        if ( (job.consumed == job.srcSize)
-          && job.frameChecksumNeeded ) {
+        assert(srcConsumed <= mtctx->jobs[wJobID].srcSize);
+        if ( (srcConsumed == mtctx->jobs[wJobID].srcSize)   /* job completed -> worker no longer active */
+          && mtctx->jobs[wJobID].frameChecksumNeeded ) {
             U32 const checksum = (U32)XXH64_digest(&mtctx->xxhState);
             DEBUGLOG(4, "ZSTDMT_flushProduced: writing checksum : %08X \n", checksum);
-            MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);
-            job.cSize += 4;
-            mtctx->jobs[wJobID].cSize += 4;
+            MEM_writeLE32((char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].cSize, checksum);
+            cSize += 4;
+            mtctx->jobs[wJobID].cSize += 4;  /* can write this shared value, as worker is no longer active */
             mtctx->jobs[wJobID].frameChecksumNeeded = 0;
         }
-        assert(job.cSize >= job.dstFlushed);
-        if (job.dstBuff.start != NULL) {  /* dst buffer present : some work is ongoing or completed */
-            size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
-            DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%.1f%%)",
-                        (U32)toWrite, mtctx->doneJobID, (double)job.consumed / (job.srcSize + !job.srcSize /*avoid div0*/) * 100);
-            memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite);
+        if (cSize > 0) {  /* compression is ongoing or completed */
+            size_t const toWrite = MIN(cSize - mtctx->jobs[wJobID].dstFlushed, output->size - output->pos);
+            DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u)",
+                        (U32)toWrite, mtctx->doneJobID, (U32)srcConsumed, (U32)mtctx->jobs[wJobID].srcSize);
+            assert(cSize >= mtctx->jobs[wJobID].dstFlushed);
+            memcpy((char*)output->dst + output->pos, (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed, toWrite);
             output->pos += toWrite;
-            job.dstFlushed += toWrite;
+            mtctx->jobs[wJobID].dstFlushed += toWrite;  /* can write : this value is only used by mtctx */
 
-            if ( (job.consumed == job.srcSize)      /* job completed */
-              && (job.dstFlushed == job.cSize) ) {  /* output buffer fully flushed => free this job position */
+            if ( (srcConsumed == mtctx->jobs[wJobID].srcSize)    /* job completed */
+              && (mtctx->jobs[wJobID].dstFlushed == cSize) ) {   /* output buffer fully flushed => free this job position */
                 DEBUGLOG(5, "Job %u completed (%u bytes), moving to next one",
-                        mtctx->doneJobID, (U32)job.dstFlushed);
-                assert(job.srcBuff.start == NULL);  /* srcBuff supposed already released */
-                ZSTDMT_releaseBuffer(mtctx->bufPool, job.dstBuff);
+                        mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed);
+                assert(mtctx->jobs[wJobID].srcBuff.start == NULL);  /* srcBuff supposed already released */
+                ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[wJobID].dstBuff);
                 mtctx->jobs[wJobID].dstBuff = g_nullBuffer;
-                mtctx->consumed += job.srcSize;
-                mtctx->produced += job.cSize;
+                mtctx->jobs[wJobID].cSize = 0;   /* ensure this job slot is considered "not started" in future check */
+                mtctx->consumed += mtctx->jobs[wJobID].srcSize;
+                mtctx->produced += cSize;
                 mtctx->doneJobID++;
-            } else {
-                mtctx->jobs[wJobID].dstFlushed = job.dstFlushed;   /* remember how much was flushed for next attempt */
         }   }
 
         /* return value : how many bytes left in buffer ; fake it to 1 when unknown but >0 */
-        if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed);
-        if (job.srcSize > job.consumed) return 1;   /* current job not completely compressed */
+        if (cSize > mtctx->jobs[wJobID].dstFlushed) return (cSize - mtctx->jobs[wJobID].dstFlushed);
+        if (mtctx->jobs[wJobID].srcSize > srcConsumed) return 1;   /* current job not completely compressed */
     }
-    if (mtctx->doneJobID < mtctx->nextJobID) return 1;   /* some more jobs to flush */
-    if (mtctx->jobReady) return 1;   /* one job is ready and queued! */
-    if (mtctx->inBuff.filled > 0) return 1;   /* input not empty */
-    mtctx->allJobsCompleted = mtctx->frameEnded;   /* last frame entirely flushed */
-    if (end == ZSTD_e_end) return !mtctx->frameEnded;  /* for ZSTD_e_end, question becomes : is frame completed ? It may need a last null-block */
-    return 0;   /* everything flushed */
+    if (mtctx->doneJobID < mtctx->nextJobID) return 1;   /* some more jobs ongoing */
+    if (mtctx->jobReady) return 1;      /* one job is ready to push, just not yet in the list */
+    if (mtctx->inBuff.filled > 0) return 1;   /* input is not empty, and still needs to be converted into a job */
+    mtctx->allJobsCompleted = mtctx->frameEnded;   /* all chunks are entirely flushed => if this one is last one, frame is completed */
+    if (end == ZSTD_e_end) return !mtctx->frameEnded;  /* for ZSTD_e_end, question becomes : is frame completed ? instead of : are internal buffers fully flushed ? */
+    return 0;   /* internal buffers fully flushed */
 }
 
 
@@ -1217,10 +1218,10 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
     }
 
     /* single-pass shortcut (note : synchronous-mode) */
-    if ( (mtctx->nextJobID == 0)     /* just started */
-      && (mtctx->inBuff.filled == 0) /* nothing buffered */
-      && (!mtctx->jobReady)          /* no job already created */
-      && (endOp == ZSTD_e_end)       /* end order */
+    if ( (mtctx->nextJobID == 0)      /* just started */
+      && (mtctx->inBuff.filled == 0)  /* nothing buffered */
+      && (!mtctx->jobReady)           /* no job already created */
+      && (endOp == ZSTD_e_end)        /* end order */
       && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough space in dst */
         size_t const cSize = ZSTDMT_compress_advanced_internal(mtctx,
                 (char*)output->dst + output->pos, output->size - output->pos,

From 0d426f6b8391c3b5c5714fd83c014609ec238e7d Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 26 Jan 2018 13:00:14 -0800
Subject: [PATCH 22/30] zstdmt : refactor a few member names

for clarity
---
 lib/compress/zstdmt_compress.c | 94 +++++++++++++++++-----------------
 1 file changed, 48 insertions(+), 46 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index c7df32d37..3bf585d49 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -83,7 +83,7 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
 
 typedef struct buffer_s {
     void* start;
-    size_t size;
+    size_t capacity;
 } buffer_t;
 
 static const buffer_t g_nullBuffer = { NULL, 0 };
@@ -136,7 +136,7 @@ static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
     size_t totalBufferSize = 0;
     ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
     for (u=0; utotalBuffers; u++)
-        totalBufferSize += bufPool->bTable[u].size;
+        totalBufferSize += bufPool->bTable[u].capacity;
     ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
 
     return poolSize + totalBufferSize;
@@ -165,12 +165,12 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
     ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
     if (bufPool->nbBuffers) {   /* try to use an existing buffer */
         buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)];
-        size_t const availBufferSize = buf.size;
+        size_t const availBufferSize = buf.capacity;
         bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer;
         if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) {
             /* large enough, but not too much */
             DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u",
-                        bufPool->nbBuffers, (U32)buf.size);
+                        bufPool->nbBuffers, (U32)buf.capacity);
             ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
             return buf;
         }
@@ -184,7 +184,7 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
     {   buffer_t buffer;
         void* const start = ZSTD_malloc(bSize, bufPool->cMem);
         buffer.start = start;   /* note : start can be NULL if malloc fails ! */
-        buffer.size = (start==NULL) ? 0 : bSize;
+        buffer.capacity = (start==NULL) ? 0 : bSize;
         if (start==NULL) {
             DEBUGLOG(5, "ZSTDMT_getBuffer: buffer allocation failure !!");
         } else {
@@ -203,7 +203,7 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
     if (bufPool->nbBuffers < bufPool->totalBuffers) {
         bufPool->bTable[bufPool->nbBuffers++] = buf;  /* stored for later use */
         DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
-                    (U32)buf.size, (U32)(bufPool->nbBuffers-1));
+                    (U32)buf.capacity, (U32)(bufPool->nbBuffers-1));
         ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
         return;
     }
@@ -372,7 +372,7 @@ void ZSTDMT_compressChunk(void* jobDescription)
                 goto _endJob;
     }   }   }
     if (!job->firstChunk) {  /* flush and overwrite frame header when it's not first job */
-        size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, 0);
+        size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, src, 0);
         if (ZSTD_isError(hSize)) { job->cSize = hSize; /* save error code */ goto _endJob; }
         DEBUGLOG(5, "ZSTDMT_compressChunk: flush and overwrite %u bytes of frame header (not first chunk)", (U32)hSize);
         ZSTD_invalidateRepCodes(cctx);
@@ -386,7 +386,7 @@ void ZSTDMT_compressChunk(void* jobDescription)
         const BYTE* ip = (const BYTE*) src;
         BYTE* const ostart = (BYTE*)dstBuff.start;
         BYTE* op = ostart;
-        BYTE* oend = op + dstBuff.size;
+        BYTE* oend = op + dstBuff.capacity;
         int blockNb;
         DEBUGLOG(5, "ZSTDMT_compressChunk: compress %u bytes in %i blocks", (U32)job->srcSize, nbBlocks);
         assert(job->cSize == 0);
@@ -437,6 +437,8 @@ _endJob:
 
 typedef struct {
     buffer_t buffer;
+    size_t targetCapacity;  /* note : buffers provided by the pool may be larger than target capacity */
+    size_t prefixSize;
     size_t filled;
 } inBuff_t;
 
@@ -449,8 +451,6 @@ struct ZSTDMT_CCtx_s {
     ZSTD_pthread_cond_t mtctx_cond;
     ZSTD_CCtx_params params;
     size_t targetSectionSize;
-    size_t inBuffSize;
-    size_t prefixSize;
     size_t targetPrefixSize;
     inBuff_t inBuff;
     int jobReady;        /* 1 => one job is already prepared, but pool has shortage of workers. Don't create another one. */
@@ -663,13 +663,13 @@ unsigned ZSTDMT_getNbThreads(const ZSTDMT_CCtx* mtctx)
  * Note : mutex will be acquired during statistics collection. */
 ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
 {
-    ZSTD_frameProgression fs;
+    ZSTD_frameProgression fps;
     DEBUGLOG(6, "ZSTDMT_getFrameProgression");
     ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->mtctx_mutex);
-    fs.consumed = mtctx->consumed;
-    fs.produced = mtctx->produced;
-    assert(mtctx->inBuff.filled >= mtctx->prefixSize);
-    fs.ingested = mtctx->consumed + (mtctx->inBuff.filled - mtctx->prefixSize);
+    fps.consumed = mtctx->consumed;
+    fps.produced = mtctx->produced;
+    assert(mtctx->inBuff.filled >= mtctx->inBuff.prefixSize);
+    fps.ingested = mtctx->consumed + (mtctx->inBuff.filled - mtctx->inBuff.prefixSize);
     {   unsigned jobNb;
         unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1);
         DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
@@ -678,13 +678,13 @@ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
             unsigned const wJobID = jobNb & mtctx->jobIDMask;
             size_t const cResult = mtctx->jobs[wJobID].cSize;
             size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
-            fs.consumed += mtctx->jobs[wJobID].consumed;
-            fs.ingested += mtctx->jobs[wJobID].srcSize;
-            fs.produced += produced;
+            fps.consumed += mtctx->jobs[wJobID].consumed;
+            fps.ingested += mtctx->jobs[wJobID].srcSize;
+            fps.produced += produced;
         }
     }
     ZSTD_pthread_mutex_unlock(&mtctx->mtctx_mutex);
-    return fs;
+    return fps;
 }
 
 
@@ -928,11 +928,11 @@ size_t ZSTDMT_initCStream_internal(
     if (mtctx->targetSectionSize < ZSTDMT_JOBSIZE_MIN) mtctx->targetSectionSize = ZSTDMT_JOBSIZE_MIN;
     if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize;  /* job size must be >= overlap size */
     DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), params.jobSize);
-    mtctx->inBuffSize = mtctx->targetPrefixSize + mtctx->targetSectionSize;
-    DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->inBuffSize>>10));
-    ZSTDMT_setBufferSize(mtctx->bufPool, MAX(mtctx->inBuffSize, ZSTD_compressBound(mtctx->targetSectionSize)) );
+    mtctx->inBuff.targetCapacity = mtctx->targetPrefixSize + mtctx->targetSectionSize;
+    DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->inBuff.targetCapacity>>10));
+    ZSTDMT_setBufferSize(mtctx->bufPool, MAX(mtctx->inBuff.targetCapacity, ZSTD_compressBound(mtctx->targetSectionSize)) );
     mtctx->inBuff.buffer = g_nullBuffer;
-    mtctx->prefixSize = 0;
+    mtctx->inBuff.prefixSize = 0;
     mtctx->doneJobID = 0;
     mtctx->nextJobID = 0;
     mtctx->frameEnded = 0;
@@ -1009,10 +1009,10 @@ static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job)
      * This shortcut will simply switch srcBuffer for dstBuffer, providing same outcome as a normal job */
     assert(job->dstBuff.start == NULL);   /* invoked from streaming variant only (otherwise, dstBuff might be user's output) */
     assert(job->srcBuff.start != NULL);   /* invoked from streaming variant only (otherwise, srcBuff might be user's input) */
-    assert(job->srcBuff.size >= ZSTD_blockHeaderSize);   /* no buffer should ever be that small */
+    assert(job->srcBuff.capacity >= ZSTD_blockHeaderSize);   /* no buffer should ever be that small */
     job->dstBuff = job->srcBuff;
     job->srcBuff = g_nullBuffer;
-    job->cSize = ZSTD_writeLastEmptyBlock(job->dstBuff.start, job->dstBuff.size);
+    job->cSize = ZSTD_writeLastEmptyBlock(job->dstBuff.start, job->dstBuff.capacity);
     assert(!ZSTD_isError(job->cSize));
     assert(job->consumed == 0);
 }
@@ -1030,15 +1030,15 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZS
 
     if (!mtctx->jobReady) {
         DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ",
-                    mtctx->nextJobID, (U32)srcSize, (U32)mtctx->prefixSize);
+                    mtctx->nextJobID, (U32)srcSize, (U32)mtctx->inBuff.prefixSize);
         assert(mtctx->jobs[jobID].srcBuff.start == NULL);   /* no buffer left : supposed already released */
         mtctx->jobs[jobID].srcBuff = mtctx->inBuff.buffer;
         mtctx->jobs[jobID].prefixStart = mtctx->inBuff.buffer.start;
-        mtctx->jobs[jobID].prefixSize = mtctx->prefixSize;
+        mtctx->jobs[jobID].prefixSize = mtctx->inBuff.prefixSize;
         mtctx->jobs[jobID].srcSize = srcSize;
+        assert(mtctx->inBuff.filled >= srcSize + mtctx->inBuff.prefixSize);
         mtctx->jobs[jobID].consumed = 0;
         mtctx->jobs[jobID].cSize = 0;
-        assert(mtctx->inBuff.filled >= srcSize + mtctx->prefixSize);
         mtctx->jobs[jobID].params = mtctx->params;
         /* do not calculate checksum within sections, but write it in header for first section */
         if (mtctx->nextJobID) mtctx->jobs[jobID].params.fParams.checksumFlag = 0;
@@ -1055,28 +1055,28 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZS
         mtctx->jobs[jobID].mtctx_cond = &mtctx->mtctx_cond;
 
         if (mtctx->params.fParams.checksumFlag)
-            XXH64_update(&mtctx->xxhState, (const char*)mtctx->inBuff.buffer.start + mtctx->prefixSize, srcSize);
+            XXH64_update(&mtctx->xxhState, (const char*)mtctx->inBuff.buffer.start + mtctx->inBuff.prefixSize, srcSize);
 
         /* get a new buffer for next input */
         if (!endFrame) {
-            size_t const newPrefixSize = MIN(srcSize + mtctx->prefixSize, mtctx->targetPrefixSize);
+            size_t const newPrefixSize = MIN(mtctx->inBuff.filled, mtctx->targetPrefixSize);
             mtctx->inBuff.buffer = ZSTDMT_getBuffer(mtctx->bufPool);
-            if (mtctx->inBuff.buffer.start == NULL) {   /* not enough memory to allocate next input buffer */
+            if (mtctx->inBuff.buffer.start == NULL) {    /* not enough memory to allocate a new input buffer */
                 mtctx->jobs[jobID].srcSize = mtctx->jobs[jobID].consumed = 0;
                 mtctx->nextJobID++;
                 ZSTDMT_waitForAllJobsCompleted(mtctx);
                 ZSTDMT_releaseAllJobResources(mtctx);
                 return ERROR(memory_allocation);
             }
-            mtctx->inBuff.filled -= srcSize + mtctx->prefixSize - newPrefixSize;
+            mtctx->inBuff.filled -= (mtctx->inBuff.prefixSize + srcSize) - newPrefixSize;
             memmove(mtctx->inBuff.buffer.start,   /* copy end of current job into next job, as "prefix" */
-                (const char*)mtctx->jobs[jobID].prefixStart + mtctx->prefixSize + srcSize - newPrefixSize,
+                (const char*)mtctx->jobs[jobID].prefixStart + mtctx->inBuff.prefixSize + srcSize - newPrefixSize,
                 mtctx->inBuff.filled);
-            mtctx->prefixSize = newPrefixSize;
+            mtctx->inBuff.prefixSize = newPrefixSize;
         } else {   /* endFrame==1 => no need for another input buffer */
             mtctx->inBuff.buffer = g_nullBuffer;
             mtctx->inBuff.filled = 0;
-            mtctx->prefixSize = 0;
+            mtctx->inBuff.prefixSize = 0;
             mtctx->frameEnded = endFrame;
             if (mtctx->nextJobID == 0) {
                 /* single chunk exception : checksum is already calculated directly within worker thread */
@@ -1202,7 +1202,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
                                      ZSTD_inBuffer* input,
                                      ZSTD_EndDirective endOp)
 {
-    size_t const newJobThreshold = mtctx->prefixSize + mtctx->targetSectionSize;
+    size_t const newJobThreshold = mtctx->inBuff.prefixSize + mtctx->targetSectionSize;
     unsigned forwardInputProgress = 0;
     DEBUGLOG(5, "ZSTDMT_compressStream_generic (endOp=%u)", (U32)endOp);
     assert(output->pos <= output->size);
@@ -1240,16 +1240,18 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
     if ( (!mtctx->jobReady)
       && (input->size > input->pos) ) {   /* support NULL input */
         if (mtctx->inBuff.buffer.start == NULL) {
-            mtctx->inBuff.buffer = ZSTDMT_getBuffer(mtctx->bufPool);  /* note : allocation can fail, in which case, no forward input progress */
+            mtctx->inBuff.buffer = ZSTDMT_getBuffer(mtctx->bufPool);  /* note : allocation can fail, in which case, buffer.start==NULL */
             mtctx->inBuff.filled = 0;
-            if ( (mtctx->inBuff.buffer.start == NULL)    /* allocation failure */
+            if ( (mtctx->inBuff.buffer.start == NULL)        /* allocation failure */
               && (mtctx->doneJobID == mtctx->nextJobID) ) {  /* and nothing to flush */
-                return ERROR(memory_allocation);   /* no forward progress possible => output an error */
-        }   }
-        if (mtctx->inBuff.buffer.start != NULL) {
-            size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled);
+                return ERROR(memory_allocation);    /* no forward progress possible => output an error */
+            }
+            assert(mtctx->inBuff.buffer.capacity >= mtctx->inBuff.targetCapacity);  /* pool must provide a buffer >= targetCapacity */
+        }
+        if (mtctx->inBuff.buffer.start != NULL) {   /* no buffer for input, but it's possible to flush, and then reclaim the buffer */
+            size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuff.targetCapacity - mtctx->inBuff.filled);
             DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u",
-                        (U32)toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->inBuffSize);
+                        (U32)toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->inBuff.targetCapacity);
             memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
             input->pos += toLoad;
             mtctx->inBuff.filled += toLoad;
@@ -1263,7 +1265,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
       || (mtctx->inBuff.filled >= newJobThreshold)  /* filled enough : let's compress */
       || ((endOp != ZSTD_e_continue) && (mtctx->inBuff.filled > 0))  /* something to flush : let's go */
       || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) {   /* must finish the frame with a zero-size block */
-        size_t const jobSize = MIN(mtctx->inBuff.filled - mtctx->prefixSize, mtctx->targetSectionSize);
+        size_t const jobSize = MIN(mtctx->inBuff.filled - mtctx->inBuff.prefixSize, mtctx->targetSectionSize);
         CHECK_F( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) );
     }
 
@@ -1280,13 +1282,13 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_in
     CHECK_F( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) );
 
     /* recommended next input size : fill current input buffer */
-    return mtctx->inBuffSize - mtctx->inBuff.filled;   /* note : could be zero when input buffer is fully filled and no more availability to create new job */
+    return mtctx->inBuff.targetCapacity - mtctx->inBuff.filled;   /* note : could be zero when input buffer is fully filled and no more availability to create new job */
 }
 
 
 static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_EndDirective endFrame)
 {
-    size_t const srcSize = mtctx->inBuff.filled - mtctx->prefixSize;
+    size_t const srcSize = mtctx->inBuff.filled - mtctx->inBuff.prefixSize;
     DEBUGLOG(5, "ZSTDMT_flushStream_internal");
 
     if ( mtctx->jobReady     /* one job ready for a worker to pick up */

From 27c5853c4285fa678fb9f606a87d21852b533ac0 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 26 Jan 2018 14:35:54 -0800
Subject: [PATCH 23/30] zstdmt: job table correctly cleaned after synchronous
 ZSTDMT_compress()

---
 lib/compress/zstdmt_compress.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 3bf585d49..d78257b2e 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -559,6 +559,7 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
         DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
         ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
         mtctx->jobs[jobID].dstBuff = g_nullBuffer;
+        mtctx->jobs[jobID].cSize = 0;
         DEBUGLOG(4, "job%02u: release src address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].srcBuff.start);
         ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].srcBuff);
         mtctx->jobs[jobID].srcBuff = g_nullBuffer;
@@ -816,6 +817,7 @@ static size_t ZSTDMT_compress_advanced_internal(
                         ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[chunkID].dstBuff);
                 }   }
                 mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
+                mtctx->jobs[chunkID].cSize = 0;
                 dstPos += cSize ;
             }
         }  /* for (chunkID=0; chunkIDjobs[wJobID].cSize += 4;  /* can write this shared value, as worker is no longer active */
             mtctx->jobs[wJobID].frameChecksumNeeded = 0;
         }
-        if (cSize > 0) {  /* compression is ongoing or completed */
+        if (cSize > 0) {   /* compression is ongoing or completed */
             size_t const toWrite = MIN(cSize - mtctx->jobs[wJobID].dstFlushed, output->size - output->pos);
-            DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u)",
-                        (U32)toWrite, mtctx->doneJobID, (U32)srcConsumed, (U32)mtctx->jobs[wJobID].srcSize);
+            DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u, generated:%u)",
+                        (U32)toWrite, mtctx->doneJobID, (U32)srcConsumed, (U32)mtctx->jobs[wJobID].srcSize, (U32)cSize);
+            assert(mtctx->doneJobID < mtctx->nextJobID);
             assert(cSize >= mtctx->jobs[wJobID].dstFlushed);
+            assert(mtctx->jobs[wJobID].dstBuff.start != NULL);
             memcpy((char*)output->dst + output->pos, (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed, toWrite);
             output->pos += toWrite;
             mtctx->jobs[wJobID].dstFlushed += toWrite;  /* can write : this value is only used by mtctx */
@@ -1204,7 +1208,8 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
 {
     size_t const newJobThreshold = mtctx->inBuff.prefixSize + mtctx->targetSectionSize;
     unsigned forwardInputProgress = 0;
-    DEBUGLOG(5, "ZSTDMT_compressStream_generic (endOp=%u)", (U32)endOp);
+    DEBUGLOG(5, "ZSTDMT_compressStream_generic (endOp=%u, srcSize=%u)",
+                (U32)endOp, (U32)(input->size - input->pos));
     assert(output->pos <= output->size);
     assert(input->pos  <= input->size);
 

From 77e36273de31ded4fc35703514175af07bdf2fb5 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 26 Jan 2018 17:08:58 -0800
Subject: [PATCH 24/30] zstdmt: minor code refactor for clarity

---
 lib/compress/zstdmt_compress.c | 80 +++++++++++++++++++---------------
 1 file changed, 44 insertions(+), 36 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index d78257b2e..e04864507 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -379,35 +379,35 @@ void ZSTDMT_compressChunk(void* jobDescription)
     }
 
     /* compress */
-    if (sizeof(size_t) > sizeof(int))
-        assert(job->srcSize < ((size_t)INT_MAX) * ZSTD_BLOCKSIZE_MAX);   /* check overflow */
-
-    {   int const nbBlocks = (int)((job->srcSize + (ZSTD_BLOCKSIZE_MAX-1)) / ZSTD_BLOCKSIZE_MAX);
+    {   size_t const blockSize = ZSTD_BLOCKSIZE_MAX;
+        int const nbBlocks = (int)((job->srcSize + (blockSize-1)) / blockSize);
         const BYTE* ip = (const BYTE*) src;
         BYTE* const ostart = (BYTE*)dstBuff.start;
         BYTE* op = ostart;
         BYTE* oend = op + dstBuff.capacity;
         int blockNb;
+        if (sizeof(size_t) > sizeof(int)) assert(job->srcSize < ((size_t)INT_MAX) * blockSize);   /* check overflow */
         DEBUGLOG(5, "ZSTDMT_compressChunk: compress %u bytes in %i blocks", (U32)job->srcSize, nbBlocks);
         assert(job->cSize == 0);
         for (blockNb = 1; blockNb < nbBlocks; blockNb++) {
-            size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, ZSTD_BLOCKSIZE_MAX);
+            size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, blockSize);
             if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; }
-            ip += ZSTD_BLOCKSIZE_MAX;
+            ip += blockSize;
             op += cSize; assert(op < oend);
             /* stats */
             ZSTD_PTHREAD_MUTEX_LOCK(job->mtctx_mutex);   /* note : it's a mtctx mutex */
             job->cSize += cSize;
-            job->consumed = ZSTD_BLOCKSIZE_MAX * blockNb;
+            job->consumed = blockSize * blockNb;
             DEBUGLOG(5, "ZSTDMT_compressChunk: compress new block : cSize==%u bytes (total: %u)",
                         (U32)cSize, (U32)job->cSize);
             ZSTD_pthread_cond_signal(job->mtctx_cond);   /* warns some more data is ready to be flushed */
             ZSTD_pthread_mutex_unlock(job->mtctx_mutex);
         }
         /* last block */
+        assert(blockSize > 0); assert((blockSize & (blockSize - 1)) == 0);  /* blockSize must be power of 2 for mask==(blockSize-1) to work */
         if ((nbBlocks > 0) | job->lastChunk /*must output a "last block" flag*/ ) {
-            size_t const lastBlockSize1 = job->srcSize & (ZSTD_BLOCKSIZE_MAX-1);
-            size_t const lastBlockSize = ((lastBlockSize1==0) & (job->srcSize>=ZSTD_BLOCKSIZE_MAX)) ? ZSTD_BLOCKSIZE_MAX : lastBlockSize1;
+            size_t const lastBlockSize1 = job->srcSize & (blockSize-1);
+            size_t const lastBlockSize = ((lastBlockSize1==0) & (job->srcSize>=blockSize)) ? blockSize : lastBlockSize1;
             size_t const cSize = (job->lastChunk) ?
                  ZSTD_compressEnd     (cctx, op, oend-op, ip, lastBlockSize) :
                  ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
@@ -469,21 +469,10 @@ struct ZSTDMT_CCtx_s {
     const ZSTD_CDict* cdict;
 };
 
-/* Sets parameters relevant to the compression job, initializing others to
- * default values. Notably, nbThreads should probably be zero. */
-static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
-{
-    ZSTD_CCtx_params jobParams;
-    memset(&jobParams, 0, sizeof(jobParams));
-
-    jobParams.cParams = params.cParams;
-    jobParams.fParams = params.fParams;
-    jobParams.compressionLevel = params.compressionLevel;
-
-    jobParams.ldmParams = params.ldmParams;
-    return jobParams;
-}
-
+/* ZSTDMT_allocJobsTable()
+ * allocate, and just init to zero, a job table.
+ * update *nbJobsPtr to next power of 2 value, as size of table
+ * No reverse free() function is provided : just use ZSTD_free() */
 static ZSTDMT_jobDescription* ZSTDMT_allocJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
 {
     U32 const nbJobsLog2 = ZSTD_highbit32(*nbJobsPtr) + 1;
@@ -524,6 +513,7 @@ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
     mtctx->allJobsCompleted = 1;
     mtctx->factory = POOL_create_advanced(nbThreads, 0, cMem);
     mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, cMem);
+    assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0);  /* ensure nbJobs is a power of 2 */
     mtctx->jobIDMask = nbJobs - 1;
     mtctx->bufPool = ZSTDMT_createBufferPool(nbThreads, cMem);
     mtctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads, cMem);
@@ -649,6 +639,21 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
     }
 }
 
+/* Sets parameters relevant to the compression job, initializing others to
+ * default values. Notably, nbThreads should probably be zero. */
+static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
+{
+    ZSTD_CCtx_params jobParams;
+    memset(&jobParams, 0, sizeof(jobParams));
+
+    jobParams.cParams = params.cParams;
+    jobParams.fParams = params.fParams;
+    jobParams.compressionLevel = params.compressionLevel;
+
+    jobParams.ldmParams = params.ldmParams;
+    return jobParams;
+}
+
 /* ZSTDMT_getNbThreads():
  * @return nb threads currently active in mtctx.
  * mtctx must be valid */
@@ -1139,8 +1144,9 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
     }   }
 
     /* try to flush something */
-    {   size_t cSize = mtctx->jobs[wJobID].cSize;
-        size_t const srcConsumed = mtctx->jobs[wJobID].consumed;
+    {   size_t cSize = mtctx->jobs[wJobID].cSize;                  /* shared */
+        size_t const srcConsumed = mtctx->jobs[wJobID].consumed;   /* shared */
+        size_t const srcSize = mtctx->jobs[wJobID].srcSize;        /* read-only, could be done after mutex lock, but no-declaration-after-statement */
         ZSTD_pthread_mutex_unlock(&mtctx->mtctx_mutex);
         if (ZSTD_isError(cSize)) {
             DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s",
@@ -1150,8 +1156,8 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
             return cSize;
         }
         /* add frame checksum if necessary (can only happen once) */
-        assert(srcConsumed <= mtctx->jobs[wJobID].srcSize);
-        if ( (srcConsumed == mtctx->jobs[wJobID].srcSize)   /* job completed -> worker no longer active */
+        assert(srcConsumed <= srcSize);
+        if ( (srcConsumed == srcSize)   /* job completed -> worker no longer active */
           && mtctx->jobs[wJobID].frameChecksumNeeded ) {
             U32 const checksum = (U32)XXH64_digest(&mtctx->xxhState);
             DEBUGLOG(4, "ZSTDMT_flushProduced: writing checksum : %08X \n", checksum);
@@ -1161,17 +1167,19 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
             mtctx->jobs[wJobID].frameChecksumNeeded = 0;
         }
         if (cSize > 0) {   /* compression is ongoing or completed */
-            size_t const toWrite = MIN(cSize - mtctx->jobs[wJobID].dstFlushed, output->size - output->pos);
+            size_t const toFlush = MIN(cSize - mtctx->jobs[wJobID].dstFlushed, output->size - output->pos);
             DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u, generated:%u)",
-                        (U32)toWrite, mtctx->doneJobID, (U32)srcConsumed, (U32)mtctx->jobs[wJobID].srcSize, (U32)cSize);
+                        (U32)toFlush, mtctx->doneJobID, (U32)srcConsumed, (U32)srcSize, (U32)cSize);
             assert(mtctx->doneJobID < mtctx->nextJobID);
             assert(cSize >= mtctx->jobs[wJobID].dstFlushed);
             assert(mtctx->jobs[wJobID].dstBuff.start != NULL);
-            memcpy((char*)output->dst + output->pos, (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed, toWrite);
-            output->pos += toWrite;
-            mtctx->jobs[wJobID].dstFlushed += toWrite;  /* can write : this value is only used by mtctx */
+            memcpy((char*)output->dst + output->pos,
+                   (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed,
+                   toFlush);
+            output->pos += toFlush;
+            mtctx->jobs[wJobID].dstFlushed += toFlush;  /* can write : this value is only used by mtctx */
 
-            if ( (srcConsumed == mtctx->jobs[wJobID].srcSize)    /* job completed */
+            if ( (srcConsumed == srcSize)    /* job completed */
               && (mtctx->jobs[wJobID].dstFlushed == cSize) ) {   /* output buffer fully flushed => free this job position */
                 DEBUGLOG(5, "Job %u completed (%u bytes), moving to next one",
                         mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed);
@@ -1179,14 +1187,14 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
                 ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[wJobID].dstBuff);
                 mtctx->jobs[wJobID].dstBuff = g_nullBuffer;
                 mtctx->jobs[wJobID].cSize = 0;   /* ensure this job slot is considered "not started" in future check */
-                mtctx->consumed += mtctx->jobs[wJobID].srcSize;
+                mtctx->consumed += srcSize;
                 mtctx->produced += cSize;
                 mtctx->doneJobID++;
         }   }
 
         /* return value : how many bytes left in buffer ; fake it to 1 when unknown but >0 */
         if (cSize > mtctx->jobs[wJobID].dstFlushed) return (cSize - mtctx->jobs[wJobID].dstFlushed);
-        if (mtctx->jobs[wJobID].srcSize > srcConsumed) return 1;   /* current job not completely compressed */
+        if (srcSize > srcConsumed) return 1;   /* current job not completely compressed */
     }
     if (mtctx->doneJobID < mtctx->nextJobID) return 1;   /* some more jobs ongoing */
     if (mtctx->jobReady) return 1;      /* one job is ready to push, just not yet in the list */

From 9c40ae7ff16a6561914092e130b47c7a45f7914b Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 26 Jan 2018 17:48:33 -0800
Subject: [PATCH 25/30] zstdmt: there is now one mutex/cond per job

---
 lib/compress/zstdmt_compress.c | 103 +++++++++++++++++----------------
 tests/zstreamtest.c            |   4 +-
 2 files changed, 55 insertions(+), 52 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index e04864507..22560bfb1 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -310,8 +310,8 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
 typedef struct {
     size_t   consumed;                   /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx */
     size_t   cSize;                      /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx, then set0 by mtctx */
-    ZSTD_pthread_mutex_t* mtctx_mutex;   /* Thread-safe - used by mtctx and (all) workers */
-    ZSTD_pthread_cond_t* mtctx_cond;     /* Thread-safe - used by mtctx and (all) workers */
+    ZSTD_pthread_mutex_t job_mutex;      /* Thread-safe - used by mtctx and worker */
+    ZSTD_pthread_cond_t job_cond;        /* Thread-safe - used by mtctx and worker */
     ZSTDMT_CCtxPool* cctxPool;           /* Thread-safe - used by mtctx and (all) workers */
     ZSTDMT_bufferPool* bufPool;          /* Thread-safe - used by mtctx and (all) workers */
     buffer_t dstBuff;                    /* set by worker (or mtctx), then read by worker & mtctx, then modified by mtctx => no barrier */
@@ -395,13 +395,13 @@ void ZSTDMT_compressChunk(void* jobDescription)
             ip += blockSize;
             op += cSize; assert(op < oend);
             /* stats */
-            ZSTD_PTHREAD_MUTEX_LOCK(job->mtctx_mutex);   /* note : it's a mtctx mutex */
+            ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);   /* note : it's a mtctx mutex */
             job->cSize += cSize;
             job->consumed = blockSize * blockNb;
             DEBUGLOG(5, "ZSTDMT_compressChunk: compress new block : cSize==%u bytes (total: %u)",
                         (U32)cSize, (U32)job->cSize);
-            ZSTD_pthread_cond_signal(job->mtctx_cond);   /* warns some more data is ready to be flushed */
-            ZSTD_pthread_mutex_unlock(job->mtctx_mutex);
+            ZSTD_pthread_cond_signal(&job->job_cond);   /* warns some more data is ready to be flushed */
+            ZSTD_pthread_mutex_unlock(&job->job_mutex);
         }
         /* last block */
         assert(blockSize > 0); assert((blockSize & (blockSize - 1)) == 0);  /* blockSize must be power of 2 for mask==(blockSize-1) to work */
@@ -413,9 +413,9 @@ void ZSTDMT_compressChunk(void* jobDescription)
                  ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
             if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; }
             /* stats */
-            ZSTD_PTHREAD_MUTEX_LOCK(job->mtctx_mutex);
+            ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
             job->cSize += cSize;
-            ZSTD_pthread_mutex_unlock(job->mtctx_mutex);
+            ZSTD_pthread_mutex_unlock(&job->job_mutex);
     }   }
 
 _endJob:
@@ -424,10 +424,10 @@ _endJob:
     ZSTDMT_releaseBuffer(job->bufPool, job->srcBuff);
     job->srcBuff = g_nullBuffer; job->prefixStart = NULL;
     /* report */
-    ZSTD_PTHREAD_MUTEX_LOCK(job->mtctx_mutex);
+    ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
     job->consumed = job->srcSize;
-    ZSTD_pthread_cond_signal(job->mtctx_cond);
-    ZSTD_pthread_mutex_unlock(job->mtctx_mutex);
+    ZSTD_pthread_cond_signal(&job->job_cond);
+    ZSTD_pthread_mutex_unlock(&job->job_mutex);
 }
 
 
@@ -447,8 +447,6 @@ struct ZSTDMT_CCtx_s {
     ZSTDMT_jobDescription* jobs;
     ZSTDMT_bufferPool* bufPool;
     ZSTDMT_CCtxPool* cctxPool;
-    ZSTD_pthread_mutex_t mtctx_mutex;
-    ZSTD_pthread_cond_t mtctx_cond;
     ZSTD_CCtx_params params;
     size_t targetSectionSize;
     size_t targetPrefixSize;
@@ -470,16 +468,34 @@ struct ZSTDMT_CCtx_s {
 };
 
 /* ZSTDMT_allocJobsTable()
- * allocate, and just init to zero, a job table.
+ * allocate and init a job table.
  * update *nbJobsPtr to next power of 2 value, as size of table
  * No reverse free() function is provided : just use ZSTD_free() */
-static ZSTDMT_jobDescription* ZSTDMT_allocJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
+static ZSTDMT_jobDescription* ZSTDMT_createJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
 {
     U32 const nbJobsLog2 = ZSTD_highbit32(*nbJobsPtr) + 1;
     U32 const nbJobs = 1 << nbJobsLog2;
-    *nbJobsPtr = nbJobs;
-    return (ZSTDMT_jobDescription*) ZSTD_calloc(
+    ZSTDMT_jobDescription* const jobTable = ZSTD_calloc(
                             nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
+    U32 jobNb;
+    if (jobTable==NULL) return NULL;
+    *nbJobsPtr = nbJobs;
+    for (jobNb=0; jobNbcMem = cMem;
     mtctx->allJobsCompleted = 1;
     mtctx->factory = POOL_create_advanced(nbThreads, 0, cMem);
-    mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, cMem);
+    mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
     assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0);  /* ensure nbJobs is a power of 2 */
     mtctx->jobIDMask = nbJobs - 1;
     mtctx->bufPool = ZSTDMT_createBufferPool(nbThreads, cMem);
@@ -521,14 +537,6 @@ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
         ZSTDMT_freeCCtx(mtctx);
         return NULL;
     }
-    if (ZSTD_pthread_mutex_init(&mtctx->mtctx_mutex, NULL)) {
-        ZSTDMT_freeCCtx(mtctx);
-        return NULL;
-    }
-    if (ZSTD_pthread_cond_init(&mtctx->mtctx_cond, NULL)) {
-        ZSTDMT_freeCCtx(mtctx);
-        return NULL;
-    }
     DEBUGLOG(3, "mt_cctx created, for %u threads", nbThreads);
     return mtctx;
 }
@@ -566,12 +574,12 @@ static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx)
     DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted");
     while (mtctx->doneJobID < mtctx->nextJobID) {
         unsigned const jobID = mtctx->doneJobID & mtctx->jobIDMask;
-        ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->mtctx_mutex);
+        ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex);
         while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].srcSize) {
             DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", mtctx->doneJobID);   /* we want to block when waiting for data to flush */
-            ZSTD_pthread_cond_wait(&mtctx->mtctx_cond, &mtctx->mtctx_mutex);
+            ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex);
         }
-        ZSTD_pthread_mutex_unlock(&mtctx->mtctx_mutex);
+        ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex);
         mtctx->doneJobID++;
     }
 }
@@ -581,12 +589,10 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
     if (mtctx==NULL) return 0;   /* compatible with free on NULL */
     POOL_free(mtctx->factory);   /* stop and free worker threads */
     ZSTDMT_releaseAllJobResources(mtctx);  /* release job resources into pools first */
-    ZSTD_free(mtctx->jobs, mtctx->cMem);
+    ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
     ZSTDMT_freeBufferPool(mtctx->bufPool);
     ZSTDMT_freeCCtxPool(mtctx->cctxPool);
     ZSTD_freeCDict(mtctx->cdictLocal);
-    ZSTD_pthread_mutex_destroy(&mtctx->mtctx_mutex);
-    ZSTD_pthread_cond_destroy(&mtctx->mtctx_cond);
     ZSTD_free(mtctx, mtctx->cMem);
     return 0;
 }
@@ -671,7 +677,6 @@ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
 {
     ZSTD_frameProgression fps;
     DEBUGLOG(6, "ZSTDMT_getFrameProgression");
-    ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->mtctx_mutex);
     fps.consumed = mtctx->consumed;
     fps.produced = mtctx->produced;
     assert(mtctx->inBuff.filled >= mtctx->inBuff.prefixSize);
@@ -682,14 +687,16 @@ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
                     mtctx->doneJobID, lastJobNb, mtctx->jobReady)
         for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
             unsigned const wJobID = jobNb & mtctx->jobIDMask;
-            size_t const cResult = mtctx->jobs[wJobID].cSize;
-            size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
-            fps.consumed += mtctx->jobs[wJobID].consumed;
-            fps.ingested += mtctx->jobs[wJobID].srcSize;
-            fps.produced += produced;
+            ZSTD_pthread_mutex_lock(&mtctx->jobs[wJobID].job_mutex);
+            {   size_t const cResult = mtctx->jobs[wJobID].cSize;
+                size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
+                fps.consumed += mtctx->jobs[wJobID].consumed;
+                fps.ingested += mtctx->jobs[wJobID].srcSize;
+                fps.produced += produced;
+            }
+            ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
         }
     }
-    ZSTD_pthread_mutex_unlock(&mtctx->mtctx_mutex);
     return fps;
 }
 
@@ -749,9 +756,9 @@ static size_t ZSTDMT_compress_advanced_internal(
 
     if (nbChunks > mtctx->jobIDMask+1) {  /* enlarge job table */
         U32 nbJobs = nbChunks;
-        ZSTD_free(mtctx->jobs, mtctx->cMem);
+        ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
         mtctx->jobIDMask = 0;
-        mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, mtctx->cMem);
+        mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, mtctx->cMem);
         if (mtctx->jobs==NULL) return ERROR(memory_allocation);
         assert((nbJobs != 0) && ((nbJobs & (nbJobs - 1)) == 0));  /* ensure nbJobs is a power of 2 */
         mtctx->jobIDMask = nbJobs - 1;
@@ -781,8 +788,6 @@ static size_t ZSTDMT_compress_advanced_internal(
             mtctx->jobs[u].bufPool = mtctx->bufPool;
             mtctx->jobs[u].firstChunk = (u==0);
             mtctx->jobs[u].lastChunk = (u==nbChunks-1);
-            mtctx->jobs[u].mtctx_mutex = &mtctx->mtctx_mutex;
-            mtctx->jobs[u].mtctx_cond = &mtctx->mtctx_cond;
 
             if (params.fParams.checksumFlag) {
                 XXH64_update(&xxh64, srcStart + frameStartPos, chunkSize);
@@ -802,12 +807,12 @@ static size_t ZSTDMT_compress_advanced_internal(
         unsigned chunkID;
         for (chunkID=0; chunkIDmtctx_mutex);
+            ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[chunkID].job_mutex);
             while (mtctx->jobs[chunkID].consumed < mtctx->jobs[chunkID].srcSize) {
                 DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", chunkID);
-                ZSTD_pthread_cond_wait(&mtctx->mtctx_cond, &mtctx->mtctx_mutex);
+                ZSTD_pthread_cond_wait(&mtctx->jobs[chunkID].job_cond, &mtctx->jobs[chunkID].job_mutex);
             }
-            ZSTD_pthread_mutex_unlock(&mtctx->mtctx_mutex);
+            ZSTD_pthread_mutex_unlock(&mtctx->jobs[chunkID].job_mutex);
             DEBUGLOG(5, "ready to write chunk %u ", chunkID);
 
             mtctx->jobs[chunkID].prefixStart = NULL;
@@ -1058,8 +1063,6 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZS
         mtctx->jobs[jobID].lastChunk = endFrame;
         mtctx->jobs[jobID].frameChecksumNeeded = endFrame && (mtctx->nextJobID>0) && mtctx->params.fParams.checksumFlag;
         mtctx->jobs[jobID].dstFlushed = 0;
-        mtctx->jobs[jobID].mtctx_mutex = &mtctx->mtctx_mutex;
-        mtctx->jobs[jobID].mtctx_cond = &mtctx->mtctx_cond;
 
         if (mtctx->params.fParams.checksumFlag)
             XXH64_update(&mtctx->xxhState, (const char*)mtctx->inBuff.buffer.start + mtctx->inBuff.prefixSize, srcSize);
@@ -1128,7 +1131,7 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
                 blockToFlush, mtctx->doneJobID, mtctx->nextJobID);
     assert(output->size >= output->pos);
 
-    ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->mtctx_mutex);
+    ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex);
     if (  blockToFlush
       && (mtctx->doneJobID < mtctx->nextJobID) ) {
         assert(mtctx->jobs[wJobID].dstFlushed <= mtctx->jobs[wJobID].cSize);
@@ -1140,14 +1143,14 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
             }
             DEBUGLOG(5, "waiting for something to flush from job %u (currently flushed: %u bytes)",
                         mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed);
-            ZSTD_pthread_cond_wait(&mtctx->mtctx_cond, &mtctx->mtctx_mutex);  /* block when nothing available to flush but more to come */
+            ZSTD_pthread_cond_wait(&mtctx->jobs[wJobID].job_cond, &mtctx->jobs[wJobID].job_mutex);  /* block when nothing to flush but some to come */
     }   }
 
     /* try to flush something */
     {   size_t cSize = mtctx->jobs[wJobID].cSize;                  /* shared */
         size_t const srcConsumed = mtctx->jobs[wJobID].consumed;   /* shared */
         size_t const srcSize = mtctx->jobs[wJobID].srcSize;        /* read-only, could be done after mutex lock, but no-declaration-after-statement */
-        ZSTD_pthread_mutex_unlock(&mtctx->mtctx_mutex);
+        ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
         if (ZSTD_isError(cSize)) {
             DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s",
                         mtctx->doneJobID, ZSTD_getErrorName(cSize));
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 16dcba73a..ecc477c71 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -969,9 +969,9 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres
         FUZ_rand(&coreSeed);
         lseed = coreSeed ^ prime32;
         if (nbTests >= testNb) {
-            DISPLAYUPDATE(2, "\r%6u/%6u (%08X)   ", testNb, nbTests, lseed);
+            DISPLAYUPDATE(2, "\r%6u/%6u    ", testNb, nbTests);
         } else {
-            DISPLAYUPDATE(2, "\r%6u  (%08X)        ", testNb, lseed);
+            DISPLAYUPDATE(2, "\r%6u        ", testNb);
         }
 
         /* states full reset (deliberately not synchronized) */

From caf9e96dc3958850f14b3ddba31003e5f2f2a700 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Fri, 26 Jan 2018 18:09:25 -0800
Subject: [PATCH 26/30] job mutex creation is checked

---
 lib/compress/zstdmt_compress.c | 44 ++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 20 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 22560bfb1..865035691 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -467,26 +467,6 @@ struct ZSTDMT_CCtx_s {
     const ZSTD_CDict* cdict;
 };
 
-/* ZSTDMT_allocJobsTable()
- * allocate and init a job table.
- * update *nbJobsPtr to next power of 2 value, as size of table
- * No reverse free() function is provided : just use ZSTD_free() */
-static ZSTDMT_jobDescription* ZSTDMT_createJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
-{
-    U32 const nbJobsLog2 = ZSTD_highbit32(*nbJobsPtr) + 1;
-    U32 const nbJobs = 1 << nbJobsLog2;
-    ZSTDMT_jobDescription* const jobTable = ZSTD_calloc(
-                            nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
-    U32 jobNb;
-    if (jobTable==NULL) return NULL;
-    *nbJobsPtr = nbJobs;
-    for (jobNb=0; jobNb
Date: Fri, 26 Jan 2018 18:18:42 -0800
Subject: [PATCH 27/30] fixed minor conversion warning for C++ compilation mode

---
 lib/compress/zstdmt_compress.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 865035691..72515edb8 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -486,8 +486,8 @@ static ZSTDMT_jobDescription* ZSTDMT_createJobsTable(U32* nbJobsPtr, ZSTD_custom
     U32 const nbJobsLog2 = ZSTD_highbit32(*nbJobsPtr) + 1;
     U32 const nbJobs = 1 << nbJobsLog2;
     U32 jobNb;
-    ZSTDMT_jobDescription* const jobTable = ZSTD_calloc(
-                            nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
+    ZSTDMT_jobDescription* const jobTable = (ZSTDMT_jobDescription*)
+                ZSTD_calloc(nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
     int initError = 0;
     if (jobTable==NULL) return NULL;
     *nbJobsPtr = nbJobs;

From 2cb0740b6b24b42d0255046e5692539c7b8534f8 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Tue, 30 Jan 2018 14:43:36 -0800
Subject: [PATCH 28/30] zstdmt: changed naming convention

to avoid confusion with blocks.

also:
- jobs are cut into chunks of 512KB now, to reduce nb of mutex calls.
- fix function declaration ZSTD_getBlockSizeMax()
- fix outdated comment
---
 doc/zstd_manual.html           |   2 +-
 lib/compress/zstdmt_compress.c | 176 ++++++++++++++++-----------------
 lib/zstd.h                     |   2 +-
 3 files changed, 90 insertions(+), 90 deletions(-)

diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index b4aed91e5..debd37f7a 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -1153,7 +1153,7 @@ size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t
         Use ZSTD_insertBlock() for such a case.
 


-

Raw zstd block functions

size_t ZSTD_getBlockSize   (const ZSTD_CCtx* cctx);
+

Raw zstd block functions

size_t ZSTD_getBlockSizeMax(const ZSTD_CCtx* cctx);
 size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 72515edb8..ec5f0bbbd 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -319,8 +319,8 @@ typedef struct {
     const void* prefixStart;             /* set by mtctx, then read and set0 by worker => no barrier */
     size_t   prefixSize;                 /* set by mtctx, then read by worker => no barrier */
     size_t   srcSize;                    /* set by mtctx, then read by worker & mtctx => no barrier */
-    unsigned firstChunk;                 /* set by mtctx, then read by worker => no barrier */
-    unsigned lastChunk;                  /* set by mtctx, then read by worker => no barrier */
+    unsigned firstJob;                   /* set by mtctx, then read by worker => no barrier */
+    unsigned lastJob;                    /* set by mtctx, then read by worker => no barrier */
     ZSTD_CCtx_params params;             /* set by mtctx, then read by worker => no barrier */
     const ZSTD_CDict* cdict;             /* set by mtctx, then read by worker => no barrier */
     unsigned long long fullFrameSize;    /* set by mtctx, then read by worker => no barrier */
@@ -328,8 +328,8 @@ typedef struct {
     unsigned frameChecksumNeeded;        /* used only by mtctx */
 } ZSTDMT_jobDescription;
 
-/* ZSTDMT_compressChunk() is a POOL_function type */
-void ZSTDMT_compressChunk(void* jobDescription)
+/* ZSTDMT_compressionJob() is a POOL_function type */
+void ZSTDMT_compressionJob(void* jobDescription)
 {
     ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
     ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool);
@@ -353,12 +353,12 @@ void ZSTDMT_compressChunk(void* jobDescription)
     /* init */
     if (job->cdict) {
         size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dm_auto, job->cdict, job->params, job->fullFrameSize);
-        assert(job->firstChunk);  /* only allowed for first job */
+        assert(job->firstJob);  /* only allowed for first job */
         if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
     } else {  /* srcStart points at reloaded section */
-        U64 const pledgedSrcSize = job->firstChunk ? job->fullFrameSize : job->srcSize;
+        U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->srcSize;
         ZSTD_CCtx_params jobParams = job->params;   /* do not modify job->params ! copy it, modify the copy */
-        {   size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_p_forceMaxWindow, !job->firstChunk);
+        {   size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_p_forceMaxWindow, !job->firstJob);
             if (ZSTD_isError(forceWindowError)) {
                 job->cSize = forceWindowError;
                 goto _endJob;
@@ -371,44 +371,44 @@ void ZSTDMT_compressChunk(void* jobDescription)
                 job->cSize = initError;
                 goto _endJob;
     }   }   }
-    if (!job->firstChunk) {  /* flush and overwrite frame header when it's not first job */
+    if (!job->firstJob) {  /* flush and overwrite frame header when it's not first job */
         size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, src, 0);
         if (ZSTD_isError(hSize)) { job->cSize = hSize; /* save error code */ goto _endJob; }
-        DEBUGLOG(5, "ZSTDMT_compressChunk: flush and overwrite %u bytes of frame header (not first chunk)", (U32)hSize);
+        DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
         ZSTD_invalidateRepCodes(cctx);
     }
 
     /* compress */
-    {   size_t const blockSize = ZSTD_BLOCKSIZE_MAX;
-        int const nbBlocks = (int)((job->srcSize + (blockSize-1)) / blockSize);
+    {   size_t const chunkSize = 4*ZSTD_BLOCKSIZE_MAX;
+        int const nbChunks = (int)((job->srcSize + (chunkSize-1)) / chunkSize);
         const BYTE* ip = (const BYTE*) src;
         BYTE* const ostart = (BYTE*)dstBuff.start;
         BYTE* op = ostart;
         BYTE* oend = op + dstBuff.capacity;
-        int blockNb;
-        if (sizeof(size_t) > sizeof(int)) assert(job->srcSize < ((size_t)INT_MAX) * blockSize);   /* check overflow */
-        DEBUGLOG(5, "ZSTDMT_compressChunk: compress %u bytes in %i blocks", (U32)job->srcSize, nbBlocks);
+        int chunkNb;
+        if (sizeof(size_t) > sizeof(int)) assert(job->srcSize < ((size_t)INT_MAX) * chunkSize);   /* check overflow */
+        DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->srcSize, nbChunks);
         assert(job->cSize == 0);
-        for (blockNb = 1; blockNb < nbBlocks; blockNb++) {
-            size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, blockSize);
+        for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
+            size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
             if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; }
-            ip += blockSize;
+            ip += chunkSize;
             op += cSize; assert(op < oend);
             /* stats */
-            ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);   /* note : it's a mtctx mutex */
+            ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
             job->cSize += cSize;
-            job->consumed = blockSize * blockNb;
-            DEBUGLOG(5, "ZSTDMT_compressChunk: compress new block : cSize==%u bytes (total: %u)",
+            job->consumed = chunkSize * chunkNb;
+            DEBUGLOG(5, "ZSTDMT_compressionJob: compress new block : cSize==%u bytes (total: %u)",
                         (U32)cSize, (U32)job->cSize);
             ZSTD_pthread_cond_signal(&job->job_cond);   /* warns some more data is ready to be flushed */
             ZSTD_pthread_mutex_unlock(&job->job_mutex);
         }
         /* last block */
-        assert(blockSize > 0); assert((blockSize & (blockSize - 1)) == 0);  /* blockSize must be power of 2 for mask==(blockSize-1) to work */
-        if ((nbBlocks > 0) | job->lastChunk /*must output a "last block" flag*/ ) {
-            size_t const lastBlockSize1 = job->srcSize & (blockSize-1);
-            size_t const lastBlockSize = ((lastBlockSize1==0) & (job->srcSize>=blockSize)) ? blockSize : lastBlockSize1;
-            size_t const cSize = (job->lastChunk) ?
+        assert(chunkSize > 0); assert((chunkSize & (chunkSize - 1)) == 0);  /* chunkSize must be power of 2 for mask==(chunkSize-1) to work */
+        if ((nbChunks > 0) | job->lastJob /*must output a "last block" flag*/ ) {
+            size_t const lastBlockSize1 = job->srcSize & (chunkSize-1);
+            size_t const lastBlockSize = ((lastBlockSize1==0) & (job->srcSize>=chunkSize)) ? chunkSize : lastBlockSize1;
+            size_t const cSize = (job->lastJob) ?
                  ZSTD_compressEnd     (cctx, op, oend-op, ip, lastBlockSize) :
                  ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
             if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; }
@@ -580,7 +580,7 @@ static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx)
         unsigned const jobID = mtctx->doneJobID & mtctx->jobIDMask;
         ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex);
         while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].srcSize) {
-            DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", mtctx->doneJobID);   /* we want to block when waiting for data to flush */
+            DEBUGLOG(5, "waiting for jobCompleted signal from job %u", mtctx->doneJobID);   /* we want to block when waiting for data to flush */
             ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex);
         }
         ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex);
@@ -709,16 +709,16 @@ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
 /* =====   Multi-threaded compression   ===== */
 /* ------------------------------------------ */
 
-static unsigned ZSTDMT_computeNbChunks(size_t srcSize, unsigned windowLog, unsigned nbThreads) {
+static unsigned ZSTDMT_computeNbJobs(size_t srcSize, unsigned windowLog, unsigned nbThreads) {
     assert(nbThreads>0);
-    {   size_t const chunkSizeTarget = (size_t)1 << (windowLog + 2);
-        size_t const chunkMaxSize = chunkSizeTarget << 2;
-        size_t const passSizeMax = chunkMaxSize * nbThreads;
+    {   size_t const jobSizeTarget = (size_t)1 << (windowLog + 2);
+        size_t const jobMaxSize = jobSizeTarget << 2;
+        size_t const passSizeMax = jobMaxSize * nbThreads;
         unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
-        unsigned const nbChunksLarge = multiplier * nbThreads;
-        unsigned const nbChunksMax = (unsigned)(srcSize / chunkSizeTarget) + 1;
-        unsigned const nbChunksSmall = MIN(nbChunksMax, nbThreads);
-        return (multiplier>1) ? nbChunksLarge : nbChunksSmall;
+        unsigned const nbJobsLarge = multiplier * nbThreads;
+        unsigned const nbJobsMax = (unsigned)(srcSize / jobSizeTarget) + 1;
+        unsigned const nbJobsSmall = MIN(nbJobsMax, nbThreads);
+        return (multiplier>1) ? nbJobsLarge : nbJobsSmall;
 }   }
 
 /* ZSTDMT_compress_advanced_internal() :
@@ -734,44 +734,44 @@ static size_t ZSTDMT_compress_advanced_internal(
     ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
     unsigned const overlapRLog = (params.overlapSizeLog>9) ? 0 : 9-params.overlapSizeLog;
     size_t const overlapSize = (overlapRLog>=9) ? 0 : (size_t)1 << (params.cParams.windowLog - overlapRLog);
-    unsigned nbChunks = ZSTDMT_computeNbChunks(srcSize, params.cParams.windowLog, params.nbThreads);
-    size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
-    size_t const avgChunkSize = (((proposedChunkSize-1) & 0x1FFFF) < 0x7FFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize;   /* avoid too small last block */
+    unsigned const nbJobs = ZSTDMT_computeNbJobs(srcSize, params.cParams.windowLog, params.nbThreads);
+    size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
+    size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize;   /* avoid too small last block */
     const char* const srcStart = (const char*)src;
     size_t remainingSrcSize = srcSize;
-    unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbChunks : (unsigned)(dstCapacity / ZSTD_compressBound(avgChunkSize));  /* presumes avgChunkSize >= 256 KB, which should be the case */
+    unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbJobs : (unsigned)(dstCapacity / ZSTD_compressBound(avgJobSize));  /* presumes avgJobSize >= 256 KB, which should be the case */
     size_t frameStartPos = 0, dstBufferPos = 0;
     XXH64_state_t xxh64;
     assert(jobParams.nbThreads == 0);
     assert(mtctx->cctxPool->totalCCtx == params.nbThreads);
 
-    DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbChunks=%2u (rawSize=%u bytes; fixedSize=%u) ",
-                nbChunks, (U32)proposedChunkSize, (U32)avgChunkSize);
+    DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbJobs=%2u (rawSize=%u bytes; fixedSize=%u) ",
+                nbJobs, (U32)proposedJobSize, (U32)avgJobSize);
 
-    if ((nbChunks==1) | (params.nbThreads<=1)) {   /* fallback to single-thread mode : this is a blocking invocation anyway */
+    if ((nbJobs==1) | (params.nbThreads<=1)) {   /* fallback to single-thread mode : this is a blocking invocation anyway */
         ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
         if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
         return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
     }
 
-    assert(avgChunkSize >= 256 KB);  /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
-    ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgChunkSize) );
+    assert(avgJobSize >= 256 KB);  /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
+    ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
     XXH64_reset(&xxh64, 0);
 
-    if (nbChunks > mtctx->jobIDMask+1) {  /* enlarge job table */
-        U32 nbJobs = nbChunks;
+    if (nbJobs > mtctx->jobIDMask+1) {  /* enlarge job table */
+        U32 jobsTableSize = nbJobs;
         ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
         mtctx->jobIDMask = 0;
-        mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, mtctx->cMem);
+        mtctx->jobs = ZSTDMT_createJobsTable(&jobsTableSize, mtctx->cMem);
         if (mtctx->jobs==NULL) return ERROR(memory_allocation);
-        assert((nbJobs != 0) && ((nbJobs & (nbJobs - 1)) == 0));  /* ensure nbJobs is a power of 2 */
-        mtctx->jobIDMask = nbJobs - 1;
+        assert((jobsTableSize != 0) && ((jobsTableSize & (jobsTableSize - 1)) == 0));  /* ensure jobsTableSize is a power of 2 */
+        mtctx->jobIDMask = jobsTableSize - 1;
     }
 
     {   unsigned u;
-        for (u=0; ujobs[u].srcBuff = g_nullBuffer;
             mtctx->jobs[u].prefixStart = srcStart + frameStartPos - dictSize;
             mtctx->jobs[u].prefixSize = dictSize;
-            mtctx->jobs[u].srcSize = chunkSize; assert(chunkSize > 0);  /* avoid job.srcSize == 0 */
+            mtctx->jobs[u].srcSize = jobSize; assert(jobSize > 0);  /* avoid job.srcSize == 0 */
             mtctx->jobs[u].consumed = 0;
             mtctx->jobs[u].cSize = 0;
             mtctx->jobs[u].cdict = (u==0) ? cdict : NULL;
@@ -790,51 +790,51 @@ static size_t ZSTDMT_compress_advanced_internal(
             mtctx->jobs[u].dstBuff = dstBuffer;
             mtctx->jobs[u].cctxPool = mtctx->cctxPool;
             mtctx->jobs[u].bufPool = mtctx->bufPool;
-            mtctx->jobs[u].firstChunk = (u==0);
-            mtctx->jobs[u].lastChunk = (u==nbChunks-1);
+            mtctx->jobs[u].firstJob = (u==0);
+            mtctx->jobs[u].lastJob = (u==nbJobs-1);
 
             if (params.fParams.checksumFlag) {
-                XXH64_update(&xxh64, srcStart + frameStartPos, chunkSize);
+                XXH64_update(&xxh64, srcStart + frameStartPos, jobSize);
             }
 
-            DEBUGLOG(5, "ZSTDMT_compress_advanced_internal: posting job %u  (%u bytes)", u, (U32)chunkSize);
+            DEBUGLOG(5, "ZSTDMT_compress_advanced_internal: posting job %u  (%u bytes)", u, (U32)jobSize);
             DEBUG_PRINTHEX(6, mtctx->jobs[u].prefixStart, 12);
-            POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);
+            POOL_add(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[u]);
 
-            frameStartPos += chunkSize;
+            frameStartPos += jobSize;
             dstBufferPos += dstBufferCapacity;
-            remainingSrcSize -= chunkSize;
+            remainingSrcSize -= jobSize;
     }   }
 
     /* collect result */
     {   size_t error = 0, dstPos = 0;
-        unsigned chunkID;
-        for (chunkID=0; chunkIDjobs[chunkID].job_mutex);
-            while (mtctx->jobs[chunkID].consumed < mtctx->jobs[chunkID].srcSize) {
-                DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", chunkID);
-                ZSTD_pthread_cond_wait(&mtctx->jobs[chunkID].job_cond, &mtctx->jobs[chunkID].job_mutex);
+        unsigned jobID;
+        for (jobID=0; jobIDjobs[jobID].job_mutex);
+            while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].srcSize) {
+                DEBUGLOG(5, "waiting for jobCompleted signal from job %u", jobID);
+                ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex);
             }
-            ZSTD_pthread_mutex_unlock(&mtctx->jobs[chunkID].job_mutex);
-            DEBUGLOG(5, "ready to write chunk %u ", chunkID);
+            ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex);
+            DEBUGLOG(5, "ready to write job %u ", jobID);
 
-            mtctx->jobs[chunkID].prefixStart = NULL;
-            {   size_t const cSize = mtctx->jobs[chunkID].cSize;
+            mtctx->jobs[jobID].prefixStart = NULL;
+            {   size_t const cSize = mtctx->jobs[jobID].cSize;
                 if (ZSTD_isError(cSize)) error = cSize;
                 if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
-                if (chunkID) {   /* note : chunk 0 is written directly at dst, which is correct position */
+                if (jobID) {   /* note : job 0 is written directly at dst, which is correct position */
                     if (!error)
-                        memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize);  /* may overlap when chunk compressed within dst */
-                    if (chunkID >= compressWithinDst) {  /* chunk compressed into its own buffer, which must be released */
-                        DEBUGLOG(5, "releasing buffer %u>=%u", chunkID, compressWithinDst);
-                        ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[chunkID].dstBuff);
+                        memmove((char*)dst + dstPos, mtctx->jobs[jobID].dstBuff.start, cSize);  /* may overlap when job compressed within dst */
+                    if (jobID >= compressWithinDst) {  /* job compressed into its own buffer, which must be released */
+                        DEBUGLOG(5, "releasing buffer %u>=%u", jobID, compressWithinDst);
+                        ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
                 }   }
-                mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
-                mtctx->jobs[chunkID].cSize = 0;
+                mtctx->jobs[jobID].dstBuff = g_nullBuffer;
+                mtctx->jobs[jobID].cSize = 0;
                 dstPos += cSize ;
             }
-        }  /* for (chunkID=0; chunkIDlastChunk == 1);
-    assert(job->srcSize == 0);      /* last chunk is empty -> will be simplified into a last empty block */
-    assert(job->firstChunk == 0);   /* cannot be first chunk, as it also needs to create frame header */
+    assert(job->lastJob == 1);
+    assert(job->srcSize == 0);    /* last job is empty -> will be simplified into a last empty block */
+    assert(job->firstJob == 0);   /* cannot be first job, as it also needs to create frame header */
     /* A job created by streaming variant starts with a src buffer, but no dst buffer.
      * It summons a dstBuffer itself, compresses into it, then releases srcBuffer, and gives result to mtctx.
      * When done, srcBuffer is empty, while dstBuffer is filled, and will be released by mtctx.
@@ -1063,8 +1063,8 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZS
         mtctx->jobs[jobID].dstBuff = g_nullBuffer;
         mtctx->jobs[jobID].cctxPool = mtctx->cctxPool;
         mtctx->jobs[jobID].bufPool = mtctx->bufPool;
-        mtctx->jobs[jobID].firstChunk = (mtctx->nextJobID==0);
-        mtctx->jobs[jobID].lastChunk = endFrame;
+        mtctx->jobs[jobID].firstJob = (mtctx->nextJobID==0);
+        mtctx->jobs[jobID].lastJob = endFrame;
         mtctx->jobs[jobID].frameChecksumNeeded = endFrame && (mtctx->nextJobID>0) && mtctx->params.fParams.checksumFlag;
         mtctx->jobs[jobID].dstFlushed = 0;
 
@@ -1093,12 +1093,12 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZS
             mtctx->inBuff.prefixSize = 0;
             mtctx->frameEnded = endFrame;
             if (mtctx->nextJobID == 0) {
-                /* single chunk exception : checksum is already calculated directly within worker thread */
+                /* single job exception : checksum is already calculated directly within worker thread */
                 mtctx->params.fParams.checksumFlag = 0;
         }   }
 
         if ( (srcSize == 0)
-          && (mtctx->nextJobID>0)/*single chunk must also write frame header*/ ) {
+          && (mtctx->nextJobID>0)/*single job must also write frame header*/ ) {
             DEBUGLOG(5, "ZSTDMT_createCompressionJob: creating a last empty block to end frame");
             assert(endOp == ZSTD_e_end);  /* only possible case : need to end the frame with an empty last block */
             ZSTDMT_writeLastEmptyBlock(mtctx->jobs + jobID);
@@ -1110,10 +1110,10 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZS
     DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes  (end:%u, jobNb == %u (mod:%u))",
                 mtctx->nextJobID,
                 (U32)mtctx->jobs[jobID].srcSize,
-                mtctx->jobs[jobID].lastChunk,
+                mtctx->jobs[jobID].lastJob,
                 mtctx->nextJobID,
                 jobID);
-    if (POOL_tryAdd(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[jobID])) {
+    if (POOL_tryAdd(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[jobID])) {
         mtctx->nextJobID++;
         mtctx->jobReady = 0;
     } else {
@@ -1206,7 +1206,7 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
     if (mtctx->doneJobID < mtctx->nextJobID) return 1;   /* some more jobs ongoing */
     if (mtctx->jobReady) return 1;      /* one job is ready to push, just not yet in the list */
     if (mtctx->inBuff.filled > 0) return 1;   /* input is not empty, and still needs to be converted into a job */
-    mtctx->allJobsCompleted = mtctx->frameEnded;   /* all chunks are entirely flushed => if this one is last one, frame is completed */
+    mtctx->allJobsCompleted = mtctx->frameEnded;   /* all jobs are entirely flushed => if this one is last one, frame is completed */
     if (end == ZSTD_e_end) return !mtctx->frameEnded;  /* for ZSTD_e_end, question becomes : is frame completed ? instead of : are internal buffers fully flushed ? */
     return 0;   /* internal buffers fully flushed */
 }
diff --git a/lib/zstd.h b/lib/zstd.h
index e573daf5b..4c77197d1 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -1373,7 +1373,7 @@ ZSTDLIB_API void ZSTD_DCtx_reset(ZSTD_DCtx* dctx);
 #define ZSTD_BLOCKSIZELOG_MAX 17
 #define ZSTD_BLOCKSIZE_MAX   (1<
Date: Tue, 30 Jan 2018 15:03:39 -0800
Subject: [PATCH 29/30] fixed function declaration ZSTD_getBlockSize()

---
 lib/zstd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/zstd.h b/lib/zstd.h
index 4c77197d1..e573daf5b 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -1373,7 +1373,7 @@ ZSTDLIB_API void ZSTD_DCtx_reset(ZSTD_DCtx* dctx);
 #define ZSTD_BLOCKSIZELOG_MAX 17
 #define ZSTD_BLOCKSIZE_MAX   (1<
Date: Tue, 30 Jan 2018 15:05:12 -0800
Subject: [PATCH 30/30] updated zstd api manual

---
 doc/zstd_manual.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index debd37f7a..b4aed91e5 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -1153,7 +1153,7 @@ size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t
         Use ZSTD_insertBlock() for such a case.
 


-

Raw zstd block functions

size_t ZSTD_getBlockSizeMax(const ZSTD_CCtx* cctx);
+

Raw zstd block functions

size_t ZSTD_getBlockSize   (const ZSTD_CCtx* cctx);
 size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */