diff --git a/.github/workflows/dev-short-tests.yml b/.github/workflows/dev-short-tests.yml
index a31eec6bb..08df1c07c 100644
--- a/.github/workflows/dev-short-tests.yml
+++ b/.github/workflows/dev-short-tests.yml
@@ -275,7 +275,7 @@ jobs:
   qemu-consistency:
     name: QEMU ${{ matrix.name }}
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
     strategy:
       fail-fast: false  # 'false' means Don't stop matrix workflows even if some matrix failed.
       matrix:
diff --git a/build/meson/meson_options.txt b/build/meson/meson_options.txt
index accf3fa10..f35cd5fc8 100644
--- a/build/meson/meson_options.txt
+++ b/build/meson/meson_options.txt
@@ -14,7 +14,7 @@ option('legacy_level', type: 'integer', min: 0, max: 7, value: 5,
        description: 'Support any legacy format: 7 to 1 for v0.7+ to v0.1+')
 option('debug_level', type: 'integer', min: 0, max: 9, value: 1,
        description: 'Enable run-time debug. See lib/common/debug.h')
-option('backtrace', type: 'boolean', value: false,
+option('backtrace', type: 'feature', value: 'disabled',
        description: 'Display a stack backtrace when execution generates a runtime exception')
 option('static_runtime', type: 'boolean', value: false,
        description: 'Link to static run-time libraries on MSVC')
diff --git a/build/meson/programs/meson.build b/build/meson/programs/meson.build
index 8df3a5dcb..8cee115da 100644
--- a/build/meson/programs/meson.build
+++ b/build/meson/programs/meson.build
@@ -51,7 +51,8 @@ endif
 export_dynamic_on_windows = false
 # explicit backtrace enable/disable for Linux & Darwin
-if not use_backtrace
+execinfo = cc.has_header('execinfo.h', required: use_backtrace)
+if not execinfo.found()
   zstd_c_args += '-DBACKTRACE_ENABLE=0'
 elif use_debug and host_machine_os == os_windows
   # MinGW target
   zstd_c_args += '-DBACKTRACE_ENABLE=1'
diff --git a/contrib/linux-kernel/Makefile b/contrib/linux-kernel/Makefile
index baa1f24c6..f80ee8653 100644
--- a/contrib/linux-kernel/Makefile
+++ b/contrib/linux-kernel/Makefile
@@ -34,7 +34,6 @@ libzstd:
 		-DFSE_STATIC_LINKING_ONLY \
 		-DHUF_STATIC_LINKING_ONLY \
 		-DXXH_STATIC_LINKING_ONLY \
-		-DMEM_FORCE_MEMORY_ACCESS=0 \
 		-D__GNUC__ \
 		-D__linux__=1 \
 		-DSTATIC_BMI2=0 \
diff --git a/contrib/linux-kernel/test/test.c b/contrib/linux-kernel/test/test.c
index 6cd1730bb..67d248e0c 100644
--- a/contrib/linux-kernel/test/test.c
+++ b/contrib/linux-kernel/test/test.c
@@ -186,11 +186,14 @@ static void __attribute__((noinline)) use(void *x) {
   asm volatile("" : "+r"(x));
 }
 
+static void __attribute__((noinline)) fill_stack(void) {
+  memset(g_stack, 0x33, 8192);
+}
+
 static void __attribute__((noinline)) set_stack(void) {
 
   char stack[8192];
   g_stack = stack;
-  memset(g_stack, 0x33, 8192);
   use(g_stack);
 }
 
@@ -208,6 +211,7 @@ static void __attribute__((noinline)) check_stack(void) {
 
 static void test_stack_usage(test_data_t const *data) {
   set_stack();
+  fill_stack();
   test_f2fs();
   test_btrfs(data);
   test_decompress_unzstd(data);
diff --git a/contrib/seekable_format/tests/seekable_tests.c b/contrib/seekable_format/tests/seekable_tests.c
index a482638b9..1bb2d0e81 100644
--- a/contrib/seekable_format/tests/seekable_tests.c
+++ b/contrib/seekable_format/tests/seekable_tests.c
@@ -186,6 +186,40 @@ int main(int argc, const char** argv)
     }
     printf("Success!\n");
+
+    printf("Test %u - check ZSTD magic number when compressing an empty string: ", testNb++);
+    {   /* compressing an empty string should still emit a valid zstd frame header (magic 0xFD2FB528, little-endian) */
+        size_t const capacity = 255;
+        char* inBuffer = malloc(capacity);
+        assert(inBuffer != NULL);
+        inBuffer[0] = '\0';
+        void* const outBuffer = malloc(capacity);
+        assert(outBuffer != NULL);
+
+        ZSTD_seekable_CStream *s = ZSTD_seekable_createCStream();
+        ZSTD_seekable_initCStream(s, 1, 1, 255);
+
+        ZSTD_inBuffer input = { .src=inBuffer, .pos=0, .size=0 };
+        ZSTD_outBuffer output = { .dst=outBuffer, .pos=0, .size=capacity };
+
+        ZSTD_seekable_compressStream(s, &output, &input);
+        ZSTD_seekable_endStream(s, &output);
+
+        if ((((const unsigned char*)output.dst)[0] != 0x28) || (((const unsigned char*)output.dst)[1] != 0xb5) || (((const unsigned char*)output.dst)[2] != 0x2f) || (((const unsigned char*)output.dst)[3] != 0xfd)) {
+            printf("%#02x %#02x %#02x %#02x\n", ((const unsigned char*)output.dst)[0], ((const unsigned char*)output.dst)[1], ((const unsigned char*)output.dst)[2], ((const unsigned char*)output.dst)[3]);
+
+            free(inBuffer);
+            free(outBuffer);
+            ZSTD_seekable_freeCStream(s);
+            goto _test_error;
+        }
+
+        free(inBuffer);
+        free(outBuffer);
+        ZSTD_seekable_freeCStream(s);
+    }
+    printf("Success!\n");
+
     /* TODO: Add more tests */
     printf("Finished tests\n");
     return 0;
diff --git a/contrib/seekable_format/zstdseek_compress.c b/contrib/seekable_format/zstdseek_compress.c
index 242bd2ac3..7ec9bb577 100644
--- a/contrib/seekable_format/zstdseek_compress.c
+++ b/contrib/seekable_format/zstdseek_compress.c
@@ -350,7 +350,7 @@ size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output)
 
 size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output)
 {
-    if (!zcs->writingSeekTable && zcs->frameDSize) {
+    if (!zcs->writingSeekTable) {
         const size_t endFrame = ZSTD_seekable_endFrame(zcs, output);
         if (ZSTD_isError(endFrame)) return endFrame;
         /* return an accurate size hint */
diff --git a/lib/common/mem.h b/lib/common/mem.h
index 46596ed61..493782f6f 100644
--- a/lib/common/mem.h
+++ b/lib/common/mem.h
@@ -133,21 +133,15 @@ MEM_STATIC size_t MEM_swapST(size_t in);
 /*-**************************************************************
 *  Memory I/O Implementation
 *****************************************************************/
-/* MEM_FORCE_MEMORY_ACCESS :
- * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
- * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
- * The below switch allow to select different access method for improved performance.
- * Method 0 (default) : use `memcpy()`. Safe and portable.
- * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable).
- *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory:
+ * Method 0 : always use `memcpy()`. Safe and portable.
+ * Method 1 : use a compiler extension to allow unaligned access.
  * Method 2 : direct access. This method is portable but violate C standard.
  *            It can generate buggy code on targets depending on alignment.
- *            In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6)
- * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
- * Prefer these methods in priority order (0 > 1 > 2) + * Default : method 1 if supported, else method 0 */ #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# ifdef __GNUC__ # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -190,30 +184,19 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) - __pragma( pack(push, 1) ) - typedef struct { U16 v; } unalign16; - typedef struct { U32 v; } unalign32; - typedef struct { U64 v; } unalign64; - typedef struct { size_t v; } unalignArch; - __pragma( pack(pop) ) -#else - typedef struct { U16 v; } __attribute__((packed)) unalign16; - typedef struct { U32 v; } __attribute__((packed)) unalign32; - typedef struct { U64 v; } __attribute__((packed)) unalign64; - typedef struct { size_t v; } __attribute__((packed)) unalignArch; -#endif +typedef __attribute__((aligned(1))) U16 unalign16; +typedef __attribute__((aligned(1))) U32 unalign32; +typedef __attribute__((aligned(1))) U64 unalign64; +typedef __attribute__((aligned(1))) size_t unalignArch; -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } -MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } +MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } +MEM_STATIC size_t MEM_readST(const void* ptr) { return *(const unalignArch*)ptr; } -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(unalign32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(unalign64*)memPtr = value; } #else diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 5bc7a9f3f..0069a7b1b 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1370,8 +1370,8 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, } /* resize windowLog if input is small enough, to use less memory */ - if ( (srcSize < maxWindowResize) - && (dictSize < maxWindowResize) ) { + if ( (srcSize <= maxWindowResize) + && (dictSize <= maxWindowResize) ) { U32 const tSize = (U32)(srcSize + dictSize); static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; U32 const srcLog = (tSize < hashSizeMin) ? 
ZSTD_HASHLOG_MIN : diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 9fe326e73..e95b8822f 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1549,7 +1549,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) * ZSTD_getFrameHeader(), which will provide a more precise error code. */ unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) { - ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 }; + ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0, 0, 0 }; size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); if (ZSTD_isError(hError)) return 0; return zfp.dictID; @@ -2058,6 +2058,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); if (ZSTD_isError(decompressedSize)) return decompressedSize; DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") + assert(istart != NULL); ip = istart + cSize; op = op ? op + decompressedSize : op; /* can occur if frameContentSize = 0 (empty frame) */ zds->expected = 0; @@ -2143,6 +2144,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB } if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); + assert(ip != NULL); ip += neededInSize; /* Function modifies the stage so we must break */ break; @@ -2166,8 +2168,11 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB "should never happen"); loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip)); } - ip += loadedSize; - zds->inPos += loadedSize; + if (loadedSize != 0) { + /* ip may be NULL */ + ip += loadedSize; + zds->inPos += loadedSize; + } if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ /* decode loaded input */ diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c index d3d4055b8..c9e4f632f 100644 --- a/lib/legacy/zstd_v01.c +++ b/lib/legacy/zstd_v01.c @@ -190,25 +190,6 @@ typedef signed long long S64; /**************************************************************** * Memory I/O *****************************************************************/ -/* FSE_FORCE_MEMORY_ACCESS - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets generating assembly depending on alignment. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
- * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef FSE_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) -# define FSE_FORCE_MEMORY_ACCESS 1 -# endif -#endif - static unsigned FSE_32bits(void) { @@ -221,24 +202,6 @@ static unsigned FSE_isLittleEndian(void) return one.c[0]; } -#if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2) - -static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; } -static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; } -static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; } - -#elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; - -static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } - -#else - static U16 FSE_read16(const void* memPtr) { U16 val; memcpy(&val, memPtr, sizeof(val)); return val; @@ -254,8 +217,6 @@ static U64 FSE_read64(const void* memPtr) U64 val; memcpy(&val, memPtr, sizeof(val)); return val; } -#endif /* FSE_FORCE_MEMORY_ACCESS */ - static U16 FSE_readLE16(const void* memPtr) { if (FSE_isLittleEndian()) diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c index c975143c6..d80481784 100644 --- a/lib/legacy/zstd_v02.c +++ b/lib/legacy/zstd_v02.c @@ -115,24 +115,6 @@ extern "C" { /**************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets generating assembly depending on alignment. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) -# define MEM_FORCE_MEMORY_ACCESS 1 -# endif -#endif MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; } MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; } @@ -143,33 +125,6 @@ MEM_STATIC unsigned MEM_isLittleEndian(void) return one.c[0]; } -#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) - -/* violates C standard on structure alignment. 
-Only use if no other choice to achieve best performance on target platform */ -MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } -MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } -MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } - -#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; - -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } - -#else - -/* default method, safe and standard. - can sometimes prove slower */ - MEM_STATIC U16 MEM_read16(const void* memPtr) { U16 val; memcpy(&val, memPtr, sizeof(val)); return val; @@ -190,9 +145,6 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) memcpy(memPtr, &value, sizeof(value)); } -#endif /* MEM_FORCE_MEMORY_ACCESS */ - - MEM_STATIC U16 MEM_readLE16(const void* memPtr) { if (MEM_isLittleEndian()) diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c index b3ff1977c..20ca5681a 100644 --- a/lib/legacy/zstd_v03.c +++ b/lib/legacy/zstd_v03.c @@ -116,24 +116,6 @@ extern "C" { /**************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets generating assembly depending on alignment. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) -# define MEM_FORCE_MEMORY_ACCESS 1 -# endif -#endif MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; } MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; } @@ -144,33 +126,6 @@ MEM_STATIC unsigned MEM_isLittleEndian(void) return one.c[0]; } -#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) - -/* violates C standard on structure alignment. 
-Only use if no other choice to achieve best performance on target platform */ -MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } -MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } -MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } - -#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; - -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } - -#else - -/* default method, safe and standard. - can sometimes prove slower */ - MEM_STATIC U16 MEM_read16(const void* memPtr) { U16 val; memcpy(&val, memPtr, sizeof(val)); return val; @@ -191,10 +146,6 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) memcpy(memPtr, &value, sizeof(value)); } - -#endif /* MEM_FORCE_MEMORY_ACCESS */ - - MEM_STATIC U16 MEM_readLE16(const void* memPtr) { if (MEM_isLittleEndian()) diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c index b0a0cbee0..4f7eb9277 100644 --- a/lib/legacy/zstd_v04.c +++ b/lib/legacy/zstd_v04.c @@ -87,24 +87,6 @@ extern "C" { /**************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets generating assembly depending on alignment. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) -# define MEM_FORCE_MEMORY_ACCESS 1 -# endif -#endif MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; } MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; } @@ -115,33 +97,6 @@ MEM_STATIC unsigned MEM_isLittleEndian(void) return one.c[0]; } -#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) - -/* violates C standard on structure alignment. 
-Only use if no other choice to achieve best performance on target platform */ -MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } -MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } -MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } - -#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; - -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } - -#else - -/* default method, safe and standard. - can sometimes prove slower */ - MEM_STATIC U16 MEM_read16(const void* memPtr) { U16 val; memcpy(&val, memPtr, sizeof(val)); return val; @@ -162,9 +117,6 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) memcpy(memPtr, &value, sizeof(value)); } -#endif /* MEM_FORCE_MEMORY_ACCESS */ - - MEM_STATIC U16 MEM_readLE16(const void* memPtr) { if (MEM_isLittleEndian()) diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c index 75bfe7b2c..9e73a5daa 100644 --- a/lib/legacy/zstd_v05.c +++ b/lib/legacy/zstd_v05.c @@ -106,24 +106,6 @@ extern "C" { /*-************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets depending on alignment. - * In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) -# define MEM_FORCE_MEMORY_ACCESS 1 -# endif -#endif MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; } MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; } @@ -134,37 +116,6 @@ MEM_STATIC unsigned MEM_isLittleEndian(void) return one.c[0]; } -#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) - -/* violates C standard, by lying on structure alignment. 
-Only use if no other choice to achieve best performance on target platform */ -MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } -MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } -MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } - -#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign; - -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; } - -#else - -/* default method, safe and standard. - can sometimes prove slower */ - MEM_STATIC U16 MEM_read16(const void* memPtr) { U16 val; memcpy(&val, memPtr, sizeof(val)); return val; @@ -195,9 +146,6 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) memcpy(memPtr, &value, sizeof(value)); } -#endif /* MEM_FORCE_MEMORY_ACCESS */ - - MEM_STATIC U16 MEM_readLE16(const void* memPtr) { if (MEM_isLittleEndian()) diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c index 346221b00..3839a5f63 100644 --- a/lib/legacy/zstd_v06.c +++ b/lib/legacy/zstd_v06.c @@ -108,24 +108,6 @@ extern "C" { /*-************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets depending on alignment. - * In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
- * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) -# define MEM_FORCE_MEMORY_ACCESS 1 -# endif -#endif MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; } MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; } @@ -136,33 +118,6 @@ MEM_STATIC unsigned MEM_isLittleEndian(void) return one.c[0]; } -#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) - -/* violates C standard, by lying on structure alignment. -Only use if no other choice to achieve best performance on target platform */ -MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } -MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } -MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } - -#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign; - -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } - -#else - -/* default method, safe and standard. - can sometimes prove slower */ - MEM_STATIC U16 MEM_read16(const void* memPtr) { U16 val; memcpy(&val, memPtr, sizeof(val)); return val; @@ -183,9 +138,6 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) memcpy(memPtr, &value, sizeof(value)); } - -#endif /* MEM_FORCE_MEMORY_ACCESS */ - MEM_STATIC U32 MEM_swap32(U32 in) { #if defined(_MSC_VER) /* Visual Studio */ @@ -4035,7 +3987,8 @@ size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* zbd, size_t const toLoad = hSize - zbd->lhSize; /* if hSize!=0, hSize > zbd->lhSize */ if (ZSTDv06_isError(hSize)) return hSize; if (toLoad > (size_t)(iend-ip)) { /* not enough input to load full header */ - memcpy(zbd->headerBuffer + zbd->lhSize, ip, iend-ip); + if (ip != NULL) + memcpy(zbd->headerBuffer + zbd->lhSize, ip, iend-ip); zbd->lhSize += iend-ip; *dstCapacityPtr = 0; return (hSize - zbd->lhSize) + ZSTDv06_blockHeaderSize; /* remaining header bytes + next block header */ diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c index ea7aa21b4..efe6fbefe 100644 --- a/lib/legacy/zstd_v07.c +++ b/lib/legacy/zstd_v07.c @@ -268,24 +268,6 @@ extern "C" { /*-************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). 
- * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets depending on alignment. - * In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) -# define MEM_FORCE_MEMORY_ACCESS 1 -# endif -#endif MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; } MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; } @@ -296,33 +278,6 @@ MEM_STATIC unsigned MEM_isLittleEndian(void) return one.c[0]; } -#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) - -/* violates C standard, by lying on structure alignment. -Only use if no other choice to achieve best performance on target platform */ -MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } -MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } -MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } - -#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign; - -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } - -#else - -/* default method, safe and standard. - can sometimes prove slower */ - MEM_STATIC U16 MEM_read16(const void* memPtr) { U16 val; memcpy(&val, memPtr, sizeof(val)); return val; @@ -343,8 +298,6 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) memcpy(memPtr, &value, sizeof(value)); } -#endif /* MEM_FORCE_MEMORY_ACCESS */ - MEM_STATIC U32 MEM_swap32(U32 in) { #if defined(_MSC_VER) /* Visual Studio */ @@ -4417,7 +4370,8 @@ size_t ZBUFFv07_decompressContinue(ZBUFFv07_DCtx* zbd, if (hSize != 0) { size_t const toLoad = hSize - zbd->lhSize; /* if hSize!=0, hSize > zbd->lhSize */ if (toLoad > (size_t)(iend-ip)) { /* not enough input to load full header */ - memcpy(zbd->headerBuffer + zbd->lhSize, ip, iend-ip); + if (ip != NULL) + memcpy(zbd->headerBuffer + zbd->lhSize, ip, iend-ip); zbd->lhSize += iend-ip; *dstCapacityPtr = 0; return (hSize - zbd->lhSize) + ZSTDv07_blockHeaderSize; /* remaining header bytes + next block header */ diff --git a/lib/zstd.h b/lib/zstd.h index 1867efc93..1dff31b4e 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -2573,6 +2573,8 @@ typedef struct { unsigned headerSize; unsigned dictID; unsigned checksumFlag; + unsigned _reserved1; + unsigned _reserved2; } ZSTD_frameHeader; /*! 
ZSTD_getFrameHeader() : diff --git a/programs/fileio.c b/programs/fileio.c index b9d14211e..313a530ce 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -249,6 +249,18 @@ struct FIO_ctx_s { size_t totalBytesOutput; }; +static int FIO_shouldDisplayFileSummary(FIO_ctx_t const* fCtx) +{ + return fCtx->nbFilesTotal <= 1 || g_display_prefs.displayLevel >= 3; +} + +static int FIO_shouldDisplayMultipleFileSummary(FIO_ctx_t const* fCtx) +{ + int const shouldDisplay = (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1); + assert(shouldDisplay || FIO_shouldDisplayFileSummary(fCtx) || fCtx->nbFilesProcessed == 0); + return shouldDisplay; +} + /*-************************************* * Parameters: Initialization @@ -1044,14 +1056,16 @@ FIO_compressGzFrame(const cRess_t* ress, /* buffers & handlers are used, but no strm.avail_out = (uInt)writeJob->bufferSize; } } if (srcFileSize == UTIL_FILESIZE_UNKNOWN) { - DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", - (unsigned)(inFileSize>>20), - (double)outFileSize/inFileSize*100) + DISPLAYUPDATE_PROGRESS( + "\rRead : %u MB ==> %.2f%% ", + (unsigned)(inFileSize>>20), + (double)outFileSize/inFileSize*100) } else { - DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%% ", - (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), - (double)outFileSize/inFileSize*100); - } } + DISPLAYUPDATE_PROGRESS( + "\rRead : %u / %u MB ==> %.2f%% ", + (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), + (double)outFileSize/inFileSize*100); + } } while (1) { int const ret = deflate(&strm, Z_FINISH); @@ -1141,11 +1155,11 @@ FIO_compressLzmaFrame(cRess_t* ress, strm.avail_out = writeJob->bufferSize; } } if (srcFileSize == UTIL_FILESIZE_UNKNOWN) - DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", + DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%", (unsigned)(inFileSize>>20), (double)outFileSize/inFileSize*100) else - DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", + DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%", (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), (double)outFileSize/inFileSize*100); if (ret == LZMA_STREAM_END) break; @@ -1225,11 +1239,11 @@ FIO_compressLz4Frame(cRess_t* ress, srcFileName, LZ4F_getErrorName(outSize)); outFileSize += outSize; if (srcFileSize == UTIL_FILESIZE_UNKNOWN) { - DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", + DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%", (unsigned)(inFileSize>>20), (double)outFileSize/inFileSize*100) } else { - DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", + DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%", (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), (double)outFileSize/inFileSize*100); } @@ -1287,6 +1301,9 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx, unsigned inputPresented = 0; unsigned inputBlocked = 0; unsigned lastJobID = 0; + UTIL_time_t lastAdaptTime = UTIL_getTime(); + U64 const adaptEveryMicro = REFRESH_RATE; + UTIL_HumanReadableSize_t const file_hrs = UTIL_makeHumanReadableSize(fileSize); DISPLAYLEVEL(6, "compression using zstd format \n"); @@ -1355,131 +1372,137 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx, compressedfilesize += outBuff.pos; } - /* display notification; and adapt compression level */ - if (READY_FOR_UPDATE()) { + /* adaptive mode : statistics measurement and speed correction */ + if (prefs->adaptiveMode && UTIL_clockSpanMicro(lastAdaptTime) > adaptEveryMicro) { + ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx); + + lastAdaptTime = UTIL_getTime(); + + /* check output speed */ + if (zfp.currentJobID > 1) { /* only 
possible if nbWorkers >= 1 */
+
+                unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced;
+                unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed;
+                assert(zfp.produced >= previous_zfp_update.produced);
+                assert(prefs->nbWorkers >= 1);
+
+                /* test if compression is blocked
+                 * either because output is slow and all buffers are full
+                 * or because input is slow and no job can start while waiting for at least one buffer to be filled.
+                 * note : exclude starting part, since currentJobID > 1 */
+                if ( (zfp.consumed == previous_zfp_update.consumed)   /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/
+                  && (zfp.nbActiveWorkers == 0)                       /* confirmed : no compression ongoing */
+                  ) {
+                    DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n")
+                    speedChange = slower;
+                }
+
+                previous_zfp_update = zfp;
+
+                if ( (newlyProduced > (newlyFlushed * 9 / 8))   /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */
+                  && (flushWaiting == 0)                        /* flush speed was never slowed by lack of production, so it's operating at max capacity */
+                  ) {
+                    DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed);
+                    speedChange = slower;
+                }
+                flushWaiting = 0;
+            }
+
+            /* course correct only if there is at least one new job completed */
+            if (zfp.currentJobID > lastJobID) {
+                DISPLAYLEVEL(6, "compression level adaptation check \n")
+
+                /* check input speed */
+                if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) {   /* warm up period, to fill all workers */
+                    if (inputBlocked <= 0) {
+                        DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n");
+                        speedChange = slower;
+                    } else if (speedChange == noChange) {
+                        unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested;
+                        unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed;
+                        unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced;
+                        unsigned long long newlyFlushed = zfp.flushed - previous_zfp_correction.flushed;
+                        previous_zfp_correction = zfp;
+                        assert(inputPresented > 0);
+                        DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n",
+                                        inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100,
+                                        (unsigned)newlyIngested, (unsigned)newlyConsumed,
+                                        (unsigned)newlyFlushed, (unsigned)newlyProduced);
+                        if ( (inputBlocked > inputPresented / 8)        /* input is waiting often, because input buffers are full : compression or output too slow */
+                          && (newlyFlushed * 33 / 32 > newlyProduced)   /* flush everything that is produced */
+                          && (newlyIngested * 33 / 32 > newlyConsumed)  /* input speed as fast or faster than compression speed */
+                          ) {
+                            DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n",
+                                            newlyIngested, newlyConsumed, newlyProduced, newlyFlushed);
+                            speedChange = faster;
+                        }
+                    }
+                    inputBlocked = 0;
+                    inputPresented = 0;
+                }
+
+                if (speedChange == slower) {
+                    DISPLAYLEVEL(6, "slower speed , higher compression \n")
+                    compressionLevel ++;
+                    if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel();
+                    if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel;
+                    compressionLevel += (compressionLevel == 0); /* 
skip 0 */ + ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel); + } + if (speedChange == faster) { + DISPLAYLEVEL(6, "faster speed , lighter compression \n") + compressionLevel --; + if (compressionLevel < prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel; + compressionLevel -= (compressionLevel == 0); /* skip 0 */ + ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel); + } + speedChange = noChange; + + lastJobID = zfp.currentJobID; + } /* if (zfp.currentJobID > lastJobID) */ + } /* if (prefs->adaptiveMode && UTIL_clockSpanMicro(lastAdaptTime) > adaptEveryMicro) */ + + /* display notification */ + if (SHOULD_DISPLAY_PROGRESS() && READY_FOR_UPDATE()) { ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx); double const cShare = (double)zfp.produced / (double)(zfp.consumed + !zfp.consumed/*avoid div0*/) * 100; UTIL_HumanReadableSize_t const buffered_hrs = UTIL_makeHumanReadableSize(zfp.ingested - zfp.consumed); UTIL_HumanReadableSize_t const consumed_hrs = UTIL_makeHumanReadableSize(zfp.consumed); UTIL_HumanReadableSize_t const produced_hrs = UTIL_makeHumanReadableSize(zfp.produced); + DELAY_NEXT_UPDATE(); + /* display progress notifications */ + DISPLAY_PROGRESS("\r%79s\r", ""); /* Clear out the current displayed line */ if (g_display_prefs.displayLevel >= 3) { - DISPLAYUPDATE(3, "\r(L%i) Buffered:%5.*f%s - Consumed:%5.*f%s - Compressed:%5.*f%s => %.2f%% ", - compressionLevel, - buffered_hrs.precision, buffered_hrs.value, buffered_hrs.suffix, - consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix, - produced_hrs.precision, produced_hrs.value, produced_hrs.suffix, - cShare ); - } else if (g_display_prefs.displayLevel >= 2 || g_display_prefs.progressSetting == FIO_ps_always) { + /* Verbose progress update */ + DISPLAY_PROGRESS( + "(L%i) Buffered:%5.*f%s - Consumed:%5.*f%s - Compressed:%5.*f%s => %.2f%% ", + compressionLevel, + buffered_hrs.precision, buffered_hrs.value, buffered_hrs.suffix, + consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix, + produced_hrs.precision, produced_hrs.value, produced_hrs.suffix, + cShare ); + } else { /* Require level 2 or forcibly displayed progress counter for summarized updates */ - DISPLAYLEVEL(1, "\r%79s\r", ""); /* Clear out the current displayed line */ if (fCtx->nbFilesTotal > 1) { size_t srcFileNameSize = strlen(srcFileName); /* Ensure that the string we print is roughly the same size each time */ if (srcFileNameSize > 18) { const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15; - DISPLAYLEVEL(1, "Compress: %u/%u files. Current: ...%s ", + DISPLAY_PROGRESS("Compress: %u/%u files. Current: ...%s ", fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName); } else { - DISPLAYLEVEL(1, "Compress: %u/%u files. Current: %*s ", + DISPLAY_PROGRESS("Compress: %u/%u files. 
Current: %*s ", fCtx->currFileIdx+1, fCtx->nbFilesTotal, (int)(18-srcFileNameSize), srcFileName); } } - DISPLAYLEVEL(1, "Read:%6.*f%4s ", consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix); + DISPLAY_PROGRESS("Read:%6.*f%4s ", consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix); if (fileSize != UTIL_FILESIZE_UNKNOWN) - DISPLAYLEVEL(2, "/%6.*f%4s", file_hrs.precision, file_hrs.value, file_hrs.suffix); - DISPLAYLEVEL(1, " ==> %2.f%%", cShare); - DELAY_NEXT_UPDATE(); + DISPLAY_PROGRESS("/%6.*f%4s", file_hrs.precision, file_hrs.value, file_hrs.suffix); + DISPLAY_PROGRESS(" ==> %2.f%%", cShare); } - - /* adaptive mode : statistics measurement and speed correction */ - if (prefs->adaptiveMode) { - - /* check output speed */ - if (zfp.currentJobID > 1) { /* only possible if nbWorkers >= 1 */ - - unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced; - unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed; - assert(zfp.produced >= previous_zfp_update.produced); - assert(prefs->nbWorkers >= 1); - - /* test if compression is blocked - * either because output is slow and all buffers are full - * or because input is slow and no job can start while waiting for at least one buffer to be filled. - * note : exclude starting part, since currentJobID > 1 */ - if ( (zfp.consumed == previous_zfp_update.consumed) /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/ - && (zfp.nbActiveWorkers == 0) /* confirmed : no compression ongoing */ - ) { - DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n") - speedChange = slower; - } - - previous_zfp_update = zfp; - - if ( (newlyProduced > (newlyFlushed * 9 / 8)) /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */ - && (flushWaiting == 0) /* flush speed was never slowed by lack of production, so it's operating at max capacity */ - ) { - DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed); - speedChange = slower; - } - flushWaiting = 0; - } - - /* course correct only if there is at least one new job completed */ - if (zfp.currentJobID > lastJobID) { - DISPLAYLEVEL(6, "compression level adaptation check \n") - - /* check input speed */ - if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) { /* warm up period, to fill all workers */ - if (inputBlocked <= 0) { - DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n"); - speedChange = slower; - } else if (speedChange == noChange) { - unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested; - unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed; - unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced; - unsigned long long newlyFlushed = zfp.flushed - previous_zfp_correction.flushed; - previous_zfp_correction = zfp; - assert(inputPresented > 0); - DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n", - inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100, - (unsigned)newlyIngested, (unsigned)newlyConsumed, - (unsigned)newlyFlushed, (unsigned)newlyProduced); - if ( (inputBlocked > inputPresented / 8) /* input is waiting often, because input buffers is full 
: compression or output too slow */ - && (newlyFlushed * 33 / 32 > newlyProduced) /* flush everything that is produced */ - && (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */ - ) { - DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n", - newlyIngested, newlyConsumed, newlyProduced, newlyFlushed); - speedChange = faster; - } - } - inputBlocked = 0; - inputPresented = 0; - } - - if (speedChange == slower) { - DISPLAYLEVEL(6, "slower speed , higher compression \n") - compressionLevel ++; - if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel(); - if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel; - compressionLevel += (compressionLevel == 0); /* skip 0 */ - ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel); - } - if (speedChange == faster) { - DISPLAYLEVEL(6, "faster speed , lighter compression \n") - compressionLevel --; - if (compressionLevel < prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel; - compressionLevel -= (compressionLevel == 0); /* skip 0 */ - ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel); - } - speedChange = noChange; - - lastJobID = zfp.currentJobID; - } /* if (zfp.currentJobID > lastJobID) */ - } /* if (g_adaptiveMode) */ - } /* if (READY_FOR_UPDATE()) */ + } /* if (SHOULD_DISPLAY_PROGRESS() && READY_FOR_UPDATE()) */ } /* while ((inBuff.pos != inBuff.size) */ } while (directive != ZSTD_e_end); @@ -1555,20 +1578,18 @@ FIO_compressFilename_internal(FIO_ctx_t* const fCtx, /* Status */ fCtx->totalBytesInput += (size_t)readsize; fCtx->totalBytesOutput += (size_t)compressedfilesize; - DISPLAYLEVEL(2, "\r%79s\r", ""); - if (g_display_prefs.displayLevel >= 2 && - !fCtx->hasStdoutOutput && - (g_display_prefs.displayLevel >= 3 || fCtx->nbFilesTotal <= 1)) { + DISPLAY_PROGRESS("\r%79s\r", ""); + if (FIO_shouldDisplayFileSummary(fCtx)) { UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) readsize); UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) compressedfilesize); if (readsize == 0) { - DISPLAYLEVEL(2,"%-20s : (%6.*f%s => %6.*f%s, %s) \n", + DISPLAY_SUMMARY("%-20s : (%6.*f%s => %6.*f%s, %s) \n", srcFileName, hr_isize.precision, hr_isize.value, hr_isize.suffix, hr_osize.precision, hr_osize.value, hr_osize.suffix, dstFileName); } else { - DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6.*f%s => %6.*f%s, %s) \n", + DISPLAY_SUMMARY("%-20s :%6.2f%% (%6.*f%s => %6.*f%s, %s) \n", srcFileName, (double)compressedfilesize / (double)readsize * 100, hr_isize.precision, hr_isize.value, hr_isize.suffix, @@ -1917,16 +1938,23 @@ int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx, FIO_checkFilenameCollisions(inFileNamesTable , (unsigned)fCtx->nbFilesTotal); } - if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesInput != 0) { + if (FIO_shouldDisplayMultipleFileSummary(fCtx)) { UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesInput); UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesOutput); - DISPLAYLEVEL(2, "\r%79s\r", ""); - DISPLAYLEVEL(2, "%3d files compressed :%.2f%% (%6.*f%4s => %6.*f%4s)\n", - fCtx->nbFilesProcessed, - (double)fCtx->totalBytesOutput/((double)fCtx->totalBytesInput)*100, - hr_isize.precision, hr_isize.value, hr_isize.suffix, - hr_osize.precision, hr_osize.value, hr_osize.suffix); + DISPLAY_PROGRESS("\r%79s\r", ""); + if (fCtx->totalBytesInput == 0) { 
+ DISPLAY_SUMMARY("%3d files compressed : (%6.*f%4s => %6.*f%4s)\n", + fCtx->nbFilesProcessed, + hr_isize.precision, hr_isize.value, hr_isize.suffix, + hr_osize.precision, hr_osize.value, hr_osize.suffix); + } else { + DISPLAY_SUMMARY("%3d files compressed : %.2f%% (%6.*f%4s => %6.*f%4s)\n", + fCtx->nbFilesProcessed, + (double)fCtx->totalBytesOutput/((double)fCtx->totalBytesInput)*100, + hr_isize.precision, hr_isize.value, hr_isize.suffix, + hr_osize.precision, hr_osize.value, hr_osize.suffix); + } } FIO_freeCResources(&ress); @@ -2067,7 +2095,6 @@ FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, ZSTD_inBuffer inBuff = { ress->readCtx->srcBuffer, ress->readCtx->srcBufferLoaded, 0 }; ZSTD_outBuffer outBuff= { writeJob->buffer, writeJob->bufferSize, 0 }; size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff); - const int displayLevel = (g_display_prefs.progressSetting == FIO_ps_always) ? 1 : 2; UTIL_HumanReadableSize_t const hrs = UTIL_makeHumanReadableSize(alreadyDecoded+frameSize); if (ZSTD_isError(readSizeHint)) { DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n", @@ -2085,14 +2112,15 @@ FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, size_t srcFileNameSize = strlen(srcFileName); if (srcFileNameSize > 18) { const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15; - DISPLAYUPDATE(displayLevel, "\rDecompress: %2u/%2u files. Current: ...%s : %.*f%s... ", - fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, hrs.precision, hrs.value, hrs.suffix); + DISPLAYUPDATE_PROGRESS( + "\rDecompress: %2u/%2u files. Current: ...%s : %.*f%s... ", + fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, hrs.precision, hrs.value, hrs.suffix); } else { - DISPLAYUPDATE(displayLevel, "\rDecompress: %2u/%2u files. Current: %s : %.*f%s... ", + DISPLAYUPDATE_PROGRESS("\rDecompress: %2u/%2u files. Current: %s : %.*f%s... ", fCtx->currFileIdx+1, fCtx->nbFilesTotal, srcFileName, hrs.precision, hrs.value, hrs.suffix); } } else { - DISPLAYUPDATE(displayLevel, "\r%-20.20s : %.*f%s... ", + DISPLAYUPDATE_PROGRESS("\r%-20.20s : %.*f%s... 
", srcFileName, hrs.precision, hrs.value, hrs.suffix); } @@ -2307,7 +2335,7 @@ FIO_decompressLz4Frame(dRess_t* ress, const char* srcFileName) AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); filesize += decodedBytes; hrs = UTIL_makeHumanReadableSize(filesize); - DISPLAYUPDATE(2, "\rDecompressed : %.*f%s ", hrs.precision, hrs.value, hrs.suffix); + DISPLAYUPDATE_PROGRESS("\rDecompressed : %.*f%s ", hrs.precision, hrs.value, hrs.suffix); } if (!nextToLoad) break; @@ -2415,13 +2443,9 @@ static int FIO_decompressFrames(FIO_ctx_t* const fCtx, /* Final Status */ fCtx->totalBytesOutput += (size_t)filesize; - DISPLAYLEVEL(2, "\r%79s\r", ""); - /* No status message in pipe mode (stdin - stdout) or multi-files mode */ - if ((g_display_prefs.displayLevel >= 2 && fCtx->nbFilesTotal <= 1) || - g_display_prefs.displayLevel >= 3 || - g_display_prefs.progressSetting == FIO_ps_always) { - DISPLAYLEVEL(1, "\r%-20s: %llu bytes \n", srcFileName, filesize); - } + DISPLAY_PROGRESS("\r%79s\r", ""); + if (FIO_shouldDisplayFileSummary(fCtx)) + DISPLAY_SUMMARY("%-20s: %llu bytes \n", srcFileName, filesize); return 0; } @@ -2730,8 +2754,10 @@ FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx, FIO_checkFilenameCollisions(srcNamesTable , (unsigned)fCtx->nbFilesTotal); } - if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesOutput != 0) - DISPLAYLEVEL(2, "%d files decompressed : %6zu bytes total \n", fCtx->nbFilesProcessed, fCtx->totalBytesOutput); + if (FIO_shouldDisplayMultipleFileSummary(fCtx)) { + DISPLAY_PROGRESS("\r%79s\r", ""); + DISPLAY_SUMMARY("%d files decompressed : %6zu bytes total \n", fCtx->nbFilesProcessed, fCtx->totalBytesOutput); + } FIO_freeDResources(ress); return error; @@ -3010,7 +3036,7 @@ int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int dis } } if (numFiles == 0) { - if (!IS_CONSOLE(stdin)) { + if (!UTIL_isConsole(stdin)) { DISPLAYLEVEL(1, "zstd: --list does not support reading from standard input \n"); } DISPLAYLEVEL(1, "No files given \n"); diff --git a/programs/fileio_common.h b/programs/fileio_common.h index 282c2f13b..aec2e8d56 100644 --- a/programs/fileio_common.h +++ b/programs/fileio_common.h @@ -38,16 +38,24 @@ extern FIO_display_prefs_t g_display_prefs; extern UTIL_time_t g_displayClock; #define REFRESH_RATE ((U64)(SEC_TO_MICRO / 6)) -#define READY_FOR_UPDATE() ((g_display_prefs.progressSetting != FIO_ps_never) && UTIL_clockSpanMicro(g_displayClock) > REFRESH_RATE) +#define READY_FOR_UPDATE() (UTIL_clockSpanMicro(g_displayClock) > REFRESH_RATE || g_display_prefs.displayLevel >= 4) #define DELAY_NEXT_UPDATE() { g_displayClock = UTIL_getTime(); } #define DISPLAYUPDATE(l, ...) { \ if (g_display_prefs.displayLevel>=l && (g_display_prefs.progressSetting != FIO_ps_never)) { \ - if (READY_FOR_UPDATE() || (g_display_prefs.displayLevel>=4)) { \ + if (READY_FOR_UPDATE()) { \ DELAY_NEXT_UPDATE(); \ DISPLAY(__VA_ARGS__); \ if (g_display_prefs.displayLevel>=4) fflush(stderr); \ } } } +#define SHOULD_DISPLAY_SUMMARY() \ + (g_display_prefs.displayLevel >= 2 || g_display_prefs.progressSetting == FIO_ps_always) +#define SHOULD_DISPLAY_PROGRESS() \ + (g_display_prefs.progressSetting != FIO_ps_never && SHOULD_DISPLAY_SUMMARY()) +#define DISPLAY_PROGRESS(...) { if (SHOULD_DISPLAY_PROGRESS()) { DISPLAYLEVEL(1, __VA_ARGS__); }} +#define DISPLAYUPDATE_PROGRESS(...) { if (SHOULD_DISPLAY_PROGRESS()) { DISPLAYUPDATE(1, __VA_ARGS__); }} +#define DISPLAY_SUMMARY(...) 
diff --git a/programs/platform.h b/programs/platform.h
index f80d13559..7006f97f2 100644
--- a/programs/platform.h
+++ b/programs/platform.h
@@ -127,6 +127,10 @@ extern "C" {
 
 /*-*********************************************
 *  Detect if isatty() and fileno() are available
+*
+*  Note: Use UTIL_isConsole() in the zstd CLI
+*  instead, as it allows faking whether a stream
+*  is a console for testing.
 ************************************************/
 #if (defined(__linux__) && (PLATFORM_POSIX_VERSION > 1)) \
  || (PLATFORM_POSIX_VERSION >= 200112L) \
diff --git a/programs/util.c b/programs/util.c
index 63b3ae176..bdb651074 100644
--- a/programs/util.c
+++ b/programs/util.c
@@ -288,6 +288,34 @@ int UTIL_isLink(const char* infilename)
     return 0;
 }
 
+static int g_fakeStdinIsConsole = 0;
+static int g_fakeStderrIsConsole = 0;
+static int g_fakeStdoutIsConsole = 0;
+
+int UTIL_isConsole(FILE* file)
+{
+    if (file == stdin && g_fakeStdinIsConsole)
+        return 1;
+    if (file == stderr && g_fakeStderrIsConsole)
+        return 1;
+    if (file == stdout && g_fakeStdoutIsConsole)
+        return 1;
+    return IS_CONSOLE(file);
+}
+
+void UTIL_fakeStdinIsConsole(void)
+{
+    g_fakeStdinIsConsole = 1;
+}
+void UTIL_fakeStdoutIsConsole(void)
+{
+    g_fakeStdoutIsConsole = 1;
+}
+void UTIL_fakeStderrIsConsole(void)
+{
+    g_fakeStderrIsConsole = 1;
+}
+
 U64 UTIL_getFileSize(const char* infilename)
 {
     stat_t statbuf;
diff --git a/programs/util.h b/programs/util.h
index faf8c9f11..cf1738772 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -175,6 +175,20 @@ int UTIL_isCompressedFile(const char* infilename, const char *extensionList[]);
 int UTIL_isLink(const char* infilename);
 int UTIL_isFIFO(const char* infilename);
 
+/**
+ * Returns whether the given file stream is a console.
+ * Allows faking whether stdin/stdout/stderr is a console
+ * using UTIL_fake*IsConsole().
+ */
+int UTIL_isConsole(FILE* file);
+
+/**
+ * Pretends that stdin/stdout/stderr is a console for testing.
+ */
+void UTIL_fakeStdinIsConsole(void);
+void UTIL_fakeStdoutIsConsole(void);
+void UTIL_fakeStderrIsConsole(void);
+
 #define UTIL_FILESIZE_UNKNOWN  ((U64)(-1))
 U64 UTIL_getFileSize(const char* infilename);
 U64 UTIL_getTotalFileSize(const char* const * fileNamesTable, unsigned nbFiles);
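
The fake setters are deliberately one-way latches on process-global flags: once a stream is declared a console, it stays that way for the life of the process, which is all the CLI tests need. A hypothetical usage sketch (maybeNotifyUser() is illustrative, not part of the patch; only the UTIL_* calls come from it):

    #include <stdio.h>
    #include "util.h"   /* UTIL_isConsole, UTIL_fakeStderrIsConsole */

    static void maybeNotifyUser(const char* msg)
    {
        /* interactive hint: only shown when a human is watching stderr */
        if (UTIL_isConsole(stderr))
            fprintf(stderr, "%s\n", msg);
    }

    int main(void)
    {
        maybeNotifyUser("silent when stderr is redirected to a pipe or file");
        UTIL_fakeStderrIsConsole();   /* latch on; there is no way to turn it off */
        maybeNotifyUser("now printed even under redirection");
        return 0;
    }

The zstdcli.c changes below expose exactly this latch to the shell tests via --fake-stdin/stdout/stderr-is-console.
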
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index 362f320a9..cc6bbb935 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -27,8 +27,8 @@
 /*-************************************
 *  Dependencies
 **************************************/
-#include "platform.h" /* IS_CONSOLE, PLATFORM_POSIX_VERSION */
-#include "util.h"     /* UTIL_HAS_CREATEFILELIST, UTIL_createFileList */
+#include "platform.h" /* PLATFORM_POSIX_VERSION */
+#include "util.h"     /* UTIL_HAS_CREATEFILELIST, UTIL_createFileList, UTIL_isConsole */
 #include <stdlib.h>   /* getenv */
 #include <string.h>   /* strcmp, strlen */
 #include <stdio.h>    /* fprintf(), stdin, stdout, stderr */
@@ -987,6 +987,9 @@ int main(int argCount, const char* argv[])
                     if (!strcmp(argument, "--no-progress")) { FIO_setProgressSetting(FIO_ps_never); continue; }
                     if (!strcmp(argument, "--progress")) { FIO_setProgressSetting(FIO_ps_always); continue; }
                     if (!strcmp(argument, "--exclude-compressed")) { FIO_setExcludeCompressedFile(prefs, 1); continue; }
+                    if (!strcmp(argument, "--fake-stdin-is-console")) { UTIL_fakeStdinIsConsole(); continue; }
+                    if (!strcmp(argument, "--fake-stdout-is-console")) { UTIL_fakeStdoutIsConsole(); continue; }
+                    if (!strcmp(argument, "--fake-stderr-is-console")) { UTIL_fakeStderrIsConsole(); continue; }
 
                     /* long commands with arguments */
 #ifndef ZSTD_NODICT
@@ -1437,12 +1440,12 @@ int main(int argCount, const char* argv[])
     /* Check if input/output defined as console; trigger an error in this case */
     if (!forceStdin
      && (UTIL_searchFileNamesTable(filenames, stdinmark) != -1)
-     && IS_CONSOLE(stdin) ) {
+     && UTIL_isConsole(stdin) ) {
         DISPLAYLEVEL(1, "stdin is a console, aborting\n");
         CLEAN_RETURN(1);
     }
     if ( (!outFileName || !strcmp(outFileName, stdoutmark))
-     && IS_CONSOLE(stdout)
+     && UTIL_isConsole(stdout)
      && (UTIL_searchFileNamesTable(filenames, stdinmark) != -1)
      && !forceStdout
      && operation!=zom_decompress ) {
@@ -1479,7 +1482,7 @@ int main(int argCount, const char* argv[])
 
     /* No status message in pipe mode (stdin - stdout) */
     hasStdout = outFileName && !strcmp(outFileName,stdoutmark);
-    if ((hasStdout || !IS_CONSOLE(stderr)) && (g_displayLevel==2)) g_displayLevel=1;
+    if ((hasStdout || !UTIL_isConsole(stderr)) && (g_displayLevel==2)) g_displayLevel=1;
 
     /* IO Stream/File */
     FIO_setHasStdoutOutput(fCtx, hasStdout);
diff --git a/tests/cli-tests/compression/adapt.sh b/tests/cli-tests/compression/adapt.sh
index 564e955b5..30b9afaa0 100755
--- a/tests/cli-tests/compression/adapt.sh
+++ b/tests/cli-tests/compression/adapt.sh
@@ -4,3 +4,11 @@ set -e
 
 # Test --adapt
 zstd -f file --adapt -c | zstd -t
+
+datagen -g100M > file100M
+
+# Pick parameters to force fast adaptation, even on slow systems
+zstd --adapt -vvvv -19 --zstd=wlog=10 file100M -o /dev/null 2>&1 | grep -q "faster speed , lighter compression"
+
+# Adaptation still happens with --no-progress
+zstd --no-progress --adapt -vvvv -19 --zstd=wlog=10 file100M -o /dev/null 2>&1 | grep -q "faster speed , lighter compression"
diff --git a/tests/cli-tests/compression/window-resize.sh b/tests/cli-tests/compression/window-resize.sh
new file mode 100755
index 000000000..3b5e6fe24
--- /dev/null
+++ b/tests/cli-tests/compression/window-resize.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+datagen -g1G > file
+zstd --long=31 -1 --single-thread --no-content-size -f file
+zstd -l -v file.zst
+
+# We want to ignore stderr (it's outputting "*** zstd command line interface
+# 64-bits v1.5.3, by Yann Collet ***")
+
+rm file file.zst
diff --git a/tests/cli-tests/compression/window-resize.sh.stderr.ignore b/tests/cli-tests/compression/window-resize.sh.stderr.ignore
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/cli-tests/compression/window-resize.sh.stdout.glob b/tests/cli-tests/compression/window-resize.sh.stdout.glob
new file mode 100644
index 000000000..313d216e1
--- /dev/null
+++ b/tests/cli-tests/compression/window-resize.sh.stdout.glob
@@ -0,0 +1,3 @@
+...
+Window Size: 1.000 GiB (1073741824 B)
+...
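
The expected "Window Size: 1.000 GiB" line is the point of window-resize.sh: with --no-content-size the frame header carries no content size, so the listing reports the window the compressor recorded, which has been resized down from the 2 GiB implied by --long=31 to the 1 GiB source. A sketch of reading that field programmatically, assuming zstd's experimental ZSTD_getFrameHeader() API (hence the ZSTD_STATIC_LINKING_ONLY define):

    #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_getFrameHeader, ZSTD_frameHeader */
    #include <zstd.h>
    #include <stdio.h>

    /* Print the window size recorded in a zstd frame header. */
    static void printWindowSize(const void* frame, size_t srcSize)
    {
        ZSTD_frameHeader zfh;
        if (ZSTD_getFrameHeader(&zfh, frame, srcSize) != 0) {
            /* non-zero: error, or srcSize too small to hold the full header */
            fprintf(stderr, "could not parse frame header\n");
            return;
        }
        /* for the frame produced by window-resize.sh this should be 1073741824 */
        printf("Window Size: %llu B\n", (unsigned long long)zfh.windowSize);
    }
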
diff --git a/tests/cli-tests/progress/no-progress.sh b/tests/cli-tests/progress/no-progress.sh
new file mode 100755
index 000000000..708878f26
--- /dev/null
+++ b/tests/cli-tests/progress/no-progress.sh
@@ -0,0 +1,44 @@
+#!/bin/sh
+
+. "$COMMON/platform.sh"
+
+set -e
+
+echo hello > hello
+echo world > world
+
+zstd -q hello world
+
+println >&2 "Test cases where progress information should not be printed"
+
+for args in \
+    "" \
+    "--fake-stderr-is-console -q" \
+    "--fake-stderr-is-console -qq --progress" \
+    "--no-progress --fake-stderr-is-console" \
+    "--no-progress --fake-stderr-is-console -v"
+do
+    println >&2 "args = $args"
+    println >&2 "compress file to file"
+    zstd $args -f hello
+    println >&2 "compress pipe to pipe"
+    zstd $args < hello > $INTOVOID
+    println >&2 "compress pipe to file"
+    zstd $args < hello -fo hello.zst
+    println >&2 "compress file to pipe"
+    zstd $args hello -c > $INTOVOID
+    println >&2 "compress 2 files"
+    zstd $args -f hello world
+
+    println >&2 "decompress file to file"
+    zstd $args -d -f hello.zst
+    println >&2 "decompress pipe to pipe"
+    zstd $args -d < hello.zst > $INTOVOID
+    println >&2 "decompress pipe to file"
+    zstd $args -d < hello.zst -fo hello
+    println >&2 "decompress file to pipe"
+    zstd $args -d hello.zst -c > $INTOVOID
+    println >&2 "decompress 2 files"
+    zstd $args -d -f hello.zst world.zst
+    println >&2 ""
+done
diff --git a/tests/cli-tests/progress/no-progress.sh.stderr.glob b/tests/cli-tests/progress/no-progress.sh.stderr.glob
new file mode 100644
index 000000000..f07ad3803
--- /dev/null
+++ b/tests/cli-tests/progress/no-progress.sh.stderr.glob
@@ -0,0 +1,90 @@
+Test cases where progress information should not be printed
+args =
+compress file to file
+compress pipe to pipe
+compress pipe to file
+compress file to pipe
+compress 2 files
+decompress file to file
+decompress pipe to pipe
+decompress pipe to file
+decompress file to pipe
+decompress 2 files
+
+args = --fake-stderr-is-console -q
+compress file to file
+compress pipe to pipe
+compress pipe to file
+compress file to pipe
+compress 2 files
+decompress file to file
+decompress pipe to pipe
+decompress pipe to file
+decompress file to pipe
+decompress 2 files
+
+args = --fake-stderr-is-console -qq --progress
+compress file to file
+compress pipe to pipe
+compress pipe to file
+compress file to pipe
+compress 2 files
+decompress file to file
+decompress pipe to pipe
+decompress pipe to file
+decompress file to pipe
+decompress 2 files
+
+args = --no-progress --fake-stderr-is-console
+compress file to file
+hello*hello.zst*
+compress pipe to pipe
+compress pipe to file
+*stdin*hello.zst*
+compress file to pipe
+compress 2 files
+2 files compressed*
+decompress file to file
+hello.zst*
+decompress pipe to pipe
+decompress pipe to file
+*stdin*
+decompress file to pipe
+decompress 2 files
+2 files decompressed*
+
+args = --no-progress --fake-stderr-is-console -v
+compress file to file
+*zstd*
+hello*hello.zst*
+compress pipe to pipe
+*zstd*
+*stdin*stdout*
+compress pipe to file
+*zstd*
+*stdin*hello.zst*
+compress file to pipe
+*zstd*
+*hello*stdout*
+compress 2 files
+*zstd*
+*hello*hello.zst*
+*world*world.zst*
+2 files compressed*
+decompress file to file
+*zstd*
+hello.zst*
+decompress pipe to pipe
+*zstd*
+*stdin*
+decompress pipe to file
+*zstd*
+*stdin*
+decompress file to pipe
+*zstd*
+hello.zst*
+decompress 2 files
+*zstd*
+hello.zst*
+world.zst*
+2 files decompressed*
diff --git a/tests/cli-tests/progress/progress.sh b/tests/cli-tests/progress/progress.sh
new file mode 100755
index 000000000..eb464993a
--- /dev/null
+++ b/tests/cli-tests/progress/progress.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+
+. "$COMMON/platform.sh"
+
+set -e
+
+println >&2 "Test cases where progress information should be printed"
+
+echo hello > hello
+echo world > world
+
+zstd -q hello world
+
+for args in \
+    "--progress" \
+    "--fake-stderr-is-console" \
+    "--progress --fake-stderr-is-console -q"; do
+    println >&2 "args = $args"
+    println >&2 "compress file to file"
+    zstd $args -f hello
+    println >&2 "compress pipe to pipe"
+    zstd $args < hello > $INTOVOID
+    println >&2 "compress pipe to file"
+    zstd $args < hello -fo hello.zst
+    println >&2 "compress file to pipe"
+    zstd $args hello -c > $INTOVOID
+    println >&2 "compress 2 files"
+    zstd $args -f hello world
+
+    println >&2 "decompress file to file"
+    zstd $args -d -f hello.zst
+    println >&2 "decompress pipe to pipe"
+    zstd $args -d < hello.zst > $INTOVOID
+    println >&2 "decompress pipe to file"
+    zstd $args -d < hello.zst -fo hello
+    println >&2 "decompress file to pipe"
+    zstd $args -d hello.zst -c > $INTOVOID
+    println >&2 "decompress 2 files"
+    zstd $args -d -f hello.zst world.zst
+    println >&2 ""
+done
diff --git a/tests/cli-tests/progress/progress.sh.stderr.glob b/tests/cli-tests/progress/progress.sh.stderr.glob
new file mode 100644
index 000000000..ca620d3c2
--- /dev/null
+++ b/tests/cli-tests/progress/progress.sh.stderr.glob
@@ -0,0 +1,62 @@
+Test cases where progress information should be printed
+args = --progress
+compress file to file
+*Read:*hello*hello.zst*
+compress pipe to pipe
+*Read:*stdin*stdout*
+compress pipe to file
+*Read:*stdin*hello.zst*
+compress file to pipe
+*Read:*hello*stdout*
+compress 2 files
+*Read*2 files compressed*
+decompress file to file
+*hello.zst*hello.zst*
+decompress pipe to pipe
+*stdin*stdin*
+decompress pipe to file
+*stdin*stdin*
+decompress file to pipe
+*hello.zst*hello.zst*
+decompress 2 files
+*hello.zst*2 files decompressed*
+
+args = --fake-stderr-is-console
+compress file to file
+*Read:*hello*hello.zst*
+compress pipe to pipe
+compress pipe to file
+*Read:*stdin*hello.zst*
+compress file to pipe
+compress 2 files
+*Read*2 files compressed*
+decompress file to file
+*hello.zst*hello.zst*
+decompress pipe to pipe
+decompress pipe to file
+*stdin*stdin*
+decompress file to pipe
+decompress 2 files
+*hello.zst*2 files decompressed*
+
+args = --progress --fake-stderr-is-console -q
+compress file to file
+*Read:*hello*hello.zst*
+compress pipe to pipe
+*Read:*stdin*stdout*
+compress pipe to file
+*Read:*stdin*hello.zst*
+compress file to pipe
+*Read:*hello*stdout*
+compress 2 files
+*Read*2 files compressed*
+decompress file to file
+*hello.zst*hello.zst*
+decompress pipe to pipe
+*stdin*stdin*
+decompress pipe to file
+*stdin*stdin*
+decompress file to pipe
+*hello.zst*hello.zst*
+decompress 2 files
+*hello.zst*2 files decompressed*
diff --git a/tests/fuzz/fuzz.h b/tests/fuzz/fuzz.h
index 810daa2ce..6f3fb2994 100644
--- a/tests/fuzz/fuzz.h
+++ b/tests/fuzz/fuzz.h
@@ -26,8 +26,7 @@
  * @param MEM_FORCE_MEMORY_ACCESS:
  *        This flag controls how the zstd library accesses unaligned memory.
  *        It can be undefined, or 0 through 2. If it is undefined, it selects
- *        the method to use based on the compiler. If testing with UBSAN set
- *        MEM_FORCE_MEMORY_ACCESS=0 to use the standard compliant method.
+ *        the method to use based on the compiler.
  * @param FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
  *        This is the canonical flag to enable deterministic builds for fuzzing.
  *        Changes to zstd for fuzzing are gated behind this define.
diff --git a/tests/fuzz/stream_decompress.c b/tests/fuzz/stream_decompress.c
index e0cdd34d9..86a39b8c9 100644
--- a/tests/fuzz/stream_decompress.c
+++ b/tests/fuzz/stream_decompress.c
@@ -99,14 +99,14 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
 
     while (size > 0) {
         ZSTD_inBuffer in = makeInBuffer(&src, &size, producer);
-        while (in.pos != in.size) {
+        do {
+            size_t const rc = ZSTD_decompressStream(dstream, &out, &in);
+            if (ZSTD_isError(rc)) goto error;
             if (out.pos == out.size) {
                 if (stableOutBuffer) goto error;
                 out = makeOutBuffer(producer, buf, bufSize);
             }
-            size_t const rc = ZSTD_decompressStream(dstream, &out, &in);
-            if (ZSTD_isError(rc)) goto error;
-        }
+        } while (in.pos != in.size);
     }
 
 error:
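
The do/while reordering guarantees at least one ZSTD_decompressStream() call per input buffer (the old inner while skipped empty buffers entirely) and only swaps in a fresh output buffer after a call has actually observed the current one full. The same call-first shape in a plain drain loop, sketched outside the fuzzer (hypothetical helper; like the fuzzer, it discards the decompressed bytes and leaves end-of-frame handling to the caller):

    #include <zstd.h>

    /* Feed one chunk of compressed input, recycling a fixed output buffer.
     * Returns 0 on success, -1 on a decoding error. */
    static int drainChunk(ZSTD_DCtx* dctx, const void* src, size_t srcSize)
    {
        ZSTD_inBuffer in = { src, srcSize, 0 };
        char scratch[4096];
        ZSTD_outBuffer out = { scratch, sizeof(scratch), 0 };
        do {
            /* call first: even fully-consumed input can still flush buffered data */
            size_t const rc = ZSTD_decompressStream(dctx, &out, &in);
            if (ZSTD_isError(rc)) return -1;
            if (out.pos == out.size)
                out.pos = 0;   /* recycle only after a call saw the buffer full */
        } while (in.pos != in.size);
        return 0;
    }
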