From d916c908e031dc5c911b4188886deb9f1ccf43b5 Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Mon, 4 Jul 2016 00:42:58 +0200
Subject: [PATCH] updated doc

---
 NEWS                       |   3 +
 lib/common/zstd.h          |   2 +-
 programs/zstdcli.c         |  19 +++----
 zstd_compression_format.md | 113 ++++++++++++++++++++++++++++++++-----
 4 files changed, 113 insertions(+), 24 deletions(-)

diff --git a/NEWS b/NEWS
index f1a8eff84..5d6d0a869 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,6 @@
+v0.7.3
+added : OpenBSD target, by Juan Francisco Cantero Hurtado
+
 v0.7.2
 fixed : ZSTD_decompressBlock() using multiple consecutive blocks. Reported by Greg Slazinski
 fixed : potential segfault on very large files (many gigabytes). Reported by Chip Turner.
diff --git a/lib/common/zstd.h b/lib/common/zstd.h
index de2ec1eb9..3dcd533db 100644
--- a/lib/common/zstd.h
+++ b/lib/common/zstd.h
@@ -61,7 +61,7 @@ extern "C" {
 ***************************************/
 #define ZSTD_VERSION_MAJOR    0
 #define ZSTD_VERSION_MINOR    7
-#define ZSTD_VERSION_RELEASE  2
+#define ZSTD_VERSION_RELEASE  3
 
 #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
 #define ZSTD_QUOTE(str) #str
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index c63655c43..d1693a3f8 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -31,7 +31,7 @@
 /*-************************************
 *  Includes
 **************************************/
-#include "util.h"     /* Compiler options, UTIL_HAS_CREATEFILELIST */
+#include "util.h"     /* Compiler options, UTIL_HAS_CREATEFILELIST, errno */
 #include <string.h>   /* strcmp, strlen */
 #include <ctype.h>    /* toupper */
 #include "fileio.h"
@@ -45,7 +45,6 @@
 #include "zstd.h"     /* ZSTD_VERSION_STRING */
 
 
-
 /*-************************************
 *  OS-specific Includes
 **************************************/
@@ -53,12 +52,12 @@
 #  include <io.h>       /* _isatty */
 #  define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
 #else
-#if defined(_POSIX_C_SOURCE) || defined(_XOPEN_SOURCE) || defined(_POSIX_SOURCE)
-#  include <unistd.h>   /* isatty */
-#  define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
-#else
-#  define IS_CONSOLE(stdStream) 0
-#endif
+#  if defined(_POSIX_C_SOURCE) || defined(_XOPEN_SOURCE) || defined(_POSIX_SOURCE)
+#    include <unistd.h>   /* isatty */
+#    define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
+#  else
+#    define IS_CONSOLE(stdStream) 0
+#  endif
 #endif
 
 
@@ -116,6 +115,7 @@ static int usage(const char* programName)
     DISPLAY( " -o file: result stored into `file` (only if 1 input file) \n");
     DISPLAY( " -f     : overwrite output without prompting \n");
     DISPLAY( "--rm    : remove source file(s) after successful de/compression \n");
+    DISPLAY( " -k     : preserve source file(s) (default) \n");
     DISPLAY( " -h/-H  : display help/long help and exit\n");
     return 0;
 }
@@ -169,7 +169,6 @@ static int badusage(const char* programName)
     return 1;
 }
 
-
 static void waitEnter(void)
 {
     int unused;
@@ -229,7 +228,7 @@ int main(int argCount, const char** argv)
     (void)recursive; (void)cLevelLast;    /* not used when ZSTD_NOBENCH set */
     (void)dictCLevel; (void)dictSelect; (void)dictID;  /* not used when ZSTD_NODICT set */
     (void)decode; (void)cLevel; /* not used when ZSTD_NOCOMPRESS set */
-    if (filenameTable==NULL) { DISPLAY("not enough memory\n"); exit(1); }
+    if (filenameTable==NULL) { DISPLAY("zstd: %s \n", strerror(errno)); exit(1); }
     filenameTable[0] = stdinmark;
     displayOut = stderr;
     /* Pick out program name from path. Don't rely on stdlib because of conflicting behavior */
diff --git a/zstd_compression_format.md b/zstd_compression_format.md
index c05c237cc..7816d1cb9 100644
--- a/zstd_compression_format.md
+++ b/zstd_compression_format.md
@@ -16,7 +16,7 @@ Distribution of this document is unlimited.
 
 ### Version
 
-0.0.1 (30/06/2016 - Work in progress - unfinished)
+0.0.2 (July 2016 - Work in progress - unfinished)
 
 
 Introduction
@@ -464,7 +464,7 @@ __Sizes format for Raw or RLE block__ :
                Total literal header size is 2 bytes.
                `size = ((h[0] & 15) << 8) + h[1];`
 - Value : 11 : Regenerated size uses 20 bits (0-1048575).
-               Total literal header size is 2 bytes.
+               Total literal header size is 3 bytes.
                `size = ((h[0] & 15) << 16) + (h[1]<<8) + h[2];`
 
 Note : it's allowed to represent a short value (ex : `13`)
@@ -507,9 +507,9 @@ This power of 2 gives `maxBits`, the depth of the current tree.
 __Example__ :
 Let's presume the following huffman tree must be described :
 
-|  Value | 0 | 1 | 2 | 3 | 4 | 5 |
-| ------ | - | - | - | - | - | - |
-| nbBits | 1 | 2 | 3 | 0 | 4 | 4 |
+| literal |  0  |  1  |  2  |  3  |  4  |  5  |
+| ------- | --- | --- | --- | --- | --- | --- |
+| nbBits  |  1  |  2  |  3  |  0  |  4  |  4  |
 
 The tree depth is 4, since its smallest element uses 4 bits.
 Value `5` will not be listed, nor will values above `5`.
@@ -517,20 +517,18 @@ Values from `0` to `4` will be listed using `weight` instead of `nbBits`.
 Weight formula is : `weight = nbBits ? maxBits + 1 - nbBits : 0;`
 It gives the following serie of weights :
 
-| weight | 4 | 3 | 2 | 0 | 1 |
-| ------ | - | - | - | - | - |
-|  Value | 0 | 1 | 2 | 3 | 4 |
+| weights |  4  |  3  |  2  |  0  |  1  |
+| ------- | --- | --- | --- | --- | --- |
+| literal |  0  |  1  |  2  |  3  |  4  |
 
 The decoder will do the inverse operation :
-having collected weights of symbols from `0` to `4`,
-it knows the last symbol, `5`, is present with a non-zero weight.
+having collected weights of literals from `0` to `4`,
+it knows the last literal, `5`, is present with a non-zero weight.
 The weight of `5` can be deduced by joining to the nearest power of 2.
 Sum of 2^(weight-1) (excluding 0) is :
-8 + 4 + 2 + 0 + 1 = 15
+`8 + 4 + 2 + 0 + 1 = 15`
 Nearest power of 2 is 16.
 Therefore, `maxBits = 4` and `weight[5] = 1`.
-It can then proceed to transform back weights into nbBits :
-`weight = nbBits ? maxBits + 1 - nbBits : 0;` .
 
 ##### Huffman Tree header
 
@@ -584,6 +582,95 @@ FSE header and bitstreams are described in a separated chapter.
 
 ##### Conversion from weights to huffman prefix codes
 
+All present symbols shall now have a `weight` value.
+A `weight` directly represent a `range` of prefix codes,
+following the formulae : `range = weight ? 1 << (weight-1) : 0 ;`
+Symbols are sorted by weight.
+Within same weight, symbols keep natural order.
+Starting from lowest weight,
+symbols are being allocated to a range of prefix codes.
+Symbols with a weight of zero are not present.
+
+It can then proceed to transform weights into nbBits :
+`nbBits = nbBits ? maxBits + 1 - weight : 0;` .
+
+
+__Example__ :
+Let's presume the following huffman tree has been decoded :
+
+| Literal |  0  |  1  |  2  |  3  |  4  |  5  |
+| ------- | --- | --- | --- | --- | --- | --- |
+|  weight |  4  |  3  |  2  |  0  |  1  |  1  |
+
+Sorted by weight and then natural order,
+it gives the following distribution :
+
+| Literal      |  3  |  4  |  5  |  2  |  1  |   0  |
+| ------------ | --- | --- | --- | --- | --- | ---- |
+| weight       |  0  |  1  |  1  |  2  |  3  |   4  |
+| range        |  0  |  1  |  1  |  2  |  4  |   8  |
+| prefix codes | N/A |  0  |  1  | 2-3 | 4-7 | 8-15 |
+| nb bits      |  0  |  4  |  4  |  3  |  2  |   1  |
+
+
+
+#### Literals bitstreams
+
+##### Bitstreams sizes
+
+As seen in a previous paragraph,
+there are 2 flavors of huffman-compressed literals :
+single stream, and 4-streams.
+
+4-streams is useful for CPU with multiple execution units and OoO operations.
+Since each stream can be decoded independently,
+it's possible to decode them up to 4x faster than a single stream,
+presuming the CPU has enough parallelism available.
+
+For single stream, header provides both the compressed and regenerated size.
+For 4-streams though,
+header only provides compressed and regenerated size of all 4 streams combined.
+
+In order to properly decode the 4 streams,
+it's necessary to know the compressed and regenerated size of each stream.
+
+Regenerated size is easiest :
+each stream has a size of `(totalSize+3)/4`,
+except the last one, which is up to 3 bytes smaller, to reach totalSize.
+
+Compressed size must be provided explicitly : in the 4-streams variant,
+bitstream is preceded by 3 unsigned short values using Little Endian convention.
+Each value represent the compressed size of one stream, in order.
+The last stream size is deducted from total compressed size
+and from already known stream sizes :
+`stream4CSize = totalCSize - 6 - stream1CSize - stream2CSize - stream3CSize;`
+
+##### Bitstreams reading
+
+Each bitstream must be read _backward_,
+that is starting from the end down to the beginning.
+Therefore it's necessary to know the size of each bitstream.
+
+It's also necessary to know exactly which _bit_ is the latest.
+This is detected by a final bit flag :
+the highest bit of latest byte is a final-bit-flag.
+Consequently, a last byte of `0` is not possible.
+And the final-bit-flag itself is not part of the useful bitstream.
+Hence, the last byte contain between 0 and 7 useful bits.
+
+Starting from the end,
+it's possible to read the bitstream in a little-endian fashion,
+keeping track of already used bits.
+
+Extracting `maxBits`,
+it's then possible to compare extracted value to the prefix codes table,
+determining the symbol to decode and number of bits to discard.
+
+The process continues up to reading the required number of symbols per stream.
+If a bitstream is not entirely and exactly consumed,
+hence reaching exactly its beginning position with all bits consumed,
+the decoding process is considered faulty.
+