updated spec

2025-12-24 17:21:03 +03:00 · 2016-07-08 19:16:57 +02:00
parent c40ba718d7
commit bd10607063
2 changed files with 37 additions and 6 deletions
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -1284,8 +1284,8 @@ size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t


 struct ZSTD_DDict_s {
-    void* dictContent;
-    size_t dictContentSize;
+    void* dict;
+    size_t dictSize;
    ZSTD_DCtx* refContext;
 };  /* typedef'd tp ZSTD_CDict within zstd.h */

@@ -1317,8 +1317,8 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_cu
                return NULL;
        }   }

-        ddict->dictContent = dictContent;
-        ddict->dictContentSize = dictSize;
+        ddict->dict = dictContent;
+        ddict->dictSize = dictSize;
        ddict->refContext = dctx;
        return ddict;
    }
@@ -1338,7 +1338,7 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
    ZSTD_freeFunction const cFree = ddict->refContext->customMem.customFree;
    void* const opaque = ddict->refContext->customMem.opaque;
    ZSTD_freeDCtx(ddict->refContext);
-    cFree(opaque, ddict->dictContent);
+    cFree(opaque, ddict->dict);
    cFree(opaque, ddict);
    return 0;
 }
--- a/zstd_compression_format.md
+++ b/zstd_compression_format.md
@@ -16,7 +16,7 @@ Distribution of this document is unlimited.

 ### Version

-0.0.2 (July 2016 - Work in progress - unfinished)
+0.1.0 (2016-07-08)


 Introduction
@@ -1119,6 +1119,37 @@ with the new offset taking first spot.
  pushing the other ones by one position.


+Dictionary format
+-----------------
+
+`zstd` is compatible with "pure content" dictionaries, free of any format restriction.
+But dictionaries created by `zstd --train` follow a format, described here.
+
+__Pre-requisites__ : a dictionary has a known length,
+                     defined either by a buffer limit, or a file size.
+
+| Header | DictID | Stats | Content |
+| ------ | ------ | ----- | ------- |
+
+__Header__ : 4 bytes ID, value 0xEC30A437, Little Endian format
+
+__DictID__ : 4 bytes, stored in Little Endian format.
+             DictID can be any value, except 0 (which means no DictID).
+             It's used by decoders to check they use the correct dictionary.
+
+__Stats__ : Entropy tables, following the same format as [compressed blocks].
+            They are stored in the following order :
+            Huffman tables for literals, FSE table for offset,
+            FSE table for matchLength, and finally FSE table for litLength.
+            They are then followed by 3 offset values, populating recent offsets,
+            stored in order, 4 bytes little-endian each, for a total of 12 bytes.
+
+__Content__ : Where the actual dictionary content is.
+              Content size depends on Dictionary size.
+
+[compressed blocks]: #compressed-block-format
+

 Version changes
 ---------------
+0.1.0 initial release