1
0
mirror of https://github.com/facebook/zstd.git synced 2025-11-27 11:41:11 +03:00

Merge pull request #838 from stellamplau/ldm-mergeDev

Add long distance matcher
This commit is contained in:
Yann Collet
2017-09-13 13:24:08 -07:00
committed by GitHub
37 changed files with 1433 additions and 136 deletions

View File

@@ -113,6 +113,7 @@ Advanced arguments :
-c : force write to standard output, even if it is the console
-l : print information about zstd compressed files
--ultra : enable levels beyond 19, up to 22 (requires more memory)
--long : enable long distance matching (requires more memory)
--no-dictID : don't write dictID into header (dictionary compression)
--[no-]check : integrity check (default:enabled)
-r : operate recursively on directories
@@ -139,3 +140,60 @@ Benchmark arguments :
-B# : cut file into independent blocks of size # (default: no block)
--priority=rt : set process priority to real-time
```
#### Long distance matching mode
The long distance matching mode, enabled with `--long`, is designed to improve
the compression ratio for files with long matches at a large distance (up to the
maximum window size, `128 MiB`) while still maintaining compression speed.
Enabling this mode sets the window size to `128 MiB` and thus increases the memory
usage for both the compressor and decompressor. Performance in terms of speed is
dependent on long matches being found. Compression speed may degrade if few long
matches are found. Decompression speed usually improves when there are many long
distance matches.
Below are graphs comparing the compression speed, compression ratio, and
decompression speed with and without long distance matching on an ideal use
case: a tar of four versions of clang (versions `3.4.1`, `3.4.2`, `3.5.0`,
`3.5.1`) with a total size of `244889600 B`. This is an ideal use case as there
are many long distance matches within the maximum window size of `128 MiB` (each
version is less than `128 MiB`).
Compression Speed vs Ratio | Decompression Speed
---------------------------|---------------------
![Compression Speed vs Ratio](../doc/images/ldmCspeed.png "Compression Speed vs Ratio") | ![Decompression Speed](../doc/images/ldmDspeed.png "Decompression Speed")
| Method | Compression ratio | Compression speed | Decompression speed |
|:-------|------------------:|-------------------------:|---------------------------:|
| `zstd -1` | `5.065` | `284.8 MB/s` | `759.3 MB/s` |
| `zstd -5` | `5.826` | `124.9 MB/s` | `674.0 MB/s` |
| `zstd -10` | `6.504` | `29.5 MB/s` | `771.3 MB/s` |
| `zstd -1 --long` | `17.426` | `220.6 MB/s` | `1638.4 MB/s` |
| `zstd -5 --long` | `19.661` | `165.5 MB/s` | `1530.6 MB/s`|
| `zstd -10 --long`| `21.949` | `75.6 MB/s` | `1632.6 MB/s`|
On this file, the compression ratio improves significantly with minimal impact
on compression speed, and the decompression speed doubles.
On the other extreme, compressing a file with few long distance matches (such as
the [Silesia compression corpus]) will likely lead to a deterioration in
compression speed (for lower levels) with minimal change in compression ratio.
The below table illustrates this on the [Silesia compression corpus].
[Silesia compression corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
| Method | Compression ratio | Compression speed | Decompression speed |
|:-------|------------------:|-------------------------:|---------------------------:|
| `zstd -1` | `2.878` | `231.7 MB/s` | `594.4 MB/s` |
| `zstd -1 --long` | `2.929` | `106.5 MB/s` | `517.9 MB/s` |
| `zstd -5` | `3.274` | `77.1 MB/s` | `464.2 MB/s` |
| `zstd -5 --long` | `3.319` | `51.7 MB/s` | `371.9 MB/s` |
| `zstd -10` | `3.523` | `16.4 MB/s` | `489.2 MB/s` |
| `zstd -10 --long`| `3.566` | `16.2 MB/s` | `415.7 MB/s` |

View File

@@ -130,6 +130,31 @@ void BMK_setNbThreads(unsigned nbThreads) {
#endif
g_nbThreads = nbThreads;
}
static U32 g_ldmFlag = 0;
void BMK_setLdmFlag(unsigned ldmFlag) {
g_ldmFlag = ldmFlag;
}
static U32 g_ldmMinMatch = 0;
void BMK_setLdmMinMatch(unsigned ldmMinMatch) {
g_ldmMinMatch = ldmMinMatch;
}
static U32 g_ldmHashLog = 0;
void BMK_setLdmHashLog(unsigned ldmHashLog) {
g_ldmHashLog = ldmHashLog;
}
#define BMK_LDM_PARAM_NOTSET 9999
static U32 g_ldmBucketSizeLog = BMK_LDM_PARAM_NOTSET;
void BMK_setLdmBucketSizeLog(unsigned ldmBucketSizeLog) {
g_ldmBucketSizeLog = ldmBucketSizeLog;
}
static U32 g_ldmHashEveryLog = BMK_LDM_PARAM_NOTSET;
void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog) {
g_ldmHashEveryLog = ldmHashEveryLog;
}
/* ********************************************************
@@ -265,6 +290,15 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
#ifdef ZSTD_NEWAPI
ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbThreads, g_nbThreads);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionLevel, cLevel);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_enableLongDistanceMatching, g_ldmFlag);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmMinMatch, g_ldmMinMatch);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashLog, g_ldmHashLog);
if (g_ldmBucketSizeLog != BMK_LDM_PARAM_NOTSET) {
ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmBucketSizeLog, g_ldmBucketSizeLog);
}
if (g_ldmHashEveryLog != BMK_LDM_PARAM_NOTSET) {
ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog);
}
ZSTD_CCtx_setParameter(ctx, ZSTD_p_windowLog, comprParams->windowLog);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_chainLog, comprParams->chainLog);
ZSTD_CCtx_setParameter(ctx, ZSTD_p_searchLog, comprParams->searchLog);

View File

@@ -26,5 +26,10 @@ void BMK_setNbThreads(unsigned nbThreads);
void BMK_setNotificationLevel(unsigned level);
void BMK_setAdditionalParam(int additionalParam);
void BMK_setDecodeOnlyMode(unsigned decodeFlag);
void BMK_setLdmFlag(unsigned ldmFlag);
void BMK_setLdmMinMatch(unsigned ldmMinMatch);
void BMK_setLdmHashLog(unsigned ldmHashLog);
void BMK_setLdmBucketSizeLog(unsigned ldmBucketSizeLog);
void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog);
#endif /* BENCH_H_121279284357 */

View File

@@ -214,6 +214,30 @@ void FIO_setOverlapLog(unsigned overlapLog){
DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n");
g_overlapLog = overlapLog;
}
static U32 g_ldmFlag = 0;
void FIO_setLdmFlag(unsigned ldmFlag) {
g_ldmFlag = (ldmFlag>0);
}
static U32 g_ldmHashLog = 0;
void FIO_setLdmHashLog(unsigned ldmHashLog) {
g_ldmHashLog = ldmHashLog;
}
static U32 g_ldmMinMatch = 0;
void FIO_setLdmMinMatch(unsigned ldmMinMatch) {
g_ldmMinMatch = ldmMinMatch;
}
#define FIO_LDM_PARAM_NOTSET 9999
static U32 g_ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
void FIO_setLdmBucketSizeLog(unsigned ldmBucketSizeLog) {
g_ldmBucketSizeLog = ldmBucketSizeLog;
}
static U32 g_ldmHashEveryLog = FIO_LDM_PARAM_NOTSET;
void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog) {
g_ldmHashEveryLog = ldmHashEveryLog;
}
/*-*************************************
@@ -399,8 +423,20 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_dictIDFlag, g_dictIDFlag) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_checksumFlag, g_checksumFlag) );
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) );
/* compression parameters */
/* compression level */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, cLevel) );
/* long distance matching */
CHECK( ZSTD_CCtx_setParameter(
ress.cctx, ZSTD_p_enableLongDistanceMatching, g_ldmFlag) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashLog, g_ldmHashLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmMinMatch, g_ldmMinMatch) );
if (g_ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) {
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmBucketSizeLog, g_ldmBucketSizeLog) );
}
if (g_ldmHashEveryLog != FIO_LDM_PARAM_NOTSET) {
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog) );
}
/* compression parameters */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_windowLog, comprParams->windowLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_chainLog, comprParams->chainLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_hashLog, comprParams->hashLog) );

View File

@@ -57,6 +57,11 @@ void FIO_setMemLimit(unsigned memLimit);
void FIO_setNbThreads(unsigned nbThreads);
void FIO_setBlockSize(unsigned blockSize);
void FIO_setOverlapLog(unsigned overlapLog);
void FIO_setLdmFlag(unsigned ldmFlag);
void FIO_setLdmHashLog(unsigned ldmHashLog);
void FIO_setLdmMinMatch(unsigned ldmMinMatch);
void FIO_setLdmBucketSizeLog(unsigned ldmBucketSizeLog);
void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog);
/*-*************************************

View File

@@ -1,5 +1,5 @@
.
.TH "ZSTD" "1" "August 2017" "zstd 1.3.1" "User Commands"
.TH "ZSTD" "1" "September 2017" "zstd 1.3.1" "User Commands"
.
.SH "NAME"
\fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
@@ -104,6 +104,10 @@ Display information related to a zstd compressed file, such as size, ratio, and
unlocks high compression levels 20+ (maximum 22), using a lot more memory\. Note that decompression will also require more memory when using these levels\.
.
.TP
\fB\-\-long\fR
enables long distance matching\. This increases the window size (\fBwindowLog\fR) and memory usage for both the compressor and decompressor\. This setting is designed to improve the compression ratio for files with long matches at a large distance (up to the maximum window size, 128 MiB)\.
.
.TP
\fB\-T#\fR, \fB\-\-threads=#\fR
Compress using \fB#\fR threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to ZSTDMT_NBTHREADS_MAX==256\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\.
.
@@ -322,6 +326,58 @@ Determine \fBoverlapSize\fR, amount of data reloaded from previous job\. This pa
.IP
The minimum \fIovlog\fR is 0, and the maximum is 9\. 0 means "no overlap", hence completely independent jobs\. 9 means "full overlap", meaning up to \fBwindowSize\fR is reloaded from previous job\. Reducing \fIovlog\fR by 1 reduces the amount of reload by a factor 2\. Default \fIovlog\fR is 6, which means "reload \fBwindowSize / 8\fR"\. Exception : the maximum compression level (22) has a default \fIovlog\fR of 9\.
.
.TP
\fBldmHashLog\fR=\fIldmhlog\fR, \fBldmhlog\fR=\fIldmhlog\fR
Specify the maximum size for a hash table used for long distance matching\.
.
.IP
This option is ignored unless long distance matching is enabled\.
.
.IP
Bigger hash tables usually improve compression ratio at the expense of more memory during compression and a decrease in compression speed\.
.
.IP
The minimum \fIldmhlog\fR is 6 and the maximum is 26 (default: 20)\.
.
.TP
\fBldmSearchLength\fR=\fIldmslen\fR, \fBldmSlen\fR=\fIldmslen\fR
Specify the minimum searched length of a match for long distance matching\.
.
.IP
This option is ignored unless long distance matching is enabled\.
.
.IP
Larger/very small values usually decrease compression ratio\.
.
.IP
The minumum \fIldmslen\fR is 4 and the maximum is 4096 (default: 64)\.
.
.TP
\fBldmBucketSizeLog\fR=\fIldmblog\fR, \fBldmblog\fR=\fIldmblog\fR
Specify the size of each bucket for the hash table used for long distance matching\.
.
.IP
This option is ignored unless long distance matching is enabled\.
.
.IP
Larger bucket sizes improve collision resolution but decrease compression speed\.
.
.IP
The minimum \fIldmblog\fR is 0 and the maximum is 8 (default: 3)\.
.
.TP
\fBldmHashEveryLog\fR=\fIldmhevery\fR, \fBldmhevery\fR=\fIldmhevery\fR
Specify the frequency of inserting entries into the long distance matching hash table\.
.
.IP
This option is ignored unless long distance matching is enabled\.
.
.IP
Larger values will improve compression speed\. Deviating far from the default value will likely result in a decrease in compression ratio\.
.
.IP
The default value is \fBwlog \- ldmhlog\fR\.
.
.SS "\-B#:"
Select the size of each compression job\. This parameter is available only when multi\-threading is enabled\. Default value is \fB4 * windowSize\fR, which means it varies depending on compression level\. \fB\-B#\fR makes it possible to select a custom value\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 1 MB, or \fBoverlapSize\fR, whichever is largest\.
.

View File

@@ -105,6 +105,12 @@ the last one takes effect.
* `--ultra`:
unlocks high compression levels 20+ (maximum 22), using a lot more memory.
Note that decompression will also require more memory when using these levels.
* `--long`:
enables long distance matching.
This increases the window size (`windowLog`) and memory usage for both the
compressor and decompressor. This setting is designed to improve the
compression ratio for files with long matches at a large distance
(up to the maximum window size, 128 MiB).
* `-T#`, `--threads=#`:
Compress using `#` threads (default: 1).
If `#` is 0, attempt to detect and use the number of physical CPU cores.
@@ -327,6 +333,47 @@ The list of available _options_:
Default _ovlog_ is 6, which means "reload `windowSize / 8`".
Exception : the maximum compression level (22) has a default _ovlog_ of 9.
- `ldmHashLog`=_ldmhlog_, `ldmhlog`=_ldmhlog_:
Specify the maximum size for a hash table used for long distance matching.
This option is ignored unless long distance matching is enabled.
Bigger hash tables usually improve compression ratio at the expense of more
memory during compression and a decrease in compression speed.
The minimum _ldmhlog_ is 6 and the maximum is 26 (default: 20).
- `ldmSearchLength`=_ldmslen_, `ldmslen`=_ldmslen_:
Specify the minimum searched length of a match for long distance matching.
This option is ignored unless long distance matching is enabled.
Larger/very small values usually decrease compression ratio.
The minumum _ldmslen_ is 4 and the maximum is 4096 (default: 64).
- `ldmBucketSizeLog`=_ldmblog_, `ldmblog`=_ldmblog_:
Specify the size of each bucket for the hash table used for long distance
matching.
This option is ignored unless long distance matching is enabled.
Larger bucket sizes improve collision resolution but decrease compression
speed.
The minimum _ldmblog_ is 0 and the maximum is 8 (default: 3).
- `ldmHashEveryLog`=_ldmhevery_, `ldmhevery`=_ldmhevery_:
Specify the frequency of inserting entries into the long distance matching
hash table.
This option is ignored unless long distance matching is enabled.
Larger values will improve compression speed. Deviating far from the
default value will likely result in a decrease in compression ratio.
The default value is `wlog - ldmhlog`.
### -B#:
Select the size of each compression job.
This parameter is available only when multi-threading is enabled.

View File

@@ -73,7 +73,12 @@ static const unsigned g_defaultMaxDictSize = 110 KB;
static const int g_defaultDictCLevel = 3;
static const unsigned g_defaultSelectivityLevel = 9;
#define OVERLAP_LOG_DEFAULT 9999
#define LDM_PARAM_DEFAULT 9999 /* Default for parameters where 0 is valid */
static U32 g_overlapLog = OVERLAP_LOG_DEFAULT;
static U32 g_ldmHashLog = 0;
static U32 g_ldmMinMatch = 0;
static U32 g_ldmHashEveryLog = LDM_PARAM_DEFAULT;
static U32 g_ldmBucketSizeLog = LDM_PARAM_DEFAULT;
/*-************************************
@@ -124,6 +129,7 @@ static int usage_advanced(const char* programName)
DISPLAY( " -l : print information about zstd compressed files \n");
#ifndef ZSTD_NOCOMPRESS
DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel());
DISPLAY( "--long : enable long distance matching (requires more memory)\n");
#ifdef ZSTD_MULTITHREAD
DISPLAY( " -T# : use # threads for compression (default:1) \n");
DISPLAY( " -B# : select size of each job (default:0==automatic) \n");
@@ -305,6 +311,10 @@ static unsigned parseCompressionParameters(const char* stringPtr, ZSTD_compressi
if (longCommandWArg(&stringPtr, "targetLength=") || longCommandWArg(&stringPtr, "tlen=")) { params->targetLength = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "strategy=") || longCommandWArg(&stringPtr, "strat=")) { params->strategy = (ZSTD_strategy)(readU32FromChar(&stringPtr)); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "overlapLog=") || longCommandWArg(&stringPtr, "ovlog=")) { g_overlapLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "ldmHashLog=") || longCommandWArg(&stringPtr, "ldmhlog=")) { g_ldmHashLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "ldmSearchLength=") || longCommandWArg(&stringPtr, "ldmslen=")) { g_ldmMinMatch = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "ldmBucketSizeLog=") || longCommandWArg(&stringPtr, "ldmblog")) { g_ldmBucketSizeLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "ldmHashEveryLog=") || longCommandWArg(&stringPtr, "ldmhevery")) { g_ldmHashEveryLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
return 0;
}
@@ -363,7 +373,8 @@ int main(int argCount, const char* argv[])
ultra=0,
lastCommand = 0,
nbThreads = 1,
setRealTimePrio = 0;
setRealTimePrio = 0,
ldmFlag = 0;
unsigned bench_nbSeconds = 3; /* would be better if this value was synchronized from bench */
size_t blockSize = 0;
zstd_operation_mode operation = zom_compress;
@@ -395,7 +406,7 @@ int main(int argCount, const char* argv[])
/* init */
(void)recursive; (void)cLevelLast; /* not used when ZSTD_NOBENCH set */
(void)dictCLevel; (void)dictSelect; (void)dictID; (void)maxDictSize; /* not used when ZSTD_NODICT set */
(void)ultra; (void)cLevel; /* not used when ZSTD_NOCOMPRESS set */
(void)ultra; (void)cLevel; (void)ldmFlag; /* not used when ZSTD_NOCOMPRESS set */
(void)memLimit; /* not used when ZSTD_NODECOMPRESS set */
if (filenameTable==NULL) { DISPLAY("zstd: %s \n", strerror(errno)); exit(1); }
filenameTable[0] = stdinmark;
@@ -448,6 +459,7 @@ int main(int argCount, const char* argv[])
if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; }
if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; g_displayLevel-=(g_displayLevel==2); continue; }
if (!strcmp(argument, "--ultra")) { ultra=1; continue; }
if (!strcmp(argument, "--long")) { ldmFlag = 1; continue; }
if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(2); continue; }
if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(0); continue; }
if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(2); continue; }
@@ -721,6 +733,15 @@ int main(int argCount, const char* argv[])
BMK_setBlockSize(blockSize);
BMK_setNbThreads(nbThreads);
BMK_setNbSeconds(bench_nbSeconds);
BMK_setLdmFlag(ldmFlag);
BMK_setLdmMinMatch(g_ldmMinMatch);
BMK_setLdmHashLog(g_ldmHashLog);
if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) {
BMK_setLdmBucketSizeLog(g_ldmBucketSizeLog);
}
if (g_ldmHashEveryLog != LDM_PARAM_DEFAULT) {
BMK_setLdmHashEveryLog(g_ldmHashEveryLog);
}
BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast, &compressionParams, setRealTimePrio);
#endif
(void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio;
@@ -788,6 +809,16 @@ int main(int argCount, const char* argv[])
#ifndef ZSTD_NOCOMPRESS
FIO_setNbThreads(nbThreads);
FIO_setBlockSize((U32)blockSize);
FIO_setLdmFlag(ldmFlag);
FIO_setLdmHashLog(g_ldmHashLog);
FIO_setLdmMinMatch(g_ldmMinMatch);
if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) {
FIO_setLdmBucketSizeLog(g_ldmBucketSizeLog);
}
if (g_ldmHashEveryLog != LDM_PARAM_DEFAULT) {
FIO_setLdmHashEveryLog(g_ldmHashEveryLog);
}
if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(g_overlapLog);
if ((filenameIdx==1) && outFileName)
operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel, &compressionParams);