diff --git a/NEWS b/NEWS index f6b2d6c91..036df0076 100644 --- a/NEWS +++ b/NEWS @@ -1,7 +1,9 @@ v0.5.1 New : Optimal parsing => Very high compression modes, thanks to Przemyslaw Skibinski Changed : Dictionary builder integrated into libzstd and zstd cli +Changed : zstd cli now uses "multiple input files" as default mode Fix : high compression modes for big-endian platforms +New : zstd cli : `-t` | `--test` command v0.5.0 New : dictionary builder utility diff --git a/programs/playTests.sh b/programs/playTests.sh index 014a568c1..25ba687e9 100755 --- a/programs/playTests.sh +++ b/programs/playTests.sh @@ -23,9 +23,19 @@ roundTripTest() { [ -n "$ZSTD" ] || die "ZSTD variable must be defined!" -echo "\n**** simple test **** " +echo "\n**** simple tests **** " ./datagen > tmp $ZSTD tmp +$ZSTD tmp -c > tmpCompressed +$ZSTD tmp --stdout > tmpCompressed +$ZSTD -d tmpCompressed && die "wrong suffix error not detected!" +$ZSTD -d tmpCompressed -c > tmpResult +$ZSTD --decompress tmpCompressed -c > tmpResult +$ZSTD --decompress tmpCompressed --stdout > tmpResult +$ZSTD -q tmp && die "overwrite check failed!" +$ZSTD -q -f tmp +$ZSTD -q --force tmp + echo "\n**** frame concatenation **** " @@ -83,6 +93,7 @@ $ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!" echo "\n**** integrity tests **** " echo "test one file (tmp1.zst) " $ZSTD -t tmp1.zst +$ZSTD --test tmp1.zst echo "test multiple files (*.zst) " $ZSTD -t *.zst echo "test good and bad files (*) " diff --git a/programs/zstd.1 b/programs/zstd.1 index 8d69c4ddf..27d607f5c 100644 --- a/programs/zstd.1 +++ b/programs/zstd.1 @@ -14,7 +14,7 @@ .SH SYNOPSIS .TP 5 -\fBzstd\fR [\fBOPTIONS\fR] [-|INPUT-FILE] +\fBzstd\fR [\fBOPTIONS\fR] [-|INPUT-FILE] [-o OUTPUT-FILE] .PP .B unzstd is equivalent to @@ -28,15 +28,13 @@ is equivalent to .SH DESCRIPTION .PP \fBzstd\fR is a fast lossless compression algorithm. -It is based on the \fBLZ77\fR family, with FSE & huff0 entropy stage. -zstd offers compression speed > 200 MB/s per core. 
-It also features a fast decoder, with speed > 500 MB/s per core. +It is based on the \fBLZ77\fR family, with further FSE & huff0 entropy stages. +\fBzstd\fR offers configurable compression speed, with fast modes at > 200 MB/s per core. +It also features a very fast decoder, with speed > 500 MB/s per core. \fBzstd\fR command line is generally similar to gzip, but features the following differences : - Original files are preserved - - By default, \fBzstd file1 file2\fR means : compress file1 \fBinto\fR file2. - Use \fB-m\fR command if you want : compress file1 into file1.zstd and file2 into file2.zst - - By default, when compressing files, \fBzstd\fR displays advancement notification and result summary. + - By default, when compressing a single file, \fBzstd\fR displays progress notifications and result summary. Use \fB-q\fR to turn them off @@ -45,22 +43,20 @@ It also features a fast decoder, with speed > 500 MB/s per core. .SH OPTIONS .TP .B \-# - # compression level [1-19](default:1) + # compression level [1-21] (default:1) .TP -.B \-d +.BR \-d ", " --decompress decompression .TP -.B \-f +.B \-D file + use `file` as Dictionary to compress or decompress FILE(s) +.TP +.B \-o file + save result into `file` (only possible with a single input FILE) +.TP +.BR \-f ", " --force overwrite output without prompting .TP -.BR \-m ", " --multiple - multiple files mode - In this mode, multiple files on the command line means compression or decompression of each named file - Notifications are also turned off by default -.TP -.B \-D - Use next file as dictionary content for compress / decompression -.TP .BR \-h/\-H ", " --help display help/long help and exit .TP @@ -73,17 +69,47 @@ It also features a fast decoder, with speed > 500 MB/s per core. 
.BR \-q ", " --quiet suppress warnings and notifications; specify twice to suppress errors too .TP -.B \-c +.BR \-c ", " --stdout force write to standard output, even if it is the console + +.SH DICTIONARY +.PP +\fBzstd\fR offers \fIdictionary\fR compression, useful for very small files and messages. +It's possible to train \fBzstd\fR with some samples, the result of which is saved into a file called `dictionary`. +Then during compression and decompression, make reference to the same dictionary. +It will improve compression ratio of small files. +Typical gains range from ~10% (at 64KB) to x5 better (at <1KB). .TP -.B \-z - force compression +.B \--train FILEs + use FILEs as training set to create a dictionary. + The training set should contain a lot of small files (> 100), + and weigh typically 100x the target dictionary size + (for example, 10 MB for a 100 KB dictionary) +.TP +.B \-o file + dictionary saved into `file` (default: dictionary) +.TP +.B \--maxdict # + limit dictionary to specified size (default : 112640) +.TP +.B \-s# + dictionary selectivity level (default: 9) + the smaller the value, the denser the dictionary, improving its efficiency but reducing its possible maximum size. 
+ +.SH BENCHMARK .TP .B \-b# benchmark file(s) using compression level # .TP .B \-i# iteration loops [1-9](default : 3), benchmark mode only +.TP +.B \-B# + cut file into independent blocks of size # (default: no block) +.TP +.B \-r# + test all compression levels from 1 to # (default: disabled) + .SH BUGS Report bugs at:- https://github.com/Cyan4973/zstd/issues diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 7a6dae99c..5fe1869a9 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -142,8 +142,8 @@ static int usage_advanced(const char* programName) #ifndef ZSTD_NOBENCH DISPLAY( "Benchmark arguments :\n"); DISPLAY( " -b# : benchmark file(s), using # compression level (default : 1) \n"); - DISPLAY( " -B# : cut file into independent blocks of size # (default: no block)\n"); DISPLAY( " -i# : iteration loops [1-9](default : 3)\n"); + DISPLAY( " -B# : cut file into independent blocks of size # (default: no block)\n"); DISPLAY( " -r# : test all compression levels from 1 to # (default: disabled)\n"); #endif return 0; @@ -208,12 +208,17 @@ int main(int argCount, const char** argv) if(!argument) continue; /* Protection if argument empty */ /* long commands (--long-word) */ + if (!strcmp(argument, "--decompress")) { decode=1; continue; } + if (!strcmp(argument, "--force")) { FIO_overwriteMode(); continue; } if (!strcmp(argument, "--version")) { displayOut=stdout; DISPLAY(WELCOME_MESSAGE); return 0; } if (!strcmp(argument, "--help")) { displayOut=stdout; return usage_advanced(programName); } if (!strcmp(argument, "--verbose")) { displayLevel=4; continue; } if (!strcmp(argument, "--quiet")) { displayLevel--; continue; } + if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; displayLevel=1; continue; } + if (!strcmp(argument, "--test")) { decode=1; outFileName=nulmark; FIO_overwriteMode(); continue; } if (!strcmp(argument, "--train")) { dictBuild=1; outFileName=g_defaultDictName; continue; } if (!strcmp(argument, "--maxdict")) { 
nextArgumentIsMaxDict=1; continue; } + if (!strcmp(argument, "--keep")) { continue; } /* does nothing, since preserving input is default; for gzip/xz compatibility */ /* '-' means stdin/stdout */ if (!strcmp(argument, "-")){