mirror of
https://github.com/facebook/zstd.git
synced 2025-07-30 22:23:13 +03:00
external dictionary capability added to command line
This commit is contained in:
2
NEWS
2
NEWS
@ -1,6 +1,6 @@
|
|||||||
v0.4.4
|
v0.4.4
|
||||||
Fixed : high compression modes for Windows 32 bits
|
Fixed : high compression modes for Windows 32 bits
|
||||||
new : external dictionary API extended to buffered mode
|
new : external dictionary API extended to buffered mode and accessible through command line
|
||||||
new : windows DLL project, thanks to Christophe Chevalier
|
new : windows DLL project, thanks to Christophe Chevalier
|
||||||
|
|
||||||
v0.4.3 :
|
v0.4.3 :
|
||||||
|
@ -94,8 +94,8 @@ zstd-noBench: $(ZSTD_FILES) $(ZSTDDIR)/zstd_buffered.c \
|
|||||||
zstdcli.c fileio.c $(ZSTD_FILEIO_LEGACY)
|
zstdcli.c fileio.c $(ZSTD_FILEIO_LEGACY)
|
||||||
$(CC) $(FLAGS) -DZSTD_NOBENCH $^ -o zstd$(EXT)
|
$(CC) $(FLAGS) -DZSTD_NOBENCH $^ -o zstd$(EXT)
|
||||||
|
|
||||||
zstd-frugal: clean
|
zstd-frugal: clean
|
||||||
CFLAGS=-Os $(MAKE) zstd-noBench ZSTD_LEGACY_SUPPORT=0
|
$(MAKE) zstd-noBench ZSTD_LEGACY_SUPPORT=0
|
||||||
|
|
||||||
fullbench : $(ZSTD_FILES) \
|
fullbench : $(ZSTD_FILES) \
|
||||||
datagen.c fullbench.c
|
datagen.c fullbench.c
|
||||||
|
@ -121,6 +121,7 @@
|
|||||||
|
|
||||||
#define CACHELINE 64
|
#define CACHELINE 64
|
||||||
|
|
||||||
|
#define MAX_DICT_SIZE (512 KB)
|
||||||
|
|
||||||
/* *************************************
|
/* *************************************
|
||||||
* Macros
|
* Macros
|
||||||
@ -235,12 +236,13 @@ static U64 FIO_getFileSize(const char* infilename)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
unsigned long long FIO_compressFilename(const char* output_filename, const char* input_filename, int cLevel)
|
unsigned long long FIO_compressFilename(const char* output_filename, const char* input_filename,
|
||||||
|
const char* dictFileName, int cLevel)
|
||||||
{
|
{
|
||||||
U64 filesize = 0;
|
U64 filesize = 0;
|
||||||
U64 compressedfilesize = 0;
|
U64 compressedfilesize = 0;
|
||||||
BYTE* inBuff;
|
U64 dictSize = 0;
|
||||||
BYTE* outBuff;
|
BYTE* inBuff, *outBuff, *dictBuff=NULL;
|
||||||
size_t inBuffSize = ZBUFF_recommendedCInSize();
|
size_t inBuffSize = ZBUFF_recommendedCInSize();
|
||||||
size_t outBuffSize = ZBUFF_recommendedCOutSize();
|
size_t outBuffSize = ZBUFF_recommendedCOutSize();
|
||||||
FILE* finput;
|
FILE* finput;
|
||||||
@ -252,16 +254,43 @@ unsigned long long FIO_compressFilename(const char* output_filename, const char*
|
|||||||
ctx = ZBUFF_createCCtx();
|
ctx = ZBUFF_createCCtx();
|
||||||
inBuff = (BYTE*)malloc(inBuffSize);
|
inBuff = (BYTE*)malloc(inBuffSize);
|
||||||
outBuff = (BYTE*)malloc(outBuffSize);
|
outBuff = (BYTE*)malloc(outBuffSize);
|
||||||
if (!inBuff || !outBuff || !ctx) EXM_THROW(21, "Allocation error : not enough memory");
|
if (!inBuff || !outBuff || !ctx) EXM_THROW(20, "Allocation error : not enough memory");
|
||||||
|
|
||||||
|
/* dictionary */
|
||||||
|
if (dictFileName)
|
||||||
|
{
|
||||||
|
FILE* dictHandle;
|
||||||
|
size_t read;
|
||||||
|
DISPLAYLEVEL(4,"Using %s as dictionary \n", dictFileName);
|
||||||
|
dictHandle = fopen(dictFileName, "rb");
|
||||||
|
if (dictHandle==0) EXM_THROW(21, "Error opening dictionary file %s", dictFileName);
|
||||||
|
dictSize = FIO_getFileSize(dictFileName);
|
||||||
|
if (dictSize > MAX_DICT_SIZE)
|
||||||
|
{
|
||||||
|
int seekResult;
|
||||||
|
if (dictSize > 1 GB) EXM_THROW(21, "Dictionary file %s is too large", dictFileName); /* avoid extreme cases */
|
||||||
|
DISPLAYLEVEL(2,"Dictionary %s is too large : using last %u bytes only \n", dictFileName, MAX_DICT_SIZE);
|
||||||
|
seekResult = fseek(dictHandle, dictSize-MAX_DICT_SIZE, SEEK_SET); /* use end of file */
|
||||||
|
if (seekResult != 0) EXM_THROW(21, "Error seeking into dictionary file %s", dictFileName);
|
||||||
|
dictSize = MAX_DICT_SIZE;
|
||||||
|
}
|
||||||
|
dictBuff = (BYTE*)malloc(dictSize);
|
||||||
|
if (dictBuff==NULL) EXM_THROW(20, "Allocation error : not enough memory for dictBuff");
|
||||||
|
read = fread(dictBuff, 1, (size_t)dictSize, dictHandle);
|
||||||
|
if (read!=dictSize) EXM_THROW(21, "Error reading dictionary file %s", dictFileName);
|
||||||
|
fclose(dictHandle);
|
||||||
|
}
|
||||||
|
|
||||||
/* init */
|
/* init */
|
||||||
FIO_getFileHandles(&finput, &foutput, input_filename, output_filename);
|
FIO_getFileHandles(&finput, &foutput, input_filename, output_filename);
|
||||||
filesize = FIO_getFileSize(input_filename);
|
filesize = FIO_getFileSize(input_filename) + dictSize;
|
||||||
errorCode = ZBUFF_compressInit_advanced(ctx, ZSTD_getParams(cLevel, filesize));
|
errorCode = ZBUFF_compressInit_advanced(ctx, ZSTD_getParams(cLevel, filesize));
|
||||||
if (ZBUFF_isError(errorCode)) EXM_THROW(22, "Error initializing compression");
|
if (ZBUFF_isError(errorCode)) EXM_THROW(22, "Error initializing compression");
|
||||||
filesize = 0;
|
errorCode = ZBUFF_compressWithDictionary(ctx, dictBuff, dictSize);
|
||||||
|
if (ZBUFF_isError(errorCode)) EXM_THROW(22, "Error initializing dictionary");
|
||||||
|
|
||||||
/* Main compression loop */
|
/* Main compression loop */
|
||||||
|
filesize = 0;
|
||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
size_t inSize;
|
size_t inSize;
|
||||||
@ -311,6 +340,7 @@ unsigned long long FIO_compressFilename(const char* output_filename, const char*
|
|||||||
/* clean */
|
/* clean */
|
||||||
free(inBuff);
|
free(inBuff);
|
||||||
free(outBuff);
|
free(outBuff);
|
||||||
|
free(dictBuff);
|
||||||
ZBUFF_freeCCtx(ctx);
|
ZBUFF_freeCCtx(ctx);
|
||||||
fclose(finput);
|
fclose(finput);
|
||||||
if (fclose(foutput)) EXM_THROW(28, "Write error : cannot properly close %s", output_filename);
|
if (fclose(foutput)) EXM_THROW(28, "Write error : cannot properly close %s", output_filename);
|
||||||
@ -322,6 +352,7 @@ unsigned long long FIO_compressFilename(const char* output_filename, const char*
|
|||||||
unsigned long long FIO_decompressFrame(FILE* foutput, FILE* finput,
|
unsigned long long FIO_decompressFrame(FILE* foutput, FILE* finput,
|
||||||
BYTE* inBuff, size_t inBuffSize, size_t alreadyLoaded,
|
BYTE* inBuff, size_t inBuffSize, size_t alreadyLoaded,
|
||||||
BYTE* outBuff, size_t outBuffSize,
|
BYTE* outBuff, size_t outBuffSize,
|
||||||
|
BYTE* dictBuff, size_t dictSize,
|
||||||
ZBUFF_DCtx* dctx)
|
ZBUFF_DCtx* dctx)
|
||||||
{
|
{
|
||||||
U64 frameSize = 0;
|
U64 frameSize = 0;
|
||||||
@ -329,6 +360,7 @@ unsigned long long FIO_decompressFrame(FILE* foutput, FILE* finput,
|
|||||||
|
|
||||||
/* Main decompression Loop */
|
/* Main decompression Loop */
|
||||||
ZBUFF_decompressInit(dctx);
|
ZBUFF_decompressInit(dctx);
|
||||||
|
ZBUFF_decompressWithDictionary(dctx, dictBuff, dictSize);
|
||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
/* Decode */
|
/* Decode */
|
||||||
@ -359,16 +391,42 @@ unsigned long long FIO_decompressFrame(FILE* foutput, FILE* finput,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
unsigned long long FIO_decompressFilename(const char* output_filename, const char* input_filename)
|
unsigned long long FIO_decompressFilename(const char* output_filename, const char* input_filename, const char* dictFileName)
|
||||||
{
|
{
|
||||||
FILE* finput, *foutput;
|
FILE* finput, *foutput;
|
||||||
BYTE* inBuff=NULL;
|
BYTE* inBuff=NULL;
|
||||||
size_t inBuffSize = ZBUFF_recommendedDInSize();
|
size_t inBuffSize = ZBUFF_recommendedDInSize();
|
||||||
BYTE* outBuff=NULL;
|
BYTE* outBuff=NULL;
|
||||||
size_t outBuffSize = ZBUFF_recommendedDOutSize();
|
size_t outBuffSize = ZBUFF_recommendedDOutSize();
|
||||||
|
BYTE* dictBuff=NULL;
|
||||||
|
size_t dictSize = 0;
|
||||||
U64 filesize = 0;
|
U64 filesize = 0;
|
||||||
size_t toRead;
|
size_t toRead;
|
||||||
|
|
||||||
|
/* dictionary */
|
||||||
|
if (dictFileName)
|
||||||
|
{
|
||||||
|
FILE* dictHandle;
|
||||||
|
size_t read;
|
||||||
|
DISPLAYLEVEL(4,"Using %s as dictionary \n", dictFileName);
|
||||||
|
dictHandle = fopen(dictFileName, "rb");
|
||||||
|
if (dictHandle==0) EXM_THROW(21, "Error opening dictionary file %s", dictFileName);
|
||||||
|
dictSize = FIO_getFileSize(dictFileName);
|
||||||
|
if (dictSize > MAX_DICT_SIZE)
|
||||||
|
{
|
||||||
|
int seekResult;
|
||||||
|
if (dictSize > 1 GB) EXM_THROW(21, "Dictionary file %s is too large", dictFileName); /* avoid extreme cases */
|
||||||
|
DISPLAYLEVEL(2,"Dictionary %s is too large : using last %u bytes only \n", dictFileName, MAX_DICT_SIZE);
|
||||||
|
seekResult = fseek(dictHandle, dictSize-MAX_DICT_SIZE, SEEK_SET); /* use end of file */
|
||||||
|
if (seekResult != 0) EXM_THROW(21, "Error seeking into dictionary file %s", dictFileName);
|
||||||
|
dictSize = MAX_DICT_SIZE;
|
||||||
|
}
|
||||||
|
dictBuff = (BYTE*)malloc(dictSize);
|
||||||
|
if (dictBuff==NULL) EXM_THROW(20, "Allocation error : not enough memory for dictBuff");
|
||||||
|
read = fread(dictBuff, 1, (size_t)dictSize, dictHandle);
|
||||||
|
if (read!=dictSize) EXM_THROW(21, "Error reading dictionary file %s", dictFileName);
|
||||||
|
fclose(dictHandle);
|
||||||
|
}
|
||||||
|
|
||||||
/* Init */
|
/* Init */
|
||||||
ZBUFF_DCtx* dctx = ZBUFF_createDCtx();
|
ZBUFF_DCtx* dctx = ZBUFF_createDCtx();
|
||||||
@ -396,7 +454,11 @@ unsigned long long FIO_decompressFilename(const char* output_filename, const cha
|
|||||||
}
|
}
|
||||||
#endif /* ZSTD_LEGACY_SUPPORT */
|
#endif /* ZSTD_LEGACY_SUPPORT */
|
||||||
|
|
||||||
filesize += FIO_decompressFrame(foutput, finput, inBuff, inBuffSize, toRead, outBuff, outBuffSize, dctx);
|
filesize += FIO_decompressFrame(foutput, finput,
|
||||||
|
inBuff, inBuffSize, toRead,
|
||||||
|
outBuff, outBuffSize,
|
||||||
|
dictBuff, dictSize,
|
||||||
|
dctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
DISPLAYLEVEL(2, "\r%79s\r", "");
|
DISPLAYLEVEL(2, "\r%79s\r", "");
|
||||||
@ -405,6 +467,7 @@ unsigned long long FIO_decompressFilename(const char* output_filename, const cha
|
|||||||
/* clean */
|
/* clean */
|
||||||
free(inBuff);
|
free(inBuff);
|
||||||
free(outBuff);
|
free(outBuff);
|
||||||
|
free(dictBuff);
|
||||||
ZBUFF_freeDCtx(dctx);
|
ZBUFF_freeDCtx(dctx);
|
||||||
fclose(finput);
|
fclose(finput);
|
||||||
if (fclose(foutput)) EXM_THROW(38, "Write error : cannot properly close %s", output_filename);
|
if (fclose(foutput)) EXM_THROW(38, "Write error : cannot properly close %s", output_filename);
|
||||||
|
@ -52,8 +52,8 @@ void FIO_setNotificationLevel(unsigned level);
|
|||||||
/* *************************************
|
/* *************************************
|
||||||
* Stream/File functions
|
* Stream/File functions
|
||||||
***************************************/
|
***************************************/
|
||||||
unsigned long long FIO_compressFilename (const char* outfilename, const char* infilename, int compressionLevel);
|
unsigned long long FIO_compressFilename (const char* outfilename, const char* infilename, const char* dictFileName, int compressionLevel);
|
||||||
unsigned long long FIO_decompressFilename (const char* outfilename, const char* infilename);
|
unsigned long long FIO_decompressFilename (const char* outfilename, const char* infilename, const char* dictFileName);
|
||||||
/**
|
/**
|
||||||
FIO_compressFilename :
|
FIO_compressFilename :
|
||||||
@result : size of compressed file
|
@result : size of compressed file
|
||||||
|
@ -42,6 +42,13 @@ echo "**** flush write error test **** "
|
|||||||
echo foo | $ZSTD > /dev/full && die "write error not detected!"
|
echo foo | $ZSTD > /dev/full && die "write error not detected!"
|
||||||
echo foo | $ZSTD | $ZSTD -d > /dev/full && die "write error not detected!"
|
echo foo | $ZSTD | $ZSTD -d > /dev/full && die "write error not detected!"
|
||||||
|
|
||||||
|
echo "*** dictionary tests *** "
|
||||||
|
|
||||||
|
./datagen > tmpDict
|
||||||
|
./datagen -g1M | md5sum > tmp1
|
||||||
|
./datagen -g1M | ./zstd -D tmpDict | ./zstd -D tmpDict -dv | md5sum > tmp2
|
||||||
|
diff -q tmp1 tmp2
|
||||||
|
|
||||||
echo "**** zstd round-trip tests **** "
|
echo "**** zstd round-trip tests **** "
|
||||||
|
|
||||||
roundTripTest
|
roundTripTest
|
||||||
|
@ -118,8 +118,7 @@ static int usage(const char* programName)
|
|||||||
DISPLAY( "input : a filename\n");
|
DISPLAY( "input : a filename\n");
|
||||||
DISPLAY( " with no FILE, or when FILE is - , read standard input\n");
|
DISPLAY( " with no FILE, or when FILE is - , read standard input\n");
|
||||||
DISPLAY( "Arguments :\n");
|
DISPLAY( "Arguments :\n");
|
||||||
DISPLAY( " -1 : Fast compression (default) \n");
|
DISPLAY( " -# : # compression level (1-19, default:1) \n");
|
||||||
DISPLAY( " -19 : High compression \n");
|
|
||||||
DISPLAY( " -d : decompression (default for %s extension)\n", ZSTD_EXTENSION);
|
DISPLAY( " -d : decompression (default for %s extension)\n", ZSTD_EXTENSION);
|
||||||
//DISPLAY( " -z : force compression\n");
|
//DISPLAY( " -z : force compression\n");
|
||||||
DISPLAY( " -f : overwrite output without prompting \n");
|
DISPLAY( " -f : overwrite output without prompting \n");
|
||||||
@ -137,6 +136,7 @@ static int usage_advanced(const char* programName)
|
|||||||
DISPLAY( " -v : verbose mode\n");
|
DISPLAY( " -v : verbose mode\n");
|
||||||
DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n");
|
DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n");
|
||||||
DISPLAY( " -c : force write to standard output, even if it is the console\n");
|
DISPLAY( " -c : force write to standard output, even if it is the console\n");
|
||||||
|
DISPLAY( " -D file: use file content as Dictionary \n");
|
||||||
//DISPLAY( " -t : test compressed file integrity\n");
|
//DISPLAY( " -t : test compressed file integrity\n");
|
||||||
#ifndef ZSTD_NOBENCH
|
#ifndef ZSTD_NOBENCH
|
||||||
DISPLAY( "Benchmark arguments :\n");
|
DISPLAY( "Benchmark arguments :\n");
|
||||||
@ -171,11 +171,13 @@ int main(int argCount, const char** argv)
|
|||||||
bench=0,
|
bench=0,
|
||||||
decode=0,
|
decode=0,
|
||||||
forceStdout=0,
|
forceStdout=0,
|
||||||
main_pause=0;
|
main_pause=0,
|
||||||
|
nextEntryIsDictionary=0;
|
||||||
unsigned cLevel = 1;
|
unsigned cLevel = 1;
|
||||||
const char* programName = argv[0];
|
const char* programName = argv[0];
|
||||||
const char* inFileName = NULL;
|
const char* inFileName = NULL;
|
||||||
const char* outFileName = NULL;
|
const char* outFileName = NULL;
|
||||||
|
const char* dictFileName = NULL;
|
||||||
char* dynNameSpace = NULL;
|
char* dynNameSpace = NULL;
|
||||||
const char extension[] = ZSTD_EXTENSION;
|
const char extension[] = ZSTD_EXTENSION;
|
||||||
unsigned fileNameStart = 0;
|
unsigned fileNameStart = 0;
|
||||||
@ -249,8 +251,11 @@ int main(int argCount, const char** argv)
|
|||||||
/* Force stdout, even if stdout==console */
|
/* Force stdout, even if stdout==console */
|
||||||
case 'c': forceStdout=1; outFileName=stdoutmark; displayLevel=1; argument++; break;
|
case 'c': forceStdout=1; outFileName=stdoutmark; displayLevel=1; argument++; break;
|
||||||
|
|
||||||
// Test
|
/* Use file content as dictionary */
|
||||||
//case 't': decode=1; LZ4IO_setOverwrite(1); output_filename=nulmark; break;
|
case 'D': nextEntryIsDictionary = 1; argument++; break;
|
||||||
|
|
||||||
|
/* Test -- not implemented */
|
||||||
|
/* case 't': decode=1; LZ4IO_setOverwrite(1); output_filename=nulmark; break; */
|
||||||
|
|
||||||
/* Overwrite */
|
/* Overwrite */
|
||||||
case 'f': FIO_overwriteMode(); argument++; break;
|
case 'f': FIO_overwriteMode(); argument++; break;
|
||||||
@ -261,7 +266,7 @@ int main(int argCount, const char** argv)
|
|||||||
/* Quiet mode */
|
/* Quiet mode */
|
||||||
case 'q': displayLevel--; argument++; break;
|
case 'q': displayLevel--; argument++; break;
|
||||||
|
|
||||||
/* keep source file (default anyway, so useless; only for xz/lzma compatibility) */
|
/* keep source file (default anyway, so useless; for gzip/xz compatibility) */
|
||||||
case 'k': argument++; break;
|
case 'k': argument++; break;
|
||||||
|
|
||||||
#ifndef ZSTD_NOBENCH
|
#ifndef ZSTD_NOBENCH
|
||||||
@ -310,6 +315,14 @@ int main(int argCount, const char** argv)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* dictionary */
|
||||||
|
if (nextEntryIsDictionary)
|
||||||
|
{
|
||||||
|
nextEntryIsDictionary = 0;
|
||||||
|
dictFileName = argument;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
/* first provided filename is input */
|
/* first provided filename is input */
|
||||||
if (!inFileName) { inFileName = argument; fileNameStart = i; nbFiles = argCount-i; continue; }
|
if (!inFileName) { inFileName = argument; fileNameStart = i; nbFiles = argCount-i; continue; }
|
||||||
|
|
||||||
@ -381,9 +394,9 @@ int main(int argCount, const char** argv)
|
|||||||
/* IO Stream/File */
|
/* IO Stream/File */
|
||||||
FIO_setNotificationLevel(displayLevel);
|
FIO_setNotificationLevel(displayLevel);
|
||||||
if (decode)
|
if (decode)
|
||||||
FIO_decompressFilename(outFileName, inFileName);
|
FIO_decompressFilename(outFileName, inFileName, dictFileName);
|
||||||
else
|
else
|
||||||
FIO_compressFilename(outFileName, inFileName, cLevel);
|
FIO_compressFilename(outFileName, inFileName, dictFileName, cLevel);
|
||||||
|
|
||||||
_end:
|
_end:
|
||||||
if (main_pause) waitEnter();
|
if (main_pause) waitEnter();
|
||||||
|
Reference in New Issue
Block a user