mirror of
https://github.com/facebook/zstd.git
synced 2025-07-29 11:21:22 +03:00
Add xz and lzma support.
Finish feature started by @inikep. * Add xz and lzma compression and decompression support to target `xzstd`. * Fix bug in gzip decompression that silently accepted truncated files. * Add gzip frame composition tests. * Add xz/lzma compatibility tests. * Add xz/lzma frame composition tests.
This commit is contained in:
@ -44,6 +44,9 @@
|
||||
# define z_const
|
||||
# endif
|
||||
#endif
|
||||
#if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS)
|
||||
# include <lzma.h>
|
||||
#endif
|
||||
|
||||
|
||||
/*-*************************************
|
||||
@ -71,7 +74,6 @@
|
||||
#define MAX_DICT_SIZE (8 MB) /* protection against large input (attack scenario) */
|
||||
|
||||
#define FNSPACE 30
|
||||
#define GZ_EXTENSION ".gz"
|
||||
|
||||
|
||||
/*-*************************************
|
||||
@ -434,6 +436,65 @@ static unsigned long long FIO_compressGzFrame(cRess_t* ress, const char* srcFile
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef ZSTD_LZMACOMPRESS
|
||||
/*! FIO_compressLzmaFrame() :
 *  Reads `ress->srcFile` until fread() returns 0 and writes the liblzma-compressed
 *  result to `ress->dstFile`.
 *  The output is an xz stream (CRC64 check), or a legacy .lzma stream when
 *  `plain_lzma` is non-zero.
 *  `compressionLevel` is clamped to the [0, 9] preset range before use.
 *  `srcFileSize` only selects the progress message format (0 : total not shown).
 *  `*readsize` receives the number of bytes read from the source.
 *  Encoder and write failures are reported through EXM_THROW().
 * @return : number of compressed bytes written to `ress->dstFile`.
 */
static unsigned long long FIO_compressLzmaFrame(cRess_t* ress, const char* srcFileName, U64 const srcFileSize, int compressionLevel, U64* readsize, int plain_lzma)
{
    unsigned long long inFileSize = 0, outFileSize = 0;
    lzma_stream strm = LZMA_STREAM_INIT;
    lzma_action action = LZMA_RUN;
    lzma_ret ret;

    if (compressionLevel < 0) compressionLevel = 0;
    if (compressionLevel > 9) compressionLevel = 9;

    if (plain_lzma) {
        lzma_options_lzma opt_lzma;
        if (lzma_lzma_preset(&opt_lzma, compressionLevel)) EXM_THROW(71, "zstd: %s: lzma_lzma_preset error", srcFileName);
        ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */
        if (ret != LZMA_OK) EXM_THROW(71, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret);
    } else {
        ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */
        if (ret != LZMA_OK) EXM_THROW(71, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret);
    }

    strm.next_in = NULL;
    strm.avail_in = 0;
    strm.next_out = ress->dstBuffer;
    strm.avail_out = ress->dstBufferSize;

    while (1) {
        /* refill the input buffer once the encoder has consumed it;
         * an empty read switches the encoder to LZMA_FINISH so it flushes and terminates the stream */
        if (strm.avail_in == 0) {
            size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile);
            if (inSize == 0) action = LZMA_FINISH;
            inFileSize += inSize;
            strm.next_in = ress->srcBuffer;
            strm.avail_in = inSize;
        }

        ret = lzma_code(&strm, action);

        if (ret != LZMA_OK && ret != LZMA_STREAM_END) EXM_THROW(72, "zstd: %s: lzma_code encoding error %d", srcFileName, ret);

        /* flush whatever the encoder produced, then hand it the whole output buffer again */
        {   size_t const compBytes = ress->dstBufferSize - strm.avail_out;
            if (compBytes) {
                if (fwrite(ress->dstBuffer, 1, compBytes, ress->dstFile) != compBytes) EXM_THROW(73, "Write error : cannot write to output file");
                outFileSize += compBytes;
                strm.next_out = ress->dstBuffer;
                strm.avail_out = ress->dstBufferSize;
            }
        }

        /* progress display :
         * - an empty source leaves inFileSize at 0, so the ratio is guarded against division by zero
         * - both branches are braced so that the `else` cannot attach to an `if`
         *   hidden inside DISPLAYUPDATE (macro body not visible here -- TODO confirm) */
        {   double const ratio = inFileSize ? (double)outFileSize/inFileSize*100 : 0.0;
            if (!srcFileSize) {
                DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", (U32)(inFileSize>>20), ratio);
            } else {
                DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", (U32)(inFileSize>>20), (U32)(srcFileSize>>20), ratio);
            }
        }

        if (ret == LZMA_STREAM_END) break;
    }

    lzma_end(&strm);
    *readsize = inFileSize;

    return outFileSize;
}
|
||||
#endif
|
||||
|
||||
|
||||
/*! FIO_compressFilename_internal() :
|
||||
* same as FIO_compressFilename_extRess(), with `ress.dstFile` already opened.
|
||||
* @return : 0 : compression completed correctly,
|
||||
@ -448,14 +509,26 @@ static int FIO_compressFilename_internal(cRess_t ress,
|
||||
U64 compressedfilesize = 0;
|
||||
U64 const fileSize = UTIL_getFileSize(srcFileName);
|
||||
|
||||
if (g_compressionType) {
|
||||
switch (g_compressionType) {
|
||||
case FIO_zstdCompression:
|
||||
break;
|
||||
case FIO_gzipCompression:
|
||||
#ifdef ZSTD_GZCOMPRESS
|
||||
compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
|
||||
compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
|
||||
#else
|
||||
(void)compressionLevel;
|
||||
EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n", srcFileName);
|
||||
(void)compressionLevel;
|
||||
EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n", srcFileName);
|
||||
#endif
|
||||
goto finish;
|
||||
goto finish;
|
||||
case FIO_xzCompression:
|
||||
case FIO_lzmaCompression:
|
||||
#ifdef ZSTD_LZMACOMPRESS
|
||||
compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, g_compressionType==FIO_lzmaCompression);
|
||||
#else
|
||||
(void)compressionLevel;
|
||||
EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n", srcFileName);
|
||||
#endif
|
||||
goto finish;
|
||||
}
|
||||
|
||||
/* init */
|
||||
@ -763,10 +836,10 @@ static void FIO_fwriteSparseEnd(FILE* file, unsigned storedSkips)
|
||||
{
|
||||
if (storedSkips-->0) { /* implies g_sparseFileSupport>0 */
|
||||
int const seekResult = LONG_SEEK(file, storedSkips, SEEK_CUR);
|
||||
if (seekResult != 0) EXM_THROW(69, "Final skip error (sparse file)\n");
|
||||
if (seekResult != 0) EXM_THROW(69, "Final skip error (sparse file)");
|
||||
{ const char lastZeroByte[1] = { 0 };
|
||||
size_t const sizeCheck = fwrite(lastZeroByte, 1, 1, file);
|
||||
if (sizeCheck != 1) EXM_THROW(69, "Write error : cannot write last zero\n");
|
||||
if (sizeCheck != 1) EXM_THROW(69, "Write error : cannot write last zero");
|
||||
} }
|
||||
}
|
||||
|
||||
@ -849,6 +922,7 @@ static unsigned long long FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, co
|
||||
{
|
||||
unsigned long long outFileSize = 0;
|
||||
z_stream strm;
|
||||
int flush = Z_NO_FLUSH;
|
||||
int ret;
|
||||
|
||||
strm.zalloc = Z_NULL;
|
||||
@ -866,11 +940,12 @@ static unsigned long long FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, co
|
||||
for ( ; ; ) {
|
||||
if (strm.avail_in == 0) {
|
||||
ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
|
||||
if (ress->srcBufferLoaded == 0) break;
|
||||
if (ress->srcBufferLoaded == 0) flush = Z_FINISH;
|
||||
strm.next_in = (z_const unsigned char*)ress->srcBuffer;
|
||||
strm.avail_in = (uInt)ress->srcBufferLoaded;
|
||||
}
|
||||
ret = inflate(&strm, Z_NO_FLUSH);
|
||||
ret = inflate(&strm, flush);
|
||||
if (ret == Z_BUF_ERROR) EXM_THROW(39, "zstd: %s: premature end", srcFileName);
|
||||
if (ret != Z_OK && ret != Z_STREAM_END) { DISPLAY("zstd: %s: inflate error %d \n", srcFileName, ret); return 0; }
|
||||
{ size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
|
||||
if (decompBytes) {
|
||||
@ -886,7 +961,60 @@ static unsigned long long FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, co
|
||||
if (strm.avail_in > 0) memmove(ress->srcBuffer, strm.next_in, strm.avail_in);
|
||||
ress->srcBufferLoaded = strm.avail_in;
|
||||
ret = inflateEnd(&strm);
|
||||
if (ret != Z_OK) EXM_THROW(32, "zstd: %s: inflateEnd error %d \n", srcFileName, ret);
|
||||
if (ret != Z_OK) EXM_THROW(32, "zstd: %s: inflateEnd error %d", srcFileName, ret);
|
||||
return outFileSize;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef ZSTD_LZMADECOMPRESS
|
||||
/*! FIO_decompressLzmaFrame() :
 *  Decodes one xz stream, or one legacy .lzma stream when `plain_lzma` is non-zero,
 *  and writes the decoded bytes to `ress->dstFile`.
 *  Input starts with the `ress->srcBufferLoaded` bytes already present in
 *  `ress->srcBuffer`, then continues with data read from `srcFile`.
 *  On success, input bytes left over after the end of the stream are moved to the
 *  start of `ress->srcBuffer` and `ress->srcBufferLoaded` is set to their count.
 *  Decoder initialisation failures, truncated input (LZMA_BUF_ERROR) and write
 *  failures are reported through EXM_THROW().
 * @return : number of decoded bytes written,
 *           or 0 after displaying a message when lzma_code() reports any other error.
 */
static unsigned long long FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile, const char* srcFileName, int plain_lzma)
{
    unsigned long long outFileSize = 0;
    lzma_stream strm = LZMA_STREAM_INIT;
    lzma_action action = LZMA_RUN;
    lzma_ret ret;

    if (plain_lzma) {
        ret = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */
    } else {
        ret = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */
    }

    if (ret != LZMA_OK) EXM_THROW(71, "zstd: %s: lzma_alone_decoder/lzma_stream_decoder error %d", srcFileName, ret);

    strm.next_out = ress->dstBuffer;
    strm.avail_out = ress->dstBufferSize;
    /* start with the bytes already loaded in the source buffer */
    strm.avail_in = ress->srcBufferLoaded;
    strm.next_in = ress->srcBuffer;

    for ( ; ; ) {
        /* refill once the decoder has consumed the buffer;
         * an empty read switches to LZMA_FINISH so that a truncated stream is reported instead of accepted */
        if (strm.avail_in == 0) {
            ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
            if (ress->srcBufferLoaded == 0) action = LZMA_FINISH;
            strm.next_in = ress->srcBuffer;
            strm.avail_in = ress->srcBufferLoaded;
        }
        ret = lzma_code(&strm, action);

        if (ret == LZMA_BUF_ERROR) EXM_THROW(39, "zstd: %s: premature end", srcFileName);
        if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
            DISPLAY("zstd: %s: lzma_code decoding error %d \n", srcFileName, ret);
            lzma_end(&strm);   /* release the decoder state before the early return */
            return 0;
        }
        /* flush whatever the decoder produced, then hand it the whole output buffer again */
        {   size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
            if (decompBytes) {
                if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) EXM_THROW(31, "Write error : cannot write to output file");
                outFileSize += decompBytes;
                strm.next_out = ress->dstBuffer;
                strm.avail_out = ress->dstBufferSize;
            }
        }
        if (ret == LZMA_STREAM_END) break;
    }

    /* keep the unconsumed input at the front of the source buffer */
    if (strm.avail_in > 0) memmove(ress->srcBuffer, strm.next_in, strm.avail_in);
    ress->srcBufferLoaded = strm.avail_in;
    lzma_end(&strm);
    return outFileSize;
}
|
||||
#endif
|
||||
@ -924,7 +1052,7 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* dstFileName, const ch
|
||||
}
|
||||
readSomething = 1; /* there is at least >= 4 bytes in srcFile */
|
||||
if (ress.srcBufferLoaded < toRead) { DISPLAY("zstd: %s: unknown header \n", srcFileName); fclose(srcFile); return 1; } /* srcFileName is empty */
|
||||
if (buf[0] == 31 && buf[1] == 139) { /* gz header */
|
||||
if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */
|
||||
#ifdef ZSTD_GZDECOMPRESS
|
||||
unsigned long long const result = FIO_decompressGzFrame(&ress, srcFile, srcFileName);
|
||||
if (result == 0) return 1;
|
||||
@ -932,6 +1060,16 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* dstFileName, const ch
|
||||
#else
|
||||
DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled without ZSTD_GZDECOMPRESS) -- ignored \n", srcFileName);
|
||||
return 1;
|
||||
#endif
|
||||
} else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */
|
||||
|| (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */
|
||||
#ifdef ZSTD_LZMADECOMPRESS
|
||||
unsigned long long const result = FIO_decompressLzmaFrame(&ress, srcFile, srcFileName, buf[0] != 0xFD);
|
||||
if (result == 0) return 1;
|
||||
filesize += result;
|
||||
#else
|
||||
DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without ZSTD_LZMADECOMPRESS) -- ignored \n", srcFileName);
|
||||
return 1;
|
||||
#endif
|
||||
} else {
|
||||
if (!ZSTD_isFrame(ress.srcBuffer, toRead)) {
|
||||
@ -1020,32 +1158,31 @@ int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles
|
||||
missingFiles += FIO_decompressSrcFile(ress, suffix, srcNamesTable[u]);
|
||||
if (fclose(ress.dstFile)) EXM_THROW(72, "Write error : cannot properly close stdout");
|
||||
} else {
|
||||
size_t const suffixSize = strlen(suffix);
|
||||
size_t const gzipSuffixSize = strlen(GZ_EXTENSION);
|
||||
size_t suffixSize;
|
||||
size_t dfnSize = FNSPACE;
|
||||
unsigned u;
|
||||
char* dstFileName = (char*)malloc(FNSPACE);
|
||||
if (dstFileName==NULL) EXM_THROW(73, "not enough memory for dstFileName");
|
||||
for (u=0; u<nbFiles; u++) { /* create dstFileName */
|
||||
const char* const srcFileName = srcNamesTable[u];
|
||||
const char* const suffixPtr = strrchr(srcFileName, '.');
|
||||
size_t const sfnSize = strlen(srcFileName);
|
||||
const char* const suffixPtr = srcFileName + sfnSize - suffixSize;
|
||||
const char* const gzipSuffixPtr = srcFileName + sfnSize - gzipSuffixSize;
|
||||
if (!suffixPtr) {
|
||||
DISPLAYLEVEL(1, "zstd: %s: unknown suffix -- ignored \n", srcFileName);
|
||||
skippedFiles++;
|
||||
continue;
|
||||
}
|
||||
suffixSize = strlen(suffixPtr);
|
||||
if (dfnSize+suffixSize <= sfnSize+1) {
|
||||
free(dstFileName);
|
||||
dfnSize = sfnSize + 20;
|
||||
dstFileName = (char*)malloc(dfnSize);
|
||||
if (dstFileName==NULL) EXM_THROW(74, "not enough memory for dstFileName");
|
||||
}
|
||||
if (sfnSize <= suffixSize || strcmp(suffixPtr, suffix) != 0) {
|
||||
if (sfnSize <= gzipSuffixSize || strcmp(gzipSuffixPtr, GZ_EXTENSION) != 0) {
|
||||
DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s/%s expected) -- ignored \n", srcFileName, suffix, GZ_EXTENSION);
|
||||
skippedFiles++;
|
||||
continue;
|
||||
} else {
|
||||
memcpy(dstFileName, srcFileName, sfnSize - gzipSuffixSize);
|
||||
dstFileName[sfnSize-gzipSuffixSize] = '\0';
|
||||
}
|
||||
if (sfnSize <= suffixSize || (strcmp(suffixPtr, GZ_EXTENSION) && strcmp(suffixPtr, XZ_EXTENSION) && strcmp(suffixPtr, ZSTD_EXTENSION) && strcmp(suffixPtr, LZMA_EXTENSION))) {
|
||||
DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s/%s/%s/%s expected) -- ignored \n", srcFileName, GZ_EXTENSION, XZ_EXTENSION, ZSTD_EXTENSION, LZMA_EXTENSION);
|
||||
skippedFiles++;
|
||||
continue;
|
||||
} else {
|
||||
memcpy(dstFileName, srcFileName, sfnSize - suffixSize);
|
||||
dstFileName[sfnSize-suffixSize] = '\0';
|
||||
|
Reference in New Issue
Block a user