1
0
mirror of https://github.com/facebook/zstd.git synced 2025-07-29 11:21:22 +03:00

Add xz and lzma support.

Finish feature started by @inikep.

* Add xz and lzma compression and decompression support to target `xzstd`.
* Fix bug in gzip decompression that silently accepted truncated files.
* Add gzip frame composition tests.
* Add xz/lzma compatibility tests.
* Add xz/lzma frame composition tests.
This commit is contained in:
Nick Terrell
2017-03-13 18:11:07 -07:00
parent a296c6646a
commit aa8bcf360f
5 changed files with 261 additions and 36 deletions

View File

@ -44,6 +44,9 @@
# define z_const
# endif
#endif
#if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS)
# include <lzma.h>
#endif
/*-*************************************
@ -71,7 +74,6 @@
#define MAX_DICT_SIZE (8 MB) /* protection against large input (attack scenario) */
#define FNSPACE 30
#define GZ_EXTENSION ".gz"
/*-*************************************
@ -434,6 +436,65 @@ static unsigned long long FIO_compressGzFrame(cRess_t* ress, const char* srcFile
#endif
#ifdef ZSTD_LZMACOMPRESS
static unsigned long long FIO_compressLzmaFrame(cRess_t* ress, const char* srcFileName, U64 const srcFileSize, int compressionLevel, U64* readsize, int plain_lzma)
{
unsigned long long inFileSize = 0, outFileSize = 0;
lzma_stream strm = LZMA_STREAM_INIT;
lzma_action action = LZMA_RUN;
lzma_ret ret;
if (compressionLevel < 0) compressionLevel = 0;
if (compressionLevel > 9) compressionLevel = 9;
if (plain_lzma) {
lzma_options_lzma opt_lzma;
if (lzma_lzma_preset(&opt_lzma, compressionLevel)) EXM_THROW(71, "zstd: %s: lzma_lzma_preset error", srcFileName);
ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */
if (ret != LZMA_OK) EXM_THROW(71, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret);
} else {
ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */
if (ret != LZMA_OK) EXM_THROW(71, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret);
}
strm.next_in = 0;
strm.avail_in = 0;
strm.next_out = ress->dstBuffer;
strm.avail_out = ress->dstBufferSize;
while (1) {
if (strm.avail_in == 0) {
size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile);
if (inSize == 0) action = LZMA_FINISH;
inFileSize += inSize;
strm.next_in = ress->srcBuffer;
strm.avail_in = inSize;
}
ret = lzma_code(&strm, action);
if (ret != LZMA_OK && ret != LZMA_STREAM_END) EXM_THROW(72, "zstd: %s: lzma_code encoding error %d", srcFileName, ret);
{ size_t const compBytes = ress->dstBufferSize - strm.avail_out;
if (compBytes) {
if (fwrite(ress->dstBuffer, 1, compBytes, ress->dstFile) != compBytes) EXM_THROW(73, "Write error : cannot write to output file");
outFileSize += compBytes;
strm.next_out = ress->dstBuffer;
strm.avail_out = ress->dstBufferSize;
}
}
if (!srcFileSize) DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", (U32)(inFileSize>>20), (double)outFileSize/inFileSize*100)
else DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", (U32)(inFileSize>>20), (U32)(srcFileSize>>20), (double)outFileSize/inFileSize*100);
if (ret == LZMA_STREAM_END) break;
}
lzma_end(&strm);
*readsize = inFileSize;
return outFileSize;
}
#endif
/*! FIO_compressFilename_internal() :
* same as FIO_compressFilename_extRess(), with `ress.desFile` already opened.
* @return : 0 : compression completed correctly,
@ -448,14 +509,26 @@ static int FIO_compressFilename_internal(cRess_t ress,
U64 compressedfilesize = 0;
U64 const fileSize = UTIL_getFileSize(srcFileName);
if (g_compressionType) {
switch (g_compressionType) {
case FIO_zstdCompression:
break;
case FIO_gzipCompression:
#ifdef ZSTD_GZCOMPRESS
compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
#else
(void)compressionLevel;
EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n", srcFileName);
(void)compressionLevel;
EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n", srcFileName);
#endif
goto finish;
goto finish;
case FIO_xzCompression:
case FIO_lzmaCompression:
#ifdef ZSTD_LZMACOMPRESS
compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, g_compressionType==FIO_lzmaCompression);
#else
(void)compressionLevel;
EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n", srcFileName);
#endif
goto finish;
}
/* init */
@ -763,10 +836,10 @@ static void FIO_fwriteSparseEnd(FILE* file, unsigned storedSkips)
{
if (storedSkips-->0) { /* implies g_sparseFileSupport>0 */
int const seekResult = LONG_SEEK(file, storedSkips, SEEK_CUR);
if (seekResult != 0) EXM_THROW(69, "Final skip error (sparse file)\n");
if (seekResult != 0) EXM_THROW(69, "Final skip error (sparse file)");
{ const char lastZeroByte[1] = { 0 };
size_t const sizeCheck = fwrite(lastZeroByte, 1, 1, file);
if (sizeCheck != 1) EXM_THROW(69, "Write error : cannot write last zero\n");
if (sizeCheck != 1) EXM_THROW(69, "Write error : cannot write last zero");
} }
}
@ -849,6 +922,7 @@ static unsigned long long FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, co
{
unsigned long long outFileSize = 0;
z_stream strm;
int flush = Z_NO_FLUSH;
int ret;
strm.zalloc = Z_NULL;
@ -866,11 +940,12 @@ static unsigned long long FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, co
for ( ; ; ) {
if (strm.avail_in == 0) {
ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
if (ress->srcBufferLoaded == 0) break;
if (ress->srcBufferLoaded == 0) flush = Z_FINISH;
strm.next_in = (z_const unsigned char*)ress->srcBuffer;
strm.avail_in = (uInt)ress->srcBufferLoaded;
}
ret = inflate(&strm, Z_NO_FLUSH);
ret = inflate(&strm, flush);
if (ret == Z_BUF_ERROR) EXM_THROW(39, "zstd: %s: premature end", srcFileName);
if (ret != Z_OK && ret != Z_STREAM_END) { DISPLAY("zstd: %s: inflate error %d \n", srcFileName, ret); return 0; }
{ size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
if (decompBytes) {
@ -886,7 +961,60 @@ static unsigned long long FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, co
if (strm.avail_in > 0) memmove(ress->srcBuffer, strm.next_in, strm.avail_in);
ress->srcBufferLoaded = strm.avail_in;
ret = inflateEnd(&strm);
if (ret != Z_OK) EXM_THROW(32, "zstd: %s: inflateEnd error %d \n", srcFileName, ret);
if (ret != Z_OK) EXM_THROW(32, "zstd: %s: inflateEnd error %d", srcFileName, ret);
return outFileSize;
}
#endif
#ifdef ZSTD_LZMADECOMPRESS
static unsigned long long FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile, const char* srcFileName, int plain_lzma)
{
unsigned long long outFileSize = 0;
lzma_stream strm = LZMA_STREAM_INIT;
lzma_action action = LZMA_RUN;
lzma_ret ret;
strm.next_in = 0;
strm.avail_in = 0;
if (plain_lzma) {
ret = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */
} else {
ret = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */
}
if (ret != LZMA_OK) EXM_THROW(71, "zstd: %s: lzma_alone_decoder/lzma_stream_decoder error %d", srcFileName, ret);
strm.next_out = ress->dstBuffer;
strm.avail_out = ress->dstBufferSize;
strm.avail_in = ress->srcBufferLoaded;
strm.next_in = ress->srcBuffer;
for ( ; ; ) {
if (strm.avail_in == 0) {
ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
if (ress->srcBufferLoaded == 0) action = LZMA_FINISH;
strm.next_in = ress->srcBuffer;
strm.avail_in = ress->srcBufferLoaded;
}
ret = lzma_code(&strm, action);
if (ret == LZMA_BUF_ERROR) EXM_THROW(39, "zstd: %s: premature end", srcFileName);
if (ret != LZMA_OK && ret != LZMA_STREAM_END) { DISPLAY("zstd: %s: lzma_code decoding error %d \n", srcFileName, ret); return 0; }
{ size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
if (decompBytes) {
if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) EXM_THROW(31, "Write error : cannot write to output file");
outFileSize += decompBytes;
strm.next_out = ress->dstBuffer;
strm.avail_out = ress->dstBufferSize;
}
}
if (ret == LZMA_STREAM_END) break;
}
if (strm.avail_in > 0) memmove(ress->srcBuffer, strm.next_in, strm.avail_in);
ress->srcBufferLoaded = strm.avail_in;
lzma_end(&strm);
return outFileSize;
}
#endif
@ -924,7 +1052,7 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* dstFileName, const ch
}
readSomething = 1; /* there is at least >= 4 bytes in srcFile */
if (ress.srcBufferLoaded < toRead) { DISPLAY("zstd: %s: unknown header \n", srcFileName); fclose(srcFile); return 1; } /* srcFileName is empty */
if (buf[0] == 31 && buf[1] == 139) { /* gz header */
if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */
#ifdef ZSTD_GZDECOMPRESS
unsigned long long const result = FIO_decompressGzFrame(&ress, srcFile, srcFileName);
if (result == 0) return 1;
@ -932,6 +1060,16 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* dstFileName, const ch
#else
DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled without ZSTD_GZDECOMPRESS) -- ignored \n", srcFileName);
return 1;
#endif
} else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */
|| (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */
#ifdef ZSTD_LZMADECOMPRESS
unsigned long long const result = FIO_decompressLzmaFrame(&ress, srcFile, srcFileName, buf[0] != 0xFD);
if (result == 0) return 1;
filesize += result;
#else
DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without ZSTD_LZMADECOMPRESS) -- ignored \n", srcFileName);
return 1;
#endif
} else {
if (!ZSTD_isFrame(ress.srcBuffer, toRead)) {
@ -1020,32 +1158,31 @@ int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles
missingFiles += FIO_decompressSrcFile(ress, suffix, srcNamesTable[u]);
if (fclose(ress.dstFile)) EXM_THROW(72, "Write error : cannot properly close stdout");
} else {
size_t const suffixSize = strlen(suffix);
size_t const gzipSuffixSize = strlen(GZ_EXTENSION);
size_t suffixSize;
size_t dfnSize = FNSPACE;
unsigned u;
char* dstFileName = (char*)malloc(FNSPACE);
if (dstFileName==NULL) EXM_THROW(73, "not enough memory for dstFileName");
for (u=0; u<nbFiles; u++) { /* create dstFileName */
const char* const srcFileName = srcNamesTable[u];
const char* const suffixPtr = strrchr(srcFileName, '.');
size_t const sfnSize = strlen(srcFileName);
const char* const suffixPtr = srcFileName + sfnSize - suffixSize;
const char* const gzipSuffixPtr = srcFileName + sfnSize - gzipSuffixSize;
if (!suffixPtr) {
DISPLAYLEVEL(1, "zstd: %s: unknown suffix -- ignored \n", srcFileName);
skippedFiles++;
continue;
}
suffixSize = strlen(suffixPtr);
if (dfnSize+suffixSize <= sfnSize+1) {
free(dstFileName);
dfnSize = sfnSize + 20;
dstFileName = (char*)malloc(dfnSize);
if (dstFileName==NULL) EXM_THROW(74, "not enough memory for dstFileName");
}
if (sfnSize <= suffixSize || strcmp(suffixPtr, suffix) != 0) {
if (sfnSize <= gzipSuffixSize || strcmp(gzipSuffixPtr, GZ_EXTENSION) != 0) {
DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s/%s expected) -- ignored \n", srcFileName, suffix, GZ_EXTENSION);
skippedFiles++;
continue;
} else {
memcpy(dstFileName, srcFileName, sfnSize - gzipSuffixSize);
dstFileName[sfnSize-gzipSuffixSize] = '\0';
}
if (sfnSize <= suffixSize || (strcmp(suffixPtr, GZ_EXTENSION) && strcmp(suffixPtr, XZ_EXTENSION) && strcmp(suffixPtr, ZSTD_EXTENSION) && strcmp(suffixPtr, LZMA_EXTENSION))) {
DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s/%s/%s/%s expected) -- ignored \n", srcFileName, GZ_EXTENSION, XZ_EXTENSION, ZSTD_EXTENSION, LZMA_EXTENSION);
skippedFiles++;
continue;
} else {
memcpy(dstFileName, srcFileName, sfnSize - suffixSize);
dstFileName[sfnSize-suffixSize] = '\0';