mirror of
https://github.com/facebook/zstd.git
synced 2025-07-23 14:41:07 +03:00
Fuzzing and bugfixes for magicless-format decoding (#3976)
* fuzzing and bugfixes for magicless format * reset dctx before each decompression * do not memcmp empty buffers * nit: decompressor errata
This commit is contained in:
committed by
GitHub
parent
6a0052a409
commit
741b87bbe1
@ -9,6 +9,7 @@ lib: accept dictionaries with partial literal tables, by @terrelln
|
||||
lib: fix CCtx size estimation with external sequence producer, by @embg
|
||||
lib: fix corner case decoder behaviors, by @Cyan4973 and @aimuz
|
||||
lib: fix zdict prototype mismatch in static_only mode, by @ldv-alt
|
||||
lib: fix several bugs in magicless-format decoding, by @embg
|
||||
cli: add common compressed file types to `--exclude-compressed`` by @daniellerozenblit
|
||||
cli: fix mixing `-c` and `-o` commands with `--rm`, by @Cyan4973
|
||||
cli: fix erroneous exclusion of hidden files with `--output-dir-mirror` by @felixhandte
|
||||
|
@ -125,3 +125,24 @@ The total `Block_Content` is `5` bytes, and `Last_Table_Offset` is `2`.
|
||||
See the compressor workaround code:
|
||||
|
||||
https://github.com/facebook/zstd/blob/8814aa5bfa74f05a86e55e9d508da177a893ceeb/lib/compress/zstd_compress.c#L2667-L2682
|
||||
|
||||
Magicless format
|
||||
----------------------
|
||||
|
||||
**Last affected version**: v1.5.5
|
||||
|
||||
**Affected decompressor component(s)**: Library
|
||||
|
||||
**Produced by the reference compressor**: Yes (example: https://gist.github.com/embg/9940726094f4cf2cef162cffe9319232)
|
||||
|
||||
**Example Frame**: `27 b5 2f fd 00 03 19 00 00 66 6f 6f 3f ba c4 59`
|
||||
|
||||
v1.5.6 fixes several bugs in which the magicless-format decoder rejects valid frames.
|
||||
These include but are not limited to:
|
||||
* Valid frames that happen to begin with a legacy magic number (little-endian)
|
||||
* Valid frames that happen to begin with a skippable magic number (little-endian)
|
||||
|
||||
If you are affected by this issue and cannot update to v1.5.6 or later, there is a
|
||||
workaround to recover affected data. Simply prepend the ZSTD magic number
|
||||
`0xFD2FB528` (little-endian) to your data and decompress using the standard-format
|
||||
decoder.
|
||||
|
@ -1085,7 +1085,7 @@ size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
||||
while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
|
||||
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
|
||||
if (ZSTD_isLegacy(src, srcSize)) {
|
||||
if (dctx->format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize)) {
|
||||
size_t decodedSize;
|
||||
size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
|
||||
if (ZSTD_isError(frameSize)) return frameSize;
|
||||
@ -1115,7 +1115,7 @@ size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
||||
}
|
||||
#endif
|
||||
|
||||
if (srcSize >= 4) {
|
||||
if (dctx->format == ZSTD_f_zstd1 && srcSize >= 4) {
|
||||
U32 const magicNumber = MEM_readLE32(src);
|
||||
DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber);
|
||||
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
|
||||
@ -1412,6 +1412,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
case ZSTDds_decodeSkippableHeader:
|
||||
assert(src != NULL);
|
||||
assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
|
||||
assert(dctx->format != ZSTD_f_zstd1_magicless);
|
||||
ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */
|
||||
dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */
|
||||
dctx->stage = ZSTDds_skipFrame;
|
||||
@ -2209,7 +2210,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
DEBUGLOG(4, "Consume header");
|
||||
FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), "");
|
||||
|
||||
if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
|
||||
if (zds->format == ZSTD_f_zstd1
|
||||
&& (MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
|
||||
zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
|
||||
zds->stage = ZSTDds_skipFrame;
|
||||
} else {
|
||||
|
@ -124,7 +124,8 @@ FUZZ_TARGETS := \
|
||||
sequence_compression_api \
|
||||
seekable_roundtrip \
|
||||
huf_round_trip \
|
||||
huf_decompress
|
||||
huf_decompress \
|
||||
decompress_cross_format
|
||||
|
||||
all: libregression.a $(FUZZ_TARGETS)
|
||||
|
||||
@ -238,6 +239,9 @@ huf_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_huf_round_trip.o
|
||||
|
||||
huf_decompress: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_huf_decompress.o
|
||||
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_huf_decompress.o $(LIB_FUZZING_ENGINE) -o $@
|
||||
|
||||
decompress_cross_format: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_cross_format.o
|
||||
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_cross_format.o $(LIB_FUZZING_ENGINE) -o $@
|
||||
|
||||
libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o
|
||||
$(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o
|
||||
|
130
tests/fuzz/decompress_cross_format.c
Normal file
130
tests/fuzz/decompress_cross_format.c
Normal file
@ -0,0 +1,130 @@
|
||||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
// This fuzz target validates decompression of magicless-format compressed data.
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "fuzz_helpers.h"
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include "zstd.h"
|
||||
#include "fuzz_data_producer.h"
|
||||
|
||||
static ZSTD_DCtx *dctx = NULL;
|
||||
|
||||
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
|
||||
{
|
||||
// Give a random portion of src data to the producer, to use for parameter generation.
|
||||
// The rest will be interpreted as magicless compressed data.
|
||||
FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
|
||||
size_t magiclessSize = FUZZ_dataProducer_reserveDataPrefix(producer);
|
||||
const void* const magiclessSrc = src;
|
||||
size_t const dstSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size);
|
||||
void* const standardDst = FUZZ_malloc(dstSize);
|
||||
void* const magiclessDst = FUZZ_malloc(dstSize);
|
||||
|
||||
// Create standard-format src from magicless-format src
|
||||
const uint32_t zstd_magic = ZSTD_MAGICNUMBER;
|
||||
size_t standardSize = sizeof(zstd_magic) + magiclessSize;
|
||||
void* const standardSrc = FUZZ_malloc(standardSize);
|
||||
memcpy(standardSrc, &zstd_magic, sizeof(zstd_magic)); // assume fuzzing on little-endian machine
|
||||
memcpy(standardSrc + sizeof(zstd_magic), magiclessSrc, magiclessSize);
|
||||
|
||||
// Truncate to a single frame
|
||||
{
|
||||
const size_t standardFrameCompressedSize = ZSTD_findFrameCompressedSize(standardSrc, standardSize);
|
||||
if (ZSTD_isError(standardFrameCompressedSize)) {
|
||||
goto cleanup_and_return;
|
||||
}
|
||||
standardSize = standardFrameCompressedSize;
|
||||
magiclessSize = standardFrameCompressedSize - sizeof(zstd_magic);
|
||||
}
|
||||
|
||||
// Create DCtx if needed
|
||||
if (!dctx) {
|
||||
dctx = ZSTD_createDCtx();
|
||||
FUZZ_ASSERT(dctx);
|
||||
}
|
||||
|
||||
// Test one-shot decompression
|
||||
{
|
||||
FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
|
||||
FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1));
|
||||
const size_t standardRet = ZSTD_decompressDCtx(
|
||||
dctx, standardDst, dstSize, standardSrc, standardSize);
|
||||
|
||||
FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
|
||||
FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless));
|
||||
const size_t magiclessRet = ZSTD_decompressDCtx(
|
||||
dctx, magiclessDst, dstSize, magiclessSrc, magiclessSize);
|
||||
|
||||
// Standard accepts => magicless should accept
|
||||
if (!ZSTD_isError(standardRet)) FUZZ_ZASSERT(magiclessRet);
|
||||
|
||||
// Magicless accepts => standard should accept
|
||||
// NOTE: this is nice-to-have, please disable this check if it is difficult to satisfy.
|
||||
if (!ZSTD_isError(magiclessRet)) FUZZ_ZASSERT(standardRet);
|
||||
|
||||
// If both accept, decompressed size and data should match
|
||||
if (!ZSTD_isError(standardRet) && !ZSTD_isError(magiclessRet)) {
|
||||
FUZZ_ASSERT(standardRet == magiclessRet);
|
||||
if (standardRet > 0) {
|
||||
FUZZ_ASSERT(
|
||||
memcmp(standardDst, magiclessDst, standardRet) == 0
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test streaming decompression
|
||||
{
|
||||
ZSTD_inBuffer standardIn = { standardSrc, standardSize, 0 };
|
||||
ZSTD_inBuffer magiclessIn = { magiclessSrc, magiclessSize, 0 };
|
||||
ZSTD_outBuffer standardOut = { standardDst, dstSize, 0 };
|
||||
ZSTD_outBuffer magiclessOut = { magiclessDst, dstSize, 0 };
|
||||
|
||||
FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
|
||||
FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1));
|
||||
const size_t standardRet = ZSTD_decompressStream(dctx, &standardOut, &standardIn);
|
||||
|
||||
FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
|
||||
FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless));
|
||||
const size_t magiclessRet = ZSTD_decompressStream(dctx, &magiclessOut, &magiclessIn);
|
||||
|
||||
// Standard accepts => magicless should accept
|
||||
if (standardRet == 0) FUZZ_ASSERT(magiclessRet == 0);
|
||||
|
||||
// Magicless accepts => standard should accept
|
||||
// NOTE: this is nice-to-have, please disable this check if it is difficult to satisfy.
|
||||
if (magiclessRet == 0) FUZZ_ASSERT(standardRet == 0);
|
||||
|
||||
// If both accept, decompressed size and data should match
|
||||
if (standardRet == 0 && magiclessRet == 0) {
|
||||
FUZZ_ASSERT(standardOut.pos == magiclessOut.pos);
|
||||
if (standardOut.pos > 0) {
|
||||
FUZZ_ASSERT(
|
||||
memcmp(standardOut.dst, magiclessOut.dst, standardOut.pos) == 0
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cleanup_and_return:
|
||||
#ifndef STATEFUL_FUZZING
|
||||
ZSTD_freeDCtx(dctx); dctx = NULL;
|
||||
#endif
|
||||
free(standardSrc);
|
||||
free(standardDst);
|
||||
free(magiclessDst);
|
||||
FUZZ_dataProducer_free(producer);
|
||||
return 0;
|
||||
}
|
@ -65,6 +65,7 @@ TARGET_INFO = {
|
||||
'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
|
||||
'huf_round_trip': TargetInfo(InputType.RAW_DATA),
|
||||
'huf_decompress': TargetInfo(InputType.RAW_DATA),
|
||||
'decompress_cross_format': TargetInfo(InputType.RAW_DATA),
|
||||
}
|
||||
TARGETS = list(TARGET_INFO.keys())
|
||||
ALL_TARGETS = TARGETS + ['all']
|
||||
|
Reference in New Issue
Block a user