From 54a4998a808fbe371c1ace6f2026849f1c93d8e6 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Wed, 3 Feb 2021 19:53:00 -0800 Subject: [PATCH] Add basic tracing functionality --- build/VS2008/zstd/zstd.vcproj | 4 + build/VS2010/zstd/zstd.vcxproj | 1 + build/cmake/programs/CMakeLists.txt | 4 +- build/meson/programs/meson.build | 5 +- contrib/linux-kernel/Makefile | 4 +- contrib/single_file_libs/zstd-in.c | 1 + contrib/single_file_libs/zstddeclib-in.c | 1 + lib/common/compiler.h | 1 + lib/common/zstd_trace.c | 42 +++++ lib/common/zstd_trace.h | 118 +++++++++++++ lib/compress/zstd_compress.c | 45 ++++- lib/compress/zstd_compress_internal.h | 4 + lib/decompress/zstd_decompress.c | 40 ++++- lib/decompress/zstd_decompress_internal.h | 4 + lib/zstd.h | 14 +- programs/Makefile | 8 +- programs/zstdcli.c | 13 ++ programs/zstdcli_trace.c | 192 ++++++++++++++++++++++ programs/zstdcli_trace.h | 24 +++ tests/playTests.sh | 15 ++ 20 files changed, 521 insertions(+), 19 deletions(-) create mode 100644 lib/common/zstd_trace.c create mode 100644 lib/common/zstd_trace.h create mode 100644 programs/zstdcli_trace.c create mode 100644 programs/zstdcli_trace.h diff --git a/build/VS2008/zstd/zstd.vcproj b/build/VS2008/zstd/zstd.vcproj index ab02e6155..6f2652902 100644 --- a/build/VS2008/zstd/zstd.vcproj +++ b/build/VS2008/zstd/zstd.vcproj @@ -480,6 +480,10 @@ RelativePath="..\..\..\programs\zstdcli.c" > + + diff --git a/build/VS2010/zstd/zstd.vcxproj b/build/VS2010/zstd/zstd.vcxproj index e320d88a1..e389f9693 100644 --- a/build/VS2010/zstd/zstd.vcxproj +++ b/build/VS2010/zstd/zstd.vcxproj @@ -63,6 +63,7 @@ + diff --git a/build/cmake/programs/CMakeLists.txt b/build/cmake/programs/CMakeLists.txt index b26e97d0d..c61fe56ea 100644 --- a/build/cmake/programs/CMakeLists.txt +++ b/build/cmake/programs/CMakeLists.txt @@ -32,7 +32,7 @@ if (MSVC) set(PlatformDependResources ${MSVC_RESOURCE_DIR}/zstd.rc) endif () -add_executable(zstd ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/dibio.c ${PlatformDependResources}) +add_executable(zstd ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/dibio.c ${PROGRAMS_DIR}/zstdcli_trace.c ${PlatformDependResources}) target_link_libraries(zstd ${PROGRAMS_ZSTD_LINK_TARGET}) if (CMAKE_SYSTEM_NAME MATCHES "(Solaris|SunOS)") target_link_libraries(zstd rt) @@ -75,7 +75,7 @@ if (UNIX) add_executable(zstd-frugal ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/fileio.c) target_link_libraries(zstd-frugal ${PROGRAMS_ZSTD_LINK_TARGET}) - set_property(TARGET zstd-frugal APPEND PROPERTY COMPILE_DEFINITIONS "ZSTD_NOBENCH;ZSTD_NODICT") + set_property(TARGET zstd-frugal APPEND PROPERTY COMPILE_DEFINITIONS "ZSTD_NOBENCH;ZSTD_NODICT;ZSTD_NOTRACE") endif () # Add multi-threading support definitions diff --git a/build/meson/programs/meson.build b/build/meson/programs/meson.build index 363818f9d..d255627cd 100644 --- a/build/meson/programs/meson.build +++ b/build/meson/programs/meson.build @@ -17,7 +17,8 @@ zstd_programs_sources = [join_paths(zstd_rootdir, 'programs/zstdcli.c'), join_paths(zstd_rootdir, 'programs/benchfn.c'), join_paths(zstd_rootdir, 'programs/benchzstd.c'), join_paths(zstd_rootdir, 'programs/datagen.c'), - join_paths(zstd_rootdir, 'programs/dibio.c')] + join_paths(zstd_rootdir, 'programs/dibio.c'), + join_paths(zstd_rootdir, 'programs/zstdcli_trace.c')] zstd_c_args = libzstd_debug_cflags if use_multi_thread @@ -73,7 +74,7 @@ zstd_frugal_sources = [join_paths(zstd_rootdir, 'programs/zstdcli.c'), executable('zstd-frugal', zstd_frugal_sources, dependencies: libzstd_dep, - c_args: [ '-DZSTD_NOBENCH', '-DZSTD_NODICT' ], + c_args: [ '-DZSTD_NOBENCH', '-DZSTD_NODICT', '-DZSTD_NOTRACE' ], install: true) install_data(join_paths(zstd_rootdir, 'programs/zstdgrep'), diff --git a/contrib/linux-kernel/Makefile b/contrib/linux-kernel/Makefile index a0cc67ac4..1725e7d46 100644 --- a/contrib/linux-kernel/Makefile +++ b/contrib/linux-kernel/Makefile @@ -45,7 +45,9 @@ libzstd: -U_MSC_VER \ -U_WIN32 \ -RZSTDLIB_VISIBILITY= \ - -RZSTDERRORLIB_VISIBILITY= + -RZSTDERRORLIB_VISIBILITY= \ + -DZSTD_HAVE_WEAK_SYMBOLS=0 \ + -DZSTD_TRACE=0 cp linux_zstd.h linux/include/linux/zstd.h cp zstd_compress_module.c linux/lib/zstd cp zstd_decompress_module.c linux/lib/zstd diff --git a/contrib/single_file_libs/zstd-in.c b/contrib/single_file_libs/zstd-in.c index 0addbab3a..1b27953a6 100644 --- a/contrib/single_file_libs/zstd-in.c +++ b/contrib/single_file_libs/zstd-in.c @@ -42,6 +42,7 @@ #ifndef __EMSCRIPTEN__ #define ZSTD_MULTITHREAD #endif +#define ZSTD_TRACE 0 /* Include zstd_deps.h first with all the options we need enabled. */ #define ZSTD_DEPS_NEED_MALLOC diff --git a/contrib/single_file_libs/zstddeclib-in.c b/contrib/single_file_libs/zstddeclib-in.c index f94467e7e..019d9c260 100644 --- a/contrib/single_file_libs/zstddeclib-in.c +++ b/contrib/single_file_libs/zstddeclib-in.c @@ -38,6 +38,7 @@ #define XXH_INLINE_ALL #define ZSTD_LEGACY_SUPPORT 0 #define ZSTD_STRIP_ERROR_STRINGS +#define ZSTD_TRACE 0 /* Include zstd_deps.h first with all the options we need enabled. */ #define ZSTD_DEPS_NEED_MALLOC diff --git a/lib/common/compiler.h b/lib/common/compiler.h index 2697f6051..1142002b0 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -90,6 +90,7 @@ # endif #endif + /* target attribute */ #ifndef __has_attribute #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ diff --git a/lib/common/zstd_trace.c b/lib/common/zstd_trace.c new file mode 100644 index 000000000..18c3cfb74 --- /dev/null +++ b/lib/common/zstd_trace.c @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2016-2021, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstd_trace.h" +#include "../zstd.h" + +#include "compiler.h" + +#if ZSTD_TRACE && ZSTD_HAVE_WEAK_SYMBOLS + +ZSTD_WEAK_ATTR int ZSTD_trace_compress_begin(ZSTD_CCtx const* cctx) +{ + (void)cctx; + return 0; +} + +ZSTD_WEAK_ATTR void ZSTD_trace_compress_end(ZSTD_CCtx const* cctx, ZSTD_trace const* trace) +{ + (void)cctx; + (void)trace; +} + +ZSTD_WEAK_ATTR int ZSTD_trace_decompress_begin(ZSTD_DCtx const* dctx) +{ + (void)dctx; + return 0; +} + +ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end(ZSTD_DCtx const* dctx, ZSTD_trace const* trace) +{ + (void)dctx; + (void)trace; +} + +#endif diff --git a/lib/common/zstd_trace.h b/lib/common/zstd_trace.h new file mode 100644 index 000000000..1e28b1f83 --- /dev/null +++ b/lib/common/zstd_trace.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2016-2021, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_TRACE_H +#define ZSTD_TRACE_H + +#include + +/* weak symbol support */ +#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && defined(__GNUC__) && \ + !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \ + !defined(__CYGWIN__) +# define ZSTD_HAVE_WEAK_SYMBOLS 1 +#else +# define ZSTD_HAVE_WEAK_SYMBOLS 0 +#endif +#if ZSTD_HAVE_WEAK_SYMBOLS +# define ZSTD_WEAK_ATTR __attribute__((__weak__)) +#else +# define ZSTD_WEAK_ATTR +#endif + +/* Only enable tracing when weak symbols are available. */ +#ifndef ZSTD_TRACE +# define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS +#endif + +#if ZSTD_TRACE + +struct ZSTD_CCtx_s; +struct ZSTD_DCtx_s; +struct ZSTD_CCtx_params_s; + +typedef struct { + /** + * ZSTD_VERSION_NUMBER + * + * This is guaranteed to be the first member of ZSTD_trace. + * Otherwise, this struct is not stable between versions. If + * the version number does not match your expectation, you + * should not interpret the rest of the struct. + */ + unsigned version; + /** + * Non-zero if streaming (de)compression is used. + */ + unsigned streaming; + /** + * The dictionary ID. + */ + unsigned dictionaryID; + /** + * Is the dictionary cold? + * Only set on decompression. + */ + unsigned dictionaryIsCold; + /** + * The dictionary size or zero if no dictionary. + */ + size_t dictionarySize; + /** + * The uncompressed size of the data. + */ + size_t uncompressedSize; + /** + * The compressed size of the data. + */ + size_t compressedSize; + /** + * The fully resolved CCtx parameters (NULL on decompression). + */ + struct ZSTD_CCtx_params_s const* params; +} ZSTD_trace; + +/** + * Trace the beginning of a compression call. + * @param cctx The dctx pointer for the compression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. + */ +int ZSTD_trace_compress_begin(struct ZSTD_CCtx_s const* cctx); + +/** + * Trace the end of a compression call. + * @param cctx The dctx pointer for the decompression. + * @param trace The zstd tracing info. + */ +void ZSTD_trace_compress_end( + struct ZSTD_CCtx_s const* cctx, + ZSTD_trace const* trace); + +/** + * Trace the beginning of a decompression call. + * @param dctx The dctx pointer for the decompression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. + */ +int ZSTD_trace_decompress_begin(struct ZSTD_DCtx_s const* dctx); + +/** + * Trace the end of a decompression call. + * @param dctx The dctx pointer for the decompression. + * @param trace The zstd tracing info. + */ +void ZSTD_trace_decompress_end( + struct ZSTD_DCtx_s const* dctx, + ZSTD_trace const* trace); + +#endif /* ZSTD_TRACE */ + +#endif /* ZSTD_TRACE_H */ diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index c90fb320e..eea724d48 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -14,6 +14,7 @@ #include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */ #include "../common/cpu.h" #include "../common/mem.h" +#include "../common/zstd_trace.h" #include "hist.h" /* HIST_countFast_wksp */ #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ #include "../common/fse.h" @@ -802,7 +803,7 @@ size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value } size_t ZSTD_CCtxParams_getParameter( - ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value) + ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value) { switch(param) { @@ -1704,6 +1705,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, XXH64_reset(&zc->xxhState, 0); zc->stage = ZSTDcs_init; zc->dictID = 0; + zc->dictContentSize = 0; ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); @@ -1864,6 +1866,7 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, } } cctx->dictID = cdict->dictID; + cctx->dictContentSize = cdict->dictContentSize; /* copy block state */ ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); @@ -1927,6 +1930,7 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, } cctx->dictID = cdict->dictID; + cctx->dictContentSize = cdict->dictContentSize; /* copy block state */ ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); @@ -2017,6 +2021,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; } dstCCtx->dictID = srcCCtx->dictID; + dstCCtx->dictContentSize = srcCCtx->dictContentSize; /* copy block state */ ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); @@ -3380,6 +3385,9 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { +#if ZSTD_TRACE + cctx->tracingEnabled = ZSTD_trace_compress_begin(cctx); +#endif DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); /* params are supposed to be fully validated at this point */ assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); @@ -3409,6 +3417,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); assert(dictID <= UINT_MAX); cctx->dictID = (U32)dictID; + cctx->dictContentSize = cdict ? cdict->dictContentSize : dictSize; } return 0; } @@ -3503,6 +3512,28 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) return op-ostart; } +static void ZSTD_CCtx_trace(ZSTD_CCtx const* cctx, size_t extraCSize) +{ +#if ZSTD_TRACE + if (cctx->tracingEnabled) { + int const streaming = cctx->inBuffSize > 0 || cctx->outBuffSize > 0 || cctx->appliedParams.nbWorkers > 0; + ZSTD_trace trace; + ZSTD_memset(&trace, 0, sizeof(trace)); + trace.version = ZSTD_VERSION_NUMBER; + trace.streaming = streaming; + trace.dictionaryID = cctx->dictID; + trace.dictionarySize = cctx->dictContentSize; + trace.uncompressedSize = cctx->consumedSrcSize; + trace.compressedSize = cctx->producedCSize + extraCSize; + trace.params = &cctx->appliedParams; + ZSTD_trace_compress_end(cctx, &trace); + } +#else + (void)cctx; + (void)extraCSize; +#endif +} + size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) @@ -3525,6 +3556,7 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, (unsigned)cctx->pledgedSrcSizePlusOne-1, (unsigned)cctx->consumedSrcSize); } + ZSTD_CCtx_trace(cctx, endResult); return cSize + endResult; } @@ -4408,6 +4440,9 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ } if (params.nbWorkers > 0) { +#if ZSTD_TRACE + cctx->tracingEnabled = ZSTD_trace_compress_begin(cctx); +#endif /* mt context creation */ if (cctx->mtctx == NULL) { DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", @@ -4421,6 +4456,10 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, cctx->mtctx, prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , ""); + cctx->dictID = cctx->cdict ? cctx->cdict->dictID : 0; + cctx->dictContentSize = cctx->cdict ? cctx->cdict->dictContentSize : prefixDict.dictSize; + cctx->consumedSrcSize = 0; + cctx->producedCSize = 0; cctx->streamStage = zcss_load; cctx->appliedParams = params; } else @@ -4482,8 +4521,12 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, size_t const ipos = input->pos; size_t const opos = output->pos; flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); + cctx->consumedSrcSize += (U64)(input->pos - ipos); + cctx->producedCSize += (U64)(output->pos - opos); if ( ZSTD_isError(flushMin) || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ + if (flushMin == 0) + ZSTD_CCtx_trace(cctx, 0); ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); } FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed"); diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 10846ae50..3ee00269d 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -270,6 +270,7 @@ struct ZSTD_CCtx_s { ZSTD_CCtx_params requestedParams; ZSTD_CCtx_params appliedParams; U32 dictID; + size_t dictContentSize; ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */ size_t blockSize; @@ -321,6 +322,9 @@ struct ZSTD_CCtx_s { #ifdef ZSTD_MULTITHREAD ZSTDMT_CCtx* mtctx; #endif + + /* Tracing */ + int tracingEnabled; }; typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 2536cf44b..c8a7701fb 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -58,6 +58,7 @@ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ #include "../common/cpu.h" /* bmi2 */ #include "../common/mem.h" /* low level memory routines */ +#include "../common/zstd_trace.h" #define FSE_STATIC_LINKING_ONLY #include "../common/fse.h" #define HUF_STATIC_LINKING_ONLY @@ -630,6 +631,7 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he #endif dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0; if (dctx->validateChecksum) XXH64_reset(&dctx->xxhState, 0); + dctx->processedCSize += headerSize; return 0; } @@ -784,6 +786,31 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, return regenSize; } +static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming) +{ +#if ZSTD_TRACE + if (dctx->tracingEnabled) { + ZSTD_trace trace; + ZSTD_memset(&trace, 0, sizeof(trace)); + trace.version = ZSTD_VERSION_NUMBER; + trace.streaming = streaming; + if (dctx->ddict) { + trace.dictionaryID = ZSTD_getDictID_fromDDict(dctx->ddict); + trace.dictionarySize = ZSTD_DDict_dictSize(dctx->ddict); + trace.dictionaryIsCold = dctx->ddictIsCold; + } + trace.uncompressedSize = (size_t)uncompressedSize; + trace.compressedSize = (size_t)compressedSize; + ZSTD_trace_decompress_end(dctx, &trace); + } +#else + (void)dctx; + (void)uncompressedSize; + (void)compressedSize; + (void)streaming; +#endif +} + /*! ZSTD_decompressFrame() : * @dctx must be properly initialized @@ -793,7 +820,8 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void** srcPtr, size_t *srcSizePtr) { - const BYTE* ip = (const BYTE*)(*srcPtr); + const BYTE* const istart = (const BYTE*)(*srcPtr); + const BYTE* ip = istart; BYTE* const ostart = (BYTE*)dst; BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart; BYTE* op = ostart; @@ -869,7 +897,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, ip += 4; remainingSrcSize -= 4; } - + ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0); /* Allow caller to get size read */ *srcPtr = ip; *srcSizePtr = remainingSrcSize; @@ -1075,6 +1103,8 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed"); ZSTD_checkContinuity(dctx, dst, dstCapacity); + dctx->processedCSize += srcSize; + switch (dctx->stage) { case ZSTDds_getFrameHeaderSize : @@ -1178,6 +1208,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c dctx->expected = 4; dctx->stage = ZSTDds_checkChecksum; } else { + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); dctx->expected = 0; /* ends here */ dctx->stage = ZSTDds_getFrameHeaderSize; } @@ -1197,6 +1228,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); } + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); dctx->expected = 0; dctx->stage = ZSTDds_getFrameHeaderSize; return 0; @@ -1350,8 +1382,12 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) { assert(dctx != NULL); +#if ZSTD_TRACE + dctx->tracingEnabled = ZSTD_trace_decompress_begin(dctx); +#endif dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->processedCSize = 0; dctx->decodedSize = 0; dctx->previousDstEnd = NULL; dctx->prefixStart = NULL; diff --git a/lib/decompress/zstd_decompress_internal.h b/lib/decompress/zstd_decompress_internal.h index 72751c9ff..07c515977 100644 --- a/lib/decompress/zstd_decompress_internal.h +++ b/lib/decompress/zstd_decompress_internal.h @@ -120,6 +120,7 @@ struct ZSTD_DCtx_s const void* dictEnd; /* end of previous segment */ size_t expected; ZSTD_frameHeader fParams; + U64 processedCSize; U64 decodedSize; blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ ZSTD_dStage stage; @@ -175,6 +176,9 @@ struct ZSTD_DCtx_s void const* dictContentBeginForFuzzing; void const* dictContentEndForFuzzing; #endif + + /* Tracing */ + int tracingEnabled; }; /* typedef'd to ZSTD_DCtx within "zstd.h" */ diff --git a/lib/zstd.h b/lib/zstd.h index 143db3541..230adee34 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1001,13 +1001,13 @@ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, s /*! ZSTD_DCtx_refDDict() : * Reference a prepared dictionary, to be used to decompress next frames. * The dictionary remains active for decompression of future frames using same DCtx. - * + * * If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function * will store the DDict references in a table, and the DDict used for decompression * will be determined at decompression time, as per the dict ID in the frame. * The memory for the table is allocated on the first call to refDDict, and can be * freed with ZSTD_freeDCtx(). - * + * * @result : 0, or an error code (which can be tested with ZSTD_isError()). * Note 1 : Currently, only one dictionary can be managed. * Referencing a new dictionary effectively "discards" any previous one. @@ -1892,7 +1892,7 @@ ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_c * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. * @result : 0, or an error code (which can be tested with ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); +ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); /*! ZSTD_CCtx_setParametersUsingCCtxParams() : * Apply a set of ZSTD_CCtx_params to the compression context. @@ -2027,15 +2027,15 @@ ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead * store all references. At decompression time, the appropriate dictID is selected * from the set of DDicts based on the dictID in the frame. - * + * * Usage is simply calling ZSTD_refDDict() on multiple dict buffers. - * + * * Param has values of byte ZSTD_refMultipleDDicts_e - * + * * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory * allocation for the hash table. ZSTD_freeDCtx() also frees this memory. * Memory is allocated as per ZSTD_DCtx::customMem. - * + * * Although this function allocates memory for the table, the user is still responsible for * memory management of the underlying ZSTD_DDict* themselves. */ diff --git a/programs/Makefile b/programs/Makefile index b254549bc..31576ecc9 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -314,16 +314,16 @@ zstd-pgo : ## zstd-small: minimal target, supporting only zstd compression and decompression. no bench. no legacy. no other format. zstd-small: CFLAGS = -Os -s zstd-frugal zstd-small: $(ZSTDLIB_CORE_SRC) zstdcli.c util.c timefn.c fileio.c - $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT $^ -o $@$(EXT) + $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOTRACE $^ -o $@$(EXT) zstd-decompress: $(ZSTDLIB_COMMON_C) $(ZSTDLIB_DECOMPRESS_C) zstdcli.c util.c timefn.c fileio.c - $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOCOMPRESS $^ -o $@$(EXT) + $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOCOMPRESS -DZSTD_NOTRACE $^ -o $@$(EXT) zstd-compress: $(ZSTDLIB_COMMON_C) $(ZSTDLIB_COMPRESS_C) zstdcli.c util.c timefn.c fileio.c - $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NODECOMPRESS $^ -o $@$(EXT) + $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NODECOMPRESS -DZSTD_NOTRACE $^ -o $@$(EXT) ## zstd-dictBuilder: executable supporting dictionary creation and compression (only) -zstd-dictBuilder: CPPFLAGS += -DZSTD_NOBENCH -DZSTD_NODECOMPRESS +zstd-dictBuilder: CPPFLAGS += -DZSTD_NOBENCH -DZSTD_NODECOMPRESS -DZSTD_NOTRACE zstd-dictBuilder: $(ZSTDLIB_COMMON_C) $(ZSTDLIB_COMPRESS_C) $(ZDICT_SRC) zstdcli.c util.c timefn.c fileio.c dibio.c $(CC) $(FLAGS) $^ -o $@$(EXT) diff --git a/programs/zstdcli.c b/programs/zstdcli.c index b279287d4..c4e1b4792 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -42,6 +42,9 @@ #ifndef ZSTD_NODICT # include "dibio.h" /* ZDICT_cover_params_t, DiB_trainFromFiles() */ #endif +#ifndef ZSTD_NOTRACE +# include "zstdcli_trace.h" +#endif #include "../lib/zstd.h" /* ZSTD_VERSION_STRING, ZSTD_minCLevel, ZSTD_maxCLevel */ @@ -186,6 +189,10 @@ static void usage_advanced(const char* programName) DISPLAYOUT( "--[no-]check : during decompression, ignore/validate checksums in compressed frame (default: validate)."); #endif #endif /* ZSTD_NOCOMPRESS */ + +#ifndef ZSTD_NOTRACE + DISPLAYOUT( "--trace FILE : log tracing information to FILE."); +#endif DISPLAYOUT( "\n"); DISPLAYOUT( "-- : All arguments after \"--\" are treated as files \n"); @@ -918,6 +925,9 @@ int main(int const argCount, const char* argv[]) if (longCommandWArg(&argument, "--output-dir-flat")) { NEXT_FIELD(outDirName); continue; } #ifdef UTIL_HAS_MIRRORFILELIST if (longCommandWArg(&argument, "--output-dir-mirror")) { NEXT_FIELD(outMirroredDirName); continue; } +#endif +#ifndef ZSTD_NOTRACE + if (longCommandWArg(&argument, "--trace")) { char const* traceFile; NEXT_FIELD(traceFile); TRACE_enable(traceFile); continue; } #endif if (longCommandWArg(&argument, "--patch-from")) { NEXT_FIELD(patchFromDictFileName); continue; } if (longCommandWArg(&argument, "--long")) { @@ -1397,6 +1407,9 @@ _end: if (main_pause) waitEnter(); UTIL_freeFileNamesTable(filenames); UTIL_freeFileNamesTable(file_of_names); +#ifndef ZSTD_NOTRACE + TRACE_finish(); +#endif return operationResult; } diff --git a/programs/zstdcli_trace.c b/programs/zstdcli_trace.c new file mode 100644 index 000000000..aa90a2a02 --- /dev/null +++ b/programs/zstdcli_trace.c @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2016-2021, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstdcli_trace.h" + +#include +#include + +#include "timefn.h" +#include "util.h" + +#define ZSTD_STATIC_LINKING_ONLY +#include "../lib/zstd.h" +/* We depend on the trace header to avoid duplicating the ZSTD_trace struct. + * But, we check the version so it is compatible with dynamic linking. + */ +#include "../lib/common/zstd_trace.h" +/* We only use macros from threading.h so it is compatible with dynamic linking */ +#include "../lib/common/threading.h" + +#if ZSTD_TRACE + +static FILE* g_traceFile = NULL; +static int g_mutexInit = 0; +static ZSTD_pthread_mutex_t g_mutex; + +void TRACE_enable(char const* filename) +{ + int const writeHeader = !UTIL_isRegularFile(filename); + if (g_traceFile) + fclose(g_traceFile); + g_traceFile = fopen(filename, "a"); + if (g_traceFile && writeHeader) { + /* Fields: + * algorithm + * version + * method + * streaming + * level + * workers + * dictionary size + * uncompressed size + * compressed size + * duration nanos + * compression ratio + * speed MB/s + */ + fprintf(g_traceFile, "Algorithm, Version, Method, Mode, Level, Workers, Dictionary Size, Uncompressed Size, Compressed Size, Duration Nanos, Compression Ratio, Speed MB/s\n"); + } + if (!g_mutexInit) { + if (!ZSTD_pthread_mutex_init(&g_mutex, NULL)) { + g_mutexInit = 1; + } else { + TRACE_finish(); + } + } +} + +void TRACE_finish(void) +{ + if (g_traceFile) { + fclose(g_traceFile); + } + g_traceFile = NULL; + if (g_mutexInit) { + ZSTD_pthread_mutex_destroy(&g_mutex); + g_mutexInit = 0; + } +} + +static void TRACE_log(char const* method, PTime duration, ZSTD_trace const* trace) +{ + int level = 0; + int workers = 0; + double const ratio = (double)trace->uncompressedSize / (double)trace->compressedSize; + double const speed = ((double)trace->uncompressedSize * 1000) / (double)duration; + if (trace->params) { + ZSTD_CCtxParams_getParameter(trace->params, ZSTD_c_compressionLevel, &level); + ZSTD_CCtxParams_getParameter(trace->params, ZSTD_c_nbWorkers, &workers); + } + assert(g_traceFile != NULL); + + /* Fields: + * algorithm + * version + * method + * streaming + * level + * workers + * dictionary size + * uncompressed size + * compressed size + * duration nanos + * compression ratio + * speed MB/s + */ + fprintf(g_traceFile, + "zstd, %u, %s, %s, %d, %d, %llu, %llu, %llu, %llu, %.2f, %.2f\n", + trace->version, + method, + trace->streaming ? "streaming" : "single-pass", + level, + workers, + (unsigned long long)trace->dictionarySize, + (unsigned long long)trace->uncompressedSize, + (unsigned long long)trace->compressedSize, + (unsigned long long)duration, + ratio, + speed); +} + +static ZSTD_CCtx const* g_cctx; +static ZSTD_DCtx const* g_dctx; +static UTIL_time_t g_begin = UTIL_TIME_INITIALIZER; + +/** + * These symbols override the weak symbols provided by the library. + */ + +int ZSTD_trace_compress_begin(ZSTD_CCtx const* cctx) +{ + int enabled = 0; + if (g_traceFile == NULL) + return 0; + ZSTD_pthread_mutex_lock(&g_mutex); + if (g_cctx == NULL) { + g_cctx = cctx; + g_dctx = NULL; + g_begin = UTIL_getTime(); + enabled = 1; + } + ZSTD_pthread_mutex_unlock(&g_mutex); + return enabled; +} + +void ZSTD_trace_compress_end(ZSTD_CCtx const* cctx, ZSTD_trace const* trace) +{ + assert(g_traceFile != NULL); + ZSTD_pthread_mutex_lock(&g_mutex); + assert(g_cctx == cctx); + assert(g_dctx == NULL); + if (cctx == g_cctx && trace->version == ZSTD_VERSION_NUMBER) + TRACE_log("compress", UTIL_clockSpanNano(g_begin), trace); + g_cctx = NULL; + ZSTD_pthread_mutex_unlock(&g_mutex); +} + +int ZSTD_trace_decompress_begin(ZSTD_DCtx const* dctx) +{ + int enabled = 0; + if (g_traceFile == NULL) + return 0; + ZSTD_pthread_mutex_lock(&g_mutex); + if (g_dctx == NULL) { + g_cctx = NULL; + g_dctx = dctx; + g_begin = UTIL_getTime(); + enabled = 1; + } + ZSTD_pthread_mutex_unlock(&g_mutex); + return enabled; +} + +void ZSTD_trace_decompress_end(ZSTD_DCtx const* dctx, ZSTD_trace const* trace) +{ + assert(g_traceFile != NULL); + ZSTD_pthread_mutex_lock(&g_mutex); + assert(g_cctx == NULL); + assert(g_dctx == dctx); + if (dctx == g_dctx && trace->version == ZSTD_VERSION_NUMBER) + TRACE_log("decompress", UTIL_clockSpanNano(g_begin), trace); + g_dctx = NULL; + ZSTD_pthread_mutex_unlock(&g_mutex); +} + +#else /* ZSTD_TRACE */ + +void TRACE_enable(char const* filename) +{ + (void)filename; +} + +void TRACE_finish(void) {} + +#endif /* ZSTD_TRACE */ diff --git a/programs/zstdcli_trace.h b/programs/zstdcli_trace.h new file mode 100644 index 000000000..6ed390808 --- /dev/null +++ b/programs/zstdcli_trace.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2016-2021, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTDCLI_TRACE_H +#define ZSTDCLI_TRACE_H + +/** + * Enable tracing - log to filename. + */ +void TRACE_enable(char const* filename); + +/** + * Shut down the tracing library. + */ +void TRACE_finish(void); + +#endif /* ZSTDCLI_TRACE_H */ diff --git a/tests/playTests.sh b/tests/playTests.sh index 4d6abbcc0..20259bc2f 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -1322,6 +1322,21 @@ zstd -f --no-check tmp1 zstd -l tmp1.zst zstd -lv tmp1.zst +println "\n===> zstd trace tests " +zstd -f --trace tmp.trace tmp1 +zstd -f --trace tmp.trace tmp1 tmp2 tmp3 +zstd -f --trace tmp.trace tmp1 tmp2 tmp3 -o /dev/null +zstd -f --trace tmp.trace tmp1 tmp2 tmp3 --single-thread +zstd -f --trace tmp.trace -D tmp1 tmp2 tmp3 -o /dev/null +zstd -f --trace tmp.trace -D tmp1 tmp2 tmp3 -o /dev/null --single-thread +zstd --trace tmp.trace -t tmp1.zst +zstd --trace tmp.trace -t tmp1.zst tmp2.zst +zstd -f --trace tmp.trace -d tmp1.zst +zstd -f --trace tmp.trace -d tmp1.zst tmp2.zst tmp3.zst +zstd -D tmp1 tmp2 -c | zstd --trace tmp.trace -t -D tmp1 +zstd -b1e10i0 --trace tmp.trace tmp1 +zstd -b1e10i0 --trace tmp.trace tmp1 tmp2 tmp3 + rm tmp*