diff --git a/.travis.yml b/.travis.yml index 36df67052..a61b82876 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,23 +20,25 @@ matrix: - env: Cmd='make gcc8install && CC=gcc-8 CFLAGS="-Werror -O3" make -j all' - env: Cmd='make clang38install && CC=clang-3.8 make clean msan-test-zstd' + - env: Cmd='make staticAnalyze' + - env: Cmd='make gcc6install && CC=gcc-6 make clean uasan-fuzztest' - env: Cmd='make gcc6install libc6install && make clean && CC=gcc-6 CFLAGS=-m32 make uasan-fuzztest' - env: Cmd='make clang38install && CC=clang-3.8 make clean msan-fuzztest' - env: Cmd='make clang38install && CC=clang-3.8 make clean tsan-test-zstream' - - env: Cmd='make arminstall && make armfuzz' - - env: Cmd='make arminstall && make aarch64fuzz' - - env: Cmd='make ppcinstall && make ppcfuzz' - - env: Cmd='make ppcinstall && make ppc64fuzz' - - env: Cmd='make -j uasanregressiontest && make clean && make -j msanregressiontest' - env: Cmd='make valgrindinstall && make -C tests clean valgrindTest && make clean && make -C tests test-fuzzer-stackmode' + - env: Cmd='make arminstall && make armfuzz' + - env: Cmd='make arminstall && make aarch64fuzz' + - env: Cmd='make ppcinstall && make ppcfuzz' + - env: Cmd='make ppcinstall && make ppc64fuzz' + - env: Cmd='make lz4install && make -C tests test-lz4 && make clean && make -C tests test-pool && make clean && bash tests/libzstd_partial_builds.sh' diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 000000000..0f7ad8bfc --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,5 @@ +# Code of Conduct + +Facebook has adopted a Code of Conduct that we expect project participants to adhere to. +Please read the [full text](https://code.fb.com/codeofconduct/) +so that you can understand what actions will and will not be tolerated. diff --git a/Makefile b/Makefile index 6c4ab9c9b..a17900450 100644 --- a/Makefile +++ b/Makefile @@ -23,6 +23,7 @@ else EXT = endif +## default: Build lib-release and zstd-release .PHONY: default default: lib-release zstd-release @@ -30,10 +31,9 @@ default: lib-release zstd-release all: allmost examples manual contrib .PHONY: allmost -allmost: allzstd - $(MAKE) -C $(ZWRAPDIR) all +allmost: allzstd zlibwrapper -#skip zwrapper, can't build that on alternate architectures without the proper zlib installed +# skip zwrapper, can't build that on alternate architectures without the proper zlib installed .PHONY: allzstd allzstd: lib $(MAKE) -C $(PRGDIR) all @@ -44,8 +44,8 @@ all32: $(MAKE) -C $(PRGDIR) zstd32 $(MAKE) -C $(TESTDIR) all32 -.PHONY: lib lib-release -lib lib-release: +.PHONY: lib lib-release libzstd.a +lib lib-release : @$(MAKE) -C $(ZSTDDIR) $@ .PHONY: zstd zstd-release @@ -59,8 +59,8 @@ zstdmt: cp $(PRGDIR)/zstd$(EXT) ./zstdmt$(EXT) .PHONY: zlibwrapper -zlibwrapper: - $(MAKE) -C $(ZWRAPDIR) test +zlibwrapper: lib + $(MAKE) -C $(ZWRAPDIR) all .PHONY: test test: MOREFLAGS += -g -DDEBUGLEVEL=1 -Werror @@ -88,6 +88,7 @@ contrib: lib $(MAKE) -C contrib/pzstd all $(MAKE) -C contrib/seekable_format/examples all $(MAKE) -C contrib/adaptive-compression all + $(MAKE) -C contrib/largeNbDicts all .PHONY: cleanTabs cleanTabs: @@ -104,6 +105,7 @@ clean: @$(MAKE) -C contrib/pzstd $@ > $(VOID) @$(MAKE) -C contrib/seekable_format/examples $@ > $(VOID) @$(MAKE) -C contrib/adaptive-compression $@ > $(VOID) + @$(MAKE) -C contrib/largeNbDicts $@ > $(VOID) @$(RM) zstd$(EXT) zstdmt$(EXT) tmp* @$(RM) -r lz4 @echo Cleaning completed @@ -114,11 +116,26 @@ clean: ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD DragonFly NetBSD MSYS_NT)) HOST_OS = POSIX -CMAKE_PARAMS = -DZSTD_BUILD_CONTRIB:BOOL=ON -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_ZLIB_SUPPORT:BOOL=ON -DZSTD_LZMA_SUPPORT:BOOL=ON -DCMAKE_BUILD_TYPE=Release +CMAKE_PARAMS = -DZSTD_BUILD_CONTRIB:BOOL=ON -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_ZLIB_SUPPORT:BOOL=ON -DZSTD_LZMA_SUPPORT:BOOL=ON -DCMAKE_BUILD_TYPE=Release +# Print a two column output of targets and their description. To add a target description, put a +# comment in the Makefile with the format "## : ". For example: +# +## list: Print all targets and their descriptions (if provided) .PHONY: list list: - @$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs + @TARGETS=$$($(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null \ + | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' \ + | egrep -v -e '^[^[:alnum:]]' | sort); \ + { \ + printf "Target Name\tDescription\n"; \ + printf "%0.s-" {1..16}; printf "\t"; printf "%0.s-" {1..40}; printf "\n"; \ + for target in $$TARGETS; do \ + line=$$(egrep "^##[[:space:]]+$$target:" $(lastword $(MAKEFILE_LIST))); \ + description=$$(echo $$line | awk '{i=index($$0,":"); print substr($$0,i+1)}' | xargs); \ + printf "$$target\t$$description\n"; \ + done \ + } | column -t -s $$'\t' .PHONY: install clangtest armtest usan asan uasan install: @@ -198,7 +215,7 @@ gcc6test: clean clangtest: clean clang -v - $(MAKE) all CXX=clang-++ CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion -Wdocumentation" + $(MAKE) all CXX=clang++ CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion -Wdocumentation" armtest: clean $(MAKE) -C $(TESTDIR) datagen # use native, faster @@ -351,7 +368,10 @@ bmi32build: clean $(CC) -v CFLAGS="-O3 -mbmi -m32 -Werror" $(MAKE) -C $(TESTDIR) test -staticAnalyze: clean +# static analyzer test uses clang's scan-build +# does not analyze zlibWrapper, due to detected issues in zlib source code +staticAnalyze: SCANBUILD ?= scan-build +staticAnalyze: $(CC) -v - CPPFLAGS=-g scan-build --status-bugs -v $(MAKE) all + CC=$(CC) CPPFLAGS=-g $(SCANBUILD) --status-bugs -v $(MAKE) allzstd examples contrib endif diff --git a/NEWS b/NEWS index e3bfb242d..1553f5ad8 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,8 @@ +v1.3.6 +perf: much faster dictionary builder, by @jenniferliu +api : reduced DDict size by 2 KB +misc: tests/paramgrill, a parameter optimizer, by @GeorgeLu97 + v1.3.5 perf: much faster dictionary compression, by @felixhandte perf: small quality improvement for dictionary generation, by @terrelln diff --git a/build/VS2008/fuzzer/fuzzer.vcproj b/build/VS2008/fuzzer/fuzzer.vcproj index f6ea1f855..4d444caef 100644 --- a/build/VS2008/fuzzer/fuzzer.vcproj +++ b/build/VS2008/fuzzer/fuzzer.vcproj @@ -336,6 +336,10 @@ RelativePath="..\..\..\lib\dictBuilder\cover.c" > + + @@ -482,6 +486,10 @@ RelativePath="..\..\..\lib\dictBuilder\zdict.h" > + + diff --git a/build/VS2008/zstd/zstd.vcproj b/build/VS2008/zstd/zstd.vcproj index 5d9f68327..46d7c85dd 100644 --- a/build/VS2008/zstd/zstd.vcproj +++ b/build/VS2008/zstd/zstd.vcproj @@ -348,6 +348,10 @@ RelativePath="..\..\..\lib\dictBuilder\cover.c" > + + @@ -522,6 +526,10 @@ RelativePath="..\..\..\lib\dictBuilder\zdict.h" > + + diff --git a/build/VS2008/zstdlib/zstdlib.vcproj b/build/VS2008/zstdlib/zstdlib.vcproj index 7234b02ac..950a6f39c 100644 --- a/build/VS2008/zstdlib/zstdlib.vcproj +++ b/build/VS2008/zstdlib/zstdlib.vcproj @@ -332,6 +332,10 @@ RelativePath="..\..\..\lib\dictBuilder\cover.c" > + + @@ -502,6 +506,10 @@ RelativePath="..\..\..\lib\dictBuilder\zdict.h" > + + diff --git a/build/VS2010/fuzzer/fuzzer.vcxproj b/build/VS2010/fuzzer/fuzzer.vcxproj index f0d1ab066..6077cd2c1 100644 --- a/build/VS2010/fuzzer/fuzzer.vcxproj +++ b/build/VS2010/fuzzer/fuzzer.vcxproj @@ -176,6 +176,7 @@ + @@ -199,6 +200,7 @@ + diff --git a/build/VS2010/libzstd-dll/libzstd-dll.vcxproj b/build/VS2010/libzstd-dll/libzstd-dll.vcxproj index 92d518d3d..6e14e020e 100644 --- a/build/VS2010/libzstd-dll/libzstd-dll.vcxproj +++ b/build/VS2010/libzstd-dll/libzstd-dll.vcxproj @@ -43,6 +43,7 @@ + diff --git a/build/VS2010/libzstd/libzstd.vcxproj b/build/VS2010/libzstd/libzstd.vcxproj index c306fcec9..18f5cb533 100644 --- a/build/VS2010/libzstd/libzstd.vcxproj +++ b/build/VS2010/libzstd/libzstd.vcxproj @@ -43,6 +43,7 @@ + diff --git a/build/VS2010/zstd/zstd.vcxproj b/build/VS2010/zstd/zstd.vcxproj index 4af281326..936960b58 100644 --- a/build/VS2010/zstd/zstd.vcxproj +++ b/build/VS2010/zstd/zstd.vcxproj @@ -40,6 +40,7 @@ + @@ -61,6 +62,7 @@ + diff --git a/build/cmake/contrib/gen_html/CMakeLists.txt b/build/cmake/contrib/gen_html/CMakeLists.txt index 958e9d173..c4062d475 100644 --- a/build/cmake/contrib/gen_html/CMakeLists.txt +++ b/build/cmake/contrib/gen_html/CMakeLists.txt @@ -27,4 +27,4 @@ ADD_CUSTOM_TARGET(zstd_manual.html ALL ${GENHTML_BINARY} "${LIBVERSION}" "${LIBRARY_DIR}/zstd.h" "${PROJECT_BINARY_DIR}/zstd_manual.html" DEPENDS gen_html COMMENT "Update zstd manual") -INSTALL(FILES "${PROJECT_BINARY_DIR}/zstd_manual.html" DESTINATION "${CMAKE_INSTALL_PREFIX}/${DOC_INSTALL_DIR}") +INSTALL(FILES "${PROJECT_BINARY_DIR}/zstd_manual.html" DESTINATION "${CMAKE_INSTALL_DOCDIR}") diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt index e84e06301..ffc196ddc 100644 --- a/build/cmake/lib/CMakeLists.txt +++ b/build/cmake/lib/CMakeLists.txt @@ -47,6 +47,7 @@ SET(Sources ${LIBRARY_DIR}/decompress/huf_decompress.c ${LIBRARY_DIR}/decompress/zstd_decompress.c ${LIBRARY_DIR}/dictBuilder/cover.c + ${LIBRARY_DIR}/dictBuilder/fastcover.c ${LIBRARY_DIR}/dictBuilder/divsufsort.c ${LIBRARY_DIR}/dictBuilder/zdict.c ${LIBRARY_DIR}/deprecated/zbuff_common.c @@ -74,6 +75,7 @@ SET(Headers ${LIBRARY_DIR}/compress/zstd_ldm.h ${LIBRARY_DIR}/compress/zstdmt_compress.h ${LIBRARY_DIR}/dictBuilder/zdict.h + ${LIBRARY_DIR}/dictBuilder/cover.h ${LIBRARY_DIR}/deprecated/zbuff.h) IF (ZSTD_LEGACY_SUPPORT) @@ -178,6 +180,7 @@ INSTALL(FILES ${LIBRARY_DIR}/zstd.h ${LIBRARY_DIR}/deprecated/zbuff.h ${LIBRARY_DIR}/dictBuilder/zdict.h + ${LIBRARY_DIR}/dictBuilder/cover.h ${LIBRARY_DIR}/common/zstd_errors.h DESTINATION "include") diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/Makefile b/contrib/experimental_dict_builders/benchmarkDictBuilder/Makefile index 681494888..72ce04f2a 100644 --- a/contrib/experimental_dict_builders/benchmarkDictBuilder/Makefile +++ b/contrib/experimental_dict_builders/benchmarkDictBuilder/Makefile @@ -2,10 +2,9 @@ ARG := CC ?= gcc CFLAGS ?= -O3 -INCLUDES := -I ../randomDictBuilder -I ../fastCover -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder +INCLUDES := -I ../randomDictBuilder -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder RANDOM_FILE := ../randomDictBuilder/random.c -FAST_FILE := ../fastCover/fastCover.c IO_FILE := ../randomDictBuilder/io.c all: run clean @@ -22,8 +21,8 @@ test: benchmarkTest clean benchmarkTest: benchmark test.sh sh test.sh -benchmark: benchmark.o io.o random.o fastCover.o libzstd.a - $(CC) $(CFLAGS) benchmark.o io.o random.o fastCover.o libzstd.a -o benchmark +benchmark: benchmark.o io.o random.o libzstd.a + $(CC) $(CFLAGS) benchmark.o io.o random.o libzstd.a -o benchmark benchmark.o: benchmark.c $(CC) $(CFLAGS) $(INCLUDES) -c benchmark.c @@ -31,9 +30,6 @@ benchmark.o: benchmark.c random.o: $(RANDOM_FILE) $(CC) $(CFLAGS) $(INCLUDES) -c $(RANDOM_FILE) -fastCover.o: $(FAST_FILE) - $(CC) $(CFLAGS) $(INCLUDES) -c $(FAST_FILE) - io.o: $(IO_FILE) $(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE) diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md b/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md index 559776e2b..6a6c7f1d2 100644 --- a/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md +++ b/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md @@ -14,113 +14,836 @@ make ARG="in=../../../lib/dictBuilder in=../../../lib/compress" ###Benchmarking Result: - First Cover is optimize cover, second Cover uses optimized d and k from first one. -- For every f value of fastCover, the first one is optimize fastCover and the second one uses optimized d and k from first one. +- For every f value of fastCover, the first one is optimize fastCover and the second one uses optimized d and k from first one. This is run for accel values from 1 to 10. - Fourth column is chosen d and fifth column is chosen k github: -NODICT 0.000025 2.999642 -RANDOM 0.030101 8.791189 -LEGACY 0.913108 8.173529 -COVER 59.234160 10.652243 8 1298 -COVER 6.258459 10.652243 8 1298 -FAST15 9.959246 10.555630 8 1874 -FAST15 0.077719 10.555630 8 1874 -FAST16 10.028343 10.701698 8 1106 -FAST16 0.078117 10.701698 8 1106 -FAST17 10.567355 10.650652 8 1106 -FAST17 0.124833 10.650652 8 1106 -FAST18 11.795287 10.499142 8 1826 -FAST18 0.086992 10.499142 8 1826 -FAST19 13.132451 10.527140 8 1826 -FAST19 0.134716 10.527140 8 1826 -FAST20 14.366314 10.494710 8 1826 -FAST20 0.128844 10.494710 8 1826 -FAST21 14.941238 10.503488 8 1778 -FAST21 0.134975 10.503488 8 1778 -FAST22 15.146226 10.509284 8 1826 -FAST22 0.146918 10.509284 8 1826 -FAST23 16.260552 10.509284 8 1826 -FAST23 0.158494 10.509284 8 1826 -FAST24 16.806037 10.512369 8 1826 -FAST24 0.190464 10.512369 8 1826 +NODICT 0.000004 2.999642 +RANDOM 0.024560 8.791189 +LEGACY 0.727109 8.173529 +COVER 40.565676 10.652243 8 1298 +COVER 3.608284 10.652243 8 1298 +FAST f=15 a=1 4.181024 10.570882 8 1154 +FAST f=15 a=1 0.040788 10.570882 8 1154 +FAST f=15 a=2 3.548352 10.574287 6 1970 +FAST f=15 a=2 0.035535 10.574287 6 1970 +FAST f=15 a=3 3.287364 10.613950 6 1010 +FAST f=15 a=3 0.032182 10.613950 6 1010 +FAST f=15 a=4 3.184976 10.573883 6 1058 +FAST f=15 a=4 0.029878 10.573883 6 1058 +FAST f=15 a=5 3.045513 10.580640 8 1154 +FAST f=15 a=5 0.022162 10.580640 8 1154 +FAST f=15 a=6 3.003296 10.583677 6 1010 +FAST f=15 a=6 0.028091 10.583677 6 1010 +FAST f=15 a=7 2.952655 10.622551 6 1106 +FAST f=15 a=7 0.02724 10.622551 6 1106 +FAST f=15 a=8 2.945674 10.614657 6 1010 +FAST f=15 a=8 0.027264 10.614657 6 1010 +FAST f=15 a=9 3.153439 10.564018 8 1154 +FAST f=15 a=9 0.020635 10.564018 8 1154 +FAST f=15 a=10 2.950416 10.511454 6 1010 +FAST f=15 a=10 0.026606 10.511454 6 1010 +FAST f=16 a=1 3.970029 10.681035 8 1154 +FAST f=16 a=1 0.038188 10.681035 8 1154 +FAST f=16 a=2 3.422892 10.484978 6 1874 +FAST f=16 a=2 0.034702 10.484978 6 1874 +FAST f=16 a=3 3.215836 10.632631 8 1154 +FAST f=16 a=3 0.026084 10.632631 8 1154 +FAST f=16 a=4 3.081353 10.626533 6 1106 +FAST f=16 a=4 0.030032 10.626533 6 1106 +FAST f=16 a=5 3.041241 10.545027 8 1922 +FAST f=16 a=5 0.022882 10.545027 8 1922 +FAST f=16 a=6 2.989390 10.638284 6 1874 +FAST f=16 a=6 0.028308 10.638284 6 1874 +FAST f=16 a=7 3.001581 10.797136 6 1106 +FAST f=16 a=7 0.027479 10.797136 6 1106 +FAST f=16 a=8 2.984107 10.658356 8 1058 +FAST f=16 a=8 0.021099 10.658356 8 1058 +FAST f=16 a=9 2.925788 10.523869 6 1010 +FAST f=16 a=9 0.026905 10.523869 6 1010 +FAST f=16 a=10 2.889605 10.745841 6 1874 +FAST f=16 a=10 0.026846 10.745841 6 1874 +FAST f=17 a=1 4.031953 10.672080 8 1202 +FAST f=17 a=1 0.040658 10.672080 8 1202 +FAST f=17 a=2 3.458107 10.589352 8 1106 +FAST f=17 a=2 0.02926 10.589352 8 1106 +FAST f=17 a=3 3.291189 10.662714 8 1154 +FAST f=17 a=3 0.026531 10.662714 8 1154 +FAST f=17 a=4 3.154950 10.549456 8 1346 +FAST f=17 a=4 0.024991 10.549456 8 1346 +FAST f=17 a=5 3.092271 10.541670 6 1202 +FAST f=17 a=5 0.038285 10.541670 6 1202 +FAST f=17 a=6 3.166146 10.729112 6 1874 +FAST f=17 a=6 0.038217 10.729112 6 1874 +FAST f=17 a=7 3.035467 10.810485 6 1106 +FAST f=17 a=7 0.036655 10.810485 6 1106 +FAST f=17 a=8 3.035668 10.530532 6 1058 +FAST f=17 a=8 0.037715 10.530532 6 1058 +FAST f=17 a=9 2.987917 10.589802 8 1922 +FAST f=17 a=9 0.02217 10.589802 8 1922 +FAST f=17 a=10 2.981647 10.722579 8 1106 +FAST f=17 a=10 0.021948 10.722579 8 1106 +FAST f=18 a=1 4.067144 10.634943 8 1154 +FAST f=18 a=1 0.041386 10.634943 8 1154 +FAST f=18 a=2 3.507377 10.546230 6 1970 +FAST f=18 a=2 0.037572 10.546230 6 1970 +FAST f=18 a=3 3.323015 10.648061 8 1154 +FAST f=18 a=3 0.028306 10.648061 8 1154 +FAST f=18 a=4 3.216735 10.705402 6 1010 +FAST f=18 a=4 0.030755 10.705402 6 1010 +FAST f=18 a=5 3.175794 10.588154 8 1874 +FAST f=18 a=5 0.025315 10.588154 8 1874 +FAST f=18 a=6 3.127459 10.751104 8 1106 +FAST f=18 a=6 0.023897 10.751104 8 1106 +FAST f=18 a=7 3.083017 10.780402 6 1106 +FAST f=18 a=7 0.029158 10.780402 6 1106 +FAST f=18 a=8 3.069700 10.547226 8 1346 +FAST f=18 a=8 0.024046 10.547226 8 1346 +FAST f=18 a=9 3.056591 10.674759 6 1010 +FAST f=18 a=9 0.028496 10.674759 6 1010 +FAST f=18 a=10 3.063588 10.737578 8 1106 +FAST f=18 a=10 0.023033 10.737578 8 1106 +FAST f=19 a=1 4.164041 10.650333 8 1154 +FAST f=19 a=1 0.042906 10.650333 8 1154 +FAST f=19 a=2 3.585409 10.577066 6 1058 +FAST f=19 a=2 0.038994 10.577066 6 1058 +FAST f=19 a=3 3.439643 10.639403 8 1154 +FAST f=19 a=3 0.028427 10.639403 8 1154 +FAST f=19 a=4 3.268869 10.554410 8 1298 +FAST f=19 a=4 0.026866 10.554410 8 1298 +FAST f=19 a=5 3.238225 10.615109 6 1010 +FAST f=19 a=5 0.03078 10.615109 6 1010 +FAST f=19 a=6 3.199558 10.609782 6 1874 +FAST f=19 a=6 0.030099 10.609782 6 1874 +FAST f=19 a=7 3.132395 10.794753 6 1106 +FAST f=19 a=7 0.028964 10.794753 6 1106 +FAST f=19 a=8 3.148446 10.554842 8 1298 +FAST f=19 a=8 0.024277 10.554842 8 1298 +FAST f=19 a=9 3.108324 10.668763 6 1010 +FAST f=19 a=9 0.02896 10.668763 6 1010 +FAST f=19 a=10 3.159863 10.757347 8 1106 +FAST f=19 a=10 0.023351 10.757347 8 1106 +FAST f=20 a=1 4.462698 10.661788 8 1154 +FAST f=20 a=1 0.047174 10.661788 8 1154 +FAST f=20 a=2 3.820269 10.678612 6 1106 +FAST f=20 a=2 0.040807 10.678612 6 1106 +FAST f=20 a=3 3.644955 10.648424 8 1154 +FAST f=20 a=3 0.031398 10.648424 8 1154 +FAST f=20 a=4 3.546257 10.559756 8 1298 +FAST f=20 a=4 0.029856 10.559756 8 1298 +FAST f=20 a=5 3.485248 10.646637 6 1010 +FAST f=20 a=5 0.033756 10.646637 6 1010 +FAST f=20 a=6 3.490438 10.775824 8 1106 +FAST f=20 a=6 0.028338 10.775824 8 1106 +FAST f=20 a=7 3.631289 10.801795 6 1106 +FAST f=20 a=7 0.035228 10.801795 6 1106 +FAST f=20 a=8 3.758936 10.545116 8 1346 +FAST f=20 a=8 0.027495 10.545116 8 1346 +FAST f=20 a=9 3.707024 10.677454 6 1010 +FAST f=20 a=9 0.031326 10.677454 6 1010 +FAST f=20 a=10 3.586593 10.756017 8 1106 +FAST f=20 a=10 0.027122 10.756017 8 1106 +FAST f=21 a=1 5.701396 10.655398 8 1154 +FAST f=21 a=1 0.067744 10.655398 8 1154 +FAST f=21 a=2 5.270542 10.650743 6 1106 +FAST f=21 a=2 0.052999 10.650743 6 1106 +FAST f=21 a=3 4.945294 10.652380 8 1154 +FAST f=21 a=3 0.052678 10.652380 8 1154 +FAST f=21 a=4 4.894079 10.543185 8 1298 +FAST f=21 a=4 0.04997 10.543185 8 1298 +FAST f=21 a=5 4.785417 10.630321 6 1010 +FAST f=21 a=5 0.045294 10.630321 6 1010 +FAST f=21 a=6 4.789381 10.664477 6 1874 +FAST f=21 a=6 0.046578 10.664477 6 1874 +FAST f=21 a=7 4.302955 10.805179 6 1106 +FAST f=21 a=7 0.041205 10.805179 6 1106 +FAST f=21 a=8 4.034630 10.551211 8 1298 +FAST f=21 a=8 0.040121 10.551211 8 1298 +FAST f=21 a=9 4.523868 10.799114 6 1010 +FAST f=21 a=9 0.043592 10.799114 6 1010 +FAST f=21 a=10 4.760736 10.750255 8 1106 +FAST f=21 a=10 0.043483 10.750255 8 1106 +FAST f=22 a=1 6.743064 10.640537 8 1154 +FAST f=22 a=1 0.086967 10.640537 8 1154 +FAST f=22 a=2 6.121739 10.626638 6 1970 +FAST f=22 a=2 0.066337 10.626638 6 1970 +FAST f=22 a=3 5.248851 10.640688 8 1154 +FAST f=22 a=3 0.054935 10.640688 8 1154 +FAST f=22 a=4 5.436579 10.588333 8 1298 +FAST f=22 a=4 0.064113 10.588333 8 1298 +FAST f=22 a=5 5.812815 10.652653 6 1010 +FAST f=22 a=5 0.058189 10.652653 6 1010 +FAST f=22 a=6 5.745472 10.666437 6 1874 +FAST f=22 a=6 0.057188 10.666437 6 1874 +FAST f=22 a=7 5.716393 10.806911 6 1106 +FAST f=22 a=7 0.056 10.806911 6 1106 +FAST f=22 a=8 5.698799 10.530784 8 1298 +FAST f=22 a=8 0.0583 10.530784 8 1298 +FAST f=22 a=9 5.710533 10.777391 6 1010 +FAST f=22 a=9 0.054945 10.777391 6 1010 +FAST f=22 a=10 5.685395 10.745023 8 1106 +FAST f=22 a=10 0.056526 10.745023 8 1106 +FAST f=23 a=1 7.836923 10.638828 8 1154 +FAST f=23 a=1 0.099522 10.638828 8 1154 +FAST f=23 a=2 6.627834 10.631061 6 1970 +FAST f=23 a=2 0.066769 10.631061 6 1970 +FAST f=23 a=3 5.602533 10.647288 8 1154 +FAST f=23 a=3 0.064513 10.647288 8 1154 +FAST f=23 a=4 6.005580 10.568747 8 1298 +FAST f=23 a=4 0.062022 10.568747 8 1298 +FAST f=23 a=5 5.481816 10.676921 6 1010 +FAST f=23 a=5 0.058959 10.676921 6 1010 +FAST f=23 a=6 5.460444 10.666194 6 1874 +FAST f=23 a=6 0.057687 10.666194 6 1874 +FAST f=23 a=7 5.659822 10.800377 6 1106 +FAST f=23 a=7 0.06783 10.800377 6 1106 +FAST f=23 a=8 6.826940 10.522167 8 1298 +FAST f=23 a=8 0.070533 10.522167 8 1298 +FAST f=23 a=9 6.804757 10.577799 8 1682 +FAST f=23 a=9 0.069949 10.577799 8 1682 +FAST f=23 a=10 6.774933 10.742093 8 1106 +FAST f=23 a=10 0.068395 10.742093 8 1106 +FAST f=24 a=1 8.444110 10.632783 8 1154 +FAST f=24 a=1 0.094357 10.632783 8 1154 +FAST f=24 a=2 7.289578 10.631061 6 1970 +FAST f=24 a=2 0.098515 10.631061 6 1970 +FAST f=24 a=3 8.619780 10.646289 8 1154 +FAST f=24 a=3 0.098041 10.646289 8 1154 +FAST f=24 a=4 8.508455 10.555199 8 1298 +FAST f=24 a=4 0.093885 10.555199 8 1298 +FAST f=24 a=5 8.471145 10.674363 6 1010 +FAST f=24 a=5 0.088676 10.674363 6 1010 +FAST f=24 a=6 8.426727 10.667228 6 1874 +FAST f=24 a=6 0.087247 10.667228 6 1874 +FAST f=24 a=7 8.356826 10.803027 6 1106 +FAST f=24 a=7 0.085835 10.803027 6 1106 +FAST f=24 a=8 6.756811 10.522049 8 1298 +FAST f=24 a=8 0.07107 10.522049 8 1298 +FAST f=24 a=9 6.548169 10.571882 8 1682 +FAST f=24 a=9 0.0713 10.571882 8 1682 +FAST f=24 a=10 8.238079 10.736453 8 1106 +FAST f=24 a=10 0.07004 10.736453 8 1106 + hg-commands: -NODICT 0.000026 2.425291 -RANDOM 0.046270 3.490331 -LEGACY 0.847904 3.911682 -COVER 71.691804 4.132653 8 386 -COVER 3.187085 4.132653 8 386 -FAST15 11.593687 3.920720 6 1106 -FAST15 0.082431 3.920720 6 1106 -FAST16 11.775958 4.033306 8 674 -FAST16 0.092587 4.033306 8 674 -FAST17 11.965064 4.064132 8 1490 -FAST17 0.106382 4.064132 8 1490 -FAST18 11.438197 4.086714 8 290 -FAST18 0.097293 4.086714 8 290 -FAST19 12.292512 4.097947 8 578 -FAST19 0.104406 4.097947 8 578 -FAST20 13.857857 4.102851 8 434 -FAST20 0.139467 4.102851 8 434 -FAST21 14.599613 4.105350 8 530 -FAST21 0.189416 4.105350 8 530 -FAST22 15.966109 4.104100 8 530 -FAST22 0.183817 4.104100 8 530 -FAST23 18.033645 4.098110 8 914 -FAST23 0.246641 4.098110 8 914 -FAST24 22.992891 4.117367 8 722 -FAST24 0.285994 4.117367 8 722 +NODICT 0.000005 2.425276 +RANDOM 0.046332 3.490331 +LEGACY 0.720351 3.911682 +COVER 45.507731 4.132653 8 386 +COVER 1.868810 4.132653 8 386 +FAST f=15 a=1 4.561427 3.866894 8 1202 +FAST f=15 a=1 0.048946 3.866894 8 1202 +FAST f=15 a=2 3.574462 3.892119 8 1538 +FAST f=15 a=2 0.033677 3.892119 8 1538 +FAST f=15 a=3 3.230227 3.888791 6 1346 +FAST f=15 a=3 0.034312 3.888791 6 1346 +FAST f=15 a=4 3.042388 3.899739 8 1010 +FAST f=15 a=4 0.024307 3.899739 8 1010 +FAST f=15 a=5 2.800148 3.896220 8 818 +FAST f=15 a=5 0.022331 3.896220 8 818 +FAST f=15 a=6 2.706518 3.882039 8 578 +FAST f=15 a=6 0.020955 3.882039 8 578 +FAST f=15 a=7 2.701820 3.885430 6 866 +FAST f=15 a=7 0.026074 3.885430 6 866 +FAST f=15 a=8 2.604445 3.906932 8 1826 +FAST f=15 a=8 0.021789 3.906932 8 1826 +FAST f=15 a=9 2.598568 3.870324 6 1682 +FAST f=15 a=9 0.026004 3.870324 6 1682 +FAST f=15 a=10 2.575920 3.920783 8 1442 +FAST f=15 a=10 0.020228 3.920783 8 1442 +FAST f=16 a=1 4.630623 4.001430 8 770 +FAST f=16 a=1 0.047497 4.001430 8 770 +FAST f=16 a=2 3.674721 3.974431 8 1874 +FAST f=16 a=2 0.035761 3.974431 8 1874 +FAST f=16 a=3 3.338384 3.978703 8 1010 +FAST f=16 a=3 0.029436 3.978703 8 1010 +FAST f=16 a=4 3.004412 3.983035 8 1010 +FAST f=16 a=4 0.025744 3.983035 8 1010 +FAST f=16 a=5 2.881892 3.987710 8 770 +FAST f=16 a=5 0.023211 3.987710 8 770 +FAST f=16 a=6 2.807410 3.952717 8 1298 +FAST f=16 a=6 0.023199 3.952717 8 1298 +FAST f=16 a=7 2.819623 3.994627 8 770 +FAST f=16 a=7 0.021806 3.994627 8 770 +FAST f=16 a=8 2.740092 3.954032 8 1826 +FAST f=16 a=8 0.0226 3.954032 8 1826 +FAST f=16 a=9 2.682564 3.969879 6 1442 +FAST f=16 a=9 0.026324 3.969879 6 1442 +FAST f=16 a=10 2.657959 3.969755 8 674 +FAST f=16 a=10 0.020413 3.969755 8 674 +FAST f=17 a=1 4.729228 4.046000 8 530 +FAST f=17 a=1 0.049703 4.046000 8 530 +FAST f=17 a=2 3.764510 3.991519 8 1970 +FAST f=17 a=2 0.038195 3.991519 8 1970 +FAST f=17 a=3 3.416992 4.006296 6 914 +FAST f=17 a=3 0.036244 4.006296 6 914 +FAST f=17 a=4 3.145626 3.979182 8 1970 +FAST f=17 a=4 0.028676 3.979182 8 1970 +FAST f=17 a=5 2.995070 4.050070 8 770 +FAST f=17 a=5 0.025707 4.050070 8 770 +FAST f=17 a=6 2.911833 4.040024 8 770 +FAST f=17 a=6 0.02453 4.040024 8 770 +FAST f=17 a=7 2.894796 4.015884 8 818 +FAST f=17 a=7 0.023956 4.015884 8 818 +FAST f=17 a=8 2.789962 4.039303 8 530 +FAST f=17 a=8 0.023219 4.039303 8 530 +FAST f=17 a=9 2.787625 3.996762 8 1634 +FAST f=17 a=9 0.023651 3.996762 8 1634 +FAST f=17 a=10 2.754796 4.005059 8 1058 +FAST f=17 a=10 0.022537 4.005059 8 1058 +FAST f=18 a=1 4.779117 4.038214 8 242 +FAST f=18 a=1 0.048814 4.038214 8 242 +FAST f=18 a=2 3.829753 4.045768 8 722 +FAST f=18 a=2 0.036541 4.045768 8 722 +FAST f=18 a=3 3.495053 4.021497 8 770 +FAST f=18 a=3 0.032648 4.021497 8 770 +FAST f=18 a=4 3.221395 4.039623 8 770 +FAST f=18 a=4 0.027818 4.039623 8 770 +FAST f=18 a=5 3.059369 4.050414 8 530 +FAST f=18 a=5 0.026296 4.050414 8 530 +FAST f=18 a=6 3.019292 4.010714 6 962 +FAST f=18 a=6 0.031104 4.010714 6 962 +FAST f=18 a=7 2.949322 4.031439 6 770 +FAST f=18 a=7 0.030745 4.031439 6 770 +FAST f=18 a=8 2.876425 4.032088 6 386 +FAST f=18 a=8 0.027407 4.032088 6 386 +FAST f=18 a=9 2.850958 4.053372 8 674 +FAST f=18 a=9 0.023799 4.053372 8 674 +FAST f=18 a=10 2.884352 4.020148 8 1730 +FAST f=18 a=10 0.024401 4.020148 8 1730 +FAST f=19 a=1 4.815669 4.061203 8 674 +FAST f=19 a=1 0.051425 4.061203 8 674 +FAST f=19 a=2 3.951356 4.013822 8 1442 +FAST f=19 a=2 0.039968 4.013822 8 1442 +FAST f=19 a=3 3.554682 4.050425 8 722 +FAST f=19 a=3 0.032725 4.050425 8 722 +FAST f=19 a=4 3.242585 4.054677 8 722 +FAST f=19 a=4 0.028194 4.054677 8 722 +FAST f=19 a=5 3.105909 4.064524 8 818 +FAST f=19 a=5 0.02675 4.064524 8 818 +FAST f=19 a=6 3.059901 4.036857 8 1250 +FAST f=19 a=6 0.026396 4.036857 8 1250 +FAST f=19 a=7 3.016151 4.068234 6 770 +FAST f=19 a=7 0.031501 4.068234 6 770 +FAST f=19 a=8 2.962902 4.077509 8 530 +FAST f=19 a=8 0.023333 4.077509 8 530 +FAST f=19 a=9 2.899607 4.067328 8 530 +FAST f=19 a=9 0.024553 4.067328 8 530 +FAST f=19 a=10 2.950978 4.059901 8 434 +FAST f=19 a=10 0.023852 4.059901 8 434 +FAST f=20 a=1 5.259834 4.027579 8 1634 +FAST f=20 a=1 0.061123 4.027579 8 1634 +FAST f=20 a=2 4.382150 4.025093 8 1634 +FAST f=20 a=2 0.048009 4.025093 8 1634 +FAST f=20 a=3 4.104323 4.060842 8 530 +FAST f=20 a=3 0.040965 4.060842 8 530 +FAST f=20 a=4 3.853340 4.023504 6 914 +FAST f=20 a=4 0.041072 4.023504 6 914 +FAST f=20 a=5 3.728841 4.018089 6 1634 +FAST f=20 a=5 0.037469 4.018089 6 1634 +FAST f=20 a=6 3.683045 4.069138 8 578 +FAST f=20 a=6 0.028011 4.069138 8 578 +FAST f=20 a=7 3.726973 4.063160 8 722 +FAST f=20 a=7 0.028437 4.063160 8 722 +FAST f=20 a=8 3.555073 4.057690 8 386 +FAST f=20 a=8 0.027588 4.057690 8 386 +FAST f=20 a=9 3.551095 4.067253 8 482 +FAST f=20 a=9 0.025976 4.067253 8 482 +FAST f=20 a=10 3.490127 4.068518 8 530 +FAST f=20 a=10 0.025971 4.068518 8 530 +FAST f=21 a=1 7.343816 4.064945 8 770 +FAST f=21 a=1 0.085035 4.064945 8 770 +FAST f=21 a=2 5.930894 4.048206 8 386 +FAST f=21 a=2 0.067349 4.048206 8 386 +FAST f=21 a=3 6.770775 4.063417 8 578 +FAST f=21 a=3 0.077104 4.063417 8 578 +FAST f=21 a=4 6.889409 4.066761 8 626 +FAST f=21 a=4 0.0717 4.066761 8 626 +FAST f=21 a=5 6.714896 4.051813 8 914 +FAST f=21 a=5 0.071026 4.051813 8 914 +FAST f=21 a=6 6.539890 4.047263 8 1922 +FAST f=21 a=6 0.07127 4.047263 8 1922 +FAST f=21 a=7 6.511052 4.068373 8 482 +FAST f=21 a=7 0.065467 4.068373 8 482 +FAST f=21 a=8 6.458788 4.071597 8 482 +FAST f=21 a=8 0.063817 4.071597 8 482 +FAST f=21 a=9 6.377591 4.052905 8 434 +FAST f=21 a=9 0.063112 4.052905 8 434 +FAST f=21 a=10 6.360752 4.047773 8 530 +FAST f=21 a=10 0.063606 4.047773 8 530 +FAST f=22 a=1 10.523471 4.040812 8 962 +FAST f=22 a=1 0.14214 4.040812 8 962 +FAST f=22 a=2 9.454758 4.059396 8 914 +FAST f=22 a=2 0.118343 4.059396 8 914 +FAST f=22 a=3 9.043197 4.043019 8 1922 +FAST f=22 a=3 0.109798 4.043019 8 1922 +FAST f=22 a=4 8.716261 4.044819 8 770 +FAST f=22 a=4 0.099687 4.044819 8 770 +FAST f=22 a=5 8.529472 4.070576 8 530 +FAST f=22 a=5 0.093127 4.070576 8 530 +FAST f=22 a=6 8.424241 4.070565 8 722 +FAST f=22 a=6 0.093703 4.070565 8 722 +FAST f=22 a=7 8.403391 4.070591 8 578 +FAST f=22 a=7 0.089763 4.070591 8 578 +FAST f=22 a=8 8.285221 4.089171 8 530 +FAST f=22 a=8 0.087716 4.089171 8 530 +FAST f=22 a=9 8.282506 4.047470 8 722 +FAST f=22 a=9 0.089773 4.047470 8 722 +FAST f=22 a=10 8.241809 4.064151 8 818 +FAST f=22 a=10 0.090413 4.064151 8 818 +FAST f=23 a=1 12.389208 4.051635 6 530 +FAST f=23 a=1 0.147796 4.051635 6 530 +FAST f=23 a=2 11.300910 4.042835 6 914 +FAST f=23 a=2 0.133178 4.042835 6 914 +FAST f=23 a=3 10.879455 4.047415 8 626 +FAST f=23 a=3 0.129571 4.047415 8 626 +FAST f=23 a=4 10.522718 4.038269 6 914 +FAST f=23 a=4 0.118121 4.038269 6 914 +FAST f=23 a=5 10.348043 4.066884 8 434 +FAST f=23 a=5 0.112098 4.066884 8 434 +FAST f=23 a=6 10.238630 4.048635 8 1010 +FAST f=23 a=6 0.120281 4.048635 8 1010 +FAST f=23 a=7 10.213255 4.061809 8 530 +FAST f=23 a=7 0.1121 4.061809 8 530 +FAST f=23 a=8 10.107879 4.074104 8 818 +FAST f=23 a=8 0.116544 4.074104 8 818 +FAST f=23 a=9 10.063424 4.064811 8 674 +FAST f=23 a=9 0.109045 4.064811 8 674 +FAST f=23 a=10 10.035801 4.054918 8 530 +FAST f=23 a=10 0.108735 4.054918 8 530 +FAST f=24 a=1 14.963878 4.073490 8 722 +FAST f=24 a=1 0.206344 4.073490 8 722 +FAST f=24 a=2 13.833472 4.036100 8 962 +FAST f=24 a=2 0.17486 4.036100 8 962 +FAST f=24 a=3 13.404631 4.026281 6 1106 +FAST f=24 a=3 0.153961 4.026281 6 1106 +FAST f=24 a=4 13.041164 4.065448 8 674 +FAST f=24 a=4 0.155509 4.065448 8 674 +FAST f=24 a=5 12.879412 4.054636 8 674 +FAST f=24 a=5 0.148282 4.054636 8 674 +FAST f=24 a=6 12.773736 4.081376 8 530 +FAST f=24 a=6 0.142563 4.081376 8 530 +FAST f=24 a=7 12.711310 4.059834 8 770 +FAST f=24 a=7 0.149321 4.059834 8 770 +FAST f=24 a=8 12.635459 4.052050 8 1298 +FAST f=24 a=8 0.15095 4.052050 8 1298 +FAST f=24 a=9 12.558104 4.076516 8 722 +FAST f=24 a=9 0.144361 4.076516 8 722 +FAST f=24 a=10 10.661348 4.062137 8 818 +FAST f=24 a=10 0.108232 4.062137 8 818 + hg-changelog: -NODICT 0.000007 1.377613 -RANDOM 0.297345 2.097487 -LEGACY 2.633992 2.058907 -COVER 219.179786 2.189685 8 98 -COVER 6.620852 2.189685 8 98 -FAST15 47.635082 2.130794 6 386 -FAST15 0.321297 2.130794 6 386 -FAST16 43.837676 2.144845 8 194 -FAST16 0.312640 2.144845 8 194 -FAST17 49.349017 2.156099 8 242 -FAST17 0.348459 2.156099 8 242 -FAST18 51.153784 2.172439 6 98 -FAST18 0.353106 2.172439 6 98 -FAST19 52.627045 2.180321 6 98 -FAST19 0.390612 2.180321 6 98 -FAST20 63.748782 2.187431 6 98 -FAST20 0.489544 2.187431 6 98 -FAST21 68.709198 2.184185 6 146 -FAST21 0.530852 2.184185 6 146 -FAST22 68.491639 2.182830 6 98 -FAST22 0.645699 2.182830 6 98 -FAST23 72.558688 2.186399 8 98 -FAST23 0.593539 2.186399 8 98 -FAST24 76.137195 2.185608 6 98 -FAST24 0.680132 2.185608 6 98 +NODICT 0.000017 1.377590 +RANDOM 0.186171 2.097487 +LEGACY 1.670867 2.058907 +COVER 173.561948 2.189685 8 98 +COVER 4.811180 2.189685 8 98 +FAST f=15 a=1 18.685906 2.129682 8 434 +FAST f=15 a=1 0.173376 2.129682 8 434 +FAST f=15 a=2 12.928259 2.131890 8 482 +FAST f=15 a=2 0.102582 2.131890 8 482 +FAST f=15 a=3 11.132343 2.128027 8 386 +FAST f=15 a=3 0.077122 2.128027 8 386 +FAST f=15 a=4 10.120683 2.125797 8 434 +FAST f=15 a=4 0.065175 2.125797 8 434 +FAST f=15 a=5 9.479092 2.127697 8 386 +FAST f=15 a=5 0.057905 2.127697 8 386 +FAST f=15 a=6 9.159523 2.127132 8 1682 +FAST f=15 a=6 0.058604 2.127132 8 1682 +FAST f=15 a=7 8.724003 2.129914 8 434 +FAST f=15 a=7 0.0493 2.129914 8 434 +FAST f=15 a=8 8.595001 2.127137 8 338 +FAST f=15 a=8 0.0474 2.127137 8 338 +FAST f=15 a=9 8.356405 2.125512 8 482 +FAST f=15 a=9 0.046126 2.125512 8 482 +FAST f=15 a=10 8.207111 2.126066 8 338 +FAST f=15 a=10 0.043292 2.126066 8 338 +FAST f=16 a=1 18.464436 2.144040 8 242 +FAST f=16 a=1 0.172156 2.144040 8 242 +FAST f=16 a=2 12.844825 2.148171 8 194 +FAST f=16 a=2 0.099619 2.148171 8 194 +FAST f=16 a=3 11.082568 2.140837 8 290 +FAST f=16 a=3 0.079165 2.140837 8 290 +FAST f=16 a=4 10.066749 2.144405 8 386 +FAST f=16 a=4 0.068411 2.144405 8 386 +FAST f=16 a=5 9.501121 2.140720 8 386 +FAST f=16 a=5 0.061316 2.140720 8 386 +FAST f=16 a=6 9.179332 2.139478 8 386 +FAST f=16 a=6 0.056322 2.139478 8 386 +FAST f=16 a=7 8.849438 2.142412 8 194 +FAST f=16 a=7 0.050493 2.142412 8 194 +FAST f=16 a=8 8.810919 2.143454 8 434 +FAST f=16 a=8 0.051304 2.143454 8 434 +FAST f=16 a=9 8.553900 2.140339 8 194 +FAST f=16 a=9 0.047285 2.140339 8 194 +FAST f=16 a=10 8.398027 2.143130 8 386 +FAST f=16 a=10 0.046386 2.143130 8 386 +FAST f=17 a=1 18.644657 2.157192 8 98 +FAST f=17 a=1 0.173884 2.157192 8 98 +FAST f=17 a=2 13.071242 2.159830 8 146 +FAST f=17 a=2 0.10388 2.159830 8 146 +FAST f=17 a=3 11.332366 2.153654 6 194 +FAST f=17 a=3 0.08983 2.153654 6 194 +FAST f=17 a=4 10.362413 2.156813 8 242 +FAST f=17 a=4 0.070389 2.156813 8 242 +FAST f=17 a=5 9.808159 2.155098 6 338 +FAST f=17 a=5 0.072661 2.155098 6 338 +FAST f=17 a=6 9.451165 2.153845 6 146 +FAST f=17 a=6 0.064959 2.153845 6 146 +FAST f=17 a=7 9.163097 2.155424 6 242 +FAST f=17 a=7 0.064323 2.155424 6 242 +FAST f=17 a=8 9.047276 2.156640 8 242 +FAST f=17 a=8 0.053382 2.156640 8 242 +FAST f=17 a=9 8.807671 2.152396 8 146 +FAST f=17 a=9 0.049617 2.152396 8 146 +FAST f=17 a=10 8.649827 2.152370 8 146 +FAST f=17 a=10 0.047849 2.152370 8 146 +FAST f=18 a=1 18.809502 2.168116 8 98 +FAST f=18 a=1 0.175226 2.168116 8 98 +FAST f=18 a=2 13.756502 2.170870 6 242 +FAST f=18 a=2 0.119507 2.170870 6 242 +FAST f=18 a=3 12.059748 2.163094 6 98 +FAST f=18 a=3 0.093912 2.163094 6 98 +FAST f=18 a=4 11.410294 2.172372 8 98 +FAST f=18 a=4 0.073048 2.172372 8 98 +FAST f=18 a=5 10.560297 2.166388 8 98 +FAST f=18 a=5 0.065136 2.166388 8 98 +FAST f=18 a=6 10.071390 2.162672 8 98 +FAST f=18 a=6 0.059402 2.162672 8 98 +FAST f=18 a=7 10.084214 2.166624 6 194 +FAST f=18 a=7 0.073276 2.166624 6 194 +FAST f=18 a=8 9.953226 2.167454 8 98 +FAST f=18 a=8 0.053659 2.167454 8 98 +FAST f=18 a=9 8.982461 2.161593 6 146 +FAST f=18 a=9 0.05955 2.161593 6 146 +FAST f=18 a=10 8.986092 2.164373 6 242 +FAST f=18 a=10 0.059135 2.164373 6 242 +FAST f=19 a=1 18.908277 2.176021 8 98 +FAST f=19 a=1 0.177316 2.176021 8 98 +FAST f=19 a=2 13.471313 2.176103 8 98 +FAST f=19 a=2 0.106344 2.176103 8 98 +FAST f=19 a=3 11.571406 2.172812 8 98 +FAST f=19 a=3 0.083293 2.172812 8 98 +FAST f=19 a=4 10.632775 2.177770 6 146 +FAST f=19 a=4 0.079864 2.177770 6 146 +FAST f=19 a=5 10.030190 2.175574 6 146 +FAST f=19 a=5 0.07223 2.175574 6 146 +FAST f=19 a=6 9.717818 2.169997 8 98 +FAST f=19 a=6 0.060049 2.169997 8 98 +FAST f=19 a=7 9.397531 2.172770 8 146 +FAST f=19 a=7 0.057188 2.172770 8 146 +FAST f=19 a=8 9.281061 2.175822 8 98 +FAST f=19 a=8 0.053711 2.175822 8 98 +FAST f=19 a=9 9.165242 2.169849 6 146 +FAST f=19 a=9 0.059898 2.169849 6 146 +FAST f=19 a=10 9.048763 2.173394 8 98 +FAST f=19 a=10 0.049757 2.173394 8 98 +FAST f=20 a=1 21.166917 2.183923 6 98 +FAST f=20 a=1 0.205425 2.183923 6 98 +FAST f=20 a=2 15.642753 2.182349 6 98 +FAST f=20 a=2 0.135957 2.182349 6 98 +FAST f=20 a=3 14.053730 2.173544 6 98 +FAST f=20 a=3 0.11266 2.173544 6 98 +FAST f=20 a=4 15.270019 2.183656 8 98 +FAST f=20 a=4 0.107892 2.183656 8 98 +FAST f=20 a=5 15.497927 2.174661 6 98 +FAST f=20 a=5 0.100305 2.174661 6 98 +FAST f=20 a=6 13.973505 2.172391 8 98 +FAST f=20 a=6 0.087565 2.172391 8 98 +FAST f=20 a=7 14.083296 2.172443 8 98 +FAST f=20 a=7 0.078062 2.172443 8 98 +FAST f=20 a=8 12.560048 2.175581 8 98 +FAST f=20 a=8 0.070282 2.175581 8 98 +FAST f=20 a=9 13.078645 2.173975 6 146 +FAST f=20 a=9 0.081041 2.173975 6 146 +FAST f=20 a=10 12.823328 2.177778 8 98 +FAST f=20 a=10 0.074522 2.177778 8 98 +FAST f=21 a=1 29.825370 2.183057 6 98 +FAST f=21 a=1 0.334453 2.183057 6 98 +FAST f=21 a=2 29.476474 2.182752 8 98 +FAST f=21 a=2 0.286602 2.182752 8 98 +FAST f=21 a=3 25.937186 2.175867 8 98 +FAST f=21 a=3 0.17626 2.175867 8 98 +FAST f=21 a=4 20.413865 2.179780 8 98 +FAST f=21 a=4 0.206085 2.179780 8 98 +FAST f=21 a=5 20.541889 2.178328 6 146 +FAST f=21 a=5 0.199157 2.178328 6 146 +FAST f=21 a=6 21.090670 2.174443 6 146 +FAST f=21 a=6 0.190645 2.174443 6 146 +FAST f=21 a=7 20.221569 2.177384 6 146 +FAST f=21 a=7 0.184278 2.177384 6 146 +FAST f=21 a=8 20.322357 2.179456 6 98 +FAST f=21 a=8 0.178458 2.179456 6 98 +FAST f=21 a=9 20.683912 2.174396 6 146 +FAST f=21 a=9 0.190829 2.174396 6 146 +FAST f=21 a=10 20.840865 2.174905 8 98 +FAST f=21 a=10 0.172515 2.174905 8 98 +FAST f=22 a=1 36.822827 2.181612 6 98 +FAST f=22 a=1 0.437389 2.181612 6 98 +FAST f=22 a=2 30.616902 2.183142 8 98 +FAST f=22 a=2 0.324284 2.183142 8 98 +FAST f=22 a=3 28.472482 2.178130 8 98 +FAST f=22 a=3 0.236538 2.178130 8 98 +FAST f=22 a=4 25.847028 2.181878 8 98 +FAST f=22 a=4 0.263744 2.181878 8 98 +FAST f=22 a=5 27.095881 2.180775 8 98 +FAST f=22 a=5 0.24988 2.180775 8 98 +FAST f=22 a=6 25.939172 2.170916 8 98 +FAST f=22 a=6 0.240033 2.170916 8 98 +FAST f=22 a=7 27.064194 2.177849 8 98 +FAST f=22 a=7 0.242383 2.177849 8 98 +FAST f=22 a=8 25.140221 2.178216 8 98 +FAST f=22 a=8 0.237601 2.178216 8 98 +FAST f=22 a=9 25.505283 2.177455 6 146 +FAST f=22 a=9 0.223217 2.177455 6 146 +FAST f=22 a=10 24.529362 2.176705 6 98 +FAST f=22 a=10 0.222876 2.176705 6 98 +FAST f=23 a=1 39.127310 2.183006 6 98 +FAST f=23 a=1 0.417338 2.183006 6 98 +FAST f=23 a=2 32.468161 2.183524 6 98 +FAST f=23 a=2 0.351645 2.183524 6 98 +FAST f=23 a=3 31.577620 2.172604 6 98 +FAST f=23 a=3 0.319659 2.172604 6 98 +FAST f=23 a=4 30.129247 2.183932 6 98 +FAST f=23 a=4 0.307239 2.183932 6 98 +FAST f=23 a=5 29.103376 2.183529 6 146 +FAST f=23 a=5 0.285533 2.183529 6 146 +FAST f=23 a=6 29.776045 2.174367 8 98 +FAST f=23 a=6 0.276846 2.174367 8 98 +FAST f=23 a=7 28.940407 2.178022 6 146 +FAST f=23 a=7 0.274082 2.178022 6 146 +FAST f=23 a=8 29.256009 2.179462 6 98 +FAST f=23 a=8 0.26949 2.179462 6 98 +FAST f=23 a=9 29.347312 2.170407 8 98 +FAST f=23 a=9 0.265034 2.170407 8 98 +FAST f=23 a=10 29.140081 2.171762 8 98 +FAST f=23 a=10 0.259183 2.171762 8 98 +FAST f=24 a=1 44.871179 2.182115 6 98 +FAST f=24 a=1 0.509433 2.182115 6 98 +FAST f=24 a=2 38.694867 2.180549 8 98 +FAST f=24 a=2 0.406695 2.180549 8 98 +FAST f=24 a=3 38.363769 2.172821 8 98 +FAST f=24 a=3 0.359581 2.172821 8 98 +FAST f=24 a=4 36.580797 2.184142 8 98 +FAST f=24 a=4 0.340614 2.184142 8 98 +FAST f=24 a=5 33.125701 2.183301 8 98 +FAST f=24 a=5 0.324874 2.183301 8 98 +FAST f=24 a=6 34.776068 2.173019 6 146 +FAST f=24 a=6 0.340397 2.173019 6 146 +FAST f=24 a=7 34.417625 2.176561 6 146 +FAST f=24 a=7 0.308223 2.176561 6 146 +FAST f=24 a=8 35.470291 2.182161 6 98 +FAST f=24 a=8 0.307724 2.182161 6 98 +FAST f=24 a=9 34.927252 2.172682 6 146 +FAST f=24 a=9 0.300598 2.172682 6 146 +FAST f=24 a=10 33.238355 2.173395 6 98 +FAST f=24 a=10 0.249916 2.173395 6 98 + hg-manifest: -NODICT 0.000026 1.866385 -RANDOM 0.784554 2.309436 -LEGACY 10.193714 2.506977 -COVER 988.206583 2.582528 8 434 -COVER 39.726199 2.582528 8 434 -FAST15 168.388819 2.392920 6 1826 -FAST15 1.272178 2.392920 6 1826 -FAST16 161.822607 2.480762 6 1922 -FAST16 1.164908 2.480762 6 1922 -FAST17 157.688544 2.548285 6 1682 -FAST17 1.222439 2.548285 6 1682 -FAST18 154.529585 2.567634 6 386 -FAST18 1.217596 2.567634 6 386 -FAST19 160.244979 2.581653 8 338 -FAST19 1.282450 2.581653 8 338 -FAST20 191.503297 2.586881 8 194 -FAST20 2.009748 2.586881 8 194 -FAST21 226.389709 2.590051 6 242 -FAST21 2.494543 2.590051 6 242 -FAST22 217.859055 2.591376 6 194 -FAST22 2.295693 2.591376 6 194 -FAST23 236.819791 2.591131 8 434 -FAST23 2.744711 2.591131 8 434 -FAST24 269.187800 2.591548 6 290 -FAST24 2.923671 2.591548 6 290 +NODICT 0.000004 1.866377 +RANDOM 0.696346 2.309436 +LEGACY 7.064527 2.506977 +COVER 876.312865 2.582528 8 434 +COVER 35.684533 2.582528 8 434 +FAST f=15 a=1 76.618201 2.404013 8 1202 +FAST f=15 a=1 0.700722 2.404013 8 1202 +FAST f=15 a=2 49.213058 2.409248 6 1826 +FAST f=15 a=2 0.473393 2.409248 6 1826 +FAST f=15 a=3 41.753197 2.409677 8 1490 +FAST f=15 a=3 0.336848 2.409677 8 1490 +FAST f=15 a=4 38.648295 2.407996 8 1538 +FAST f=15 a=4 0.283952 2.407996 8 1538 +FAST f=15 a=5 36.144936 2.402895 8 1874 +FAST f=15 a=5 0.270128 2.402895 8 1874 +FAST f=15 a=6 35.484675 2.394873 8 1586 +FAST f=15 a=6 0.251637 2.394873 8 1586 +FAST f=15 a=7 34.280599 2.397311 8 1778 +FAST f=15 a=7 0.23984 2.397311 8 1778 +FAST f=15 a=8 32.122572 2.396089 6 1490 +FAST f=15 a=8 0.251508 2.396089 6 1490 +FAST f=15 a=9 29.909842 2.390092 6 1970 +FAST f=15 a=9 0.251233 2.390092 6 1970 +FAST f=15 a=10 30.102938 2.400086 6 1682 +FAST f=15 a=10 0.23688 2.400086 6 1682 +FAST f=16 a=1 67.750401 2.475460 6 1346 +FAST f=16 a=1 0.796035 2.475460 6 1346 +FAST f=16 a=2 52.812027 2.480860 6 1730 +FAST f=16 a=2 0.480384 2.480860 6 1730 +FAST f=16 a=3 44.179259 2.469304 8 1970 +FAST f=16 a=3 0.332657 2.469304 8 1970 +FAST f=16 a=4 37.612728 2.478208 6 1970 +FAST f=16 a=4 0.32498 2.478208 6 1970 +FAST f=16 a=5 35.056222 2.475568 6 1298 +FAST f=16 a=5 0.302824 2.475568 6 1298 +FAST f=16 a=6 34.713012 2.486079 8 1730 +FAST f=16 a=6 0.24755 2.486079 8 1730 +FAST f=16 a=7 33.713687 2.477180 6 1682 +FAST f=16 a=7 0.280358 2.477180 6 1682 +FAST f=16 a=8 31.571412 2.475418 8 1538 +FAST f=16 a=8 0.241241 2.475418 8 1538 +FAST f=16 a=9 31.608069 2.478263 8 1922 +FAST f=16 a=9 0.241764 2.478263 8 1922 +FAST f=16 a=10 31.358002 2.472263 8 1442 +FAST f=16 a=10 0.221661 2.472263 8 1442 +FAST f=17 a=1 66.185775 2.536085 6 1346 +FAST f=17 a=1 0.713549 2.536085 6 1346 +FAST f=17 a=2 50.365000 2.546105 8 1298 +FAST f=17 a=2 0.467846 2.546105 8 1298 +FAST f=17 a=3 42.712843 2.536250 8 1298 +FAST f=17 a=3 0.34047 2.536250 8 1298 +FAST f=17 a=4 39.514227 2.535555 8 1442 +FAST f=17 a=4 0.302989 2.535555 8 1442 +FAST f=17 a=5 35.189292 2.524925 8 1202 +FAST f=17 a=5 0.273451 2.524925 8 1202 +FAST f=17 a=6 35.791683 2.523466 8 1202 +FAST f=17 a=6 0.268261 2.523466 8 1202 +FAST f=17 a=7 37.416136 2.526625 6 1010 +FAST f=17 a=7 0.277558 2.526625 6 1010 +FAST f=17 a=8 37.084707 2.533274 6 1250 +FAST f=17 a=8 0.285104 2.533274 6 1250 +FAST f=17 a=9 34.183814 2.532765 8 1298 +FAST f=17 a=9 0.235133 2.532765 8 1298 +FAST f=17 a=10 31.149235 2.528722 8 1346 +FAST f=17 a=10 0.232679 2.528722 8 1346 +FAST f=18 a=1 72.942176 2.559857 6 386 +FAST f=18 a=1 0.718618 2.559857 6 386 +FAST f=18 a=2 51.690440 2.559572 8 290 +FAST f=18 a=2 0.403978 2.559572 8 290 +FAST f=18 a=3 45.344908 2.561040 8 962 +FAST f=18 a=3 0.357205 2.561040 8 962 +FAST f=18 a=4 39.804522 2.558446 8 1010 +FAST f=18 a=4 0.310526 2.558446 8 1010 +FAST f=18 a=5 38.134888 2.561811 8 626 +FAST f=18 a=5 0.273743 2.561811 8 626 +FAST f=18 a=6 35.091890 2.555518 8 722 +FAST f=18 a=6 0.260135 2.555518 8 722 +FAST f=18 a=7 34.639523 2.562938 8 290 +FAST f=18 a=7 0.234294 2.562938 8 290 +FAST f=18 a=8 36.076431 2.563567 8 1586 +FAST f=18 a=8 0.274075 2.563567 8 1586 +FAST f=18 a=9 36.376433 2.560950 8 722 +FAST f=18 a=9 0.240106 2.560950 8 722 +FAST f=18 a=10 32.624790 2.559340 8 578 +FAST f=18 a=10 0.234704 2.559340 8 578 +FAST f=19 a=1 70.513761 2.572441 8 194 +FAST f=19 a=1 0.726112 2.572441 8 194 +FAST f=19 a=2 59.263032 2.574560 8 482 +FAST f=19 a=2 0.451554 2.574560 8 482 +FAST f=19 a=3 51.509594 2.571546 6 194 +FAST f=19 a=3 0.393014 2.571546 6 194 +FAST f=19 a=4 55.393906 2.573386 8 482 +FAST f=19 a=4 0.38819 2.573386 8 482 +FAST f=19 a=5 43.201736 2.567589 8 674 +FAST f=19 a=5 0.292155 2.567589 8 674 +FAST f=19 a=6 42.911687 2.572666 6 434 +FAST f=19 a=6 0.303988 2.572666 6 434 +FAST f=19 a=7 44.687591 2.573613 6 290 +FAST f=19 a=7 0.308721 2.573613 6 290 +FAST f=19 a=8 37.372868 2.571039 6 194 +FAST f=19 a=8 0.287137 2.571039 6 194 +FAST f=19 a=9 36.074230 2.566473 6 482 +FAST f=19 a=9 0.280721 2.566473 6 482 +FAST f=19 a=10 33.731720 2.570306 8 194 +FAST f=19 a=10 0.224073 2.570306 8 194 +FAST f=20 a=1 79.670634 2.581146 6 290 +FAST f=20 a=1 0.899986 2.581146 6 290 +FAST f=20 a=2 58.827141 2.579782 8 386 +FAST f=20 a=2 0.602288 2.579782 8 386 +FAST f=20 a=3 51.289004 2.579627 8 722 +FAST f=20 a=3 0.446091 2.579627 8 722 +FAST f=20 a=4 47.711068 2.581508 8 722 +FAST f=20 a=4 0.473007 2.581508 8 722 +FAST f=20 a=5 47.402929 2.578062 6 434 +FAST f=20 a=5 0.497131 2.578062 6 434 +FAST f=20 a=6 54.797102 2.577365 8 482 +FAST f=20 a=6 0.515061 2.577365 8 482 +FAST f=20 a=7 51.370877 2.583050 8 386 +FAST f=20 a=7 0.402878 2.583050 8 386 +FAST f=20 a=8 51.437931 2.574875 6 242 +FAST f=20 a=8 0.453094 2.574875 6 242 +FAST f=20 a=9 44.105456 2.576700 6 242 +FAST f=20 a=9 0.456633 2.576700 6 242 +FAST f=20 a=10 44.447580 2.578305 8 338 +FAST f=20 a=10 0.409121 2.578305 8 338 +FAST f=21 a=1 113.031686 2.582449 6 242 +FAST f=21 a=1 1.456971 2.582449 6 242 +FAST f=21 a=2 97.700932 2.582124 8 194 +FAST f=21 a=2 1.072078 2.582124 8 194 +FAST f=21 a=3 96.563648 2.585479 8 434 +FAST f=21 a=3 0.949528 2.585479 8 434 +FAST f=21 a=4 90.597813 2.582366 6 386 +FAST f=21 a=4 0.76944 2.582366 6 386 +FAST f=21 a=5 86.815980 2.579043 8 434 +FAST f=21 a=5 0.858167 2.579043 8 434 +FAST f=21 a=6 91.235820 2.578378 8 530 +FAST f=21 a=6 0.684274 2.578378 8 530 +FAST f=21 a=7 84.392788 2.581243 8 386 +FAST f=21 a=7 0.814386 2.581243 8 386 +FAST f=21 a=8 82.052310 2.582547 8 338 +FAST f=21 a=8 0.822633 2.582547 8 338 +FAST f=21 a=9 74.696074 2.579319 8 194 +FAST f=21 a=9 0.811028 2.579319 8 194 +FAST f=21 a=10 76.211170 2.578766 8 290 +FAST f=21 a=10 0.809715 2.578766 8 290 +FAST f=22 a=1 138.976871 2.580478 8 194 +FAST f=22 a=1 1.748932 2.580478 8 194 +FAST f=22 a=2 120.164097 2.583633 8 386 +FAST f=22 a=2 1.333239 2.583633 8 386 +FAST f=22 a=3 111.986474 2.582566 6 194 +FAST f=22 a=3 1.305734 2.582566 6 194 +FAST f=22 a=4 108.548148 2.583068 6 194 +FAST f=22 a=4 1.314026 2.583068 6 194 +FAST f=22 a=5 103.173017 2.583495 6 290 +FAST f=22 a=5 1.228664 2.583495 6 290 +FAST f=22 a=6 108.421262 2.582349 8 530 +FAST f=22 a=6 1.076773 2.582349 8 530 +FAST f=22 a=7 103.284127 2.581022 8 386 +FAST f=22 a=7 1.112117 2.581022 8 386 +FAST f=22 a=8 96.330279 2.581073 8 290 +FAST f=22 a=8 1.109303 2.581073 8 290 +FAST f=22 a=9 97.651348 2.580075 6 194 +FAST f=22 a=9 0.933032 2.580075 6 194 +FAST f=22 a=10 101.660621 2.584886 8 194 +FAST f=22 a=10 0.796823 2.584886 8 194 +FAST f=23 a=1 159.322978 2.581474 6 242 +FAST f=23 a=1 2.015878 2.581474 6 242 +FAST f=23 a=2 134.331775 2.581619 8 194 +FAST f=23 a=2 1.545845 2.581619 8 194 +FAST f=23 a=3 127.724552 2.579888 6 338 +FAST f=23 a=3 1.444496 2.579888 6 338 +FAST f=23 a=4 126.077675 2.578137 6 242 +FAST f=23 a=4 1.364394 2.578137 6 242 +FAST f=23 a=5 124.914027 2.580843 8 338 +FAST f=23 a=5 1.116059 2.580843 8 338 +FAST f=23 a=6 122.874153 2.577637 6 338 +FAST f=23 a=6 1.164584 2.577637 6 338 +FAST f=23 a=7 123.099257 2.582715 6 386 +FAST f=23 a=7 1.354042 2.582715 6 386 +FAST f=23 a=8 122.026753 2.577681 8 194 +FAST f=23 a=8 1.210966 2.577681 8 194 +FAST f=23 a=9 121.164312 2.584599 6 290 +FAST f=23 a=9 1.174859 2.584599 6 290 +FAST f=23 a=10 117.462222 2.580358 8 194 +FAST f=23 a=10 1.075258 2.580358 8 194 +FAST f=24 a=1 169.539659 2.581642 6 194 +FAST f=24 a=1 1.916804 2.581642 6 194 +FAST f=24 a=2 160.539270 2.580421 6 290 +FAST f=24 a=2 1.71087 2.580421 6 290 +FAST f=24 a=3 155.455874 2.580449 6 242 +FAST f=24 a=3 1.60307 2.580449 6 242 +FAST f=24 a=4 147.630320 2.582953 6 338 +FAST f=24 a=4 1.396364 2.582953 6 338 +FAST f=24 a=5 133.767428 2.580589 6 290 +FAST f=24 a=5 1.19933 2.580589 6 290 +FAST f=24 a=6 146.437535 2.579453 8 194 +FAST f=24 a=6 1.385405 2.579453 8 194 +FAST f=24 a=7 147.227507 2.584155 8 386 +FAST f=24 a=7 1.48942 2.584155 8 386 +FAST f=24 a=8 138.005773 2.584115 8 194 +FAST f=24 a=8 1.352 2.584115 8 194 +FAST f=24 a=9 141.442625 2.582902 8 290 +FAST f=24 a=9 1.39647 2.582902 8 290 +FAST f=24 a=10 142.157446 2.582701 8 434 +FAST f=24 a=10 1.498889 2.582701 8 434 diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c b/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c index d92e8d5cb..b19345692 100644 --- a/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c +++ b/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c @@ -5,7 +5,6 @@ #include #include #include "random.h" -#include "fastCover.h" #include "dictBuilder.h" #include "zstd_internal.h" /* includes zstd.h */ #include "io.h" @@ -149,7 +148,7 @@ double compressWithDict(sampleInfo *srcInfo, dictInfo* dInfo, int compressionLev /* Allocate dst with enough space to compress the maximum sized sample */ { size_t maxSampleSize = 0; - for (int i = 0; i < srcInfo->nbSamples; i++) { + for (i = 0; i < srcInfo->nbSamples; i++) { maxSampleSize = MAX(srcInfo->samplesSizes[i], maxSampleSize); } dstCapacity = ZSTD_compressBound(maxSampleSize); @@ -291,6 +290,9 @@ int main(int argCount, const char* argv[]) /* Initialize arguments to default values */ unsigned k = 200; unsigned d = 8; + unsigned f; + unsigned accel; + unsigned i; const unsigned cLevel = DEFAULT_CLEVEL; const unsigned dictID = 0; const unsigned maxDictSize = g_defaultMaxDictSize; @@ -305,7 +307,7 @@ int main(int argCount, const char* argv[]) const char** extendedFileList = NULL; /* Parse arguments */ - for (int i = 1; i < argCount; i++) { + for (i = 1; i < argCount; i++) { const char* argument = argv[i]; if (longCommandWArg(&argument, "in=")) { filenameTable[filenameIdx] = argument; @@ -375,6 +377,7 @@ int main(int argCount, const char* argv[]) /* for cover */ { + /* for cover (optimizing k and d) */ ZDICT_cover_params_t coverParam; memset(&coverParam, 0, sizeof(coverParam)); coverParam.zParams = zParams; @@ -388,6 +391,7 @@ int main(int argCount, const char* argv[]) goto _cleanup; } + /* for cover (with k and d provided) */ const int coverResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL); DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, coverParam.steps, (unsigned)(coverParam.splitPoint * 100)); if(coverResult) { @@ -398,29 +402,34 @@ int main(int argCount, const char* argv[]) } /* for fastCover */ - for (unsigned f = 15; f < 25; f++){ + for (f = 15; f < 25; f++){ DISPLAYLEVEL(2, "current f is %u\n", f); - /* for fastCover (optimizing k and d) */ - ZDICT_fastCover_params_t fastParam; - memset(&fastParam, 0, sizeof(fastParam)); - fastParam.zParams = zParams; - fastParam.splitPoint = 1.0; - fastParam.f = f; - fastParam.steps = 40; - fastParam.nbThreads = 1; - const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam); - DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100)); - if(fastOptResult) { - result = 1; - goto _cleanup; - } + for (accel = 1; accel < 11; accel++) { + DISPLAYLEVEL(2, "current accel is %u\n", accel); + /* for fastCover (optimizing k and d) */ + ZDICT_fastCover_params_t fastParam; + memset(&fastParam, 0, sizeof(fastParam)); + fastParam.zParams = zParams; + fastParam.f = f; + fastParam.steps = 40; + fastParam.nbThreads = 1; + fastParam.accel = accel; + const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam); + DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100), fastParam.accel); + if(fastOptResult) { + result = 1; + goto _cleanup; + } - /* for fastCover (with k and d provided) */ - const int fastResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam); - DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100)); - if(fastResult) { - result = 1; - goto _cleanup; + /* for fastCover (with k and d provided) */ + for (i = 0; i < 5; i++) { + const int fastResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam); + DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100), fastParam.accel); + if(fastResult) { + result = 1; + goto _cleanup; + } + } } } diff --git a/contrib/experimental_dict_builders/fastCover/fastCover.c b/contrib/experimental_dict_builders/fastCover/fastCover.c index 84d841b10..02c155a81 100644 --- a/contrib/experimental_dict_builders/fastCover/fastCover.c +++ b/contrib/experimental_dict_builders/fastCover/fastCover.c @@ -197,7 +197,7 @@ static FASTCOVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx, bestSegment.end = newEnd; } { - /* Half the frequency of hash value of each dmer covered by the chosen segment. */ + /* Zero the frequency of hash value of each dmer covered by the chosen segment. */ U32 pos; for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, parameters.f, ctx->d); @@ -300,7 +300,7 @@ static int FASTCOVER_ctx_init(FASTCOVER_ctx_t *ctx, const void *samplesBuffer, if (totalSamplesSize < MAX(d, sizeof(U64)) || totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) { DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", - (U32)(totalSamplesSize>>20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20)); + (U32)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20)); return 0; } /* Check if there are at least 5 training samples */ diff --git a/contrib/largeNbDicts/.gitignore b/contrib/largeNbDicts/.gitignore new file mode 100644 index 000000000..e77c4e496 --- /dev/null +++ b/contrib/largeNbDicts/.gitignore @@ -0,0 +1,2 @@ +# build artifacts +largeNbDicts diff --git a/contrib/largeNbDicts/Makefile b/contrib/largeNbDicts/Makefile new file mode 100644 index 000000000..624140fab --- /dev/null +++ b/contrib/largeNbDicts/Makefile @@ -0,0 +1,49 @@ +# ################################################################ +# Copyright (c) 2018-present, Yann Collet, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ################################################################ + +PROGDIR = ../../programs +LIBDIR = ../../lib + +LIBZSTD = $(LIBDIR)/libzstd.a + +CPPFLAGS+= -I$(LIBDIR) -I$(LIBDIR)/common -I$(LIBDIR)/dictBuilder -I$(PROGDIR) + +CFLAGS ?= -O3 +CFLAGS += -std=gnu99 +DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ + -Wstrict-aliasing=1 -Wswitch-enum \ + -Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \ + -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ + -Wredundant-decls +CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) + + +default: largeNbDicts + +all : largeNbDicts + +largeNbDicts: bench.o datagen.o xxhash.o largeNbDicts.c $(LIBZSTD) + $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ + +.PHONY: $(LIBZSTD) +$(LIBZSTD): + $(MAKE) -C $(LIBDIR) libzstd.a + +bench.o : $(PROGDIR)/bench.c + $(CC) $(CPPFLAGS) $(CFLAGS) $^ -c + +datagen.o: $(PROGDIR)/datagen.c + $(CC) $(CPPFLAGS) $(CFLAGS) $^ -c + +xxhash.o : $(LIBDIR)/common/xxhash.c + $(CC) $(CPPFLAGS) $(CFLAGS) $^ -c + +clean: + $(RM) *.o + $(RM) largeNbDicts diff --git a/contrib/largeNbDicts/README.md b/contrib/largeNbDicts/README.md new file mode 100644 index 000000000..f29bcdfe8 --- /dev/null +++ b/contrib/largeNbDicts/README.md @@ -0,0 +1,25 @@ +largeNbDicts +===================== + +`largeNbDicts` is a benchmark test tool +dedicated to the specific scenario of +dictionary decompression using a very large number of dictionaries. +When dictionaries are constantly changing, they are always "cold", +suffering from increased latency due to cache misses. + +The tool is created in a bid to investigate performance for this scenario, +and experiment mitigation techniques. + +Command line : +``` +largeNbDicts [Options] filename(s) + +Options : +-r : recursively load all files in subdirectories (default: off) +-B# : split input into blocks of size # (default: no split) +-# : use compression level # (default: 3) +-D # : use # as a dictionary (default: create one) +-i# : nb benchmark rounds (default: 6) +--nbDicts=# : set nb of dictionaries to # (default: one per block) +-h : help (this text) +``` diff --git a/contrib/largeNbDicts/largeNbDicts.c b/contrib/largeNbDicts/largeNbDicts.c new file mode 100644 index 000000000..e0bc55380 --- /dev/null +++ b/contrib/largeNbDicts/largeNbDicts.c @@ -0,0 +1,806 @@ +/* + * Copyright (c) 2018-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* largeNbDicts + * This is a benchmark test tool + * dedicated to the specific case of dictionary decompression + * using a very large nb of dictionaries + * thus suffering latency from lots of cache misses. + * It's created in a bid to investigate performance and find optimizations. */ + + +/*--- Dependencies ---*/ + +#include /* size_t */ +#include /* malloc, free, abort */ +#include /* fprintf */ +#include /* assert */ + +#include "util.h" +#include "bench.h" +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" +#include "zdict.h" + + +/*--- Constants --- */ + +#define KB *(1<<10) +#define MB *(1<<20) + +#define BLOCKSIZE_DEFAULT 0 /* no slicing into blocks */ +#define DICTSIZE (4 KB) +#define CLEVEL_DEFAULT 3 + +#define BENCH_TIME_DEFAULT_S 6 +#define RUN_TIME_DEFAULT_MS 1000 +#define BENCH_TIME_DEFAULT_MS (BENCH_TIME_DEFAULT_S * RUN_TIME_DEFAULT_MS) + +#define DISPLAY_LEVEL_DEFAULT 3 + +#define BENCH_SIZE_MAX (1200 MB) + + +/*--- Macros ---*/ +#define CONTROL(c) { if (!(c)) abort(); } +#undef MIN +#define MIN(a,b) ((a) < (b) ? (a) : (b)) + + +/*--- Display Macros ---*/ + +#define DISPLAY(...) fprintf(stdout, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } } +static int g_displayLevel = DISPLAY_LEVEL_DEFAULT; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */ + + +/*--- buffer_t ---*/ + +typedef struct { + void* ptr; + size_t size; + size_t capacity; +} buffer_t; + +static const buffer_t kBuffNull = { NULL, 0, 0 }; + +/* @return : kBuffNull if any error */ +static buffer_t createBuffer(size_t capacity) +{ + assert(capacity > 0); + void* const ptr = malloc(capacity); + if (ptr==NULL) return kBuffNull; + + buffer_t buffer; + buffer.ptr = ptr; + buffer.capacity = capacity; + buffer.size = 0; + return buffer; +} + +static void freeBuffer(buffer_t buff) +{ + free(buff.ptr); +} + + +static void fillBuffer_fromHandle(buffer_t* buff, FILE* f) +{ + size_t const readSize = fread(buff->ptr, 1, buff->capacity, f); + buff->size = readSize; +} + + +/* @return : kBuffNull if any error */ +static buffer_t createBuffer_fromFile(const char* fileName) +{ + U64 const fileSize = UTIL_getFileSize(fileName); + size_t const bufferSize = (size_t) fileSize; + + if (fileSize == UTIL_FILESIZE_UNKNOWN) return kBuffNull; + assert((U64)bufferSize == fileSize); /* check overflow */ + + { FILE* const f = fopen(fileName, "rb"); + if (f == NULL) return kBuffNull; + + buffer_t buff = createBuffer(bufferSize); + CONTROL(buff.ptr != NULL); + + fillBuffer_fromHandle(&buff, f); + CONTROL(buff.size == buff.capacity); + + fclose(f); /* do nothing specific if fclose() fails */ + return buff; + } +} + + +/* @return : kBuffNull if any error */ +static buffer_t +createDictionaryBuffer(const char* dictionaryName, + const void* srcBuffer, + const size_t* srcBlockSizes, unsigned nbBlocks, + size_t requestedDictSize) +{ + if (dictionaryName) { + DISPLAYLEVEL(3, "loading dictionary %s \n", dictionaryName); + return createBuffer_fromFile(dictionaryName); /* note : result might be kBuffNull */ + + } else { + + DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n", + (unsigned)requestedDictSize); + void* const dictBuffer = malloc(requestedDictSize); + CONTROL(dictBuffer != NULL); + + size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, requestedDictSize, + srcBuffer, + srcBlockSizes, nbBlocks); + CONTROL(!ZSTD_isError(dictSize)); + + buffer_t result; + result.ptr = dictBuffer; + result.capacity = requestedDictSize; + result.size = dictSize; + return result; + } +} + + +/*! BMK_loadFiles() : + * Loads `buffer`, with content from files listed within `fileNamesTable`. + * Fills `buffer` entirely. + * @return : 0 on success, !=0 on error */ +static int loadFiles(void* buffer, size_t bufferSize, + size_t* fileSizes, + const char* const * fileNamesTable, unsigned nbFiles) +{ + size_t pos = 0, totalSize = 0; + + for (unsigned n=0; n 0); + void* const srcBuffer = malloc(loadedSize); + assert(srcBuffer != NULL); + + assert(nbFiles > 0); + size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(*fileSizes)); + assert(fileSizes != NULL); + + /* Load input buffer */ + int const errorCode = loadFiles(srcBuffer, loadedSize, + fileSizes, + fileNamesTable, nbFiles); + assert(errorCode == 0); + + void** sliceTable = (void**)malloc(nbFiles * sizeof(*sliceTable)); + assert(sliceTable != NULL); + + char* const ptr = (char*)srcBuffer; + size_t pos = 0; + unsigned fileNb = 0; + for ( ; (pos < loadedSize) && (fileNb < nbFiles); fileNb++) { + sliceTable[fileNb] = ptr + pos; + pos += fileSizes[fileNb]; + } + assert(pos == loadedSize); + assert(fileNb == nbFiles); + + + buffer_t buffer; + buffer.ptr = srcBuffer; + buffer.capacity = loadedSize; + buffer.size = loadedSize; + + slice_collection_t slices; + slices.slicePtrs = sliceTable; + slices.capacities = fileSizes; + slices.nbSlices = nbFiles; + + buffer_collection_t bc; + bc.buffer = buffer; + bc.slices = slices; + return bc; +} + + + + +/*--- ddict_collection_t ---*/ + +typedef struct { + ZSTD_DDict** ddicts; + size_t nbDDict; +} ddict_collection_t; + +static const ddict_collection_t kNullDDictCollection = { NULL, 0 }; + +static void freeDDictCollection(ddict_collection_t ddictc) +{ + for (size_t dictNb=0; dictNb < ddictc.nbDDict; dictNb++) { + ZSTD_freeDDict(ddictc.ddicts[dictNb]); + } + free(ddictc.ddicts); +} + +/* returns .buffers=NULL if operation fails */ +static ddict_collection_t createDDictCollection(const void* dictBuffer, size_t dictSize, size_t nbDDict) +{ + ZSTD_DDict** const ddicts = malloc(nbDDict * sizeof(ZSTD_DDict*)); + assert(ddicts != NULL); + if (ddicts==NULL) return kNullDDictCollection; + for (size_t dictNb=0; dictNb < nbDDict; dictNb++) { + ddicts[dictNb] = ZSTD_createDDict(dictBuffer, dictSize); + assert(ddicts[dictNb] != NULL); + } + ddict_collection_t ddictc; + ddictc.ddicts = ddicts; + ddictc.nbDDict = nbDDict; + return ddictc; +} + + +/* mess with adresses, so that linear scanning dictionaries != linear address scanning */ +void shuffleDictionaries(ddict_collection_t dicts) +{ + size_t const nbDicts = dicts.nbDDict; + for (size_t r=0; rdctx, + dst, dstCapacity, + src, srcSize, + di->dictionaries.ddicts[di->dictNb]); + + di->dictNb = di->dictNb + 1; + if (di->dictNb >= di->nbDicts) di->dictNb = 0; + + return result; +} + + +static int benchMem(slice_collection_t dstBlocks, + slice_collection_t srcBlocks, + ddict_collection_t dictionaries, + int nbRounds) +{ + assert(dstBlocks.nbSlices == srcBlocks.nbSlices); + + unsigned const ms_per_round = RUN_TIME_DEFAULT_MS; + unsigned const total_time_ms = nbRounds * ms_per_round; + + double bestSpeed = 0.; + + BMK_timedFnState_t* const benchState = + BMK_createTimedFnState(total_time_ms, ms_per_round); + decompressInstructions di = createDecompressInstructions(dictionaries); + + for (;;) { + BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, + decompress, &di, + NULL, NULL, + dstBlocks.nbSlices, + (const void* const *)srcBlocks.slicePtrs, srcBlocks.capacities, + dstBlocks.slicePtrs, dstBlocks.capacities, + NULL); + CONTROL(BMK_isSuccessful_runOutcome(outcome)); + + BMK_runTime_t const result = BMK_extract_runTime(outcome); + U64 const dTime_ns = result.nanoSecPerRun; + double const dTime_sec = (double)dTime_ns / 1000000000; + size_t const srcSize = result.sumOfReturn; + double const dSpeed_MBps = (double)srcSize / dTime_sec / (1 MB); + if (dSpeed_MBps > bestSpeed) bestSpeed = dSpeed_MBps; + DISPLAY("Decompression Speed : %.1f MB/s \r", bestSpeed); + fflush(stdout); + if (BMK_isCompleted_TimedFn(benchState)) break; + } + DISPLAY("\n"); + + freeDecompressInstructions(di); + BMK_freeTimedFnState(benchState); + + return 0; /* success */ +} + + +/*! bench() : + * fileName : file to load for benchmarking purpose + * dictionary : optional (can be NULL), file to load as dictionary, + * if none provided : will be calculated on the fly by the program. + * @return : 0 is success, 1+ otherwise */ +int bench(const char** fileNameTable, unsigned nbFiles, + const char* dictionary, + size_t blockSize, int clevel, + unsigned nbDictMax, unsigned nbBlocks, + int nbRounds) +{ + int result = 0; + + DISPLAYLEVEL(3, "loading %u files... \n", nbFiles); + buffer_collection_t const srcs = createBufferCollection_fromFiles(fileNameTable, nbFiles); + CONTROL(srcs.buffer.ptr != NULL); + buffer_t srcBuffer = srcs.buffer; + size_t const srcSize = srcBuffer.size; + DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n", + (double)srcSize / (1 MB)); + + slice_collection_t const srcSlices = splitSlices(srcs.slices, blockSize, nbBlocks); + nbBlocks = (unsigned)(srcSlices.nbSlices); + DISPLAYLEVEL(3, "split input into %u blocks ", nbBlocks); + if (blockSize) + DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize); + DISPLAYLEVEL(3, "\n"); + + + size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities)); + CONTROL(dstCapacities != NULL); + size_t dstBufferCapacity = 0; + for (size_t bnb=0; bnb='0') && (**stringPtr <='9')) { + unsigned const max = (((unsigned)(-1)) / 10) - 1; + assert(result <= max); /* check overflow */ + result *= 10, result += **stringPtr - '0', (*stringPtr)++ ; + } + if ((**stringPtr=='K') || (**stringPtr=='M')) { + unsigned const maxK = ((unsigned)(-1)) >> 10; + assert(result <= maxK); /* check overflow */ + result <<= 10; + if (**stringPtr=='M') { + assert(result <= maxK); /* check overflow */ + result <<= 10; + } + (*stringPtr)++; /* skip `K` or `M` */ + if (**stringPtr=='i') (*stringPtr)++; + if (**stringPtr=='B') (*stringPtr)++; + } + return result; +} + +/** longCommandWArg() : + * check if *stringPtr is the same as longCommand. + * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. + * @return 0 and doesn't modify *stringPtr otherwise. + */ +static unsigned longCommandWArg(const char** stringPtr, const char* longCommand) +{ + size_t const comSize = strlen(longCommand); + int const result = !strncmp(*stringPtr, longCommand, comSize); + if (result) *stringPtr += comSize; + return result; +} + + +int usage(const char* exeName) +{ + DISPLAY (" \n"); + DISPLAY (" %s [Options] filename(s) \n", exeName); + DISPLAY (" \n"); + DISPLAY ("Options : \n"); + DISPLAY ("-r : recursively load all files in subdirectories (default: off) \n"); + DISPLAY ("-B# : split input into blocks of size # (default: no split) \n"); + DISPLAY ("-# : use compression level # (default: %u) \n", CLEVEL_DEFAULT); + DISPLAY ("-D # : use # as a dictionary (default: create one) \n"); + DISPLAY ("-i# : nb benchmark rounds (default: %u) \n", BENCH_TIME_DEFAULT_S); + DISPLAY ("--nbBlocks=#: use # blocks for bench (default: one per file) \n"); + DISPLAY ("--nbDicts=# : create # dictionaries for bench (default: one per block) \n"); + DISPLAY ("-h : help (this text) \n"); + return 0; +} + +int bad_usage(const char* exeName) +{ + DISPLAY (" bad usage : \n"); + usage(exeName); + return 1; +} + +int main (int argc, const char** argv) +{ + int recursiveMode = 0; + int nbRounds = BENCH_TIME_DEFAULT_S; + const char* const exeName = argv[0]; + + if (argc < 2) return bad_usage(exeName); + + const char** nameTable = (const char**)malloc(argc * sizeof(const char*)); + assert(nameTable != NULL); + unsigned nameIdx = 0; + + const char* dictionary = NULL; + int cLevel = CLEVEL_DEFAULT; + size_t blockSize = BLOCKSIZE_DEFAULT; + unsigned nbDicts = 0; /* determine nbDicts automatically: 1 dictionary per block */ + unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */ + + for (int argNb = 1; argNb < argc ; argNb++) { + const char* argument = argv[argNb]; + if (!strcmp(argument, "-h")) { free(nameTable); return usage(exeName); } + if (!strcmp(argument, "-r")) { recursiveMode = 1; continue; } + if (!strcmp(argument, "-D")) { argNb++; assert(argNb < argc); dictionary = argv[argNb]; continue; } + if (longCommandWArg(&argument, "-i")) { nbRounds = readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--dictionary=")) { dictionary = argument; continue; } + if (longCommandWArg(&argument, "-B")) { blockSize = readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--clevel=")) { cLevel = readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "-")) { cLevel = readU32FromChar(&argument); continue; } + /* anything that's not a command is a filename */ + nameTable[nameIdx++] = argument; + } + + const char** filenameTable = nameTable; + unsigned nbFiles = nameIdx; + char* buffer_containing_filenames = NULL; + + if (recursiveMode) { +#ifndef UTIL_HAS_CREATEFILELIST + assert(0); /* missing capability, do not run */ +#endif + filenameTable = UTIL_createFileList(nameTable, nameIdx, &buffer_containing_filenames, &nbFiles, 1 /* follow_links */); + } + + int result = bench(filenameTable, nbFiles, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds); + + free(buffer_containing_filenames); + free(nameTable); + + return result; +} diff --git a/contrib/seekable_format/examples/seekable_compression.c b/contrib/seekable_format/examples/seekable_compression.c index 9485bf26f..9a331a895 100644 --- a/contrib/seekable_format/examples/seekable_compression.c +++ b/contrib/seekable_format/examples/seekable_compression.c @@ -101,7 +101,7 @@ static void compressFile_orDie(const char* fname, const char* outName, int cLeve free(buffOut); } -static const char* createOutFilename_orDie(const char* filename) +static char* createOutFilename_orDie(const char* filename) { size_t const inL = strlen(filename); size_t const outL = inL + 5; @@ -109,7 +109,7 @@ static const char* createOutFilename_orDie(const char* filename) memset(outSpace, 0, outL); strcat(outSpace, filename); strcat(outSpace, ".zst"); - return (const char*)outSpace; + return (char*)outSpace; } int main(int argc, const char** argv) { @@ -124,8 +124,9 @@ int main(int argc, const char** argv) { { const char* const inFileName = argv[1]; unsigned const frameSize = (unsigned)atoi(argv[2]); - const char* const outFileName = createOutFilename_orDie(inFileName); + char* const outFileName = createOutFilename_orDie(inFileName); compressFile_orDie(inFileName, outFileName, 5, frameSize); + free(outFileName); } return 0; diff --git a/contrib/seekable_format/examples/seekable_decompression.c b/contrib/seekable_format/examples/seekable_decompression.c index 9cd232922..7050e0fa5 100644 --- a/contrib/seekable_format/examples/seekable_decompression.c +++ b/contrib/seekable_format/examples/seekable_decompression.c @@ -84,7 +84,7 @@ static void fseek_orDie(FILE* file, long int offset, int origin) { } -static void decompressFile_orDie(const char* fname, unsigned startOffset, unsigned endOffset) +static void decompressFile_orDie(const char* fname, off_t startOffset, off_t endOffset) { FILE* const fin = fopen_orDie(fname, "rb"); FILE* const fout = stdout; @@ -129,8 +129,8 @@ int main(int argc, const char** argv) { const char* const inFilename = argv[1]; - unsigned const startOffset = (unsigned) atoi(argv[2]); - unsigned const endOffset = (unsigned) atoi(argv[3]); + off_t const startOffset = atoll(argv[2]); + off_t const endOffset = atoll(argv[3]); decompressFile_orDie(inFilename, startOffset, endOffset); } diff --git a/contrib/seekable_format/zstdseek_decompress.c b/contrib/seekable_format/zstdseek_decompress.c index b006ff834..b4c48754e 100644 --- a/contrib/seekable_format/zstdseek_decompress.c +++ b/contrib/seekable_format/zstdseek_decompress.c @@ -56,6 +56,7 @@ #include /* malloc, free */ #include /* FILE* */ +#include #define XXH_STATIC_LINKING_ONLY #define XXH_NAMESPACE ZSTD_ @@ -112,7 +113,7 @@ static int ZSTD_seekable_read_buff(void* opaque, void* buffer, size_t n) static int ZSTD_seekable_seek_buff(void* opaque, long long offset, int origin) { - buffWrapper_t* buff = (buffWrapper_t*) opaque; + buffWrapper_t* const buff = (buffWrapper_t*) opaque; unsigned long long newOffset; switch (origin) { case SEEK_SET: @@ -124,6 +125,8 @@ static int ZSTD_seekable_seek_buff(void* opaque, long long offset, int origin) case SEEK_END: newOffset = (unsigned long long)buff->size - offset; break; + default: + assert(0); /* not possible */ } if (newOffset > buff->size) { return -1; @@ -310,8 +313,8 @@ static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs) /* compute cumulative positions */ for (; idx < numFrames; idx++) { if (pos + sizePerEntry > SEEKABLE_BUFF_SIZE) { - U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE); U32 const offset = SEEKABLE_BUFF_SIZE - pos; + U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE - offset); memmove(zs->inBuff, zs->inBuff + pos, offset); /* move any data we haven't read yet */ CHECK_IO(src.read(src.opaque, zs->inBuff+offset, toRead)); remaining -= toRead; diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md index 0b79f959f..c57b58269 100644 --- a/doc/zstd_compression_format.md +++ b/doc/zstd_compression_format.md @@ -16,7 +16,7 @@ Distribution of this document is unlimited. ### Version -0.2.8 (30/05/18) +0.2.9 (05/09/18) Introduction @@ -1192,6 +1192,8 @@ Number_of_Bits = Weight ? (Max_Number_of_Bits + 1 - Weight) : 0 The last symbol's `Weight` is deduced from previously decoded ones, by completing to the nearest power of 2. This power of 2 gives `Max_Number_of_Bits`, the depth of the current tree. +`Max_Number_of_Bits` must be <= 11, +otherwise the representation is considered corrupted. __Example__ : Let's presume the following Huffman tree must be described : @@ -1216,12 +1218,12 @@ It gives the following series of weights : | `Weight` | 4 | 3 | 2 | 0 | 1 | The decoder will do the inverse operation : -having collected weights of literals from `0` to `4`, +having collected weights of literal symbols from `0` to `4`, it knows the last literal, `5`, is present with a non-zero weight. The weight of `5` can be determined by advancing to the next power of 2. The sum of `2^(Weight-1)` (excluding 0's) is : `8 + 4 + 2 + 0 + 1 = 15`. -Nearest power of 2 is 16. +Nearest larger power of 2 value is 16. Therefore, `Max_Number_of_Bits = 4` and `Weight[5] = 16-15 = 1`. #### Huffman Tree header @@ -1233,18 +1235,24 @@ which describes how the series of weights is encoded. the series of weights is compressed using FSE (see below). The length of the FSE-compressed series is equal to `headerByte` (0-127). -- if `headerByte` >= 128 : this is a direct representation, - where each `Weight` is written directly as a 4 bits field (0-15). - They are encoded forward, 2 weights to a byte with the first weight taking - the top four bits and the second taking the bottom four (e.g. the following - operations could be used to read the weights: - `Weight[0] = (Byte[0] >> 4), Weight[1] = (Byte[0] & 0xf)`, etc.). - The full representation occupies `Ceiling(Number_of_Symbols/2)` bytes, - meaning it uses only full bytes even if `Number_of_Symbols` is odd. - `Number_of_Symbols = headerByte - 127`. - Note that maximum `Number_of_Symbols` is 255-127 = 128. - If any literal has a value > 128, raw header mode is not possible. - In such case, it's necessary to use FSE compression. +- if `headerByte` >= 128 : + + the series of weights uses a direct representation, + where each `Weight` is encoded directly as a 4 bits field (0-15). + + They are encoded forward, 2 weights to a byte, + first weight taking the top four bits and second one taking the bottom four. + * e.g. the following operations could be used to read the weights: + `Weight[0] = (Byte[0] >> 4), Weight[1] = (Byte[0] & 0xf)`, etc. + + The full representation occupies `Ceiling(Number_of_Weights/2)` bytes, + meaning it uses only full bytes even if `Number_of_Weights` is odd. + + `Number_of_Weights = headerByte - 127`. + * Note that maximum `Number_of_Weights` is 255-127 = 128, + therefore, only up to 128 `Weight` can be encoded using direct representation. + * Since the last non-zero `Weight` is _not_ encoded, + this scheme is compatible with alphabet sizes of up to 129 symbols, + hence including literal symbol 128. + * If any literal symbol > 128 has a non-zero `Weight`, + direct representation is not possible. + In such case, it's necessary to use FSE compression. #### Finite State Entropy (FSE) compression of Huffman weights @@ -1621,6 +1629,7 @@ or at least provide a meaningful error code explaining for which reason it canno Version changes --------------- +- 0.2.9 : clarifications for huffman weights direct representation, by Ulrich Kunitz - 0.2.8 : clarifications for IETF RFC discuss - 0.2.7 : clarifications from IETF RFC review, by Vijay Gurbani and Nick Terrell - 0.2.6 : fixed an error in huffman example, by Ulrich Kunitz diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 48b9ca7f2..e5aade5b2 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -35,22 +35,34 @@

Introduction

-  zstd, short for Zstandard, is a fast lossless compression algorithm,
-  targeting real-time compression scenarios at zlib-level and better compression ratios.
-  The zstd compression library provides in-memory compression and decompression functions.
-  The library supports compression levels from 1 up to ZSTD_maxCLevel() which is currently 22.
-  Levels >= 20, labeled `--ultra`, should be used with caution, as they require more memory.
+  zstd, short for Zstandard, is a fast lossless compression algorithm, targeting
+  real-time compression scenarios at zlib-level and better compression ratios.
+  The zstd compression library provides in-memory compression and decompression
+  functions.
+
+  The library supports regular compression levels from 1 up to ZSTD_maxCLevel(),
+  which is currently 22. Levels >= 20, labeled `--ultra`, should be used with
+  caution, as they require more memory. The library also offers negative
+  compression levels, which extend the range of speed vs. ratio preferences.
+  The lower the level, the faster the speed (at the cost of compression).
+
   Compression can be done in:
     - a single step (described as Simple API)
     - a single step, reusing a context (described as Explicit context)
     - unbounded multiple steps (described as Streaming compression)
-  The compression ratio achievable on small data can be highly improved using a dictionary in:
-    - a single step (described as Simple dictionary API)
-    - a single step, reusing a dictionary (described as Bulk-processing dictionary API)
 
-  Advanced experimental functions can be accessed using #define ZSTD_STATIC_LINKING_ONLY before including zstd.h.
-  Advanced experimental APIs shall never be used with a dynamic library.
-  They are not "stable", their definition may change in the future. Only static linking is allowed.
+  The compression ratio achievable on small data can be highly improved using
+  a dictionary. Dictionary compression can be performed in:
+    - a single step (described as Simple dictionary API)
+    - a single step, reusing a dictionary (described as Bulk-processing
+      dictionary API)
+
+  Advanced experimental functions can be accessed using
+  `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h.
+
+  Advanced experimental APIs should never be used with a dynamically-linked
+  library. They are not "stable"; their definitions or signatures may change in
+  the future. Only static linking is allowed.
 

Version


@@ -181,7 +193,8 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
 

When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once. ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay. ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. - `dictBuffer` can be released after ZSTD_CDict creation, since its content is copied within CDict + `dictBuffer` can be released after ZSTD_CDict creation, since its content is copied within CDict + Note : A ZSTD_CDict can be created with an empty dictionary, but it is inefficient for small data.


size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
@@ -195,7 +208,9 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
 

Compression using a digested Dictionary. Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. Note that compression level is decided during dictionary creation. - Frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) + Frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) + Note : ZSTD_compress_usingCDict() can be used with a ZSTD_CDict created from an empty dictionary. + But it is inefficient for small data, and it is recommended to use ZSTD_compressCCtx().


ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
@@ -965,16 +980,21 @@ size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx,
                            const void* prefix, size_t prefixSize,
                            ZSTD_dictContentType_e dictContentType);
 

Reference a prefix (single-usage dictionary) for next compression job. - Decompression need same prefix to properly regenerate data. - Prefix is **only used once**. Tables are discarded at end of compression job (ZSTD_e_end). + Decompression will need same prefix to properly regenerate data. + Compressing with a prefix is similar in outcome as performing a diff and compressing it, + but performs much faster, especially during decompression (compression speed is tunable with compression level). + Note that prefix is **only used once**. Tables are discarded at end of compression job (ZSTD_e_end). @result : 0, or an error code (which can be tested with ZSTD_isError()). Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary Note 1 : Prefix buffer is referenced. It **must** outlive compression job. Its contain must remain unmodified up to end of compression (ZSTD_e_end). - Note 2 : Referencing a prefix involves building tables, which are dependent on compression parameters. + Note 2 : If the intention is to diff some large src data blob with some prior version of itself, + ensure that the window size is large enough to contain the entire source. + See ZSTD_p_windowLog. + Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. It's a CPU consuming operation, with non-negligible impact on latency. If there is a need to use same prefix multiple times, consider loadDictionary instead. - Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent). + Note 4 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent). Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode.


@@ -1141,6 +1161,8 @@ size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);

Reference a prefix (single-usage dictionary) for next compression job. + This is the reverse operation of ZSTD_CCtx_refPrefix(), + and must use the same prefix as the one used during compression. Prefix is **only used once**. Reference is discarded at end of frame. End of frame is reached when ZSTD_DCtx_decompress_generic() returns 0. @result : 0, or an error code (which can be tested with ZSTD_isError()). diff --git a/examples/multiple_streaming_compression.c b/examples/multiple_streaming_compression.c index e395aefbc..4308a2e4d 100644 --- a/examples/multiple_streaming_compression.c +++ b/examples/multiple_streaming_compression.c @@ -158,7 +158,8 @@ int main(int argc, const char** argv) } freeResources(ress); - /* success */ + free(ofnBuffer); + printf("compressed %i files \n", argc-1); return 0; diff --git a/examples/streaming_compression.c b/examples/streaming_compression.c index f76364d84..9287ff398 100644 --- a/examples/streaming_compression.c +++ b/examples/streaming_compression.c @@ -73,7 +73,11 @@ static void compressFile_orDie(const char* fname, const char* outName, int cLeve ZSTD_CStream* const cstream = ZSTD_createCStream(); if (cstream==NULL) { fprintf(stderr, "ZSTD_createCStream() error \n"); exit(10); } size_t const initResult = ZSTD_initCStream(cstream, cLevel); - if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_initCStream() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); } + if (ZSTD_isError(initResult)) { + fprintf(stderr, "ZSTD_initCStream() error : %s \n", + ZSTD_getErrorName(initResult)); + exit(11); + } size_t read, toRead = buffInSize; while( (read = fread_orDie(buffIn, toRead, fin)) ) { @@ -81,7 +85,11 @@ static void compressFile_orDie(const char* fname, const char* outName, int cLeve while (input.pos < input.size) { ZSTD_outBuffer output = { buffOut, buffOutSize, 0 }; toRead = ZSTD_compressStream(cstream, &output , &input); /* toRead is guaranteed to be <= ZSTD_CStreamInSize() */ - if (ZSTD_isError(toRead)) { fprintf(stderr, "ZSTD_compressStream() error : %s \n", ZSTD_getErrorName(toRead)); exit(12); } + if (ZSTD_isError(toRead)) { + fprintf(stderr, "ZSTD_compressStream() error : %s \n", + ZSTD_getErrorName(toRead)); + exit(12); + } if (toRead > buffInSize) toRead = buffInSize; /* Safely handle case when `buffInSize` is manually changed to a value < ZSTD_CStreamInSize()*/ fwrite_orDie(buffOut, output.pos, fout); } @@ -100,15 +108,15 @@ static void compressFile_orDie(const char* fname, const char* outName, int cLeve } -static const char* createOutFilename_orDie(const char* filename) +static char* createOutFilename_orDie(const char* filename) { size_t const inL = strlen(filename); size_t const outL = inL + 5; - void* outSpace = malloc_orDie(outL); + void* const outSpace = malloc_orDie(outL); memset(outSpace, 0, outL); strcat(outSpace, filename); strcat(outSpace, ".zst"); - return (const char*)outSpace; + return (char*)outSpace; } int main(int argc, const char** argv) @@ -124,8 +132,10 @@ int main(int argc, const char** argv) const char* const inFilename = argv[1]; - const char* const outFilename = createOutFilename_orDie(inFilename); + char* const outFilename = createOutFilename_orDie(inFilename); compressFile_orDie(inFilename, outFilename, 1); + free(outFilename); /* not strictly required, since program execution stops there, + * but some static analyzer main complain otherwise */ return 0; } diff --git a/lib/BUCK b/lib/BUCK index dbe8885fc..bd93b082a 100644 --- a/lib/BUCK +++ b/lib/BUCK @@ -69,6 +69,7 @@ cxx_library( ]), headers=subdir_glob([ ('dictBuilder', 'divsufsort.h'), + ('dictBuilder', 'cover.h'), ]), srcs=glob(['dictBuilder/*.c']), deps=[':common'], diff --git a/lib/Makefile b/lib/Makefile index 01689c6d5..cf8e45b0f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -23,7 +23,7 @@ ifeq ($(OS),Windows_NT) # MinGW assumed CPPFLAGS += -D__USE_MINGW_ANSI_STDIO # compatibility with %zu formatting endif CFLAGS ?= -O3 -DEBUGFLAGS = -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ +DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ -Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \ -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ diff --git a/lib/common/compiler.h b/lib/common/compiler.h index 595bac204..07f875e4d 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -89,20 +89,37 @@ #endif /* prefetch - * can be disabled, by declaring NO_PREFETCH macro */ + * can be disabled, by declaring NO_PREFETCH macro + * All prefetch invocations use a single default locality 2, + * generating instruction prefetcht1, + * which, according to Intel, means "load data into L2 cache". + * This is a good enough "middle ground" for the time being, + * though in theory, it would be better to specialize locality depending on data being prefetched. + * Tests could not determine any sensible difference based on locality value. */ #if defined(NO_PREFETCH) -# define PREFETCH(ptr) /* disabled */ +# define PREFETCH(ptr) (void)(ptr) /* disabled */ #else # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ # include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ -# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0) +# define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) -# define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0) +# define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) # else -# define PREFETCH(ptr) /* disabled */ +# define PREFETCH(ptr) (void)(ptr) /* disabled */ # endif #endif /* NO_PREFETCH */ +#define CACHELINE_SIZE 64 + +#define PREFETCH_AREA(p, s) { \ + const char* const _ptr = (const char*)(p); \ + size_t const _size = (size_t)(s); \ + size_t _pos; \ + for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ + PREFETCH(_ptr + _pos); \ + } \ +} + /* disable warnings */ #ifdef _MSC_VER /* Visual Studio */ # include /* For Visual 2005 */ diff --git a/lib/common/cpu.h b/lib/common/cpu.h index 88e0ebf44..eeb428ad5 100644 --- a/lib/common/cpu.h +++ b/lib/common/cpu.h @@ -36,7 +36,7 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { U32 f1d = 0; U32 f7b = 0; U32 f7c = 0; -#ifdef _MSC_VER +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) int reg[4]; __cpuid((int*)reg, 0); { diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index b4c1af53f..e75adfa61 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -79,8 +79,7 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; -#define ZSTD_FRAMEIDSIZE 4 -static const size_t ZSTD_frameIdSize = ZSTD_FRAMEIDSIZE; /* magic number size */ +#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; @@ -193,6 +192,8 @@ typedef struct { BYTE* llCode; BYTE* mlCode; BYTE* ofCode; + size_t maxNbSeq; + size_t maxNbLit; U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */ U32 longLengthPos; } seqStore_t; diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c index 70daae3bc..4075db217 100644 --- a/lib/compress/fse_compress.c +++ b/lib/compress/fse_compress.c @@ -83,7 +83,9 @@ * wkspSize should be sized to handle worst case situation, which is `1< wkspSize) return ERROR(tableLog_tooLarge); tableU16[-2] = (U16) tableLog; tableU16[-1] = (U16) maxSymbolValue; - assert(tableLog < 16); /* required for the threshold strategy to work */ + assert(tableLog < 16); /* required for threshold strategy to work */ /* For explanations on how to distribute symbol values over the table : - * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ + * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ + + #ifdef __clang_analyzer__ + memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ + #endif /* symbol start positions */ { U32 u; @@ -124,13 +130,15 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi U32 symbol; for (symbol=0; symbol<=maxSymbolValue; symbol++) { int nbOccurences; - for (nbOccurences=0; nbOccurences highThreshold) position = (position + step) & tableMask; /* Low proba area */ + while (position > highThreshold) + position = (position + step) & tableMask; /* Low proba area */ } } - if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */ + assert(position==0); /* Must have initialized all positions */ } /* Build table */ @@ -201,9 +209,10 @@ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ } -static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, - const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, - unsigned writeIsSafe) +static size_t +FSE_writeNCount_generic (void* header, size_t headerBufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, + unsigned writeIsSafe) { BYTE* const ostart = (BYTE*) header; BYTE* out = ostart; @@ -212,13 +221,12 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, const int tableSize = 1 << tableLog; int remaining; int threshold; - U32 bitStream; - int bitCount; - unsigned charnum = 0; - int previous0 = 0; + U32 bitStream = 0; + int bitCount = 0; + unsigned symbol = 0; + unsigned const alphabetSize = maxSymbolValue + 1; + int previousIs0 = 0; - bitStream = 0; - bitCount = 0; /* Table Size */ bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; bitCount += 4; @@ -228,48 +236,53 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, threshold = tableSize; nbBits = tableLog+1; - while (remaining>1) { /* stops at 1 */ - if (previous0) { - unsigned start = charnum; - while (!normalizedCounter[charnum]) charnum++; - while (charnum >= start+24) { + while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ + if (previousIs0) { + unsigned start = symbol; + while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++; + if (symbol == alphabetSize) break; /* incorrect distribution */ + while (symbol >= start+24) { start+=24; bitStream += 0xFFFFU << bitCount; - if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + if ((!writeIsSafe) && (out > oend-2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ out[0] = (BYTE) bitStream; out[1] = (BYTE)(bitStream>>8); out+=2; bitStream>>=16; } - while (charnum >= start+3) { + while (symbol >= start+3) { start+=3; bitStream += 3 << bitCount; bitCount += 2; } - bitStream += (charnum-start) << bitCount; + bitStream += (symbol-start) << bitCount; bitCount += 2; if (bitCount>16) { - if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ out[0] = (BYTE)bitStream; out[1] = (BYTE)(bitStream>>8); out += 2; bitStream >>= 16; bitCount -= 16; } } - { int count = normalizedCounter[charnum++]; - int const max = (2*threshold-1)-remaining; + { int count = normalizedCounter[symbol++]; + int const max = (2*threshold-1) - remaining; remaining -= count < 0 ? -count : count; count++; /* +1 for extra accuracy */ - if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ + if (count>=threshold) + count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ bitStream += count << bitCount; bitCount += nbBits; bitCount -= (count>=1; } } if (bitCount>16) { - if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ out[0] = (BYTE)bitStream; out[1] = (BYTE)(bitStream>>8); out += 2; @@ -277,19 +290,23 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, bitCount -= 16; } } + if (remaining != 1) + return ERROR(GENERIC); /* incorrect normalized distribution */ + assert(symbol <= alphabetSize); + /* flush remaining bitStream */ - if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ out[0] = (BYTE)bitStream; out[1] = (BYTE)(bitStream>>8); out+= (bitCount+7) /8; - if (charnum > maxSymbolValue + 1) return ERROR(GENERIC); - return (out-ostart); } -size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) { if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ @@ -297,7 +314,7 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); - return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1); + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */); } diff --git a/lib/compress/hist.h b/lib/compress/hist.h index 788470da7..8b1991a90 100644 --- a/lib/compress/hist.h +++ b/lib/compress/hist.h @@ -50,7 +50,7 @@ size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); -unsigned HIST_isError(size_t code); /*< tells if a return value is an error code */ +unsigned HIST_isError(size_t code); /**< tells if a return value is an error code */ /* --- advanced histogram functions --- */ diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index e09eef3d1..513156604 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -805,7 +805,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); U32 const divider = (cParams.searchLength==3) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; - size_t const tokenSpace = blockSize + 11*maxNbSeq; + size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq; size_t const entropySpace = HUF_WORKSPACE_SIZE; size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t); size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1); @@ -949,33 +949,51 @@ typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e; /* ZSTD_sufficientBuff() : * check internal buffers exist for streaming if buffPol == ZSTDb_buffered . * Note : they are assumed to be correctly sized if ZSTD_equivalentCParams()==1 */ -static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t blockSize1, +static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t maxNbSeq1, + size_t maxNbLit1, ZSTD_buffered_policy_e buffPol2, ZSTD_compressionParameters cParams2, U64 pledgedSrcSize) { size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize)); size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2); + size_t const maxNbSeq2 = blockSize2 / ((cParams2.searchLength == 3) ? 3 : 4); + size_t const maxNbLit2 = blockSize2; size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? windowSize2 + blockSize2 : 0; - DEBUGLOG(4, "ZSTD_sufficientBuff: is windowSize2=%u <= wlog1=%u", - (U32)windowSize2, cParams2.windowLog); - DEBUGLOG(4, "ZSTD_sufficientBuff: is blockSize2=%u <= blockSize1=%u", - (U32)blockSize2, (U32)blockSize1); - return (blockSize2 <= blockSize1) /* seqStore space depends on blockSize */ + DEBUGLOG(4, "ZSTD_sufficientBuff: is neededBufferSize2=%u <= bufferSize1=%u", + (U32)neededBufferSize2, (U32)bufferSize1); + DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbSeq2=%u <= maxNbSeq1=%u", + (U32)maxNbSeq2, (U32)maxNbSeq1); + DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbLit2=%u <= maxNbLit1=%u", + (U32)maxNbLit2, (U32)maxNbLit1); + return (maxNbLit2 <= maxNbLit1) + & (maxNbSeq2 <= maxNbSeq1) & (neededBufferSize2 <= bufferSize1); } /** Equivalence for resetCCtx purposes */ static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1, ZSTD_CCtx_params params2, - size_t buffSize1, size_t blockSize1, + size_t buffSize1, + size_t maxNbSeq1, size_t maxNbLit1, ZSTD_buffered_policy_e buffPol2, U64 pledgedSrcSize) { DEBUGLOG(4, "ZSTD_equivalentParams: pledgedSrcSize=%u", (U32)pledgedSrcSize); - return ZSTD_equivalentCParams(params1.cParams, params2.cParams) && - ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams) && - ZSTD_sufficientBuff(buffSize1, blockSize1, buffPol2, params2.cParams, pledgedSrcSize); + if (!ZSTD_equivalentCParams(params1.cParams, params2.cParams)) { + DEBUGLOG(4, "ZSTD_equivalentCParams() == 0"); + return 0; + } + if (!ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams)) { + DEBUGLOG(4, "ZSTD_equivalentLdmParams() == 0"); + return 0; + } + if (!ZSTD_sufficientBuff(buffSize1, maxNbSeq1, maxNbLit1, buffPol2, + params2.cParams, pledgedSrcSize)) { + DEBUGLOG(4, "ZSTD_sufficientBuff() == 0"); + return 0; + } + return 1; } static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) @@ -1103,8 +1121,9 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, if (crp == ZSTDcrp_continue) { if (ZSTD_equivalentParams(zc->appliedParams, params, - zc->inBuffSize, zc->blockSize, - zbuff, pledgedSrcSize)) { + zc->inBuffSize, + zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit, + zbuff, pledgedSrcSize)) { DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%zu)", zc->appliedParams.cParams.windowLog, zc->blockSize); zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0); /* if it was too large, it still is */ @@ -1125,7 +1144,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); U32 const divider = (params.cParams.searchLength==3) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; - size_t const tokenSpace = blockSize + 11*maxNbSeq; + size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq; size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0; size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0; size_t const matchStateSize = ZSTD_sizeof_matchState(¶ms.cParams, /* forCCtx */ 1); @@ -1165,7 +1184,6 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, if (zc->workSpace == NULL) return ERROR(memory_allocation); zc->workSpaceSize = neededSpace; zc->workSpaceOversizedDuration = 0; - ptr = zc->workSpace; /* Statically sized space. * entropyWorkspace never moves, @@ -1216,13 +1234,18 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, ¶ms.cParams, crp, /* forCCtx */ 1); /* sequences storage */ + zc->seqStore.maxNbSeq = maxNbSeq; zc->seqStore.sequencesStart = (seqDef*)ptr; ptr = zc->seqStore.sequencesStart + maxNbSeq; zc->seqStore.llCode = (BYTE*) ptr; zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq; zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq; zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq; - ptr = zc->seqStore.litStart + blockSize; + /* ZSTD_wildcopy() is used to copy into the literals buffer, + * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. + */ + zc->seqStore.maxNbLit = blockSize; + ptr = zc->seqStore.litStart + blockSize + WILDCOPY_OVERLENGTH; /* ldm bucketOffsets table */ if (params.ldmParams.enableLdm) { @@ -1341,8 +1364,7 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, } /* copy dictionary offsets */ - { - ZSTD_matchState_t const* srcMatchState = &cdict->matchState; + { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; dstMatchState->window = srcMatchState->window; dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; @@ -1666,6 +1688,7 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) BYTE* const mlCodeTable = seqStorePtr->mlCode; U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); U32 u; + assert(nbSeq <= seqStorePtr->maxNbSeq); for (u=0; u= maxCSize) return 0; /* block not compressed */ } - /* We check that dictionaries have offset codes available for the first - * block. After the first block, the offcode table might not have large - * enough codes to represent the offsets in the data. - */ - if (nextEntropy->fse.offcode_repeatMode == FSE_repeat_valid) - nextEntropy->fse.offcode_repeatMode = FSE_repeat_check; - return cSize; } @@ -2399,12 +2415,20 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, &zc->appliedParams, dst, dstCapacity, srcSize, zc->entropyWorkspace, zc->bmi2); - if (ZSTD_isError(cSize) || cSize == 0) return cSize; - /* confirm repcodes and entropy tables */ - { ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; + if (!ZSTD_isError(cSize) && cSize != 0) { + /* confirm repcodes and entropy tables */ + ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; zc->blockState.prevCBlock = zc->blockState.nextCBlock; zc->blockState.nextCBlock = tmp; } + + /* We check that dictionaries have offset codes available for the first + * block. After the first block, the offcode table might not have large + * enough codes to represent the offsets in the data. + */ + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + return cSize; } } diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index d31542c69..1c95b7de9 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -314,8 +314,10 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode); } #endif + assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); /* copy Literals */ - assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + 128 KB); + assert(seqStorePtr->maxNbLit <= 128 KB); + assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); seqStorePtr->lit += litLength; diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 476cdc148..c4b9bb13b 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -970,7 +970,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ U32 seqPos = cur; DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", - last_pos, cur); + last_pos, cur); (void)last_pos; assert(storeEnd < ZSTD_OPT_NUM); DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off); diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index cc1af58e6..244690cdc 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -320,7 +320,8 @@ static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq) static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem) { - ZSTDMT_seqPool* seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem); + ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem); + if (seqPool == NULL) return NULL; ZSTDMT_setNbSeq(seqPool, 0); return seqPool; } diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index a696261bd..83ecaff01 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -533,9 +533,9 @@ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, } } -size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, - size_t srcSize, void* workSpace, - size_t wkspSize) +size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) { U32 tableLog, maxW, sizeOfSort, nbSymbols; DTableDesc dtd = HUF_getDTableDesc(DTable); diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 8f4589d13..1382c9c7f 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -40,7 +40,6 @@ # define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_DEFAULTMAX) + 1) #endif - /*! * NO_FORWARD_PROGRESS_MAX : * maximum allowed nb of calls to ZSTD_decompressStream() and ZSTD_decompress_generic() @@ -52,11 +51,13 @@ # define ZSTD_NO_FORWARD_PROGRESS_MAX 16 #endif + /*-******************************************************* * Dependencies *********************************************************/ #include /* memcpy, memmove, memset */ -#include "cpu.h" +#include "compiler.h" /* prefetch */ +#include "cpu.h" /* bmi2 */ #include "mem.h" /* low level memory routines */ #define FSE_STATIC_LINKING_ONLY #include "fse.h" @@ -68,6 +69,9 @@ # include "zstd_legacy.h" #endif +static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict); +static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict); + /*-************************************* * Errors @@ -110,11 +114,10 @@ typedef struct { #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) typedef struct { - ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; - ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; - ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; + ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ + ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ + ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ - U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; U32 rep[ZSTD_REP_NUM]; } ZSTD_entropyDTables_t; @@ -125,6 +128,7 @@ struct ZSTD_DCtx_s const ZSTD_seqSymbol* OFTptr; const HUF_DTable* HUFptr; ZSTD_entropyDTables_t entropy; + U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ const void* previousDstEnd; /* detect continuity */ const void* prefixStart; /* start of current segment */ const void* virtualStart; /* virtual start of previous segment if it was just before current one */ @@ -138,7 +142,6 @@ struct ZSTD_DCtx_s U32 fseEntropy; XXH64_state_t xxhState; size_t headerSize; - U32 dictID; ZSTD_format_e format; const BYTE* litPtr; ZSTD_customMem customMem; @@ -147,9 +150,13 @@ struct ZSTD_DCtx_s size_t staticSize; int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ - /* streaming */ + /* dictionary */ ZSTD_DDict* ddictLocal; - const ZSTD_DDict* ddict; + const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ + U32 dictID; + int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ + + /* streaming */ ZSTD_dStreamStage streamStage; char* inBuff; size_t inBuffSize; @@ -185,7 +192,7 @@ size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } static size_t ZSTD_startingInputLength(ZSTD_format_e format) { size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ? - ZSTD_frameHeaderSize_prefix - ZSTD_frameIdSize : + ZSTD_frameHeaderSize_prefix - ZSTD_FRAMEIDSIZE : ZSTD_frameHeaderSize_prefix; ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE); /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ @@ -200,6 +207,8 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; dctx->ddict = NULL; dctx->ddictLocal = NULL; + dctx->dictEnd = NULL; + dctx->ddictIsCold = 0; dctx->inBuff = NULL; dctx->inBuffSize = 0; dctx->outBuffSize = 0; @@ -278,7 +287,7 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) * Note 3 : Skippable Frame Identifiers are considered valid. */ unsigned ZSTD_isFrame(const void* buffer, size_t size) { - if (size < ZSTD_frameIdSize) return 0; + if (size < ZSTD_FRAMEIDSIZE) return 0; { U32 const magic = MEM_readLE32(buffer); if (magic == ZSTD_MAGICNUMBER) return 1; if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) return 1; @@ -330,7 +339,9 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s const BYTE* ip = (const BYTE*)src; size_t const minInputSize = ZSTD_startingInputLength(format); + memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ if (srcSize < minInputSize) return minInputSize; + if (src==NULL) return ERROR(GENERIC); /* invalid parameter */ if ( (format != ZSTD_f_zstd1_magicless) && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { @@ -339,7 +350,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s if (srcSize < ZSTD_skippableHeaderSize) return ZSTD_skippableHeaderSize; /* magic number + frame length */ memset(zfhPtr, 0, sizeof(*zfhPtr)); - zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_frameIdSize); + zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); zfhPtr->frameType = ZSTD_skippableFrame; return 0; } @@ -451,7 +462,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) size_t skippableSize; if (srcSize < ZSTD_skippableHeaderSize) return ERROR(srcSize_wrong); - skippableSize = MEM_readLE32((const BYTE *)src + ZSTD_frameIdSize) + skippableSize = MEM_readLE32((const BYTE *)src + ZSTD_FRAMEIDSIZE) + ZSTD_skippableHeaderSize; if (srcSize < skippableSize) { return ZSTD_CONTENTSIZE_ERROR; @@ -540,6 +551,7 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { + if (dst==NULL) return ERROR(dstSize_tooSmall); if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall); memcpy(dst, src, srcSize); return srcSize; @@ -572,6 +584,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, case set_repeat: if (dctx->litEntropy==0) return ERROR(dictionary_corrupted); /* fall-through */ + case set_compressed: if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */ { size_t lhSize, litSize, litCSize; @@ -603,15 +616,20 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected); if (litCSize + lhSize > srcSize) return ERROR(corruption_detected); + /* prefetch huffman table if cold */ + if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { + PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable)); + } + if (HUF_isError((litEncType==set_repeat) ? ( singleStream ? HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) : HUF_decompress4X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) ) : ( singleStream ? HUF_decompress1X1_DCtx_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize, - dctx->entropy.workspace, sizeof(dctx->entropy.workspace), dctx->bmi2) : + dctx->workspace, sizeof(dctx->workspace), dctx->bmi2) : HUF_decompress4X_hufOnly_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize, - dctx->entropy.workspace, sizeof(dctx->entropy.workspace), dctx->bmi2)))) + dctx->workspace, sizeof(dctx->workspace), dctx->bmi2)))) return ERROR(corruption_detected); dctx->litPtr = dctx->litBuffer; @@ -883,7 +901,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb symbolEncodingType_e type, U32 max, U32 maxLog, const void* src, size_t srcSize, const U32* baseValue, const U32* nbAdditionalBits, - const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable) + const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable, + int ddictIsCold, int nbSeq) { switch(type) { @@ -902,6 +921,12 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb return 0; case set_repeat: if (!flagRepeatTable) return ERROR(corruption_detected); + /* prefetch FSE table if used */ + if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { + const void* const pStart = *DTablePtr; + size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog)); + PREFETCH_AREA(pStart, pSize); + } return 0; case set_compressed : { U32 tableLog; @@ -954,25 +979,25 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, const BYTE* const istart = (const BYTE* const)src; const BYTE* const iend = istart + srcSize; const BYTE* ip = istart; + int nbSeq; DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); /* check */ if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong); /* SeqHead */ - { int nbSeq = *ip++; - if (!nbSeq) { *nbSeqPtr=0; return 1; } - if (nbSeq > 0x7F) { - if (nbSeq == 0xFF) { - if (ip+2 > iend) return ERROR(srcSize_wrong); - nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; - } else { - if (ip >= iend) return ERROR(srcSize_wrong); - nbSeq = ((nbSeq-0x80)<<8) + *ip++; - } + nbSeq = *ip++; + if (!nbSeq) { *nbSeqPtr=0; return 1; } + if (nbSeq > 0x7F) { + if (nbSeq == 0xFF) { + if (ip+2 > iend) return ERROR(srcSize_wrong); + nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; + } else { + if (ip >= iend) return ERROR(srcSize_wrong); + nbSeq = ((nbSeq-0x80)<<8) + *ip++; } - *nbSeqPtr = nbSeq; } + *nbSeqPtr = nbSeq; /* FSE table descriptors */ if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */ @@ -986,7 +1011,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_base, LL_bits, - LL_defaultDTable, dctx->fseEntropy); + LL_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq); if (ZSTD_isError(llhSize)) return ERROR(corruption_detected); ip += llhSize; } @@ -995,7 +1021,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, OFtype, MaxOff, OffFSELog, ip, iend-ip, OF_base, OF_bits, - OF_defaultDTable, dctx->fseEntropy); + OF_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq); if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected); ip += ofhSize; } @@ -1004,12 +1031,23 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, MLtype, MaxML, MLFSELog, ip, iend-ip, ML_base, ML_bits, - ML_defaultDTable, dctx->fseEntropy); + ML_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq); if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected); ip += mlhSize; } } + /* prefetch dictionary content */ + if (dctx->ddictIsCold) { + size_t const dictSize = (const char*)dctx->prefixStart - (const char*)dctx->virtualStart; + size_t const psmin = MIN(dictSize, (size_t)(64*nbSeq) /* heuristic */ ); + size_t const pSize = MIN(psmin, 128 KB /* protection */ ); + const void* const pStart = (const char*)dctx->dictEnd - pSize; + PREFETCH_AREA(pStart, pSize); + dctx->ddictIsCold = 0; + } + return ip-istart; } @@ -1676,7 +1714,8 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, /* isLongOffset must be true if there are long offsets. * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN. * We don't expect that to be the case in 64-bit mode. - * In block mode, window size is not known, so we have to be conservative. (note: but it could be evaluated from current-lowLimit) + * In block mode, window size is not known, so we have to be conservative. + * (note: but it could be evaluated from current-lowLimit) */ ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))); DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); @@ -1763,7 +1802,7 @@ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) #endif if ( (srcSize >= ZSTD_skippableHeaderSize) && (MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START ) { - return ZSTD_skippableHeaderSize + MEM_readLE32((const BYTE*)src + ZSTD_frameIdSize); + return ZSTD_skippableHeaderSize + MEM_readLE32((const BYTE*)src + ZSTD_FRAMEIDSIZE); } else { const BYTE* ip = (const BYTE*)src; const BYTE* const ipstart = ip; @@ -1797,7 +1836,6 @@ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) if (zfh.checksumFlag) { /* Final frame content checksum */ if (remainingSize < 4) return ERROR(srcSize_wrong); ip += 4; - remainingSize -= 4; } return ip - ipstart; @@ -1885,9 +1923,6 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, return op-ostart; } -static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict); -static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict); - static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, @@ -1896,6 +1931,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, { void* const dststart = dst; int moreThan1Frame = 0; + + DEBUGLOG(5, "ZSTD_decompressMultiFrame"); assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */ if (ddict) { @@ -1932,7 +1969,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, size_t skippableSize; if (srcSize < ZSTD_skippableHeaderSize) return ERROR(srcSize_wrong); - skippableSize = MEM_readLE32((const BYTE*)src + ZSTD_frameIdSize) + skippableSize = MEM_readLE32((const BYTE*)src + ZSTD_FRAMEIDSIZE) + ZSTD_skippableHeaderSize; if (srcSize < skippableSize) return ERROR(srcSize_wrong); @@ -2057,7 +2094,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c case ZSTDds_getFrameHeaderSize : assert(src != NULL); if (dctx->format == ZSTD_f_zstd1) { /* allows header */ - assert(srcSize >= ZSTD_frameIdSize); /* to read skippable magic number */ + assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */ if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ memcpy(dctx->headerBuffer, src, srcSize); dctx->expected = ZSTD_skippableHeaderSize - srcSize; /* remaining to load to get full skippable frame header */ @@ -2167,7 +2204,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c assert(src != NULL); assert(srcSize <= ZSTD_skippableHeaderSize); memcpy(dctx->headerBuffer + (ZSTD_skippableHeaderSize - srcSize), src, srcSize); /* complete skippable header */ - dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_frameIdSize); /* note : dctx->expected can grow seriously large, beyond local buffer size */ + dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ dctx->stage = ZSTDds_skipFrame; return 0; @@ -2191,21 +2228,27 @@ static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dict return 0; } -/* ZSTD_loadEntropy() : - * dict : must point at beginning of a valid zstd dictionary +/*! ZSTD_loadEntropy() : + * dict : must point at beginning of a valid zstd dictionary. * @return : size of entropy tables read */ -static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const dict, size_t const dictSize) +static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize) { const BYTE* dictPtr = (const BYTE*)dict; const BYTE* const dictEnd = dictPtr + dictSize; if (dictSize <= 8) return ERROR(dictionary_corrupted); + assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ dictPtr += 8; /* skip header = magic + dictID */ - - { size_t const hSize = HUF_readDTableX2_wksp( - entropy->hufTable, dictPtr, dictEnd - dictPtr, - entropy->workspace, sizeof(entropy->workspace)); + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable)); + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable)); + ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE); + { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */ + size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable); + size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, + dictPtr, dictEnd - dictPtr, + workspace, workspaceSize); if (HUF_isError(hSize)) return ERROR(dictionary_corrupted); dictPtr += hSize; } @@ -2216,7 +2259,7 @@ static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); if (offcodeMaxValue > MaxOff) return ERROR(dictionary_corrupted); if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted); - ZSTD_buildFSETable(entropy->OFTable, + ZSTD_buildFSETable( entropy->OFTable, offcodeNCount, offcodeMaxValue, OF_base, OF_bits, offcodeLog); @@ -2229,7 +2272,7 @@ static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); if (matchlengthMaxValue > MaxML) return ERROR(dictionary_corrupted); if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted); - ZSTD_buildFSETable(entropy->MLTable, + ZSTD_buildFSETable( entropy->MLTable, matchlengthNCount, matchlengthMaxValue, ML_base, ML_bits, matchlengthLog); @@ -2242,7 +2285,7 @@ static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); if (litlengthMaxValue > MaxLL) return ERROR(dictionary_corrupted); if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted); - ZSTD_buildFSETable(entropy->LLTable, + ZSTD_buildFSETable( entropy->LLTable, litlengthNCount, litlengthMaxValue, LL_base, LL_bits, litlengthLog); @@ -2268,7 +2311,7 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict if (magic != ZSTD_MAGIC_DICTIONARY) { return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ } } - dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_frameIdSize); + dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); /* load entropy tables */ { size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize); @@ -2282,7 +2325,6 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict return ZSTD_refDictContent(dctx, dict, dictSize); } -/* Note : this function cannot fail */ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) { assert(dctx != NULL); @@ -2328,42 +2370,53 @@ struct ZSTD_DDict_s { static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict) { + assert(ddict != NULL); return ddict->dictContent; } static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict) { + assert(ddict != NULL); return ddict->dictSize; } -size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddict) +size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) { - CHECK_F( ZSTD_decompressBegin(dstDCtx) ); - if (ddict) { /* support begin on NULL */ - dstDCtx->dictID = ddict->dictID; - dstDCtx->prefixStart = ddict->dictContent; - dstDCtx->virtualStart = ddict->dictContent; - dstDCtx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; - dstDCtx->previousDstEnd = dstDCtx->dictEnd; + DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict"); + assert(dctx != NULL); + if (ddict) { + dctx->ddictIsCold = (dctx->dictEnd != (const char*)ddict->dictContent + ddict->dictSize); + DEBUGLOG(4, "DDict is %s", + dctx->ddictIsCold ? "~cold~" : "hot!"); + } + CHECK_F( ZSTD_decompressBegin(dctx) ); + if (ddict) { /* NULL ddict is equivalent to no dictionary */ + dctx->dictID = ddict->dictID; + dctx->prefixStart = ddict->dictContent; + dctx->virtualStart = ddict->dictContent; + dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; + dctx->previousDstEnd = dctx->dictEnd; if (ddict->entropyPresent) { - dstDCtx->litEntropy = 1; - dstDCtx->fseEntropy = 1; - dstDCtx->LLTptr = ddict->entropy.LLTable; - dstDCtx->MLTptr = ddict->entropy.MLTable; - dstDCtx->OFTptr = ddict->entropy.OFTable; - dstDCtx->HUFptr = ddict->entropy.hufTable; - dstDCtx->entropy.rep[0] = ddict->entropy.rep[0]; - dstDCtx->entropy.rep[1] = ddict->entropy.rep[1]; - dstDCtx->entropy.rep[2] = ddict->entropy.rep[2]; + dctx->litEntropy = 1; + dctx->fseEntropy = 1; + dctx->LLTptr = ddict->entropy.LLTable; + dctx->MLTptr = ddict->entropy.MLTable; + dctx->OFTptr = ddict->entropy.OFTable; + dctx->HUFptr = ddict->entropy.hufTable; + dctx->entropy.rep[0] = ddict->entropy.rep[0]; + dctx->entropy.rep[1] = ddict->entropy.rep[1]; + dctx->entropy.rep[2] = ddict->entropy.rep[2]; } else { - dstDCtx->litEntropy = 0; - dstDCtx->fseEntropy = 0; + dctx->litEntropy = 0; + dctx->fseEntropy = 0; } } return 0; } -static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict, ZSTD_dictContentType_e dictContentType) +static size_t +ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict, + ZSTD_dictContentType_e dictContentType) { ddict->dictID = 0; ddict->entropyPresent = 0; @@ -2381,10 +2434,12 @@ static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict, ZSTD_dictContentType_e return 0; /* pure content mode */ } } - ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_frameIdSize); + ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); /* load entropy tables */ - CHECK_E( ZSTD_loadEntropy(&ddict->entropy, ddict->dictContent, ddict->dictSize), dictionary_corrupted ); + CHECK_E( ZSTD_loadEntropy(&ddict->entropy, + ddict->dictContent, ddict->dictSize), + dictionary_corrupted ); ddict->entropyPresent = 1; return 0; } @@ -2398,6 +2453,7 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { ddict->dictBuffer = NULL; ddict->dictContent = dict; + if (!dict) dictSize = 0; } else { void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem); ddict->dictBuffer = internalBuffer; @@ -2422,14 +2478,15 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, if (!customMem.customAlloc ^ !customMem.customFree) return NULL; { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem); - if (!ddict) return NULL; + if (ddict == NULL) return NULL; ddict->cMem = customMem; - - if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, dictLoadMethod, dictContentType) )) { - ZSTD_freeDDict(ddict); - return NULL; - } - + { size_t const initResult = ZSTD_initDDict_internal(ddict, + dict, dictSize, + dictLoadMethod, dictContentType); + if (ZSTD_isError(initResult)) { + ZSTD_freeDDict(ddict); + return NULL; + } } return ddict; } } @@ -2456,23 +2513,25 @@ ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize const ZSTD_DDict* ZSTD_initStaticDDict( - void* workspace, size_t workspaceSize, + void* sBuffer, size_t sBufferSize, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) { - size_t const neededSpace = - sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); - ZSTD_DDict* const ddict = (ZSTD_DDict*)workspace; - assert(workspace != NULL); + size_t const neededSpace = sizeof(ZSTD_DDict) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); + ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; + assert(sBuffer != NULL); assert(dict != NULL); - if ((size_t)workspace & 7) return NULL; /* 8-aligned */ - if (workspaceSize < neededSpace) return NULL; + if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ + if (sBufferSize < neededSpace) return NULL; if (dictLoadMethod == ZSTD_dlm_byCopy) { memcpy(ddict+1, dict, dictSize); /* local copy */ dict = ddict+1; } - if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, ZSTD_dlm_byRef, dictContentType) )) + if (ZSTD_isError( ZSTD_initDDict_internal(ddict, + dict, dictSize, + ZSTD_dlm_byRef, dictContentType) )) return NULL; return ddict; } @@ -2510,7 +2569,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) { if (dictSize < 8) return 0; if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0; - return MEM_readLE32((const char*)dict + ZSTD_frameIdSize); + return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); } /*! ZSTD_getDictID_fromDDict() : @@ -2586,12 +2645,15 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds) } -/* *** Initialization *** */ +/* *** Initialization *** */ size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; } size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; } -size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) +size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) { if (dctx->streamStage != zdss_init) return ERROR(stage_wrong); ZSTD_freeDDict(dctx->ddictLocal); @@ -2645,13 +2707,6 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds) return ZSTD_initDStream_usingDict(zds, NULL, 0); } -size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) -{ - if (dctx->streamStage != zdss_init) return ERROR(stage_wrong); - dctx->ddict = ddict; - return 0; -} - /* ZSTD_initDStream_usingDDict() : * ddict will just be referenced, and must outlive decompression session * this function cannot fail */ @@ -2690,6 +2745,13 @@ size_t ZSTD_setDStreamParameter(ZSTD_DStream* dctx, return 0; } +size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + if (dctx->streamStage != zdss_init) return ERROR(stage_wrong); + dctx->ddict = ddict; + return 0; +} + size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) { if (dctx->streamStage != zdss_init) return ERROR(stage_wrong); @@ -2855,7 +2917,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB CHECK_F(ZSTD_decompressBegin_usingDDict(zds, zds->ddict)); if ((MEM_readLE32(zds->headerBuffer) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ - zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_frameIdSize); + zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); zds->stage = ZSTDds_skipFrame; } else { CHECK_F(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize)); diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c index e32991652..6b4af69d2 100644 --- a/lib/dictBuilder/cover.c +++ b/lib/dictBuilder/cover.c @@ -29,6 +29,7 @@ #include "mem.h" /* read */ #include "pool.h" #include "threading.h" +#include "cover.h" #include "zstd_internal.h" /* includes zstd.h */ #ifndef ZDICT_STATIC_LINKING_ONLY #define ZDICT_STATIC_LINKING_ONLY @@ -185,7 +186,7 @@ static void COVER_map_remove(COVER_map_t *map, U32 key) { } /** - * Destroyes a map that is inited with COVER_map_init(). + * Destroys a map that is inited with COVER_map_init(). */ static void COVER_map_destroy(COVER_map_t *map) { if (map->data) { @@ -223,7 +224,7 @@ static COVER_ctx_t *g_ctx = NULL; /** * Returns the sum of the sample sizes. */ -static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) { +size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) { size_t sum = 0; unsigned i; for (i = 0; i < nbSamples; ++i) { @@ -380,14 +381,6 @@ static void COVER_group(COVER_ctx_t *ctx, const void *group, ctx->suffix[dmerId] = freq; } -/** - * A segment is a range in the source as well as the score of the segment. - */ -typedef struct { - U32 begin; - U32 end; - U32 score; -} COVER_segment_t; /** * Selects the best segment in an epoch. @@ -737,28 +730,65 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( } } -/** - * COVER_best_t is used for two purposes: - * 1. Synchronizing threads. - * 2. Saving the best parameters and dictionary. - * - * All of the methods except COVER_best_init() are thread safe if zstd is - * compiled with multithreaded support. - */ -typedef struct COVER_best_s { - ZSTD_pthread_mutex_t mutex; - ZSTD_pthread_cond_t cond; - size_t liveJobs; - void *dict; - size_t dictSize; - ZDICT_cover_params_t parameters; - size_t compressedSize; -} COVER_best_t; + + +size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, + const size_t *samplesSizes, const BYTE *samples, + size_t *offsets, + size_t nbTrainSamples, size_t nbSamples, + BYTE *const dict, size_t dictBufferCapacity) { + size_t totalCompressedSize = ERROR(GENERIC); + /* Pointers */ + ZSTD_CCtx *cctx; + ZSTD_CDict *cdict; + void *dst; + /* Local variables */ + size_t dstCapacity; + size_t i; + /* Allocate dst with enough space to compress the maximum sized sample */ + { + size_t maxSampleSize = 0; + i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0; + for (; i < nbSamples; ++i) { + maxSampleSize = MAX(samplesSizes[i], maxSampleSize); + } + dstCapacity = ZSTD_compressBound(maxSampleSize); + dst = malloc(dstCapacity); + } + /* Create the cctx and cdict */ + cctx = ZSTD_createCCtx(); + cdict = ZSTD_createCDict(dict, dictBufferCapacity, + parameters.zParams.compressionLevel); + if (!dst || !cctx || !cdict) { + goto _compressCleanup; + } + /* Compress each sample and sum their sizes (or error) */ + totalCompressedSize = dictBufferCapacity; + i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0; + for (; i < nbSamples; ++i) { + const size_t size = ZSTD_compress_usingCDict( + cctx, dst, dstCapacity, samples + offsets[i], + samplesSizes[i], cdict); + if (ZSTD_isError(size)) { + totalCompressedSize = ERROR(GENERIC); + goto _compressCleanup; + } + totalCompressedSize += size; + } +_compressCleanup: + ZSTD_freeCCtx(cctx); + ZSTD_freeCDict(cdict); + if (dst) { + free(dst); + } + return totalCompressedSize; +} + /** * Initialize the `COVER_best_t`. */ -static void COVER_best_init(COVER_best_t *best) { +void COVER_best_init(COVER_best_t *best) { if (best==NULL) return; /* compatible with init on NULL */ (void)ZSTD_pthread_mutex_init(&best->mutex, NULL); (void)ZSTD_pthread_cond_init(&best->cond, NULL); @@ -772,7 +802,7 @@ static void COVER_best_init(COVER_best_t *best) { /** * Wait until liveJobs == 0. */ -static void COVER_best_wait(COVER_best_t *best) { +void COVER_best_wait(COVER_best_t *best) { if (!best) { return; } @@ -786,7 +816,7 @@ static void COVER_best_wait(COVER_best_t *best) { /** * Call COVER_best_wait() and then destroy the COVER_best_t. */ -static void COVER_best_destroy(COVER_best_t *best) { +void COVER_best_destroy(COVER_best_t *best) { if (!best) { return; } @@ -802,7 +832,7 @@ static void COVER_best_destroy(COVER_best_t *best) { * Called when a thread is about to be launched. * Increments liveJobs. */ -static void COVER_best_start(COVER_best_t *best) { +void COVER_best_start(COVER_best_t *best) { if (!best) { return; } @@ -816,7 +846,7 @@ static void COVER_best_start(COVER_best_t *best) { * Decrements liveJobs and signals any waiting threads if liveJobs == 0. * If this dictionary is the best so far save it and its parameters. */ -static void COVER_best_finish(COVER_best_t *best, size_t compressedSize, +void COVER_best_finish(COVER_best_t *best, size_t compressedSize, ZDICT_cover_params_t parameters, void *dict, size_t dictSize) { if (!best) { @@ -847,10 +877,10 @@ static void COVER_best_finish(COVER_best_t *best, size_t compressedSize, best->parameters = parameters; best->compressedSize = compressedSize; } - ZSTD_pthread_mutex_unlock(&best->mutex); if (liveJobs == 0) { ZSTD_pthread_cond_broadcast(&best->cond); } + ZSTD_pthread_mutex_unlock(&best->mutex); } } @@ -904,51 +934,10 @@ static void COVER_tryParameters(void *opaque) { } } /* Check total compressed size */ - { - /* Pointers */ - ZSTD_CCtx *cctx; - ZSTD_CDict *cdict; - void *dst; - /* Local variables */ - size_t dstCapacity; - size_t i; - /* Allocate dst with enough space to compress the maximum sized sample */ - { - size_t maxSampleSize = 0; - i = parameters.splitPoint < 1.0 ? ctx->nbTrainSamples : 0; - for (; i < ctx->nbSamples; ++i) { - maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize); - } - dstCapacity = ZSTD_compressBound(maxSampleSize); - dst = malloc(dstCapacity); - } - /* Create the cctx and cdict */ - cctx = ZSTD_createCCtx(); - cdict = ZSTD_createCDict(dict, dictBufferCapacity, - parameters.zParams.compressionLevel); - if (!dst || !cctx || !cdict) { - goto _compressCleanup; - } - /* Compress each sample and sum their sizes (or error) */ - totalCompressedSize = dictBufferCapacity; - i = parameters.splitPoint < 1.0 ? ctx->nbTrainSamples : 0; - for (; i < ctx->nbSamples; ++i) { - const size_t size = ZSTD_compress_usingCDict( - cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i], - ctx->samplesSizes[i], cdict); - if (ZSTD_isError(size)) { - totalCompressedSize = ERROR(GENERIC); - goto _compressCleanup; - } - totalCompressedSize += size; - } - _compressCleanup: - ZSTD_freeCCtx(cctx); - ZSTD_freeCDict(cdict); - if (dst) { - free(dst); - } - } + totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes, + ctx->samples, ctx->offsets, + ctx->nbTrainSamples, ctx->nbSamples, + dict, dictBufferCapacity); _cleanup: COVER_best_finish(data->best, totalCompressedSize, parameters, dict, diff --git a/lib/dictBuilder/cover.h b/lib/dictBuilder/cover.h new file mode 100644 index 000000000..82e2e1cea --- /dev/null +++ b/lib/dictBuilder/cover.h @@ -0,0 +1,83 @@ +#include /* fprintf */ +#include /* malloc, free, qsort */ +#include /* memset */ +#include /* clock */ +#include "mem.h" /* read */ +#include "pool.h" +#include "threading.h" +#include "zstd_internal.h" /* includes zstd.h */ +#ifndef ZDICT_STATIC_LINKING_ONLY +#define ZDICT_STATIC_LINKING_ONLY +#endif +#include "zdict.h" + +/** + * COVER_best_t is used for two purposes: + * 1. Synchronizing threads. + * 2. Saving the best parameters and dictionary. + * + * All of the methods except COVER_best_init() are thread safe if zstd is + * compiled with multithreaded support. + */ +typedef struct COVER_best_s { + ZSTD_pthread_mutex_t mutex; + ZSTD_pthread_cond_t cond; + size_t liveJobs; + void *dict; + size_t dictSize; + ZDICT_cover_params_t parameters; + size_t compressedSize; +} COVER_best_t; + +/** + * A segment is a range in the source as well as the score of the segment. + */ +typedef struct { + U32 begin; + U32 end; + U32 score; +} COVER_segment_t; + +/** + * Checks total compressed size of a dictionary + */ +size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, + const size_t *samplesSizes, const BYTE *samples, + size_t *offsets, + size_t nbTrainSamples, size_t nbSamples, + BYTE *const dict, size_t dictBufferCapacity); + +/** + * Returns the sum of the sample sizes. + */ +size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) ; + +/** + * Initialize the `COVER_best_t`. + */ +void COVER_best_init(COVER_best_t *best); + +/** + * Wait until liveJobs == 0. + */ +void COVER_best_wait(COVER_best_t *best); + +/** + * Call COVER_best_wait() and then destroy the COVER_best_t. + */ +void COVER_best_destroy(COVER_best_t *best); + +/** + * Called when a thread is about to be launched. + * Increments liveJobs. + */ +void COVER_best_start(COVER_best_t *best); + +/** + * Called when a thread finishes executing, both on error or success. + * Decrements liveJobs and signals any waiting threads if liveJobs == 0. + * If this dictionary is the best so far save it and its parameters. + */ +void COVER_best_finish(COVER_best_t *best, size_t compressedSize, + ZDICT_cover_params_t parameters, void *dict, + size_t dictSize); diff --git a/lib/dictBuilder/divsufsort.c b/lib/dictBuilder/divsufsort.c index 60cceb088..ead922044 100644 --- a/lib/dictBuilder/divsufsort.c +++ b/lib/dictBuilder/divsufsort.c @@ -1637,7 +1637,7 @@ construct_SA(const unsigned char *T, int *SA, if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } k = SA + BUCKET_B(c2 = c0, c1); } - assert(k < j); + assert(k < j); assert(k != NULL); *k-- = s; } else { assert(((s == 0) && (T[s] == c1)) || (s < 0)); @@ -1701,7 +1701,7 @@ construct_BWT(const unsigned char *T, int *SA, if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } k = SA + BUCKET_B(c2 = c0, c1); } - assert(k < j); + assert(k < j); assert(k != NULL); *k-- = s; } else if(s != 0) { *j = ~s; @@ -1785,7 +1785,7 @@ construct_BWT_indexes(const unsigned char *T, int *SA, if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } k = SA + BUCKET_B(c2 = c0, c1); } - assert(k < j); + assert(k < j); assert(k != NULL); *k-- = s; } else if(s != 0) { *j = ~s; diff --git a/lib/dictBuilder/fastcover.c b/lib/dictBuilder/fastcover.c new file mode 100644 index 000000000..6ce8c8809 --- /dev/null +++ b/lib/dictBuilder/fastcover.c @@ -0,0 +1,701 @@ +/*-************************************* +* Dependencies +***************************************/ +#include /* fprintf */ +#include /* malloc, free, qsort */ +#include /* memset */ +#include /* clock */ + +#include "mem.h" /* read */ +#include "pool.h" +#include "threading.h" +#include "cover.h" +#include "zstd_internal.h" /* includes zstd.h */ +#ifndef ZDICT_STATIC_LINKING_ONLY +#define ZDICT_STATIC_LINKING_ONLY +#endif +#include "zdict.h" + + +/*-************************************* +* Constants +***************************************/ +#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB)) +#define FASTCOVER_MAX_F 31 +#define FASTCOVER_MAX_ACCEL 10 +#define DEFAULT_SPLITPOINT 0.75 +#define DEFAULT_F 20 +#define DEFAULT_ACCEL 1 + + +/*-************************************* +* Console display +***************************************/ +static int g_displayLevel = 2; +#define DISPLAY(...) \ + { \ + fprintf(stderr, __VA_ARGS__); \ + fflush(stderr); \ + } +#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \ + if (displayLevel >= l) { \ + DISPLAY(__VA_ARGS__); \ + } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ +#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__) + +#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \ + if (displayLevel >= l) { \ + if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \ + g_time = clock(); \ + DISPLAY(__VA_ARGS__); \ + } \ + } +#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__) +static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100; +static clock_t g_time = 0; + + +/*-************************************* +* Hash Functions +***************************************/ +static const U64 prime6bytes = 227718039650203ULL; +static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } +static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } + +static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; +static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } +static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } + + +/** + * Hash the d-byte value pointed to by p and mod 2^f + */ +static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 h, unsigned d) { + if (d == 6) { + return ZSTD_hash6Ptr(p, h) & ((1 << h) - 1); + } + return ZSTD_hash8Ptr(p, h) & ((1 << h) - 1); +} + + +/*-************************************* +* Acceleration +***************************************/ +typedef struct { + unsigned finalize; /* Percentage of training samples used for ZDICT_finalizeDictionary */ + unsigned skip; /* Number of dmer skipped between each dmer counted in computeFrequency */ +} FASTCOVER_accel_t; + + +static const FASTCOVER_accel_t FASTCOVER_defaultAccelParameters[FASTCOVER_MAX_ACCEL+1] = { + { 100, 0 }, /* accel = 0, should not happen because accel = 0 defaults to accel = 1 */ + { 100, 0 }, /* accel = 1 */ + { 50, 1 }, /* accel = 2 */ + { 34, 2 }, /* accel = 3 */ + { 25, 3 }, /* accel = 4 */ + { 20, 4 }, /* accel = 5 */ + { 17, 5 }, /* accel = 6 */ + { 14, 6 }, /* accel = 7 */ + { 13, 7 }, /* accel = 8 */ + { 11, 8 }, /* accel = 9 */ + { 10, 9 }, /* accel = 10 */ +}; + + +/*-************************************* +* Context +***************************************/ +typedef struct { + const BYTE *samples; + size_t *offsets; + const size_t *samplesSizes; + size_t nbSamples; + size_t nbTrainSamples; + size_t nbTestSamples; + size_t nbDmers; + U32 *freqs; + unsigned d; + unsigned f; + FASTCOVER_accel_t accelParams; +} FASTCOVER_ctx_t; + + +/*-************************************* +* Helper functions +***************************************/ +/** + * Selects the best segment in an epoch. + * Segments of are scored according to the function: + * + * Let F(d) be the frequency of all dmers with hash value d. + * Let S_i be hash value of the dmer at position i of segment S which has length k. + * + * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1}) + * + * Once the dmer with hash value d is in the dictionay we set F(d) = 0. + */ +static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx, + U32 *freqs, U32 begin, U32 end, + ZDICT_cover_params_t parameters, + U16* segmentFreqs) { + /* Constants */ + const U32 k = parameters.k; + const U32 d = parameters.d; + const U32 f = ctx->f; + const U32 dmersInK = k - d + 1; + + /* Try each segment (activeSegment) and save the best (bestSegment) */ + COVER_segment_t bestSegment = {0, 0, 0}; + COVER_segment_t activeSegment; + + /* Reset the activeDmers in the segment */ + /* The activeSegment starts at the beginning of the epoch. */ + activeSegment.begin = begin; + activeSegment.end = begin; + activeSegment.score = 0; + + /* Slide the activeSegment through the whole epoch. + * Save the best segment in bestSegment. + */ + while (activeSegment.end < end) { + /* Get hash value of current dmer */ + const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d); + + /* Add frequency of this index to score if this is the first occurence of index in active segment */ + if (segmentFreqs[index] == 0) { + activeSegment.score += freqs[index]; + } + /* Increment end of segment and segmentFreqs*/ + activeSegment.end += 1; + segmentFreqs[index] += 1; + /* If the window is now too large, drop the first position */ + if (activeSegment.end - activeSegment.begin == dmersInK + 1) { + /* Get hash value of the dmer to be eliminated from active segment */ + const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d); + segmentFreqs[delIndex] -= 1; + /* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */ + if (segmentFreqs[delIndex] == 0) { + activeSegment.score -= freqs[delIndex]; + } + /* Increment start of segment */ + activeSegment.begin += 1; + } + + /* If this segment is the best so far save it */ + if (activeSegment.score > bestSegment.score) { + bestSegment = activeSegment; + } + } + + /* Zero out rest of segmentFreqs array */ + while (activeSegment.begin < end) { + const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d); + segmentFreqs[delIndex] -= 1; + activeSegment.begin += 1; + } + + { + /* Zero the frequency of hash value of each dmer covered by the chosen segment. */ + U32 pos; + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { + const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, f, d); + freqs[i] = 0; + } + } + + return bestSegment; +} + + +static int FASTCOVER_checkParameters(ZDICT_cover_params_t parameters, + size_t maxDictSize, unsigned f, + unsigned accel) { + /* k, d, and f are required parameters */ + if (parameters.d == 0 || parameters.k == 0) { + return 0; + } + /* d has to be 6 or 8 */ + if (parameters.d != 6 && parameters.d != 8) { + return 0; + } + /* k <= maxDictSize */ + if (parameters.k > maxDictSize) { + return 0; + } + /* d <= k */ + if (parameters.d > parameters.k) { + return 0; + } + /* 0 < f <= FASTCOVER_MAX_F*/ + if (f > FASTCOVER_MAX_F || f == 0) { + return 0; + } + /* 0 < splitPoint <= 1 */ + if (parameters.splitPoint <= 0 || parameters.splitPoint > 1) { + return 0; + } + /* 0 < accel <= 10 */ + if (accel > 10 || accel == 0) { + return 0; + } + return 1; +} + + +/** + * Clean up a context initialized with `FASTCOVER_ctx_init()`. + */ +static void FASTCOVER_ctx_destroy(FASTCOVER_ctx_t *ctx) { + if (!ctx) { + return; + } + + free(ctx->freqs); + ctx->freqs = NULL; + + free(ctx->offsets); + ctx->offsets = NULL; +} + + +/** + * Calculate for frequency of hash value of each dmer in ctx->samples + */ +static void FASTCOVER_computeFrequency(U32 *freqs, FASTCOVER_ctx_t *ctx){ + const unsigned f = ctx->f; + const unsigned d = ctx->d; + const unsigned skip = ctx->accelParams.skip; + const unsigned readLength = MAX(d, 8); + size_t start; /* start of current dmer */ + size_t i; + for (i = 0; i < ctx->nbTrainSamples; i++) { + size_t currSampleStart = ctx->offsets[i]; + size_t currSampleEnd = ctx->offsets[i+1]; + start = currSampleStart; + while (start + readLength <= currSampleEnd) { + const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, d); + freqs[dmerIndex]++; + start = start + skip + 1; + } + } +} + + +/** + * Prepare a context for dictionary building. + * The context is only dependent on the parameter `d` and can used multiple + * times. + * Returns 1 on success or zero on error. + * The context must be destroyed with `FASTCOVER_ctx_destroy()`. + */ +static int FASTCOVER_ctx_init(FASTCOVER_ctx_t *ctx, const void *samplesBuffer, + const size_t *samplesSizes, unsigned nbSamples, + unsigned d, double splitPoint, unsigned f, + FASTCOVER_accel_t accelParams) { + const BYTE *const samples = (const BYTE *)samplesBuffer; + const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples); + /* Split samples into testing and training sets */ + const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples; + const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples; + const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize; + const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize; + /* Checks */ + if (totalSamplesSize < MAX(d, sizeof(U64)) || + totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) { + DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", + (U32)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20)); + return 0; + } + /* Check if there are at least 5 training samples */ + if (nbTrainSamples < 5) { + DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples); + return 0; + } + /* Check if there's testing sample */ + if (nbTestSamples < 1) { + DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples); + return 0; + } + /* Zero the context */ + memset(ctx, 0, sizeof(*ctx)); + DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples, + (U32)trainingSamplesSize); + DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples, + (U32)testSamplesSize); + + ctx->samples = samples; + ctx->samplesSizes = samplesSizes; + ctx->nbSamples = nbSamples; + ctx->nbTrainSamples = nbTrainSamples; + ctx->nbTestSamples = nbTestSamples; + ctx->nbDmers = trainingSamplesSize - MAX(d, sizeof(U64)) + 1; + ctx->d = d; + ctx->f = f; + ctx->accelParams = accelParams; + + /* The offsets of each file */ + ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t)); + if (!ctx->offsets) { + DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n"); + FASTCOVER_ctx_destroy(ctx); + return 0; + } + + /* Fill offsets from the samplesSizes */ + { + U32 i; + ctx->offsets[0] = 0; + for (i = 1; i <= nbSamples; ++i) { + ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1]; + } + } + + /* Initialize frequency array of size 2^f */ + ctx->freqs = (U32 *)calloc(((U64)1 << f), sizeof(U32)); + + DISPLAYLEVEL(2, "Computing frequencies\n"); + FASTCOVER_computeFrequency(ctx->freqs, ctx); + + return 1; +} + + +/** + * Given the prepared context build the dictionary. + */ +static size_t FASTCOVER_buildDictionary(const FASTCOVER_ctx_t *ctx, U32 *freqs, + void *dictBuffer, size_t dictBufferCapacity, + ZDICT_cover_params_t parameters, U16* segmentFreqs){ + BYTE *const dict = (BYTE *)dictBuffer; + size_t tail = dictBufferCapacity; + /* Divide the data up into epochs of equal size. + * We will select at least one segment from each epoch. + */ + const U32 epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k)); + const U32 epochSize = (U32)(ctx->nbDmers / epochs); + size_t epoch; + DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs, + epochSize); + /* Loop through the epochs until there are no more segments or the dictionary + * is full. + */ + for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) { + const U32 epochBegin = (U32)(epoch * epochSize); + const U32 epochEnd = epochBegin + epochSize; + size_t segmentSize; + /* Select a segment */ + COVER_segment_t segment = FASTCOVER_selectSegment( + ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs); + + /* If the segment covers no dmers, then we are out of content */ + if (segment.score == 0) { + break; + } + + /* Trim the segment if necessary and if it is too small then we are done */ + segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); + if (segmentSize < parameters.d) { + break; + } + + /* We fill the dictionary from the back to allow the best segments to be + * referenced with the smallest offsets. + */ + tail -= segmentSize; + memcpy(dict + tail, ctx->samples + segment.begin, segmentSize); + DISPLAYUPDATE( + 2, "\r%u%% ", + (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity)); + } + DISPLAYLEVEL(2, "\r%79s\r", ""); + return tail; +} + + +/** + * Parameters for FASTCOVER_tryParameters(). + */ +typedef struct FASTCOVER_tryParameters_data_s { + const FASTCOVER_ctx_t *ctx; + COVER_best_t *best; + size_t dictBufferCapacity; + ZDICT_cover_params_t parameters; +} FASTCOVER_tryParameters_data_t; + + +/** + * Tries a set of parameters and updates the COVER_best_t with the results. + * This function is thread safe if zstd is compiled with multithreaded support. + * It takes its parameters as an *OWNING* opaque pointer to support threading. + */ +static void FASTCOVER_tryParameters(void *opaque) { + /* Save parameters as local variables */ + FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque; + const FASTCOVER_ctx_t *const ctx = data->ctx; + const ZDICT_cover_params_t parameters = data->parameters; + size_t dictBufferCapacity = data->dictBufferCapacity; + size_t totalCompressedSize = ERROR(GENERIC); + /* Initialize array to keep track of frequency of dmer within activeSegment */ + U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16)); + /* Allocate space for hash table, dict, and freqs */ + BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity); + U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32)); + if (!segmentFreqs || !dict || !freqs) { + DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); + goto _cleanup; + } + /* Copy the frequencies because we need to modify them */ + memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32)); + /* Build the dictionary */ + { + const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity, + parameters, segmentFreqs); + const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100); + dictBufferCapacity = ZDICT_finalizeDictionary( + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, + ctx->samples, ctx->samplesSizes, nbFinalizeSamples, parameters.zParams); + if (ZDICT_isError(dictBufferCapacity)) { + DISPLAYLEVEL(1, "Failed to finalize dictionary\n"); + goto _cleanup; + } + } + /* Check total compressed size */ + totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes, + ctx->samples, ctx->offsets, + ctx->nbTrainSamples, ctx->nbSamples, + dict, dictBufferCapacity); +_cleanup: + COVER_best_finish(data->best, totalCompressedSize, parameters, dict, + dictBufferCapacity); + free(data); + free(segmentFreqs); + free(dict); + free(freqs); +} + + + +static void FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams, + ZDICT_cover_params_t *coverParams) { + coverParams->k = fastCoverParams.k; + coverParams->d = fastCoverParams.d; + coverParams->steps = fastCoverParams.steps; + coverParams->nbThreads = fastCoverParams.nbThreads; + coverParams->splitPoint = fastCoverParams.splitPoint; + coverParams->zParams = fastCoverParams.zParams; +} + + +static void FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams, + ZDICT_fastCover_params_t *fastCoverParams, + unsigned f, unsigned accel) { + fastCoverParams->k = coverParams.k; + fastCoverParams->d = coverParams.d; + fastCoverParams->steps = coverParams.steps; + fastCoverParams->nbThreads = coverParams.nbThreads; + fastCoverParams->splitPoint = coverParams.splitPoint; + fastCoverParams->f = f; + fastCoverParams->accel = accel; + fastCoverParams->zParams = coverParams.zParams; +} + + +ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover( + void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, + const size_t *samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t parameters) { + BYTE* const dict = (BYTE*)dictBuffer; + FASTCOVER_ctx_t ctx; + ZDICT_cover_params_t coverParams; + FASTCOVER_accel_t accelParams; + /* Initialize global data */ + g_displayLevel = parameters.zParams.notificationLevel; + /* Assign splitPoint and f if not provided */ + parameters.splitPoint = 1.0; + parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f; + parameters.accel = parameters.accel == 0 ? DEFAULT_ACCEL : parameters.accel; + /* Convert to cover parameter */ + memset(&coverParams, 0 , sizeof(coverParams)); + FASTCOVER_convertToCoverParams(parameters, &coverParams); + /* Checks */ + if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f, + parameters.accel)) { + DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); + return ERROR(GENERIC); + } + if (nbSamples == 0) { + DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n"); + return ERROR(GENERIC); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + /* Assign corresponding FASTCOVER_accel_t to accelParams*/ + accelParams = FASTCOVER_defaultAccelParameters[parameters.accel]; + /* Initialize context */ + if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, + coverParams.d, parameters.splitPoint, parameters.f, + accelParams)) { + DISPLAYLEVEL(1, "Failed to initialize context\n"); + return ERROR(GENERIC); + } + /* Build the dictionary */ + DISPLAYLEVEL(2, "Building dictionary\n"); + { + /* Initialize array to keep track of frequency of dmer within activeSegment */ + U16* segmentFreqs = (U16 *)calloc(((U64)1 << parameters.f), sizeof(U16)); + const size_t tail = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer, + dictBufferCapacity, coverParams, segmentFreqs); + const unsigned nbFinalizeSamples = (unsigned)(ctx.nbTrainSamples * ctx.accelParams.finalize / 100); + const size_t dictionarySize = ZDICT_finalizeDictionary( + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, + samplesBuffer, samplesSizes, nbFinalizeSamples, coverParams.zParams); + if (!ZSTD_isError(dictionarySize)) { + DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", + (U32)dictionarySize); + } + FASTCOVER_ctx_destroy(&ctx); + free(segmentFreqs); + return dictionarySize; + } +} + + +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover( + void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, + const size_t *samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t *parameters) { + ZDICT_cover_params_t coverParams; + FASTCOVER_accel_t accelParams; + /* constants */ + const unsigned nbThreads = parameters->nbThreads; + const double splitPoint = + parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint; + const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; + const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d; + const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k; + const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k; + const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps; + const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); + const unsigned kIterations = + (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); + const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f; + const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel; + /* Local variables */ + const int displayLevel = parameters->zParams.notificationLevel; + unsigned iteration = 1; + unsigned d; + unsigned k; + COVER_best_t best; + POOL_ctx *pool = NULL; + /* Checks */ + if (splitPoint <= 0 || splitPoint > 1) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n"); + return ERROR(GENERIC); + } + if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n"); + return ERROR(GENERIC); + } + if (kMinK < kMaxD || kMaxK < kMinK) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n"); + return ERROR(GENERIC); + } + if (nbSamples == 0) { + LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n"); + return ERROR(GENERIC); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + if (nbThreads > 1) { + pool = POOL_create(nbThreads, 1); + if (!pool) { + return ERROR(memory_allocation); + } + } + /* Initialization */ + COVER_best_init(&best); + memset(&coverParams, 0 , sizeof(coverParams)); + FASTCOVER_convertToCoverParams(*parameters, &coverParams); + accelParams = FASTCOVER_defaultAccelParameters[accel]; + /* Turn down global display level to clean up display at level 2 and below */ + g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1; + /* Loop through d first because each new value needs a new context */ + LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n", + kIterations); + for (d = kMinD; d <= kMaxD; d += 2) { + /* Initialize the context for this value of d */ + FASTCOVER_ctx_t ctx; + LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); + if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams)) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); + COVER_best_destroy(&best); + POOL_free(pool); + return ERROR(GENERIC); + } + /* Loop through k reusing the same context */ + for (k = kMinK; k <= kMaxK; k += kStepSize) { + /* Prepare the arguments */ + FASTCOVER_tryParameters_data_t *data = (FASTCOVER_tryParameters_data_t *)malloc( + sizeof(FASTCOVER_tryParameters_data_t)); + LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k); + if (!data) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n"); + COVER_best_destroy(&best); + FASTCOVER_ctx_destroy(&ctx); + POOL_free(pool); + return ERROR(GENERIC); + } + data->ctx = &ctx; + data->best = &best; + data->dictBufferCapacity = dictBufferCapacity; + data->parameters = coverParams; + data->parameters.k = k; + data->parameters.d = d; + data->parameters.splitPoint = splitPoint; + data->parameters.steps = kSteps; + data->parameters.zParams.notificationLevel = g_displayLevel; + /* Check the parameters */ + if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity, + data->ctx->f, accel)) { + DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); + free(data); + continue; + } + /* Call the function and pass ownership of data to it */ + COVER_best_start(&best); + if (pool) { + POOL_add(pool, &FASTCOVER_tryParameters, data); + } else { + FASTCOVER_tryParameters(data); + } + /* Print status */ + LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ", + (U32)((iteration * 100) / kIterations)); + ++iteration; + } + COVER_best_wait(&best); + FASTCOVER_ctx_destroy(&ctx); + } + LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", ""); + /* Fill the output buffer and parameters with output of the best parameters */ + { + const size_t dictSize = best.dictSize; + if (ZSTD_isError(best.compressedSize)) { + const size_t compressedSize = best.compressedSize; + COVER_best_destroy(&best); + POOL_free(pool); + return compressedSize; + } + FASTCOVER_convertToFastCoverParams(best.parameters, parameters, f, accel); + memcpy(dictBuffer, best.dict, dictSize); + COVER_best_destroy(&best); + POOL_free(pool); + return dictSize; + } + +} diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index b8a51789a..eb1a5b669 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -698,7 +698,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, short litLengthNCount[MaxLL+1]; U32 repOffset[MAXREPOFFSET]; offsetCount_t bestRepOffset[ZSTD_REP_NUM+1]; - EStats_ress_t esr; + EStats_ress_t esr = { NULL, NULL, NULL }; ZSTD_parameters params; U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total; size_t pos = 0, errorCode; @@ -863,8 +863,8 @@ _cleanup: size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, const void* customDictContent, size_t dictContentSize, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, - ZDICT_params_t params) + const void* samplesBuffer, const size_t* samplesSizes, + unsigned nbSamples, ZDICT_params_t params) { size_t hSize; #define HBUFFSIZE 256 /* should prove large enough for all entropy headers */ @@ -987,8 +987,10 @@ size_t ZDICT_trainFromBuffer_unsafe_legacy( U32 const pos = dictList[u].pos; U32 const length = dictList[u].length; U32 const printedLength = MIN(40, length); - if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize)) + if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize)) { + free(dictList); return ERROR(GENERIC); /* should never happen */ + } DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |", u, length, pos, dictList[u].savings); ZDICT_printHex((const char*)samplesBuffer+pos, printedLength); @@ -1078,17 +1080,17 @@ size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity, size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples) { - ZDICT_cover_params_t params; + ZDICT_fastCover_params_t params; DEBUGLOG(3, "ZDICT_trainFromBuffer"); memset(¶ms, 0, sizeof(params)); params.d = 8; params.steps = 4; /* Default to level 6 since no compression level information is available */ - params.zParams.compressionLevel = 6; + params.zParams.compressionLevel = 3; #if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1) params.zParams.notificationLevel = DEBUGLEVEL; #endif - return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity, + return ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, dictBufferCapacity, samplesBuffer, samplesSizes, nbSamples, ¶ms); } diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h index 4094669d1..d57d59f01 100644 --- a/lib/dictBuilder/zdict.h +++ b/lib/dictBuilder/zdict.h @@ -39,7 +39,8 @@ extern "C" { /*! ZDICT_trainFromBuffer(): * Train a dictionary from an array of samples. - * Redirect towards ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4. + * Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4, + * f=20, and accel=1. * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. * The resulting dictionary will be saved into `dictBuffer`. @@ -52,7 +53,8 @@ extern "C" { * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. */ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); + const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples); /*====== Helper functions ======*/ @@ -84,12 +86,22 @@ typedef struct { typedef struct { unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ - unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */ + unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */ unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ - double splitPoint; /* Percentage of samples used for training: the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */ + double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */ ZDICT_params_t zParams; } ZDICT_cover_params_t; +typedef struct { + unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ + unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ + unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 1 means default(20)*/ + unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */ + unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ + double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */ + unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */ + ZDICT_params_t zParams; +} ZDICT_fastCover_params_t; /*! ZDICT_trainFromBuffer_cover(): * Train a dictionary from an array of samples using the COVER algorithm. @@ -116,9 +128,9 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( * dictionary constructed with those parameters is stored in `dictBuffer`. * * All of the parameters d, k, steps are optional. - * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}. + * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}. * if steps is zero it defaults to its default value. - * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048]. + * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000]. * * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) * or an error code, which can be tested with ZDICT_isError(). @@ -130,6 +142,48 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, ZDICT_cover_params_t* parameters); +/*! ZDICT_trainFromBuffer_fastCover(): + * Train a dictionary from an array of samples using a modified version of COVER algorithm. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * d and k are required. + * All other parameters are optional, will use default values if not provided + * The resulting dictionary will be saved into `dictBuffer`. + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * Note: ZDICT_trainFromBuffer_fastCover() requires about 1 bytes of memory for each input byte and additionally another 6 * 2^f bytes of memory . + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer, + size_t dictBufferCapacity, const void *samplesBuffer, + const size_t *samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t parameters); + +/*! ZDICT_optimizeTrainFromBuffer_fastCover(): + * The same requirements as above hold for all the parameters except `parameters`. + * This function tries many parameter combinations (specifically, k and d combinations) + * and picks the best parameters. `*parameters` is filled with the best parameters found, + * dictionary constructed with those parameters is stored in `dictBuffer`. + * All of the parameters d, k, steps, f, and accel are optional. + * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}. + * if steps is zero it defaults to its default value. + * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000]. + * If f is zero, default value of 20 is used. + * If accel is zero, default value of 1 is used. + * + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * On success `*parameters` contains the parameters selected. + * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 1 byte of memory for each input byte and additionally another 6 * 2^f bytes of memory for each thread. + */ +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer, + size_t dictBufferCapacity, const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t* parameters); + /*! ZDICT_finalizeDictionary(): * Given a custom content as a basis for dictionary, and a set of samples, * finalize dictionary by adding headers and statistics. diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c index a2e2cfa80..15000db6d 100644 --- a/lib/legacy/zstd_v04.c +++ b/lib/legacy/zstd_v04.c @@ -1093,6 +1093,7 @@ static size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, un if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Init, lay down lowprob symbols */ + memset(tableDecode, 0, sizeof(FSE_DECODE_TYPE) * (maxSymbolValue+1) ); /* useless init, but keep static analyzer happy, and we don't need to performance optimize legacy decoders */ DTableH.tableLog = (U16)tableLog; for (s=0; s<=maxSymbolValue; s++) { diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c index a5e1b1ffc..65f07a3b5 100644 --- a/lib/legacy/zstd_v05.c +++ b/lib/legacy/zstd_v05.c @@ -1224,6 +1224,7 @@ size_t FSEv05_buildDTable(FSEv05_DTable* dt, const short* normalizedCounter, uns if (tableLog > FSEv05_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Init, lay down lowprob symbols */ + memset(tableDecode, 0, sizeof(FSEv05_FUNCTION_TYPE) * (maxSymbolValue+1) ); /* useless init, but keep static analyzer happy, and we don't need to performance optimize legacy decoders */ DTableH.tableLog = (U16)tableLog; for (s=0; s<=maxSymbolValue; s++) { if (normalizedCounter[s]==-1) { @@ -2845,6 +2846,7 @@ size_t ZSTDv05_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* static size_t ZSTDv05_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize) { + if (dst==NULL) return ERROR(dstSize_tooSmall); if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall); memcpy(dst, src, srcSize); return srcSize; diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c index 8b068b3e5..ca982c35e 100644 --- a/lib/legacy/zstd_v06.c +++ b/lib/legacy/zstd_v06.c @@ -3041,6 +3041,7 @@ size_t ZSTDv06_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* static size_t ZSTDv06_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { + if (dst==NULL) return ERROR(dstSize_tooSmall); if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall); memcpy(dst, src, srcSize); return srcSize; @@ -4006,7 +4007,7 @@ size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* zbd, if (ZSTDv06_isError(hSize)) return hSize; if (toLoad > (size_t)(iend-ip)) { /* not enough input to load full header */ memcpy(zbd->headerBuffer + zbd->lhSize, ip, iend-ip); - zbd->lhSize += iend-ip; ip = iend; notDone = 0; + zbd->lhSize += iend-ip; *dstCapacityPtr = 0; return (hSize - zbd->lhSize) + ZSTDv06_blockHeaderSize; /* remaining header bytes + next block header */ } diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c index 70b170f0f..9f95f62ad 100644 --- a/lib/legacy/zstd_v07.c +++ b/lib/legacy/zstd_v07.c @@ -3150,10 +3150,10 @@ size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src, const BYTE* ip = (const BYTE*)src; if (srcSize < ZSTDv07_frameHeaderSize_min) return ZSTDv07_frameHeaderSize_min; + memset(fparamsPtr, 0, sizeof(*fparamsPtr)); if (MEM_readLE32(src) != ZSTDv07_MAGICNUMBER) { if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTDv07_MAGIC_SKIPPABLE_START) { if (srcSize < ZSTDv07_skippableHeaderSize) return ZSTDv07_skippableHeaderSize; /* magic number + skippable frame length */ - memset(fparamsPtr, 0, sizeof(*fparamsPtr)); fparamsPtr->frameContentSize = MEM_readLE32((const char *)src + 4); fparamsPtr->windowSize = 0; /* windowSize==0 means a frame is skippable */ return 0; @@ -3175,11 +3175,13 @@ size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src, U32 windowSize = 0; U32 dictID = 0; U64 frameContentSize = 0; - if ((fhdByte & 0x08) != 0) return ERROR(frameParameter_unsupported); /* reserved bits, which must be zero */ + if ((fhdByte & 0x08) != 0) /* reserved bits, which must be zero */ + return ERROR(frameParameter_unsupported); if (!directMode) { BYTE const wlByte = ip[pos++]; U32 const windowLog = (wlByte >> 3) + ZSTDv07_WINDOWLOG_ABSOLUTEMIN; - if (windowLog > ZSTDv07_WINDOWLOG_MAX) return ERROR(frameParameter_unsupported); + if (windowLog > ZSTDv07_WINDOWLOG_MAX) + return ERROR(frameParameter_unsupported); windowSize = (1U << windowLog); windowSize += (windowSize >> 3) * (wlByte&7); } @@ -3201,7 +3203,8 @@ size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src, case 3 : frameContentSize = MEM_readLE64(ip+pos); break; } if (!windowSize) windowSize = (U32)frameContentSize; - if (windowSize > windowSizeMax) return ERROR(frameParameter_unsupported); + if (windowSize > windowSizeMax) + return ERROR(frameParameter_unsupported); fparamsPtr->frameContentSize = frameContentSize; fparamsPtr->windowSize = windowSize; fparamsPtr->dictID = dictID; @@ -3220,11 +3223,10 @@ size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src, - frame header not completely provided (`srcSize` too small) */ unsigned long long ZSTDv07_getDecompressedSize(const void* src, size_t srcSize) { - { ZSTDv07_frameParams fparams; - size_t const frResult = ZSTDv07_getFrameParams(&fparams, src, srcSize); - if (frResult!=0) return 0; - return fparams.frameContentSize; - } + ZSTDv07_frameParams fparams; + size_t const frResult = ZSTDv07_getFrameParams(&fparams, src, srcSize); + if (frResult!=0) return 0; + return fparams.frameContentSize; } diff --git a/lib/zstd.h b/lib/zstd.h index edb107c2b..25441e68c 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -35,26 +35,38 @@ extern "C" { #endif -/******************************************************************************************************* +/******************************************************************************* Introduction - zstd, short for Zstandard, is a fast lossless compression algorithm, - targeting real-time compression scenarios at zlib-level and better compression ratios. - The zstd compression library provides in-memory compression and decompression functions. - The library supports compression levels from 1 up to ZSTD_maxCLevel() which is currently 22. - Levels >= 20, labeled `--ultra`, should be used with caution, as they require more memory. + zstd, short for Zstandard, is a fast lossless compression algorithm, targeting + real-time compression scenarios at zlib-level and better compression ratios. + The zstd compression library provides in-memory compression and decompression + functions. + + The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), + which is currently 22. Levels >= 20, labeled `--ultra`, should be used with + caution, as they require more memory. The library also offers negative + compression levels, which extend the range of speed vs. ratio preferences. + The lower the level, the faster the speed (at the cost of compression). + Compression can be done in: - a single step (described as Simple API) - a single step, reusing a context (described as Explicit context) - unbounded multiple steps (described as Streaming compression) - The compression ratio achievable on small data can be highly improved using a dictionary in: - - a single step (described as Simple dictionary API) - - a single step, reusing a dictionary (described as Bulk-processing dictionary API) - Advanced experimental functions can be accessed using #define ZSTD_STATIC_LINKING_ONLY before including zstd.h. - Advanced experimental APIs shall never be used with a dynamic library. - They are not "stable", their definition may change in the future. Only static linking is allowed. -*********************************************************************************************************/ + The compression ratio achievable on small data can be highly improved using + a dictionary. Dictionary compression can be performed in: + - a single step (described as Simple dictionary API) + - a single step, reusing a dictionary (described as Bulk-processing + dictionary API) + + Advanced experimental functions can be accessed using + `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. + + Advanced experimental APIs should never be used with a dynamically-linked + library. They are not "stable"; their definitions or signatures may change in + the future. Only static linking is allowed. +*******************************************************************************/ /*------ Version ------*/ #define ZSTD_VERSION_MAJOR 1 @@ -211,7 +223,8 @@ typedef struct ZSTD_CDict_s ZSTD_CDict; * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once. * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay. * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. - * `dictBuffer` can be released after ZSTD_CDict creation, since its content is copied within CDict */ + * `dictBuffer` can be released after ZSTD_CDict creation, since its content is copied within CDict + * Note : A ZSTD_CDict can be created with an empty dictionary, but it is inefficient for small data. */ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, int compressionLevel); @@ -223,7 +236,9 @@ ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); * Compression using a digested Dictionary. * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. * Note that compression level is decided during dictionary creation. - * Frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ + * Frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) + * Note : ZSTD_compress_usingCDict() can be used with a ZSTD_CDict created from an empty dictionary. + * But it is inefficient for small data, and it is recommended to use ZSTD_compressCCtx(). */ ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, @@ -1161,16 +1176,21 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*! ZSTD_CCtx_refPrefix() : * Reference a prefix (single-usage dictionary) for next compression job. - * Decompression need same prefix to properly regenerate data. - * Prefix is **only used once**. Tables are discarded at end of compression job (ZSTD_e_end). + * Decompression will need same prefix to properly regenerate data. + * Compressing with a prefix is similar in outcome as performing a diff and compressing it, + * but performs much faster, especially during decompression (compression speed is tunable with compression level). + * Note that prefix is **only used once**. Tables are discarded at end of compression job (ZSTD_e_end). * @result : 0, or an error code (which can be tested with ZSTD_isError()). * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary * Note 1 : Prefix buffer is referenced. It **must** outlive compression job. * Its contain must remain unmodified up to end of compression (ZSTD_e_end). - * Note 2 : Referencing a prefix involves building tables, which are dependent on compression parameters. + * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, + * ensure that the window size is large enough to contain the entire source. + * See ZSTD_p_windowLog. + * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. * It's a CPU consuming operation, with non-negligible impact on latency. * If there is a need to use same prefix multiple times, consider loadDictionary instead. - * Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent). + * Note 4 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent). * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode. */ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); @@ -1353,6 +1373,8 @@ ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); /*! ZSTD_DCtx_refPrefix() : * Reference a prefix (single-usage dictionary) for next compression job. + * This is the reverse operation of ZSTD_CCtx_refPrefix(), + * and must use the same prefix as the one used during compression. * Prefix is **only used once**. Reference is discarded at end of frame. * End of frame is reached when ZSTD_DCtx_decompress_generic() returns 0. * @result : 0, or an error code (which can be tested with ZSTD_isError()). diff --git a/programs/Makefile b/programs/Makefile index 912f9eff0..f1a96325c 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -132,6 +132,15 @@ else LZ4_MSG := $(NO_LZ4_MSG) endif +# enable backtrace symbol names for Linux/Darwin +ALL_SYMBOLS := 0 +ifeq (,$(filter Windows%, $(OS))) +ifeq ($(ALL_SYMBOLS), 1) +DEBUGFLAGS_LD+=-rdynamic +endif +endif + + .PHONY: default default: zstd-release @@ -144,7 +153,7 @@ allVariants: zstd zstd-compress zstd-decompress zstd-small zstd-nolegacy $(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP) zstd : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP) $(LZMACPP) $(LZ4CPP) -zstd : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD) +zstd : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD) $(DEBUGFLAGS_LD) zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) zstd : $(ZSTDLIB_FILES) zstdcli.o fileio.o bench.o datagen.o dibio.o @echo "$(THREAD_MSG)" @@ -158,6 +167,7 @@ endif .PHONY: zstd-release zstd-release: DEBUGFLAGS := +zstd-release: DEBUGFLAGS_LD := zstd-release: zstd zstd32 : CPPFLAGS += $(THREAD_CPP) diff --git a/programs/README.md b/programs/README.md index 22a00409c..804cb8b0a 100644 --- a/programs/README.md +++ b/programs/README.md @@ -61,6 +61,13 @@ There are however other Makefile targets that create different variations of CLI In which case, linking stage will fail if `lz4` library cannot be found. This is useful to prevent silent feature disabling. +- __ALL_SYMBOLS__ : `zstd` can display a stack backtrace if the execution + generates a runtime exception. By default, this feature may be + degraded/disabled on some platforms unless additional compiler directives are + applied. When triaging a runtime issue, enabling this feature can provided + more context to determine the location of the fault. + Example : `make zstd ALL_SYMBOLS=1` + #### Aggregation of parameters CLI supports aggregation of parameters i.e. `-b1`, `-e18`, and `-i1` can be joined into `-b1e18i1`. @@ -151,6 +158,7 @@ Advanced arguments : Dictionary builder : --train ## : create a dictionary from a training set of files --train-cover[=k=#,d=#,steps=#,split=#] : use the cover algorithm with optional args +--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#] : use the fastcover algorithm with optional args --train-legacy[=s=#] : use the legacy algorithm with selectivity (default: 9) -o file : `file` is dictionary name (default: dictionary) --maxdict=# : limit dictionary to specified size (default: 112640) diff --git a/programs/bench.c b/programs/bench.c index 79ef42caa..326c1c1c5 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -63,7 +63,12 @@ #define MB *(1 <<20) #define GB *(1U<<30) -static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31)); +#define BMK_RUNTEST_DEFAULT_MS 1000 + +static const size_t maxMemory = (sizeof(size_t)==4) ? + /* 32-bit */ (2 GB - 64 MB) : + /* 64-bit */ (size_t)(1ULL << ((sizeof(size_t)*8)-31)); + /* ************************************* * console display @@ -97,26 +102,26 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; return errorNum; \ } -#define EXM_THROW(errorNum, retType, ...) { \ +#define RETURN_ERROR(errorNum, retType, ...) { \ retType r; \ memset(&r, 0, sizeof(retType)); \ DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ DISPLAYLEVEL(1, "Error %i : ", errorNum); \ DISPLAYLEVEL(1, __VA_ARGS__); \ DISPLAYLEVEL(1, " \n"); \ - r.error = errorNum; \ + r.tag = errorNum; \ return r; \ } /* error without displaying */ -#define EXM_THROW_ND(errorNum, retType, ...) { \ +#define RETURN_QUIET_ERROR(errorNum, retType, ...) { \ retType r; \ memset(&r, 0, sizeof(retType)); \ DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ DEBUGOUTPUT("Error %i : ", errorNum); \ DEBUGOUTPUT(__VA_ARGS__); \ DEBUGOUTPUT(" \n"); \ - r.error = errorNum; \ + r.tag = errorNum; \ return r; \ } @@ -124,16 +129,15 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; * Benchmark Parameters ***************************************/ -BMK_advancedParams_t BMK_initAdvancedParams(void) { - BMK_advancedParams_t res = { +BMK_advancedParams_t BMK_initAdvancedParams(void) { + BMK_advancedParams_t const res = { BMK_both, /* mode */ - BMK_timeMode, /* loopMode */ BMK_TIMETEST_DEFAULT_S, /* nbSeconds */ 0, /* blockSize */ 0, /* nbWorkers */ 0, /* realTime */ 0, /* additionalParam */ - 0, /* ldmFlag */ + 0, /* ldmFlag */ 0, /* ldmMinMatch */ 0, /* ldmHashLog */ 0, /* ldmBuckSizeLog */ @@ -156,20 +160,13 @@ typedef struct { size_t resSize; } blockParam_t; -struct BMK_timeState_t{ - unsigned nbLoops; - U64 timeRemaining; - UTIL_time_t coolTime; - U64 fastestTime; -}; - #undef MIN #undef MAX #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? (a) : (b)) -static void BMK_initCCtx(ZSTD_CCtx* ctx, - const void* dictBuffer, size_t dictBufferSize, int cLevel, +static void BMK_initCCtx(ZSTD_CCtx* ctx, + const void* dictBuffer, size_t dictBufferSize, int cLevel, const ZSTD_compressionParameters* comprParams, const BMK_advancedParams_t* adv) { ZSTD_CCtx_reset(ctx); ZSTD_CCtx_resetParameters(ctx); @@ -194,15 +191,15 @@ static void BMK_initCCtx(ZSTD_CCtx* ctx, ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize); } - -static void BMK_initDCtx(ZSTD_DCtx* dctx, +static void BMK_initDCtx(ZSTD_DCtx* dctx, const void* dictBuffer, size_t dictBufferSize) { ZSTD_DCtx_reset(dctx); ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize); } + typedef struct { - ZSTD_CCtx* ctx; + ZSTD_CCtx* cctx; const void* dictBuffer; size_t dictBufferSize; int cLevel; @@ -212,7 +209,7 @@ typedef struct { static size_t local_initCCtx(void* payload) { BMK_initCCtxArgs* ag = (BMK_initCCtxArgs*)payload; - BMK_initCCtx(ag->ctx, ag->dictBuffer, ag->dictBufferSize, ag->cLevel, ag->comprParams, ag->adv); + BMK_initCCtx(ag->cctx, ag->dictBuffer, ag->dictBufferSize, ag->cLevel, ag->comprParams, ag->adv); return 0; } @@ -228,26 +225,24 @@ static size_t local_initDCtx(void* payload) { return 0; } -/* additional argument is just the context */ + +/* `addArgs` is the context */ static size_t local_defaultCompress( - const void* srcBuffer, size_t srcSize, - void* dstBuffer, size_t dstSize, - void* addArgs) { + const void* srcBuffer, size_t srcSize, + void* dstBuffer, size_t dstSize, + void* addArgs) +{ size_t moreToFlush = 1; - ZSTD_CCtx* ctx = (ZSTD_CCtx*)addArgs; + ZSTD_CCtx* const cctx = (ZSTD_CCtx*)addArgs; ZSTD_inBuffer in; ZSTD_outBuffer out; - in.src = srcBuffer; - in.size = srcSize; - in.pos = 0; - out.dst = dstBuffer; - out.size = dstSize; - out.pos = 0; + in.src = srcBuffer; in.size = srcSize; in.pos = 0; + out.dst = dstBuffer; out.size = dstSize; out.pos = 0; while (moreToFlush) { if(out.pos == out.size) { return (size_t)-ZSTD_error_dstSize_tooSmall; } - moreToFlush = ZSTD_compress_generic(ctx, &out, &in, ZSTD_e_end); + moreToFlush = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end); if (ZSTD_isError(moreToFlush)) { return moreToFlush; } @@ -255,27 +250,23 @@ static size_t local_defaultCompress( return out.pos; } -/* additional argument is just the context */ +/* `addArgs` is the context */ static size_t local_defaultDecompress( - const void* srcBuffer, size_t srcSize, - void* dstBuffer, size_t dstSize, - void* addArgs) { + const void* srcBuffer, size_t srcSize, + void* dstBuffer, size_t dstCapacity, + void* addArgs) +{ size_t moreToFlush = 1; - ZSTD_DCtx* dctx = (ZSTD_DCtx*)addArgs; + ZSTD_DCtx* const dctx = (ZSTD_DCtx*)addArgs; ZSTD_inBuffer in; ZSTD_outBuffer out; - in.src = srcBuffer; - in.size = srcSize; - in.pos = 0; - out.dst = dstBuffer; - out.size = dstSize; - out.pos = 0; + in.src = srcBuffer; in.size = srcSize; in.pos = 0; + out.dst = dstBuffer; out.size = dstCapacity; out.pos = 0; while (moreToFlush) { if(out.pos == out.size) { return (size_t)-ZSTD_error_dstSize_tooSmall; } - moreToFlush = ZSTD_decompress_generic(dctx, - &out, &in); + moreToFlush = ZSTD_decompress_generic(dctx, &out, &in); if (ZSTD_isError(moreToFlush)) { return moreToFlush; } @@ -284,198 +275,307 @@ static size_t local_defaultDecompress( } -/* initFn will be measured once, bench fn will be measured x times */ -/* benchFn should return error value or out Size */ -/* takes # of blocks and list of size & stuff for each. */ -/* only does looping */ -/* note time per loop could be zero if interval too short */ -BMK_customReturn_t BMK_benchFunction( - BMK_benchFn_t benchFn, void* benchPayload, - BMK_initFn_t initFn, void* initPayload, - size_t blockCount, - const void* const * const srcBlockBuffers, const size_t* srcBlockSizes, - void* const * const dstBlockBuffers, const size_t* dstBlockCapacities, size_t* blockResult, - unsigned nbLoops) { - size_t dstSize = 0; - U64 totalTime; - BMK_customReturn_t retval; - UTIL_time_t clockStart; +/*=== Benchmarking an arbitrary function ===*/ + +int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome) +{ + return outcome.tag == 0; +} + +/* warning : this function will stop program execution if outcome is invalid ! + * check outcome validity first, using BMK_isValid_runResult() */ +BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome) +{ + assert(outcome.tag == 0); + return outcome.internal_never_use_directly; +} + +static BMK_runOutcome_t BMK_runOutcome_error(void) +{ + BMK_runOutcome_t b; + memset(&b, 0, sizeof(b)); + b.tag = 1; + return b; +} + +static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime) +{ + BMK_runOutcome_t outcome; + outcome.tag = 0; + outcome.internal_never_use_directly = runTime; + return outcome; +} + + +/* initFn will be measured once, benchFn will be measured `nbLoops` times */ +/* initFn is optional, provide NULL if none */ +/* benchFn must return size_t field compliant with ZSTD_isError for error valuee */ +/* takes # of blocks and list of size & stuff for each. */ +/* can report result of benchFn for each block into blockResult. */ +/* blockResult is optional, provide NULL if this information is not required */ +/* note : time per loop could be zero if run time < timer resolution */ +BMK_runOutcome_t BMK_benchFunction( + BMK_benchFn_t benchFn, void* benchPayload, + BMK_initFn_t initFn, void* initPayload, + size_t blockCount, + const void* const * srcBlockBuffers, const size_t* srcBlockSizes, + void* const * dstBlockBuffers, const size_t* dstBlockCapacities, + size_t* blockResults, + unsigned nbLoops) +{ + size_t dstSize = 0; if(!nbLoops) { - EXM_THROW_ND(1, BMK_customReturn_t, "nbLoops must be nonzero \n"); + RETURN_QUIET_ERROR(2, BMK_runOutcome_t, "nbLoops must be nonzero "); } - { - size_t i; + /* init */ + { size_t i; for(i = 0; i < blockCount; i++) { memset(dstBlockBuffers[i], 0xE5, dstBlockCapacities[i]); /* warm up and erase result buffer */ } #if 0 - /* based on testing these seem to lower accuracy of multiple calls of 1 nbLoops vs 1 call of multiple nbLoops - * (Makes former slower) + /* based on testing these seem to lower accuracy of multiple calls of 1 nbLoops vs 1 call of multiple nbLoops + * (Makes former slower) */ UTIL_sleepMilli(5); /* give processor time to other processes */ UTIL_waitForNextTick(); #endif } - { - unsigned i, j; - clockStart = UTIL_getTime(); - if(initFn != NULL) { initFn(initPayload); } - for(i = 0; i < nbLoops; i++) { - for(j = 0; j < blockCount; j++) { - size_t res = benchFn(srcBlockBuffers[j], srcBlockSizes[j], dstBlockBuffers[j], dstBlockCapacities[j], benchPayload); + /* benchmark */ + { UTIL_time_t const clockStart = UTIL_getTime(); + unsigned loopNb, blockNb; + if (initFn != NULL) initFn(initPayload); + for (loopNb = 0; loopNb < nbLoops; loopNb++) { + for (blockNb = 0; blockNb < blockCount; blockNb++) { + size_t const res = benchFn(srcBlockBuffers[blockNb], srcBlockSizes[blockNb], + dstBlockBuffers[blockNb], dstBlockCapacities[blockNb], + benchPayload); if(ZSTD_isError(res)) { - EXM_THROW_ND(2, BMK_customReturn_t, "Function benchmarking failed on block %u of size %u : %s \n", - j, (U32)dstBlockCapacities[j], ZSTD_getErrorName(res)); - } else if(i == nbLoops - 1) { + RETURN_QUIET_ERROR(2, BMK_runOutcome_t, + "Function benchmark failed on block %u of size %u : %s", + blockNb, (U32)dstBlockCapacities[blockNb], ZSTD_getErrorName(res)); + } else if (loopNb == 0) { dstSize += res; - if(blockResult != NULL) { - blockResult[j] = res; - } - } - } - } - totalTime = UTIL_clockSpanNano(clockStart); - } + if (blockResults != NULL) blockResults[blockNb] = res; + } } + } /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */ - retval.error = 0; - retval.result.nanoSecPerRun = totalTime / nbLoops; - retval.result.sumOfReturn = dstSize; - return retval; -} - -#define MINUSABLETIME 500000000ULL /* 0.5 seconds in ns */ - -void BMK_resetTimeState(BMK_timedFnState_t* r, unsigned nbSeconds) { - r->nbLoops = 1; - r->timeRemaining = (U64)nbSeconds * TIMELOOP_NANOSEC; - r->coolTime = UTIL_getTime(); - r->fastestTime = (U64)(-1LL); + { U64 const totalTime = UTIL_clockSpanNano(clockStart); + BMK_runTime_t rt; + rt.nanoSecPerRun = totalTime / nbLoops; + rt.sumOfReturn = dstSize; + return BMK_setValid_runTime(rt); + } } } -BMK_timedFnState_t* BMK_createTimeState(unsigned nbSeconds) { - BMK_timedFnState_t* r = (BMK_timedFnState_t*)malloc(sizeof(struct BMK_timeState_t)); - if(r == NULL) { - return r; - } - BMK_resetTimeState(r, nbSeconds); + +/* ==== Benchmarking any function, providing intermediate results ==== */ + +struct BMK_timedFnState_s { + U64 timeSpent_ns; + U64 timeBudget_ns; + U64 runBudget_ns; + BMK_runTime_t fastestRun; + unsigned nbLoops; + UTIL_time_t coolTime; +}; /* typedef'd to BMK_timedFnState_t within bench.h */ + +BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms) +{ + BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r)); + if (r == NULL) return NULL; /* malloc() error */ + BMK_resetTimedFnState(r, total_ms, run_ms); return r; } -void BMK_freeTimeState(BMK_timedFnState_t* state) { +void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); } -/* make option for dstBlocks to be */ -BMK_customTimedReturn_t BMK_benchFunctionTimed( - BMK_timedFnState_t* cont, - BMK_benchFn_t benchFn, void* benchPayload, - BMK_initFn_t initFn, void* initPayload, - size_t blockCount, - const void* const* const srcBlockBuffers, const size_t* srcBlockSizes, - void * const * const dstBlockBuffers, const size_t * dstBlockCapacities, size_t* blockResults) +void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms) { - U64 fastest = cont->fastestTime; - int completed = 0; - BMK_customTimedReturn_t r; - r.completed = 0; + if (!total_ms) total_ms = 1 ; + if (!run_ms) run_ms = 1; + if (run_ms > total_ms) run_ms = total_ms; + timedFnState->timeSpent_ns = 0; + timedFnState->timeBudget_ns = (U64)total_ms * TIMELOOP_NANOSEC / 1000; + timedFnState->runBudget_ns = (U64)run_ms * TIMELOOP_NANOSEC / 1000; + timedFnState->fastestRun.nanoSecPerRun = (U64)(-1LL); + timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL); + timedFnState->nbLoops = 1; + timedFnState->coolTime = UTIL_getTime(); +} + +/* Tells if nb of seconds set in timedFnState for all runs is spent. + * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */ +int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState) +{ + return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns); +} + + +#define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds */ + +BMK_runOutcome_t BMK_benchTimedFn( + BMK_timedFnState_t* cont, + BMK_benchFn_t benchFn, void* benchPayload, + BMK_initFn_t initFn, void* initPayload, + size_t blockCount, + const void* const* srcBlockBuffers, const size_t* srcBlockSizes, + void * const * dstBlockBuffers, const size_t * dstBlockCapacities, + size_t* blockResults) +{ + U64 const runBudget_ns = cont->runBudget_ns; + U64 const runTimeMin_ns = runBudget_ns / 2; + int completed = 0; + BMK_runTime_t bestRunTime = cont->fastestRun; + + while (!completed) { + BMK_runOutcome_t runResult; - while(!r.completed && !completed) - { /* Overheat protection */ if (UTIL_clockSpanMicro(cont->coolTime) > ACTIVEPERIOD_MICROSEC) { DEBUGOUTPUT("\rcooling down ... \r"); UTIL_sleep(COOLPERIOD_SEC); cont->coolTime = UTIL_getTime(); } + /* reinitialize capacity */ - r.result = BMK_benchFunction(benchFn, benchPayload, initFn, initPayload, - blockCount, srcBlockBuffers, srcBlockSizes, dstBlockBuffers, dstBlockCapacities, blockResults, cont->nbLoops); - if(r.result.error) { /* completed w/ error */ - r.completed = 1; - return r; + runResult = BMK_benchFunction(benchFn, benchPayload, + initFn, initPayload, + blockCount, + srcBlockBuffers, srcBlockSizes, + dstBlockBuffers, dstBlockCapacities, + blockResults, + cont->nbLoops); + + if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */ + return BMK_runOutcome_error(); } - { U64 const loopDuration = r.result.result.nanoSecPerRun * cont->nbLoops; - r.completed = (cont->timeRemaining <= loopDuration); - cont->timeRemaining -= loopDuration; - if (loopDuration > (TIMELOOP_NANOSEC / 100)) { - fastest = MIN(fastest, r.result.result.nanoSecPerRun); - if(loopDuration >= MINUSABLETIME) { - r.result.result.nanoSecPerRun = fastest; - cont->fastestTime = fastest; - } - cont->nbLoops = (U32)(TIMELOOP_NANOSEC / r.result.result.nanoSecPerRun) + 1; + { BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult); + U64 const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops; + + cont->timeSpent_ns += loopDuration_ns; + + /* estimate nbLoops for next run to last approximately 1 second */ + if (loopDuration_ns > (runBudget_ns / 50)) { + U64 const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun); + cont->nbLoops = (U32)(runBudget_ns / fastestRun_ns) + 1; } else { - const unsigned multiplier = 2; + /* previous run was too short : blindly increase workload by x multiplier */ + const unsigned multiplier = 10; assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */ cont->nbLoops *= multiplier; } - if(loopDuration < MINUSABLETIME) { /* don't report results which have time too low */ - continue; - } + if(loopDuration_ns < runTimeMin_ns) { + /* don't report results for which benchmark run time was too small : increased risks of rounding errors */ + assert(completed == 0); + continue; + } else { + if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) { + bestRunTime = newRunTime; + } + completed = 1; + } } - completed = 1; - } - return r; + } /* while (!completed) */ + + return BMK_setValid_runTime(bestRunTime); } -/* benchMem with no allocation */ -static BMK_return_t BMK_benchMemAdvancedNoAlloc( - const void ** const srcPtrs, size_t* const srcSizes, - void** const cPtrs, size_t* const cCapacities, size_t* const cSizes, - void** const resPtrs, size_t* const resSizes, - void** resultBufferPtr, void* compressedBuffer, - const size_t maxCompressedSize, - BMK_timedFnState_t* timeStateCompress, BMK_timedFnState_t* timeStateDecompress, - const void* srcBuffer, size_t srcSize, - const size_t* fileSizes, unsigned nbFiles, - const int cLevel, const ZSTD_compressionParameters* comprParams, - const void* dictBuffer, size_t dictBufferSize, - ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, - int displayLevel, const char* displayName, const BMK_advancedParams_t* adv) +/* ================================================================= */ +/* Benchmark Zstandard, mem-to-mem scenarios */ +/* ================================================================= */ + +int BMK_isSuccessful_benchOutcome(BMK_benchOutcome_t outcome) { - size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize); /* avoid div by 0 */ - BMK_return_t results = { { 0, 0, 0, 0 }, 0 } ; + return outcome.tag == 0; +} + +BMK_benchResult_t BMK_extract_benchResult(BMK_benchOutcome_t outcome) +{ + assert(outcome.tag == 0); + return outcome.internal_never_use_directly; +} + +static BMK_benchOutcome_t BMK_benchOutcome_error(void) +{ + BMK_benchOutcome_t b; + memset(&b, 0, sizeof(b)); + b.tag = 1; + return b; +} + +static BMK_benchOutcome_t BMK_benchOutcome_setValidResult(BMK_benchResult_t result) +{ + BMK_benchOutcome_t b; + b.tag = 0; + b.internal_never_use_directly = result; + return b; +} + + +/* benchMem with no allocation */ +static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc( + const void** srcPtrs, size_t* srcSizes, + void** cPtrs, size_t* cCapacities, size_t* cSizes, + void** resPtrs, size_t* resSizes, + void** resultBufferPtr, void* compressedBuffer, + size_t maxCompressedSize, + BMK_timedFnState_t* timeStateCompress, + BMK_timedFnState_t* timeStateDecompress, + + const void* srcBuffer, size_t srcSize, + const size_t* fileSizes, unsigned nbFiles, + const int cLevel, const ZSTD_compressionParameters* comprParams, + const void* dictBuffer, size_t dictBufferSize, + ZSTD_CCtx* cctx, ZSTD_DCtx* dctx, + int displayLevel, const char* displayName, + const BMK_advancedParams_t* adv) +{ + size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize); /* avoid div by 0 */ + BMK_benchResult_t benchResult; size_t const loadedCompressedSize = srcSize; size_t cSize = 0; double ratio = 0.; U32 nbBlocks; - if(!ctx || !dctx) - EXM_THROW(31, BMK_return_t, "error: passed in null context"); + assert(cctx != NULL); assert(dctx != NULL); /* init */ - if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* display last 17 characters */ + memset(&benchResult, 0, sizeof(benchResult)); + if (strlen(displayName)>17) displayName += strlen(displayName) - 17; /* display last 17 characters */ if (adv->mode == BMK_decodeOnly) { /* benchmark only decompression : source must be already compressed */ const char* srcPtr = (const char*)srcBuffer; U64 totalDSize64 = 0; U32 fileNb; for (fileNb=0; fileNb decodedSize) { - free(*resultBufferPtr); - EXM_THROW(32, BMK_return_t, "original size is too large"); /* size_t overflow */ + if (totalDSize64 > decodedSize) { /* size_t overflow */ + free(*resultBufferPtr); + RETURN_ERROR(32, BMK_benchOutcome_t, "original size is too large"); } cSize = srcSize; srcSize = decodedSize; ratio = (double)srcSize / (double)cSize; - } + } } /* Init data blocks */ @@ -489,21 +589,21 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( U32 const blockEnd = nbBlocks + nbBlocksforThisFile; for ( ; nbBlocksmode == BMK_decodeOnly) ? thisBlockSize : ZSTD_compressBound(thisBlockSize); - resPtrs[nbBlocks] = (void*)resPtr; + resPtrs[nbBlocks] = resPtr; resSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) ? (size_t) ZSTD_findDecompressedSize(srcPtr, thisBlockSize) : thisBlockSize; srcPtr += thisBlockSize; cPtr += cCapacities[nbBlocks]; resPtr += thisBlockSize; remaining -= thisBlockSize; - } - } + } + } } - /* warmimg up memory */ + /* warmimg up `compressedBuffer` */ if (adv->mode == BMK_decodeOnly) { memcpy(compressedBuffer, srcBuffer, loadedCompressedSize); } else { @@ -511,151 +611,105 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( } /* Bench */ - { - U64 const crcOrig = (adv->mode == BMK_decodeOnly) ? 0 : XXH64(srcBuffer, srcSize, 0); + { U64 const crcOrig = (adv->mode == BMK_decodeOnly) ? 0 : XXH64(srcBuffer, srcSize, 0); # define NB_MARKS 4 - const char* const marks[NB_MARKS] = { " |", " /", " =", "\\" }; + const char* marks[NB_MARKS] = { " |", " /", " =", " \\" }; U32 markNb = 0; - DISPLAYLEVEL(2, "\r%79s\r", ""); + int compressionCompleted = (adv->mode == BMK_decodeOnly); + int decompressionCompleted = (adv->mode == BMK_compressOnly); + BMK_initCCtxArgs cctxprep; + BMK_initDCtxArgs dctxprep; + cctxprep.cctx = cctx; + cctxprep.dictBuffer = dictBuffer; + cctxprep.dictBufferSize = dictBufferSize; + cctxprep.cLevel = cLevel; + cctxprep.comprParams = comprParams; + cctxprep.adv = adv; + dctxprep.dctx = dctx; + dctxprep.dictBuffer = dictBuffer; + dctxprep.dictBufferSize = dictBufferSize; + DISPLAYLEVEL(2, "\r%70s\r", ""); /* blank line */ DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize); - { - BMK_initCCtxArgs cctxprep; - BMK_initDCtxArgs dctxprep; - cctxprep.ctx = ctx; - cctxprep.dictBuffer = dictBuffer; - cctxprep.dictBufferSize = dictBufferSize; - cctxprep.cLevel = cLevel; - cctxprep.comprParams = comprParams; - cctxprep.adv = adv; - dctxprep.dctx = dctx; - dctxprep.dictBuffer = dictBuffer; - dctxprep.dictBufferSize = dictBufferSize; - if(adv->loopMode == BMK_timeMode) { - BMK_customTimedReturn_t intermediateResultCompress; - BMK_customTimedReturn_t intermediateResultDecompress; - if(adv->mode == BMK_compressOnly) { - intermediateResultCompress.completed = 0; - intermediateResultDecompress.completed = 1; - } else if (adv->mode == BMK_decodeOnly) { - intermediateResultCompress.completed = 1; - intermediateResultDecompress.completed = 0; - } else { /* both */ - intermediateResultCompress.completed = 0; - intermediateResultDecompress.completed = 0; - } - while(!(intermediateResultCompress.completed && intermediateResultDecompress.completed)) { - if(!intermediateResultCompress.completed) { - intermediateResultCompress = BMK_benchFunctionTimed(timeStateCompress, &local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, - nbBlocks, srcPtrs, srcSizes, cPtrs, cCapacities, cSizes); - if(intermediateResultCompress.result.error) { - results.error = intermediateResultCompress.result.error; - return results; - } - ratio = (double)(srcSize / intermediateResultCompress.result.result.sumOfReturn); - { - int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - results.result.cSpeed = (srcSize * TIMELOOP_NANOSEC / intermediateResultCompress.result.result.nanoSecPerRun); - cSize = intermediateResultCompress.result.result.sumOfReturn; - results.result.cSize = cSize; - ratio = (double)srcSize / results.result.cSize; - markNb = (markNb+1) % NB_MARKS; - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", - marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, - ratioAccuracy, ratio, - results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB)); - } - } - if(!intermediateResultDecompress.completed) { - intermediateResultDecompress = BMK_benchFunctionTimed(timeStateDecompress, &local_defaultDecompress, (void*)(dctx), &local_initDCtx, (void*)&dctxprep, - nbBlocks, (const void* const*)cPtrs, cSizes, resPtrs, resSizes, NULL); - if(intermediateResultDecompress.result.error) { - results.error = intermediateResultDecompress.result.error; - return results; - } - - { - int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - results.result.dSpeed = (srcSize * TIMELOOP_NANOSEC/ intermediateResultDecompress.result.result.nanoSecPerRun); - markNb = (markNb+1) % NB_MARKS; - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", - marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, - ratioAccuracy, ratio, - results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB), - (double)results.result.dSpeed / (1 MB)); - } - } + while (!(compressionCompleted && decompressionCompleted)) { + + if (!compressionCompleted) { + BMK_runOutcome_t const cOutcome = + BMK_benchTimedFn( timeStateCompress, + &local_defaultCompress, cctx, + &local_initCCtx, &cctxprep, + nbBlocks, + srcPtrs, srcSizes, + cPtrs, cCapacities, + cSizes); + + if (!BMK_isSuccessful_runOutcome(cOutcome)) { + return BMK_benchOutcome_error(); } - } else { //iterMode; - if(adv->mode != BMK_decodeOnly) { + { BMK_runTime_t const cResult = BMK_extract_runTime(cOutcome); + cSize = cResult.sumOfReturn; + ratio = (double)srcSize / cSize; + { BMK_benchResult_t newResult; + newResult.cSpeed = ((U64)srcSize * TIMELOOP_NANOSEC / cResult.nanoSecPerRun); + benchResult.cSize = cSize; + if (newResult.cSpeed > benchResult.cSpeed) + benchResult.cSpeed = newResult.cSpeed; + } } - BMK_customReturn_t compressionResults = BMK_benchFunction(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, - nbBlocks, srcPtrs, srcSizes, cPtrs, cCapacities, cSizes, adv->nbSeconds); - if(compressionResults.error) { - results.error = compressionResults.error; - return results; - } - - if(compressionResults.result.nanoSecPerRun == 0) { - results.result.cSpeed = 0; - } else { - results.result.cSpeed = srcSize * TIMELOOP_NANOSEC / compressionResults.result.nanoSecPerRun; - } - - results.result.cSize = compressionResults.result.sumOfReturn; - { - int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - cSize = compressionResults.result.sumOfReturn; - results.result.cSize = cSize; - ratio = (double)srcSize / results.result.cSize; - markNb = (markNb+1) % NB_MARKS; - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", - marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, - ratioAccuracy, ratio, - results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB)); - } - } - if(adv->mode != BMK_compressOnly) { - BMK_customReturn_t decompressionResults = BMK_benchFunction( - &local_defaultDecompress, (void*)(dctx), - &local_initDCtx, (void*)&dctxprep, nbBlocks, - (const void* const*)cPtrs, cSizes, resPtrs, resSizes, NULL, - adv->nbSeconds); - if(decompressionResults.error) { - results.error = decompressionResults.error; - return results; - } - - if(decompressionResults.result.nanoSecPerRun == 0) { - results.result.dSpeed = 0; - } else { - results.result.dSpeed = srcSize * TIMELOOP_NANOSEC / decompressionResults.result.nanoSecPerRun; - } - - { - int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - markNb = (markNb+1) % NB_MARKS; - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", - marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, - ratioAccuracy, ratio, - results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB), - (double)results.result.dSpeed / (1 MB)); - } + { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; + markNb = (markNb+1) % NB_MARKS; + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", + marks[markNb], displayName, + (U32)srcSize, (U32)cSize, + ratioAccuracy, ratio, + benchResult.cSpeed < (10 MB) ? 2 : 1, (double)benchResult.cSpeed / MB_UNIT); } + compressionCompleted = BMK_isCompleted_TimedFn(timeStateCompress); } - } + + if(!decompressionCompleted) { + BMK_runOutcome_t const dOutcome = + BMK_benchTimedFn(timeStateDecompress, + &local_defaultDecompress, dctx, + &local_initDCtx, &dctxprep, + nbBlocks, + (const void *const *)cPtrs, cSizes, + resPtrs, resSizes, + NULL); + + if(!BMK_isSuccessful_runOutcome(dOutcome)) { + return BMK_benchOutcome_error(); + } + + { BMK_runTime_t const dResult = BMK_extract_runTime(dOutcome); + U64 const newDSpeed = (srcSize * TIMELOOP_NANOSEC / dResult.nanoSecPerRun); + if (newDSpeed > benchResult.dSpeed) + benchResult.dSpeed = newDSpeed; + } + + { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; + markNb = (markNb+1) % NB_MARKS; + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", + marks[markNb], displayName, + (U32)srcSize, (U32)benchResult.cSize, + ratioAccuracy, ratio, + benchResult.cSpeed < (10 MB) ? 2 : 1, (double)benchResult.cSpeed / MB_UNIT, + (double)benchResult.dSpeed / MB_UNIT); + } + decompressionCompleted = BMK_isCompleted_TimedFn(timeStateDecompress); + } + } /* while (!(compressionCompleted && decompressionCompleted)) */ /* CRC Checking */ - { void* resultBuffer = *resultBufferPtr; + { const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr); U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); - /* adv->mode == 0 -> compress + decompress */ if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) { size_t u; DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); for (u=0; u