1
0
mirror of https://github.com/facebook/zstd.git synced 2025-07-30 22:23:13 +03:00

Add PZstandard to contrib/

This commit is contained in:
Nick Terrell
2016-09-01 15:22:19 -07:00
parent 3427e68266
commit c932520960
31 changed files with 2811 additions and 0 deletions

View File

@ -0,0 +1,46 @@
# ##########################################################################
# Copyright (c) 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
# ##########################################################################
# Set GTEST_INC and GTEST_LIB to work with your install of gtest
GTEST_INC ?= -isystem googletest/googletest/include
GTEST_LIB ?= -L googletest/build/googlemock/gtest

# Define *.exe as extension for Windows systems
ifneq (,$(filter Windows%,$(OS)))
EXT =.exe
else
EXT =
endif

# Directories, relative to this test directory
PZSTDDIR = ..
PROGDIR = ../../../programs
ZSTDDIR = ../../../lib

CPPFLAGS = -I$(PZSTDDIR) $(GTEST_INC) $(GTEST_LIB) -I$(ZSTDDIR)/common -I$(PROGDIR)

CFLAGS ?= -O3
CFLAGS += -std=c++11
CFLAGS += $(MOREFLAGS)
FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)

# Data generator shared by the tests; built from the sources in $(PROGDIR)
datagen.o: $(PROGDIR)/datagen.*
	$(CXX) $(FLAGS) $(PROGDIR)/datagen.c -c -o $@

# Build one test binary per <name>.cpp.
# Objects come first, then the static zstd archive, then the gtest libraries:
# order-sensitive linkers only resolve symbols against libraries that appear
# AFTER the objects referencing them.
%: %.cpp *.h datagen.o
	$(CXX) $(FLAGS) $@.cpp datagen.o $(PZSTDDIR)/Pzstd.o $(PZSTDDIR)/SkippableFrame.o $(PZSTDDIR)/Options.o $(PZSTDDIR)/libzstd.a -lgtest_main -lgtest -o $@$(EXT)

.PHONY: test clean

test: OptionsTest PzstdTest RoundTripTest
	@./OptionsTest$(EXT)
	@./PzstdTest$(EXT)
	@./RoundTripTest$(EXT)

# Remove the binaries under the same names the build rule produced them with
clean:
	@rm -f datagen.o OptionsTest$(EXT) PzstdTest$(EXT) RoundTripTest$(EXT)

View File

@ -0,0 +1,179 @@
/**
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
#include "Options.h"
#include <gtest/gtest.h>
#include <array>
using namespace pzstd;
namespace pzstd {
// Member-wise equality for Options so the tests below can pass whole Options
// values to EXPECT_EQ. The first differing member makes the result false.
bool operator==(const Options& lhs, const Options& rhs) {
  if (lhs.numThreads != rhs.numThreads) {
    return false;
  }
  if (lhs.maxWindowLog != rhs.maxWindowLog) {
    return false;
  }
  if (lhs.compressionLevel != rhs.compressionLevel) {
    return false;
  }
  if (lhs.decompress != rhs.decompress) {
    return false;
  }
  if (lhs.inputFile != rhs.inputFile) {
    return false;
  }
  if (lhs.outputFile != rhs.outputFile) {
    return false;
  }
  if (lhs.overwrite != rhs.overwrite) {
    return false;
  }
  return lhs.pzstdHeaders == rhs.pzstdHeaders;
}
}
// Command lines that Options::parse() must accept, each paired with the
// Options value the parse is expected to produce. Every case compares the
// parsed result against `expected`; index 0 of each argument array stands in
// for the program name and is left null.
TEST(Options, ValidInputs) {
  // Long --num-threads, output to "-", forced overwrite.
  {
    Options options;
    std::array<const char*, 6> args = {
        {nullptr, "--num-threads", "5", "-o", "-", "-f"}};
    EXPECT_TRUE(options.parse(args.size(), args.data()));
    Options expected = {5, 23, 3, false, "-", "-", true, false};
    EXPECT_EQ(expected, options);
  }
  // Named input with no -o: the expected output name is "input.zst".
  {
    Options options;
    std::array<const char*, 6> args = {
        {nullptr, "-n", "1", "input", "-19", "-p"}};
    EXPECT_TRUE(options.parse(args.size(), args.data()));
    Options expected = {1, 23, 19, false, "input", "input.zst", false, true};
    EXPECT_EQ(expected, options);
  }
  // --ultra with level 22, decompressing x.zst into an explicit output.
  {
    Options options;
    std::array<const char*, 10> args = {{nullptr,
                                         "--ultra",
                                         "-22",
                                         "-n",
                                         "1",
                                         "--output",
                                         "x",
                                         "-d",
                                         "x.zst",
                                         "-f"}};
    EXPECT_TRUE(options.parse(args.size(), args.data()));
    Options expected = {1, 0, 22, true, "x.zst", "x", true, false};
    EXPECT_EQ(expected, options);
  }
  // Long-form --decompress/--force; the expected output drops ".zst".
  {
    Options options;
    std::array<const char*, 6> args = {{nullptr,
                                        "--num-threads",
                                        "100",
                                        "hello.zst",
                                        "--decompress",
                                        "--force"}};
    EXPECT_TRUE(options.parse(args.size(), args.data()));
    Options expected = {100, 23, 3, true, "hello.zst", "hello", true, false};
    EXPECT_EQ(expected, options);
  }
  // Input "-" together with -c.
  {
    Options options;
    std::array<const char*, 5> args = {{nullptr, "-", "-n", "1", "-c"}};
    EXPECT_TRUE(options.parse(args.size(), args.data()));
    Options expected = {1, 23, 3, false, "-", "-", false, false};
    EXPECT_EQ(expected, options);
  }
  // Same as above with the long spelling --stdout.
  {
    Options options;
    std::array<const char*, 5> args = {{nullptr, "-", "-n", "1", "--stdout"}};
    EXPECT_TRUE(options.parse(args.size(), args.data()));
    Options expected = {1, 23, 3, false, "-", "-", false, false};
    EXPECT_EQ(expected, options);
  }
  // Short -u, -d and --pzstd-headers combined with "-" for input and output.
  {
    Options options;
    std::array<const char*, 10> args = {{nullptr,
                                         "-n",
                                         "1",
                                         "-",
                                         "-5",
                                         "-o",
                                         "-",
                                         "-u",
                                         "-d",
                                         "--pzstd-headers"}};
    EXPECT_TRUE(options.parse(args.size(), args.data()));
    Options expected = {1, 0, 5, true, "-", "-", false, true};
    // This comparison was missing: `expected` was built but never checked.
    EXPECT_EQ(expected, options);
  }
  // Explicit output name that does not end in ".zst".
  {
    Options options;
    std::array<const char*, 6> args = {
        {nullptr, "silesia.tar", "-o", "silesia.tar.pzstd", "-n", "2"}};
    EXPECT_TRUE(options.parse(args.size(), args.data()));
    Options expected = {
        2, 23, 3, false, "silesia.tar", "silesia.tar.pzstd", false, false};
    // This comparison was missing: `expected` was built but never checked.
    EXPECT_EQ(expected, options);
  }
}
// Command lines whose thread-count argument is absent or unusable; parse() is
// expected to return false for each of them.
TEST(Options, BadNumThreads) {
  // No -n / --num-threads argument at all.
  {
    std::array<const char*, 3> args{{nullptr, "-o", "-"}};
    Options options;
    EXPECT_FALSE(options.parse(args.size(), args.data()));
  }
  // A thread count of zero.
  {
    std::array<const char*, 5> args{{nullptr, "-n", "0", "-o", "-"}};
    Options options;
    EXPECT_FALSE(options.parse(args.size(), args.data()));
  }
  // -n is followed directly by another option instead of a number.
  {
    std::array<const char*, 4> args{{nullptr, "-n", "-o", "-"}};
    Options options;
    EXPECT_FALSE(options.parse(args.size(), args.data()));
  }
}
// Compression levels that parse() is expected to reject.
TEST(Options, BadCompressionLevel) {
  // Level 20 requested without -u / --ultra.
  {
    std::array<const char*, 3> args{{nullptr, "x", "-20"}};
    Options options;
    EXPECT_FALSE(options.parse(args.size(), args.data()));
  }
  // Level 23 requested even though -u is given.
  {
    std::array<const char*, 4> args{{nullptr, "x", "-u", "-23"}};
    Options options;
    EXPECT_FALSE(options.parse(args.size(), args.data()));
  }
}
// The "-x" flag is expected to make parse() return false.
TEST(Options, InvalidOption) {
  {
    std::array<const char*, 3> args{{nullptr, "x", "-x"}};
    Options options;
    EXPECT_FALSE(options.parse(args.size(), args.data()));
  }
}
// Command lines that give no explicit output; parse() is expected to return
// false for each (presumably because no output name can be derived).
TEST(Options, BadOutputFile) {
  // Decompressing an input whose name lacks the ".zst" suffix, with no -o.
  {
    std::array<const char*, 5> args{{nullptr, "notzst", "-d", "-n", "1"}};
    Options options;
    EXPECT_FALSE(options.parse(args.size(), args.data()));
  }
  // No input file and no output file.
  {
    std::array<const char*, 3> args{{nullptr, "-n", "1"}};
    Options options;
    EXPECT_FALSE(options.parse(args.size(), args.data()));
  }
  // Input "-" with neither -o nor -c / --stdout.
  {
    std::array<const char*, 4> args{{nullptr, "-", "-n", "1"}};
    Options options;
    EXPECT_FALSE(options.parse(args.size(), args.data()));
  }
}
// The -h and -V flags on their own: parse() is expected to return false.
TEST(Options, Extras) {
  // -h by itself.
  {
    std::array<const char*, 2> args{{nullptr, "-h"}};
    Options options;
    EXPECT_FALSE(options.parse(args.size(), args.data()));
  }
  // -V by itself.
  {
    std::array<const char*, 2> args{{nullptr, "-V"}};
    Options options;
    EXPECT_FALSE(options.parse(args.size(), args.data()));
  }
}

View File

@ -0,0 +1,112 @@
/**
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
#include "datagen.h"
#include "Pzstd.h"
#include "test/RoundTrip.h"
#include "utils/ScopeGuard.h"
#include <gtest/gtest.h>
#include <cstddef>
#include <cstdio>
#include <memory>
using namespace std;
using namespace pzstd;
// Round-trips generated inputs of every length from 1 to 1027 bytes, for each
// combination of pzstd headers off/on, 1/2/4 threads and compression levels
// 1 and 8.
TEST(Pzstd, SmallSizes) {
  for (unsigned len = 1; len < 1028; ++len) {
    std::string inputFile = std::tmpnam(nullptr);
    // Deletes the input when this iteration ends; dismissed by errorGuard
    // below so a failing input stays on disk.
    auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });
    {
      static uint8_t buf[1028];
      RDG_genBuffer(buf, len, 0.5, 0.0, 42);
      auto fd = std::fopen(inputFile.c_str(), "wb");
      // Fail the test rather than pass a null stream to fwrite/fclose.
      ASSERT_TRUE(fd != nullptr);
      auto written = std::fwrite(buf, 1, len, fd);
      std::fclose(fd);
      ASSERT_EQ(written, len);
    }
    for (unsigned headers = 0; headers <= 1; ++headers) {
      for (unsigned numThreads = 1; numThreads <= 4; numThreads *= 2) {
        for (unsigned level = 1; level <= 8; level *= 8) {
          // Runs only if the assertion below leaves the test early: keeps the
          // input file and prints the parameters of the failing combination.
          auto errorGuard = makeScopeGuard([&] {
            guard.dismiss();
            std::fprintf(stderr, "file: %s\n", inputFile.c_str());
            std::fprintf(stderr, "pzstd headers: %u\n", headers);
            std::fprintf(stderr, "# threads: %u\n", numThreads);
            std::fprintf(stderr, "compression level: %u\n", level);
          });
          Options options;
          options.pzstdHeaders = headers;
          options.overwrite = true;
          options.inputFile = inputFile;
          options.numThreads = numThreads;
          options.compressionLevel = level;
          ASSERT_TRUE(roundTrip(options));
          errorGuard.dismiss();
        }
      }
    }
  }
}
// Round-trips generated inputs of 2^20 through 2^24 bytes (doubling each
// step), for each combination of pzstd headers off/on, 1/4/16 threads and
// compression levels 1, 2 and 4.
TEST(Pzstd, LargeSizes) {
  for (unsigned len = 1 << 20; len <= (1 << 24); len *= 2) {
    std::string inputFile = std::tmpnam(nullptr);
    // Deletes the input when this iteration ends; dismissed by errorGuard
    // below so a failing input stays on disk.
    auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });
    {
      std::unique_ptr<uint8_t[]> buf(new uint8_t[len]);
      RDG_genBuffer(buf.get(), len, 0.5, 0.0, 42);
      auto fd = std::fopen(inputFile.c_str(), "wb");
      // Fail the test rather than pass a null stream to fwrite/fclose.
      ASSERT_TRUE(fd != nullptr);
      auto written = std::fwrite(buf.get(), 1, len, fd);
      std::fclose(fd);
      ASSERT_EQ(written, len);
    }
    for (unsigned headers = 0; headers <= 1; ++headers) {
      for (unsigned numThreads = 1; numThreads <= 16; numThreads *= 4) {
        for (unsigned level = 1; level <= 4; level *= 2) {
          // Runs only if the assertion below leaves the test early: keeps the
          // input file and prints the parameters of the failing combination.
          auto errorGuard = makeScopeGuard([&] {
            guard.dismiss();
            std::fprintf(stderr, "file: %s\n", inputFile.c_str());
            std::fprintf(stderr, "pzstd headers: %u\n", headers);
            std::fprintf(stderr, "# threads: %u\n", numThreads);
            std::fprintf(stderr, "compression level: %u\n", level);
          });
          Options options;
          options.pzstdHeaders = headers;
          options.overwrite = true;
          options.inputFile = inputFile;
          options.numThreads = numThreads;
          options.compressionLevel = level;
          ASSERT_TRUE(roundTrip(options));
          errorGuard.dismiss();
        }
      }
    }
  }
}
// Round-trips a 10000-byte file consisting solely of the byte 'a', using one
// thread, compression level 1 and no pzstd headers.
TEST(Pzstd, ExtremelyCompressible) {
  std::string inputFile = std::tmpnam(nullptr);
  // Deletes the input file when the test body is left.
  auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });
  {
    // A std::string provides the filled buffer, so no memset (and no
    // <cstring>) is needed, and its size() keeps the comparison below
    // size_t-vs-size_t.
    const std::string data(10000, 'a');
    auto fd = std::fopen(inputFile.c_str(), "wb");
    // Fail the test rather than pass a null stream to fwrite/fclose.
    ASSERT_TRUE(fd != nullptr);
    auto written = std::fwrite(data.data(), 1, data.size(), fd);
    std::fclose(fd);
    ASSERT_EQ(written, data.size());
  }
  Options options;
  options.pzstdHeaders = false;
  options.overwrite = true;
  options.inputFile = inputFile;
  options.numThreads = 1;
  options.compressionLevel = 1;
  ASSERT_TRUE(roundTrip(options));
}

View File

@ -0,0 +1,89 @@
/**
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
#pragma once
#include "Options.h"
#include "Pzstd.h"
#include "utils/ScopeGuard.h"
#include <cstdio>
#include <string>
#include <cstdint>
#include <memory>
namespace pzstd {
/**
 * Compares the contents of two files byte for byte.
 *
 * @param source        Path of the reference file.
 * @param decompressed  Path of the file that should hold the same bytes.
 * @returns true only if both files can be opened, are read without error,
 *          and contain exactly the same sequence of bytes; false otherwise
 *          (including when either file cannot be opened).
 */
inline bool check(std::string source, std::string decompressed) {
  // Owning handle that closes the stream on every exit path. A null handle
  // never invokes the deleter, so a failed fopen is safe to hold.
  using FilePtr = std::unique_ptr<std::FILE, int (*)(std::FILE*)>;
  constexpr std::size_t kChunkSize = 1024;

  FilePtr sFd(std::fopen(source.c_str(), "rb"), &std::fclose);
  FilePtr dFd(std::fopen(decompressed.c_str(), "rb"), &std::fclose);
  if (!sFd || !dFd) {
    // A file that cannot be opened cannot match; never read a null stream.
    return false;
  }

  std::unique_ptr<std::uint8_t[]> sBuf(new std::uint8_t[kChunkSize]);
  std::unique_ptr<std::uint8_t[]> dBuf(new std::uint8_t[kChunkSize]);
  std::size_t sRead = 0;
  std::size_t dRead = 0;
  do {
    sRead = std::fread(sBuf.get(), 1, kChunkSize, sFd.get());
    dRead = std::fread(dBuf.get(), 1, kChunkSize, dFd.get());
    if (std::ferror(sFd.get()) || std::ferror(dFd.get())) {
      return false;
    }
    // Differing chunk lengths mean one file ended before the other.
    if (sRead != dRead) {
      return false;
    }
    for (std::size_t i = 0; i < sRead; ++i) {
      if (sBuf[i] != dBuf[i]) {
        return false;
      }
    }
    // A full chunk means more data may follow; a short one ends the loop.
  } while (sRead == kChunkSize);

  // Both streams must have reached end-of-file for the files to be equal.
  return std::feof(sFd.get()) && std::feof(dFd.get());
}
/**
 * Compresses options.inputFile to a temporary file, decompresses that into a
 * second temporary file, and compares the result with the original input.
 *
 * Note that `options` is modified: decompress, inputFile and outputFile are
 * overwritten along the way and are not restored.
 *
 * @returns true if both pzstdMain runs finish without a recorded error and
 *          check() reports the decompressed file equal to the original.
 */
inline bool roundTrip(Options& options) {
  std::string original = options.inputFile;
  std::string compressedPath = std::tmpnam(nullptr);
  std::string decompressedPath = std::tmpnam(nullptr);
  // Delete both temporaries however this function is left.
  auto cleanup = makeScopeGuard([&] {
    std::remove(compressedPath.c_str());
    std::remove(decompressedPath.c_str());
  });

  // Runs pzstdMain once with the current options; true when no error was
  // recorded. getError() is still called on failure with its result unused
  // (presumably ErrorHolder expects errors to be retrieved - TODO confirm).
  const auto runPass = [&options]() -> bool {
    ErrorHolder errorHolder;
    pzstdMain(options, errorHolder);
    if (!errorHolder.hasError()) {
      return true;
    }
    errorHolder.getError();
    return false;
  };

  // Pass 1: compress the original input.
  options.outputFile = compressedPath;
  options.decompress = false;
  if (!runPass()) {
    return false;
  }

  // Pass 2: decompress what pass 1 produced.
  options.decompress = true;
  options.inputFile = compressedPath;
  options.outputFile = decompressedPath;
  if (!runPass()) {
    return false;
  }

  return check(original, decompressedPath);
}
}

View File

@ -0,0 +1,88 @@
/**
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
#include "datagen.h"
#include "Options.h"
#include "test/RoundTrip.h"
#include "utils/ScopeGuard.h"
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <random>
using namespace std;
using namespace pzstd;
namespace {
// Fills a buffer of `size` bytes with RDG_genBuffer(matchProba, litProba,
// seed), writes it to a newly named temporary file and returns that file's
// path. The caller is responsible for removing the file. Aborts the process
// if the file cannot be opened or not all bytes are written.
string
writeData(size_t size, double matchProba, double litProba, unsigned seed) {
  std::unique_ptr<uint8_t[]> buf(new uint8_t[size]);
  RDG_genBuffer(buf.get(), size, matchProba, litProba, seed);
  string file = tmpnam(nullptr);
  auto fd = std::fopen(file.c_str(), "wb");
  if (fd == nullptr) {
    // Same policy as a short write below: never hand a null stream to
    // fwrite/fclose.
    std::abort();
  }
  auto guard = makeScopeGuard([&] { std::fclose(fd); });
  auto bytesWritten = std::fwrite(buf.get(), 1, size, fd);
  if (bytesWritten != size) {
    std::abort();
  }
  return file;
}
// Draws a random size, two probabilities and a seed from `gen`, writes a
// matching data file via writeData() and returns its path.
template <typename Generator>
string generateInputFile(Generator& gen) {
  // Use inputs ranging from 1 Byte to 2^16 Bytes
  std::uniform_int_distribution<size_t> size{1, 1 << 16};
  std::uniform_real_distribution<> prob{0, 1};
  // Draw in separate statements: the evaluation order of function arguments
  // is unspecified, so drawing inside the call would make the mapping from
  // generator state to parameters depend on the compiler.
  const size_t inputSize = size(gen);
  const double matchProba = prob(gen);
  const double litProba = prob(gen);
  const unsigned seed = static_cast<unsigned>(gen());
  return writeData(inputSize, matchProba, litProba, seed);
}
// Builds an Options value for `inputFile` with overwrite enabled and with
// randomly drawn settings: pzstd headers on with probability 0.75, a thread
// count in [1, 32] and a compression level in [1, 10].
template <typename Generator>
Options generateOptions(Generator& gen, const string& inputFile) {
  std::bernoulli_distribution headersDist{0.75};
  std::uniform_int_distribution<unsigned> threadsDist{1, 32};
  std::uniform_int_distribution<unsigned> levelDist{1, 10};

  Options result;
  result.inputFile = inputFile;
  result.overwrite = true;
  // Draw order: headers, then threads, then level.
  result.pzstdHeaders = headersDist(gen);
  result.numThreads = threadsDist(gen);
  result.compressionLevel = levelDist(gen);
  return result;
}
}
// Randomized round-trip driver: generates 10000 random input files and runs
// 10 round trips with random options on each. Returns 0 if every round trip
// succeeds; on the first failure prints the options in effect and the input
// path, then returns 1.
int main(int /*argc*/, char** /*argv*/) {
  std::mt19937 gen(std::random_device{}());
  // Ends the "\r"-rewritten progress line however main is left.
  auto newlineGuard = makeScopeGuard([] { std::fprintf(stderr, "\n"); });
  for (unsigned i = 0; i < 10000; ++i) {
    if (i % 100 == 0) {
      std::fprintf(stderr, "Progress: %u%%\r", i / 100);
    }
    auto inputFile = generateInputFile(gen);
    auto inputGuard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });
    // Distinct index name: the inner loop used to shadow the outer `i`.
    for (unsigned j = 0; j < 10; ++j) {
      auto options = generateOptions(gen, inputFile);
      if (!roundTrip(options)) {
        // Keep the failing input on disk; otherwise the path printed below
        // would name a file that is deleted as soon as main returns.
        inputGuard.dismiss();
        std::fprintf(stderr, "numThreads: %u\n", options.numThreads);
        std::fprintf(stderr, "level: %u\n", options.compressionLevel);
        std::fprintf(stderr, "decompress? %u\n", (unsigned)options.decompress);
        std::fprintf(
            stderr, "pzstd headers? %u\n", (unsigned)options.pzstdHeaders);
        std::fprintf(stderr, "file: %s\n", inputFile.c_str());
        return 1;
      }
    }
  }
  return 0;
}