1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-27 21:01:50 +03:00

feat(fdb): MCOL-5802 Add support for blob insertion into FDB. (#3351)

This commit is contained in:
Denis Khalikov
2024-12-11 00:07:46 +03:00
committed by GitHub
parent eb6b370287
commit 87e2bb4cef
4 changed files with 474 additions and 1 deletions

View File

@ -21,6 +21,12 @@
#include <iostream>
#include <memory>
#include <stdexcept>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>

#include <boost/lexical_cast.hpp>
#include <boost/uuid/random_generator.hpp>
#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_io.hpp>
// https://apple.github.io/foundationdb/api-c.html
// `FDB_API_VERSION` has to be defined before including the `fdb_c.h` header.
@ -104,5 +110,77 @@ class DataBaseCreator
static std::shared_ptr<FDBDataBase> createDataBase(const std::string clusterFilePath);
};
// A block: a (uint32_t, std::string) pair.
// NOTE(review): the meaning of the uint32_t component is not visible in this header - confirm against
// the implementation.
using Block = std::pair<uint32_t, std::string>;
// A key, represented as a string.
using Key = std::string;
// A sequence of keys.
using Keys = std::vector<Key>;
// Maps a key to its associated block.
using KeyBlockMap = std::unordered_map<Key, Block>;
// NOTE(review): judging by the name, maps a tree level to the number of keys on that level - confirm
// against `BlobHandler::computeNumKeysForEachTreeLevel`.
using TreeLevelNumKeysMap = std::unordered_map<uint32_t, uint32_t>;
// Abstract interface of a key source. Concrete generators must implement both pure virtual methods.
class KeyGenerator
{
 public:
  // Virtual so that generators can be destroyed through a `KeyGenerator` pointer.
  virtual ~KeyGenerator() = default;

  // Produces a key.
  virtual Key generateKey() = 0;

  // Reports the key size; `BlobHandler` stores this value as the size in bytes of a single key.
  virtual uint32_t getKeySize() = 0;
};
// Concrete `KeyGenerator`.
// NOTE(review): the definitions are not part of this header view; judging by the class name and the
// Boost.UUID headers this file includes, keys are presumably derived from Boost UUIDs - confirm.
class BoostUIDKeyGenerator : public KeyGenerator
{
 public:
  // See `KeyGenerator::generateKey`.
  Key generateKey() override;

  // See `KeyGenerator::getKeySize`.
  uint32_t getKeySize() override;
};
// This class represetns a machinery to handle a data `blob`.
class BlobHandler
{
public:
BlobHandler(std::shared_ptr<KeyGenerator> keyGen, uint32_t blockSizeInBytes = 100000)
: keyGen_(keyGen), blockSizeInBytes_(blockSizeInBytes)
{
// Block size in 100KB shows the best performance.
keySizeInBytes_ = keyGen_->getKeySize();
assert(keySizeInBytes_);
assert(blockSizeInBytes_);
assert((keySizeInBytes_ + keyBlockIdentifier.size()) <= blockSizeInBytes_);
numKeysInBlock_ = (blockSizeInBytes_ - keyBlockIdentifier.size()) / keySizeInBytes_;
assert(blockSizeInBytes_ > dataBlockIdentifier.size());
dataBlockSizeInBytes_ = (blockSizeInBytes_ - dataBlockIdentifier.size());
}
// Writes the given `blob` with given `key`.
bool writeBlob(std::shared_ptr<FDBCS::FDBDataBase> database, const ByteArray& key, const ByteArray& blob);
// Reads `blob` by the given `key`, on error returns false.
std::pair<bool, std::string> readBlob(std::shared_ptr<FDBCS::FDBDataBase> database, const ByteArray& key);
// Removes a `blob` by the given `key`, on error returns false.
bool removeBlob(std::shared_ptr<FDBCS::FDBDataBase> database, const ByteArray& key);
private:
size_t insertData(Block& block, const std::string& blob, const size_t offset);
void insertKey(Block& block, const std::string& value);
std::pair<bool, Keys> getKeysFromBlock(const Block& block);
Keys generateKeys(const uint32_t num);
bool isDataBlock(const Block& block);
bool commitKeys(std::shared_ptr<FDBCS::FDBDataBase> database, KeyBlockMap& keyBlockMap, const Keys& keys);
bool commitKey(std::shared_ptr<FDBCS::FDBDataBase> database, const Key& key, const ByteArray& value);
bool removeKeys(std::shared_ptr<FDBCS::FDBDataBase> database, const Keys& keys);
TreeLevelNumKeysMap computeNumKeysForEachTreeLevel(const int32_t treeLen, const uint32_t numBlocks);
inline float log(const uint32_t base, const uint32_t value);
std::shared_ptr<KeyGenerator> keyGen_;
uint32_t blockSizeInBytes_;
uint32_t keySizeInBytes_;
uint32_t numKeysInBlock_;
uint32_t dataBlockSizeInBytes_;
// FIXME: Doc says that 10MB is limit, currently taking in account `key` size and `value` size, but 10MB
// limit returns error on transaction.
const uint32_t maxTnxSize_{8192000};
const std::string keyBlockIdentifier{"K"};
const std::string dataBlockIdentifier{"D"};
};
// NOTE(review): only the declaration is visible here. Presumably selects the FoundationDB C API version
// (see `FDB_API_VERSION`) and returns whether that succeeded - confirm against the definition.
bool setAPIVersion();
} // namespace FDBCS