1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-04-18 21:44:02 +03:00
Denis Khalikov 6a6db672db
feat(FDB): [FDB BlobHandler] Add writeOrUpdateBlob, update read and remove. (#3373)
This patch:
1) Adds a `writeOrUpdateBlob` function.
2) Updates `read` and `remove` to take in account the size of the
`keys` and `values` for one FDB transaction.
2025-03-14 11:39:06 +00:00

215 lines
7.8 KiB
C++

/* Copyright (C) 2024 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#pragma once
#include <cassert>
#include <cstdint>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>

#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_io.hpp>
#include <boost/uuid/random_generator.hpp>
#include <boost/lexical_cast.hpp>

// https://apple.github.io/foundationdb/api-c.html
// We have to define `FDB_API_VERSION` before include `fdb_c.h` header.
#define FDB_API_VERSION 630
#include <foundationdb/fdb_c.h>
namespace FDBCS
{
// Byte buffer type used for keys and values passed to `Transaction`; currently an alias of `std::string`.
// TODO: Consider a container of `uint8_t` instead of `std::string`.
using ByteArray = std::string;
// Represents a `transaction`: a thin wrapper around a raw `FDBTransaction` handle.
// The wrapper can be neither default-constructed, copied nor moved.
class Transaction
{
 public:
  // Builds the wrapper from the given raw transaction handle `tnx`.
  explicit Transaction(FDBTransaction* tnx);
  ~Transaction();

  Transaction() = delete;
  Transaction(const Transaction&) = delete;
  Transaction& operator=(const Transaction&) = delete;
  Transaction(Transaction&&) = delete;
  Transaction& operator=(Transaction&&) = delete;

  // Tries to swap `key1` and `key2` atomically, i.e. within this one transaction.
  // NOTE(review): the returned bool presumably reports whether the swap succeeded -- confirm in the
  // definition.
  bool swap(const ByteArray &key1, const ByteArray &key2);

  // Stores `value` under `key`.
  void set(const ByteArray& key, const ByteArray& value) const;

  // Looks up the `value` stored under `key`.
  // NOTE(review): `first` presumably tells whether the lookup succeeded and `second` carries the value --
  // confirm in the definition.
  std::pair<bool, ByteArray> get(const ByteArray& key) const;

  // Deletes `key` from the database.
  void remove(const ByteArray& key) const;

  // Deletes every key in the half-open range [`beginKey`, `endKey`): `endKey` itself is not removed.
  void removeRange(const ByteArray& beginKey, const ByteArray& endKey) const;

  // Commits the transaction.
  // NOTE(review): the returned bool presumably reports commit success -- confirm in the definition.
  bool commit() const;

 private:
  // Raw handle of the wrapped transaction.
  FDBTransaction* tnx_{nullptr};
};
// Represents the network class. It holds the thread object `netThread` and is neither copyable nor
// movable.
class FDBNetwork
{
 public:
  FDBNetwork() = default;
  ~FDBNetwork();

  FDBNetwork(const FDBNetwork&) = delete;
  FDBNetwork& operator=(const FDBNetwork&) = delete;
  FDBNetwork(FDBNetwork&&) = delete;
  FDBNetwork& operator=(FDBNetwork&&) = delete;

  // Sets up and runs the network.
  // NOTE(review): presumably the network loop runs on `netThread` and the returned bool reports whether
  // the setup succeeded -- confirm in the definition.
  bool setUpAndRunNetwork();

 private:
  // Thread object owned by this instance.
  std::thread netThread;
};
// Represents the database class: a wrapper around a raw `FDBDatabase` handle.
// The wrapper can be neither default-constructed, copied nor moved.
class FDBDataBase
{
 public:
  FDBDataBase() = delete;
  FDBDataBase(const FDBDataBase&) = delete;
  // Takes a const reference, like the deleted copy assignment of the other classes in this header.
  FDBDataBase& operator=(const FDBDataBase&) = delete;
  FDBDataBase(FDBDataBase&&) = delete;
  FDBDataBase& operator=(FDBDataBase&&) = delete;

  // Builds the wrapper from the given raw `database` handle.
  explicit FDBDataBase(FDBDatabase* database);
  ~FDBDataBase();

  // Creates a new `Transaction` for this database; the caller owns the returned object.
  std::unique_ptr<Transaction> createTransaction() const;

  // Tells whether the database is ready.
  // NOTE(review): presumably waits up to `secondsToWait_` seconds for the database to respond -- confirm
  // in the definition.
  bool isDataBaseReady() const;

 private:
  // Raw handle of the wrapped database.
  FDBDatabase* database_{nullptr};
  // Wait budget in seconds; see the review note on `isDataBaseReady`.
  const uint32_t secondsToWait_ = 3;
};
// Represents a creator class for the `FDBDataBase`; exposes a single static factory function.
class DataBaseCreator
{
 public:
  // Creates a `FDBDataBase` from the given `clusterFilePath` (path to the cluster file) and returns it
  // as a shared pointer.
  // NOTE(review): `clusterFilePath` is taken by const value, so the string is copied on every call.
  // Switching to `const std::string&` would have to be done together with the out-of-line definition.
  static std::shared_ptr<FDBDataBase> createDataBase(const std::string clusterFilePath);
};
// A key is a plain string.
using Key = std::string;
// An ordered list of keys.
using Keys = std::vector<Key>;
// A block: an unsigned 32-bit number paired with the block's string payload.
// NOTE(review): the number is presumably the tree level the block belongs to -- confirm against the
// users of `Block`.
using Block = std::pair<uint32_t, std::string>;
// Maps a key to associated block.
using KeyBlockMap = std::unordered_map<Key, Block>;
// Maps one unsigned 32-bit number to another.
// NOTE(review): judging by the name, tree level -> number of keys on that level -- confirm.
using TreeLevelNumKeysMap = std::unordered_map<uint32_t, uint32_t>;
// Represents an abstract class for key generators. Concrete generators implement both pure virtual
// functions below.
class KeyGenerator
{
 public:
  // Virtual so that a generator can be destroyed through a `KeyGenerator` pointer.
  virtual ~KeyGenerator() = default;

  // Returns a generated key.
  virtual Key generateKey() = 0;

  // Returns the key size; `BlobHandler` treats this value as a size in bytes.
  virtual uint32_t getKeySize() = 0;
};
// Concrete `KeyGenerator`.
// NOTE(review): judging by the name and the boost/uuid includes of this header, keys are presumably
// derived from Boost UUIDs -- confirm in the definition.
class BoostUIDKeyGenerator : public KeyGenerator
{
 public:
  // Implements `KeyGenerator::generateKey`.
  Key generateKey() override;

  // Implements `KeyGenerator::getKeySize`.
  uint32_t getKeySize() override;
};
// This class represetns a machinery to handle a data `blob`.
class BlobHandler
{
public:
BlobHandler(std::shared_ptr<KeyGenerator> keyGen, uint32_t blockSizeInBytes = 100000)
: keyGen_(keyGen), blockSizeInBytes_(blockSizeInBytes)
{
// Block size in 100KB shows the best performance.
keySizeInBytes_ = keyGen_->getKeySize();
assert(keySizeInBytes_);
assert(blockSizeInBytes_);
assert((keySizeInBytes_ + keyBlockIdentifier.size()) <= blockSizeInBytes_);
numKeysInBlock_ = (blockSizeInBytes_ - keyBlockIdentifier.size()) / keySizeInBytes_;
assert(blockSizeInBytes_ > dataBlockIdentifier.size());
dataBlockSizeInBytes_ = (blockSizeInBytes_ - dataBlockIdentifier.size());
}
// Writes the given `blob` with given `key`.
// The semantic of this `write` is not atomic, it splits the data into multiple fdb transactions, if one of
// this transaction fails, we should call `removeBlob` to clear data which were written partially, and then
// try to `writeBlob` again.
bool writeBlob(std::shared_ptr<FDBCS::FDBDataBase> database, const ByteArray& key, const ByteArray& blob);
// This function:
// 1) Checks if blob with the same key exists if not, uses `writeBlob` function.
// 2) Creates a new tree with a new key.
// 3) Atomically (during one fdb transaction) swaps root nodes for the original tree and new tree.
// 4) Removes original tree.
bool writeOrUpdateBlob(std::shared_ptr<FDBCS::FDBDataBase> database, const ByteArray& key,
const ByteArray& blob);
// Reads `blob` by the given `key`, on error returns false.
std::pair<bool, std::string> readBlob(std::shared_ptr<FDBCS::FDBDataBase> database, const ByteArray& key);
// Read blocks of the data based on the given `keys` starting from `index` position in keys vector
// and taking in account the max size of transaction.
// If `DataBlock` reached in a tree (leaf nodes), sets `dataBlockReached` flag to true.
std::pair<bool, std::vector<Block>> readBlocks(std::shared_ptr<FDBCS::FDBDataBase> database,
const std::vector<ByteArray>& keys, uint32_t& index,
bool& dataBlockReached);
// Removes a `blob` by the given `key`, on error returns false.
// The semantic of this `remove` is not atomic, it splits keys to remove into multiple fdb transactions, if
// one of this transaction fails, we should call `removeBlob` again to remove keys, which were not removed.
bool removeBlob(std::shared_ptr<FDBCS::FDBDataBase> database, const ByteArray& key);
// Checks if key exis.
bool keyExists(std::shared_ptr<FDBCS::FDBDataBase> database, const ByteArray& key);
private:
size_t insertData(Block& block, const std::string& blob, const size_t offset);
void insertKey(Block& block, const std::string& value);
std::pair<bool, Keys> getKeysFromBlock(const Block& block);
Keys generateKeys(const uint32_t num);
bool isDataBlock(const Block& block);
bool commitKeys(std::shared_ptr<FDBCS::FDBDataBase> database, KeyBlockMap& keyBlockMap, const Keys& keys);
bool commitKey(std::shared_ptr<FDBCS::FDBDataBase> database, const Key& key, const ByteArray& value);
bool removeKeys(std::shared_ptr<FDBCS::FDBDataBase> database, const Keys& keys);
TreeLevelNumKeysMap computeNumKeysForEachTreeLevel(const int32_t treeLen, const uint32_t numBlocks);
inline float log(const uint32_t base, const uint32_t value);
std::shared_ptr<KeyGenerator> keyGen_;
uint32_t blockSizeInBytes_;
uint32_t keySizeInBytes_;
uint32_t numKeysInBlock_;
uint32_t dataBlockSizeInBytes_;
// FIXME: Doc says that 10MB is limit, currently taking in account `key` size and `value` size, but 10MB
// limit returns error on transaction.
const uint32_t maxTnxSize_{8192000};
const std::string keyBlockIdentifier{"K"};
const std::string dataBlockIdentifier{"D"};
};
// Free function declared here and defined elsewhere.
// NOTE(review): presumably selects `FDB_API_VERSION` for the FDB C client and reports success via the
// returned bool -- confirm against the definition.
bool setAPIVersion();
} // namespace FDBCS