mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
Merge from merge-myrocks:
commit 445e518bc7
Author: Sergei Petrunia <psergey@askmonty.org>
Date: Sat Jan 27 10:18:20 2018 +0000
Copy of
commit f8f364b47f2784f16b401f27658f1c16eaf348ec
Author: Jay Edgar <jkedgar@fb.com>
Date: Tue Oct 17 15:19:31 2017 -0700
Add a hashed, hierarchical, wheel timer implementation
Summary:
In order to implement idle timeouts on detached sessions we need something inside MySQL that is lightweight and can handle calling events in the future wi
By default the timers are grouped into 10ms buckets (the 'hashed' part), though the size of the buckets is configurable at the creation of the timer. Eac
Reviewed By: djwatson
Differential Revision: D6199806
fbshipit-source-id: 5e1590f
This commit is contained in:
@@ -138,6 +138,7 @@ const size_t RDB_SIZEOF_INDEX_INFO_VERSION = sizeof(uint16);
|
||||
const size_t RDB_SIZEOF_INDEX_TYPE = sizeof(uchar);
|
||||
const size_t RDB_SIZEOF_KV_VERSION = sizeof(uint16);
|
||||
const size_t RDB_SIZEOF_INDEX_FLAGS = sizeof(uint32);
|
||||
const size_t RDB_SIZEOF_AUTO_INCREMENT_VERSION = sizeof(uint16);
|
||||
|
||||
// Possible return values for rdb_index_field_unpack_t functions.
|
||||
enum {
|
||||
@@ -237,17 +238,28 @@ public:
|
||||
*size = INDEX_NUMBER_SIZE;
|
||||
}
|
||||
|
||||
/* Get the first key that you need to position at to start iterating.
|
||||
Returns a "supremum" or "infimum" for this index based on collation order
|
||||
*/
|
||||
inline void get_first_key(uchar *const key, uint *const size) const {
|
||||
return m_is_reverse_cf ? get_supremum_key(key, size)
|
||||
: get_infimum_key(key, size);
|
||||
}
|
||||
|
||||
/* Make a key that is right after the given key. */
|
||||
static int successor(uchar *const packed_tuple, const uint &len);
|
||||
|
||||
/* Make a key that is right before the given key. */
|
||||
static int predecessor(uchar *const packed_tuple, const uint &len);
|
||||
|
||||
/*
|
||||
This can be used to compare prefixes.
|
||||
if X is a prefix of Y, then we consider that X = Y.
|
||||
*/
|
||||
// b describes the lookup key, which can be a prefix of a.
|
||||
// b might be outside of the index_number range, if successor() is called.
|
||||
int cmp_full_keys(const rocksdb::Slice &a, const rocksdb::Slice &b) const {
|
||||
DBUG_ASSERT(covers_key(a));
|
||||
DBUG_ASSERT(covers_key(b));
|
||||
|
||||
return memcmp(a.data(), b.data(), std::min(a.size(), b.size()));
|
||||
}
|
||||
@@ -383,6 +395,7 @@ public:
|
||||
INDEX_STATISTICS = 6,
|
||||
MAX_INDEX_ID = 7,
|
||||
DDL_CREATE_INDEX_ONGOING = 8,
|
||||
AUTO_INC = 9,
|
||||
END_DICT_INDEX_ID = 255
|
||||
};
|
||||
|
||||
@@ -395,6 +408,7 @@ public:
|
||||
DDL_DROP_INDEX_ONGOING_VERSION = 1,
|
||||
MAX_INDEX_ID_VERSION = 1,
|
||||
DDL_CREATE_INDEX_ONGOING_VERSION = 1,
|
||||
AUTO_INCREMENT_VERSION = 1,
|
||||
// Version for index stats is stored in IndexStats struct
|
||||
};
|
||||
|
||||
@@ -968,17 +982,17 @@ public:
|
||||
Rdb_tbl_def &operator=(const Rdb_tbl_def &) = delete;
|
||||
|
||||
explicit Rdb_tbl_def(const std::string &name)
|
||||
: m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) {
|
||||
: m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) {
|
||||
set_name(name);
|
||||
}
|
||||
|
||||
Rdb_tbl_def(const char *const name, const size_t &len)
|
||||
: m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) {
|
||||
: m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) {
|
||||
set_name(std::string(name, len));
|
||||
}
|
||||
|
||||
explicit Rdb_tbl_def(const rocksdb::Slice &slice, const size_t &pos = 0)
|
||||
: m_key_descr_arr(nullptr), m_hidden_pk_val(1), m_auto_incr_val(1) {
|
||||
: m_key_descr_arr(nullptr), m_hidden_pk_val(0), m_auto_incr_val(0) {
|
||||
set_name(std::string(slice.data() + pos, slice.size() - pos));
|
||||
}
|
||||
|
||||
@@ -991,7 +1005,7 @@ public:
|
||||
std::shared_ptr<Rdb_key_def> *m_key_descr_arr;
|
||||
|
||||
std::atomic<longlong> m_hidden_pk_val;
|
||||
std::atomic<longlong> m_auto_incr_val;
|
||||
std::atomic<ulonglong> m_auto_incr_val;
|
||||
|
||||
/* Is this a system table */
|
||||
bool m_is_mysql_system_table;
|
||||
@@ -1003,6 +1017,7 @@ public:
|
||||
const std::string &base_dbname() const { return m_dbname; }
|
||||
const std::string &base_tablename() const { return m_tablename; }
|
||||
const std::string &base_partition() const { return m_partition; }
|
||||
GL_INDEX_ID get_autoincr_gl_index_id();
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -1115,6 +1130,8 @@ private:
|
||||
static void free_hash_elem(void *const data);
|
||||
|
||||
bool validate_schemas();
|
||||
|
||||
bool validate_auto_incr();
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -1179,8 +1196,9 @@ private:
|
||||
|
||||
2. internal cf_id, index id => index information
|
||||
key: Rdb_key_def::INDEX_INFO(0x2) + cf_id + index_id
|
||||
value: version, index_type, kv_format_version, ttl_duration
|
||||
value: version, index_type, kv_format_version, index_flags, ttl_duration
|
||||
index_type is 1 byte, version and kv_format_version are 2 bytes.
|
||||
index_flags is 4 bytes.
|
||||
ttl_duration is 8 bytes.
|
||||
|
||||
3. CF id => CF flags
|
||||
@@ -1209,6 +1227,11 @@ private:
|
||||
key: Rdb_key_def::DDL_CREATE_INDEX_ONGOING(0x8) + cf_id + index_id
|
||||
value: version
|
||||
|
||||
9. auto_increment values
|
||||
key: Rdb_key_def::AUTO_INC(0x9) + cf_id + index_id
|
||||
value: version, {max auto_increment so far}
|
||||
max auto_increment is 8 bytes
|
||||
|
||||
Data dictionary operations are atomic inside RocksDB. For example,
|
||||
when creating a table with two indexes, it is necessary to call Put
|
||||
three times. They have to be atomic. Rdb_dict_manager has a wrapper function
|
||||
@@ -1350,6 +1373,13 @@ public:
|
||||
void add_stats(rocksdb::WriteBatch *const batch,
|
||||
const std::vector<Rdb_index_stats> &stats) const;
|
||||
Rdb_index_stats get_stats(GL_INDEX_ID gl_index_id) const;
|
||||
|
||||
rocksdb::Status put_auto_incr_val(rocksdb::WriteBatchBase *batch,
|
||||
const GL_INDEX_ID &gl_index_id,
|
||||
ulonglong val,
|
||||
bool overwrite = false) const;
|
||||
bool get_auto_incr_val(const GL_INDEX_ID &gl_index_id,
|
||||
ulonglong *new_val) const;
|
||||
};
|
||||
|
||||
struct Rdb_index_info {
|
||||
@@ -1361,6 +1391,109 @@ struct Rdb_index_info {
|
||||
uint64 m_ttl_duration = 0;
|
||||
};
|
||||
|
||||
/*
|
||||
@brief
|
||||
Merge Operator for the auto_increment value in the system_cf
|
||||
|
||||
@detail
|
||||
This class implements the rocksdb Merge Operator for auto_increment values
|
||||
that are stored to the data dictionary every transaction.
|
||||
|
||||
The actual Merge function is triggered on compaction, memtable flushes, or
|
||||
when get() is called on the same key.
|
||||
|
||||
*/
|
||||
class Rdb_system_merge_op : public rocksdb::AssociativeMergeOperator {
|
||||
public:
|
||||
/*
|
||||
Updates the new value associated with a key to be the maximum of the
|
||||
passed in value and the existing value.
|
||||
|
||||
@param[IN] key
|
||||
@param[IN] existing_value existing value for a key; nullptr if nonexistent
|
||||
key
|
||||
@param[IN] value
|
||||
@param[OUT] new_value new value after Merge
|
||||
@param[IN] logger
|
||||
*/
|
||||
bool Merge(const rocksdb::Slice &key, const rocksdb::Slice *existing_value,
|
||||
const rocksdb::Slice &value, std::string *new_value,
|
||||
rocksdb::Logger *logger) const override {
|
||||
DBUG_ASSERT(new_value != nullptr);
|
||||
|
||||
if (key.size() != Rdb_key_def::INDEX_NUMBER_SIZE * 3 ||
|
||||
GetKeyType(key) != Rdb_key_def::AUTO_INC ||
|
||||
value.size() !=
|
||||
RDB_SIZEOF_AUTO_INCREMENT_VERSION + ROCKSDB_SIZEOF_AUTOINC_VALUE ||
|
||||
GetVersion(value) > Rdb_key_def::AUTO_INCREMENT_VERSION) {
|
||||
abort();
|
||||
}
|
||||
|
||||
uint64_t merged_value = Deserialize(value);
|
||||
|
||||
if (existing_value != nullptr) {
|
||||
if (existing_value->size() != RDB_SIZEOF_AUTO_INCREMENT_VERSION +
|
||||
ROCKSDB_SIZEOF_AUTOINC_VALUE ||
|
||||
GetVersion(*existing_value) > Rdb_key_def::AUTO_INCREMENT_VERSION) {
|
||||
abort();
|
||||
}
|
||||
|
||||
merged_value = std::max(merged_value, Deserialize(*existing_value));
|
||||
}
|
||||
Serialize(merged_value, new_value);
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual const char *Name() const override { return "Rdb_system_merge_op"; }
|
||||
|
||||
private:
|
||||
/*
|
||||
Serializes the integer data to the new_value buffer or the target buffer
|
||||
the merge operator will update to
|
||||
*/
|
||||
void Serialize(const uint64_t data, std::string *new_value) const {
|
||||
uchar value_buf[RDB_SIZEOF_AUTO_INCREMENT_VERSION +
|
||||
ROCKSDB_SIZEOF_AUTOINC_VALUE] = {0};
|
||||
uchar *ptr = value_buf;
|
||||
/* fill in the auto increment version */
|
||||
rdb_netbuf_store_uint16(ptr, Rdb_key_def::AUTO_INCREMENT_VERSION);
|
||||
ptr += RDB_SIZEOF_AUTO_INCREMENT_VERSION;
|
||||
/* fill in the auto increment value */
|
||||
rdb_netbuf_store_uint64(ptr, data);
|
||||
ptr += ROCKSDB_SIZEOF_AUTOINC_VALUE;
|
||||
new_value->assign(reinterpret_cast<char *>(value_buf), ptr - value_buf);
|
||||
}
|
||||
|
||||
/*
|
||||
Gets the value of auto_increment type in the data dictionary from the
|
||||
value slice
|
||||
|
||||
@Note Only to be used on data dictionary keys for the auto_increment type
|
||||
*/
|
||||
uint64_t Deserialize(const rocksdb::Slice &s) const {
|
||||
return rdb_netbuf_to_uint64(reinterpret_cast<const uchar *>(s.data()) +
|
||||
RDB_SIZEOF_AUTO_INCREMENT_VERSION);
|
||||
}
|
||||
|
||||
/*
|
||||
Gets the type of the key of the key in the data dictionary.
|
||||
|
||||
@Note Only to be used on data dictionary keys for the auto_increment type
|
||||
*/
|
||||
uint16_t GetKeyType(const rocksdb::Slice &s) const {
|
||||
return rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>(s.data()));
|
||||
}
|
||||
|
||||
/*
|
||||
Gets the version of the auto_increment value in the data dictionary.
|
||||
|
||||
@Note Only to be used on data dictionary value for the auto_increment type
|
||||
*/
|
||||
uint16_t GetVersion(const rocksdb::Slice &s) const {
|
||||
return rdb_netbuf_to_uint16(reinterpret_cast<const uchar *>(s.data()));
|
||||
}
|
||||
};
|
||||
|
||||
bool rdb_is_collation_supported(const my_core::CHARSET_INFO *const cs);
|
||||
|
||||
} // namespace myrocks
|
||||
|
Reference in New Issue
Block a user