mirror of
https://github.com/MariaDB/server.git
synced 2025-08-24 14:48:09 +03:00
merge with ft-engine and ft-index up to tag:tokudb-7.1.0
This commit is contained in:
@@ -13,8 +13,9 @@ ENDIF()
|
|||||||
IF(NOT TOKUDB_OK)
|
IF(NOT TOKUDB_OK)
|
||||||
RETURN()
|
RETURN()
|
||||||
ENDIF()
|
ENDIF()
|
||||||
SET(ENV{TOKUDB_VERSION} "7.0.4")
|
|
||||||
|
|
||||||
|
############################################
|
||||||
|
SET(ENV{TOKUDB_VERSION} "7.0.4")
|
||||||
SET(TOKUDB_DEB_FILES "usr/lib/mysql/plugin/ha_tokudb.so\netc/mysql/conf.d/tokudb.cnf\nusr/bin/tokuftdump\nusr/share/doc/mariadb-server-5.5/README-TOKUDB\nusr/share/doc/mariadb-server-5.5/README.md" PARENT_SCOPE)
|
SET(TOKUDB_DEB_FILES "usr/lib/mysql/plugin/ha_tokudb.so\netc/mysql/conf.d/tokudb.cnf\nusr/bin/tokuftdump\nusr/share/doc/mariadb-server-5.5/README-TOKUDB\nusr/share/doc/mariadb-server-5.5/README.md" PARENT_SCOPE)
|
||||||
SET(USE_BDB OFF CACHE BOOL "")
|
SET(USE_BDB OFF CACHE BOOL "")
|
||||||
SET(USE_VALGRIND OFF CACHE BOOL "")
|
SET(USE_VALGRIND OFF CACHE BOOL "")
|
||||||
@@ -58,6 +59,7 @@ SET(TOKUDB_SOURCES ha_tokudb.cc)
|
|||||||
MYSQL_ADD_PLUGIN(tokudb ${TOKUDB_SOURCES} STORAGE_ENGINE MODULE_ONLY
|
MYSQL_ADD_PLUGIN(tokudb ${TOKUDB_SOURCES} STORAGE_ENGINE MODULE_ONLY
|
||||||
LINK_LIBRARIES tokufractaltree_static tokuportability_static ${ZLIB_LIBRARY} stdc++)
|
LINK_LIBRARIES tokufractaltree_static tokuportability_static ${ZLIB_LIBRARY} stdc++)
|
||||||
SET(CMAKE_MODULE_LINKER_FLAGS_RELEASE "${CMAKE_MODULE_LINKER_FLAGS_RELEASE} -flto -fuse-linker-plugin")
|
SET(CMAKE_MODULE_LINKER_FLAGS_RELEASE "${CMAKE_MODULE_LINKER_FLAGS_RELEASE} -flto -fuse-linker-plugin")
|
||||||
|
SET(CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO} -flto -fuse-linker-plugin")
|
||||||
|
|
||||||
SET(CPACK_RPM_server_PACKAGE_OBSOLETES
|
SET(CPACK_RPM_server_PACKAGE_OBSOLETES
|
||||||
"${CPACK_RPM_server_PACKAGE_OBSOLETES} MariaDB-tokudb-engine < 5.5.34" PARENT_SCOPE)
|
"${CPACK_RPM_server_PACKAGE_OBSOLETES} MariaDB-tokudb-engine < 5.5.34" PARENT_SCOPE)
|
||||||
|
@@ -24,14 +24,14 @@ working MySQL or MariaDB with Tokutek patches, and with the TokuDB storage
|
|||||||
engine, called `make.mysql.bash`. This script will download copies of the
|
engine, called `make.mysql.bash`. This script will download copies of the
|
||||||
needed source code from github and build everything.
|
needed source code from github and build everything.
|
||||||
|
|
||||||
To build MySQL with TokuDB 7.0.1:
|
To build MySQL with TokuDB 7.0.4:
|
||||||
```sh
|
```sh
|
||||||
scripts/make.mysql.bash --mysqlbuild=mysql-5.5.30-tokudb-7.0.1-linux-x86_64
|
scripts/make.mysql.bash --mysqlbuild=mysql-5.5.30-tokudb-7.0.4-linux-x86_64
|
||||||
```
|
```
|
||||||
|
|
||||||
To build MariaDB with TokuDB 7.0.1:
|
To build MariaDB with TokuDB 7.0.4:
|
||||||
```sh
|
```sh
|
||||||
scripts/make.mysql.bash --mysqlbuild=mariadb-5.5.30-tokudb-7.0.1-linux-x86_64
|
scripts/make.mysql.bash --mysqlbuild=mariadb-5.5.30-tokudb-7.0.4-linux-x86_64
|
||||||
```
|
```
|
||||||
|
|
||||||
Before you start, make sure you have a C++11-compatible compiler (GCC >=
|
Before you start, make sure you have a C++11-compatible compiler (GCC >=
|
||||||
|
@@ -46,6 +46,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -16,7 +16,7 @@ To build the full MySQL product, see the instructions for
|
|||||||
Building
|
Building
|
||||||
--------
|
--------
|
||||||
|
|
||||||
TokuKV is built using CMake >= 2.8.8. Out-of-source builds are
|
TokuKV is built using CMake >= 2.8.9. Out-of-source builds are
|
||||||
recommended. You need a C++11 compiler, though only GCC >= 4.7 and
|
recommended. You need a C++11 compiler, though only GCC >= 4.7 and
|
||||||
Apple's Clang are tested. You also need zlib and valgrind development
|
Apple's Clang are tested. You also need zlib and valgrind development
|
||||||
packages (`yum install valgrind-devel zlib-devel` or `apt-get install
|
packages (`yum install valgrind-devel zlib-devel` or `apt-get install
|
||||||
|
@@ -49,6 +49,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -431,18 +432,18 @@ static void print_db_env_struct (void) {
|
|||||||
"int (*create_indexer) (DB_ENV *env, DB_TXN *txn, DB_INDEXER **idxrp, DB *src_db, int N, DB *dbs[/*N*/], uint32_t db_flags[/*N*/], uint32_t indexer_flags)",
|
"int (*create_indexer) (DB_ENV *env, DB_TXN *txn, DB_INDEXER **idxrp, DB *src_db, int N, DB *dbs[/*N*/], uint32_t db_flags[/*N*/], uint32_t indexer_flags)",
|
||||||
"int (*put_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,\n"
|
"int (*put_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,\n"
|
||||||
" const DBT *src_key, const DBT *src_val,\n"
|
" const DBT *src_key, const DBT *src_val,\n"
|
||||||
" uint32_t num_dbs, DB **db_array, DBT *keys, DBT *vals, uint32_t *flags_array) /* insert into multiple DBs */",
|
" uint32_t num_dbs, DB **db_array, DBT_ARRAY *keys, DBT_ARRAY *vals, uint32_t *flags_array) /* insert into multiple DBs */",
|
||||||
"int (*set_generate_row_callback_for_put) (DB_ENV *env, generate_row_for_put_func generate_row_for_put)",
|
"int (*set_generate_row_callback_for_put) (DB_ENV *env, generate_row_for_put_func generate_row_for_put)",
|
||||||
"int (*del_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,\n"
|
"int (*del_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,\n"
|
||||||
" const DBT *src_key, const DBT *src_val,\n"
|
" const DBT *src_key, const DBT *src_val,\n"
|
||||||
" uint32_t num_dbs, DB **db_array, DBT *keys, uint32_t *flags_array) /* delete from multiple DBs */",
|
" uint32_t num_dbs, DB **db_array, DBT_ARRAY *keys, uint32_t *flags_array) /* delete from multiple DBs */",
|
||||||
"int (*set_generate_row_callback_for_del) (DB_ENV *env, generate_row_for_del_func generate_row_for_del)",
|
"int (*set_generate_row_callback_for_del) (DB_ENV *env, generate_row_for_del_func generate_row_for_del)",
|
||||||
"int (*update_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,\n"
|
"int (*update_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,\n"
|
||||||
" DBT *old_src_key, DBT *old_src_data,\n"
|
" DBT *old_src_key, DBT *old_src_data,\n"
|
||||||
" DBT *new_src_key, DBT *new_src_data,\n"
|
" DBT *new_src_key, DBT *new_src_data,\n"
|
||||||
" uint32_t num_dbs, DB **db_array, uint32_t *flags_array,\n"
|
" uint32_t num_dbs, DB **db_array, uint32_t *flags_array,\n"
|
||||||
" uint32_t num_keys, DBT *keys,\n"
|
" uint32_t num_keys, DBT_ARRAY *keys,\n"
|
||||||
" uint32_t num_vals, DBT *vals) /* update multiple DBs */",
|
" uint32_t num_vals, DBT_ARRAY *vals) /* update multiple DBs */",
|
||||||
"int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */",
|
"int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */",
|
||||||
"int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */",
|
"int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */",
|
||||||
"int (*set_lk_max_memory) (DB_ENV *env, uint64_t max)",
|
"int (*set_lk_max_memory) (DB_ENV *env, uint64_t max)",
|
||||||
@@ -450,11 +451,16 @@ static void print_db_env_struct (void) {
|
|||||||
"void (*set_update) (DB_ENV *env, int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra))",
|
"void (*set_update) (DB_ENV *env, int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra))",
|
||||||
"int (*set_lock_timeout) (DB_ENV *env, uint64_t lock_wait_time_msec)",
|
"int (*set_lock_timeout) (DB_ENV *env, uint64_t lock_wait_time_msec)",
|
||||||
"int (*get_lock_timeout) (DB_ENV *env, uint64_t *lock_wait_time_msec)",
|
"int (*get_lock_timeout) (DB_ENV *env, uint64_t *lock_wait_time_msec)",
|
||||||
|
"int (*set_lock_timeout_callback) (DB_ENV *env, lock_timeout_callback callback)",
|
||||||
"int (*txn_xa_recover) (DB_ENV*, TOKU_XA_XID list[/*count*/], long count, /*out*/ long *retp, uint32_t flags)",
|
"int (*txn_xa_recover) (DB_ENV*, TOKU_XA_XID list[/*count*/], long count, /*out*/ long *retp, uint32_t flags)",
|
||||||
"int (*get_txn_from_xid) (DB_ENV*, /*in*/ TOKU_XA_XID *, /*out*/ DB_TXN **)",
|
"int (*get_txn_from_xid) (DB_ENV*, /*in*/ TOKU_XA_XID *, /*out*/ DB_TXN **)",
|
||||||
"int (*get_cursor_for_directory) (DB_ENV*, /*in*/ DB_TXN *, /*out*/ DBC **)",
|
"int (*get_cursor_for_directory) (DB_ENV*, /*in*/ DB_TXN *, /*out*/ DBC **)",
|
||||||
"int (*get_cursor_for_persistent_environment) (DB_ENV*, /*in*/ DB_TXN *, /*out*/ DBC **)",
|
"int (*get_cursor_for_persistent_environment)(DB_ENV*, /*in*/ DB_TXN *, /*out*/ DBC **)",
|
||||||
"void (*change_fsync_log_period)(DB_ENV*, uint32_t)",
|
"void (*change_fsync_log_period) (DB_ENV*, uint32_t)",
|
||||||
|
"int (*iterate_live_transactions) (DB_ENV *env, iterate_transactions_callback callback, void *extra)",
|
||||||
|
"int (*iterate_pending_lock_requests) (DB_ENV *env, iterate_requests_callback callback, void *extra)",
|
||||||
|
"void (*set_loader_memory_size)(DB_ENV *env, uint64_t loader_memory_size)",
|
||||||
|
"uint64_t (*get_loader_memory_size)(DB_ENV *env)",
|
||||||
NULL};
|
NULL};
|
||||||
|
|
||||||
sort_and_dump_fields("db_env", true, extra);
|
sort_and_dump_fields("db_env", true, extra);
|
||||||
@@ -523,7 +529,7 @@ static void print_db_struct (void) {
|
|||||||
"int (*change_descriptor) (DB*, DB_TXN*, const DBT* descriptor, uint32_t) /* change row/dictionary descriptor for a db. Available only while db is open */",
|
"int (*change_descriptor) (DB*, DB_TXN*, const DBT* descriptor, uint32_t) /* change row/dictionary descriptor for a db. Available only while db is open */",
|
||||||
"int (*getf_set)(DB*, DB_TXN*, uint32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */",
|
"int (*getf_set)(DB*, DB_TXN*, uint32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */",
|
||||||
"int (*optimize)(DB*) /* Run garbage collecion and promote all transactions older than oldest. Amortized (happens during flattening) */",
|
"int (*optimize)(DB*) /* Run garbage collecion and promote all transactions older than oldest. Amortized (happens during flattening) */",
|
||||||
"int (*hot_optimize)(DB*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra)",
|
"int (*hot_optimize)(DB*, DBT*, DBT*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra)",
|
||||||
"int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION)",
|
"int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION)",
|
||||||
"int (*change_pagesize)(DB*,uint32_t)",
|
"int (*change_pagesize)(DB*,uint32_t)",
|
||||||
"int (*change_readpagesize)(DB*,uint32_t)",
|
"int (*change_readpagesize)(DB*,uint32_t)",
|
||||||
@@ -539,6 +545,7 @@ static void print_db_struct (void) {
|
|||||||
"int (*update_broadcast)(DB *, DB_TXN*, const DBT *extra, uint32_t flags)",
|
"int (*update_broadcast)(DB *, DB_TXN*, const DBT *extra, uint32_t flags)",
|
||||||
"int (*get_fractal_tree_info64)(DB*,uint64_t*,uint64_t*,uint64_t*,uint64_t*)",
|
"int (*get_fractal_tree_info64)(DB*,uint64_t*,uint64_t*,uint64_t*,uint64_t*)",
|
||||||
"int (*iterate_fractal_tree_block_map)(DB*,int(*)(uint64_t,int64_t,int64_t,int64_t,int64_t,void*),void*)",
|
"int (*iterate_fractal_tree_block_map)(DB*,int(*)(uint64_t,int64_t,int64_t,int64_t,int64_t,void*),void*)",
|
||||||
|
"const char *(*get_dname)(DB *db)",
|
||||||
NULL};
|
NULL};
|
||||||
sort_and_dump_fields("db", true, extra);
|
sort_and_dump_fields("db", true, extra);
|
||||||
}
|
}
|
||||||
@@ -566,6 +573,8 @@ static void print_db_txn_struct (void) {
|
|||||||
"int (*abort_with_progress)(DB_TXN*, TXN_PROGRESS_POLL_FUNCTION, void*)",
|
"int (*abort_with_progress)(DB_TXN*, TXN_PROGRESS_POLL_FUNCTION, void*)",
|
||||||
"int (*xa_prepare) (DB_TXN*, TOKU_XA_XID *)",
|
"int (*xa_prepare) (DB_TXN*, TOKU_XA_XID *)",
|
||||||
"uint64_t (*id64) (DB_TXN*)",
|
"uint64_t (*id64) (DB_TXN*)",
|
||||||
|
"void (*set_client_id)(DB_TXN *, uint64_t client_id)",
|
||||||
|
"uint64_t (*get_client_id)(DB_TXN *)",
|
||||||
NULL};
|
NULL};
|
||||||
sort_and_dump_fields("db_txn", false, extra);
|
sort_and_dump_fields("db_txn", false, extra);
|
||||||
}
|
}
|
||||||
@@ -747,9 +756,22 @@ int main (int argc, char *const argv[] __attribute__((__unused__))) {
|
|||||||
print_dbtype();
|
print_dbtype();
|
||||||
print_defines();
|
print_defines();
|
||||||
|
|
||||||
printf("typedef int (*generate_row_for_put_func)(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, const DBT *src_key, const DBT *src_val);\n");
|
printf("typedef struct {\n");
|
||||||
printf("typedef int (*generate_row_for_del_func)(DB *dest_db, DB *src_db, DBT *dest_key, const DBT *src_key, const DBT *src_val);\n");
|
printf(" uint32_t capacity;\n");
|
||||||
|
printf(" uint32_t size;\n");
|
||||||
|
printf(" DBT *dbts;\n");
|
||||||
|
printf("} DBT_ARRAY;\n\n");
|
||||||
|
printf("typedef int (*generate_row_for_put_func)(DB *dest_db, DB *src_db, DBT_ARRAY * dest_keys, DBT_ARRAY *dest_vals, const DBT *src_key, const DBT *src_val);\n");
|
||||||
|
printf("typedef int (*generate_row_for_del_func)(DB *dest_db, DB *src_db, DBT_ARRAY * dest_keys, const DBT *src_key, const DBT *src_val);\n");
|
||||||
|
printf("DBT_ARRAY * toku_dbt_array_init(DBT_ARRAY *dbts, uint32_t size) %s;\n", VISIBLE);
|
||||||
|
printf("void toku_dbt_array_destroy(DBT_ARRAY *dbts) %s;\n", VISIBLE);
|
||||||
|
printf("void toku_dbt_array_destroy_shallow(DBT_ARRAY *dbts) %s;\n", VISIBLE);
|
||||||
|
printf("void toku_dbt_array_resize(DBT_ARRAY *dbts, uint32_t size) %s;\n", VISIBLE);
|
||||||
|
|
||||||
|
printf("typedef void (*lock_timeout_callback)(DB *db, uint64_t requesting_txnid, const DBT *left_key, const DBT *right_key, uint64_t blocking_txnid);\n");
|
||||||
|
printf("typedef int (*iterate_row_locks_callback)(DB **db, DBT *left_key, DBT *right_key, void *extra);\n");
|
||||||
|
printf("typedef int (*iterate_transactions_callback)(uint64_t txnid, uint64_t client_id, iterate_row_locks_callback cb, void *locks_extra, void *extra);\n");
|
||||||
|
printf("typedef int (*iterate_requests_callback)(DB *db, uint64_t requesting_txnid, const DBT *left_key, const DBT *right_key, uint64_t blocking_txnid, uint64_t start_time, void *extra);\n");
|
||||||
print_db_env_struct();
|
print_db_env_struct();
|
||||||
print_db_key_range_struct();
|
print_db_key_range_struct();
|
||||||
print_db_lsn_struct();
|
print_db_lsn_struct();
|
||||||
|
@@ -87,6 +87,7 @@ set_cflags_if_supported(
|
|||||||
-Wmissing-format-attribute
|
-Wmissing-format-attribute
|
||||||
-Wno-error=missing-format-attribute
|
-Wno-error=missing-format-attribute
|
||||||
-Wno-error=address-of-array-temporary
|
-Wno-error=address-of-array-temporary
|
||||||
|
-Wno-error=tautological-constant-out-of-range-compare
|
||||||
-fno-rtti
|
-fno-rtti
|
||||||
-fno-exceptions
|
-fno-exceptions
|
||||||
)
|
)
|
||||||
@@ -128,7 +129,7 @@ set(CMAKE_CXX_FLAGS_DRD "-g3 -O1 ${CMAKE_CXX_FLAGS_DRD}")
|
|||||||
|
|
||||||
## set extra release flags
|
## set extra release flags
|
||||||
## need to set flags for RelWithDebInfo as well because we want the MySQL/MariaDB builds to use them
|
## need to set flags for RelWithDebInfo as well because we want the MySQL/MariaDB builds to use them
|
||||||
if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang)
|
if (CMAKE_CXX_COMPILER_ID STREQUAL Clang)
|
||||||
# have tried -flto and -O4, both make our statically linked executables break apple's linker
|
# have tried -flto and -O4, both make our statically linked executables break apple's linker
|
||||||
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -g -O3 -UNDEBUG")
|
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -g -O3 -UNDEBUG")
|
||||||
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -g -O3 -UNDEBUG")
|
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -g -O3 -UNDEBUG")
|
||||||
|
@@ -30,7 +30,7 @@ FILE(GLOB XZ_ALL_FILES ${XZ_SOURCE_DIR}/*)
|
|||||||
ExternalProject_Add(build_lzma
|
ExternalProject_Add(build_lzma
|
||||||
PREFIX xz
|
PREFIX xz
|
||||||
DOWNLOAD_COMMAND
|
DOWNLOAD_COMMAND
|
||||||
cp -au "${XZ_ALL_FILES}" "<SOURCE_DIR>/"
|
cp -a "${XZ_ALL_FILES}" "<SOURCE_DIR>/"
|
||||||
CONFIGURE_COMMAND
|
CONFIGURE_COMMAND
|
||||||
"<SOURCE_DIR>/configure" ${xz_configure_opts}
|
"<SOURCE_DIR>/configure" ${xz_configure_opts}
|
||||||
"--prefix=${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/xz"
|
"--prefix=${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/xz"
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -26,6 +26,7 @@ set(FT_SOURCES
|
|||||||
background_job_manager
|
background_job_manager
|
||||||
block_allocator
|
block_allocator
|
||||||
block_table
|
block_table
|
||||||
|
bndata
|
||||||
cachetable
|
cachetable
|
||||||
checkpoint
|
checkpoint
|
||||||
compress
|
compress
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -52,6 +52,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -49,6 +49,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -53,6 +53,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -52,6 +52,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
474
storage/tokudb/ft-index/ft/bndata.cc
Normal file
474
storage/tokudb/ft-index/ft/bndata.cc
Normal file
@@ -0,0 +1,474 @@
|
|||||||
|
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
#ident "$Id$"
|
||||||
|
/*
|
||||||
|
COPYING CONDITIONS NOTICE:
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of version 2 of the GNU General Public License as
|
||||||
|
published by the Free Software Foundation, and provided that the
|
||||||
|
following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain this COPYING
|
||||||
|
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
||||||
|
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
||||||
|
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
||||||
|
GRANT (below).
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce this COPYING
|
||||||
|
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
||||||
|
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
||||||
|
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
||||||
|
GRANT (below) in the documentation and/or other materials
|
||||||
|
provided with the distribution.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
02110-1301, USA.
|
||||||
|
|
||||||
|
COPYRIGHT NOTICE:
|
||||||
|
|
||||||
|
TokuDB, Tokutek Fractal Tree Indexing Library.
|
||||||
|
Copyright (C) 2007-2013 Tokutek, Inc.
|
||||||
|
|
||||||
|
DISCLAIMER:
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful, but
|
||||||
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
General Public License for more details.
|
||||||
|
|
||||||
|
UNIVERSITY PATENT NOTICE:
|
||||||
|
|
||||||
|
The technology is licensed by the Massachusetts Institute of
|
||||||
|
Technology, Rutgers State University of New Jersey, and the Research
|
||||||
|
Foundation of State University of New York at Stony Brook under
|
||||||
|
United States of America Serial No. 11/760379 and to the patents
|
||||||
|
and/or patent applications resulting from it.
|
||||||
|
|
||||||
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
||||||
|
Tokutek as part of the Fractal Tree project.
|
||||||
|
|
||||||
|
"PATENT CLAIMS" means the claims of patents that are owned or
|
||||||
|
licensable by Tokutek, both currently or in the future; and that in
|
||||||
|
the absence of this license would be infringed by THIS
|
||||||
|
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
||||||
|
|
||||||
|
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
||||||
|
patentability, enforceability and/or non-infringement of any of the
|
||||||
|
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
||||||
|
|
||||||
|
Tokutek hereby grants to you, for the term and geographical scope of
|
||||||
|
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
||||||
|
irrevocable (except as stated in this section) patent license to
|
||||||
|
make, have made, use, offer to sell, sell, import, transfer, and
|
||||||
|
otherwise run, modify, and propagate the contents of THIS
|
||||||
|
IMPLEMENTATION, where such license applies only to the PATENT
|
||||||
|
CLAIMS. This grant does not include claims that would be infringed
|
||||||
|
only as a consequence of further modifications of THIS
|
||||||
|
IMPLEMENTATION. If you or your agent or licensee institute or order
|
||||||
|
or agree to the institution of patent litigation against any entity
|
||||||
|
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||||
|
THIS IMPLEMENTATION constitutes direct or contributory patent
|
||||||
|
infringement, or inducement of patent infringement, then any rights
|
||||||
|
granted to you under this License shall terminate as of the date
|
||||||
|
such litigation is filed. If you or your agent or exclusive
|
||||||
|
licensee institute or order or agree to the institution of a PATENT
|
||||||
|
CHALLENGE, then Tokutek may terminate any rights granted to you
|
||||||
|
under this License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
||||||
|
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
||||||
|
|
||||||
|
#include <bndata.h>
|
||||||
|
|
||||||
|
// Size in bytes of a key/leafentry pair as held in memory: the klpair struct
// itself, plus keylen bytes of key, plus the leafentry as measured by
// leafentry_memsize().
static uint32_t klpair_size(KLPAIR klpair){
    uint32_t total = sizeof(*klpair);
    total += klpair->keylen;
    total += leafentry_memsize(get_le_from_klpair(klpair));
    return total;
}
|
||||||
|
|
||||||
|
// Same accounting as klpair_size(), except that the leafentry portion is
// measured with leafentry_disksize() instead of leafentry_memsize().
static uint32_t klpair_disksize(KLPAIR klpair){
    uint32_t total = sizeof(*klpair);
    total += klpair->keylen;
    total += leafentry_disksize(get_le_from_klpair(klpair));
    return total;
}
|
||||||
|
|
||||||
|
// Resets only the mempool bookkeeping (via toku_mempool_zero); m_buffer is
// deliberately left untouched here.
void bn_data::init_zero() {
    struct mempool *const pool = &m_buffer_mempool;
    toku_mempool_zero(pool);
}
|
||||||
|
|
||||||
|
void bn_data::initialize_empty() {
|
||||||
|
toku_mempool_zero(&m_buffer_mempool);
|
||||||
|
m_buffer.create();
|
||||||
|
}
|
||||||
|
|
||||||
|
void bn_data::initialize_from_data(uint32_t num_entries, unsigned char *buf, uint32_t data_size) {
|
||||||
|
if (data_size == 0) {
|
||||||
|
invariant_zero(num_entries);
|
||||||
|
}
|
||||||
|
KLPAIR *XMALLOC_N(num_entries, array); // create array of pointers to leafentries
|
||||||
|
unsigned char *newmem = NULL;
|
||||||
|
// add same wiggle room that toku_mempool_construct would, 25% extra
|
||||||
|
uint32_t allocated_bytes = data_size + data_size/4;
|
||||||
|
CAST_FROM_VOIDP(newmem, toku_xmalloc(allocated_bytes));
|
||||||
|
unsigned char* curr_src_pos = buf;
|
||||||
|
unsigned char* curr_dest_pos = newmem;
|
||||||
|
for (uint32_t i = 0; i < num_entries; i++) {
|
||||||
|
KLPAIR curr_kl = (KLPAIR)curr_dest_pos;
|
||||||
|
array[i] = curr_kl;
|
||||||
|
|
||||||
|
uint8_t curr_type = curr_src_pos[0];
|
||||||
|
curr_src_pos++;
|
||||||
|
// first thing we do is lay out the key,
|
||||||
|
// to do so, we must extract it from the leafentry
|
||||||
|
// and write it in
|
||||||
|
uint32_t keylen = 0;
|
||||||
|
void* keyp = NULL;
|
||||||
|
keylen = *(uint32_t *)curr_src_pos;
|
||||||
|
curr_src_pos += sizeof(uint32_t);
|
||||||
|
uint32_t clean_vallen = 0;
|
||||||
|
uint32_t num_cxrs = 0;
|
||||||
|
uint8_t num_pxrs = 0;
|
||||||
|
if (curr_type == LE_CLEAN) {
|
||||||
|
clean_vallen = toku_dtoh32(*(uint32_t *)curr_src_pos);
|
||||||
|
curr_src_pos += sizeof(clean_vallen); // val_len
|
||||||
|
keyp = curr_src_pos;
|
||||||
|
curr_src_pos += keylen;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
paranoid_invariant(curr_type == LE_MVCC);
|
||||||
|
num_cxrs = toku_htod32(*(uint32_t *)curr_src_pos);
|
||||||
|
curr_src_pos += sizeof(uint32_t); // num_cxrs
|
||||||
|
num_pxrs = curr_src_pos[0];
|
||||||
|
curr_src_pos += sizeof(uint8_t); //num_pxrs
|
||||||
|
keyp = curr_src_pos;
|
||||||
|
curr_src_pos += keylen;
|
||||||
|
}
|
||||||
|
// now that we have the keylen and the key, we can copy it
|
||||||
|
// into the destination
|
||||||
|
*(uint32_t *)curr_dest_pos = keylen;
|
||||||
|
curr_dest_pos += sizeof(keylen);
|
||||||
|
memcpy(curr_dest_pos, keyp, keylen);
|
||||||
|
curr_dest_pos += keylen;
|
||||||
|
// now curr_dest_pos is pointing to where the leafentry should be packed
|
||||||
|
curr_dest_pos[0] = curr_type;
|
||||||
|
curr_dest_pos++;
|
||||||
|
if (curr_type == LE_CLEAN) {
|
||||||
|
*(uint32_t *)curr_dest_pos = toku_htod32(clean_vallen);
|
||||||
|
curr_dest_pos += sizeof(clean_vallen);
|
||||||
|
memcpy(curr_dest_pos, curr_src_pos, clean_vallen); // copy the val
|
||||||
|
curr_dest_pos += clean_vallen;
|
||||||
|
curr_src_pos += clean_vallen;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// pack num_cxrs and num_pxrs
|
||||||
|
*(uint32_t *)curr_dest_pos = toku_htod32(num_cxrs);
|
||||||
|
curr_dest_pos += sizeof(num_cxrs);
|
||||||
|
*(uint8_t *)curr_dest_pos = num_pxrs;
|
||||||
|
curr_dest_pos += sizeof(num_pxrs);
|
||||||
|
// now we need to pack the rest of the data
|
||||||
|
uint32_t num_rest_bytes = leafentry_rest_memsize(num_pxrs, num_cxrs, curr_src_pos);
|
||||||
|
memcpy(curr_dest_pos, curr_src_pos, num_rest_bytes);
|
||||||
|
curr_dest_pos += num_rest_bytes;
|
||||||
|
curr_src_pos += num_rest_bytes;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
uint32_t num_bytes_read UU() = (uint32_t)(curr_src_pos - buf);
|
||||||
|
paranoid_invariant( num_bytes_read == data_size);
|
||||||
|
uint32_t num_bytes_written = curr_dest_pos - newmem;
|
||||||
|
paranoid_invariant( num_bytes_written == data_size);
|
||||||
|
toku_mempool_init(&m_buffer_mempool, newmem, (size_t)(num_bytes_written), allocated_bytes);
|
||||||
|
|
||||||
|
// destroy old omt that was created by toku_create_empty_bn(), so we can create a new one
|
||||||
|
m_buffer.destroy();
|
||||||
|
m_buffer.create_steal_sorted_array(&array, num_entries, num_entries);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t bn_data::get_memory_size() {
|
||||||
|
uint64_t retval = 0;
|
||||||
|
// include fragmentation overhead but do not include space in the
|
||||||
|
// mempool that has not yet been allocated for leaf entries
|
||||||
|
size_t poolsize = toku_mempool_footprint(&m_buffer_mempool);
|
||||||
|
invariant(poolsize >= get_disk_size());
|
||||||
|
retval += poolsize;
|
||||||
|
retval += m_buffer.memory_size();
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
|
||||||
|
void bn_data::delete_leafentry (
|
||||||
|
uint32_t idx,
|
||||||
|
uint32_t keylen,
|
||||||
|
uint32_t old_le_size
|
||||||
|
)
|
||||||
|
{
|
||||||
|
m_buffer.delete_at(idx);
|
||||||
|
toku_mempool_mfree(&m_buffer_mempool, 0, old_le_size + keylen + sizeof(keylen)); // Must pass 0, since le is no good any more.
|
||||||
|
}
|
||||||
|
|
||||||
|
/* mempool support */
|
||||||
|
|
||||||
|
struct omt_compressor_state {
|
||||||
|
struct mempool *new_kvspace;
|
||||||
|
KLPAIR *newvals;
|
||||||
|
};
|
||||||
|
|
||||||
|
static int move_it (const KLPAIR &klpair, const uint32_t idx, struct omt_compressor_state * const oc) {
|
||||||
|
uint32_t size = klpair_size(klpair);
|
||||||
|
KLPAIR CAST_FROM_VOIDP(newdata, toku_mempool_malloc(oc->new_kvspace, size, 1));
|
||||||
|
paranoid_invariant_notnull(newdata); // we do this on a fresh mempool, so nothing bad should happen
|
||||||
|
memcpy(newdata, klpair, size);
|
||||||
|
oc->newvals[idx] = newdata;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compress things, and grow the mempool if needed.
|
||||||
|
void bn_data::omt_compress_kvspace(size_t added_size, void **maybe_free) {
|
||||||
|
uint32_t total_size_needed = toku_mempool_get_used_space(&m_buffer_mempool) + added_size;
|
||||||
|
if (total_size_needed+total_size_needed >= m_buffer_mempool.size) {
|
||||||
|
m_buffer_mempool.size = total_size_needed+total_size_needed;
|
||||||
|
}
|
||||||
|
struct mempool new_kvspace;
|
||||||
|
toku_mempool_construct(&new_kvspace, m_buffer_mempool.size);
|
||||||
|
uint32_t numvals = omt_size();
|
||||||
|
KLPAIR *XMALLOC_N(numvals, newvals);
|
||||||
|
struct omt_compressor_state oc = { &new_kvspace, newvals };
|
||||||
|
|
||||||
|
m_buffer.iterate_on_range< decltype(oc), move_it >(0, omt_size(), &oc);
|
||||||
|
|
||||||
|
m_buffer.destroy();
|
||||||
|
m_buffer.create_steal_sorted_array(&newvals, numvals, numvals);
|
||||||
|
|
||||||
|
if (maybe_free) {
|
||||||
|
*maybe_free = m_buffer_mempool.base;
|
||||||
|
} else {
|
||||||
|
toku_free(m_buffer_mempool.base);
|
||||||
|
}
|
||||||
|
m_buffer_mempool = new_kvspace;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Effect: Allocate a new object of size SIZE in MP. If MP runs out of space, allocate new a new mempool space, and copy all the items
|
||||||
|
// from the OMT (which items refer to items in the old mempool) into the new mempool.
|
||||||
|
// If MAYBE_FREE is NULL then free the old mempool's space.
|
||||||
|
// Otherwise, store the old mempool's space in maybe_free.
|
||||||
|
KLPAIR bn_data::mempool_malloc_from_omt(size_t size, void **maybe_free) {
|
||||||
|
void *v = toku_mempool_malloc(&m_buffer_mempool, size, 1);
|
||||||
|
if (v == NULL) {
|
||||||
|
omt_compress_kvspace(size, maybe_free);
|
||||||
|
v = toku_mempool_malloc(&m_buffer_mempool, size, 1);
|
||||||
|
paranoid_invariant_notnull(v);
|
||||||
|
}
|
||||||
|
return (KLPAIR)v;
|
||||||
|
}
|
||||||
|
|
||||||
|
//TODO: probably not free the "maybe_free" right away?
|
||||||
|
void bn_data::get_space_for_overwrite(
|
||||||
|
uint32_t idx,
|
||||||
|
const void* keyp,
|
||||||
|
uint32_t keylen,
|
||||||
|
uint32_t old_le_size,
|
||||||
|
uint32_t new_size,
|
||||||
|
LEAFENTRY* new_le_space
|
||||||
|
)
|
||||||
|
{
|
||||||
|
void* maybe_free = nullptr;
|
||||||
|
uint32_t size_alloc = new_size + keylen + sizeof(keylen);
|
||||||
|
KLPAIR new_kl = mempool_malloc_from_omt(
|
||||||
|
size_alloc,
|
||||||
|
&maybe_free
|
||||||
|
);
|
||||||
|
uint32_t size_freed = old_le_size + keylen + sizeof(keylen);
|
||||||
|
toku_mempool_mfree(&m_buffer_mempool, nullptr, size_freed); // Must pass nullptr, since le is no good any more.
|
||||||
|
new_kl->keylen = keylen;
|
||||||
|
memcpy(new_kl->key_le, keyp, keylen);
|
||||||
|
m_buffer.set_at(new_kl, idx);
|
||||||
|
*new_le_space = get_le_from_klpair(new_kl);
|
||||||
|
// free at end, so that the keyp and keylen
|
||||||
|
// passed in is still valid
|
||||||
|
if (maybe_free) {
|
||||||
|
toku_free(maybe_free);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//TODO: probably not free the "maybe_free" right away?
|
||||||
|
void bn_data::get_space_for_insert(
|
||||||
|
uint32_t idx,
|
||||||
|
const void* keyp,
|
||||||
|
uint32_t keylen,
|
||||||
|
size_t size,
|
||||||
|
LEAFENTRY* new_le_space
|
||||||
|
)
|
||||||
|
{
|
||||||
|
void* maybe_free = nullptr;
|
||||||
|
uint32_t size_alloc = size + keylen + sizeof(keylen);
|
||||||
|
KLPAIR new_kl = mempool_malloc_from_omt(
|
||||||
|
size_alloc,
|
||||||
|
&maybe_free
|
||||||
|
);
|
||||||
|
new_kl->keylen = keylen;
|
||||||
|
memcpy(new_kl->key_le, keyp, keylen);
|
||||||
|
m_buffer.insert_at(new_kl, idx);
|
||||||
|
*new_le_space = get_le_from_klpair(new_kl);
|
||||||
|
// free at end, so that the keyp and keylen
|
||||||
|
// passed in is still valid (you never know if
|
||||||
|
// it was part of the old mempool, this is just
|
||||||
|
// safer).
|
||||||
|
if (maybe_free) {
|
||||||
|
toku_free(maybe_free);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void bn_data::move_leafentries_to(
    BN_DATA dest_bd,
    uint32_t lbi, //lower bound inclusive
    uint32_t ube //upper bound exclusive
    )
//Effect: move the leafentries in the range [lbi, ube) from this bn_data into
//  dest_bd.  A new mempool and a new OMT are constructed in dest_bd to hold
//  copies of the klpairs; the moved entries are then deleted from this
//  object's OMT and their bytes reported to this object's mempool as freed.
//Requires: lbi < ube <= omt_size().
{
    paranoid_invariant(lbi < ube);
    paranoid_invariant(ube <= omt_size());
    const uint32_t num_moved = ube - lbi;
    KLPAIR *XMALLOC_N(num_moved, newklpointers);    // create new omt

    size_t mpsize = toku_mempool_get_used_space(&m_buffer_mempool);   // overkill, but safe
    struct mempool *dest_mp = &dest_bd->m_buffer_mempool;
    struct mempool *src_mp  = &m_buffer_mempool;
    toku_mempool_construct(dest_mp, mpsize);

    for (uint32_t i = lbi; i < ube; i++) {
        KLPAIR curr_kl;
        m_buffer.fetch(i, &curr_kl);

        size_t kl_size = klpair_size(curr_kl);
        KLPAIR new_kl = NULL;
        CAST_FROM_VOIDP(new_kl, toku_mempool_malloc(dest_mp, kl_size, 1));
        memcpy(new_kl, curr_kl, kl_size);
        newklpointers[i-lbi] = new_kl;
        toku_mempool_mfree(src_mp, curr_kl, kl_size);
    }

    dest_bd->m_buffer.create_steal_sorted_array(&newklpointers, num_moved, num_moved);

    // now remove the elements from the source omt, highest index first.
    // The counter is unsigned, so a "i >= lbi" test with i starting at ube-1
    // never becomes false when lbi == 0 (i wraps around to UINT32_MAX).
    // Counting i from ube down to lbi+1 and deleting at i-1 visits the same
    // indices ube-1 .. lbi and terminates for every lbi.
    for (uint32_t i = ube; i > lbi; i--) {
        m_buffer.delete_at(i - 1);
    }
}
|
||||||
|
|
||||||
|
uint64_t bn_data::get_disk_size() {
|
||||||
|
return toku_mempool_get_used_space(&m_buffer_mempool);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Placeholder: currently performs no checks.
void bn_data::verify_mempool(void) {
    // TODO: implement something
}
|
||||||
|
|
||||||
|
uint32_t bn_data::omt_size(void) const {
|
||||||
|
return m_buffer.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tears down both members: first the OMT, then the mempool.
void bn_data::destroy(void) {
    // The buffer may have been freed already, in some cases.
    m_buffer.destroy();

    toku_mempool_destroy(&m_buffer_mempool);
}
|
||||||
|
|
||||||
|
//TODO: Splitting key/val requires changing this
|
||||||
|
void bn_data::replace_contents_with_clone_of_sorted_array(
|
||||||
|
uint32_t num_les,
|
||||||
|
const void** old_key_ptrs,
|
||||||
|
uint32_t* old_keylens,
|
||||||
|
LEAFENTRY* old_les,
|
||||||
|
size_t *le_sizes,
|
||||||
|
size_t mempool_size
|
||||||
|
)
|
||||||
|
{
|
||||||
|
toku_mempool_construct(&m_buffer_mempool, mempool_size);
|
||||||
|
KLPAIR *XMALLOC_N(num_les, le_array);
|
||||||
|
for (uint32_t idx = 0; idx < num_les; idx++) {
|
||||||
|
KLPAIR new_kl = (KLPAIR)toku_mempool_malloc(
|
||||||
|
&m_buffer_mempool,
|
||||||
|
le_sizes[idx] + old_keylens[idx] + sizeof(uint32_t),
|
||||||
|
1); // point to new location
|
||||||
|
new_kl->keylen = old_keylens[idx];
|
||||||
|
memcpy(new_kl->key_le, old_key_ptrs[idx], new_kl->keylen);
|
||||||
|
memcpy(get_le_from_klpair(new_kl), old_les[idx], le_sizes[idx]);
|
||||||
|
CAST_FROM_VOIDP(le_array[idx], new_kl);
|
||||||
|
}
|
||||||
|
//TODO: Splitting key/val requires changing this; keys are stored in old omt.. cannot delete it yet?
|
||||||
|
m_buffer.destroy();
|
||||||
|
m_buffer.create_steal_sorted_array(&le_array, num_les, num_les);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// get info about a single leafentry by index
|
||||||
|
int bn_data::fetch_le(uint32_t idx, LEAFENTRY *le) {
|
||||||
|
KLPAIR klpair = NULL;
|
||||||
|
int r = m_buffer.fetch(idx, &klpair);
|
||||||
|
if (r == 0) {
|
||||||
|
*le = get_le_from_klpair(klpair);
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
int bn_data::fetch_klpair(uint32_t idx, LEAFENTRY *le, uint32_t *len, void** key) {
|
||||||
|
KLPAIR klpair = NULL;
|
||||||
|
int r = m_buffer.fetch(idx, &klpair);
|
||||||
|
if (r == 0) {
|
||||||
|
*len = klpair->keylen;
|
||||||
|
*key = klpair->key_le;
|
||||||
|
*le = get_le_from_klpair(klpair);
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
int bn_data::fetch_klpair_disksize(uint32_t idx, size_t *size) {
|
||||||
|
KLPAIR klpair = NULL;
|
||||||
|
int r = m_buffer.fetch(idx, &klpair);
|
||||||
|
if (r == 0) {
|
||||||
|
*size = klpair_disksize(klpair);
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
int bn_data::fetch_le_key_and_len(uint32_t idx, uint32_t *len, void** key) {
|
||||||
|
KLPAIR klpair = NULL;
|
||||||
|
int r = m_buffer.fetch(idx, &klpair);
|
||||||
|
if (r == 0) {
|
||||||
|
*len = klpair->keylen;
|
||||||
|
*key = klpair->key_le;
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
struct mp_pair {
|
||||||
|
void* orig_base;
|
||||||
|
void* new_base;
|
||||||
|
klpair_omt_t* omt;
|
||||||
|
};
|
||||||
|
|
||||||
|
// OMT iteration callback: rewrites entry idx of p->omt so that it sits at the
// same byte offset from p->new_base as it currently does from p->orig_base.
// Always returns 0.
static int fix_mp_offset(const KLPAIR &klpair, const uint32_t idx, struct mp_pair * const p) {
    char *from_base = (char *) p->orig_base;
    char *to_base   = (char *) p->new_base;

    char *relocated = to_base + ((char *) klpair - from_base);
    p->omt->set_at((KLPAIR) relocated, idx);
    return 0;
}
|
||||||
|
|
||||||
|
// Makes this object a copy of orig_bn_data: clones the mempool and the OMT,
// then rewrites every pointer in the cloned OMT so it refers into this
// object's mempool rather than the original's.
void bn_data::clone(bn_data* orig_bn_data) {
    toku_mempool_clone(&orig_bn_data->m_buffer_mempool, &m_buffer_mempool);
    m_buffer.clone(orig_bn_data->m_buffer);

    // The cloned OMT still holds addresses inside the original mempool.
    struct mp_pair fixup;
    fixup.orig_base = toku_mempool_get_base(&orig_bn_data->m_buffer_mempool);
    fixup.new_base  = toku_mempool_get_base(&m_buffer_mempool);
    fixup.omt       = &m_buffer;

    const int r = m_buffer.iterate_on_range<decltype(fixup), fix_mp_offset>(0, omt_size(), &fixup);
    invariant_zero(r);
}
|
||||||
|
|
||||||
|
|
254
storage/tokudb/ft-index/ft/bndata.h
Normal file
254
storage/tokudb/ft-index/ft/bndata.h
Normal file
@@ -0,0 +1,254 @@
|
|||||||
|
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||||
|
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||||
|
/*
|
||||||
|
COPYING CONDITIONS NOTICE:
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of version 2 of the GNU General Public License as
|
||||||
|
published by the Free Software Foundation, and provided that the
|
||||||
|
following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain this COPYING
|
||||||
|
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
||||||
|
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
||||||
|
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
||||||
|
GRANT (below).
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce this COPYING
|
||||||
|
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
|
||||||
|
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
|
||||||
|
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
|
||||||
|
GRANT (below) in the documentation and/or other materials
|
||||||
|
provided with the distribution.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
02110-1301, USA.
|
||||||
|
|
||||||
|
COPYRIGHT NOTICE:
|
||||||
|
|
||||||
|
TokuDB, Tokutek Fractal Tree Indexing Library.
|
||||||
|
Copyright (C) 2007-2013 Tokutek, Inc.
|
||||||
|
|
||||||
|
DISCLAIMER:
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful, but
|
||||||
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
General Public License for more details.
|
||||||
|
|
||||||
|
UNIVERSITY PATENT NOTICE:
|
||||||
|
|
||||||
|
The technology is licensed by the Massachusetts Institute of
|
||||||
|
Technology, Rutgers State University of New Jersey, and the Research
|
||||||
|
Foundation of State University of New York at Stony Brook under
|
||||||
|
United States of America Serial No. 11/760379 and to the patents
|
||||||
|
and/or patent applications resulting from it.
|
||||||
|
|
||||||
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
||||||
|
Tokutek as part of the Fractal Tree project.
|
||||||
|
|
||||||
|
"PATENT CLAIMS" means the claims of patents that are owned or
|
||||||
|
licensable by Tokutek, both currently or in the future; and that in
|
||||||
|
the absence of this license would be infringed by THIS
|
||||||
|
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
||||||
|
|
||||||
|
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
||||||
|
patentability, enforceability and/or non-infringement of any of the
|
||||||
|
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
||||||
|
|
||||||
|
Tokutek hereby grants to you, for the term and geographical scope of
|
||||||
|
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
||||||
|
irrevocable (except as stated in this section) patent license to
|
||||||
|
make, have made, use, offer to sell, sell, import, transfer, and
|
||||||
|
otherwise run, modify, and propagate the contents of THIS
|
||||||
|
IMPLEMENTATION, where such license applies only to the PATENT
|
||||||
|
CLAIMS. This grant does not include claims that would be infringed
|
||||||
|
only as a consequence of further modifications of THIS
|
||||||
|
IMPLEMENTATION. If you or your agent or licensee institute or order
|
||||||
|
or agree to the institution of patent litigation against any entity
|
||||||
|
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||||
|
THIS IMPLEMENTATION constitutes direct or contributory patent
|
||||||
|
infringement, or inducement of patent infringement, then any rights
|
||||||
|
granted to you under this License shall terminate as of the date
|
||||||
|
such litigation is filed. If you or your agent or exclusive
|
||||||
|
licensee institute or order or agree to the institution of a PATENT
|
||||||
|
CHALLENGE, then Tokutek may terminate any rights granted to you
|
||||||
|
under this License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
|
||||||
|
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
||||||
|
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <util/omt.h>
|
||||||
|
#include "leafentry.h"
|
||||||
|
#include <util/mempool.h>
|
||||||
|
|
||||||
|
#if 0 //for implementation
|
||||||
|
static int
|
||||||
|
UU() verify_in_mempool(OMTVALUE lev, uint32_t UU(idx), void *mpv)
|
||||||
|
{
|
||||||
|
LEAFENTRY CAST_FROM_VOIDP(le, lev);
|
||||||
|
struct mempool *CAST_FROM_VOIDP(mp, mpv);
|
||||||
|
int r = toku_mempool_inrange(mp, le, leafentry_memsize(le));
|
||||||
|
lazy_assert(r);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
toku_omt_iterate(bn->buffer, verify_in_mempool, &bn->buffer_mempool);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// One stored entry: a key-length header immediately followed by the key
// bytes, which are in turn immediately followed by the leafentry.
struct klpair_struct {
    uint32_t keylen;     // number of key bytes at the start of key_le
    uint8_t  key_le[0];  // key, followed by le
};

typedef struct klpair_struct *KLPAIR;
|
||||||
|
|
||||||
|
// Returns the address of the leafentry inside klpair: the byte that follows
// the klpair->keylen key bytes stored at klpair->key_le.
static LEAFENTRY get_le_from_klpair(KLPAIR klpair){
    uint8_t *after_key = klpair->key_le + klpair->keylen;
    return (LEAFENTRY) after_key;
}
|
||||||
|
|
||||||
|
template<typename omtcmp_t,
|
||||||
|
int (*h)(const DBT &, const omtcmp_t &)>
|
||||||
|
static int wrappy_fun_find(const KLPAIR &klpair, const omtcmp_t &extra) {
|
||||||
|
//TODO: kill this function when we split, and/or use toku_fill_dbt
|
||||||
|
DBT kdbt;
|
||||||
|
kdbt.data = klpair->key_le;
|
||||||
|
kdbt.size = klpair->keylen;
|
||||||
|
return h(kdbt, extra);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename iterate_extra_t,
|
||||||
|
int (*h)(const void * key, const uint32_t keylen, const LEAFENTRY &, const uint32_t idx, iterate_extra_t *const)>
|
||||||
|
static int wrappy_fun_iterate(const KLPAIR &klpair, const uint32_t idx, iterate_extra_t *const extra) {
|
||||||
|
uint32_t keylen = klpair->keylen;
|
||||||
|
void* key = klpair->key_le;
|
||||||
|
LEAFENTRY le = get_le_from_klpair(klpair);
|
||||||
|
return h(key, keylen, le, idx, extra);
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef toku::omt<KLPAIR> klpair_omt_t;
|
||||||
|
// This class stores the data associated with a basement node
|
||||||
|
class bn_data {
|
||||||
|
public:
|
||||||
|
void init_zero(void);
|
||||||
|
void initialize_empty(void);
|
||||||
|
void initialize_from_data(uint32_t num_entries, unsigned char *buf, uint32_t data_size);
|
||||||
|
// globals
|
||||||
|
uint64_t get_memory_size(void);
|
||||||
|
uint64_t get_disk_size(void);
|
||||||
|
void verify_mempool(void);
|
||||||
|
|
||||||
|
// Interact with "omt"
|
||||||
|
uint32_t omt_size(void) const;
|
||||||
|
|
||||||
|
template<typename iterate_extra_t,
|
||||||
|
int (*f)(const void * key, const uint32_t keylen, const LEAFENTRY &, const uint32_t, iterate_extra_t *const)>
|
||||||
|
int omt_iterate(iterate_extra_t *const iterate_extra) const {
|
||||||
|
return omt_iterate_on_range<iterate_extra_t, f>(0, omt_size(), iterate_extra);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename iterate_extra_t,
|
||||||
|
int (*f)(const void * key, const uint32_t keylen, const LEAFENTRY &, const uint32_t, iterate_extra_t *const)>
|
||||||
|
int omt_iterate_on_range(const uint32_t left, const uint32_t right, iterate_extra_t *const iterate_extra) const {
|
||||||
|
return m_buffer.iterate_on_range< iterate_extra_t, wrappy_fun_iterate<iterate_extra_t, f> >(left, right, iterate_extra);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename omtcmp_t,
|
||||||
|
int (*h)(const DBT &, const omtcmp_t &)>
|
||||||
|
int find_zero(const omtcmp_t &extra, LEAFENTRY *const value, void** key, uint32_t* keylen, uint32_t *const idxp) const {
|
||||||
|
KLPAIR klpair = NULL;
|
||||||
|
int r = m_buffer.find_zero< omtcmp_t, wrappy_fun_find<omtcmp_t, h> >(extra, &klpair, idxp);
|
||||||
|
if (r == 0) {
|
||||||
|
if (value) {
|
||||||
|
*value = get_le_from_klpair(klpair);
|
||||||
|
}
|
||||||
|
if (key) {
|
||||||
|
paranoid_invariant(keylen != NULL);
|
||||||
|
*key = klpair->key_le;
|
||||||
|
*keylen = klpair->keylen;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
paranoid_invariant(keylen == NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename omtcmp_t,
|
||||||
|
int (*h)(const DBT &, const omtcmp_t &)>
|
||||||
|
int find(const omtcmp_t &extra, int direction, LEAFENTRY *const value, void** key, uint32_t* keylen, uint32_t *const idxp) const {
|
||||||
|
KLPAIR klpair = NULL;
|
||||||
|
int r = m_buffer.find< omtcmp_t, wrappy_fun_find<omtcmp_t, h> >(extra, direction, &klpair, idxp);
|
||||||
|
if (r == 0) {
|
||||||
|
if (value) {
|
||||||
|
*value = get_le_from_klpair(klpair);
|
||||||
|
}
|
||||||
|
if (key) {
|
||||||
|
paranoid_invariant(keylen != NULL);
|
||||||
|
*key = klpair->key_le;
|
||||||
|
*keylen = klpair->keylen;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
paranoid_invariant(keylen == NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
// get info about a single leafentry by index
|
||||||
|
int fetch_le(uint32_t idx, LEAFENTRY *le);
|
||||||
|
int fetch_klpair(uint32_t idx, LEAFENTRY *le, uint32_t *len, void** key);
|
||||||
|
int fetch_klpair_disksize(uint32_t idx, size_t *size);
|
||||||
|
int fetch_le_key_and_len(uint32_t idx, uint32_t *len, void** key);
|
||||||
|
|
||||||
|
// Interact with another bn_data
|
||||||
|
void move_leafentries_to(BN_DATA dest_bd,
|
||||||
|
uint32_t lbi, //lower bound inclusive
|
||||||
|
uint32_t ube //upper bound exclusive
|
||||||
|
);
|
||||||
|
|
||||||
|
void destroy(void);
|
||||||
|
|
||||||
|
// Replaces contents, into brand new mempool.
|
||||||
|
// Returns old mempool base, expects caller to free it.
|
||||||
|
void replace_contents_with_clone_of_sorted_array(
|
||||||
|
uint32_t num_les,
|
||||||
|
const void** old_key_ptrs,
|
||||||
|
uint32_t* old_keylens,
|
||||||
|
LEAFENTRY* old_les,
|
||||||
|
size_t *le_sizes,
|
||||||
|
size_t mempool_size
|
||||||
|
);
|
||||||
|
|
||||||
|
void clone(bn_data* orig_bn_data);
|
||||||
|
void delete_leafentry (
|
||||||
|
uint32_t idx,
|
||||||
|
uint32_t keylen,
|
||||||
|
uint32_t old_le_size
|
||||||
|
);
|
||||||
|
void get_space_for_overwrite(uint32_t idx, const void* keyp, uint32_t keylen, uint32_t old_size, uint32_t new_size, LEAFENTRY* new_le_space);
|
||||||
|
void get_space_for_insert(uint32_t idx, const void* keyp, uint32_t keylen, size_t size, LEAFENTRY* new_le_space);
|
||||||
|
private:
|
||||||
|
// Private functions
|
||||||
|
KLPAIR mempool_malloc_from_omt(size_t size, void **maybe_free);
|
||||||
|
void omt_compress_kvspace(size_t added_size, void **maybe_free);
|
||||||
|
|
||||||
|
klpair_omt_t m_buffer; // pointers to individual leaf entries
|
||||||
|
struct mempool m_buffer_mempool; // storage for all leaf entries
|
||||||
|
};
|
||||||
|
|
@@ -54,6 +54,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -184,7 +185,7 @@ struct cachefile {
|
|||||||
// they are managed whenever we add or remove a pair from
|
// they are managed whenever we add or remove a pair from
|
||||||
// the cachetable. As of Riddler, this linked list is only used to
|
// the cachetable. As of Riddler, this linked list is only used to
|
||||||
// make cachetable_flush_cachefile more efficient
|
// make cachetable_flush_cachefile more efficient
|
||||||
PAIR cf_head;
|
PAIR cf_head; // doubly linked list that is NOT circular
|
||||||
uint32_t num_pairs; // count on number of pairs in the cachetable belong to this cachefile
|
uint32_t num_pairs; // count on number of pairs in the cachetable belong to this cachefile
|
||||||
|
|
||||||
bool for_checkpoint; //True if part of the in-progress checkpoint
|
bool for_checkpoint; //True if part of the in-progress checkpoint
|
||||||
@@ -197,12 +198,19 @@ struct cachefile {
|
|||||||
int fd; /* Bug: If a file is opened read-only, then it is stuck in read-only. If it is opened read-write, then subsequent writers can write to it too. */
|
int fd; /* Bug: If a file is opened read-only, then it is stuck in read-only. If it is opened read-write, then subsequent writers can write to it too. */
|
||||||
CACHETABLE cachetable;
|
CACHETABLE cachetable;
|
||||||
struct fileid fileid;
|
struct fileid fileid;
|
||||||
|
// the filenum is used as an identifier of the cachefile
|
||||||
|
// for logging and recovery
|
||||||
FILENUM filenum;
|
FILENUM filenum;
|
||||||
|
// number used to generate hashes for blocks in the cachefile
|
||||||
|
// used in toku_cachetable_hash
|
||||||
|
// this used to be the filenum.fileid, but now it is separate
|
||||||
|
uint32_t hash_id;
|
||||||
char *fname_in_env; /* Used for logging */
|
char *fname_in_env; /* Used for logging */
|
||||||
|
|
||||||
void *userdata;
|
void *userdata;
|
||||||
void (*log_fassociate_during_checkpoint)(CACHEFILE cf, void *userdata); // When starting a checkpoint we must log all open files.
|
void (*log_fassociate_during_checkpoint)(CACHEFILE cf, void *userdata); // When starting a checkpoint we must log all open files.
|
||||||
void (*close_userdata)(CACHEFILE cf, int fd, void *userdata, bool lsnvalid, LSN); // when closing the last reference to a cachefile, first call this function.
|
void (*close_userdata)(CACHEFILE cf, int fd, void *userdata, bool lsnvalid, LSN); // when closing the last reference to a cachefile, first call this function.
|
||||||
|
void (*free_userdata)(CACHEFILE cf, void *userdata); // when closing the last reference to a cachefile, first call this function.
|
||||||
void (*begin_checkpoint_userdata)(LSN lsn_of_checkpoint, void *userdata); // before checkpointing cachefiles call this function.
|
void (*begin_checkpoint_userdata)(LSN lsn_of_checkpoint, void *userdata); // before checkpointing cachefiles call this function.
|
||||||
void (*checkpoint_userdata)(CACHEFILE cf, int fd, void *userdata); // when checkpointing a cachefile, call this function.
|
void (*checkpoint_userdata)(CACHEFILE cf, int fd, void *userdata); // when checkpointing a cachefile, call this function.
|
||||||
void (*end_checkpoint_userdata)(CACHEFILE cf, int fd, void *userdata); // after checkpointing cachefiles call this function.
|
void (*end_checkpoint_userdata)(CACHEFILE cf, int fd, void *userdata); // after checkpointing cachefiles call this function.
|
||||||
@@ -373,7 +381,10 @@ public:
|
|||||||
toku_pthread_rwlock_t m_pending_lock_cheap;
|
toku_pthread_rwlock_t m_pending_lock_cheap;
|
||||||
void init();
|
void init();
|
||||||
void destroy();
|
void destroy();
|
||||||
void evict(PAIR pair);
|
void evict_completely(PAIR pair);
|
||||||
|
void evict_from_cachetable(PAIR pair);
|
||||||
|
void evict_from_cachefile(PAIR pair);
|
||||||
|
void add_to_cachetable_only(PAIR p);
|
||||||
void put(PAIR pair);
|
void put(PAIR pair);
|
||||||
PAIR find_pair(CACHEFILE file, CACHEKEY key, uint32_t hash);
|
PAIR find_pair(CACHEFILE file, CACHEKEY key, uint32_t hash);
|
||||||
void pending_pairs_remove (PAIR p);
|
void pending_pairs_remove (PAIR p);
|
||||||
@@ -397,10 +408,10 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
void pair_remove (PAIR p);
|
void pair_remove (PAIR p);
|
||||||
void cf_pairs_remove (PAIR p);
|
void remove_from_hash_chain(PAIR p);
|
||||||
void add_to_cf_list (PAIR p);
|
void add_to_cf_list (PAIR p);
|
||||||
void add_to_clock (PAIR p);
|
void add_to_clock (PAIR p);
|
||||||
PAIR remove_from_hash_chain (PAIR remove_me, PAIR list);
|
void add_to_hash_chain(PAIR p);
|
||||||
};
|
};
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
@@ -415,10 +426,28 @@ public:
|
|||||||
void read_unlock();
|
void read_unlock();
|
||||||
void write_lock();
|
void write_lock();
|
||||||
void write_unlock();
|
void write_unlock();
|
||||||
|
int cachefile_of_iname_in_env(const char *iname_in_env, CACHEFILE *cf);
|
||||||
|
int cachefile_of_filenum(FILENUM filenum, CACHEFILE *cf);
|
||||||
|
void add_cf_unlocked(CACHEFILE newcf);
|
||||||
|
void add_stale_cf(CACHEFILE newcf);
|
||||||
|
void remove_cf(CACHEFILE cf);
|
||||||
|
void remove_stale_cf_unlocked(CACHEFILE cf);
|
||||||
|
FILENUM reserve_filenum();
|
||||||
|
uint32_t get_new_hash_id_unlocked();
|
||||||
|
CACHEFILE find_cachefile_unlocked(struct fileid* fileid);
|
||||||
|
CACHEFILE find_stale_cachefile_unlocked(struct fileid* fileid);
|
||||||
|
void verify_unused_filenum(FILENUM filenum);
|
||||||
|
bool evict_some_stale_pair(evictor* ev);
|
||||||
|
void free_stale_data(evictor* ev);
|
||||||
// access to these fields are protected by the lock
|
// access to these fields are protected by the lock
|
||||||
CACHEFILE m_head;
|
CACHEFILE m_active_head; // head of CACHEFILEs that are active
|
||||||
|
CACHEFILE m_stale_head; // head of CACHEFILEs that are stale
|
||||||
|
CACHEFILE m_stale_tail; // tail of CACHEFILEs that are stale
|
||||||
FILENUM m_next_filenum_to_use;
|
FILENUM m_next_filenum_to_use;
|
||||||
|
uint32_t m_next_hash_id_to_use;
|
||||||
toku_pthread_rwlock_t m_lock; // this field is public so we are still POD
|
toku_pthread_rwlock_t m_lock; // this field is public so we are still POD
|
||||||
|
private:
|
||||||
|
CACHEFILE find_cachefile_in_list_unlocked(CACHEFILE start, struct fileid* fileid);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@@ -483,14 +512,14 @@ const int EVICTION_PERIOD = 1;
|
|||||||
//
|
//
|
||||||
class evictor {
|
class evictor {
|
||||||
public:
|
public:
|
||||||
void init(long _size_limit, pair_list* _pl, KIBBUTZ _kibbutz, uint32_t eviction_period);
|
void init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, KIBBUTZ _kibbutz, uint32_t eviction_period);
|
||||||
void destroy();
|
void destroy();
|
||||||
void add_pair_attr(PAIR_ATTR attr);
|
void add_pair_attr(PAIR_ATTR attr);
|
||||||
void remove_pair_attr(PAIR_ATTR attr);
|
void remove_pair_attr(PAIR_ATTR attr);
|
||||||
void change_pair_attr(PAIR_ATTR old_attr, PAIR_ATTR new_attr);
|
void change_pair_attr(PAIR_ATTR old_attr, PAIR_ATTR new_attr);
|
||||||
void add_to_size_current(long size);
|
void add_to_size_current(long size);
|
||||||
void remove_from_size_current(long size);
|
void remove_from_size_current(long size);
|
||||||
uint64_t reserve_memory(double fraction);
|
uint64_t reserve_memory(double fraction, uint64_t upper_bound);
|
||||||
void release_reserved_memory(uint64_t reserved_memory);
|
void release_reserved_memory(uint64_t reserved_memory);
|
||||||
void run_eviction_thread();
|
void run_eviction_thread();
|
||||||
void do_partial_eviction(PAIR p, bool pair_mutex_held);
|
void do_partial_eviction(PAIR p, bool pair_mutex_held);
|
||||||
@@ -516,6 +545,7 @@ private:
|
|||||||
int64_t unsafe_read_size_evicting(void) const;
|
int64_t unsafe_read_size_evicting(void) const;
|
||||||
|
|
||||||
pair_list* m_pl;
|
pair_list* m_pl;
|
||||||
|
cachefile_list* m_cf_list;
|
||||||
int64_t m_size_current; // the sum of the sizes of the pairs in the cachetable
|
int64_t m_size_current; // the sum of the sizes of the pairs in the cachetable
|
||||||
// changes to these two values are protected
|
// changes to these two values are protected
|
||||||
// by ev_thread_lock
|
// by ev_thread_lock
|
||||||
@@ -558,6 +588,10 @@ private:
|
|||||||
PARTITIONED_COUNTER m_size_leaf;
|
PARTITIONED_COUNTER m_size_leaf;
|
||||||
PARTITIONED_COUNTER m_size_rollback;
|
PARTITIONED_COUNTER m_size_rollback;
|
||||||
PARTITIONED_COUNTER m_size_cachepressure;
|
PARTITIONED_COUNTER m_size_cachepressure;
|
||||||
|
PARTITIONED_COUNTER m_wait_pressure_count;
|
||||||
|
PARTITIONED_COUNTER m_wait_pressure_time;
|
||||||
|
PARTITIONED_COUNTER m_long_wait_pressure_count;
|
||||||
|
PARTITIONED_COUNTER m_long_wait_pressure_time;
|
||||||
|
|
||||||
KIBBUTZ m_kibbutz;
|
KIBBUTZ m_kibbutz;
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@@ -53,6 +53,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -167,7 +168,7 @@ int toku_cachetable_openfd(CACHEFILE *,CACHETABLE, int fd,
|
|||||||
const char *fname_relative_to_env);
|
const char *fname_relative_to_env);
|
||||||
int toku_cachetable_openfd_with_filenum (CACHEFILE *,CACHETABLE, int fd,
|
int toku_cachetable_openfd_with_filenum (CACHEFILE *,CACHETABLE, int fd,
|
||||||
const char *fname_in_env,
|
const char *fname_in_env,
|
||||||
FILENUM filenum);
|
FILENUM filenum, bool* was_open);
|
||||||
|
|
||||||
// reserve a unique filenum
|
// reserve a unique filenum
|
||||||
FILENUM toku_cachetable_reserve_filenum(CACHETABLE ct);
|
FILENUM toku_cachetable_reserve_filenum(CACHETABLE ct);
|
||||||
@@ -176,7 +177,7 @@ FILENUM toku_cachetable_reserve_filenum(CACHETABLE ct);
|
|||||||
// Returns the amount reserved.
|
// Returns the amount reserved.
|
||||||
// To return the memory to the cachetable, call toku_cachetable_release_reserved_memory
|
// To return the memory to the cachetable, call toku_cachetable_release_reserved_memory
|
||||||
// Requires 0<fraction<1.
|
// Requires 0<fraction<1.
|
||||||
uint64_t toku_cachetable_reserve_memory(CACHETABLE, double fraction);
|
uint64_t toku_cachetable_reserve_memory(CACHETABLE, double fraction, uint64_t upper_bound);
|
||||||
void toku_cachetable_release_reserved_memory(CACHETABLE, uint64_t);
|
void toku_cachetable_release_reserved_memory(CACHETABLE, uint64_t);
|
||||||
|
|
||||||
// cachefile operations
|
// cachefile operations
|
||||||
@@ -275,6 +276,7 @@ typedef void (*CACHETABLE_REMOVE_KEY)(CACHEKEY* cachekey, bool for_checkpoint, v
|
|||||||
void toku_cachefile_set_userdata(CACHEFILE cf, void *userdata,
|
void toku_cachefile_set_userdata(CACHEFILE cf, void *userdata,
|
||||||
void (*log_fassociate_during_checkpoint)(CACHEFILE, void*),
|
void (*log_fassociate_during_checkpoint)(CACHEFILE, void*),
|
||||||
void (*close_userdata)(CACHEFILE, int, void*, bool, LSN),
|
void (*close_userdata)(CACHEFILE, int, void*, bool, LSN),
|
||||||
|
void (*free_userdata)(CACHEFILE, void*),
|
||||||
void (*checkpoint_userdata)(CACHEFILE, int, void*),
|
void (*checkpoint_userdata)(CACHEFILE, int, void*),
|
||||||
void (*begin_checkpoint_userdata)(LSN, void*),
|
void (*begin_checkpoint_userdata)(LSN, void*),
|
||||||
void (*end_checkpoint_userdata)(CACHEFILE, int, void*),
|
void (*end_checkpoint_userdata)(CACHEFILE, int, void*),
|
||||||
@@ -523,12 +525,6 @@ int toku_cachefile_count_pinned (CACHEFILE, int /*printthem*/ );
|
|||||||
// If oplsn_valid is true then use oplsn as the LSN of the close instead of asking the logger. oplsn_valid being true is only allowed during recovery, and requires that you are removing the last reference (otherwise the lsn wouldn't make it in.)
|
// If oplsn_valid is true then use oplsn as the LSN of the close instead of asking the logger. oplsn_valid being true is only allowed during recovery, and requires that you are removing the last reference (otherwise the lsn wouldn't make it in.)
|
||||||
void toku_cachefile_close (CACHEFILE*, bool oplsn_valid, LSN oplsn);
|
void toku_cachefile_close (CACHEFILE*, bool oplsn_valid, LSN oplsn);
|
||||||
|
|
||||||
// Flush the cachefile.
|
|
||||||
// Effect: Flush everything owned by the cachefile from the cachetable. All dirty
|
|
||||||
// blocks are written. All unpinned blocks are evicted from the cachetable.
|
|
||||||
// Returns: 0 if success, otherwise returns an error number.
|
|
||||||
void toku_cachefile_flush(CACHEFILE);
|
|
||||||
|
|
||||||
// Return on success (different from pread and pwrite)
|
// Return on success (different from pread and pwrite)
|
||||||
//int cachefile_pwrite (CACHEFILE, const void *buf, size_t count, toku_off_t offset);
|
//int cachefile_pwrite (CACHEFILE, const void *buf, size_t count, toku_off_t offset);
|
||||||
//int cachefile_pread (CACHEFILE, void *buf, size_t count, toku_off_t offset);
|
//int cachefile_pread (CACHEFILE, void *buf, size_t count, toku_off_t offset);
|
||||||
@@ -542,12 +538,6 @@ int toku_cachefile_get_fd (CACHEFILE);
|
|||||||
// Return the filename
|
// Return the filename
|
||||||
char * toku_cachefile_fname_in_env (CACHEFILE cf);
|
char * toku_cachefile_fname_in_env (CACHEFILE cf);
|
||||||
|
|
||||||
// For test programs only.
|
|
||||||
// Set the cachefile's fd and fname.
|
|
||||||
// Effect: Bind the cachefile to a new fd and fname. The old fd is closed.
|
|
||||||
// Returns: 0 if success, otherwise an error number
|
|
||||||
int toku_cachefile_set_fd (CACHEFILE cf, int fd, const char *fname_relative_to_env);
|
|
||||||
|
|
||||||
// Make it so when the cachefile closes, the underlying file is unlinked
|
// Make it so when the cachefile closes, the underlying file is unlinked
|
||||||
void toku_cachefile_unlink_on_close(CACHEFILE cf);
|
void toku_cachefile_unlink_on_close(CACHEFILE cf);
|
||||||
|
|
||||||
@@ -609,6 +599,10 @@ typedef enum {
|
|||||||
CT_CLEANER_EXECUTIONS, // number of times the cleaner thread's loop has executed
|
CT_CLEANER_EXECUTIONS, // number of times the cleaner thread's loop has executed
|
||||||
CT_CLEANER_PERIOD,
|
CT_CLEANER_PERIOD,
|
||||||
CT_CLEANER_ITERATIONS, // number of times the cleaner thread runs the cleaner per period
|
CT_CLEANER_ITERATIONS, // number of times the cleaner thread runs the cleaner per period
|
||||||
|
CT_WAIT_PRESSURE_COUNT,
|
||||||
|
CT_WAIT_PRESSURE_TIME,
|
||||||
|
CT_LONG_WAIT_PRESSURE_COUNT,
|
||||||
|
CT_LONG_WAIT_PRESSURE_TIME,
|
||||||
CT_STATUS_NUM_ROWS
|
CT_STATUS_NUM_ROWS
|
||||||
} ct_status_entry;
|
} ct_status_entry;
|
||||||
|
|
||||||
|
@@ -49,6 +49,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -163,6 +164,11 @@ status_init(void) {
|
|||||||
STATUS_INIT(CP_WAITERS_MAX, nullptr, UINT64, "waiters max", TOKU_ENGINE_STATUS);
|
STATUS_INIT(CP_WAITERS_MAX, nullptr, UINT64, "waiters max", TOKU_ENGINE_STATUS);
|
||||||
STATUS_INIT(CP_CLIENT_WAIT_ON_MO, nullptr, UINT64, "non-checkpoint client wait on mo lock", TOKU_ENGINE_STATUS);
|
STATUS_INIT(CP_CLIENT_WAIT_ON_MO, nullptr, UINT64, "non-checkpoint client wait on mo lock", TOKU_ENGINE_STATUS);
|
||||||
STATUS_INIT(CP_CLIENT_WAIT_ON_CS, nullptr, UINT64, "non-checkpoint client wait on cs lock", TOKU_ENGINE_STATUS);
|
STATUS_INIT(CP_CLIENT_WAIT_ON_CS, nullptr, UINT64, "non-checkpoint client wait on cs lock", TOKU_ENGINE_STATUS);
|
||||||
|
|
||||||
|
STATUS_INIT(CP_BEGIN_TIME, CHECKPOINT_BEGIN_TIME, UINT64, "checkpoint begin time", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
||||||
|
STATUS_INIT(CP_LONG_BEGIN_COUNT, CHECKPOINT_LONG_BEGIN_COUNT, UINT64, "long checkpoint begin count", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
||||||
|
STATUS_INIT(CP_LONG_BEGIN_TIME, CHECKPOINT_LONG_BEGIN_TIME, UINT64, "long checkpoint begin time", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
||||||
|
|
||||||
cp_status.initialized = true;
|
cp_status.initialized = true;
|
||||||
}
|
}
|
||||||
#undef STATUS_INIT
|
#undef STATUS_INIT
|
||||||
@@ -188,12 +194,11 @@ static toku_pthread_rwlock_t low_priority_multi_operation_lock;
|
|||||||
static bool initialized = false; // sanity check
|
static bool initialized = false; // sanity check
|
||||||
static volatile bool locked_mo = false; // true when the multi_operation write lock is held (by checkpoint)
|
static volatile bool locked_mo = false; // true when the multi_operation write lock is held (by checkpoint)
|
||||||
static volatile bool locked_cs = false; // true when the checkpoint_safe write lock is held (by checkpoint)
|
static volatile bool locked_cs = false; // true when the checkpoint_safe write lock is held (by checkpoint)
|
||||||
|
static volatile uint64_t toku_checkpoint_long_threshold = 1000000;
|
||||||
|
|
||||||
// Note following static functions are called from checkpoint internal logic only,
|
// Note following static functions are called from checkpoint internal logic only,
|
||||||
// and use the "writer" calls for locking and unlocking.
|
// and use the "writer" calls for locking and unlocking.
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
multi_operation_lock_init(void) {
|
multi_operation_lock_init(void) {
|
||||||
pthread_rwlockattr_t attr;
|
pthread_rwlockattr_t attr;
|
||||||
@@ -335,7 +340,9 @@ toku_checkpoint(CHECKPOINTER cp, TOKULOGGER logger,
|
|||||||
|
|
||||||
SET_CHECKPOINT_FOOTPRINT(30);
|
SET_CHECKPOINT_FOOTPRINT(30);
|
||||||
STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN) = time(NULL);
|
STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN) = time(NULL);
|
||||||
|
uint64_t t_checkpoint_begin_start = toku_current_time_microsec();
|
||||||
toku_cachetable_begin_checkpoint(cp, logger);
|
toku_cachetable_begin_checkpoint(cp, logger);
|
||||||
|
uint64_t t_checkpoint_begin_end = toku_current_time_microsec();
|
||||||
|
|
||||||
toku_ft_open_close_unlock();
|
toku_ft_open_close_unlock();
|
||||||
multi_operation_checkpoint_unlock();
|
multi_operation_checkpoint_unlock();
|
||||||
@@ -357,6 +364,12 @@ toku_checkpoint(CHECKPOINTER cp, TOKULOGGER logger,
|
|||||||
STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_END) = time(NULL);
|
STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_END) = time(NULL);
|
||||||
STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE) = STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN);
|
STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE) = STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN);
|
||||||
STATUS_VALUE(CP_CHECKPOINT_COUNT)++;
|
STATUS_VALUE(CP_CHECKPOINT_COUNT)++;
|
||||||
|
uint64_t duration = t_checkpoint_begin_end - t_checkpoint_begin_start;
|
||||||
|
STATUS_VALUE(CP_BEGIN_TIME) += duration;
|
||||||
|
if (duration >= toku_checkpoint_long_threshold) {
|
||||||
|
STATUS_VALUE(CP_LONG_BEGIN_TIME) += duration;
|
||||||
|
STATUS_VALUE(CP_LONG_BEGIN_COUNT) += 1;
|
||||||
|
}
|
||||||
STATUS_VALUE(CP_FOOTPRINT) = 0;
|
STATUS_VALUE(CP_FOOTPRINT) = 0;
|
||||||
|
|
||||||
checkpoint_safe_checkpoint_unlock();
|
checkpoint_safe_checkpoint_unlock();
|
||||||
|
@@ -52,6 +52,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -185,7 +186,10 @@ typedef enum {
|
|||||||
CP_WAITERS_MAX, // max threads ever simultaneously waiting for the checkpoint_safe lock to perform a checkpoint
|
CP_WAITERS_MAX, // max threads ever simultaneously waiting for the checkpoint_safe lock to perform a checkpoint
|
||||||
CP_CLIENT_WAIT_ON_MO, // how many times a client thread waited to take the multi_operation lock, not for checkpoint
|
CP_CLIENT_WAIT_ON_MO, // how many times a client thread waited to take the multi_operation lock, not for checkpoint
|
||||||
CP_CLIENT_WAIT_ON_CS, // how many times a client thread waited for the checkpoint_safe lock, not for checkpoint
|
CP_CLIENT_WAIT_ON_CS, // how many times a client thread waited for the checkpoint_safe lock, not for checkpoint
|
||||||
CP_STATUS_NUM_ROWS // number of rows in this status array
|
CP_BEGIN_TIME,
|
||||||
|
CP_LONG_BEGIN_TIME,
|
||||||
|
CP_LONG_BEGIN_COUNT,
|
||||||
|
CP_STATUS_NUM_ROWS // number of rows in this status array. must be last.
|
||||||
} cp_status_entry;
|
} cp_status_entry;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -49,6 +49,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -49,6 +49,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -52,6 +52,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -168,17 +169,9 @@ int toku_fifo_enq(FIFO fifo, const void *key, unsigned int keylen, const void *d
|
|||||||
if (need_space_total > fifo->memory_size) {
|
if (need_space_total > fifo->memory_size) {
|
||||||
// Out of memory at the end.
|
// Out of memory at the end.
|
||||||
int next_2 = next_power_of_two(need_space_total);
|
int next_2 = next_power_of_two(need_space_total);
|
||||||
if ((2*next_2 > fifo->memory_size)
|
|
||||||
|| (8*next_2 < fifo->memory_size)) {
|
|
||||||
// resize the fifo
|
// resize the fifo
|
||||||
char *XMALLOC_N(next_2, newmem);
|
XREALLOC_N(next_2, fifo->memory);
|
||||||
char *oldmem = fifo->memory;
|
|
||||||
if (newmem==0) return ENOMEM;
|
|
||||||
memcpy(newmem, oldmem, fifo->memory_used);
|
|
||||||
fifo->memory_size = next_2;
|
fifo->memory_size = next_2;
|
||||||
fifo->memory = newmem;
|
|
||||||
toku_free(oldmem);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
struct fifo_entry *entry = (struct fifo_entry *)(fifo->memory + fifo->memory_used);
|
struct fifo_entry *entry = (struct fifo_entry *)(fifo->memory + fifo->memory_used);
|
||||||
fifo_entry_set_msg_type(entry, type);
|
fifo_entry_set_msg_type(entry, type);
|
||||||
|
@@ -52,6 +52,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -53,6 +53,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -52,6 +52,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -645,16 +646,6 @@ handle_split_of_child(
|
|||||||
VERIFY_NODE(t, childb);
|
VERIFY_NODE(t, childb);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
|
||||||
UU() verify_in_mempool(OMTVALUE lev, uint32_t UU(idx), void *mpv)
|
|
||||||
{
|
|
||||||
LEAFENTRY CAST_FROM_VOIDP(le, lev);
|
|
||||||
struct mempool *CAST_FROM_VOIDP(mp, mpv);
|
|
||||||
int r = toku_mempool_inrange(mp, le, leafentry_memsize(le));
|
|
||||||
lazy_assert(r);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
verify_all_in_mempool(FTNODE UU() node)
|
verify_all_in_mempool(FTNODE UU() node)
|
||||||
{
|
{
|
||||||
@@ -662,8 +653,7 @@ verify_all_in_mempool(FTNODE UU() node)
|
|||||||
if (node->height==0) {
|
if (node->height==0) {
|
||||||
for (int i = 0; i < node->n_children; i++) {
|
for (int i = 0; i < node->n_children; i++) {
|
||||||
invariant(BP_STATE(node,i) == PT_AVAIL);
|
invariant(BP_STATE(node,i) == PT_AVAIL);
|
||||||
BASEMENTNODE bn = BLB(node, i);
|
BLB_DATA(node, i)->verify_mempool();
|
||||||
toku_omt_iterate(bn->buffer, verify_in_mempool, &bn->buffer_mempool);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@@ -677,15 +667,7 @@ ftleaf_disk_size(FTNODE node)
|
|||||||
toku_assert_entire_node_in_memory(node);
|
toku_assert_entire_node_in_memory(node);
|
||||||
uint64_t retval = 0;
|
uint64_t retval = 0;
|
||||||
for (int i = 0; i < node->n_children; i++) {
|
for (int i = 0; i < node->n_children; i++) {
|
||||||
OMT curr_buffer = BLB_BUFFER(node, i);
|
retval += BLB_DATA(node, i)->get_disk_size();
|
||||||
const uint32_t n_leafentries = toku_omt_size(curr_buffer);
|
|
||||||
for (uint32_t j=0; j < n_leafentries; j++) {
|
|
||||||
OMTVALUE v;
|
|
||||||
int r = toku_omt_fetch(curr_buffer, j, &v);
|
|
||||||
assert_zero(r);
|
|
||||||
LEAFENTRY CAST_FROM_VOIDP(curr_le, v);
|
|
||||||
retval += leafentry_disksize(curr_le);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
@@ -704,16 +686,16 @@ ftleaf_get_split_loc(
|
|||||||
switch (split_mode) {
|
switch (split_mode) {
|
||||||
case SPLIT_LEFT_HEAVY: {
|
case SPLIT_LEFT_HEAVY: {
|
||||||
*num_left_bns = node->n_children;
|
*num_left_bns = node->n_children;
|
||||||
*num_left_les = toku_omt_size(BLB_BUFFER(node, *num_left_bns - 1));
|
*num_left_les = BLB_DATA(node, *num_left_bns - 1)->omt_size();
|
||||||
if (*num_left_les == 0) {
|
if (*num_left_les == 0) {
|
||||||
*num_left_bns = node->n_children - 1;
|
*num_left_bns = node->n_children - 1;
|
||||||
*num_left_les = toku_omt_size(BLB_BUFFER(node, *num_left_bns - 1));
|
*num_left_les = BLB_DATA(node, *num_left_bns - 1)->omt_size();
|
||||||
}
|
}
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
case SPLIT_RIGHT_HEAVY: {
|
case SPLIT_RIGHT_HEAVY: {
|
||||||
*num_left_bns = 1;
|
*num_left_bns = 1;
|
||||||
*num_left_les = toku_omt_size(BLB_BUFFER(node, 0)) ? 1 : 0;
|
*num_left_les = BLB_DATA(node, 0)->omt_size() ? 1 : 0;
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
case SPLIT_EVENLY: {
|
case SPLIT_EVENLY: {
|
||||||
@@ -722,14 +704,13 @@ ftleaf_get_split_loc(
|
|||||||
uint64_t sumlesizes = ftleaf_disk_size(node);
|
uint64_t sumlesizes = ftleaf_disk_size(node);
|
||||||
uint32_t size_so_far = 0;
|
uint32_t size_so_far = 0;
|
||||||
for (int i = 0; i < node->n_children; i++) {
|
for (int i = 0; i < node->n_children; i++) {
|
||||||
OMT curr_buffer = BLB_BUFFER(node, i);
|
BN_DATA bd = BLB_DATA(node, i);
|
||||||
uint32_t n_leafentries = toku_omt_size(curr_buffer);
|
uint32_t n_leafentries = bd->omt_size();
|
||||||
for (uint32_t j=0; j < n_leafentries; j++) {
|
for (uint32_t j=0; j < n_leafentries; j++) {
|
||||||
OMTVALUE lev;
|
size_t size_this_le;
|
||||||
int r = toku_omt_fetch(curr_buffer, j, &lev);
|
int rr = bd->fetch_klpair_disksize(j, &size_this_le);
|
||||||
assert_zero(r);
|
invariant_zero(rr);
|
||||||
LEAFENTRY CAST_FROM_VOIDP(curr_le, lev);
|
size_so_far += size_this_le;
|
||||||
size_so_far += leafentry_disksize(curr_le);
|
|
||||||
if (size_so_far >= sumlesizes/2) {
|
if (size_so_far >= sumlesizes/2) {
|
||||||
*num_left_bns = i + 1;
|
*num_left_bns = i + 1;
|
||||||
*num_left_les = j + 1;
|
*num_left_les = j + 1;
|
||||||
@@ -741,7 +722,7 @@ ftleaf_get_split_loc(
|
|||||||
(*num_left_les)--;
|
(*num_left_les)--;
|
||||||
} else if (*num_left_bns > 1) {
|
} else if (*num_left_bns > 1) {
|
||||||
(*num_left_bns)--;
|
(*num_left_bns)--;
|
||||||
*num_left_les = toku_omt_size(BLB_BUFFER(node, *num_left_bns - 1));
|
*num_left_les = BLB_DATA(node, *num_left_bns - 1)->omt_size();
|
||||||
} else {
|
} else {
|
||||||
// we are trying to split a leaf with only one
|
// we are trying to split a leaf with only one
|
||||||
// leafentry in it
|
// leafentry in it
|
||||||
@@ -766,39 +747,11 @@ move_leafentries(
|
|||||||
BASEMENTNODE dest_bn,
|
BASEMENTNODE dest_bn,
|
||||||
BASEMENTNODE src_bn,
|
BASEMENTNODE src_bn,
|
||||||
uint32_t lbi, //lower bound inclusive
|
uint32_t lbi, //lower bound inclusive
|
||||||
uint32_t ube, //upper bound exclusive
|
uint32_t ube //upper bound exclusive
|
||||||
uint32_t* num_bytes_moved
|
|
||||||
)
|
)
|
||||||
//Effect: move leafentries in the range [lbi, upe) from src_omt to newly created dest_omt
|
//Effect: move leafentries in the range [lbi, upe) from src_omt to newly created dest_omt
|
||||||
{
|
{
|
||||||
paranoid_invariant(lbi < ube);
|
src_bn->data_buffer.move_leafentries_to(&dest_bn->data_buffer, lbi, ube);
|
||||||
OMTVALUE *XMALLOC_N(ube-lbi, newleafpointers); // create new omt
|
|
||||||
|
|
||||||
size_t mpsize = toku_mempool_get_used_space(&src_bn->buffer_mempool); // overkill, but safe
|
|
||||||
struct mempool *dest_mp = &dest_bn->buffer_mempool;
|
|
||||||
struct mempool *src_mp = &src_bn->buffer_mempool;
|
|
||||||
toku_mempool_construct(dest_mp, mpsize);
|
|
||||||
|
|
||||||
uint32_t i = 0;
|
|
||||||
*num_bytes_moved = 0;
|
|
||||||
for (i = lbi; i < ube; i++) {
|
|
||||||
OMTVALUE lev;
|
|
||||||
int r = toku_omt_fetch(src_bn->buffer, i, &lev);
|
|
||||||
assert_zero(r);
|
|
||||||
LEAFENTRY CAST_FROM_VOIDP(curr_le, lev);
|
|
||||||
size_t le_size = leafentry_memsize(curr_le);
|
|
||||||
*num_bytes_moved += leafentry_disksize(curr_le);
|
|
||||||
LEAFENTRY CAST_FROM_VOIDP(new_le, toku_mempool_malloc(dest_mp, le_size, 1));
|
|
||||||
memcpy(new_le, curr_le, le_size);
|
|
||||||
newleafpointers[i-lbi] = new_le;
|
|
||||||
toku_mempool_mfree(src_mp, curr_le, le_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
toku_omt_create_steal_sorted_array(&dest_bn->buffer, &newleafpointers, ube-lbi, ube-lbi);
|
|
||||||
// now remove the elements from src_omt
|
|
||||||
for (i=ube-1; i >= lbi; i--) {
|
|
||||||
toku_omt_delete_at(src_bn->buffer, i);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ftnode_finalize_split(FTNODE node, FTNODE B, MSN max_msn_applied_to_node) {
|
static void ftnode_finalize_split(FTNODE node, FTNODE B, MSN max_msn_applied_to_node) {
|
||||||
@@ -895,7 +848,7 @@ ftleaf_split(
|
|||||||
ftleaf_get_split_loc(node, split_mode, &num_left_bns, &num_left_les);
|
ftleaf_get_split_loc(node, split_mode, &num_left_bns, &num_left_les);
|
||||||
{
|
{
|
||||||
// did we split right on the boundary between basement nodes?
|
// did we split right on the boundary between basement nodes?
|
||||||
const bool split_on_boundary = (num_left_les == 0) || (num_left_les == (int) toku_omt_size(BLB_BUFFER(node, num_left_bns - 1)));
|
const bool split_on_boundary = (num_left_les == 0) || (num_left_les == (int) BLB_DATA(node, num_left_bns - 1)->omt_size());
|
||||||
// Now we know where we are going to break it
|
// Now we know where we are going to break it
|
||||||
// the two nodes will have a total of n_children+1 basement nodes
|
// the two nodes will have a total of n_children+1 basement nodes
|
||||||
// and n_children-1 pivots
|
// and n_children-1 pivots
|
||||||
@@ -950,18 +903,15 @@ ftleaf_split(
|
|||||||
// handle the move of a subset of data in last_bn_on_left from node to B
|
// handle the move of a subset of data in last_bn_on_left from node to B
|
||||||
if (!split_on_boundary) {
|
if (!split_on_boundary) {
|
||||||
BP_STATE(B,curr_dest_bn_index) = PT_AVAIL;
|
BP_STATE(B,curr_dest_bn_index) = PT_AVAIL;
|
||||||
uint32_t diff_size = 0;
|
|
||||||
destroy_basement_node(BLB(B, curr_dest_bn_index)); // Destroy B's empty OMT, so I can rebuild it from an array
|
destroy_basement_node(BLB(B, curr_dest_bn_index)); // Destroy B's empty OMT, so I can rebuild it from an array
|
||||||
set_BNULL(B, curr_dest_bn_index);
|
set_BNULL(B, curr_dest_bn_index);
|
||||||
set_BLB(B, curr_dest_bn_index, toku_create_empty_bn_no_buffer());
|
set_BLB(B, curr_dest_bn_index, toku_create_empty_bn_no_buffer());
|
||||||
move_leafentries(BLB(B, curr_dest_bn_index),
|
move_leafentries(BLB(B, curr_dest_bn_index),
|
||||||
BLB(node, curr_src_bn_index),
|
BLB(node, curr_src_bn_index),
|
||||||
num_left_les, // first row to be moved to B
|
num_left_les, // first row to be moved to B
|
||||||
toku_omt_size(BLB_BUFFER(node, curr_src_bn_index)), // number of rows in basement to be split
|
BLB_DATA(node, curr_src_bn_index)->omt_size() // number of rows in basement to be split
|
||||||
&diff_size);
|
);
|
||||||
BLB_MAX_MSN_APPLIED(B, curr_dest_bn_index) = BLB_MAX_MSN_APPLIED(node, curr_src_bn_index);
|
BLB_MAX_MSN_APPLIED(B, curr_dest_bn_index) = BLB_MAX_MSN_APPLIED(node, curr_src_bn_index);
|
||||||
BLB_NBYTESINBUF(node, curr_src_bn_index) -= diff_size;
|
|
||||||
BLB_NBYTESINBUF(B, curr_dest_bn_index) += diff_size;
|
|
||||||
curr_dest_bn_index++;
|
curr_dest_bn_index++;
|
||||||
}
|
}
|
||||||
curr_src_bn_index++;
|
curr_src_bn_index++;
|
||||||
@@ -1001,13 +951,11 @@ ftleaf_split(
|
|||||||
toku_destroy_dbt(&node->childkeys[num_left_bns - 1]);
|
toku_destroy_dbt(&node->childkeys[num_left_bns - 1]);
|
||||||
}
|
}
|
||||||
} else if (splitk) {
|
} else if (splitk) {
|
||||||
OMTVALUE lev;
|
BN_DATA bd = BLB_DATA(node, num_left_bns - 1);
|
||||||
OMT buffer = BLB_BUFFER(node, num_left_bns - 1);
|
|
||||||
int r = toku_omt_fetch(buffer, toku_omt_size(buffer) - 1, &lev);
|
|
||||||
assert_zero(r); // that fetch should have worked.
|
|
||||||
LEAFENTRY CAST_FROM_VOIDP(le, lev);
|
|
||||||
uint32_t keylen;
|
uint32_t keylen;
|
||||||
void *key = le_key_and_len(le, &keylen);
|
void *key;
|
||||||
|
int rr = bd->fetch_le_key_and_len(bd->omt_size() - 1, &keylen, &key);
|
||||||
|
invariant_zero(rr);
|
||||||
toku_memdup_dbt(splitk, key, keylen);
|
toku_memdup_dbt(splitk, key, keylen);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1217,11 +1165,11 @@ merge_leaf_nodes(FTNODE a, FTNODE b)
|
|||||||
a->dirty = 1;
|
a->dirty = 1;
|
||||||
b->dirty = 1;
|
b->dirty = 1;
|
||||||
|
|
||||||
OMT a_last_buffer = BLB_BUFFER(a, a->n_children-1);
|
BN_DATA a_last_bd = BLB_DATA(a, a->n_children-1);
|
||||||
// this bool states if the last basement node in a has any items or not
|
// this bool states if the last basement node in a has any items or not
|
||||||
// If it does, then it stays in the merge. If it does not, the last basement node
|
// If it does, then it stays in the merge. If it does not, the last basement node
|
||||||
// of a gets eliminated because we do not have a pivot to store for it (because it has no elements)
|
// of a gets eliminated because we do not have a pivot to store for it (because it has no elements)
|
||||||
const bool a_has_tail = toku_omt_size(a_last_buffer) > 0;
|
const bool a_has_tail = a_last_bd->omt_size() > 0;
|
||||||
|
|
||||||
// move each basement node from b to a
|
// move each basement node from b to a
|
||||||
// move the pivots, adding one of what used to be max(a)
|
// move the pivots, adding one of what used to be max(a)
|
||||||
@@ -1232,10 +1180,8 @@ merge_leaf_nodes(FTNODE a, FTNODE b)
|
|||||||
BASEMENTNODE bn = BLB(a, lastchild);
|
BASEMENTNODE bn = BLB(a, lastchild);
|
||||||
{
|
{
|
||||||
// verify that last basement in a is empty, then destroy mempool
|
// verify that last basement in a is empty, then destroy mempool
|
||||||
struct mempool * mp = &bn->buffer_mempool;
|
size_t used_space = a_last_bd->get_disk_size();
|
||||||
size_t used_space = toku_mempool_get_used_space(mp);
|
|
||||||
invariant_zero(used_space);
|
invariant_zero(used_space);
|
||||||
toku_mempool_destroy(mp);
|
|
||||||
}
|
}
|
||||||
destroy_basement_node(bn);
|
destroy_basement_node(bn);
|
||||||
set_BNULL(a, a->n_children-1);
|
set_BNULL(a, a->n_children-1);
|
||||||
@@ -1248,12 +1194,10 @@ merge_leaf_nodes(FTNODE a, FTNODE b)
|
|||||||
|
|
||||||
// fill in pivot for what used to be max of node 'a', if it is needed
|
// fill in pivot for what used to be max of node 'a', if it is needed
|
||||||
if (a_has_tail) {
|
if (a_has_tail) {
|
||||||
OMTVALUE lev;
|
|
||||||
int r = toku_omt_fetch(a_last_buffer, toku_omt_size(a_last_buffer) - 1, &lev);
|
|
||||||
assert_zero(r);
|
|
||||||
LEAFENTRY CAST_FROM_VOIDP(le, lev);
|
|
||||||
uint32_t keylen;
|
uint32_t keylen;
|
||||||
void *key = le_key_and_len(le, &keylen);
|
void *key;
|
||||||
|
int rr = a_last_bd->fetch_le_key_and_len(a_last_bd->omt_size() - 1, &keylen, &key);
|
||||||
|
invariant_zero(rr);
|
||||||
toku_memdup_dbt(&a->childkeys[a->n_children-1], key, keylen);
|
toku_memdup_dbt(&a->childkeys[a->n_children-1], key, keylen);
|
||||||
a->totalchildkeylens += keylen;
|
a->totalchildkeylens += keylen;
|
||||||
}
|
}
|
||||||
@@ -1651,7 +1595,7 @@ static void ft_flush_some_child(
|
|||||||
//VERIFY_NODE(brt, child);
|
//VERIFY_NODE(brt, child);
|
||||||
|
|
||||||
// only do the following work if there is a flush to perform
|
// only do the following work if there is a flush to perform
|
||||||
if (toku_bnc_n_entries(BNC(parent, childnum)) > 0) {
|
if (toku_bnc_n_entries(BNC(parent, childnum)) > 0 || parent->height == 1) {
|
||||||
if (!parent->dirty) {
|
if (!parent->dirty) {
|
||||||
dirtied++;
|
dirtied++;
|
||||||
parent->dirty = 1;
|
parent->dirty = 1;
|
||||||
|
@@ -52,6 +52,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -222,10 +223,14 @@ void toku_ft_hot_status_init(void) __attribute__((__constructor__));
|
|||||||
void toku_ft_hot_get_status(FT_HOT_STATUS);
|
void toku_ft_hot_get_status(FT_HOT_STATUS);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Takes given FT and pushes all pending messages to the leaf nodes.
|
* Takes given FT and pushes all pending messages between left and right to the leaf nodes.
|
||||||
|
* All messages between left and right (inclusive) will be pushed, as will some others
|
||||||
|
* that happen to share buffers with messages near the boundary.
|
||||||
|
* If left is NULL, messages from beginning of FT are pushed. If right is NULL, that means
|
||||||
|
* we go until the end of the FT.
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
toku_ft_hot_optimize(FT_HANDLE brt,
|
toku_ft_hot_optimize(FT_HANDLE brt, DBT* left, DBT* right,
|
||||||
int (*progress_callback)(void *extra, float progress),
|
int (*progress_callback)(void *extra, float progress),
|
||||||
void *progress_extra);
|
void *progress_extra);
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -155,6 +156,16 @@ hot_set_highest_key(struct hot_flusher_extra *flusher)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
hot_set_start_key(struct hot_flusher_extra *flusher, const DBT* start)
|
||||||
|
{
|
||||||
|
toku_destroy_dbt(&flusher->highest_pivot_key);
|
||||||
|
if (start != NULL) {
|
||||||
|
// Otherwise, let's copy all the contents from one key to the other.
|
||||||
|
toku_clone_dbt(&flusher->highest_pivot_key, *start);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
hot_just_pick_child(FT h,
|
hot_just_pick_child(FT h,
|
||||||
FTNODE parent,
|
FTNODE parent,
|
||||||
@@ -286,7 +297,7 @@ hot_flusher_destroy(struct hot_flusher_extra *flusher)
|
|||||||
// Entry point for Hot Optimize Table (HOT). Note, this function is
|
// Entry point for Hot Optimize Table (HOT). Note, this function is
|
||||||
// not recursive. It iterates over root-to-leaf paths.
|
// not recursive. It iterates over root-to-leaf paths.
|
||||||
int
|
int
|
||||||
toku_ft_hot_optimize(FT_HANDLE brt,
|
toku_ft_hot_optimize(FT_HANDLE brt, DBT* left, DBT* right,
|
||||||
int (*progress_callback)(void *extra, float progress),
|
int (*progress_callback)(void *extra, float progress),
|
||||||
void *progress_extra)
|
void *progress_extra)
|
||||||
{
|
{
|
||||||
@@ -295,6 +306,7 @@ toku_ft_hot_optimize(FT_HANDLE brt,
|
|||||||
struct flusher_advice advice;
|
struct flusher_advice advice;
|
||||||
|
|
||||||
hot_flusher_init(&advice, &flusher);
|
hot_flusher_init(&advice, &flusher);
|
||||||
|
hot_set_start_key(&flusher, left);
|
||||||
|
|
||||||
uint64_t loop_count = 0;
|
uint64_t loop_count = 0;
|
||||||
MSN msn_at_start_of_hot = ZERO_MSN; // capture msn from root at
|
MSN msn_at_start_of_hot = ZERO_MSN; // capture msn from root at
|
||||||
@@ -368,6 +380,15 @@ toku_ft_hot_optimize(FT_HANDLE brt,
|
|||||||
if (flusher.max_current_key.data == NULL) {
|
if (flusher.max_current_key.data == NULL) {
|
||||||
flusher.rightmost_leaf_seen = 1;
|
flusher.rightmost_leaf_seen = 1;
|
||||||
}
|
}
|
||||||
|
else if (right) {
|
||||||
|
// if we have flushed past the bounds set for us,
|
||||||
|
// set rightmost_leaf_seen so we exit
|
||||||
|
FAKE_DB(db, &brt->ft->cmp_descriptor);
|
||||||
|
int cmp = brt->ft->compare_fun(&db, &flusher.max_current_key, right);
|
||||||
|
if (cmp > 0) {
|
||||||
|
flusher.rightmost_leaf_seen = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Update HOT's progress.
|
// Update HOT's progress.
|
||||||
if (progress_callback != NULL) {
|
if (progress_callback != NULL) {
|
||||||
|
@@ -53,6 +53,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -114,6 +115,7 @@ PATENT RIGHTS GRANT:
|
|||||||
#include "compress.h"
|
#include "compress.h"
|
||||||
#include <util/mempool.h>
|
#include <util/mempool.h>
|
||||||
#include <util/omt.h>
|
#include <util/omt.h>
|
||||||
|
#include "bndata.h"
|
||||||
|
|
||||||
#ifndef FT_FANOUT
|
#ifndef FT_FANOUT
|
||||||
#define FT_FANOUT 16
|
#define FT_FANOUT 16
|
||||||
@@ -244,10 +246,7 @@ uint32_t get_leaf_num_entries(FTNODE node);
|
|||||||
|
|
||||||
// data of an available partition of a leaf ftnode
|
// data of an available partition of a leaf ftnode
|
||||||
struct ftnode_leaf_basement_node {
|
struct ftnode_leaf_basement_node {
|
||||||
OMT buffer; // pointers to individual leaf entries
|
bn_data data_buffer;
|
||||||
struct mempool buffer_mempool; // storage for all leaf entries
|
|
||||||
unsigned int n_bytes_in_buffer; // How many bytes to represent the OMT (including the per-key overheads, ...
|
|
||||||
// ... but not including the overheads for the node.
|
|
||||||
unsigned int seqinsert; // number of sequential inserts to this leaf
|
unsigned int seqinsert; // number of sequential inserts to this leaf
|
||||||
MSN max_msn_applied; // max message sequence number applied
|
MSN max_msn_applied; // max message sequence number applied
|
||||||
bool stale_ancestor_messages_applied;
|
bool stale_ancestor_messages_applied;
|
||||||
@@ -450,9 +449,8 @@ static inline void set_BSB(FTNODE node, int i, SUB_BLOCK sb) {
|
|||||||
// ftnode leaf basementnode macros,
|
// ftnode leaf basementnode macros,
|
||||||
#define BLB_MAX_MSN_APPLIED(node,i) (BLB(node,i)->max_msn_applied)
|
#define BLB_MAX_MSN_APPLIED(node,i) (BLB(node,i)->max_msn_applied)
|
||||||
#define BLB_MAX_DSN_APPLIED(node,i) (BLB(node,i)->max_dsn_applied)
|
#define BLB_MAX_DSN_APPLIED(node,i) (BLB(node,i)->max_dsn_applied)
|
||||||
#define BLB_BUFFER(node,i) (BLB(node,i)->buffer)
|
#define BLB_DATA(node,i) (&(BLB(node,i)->data_buffer))
|
||||||
#define BLB_BUFFER_MEMPOOL(node,i) (BLB(node,i)->buffer_mempool)
|
#define BLB_NBYTESINDATA(node,i) (BLB_DATA(node,i)->get_disk_size())
|
||||||
#define BLB_NBYTESINBUF(node,i) (BLB(node,i)->n_bytes_in_buffer)
|
|
||||||
#define BLB_SEQINSERT(node,i) (BLB(node,i)->seqinsert)
|
#define BLB_SEQINSERT(node,i) (BLB(node,i)->seqinsert)
|
||||||
|
|
||||||
/* pivot flags (must fit in 8 bits) */
|
/* pivot flags (must fit in 8 bits) */
|
||||||
@@ -742,6 +740,7 @@ bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs);
|
|||||||
int toku_ftnode_pf_callback(void* ftnode_pv, void* UU(disk_data), void* read_extraargs, int fd, PAIR_ATTR* sizep);
|
int toku_ftnode_pf_callback(void* ftnode_pv, void* UU(disk_data), void* read_extraargs, int fd, PAIR_ATTR* sizep);
|
||||||
int toku_ftnode_cleaner_callback( void *ftnode_pv, BLOCKNUM blocknum, uint32_t fullhash, void *extraargs);
|
int toku_ftnode_cleaner_callback( void *ftnode_pv, BLOCKNUM blocknum, uint32_t fullhash, void *extraargs);
|
||||||
void toku_evict_bn_from_memory(FTNODE node, int childnum, FT h);
|
void toku_evict_bn_from_memory(FTNODE node, int childnum, FT h);
|
||||||
|
BASEMENTNODE toku_detach_bn(FTNODE node, int childnum);
|
||||||
|
|
||||||
// Given pinned node and pinned child, split child into two
|
// Given pinned node and pinned child, split child into two
|
||||||
// and update node with information about its new child.
|
// and update node with information about its new child.
|
||||||
@@ -773,17 +772,6 @@ static inline CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_node(FT h) {
|
|||||||
|
|
||||||
static const FTNODE null_ftnode=0;
|
static const FTNODE null_ftnode=0;
|
||||||
|
|
||||||
// Values to be used to update ftcursor if a search is successful.
|
|
||||||
struct ft_cursor_leaf_info_to_be {
|
|
||||||
uint32_t index;
|
|
||||||
OMT omt;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Values to be used to pin a leaf for shortcut searches
|
|
||||||
struct ft_cursor_leaf_info {
|
|
||||||
struct ft_cursor_leaf_info_to_be to_be;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* a brt cursor is represented as a kv pair in a tree */
|
/* a brt cursor is represented as a kv pair in a tree */
|
||||||
struct ft_cursor {
|
struct ft_cursor {
|
||||||
struct toku_list cursors_link;
|
struct toku_list cursors_link;
|
||||||
@@ -799,7 +787,6 @@ struct ft_cursor {
|
|||||||
int out_of_range_error;
|
int out_of_range_error;
|
||||||
int direction;
|
int direction;
|
||||||
TOKUTXN ttxn;
|
TOKUTXN ttxn;
|
||||||
struct ft_cursor_leaf_info leaf_info;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -1041,23 +1028,9 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE brt, BLOCKNUM, const char *key, int
|
|||||||
int toku_testsetup_insert_to_nonleaf (FT_HANDLE brt, BLOCKNUM, enum ft_msg_type, const char *key, int keylen, const char *val, int vallen);
|
int toku_testsetup_insert_to_nonleaf (FT_HANDLE brt, BLOCKNUM, enum ft_msg_type, const char *key, int keylen, const char *val, int vallen);
|
||||||
void toku_pin_node_with_min_bfe(FTNODE* node, BLOCKNUM b, FT_HANDLE t);
|
void toku_pin_node_with_min_bfe(FTNODE* node, BLOCKNUM b, FT_HANDLE t);
|
||||||
|
|
||||||
// These two go together to do lookups in a ftnode using the keys in a command.
|
|
||||||
struct cmd_leafval_heaviside_extra {
|
|
||||||
ft_compare_func compare_fun;
|
|
||||||
DESCRIPTOR desc;
|
|
||||||
DBT const * const key;
|
|
||||||
};
|
|
||||||
int toku_cmd_leafval_heaviside (OMTVALUE leafentry, void *extra)
|
|
||||||
__attribute__((__warn_unused_result__));
|
|
||||||
|
|
||||||
// toku_ft_root_put_cmd() accepts non-constant cmd because this is where we set the msn
|
// toku_ft_root_put_cmd() accepts non-constant cmd because this is where we set the msn
|
||||||
void toku_ft_root_put_cmd(FT h, FT_MSG_S * cmd, TXNID oldest_referenced_xid, GC_INFO gc_info);
|
void toku_ft_root_put_cmd(FT h, FT_MSG_S * cmd, TXNID oldest_referenced_xid, GC_INFO gc_info);
|
||||||
|
|
||||||
void *mempool_malloc_from_omt(OMT *omtp, struct mempool *mp, size_t size, void **maybe_free);
|
|
||||||
// Effect: Allocate a new object of size SIZE in MP. If MP runs out of space, allocate new a new mempool space, and copy all the items
|
|
||||||
// from the OMT (which items refer to items in the old mempool) into the new mempool.
|
|
||||||
// If MAYBE_FREE is NULL then free the old mempool's space.
|
|
||||||
// Otherwise, store the old mempool's space in maybe_free.
|
|
||||||
void
|
void
|
||||||
toku_get_node_for_verify(
|
toku_get_node_for_verify(
|
||||||
BLOCKNUM blocknum,
|
BLOCKNUM blocknum,
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@@ -52,6 +52,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -103,7 +104,7 @@ struct ft_search;
|
|||||||
the compare function should be a step function from 0 to 1 for a left to right search
|
the compare function should be a step function from 0 to 1 for a left to right search
|
||||||
and 1 to 0 for a right to left search */
|
and 1 to 0 for a right to left search */
|
||||||
|
|
||||||
typedef int (*ft_search_compare_func_t)(struct ft_search */*so*/, DBT *);
|
typedef int (*ft_search_compare_func_t)(const struct ft_search &, const DBT *);
|
||||||
|
|
||||||
/* the search object contains the compare function, search direction, and the kv pair that
|
/* the search object contains the compare function, search direction, and the kv pair that
|
||||||
is used in the compare function. the context is the user's private data */
|
is used in the compare function. the context is the user's private data */
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -107,24 +108,6 @@ compare_pairs (FT_HANDLE brt, const DBT *a, const DBT *b) {
|
|||||||
return cmp;
|
return cmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
|
||||||
compare_leafentries (FT_HANDLE brt, LEAFENTRY a, LEAFENTRY b) {
|
|
||||||
DBT x,y;
|
|
||||||
FAKE_DB(db, &brt->ft->cmp_descriptor);
|
|
||||||
int cmp = brt->ft->compare_fun(&db,
|
|
||||||
toku_fill_dbt(&x, le_key(a), le_keylen(a)),
|
|
||||||
toku_fill_dbt(&y, le_key(b), le_keylen(b)));
|
|
||||||
return cmp;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
|
||||||
compare_pair_to_leafentry (FT_HANDLE brt, const DBT *a, LEAFENTRY b) {
|
|
||||||
DBT y;
|
|
||||||
FAKE_DB(db, &brt->ft->cmp_descriptor);
|
|
||||||
int cmp = brt->ft->compare_fun(&db, a, toku_fill_dbt(&y, le_key(b), le_keylen(b)));
|
|
||||||
return cmp;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
compare_pair_to_key (FT_HANDLE brt, const DBT *a, bytevec key, ITEMLEN keylen) {
|
compare_pair_to_key (FT_HANDLE brt, const DBT *a, bytevec key, ITEMLEN keylen) {
|
||||||
DBT y;
|
DBT y;
|
||||||
@@ -166,12 +149,12 @@ verify_msg_in_child_buffer(FT_HANDLE brt, enum ft_msg_type type, MSN msn, byteve
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
static LEAFENTRY
|
static DBT
|
||||||
get_ith_leafentry (BASEMENTNODE bn, int i) {
|
get_ith_key_dbt (BASEMENTNODE bn, int i) {
|
||||||
OMTVALUE le_v;
|
DBT kdbt;
|
||||||
int r = toku_omt_fetch(bn->buffer, i, &le_v);
|
int r = bn->data_buffer.fetch_le_key_and_len(i, &kdbt.size, &kdbt.data);
|
||||||
invariant(r == 0); // this is a bad failure if it happens.
|
invariant_zero(r); // this is a bad failure if it happens.
|
||||||
return (LEAFENTRY)le_v;
|
return kdbt;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define VERIFY_ASSERTION(predicate, i, string) ({ \
|
#define VERIFY_ASSERTION(predicate, i, string) ({ \
|
||||||
@@ -441,20 +424,20 @@ toku_verify_ftnode_internal(FT_HANDLE brt,
|
|||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
BASEMENTNODE bn = BLB(node, i);
|
BASEMENTNODE bn = BLB(node, i);
|
||||||
for (uint32_t j = 0; j < toku_omt_size(bn->buffer); j++) {
|
for (uint32_t j = 0; j < bn->data_buffer.omt_size(); j++) {
|
||||||
VERIFY_ASSERTION((rootmsn.msn >= this_msn.msn), 0, "leaf may have latest msn, but cannot be greater than root msn");
|
VERIFY_ASSERTION((rootmsn.msn >= this_msn.msn), 0, "leaf may have latest msn, but cannot be greater than root msn");
|
||||||
LEAFENTRY le = get_ith_leafentry(bn, j);
|
DBT kdbt = get_ith_key_dbt(bn, j);
|
||||||
if (curr_less_pivot) {
|
if (curr_less_pivot) {
|
||||||
int compare = compare_pair_to_leafentry(brt, curr_less_pivot, le);
|
int compare = compare_pairs(brt, curr_less_pivot, &kdbt);
|
||||||
VERIFY_ASSERTION(compare < 0, j, "The leafentry is >= the lower-bound pivot");
|
VERIFY_ASSERTION(compare < 0, j, "The leafentry is >= the lower-bound pivot");
|
||||||
}
|
}
|
||||||
if (curr_geq_pivot) {
|
if (curr_geq_pivot) {
|
||||||
int compare = compare_pair_to_leafentry(brt, curr_geq_pivot, le);
|
int compare = compare_pairs(brt, curr_geq_pivot, &kdbt);
|
||||||
VERIFY_ASSERTION(compare >= 0, j, "The leafentry is < the upper-bound pivot");
|
VERIFY_ASSERTION(compare >= 0, j, "The leafentry is < the upper-bound pivot");
|
||||||
}
|
}
|
||||||
if (0 < j) {
|
if (0 < j) {
|
||||||
LEAFENTRY prev_le = get_ith_leafentry(bn, j-1);
|
DBT prev_key_dbt = get_ith_key_dbt(bn, j-1);
|
||||||
int compare = compare_leafentries(brt, prev_le, le);
|
int compare = compare_pairs(brt, &prev_key_dbt, &kdbt);
|
||||||
VERIFY_ASSERTION(compare < 0, j, "Adjacent leafentries are out of order");
|
VERIFY_ASSERTION(compare < 0, j, "Adjacent leafentries are out of order");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -317,6 +318,11 @@ static void ft_close(CACHEFILE cachefile, int fd, void *header_v, bool oplsn_val
|
|||||||
ft_end_checkpoint(cachefile, fd, header_v);
|
ft_end_checkpoint(cachefile, fd, header_v);
|
||||||
assert(!ft->h->dirty); // dirty bit should be cleared by begin_checkpoint and never set again (because we're closing the dictionary)
|
assert(!ft->h->dirty); // dirty bit should be cleared by begin_checkpoint and never set again (because we're closing the dictionary)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// maps to cf->free_userdata
|
||||||
|
static void ft_free(CACHEFILE cachefile UU(), void *header_v) {
|
||||||
|
FT ft = (FT) header_v;
|
||||||
toku_ft_free(ft);
|
toku_ft_free(ft);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -392,6 +398,7 @@ static void ft_init(FT ft, FT_OPTIONS options, CACHEFILE cf) {
|
|||||||
ft,
|
ft,
|
||||||
ft_log_fassociate_during_checkpoint,
|
ft_log_fassociate_during_checkpoint,
|
||||||
ft_close,
|
ft_close,
|
||||||
|
ft_free,
|
||||||
ft_checkpoint,
|
ft_checkpoint,
|
||||||
ft_begin_checkpoint,
|
ft_begin_checkpoint,
|
||||||
ft_end_checkpoint,
|
ft_end_checkpoint,
|
||||||
@@ -458,7 +465,7 @@ void toku_ft_create(FT *ftp, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TODO: (Zardosht) get rid of brt parameter
|
// TODO: (Zardosht) get rid of brt parameter
|
||||||
int toku_read_ft_and_store_in_cachefile (FT_HANDLE brt, CACHEFILE cf, LSN max_acceptable_lsn, FT *header, bool* was_open)
|
int toku_read_ft_and_store_in_cachefile (FT_HANDLE brt, CACHEFILE cf, LSN max_acceptable_lsn, FT *header)
|
||||||
// If the cachefile already has the header, then just get it.
|
// If the cachefile already has the header, then just get it.
|
||||||
// If the cachefile has not been initialized, then don't modify anything.
|
// If the cachefile has not been initialized, then don't modify anything.
|
||||||
// max_acceptable_lsn is the latest acceptable checkpointed version of the file.
|
// max_acceptable_lsn is the latest acceptable checkpointed version of the file.
|
||||||
@@ -467,13 +474,11 @@ int toku_read_ft_and_store_in_cachefile (FT_HANDLE brt, CACHEFILE cf, LSN max_ac
|
|||||||
FT h;
|
FT h;
|
||||||
if ((h = (FT) toku_cachefile_get_userdata(cf))!=0) {
|
if ((h = (FT) toku_cachefile_get_userdata(cf))!=0) {
|
||||||
*header = h;
|
*header = h;
|
||||||
*was_open = true;
|
|
||||||
assert(brt->options.update_fun == h->update_fun);
|
assert(brt->options.update_fun == h->update_fun);
|
||||||
assert(brt->options.compare_fun == h->compare_fun);
|
assert(brt->options.compare_fun == h->compare_fun);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*was_open = false;
|
|
||||||
FT h = nullptr;
|
FT h = nullptr;
|
||||||
int r;
|
int r;
|
||||||
{
|
{
|
||||||
@@ -494,6 +499,7 @@ int toku_read_ft_and_store_in_cachefile (FT_HANDLE brt, CACHEFILE cf, LSN max_ac
|
|||||||
(void*)h,
|
(void*)h,
|
||||||
ft_log_fassociate_during_checkpoint,
|
ft_log_fassociate_during_checkpoint,
|
||||||
ft_close,
|
ft_close,
|
||||||
|
ft_free,
|
||||||
ft_checkpoint,
|
ft_checkpoint,
|
||||||
ft_begin_checkpoint,
|
ft_begin_checkpoint,
|
||||||
ft_end_checkpoint,
|
ft_end_checkpoint,
|
||||||
@@ -1028,11 +1034,12 @@ struct garbage_helper_extra {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static int
|
static int
|
||||||
garbage_leafentry_helper(OMTVALUE v, uint32_t UU(idx), void *extra) {
|
garbage_leafentry_helper(const void* key UU(), const uint32_t keylen, const LEAFENTRY & le, uint32_t UU(idx), struct garbage_helper_extra * const info) {
|
||||||
struct garbage_helper_extra *CAST_FROM_VOIDP(info, extra);
|
//TODO #warning need to reanalyze for split
|
||||||
LEAFENTRY CAST_FROM_VOIDP(le, v);
|
info->total_space += leafentry_disksize(le) + keylen + sizeof(keylen);
|
||||||
info->total_space += leafentry_disksize(le);
|
if (!le_latest_is_del(le)) {
|
||||||
info->used_space += LE_CLEAN_MEMSIZE(le_latest_keylen(le), le_latest_vallen(le));
|
info->used_space += LE_CLEAN_MEMSIZE(le_latest_vallen(le)) + keylen + sizeof(keylen);
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1052,8 +1059,8 @@ garbage_helper(BLOCKNUM blocknum, int64_t UU(size), int64_t UU(address), void *e
|
|||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < node->n_children; ++i) {
|
for (int i = 0; i < node->n_children; ++i) {
|
||||||
BASEMENTNODE bn = BLB(node, i);
|
BN_DATA bd = BLB_DATA(node, i);
|
||||||
r = toku_omt_iterate(bn->buffer, garbage_leafentry_helper, info);
|
r = bd->omt_iterate<struct garbage_helper_extra, garbage_leafentry_helper>(info);
|
||||||
if (r != 0) {
|
if (r != 0) {
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
@@ -52,6 +52,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -112,7 +113,7 @@ void toku_ft_release_reflock(FT ft);
|
|||||||
void toku_ft_create(FT *ftp, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn);
|
void toku_ft_create(FT *ftp, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn);
|
||||||
void toku_ft_free (FT h);
|
void toku_ft_free (FT h);
|
||||||
|
|
||||||
int toku_read_ft_and_store_in_cachefile (FT_HANDLE brt, CACHEFILE cf, LSN max_acceptable_lsn, FT *header, bool* was_open);
|
int toku_read_ft_and_store_in_cachefile (FT_HANDLE brt, CACHEFILE cf, LSN max_acceptable_lsn, FT *header);
|
||||||
void toku_ft_note_ft_handle_open(FT ft, FT_HANDLE live);
|
void toku_ft_note_ft_handle_open(FT ft, FT_HANDLE live);
|
||||||
|
|
||||||
bool toku_ft_needed_unlocked(FT ft);
|
bool toku_ft_needed_unlocked(FT ft);
|
||||||
|
@@ -53,6 +53,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -58,6 +58,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -283,10 +284,27 @@ serialize_node_header(FTNODE node, FTNODE_DISK_DATA ndd, struct wbuf *wbuf) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
wbufwriteleafentry(OMTVALUE lev, const uint32_t UU(idx), void *wbv) {
|
wbufwriteleafentry(const void* key, const uint32_t keylen, const LEAFENTRY &le, const uint32_t UU(idx), struct wbuf * const wb) {
|
||||||
const LEAFENTRY CAST_FROM_VOIDP(le, lev);
|
// need to pack the leafentry as it was in versions
|
||||||
struct wbuf *CAST_FROM_VOIDP(wb, wbv);
|
// where the key was integrated into it
|
||||||
wbuf_nocrc_LEAFENTRY(wb, le);
|
uint32_t begin_spot UU() = wb->ndone;
|
||||||
|
uint32_t le_disk_size = leafentry_disksize(le);
|
||||||
|
wbuf_nocrc_uint8_t(wb, le->type);
|
||||||
|
wbuf_nocrc_uint32_t(wb, keylen);
|
||||||
|
if (le->type == LE_CLEAN) {
|
||||||
|
wbuf_nocrc_uint32_t(wb, le->u.clean.vallen);
|
||||||
|
wbuf_nocrc_literal_bytes(wb, key, keylen);
|
||||||
|
wbuf_nocrc_literal_bytes(wb, le->u.clean.val, le->u.clean.vallen);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
paranoid_invariant(le->type == LE_MVCC);
|
||||||
|
wbuf_nocrc_uint32_t(wb, le->u.mvcc.num_cxrs);
|
||||||
|
wbuf_nocrc_uint8_t(wb, le->u.mvcc.num_pxrs);
|
||||||
|
wbuf_nocrc_literal_bytes(wb, key, keylen);
|
||||||
|
wbuf_nocrc_literal_bytes(wb, le->u.mvcc.xrs, le_disk_size - (1 + 4 + 1));
|
||||||
|
}
|
||||||
|
uint32_t end_spot UU() = wb->ndone;
|
||||||
|
paranoid_invariant((end_spot - begin_spot) == keylen + sizeof(keylen) + le_disk_size);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -302,7 +320,7 @@ serialize_ftnode_partition_size (FTNODE node, int i)
|
|||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
result += 4; // n_entries in buffer table
|
result += 4; // n_entries in buffer table
|
||||||
result += BLB_NBYTESINBUF(node, i);
|
result += BLB_NBYTESINDATA(node, i);
|
||||||
}
|
}
|
||||||
result += 4; // checksum
|
result += 4; // checksum
|
||||||
return result;
|
return result;
|
||||||
@@ -353,14 +371,15 @@ serialize_ftnode_partition(FTNODE node, int i, struct sub_block *sb) {
|
|||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
unsigned char ch = FTNODE_PARTITION_OMT_LEAVES;
|
unsigned char ch = FTNODE_PARTITION_OMT_LEAVES;
|
||||||
OMT buffer = BLB_BUFFER(node, i);
|
BN_DATA bd = BLB_DATA(node, i);
|
||||||
|
|
||||||
wbuf_nocrc_char(&wb, ch);
|
wbuf_nocrc_char(&wb, ch);
|
||||||
wbuf_nocrc_uint(&wb, toku_omt_size(buffer));
|
wbuf_nocrc_uint(&wb, bd->omt_size());
|
||||||
|
|
||||||
//
|
//
|
||||||
// iterate over leafentries and place them into the buffer
|
// iterate over leafentries and place them into the buffer
|
||||||
//
|
//
|
||||||
toku_omt_iterate(buffer, wbufwriteleafentry, &wb);
|
bd->omt_iterate<struct wbuf, wbufwriteleafentry>(&wb);
|
||||||
}
|
}
|
||||||
uint32_t end_to_end_checksum = x1764_memory(sb->uncompressed_ptr, wbuf_get_woffset(&wb));
|
uint32_t end_to_end_checksum = x1764_memory(sb->uncompressed_ptr, wbuf_get_woffset(&wb));
|
||||||
wbuf_nocrc_int(&wb, end_to_end_checksum);
|
wbuf_nocrc_int(&wb, end_to_end_checksum);
|
||||||
@@ -495,27 +514,16 @@ toku_serialize_ftnode_size (FTNODE node) {
|
|||||||
|
|
||||||
struct array_info {
|
struct array_info {
|
||||||
uint32_t offset;
|
uint32_t offset;
|
||||||
OMTVALUE* array;
|
LEAFENTRY* le_array;
|
||||||
|
uint32_t* key_sizes_array;
|
||||||
|
const void** key_ptr_array;
|
||||||
};
|
};
|
||||||
|
|
||||||
static int
|
static int
|
||||||
array_item (OMTVALUE lev, const uint32_t idx, void *aiv) {
|
array_item(const void* key, const uint32_t keylen, const LEAFENTRY &le, const uint32_t idx, struct array_info *const ai) {
|
||||||
struct array_info *CAST_FROM_VOIDP(ai, aiv);
|
ai->le_array[idx+ai->offset] = le;
|
||||||
ai->array[idx+ai->offset] = lev;
|
ai->key_sizes_array[idx+ai->offset] = keylen;
|
||||||
return 0;
|
ai->key_ptr_array[idx+ai->offset] = key;
|
||||||
}
|
|
||||||
|
|
||||||
struct sum_info {
|
|
||||||
unsigned int dsum;
|
|
||||||
unsigned int count;
|
|
||||||
};
|
|
||||||
|
|
||||||
static int
|
|
||||||
sum_item (OMTVALUE lev, uint32_t UU(idx), void *vsi) {
|
|
||||||
LEAFENTRY le = (LEAFENTRY) lev;
|
|
||||||
struct sum_info *si = (struct sum_info *) vsi;
|
|
||||||
si->count++;
|
|
||||||
si->dsum += leafentry_disksize(le); // TODO 4050 delete this redundant call and use le_sizes[]
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -534,29 +542,27 @@ rebalance_ftnode_leaf(FTNODE node, unsigned int basementnodesize)
|
|||||||
// Count number of leaf entries in this leaf (num_le).
|
// Count number of leaf entries in this leaf (num_le).
|
||||||
uint32_t num_le = 0;
|
uint32_t num_le = 0;
|
||||||
for (uint32_t i = 0; i < num_orig_basements; i++) {
|
for (uint32_t i = 0; i < num_orig_basements; i++) {
|
||||||
num_le += toku_omt_size(BLB_BUFFER(node, i));
|
num_le += BLB_DATA(node, i)->omt_size();
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t num_alloc = num_le ? num_le : 1; // simplify logic below by always having at least one entry per array
|
uint32_t num_alloc = num_le ? num_le : 1; // simplify logic below by always having at least one entry per array
|
||||||
|
|
||||||
// Create an array of OMTVALUE's that store all the pointers to all the data.
|
// Create an array of OMTVALUE's that store all the pointers to all the data.
|
||||||
// Each element in leafpointers is a pointer to a leaf.
|
// Each element in leafpointers is a pointer to a leaf.
|
||||||
OMTVALUE *XMALLOC_N(num_alloc, leafpointers);
|
LEAFENTRY *XMALLOC_N(num_alloc, leafpointers);
|
||||||
leafpointers[0] = NULL;
|
leafpointers[0] = NULL;
|
||||||
|
const void **XMALLOC_N(num_alloc, key_pointers);
|
||||||
|
uint32_t *XMALLOC_N(num_alloc, key_sizes);
|
||||||
|
|
||||||
// Capture pointers to old mempools' buffers (so they can be destroyed)
|
// Capture pointers to old mempools' buffers (so they can be destroyed)
|
||||||
void **XMALLOC_N(num_orig_basements, old_mempool_bases);
|
BASEMENTNODE *XMALLOC_N(num_orig_basements, old_bns);
|
||||||
|
|
||||||
uint32_t curr_le = 0;
|
uint32_t curr_le = 0;
|
||||||
for (uint32_t i = 0; i < num_orig_basements; i++) {
|
for (uint32_t i = 0; i < num_orig_basements; i++) {
|
||||||
OMT curr_omt = BLB_BUFFER(node, i);
|
BN_DATA bd = BLB_DATA(node, i);
|
||||||
struct array_info ai;
|
struct array_info ai {.offset = curr_le, .le_array = leafpointers, .key_sizes_array = key_sizes, .key_ptr_array = key_pointers };
|
||||||
ai.offset = curr_le; // index of first le in basement
|
bd->omt_iterate<array_info, array_item>(&ai);
|
||||||
ai.array = leafpointers;
|
curr_le += bd->omt_size();
|
||||||
toku_omt_iterate(curr_omt, array_item, &ai);
|
|
||||||
curr_le += toku_omt_size(curr_omt);
|
|
||||||
BASEMENTNODE bn = BLB(node, i);
|
|
||||||
old_mempool_bases[i] = toku_mempool_get_base(&bn->buffer_mempool);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create an array that will store indexes of new pivots.
|
// Create an array that will store indexes of new pivots.
|
||||||
@@ -598,7 +604,7 @@ rebalance_ftnode_leaf(FTNODE node, unsigned int basementnodesize)
|
|||||||
}
|
}
|
||||||
num_le_in_curr_bn++;
|
num_le_in_curr_bn++;
|
||||||
num_les_this_bn[curr_pivot] = num_le_in_curr_bn;
|
num_les_this_bn[curr_pivot] = num_le_in_curr_bn;
|
||||||
bn_size_so_far += curr_le_size;
|
bn_size_so_far += curr_le_size + sizeof(uint32_t) + key_sizes[i];
|
||||||
bn_sizes[curr_pivot] = bn_size_so_far;
|
bn_sizes[curr_pivot] = bn_size_so_far;
|
||||||
}
|
}
|
||||||
// curr_pivot is now the total number of pivot keys in the leaf node
|
// curr_pivot is now the total number of pivot keys in the leaf node
|
||||||
@@ -619,7 +625,13 @@ rebalance_ftnode_leaf(FTNODE node, unsigned int basementnodesize)
|
|||||||
MSN curr_msn = BLB_MAX_MSN_APPLIED(node,i);
|
MSN curr_msn = BLB_MAX_MSN_APPLIED(node,i);
|
||||||
max_msn = (curr_msn.msn > max_msn.msn) ? curr_msn : max_msn;
|
max_msn = (curr_msn.msn > max_msn.msn) ? curr_msn : max_msn;
|
||||||
}
|
}
|
||||||
|
// remove the basement node in the node, we've saved a copy
|
||||||
|
for (uint32_t i = 0; i < num_orig_basements; i++) {
|
||||||
|
// save a reference to the old basement nodes
|
||||||
|
// we will need them to ensure that the memory
|
||||||
|
// stays intact
|
||||||
|
old_bns[i] = toku_detach_bn(node, i);
|
||||||
|
}
|
||||||
// Now destroy the old basements, but do not destroy leaves
|
// Now destroy the old basements, but do not destroy leaves
|
||||||
toku_destroy_ftnode_internals(node);
|
toku_destroy_ftnode_internals(node);
|
||||||
|
|
||||||
@@ -638,9 +650,8 @@ rebalance_ftnode_leaf(FTNODE node, unsigned int basementnodesize)
|
|||||||
|
|
||||||
// first the pivots
|
// first the pivots
|
||||||
for (int i = 0; i < num_pivots; i++) {
|
for (int i = 0; i < num_pivots; i++) {
|
||||||
LEAFENTRY CAST_FROM_VOIDP(curr_le_pivot, leafpointers[new_pivots[i]]);
|
uint32_t keylen = key_sizes[new_pivots[i]];
|
||||||
uint32_t keylen;
|
const void *key = key_pointers[new_pivots[i]];
|
||||||
void *key = le_key_and_len(curr_le_pivot, &keylen);
|
|
||||||
toku_memdup_dbt(&node->childkeys[i], key, keylen);
|
toku_memdup_dbt(&node->childkeys[i], key, keylen);
|
||||||
node->totalchildkeylens += keylen;
|
node->totalchildkeylens += keylen;
|
||||||
}
|
}
|
||||||
@@ -663,29 +674,16 @@ rebalance_ftnode_leaf(FTNODE node, unsigned int basementnodesize)
|
|||||||
|
|
||||||
// construct mempool for this basement
|
// construct mempool for this basement
|
||||||
size_t size_this_bn = bn_sizes[i];
|
size_t size_this_bn = bn_sizes[i];
|
||||||
BASEMENTNODE bn = BLB(node, i);
|
|
||||||
struct mempool *mp = &bn->buffer_mempool;
|
|
||||||
toku_mempool_construct(mp, size_this_bn);
|
|
||||||
|
|
||||||
OMTVALUE *XMALLOC_N(num_in_bn, bn_array);
|
BN_DATA bd = BLB_DATA(node, i);
|
||||||
for (uint32_t le_index = 0; le_index < num_les_to_copy; le_index++) {
|
bd->replace_contents_with_clone_of_sorted_array(
|
||||||
uint32_t le_within_node = baseindex_this_bn + le_index;
|
num_les_to_copy,
|
||||||
size_t le_size = le_sizes[le_within_node];
|
&key_pointers[baseindex_this_bn],
|
||||||
void *new_le = toku_mempool_malloc(mp, le_size, 1); // point to new location
|
&key_sizes[baseindex_this_bn],
|
||||||
void *old_le = leafpointers[le_within_node];
|
&leafpointers[baseindex_this_bn],
|
||||||
memcpy(new_le, old_le, le_size); // put le data at new location
|
&le_sizes[baseindex_this_bn],
|
||||||
bn_array[le_index] = new_le; // point to new location (in new mempool)
|
size_this_bn
|
||||||
}
|
|
||||||
|
|
||||||
toku_omt_destroy(&BLB_BUFFER(node, i));
|
|
||||||
int r = toku_omt_create_steal_sorted_array(
|
|
||||||
&BLB_BUFFER(node, i),
|
|
||||||
&bn_array,
|
|
||||||
num_in_bn,
|
|
||||||
num_in_bn
|
|
||||||
);
|
);
|
||||||
invariant_zero(r);
|
|
||||||
BLB_NBYTESINBUF(node, i) = size_this_bn;
|
|
||||||
|
|
||||||
BP_STATE(node,i) = PT_AVAIL;
|
BP_STATE(node,i) = PT_AVAIL;
|
||||||
BP_TOUCH_CLOCK(node,i);
|
BP_TOUCH_CLOCK(node,i);
|
||||||
@@ -696,10 +694,12 @@ rebalance_ftnode_leaf(FTNODE node, unsigned int basementnodesize)
|
|||||||
|
|
||||||
// destroy buffers of old mempools
|
// destroy buffers of old mempools
|
||||||
for (uint32_t i = 0; i < num_orig_basements; i++) {
|
for (uint32_t i = 0; i < num_orig_basements; i++) {
|
||||||
toku_free(old_mempool_bases[i]);
|
destroy_basement_node(old_bns[i]);
|
||||||
}
|
}
|
||||||
|
toku_free(key_pointers);
|
||||||
|
toku_free(key_sizes);
|
||||||
toku_free(leafpointers);
|
toku_free(leafpointers);
|
||||||
toku_free(old_mempool_bases);
|
toku_free(old_bns);
|
||||||
toku_free(new_pivots);
|
toku_free(new_pivots);
|
||||||
toku_free(le_sizes);
|
toku_free(le_sizes);
|
||||||
toku_free(bn_sizes);
|
toku_free(bn_sizes);
|
||||||
@@ -1144,55 +1144,27 @@ dump_bad_block(unsigned char *vp, uint64_t size) {
|
|||||||
|
|
||||||
BASEMENTNODE toku_create_empty_bn(void) {
|
BASEMENTNODE toku_create_empty_bn(void) {
|
||||||
BASEMENTNODE bn = toku_create_empty_bn_no_buffer();
|
BASEMENTNODE bn = toku_create_empty_bn_no_buffer();
|
||||||
int r = toku_omt_create(&bn->buffer);
|
bn->data_buffer.initialize_empty();
|
||||||
lazy_assert_zero(r);
|
|
||||||
return bn;
|
return bn;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct mp_pair {
|
|
||||||
void* orig_base;
|
|
||||||
void* new_base;
|
|
||||||
OMT omt;
|
|
||||||
};
|
|
||||||
|
|
||||||
static int fix_mp_offset(OMTVALUE v, uint32_t i, void* extra) {
|
|
||||||
struct mp_pair *CAST_FROM_VOIDP(p, extra);
|
|
||||||
char* old_value = (char *) v;
|
|
||||||
char *new_value = old_value - (char *)p->orig_base + (char *)p->new_base;
|
|
||||||
toku_omt_set_at(p->omt, (OMTVALUE) new_value, i);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn) {
|
BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn) {
|
||||||
BASEMENTNODE bn = toku_create_empty_bn_no_buffer();
|
BASEMENTNODE bn = toku_create_empty_bn_no_buffer();
|
||||||
bn->max_msn_applied = orig_bn->max_msn_applied;
|
bn->max_msn_applied = orig_bn->max_msn_applied;
|
||||||
bn->n_bytes_in_buffer = orig_bn->n_bytes_in_buffer;
|
|
||||||
bn->seqinsert = orig_bn->seqinsert;
|
bn->seqinsert = orig_bn->seqinsert;
|
||||||
bn->stale_ancestor_messages_applied = orig_bn->stale_ancestor_messages_applied;
|
bn->stale_ancestor_messages_applied = orig_bn->stale_ancestor_messages_applied;
|
||||||
bn->stat64_delta = orig_bn->stat64_delta;
|
bn->stat64_delta = orig_bn->stat64_delta;
|
||||||
toku_mempool_clone(&orig_bn->buffer_mempool, &bn->buffer_mempool);
|
bn->data_buffer.clone(&orig_bn->data_buffer);
|
||||||
toku_omt_clone_noptr(&bn->buffer, orig_bn->buffer);
|
|
||||||
struct mp_pair p;
|
|
||||||
p.orig_base = toku_mempool_get_base(&orig_bn->buffer_mempool);
|
|
||||||
p.new_base = toku_mempool_get_base(&bn->buffer_mempool);
|
|
||||||
p.omt = bn->buffer;
|
|
||||||
toku_omt_iterate(
|
|
||||||
bn->buffer,
|
|
||||||
fix_mp_offset,
|
|
||||||
&p
|
|
||||||
);
|
|
||||||
return bn;
|
return bn;
|
||||||
}
|
}
|
||||||
|
|
||||||
BASEMENTNODE toku_create_empty_bn_no_buffer(void) {
|
BASEMENTNODE toku_create_empty_bn_no_buffer(void) {
|
||||||
BASEMENTNODE XMALLOC(bn);
|
BASEMENTNODE XMALLOC(bn);
|
||||||
bn->max_msn_applied.msn = 0;
|
bn->max_msn_applied.msn = 0;
|
||||||
bn->buffer = NULL;
|
|
||||||
bn->n_bytes_in_buffer = 0;
|
|
||||||
bn->seqinsert = 0;
|
bn->seqinsert = 0;
|
||||||
bn->stale_ancestor_messages_applied = false;
|
bn->stale_ancestor_messages_applied = false;
|
||||||
toku_mempool_zero(&bn->buffer_mempool);
|
|
||||||
bn->stat64_delta = ZEROSTATS;
|
bn->stat64_delta = ZEROSTATS;
|
||||||
|
bn->data_buffer.init_zero();
|
||||||
return bn;
|
return bn;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1219,10 +1191,7 @@ NONLEAF_CHILDINFO toku_clone_nl(NONLEAF_CHILDINFO orig_childinfo) {
|
|||||||
|
|
||||||
void destroy_basement_node (BASEMENTNODE bn)
|
void destroy_basement_node (BASEMENTNODE bn)
|
||||||
{
|
{
|
||||||
// The buffer may have been freed already, in some cases.
|
bn->data_buffer.destroy();
|
||||||
if (bn->buffer) {
|
|
||||||
toku_omt_destroy(&bn->buffer);
|
|
||||||
}
|
|
||||||
toku_free(bn);
|
toku_free(bn);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1568,32 +1537,12 @@ deserialize_ftnode_partition(
|
|||||||
assert(ch == FTNODE_PARTITION_OMT_LEAVES);
|
assert(ch == FTNODE_PARTITION_OMT_LEAVES);
|
||||||
BLB_SEQINSERT(node, childnum) = 0;
|
BLB_SEQINSERT(node, childnum) = 0;
|
||||||
uint32_t num_entries = rbuf_int(&rb);
|
uint32_t num_entries = rbuf_int(&rb);
|
||||||
uint32_t start_of_data = rb.ndone; // index of first byte of first leafentry
|
// we are now at the first byte of first leafentry
|
||||||
data_size -= start_of_data; // remaining bytes of leafentry data
|
data_size -= rb.ndone; // remaining bytes of leafentry data
|
||||||
// TODO 3988 Count empty basements (data_size == 0)
|
|
||||||
if (data_size == 0) {
|
|
||||||
// printf("#### Deserialize empty basement, childnum = %d\n", childnum);
|
|
||||||
invariant_zero(num_entries);
|
|
||||||
}
|
|
||||||
OMTVALUE *XMALLOC_N(num_entries, array); // create array of pointers to leafentries
|
|
||||||
BASEMENTNODE bn = BLB(node, childnum);
|
BASEMENTNODE bn = BLB(node, childnum);
|
||||||
toku_mempool_copy_construct(&bn->buffer_mempool, &rb.buf[rb.ndone], data_size);
|
bn->data_buffer.initialize_from_data(num_entries, &rb.buf[rb.ndone], data_size);
|
||||||
uint8_t *CAST_FROM_VOIDP(le_base, toku_mempool_get_base(&bn->buffer_mempool)); // point to first le in mempool
|
rb.ndone += data_size;
|
||||||
for (uint32_t i = 0; i < num_entries; i++) { // now set up the pointers in the omt
|
|
||||||
LEAFENTRY le = reinterpret_cast<LEAFENTRY>(&le_base[rb.ndone - start_of_data]); // point to durable mempool, not to transient rbuf
|
|
||||||
uint32_t disksize = leafentry_disksize(le);
|
|
||||||
rb.ndone += disksize;
|
|
||||||
invariant(rb.ndone<=rb.size);
|
|
||||||
array[i] = le;
|
|
||||||
}
|
|
||||||
uint32_t end_of_data = rb.ndone;
|
|
||||||
|
|
||||||
BLB_NBYTESINBUF(node, childnum) += end_of_data-start_of_data;
|
|
||||||
|
|
||||||
// destroy old omt (bn.buffer) that was created by toku_create_empty_bn(), so we can create a new one
|
|
||||||
toku_omt_destroy(&BLB_BUFFER(node, childnum));
|
|
||||||
r = toku_omt_create_steal_sorted_array(&BLB_BUFFER(node, childnum), &array, num_entries, num_entries);
|
|
||||||
invariant_zero(r);
|
|
||||||
}
|
}
|
||||||
assert(rb.ndone == rb.size);
|
assert(rb.ndone == rb.size);
|
||||||
toku_free(sb->uncompressed_ptr);
|
toku_free(sb->uncompressed_ptr);
|
||||||
@@ -2103,19 +2052,14 @@ deserialize_and_upgrade_leaf_node(FTNODE node,
|
|||||||
|
|
||||||
// The number of leaf entries in buffer.
|
// The number of leaf entries in buffer.
|
||||||
int n_in_buf = rbuf_int(rb); // 15. # of leaves
|
int n_in_buf = rbuf_int(rb); // 15. # of leaves
|
||||||
BLB_NBYTESINBUF(node,0) = 0;
|
|
||||||
BLB_SEQINSERT(node,0) = 0;
|
BLB_SEQINSERT(node,0) = 0;
|
||||||
BASEMENTNODE bn = BLB(node, 0);
|
BASEMENTNODE bn = BLB(node, 0);
|
||||||
|
|
||||||
// The current end of the buffer, read from disk and decompressed,
|
|
||||||
// is the start of the leaf entries.
|
|
||||||
uint32_t start_of_data = rb->ndone;
|
|
||||||
|
|
||||||
// Read the leaf entries from the buffer, advancing the buffer
|
// Read the leaf entries from the buffer, advancing the buffer
|
||||||
// as we go.
|
// as we go.
|
||||||
|
bool has_end_to_end_checksum = (version >= FT_FIRST_LAYOUT_VERSION_WITH_END_TO_END_CHECKSUM);
|
||||||
if (version <= FT_LAYOUT_VERSION_13) {
|
if (version <= FT_LAYOUT_VERSION_13) {
|
||||||
// Create our mempool.
|
// Create our mempool.
|
||||||
toku_mempool_construct(&bn->buffer_mempool, 0);
|
|
||||||
// Loop through
|
// Loop through
|
||||||
for (int i = 0; i < n_in_buf; ++i) {
|
for (int i = 0; i < n_in_buf; ++i) {
|
||||||
LEAFENTRY_13 le = reinterpret_cast<LEAFENTRY_13>(&rb->buf[rb->ndone]);
|
LEAFENTRY_13 le = reinterpret_cast<LEAFENTRY_13>(&rb->buf[rb->ndone]);
|
||||||
@@ -2124,61 +2068,33 @@ deserialize_and_upgrade_leaf_node(FTNODE node,
|
|||||||
invariant(rb->ndone<=rb->size);
|
invariant(rb->ndone<=rb->size);
|
||||||
LEAFENTRY new_le;
|
LEAFENTRY new_le;
|
||||||
size_t new_le_size;
|
size_t new_le_size;
|
||||||
|
void* key = NULL;
|
||||||
|
uint32_t keylen = 0;
|
||||||
r = toku_le_upgrade_13_14(le,
|
r = toku_le_upgrade_13_14(le,
|
||||||
|
&key,
|
||||||
|
&keylen,
|
||||||
&new_le_size,
|
&new_le_size,
|
||||||
&new_le,
|
&new_le);
|
||||||
&bn->buffer,
|
|
||||||
&bn->buffer_mempool);
|
|
||||||
assert_zero(r);
|
assert_zero(r);
|
||||||
// Copy the pointer value straight into the OMT
|
// Copy the pointer value straight into the OMT
|
||||||
r = toku_omt_insert_at(bn->buffer, new_le, i);
|
LEAFENTRY new_le_in_bn = nullptr;
|
||||||
assert_zero(r);
|
bn->data_buffer.get_space_for_insert(
|
||||||
bn->n_bytes_in_buffer += new_le_size;
|
i,
|
||||||
|
key,
|
||||||
|
keylen,
|
||||||
|
new_le_size,
|
||||||
|
&new_le_in_bn
|
||||||
|
);
|
||||||
|
memcpy(new_le_in_bn, new_le, new_le_size);
|
||||||
|
toku_free(new_le);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
uint32_t end_of_data;
|
uint32_t data_size = rb->size - rb->ndone;
|
||||||
uint32_t data_size;
|
if (has_end_to_end_checksum) {
|
||||||
|
data_size -= sizeof(uint32_t);
|
||||||
// Leaf Entry creation for version 14 and above:
|
|
||||||
// Allocate space for our leaf entry pointers.
|
|
||||||
OMTVALUE *XMALLOC_N(n_in_buf, array);
|
|
||||||
|
|
||||||
// Iterate over leaf entries copying their addresses into our
|
|
||||||
// temporary array.
|
|
||||||
for (int i = 0; i < n_in_buf; ++i) {
|
|
||||||
LEAFENTRY le = reinterpret_cast<LEAFENTRY>(&rb->buf[rb->ndone]);
|
|
||||||
uint32_t disksize = leafentry_disksize(le);
|
|
||||||
rb->ndone += disksize; // 16. leaf entry (14)
|
|
||||||
invariant(rb->ndone <= rb->size);
|
|
||||||
array[i] = le;
|
|
||||||
}
|
}
|
||||||
|
bn->data_buffer.initialize_from_data(n_in_buf, &rb->buf[rb->ndone], data_size);
|
||||||
end_of_data = rb->ndone;
|
rb->ndone += data_size;
|
||||||
data_size = end_of_data - start_of_data;
|
|
||||||
|
|
||||||
// Now we must create the OMT and it's associated mempool.
|
|
||||||
|
|
||||||
// Allocate mempool in basement node and memcpy from start of
|
|
||||||
// input/deserialized buffer.
|
|
||||||
toku_mempool_copy_construct(&bn->buffer_mempool,
|
|
||||||
&rb->buf[start_of_data],
|
|
||||||
data_size);
|
|
||||||
|
|
||||||
// Adjust the array of OMT values to point to the correct
|
|
||||||
// position in the mempool. The mempool should have all the
|
|
||||||
// data at this point.
|
|
||||||
for (int i = 0; i < n_in_buf; ++i) {
|
|
||||||
int offset = (unsigned char *) array[i] - &rb->buf[start_of_data];
|
|
||||||
unsigned char *mp_base = (unsigned char *) toku_mempool_get_base(&bn->buffer_mempool);
|
|
||||||
array[i] = &mp_base[offset];
|
|
||||||
}
|
|
||||||
|
|
||||||
BLB_NBYTESINBUF(node, 0) = data_size;
|
|
||||||
|
|
||||||
toku_omt_destroy(&BLB_BUFFER(node, 0));
|
|
||||||
// Construct the omt.
|
|
||||||
r = toku_omt_create_steal_sorted_array(&BLB_BUFFER(node, 0), &array, n_in_buf, n_in_buf);
|
|
||||||
invariant_zero(r);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Whatever this is must be less than the MSNs of every message above
|
// Whatever this is must be less than the MSNs of every message above
|
||||||
@@ -2188,7 +2104,7 @@ deserialize_and_upgrade_leaf_node(FTNODE node,
|
|||||||
node->max_msn_applied_to_node_on_disk = bn->max_msn_applied;
|
node->max_msn_applied_to_node_on_disk = bn->max_msn_applied;
|
||||||
|
|
||||||
// Checksum (end to end) is only on version 14
|
// Checksum (end to end) is only on version 14
|
||||||
if (version >= FT_FIRST_LAYOUT_VERSION_WITH_END_TO_END_CHECKSUM) {
|
if (has_end_to_end_checksum) {
|
||||||
uint32_t expected_xsum = rbuf_int(rb); // 17. checksum
|
uint32_t expected_xsum = rbuf_int(rb); // 17. checksum
|
||||||
uint32_t actual_xsum = x1764_memory(rb->buf, rb->size - 4);
|
uint32_t actual_xsum = x1764_memory(rb->buf, rb->size - 4);
|
||||||
if (expected_xsum != actual_xsum) {
|
if (expected_xsum != actual_xsum) {
|
||||||
@@ -2679,20 +2595,7 @@ toku_deserialize_ftnode_from (int fd,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
toku_verify_or_set_counts(FTNODE node) {
|
toku_verify_or_set_counts(FTNODE UU(node)) {
|
||||||
if (node->height==0) {
|
|
||||||
for (int i=0; i<node->n_children; i++) {
|
|
||||||
lazy_assert(BLB_BUFFER(node, i));
|
|
||||||
struct sum_info sum_info = {0,0};
|
|
||||||
toku_omt_iterate(BLB_BUFFER(node, i), sum_item, &sum_info);
|
|
||||||
lazy_assert(sum_info.count==toku_omt_size(BLB_BUFFER(node, i)));
|
|
||||||
lazy_assert(sum_info.dsum==BLB_NBYTESINBUF(node, i));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// nothing to do because we no longer store n_bytes_in_buffers for
|
|
||||||
// the whole node
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -52,6 +52,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -342,6 +343,7 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp,
|
|||||||
LSN load_lsn,
|
LSN load_lsn,
|
||||||
TOKUTXN txn,
|
TOKUTXN txn,
|
||||||
bool reserve_memory,
|
bool reserve_memory,
|
||||||
|
uint64_t reserve_memory_size,
|
||||||
bool compress_intermediates);
|
bool compress_intermediates);
|
||||||
|
|
||||||
void toku_ft_loader_internal_destroy (FTLOADER bl, bool is_error);
|
void toku_ft_loader_internal_destroy (FTLOADER bl, bool is_error);
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -541,6 +542,7 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp,
|
|||||||
LSN load_lsn,
|
LSN load_lsn,
|
||||||
TOKUTXN txn,
|
TOKUTXN txn,
|
||||||
bool reserve_memory,
|
bool reserve_memory,
|
||||||
|
uint64_t reserve_memory_size,
|
||||||
bool compress_intermediates)
|
bool compress_intermediates)
|
||||||
// Effect: Allocate and initialize a FTLOADER, but do not create the extractor thread.
|
// Effect: Allocate and initialize a FTLOADER, but do not create the extractor thread.
|
||||||
{
|
{
|
||||||
@@ -551,14 +553,16 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp,
|
|||||||
bl->cachetable = cachetable;
|
bl->cachetable = cachetable;
|
||||||
if (reserve_memory && bl->cachetable) {
|
if (reserve_memory && bl->cachetable) {
|
||||||
bl->did_reserve_memory = true;
|
bl->did_reserve_memory = true;
|
||||||
bl->reserved_memory = toku_cachetable_reserve_memory(bl->cachetable, 2.0/3.0); // allocate 2/3 of the unreserved part (which is 3/4 of the memory to start with).
|
bl->reserved_memory = toku_cachetable_reserve_memory(bl->cachetable, 2.0/3.0, reserve_memory_size); // allocate 2/3 of the unreserved part (which is 3/4 of the memory to start with).
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
bl->did_reserve_memory = false;
|
bl->did_reserve_memory = false;
|
||||||
bl->reserved_memory = 512*1024*1024; // if no cache table use 512MB.
|
bl->reserved_memory = 512*1024*1024; // if no cache table use 512MB.
|
||||||
}
|
}
|
||||||
bl->compress_intermediates = compress_intermediates;
|
bl->compress_intermediates = compress_intermediates;
|
||||||
//printf("Reserved memory=%ld\n", bl->reserved_memory);
|
if (0) { // debug
|
||||||
|
fprintf(stderr, "%s Reserved memory=%ld\n", __FUNCTION__, bl->reserved_memory);
|
||||||
|
}
|
||||||
|
|
||||||
bl->src_db = src_db;
|
bl->src_db = src_db;
|
||||||
bl->N = N;
|
bl->N = N;
|
||||||
@@ -645,6 +649,7 @@ int toku_ft_loader_open (/* out */ FTLOADER *blp,
|
|||||||
LSN load_lsn,
|
LSN load_lsn,
|
||||||
TOKUTXN txn,
|
TOKUTXN txn,
|
||||||
bool reserve_memory,
|
bool reserve_memory,
|
||||||
|
uint64_t reserve_memory_size,
|
||||||
bool compress_intermediates)
|
bool compress_intermediates)
|
||||||
/* Effect: called by DB_ENV->create_loader to create a brt loader.
|
/* Effect: called by DB_ENV->create_loader to create a brt loader.
|
||||||
* Arguments:
|
* Arguments:
|
||||||
@@ -668,6 +673,7 @@ int toku_ft_loader_open (/* out */ FTLOADER *blp,
|
|||||||
load_lsn,
|
load_lsn,
|
||||||
txn,
|
txn,
|
||||||
reserve_memory,
|
reserve_memory,
|
||||||
|
reserve_memory_size,
|
||||||
compress_intermediates);
|
compress_intermediates);
|
||||||
if (r!=0) result = r;
|
if (r!=0) result = r;
|
||||||
}
|
}
|
||||||
@@ -1236,9 +1242,9 @@ static TXNID leafentry_xid(FTLOADER bl, int which_db) {
|
|||||||
size_t ft_loader_leafentry_size(size_t key_size, size_t val_size, TXNID xid) {
|
size_t ft_loader_leafentry_size(size_t key_size, size_t val_size, TXNID xid) {
|
||||||
size_t s = 0;
|
size_t s = 0;
|
||||||
if (xid == TXNID_NONE)
|
if (xid == TXNID_NONE)
|
||||||
s = LE_CLEAN_MEMSIZE(key_size, val_size);
|
s = LE_CLEAN_MEMSIZE(val_size) + key_size + sizeof(uint32_t);
|
||||||
else
|
else
|
||||||
s = LE_MVCC_COMMITTED_MEMSIZE(key_size, val_size);
|
s = LE_MVCC_COMMITTED_MEMSIZE(val_size) + key_size + sizeof(uint32_t);
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1250,6 +1256,12 @@ static int process_primary_rows_internal (FTLOADER bl, struct rowset *primary_ro
|
|||||||
int error_count = 0;
|
int error_count = 0;
|
||||||
int *XMALLOC_N(bl->N, error_codes);
|
int *XMALLOC_N(bl->N, error_codes);
|
||||||
|
|
||||||
|
// If we parallelize the first for loop, dest_keys/dest_vals init&cleanup need to move inside
|
||||||
|
DBT_ARRAY dest_keys;
|
||||||
|
DBT_ARRAY dest_vals;
|
||||||
|
toku_dbt_array_init(&dest_keys, 1);
|
||||||
|
toku_dbt_array_init(&dest_vals, 1);
|
||||||
|
|
||||||
for (int i = 0; i < bl->N; i++) {
|
for (int i = 0; i < bl->N; i++) {
|
||||||
unsigned int klimit,vlimit; // maximum row sizes.
|
unsigned int klimit,vlimit; // maximum row sizes.
|
||||||
toku_ft_get_maximum_advised_key_value_lengths(&klimit, &vlimit);
|
toku_ft_get_maximum_advised_key_value_lengths(&klimit, &vlimit);
|
||||||
@@ -1259,12 +1271,8 @@ static int process_primary_rows_internal (FTLOADER bl, struct rowset *primary_ro
|
|||||||
struct merge_fileset *fs = &(bl->fs[i]);
|
struct merge_fileset *fs = &(bl->fs[i]);
|
||||||
ft_compare_func compare = bl->bt_compare_funs[i];
|
ft_compare_func compare = bl->bt_compare_funs[i];
|
||||||
|
|
||||||
DBT skey = zero_dbt;
|
|
||||||
skey.flags = DB_DBT_REALLOC;
|
|
||||||
DBT sval=skey;
|
|
||||||
|
|
||||||
// Don't parallelize this loop, or we have to lock access to add_row() which would be a lot of overehad.
|
// Don't parallelize this loop, or we have to lock access to add_row() which would be a lot of overehad.
|
||||||
// Also this way we can reuse the DB_DBT_REALLOC'd value inside skey and sval without a race.
|
// Also this way we can reuse the DB_DBT_REALLOC'd values inside dest_keys/dest_vals without a race.
|
||||||
for (size_t prownum=0; prownum<primary_rowset->n_rows; prownum++) {
|
for (size_t prownum=0; prownum<primary_rowset->n_rows; prownum++) {
|
||||||
if (error_count) break;
|
if (error_count) break;
|
||||||
|
|
||||||
@@ -1276,23 +1284,32 @@ static int process_primary_rows_internal (FTLOADER bl, struct rowset *primary_ro
|
|||||||
pval.data = primary_rowset->data + prow->off + prow->klen;
|
pval.data = primary_rowset->data + prow->off + prow->klen;
|
||||||
pval.size = prow->vlen;
|
pval.size = prow->vlen;
|
||||||
|
|
||||||
DBT *dest_key = &skey;
|
|
||||||
DBT *dest_val = &sval;
|
|
||||||
|
|
||||||
{
|
|
||||||
int r;
|
|
||||||
|
|
||||||
|
DBT_ARRAY key_array;
|
||||||
|
DBT_ARRAY val_array;
|
||||||
if (bl->dbs[i] != bl->src_db) {
|
if (bl->dbs[i] != bl->src_db) {
|
||||||
r = bl->generate_row_for_put(bl->dbs[i], bl->src_db, dest_key, dest_val, &pkey, &pval);
|
int r = bl->generate_row_for_put(bl->dbs[i], bl->src_db, &dest_keys, &dest_vals, &pkey, &pval);
|
||||||
if (r != 0) {
|
if (r != 0) {
|
||||||
error_codes[i] = r;
|
error_codes[i] = r;
|
||||||
inc_error_count();
|
inc_error_count();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
paranoid_invariant(dest_keys.size <= dest_keys.capacity);
|
||||||
|
paranoid_invariant(dest_vals.size <= dest_vals.capacity);
|
||||||
|
paranoid_invariant(dest_keys.size == dest_vals.size);
|
||||||
|
|
||||||
|
key_array = dest_keys;
|
||||||
|
val_array = dest_vals;
|
||||||
} else {
|
} else {
|
||||||
dest_key = &pkey;
|
key_array.size = key_array.capacity = 1;
|
||||||
dest_val = &pval;
|
key_array.dbts = &pkey;
|
||||||
|
|
||||||
|
val_array.size = val_array.capacity = 1;
|
||||||
|
val_array.dbts = &pval;
|
||||||
}
|
}
|
||||||
|
for (uint32_t row = 0; row < key_array.size; row++) {
|
||||||
|
DBT *dest_key = &key_array.dbts[row];
|
||||||
|
DBT *dest_val = &val_array.dbts[row];
|
||||||
if (dest_key->size > klimit) {
|
if (dest_key->size > klimit) {
|
||||||
error_codes[i] = EINVAL;
|
error_codes[i] = EINVAL;
|
||||||
fprintf(stderr, "Key too big (keysize=%d bytes, limit=%d bytes)\n", dest_key->size, klimit);
|
fprintf(stderr, "Key too big (keysize=%d bytes, limit=%d bytes)\n", dest_key->size, klimit);
|
||||||
@@ -1305,7 +1322,6 @@ static int process_primary_rows_internal (FTLOADER bl, struct rowset *primary_ro
|
|||||||
inc_error_count();
|
inc_error_count();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
bl->extracted_datasizes[i] += ft_loader_leafentry_size(dest_key->size, dest_val->size, leafentry_xid(bl, i));
|
bl->extracted_datasizes[i] += ft_loader_leafentry_size(dest_key->size, dest_val->size, leafentry_xid(bl, i));
|
||||||
|
|
||||||
@@ -1326,30 +1342,11 @@ static int process_primary_rows_internal (FTLOADER bl, struct rowset *primary_ro
|
|||||||
inc_error_count();
|
inc_error_count();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
//flags==0 means generate_row_for_put callback changed it
|
|
||||||
//(and freed any memory necessary to do so) so that values are now stored
|
|
||||||
//in temporary memory that does not need to be freed. We need to continue
|
|
||||||
//using DB_DBT_REALLOC however.
|
|
||||||
if (skey.flags == 0) {
|
|
||||||
toku_init_dbt(&skey);
|
|
||||||
skey.flags = DB_DBT_REALLOC;
|
|
||||||
}
|
|
||||||
if (sval.flags == 0) {
|
|
||||||
toku_init_dbt(&sval);
|
|
||||||
sval.flags = DB_DBT_REALLOC;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bl->dbs[i] != bl->src_db) {
|
|
||||||
if (skey.flags) {
|
|
||||||
toku_free(skey.data); skey.data = NULL;
|
|
||||||
}
|
|
||||||
if (sval.flags) {
|
|
||||||
toku_free(sval.data); sval.data = NULL;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
toku_dbt_array_destroy(&dest_keys);
|
||||||
|
toku_dbt_array_destroy(&dest_vals);
|
||||||
|
|
||||||
destroy_rowset(primary_rowset);
|
destroy_rowset(primary_rowset);
|
||||||
toku_free(primary_rowset);
|
toku_free(primary_rowset);
|
||||||
@@ -2915,7 +2912,7 @@ static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int
|
|||||||
// #3588 TODO just make a clean ule and append it to the omt
|
// #3588 TODO just make a clean ule and append it to the omt
|
||||||
// #3588 TODO can do the rebalancing here and avoid a lot of work later
|
// #3588 TODO can do the rebalancing here and avoid a lot of work later
|
||||||
FTNODE leafnode = lbuf->node;
|
FTNODE leafnode = lbuf->node;
|
||||||
uint32_t idx = toku_omt_size(BLB_BUFFER(leafnode, 0));
|
uint32_t idx = BLB_DATA(leafnode, 0)->omt_size();
|
||||||
DBT thekey = { .data = key, .size = (uint32_t) keylen };
|
DBT thekey = { .data = key, .size = (uint32_t) keylen };
|
||||||
DBT theval = { .data = val, .size = (uint32_t) vallen };
|
DBT theval = { .data = val, .size = (uint32_t) vallen };
|
||||||
FT_MSG_S cmd = { .type = FT_INSERT,
|
FT_MSG_S cmd = { .type = FT_INSERT,
|
||||||
|
@@ -53,6 +53,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -111,6 +112,7 @@ int toku_ft_loader_open (FTLOADER *bl,
|
|||||||
LSN load_lsn,
|
LSN load_lsn,
|
||||||
TOKUTXN txn,
|
TOKUTXN txn,
|
||||||
bool reserve_memory,
|
bool reserve_memory,
|
||||||
|
uint64_t reserve_memory_size,
|
||||||
bool compress_intermediates);
|
bool compress_intermediates);
|
||||||
|
|
||||||
int toku_ft_loader_put (FTLOADER bl, DBT *key, DBT *val);
|
int toku_ft_loader_put (FTLOADER bl, DBT *key, DBT *val);
|
||||||
|
@@ -53,6 +53,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -233,6 +234,7 @@ typedef struct cachetable *CACHETABLE;
|
|||||||
typedef struct cachefile *CACHEFILE;
|
typedef struct cachefile *CACHEFILE;
|
||||||
typedef struct ctpair *PAIR;
|
typedef struct ctpair *PAIR;
|
||||||
typedef class checkpointer *CHECKPOINTER;
|
typedef class checkpointer *CHECKPOINTER;
|
||||||
|
typedef class bn_data *BN_DATA;
|
||||||
|
|
||||||
/* tree command types */
|
/* tree command types */
|
||||||
enum ft_msg_type {
|
enum ft_msg_type {
|
||||||
@@ -334,7 +336,7 @@ struct ft_msg {
|
|||||||
// Message sent into brt to implement command (insert, delete, etc.)
|
// Message sent into brt to implement command (insert, delete, etc.)
|
||||||
// This structure supports nested transactions, and obsoletes ft_msg.
|
// This structure supports nested transactions, and obsoletes ft_msg.
|
||||||
typedef struct ft_msg FT_MSG_S;
|
typedef struct ft_msg FT_MSG_S;
|
||||||
typedef const struct ft_msg *FT_MSG;
|
typedef struct ft_msg *FT_MSG;
|
||||||
|
|
||||||
typedef int (*ft_compare_func)(DB *, const DBT *, const DBT *);
|
typedef int (*ft_compare_func)(DB *, const DBT *, const DBT *);
|
||||||
typedef void (*setval_func)(const DBT *, void *);
|
typedef void (*setval_func)(const DBT *, void *);
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -53,6 +53,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -120,8 +121,8 @@ toku_le_cursor_create(LE_CURSOR *le_cursor_result, FT_HANDLE ft_handle, TOKUTXN
|
|||||||
if (result == 0) {
|
if (result == 0) {
|
||||||
// TODO move the leaf mode to the ft cursor constructor
|
// TODO move the leaf mode to the ft cursor constructor
|
||||||
toku_ft_cursor_set_leaf_mode(le_cursor->ft_cursor);
|
toku_ft_cursor_set_leaf_mode(le_cursor->ft_cursor);
|
||||||
le_cursor->neg_infinity = true;
|
le_cursor->neg_infinity = false;
|
||||||
le_cursor->pos_infinity = false;
|
le_cursor->pos_infinity = true;
|
||||||
// zero out the fake DB. this is a rare operation so it's not too slow.
|
// zero out the fake DB. this is a rare operation so it's not too slow.
|
||||||
memset(&le_cursor->fake_db, 0, sizeof(le_cursor->fake_db));
|
memset(&le_cursor->fake_db, 0, sizeof(le_cursor->fake_db));
|
||||||
}
|
}
|
||||||
@@ -147,21 +148,21 @@ void toku_le_cursor_close(LE_CURSOR le_cursor) {
|
|||||||
int
|
int
|
||||||
toku_le_cursor_next(LE_CURSOR le_cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) {
|
toku_le_cursor_next(LE_CURSOR le_cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) {
|
||||||
int result;
|
int result;
|
||||||
if (le_cursor->pos_infinity) {
|
if (le_cursor->neg_infinity) {
|
||||||
result = DB_NOTFOUND;
|
result = DB_NOTFOUND;
|
||||||
} else {
|
} else {
|
||||||
le_cursor->neg_infinity = false;
|
le_cursor->pos_infinity = false;
|
||||||
// TODO replace this with a non deprecated function. Which?
|
// TODO replace this with a non deprecated function. Which?
|
||||||
result = toku_ft_cursor_get(le_cursor->ft_cursor, NULL, getf, getf_v, DB_NEXT);
|
result = toku_ft_cursor_get(le_cursor->ft_cursor, NULL, getf, getf_v, DB_PREV);
|
||||||
if (result == DB_NOTFOUND) {
|
if (result == DB_NOTFOUND) {
|
||||||
le_cursor->pos_infinity = true;
|
le_cursor->neg_infinity = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
toku_le_cursor_is_key_greater(LE_CURSOR le_cursor, const DBT *key) {
|
toku_le_cursor_is_key_greater_or_equal(LE_CURSOR le_cursor, const DBT *key) {
|
||||||
bool result;
|
bool result;
|
||||||
if (le_cursor->neg_infinity) {
|
if (le_cursor->neg_infinity) {
|
||||||
result = true; // all keys are greater than -infinity
|
result = true; // all keys are greater than -infinity
|
||||||
@@ -175,7 +176,7 @@ toku_le_cursor_is_key_greater(LE_CURSOR le_cursor, const DBT *key) {
|
|||||||
// get the current position from the cursor and compare it to the given key.
|
// get the current position from the cursor and compare it to the given key.
|
||||||
DBT *cursor_key = &le_cursor->ft_cursor->key;
|
DBT *cursor_key = &le_cursor->ft_cursor->key;
|
||||||
int r = keycompare(&le_cursor->fake_db, cursor_key, key);
|
int r = keycompare(&le_cursor->fake_db, cursor_key, key);
|
||||||
if (r < 0) {
|
if (r <= 0) {
|
||||||
result = true; // key is right of the cursor key
|
result = true; // key is right of the cursor key
|
||||||
} else {
|
} else {
|
||||||
result = false; // key is at or left of the cursor key
|
result = false; // key is at or left of the cursor key
|
||||||
@@ -183,3 +184,17 @@ toku_le_cursor_is_key_greater(LE_CURSOR le_cursor, const DBT *key) {
|
|||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_le_cursor_update_estimate(LE_CURSOR le_cursor, DBT* estimate) {
|
||||||
|
// don't handle these edge cases, not worth it.
|
||||||
|
// estimate stays same
|
||||||
|
if (le_cursor->pos_infinity || le_cursor->neg_infinity) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
DBT *cursor_key = &le_cursor->ft_cursor->key;
|
||||||
|
estimate->data = toku_xrealloc(estimate->data, cursor_key->size);
|
||||||
|
memcpy(estimate->data, cursor_key->data, cursor_key->size);
|
||||||
|
estimate->size = cursor_key->size;
|
||||||
|
estimate->flags = DB_DBT_REALLOC;
|
||||||
|
}
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -121,6 +122,10 @@ int toku_le_cursor_next(LE_CURSOR le_cursor, FT_GET_CALLBACK_FUNCTION getf, void
|
|||||||
// The LE_CURSOR position is intialized to -infinity. Any key comparision with -infinity returns true.
|
// The LE_CURSOR position is intialized to -infinity. Any key comparision with -infinity returns true.
|
||||||
// When the cursor runs off the right edge of the tree, the LE_CURSOR position is set to +infinity. Any key comparision with +infinity
|
// When the cursor runs off the right edge of the tree, the LE_CURSOR position is set to +infinity. Any key comparision with +infinity
|
||||||
// returns false.
|
// returns false.
|
||||||
bool toku_le_cursor_is_key_greater(LE_CURSOR le_cursor, const DBT *key);
|
bool toku_le_cursor_is_key_greater_or_equal(LE_CURSOR le_cursor, const DBT *key);
|
||||||
|
|
||||||
|
// extracts position of le_cursor into estimate. Responsibility of caller to handle
|
||||||
|
// thread safety. Caller (the indexer), does so by ensuring indexer lock is held
|
||||||
|
void toku_le_cursor_update_estimate(LE_CURSOR le_cursor, DBT* estimate);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -91,10 +92,6 @@ PATENT RIGHTS GRANT:
|
|||||||
#include "wbuf.h"
|
#include "wbuf.h"
|
||||||
#include "leafentry.h"
|
#include "leafentry.h"
|
||||||
|
|
||||||
void wbuf_LEAFENTRY(struct wbuf *w, LEAFENTRY le) {
|
|
||||||
wbuf_literal_bytes(w, le, leafentry_disksize(le));
|
|
||||||
}
|
|
||||||
|
|
||||||
void wbuf_nocrc_LEAFENTRY(struct wbuf *w, LEAFENTRY le) {
|
void wbuf_nocrc_LEAFENTRY(struct wbuf *w, LEAFENTRY le) {
|
||||||
wbuf_nocrc_literal_bytes(w, le, leafentry_disksize(le));
|
wbuf_nocrc_literal_bytes(w, le, leafentry_disksize(le));
|
||||||
}
|
}
|
||||||
|
@@ -54,6 +54,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -132,18 +133,17 @@ PATENT RIGHTS GRANT:
|
|||||||
enum { LE_CLEAN = 0, LE_MVCC = 1 };
|
enum { LE_CLEAN = 0, LE_MVCC = 1 };
|
||||||
|
|
||||||
// This is an on-disk format. static_asserts verify everything is packed and aligned correctly.
|
// This is an on-disk format. static_asserts verify everything is packed and aligned correctly.
|
||||||
struct __attribute__ ((__packed__)) leafentry {
|
struct leafentry {
|
||||||
struct leafentry_clean {
|
struct leafentry_clean {
|
||||||
uint32_t vallen;
|
uint32_t vallen;
|
||||||
uint8_t key_val[0]; //Actual key, then actual val
|
uint8_t val[0]; //actual val
|
||||||
}; // For the case where LEAFENTRY->type is LE_CLEAN
|
}; // For the case where LEAFENTRY->type is LE_CLEAN
|
||||||
static_assert(4 == sizeof(leafentry::leafentry_clean), "leafentry_clean size is wrong");
|
static_assert(4 == sizeof(leafentry::leafentry_clean), "leafentry_clean size is wrong");
|
||||||
static_assert(4 == __builtin_offsetof(leafentry::leafentry_clean, key_val), "key_val is in the wrong place");
|
static_assert(4 == __builtin_offsetof(leafentry::leafentry_clean, val), "val is in the wrong place");
|
||||||
struct __attribute__ ((__packed__)) leafentry_mvcc {
|
struct __attribute__ ((__packed__)) leafentry_mvcc {
|
||||||
uint32_t num_cxrs; // number of committed transaction records
|
uint32_t num_cxrs; // number of committed transaction records
|
||||||
uint8_t num_pxrs; // number of provisional transaction records
|
uint8_t num_pxrs; // number of provisional transaction records
|
||||||
uint8_t key_xrs[0]; //Actual key,
|
uint8_t xrs[0]; //then TXNIDs of XRs relevant for reads:
|
||||||
//then TXNIDs of XRs relevant for reads:
|
|
||||||
// if provisional XRs exist, store OUTERMOST TXNID
|
// if provisional XRs exist, store OUTERMOST TXNID
|
||||||
// store committed TXNIDs, from most recently committed to least recently committed (newest first)
|
// store committed TXNIDs, from most recently committed to least recently committed (newest first)
|
||||||
//then lengths of XRs relevant for reads (length is at most 1<<31, MSB is 1 for insert, 0 for delete):
|
//then lengths of XRs relevant for reads (length is at most 1<<31, MSB is 1 for insert, 0 for delete):
|
||||||
@@ -167,37 +167,33 @@ struct __attribute__ ((__packed__)) leafentry {
|
|||||||
// (innermost data and length with insert/delete flag are stored above, cannot be a placeholder)
|
// (innermost data and length with insert/delete flag are stored above, cannot be a placeholder)
|
||||||
}; // For the case where LEAFENTRY->type is LE_MVCC
|
}; // For the case where LEAFENTRY->type is LE_MVCC
|
||||||
static_assert(5 == sizeof(leafentry::leafentry_mvcc), "leafentry_mvcc size is wrong");
|
static_assert(5 == sizeof(leafentry::leafentry_mvcc), "leafentry_mvcc size is wrong");
|
||||||
static_assert(5 == __builtin_offsetof(leafentry::leafentry_mvcc, key_xrs), "key_xrs is in the wrong place");
|
static_assert(5 == __builtin_offsetof(leafentry::leafentry_mvcc, xrs), "xrs is in the wrong place");
|
||||||
|
|
||||||
uint8_t type; // type is LE_CLEAN or LE_MVCC
|
uint8_t type; // type is LE_CLEAN or LE_MVCC
|
||||||
uint32_t keylen;
|
//uint32_t keylen;
|
||||||
union __attribute__ ((__packed__)) {
|
union __attribute__ ((__packed__)) {
|
||||||
struct leafentry_clean clean;
|
struct leafentry_clean clean;
|
||||||
struct leafentry_mvcc mvcc;
|
struct leafentry_mvcc mvcc;
|
||||||
} u;
|
} u;
|
||||||
};
|
};
|
||||||
static_assert(10 == sizeof(leafentry), "leafentry size is wrong");
|
static_assert(6 == sizeof(leafentry), "leafentry size is wrong");
|
||||||
static_assert(5 == __builtin_offsetof(leafentry, u), "union is in the wrong place");
|
static_assert(1 == __builtin_offsetof(leafentry, u), "union is in the wrong place");
|
||||||
|
|
||||||
#define LE_CLEAN_MEMSIZE(_keylen, _vallen) \
|
#define LE_CLEAN_MEMSIZE(_vallen) \
|
||||||
(sizeof(((LEAFENTRY)NULL)->type) /* type */ \
|
(sizeof(((LEAFENTRY)NULL)->type) /* type */ \
|
||||||
+sizeof(((LEAFENTRY)NULL)->keylen) /* keylen */ \
|
|
||||||
+sizeof(((LEAFENTRY)NULL)->u.clean.vallen) /* vallen */ \
|
+sizeof(((LEAFENTRY)NULL)->u.clean.vallen) /* vallen */ \
|
||||||
+(_keylen) /* actual key */ \
|
|
||||||
+(_vallen)) /* actual val */
|
+(_vallen)) /* actual val */
|
||||||
|
|
||||||
#define LE_MVCC_COMMITTED_HEADER_MEMSIZE \
|
#define LE_MVCC_COMMITTED_HEADER_MEMSIZE \
|
||||||
(sizeof(((LEAFENTRY)NULL)->type) /* type */ \
|
(sizeof(((LEAFENTRY)NULL)->type) /* type */ \
|
||||||
+sizeof(((LEAFENTRY)NULL)->keylen) /* keylen */ \
|
|
||||||
+sizeof(((LEAFENTRY)NULL)->u.mvcc.num_cxrs) /* committed */ \
|
+sizeof(((LEAFENTRY)NULL)->u.mvcc.num_cxrs) /* committed */ \
|
||||||
+sizeof(((LEAFENTRY)NULL)->u.mvcc.num_pxrs) /* provisional */ \
|
+sizeof(((LEAFENTRY)NULL)->u.mvcc.num_pxrs) /* provisional */ \
|
||||||
+sizeof(TXNID) /* transaction */ \
|
+sizeof(TXNID) /* transaction */ \
|
||||||
+sizeof(uint32_t) /* length+bit */ \
|
+sizeof(uint32_t) /* length+bit */ \
|
||||||
+sizeof(uint32_t)) /* length+bit */
|
+sizeof(uint32_t)) /* length+bit */
|
||||||
|
|
||||||
#define LE_MVCC_COMMITTED_MEMSIZE(_keylen, _vallen) \
|
#define LE_MVCC_COMMITTED_MEMSIZE(_vallen) \
|
||||||
(LE_MVCC_COMMITTED_HEADER_MEMSIZE \
|
(LE_MVCC_COMMITTED_HEADER_MEMSIZE \
|
||||||
+(_keylen) /* actual key */ \
|
|
||||||
+(_vallen)) /* actual val */
|
+(_vallen)) /* actual val */
|
||||||
|
|
||||||
|
|
||||||
@@ -208,25 +204,20 @@ typedef struct leafentry_13 *LEAFENTRY_13;
|
|||||||
// TODO: consistency among names is very poor.
|
// TODO: consistency among names is very poor.
|
||||||
//
|
//
|
||||||
|
|
||||||
|
// TODO: rename this helper function for deserialization
|
||||||
|
size_t leafentry_rest_memsize(uint32_t num_puxrs, uint32_t num_cuxrs, uint8_t* start);
|
||||||
size_t leafentry_memsize (LEAFENTRY le); // the size of a leafentry in memory.
|
size_t leafentry_memsize (LEAFENTRY le); // the size of a leafentry in memory.
|
||||||
size_t leafentry_disksize (LEAFENTRY le); // this is the same as logsizeof_LEAFENTRY. The size of a leafentry on disk.
|
size_t leafentry_disksize (LEAFENTRY le); // this is the same as logsizeof_LEAFENTRY. The size of a leafentry on disk.
|
||||||
void wbuf_LEAFENTRY(struct wbuf *w, LEAFENTRY le);
|
|
||||||
void wbuf_nocrc_LEAFENTRY(struct wbuf *w, LEAFENTRY le);
|
void wbuf_nocrc_LEAFENTRY(struct wbuf *w, LEAFENTRY le);
|
||||||
int print_leafentry (FILE *outf, LEAFENTRY v); // Print a leafentry out in human-readable form.
|
int print_klpair (FILE *outf, const void* key, uint32_t keylen, LEAFENTRY v); // Print a leafentry out in human-readable form.
|
||||||
|
|
||||||
int le_latest_is_del(LEAFENTRY le); // Return true if it is a provisional delete.
|
int le_latest_is_del(LEAFENTRY le); // Return true if it is a provisional delete.
|
||||||
bool le_is_clean(LEAFENTRY le); //Return how many xids exist (0 does not count)
|
bool le_is_clean(LEAFENTRY le); //Return how many xids exist (0 does not count)
|
||||||
bool le_has_xids(LEAFENTRY le, XIDS xids); // Return true transaction represented by xids is still provisional in this leafentry (le's xid stack is a superset or equal to xids)
|
bool le_has_xids(LEAFENTRY le, XIDS xids); // Return true transaction represented by xids is still provisional in this leafentry (le's xid stack is a superset or equal to xids)
|
||||||
uint32_t le_latest_keylen (LEAFENTRY le); // Return the latest keylen.
|
|
||||||
void* le_latest_val (LEAFENTRY le); // Return the latest val (return NULL for provisional deletes)
|
void* le_latest_val (LEAFENTRY le); // Return the latest val (return NULL for provisional deletes)
|
||||||
uint32_t le_latest_vallen (LEAFENTRY le); // Return the latest vallen. Returns 0 for provisional deletes.
|
uint32_t le_latest_vallen (LEAFENTRY le); // Return the latest vallen. Returns 0 for provisional deletes.
|
||||||
void* le_latest_val_and_len (LEAFENTRY le, uint32_t *len);
|
void* le_latest_val_and_len (LEAFENTRY le, uint32_t *len);
|
||||||
|
|
||||||
// Return any key or value (even if it's only provisional).
|
|
||||||
void* le_key (LEAFENTRY le);
|
|
||||||
uint32_t le_keylen (LEAFENTRY le);
|
|
||||||
void* le_key_and_len (LEAFENTRY le, uint32_t *len);
|
|
||||||
|
|
||||||
uint64_t le_outermost_uncommitted_xid (LEAFENTRY le);
|
uint64_t le_outermost_uncommitted_xid (LEAFENTRY le);
|
||||||
|
|
||||||
//Callback contract:
|
//Callback contract:
|
||||||
@@ -246,30 +237,30 @@ leafentry_disksize_13(LEAFENTRY_13 le);
|
|||||||
|
|
||||||
int
|
int
|
||||||
toku_le_upgrade_13_14(LEAFENTRY_13 old_leafentry, // NULL if there was no stored data.
|
toku_le_upgrade_13_14(LEAFENTRY_13 old_leafentry, // NULL if there was no stored data.
|
||||||
|
void** keyp,
|
||||||
|
uint32_t* keylen,
|
||||||
size_t *new_leafentry_memorysize,
|
size_t *new_leafentry_memorysize,
|
||||||
LEAFENTRY *new_leafentry_p,
|
LEAFENTRY *new_leafentry_p);
|
||||||
OMT *omtp,
|
|
||||||
struct mempool *mp);
|
|
||||||
|
|
||||||
void toku_le_apply_msg(FT_MSG msg,
|
void
|
||||||
|
toku_le_apply_msg(FT_MSG msg,
|
||||||
LEAFENTRY old_leafentry, // NULL if there was no stored data.
|
LEAFENTRY old_leafentry, // NULL if there was no stored data.
|
||||||
|
bn_data* data_buffer, // bn_data storing leafentry, if NULL, means there is no bn_data
|
||||||
|
uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced
|
||||||
TXNID oldest_referenced_xid,
|
TXNID oldest_referenced_xid,
|
||||||
GC_INFO gc_info,
|
GC_INFO gc_info,
|
||||||
size_t *new_leafentry_memorysize,
|
|
||||||
LEAFENTRY *new_leafentry_p,
|
LEAFENTRY *new_leafentry_p,
|
||||||
OMT *omtp,
|
|
||||||
struct mempool *mp,
|
|
||||||
void **maybe_free,
|
|
||||||
int64_t * numbytes_delta_p);
|
int64_t * numbytes_delta_p);
|
||||||
|
|
||||||
bool toku_le_worth_running_garbage_collection(LEAFENTRY le, TXNID oldest_referenced_xid_known);
|
bool toku_le_worth_running_garbage_collection(LEAFENTRY le, TXNID oldest_referenced_xid_known);
|
||||||
|
|
||||||
void toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
|
void
|
||||||
|
toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
|
||||||
|
bn_data* data_buffer,
|
||||||
|
uint32_t idx,
|
||||||
|
void* keyp,
|
||||||
|
uint32_t keylen,
|
||||||
LEAFENTRY *new_leaf_entry,
|
LEAFENTRY *new_leaf_entry,
|
||||||
size_t *new_leaf_entry_memory_size,
|
|
||||||
OMT *omtp,
|
|
||||||
struct mempool *mp,
|
|
||||||
void **maybe_free,
|
|
||||||
const xid_omt_t &snapshot_xids,
|
const xid_omt_t &snapshot_xids,
|
||||||
const rx_omt_t &referenced_xids,
|
const rx_omt_t &referenced_xids,
|
||||||
const xid_omt_t &live_root_txns,
|
const xid_omt_t &live_root_txns,
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -53,6 +53,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -173,9 +174,6 @@ struct tokulogger {
|
|||||||
|
|
||||||
uint32_t write_block_size; // How big should the blocks be written to various logs?
|
uint32_t write_block_size; // How big should the blocks be written to various logs?
|
||||||
|
|
||||||
uint64_t input_lock_ctr; // how many times has input_lock been taken and released
|
|
||||||
uint64_t output_condition_lock_ctr; // how many times has output_condition_lock been taken and released
|
|
||||||
uint64_t swap_ctr; // how many times have input/output log buffers been swapped
|
|
||||||
uint64_t num_writes_to_disk; // how many times did we write to disk?
|
uint64_t num_writes_to_disk; // how many times did we write to disk?
|
||||||
uint64_t bytes_written_to_disk; // how many bytes have been written to disk?
|
uint64_t bytes_written_to_disk; // how many bytes have been written to disk?
|
||||||
tokutime_t time_spent_writing_to_disk; // how much tokutime did we spend writing to disk?
|
tokutime_t time_spent_writing_to_disk; // how much tokutime did we spend writing to disk?
|
||||||
@@ -284,6 +282,7 @@ struct tokutxn {
|
|||||||
TOKUTXN_STATE state;
|
TOKUTXN_STATE state;
|
||||||
uint32_t num_pin; // number of threads (all hot indexes) that want this
|
uint32_t num_pin; // number of threads (all hot indexes) that want this
|
||||||
// txn to not transition to commit or abort
|
// txn to not transition to commit or abort
|
||||||
|
uint64_t client_id;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline int
|
static inline int
|
||||||
|
@@ -53,6 +53,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -53,6 +53,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -53,6 +53,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -192,9 +193,6 @@ int toku_logger_create (TOKULOGGER *resultp) {
|
|||||||
ml_init(&result->input_lock);
|
ml_init(&result->input_lock);
|
||||||
toku_mutex_init(&result->output_condition_lock, NULL);
|
toku_mutex_init(&result->output_condition_lock, NULL);
|
||||||
toku_cond_init(&result->output_condition, NULL);
|
toku_cond_init(&result->output_condition, NULL);
|
||||||
result->input_lock_ctr = 0;
|
|
||||||
result->output_condition_lock_ctr = 0;
|
|
||||||
result->swap_ctr = 0;
|
|
||||||
result->rollback_cachefile = NULL;
|
result->rollback_cachefile = NULL;
|
||||||
result->output_is_available = true;
|
result->output_is_available = true;
|
||||||
toku_txn_manager_init(&result->txn_manager);
|
toku_txn_manager_init(&result->txn_manager);
|
||||||
@@ -350,7 +348,6 @@ int toku_logger_close(TOKULOGGER *loggerp) {
|
|||||||
goto is_closed;
|
goto is_closed;
|
||||||
}
|
}
|
||||||
ml_lock(&logger->input_lock);
|
ml_lock(&logger->input_lock);
|
||||||
logger->input_lock_ctr++;
|
|
||||||
LSN fsynced_lsn;
|
LSN fsynced_lsn;
|
||||||
grab_output(logger, &fsynced_lsn);
|
grab_output(logger, &fsynced_lsn);
|
||||||
logger_write_buffer(logger, &fsynced_lsn);
|
logger_write_buffer(logger, &fsynced_lsn);
|
||||||
@@ -437,13 +434,11 @@ grab_output(TOKULOGGER logger, LSN *fsynced_lsn)
|
|||||||
// Exit: Hold permission to modify output (but none of the locks).
|
// Exit: Hold permission to modify output (but none of the locks).
|
||||||
{
|
{
|
||||||
toku_mutex_lock(&logger->output_condition_lock);
|
toku_mutex_lock(&logger->output_condition_lock);
|
||||||
logger->output_condition_lock_ctr++;
|
|
||||||
wait_till_output_available(logger);
|
wait_till_output_available(logger);
|
||||||
logger->output_is_available = false;
|
logger->output_is_available = false;
|
||||||
if (fsynced_lsn) {
|
if (fsynced_lsn) {
|
||||||
*fsynced_lsn = logger->fsynced_lsn;
|
*fsynced_lsn = logger->fsynced_lsn;
|
||||||
}
|
}
|
||||||
logger->output_condition_lock_ctr++;
|
|
||||||
toku_mutex_unlock(&logger->output_condition_lock);
|
toku_mutex_unlock(&logger->output_condition_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -458,7 +453,6 @@ wait_till_output_already_written_or_output_buffer_available (TOKULOGGER logger,
|
|||||||
{
|
{
|
||||||
bool result;
|
bool result;
|
||||||
toku_mutex_lock(&logger->output_condition_lock);
|
toku_mutex_lock(&logger->output_condition_lock);
|
||||||
logger->output_condition_lock_ctr++;
|
|
||||||
while (1) {
|
while (1) {
|
||||||
if (logger->fsynced_lsn.lsn >= lsn.lsn) { // we can look at the fsynced lsn since we have the lock.
|
if (logger->fsynced_lsn.lsn >= lsn.lsn) { // we can look at the fsynced lsn since we have the lock.
|
||||||
result = true;
|
result = true;
|
||||||
@@ -473,7 +467,6 @@ wait_till_output_already_written_or_output_buffer_available (TOKULOGGER logger,
|
|||||||
toku_cond_wait(&logger->output_condition, &logger->output_condition_lock);
|
toku_cond_wait(&logger->output_condition, &logger->output_condition_lock);
|
||||||
}
|
}
|
||||||
*fsynced_lsn = logger->fsynced_lsn;
|
*fsynced_lsn = logger->fsynced_lsn;
|
||||||
logger->output_condition_lock_ctr++;
|
|
||||||
toku_mutex_unlock(&logger->output_condition_lock);
|
toku_mutex_unlock(&logger->output_condition_lock);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@@ -485,13 +478,11 @@ release_output (TOKULOGGER logger, LSN fsynced_lsn)
|
|||||||
// Exit: Holds neither locks nor output permission.
|
// Exit: Holds neither locks nor output permission.
|
||||||
{
|
{
|
||||||
toku_mutex_lock(&logger->output_condition_lock);
|
toku_mutex_lock(&logger->output_condition_lock);
|
||||||
logger->output_condition_lock_ctr++;
|
|
||||||
logger->output_is_available = true;
|
logger->output_is_available = true;
|
||||||
if (logger->fsynced_lsn.lsn < fsynced_lsn.lsn) {
|
if (logger->fsynced_lsn.lsn < fsynced_lsn.lsn) {
|
||||||
logger->fsynced_lsn = fsynced_lsn;
|
logger->fsynced_lsn = fsynced_lsn;
|
||||||
}
|
}
|
||||||
toku_cond_broadcast(&logger->output_condition);
|
toku_cond_broadcast(&logger->output_condition);
|
||||||
logger->output_condition_lock_ctr++;
|
|
||||||
toku_mutex_unlock(&logger->output_condition_lock);
|
toku_mutex_unlock(&logger->output_condition_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -504,7 +495,6 @@ swap_inbuf_outbuf (TOKULOGGER logger)
|
|||||||
logger->inbuf = logger->outbuf;
|
logger->inbuf = logger->outbuf;
|
||||||
logger->outbuf = tmp;
|
logger->outbuf = tmp;
|
||||||
assert(logger->inbuf.n_in_buf == 0);
|
assert(logger->inbuf.n_in_buf == 0);
|
||||||
logger->swap_ctr++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@@ -549,13 +539,11 @@ toku_logger_make_space_in_inbuf (TOKULOGGER logger, int n_bytes_needed)
|
|||||||
if (logger->inbuf.n_in_buf + n_bytes_needed <= LOGGER_MIN_BUF_SIZE) {
|
if (logger->inbuf.n_in_buf + n_bytes_needed <= LOGGER_MIN_BUF_SIZE) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
logger->input_lock_ctr++;
|
|
||||||
ml_unlock(&logger->input_lock);
|
ml_unlock(&logger->input_lock);
|
||||||
LSN fsynced_lsn;
|
LSN fsynced_lsn;
|
||||||
grab_output(logger, &fsynced_lsn);
|
grab_output(logger, &fsynced_lsn);
|
||||||
|
|
||||||
ml_lock(&logger->input_lock);
|
ml_lock(&logger->input_lock);
|
||||||
logger->input_lock_ctr++;
|
|
||||||
// Some other thread may have written the log out while we didn't have the lock. If we have space now, then be happy.
|
// Some other thread may have written the log out while we didn't have the lock. If we have space now, then be happy.
|
||||||
if (logger->inbuf.n_in_buf + n_bytes_needed <= LOGGER_MIN_BUF_SIZE) {
|
if (logger->inbuf.n_in_buf + n_bytes_needed <= LOGGER_MIN_BUF_SIZE) {
|
||||||
release_output(logger, fsynced_lsn);
|
release_output(logger, fsynced_lsn);
|
||||||
@@ -823,7 +811,6 @@ void toku_logger_maybe_fsync(TOKULOGGER logger, LSN lsn, int do_fsync, bool hold
|
|||||||
// The input lock may be released and then reacquired. Thus this function does not run atomically with respect to other threads.
|
// The input lock may be released and then reacquired. Thus this function does not run atomically with respect to other threads.
|
||||||
{
|
{
|
||||||
if (holds_input_lock) {
|
if (holds_input_lock) {
|
||||||
logger->input_lock_ctr++;
|
|
||||||
ml_unlock(&logger->input_lock);
|
ml_unlock(&logger->input_lock);
|
||||||
}
|
}
|
||||||
if (do_fsync) {
|
if (do_fsync) {
|
||||||
@@ -837,11 +824,9 @@ void toku_logger_maybe_fsync(TOKULOGGER logger, LSN lsn, int do_fsync, bool hold
|
|||||||
// otherwise we now own the output permission, and our lsn isn't outputed.
|
// otherwise we now own the output permission, and our lsn isn't outputed.
|
||||||
|
|
||||||
ml_lock(&logger->input_lock);
|
ml_lock(&logger->input_lock);
|
||||||
logger->input_lock_ctr++;
|
|
||||||
|
|
||||||
swap_inbuf_outbuf(logger);
|
swap_inbuf_outbuf(logger);
|
||||||
|
|
||||||
logger->input_lock_ctr++;
|
|
||||||
ml_unlock(&logger->input_lock); // release the input lock now, so other threads can fill the inbuf. (Thus enabling group commit.)
|
ml_unlock(&logger->input_lock); // release the input lock now, so other threads can fill the inbuf. (Thus enabling group commit.)
|
||||||
|
|
||||||
write_outbuf_to_logfile(logger, &fsynced_lsn);
|
write_outbuf_to_logfile(logger, &fsynced_lsn);
|
||||||
@@ -867,7 +852,6 @@ logger_write_buffer(TOKULOGGER logger, LSN *fsynced_lsn)
|
|||||||
// Note: Only called during single-threaded activity from toku_logger_restart, so locks aren't really needed.
|
// Note: Only called during single-threaded activity from toku_logger_restart, so locks aren't really needed.
|
||||||
{
|
{
|
||||||
swap_inbuf_outbuf(logger);
|
swap_inbuf_outbuf(logger);
|
||||||
logger->input_lock_ctr++;
|
|
||||||
ml_unlock(&logger->input_lock);
|
ml_unlock(&logger->input_lock);
|
||||||
write_outbuf_to_logfile(logger, fsynced_lsn);
|
write_outbuf_to_logfile(logger, fsynced_lsn);
|
||||||
if (logger->write_log_files) {
|
if (logger->write_log_files) {
|
||||||
@@ -885,7 +869,6 @@ int toku_logger_restart(TOKULOGGER logger, LSN lastlsn)
|
|||||||
LSN fsynced_lsn;
|
LSN fsynced_lsn;
|
||||||
grab_output(logger, &fsynced_lsn);
|
grab_output(logger, &fsynced_lsn);
|
||||||
ml_lock(&logger->input_lock);
|
ml_lock(&logger->input_lock);
|
||||||
logger->input_lock_ctr++;
|
|
||||||
logger_write_buffer(logger, &fsynced_lsn);
|
logger_write_buffer(logger, &fsynced_lsn);
|
||||||
|
|
||||||
// close the log file
|
// close the log file
|
||||||
@@ -1112,7 +1095,7 @@ int toku_logprint_TXNID_PAIR (FILE *outf, FILE *inf, const char *fieldname, stru
|
|||||||
TXNID_PAIR v;
|
TXNID_PAIR v;
|
||||||
int r = toku_fread_TXNID_PAIR(inf, &v, checksum, len);
|
int r = toku_fread_TXNID_PAIR(inf, &v, checksum, len);
|
||||||
if (r!=0) return r;
|
if (r!=0) return r;
|
||||||
fprintf(outf, " %s=%" PRIu64 "%" PRIu64, fieldname, v.parent_id64, v.child_id64);
|
fprintf(outf, " %s=%" PRIu64 ",%" PRIu64, fieldname, v.parent_id64, v.child_id64);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1410,9 +1393,6 @@ status_init(void) {
|
|||||||
// Note, this function initializes the keyname, type, and legend fields.
|
// Note, this function initializes the keyname, type, and legend fields.
|
||||||
// Value fields are initialized to zero by compiler.
|
// Value fields are initialized to zero by compiler.
|
||||||
STATUS_INIT(LOGGER_NEXT_LSN, nullptr, UINT64, "next LSN", TOKU_ENGINE_STATUS);
|
STATUS_INIT(LOGGER_NEXT_LSN, nullptr, UINT64, "next LSN", TOKU_ENGINE_STATUS);
|
||||||
STATUS_INIT(LOGGER_ILOCK_CTR, nullptr, UINT64, "ilock count", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(LOGGER_OLOCK_CTR, nullptr, UINT64, "olock count", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(LOGGER_SWAP_CTR, nullptr, UINT64, "swap count", TOKU_ENGINE_STATUS);
|
|
||||||
STATUS_INIT(LOGGER_NUM_WRITES, LOGGER_WRITES, UINT64, "writes", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
STATUS_INIT(LOGGER_NUM_WRITES, LOGGER_WRITES, UINT64, "writes", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
||||||
STATUS_INIT(LOGGER_BYTES_WRITTEN, LOGGER_WRITES_BYTES, UINT64, "writes (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
STATUS_INIT(LOGGER_BYTES_WRITTEN, LOGGER_WRITES_BYTES, UINT64, "writes (bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
||||||
STATUS_INIT(LOGGER_UNCOMPRESSED_BYTES_WRITTEN, LOGGER_WRITES_UNCOMPRESSED_BYTES, UINT64, "writes (uncompressed bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
STATUS_INIT(LOGGER_UNCOMPRESSED_BYTES_WRITTEN, LOGGER_WRITES_UNCOMPRESSED_BYTES, UINT64, "writes (uncompressed bytes)", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
|
||||||
@@ -1429,9 +1409,6 @@ toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS statp) {
|
|||||||
status_init();
|
status_init();
|
||||||
if (logger) {
|
if (logger) {
|
||||||
STATUS_VALUE(LOGGER_NEXT_LSN) = logger->lsn.lsn;
|
STATUS_VALUE(LOGGER_NEXT_LSN) = logger->lsn.lsn;
|
||||||
STATUS_VALUE(LOGGER_ILOCK_CTR) = logger->input_lock_ctr;
|
|
||||||
STATUS_VALUE(LOGGER_OLOCK_CTR) = logger->output_condition_lock_ctr;
|
|
||||||
STATUS_VALUE(LOGGER_SWAP_CTR) = logger->swap_ctr;
|
|
||||||
STATUS_VALUE(LOGGER_NUM_WRITES) = logger->num_writes_to_disk;
|
STATUS_VALUE(LOGGER_NUM_WRITES) = logger->num_writes_to_disk;
|
||||||
STATUS_VALUE(LOGGER_BYTES_WRITTEN) = logger->bytes_written_to_disk;
|
STATUS_VALUE(LOGGER_BYTES_WRITTEN) = logger->bytes_written_to_disk;
|
||||||
// No compression on logfiles so the uncompressed size is just number of bytes written
|
// No compression on logfiles so the uncompressed size is just number of bytes written
|
||||||
|
@@ -53,6 +53,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -239,9 +240,6 @@ void toku_logger_maybe_fsync (TOKULOGGER logger, LSN lsn, int do_fsync, bool hol
|
|||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
LOGGER_NEXT_LSN = 0,
|
LOGGER_NEXT_LSN = 0,
|
||||||
LOGGER_ILOCK_CTR,
|
|
||||||
LOGGER_OLOCK_CTR,
|
|
||||||
LOGGER_SWAP_CTR,
|
|
||||||
LOGGER_NUM_WRITES,
|
LOGGER_NUM_WRITES,
|
||||||
LOGGER_BYTES_WRITTEN,
|
LOGGER_BYTES_WRITTEN,
|
||||||
LOGGER_UNCOMPRESSED_BYTES_WRITTEN,
|
LOGGER_UNCOMPRESSED_BYTES_WRITTEN,
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -53,6 +53,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -49,6 +49,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -144,10 +145,15 @@ minicron_do (void *pv)
|
|||||||
wakeup_at.tv_nsec += (p->period_in_ms % 1000) * 1000000;
|
wakeup_at.tv_nsec += (p->period_in_ms % 1000) * 1000000;
|
||||||
toku_timespec_t now;
|
toku_timespec_t now;
|
||||||
toku_gettime(&now);
|
toku_gettime(&now);
|
||||||
|
int compare = timespec_compare(&wakeup_at, &now);
|
||||||
|
// if the time to wakeup has yet to come, then we sleep
|
||||||
|
// otherwise, we continue
|
||||||
|
if (compare > 0) {
|
||||||
int r = toku_cond_timedwait(&p->condvar, &p->mutex, &wakeup_at);
|
int r = toku_cond_timedwait(&p->condvar, &p->mutex, &wakeup_at);
|
||||||
if (r!=0 && r!=ETIMEDOUT) fprintf(stderr, "%s:%d r=%d (%s)", __FILE__, __LINE__, r, strerror(r));
|
if (r!=0 && r!=ETIMEDOUT) fprintf(stderr, "%s:%d r=%d (%s)", __FILE__, __LINE__, r, strerror(r));
|
||||||
assert(r==0 || r==ETIMEDOUT);
|
assert(r==0 || r==ETIMEDOUT);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
// Now we woke up, and we should figure out what to do
|
// Now we woke up, and we should figure out what to do
|
||||||
if (p->do_shutdown) {
|
if (p->do_shutdown) {
|
||||||
toku_mutex_unlock(&p->mutex);
|
toku_mutex_unlock(&p->mutex);
|
||||||
@@ -160,13 +166,12 @@ minicron_do (void *pv)
|
|||||||
time_to_call.tv_sec += p->period_in_ms/1000;
|
time_to_call.tv_sec += p->period_in_ms/1000;
|
||||||
time_to_call.tv_nsec += (p->period_in_ms % 1000) * 1000000;
|
time_to_call.tv_nsec += (p->period_in_ms % 1000) * 1000000;
|
||||||
int compare = timespec_compare(&time_to_call, &now);
|
int compare = timespec_compare(&time_to_call, &now);
|
||||||
//printf("compare(%.6f, %.6f)=%d\n", time_to_call.tv_sec + time_to_call.tv_nsec*1e-9, now.tv_sec+now.tv_nsec*1e-9, compare);
|
|
||||||
if (compare <= 0) {
|
if (compare <= 0) {
|
||||||
|
toku_gettime(&p->time_of_last_call_to_f); // the measured period includes the time to make the call.
|
||||||
toku_mutex_unlock(&p->mutex);
|
toku_mutex_unlock(&p->mutex);
|
||||||
int r = p->f(p->arg);
|
int r = p->f(p->arg);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
toku_mutex_lock(&p->mutex);
|
toku_mutex_lock(&p->mutex);
|
||||||
toku_gettime(&p->time_of_last_call_to_f); // the period is measured between calls to f.
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -49,6 +49,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -53,6 +53,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -53,6 +53,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -53,6 +53,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -53,6 +53,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
@@ -187,6 +188,8 @@ struct recover_env {
|
|||||||
ft_update_func update_function;
|
ft_update_func update_function;
|
||||||
generate_row_for_put_func generate_row_for_put;
|
generate_row_for_put_func generate_row_for_put;
|
||||||
generate_row_for_del_func generate_row_for_del;
|
generate_row_for_del_func generate_row_for_del;
|
||||||
|
DBT_ARRAY dest_keys;
|
||||||
|
DBT_ARRAY dest_vals;
|
||||||
struct scan_state ss;
|
struct scan_state ss;
|
||||||
struct file_map fmap;
|
struct file_map fmap;
|
||||||
bool goforward;
|
bool goforward;
|
||||||
@@ -306,6 +309,8 @@ static int recover_env_init (RECOVER_ENV renv,
|
|||||||
file_map_init(&renv->fmap);
|
file_map_init(&renv->fmap);
|
||||||
renv->goforward = false;
|
renv->goforward = false;
|
||||||
renv->cp = toku_cachetable_get_checkpointer(renv->ct);
|
renv->cp = toku_cachetable_get_checkpointer(renv->ct);
|
||||||
|
toku_dbt_array_init(&renv->dest_keys, 1);
|
||||||
|
toku_dbt_array_init(&renv->dest_vals, 1);
|
||||||
if (tokudb_recovery_trace)
|
if (tokudb_recovery_trace)
|
||||||
fprintf(stderr, "%s:%d\n", __FUNCTION__, __LINE__);
|
fprintf(stderr, "%s:%d\n", __FUNCTION__, __LINE__);
|
||||||
return r;
|
return r;
|
||||||
@@ -330,6 +335,8 @@ static void recover_env_cleanup (RECOVER_ENV renv) {
|
|||||||
} else {
|
} else {
|
||||||
toku_cachetable_close(&renv->ct);
|
toku_cachetable_close(&renv->ct);
|
||||||
}
|
}
|
||||||
|
toku_dbt_array_destroy(&renv->dest_keys);
|
||||||
|
toku_dbt_array_destroy(&renv->dest_vals);
|
||||||
|
|
||||||
if (tokudb_recovery_trace)
|
if (tokudb_recovery_trace)
|
||||||
fprintf(stderr, "%s:%d\n", __FUNCTION__, __LINE__);
|
fprintf(stderr, "%s:%d\n", __FUNCTION__, __LINE__);
|
||||||
@@ -1033,11 +1040,10 @@ static int toku_recover_enq_insert_multiple (struct logtype_enq_insert_multiple
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (do_inserts) {
|
if (do_inserts) {
|
||||||
DBT src_key, src_val, dest_key, dest_val;
|
DBT src_key, src_val;
|
||||||
|
|
||||||
toku_fill_dbt(&src_key, l->src_key.data, l->src_key.len);
|
toku_fill_dbt(&src_key, l->src_key.data, l->src_key.len);
|
||||||
toku_fill_dbt(&src_val, l->src_val.data, l->src_val.len);
|
toku_fill_dbt(&src_val, l->src_val.data, l->src_val.len);
|
||||||
toku_init_dbt_flags(&dest_key, DB_DBT_REALLOC);
|
|
||||||
toku_init_dbt_flags(&dest_val, DB_DBT_REALLOC);
|
|
||||||
|
|
||||||
for (uint32_t file = 0; file < l->dest_filenums.num; file++) {
|
for (uint32_t file = 0; file < l->dest_filenums.num; file++) {
|
||||||
struct file_map_tuple *tuple = NULL;
|
struct file_map_tuple *tuple = NULL;
|
||||||
@@ -1045,23 +1051,29 @@ static int toku_recover_enq_insert_multiple (struct logtype_enq_insert_multiple
|
|||||||
if (r==0) {
|
if (r==0) {
|
||||||
// We found the cachefile. (maybe) Do the insert.
|
// We found the cachefile. (maybe) Do the insert.
|
||||||
DB *db = &tuple->fake_db;
|
DB *db = &tuple->fake_db;
|
||||||
r = renv->generate_row_for_put(db, src_db, &dest_key, &dest_val, &src_key, &src_val);
|
|
||||||
|
DBT_ARRAY key_array;
|
||||||
|
DBT_ARRAY val_array;
|
||||||
|
if (db != src_db) {
|
||||||
|
r = renv->generate_row_for_put(db, src_db, &renv->dest_keys, &renv->dest_vals, &src_key, &src_val);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
toku_ft_maybe_insert(tuple->ft_handle, &dest_key, &dest_val, txn, true, l->lsn, false, FT_INSERT);
|
invariant(renv->dest_keys.size <= renv->dest_keys.capacity);
|
||||||
|
invariant(renv->dest_vals.size <= renv->dest_vals.capacity);
|
||||||
|
invariant(renv->dest_keys.size == renv->dest_vals.size);
|
||||||
|
key_array = renv->dest_keys;
|
||||||
|
val_array = renv->dest_vals;
|
||||||
|
} else {
|
||||||
|
key_array.size = key_array.capacity = 1;
|
||||||
|
key_array.dbts = &src_key;
|
||||||
|
|
||||||
//flags==0 means generate_row_for_put callback changed it
|
val_array.size = val_array.capacity = 1;
|
||||||
//(and freed any memory necessary to do so) so that values are now stored
|
val_array.dbts = &src_val;
|
||||||
//in temporary memory that does not need to be freed. We need to continue
|
}
|
||||||
//using DB_DBT_REALLOC however.
|
for (uint32_t i = 0; i < key_array.size; i++) {
|
||||||
if (dest_key.flags == 0)
|
toku_ft_maybe_insert(tuple->ft_handle, &key_array.dbts[i], &val_array.dbts[i], txn, true, l->lsn, false, FT_INSERT);
|
||||||
toku_init_dbt_flags(&dest_key, DB_DBT_REALLOC);
|
}
|
||||||
if (dest_val.flags == 0)
|
|
||||||
toku_init_dbt_flags(&dest_val, DB_DBT_REALLOC);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dest_key.data) toku_free(dest_key.data); //TODO: #2321 May need windows hack
|
|
||||||
if (dest_val.data) toku_free(dest_val.data); //TODO: #2321 May need windows hack
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@@ -1085,18 +1097,18 @@ static int toku_recover_enq_delete_multiple (struct logtype_enq_delete_multiple
|
|||||||
if (l->src_filenum.fileid == FILENUM_NONE.fileid)
|
if (l->src_filenum.fileid == FILENUM_NONE.fileid)
|
||||||
assert(r==DB_NOTFOUND);
|
assert(r==DB_NOTFOUND);
|
||||||
else {
|
else {
|
||||||
if (r == 0)
|
if (r == 0) {
|
||||||
src_db = &tuple->fake_db;
|
src_db = &tuple->fake_db;
|
||||||
else
|
} else {
|
||||||
do_deletes = false; // src file was probably deleted, #3129
|
do_deletes = false; // src file was probably deleted, #3129
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (do_deletes) {
|
if (do_deletes) {
|
||||||
DBT src_key, src_val, dest_key;
|
DBT src_key, src_val;
|
||||||
toku_fill_dbt(&src_key, l->src_key.data, l->src_key.len);
|
toku_fill_dbt(&src_key, l->src_key.data, l->src_key.len);
|
||||||
toku_fill_dbt(&src_val, l->src_val.data, l->src_val.len);
|
toku_fill_dbt(&src_val, l->src_val.data, l->src_val.len);
|
||||||
toku_init_dbt_flags(&dest_key, DB_DBT_REALLOC);
|
|
||||||
|
|
||||||
for (uint32_t file = 0; file < l->dest_filenums.num; file++) {
|
for (uint32_t file = 0; file < l->dest_filenums.num; file++) {
|
||||||
struct file_map_tuple *tuple = NULL;
|
struct file_map_tuple *tuple = NULL;
|
||||||
@@ -1104,18 +1116,22 @@ static int toku_recover_enq_delete_multiple (struct logtype_enq_delete_multiple
|
|||||||
if (r==0) {
|
if (r==0) {
|
||||||
// We found the cachefile. (maybe) Do the delete.
|
// We found the cachefile. (maybe) Do the delete.
|
||||||
DB *db = &tuple->fake_db;
|
DB *db = &tuple->fake_db;
|
||||||
r = renv->generate_row_for_del(db, src_db, &dest_key, &src_key, &src_val);
|
|
||||||
|
DBT_ARRAY key_array;
|
||||||
|
if (db != src_db) {
|
||||||
|
r = renv->generate_row_for_del(db, src_db, &renv->dest_keys, &src_key, &src_val);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
toku_ft_maybe_delete(tuple->ft_handle, &dest_key, txn, true, l->lsn, false);
|
invariant(renv->dest_keys.size <= renv->dest_keys.capacity);
|
||||||
|
key_array = renv->dest_keys;
|
||||||
//flags==0 indicates the return values are stored in temporary memory that does
|
} else {
|
||||||
//not need to be freed. We need to continue using DB_DBT_REALLOC however.
|
key_array.size = key_array.capacity = 1;
|
||||||
if (dest_key.flags == 0)
|
key_array.dbts = &src_key;
|
||||||
toku_init_dbt_flags(&dest_key, DB_DBT_REALLOC);
|
}
|
||||||
|
for (uint32_t i = 0; i < key_array.size; i++) {
|
||||||
|
toku_ft_maybe_delete(tuple->ft_handle, &key_array.dbts[i], txn, true, l->lsn, false);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dest_key.flags & DB_DBT_REALLOC && dest_key.data) toku_free(dest_key.data); //TODO: #2321 May need windows hack
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@@ -53,6 +53,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
@@ -50,6 +50,7 @@ UNIVERSITY PATENT NOTICE:
|
|||||||
PATENT MARKING NOTICE:
|
PATENT MARKING NOTICE:
|
||||||
|
|
||||||
This software is covered by US Patent No. 8,185,551.
|
This software is covered by US Patent No. 8,185,551.
|
||||||
|
This software is covered by US Patent No. 8,489,638.
|
||||||
|
|
||||||
PATENT RIGHTS GRANT:
|
PATENT RIGHTS GRANT:
|
||||||
|
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user