mirror of
https://github.com/MariaDB/server.git
synced 2025-04-18 21:44:20 +03:00
MDEV-19780 Remove the TokuDB storage engine
The TokuDB storage engine has been deprecated by upstream Percona Server 8.0 in favor of MyRocks and will not be available in subsequent major upstream releases. Let us remove it from MariaDB Server as well. MyRocks is actively maintained, and it can be used instead.
This commit is contained in:
parent
3b251e24b6
commit
7924158496
28
.gitignore
vendored
28
.gitignore
vendored
@ -228,34 +228,6 @@ storage/rocksdb/mysql_ldb
|
||||
storage/rocksdb/myrocks_hotbackup
|
||||
storage/rocksdb/rdb_source_revision.h
|
||||
storage/rocksdb/sst_dump
|
||||
storage/tokudb/PerconaFT/buildheader/db.h
|
||||
storage/tokudb/PerconaFT/buildheader/make_tdb
|
||||
storage/tokudb/PerconaFT/buildheader/runcat.sh
|
||||
storage/tokudb/PerconaFT/ft/log_code.cc
|
||||
storage/tokudb/PerconaFT/ft/log_header.h
|
||||
storage/tokudb/PerconaFT/ft/log_print.cc
|
||||
storage/tokudb/PerconaFT/ft/logformat
|
||||
storage/tokudb/PerconaFT/ft/ftverify
|
||||
storage/tokudb/PerconaFT/ft/tdb-recover
|
||||
storage/tokudb/PerconaFT/ft/tokuftdump
|
||||
storage/tokudb/PerconaFT/portability/merge_archives_tokuportability_static.cmake
|
||||
storage/tokudb/PerconaFT/portability/toku_config.h
|
||||
storage/tokudb/PerconaFT/portability/tokuportability_static_depends.cc
|
||||
storage/tokudb/PerconaFT/snappy/
|
||||
storage/tokudb/PerconaFT/src/merge_archives_tokufractaltree_static.cmake
|
||||
storage/tokudb/PerconaFT/src/tokufractaltree_static_depends.cc
|
||||
storage/tokudb/PerconaFT/toku_include/toku_config.h
|
||||
storage/tokudb/PerconaFT/tools/ba_replay
|
||||
storage/tokudb/PerconaFT/tools/ftverify
|
||||
storage/tokudb/PerconaFT/tools/tdb-recover
|
||||
storage/tokudb/PerconaFT/tools/tokudb_dump
|
||||
storage/tokudb/PerconaFT/tools/tokudb_gen
|
||||
storage/tokudb/PerconaFT/tools/tokudb_load
|
||||
storage/tokudb/PerconaFT/tools/tokuftdump
|
||||
storage/tokudb/PerconaFT/tools/tokuft_logprint
|
||||
storage/tokudb/PerconaFT/xz/
|
||||
storage/tokudb/tokudb.cnf
|
||||
storage/tokudb/tokudb.conf
|
||||
strings/conf_to_src
|
||||
support-files/MySQL-shared-compat.spec
|
||||
support-files/binary-configure
|
||||
|
@ -7,7 +7,7 @@ set -v -x
|
||||
|
||||
function exclude_modules() {
|
||||
# excludes for all
|
||||
CMAKE_OPT="${CMAKE_OPT} -DPLUGIN_TOKUDB=NO -DPLUGIN_MROONGA=NO -DPLUGIN_SPIDER=NO -DPLUGIN_OQGRAPH=NO -DPLUGIN_PERFSCHEMA=NO -DPLUGIN_SPHINX=NO"
|
||||
CMAKE_OPT="${CMAKE_OPT} -DPLUGIN_MROONGA=NO -DPLUGIN_SPIDER=NO -DPLUGIN_OQGRAPH=NO -DPLUGIN_PERFSCHEMA=NO -DPLUGIN_SPHINX=NO"
|
||||
# exclude storage engines not being tested in current job
|
||||
if [[ ! "${MYSQL_TEST_SUITES}" =~ "archive" ]]; then
|
||||
CMAKE_OPT="${CMAKE_OPT} -DPLUGIN_ARCHIVE=NO"
|
||||
|
@ -207,8 +207,7 @@ before_script:
|
||||
script:
|
||||
# following modules are disabled after sourcing .travis.compiler.sh:
|
||||
# clang disabled: mroonga just generates too many warnings with clang and travis stops the job
|
||||
# clang disabled: tokudb has fatal warnings
|
||||
# gcc/rpl: tokudb and mroonga
|
||||
# gcc/rpl: mroonga
|
||||
- source .travis.compiler.sh
|
||||
- cmake .
|
||||
-DCMAKE_BUILD_TYPE=${TYPE}
|
||||
|
@ -198,8 +198,6 @@ base_configs="$base_configs --with-extra-charsets=complex "
|
||||
base_configs="$base_configs --enable-thread-safe-client "
|
||||
base_configs="$base_configs --with-big-tables $maintainer_mode"
|
||||
base_configs="$base_configs --with-plugin-aria --with-aria-tmp-tables --with-plugin-s3=STATIC"
|
||||
# Following is to get tokudb to work
|
||||
base_configs="$base_configs --with-jemalloc=NO"
|
||||
|
||||
if test -d "$path/../cmd-line-utils/readline"
|
||||
then
|
||||
@ -303,7 +301,7 @@ gcov_compile_flags="$gcov_compile_flags -DMYSQL_SERVER_SUFFIX=-gcov -DHAVE_gcov"
|
||||
|
||||
#
|
||||
# The following plugins don't work on 32 bit systems
|
||||
disable_64_bit_plugins="--without-plugin-tokudb --without-plugin-rocksdb"
|
||||
disable_64_bit_plugins="--without-plugin-rocksdb"
|
||||
|
||||
|
||||
# GCC4 needs -fprofile-arcs -ftest-coverage on the linker command line (as well
|
||||
|
@ -81,7 +81,7 @@ SET(CPACK_RPM_SPEC_MORE_DEFINE "
|
||||
%filter_provides_in \\\\.\\\\(test\\\\|result\\\\|h\\\\|cc\\\\|c\\\\|inc\\\\|opt\\\\|ic\\\\|cnf\\\\|rdiff\\\\|cpp\\\\)$
|
||||
%filter_requires_in \\\\.\\\\(test\\\\|result\\\\|h\\\\|cc\\\\|c\\\\|inc\\\\|opt\\\\|ic\\\\|cnf\\\\|rdiff\\\\|cpp\\\\)$
|
||||
%filter_from_provides /perl(\\\\(mtr\\\\|My::\\\\)/d
|
||||
%filter_from_requires /\\\\(lib\\\\(ft\\\\|lzma\\\\|tokuportability\\\\)\\\\)\\\\|\\\\(perl(\\\\(.*mtr\\\\|My::\\\\|.*HandlerSocket\\\\|Mysql\\\\)\\\\)/d
|
||||
%filter_from_requires /\\\\(liblzma\\\\)\\\\|\\\\(perl(\\\\(.*mtr\\\\|My::\\\\|.*HandlerSocket\\\\|Mysql\\\\)\\\\)/d
|
||||
%filter_setup
|
||||
}
|
||||
")
|
||||
|
32
debian/additions/mariadb-report
vendored
32
debian/additions/mariadb-report
vendored
@ -142,7 +142,6 @@ my $have_innodb_vals = 1; # This might be set to 0 later in get_MySQL_version()
|
||||
my $have_aria_vals = 0;
|
||||
my $have_subquerycache_vals = 0;
|
||||
my $have_binlog_vals = 0;
|
||||
my $have_tokudb_engine = 0;
|
||||
my $use_thread_pool = 0;
|
||||
|
||||
if(defined $op{'r'})
|
||||
@ -690,15 +689,6 @@ sub get_MySQL_version
|
||||
} else {
|
||||
$have_binlog_vals = 0;
|
||||
}
|
||||
|
||||
$have_tokudb_engine = $dbh->selectall_arrayref("SELECT SUPPORT FROM information_schema.engines WHERE ENGINE = 'TokuDB';", undef)->[0][0];
|
||||
if(defined($have_tokudb_engine) && ($have_tokudb_engine eq "YES" || $have_tokudb_engine eq "DEFAULT"))
|
||||
{
|
||||
print "TokuDB detected\n" if $op{debug};
|
||||
$have_tokudb_engine = 1;
|
||||
} else {
|
||||
$have_tokudb_engine = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -903,7 +893,6 @@ sub write_report
|
||||
write_Aria() if $have_aria_vals;
|
||||
write_Subquerycache() if $have_subquerycache_vals;
|
||||
write_Binlog() if $have_binlog_vals;
|
||||
write_TokuDB() if $have_tokudb_engine;
|
||||
}
|
||||
|
||||
sub sec_to_dhms # Seconds to days+hours:minutes:seconds
|
||||
@ -1175,16 +1164,6 @@ sub write_Binlog
|
||||
write;
|
||||
}
|
||||
|
||||
sub write_TokuDB
|
||||
{
|
||||
print "write_TokuDB\n" if $op{debug};
|
||||
|
||||
return if $stats{'Tokudb_cachetable_size_current'} == 0;
|
||||
|
||||
$~ = 'TOKUDB';
|
||||
write;
|
||||
}
|
||||
|
||||
sub write_InnoDB
|
||||
{
|
||||
print "write_InnoDB\n" if $op{debug};
|
||||
@ -1574,17 +1553,6 @@ perc($binlog_cache_ratio)
|
||||
perc($binlog_stmt_cache_ratio)
|
||||
.
|
||||
|
||||
format TOKUDB =
|
||||
|
||||
__ TokuDB ______________________________________________________________
|
||||
Cachetable @>>>>>> of @>>>>>> %Usage: @>>>>>
|
||||
make_short($stats{Tokudb_cachetable_size_current}, 1), make_short($vars{tokudb_cache_size}, 1), perc($stats{Tokudb_cachetable_size_current}, $vars{tokudb_cache_size})
|
||||
Miss @>>>>>> @>>>>>/s
|
||||
make_short($stats{'Tokudb_cachetable_miss'}), t($stats{'Tokudb_cachetable_miss'})
|
||||
Evictions @>>>>>> @>>>>>/s
|
||||
make_short($stats{'Tokudb_cachetable_evictions'}), t($stats{'Tokudb_cachetable_evictions'})
|
||||
.
|
||||
|
||||
format ROWS =
|
||||
|
||||
__ Rows ________________________________________________________________
|
||||
|
1
debian/rules
vendored
1
debian/rules
vendored
@ -85,7 +85,6 @@ endif
|
||||
-DSYSTEM_TYPE="debian-$(DEB_HOST_GNU_SYSTEM)" \
|
||||
-DCMAKE_SYSTEM_PROCESSOR=$(DEB_HOST_ARCH) \
|
||||
-DBUILD_CONFIG=mysql_release \
|
||||
-DWITHOUT_TOKUDB=true \
|
||||
-DWITHOUT_CASSANDRA=true \
|
||||
-DPLUGIN_AWS_KEY_MANAGEMENT=NO \
|
||||
-WITH_EMBEDDED_SERVER=OFF \
|
||||
|
@ -947,13 +947,6 @@ rpl.show_status_stop_slave_race-7126 : MDEV-17438 - Timeout
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
rpl-tokudb.* : MDEV-14354 - Tests fail with tcmalloc
|
||||
rpl-tokudb.rpl_deadlock_tokudb : MDEV-20529 - mysqltest failed but provided no output
|
||||
rpl-tokudb.rpl_extra_col_master_tokudb : MDEV-13258 - Extra warning
|
||||
rpl-tokudb.rpl_tokudb_commit_after_flush : MDEV-16966 - Server crash
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
rpl/extra/rpl_tests.* : MDEV-10994 - Not maintained
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
@ -1022,62 +1015,6 @@ sys_vars.wait_timeout_func : MDEV-12896 - Wrong result
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
tokudb.change_column_all_1000_10 : MDEV-12640 - Lost connection
|
||||
tokudb.change_column_bin : MDEV-12640 - Lost connection
|
||||
tokudb.change_column_char : MDEV-12822 - Lost connection
|
||||
tokudb.change_column_varbin : MDEV-17682 - Timeout
|
||||
tokudb.cluster_filter : MDEV-10678 - Wrong execution plan
|
||||
tokudb.cluster_filter_hidden : MDEV-10678 - Wrong execution plan
|
||||
tokudb.cluster_filter_unpack_varchar : MDEV-10636 - Wrong execution plan
|
||||
tokudb.dir_per_db : MDEV-11537 - Wrong result
|
||||
tokudb.dir_per_db_rename_to_nonexisting_schema : MDEV-14359 - Directory not empty
|
||||
tokudb.hotindex-del-0 : MDEV-16559 - Timeout
|
||||
tokudb.hotindex-insert-0 : MDEV-15271 - Timeout
|
||||
tokudb.hotindex-insert-1 : MDEV-13870 - Lost connection to MySQL server
|
||||
tokudb.hotindex-insert-2 : MDEV-15271 - Timeout
|
||||
tokudb.hotindex-insert-bigchar : MDEV-12640 - Crash
|
||||
tokudb.hotindex-update-0 : MDEV-15198 - Timeout
|
||||
tokudb.hotindex-update-1 : MDEV-12640 - Crash
|
||||
tokudb.locks-select-update-1 : MDEV-13406 - Lock wait timeout
|
||||
tokudb.rows-32m-rand-insert : MDEV-12640 - Crash
|
||||
tokudb.rows-32m-seq-insert : MDEV-12640 - Crash
|
||||
tokudb.savepoint-5 : MDEV-15280 - Wrong result
|
||||
tokudb.type_blob : Modified in 10.4.13
|
||||
tokudb.type_datetime : MDEV-15193 - Wrong result
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
tokudb_alter_table.hcad_all_add2 : MDEV-15269 - Timeout
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
tokudb_backup.* : MDEV-11001 - Missing include file
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
tokudb_bugs.checkpoint_lock : MDEV-10637 - Wrong processlist output
|
||||
tokudb_bugs.checkpoint_lock_3 : MDEV-10637 - Wrong processlist output
|
||||
tokudb_bugs.frm_store : MDEV-12823 - Valgrind
|
||||
tokudb_bugs.frm_store2 : MDEV-12823 - Valgrind
|
||||
tokudb_bugs.frm_store3 : MDEV-12823 - Valgrind
|
||||
tokudb_bugs.xa : MDEV-11804 - Lock wait timeout
|
||||
tokudb_bugs.xa-3 : MDEV-22512 - Server failed to restart
|
||||
tokudb_bugs.xa-4 : MDEV-22512 - Server failed to restart
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
tokudb_parts.partition_alter4_tokudb : MDEV-12640 - Lost connection
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
tokudb_rpl.* : MDEV-11001 - Missing include file
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
tokudb_sys_vars.* : MDEV-11001 - Missing include file
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
unit.conc_basic-t : MDEV-15286 - not ok 7 - test_reconnect_maxpackage
|
||||
unit.conc_bulk1 : MDEV-19410 - LeakSanitizer: detected memory leaks
|
||||
unit.conc_errors : MDEV-18634 - ASAN global-buffer-overflow
|
||||
|
@ -968,14 +968,6 @@
|
||||
obj:*/libodbc.so*
|
||||
}
|
||||
|
||||
{
|
||||
TokuDB uses gcc __thread variables
|
||||
Memcheck:Leak
|
||||
fun:memalign
|
||||
fun:*
|
||||
fun:__tls_get_addr
|
||||
}
|
||||
|
||||
{
|
||||
Galera uses gcc __thread variables
|
||||
Memcheck:Leak
|
||||
|
@ -205,11 +205,6 @@ sub new
|
||||
$limits{'max_temporary_tables'}= $limits{"max_tables"};
|
||||
$self->{'transactions'} = 1; # Transactions enabled
|
||||
}
|
||||
if (defined($main::opt_create_options) &&
|
||||
$main::opt_create_options =~ /engine=tokudb/i)
|
||||
{
|
||||
$self->{'transactions'} = 1; # Transactions enabled
|
||||
}
|
||||
|
||||
return $self;
|
||||
}
|
||||
|
@ -525,7 +525,7 @@ enum legacy_db_type
|
||||
DB_TYPE_PERFORMANCE_SCHEMA=28,
|
||||
DB_TYPE_S3=41,
|
||||
DB_TYPE_ARIA=42,
|
||||
DB_TYPE_TOKUDB=43,
|
||||
DB_TYPE_TOKUDB=43, /* disabled in MariaDB Server 10.5, removed in 10.6 */
|
||||
DB_TYPE_SEQUENCE=44,
|
||||
DB_TYPE_FIRST_DYNAMIC=45,
|
||||
DB_TYPE_DEFAULT=127 // Must be last
|
||||
|
@ -5008,7 +5008,7 @@ thd_need_wait_reports(const MYSQL_THD thd)
|
||||
}
|
||||
|
||||
/*
|
||||
Used by storage engines (currently TokuDB and InnoDB) to report that
|
||||
Used by storage engines (currently InnoDB) to report that
|
||||
one transaction THD is about to go to wait for a transactional lock held by
|
||||
another transaction OTHER_THD.
|
||||
|
||||
|
@ -1057,7 +1057,7 @@ struct TABLE_SHARE
|
||||
bool write_frm_image(const uchar *frm_image, size_t frm_length);
|
||||
bool write_par_image(const uchar *par_image, size_t par_length);
|
||||
|
||||
/* Only used by tokudb */
|
||||
/* Only used by S3 */
|
||||
bool write_frm_image(void)
|
||||
{ return frm_image ? write_frm_image(frm_image->str, frm_image->length) : 0; }
|
||||
|
||||
|
@ -47,7 +47,6 @@ build_script:
|
||||
-DPLUGIN_SPHINX=NO
|
||||
-DPLUGIN_SPIDER=NO
|
||||
-DPLUGIN_TEST_SQL_DISCOVERY=NO
|
||||
-DPLUGIN_TOKUDB=NO
|
||||
-DPLUGIN_XTRADB=NO
|
||||
-DWITH_UNIT_TESTS=OFF
|
||||
-DWITH_MARIABACKUP=OFF
|
||||
|
@ -37,7 +37,6 @@ if [ "${MROONGA_BUNDLED}" = "yes" ]; then
|
||||
cmake_args=("${cmake_args[@]}" -DWITHOUT_SPHINX=TRUE)
|
||||
cmake_args=("${cmake_args[@]}" -DWITHOUT_SPIDER=TRUE)
|
||||
cmake_args=("${cmake_args[@]}" -DWITHOUT_TEST_SQL_DISCOVERY=TRUE)
|
||||
cmake_args=("${cmake_args[@]}" -DWITHOUT_TOKUDB=TRUE)
|
||||
if [ "${MROONGA_TEST_EMBEDDED}" = "yes" ]; then
|
||||
cmake_args=("${cmake_args[@]}" -DWITH_EMBEDDED_SERVER=TRUE)
|
||||
cmake_args=("${cmake_args[@]}" -DMRN_BUILD_FOR_EMBEDDED_SERVER=TRUE)
|
||||
|
@ -1,40 +0,0 @@
|
||||
# .clang-format file for Percona TokuDB
|
||||
# Minimum required version of clang-format is 5.0.1. Earlier versions will work
|
||||
# but may need removal of some parameters.
|
||||
Language: Cpp
|
||||
BasedOnStyle: Google
|
||||
|
||||
# The following parameters are default for Google style,
|
||||
# but as they are important for our project they
|
||||
# are set explicitly here
|
||||
AlignAfterOpenBracket: Align
|
||||
BreakBeforeBinaryOperators: None
|
||||
ColumnLimit: 80
|
||||
PointerAlignment: Left
|
||||
SpaceAfterCStyleCast: false
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
SpaceBeforeParens: ControlStatements
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesBeforeTrailingComments: 2
|
||||
SpacesInAngles: false
|
||||
SpacesInContainerLiterals: true
|
||||
SpacesInCStyleCastParentheses: false
|
||||
SpacesInParentheses: false
|
||||
SpacesInSquareBrackets: false
|
||||
UseTab: Never
|
||||
|
||||
# Non-default parameters
|
||||
NamespaceIndentation: All
|
||||
IndentWidth: 4
|
||||
TabWidth: 4
|
||||
AllowShortIfStatementsOnASingleLine: false
|
||||
AllowShortLoopsOnASingleLine: false
|
||||
BinPackParameters: false
|
||||
BinPackArguments: false
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
AllowAllParametersOfDeclarationOnNextLine: false
|
||||
# not supported in 5.0.1
|
||||
#AlignConsecutiveAssignments: yes
|
||||
#AlignConsecutiveDeclarations: yes
|
||||
BreakStringLiterals: false
|
||||
ReflowComments: true
|
@ -1,173 +0,0 @@
|
||||
SET(TOKUDB_VERSION 5.6.41-84.1)
|
||||
# PerconaFT only supports x86-64 and cmake-2.8.9+
|
||||
IF(WIN32)
|
||||
# tokudb never worked there
|
||||
RETURN()
|
||||
ELSEIF(CMAKE_VERSION VERSION_LESS "2.8.9")
|
||||
MESSAGE(STATUS "CMake 2.8.9 or higher is required by TokuDB")
|
||||
ELSEIF(NOT HAVE_DLOPEN)
|
||||
MESSAGE(STATUS "dlopen is required by TokuDB")
|
||||
ELSEIF(PLUGIN_PERFSCHEMA MATCHES "^NO$")
|
||||
MESSAGE(STATUS "Performance Schema is required by TokuDB")
|
||||
RETURN()
|
||||
ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR
|
||||
CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
|
||||
# tokudb requires F_NOCACHE or O_DIRECT, and designated initializers
|
||||
CHECK_CXX_SOURCE_COMPILES(
|
||||
"
|
||||
#include <fcntl.h>
|
||||
struct a {int b; int c; };
|
||||
struct a d = { .b=1, .c=2 };
|
||||
#if defined(O_DIRECT) || defined(F_NOCACHE)
|
||||
int main() { return 0; }
|
||||
#else
|
||||
#error
|
||||
#endif
|
||||
" TOKUDB_OK)
|
||||
ENDIF()
|
||||
|
||||
IF(NOT TOKUDB_OK)
|
||||
RETURN()
|
||||
ENDIF()
|
||||
|
||||
SET(TOKUDB_SOURCES
|
||||
ha_tokudb.cc
|
||||
tokudb_background.cc
|
||||
tokudb_information_schema.cc
|
||||
tokudb_sysvars.cc
|
||||
tokudb_thread.cc
|
||||
tokudb_dir_cmd.cc)
|
||||
MYSQL_ADD_PLUGIN(tokudb ${TOKUDB_SOURCES} STORAGE_ENGINE MODULE_ONLY
|
||||
DISABLED
|
||||
COMPONENT tokudb-engine CONFIG ${CMAKE_CURRENT_BINARY_DIR}/tokudb.cnf)
|
||||
|
||||
IF(NOT TARGET tokudb)
|
||||
RETURN()
|
||||
ENDIF()
|
||||
|
||||
INCLUDE(jemalloc)
|
||||
CHECK_JEMALLOC()
|
||||
|
||||
IF(NOT LIBJEMALLOC)
|
||||
MESSAGE(WARNING "TokuDB is enabled, but jemalloc is not. This configuration is not supported")
|
||||
ELSEIF(LIBJEMALLOC STREQUAL jemalloc_pic)
|
||||
CHECK_CXX_SOURCE_COMPILES(
|
||||
"
|
||||
#include <jemalloc/jemalloc.h>
|
||||
#if JEMALLOC_VERSION_MAJOR < 5
|
||||
int main() { return 0; }
|
||||
#else
|
||||
#error
|
||||
#endif
|
||||
" JEMALLOC_OK)
|
||||
IF (NOT JEMALLOC_OK)
|
||||
MESSAGE(FATAL_ERROR "static jemalloc_pic.a can only be used up to jemalloc 4")
|
||||
ENDIF()
|
||||
ELSEIF(LIBJEMALLOC STREQUAL jemalloc)
|
||||
FIND_LIBRARY(LIBJEMALLOC_SO jemalloc)
|
||||
IF(NOT LIBJEMALLOC_SO)
|
||||
MESSAGE(FATAL_ERROR "jemalloc is present, but cannot be found?")
|
||||
ENDIF()
|
||||
GET_FILENAME_COMPONENT(LIBJEMALLOC_PATH ${LIBJEMALLOC_SO} REALPATH CACHE)
|
||||
|
||||
IF(RPM OR DEB)
|
||||
UNSET(LIBJEMALLOC)
|
||||
GET_DIRECTORY_PROPERTY(V DIRECTORY ${CMAKE_SOURCE_DIR} DEFINITION CPACK_RPM_tokudb-engine_PACKAGE_REQUIRES)
|
||||
SET(CPACK_RPM_tokudb-engine_PACKAGE_REQUIRES "${V} jemalloc" PARENT_SCOPE)
|
||||
ENDIF()
|
||||
|
||||
IF(INSTALL_SYSCONFDIR)
|
||||
SET(systemd_env "Environment=\"LD_PRELOAD=${LIBJEMALLOC_PATH}\"")
|
||||
SET(cnf_malloc_lib "malloc-lib=${LIBJEMALLOC_PATH}")
|
||||
CONFIGURE_FILE(tokudb.conf.in tokudb.conf @ONLY)
|
||||
INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/tokudb.conf
|
||||
DESTINATION ${INSTALL_SYSCONFDIR}/systemd/system/mariadb.service.d/
|
||||
COMPONENT tokudb-engine)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
CONFIGURE_FILE(tokudb.cnf.in tokudb.cnf @ONLY)
|
||||
|
||||
MY_CHECK_AND_SET_COMPILER_FLAG("-Wno-shadow")
|
||||
MY_CHECK_AND_SET_COMPILER_FLAG("-Wno-vla" DEBUG)
|
||||
MY_CHECK_AND_SET_COMPILER_FLAG("-Wno-implicit-fallthrough")
|
||||
MY_CHECK_AND_SET_COMPILER_FLAG("-Wno-cpp" DEBUG)
|
||||
|
||||
############################################
|
||||
MARK_AS_ADVANCED(BUILDNAME)
|
||||
MARK_AS_ADVANCED(BUILD_TESTING)
|
||||
MARK_AS_ADVANCED(CMAKE_TOKUDB_REVISION)
|
||||
MARK_AS_ADVANCED(LIBTOKUDB)
|
||||
MARK_AS_ADVANCED(LIBTOKUPORTABILITY)
|
||||
MARK_AS_ADVANCED(PROFILING)
|
||||
MARK_AS_ADVANCED(SNAPPY_SOURCE_DIR)
|
||||
MARK_AS_ADVANCED(TOKUDB_DATA)
|
||||
MARK_AS_ADVANCED(TOKU_DEBUG_PARANOID)
|
||||
MARK_AS_ADVANCED(USE_VALGRIND)
|
||||
MARK_AS_ADVANCED(XZ_SOURCE_DIR)
|
||||
MARK_AS_ADVANCED(gcc_ar)
|
||||
MARK_AS_ADVANCED(gcc_ranlib)
|
||||
############################################
|
||||
|
||||
# pick language dialect
|
||||
MY_CHECK_AND_SET_COMPILER_FLAG(-std=c++11)
|
||||
|
||||
SET(BUILD_TESTING OFF CACHE BOOL "")
|
||||
SET(USE_VALGRIND OFF CACHE BOOL "")
|
||||
SET(TOKU_DEBUG_PARANOID OFF CACHE BOOL "")
|
||||
|
||||
# Enable TokuDB's TOKUDB_DEBUG in debug builds
|
||||
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DTOKUDB_DEBUG")
|
||||
|
||||
IF(NOT DEFINED TOKUDB_VERSION)
|
||||
IF(DEFINED ENV{TOKUDB_VERSION})
|
||||
SET(TOKUDB_VERSION $ENV{TOKUDB_VERSION})
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
IF(DEFINED TOKUDB_VERSION)
|
||||
ADD_DEFINITIONS("-DTOKUDB_VERSION=${TOKUDB_VERSION}")
|
||||
IF (${TOKUDB_VERSION} MATCHES "^tokudb-([0-9]+)\\.([0-9]+)\\.([0-9]+.*)")
|
||||
ADD_DEFINITIONS("-DTOKUDB_VERSION_MAJOR=${CMAKE_MATCH_1}")
|
||||
ADD_DEFINITIONS("-DTOKUDB_VERSION_MINOR=${CMAKE_MATCH_2}")
|
||||
ADD_DEFINITIONS("-DTOKUDB_VERSION_PATCH=${CMAKE_MATCH_3}")
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
IF(DEFINED TOKUDB_NOPATCH_CONFIG)
|
||||
ADD_DEFINITIONS("-DTOKUDB_NOPATCH_CONFIG=${TOKUDB_NOPATCH_CONFIG}")
|
||||
ENDIF()
|
||||
|
||||
MY_CHECK_AND_SET_COMPILER_FLAG(-Wno-missing-field-initializers)
|
||||
|
||||
IF (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/PerconaFT/")
|
||||
IF (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/ft-index/")
|
||||
MESSAGE(FATAL_ERROR "Found both PerconaFT and ft-index sources. Don't know which to use.")
|
||||
ENDIF ()
|
||||
SET(TOKU_FT_DIR_NAME "PerconaFT")
|
||||
|
||||
ELSEIF (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/ft-index/")
|
||||
MESSAGE(WARNING "Found ft-index sources, ft-index is deprecated and replaced with PerconaFT.")
|
||||
SET(TOKU_FT_DIR_NAME "ft-index")
|
||||
ELSE ()
|
||||
MESSAGE(FATAL_ERROR "Could not find PerconaFT sources.")
|
||||
ENDIF ()
|
||||
|
||||
IF (WITH_VALGRIND)
|
||||
SET(USE_VALGRIND "ON")
|
||||
ENDIF ()
|
||||
|
||||
ADD_SUBDIRECTORY(${TOKU_FT_DIR_NAME})
|
||||
INCLUDE_DIRECTORIES(${TOKU_FT_DIR_NAME})
|
||||
INCLUDE_DIRECTORIES(${TOKU_FT_DIR_NAME}/portability)
|
||||
INCLUDE_DIRECTORIES(${TOKU_FT_DIR_NAME}/util)
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/${TOKU_FT_DIR_NAME})
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/${TOKU_FT_DIR_NAME}/buildheader)
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/${TOKU_FT_DIR_NAME}/portability)
|
||||
|
||||
TARGET_LINK_LIBRARIES(tokudb tokufractaltree_static tokuportability_static
|
||||
${ZLIB_LIBRARY} ${LIBJEMALLOC} stdc++)
|
||||
|
||||
SET(CMAKE_MODULE_LINKER_FLAGS_RELEASE "${CMAKE_MODULE_LINKER_FLAGS_RELEASE} -flto -fuse-linker-plugin")
|
||||
SET(CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO} -flto -fuse-linker-plugin")
|
||||
|
||||
ADD_SUBDIRECTORY(man)
|
@ -1,36 +0,0 @@
|
||||
Language: Cpp
|
||||
BasedOnStyle: Google
|
||||
|
||||
# The following parameters are default for Google style,
|
||||
# but as they are important for our project they
|
||||
# are set explicitly here
|
||||
AlignAfterOpenBracket: Align
|
||||
BreakBeforeBinaryOperators: None
|
||||
ColumnLimit: 80
|
||||
PointerAlignment: Left
|
||||
SpaceAfterCStyleCast: false
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
SpaceBeforeParens: ControlStatements
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesBeforeTrailingComments: 2
|
||||
SpacesInAngles: false
|
||||
SpacesInContainerLiterals: true
|
||||
SpacesInCStyleCastParentheses: false
|
||||
SpacesInParentheses: false
|
||||
SpacesInSquareBrackets: false
|
||||
UseTab: Never
|
||||
|
||||
# Non-default parameters
|
||||
NamespaceIndentation: All
|
||||
IndentWidth: 4
|
||||
TabWidth: 4
|
||||
AllowShortIfStatementsOnASingleLine: false
|
||||
AllowShortLoopsOnASingleLine: false
|
||||
BinPackParameters: false
|
||||
BinPackArguments: false
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
AllowAllParametersOfDeclarationOnNextLine: false
|
||||
#AlignConsecutiveAssignments: yes
|
||||
#AlignConsecutiveDeclarations: yes
|
||||
BreakStringLiterals: false
|
||||
ReflowComments: true
|
@ -1,106 +0,0 @@
|
||||
if (CMAKE_PROJECT_NAME STREQUAL TokuDB)
|
||||
cmake_minimum_required(VERSION 2.8.8 FATAL_ERROR)
|
||||
endif()
|
||||
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules")
|
||||
|
||||
project(TokuDB)
|
||||
|
||||
# suppress -rdynamic
|
||||
set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "")
|
||||
set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")
|
||||
|
||||
# See: https://jira.percona.com/browse/TDB-93
|
||||
MY_CHECK_AND_SET_COMPILER_FLAG("-Wno-address-of-packed-member")
|
||||
|
||||
# detect when we are being built as a subproject
|
||||
if (DEFINED MYSQL_PROJECT_NAME_DOCSTRING)
|
||||
add_definitions(-DMYSQL_TOKUDB_ENGINE=1)
|
||||
add_definitions(-DMYSQL_VERSION_ID=${MYSQL_VERSION_ID})
|
||||
# Extended PFS instrumentation:
|
||||
# -DTOKU_PFS_MUTEX_EXTENDED_CACHETABLEMMUTEX=1
|
||||
if (WITH_PERFSCHEMA_STORAGE_ENGINE)
|
||||
add_definitions(-DTOKU_MYSQL_WITH_PFS)
|
||||
endif ()
|
||||
include_directories(${CMAKE_SOURCE_DIR}/include)
|
||||
if ((CMAKE_BUILD_TYPE MATCHES "Debug") AND
|
||||
(CMAKE_CXX_FLAGS_DEBUG MATCHES " -DENABLED_DEBUG_SYNC"))
|
||||
include_directories(${CMAKE_SOURCE_DIR}/sql)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
## Versions of gcc >= 4.9.0 require special versions of 'ar' and 'ranlib' for
|
||||
## link-time optimizations to work properly.
|
||||
##
|
||||
## From https://gcc.gnu.org/gcc-4.9/changes.html:
|
||||
##
|
||||
## When using a linker plugin, compiling with the -flto option now
|
||||
## generates slim objects files (.o) which only contain intermediate
|
||||
## language representation for LTO. Use -ffat-lto-objects to create
|
||||
## files which contain additionally the object code. To generate
|
||||
## static libraries suitable for LTO processing, use gcc-ar and
|
||||
## gcc-ranlib; to list symbols from a slim object file use
|
||||
## gcc-nm. (Requires that ar, ranlib and nm have been compiled with
|
||||
## plugin support.)
|
||||
if ((CMAKE_CXX_COMPILER_ID STREQUAL GNU) AND
|
||||
NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9.0"))
|
||||
find_program(gcc_ar "gcc-ar")
|
||||
if (gcc_ar)
|
||||
set(CMAKE_AR "${gcc_ar}")
|
||||
endif ()
|
||||
find_program(gcc_ranlib "gcc-ranlib")
|
||||
if (gcc_ranlib)
|
||||
set(CMAKE_RANLIB "${gcc_ranlib}")
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
include(TokuFeatureDetection)
|
||||
include(TokuSetupCompiler)
|
||||
#include(TokuSetupCTest)
|
||||
include(TokuThirdParty)
|
||||
|
||||
set(TOKU_CMAKE_SCRIPT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
||||
include(TokuMergeLibs)
|
||||
|
||||
## need a way to change the name of libs we build
|
||||
set(LIBTOKUPORTABILITY "tokuportability" CACHE STRING "Name of libtokuportability.so")
|
||||
set(LIBTOKUDB "tokufractaltree" CACHE STRING "Name of libtokufractaltree.so")
|
||||
|
||||
set(INSTALL_LIBDIR "lib" CACHE STRING "where to install libs")
|
||||
|
||||
if (USE_VALGRIND AND NOT VALGRIND_INCLUDE_DIR MATCHES NOTFOUND)
|
||||
include_directories(
|
||||
${VALGRIND_INCLUDE_DIR}
|
||||
)
|
||||
endif()
|
||||
include_directories(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/portability
|
||||
${CMAKE_CURRENT_SOURCE_DIR} ## so you can include <ft/ft-ops.h> from inside src/
|
||||
${CMAKE_CURRENT_BINARY_DIR} ## for logging code
|
||||
)
|
||||
## include where config.h will be generated
|
||||
include_directories(${CMAKE_CURRENT_BINARY_DIR}/portability)
|
||||
|
||||
## build db.h and include where it will be generated
|
||||
add_subdirectory(buildheader)
|
||||
include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/buildheader)
|
||||
|
||||
## default includes and libraries
|
||||
include_directories(SYSTEM
|
||||
/usr/local/include
|
||||
${ZLIB_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
## add subdirectories
|
||||
add_subdirectory(util)
|
||||
add_subdirectory(portability)
|
||||
add_subdirectory(ft)
|
||||
add_subdirectory(locktree)
|
||||
add_subdirectory(src)
|
||||
add_subdirectory(ftcxx)
|
||||
add_subdirectory(tools)
|
||||
|
||||
INSTALL_DOCUMENTATION(README.md COPYING.AGPLv3 COPYING.GPLv2 PATENTS
|
||||
COMPONENT Server)
|
||||
|
||||
## build tags
|
||||
#include(TokuBuildTagDatabases)
|
@ -1,661 +0,0 @@
|
||||
GNU AFFERO GENERAL PUBLIC LICENSE
|
||||
Version 3, 19 November 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The GNU Affero General Public License is a free, copyleft license for
|
||||
software and other kinds of works, specifically designed to ensure
|
||||
cooperation with the community in the case of network server software.
|
||||
|
||||
The licenses for most software and other practical works are designed
|
||||
to take away your freedom to share and change the works. By contrast,
|
||||
our General Public Licenses are intended to guarantee your freedom to
|
||||
share and change all versions of a program--to make sure it remains free
|
||||
software for all its users.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
them if you wish), that you receive source code or can get it if you
|
||||
want it, that you can change the software or use pieces of it in new
|
||||
free programs, and that you know you can do these things.
|
||||
|
||||
Developers that use our General Public Licenses protect your rights
|
||||
with two steps: (1) assert copyright on the software, and (2) offer
|
||||
you this License which gives you legal permission to copy, distribute
|
||||
and/or modify the software.
|
||||
|
||||
A secondary benefit of defending all users' freedom is that
|
||||
improvements made in alternate versions of the program, if they
|
||||
receive widespread use, become available for other developers to
|
||||
incorporate. Many developers of free software are heartened and
|
||||
encouraged by the resulting cooperation. However, in the case of
|
||||
software used on network servers, this result may fail to come about.
|
||||
The GNU General Public License permits making a modified version and
|
||||
letting the public access it on a server without ever releasing its
|
||||
source code to the public.
|
||||
|
||||
The GNU Affero General Public License is designed specifically to
|
||||
ensure that, in such cases, the modified source code becomes available
|
||||
to the community. It requires the operator of a network server to
|
||||
provide the source code of the modified version running there to the
|
||||
users of that server. Therefore, public use of a modified version, on
|
||||
a publicly accessible server, gives the public access to the source
|
||||
code of the modified version.
|
||||
|
||||
An older license, called the Affero General Public License and
|
||||
published by Affero, was designed to accomplish similar goals. This is
|
||||
a different license, not a version of the Affero GPL, but Affero has
|
||||
released a new version of the Affero GPL which permits relicensing under
|
||||
this license.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
TERMS AND CONDITIONS
|
||||
|
||||
0. Definitions.
|
||||
|
||||
"This License" refers to version 3 of the GNU Affero General Public License.
|
||||
|
||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||
works, such as semiconductor masks.
|
||||
|
||||
"The Program" refers to any copyrightable work licensed under this
|
||||
License. Each licensee is addressed as "you". "Licensees" and
|
||||
"recipients" may be individuals or organizations.
|
||||
|
||||
To "modify" a work means to copy from or adapt all or part of the work
|
||||
in a fashion requiring copyright permission, other than the making of an
|
||||
exact copy. The resulting work is called a "modified version" of the
|
||||
earlier work or a work "based on" the earlier work.
|
||||
|
||||
A "covered work" means either the unmodified Program or a work based
|
||||
on the Program.
|
||||
|
||||
To "propagate" a work means to do anything with it that, without
|
||||
permission, would make you directly or secondarily liable for
|
||||
infringement under applicable copyright law, except executing it on a
|
||||
computer or modifying a private copy. Propagation includes copying,
|
||||
distribution (with or without modification), making available to the
|
||||
public, and in some countries other activities as well.
|
||||
|
||||
To "convey" a work means any kind of propagation that enables other
|
||||
parties to make or receive copies. Mere interaction with a user through
|
||||
a computer network, with no transfer of a copy, is not conveying.
|
||||
|
||||
An interactive user interface displays "Appropriate Legal Notices"
|
||||
to the extent that it includes a convenient and prominently visible
|
||||
feature that (1) displays an appropriate copyright notice, and (2)
|
||||
tells the user that there is no warranty for the work (except to the
|
||||
extent that warranties are provided), that licensees may convey the
|
||||
work under this License, and how to view a copy of this License. If
|
||||
the interface presents a list of user commands or options, such as a
|
||||
menu, a prominent item in the list meets this criterion.
|
||||
|
||||
1. Source Code.
|
||||
|
||||
The "source code" for a work means the preferred form of the work
|
||||
for making modifications to it. "Object code" means any non-source
|
||||
form of a work.
|
||||
|
||||
A "Standard Interface" means an interface that either is an official
|
||||
standard defined by a recognized standards body, or, in the case of
|
||||
interfaces specified for a particular programming language, one that
|
||||
is widely used among developers working in that language.
|
||||
|
||||
The "System Libraries" of an executable work include anything, other
|
||||
than the work as a whole, that (a) is included in the normal form of
|
||||
packaging a Major Component, but which is not part of that Major
|
||||
Component, and (b) serves only to enable use of the work with that
|
||||
Major Component, or to implement a Standard Interface for which an
|
||||
implementation is available to the public in source code form. A
|
||||
"Major Component", in this context, means a major essential component
|
||||
(kernel, window system, and so on) of the specific operating system
|
||||
(if any) on which the executable work runs, or a compiler used to
|
||||
produce the work, or an object code interpreter used to run it.
|
||||
|
||||
The "Corresponding Source" for a work in object code form means all
|
||||
the source code needed to generate, install, and (for an executable
|
||||
work) run the object code and to modify the work, including scripts to
|
||||
control those activities. However, it does not include the work's
|
||||
System Libraries, or general-purpose tools or generally available free
|
||||
programs which are used unmodified in performing those activities but
|
||||
which are not part of the work. For example, Corresponding Source
|
||||
includes interface definition files associated with source files for
|
||||
the work, and the source code for shared libraries and dynamically
|
||||
linked subprograms that the work is specifically designed to require,
|
||||
such as by intimate data communication or control flow between those
|
||||
subprograms and other parts of the work.
|
||||
|
||||
The Corresponding Source need not include anything that users
|
||||
can regenerate automatically from other parts of the Corresponding
|
||||
Source.
|
||||
|
||||
The Corresponding Source for a work in source code form is that
|
||||
same work.
|
||||
|
||||
2. Basic Permissions.
|
||||
|
||||
All rights granted under this License are granted for the term of
|
||||
copyright on the Program, and are irrevocable provided the stated
|
||||
conditions are met. This License explicitly affirms your unlimited
|
||||
permission to run the unmodified Program. The output from running a
|
||||
covered work is covered by this License only if the output, given its
|
||||
content, constitutes a covered work. This License acknowledges your
|
||||
rights of fair use or other equivalent, as provided by copyright law.
|
||||
|
||||
You may make, run and propagate covered works that you do not
|
||||
convey, without conditions so long as your license otherwise remains
|
||||
in force. You may convey covered works to others for the sole purpose
|
||||
of having them make modifications exclusively for you, or provide you
|
||||
with facilities for running those works, provided that you comply with
|
||||
the terms of this License in conveying all material for which you do
|
||||
not control copyright. Those thus making or running the covered works
|
||||
for you must do so exclusively on your behalf, under your direction
|
||||
and control, on terms that prohibit them from making any copies of
|
||||
your copyrighted material outside their relationship with you.
|
||||
|
||||
Conveying under any other circumstances is permitted solely under
|
||||
the conditions stated below. Sublicensing is not allowed; section 10
|
||||
makes it unnecessary.
|
||||
|
||||
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||
|
||||
No covered work shall be deemed part of an effective technological
|
||||
measure under any applicable law fulfilling obligations under article
|
||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||
similar laws prohibiting or restricting circumvention of such
|
||||
measures.
|
||||
|
||||
When you convey a covered work, you waive any legal power to forbid
|
||||
circumvention of technological measures to the extent such circumvention
|
||||
is effected by exercising rights under this License with respect to
|
||||
the covered work, and you disclaim any intention to limit operation or
|
||||
modification of the work as a means of enforcing, against the work's
|
||||
users, your or third parties' legal rights to forbid circumvention of
|
||||
technological measures.
|
||||
|
||||
4. Conveying Verbatim Copies.
|
||||
|
||||
You may convey verbatim copies of the Program's source code as you
|
||||
receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice;
|
||||
keep intact all notices stating that this License and any
|
||||
non-permissive terms added in accord with section 7 apply to the code;
|
||||
keep intact all notices of the absence of any warranty; and give all
|
||||
recipients a copy of this License along with the Program.
|
||||
|
||||
You may charge any price or no price for each copy that you convey,
|
||||
and you may offer support or warranty protection for a fee.
|
||||
|
||||
5. Conveying Modified Source Versions.
|
||||
|
||||
You may convey a work based on the Program, or the modifications to
|
||||
produce it from the Program, in the form of source code under the
|
||||
terms of section 4, provided that you also meet all of these conditions:
|
||||
|
||||
a) The work must carry prominent notices stating that you modified
|
||||
it, and giving a relevant date.
|
||||
|
||||
b) The work must carry prominent notices stating that it is
|
||||
released under this License and any conditions added under section
|
||||
7. This requirement modifies the requirement in section 4 to
|
||||
"keep intact all notices".
|
||||
|
||||
c) You must license the entire work, as a whole, under this
|
||||
License to anyone who comes into possession of a copy. This
|
||||
License will therefore apply, along with any applicable section 7
|
||||
additional terms, to the whole of the work, and all its parts,
|
||||
regardless of how they are packaged. This License gives no
|
||||
permission to license the work in any other way, but it does not
|
||||
invalidate such permission if you have separately received it.
|
||||
|
||||
d) If the work has interactive user interfaces, each must display
|
||||
Appropriate Legal Notices; however, if the Program has interactive
|
||||
interfaces that do not display Appropriate Legal Notices, your
|
||||
work need not make them do so.
|
||||
|
||||
A compilation of a covered work with other separate and independent
|
||||
works, which are not by their nature extensions of the covered work,
|
||||
and which are not combined with it such as to form a larger program,
|
||||
in or on a volume of a storage or distribution medium, is called an
|
||||
"aggregate" if the compilation and its resulting copyright are not
|
||||
used to limit the access or legal rights of the compilation's users
|
||||
beyond what the individual works permit. Inclusion of a covered work
|
||||
in an aggregate does not cause this License to apply to the other
|
||||
parts of the aggregate.
|
||||
|
||||
6. Conveying Non-Source Forms.
|
||||
|
||||
You may convey a covered work in object code form under the terms
|
||||
of sections 4 and 5, provided that you also convey the
|
||||
machine-readable Corresponding Source under the terms of this License,
|
||||
in one of these ways:
|
||||
|
||||
a) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by the
|
||||
Corresponding Source fixed on a durable physical medium
|
||||
customarily used for software interchange.
|
||||
|
||||
b) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by a
|
||||
written offer, valid for at least three years and valid for as
|
||||
long as you offer spare parts or customer support for that product
|
||||
model, to give anyone who possesses the object code either (1) a
|
||||
copy of the Corresponding Source for all the software in the
|
||||
product that is covered by this License, on a durable physical
|
||||
medium customarily used for software interchange, for a price no
|
||||
more than your reasonable cost of physically performing this
|
||||
conveying of source, or (2) access to copy the
|
||||
Corresponding Source from a network server at no charge.
|
||||
|
||||
c) Convey individual copies of the object code with a copy of the
|
||||
written offer to provide the Corresponding Source. This
|
||||
alternative is allowed only occasionally and noncommercially, and
|
||||
only if you received the object code with such an offer, in accord
|
||||
with subsection 6b.
|
||||
|
||||
d) Convey the object code by offering access from a designated
|
||||
place (gratis or for a charge), and offer equivalent access to the
|
||||
Corresponding Source in the same way through the same place at no
|
||||
further charge. You need not require recipients to copy the
|
||||
Corresponding Source along with the object code. If the place to
|
||||
copy the object code is a network server, the Corresponding Source
|
||||
may be on a different server (operated by you or a third party)
|
||||
that supports equivalent copying facilities, provided you maintain
|
||||
clear directions next to the object code saying where to find the
|
||||
Corresponding Source. Regardless of what server hosts the
|
||||
Corresponding Source, you remain obligated to ensure that it is
|
||||
available for as long as needed to satisfy these requirements.
|
||||
|
||||
e) Convey the object code using peer-to-peer transmission, provided
|
||||
you inform other peers where the object code and Corresponding
|
||||
Source of the work are being offered to the general public at no
|
||||
charge under subsection 6d.
|
||||
|
||||
A separable portion of the object code, whose source code is excluded
|
||||
from the Corresponding Source as a System Library, need not be
|
||||
included in conveying the object code work.
|
||||
|
||||
A "User Product" is either (1) a "consumer product", which means any
|
||||
tangible personal property which is normally used for personal, family,
|
||||
or household purposes, or (2) anything designed or sold for incorporation
|
||||
into a dwelling. In determining whether a product is a consumer product,
|
||||
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||
product received by a particular user, "normally used" refers to a
|
||||
typical or common use of that class of product, regardless of the status
|
||||
of the particular user or of the way in which the particular user
|
||||
actually uses, or expects or is expected to use, the product. A product
|
||||
is a consumer product regardless of whether the product has substantial
|
||||
commercial, industrial or non-consumer uses, unless such uses represent
|
||||
the only significant mode of use of the product.
|
||||
|
||||
"Installation Information" for a User Product means any methods,
|
||||
procedures, authorization keys, or other information required to install
|
||||
and execute modified versions of a covered work in that User Product from
|
||||
a modified version of its Corresponding Source. The information must
|
||||
suffice to ensure that the continued functioning of the modified object
|
||||
code is in no case prevented or interfered with solely because
|
||||
modification has been made.
|
||||
|
||||
If you convey an object code work under this section in, or with, or
|
||||
specifically for use in, a User Product, and the conveying occurs as
|
||||
part of a transaction in which the right of possession and use of the
|
||||
User Product is transferred to the recipient in perpetuity or for a
|
||||
fixed term (regardless of how the transaction is characterized), the
|
||||
Corresponding Source conveyed under this section must be accompanied
|
||||
by the Installation Information. But this requirement does not apply
|
||||
if neither you nor any third party retains the ability to install
|
||||
modified object code on the User Product (for example, the work has
|
||||
been installed in ROM).
|
||||
|
||||
The requirement to provide Installation Information does not include a
|
||||
requirement to continue to provide support service, warranty, or updates
|
||||
for a work that has been modified or installed by the recipient, or for
|
||||
the User Product in which it has been modified or installed. Access to a
|
||||
network may be denied when the modification itself materially and
|
||||
adversely affects the operation of the network or violates the rules and
|
||||
protocols for communication across the network.
|
||||
|
||||
Corresponding Source conveyed, and Installation Information provided,
|
||||
in accord with this section must be in a format that is publicly
|
||||
documented (and with an implementation available to the public in
|
||||
source code form), and must require no special password or key for
|
||||
unpacking, reading or copying.
|
||||
|
||||
7. Additional Terms.
|
||||
|
||||
"Additional permissions" are terms that supplement the terms of this
|
||||
License by making exceptions from one or more of its conditions.
|
||||
Additional permissions that are applicable to the entire Program shall
|
||||
be treated as though they were included in this License, to the extent
|
||||
that they are valid under applicable law. If additional permissions
|
||||
apply only to part of the Program, that part may be used separately
|
||||
under those permissions, but the entire Program remains governed by
|
||||
this License without regard to the additional permissions.
|
||||
|
||||
When you convey a copy of a covered work, you may at your option
|
||||
remove any additional permissions from that copy, or from any part of
|
||||
it. (Additional permissions may be written to require their own
|
||||
removal in certain cases when you modify the work.) You may place
|
||||
additional permissions on material, added by you to a covered work,
|
||||
for which you have or can give appropriate copyright permission.
|
||||
|
||||
Notwithstanding any other provision of this License, for material you
|
||||
add to a covered work, you may (if authorized by the copyright holders of
|
||||
that material) supplement the terms of this License with terms:
|
||||
|
||||
a) Disclaiming warranty or limiting liability differently from the
|
||||
terms of sections 15 and 16 of this License; or
|
||||
|
||||
b) Requiring preservation of specified reasonable legal notices or
|
||||
author attributions in that material or in the Appropriate Legal
|
||||
Notices displayed by works containing it; or
|
||||
|
||||
c) Prohibiting misrepresentation of the origin of that material, or
|
||||
requiring that modified versions of such material be marked in
|
||||
reasonable ways as different from the original version; or
|
||||
|
||||
d) Limiting the use for publicity purposes of names of licensors or
|
||||
authors of the material; or
|
||||
|
||||
e) Declining to grant rights under trademark law for use of some
|
||||
trade names, trademarks, or service marks; or
|
||||
|
||||
f) Requiring indemnification of licensors and authors of that
|
||||
material by anyone who conveys the material (or modified versions of
|
||||
it) with contractual assumptions of liability to the recipient, for
|
||||
any liability that these contractual assumptions directly impose on
|
||||
those licensors and authors.
|
||||
|
||||
All other non-permissive additional terms are considered "further
|
||||
restrictions" within the meaning of section 10. If the Program as you
|
||||
received it, or any part of it, contains a notice stating that it is
|
||||
governed by this License along with a term that is a further
|
||||
restriction, you may remove that term. If a license document contains
|
||||
a further restriction but permits relicensing or conveying under this
|
||||
License, you may add to a covered work material governed by the terms
|
||||
of that license document, provided that the further restriction does
|
||||
not survive such relicensing or conveying.
|
||||
|
||||
If you add terms to a covered work in accord with this section, you
|
||||
must place, in the relevant source files, a statement of the
|
||||
additional terms that apply to those files, or a notice indicating
|
||||
where to find the applicable terms.
|
||||
|
||||
Additional terms, permissive or non-permissive, may be stated in the
|
||||
form of a separately written license, or stated as exceptions;
|
||||
the above requirements apply either way.
|
||||
|
||||
8. Termination.
|
||||
|
||||
You may not propagate or modify a covered work except as expressly
|
||||
provided under this License. Any attempt otherwise to propagate or
|
||||
modify it is void, and will automatically terminate your rights under
|
||||
this License (including any patent licenses granted under the third
|
||||
paragraph of section 11).
|
||||
|
||||
However, if you cease all violation of this License, then your
|
||||
license from a particular copyright holder is reinstated (a)
|
||||
provisionally, unless and until the copyright holder explicitly and
|
||||
finally terminates your license, and (b) permanently, if the copyright
|
||||
holder fails to notify you of the violation by some reasonable means
|
||||
prior to 60 days after the cessation.
|
||||
|
||||
Moreover, your license from a particular copyright holder is
|
||||
reinstated permanently if the copyright holder notifies you of the
|
||||
violation by some reasonable means, this is the first time you have
|
||||
received notice of violation of this License (for any work) from that
|
||||
copyright holder, and you cure the violation prior to 30 days after
|
||||
your receipt of the notice.
|
||||
|
||||
Termination of your rights under this section does not terminate the
|
||||
licenses of parties who have received copies or rights from you under
|
||||
this License. If your rights have been terminated and not permanently
|
||||
reinstated, you do not qualify to receive new licenses for the same
|
||||
material under section 10.
|
||||
|
||||
9. Acceptance Not Required for Having Copies.
|
||||
|
||||
You are not required to accept this License in order to receive or
|
||||
run a copy of the Program. Ancillary propagation of a covered work
|
||||
occurring solely as a consequence of using peer-to-peer transmission
|
||||
to receive a copy likewise does not require acceptance. However,
|
||||
nothing other than this License grants you permission to propagate or
|
||||
modify any covered work. These actions infringe copyright if you do
|
||||
not accept this License. Therefore, by modifying or propagating a
|
||||
covered work, you indicate your acceptance of this License to do so.
|
||||
|
||||
10. Automatic Licensing of Downstream Recipients.
|
||||
|
||||
Each time you convey a covered work, the recipient automatically
|
||||
receives a license from the original licensors, to run, modify and
|
||||
propagate that work, subject to this License. You are not responsible
|
||||
for enforcing compliance by third parties with this License.
|
||||
|
||||
An "entity transaction" is a transaction transferring control of an
|
||||
organization, or substantially all assets of one, or subdividing an
|
||||
organization, or merging organizations. If propagation of a covered
|
||||
work results from an entity transaction, each party to that
|
||||
transaction who receives a copy of the work also receives whatever
|
||||
licenses to the work the party's predecessor in interest had or could
|
||||
give under the previous paragraph, plus a right to possession of the
|
||||
Corresponding Source of the work from the predecessor in interest, if
|
||||
the predecessor has it or can get it with reasonable efforts.
|
||||
|
||||
You may not impose any further restrictions on the exercise of the
|
||||
rights granted or affirmed under this License. For example, you may
|
||||
not impose a license fee, royalty, or other charge for exercise of
|
||||
rights granted under this License, and you may not initiate litigation
|
||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||
any patent claim is infringed by making, using, selling, offering for
|
||||
sale, or importing the Program or any portion of it.
|
||||
|
||||
11. Patents.
|
||||
|
||||
A "contributor" is a copyright holder who authorizes use under this
|
||||
License of the Program or a work on which the Program is based. The
|
||||
work thus licensed is called the contributor's "contributor version".
|
||||
|
||||
A contributor's "essential patent claims" are all patent claims
|
||||
owned or controlled by the contributor, whether already acquired or
|
||||
hereafter acquired, that would be infringed by some manner, permitted
|
||||
by this License, of making, using, or selling its contributor version,
|
||||
but do not include claims that would be infringed only as a
|
||||
consequence of further modification of the contributor version. For
|
||||
purposes of this definition, "control" includes the right to grant
|
||||
patent sublicenses in a manner consistent with the requirements of
|
||||
this License.
|
||||
|
||||
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||
patent license under the contributor's essential patent claims, to
|
||||
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||
propagate the contents of its contributor version.
|
||||
|
||||
In the following three paragraphs, a "patent license" is any express
|
||||
agreement or commitment, however denominated, not to enforce a patent
|
||||
(such as an express permission to practice a patent or covenant not to
|
||||
sue for patent infringement). To "grant" such a patent license to a
|
||||
party means to make such an agreement or commitment not to enforce a
|
||||
patent against the party.
|
||||
|
||||
If you convey a covered work, knowingly relying on a patent license,
|
||||
and the Corresponding Source of the work is not available for anyone
|
||||
to copy, free of charge and under the terms of this License, through a
|
||||
publicly available network server or other readily accessible means,
|
||||
then you must either (1) cause the Corresponding Source to be so
|
||||
available, or (2) arrange to deprive yourself of the benefit of the
|
||||
patent license for this particular work, or (3) arrange, in a manner
|
||||
consistent with the requirements of this License, to extend the patent
|
||||
license to downstream recipients. "Knowingly relying" means you have
|
||||
actual knowledge that, but for the patent license, your conveying the
|
||||
covered work in a country, or your recipient's use of the covered work
|
||||
in a country, would infringe one or more identifiable patents in that
|
||||
country that you have reason to believe are valid.
|
||||
|
||||
If, pursuant to or in connection with a single transaction or
|
||||
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||
covered work, and grant a patent license to some of the parties
|
||||
receiving the covered work authorizing them to use, propagate, modify
|
||||
or convey a specific copy of the covered work, then the patent license
|
||||
you grant is automatically extended to all recipients of the covered
|
||||
work and works based on it.
|
||||
|
||||
A patent license is "discriminatory" if it does not include within
|
||||
the scope of its coverage, prohibits the exercise of, or is
|
||||
conditioned on the non-exercise of one or more of the rights that are
|
||||
specifically granted under this License. You may not convey a covered
|
||||
work if you are a party to an arrangement with a third party that is
|
||||
in the business of distributing software, under which you make payment
|
||||
to the third party based on the extent of your activity of conveying
|
||||
the work, and under which the third party grants, to any of the
|
||||
parties who would receive the covered work from you, a discriminatory
|
||||
patent license (a) in connection with copies of the covered work
|
||||
conveyed by you (or copies made from those copies), or (b) primarily
|
||||
for and in connection with specific products or compilations that
|
||||
contain the covered work, unless you entered into that arrangement,
|
||||
or that patent license was granted, prior to 28 March 2007.
|
||||
|
||||
Nothing in this License shall be construed as excluding or limiting
|
||||
any implied license or other defenses to infringement that may
|
||||
otherwise be available to you under applicable patent law.
|
||||
|
||||
12. No Surrender of Others' Freedom.
|
||||
|
||||
If conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot convey a
|
||||
covered work so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you may
|
||||
not convey it at all. For example, if you agree to terms that obligate you
|
||||
to collect a royalty for further conveying from those to whom you convey
|
||||
the Program, the only way you could satisfy both those terms and this
|
||||
License would be to refrain entirely from conveying the Program.
|
||||
|
||||
13. Remote Network Interaction; Use with the GNU General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, if you modify the
|
||||
Program, your modified version must prominently offer all users
|
||||
interacting with it remotely through a computer network (if your version
|
||||
supports such interaction) an opportunity to receive the Corresponding
|
||||
Source of your version by providing access to the Corresponding Source
|
||||
from a network server at no charge, through some standard or customary
|
||||
means of facilitating copying of software. This Corresponding Source
|
||||
shall include the Corresponding Source for any work covered by version 3
|
||||
of the GNU General Public License that is incorporated pursuant to the
|
||||
following paragraph.
|
||||
|
||||
Notwithstanding any other provision of this License, you have
|
||||
permission to link or combine any covered work with a work licensed
|
||||
under version 3 of the GNU General Public License into a single
|
||||
combined work, and to convey the resulting work. The terms of this
|
||||
License will continue to apply to the part which is the covered work,
|
||||
but the work with which it is combined will remain governed by version
|
||||
3 of the GNU General Public License.
|
||||
|
||||
14. Revised Versions of this License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions of
|
||||
the GNU Affero General Public License from time to time. Such new versions
|
||||
will be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Program specifies that a certain numbered version of the GNU Affero General
|
||||
Public License "or any later version" applies to it, you have the
|
||||
option of following the terms and conditions either of that numbered
|
||||
version or of any later version published by the Free Software
|
||||
Foundation. If the Program does not specify a version number of the
|
||||
GNU Affero General Public License, you may choose any version ever published
|
||||
by the Free Software Foundation.
|
||||
|
||||
If the Program specifies that a proxy can decide which future
|
||||
versions of the GNU Affero General Public License can be used, that proxy's
|
||||
public statement of acceptance of a version permanently authorizes you
|
||||
to choose that version for the Program.
|
||||
|
||||
Later license versions may give you additional or different
|
||||
permissions. However, no additional obligations are imposed on any
|
||||
author or copyright holder as a result of your choosing to follow a
|
||||
later version.
|
||||
|
||||
15. Disclaimer of Warranty.
|
||||
|
||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. Limitation of Liability.
|
||||
|
||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGES.
|
||||
|
||||
17. Interpretation of Sections 15 and 16.
|
||||
|
||||
If the disclaimer of warranty and limitation of liability provided
|
||||
above cannot be given local legal effect according to their terms,
|
||||
reviewing courts shall apply local law that most closely approximates
|
||||
an absolute waiver of all civil liability in connection with the
|
||||
Program, unless a warranty or assumption of liability accompanies a
|
||||
copy of the Program in return for a fee.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
state the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If your software can interact with users remotely through a computer
|
||||
network, you should also make sure that it provides a way for users to
|
||||
get its source. For example, if your program is a web application, its
|
||||
interface could display a "Source" link that leads users to an archive
|
||||
of the code. There are many ways you could offer source, and different
|
||||
solutions will be better for different programs; see section 13 for the
|
||||
specific requirements.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or school,
|
||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||
For more information on this, and how to apply and follow the GNU AGPL, see
|
||||
<http://www.gnu.org/licenses/>.
|
@@ -1,174 +0,0 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
@@ -1,339 +0,0 @@
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 2, June 1991
|
||||
|
||||
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The licenses for most software are designed to take away your
|
||||
freedom to share and change it. By contrast, the GNU General Public
|
||||
License is intended to guarantee your freedom to share and change free
|
||||
software--to make sure the software is free for all its users. This
|
||||
General Public License applies to most of the Free Software
|
||||
Foundation's software and to any other program whose authors commit to
|
||||
using it. (Some other Free Software Foundation software is covered by
|
||||
the GNU Lesser General Public License instead.) You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
this service if you wish), that you receive source code or can get it
|
||||
if you want it, that you can change the software or use pieces of it
|
||||
in new free programs; and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to make restrictions that forbid
|
||||
anyone to deny you these rights or to ask you to surrender the rights.
|
||||
These restrictions translate to certain responsibilities for you if you
|
||||
distribute copies of the software, or if you modify it.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must give the recipients all the rights that
|
||||
you have. You must make sure that they, too, receive or can get the
|
||||
source code. And you must show them these terms so they know their
|
||||
rights.
|
||||
|
||||
We protect your rights with two steps: (1) copyright the software, and
|
||||
(2) offer you this license which gives you legal permission to copy,
|
||||
distribute and/or modify the software.
|
||||
|
||||
Also, for each author's protection and ours, we want to make certain
|
||||
that everyone understands that there is no warranty for this free
|
||||
software. If the software is modified by someone else and passed on, we
|
||||
want its recipients to know that what they have is not the original, so
|
||||
that any problems introduced by others will not reflect on the original
|
||||
authors' reputations.
|
||||
|
||||
Finally, any free program is threatened constantly by software
|
||||
patents. We wish to avoid the danger that redistributors of a free
|
||||
program will individually obtain patent licenses, in effect making the
|
||||
program proprietary. To prevent this, we have made it clear that any
|
||||
patent must be licensed for everyone's free use or not licensed at all.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. This License applies to any program or other work which contains
|
||||
a notice placed by the copyright holder saying it may be distributed
|
||||
under the terms of this General Public License. The "Program", below,
|
||||
refers to any such program or work, and a "work based on the Program"
|
||||
means either the Program or any derivative work under copyright law:
|
||||
that is to say, a work containing the Program or a portion of it,
|
||||
either verbatim or with modifications and/or translated into another
|
||||
language. (Hereinafter, translation is included without limitation in
|
||||
the term "modification".) Each licensee is addressed as "you".
|
||||
|
||||
Activities other than copying, distribution and modification are not
|
||||
covered by this License; they are outside its scope. The act of
|
||||
running the Program is not restricted, and the output from the Program
|
||||
is covered only if its contents constitute a work based on the
|
||||
Program (independent of having been made by running the Program).
|
||||
Whether that is true depends on what the Program does.
|
||||
|
||||
1. You may copy and distribute verbatim copies of the Program's
|
||||
source code as you receive it, in any medium, provided that you
|
||||
conspicuously and appropriately publish on each copy an appropriate
|
||||
copyright notice and disclaimer of warranty; keep intact all the
|
||||
notices that refer to this License and to the absence of any warranty;
|
||||
and give any other recipients of the Program a copy of this License
|
||||
along with the Program.
|
||||
|
||||
You may charge a fee for the physical act of transferring a copy, and
|
||||
you may at your option offer warranty protection in exchange for a fee.
|
||||
|
||||
2. You may modify your copy or copies of the Program or any portion
|
||||
of it, thus forming a work based on the Program, and copy and
|
||||
distribute such modifications or work under the terms of Section 1
|
||||
above, provided that you also meet all of these conditions:
|
||||
|
||||
a) You must cause the modified files to carry prominent notices
|
||||
stating that you changed the files and the date of any change.
|
||||
|
||||
b) You must cause any work that you distribute or publish, that in
|
||||
whole or in part contains or is derived from the Program or any
|
||||
part thereof, to be licensed as a whole at no charge to all third
|
||||
parties under the terms of this License.
|
||||
|
||||
c) If the modified program normally reads commands interactively
|
||||
when run, you must cause it, when started running for such
|
||||
interactive use in the most ordinary way, to print or display an
|
||||
announcement including an appropriate copyright notice and a
|
||||
notice that there is no warranty (or else, saying that you provide
|
||||
a warranty) and that users may redistribute the program under
|
||||
these conditions, and telling the user how to view a copy of this
|
||||
License. (Exception: if the Program itself is interactive but
|
||||
does not normally print such an announcement, your work based on
|
||||
the Program is not required to print an announcement.)
|
||||
|
||||
These requirements apply to the modified work as a whole. If
|
||||
identifiable sections of that work are not derived from the Program,
|
||||
and can be reasonably considered independent and separate works in
|
||||
themselves, then this License, and its terms, do not apply to those
|
||||
sections when you distribute them as separate works. But when you
|
||||
distribute the same sections as part of a whole which is a work based
|
||||
on the Program, the distribution of the whole must be on the terms of
|
||||
this License, whose permissions for other licensees extend to the
|
||||
entire whole, and thus to each and every part regardless of who wrote it.
|
||||
|
||||
Thus, it is not the intent of this section to claim rights or contest
|
||||
your rights to work written entirely by you; rather, the intent is to
|
||||
exercise the right to control the distribution of derivative or
|
||||
collective works based on the Program.
|
||||
|
||||
In addition, mere aggregation of another work not based on the Program
|
||||
with the Program (or with a work based on the Program) on a volume of
|
||||
a storage or distribution medium does not bring the other work under
|
||||
the scope of this License.
|
||||
|
||||
3. You may copy and distribute the Program (or a work based on it,
|
||||
under Section 2) in object code or executable form under the terms of
|
||||
Sections 1 and 2 above provided that you also do one of the following:
|
||||
|
||||
a) Accompany it with the complete corresponding machine-readable
|
||||
source code, which must be distributed under the terms of Sections
|
||||
1 and 2 above on a medium customarily used for software interchange; or,
|
||||
|
||||
b) Accompany it with a written offer, valid for at least three
|
||||
years, to give any third party, for a charge no more than your
|
||||
cost of physically performing source distribution, a complete
|
||||
machine-readable copy of the corresponding source code, to be
|
||||
distributed under the terms of Sections 1 and 2 above on a medium
|
||||
customarily used for software interchange; or,
|
||||
|
||||
c) Accompany it with the information you received as to the offer
|
||||
to distribute corresponding source code. (This alternative is
|
||||
allowed only for noncommercial distribution and only if you
|
||||
received the program in object code or executable form with such
|
||||
an offer, in accord with Subsection b above.)
|
||||
|
||||
The source code for a work means the preferred form of the work for
|
||||
making modifications to it. For an executable work, complete source
|
||||
code means all the source code for all modules it contains, plus any
|
||||
associated interface definition files, plus the scripts used to
|
||||
control compilation and installation of the executable. However, as a
|
||||
special exception, the source code distributed need not include
|
||||
anything that is normally distributed (in either source or binary
|
||||
form) with the major components (compiler, kernel, and so on) of the
|
||||
operating system on which the executable runs, unless that component
|
||||
itself accompanies the executable.
|
||||
|
||||
If distribution of executable or object code is made by offering
|
||||
access to copy from a designated place, then offering equivalent
|
||||
access to copy the source code from the same place counts as
|
||||
distribution of the source code, even though third parties are not
|
||||
compelled to copy the source along with the object code.
|
||||
|
||||
4. You may not copy, modify, sublicense, or distribute the Program
|
||||
except as expressly provided under this License. Any attempt
|
||||
otherwise to copy, modify, sublicense or distribute the Program is
|
||||
void, and will automatically terminate your rights under this License.
|
||||
However, parties who have received copies, or rights, from you under
|
||||
this License will not have their licenses terminated so long as such
|
||||
parties remain in full compliance.
|
||||
|
||||
5. You are not required to accept this License, since you have not
|
||||
signed it. However, nothing else grants you permission to modify or
|
||||
distribute the Program or its derivative works. These actions are
|
||||
prohibited by law if you do not accept this License. Therefore, by
|
||||
modifying or distributing the Program (or any work based on the
|
||||
Program), you indicate your acceptance of this License to do so, and
|
||||
all its terms and conditions for copying, distributing or modifying
|
||||
the Program or works based on it.
|
||||
|
||||
6. Each time you redistribute the Program (or any work based on the
|
||||
Program), the recipient automatically receives a license from the
|
||||
original licensor to copy, distribute or modify the Program subject to
|
||||
these terms and conditions. You may not impose any further
|
||||
restrictions on the recipients' exercise of the rights granted herein.
|
||||
You are not responsible for enforcing compliance by third parties to
|
||||
this License.
|
||||
|
||||
7. If, as a consequence of a court judgment or allegation of patent
|
||||
infringement or for any other reason (not limited to patent issues),
|
||||
conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot
|
||||
distribute so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you
|
||||
may not distribute the Program at all. For example, if a patent
|
||||
license would not permit royalty-free redistribution of the Program by
|
||||
all those who receive copies directly or indirectly through you, then
|
||||
the only way you could satisfy both it and this License would be to
|
||||
refrain entirely from distribution of the Program.
|
||||
|
||||
If any portion of this section is held invalid or unenforceable under
|
||||
any particular circumstance, the balance of the section is intended to
|
||||
apply and the section as a whole is intended to apply in other
|
||||
circumstances.
|
||||
|
||||
It is not the purpose of this section to induce you to infringe any
|
||||
patents or other property right claims or to contest validity of any
|
||||
such claims; this section has the sole purpose of protecting the
|
||||
integrity of the free software distribution system, which is
|
||||
implemented by public license practices. Many people have made
|
||||
generous contributions to the wide range of software distributed
|
||||
through that system in reliance on consistent application of that
|
||||
system; it is up to the author/donor to decide if he or she is willing
|
||||
to distribute software through any other system and a licensee cannot
|
||||
impose that choice.
|
||||
|
||||
This section is intended to make thoroughly clear what is believed to
|
||||
be a consequence of the rest of this License.
|
||||
|
||||
8. If the distribution and/or use of the Program is restricted in
|
||||
certain countries either by patents or by copyrighted interfaces, the
|
||||
original copyright holder who places the Program under this License
|
||||
may add an explicit geographical distribution limitation excluding
|
||||
those countries, so that distribution is permitted only in or among
|
||||
countries not thus excluded. In such case, this License incorporates
|
||||
the limitation as if written in the body of this License.
|
||||
|
||||
9. The Free Software Foundation may publish revised and/or new versions
|
||||
of the General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the Program
|
||||
specifies a version number of this License which applies to it and "any
|
||||
later version", you have the option of following the terms and conditions
|
||||
either of that version or of any later version published by the Free
|
||||
Software Foundation. If the Program does not specify a version number of
|
||||
this License, you may choose any version ever published by the Free Software
|
||||
Foundation.
|
||||
|
||||
10. If you wish to incorporate parts of the Program into other free
|
||||
programs whose distribution conditions are different, write to the author
|
||||
to ask for permission. For software which is copyrighted by the Free
|
||||
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||
make exceptions for this. Our decision will be guided by the two goals
|
||||
of preserving the free status of all derivatives of our free software and
|
||||
of promoting the sharing and reuse of software generally.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||
REPAIR OR CORRECTION.
|
||||
|
||||
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
convey the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program is interactive, make it output a short notice like this
|
||||
when it starts in an interactive mode:
|
||||
|
||||
Gnomovision version 69, Copyright (C) year name of author
|
||||
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, the commands you use may
|
||||
be called something other than `show w' and `show c'; they could even be
|
||||
mouse-clicks or menu items--whatever suits your program.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or your
|
||||
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||
necessary. Here is a sample; alter the names:
|
||||
|
||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
||||
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
||||
|
||||
<signature of Ty Coon>, 1 April 1989
|
||||
Ty Coon, President of Vice
|
||||
|
||||
This General Public License does not permit incorporating your program into
|
||||
proprietary programs. If your program is a subroutine library, you may
|
||||
consider it more useful to permit linking proprietary applications with the
|
||||
library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License.
|
@ -1,13 +0,0 @@
|
||||
## This file should be placed in the root directory of your project.
|
||||
## Then modify the CMakeLists.txt file in the root directory of your
|
||||
## project to incorporate the testing dashboard.
|
||||
## # The following are required to use Dart and the CDash dashboard
|
||||
## ENABLE_TESTING()
|
||||
## INCLUDE(CTest)
|
||||
set(CTEST_PROJECT_NAME "tokudb")
|
||||
set(CTEST_NIGHTLY_START_TIME "23:59:00 EDT")
|
||||
|
||||
set(CTEST_DROP_METHOD "http")
|
||||
set(CTEST_DROP_SITE "lex1:8080")
|
||||
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=tokudb")
|
||||
set(CTEST_DROP_SITE_CDASH TRUE)
|
@ -1,239 +0,0 @@
|
||||
cmake_policy(SET CMP0012 NEW)
|
||||
|
||||
## these tests shouldn't run with valgrind
|
||||
list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE
|
||||
ft/bnc-insert-benchmark
|
||||
ft/ft_loader-test-extractor-1
|
||||
ft/ft_loader-test-extractor-2
|
||||
ft/ft_loader-test-extractor-3
|
||||
ft/upgrade_test_simple
|
||||
portability/test-cache-line-boundary-fails
|
||||
portability/try-leak-lost
|
||||
portability/try-leak-reachable
|
||||
portability/try-leak-uninit
|
||||
util/helgrind_test_partitioned_counter
|
||||
util/helgrind_test_partitioned_counter_5833
|
||||
ydb/diskfull.tdb
|
||||
ydb/drd_test_4015.tdb
|
||||
ydb/drd_test_groupcommit_count.tdb
|
||||
ydb/filesize.tdb
|
||||
ydb/helgrind_helgrind1.tdb
|
||||
ydb/helgrind_helgrind2.tdb
|
||||
ydb/helgrind_helgrind3.tdb
|
||||
ydb/helgrind_test_groupcommit_count.tdb
|
||||
ydb/hot-optimize-table-tests.tdb
|
||||
ydb/insert-dup-prelock.tdb
|
||||
ydb/loader-cleanup-test2.tdb
|
||||
ydb/loader-cleanup-test3.tdb
|
||||
ydb/loader-stress-test4.tdb
|
||||
ydb/maxsize-for-loader-B.tdb
|
||||
ydb/openlimit17.tdb
|
||||
ydb/openlimit17-locktree.tdb
|
||||
ydb/preload-db-nested.tdb
|
||||
ydb/stress-gc.tdb
|
||||
ydb/stress-gc2.tdb
|
||||
ydb/stress-test.tdb
|
||||
ydb/test-5138.tdb
|
||||
ydb/test-prepare.tdb
|
||||
ydb/test-prepare2.tdb
|
||||
ydb/test-prepare3.tdb
|
||||
ydb/test-recover1.tdb
|
||||
ydb/test-recover2.tdb
|
||||
ydb/test-recover3.tdb
|
||||
ydb/test-xa-prepare.tdb
|
||||
ydb/test4573-logtrim.tdb
|
||||
ydb/test_3645.tdb
|
||||
ydb/test_groupcommit_perf.tdb
|
||||
ydb/test_large_update_broadcast_small_cachetable.tdb
|
||||
ydb/test_update_broadcast_stress.tdb
|
||||
ydb/test_update_stress.tdb
|
||||
ydb/upgrade-test-4.tdb
|
||||
)
|
||||
|
||||
if (NOT @RUN_HELGRIND_TESTS@)
|
||||
list(APPEND CTEST_CUSTOM_TESTS_IGNORE
|
||||
util/helgrind_test_partitioned_counter
|
||||
util/helgrind_test_partitioned_counter_5833
|
||||
ydb/helgrind_helgrind1.tdb
|
||||
ydb/helgrind_helgrind2.tdb
|
||||
ydb/helgrind_helgrind3.tdb
|
||||
ydb/helgrind_test_groupcommit_count.tdb
|
||||
)
|
||||
endif ()
|
||||
|
||||
if (NOT @RUN_DRD_TESTS@)
|
||||
list(APPEND CTEST_CUSTOM_TESTS_IGNORE
|
||||
ydb/drd_test_groupcommit_count.tdb
|
||||
ydb/drd_test_4015.tdb
|
||||
)
|
||||
endif ()
|
||||
|
||||
## osx's pthreads prefer writers, so this test will deadlock
|
||||
if (@CMAKE_SYSTEM_NAME@ STREQUAL Darwin)
|
||||
list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE portability/test-pthread-rwlock-rwr)
|
||||
list(APPEND CTEST_CUSTOM_TESTS_IGNORE portability/test-pthread-rwlock-rwr)
|
||||
endif ()
|
||||
|
||||
## tests that are supposed to crash will generate memcheck failures
|
||||
set(tests_that_should_fail
|
||||
ft/test-assertA
|
||||
ft/test-assertB
|
||||
portability/try-assert-zero
|
||||
portability/try-assert0
|
||||
ydb/recover-missing-dbfile-2.abortrecover
|
||||
ydb/recover-missing-dbfile.abortrecover
|
||||
ydb/test_db_no_env.tdb
|
||||
ydb/test_truncate_txn_abort.tdb
|
||||
)
|
||||
list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${tests_that_should_fail})
|
||||
|
||||
## don't run drd stress tests with valgrind either (because that would do valgrind twice)
|
||||
set(stress_tests
|
||||
test_stress0.tdb
|
||||
test_stress1.tdb
|
||||
test_stress2.tdb
|
||||
test_stress3.tdb
|
||||
test_stress4.tdb
|
||||
test_stress5.tdb
|
||||
test_stress6.tdb
|
||||
test_stress7.tdb
|
||||
test_stress_hot_indexing.tdb
|
||||
test_stress_openclose.tdb
|
||||
test_stress_with_verify.tdb
|
||||
)
|
||||
foreach(test ${stress_tests})
|
||||
list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE
|
||||
ydb/drd_tiny_${test}
|
||||
ydb/drd_mid_${test}
|
||||
ydb/drd_large_${test}
|
||||
)
|
||||
if(NOT @RUN_LONG_TESTS@)
|
||||
list(APPEND CTEST_CUSTOM_TESTS_IGNORE
|
||||
ydb/drd_large_${test}
|
||||
)
|
||||
endif()
|
||||
if (NOT @RUN_DRD_TESTS@)
|
||||
list(APPEND CTEST_CUSTOM_TESTS_IGNORE
|
||||
ydb/drd_tiny_${test}
|
||||
ydb/drd_mid_${test}
|
||||
ydb/drd_large_${test}
|
||||
)
|
||||
endif ()
|
||||
endforeach(test)
|
||||
|
||||
## upgrade stress tests are 5 minutes long, don't need to run them always
|
||||
if(NOT @RUN_LONG_TESTS@)
|
||||
foreach(test ${stress_tests})
|
||||
if (NOT ${test} MATCHES test_stress_openclose)
|
||||
foreach(oldver 4.2.0 5.0.8 5.2.7 6.0.0 6.1.0 6.5.1 6.6.3)
|
||||
foreach(p_or_s pristine stressed)
|
||||
if (NOT (${test} MATCHES test_stress4 AND ${p_or_s} MATCHES stressed))
|
||||
foreach(size 2000)
|
||||
list(APPEND CTEST_CUSTOM_TESTS_IGNORE ydb/${test}/upgrade/${oldver}/${p_or_s}/${size})
|
||||
endforeach(size)
|
||||
endif ()
|
||||
endforeach(p_or_s)
|
||||
endforeach(oldver)
|
||||
endif ()
|
||||
endforeach(test)
|
||||
endif()
|
||||
|
||||
## NOTE(review): tdb_tests_that_should_fail is not read anywhere in the visible
## part of this file; it is kept only so the set of defined variables does not
## change -- confirm whether it is still needed.
set(tdb_tests_that_should_fail "ydb/${stress_tests}")
## Prefix EVERY stress test with "ydb/", like the other ydb/ entries in this
## file. Rewriting only the ";" separators would leave the first list element
## (test_stress0.tdb) without its prefix, so it would never match the ignore
## lists built from ${stress_tests} below.
string(REGEX REPLACE "([^;]+)" "ydb/\\1" stress_tests "${stress_tests}")
|
||||
|
||||
set(recover_stress_tests
|
||||
ydb/recover-test_stress1.abortrecover
|
||||
ydb/recover-test_stress2.abortrecover
|
||||
ydb/recover-test_stress3.abortrecover
|
||||
ydb/recover-test_stress_openclose.abortrecover
|
||||
)
|
||||
|
||||
## we run stress tests separately, only run them if asked to
|
||||
if(NOT @RUN_STRESS_TESTS@)
|
||||
list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${stress_tests} ${recover_stress_tests})
|
||||
list(APPEND CTEST_CUSTOM_TESTS_IGNORE ${stress_tests} ${recover_stress_tests})
|
||||
endif()
|
||||
|
||||
set(perf_tests
|
||||
ydb/perf_checkpoint_var.tdb
|
||||
ydb/perf_cursor_nop.tdb
|
||||
ydb/perf_malloc_free.tdb
|
||||
ydb/perf_nop.tdb
|
||||
ydb/perf_ptquery.tdb
|
||||
ydb/perf_ptquery2.tdb
|
||||
ydb/perf_read_write.tdb
|
||||
ydb/perf_xmalloc_free.tdb
|
||||
)
|
||||
|
||||
## we also don't need to run perf tests every time
|
||||
if(NOT @RUN_PERF_TESTS@)
|
||||
list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${perf_tests})
|
||||
list(APPEND CTEST_CUSTOM_TESTS_IGNORE ${perf_tests})
|
||||
endif()
|
||||
|
||||
## don't run perf tests with valgrind (that's slow)
|
||||
## Derive the perf test names from the perf_*.cc sources: foo.cc -> foo.tdb.
file(GLOB perf_test_srcs RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/src/tests" perf_*.cc)
string(REGEX REPLACE "\\.cc(;|$)" ".tdb\\1" perf_tests "${perf_test_srcs}")
## NOTE(review): tdb_tests_that_should_fail is not read anywhere in the visible
## part of this file; kept only to leave the set of defined variables unchanged.
set(tdb_tests_that_should_fail "ydb/${perf_tests}")
## Prefix EVERY element with "ydb/". Rewriting only the ";" separators would
## leave the first element without its prefix. An empty glob result stays empty.
string(REGEX REPLACE "([^;]+)" "ydb/\\1" perf_tests "${perf_tests}")
list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${perf_tests})
|
||||
|
||||
## these tests fail often and aren't helpful
|
||||
set(known_failing_tests
|
||||
ydb/diskfull.tdb
|
||||
)
|
||||
list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${known_failing_tests})
|
||||
list(APPEND CTEST_CUSTOM_TESTS_IGNORE ${known_failing_tests})
|
||||
|
||||
## these tests take a long time, only run them if asked to
|
||||
set(long_running_tests
|
||||
ft/is_empty
|
||||
ft/upgrade_test_simple
|
||||
ydb/checkpoint_1.tdb
|
||||
ydb/checkpoint_stress.tdb
|
||||
ydb/hotindexer-with-queries.tdb
|
||||
ydb/hot-optimize-table-tests.tdb
|
||||
ydb/loader-cleanup-test0.tdb
|
||||
ydb/loader-cleanup-test0z.tdb
|
||||
ydb/loader-cleanup-test2.tdb
|
||||
ydb/loader-cleanup-test2z.tdb
|
||||
ydb/loader-stress-test4.tdb
|
||||
ydb/loader-stress-test4z.tdb
|
||||
ydb/manyfiles.tdb
|
||||
ydb/preload-db-nested.tdb
|
||||
ydb/recover_stress.tdb
|
||||
ydb/root_fifo_1.tdb
|
||||
ydb/root_fifo_2.tdb
|
||||
ydb/root_fifo_31.tdb
|
||||
ydb/root_fifo_32.tdb
|
||||
ydb/stress-gc.tdb
|
||||
ydb/stress-test.tdb
|
||||
ydb/test3529.tdb
|
||||
ydb/test_logmax.tdb
|
||||
ydb/test_txn_nested2.tdb
|
||||
ydb/test_update_broadcast_stress.tdb
|
||||
ydb/test_update_stress.tdb
|
||||
)
|
||||
if(NOT @RUN_LONG_TESTS@)
|
||||
list(APPEND CTEST_CUSTOM_MEMCHECK_IGNORE ${long_running_tests})
|
||||
list(APPEND CTEST_CUSTOM_TESTS_IGNORE ${long_running_tests})
|
||||
endif()
|
||||
|
||||
## ignore log_print.cc in coverage report
|
||||
list(APPEND CTEST_CUSTOM_COVERAGE_EXCLUDE "log_print.cc")
|
||||
|
||||
list(APPEND CTEST_CUSTOM_WARNING_EXCEPTION
|
||||
# don't complain about warnings in xz source
|
||||
"xz-4.999.9beta/src/liblzma"
|
||||
# don't complain about clang missing warnings from xz code
|
||||
"clang: warning: unknown warning option"
|
||||
# don't complain about warnings in jemalloc source
|
||||
"jemalloc/src"
|
||||
"jemalloc/internal"
|
||||
# don't complain about valgrind headers leaving things unused
|
||||
"valgrind/valgrind.h"
|
||||
"valgrind/memcheck.h"
|
||||
# don't complain about ranlib or libtool on empty archive
|
||||
"has no symbols"
|
||||
"the table of contents is empty"
|
||||
)
|
@ -1,37 +0,0 @@
|
||||
UNIVERSITY PATENT NOTICE:
|
||||
The technology is licensed by the Massachusetts Institute of
|
||||
Technology, Rutgers State University of New Jersey, and the Research
|
||||
Foundation of State University of New York at Stony Brook under
|
||||
United States of America Serial No. 11/760379 and to the patents
|
||||
and/or patent applications resulting from it.
|
||||
PATENT MARKING NOTICE:
|
||||
This software is covered by US Patent No. 8,185,551.
|
||||
This software is covered by US Patent No. 8,489,638.
|
||||
PATENT RIGHTS GRANT:
|
||||
"THIS IMPLEMENTATION" means the copyrightable works distributed by
|
||||
Percona as part of the Fractal Tree project.
|
||||
"PATENT CLAIMS" means the claims of patents that are owned or
|
||||
licensable by Percona, both currently or in the future; and that in
|
||||
the absence of this license would be infringed by THIS
|
||||
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
|
||||
"PATENT CHALLENGE" shall mean a challenge to the validity,
|
||||
patentability, enforceability and/or non-infringement of any of the
|
||||
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
|
||||
Percona hereby grants to you, for the term and geographical scope of
|
||||
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
|
||||
irrevocable (except as stated in this section) patent license to
|
||||
make, have made, use, offer to sell, sell, import, transfer, and
|
||||
otherwise run, modify, and propagate the contents of THIS
|
||||
IMPLEMENTATION, where such license applies only to the PATENT
|
||||
CLAIMS. This grant does not include claims that would be infringed
|
||||
only as a consequence of further modifications of THIS
|
||||
IMPLEMENTATION. If you or your agent or licensee institute or order
|
||||
or agree to the institution of patent litigation against any entity
|
||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||
THIS IMPLEMENTATION constitutes direct or contributory patent
|
||||
infringement, or inducement of patent infringement, then any rights
|
||||
granted to you under this License shall terminate as of the date
|
||||
such litigation is filed. If you or your agent or exclusive
|
||||
licensee institute or order or agree to the institution of a PATENT
|
||||
CHALLENGE, then Percona may terminate any rights granted to you
|
||||
under this License.
|
@ -1,117 +0,0 @@
|
||||
PerconaFT
|
||||
======
|
||||
|
||||
PerconaFT is a high-performance, transactional key-value store, used in the
|
||||
TokuDB storage engine for Percona Server and MySQL, and in TokuMX, the
|
||||
high-performance MongoDB distribution.
|
||||
|
||||
PerconaFT is provided as a shared library with an interface similar to
|
||||
Berkeley DB.
|
||||
|
||||
To build the full MySQL product, see the instructions for
|
||||
[Percona/percona-server][percona-server]. This document covers PerconaFT only.
|
||||
|
||||
[percona-server]: https://github.com/Percona/percona-server
|
||||
|
||||
|
||||
Building
|
||||
--------
|
||||
|
||||
PerconaFT is built using CMake >= 2.8.9. Out-of-source builds are
|
||||
recommended. You need a C++11 compiler, though only some versions
|
||||
of GCC >= 4.7 and Clang are tested. You also need zlib development
|
||||
packages (`yum install zlib-devel` or `apt-get install zlib1g-dev`).
|
||||
|
||||
You will also need the source code for jemalloc, checked out in
|
||||
`third_party/`.
|
||||
|
||||
```sh
|
||||
git clone git://github.com/Percona/PerconaFT.git percona-ft
|
||||
cd percona-ft
|
||||
git clone git://github.com/Percona/jemalloc.git third_party/jemalloc
|
||||
mkdir build
|
||||
cd build
|
||||
CC=gcc47 CXX=g++47 cmake \
|
||||
-D CMAKE_BUILD_TYPE=Debug \
|
||||
-D BUILD_TESTING=OFF \
|
||||
-D USE_VALGRIND=OFF \
|
||||
-D CMAKE_INSTALL_PREFIX=../prefix/ \
|
||||
..
|
||||
cmake --build . --target install
|
||||
```
|
||||
|
||||
This will build `libft.so` and `libtokuportability.so` and install them, along with
|
||||
some header files, and some examples to `percona-ft/prefix/`. It will also
|
||||
build jemalloc and install it alongside these libraries; you should link
|
||||
to that if you are planning to run benchmarks or in production.
|
||||
|
||||
### Platforms
|
||||
|
||||
PerconaFT is supported on 64-bit CentOS, Debian, and Ubuntu and should work
|
||||
on other 64-bit Linux distributions, and may work on OS X 10.8 and FreeBSD.
|
||||
PerconaFT is not supported on 32-bit systems.
|
||||
|
||||
[Transparent hugepages][transparent-hugepages] is a feature in newer linux
|
||||
kernel versions that causes problems for the memory usage tracking
|
||||
calculations in PerconaFT and can lead to memory overcommit. If you have
|
||||
this feature enabled, PerconaFT will not start, and you should turn it off.
|
||||
If you want to run with transparent hugepages on, you can set an
|
||||
environment variable `TOKU_HUGE_PAGES_OK=1`, but only do this for testing,
|
||||
and only with a small cache size.
|
||||
|
||||
[transparent-hugepages]: https://access.redhat.com/site/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Performance_Tuning_Guide/s-memory-transhuge.html
|
||||
|
||||
|
||||
Testing
|
||||
-------
|
||||
|
||||
PerconaFT uses CTest for testing. The CDash testing dashboard is not
|
||||
currently public, but you can run the tests without submitting them.
|
||||
|
||||
Some large data files are not stored in the git repository; they
|
||||
will be made available soon. For now, the tests that use these files will
|
||||
not run.
|
||||
|
||||
In the build directory from above:
|
||||
|
||||
```sh
|
||||
cmake -D BUILD_TESTING=ON ..
|
||||
ctest -D ExperimentalStart \
|
||||
-D ExperimentalConfigure \
|
||||
-D ExperimentalBuild \
|
||||
-D ExperimentalTest
|
||||
```
|
||||
|
||||
|
||||
Contributing
|
||||
------------
|
||||
|
||||
Please report bugs in PerconaFT to the [issue tracker][jira].
|
||||
|
||||
We have two publicly accessible mailing lists for TokuDB:
|
||||
|
||||
- tokudb-user@googlegroups.com is for general and support related
|
||||
questions about the use of TokuDB.
|
||||
- tokudb-dev@googlegroups.com is for discussion of the development of
|
||||
TokuDB.
|
||||
|
||||
All source code and test contributions must be provided under a [BSD 2-Clause][bsd-2] license. For any small change set, the license text may be contained within the commit comment and the pull request. For larger contributions, the license must be presented in a COPYING.<feature_name> file in the root of the PerconaFT project. Please see the [BSD 2-Clause license template][bsd-2] for the content of the license text.
|
||||
|
||||
[jira]: https://jira.percona.com/projects/TDB
|
||||
[bsd-2]: http://opensource.org/licenses/BSD-2-Clause/
|
||||
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
Portions of the PerconaFT library (the 'locktree' and 'omt') are available under the Apache version 2 license.
|
||||
PerconaFT is available under the GPL version 2, and AGPL version 3.
|
||||
See [COPYING.APACHEv2][apachelicense],
|
||||
[COPYING.AGPLv3][agpllicense],
|
||||
[COPYING.GPLv2][gpllicense], and
|
||||
[PATENTS][patents].
|
||||
|
||||
[apachelicense]: http://github.com/Percona/PerconaFT/blob/master/COPYING.APACHEv2
|
||||
[agpllicense]: http://github.com/Percona/PerconaFT/blob/master/COPYING.AGPLv3
|
||||
[gpllicense]: http://github.com/Percona/PerconaFT/blob/master/COPYING.GPLv2
|
||||
[patents]: http://github.com/Percona/PerconaFT/blob/master/PATENTS
|
@ -1,6 +0,0 @@
|
||||
{
|
||||
bash
|
||||
Memcheck:Leak
|
||||
...
|
||||
obj:/bin/bash
|
||||
}
|
@ -1,4 +0,0 @@
|
||||
# Suppress some warnings we get from jemalloc and lzma, they aren't our fault.
|
||||
.*third_party/jemalloc/src/jemalloc.c : .*-Wattributes.*
|
||||
.*third_party/jemalloc/src/ctl.c : .*-Wunused-but-set-variable.*
|
||||
.*xz/src/build_lzma/src/liblzma/lz/lz_encoder.c : .*-Wunused-but-set-variable.*
|
@ -1,29 +0,0 @@
|
||||
set_directory_properties(PROPERTIES INCLUDE_DIRECTORIES "")
|
||||
|
||||
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/runcat.sh" "#!/bin/sh
|
||||
out=$1; shift
|
||||
exec \"$@\" >$out")
|
||||
|
||||
add_executable(make_tdb make_tdb.cc)
|
||||
set_property(TARGET make_tdb APPEND PROPERTY COMPILE_DEFINITIONS _GNU_SOURCE)
|
||||
add_custom_command(
|
||||
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/db.h"
|
||||
COMMAND sh runcat.sh "${CMAKE_CURRENT_BINARY_DIR}/db.h" $<TARGET_FILE:make_tdb>
|
||||
DEPENDS make_tdb)
|
||||
add_custom_target(install_tdb_h DEPENDS
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/db.h")
|
||||
|
||||
# detect when we are being built as a subproject
|
||||
if (NOT DEFINED MYSQL_PROJECT_NAME_DOCSTRING)
|
||||
install(
|
||||
FILES "${CMAKE_CURRENT_BINARY_DIR}/db.h"
|
||||
DESTINATION include
|
||||
RENAME tokudb.h
|
||||
COMPONENT tokukv_headers
|
||||
)
|
||||
install(
|
||||
FILES "${CMAKE_CURRENT_BINARY_DIR}/db.h"
|
||||
DESTINATION include
|
||||
COMPONENT tokukv_headers
|
||||
)
|
||||
endif ()
|
File diff suppressed because it is too large
Load Diff
@ -1,845 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
/* Make a db.h that will be link-time compatible with Sleepycat's Berkeley DB. */
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
// Don't include toku_assert.h. Just use assert.h
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
/* String literal spelling out the GCC default-visibility attribute.
 * NOTE(review): presumably pasted into the declarations this program prints;
 * its uses lie outside this excerpt -- confirm. */
#define VISIBLE "__attribute__((__visibility__(\"default\")))"
|
||||
|
||||
/* Capacity of the fields[] table below. */
#define FIELD_LIMIT 100

/* One struct member that has been recorded for output. */
struct fieldinfo {
    const char *decl_format_string; /* printf template for the declaration; receives the member name */
    const char *name;               /* member name */
    size_t offset;                  /* byte offset of the member inside its struct */
};

/* Table of recorded members and the number of slots currently in use. */
struct fieldinfo fields[FIELD_LIMIT];
static int field_counter=0;

/*
 * qsort() comparator: orders two fieldinfo records by ascending offset.
 * Returns -1, 0 or +1 when av's offset is below, equal to or above bv's.
 */
static int compare_fields (const void *av, const void *bv) {
    const size_t lhs = static_cast<const struct fieldinfo *>(av)->offset;
    const size_t rhs = static_cast<const struct fieldinfo *>(bv)->offset;
    if (lhs == rhs) {
        return 0;
    }
    return (lhs < rhs) ? -1 : +1;
}
|
||||
|
||||
/*
 * Record one member of struct `typ` in the next free slot of fields[]:
 *   - fstring becomes the slot's declaration format string,
 *   - fname is stringized to become the slot's name,
 *   - the slot's offset is the byte offset of `fname` within `typ`.
 * Asserts that the table still has room, then advances field_counter.
 * Implemented as a GNU statement expression ( ({ ... }) ).
 */
#define STRUCT_SETUP(typ, fname, fstring) ({ \
|
||||
assert(field_counter<FIELD_LIMIT); \
|
||||
fields[field_counter].decl_format_string = fstring; \
|
||||
fields[field_counter].name = #fname; \
|
||||
fields[field_counter].offset = __builtin_offsetof(typ, fname); \
|
||||
field_counter++; })
|
||||
|
||||
static void sort_and_dump_fields (const char *structname, bool has_internal, const char *extra_decls[]) {
|
||||
int i;
|
||||
qsort(fields, field_counter, sizeof(fields[0]), compare_fields);
|
||||
printf("struct __toku_%s {\n", structname);
|
||||
if (has_internal) {
|
||||
printf(" struct __toku_%s_internal *i;\n", structname);
|
||||
printf("#define %s_struct_i(x) ((x)->i)\n", structname);
|
||||
}
|
||||
if (extra_decls) {
|
||||
while (*extra_decls) {
|
||||
printf(" %s;\n", *extra_decls);
|
||||
extra_decls++;
|
||||
}
|
||||
}
|
||||
for (i=0; i<field_counter; i++) {
|
||||
printf(" ");
|
||||
printf(fields[i].decl_format_string, fields[i].name);
|
||||
printf(";\n");
|
||||
}
|
||||
printf("};\n");
|
||||
}
|
||||
|
||||
#include "db-4.6.19.h"
|
||||
|
||||
/*
 * Print the DBTYPE enum typedef to stdout, carrying over the numeric values
 * of DB_BTREE and DB_UNKNOWN that are in scope here.
 * (DBTYPE is mentioned by db_open.html.)
 */
static void print_dbtype(void) {
    printf("typedef enum {\n"
           " DB_BTREE=%d,\n"
           " DB_UNKNOWN=%d\n"
           "} DBTYPE;\n",
           DB_BTREE, DB_UNKNOWN);
}
|
||||
|
||||
|
||||
/* Print "#define <name> <value>": the argument is stringized for the name and
 * evaluated (formatted with %d) for the value. */
#define dodefine(name) printf("#define %s %d\n", #name, name)
|
||||
/* Like dodefine(), but also tracks used flag bits: asserts that the bits of
 * `name` are not already all present in `flags`, ORs them into `flags`, then
 * prints the #define line. */
#define dodefine_track(flags, name) ({ assert((flags & name) != name); \
|
||||
flags |= (name); \
|
||||
printf("#define %s %d\n", #name, name); })
|
||||
/* Allocate a fresh flag value for `name`: scan bits 0..31 for the lowest one
 * not set in `flags`, assert that one was found, print "#define <name> <bit>"
 * (with %u) and mark the bit as used in `flags`. `name` is only stringized
 * here; its value is never evaluated. */
#define dodefine_from_track(flags, name) ({\
|
||||
uint32_t which; \
|
||||
uint32_t bit; \
|
||||
for (which = 0; which < 32; which++) { \
|
||||
bit = 1U << which; \
|
||||
if (!(flags & bit)) break; \
|
||||
} \
|
||||
assert(which < 32); \
|
||||
printf("#define %s %u\n", #name, bit); \
|
||||
flags |= bit; \
|
||||
})
|
||||
|
||||
/*
 * Emit "#define NAME <value>" for an existing enumerated value and mark
 * slot `name` of the 256-entry usage table `flags` as taken.  Asserts that
 * the value is in [0, 256) and was not claimed before.  Arguments are
 * parenthesized so compound expressions are evaluated as a unit.
 */
#define dodefine_track_enum(flags, name) ({                   \
    assert((name) >= 0 && (name) < 256);                      \
    assert(!((flags)[(name)]));                               \
    (flags)[(name)] = 1;                                      \
    printf("#define %s %d\n", #name, (int)(name)); })

/*
 * Allocate the lowest unused value in [1, 256) from the usage table
 * `flags`, mark it taken, and emit "#define NAME <value>".  Asserts if the
 * table is full.  The internal local carries a trailing underscore so it
 * cannot capture an identifier used in the `flags` argument.
 */
#define dodefine_from_track_enum(flags, name) ({              \
    uint32_t which_;                                          \
    /* don't use 0 */                                         \
    for (which_ = 1; which_ < 256; which_++) {                \
        if (!((flags)[which_])) break;                        \
    }                                                         \
    assert(which_ < 256);                                     \
    (flags)[which_] = 1;                                      \
    printf("#define %s %u\n", #name, which_);                 \
})
|
||||
|
||||
/*
 * Engine-specific error codes.  The values run downward from -100000 and
 * are re-emitted as #defines into the generated header via dodefine().
 */
enum {
    TOKUDB_OUT_OF_LOCKS            = -100000,
    TOKUDB_SUCCEEDED_EARLY         = -100001,
    TOKUDB_FOUND_BUT_REJECTED      = -100002,
    TOKUDB_USER_CALLBACK_ERROR     = -100003,
    TOKUDB_DICTIONARY_TOO_OLD      = -100004,
    TOKUDB_DICTIONARY_TOO_NEW      = -100005,
    TOKUDB_DICTIONARY_NO_HEADER    = -100006,
    TOKUDB_CANCELED                = -100007,
    TOKUDB_NO_DATA                 = -100008,
    TOKUDB_ACCEPT                  = -100009,
    TOKUDB_MVCC_DICTIONARY_TOO_NEW = -100010,
    TOKUDB_UPGRADE_FAILURE         = -100011,
    TOKUDB_TRY_AGAIN               = -100012,
    TOKUDB_NEEDS_REPAIR            = -100013,
    TOKUDB_CURSOR_CONTINUE         = -100014,
    TOKUDB_BAD_CHECKSUM            = -100015,
    TOKUDB_HUGE_PAGES_ENABLED      = -100016,
    TOKUDB_OUT_OF_RANGE            = -100017,
    TOKUDB_INTERRUPTED             = -100018,
    /* Sentinel: exists only so every real entry can end with a comma. */
    DONTUSE_I_JUST_PUT_THIS_HERE_SO_I_COULD_HAVE_A_COMMA_AFTER_EACH_ITEM
};
|
||||
|
||||
static void print_defines (void) {
|
||||
dodefine(DB_VERB_DEADLOCK);
|
||||
dodefine(DB_VERB_RECOVERY);
|
||||
dodefine(DB_VERB_REPLICATION);
|
||||
dodefine(DB_VERB_WAITSFOR);
|
||||
|
||||
dodefine(DB_ARCH_ABS);
|
||||
dodefine(DB_ARCH_LOG);
|
||||
|
||||
dodefine(DB_CREATE);
|
||||
dodefine(DB_CXX_NO_EXCEPTIONS);
|
||||
dodefine(DB_EXCL);
|
||||
dodefine(DB_PRIVATE);
|
||||
dodefine(DB_RDONLY);
|
||||
dodefine(DB_RECOVER);
|
||||
dodefine(DB_RUNRECOVERY);
|
||||
dodefine(DB_THREAD);
|
||||
dodefine(DB_TXN_NOSYNC);
|
||||
|
||||
/* according to BDB 4.6.19, this is the next unused flag in the set of
|
||||
* common flags plus private flags for DB->open */
|
||||
#define DB_BLACKHOLE 0x0080000
|
||||
dodefine(DB_BLACKHOLE);
|
||||
#undef DB_BLACKHOLE
|
||||
|
||||
dodefine(DB_LOCK_DEFAULT);
|
||||
dodefine(DB_LOCK_OLDEST);
|
||||
dodefine(DB_LOCK_RANDOM);
|
||||
|
||||
//dodefine(DB_DUP); No longer supported #2862
|
||||
//dodefine(DB_DUPSORT); No longer supported #2862
|
||||
|
||||
dodefine(DB_KEYFIRST);
|
||||
dodefine(DB_KEYLAST);
|
||||
{
|
||||
static uint8_t insert_flags[256];
|
||||
dodefine_track_enum(insert_flags, DB_NOOVERWRITE);
|
||||
dodefine_track_enum(insert_flags, DB_NODUPDATA);
|
||||
dodefine_from_track_enum(insert_flags, DB_NOOVERWRITE_NO_ERROR);
|
||||
}
|
||||
dodefine(DB_OPFLAGS_MASK);
|
||||
|
||||
dodefine(DB_AUTO_COMMIT);
|
||||
|
||||
dodefine(DB_INIT_LOCK);
|
||||
dodefine(DB_INIT_LOG);
|
||||
dodefine(DB_INIT_MPOOL);
|
||||
dodefine(DB_INIT_TXN);
|
||||
|
||||
//dodefine(DB_KEYEMPTY); /// KEYEMPTY is no longer used. We just use DB_NOTFOUND
|
||||
dodefine(DB_KEYEXIST);
|
||||
dodefine(DB_LOCK_DEADLOCK);
|
||||
dodefine(DB_LOCK_NOTGRANTED);
|
||||
dodefine(DB_NOTFOUND);
|
||||
dodefine(DB_SECONDARY_BAD);
|
||||
dodefine(DB_DONOTINDEX);
|
||||
#ifdef DB_BUFFER_SMALL
|
||||
dodefine(DB_BUFFER_SMALL);
|
||||
#endif
|
||||
printf("#define DB_BADFORMAT -30500\n"); // private tokudb
|
||||
printf("#define DB_DELETE_ANY %d\n", 1<<16); // private tokudb
|
||||
|
||||
dodefine(DB_FIRST);
|
||||
dodefine(DB_LAST);
|
||||
dodefine(DB_CURRENT);
|
||||
dodefine(DB_NEXT);
|
||||
dodefine(DB_PREV);
|
||||
dodefine(DB_SET);
|
||||
dodefine(DB_SET_RANGE);
|
||||
printf("#define DB_CURRENT_BINDING 253\n"); // private tokudb
|
||||
printf("#define DB_SET_RANGE_REVERSE 252\n"); // private tokudb
|
||||
//printf("#define DB_GET_BOTH_RANGE_REVERSE 251\n"); // private tokudb. No longer supported #2862.
|
||||
dodefine(DB_RMW);
|
||||
|
||||
printf("#define DB_LOCKING_READ 0x80000000\n");
|
||||
printf("#define DB_IS_RESETTING_OP 0x01000000\n"); // private tokudb
|
||||
printf("#define DB_PRELOCKED 0x00800000\n"); // private tokudb
|
||||
printf("#define DB_PRELOCKED_WRITE 0x00400000\n"); // private tokudb
|
||||
//printf("#define DB_PRELOCKED_FILE_READ 0x00200000\n"); // private tokudb. No longer supported in #4472
|
||||
printf("#define DB_IS_HOT_INDEX 0x00100000\n"); // private tokudb
|
||||
printf("#define DBC_DISABLE_PREFETCHING 0x20000000\n"); // private tokudb
|
||||
printf("#define DB_UPDATE_CMP_DESCRIPTOR 0x40000000\n"); // private tokudb
|
||||
printf("#define TOKUFT_DIRTY_SHUTDOWN %x\n", 1<<31);
|
||||
|
||||
{
|
||||
//dbt flags
|
||||
uint32_t dbt_flags = 0;
|
||||
dodefine_track(dbt_flags, DB_DBT_APPMALLOC);
|
||||
dodefine_track(dbt_flags, DB_DBT_DUPOK);
|
||||
dodefine_track(dbt_flags, DB_DBT_MALLOC);
|
||||
#ifdef DB_DBT_MULTIPLE
|
||||
dodefine_track(dbt_flags, DB_DBT_MULTIPLE);
|
||||
#endif
|
||||
dodefine_track(dbt_flags, DB_DBT_REALLOC);
|
||||
dodefine_track(dbt_flags, DB_DBT_USERMEM);
|
||||
}
|
||||
|
||||
// flags for the env->set_flags function
|
||||
#if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 3
|
||||
dodefine(DB_LOG_AUTOREMOVE);
|
||||
#endif
|
||||
|
||||
{
|
||||
//Txn begin/commit flags
|
||||
uint32_t txn_flags = 0;
|
||||
dodefine_track(txn_flags, DB_TXN_WRITE_NOSYNC);
|
||||
dodefine_track(txn_flags, DB_TXN_NOWAIT);
|
||||
dodefine_track(txn_flags, DB_TXN_SYNC);
|
||||
#ifdef DB_TXN_SNAPSHOT
|
||||
dodefine_track(txn_flags, DB_TXN_SNAPSHOT);
|
||||
#endif
|
||||
#ifdef DB_READ_UNCOMMITTED
|
||||
dodefine_track(txn_flags, DB_READ_UNCOMMITTED);
|
||||
#endif
|
||||
#ifdef DB_READ_COMMITTED
|
||||
dodefine_track(txn_flags, DB_READ_COMMITTED);
|
||||
#endif
|
||||
//Add them if they didn't exist
|
||||
#ifndef DB_TXN_SNAPSHOT
|
||||
dodefine_from_track(txn_flags, DB_TXN_SNAPSHOT);
|
||||
#endif
|
||||
#ifndef DB_READ_UNCOMMITTED
|
||||
dodefine_from_track(txn_flags, DB_READ_UNCOMMITTED);
|
||||
#endif
|
||||
#ifndef DB_READ_COMMITTED
|
||||
dodefine_from_track(txn_flags, DB_READ_COMMITTED);
|
||||
#endif
|
||||
dodefine_from_track(txn_flags, DB_INHERIT_ISOLATION);
|
||||
dodefine_from_track(txn_flags, DB_SERIALIZABLE);
|
||||
dodefine_from_track(txn_flags, DB_TXN_READ_ONLY);
|
||||
dodefine_from_track(txn_flags, DB_READ_COMMITTED_ALWAYS);
|
||||
}
|
||||
|
||||
/* PerconaFT specific error codes*/
|
||||
printf("/* PerconaFT specific error codes */\n");
|
||||
dodefine(TOKUDB_OUT_OF_LOCKS);
|
||||
dodefine(TOKUDB_SUCCEEDED_EARLY);
|
||||
dodefine(TOKUDB_FOUND_BUT_REJECTED);
|
||||
dodefine(TOKUDB_USER_CALLBACK_ERROR);
|
||||
dodefine(TOKUDB_DICTIONARY_TOO_OLD);
|
||||
dodefine(TOKUDB_DICTIONARY_TOO_NEW);
|
||||
dodefine(TOKUDB_DICTIONARY_NO_HEADER);
|
||||
dodefine(TOKUDB_CANCELED);
|
||||
dodefine(TOKUDB_NO_DATA);
|
||||
dodefine(TOKUDB_ACCEPT);
|
||||
dodefine(TOKUDB_MVCC_DICTIONARY_TOO_NEW);
|
||||
dodefine(TOKUDB_UPGRADE_FAILURE);
|
||||
dodefine(TOKUDB_TRY_AGAIN);
|
||||
dodefine(TOKUDB_NEEDS_REPAIR);
|
||||
dodefine(TOKUDB_CURSOR_CONTINUE);
|
||||
dodefine(TOKUDB_BAD_CHECKSUM);
|
||||
dodefine(TOKUDB_HUGE_PAGES_ENABLED);
|
||||
dodefine(TOKUDB_OUT_OF_RANGE);
|
||||
dodefine(TOKUDB_INTERRUPTED);
|
||||
|
||||
/* LOADER flags */
|
||||
printf("/* LOADER flags */\n");
|
||||
{
|
||||
uint32_t loader_flags = 0;
|
||||
dodefine_from_track(loader_flags, LOADER_DISALLOW_PUTS); // Loader is only used for side effects.
|
||||
dodefine_from_track(loader_flags, LOADER_COMPRESS_INTERMEDIATES);
|
||||
}
|
||||
}
|
||||
|
||||
static void print_db_env_struct (void) {
|
||||
field_counter=0;
|
||||
STRUCT_SETUP(DB_ENV, api1_internal, "void *%s"); /* Used for C++ hacking. */
|
||||
STRUCT_SETUP(DB_ENV, app_private, "void *%s");
|
||||
STRUCT_SETUP(DB_ENV, close, "int (*%s) (DB_ENV *, uint32_t)");
|
||||
STRUCT_SETUP(DB_ENV, err, "void (*%s) (const DB_ENV *, int, const char *, ...) __attribute__ (( format (printf, 3, 4) ))");
|
||||
#if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 3
|
||||
STRUCT_SETUP(DB_ENV, get_cachesize, "int (*%s) (DB_ENV *, uint32_t *, uint32_t *, int *)");
|
||||
STRUCT_SETUP(DB_ENV, get_flags, "int (*%s) (DB_ENV *, uint32_t *)");
|
||||
STRUCT_SETUP(DB_ENV, get_lg_max, "int (*%s) (DB_ENV *, uint32_t*)");
|
||||
#endif
|
||||
STRUCT_SETUP(DB_ENV, log_archive, "int (*%s) (DB_ENV *, char **[], uint32_t)");
|
||||
STRUCT_SETUP(DB_ENV, log_flush, "int (*%s) (DB_ENV *, const DB_LSN *)");
|
||||
STRUCT_SETUP(DB_ENV, open, "int (*%s) (DB_ENV *, const char *, uint32_t, int)");
|
||||
STRUCT_SETUP(DB_ENV, set_cachesize, "int (*%s) (DB_ENV *, uint32_t, uint32_t, int)");
|
||||
STRUCT_SETUP(DB_ENV, set_data_dir, "int (*%s) (DB_ENV *, const char *)");
|
||||
#if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR == 1
|
||||
STRUCT_SETUP(DB_ENV, set_errcall, "void (*%s) (DB_ENV *, void (*)(const char *, char *))");
|
||||
#endif
|
||||
#if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 3
|
||||
STRUCT_SETUP(DB_ENV, set_errcall, "void (*%s) (DB_ENV *, void (*)(const DB_ENV *, const char *, const char *))");
|
||||
#endif
|
||||
STRUCT_SETUP(DB_ENV, set_errfile, "void (*%s) (DB_ENV *, FILE*)");
|
||||
STRUCT_SETUP(DB_ENV, set_errpfx, "void (*%s) (DB_ENV *, const char *)");
|
||||
STRUCT_SETUP(DB_ENV, set_flags, "int (*%s) (DB_ENV *, uint32_t, int)");
|
||||
STRUCT_SETUP(DB_ENV, set_lg_bsize, "int (*%s) (DB_ENV *, uint32_t)");
|
||||
STRUCT_SETUP(DB_ENV, set_lg_dir, "int (*%s) (DB_ENV *, const char *)");
|
||||
STRUCT_SETUP(DB_ENV, set_lg_max, "int (*%s) (DB_ENV *, uint32_t)");
|
||||
STRUCT_SETUP(DB_ENV, set_lk_detect, "int (*%s) (DB_ENV *, uint32_t)");
|
||||
#if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR <= 4
|
||||
STRUCT_SETUP(DB_ENV, set_lk_max, "int (*%s) (DB_ENV *, uint32_t)");
|
||||
#endif
|
||||
//STRUCT_SETUP(DB_ENV, set_noticecall, "void (*%s) (DB_ENV *, void (*)(DB_ENV *, db_notices))");
|
||||
STRUCT_SETUP(DB_ENV, set_tmp_dir, "int (*%s) (DB_ENV *, const char *)");
|
||||
STRUCT_SETUP(DB_ENV, set_verbose, "int (*%s) (DB_ENV *, uint32_t, int)");
|
||||
STRUCT_SETUP(DB_ENV, txn_checkpoint, "int (*%s) (DB_ENV *, uint32_t, uint32_t, uint32_t)");
|
||||
STRUCT_SETUP(DB_ENV, txn_stat, "int (*%s) (DB_ENV *, DB_TXN_STAT **, uint32_t)");
|
||||
STRUCT_SETUP(DB_ENV, txn_begin, "int (*%s) (DB_ENV *, DB_TXN *, DB_TXN **, uint32_t)");
|
||||
STRUCT_SETUP(DB_ENV, txn_recover, "int (*%s) (DB_ENV *, DB_PREPLIST preplist[/*count*/], long count, /*out*/ long *retp, uint32_t flags)");
|
||||
STRUCT_SETUP(DB_ENV, dbremove, "int (*%s) (DB_ENV *, DB_TXN *, const char *, const char *, uint32_t)");
|
||||
STRUCT_SETUP(DB_ENV, dbrename, "int (*%s) (DB_ENV *, DB_TXN *, const char *, const char *, const char *, uint32_t)");
|
||||
|
||||
const char *extra[]={
|
||||
"int (*checkpointing_set_period) (DB_ENV*, uint32_t) /* Change the delay between automatic checkpoints. 0 means disabled. */",
|
||||
"int (*checkpointing_get_period) (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic checkpoints. 0 means disabled. */",
|
||||
"int (*cleaner_set_period) (DB_ENV*, uint32_t) /* Change the delay between automatic cleaner attempts. 0 means disabled. */",
|
||||
"int (*cleaner_get_period) (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic cleaner attempts. 0 means disabled. */",
|
||||
"int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invocation. 0 means disabled. */",
|
||||
"int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invocation. 0 means disabled. */",
|
||||
"int (*evictor_set_enable_partial_eviction) (DB_ENV*, bool) /* Enables or disabled partial eviction of nodes from cachetable. */",
|
||||
"int (*evictor_get_enable_partial_eviction) (DB_ENV*, bool*) /* Retrieve the status of partial eviction of nodes from cachetable. */",
|
||||
"int (*checkpointing_postpone) (DB_ENV*) /* Use for 'rename table' or any other operation that must be disjoint from a checkpoint */",
|
||||
"int (*checkpointing_resume) (DB_ENV*) /* Alert tokuft that 'postpone' is no longer necessary */",
|
||||
"int (*checkpointing_begin_atomic_operation) (DB_ENV*) /* Begin a set of operations (that must be atomic as far as checkpoints are concerned). i.e. inserting into every index in one table */",
|
||||
"int (*checkpointing_end_atomic_operation) (DB_ENV*) /* End a set of operations (that must be atomic as far as checkpoints are concerned). */",
|
||||
"int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */",
|
||||
"int (*get_engine_status_num_rows) (DB_ENV*, uint64_t*) /* return number of rows in engine status */",
|
||||
"int (*get_engine_status) (DB_ENV*, TOKU_ENGINE_STATUS_ROW, uint64_t, uint64_t*, fs_redzone_state*, uint64_t*, char*, int, toku_engine_status_include_type) /* Fill in status struct and redzone state, possibly env panic string */",
|
||||
"int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */",
|
||||
"int (*crash) (DB_ENV*, const char*/*expr_as_string*/,const char */*fun*/,const char*/*file*/,int/*line*/, int/*errno*/)",
|
||||
"int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* FOR TEST ONLY: lookup existing iname */",
|
||||
"int (*create_loader) (DB_ENV *env, DB_TXN *txn, DB_LOADER **blp, DB *src_db, int N, DB *dbs[/*N*/], uint32_t db_flags[/*N*/], uint32_t dbt_flags[/*N*/], uint32_t loader_flags)",
|
||||
"int (*create_indexer) (DB_ENV *env, DB_TXN *txn, DB_INDEXER **idxrp, DB *src_db, int N, DB *dbs[/*N*/], uint32_t db_flags[/*N*/], uint32_t indexer_flags)",
|
||||
"int (*put_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,\n"
|
||||
" const DBT *src_key, const DBT *src_val,\n"
|
||||
" uint32_t num_dbs, DB **db_array, DBT_ARRAY *keys, DBT_ARRAY *vals, uint32_t *flags_array) /* insert into multiple DBs */",
|
||||
"int (*set_generate_row_callback_for_put) (DB_ENV *env, generate_row_for_put_func generate_row_for_put)",
|
||||
"int (*del_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,\n"
|
||||
" const DBT *src_key, const DBT *src_val,\n"
|
||||
" uint32_t num_dbs, DB **db_array, DBT_ARRAY *keys, uint32_t *flags_array) /* delete from multiple DBs */",
|
||||
"int (*set_generate_row_callback_for_del) (DB_ENV *env, generate_row_for_del_func generate_row_for_del)",
|
||||
"int (*update_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,\n"
|
||||
" DBT *old_src_key, DBT *old_src_data,\n"
|
||||
" DBT *new_src_key, DBT *new_src_data,\n"
|
||||
" uint32_t num_dbs, DB **db_array, uint32_t *flags_array,\n"
|
||||
" uint32_t num_keys, DBT_ARRAY *keys,\n"
|
||||
" uint32_t num_vals, DBT_ARRAY *vals) /* update multiple DBs */",
|
||||
"int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */",
|
||||
"int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */",
|
||||
"int (*set_lk_max_memory) (DB_ENV *env, uint64_t max)",
|
||||
"int (*get_lk_max_memory) (DB_ENV *env, uint64_t *max)",
|
||||
"void (*set_update) (DB_ENV *env, int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra))",
|
||||
"int (*set_lock_timeout) (DB_ENV *env, uint64_t default_lock_wait_time_msec, uint64_t (*get_lock_wait_time_cb)(uint64_t default_lock_wait_time))",
|
||||
"int (*get_lock_timeout) (DB_ENV *env, uint64_t *lock_wait_time_msec)",
|
||||
"int (*set_lock_timeout_callback) (DB_ENV *env, lock_timeout_callback callback)",
|
||||
"int (*set_lock_wait_callback) (DB_ENV *env, lock_wait_callback callback)",
|
||||
"int (*txn_xa_recover) (DB_ENV*, TOKU_XA_XID list[/*count*/], long count, /*out*/ long *retp, uint32_t flags)",
|
||||
"int (*get_txn_from_xid) (DB_ENV*, /*in*/ TOKU_XA_XID *, /*out*/ DB_TXN **)",
|
||||
"DB* (*get_db_for_directory) (DB_ENV*)",
|
||||
"int (*get_cursor_for_directory) (DB_ENV*, /*in*/ DB_TXN *, /*out*/ DBC **)",
|
||||
"int (*get_cursor_for_persistent_environment)(DB_ENV*, /*in*/ DB_TXN *, /*out*/ DBC **)",
|
||||
"void (*change_fsync_log_period) (DB_ENV*, uint32_t)",
|
||||
"int (*iterate_live_transactions) (DB_ENV *env, iterate_transactions_callback callback, void *extra)",
|
||||
"int (*iterate_pending_lock_requests) (DB_ENV *env, iterate_requests_callback callback, void *extra)",
|
||||
"void (*set_loader_memory_size)(DB_ENV *env, uint64_t (*get_loader_memory_size_callback)(void))",
|
||||
"uint64_t (*get_loader_memory_size)(DB_ENV *env)",
|
||||
"void (*set_killed_callback)(DB_ENV *env, uint64_t default_killed_time_msec, uint64_t (*get_killed_time_callback)(uint64_t default_killed_time_msec), int (*killed_callback)(void))",
|
||||
"void (*do_backtrace) (DB_ENV *env)",
|
||||
"int (*set_client_pool_threads)(DB_ENV *, uint32_t)",
|
||||
"int (*set_cachetable_pool_threads)(DB_ENV *, uint32_t)",
|
||||
"int (*set_checkpoint_pool_threads)(DB_ENV *, uint32_t)",
|
||||
"void (*set_check_thp)(DB_ENV *, bool new_val)",
|
||||
"bool (*get_check_thp)(DB_ENV *)",
|
||||
"bool (*set_dir_per_db)(DB_ENV *, bool new_val)",
|
||||
"bool (*get_dir_per_db)(DB_ENV *)",
|
||||
"const char *(*get_data_dir)(DB_ENV *env)",
|
||||
"int (*dirtool_attach)(DB_ENV *, DB_TXN *, const char *, const char *)",
|
||||
"int (*dirtool_detach)(DB_ENV *, DB_TXN *, const char *)",
|
||||
"int (*dirtool_move)(DB_ENV *, DB_TXN *, const char *, const char *)",
|
||||
"void (*kill_waiter)(DB_ENV *, void *extra)",
|
||||
NULL};
|
||||
|
||||
sort_and_dump_fields("db_env", true, extra);
|
||||
}
|
||||
|
||||
/*
 * Emit `struct __toku_db_key_range`: the three double members shared with
 * DB_KEY_RANGE, with no internal pointer and no extra declarations.
 */
static void print_db_key_range_struct (void) {
    field_counter = 0;

    STRUCT_SETUP(DB_KEY_RANGE, less,    "double %s");
    STRUCT_SETUP(DB_KEY_RANGE, equal,   "double %s");
    STRUCT_SETUP(DB_KEY_RANGE, greater, "double %s");

    sort_and_dump_fields("db_key_range", false, NULL);
}
|
||||
|
||||
/*
 * Emit `struct __toku_db_lsn`: the `file` and `offset` members shared with
 * DB_LSN, with no internal pointer and no extra declarations.
 */
static void print_db_lsn_struct(void) {
    field_counter = 0;

    // FT-692
    STRUCT_SETUP(DB_LSN, file,   "uint32_t %s");
    STRUCT_SETUP(DB_LSN, offset, "uint32_t %s");

    sort_and_dump_fields("db_lsn", false, NULL);
}
|
||||
|
||||
/*
 * Emit `struct __toku_dbt`: the data pointer plus the flags/size/ulen
 * members shared with DBT.  The app_private member is compiled out by the
 * `#if 0` guard.
 */
static void print_dbt_struct(void) {
    field_counter = 0;

#if 0 && DB_VERSION_MAJOR==4 && DB_VERSION_MINOR==1
    STRUCT_SETUP(DBT, app_private, "void*%s");
#endif
    STRUCT_SETUP(DBT, data,  "void*%s");
    STRUCT_SETUP(DBT, flags, "uint32_t %s");
    STRUCT_SETUP(DBT, size,  "uint32_t %s");
    STRUCT_SETUP(DBT, ulen,  "uint32_t %s");

    sort_and_dump_fields("dbt", false, NULL);
}
|
||||
|
||||
static void print_db_struct (void) {
|
||||
/* Do these in alphabetical order. */
|
||||
field_counter=0;
|
||||
STRUCT_SETUP(DB, api_internal, "void *%s"); /* Used for C++ hacking. */
|
||||
STRUCT_SETUP(DB, app_private, "void *%s");
|
||||
STRUCT_SETUP(DB, close, "int (*%s) (DB*, uint32_t)");
|
||||
STRUCT_SETUP(DB, cursor, "int (*%s) (DB *, DB_TXN *, DBC **, uint32_t)");
|
||||
STRUCT_SETUP(DB, dbenv, "DB_ENV *%s");
|
||||
STRUCT_SETUP(DB, del, "int (*%s) (DB *, DB_TXN *, DBT *, uint32_t)");
|
||||
STRUCT_SETUP(DB, fd, "int (*%s) (DB *, int *)");
|
||||
STRUCT_SETUP(DB, get, "int (*%s) (DB *, DB_TXN *, DBT *, DBT *, uint32_t)");
|
||||
#if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 3
|
||||
STRUCT_SETUP(DB, get_flags, "int (*%s) (DB *, uint32_t *)");
|
||||
STRUCT_SETUP(DB, get_pagesize, "int (*%s) (DB *, uint32_t *)");
|
||||
#endif
|
||||
STRUCT_SETUP(DB, key_range, "int (*%s) (DB *, DB_TXN *, DBT *, DB_KEY_RANGE *, uint32_t)");
|
||||
STRUCT_SETUP(DB, open, "int (*%s) (DB *, DB_TXN *, const char *, const char *, DBTYPE, uint32_t, int)");
|
||||
STRUCT_SETUP(DB, put, "int (*%s) (DB *, DB_TXN *, DBT *, DBT *, uint32_t)");
|
||||
STRUCT_SETUP(DB, set_errfile, "void (*%s) (DB *, FILE*)");
|
||||
STRUCT_SETUP(DB, set_flags, "int (*%s) (DB *, uint32_t)");
|
||||
STRUCT_SETUP(DB, set_pagesize, "int (*%s) (DB *, uint32_t)");
|
||||
STRUCT_SETUP(DB, stat, "int (*%s) (DB *, void *, uint32_t)");
|
||||
STRUCT_SETUP(DB, verify, "int (*%s) (DB *, const char *, const char *, FILE *, uint32_t)");
|
||||
const char *extra[]={
|
||||
"int (*key_range64)(DB*, DB_TXN *, DBT *, uint64_t *less, uint64_t *equal, uint64_t *greater, int *is_exact)",
|
||||
"int (*get_key_after_bytes)(DB *, DB_TXN *, const DBT *, uint64_t, void (*callback)(const DBT *, uint64_t, void *), void *, uint32_t); /* given start_key and skip_len, find largest end_key such that the elements in [start_key,end_key) sum to <= skip_len bytes */",
|
||||
"int (*keys_range64)(DB*, DB_TXN *, DBT *keyleft, DBT *keyright, uint64_t *less, uint64_t *left, uint64_t *between, uint64_t *right, uint64_t *greater, bool *middle_3_exact)",
|
||||
"int (*stat64)(DB *, DB_TXN *, DB_BTREE_STAT64 *)",
|
||||
"int (*pre_acquire_table_lock)(DB*, DB_TXN*)",
|
||||
"int (*pre_acquire_fileops_lock)(DB*, DB_TXN*)",
|
||||
"const DBT* (*dbt_pos_infty)(void) /* Return the special DBT that refers to positive infinity in the lock table.*/",
|
||||
"const DBT* (*dbt_neg_infty)(void)/* Return the special DBT that refers to negative infinity in the lock table.*/",
|
||||
"void (*get_max_row_size) (DB*, uint32_t *max_key_size, uint32_t *max_row_size)",
|
||||
"DESCRIPTOR descriptor /* saved row/dictionary descriptor for aiding in comparisons */",
|
||||
"DESCRIPTOR cmp_descriptor /* saved row/dictionary descriptor for aiding in comparisons */",
|
||||
"int (*change_descriptor) (DB*, DB_TXN*, const DBT* descriptor, uint32_t) /* change row/dictionary descriptor for a db. Available only while db is open */",
|
||||
"int (*getf_set)(DB*, DB_TXN*, uint32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */",
|
||||
"int (*optimize)(DB*) /* Run garbage collecion and promote all transactions older than oldest. Amortized (happens during flattening) */",
|
||||
"int (*hot_optimize)(DB*, DBT*, DBT*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, uint64_t* loops_run)",
|
||||
"int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION)",
|
||||
"int (*change_pagesize)(DB*,uint32_t)",
|
||||
"int (*change_readpagesize)(DB*,uint32_t)",
|
||||
"int (*get_readpagesize)(DB*,uint32_t*)",
|
||||
"int (*set_readpagesize)(DB*,uint32_t)",
|
||||
"int (*change_compression_method)(DB*,TOKU_COMPRESSION_METHOD)",
|
||||
"int (*get_compression_method)(DB*,TOKU_COMPRESSION_METHOD*)",
|
||||
"int (*set_compression_method)(DB*,TOKU_COMPRESSION_METHOD)",
|
||||
"int (*change_fanout)(DB *db, uint32_t fanout)",
|
||||
"int (*get_fanout)(DB *db, uint32_t *fanout)",
|
||||
"int (*set_fanout)(DB *db, uint32_t fanout)",
|
||||
"int (*set_memcmp_magic)(DB *db, uint8_t magic)",
|
||||
"int (*set_indexer)(DB*, DB_INDEXER*)",
|
||||
"void (*get_indexer)(DB*, DB_INDEXER**)",
|
||||
"int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going)",
|
||||
"int (*update)(DB *, DB_TXN*, const DBT *key, const DBT *extra, uint32_t flags)",
|
||||
"int (*update_broadcast)(DB *, DB_TXN*, const DBT *extra, uint32_t flags)",
|
||||
"int (*get_fractal_tree_info64)(DB*,uint64_t*,uint64_t*,uint64_t*,uint64_t*)",
|
||||
"int (*iterate_fractal_tree_block_map)(DB*,int(*)(uint64_t,int64_t,int64_t,int64_t,int64_t,void*),void*)",
|
||||
"const char *(*get_dname)(DB *db)",
|
||||
"int (*get_last_key)(DB *db, YDB_CALLBACK_FUNCTION func, void* extra)",
|
||||
"int (*recount_rows)(DB* db, int (*progress_callback)(uint64_t count, uint64_t deleted, void* progress_extra), void* progress_extra)",
|
||||
NULL};
|
||||
sort_and_dump_fields("db", true, extra);
|
||||
}
|
||||
|
||||
/*
 * Emit `struct __toku_db_txn_active`: the `lsn` and `txnid` members shared
 * with DB_TXN_ACTIVE, with no internal pointer and no extra declarations.
 */
static void print_db_txn_active_struct (void) {
    field_counter = 0;

    STRUCT_SETUP(DB_TXN_ACTIVE, lsn,   "DB_LSN %s");
    STRUCT_SETUP(DB_TXN_ACTIVE, txnid, "uint32_t %s");

    sort_and_dump_fields("db_txn_active", false, NULL);
}
|
||||
|
||||
/*
 * Emit `struct __toku_db_txn`: the members shared with DB_TXN plus the
 * engine-specific ones listed verbatim below.  The struct is emitted
 * without an internal `i` pointer.
 */
static void print_db_txn_struct (void) {
    field_counter = 0;

    /* --- members shared with the BDB DB_TXN layout --- */
    STRUCT_SETUP(DB_TXN, abort, "int (*%s) (DB_TXN *)");
    STRUCT_SETUP(DB_TXN, api_internal,"void *%s");
    STRUCT_SETUP(DB_TXN, commit, "int (*%s) (DB_TXN*, uint32_t)");
    STRUCT_SETUP(DB_TXN, prepare, "int (*%s) (DB_TXN*, uint8_t gid[DB_GID_SIZE], uint32_t flags)");
    STRUCT_SETUP(DB_TXN, discard, "int (*%s) (DB_TXN*, uint32_t)");
    STRUCT_SETUP(DB_TXN, id, "uint32_t (*%s) (DB_TXN *)");
    STRUCT_SETUP(DB_TXN, mgrp, "DB_ENV *%s /* In PerconaFT, mgrp is a DB_ENV, not a DB_TXNMGR */");
    STRUCT_SETUP(DB_TXN, parent, "DB_TXN *%s");

    /* --- engine-specific members, emitted verbatim (NULL-terminated) --- */
    const char *txn_extras[] = {
        "int (*txn_stat)(DB_TXN *, struct txn_stat **)",
        "int (*commit_with_progress)(DB_TXN*, uint32_t, TXN_PROGRESS_POLL_FUNCTION, void*)",
        "int (*abort_with_progress)(DB_TXN*, TXN_PROGRESS_POLL_FUNCTION, void*)",
        "int (*xa_prepare) (DB_TXN*, TOKU_XA_XID *, uint32_t flags)",
        "uint64_t (*id64) (DB_TXN*)",
        "void (*set_client_id)(DB_TXN *, uint64_t client_id, void *client_extra)",
        "void (*get_client_id)(DB_TXN *, uint64_t *client_id, void **client_extra)",
        "bool (*is_prepared)(DB_TXN *)",
        "DB_TXN *(*get_child)(DB_TXN *)",
        "uint64_t (*get_start_time)(DB_TXN *)",
        NULL
    };

    sort_and_dump_fields("db_txn", false, txn_extras);
}
|
||||
|
||||
/*
 * Emit `struct __toku_db_txn_stat`: the `st_nactive` count and the
 * `st_txnarray` pointer shared with DB_TXN_STAT, with no internal pointer
 * and no extra declarations.
 */
static void print_db_txn_stat_struct (void) {
    field_counter = 0;

    STRUCT_SETUP(DB_TXN_STAT, st_nactive,  "uint32_t %s");
    STRUCT_SETUP(DB_TXN_STAT, st_txnarray, "DB_TXN_ACTIVE *%s");

    sort_and_dump_fields("db_txn_stat", false, NULL);
}
|
||||
|
||||
/*
 * Emit `struct __toku_dbc`: the cursor members shared with DBC plus the
 * engine-specific callback-style accessors and the `_internal` byte array
 * listed verbatim below.  The struct is emitted without an internal `i`
 * pointer.
 */
static void print_dbc_struct (void) {
    field_counter = 0;

    /* --- members shared with the BDB DBC layout --- */
    STRUCT_SETUP(DBC, c_close, "int (*%s) (DBC *)");
    //STRUCT_SETUP(DBC, c_del, "int (*%s) (DBC *, uint32_t)"); // c_del was removed. See #4576.
    STRUCT_SETUP(DBC, c_get, "int (*%s) (DBC *, DBT *, DBT *, uint32_t)");
    STRUCT_SETUP(DBC, dbp, "DB *%s");

    /* --- engine-specific members, emitted verbatim (NULL-terminated) --- */
    const char *cursor_extras[] = {
        "int (*c_getf_first)(DBC *, uint32_t, YDB_CALLBACK_FUNCTION, void *)",
        "int (*c_getf_last)(DBC *, uint32_t, YDB_CALLBACK_FUNCTION, void *)",
        "int (*c_getf_next)(DBC *, uint32_t, YDB_CALLBACK_FUNCTION, void *)",
        "int (*c_getf_prev)(DBC *, uint32_t, YDB_CALLBACK_FUNCTION, void *)",
        "int (*c_getf_current)(DBC *, uint32_t, YDB_CALLBACK_FUNCTION, void *)",
        "int (*c_getf_set)(DBC *, uint32_t, DBT *, YDB_CALLBACK_FUNCTION, void *)",
        "int (*c_getf_set_range)(DBC *, uint32_t, DBT *, YDB_CALLBACK_FUNCTION, void *)",
        "int (*c_getf_set_range_reverse)(DBC *, uint32_t, DBT *, YDB_CALLBACK_FUNCTION, void *)",
        "int (*c_getf_set_range_with_bound)(DBC *, uint32_t, DBT *k, DBT *k_bound, YDB_CALLBACK_FUNCTION, void *)",
        "int (*c_set_bounds)(DBC*, const DBT*, const DBT*, bool pre_acquire, int out_of_range_error)",
        "void (*c_set_check_interrupt_callback)(DBC*, bool (*)(void*, uint64_t deleted_rows), void *)",
        "void (*c_remove_restriction)(DBC*)",
        "void (*c_set_txn)(DBC*, DB_TXN*)",
        "char _internal[512]",
        NULL
    };

    sort_and_dump_fields("dbc", false, cursor_extras);
}
|
||||
|
||||
|
||||
int main (int argc, char *const argv[] __attribute__((__unused__))) {
|
||||
assert(argc==1);
|
||||
|
||||
printf("#ifndef _DB_H\n");
|
||||
printf("#define _DB_H\n");
|
||||
printf("/* This code generated by make_db_h. Copyright (c) 2006, 2015, Percona and/or its affiliates. */\n");
|
||||
printf("#ident \"Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.\"\n");
|
||||
printf("#include <sys/types.h>\n");
|
||||
printf("/*stdio is needed for the FILE* in db->verify*/\n");
|
||||
printf("#include <stdio.h>\n");
|
||||
printf("/*stdbool is needed for the bool in db_env_enable_engine_status*/\n");
|
||||
printf("#include <stdbool.h>\n");
|
||||
printf("#include <stdint.h>\n");
|
||||
//printf("#include <inttypes.h>\n");
|
||||
printf("#if defined(__cplusplus) || defined(__cilkplusplus)\nextern \"C\" {\n#endif\n");
|
||||
|
||||
printf("#define DB_VERSION_MAJOR %d\n", DB_VERSION_MAJOR);
|
||||
printf("#define DB_VERSION_MINOR %d\n", DB_VERSION_MINOR);
|
||||
printf("/* As of r40364 (post PerconaFT 5.2.7), the patch version number is 100+ the BDB header patch version number.*/\n");
|
||||
printf("#define DB_VERSION_PATCH %d\n", 100+DB_VERSION_PATCH);
|
||||
printf("#define DB_VERSION_STRING \"Percona: PerconaFT %d.%d.%d\"\n", DB_VERSION_MAJOR, DB_VERSION_MINOR, 100+DB_VERSION_PATCH);
|
||||
|
||||
#ifndef DB_GID_SIZE
|
||||
#define DB_GID_SIZE DB_XIDDATASIZE
|
||||
#endif
|
||||
dodefine(DB_GID_SIZE);
|
||||
|
||||
printf("typedef struct toku_xa_xid_s { /* This struct is intended to be binary compatible with the XID in the XA architecture. See source:/import/opengroup.org/C193.pdf */\n"
|
||||
" long formatID; /* format identifier */\n"
|
||||
" long gtrid_length; /* value from 1 through 64 */\n"
|
||||
" long bqual_length; /* value from 1 through 64 */\n"
|
||||
" char data[DB_GID_SIZE];\n"
|
||||
"} TOKU_XA_XID;\n");
|
||||
|
||||
printf("#ifndef TOKU_OFF_T_DEFINED\n"
|
||||
"#define TOKU_OFF_T_DEFINED\n"
|
||||
"typedef int64_t toku_off_t;\n"
|
||||
"#endif\n");
|
||||
|
||||
printf("typedef struct __toku_db_env DB_ENV;\n");
|
||||
printf("typedef struct __toku_db_key_range DB_KEY_RANGE;\n");
|
||||
printf("typedef struct __toku_db_lsn DB_LSN;\n");
|
||||
printf("typedef struct __toku_db DB;\n");
|
||||
printf("typedef struct __toku_db_txn DB_TXN;\n");
|
||||
printf("typedef struct __toku_db_txn_active DB_TXN_ACTIVE;\n");
|
||||
printf("typedef struct __toku_db_txn_stat DB_TXN_STAT;\n");
|
||||
printf("typedef struct __toku_dbc DBC;\n");
|
||||
printf("typedef struct __toku_dbt DBT;\n");
|
||||
printf("typedef struct __toku_db_preplist { DB_TXN *txn; uint8_t gid[DB_GID_SIZE]; } DB_PREPLIST;\n");
|
||||
printf("typedef uint32_t db_recno_t;\n");
|
||||
printf("typedef int(*YDB_CALLBACK_FUNCTION)(DBT const*, DBT const*, void*);\n");
|
||||
|
||||
printf("struct simple_dbt {\n");
|
||||
printf(" uint32_t len;\n");
|
||||
printf(" void *data;\n");
|
||||
printf("};\n");
|
||||
|
||||
//stat64
|
||||
printf("typedef struct __toku_db_btree_stat64 {\n");
|
||||
printf(" uint64_t bt_nkeys; /* how many unique keys (guaranteed only to be an estimate, even when flattened) */\n");
|
||||
printf(" uint64_t bt_ndata; /* how many key-value pairs (an estimate, but exact when flattened) */\n");
|
||||
printf(" uint64_t bt_dsize; /* how big are the keys+values (not counting the lengths) (an estimate, unless flattened) */\n");
|
||||
printf(" uint64_t bt_fsize; /* how big is the underlying file */\n");
|
||||
// 4018
|
||||
printf(" uint64_t bt_create_time_sec; /* Creation time, in seconds */\n");
|
||||
printf(" uint64_t bt_modify_time_sec; /* Time of last serialization, in seconds */\n");
|
||||
printf(" uint64_t bt_verify_time_sec; /* Time of last verification, in seconds */\n");
|
||||
printf("} DB_BTREE_STAT64;\n");
|
||||
|
||||
// compression methods
|
||||
printf("typedef enum toku_compression_method {\n");
|
||||
printf(" TOKU_NO_COMPRESSION = 0,\n"); // "identity" compression
|
||||
printf(" TOKU_SNAPPY_METHOD = 7,\n"); // google snappy
|
||||
printf(" TOKU_ZLIB_METHOD = 8,\n"); // RFC 1950 says use 8 for zlib. It reserves 15 to allow more bytes.
|
||||
printf(" TOKU_QUICKLZ_METHOD = 9,\n"); // We use 9 for QUICKLZ (the QLZ compression level is stored int he high-order nibble). I couldn't find any standard for any other numbers, so I just use 9. -Bradley
|
||||
printf(" TOKU_LZMA_METHOD = 10,\n"); // We use 10 for LZMA. (Note the compression level is stored in the high-order nibble).
|
||||
printf(" TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD = 11,\n"); // We wrap a zlib without checksumming compression technique in our own checksummed metadata.
|
||||
printf(" TOKU_DEFAULT_COMPRESSION_METHOD = 1,\n"); // default is actually quicklz
|
||||
printf(" TOKU_FAST_COMPRESSION_METHOD = 2,\n"); // friendlier names
|
||||
printf(" TOKU_SMALL_COMPRESSION_METHOD = 3,\n");
|
||||
printf("} TOKU_COMPRESSION_METHOD;\n");
|
||||
|
||||
//bulk loader
|
||||
printf("typedef struct __toku_loader DB_LOADER;\n");
|
||||
printf("struct __toku_loader_internal;\n");
|
||||
printf("struct __toku_loader {\n");
|
||||
printf(" struct __toku_loader_internal *i;\n");
|
||||
printf(" int (*set_error_callback)(DB_LOADER *loader, void (*error_cb)(DB *db, int i, int err, DBT *key, DBT *val, void *error_extra), void *error_extra); /* set the error callback */\n");
|
||||
printf(" int (*set_poll_function)(DB_LOADER *loader, int (*poll_func)(void *extra, float progress), void *poll_extra); /* set the polling function */\n");
|
||||
printf(" int (*put)(DB_LOADER *loader, DBT *key, DBT* val); /* give a row to the loader */\n");
|
||||
printf(" int (*close)(DB_LOADER *loader); /* finish loading, free memory */\n");
|
||||
printf(" int (*abort)(DB_LOADER *loader); /* abort loading, free memory */\n");
|
||||
printf("};\n");
|
||||
|
||||
//indexer
|
||||
printf("typedef struct __toku_indexer DB_INDEXER;\n");
|
||||
printf("struct __toku_indexer_internal;\n");
|
||||
printf("struct __toku_indexer {\n");
|
||||
printf(" struct __toku_indexer_internal *i;\n");
|
||||
printf(" int (*set_error_callback)(DB_INDEXER *indexer, void (*error_cb)(DB *db, int i, int err, DBT *key, DBT *val, void *error_extra), void *error_extra); /* set the error callback */\n");
|
||||
printf(" int (*set_poll_function)(DB_INDEXER *indexer, int (*poll_func)(void *extra, float progress), void *poll_extra); /* set the polling function */\n");
|
||||
printf(" int (*build)(DB_INDEXER *indexer); /* build the indexes */\n");
|
||||
printf(" int (*close)(DB_INDEXER *indexer); /* finish indexing, free memory */\n");
|
||||
printf(" int (*abort)(DB_INDEXER *indexer); /* abort indexing, free memory */\n");
|
||||
printf("};\n");
|
||||
|
||||
// Filesystem redzone state
|
||||
printf("typedef enum { \n");
|
||||
printf(" FS_GREEN = 0, // green zone (we have lots of space) \n");
|
||||
printf(" FS_YELLOW = 1, // yellow zone (issue warning but allow operations) \n");
|
||||
printf(" FS_RED = 2, // red zone (prevent insert operations) \n");
|
||||
printf(" FS_BLOCKED = 3 // For reporting engine status, completely blocked \n");
|
||||
printf("} fs_redzone_state;\n");
|
||||
|
||||
printf("// engine status info\n");
|
||||
printf("// engine status is passed to handlerton as an array of TOKU_ENGINE_STATUS_ROW_S[]\n");
|
||||
|
||||
printf("typedef enum {\n");
|
||||
printf(" FS_STATE = 0, // interpret as file system state (redzone) enum \n");
|
||||
printf(" UINT64, // interpret as uint64_t \n");
|
||||
printf(" CHARSTR, // interpret as char * \n");
|
||||
printf(" UNIXTIME, // interpret as time_t \n");
|
||||
printf(" TOKUTIME, // interpret as tokutime_t \n");
|
||||
printf(" PARCOUNT, // interpret as PARTITIONED_COUNTER\n");
|
||||
printf(" DOUBLE // interpret as double\n");
|
||||
printf("} toku_engine_status_display_type; \n");
|
||||
|
||||
printf("typedef enum {\n");
|
||||
printf(" TOKU_ENGINE_STATUS = (1ULL<<0), // Include when asking for engine status\n");
|
||||
printf(" TOKU_GLOBAL_STATUS = (1ULL<<1), // Include when asking for information_schema.global_status\n");
|
||||
printf("} toku_engine_status_include_type; \n");
|
||||
|
||||
printf("typedef struct __toku_engine_status_row {\n");
|
||||
printf(" const char * keyname; // info schema key, should not change across revisions without good reason \n");
|
||||
printf(" const char * columnname; // column for mysql, e.g. information_schema.global_status. TOKUDB_ will automatically be prefixed.\n");
|
||||
printf(" const char * legend; // the text that will appear at user interface \n");
|
||||
printf(" toku_engine_status_display_type type; // how to interpret the value \n");
|
||||
printf(" toku_engine_status_include_type include; // which kinds of callers should get read this row?\n");
|
||||
printf(" union { \n");
|
||||
printf(" double dnum; \n");
|
||||
printf(" uint64_t num; \n");
|
||||
printf(" const char * str; \n");
|
||||
printf(" char datebuf[26]; \n");
|
||||
printf(" struct partitioned_counter *parcount;\n");
|
||||
printf(" } value; \n");
|
||||
printf("} * TOKU_ENGINE_STATUS_ROW, TOKU_ENGINE_STATUS_ROW_S; \n");
|
||||
|
||||
print_dbtype();
|
||||
print_defines();
|
||||
|
||||
printf("typedef struct {\n");
|
||||
printf(" uint32_t capacity;\n");
|
||||
printf(" uint32_t size;\n");
|
||||
printf(" DBT *dbts;\n");
|
||||
printf("} DBT_ARRAY;\n\n");
|
||||
printf("typedef int (*generate_row_for_put_func)(DB *dest_db, DB *src_db, DBT_ARRAY * dest_keys, DBT_ARRAY *dest_vals, const DBT *src_key, const DBT *src_val);\n");
|
||||
printf("typedef int (*generate_row_for_del_func)(DB *dest_db, DB *src_db, DBT_ARRAY * dest_keys, const DBT *src_key, const DBT *src_val);\n");
|
||||
printf("DBT_ARRAY * toku_dbt_array_init(DBT_ARRAY *dbts, uint32_t size) %s;\n", VISIBLE);
|
||||
printf("void toku_dbt_array_destroy(DBT_ARRAY *dbts) %s;\n", VISIBLE);
|
||||
printf("void toku_dbt_array_destroy_shallow(DBT_ARRAY *dbts) %s;\n", VISIBLE);
|
||||
printf("void toku_dbt_array_resize(DBT_ARRAY *dbts, uint32_t size) %s;\n", VISIBLE);
|
||||
|
||||
printf("typedef void (*lock_timeout_callback)(DB *db, uint64_t requesting_txnid, const DBT *left_key, const DBT *right_key, uint64_t blocking_txnid);\n");
|
||||
printf("typedef void (*lock_wait_callback)(void *arg, uint64_t requesting_txnid, uint64_t blocking_txnid);\n");
|
||||
printf("typedef int (*iterate_row_locks_callback)(DB **db, DBT *left_key, DBT *right_key, void *extra);\n");
|
||||
printf("typedef int (*iterate_transactions_callback)(DB_TXN *dbtxn, iterate_row_locks_callback cb, void *locks_extra, void *extra);\n");
|
||||
printf("typedef int (*iterate_requests_callback)(DB *db, uint64_t requesting_txnid, const DBT *left_key, const DBT *right_key, uint64_t blocking_txnid, uint64_t start_time, void *extra);\n");
|
||||
print_db_env_struct();
|
||||
print_db_key_range_struct();
|
||||
print_db_lsn_struct();
|
||||
print_dbt_struct();
|
||||
|
||||
printf("typedef struct __toku_descriptor {\n");
|
||||
printf(" DBT dbt;\n");
|
||||
printf("} *DESCRIPTOR, DESCRIPTOR_S;\n");
|
||||
|
||||
//file fragmentation info
|
||||
//a block is just a contiguous region in a file.
|
||||
printf("//One header is included in 'data'\n");
|
||||
printf("//One header is included in 'additional for checkpoint'\n");
|
||||
printf("typedef struct __toku_db_fragmentation {\n");
|
||||
printf(" uint64_t file_size_bytes; //Total file size in bytes\n");
|
||||
printf(" uint64_t data_bytes; //Compressed User Data in bytes\n");
|
||||
printf(" uint64_t data_blocks; //Number of blocks of compressed User Data\n");
|
||||
printf(" uint64_t checkpoint_bytes_additional; //Additional bytes used for checkpoint system\n");
|
||||
printf(" uint64_t checkpoint_blocks_additional; //Additional blocks used for checkpoint system \n");
|
||||
printf(" uint64_t unused_bytes; //Unused space in file\n");
|
||||
printf(" uint64_t unused_blocks; //Number of contiguous regions of unused space\n");
|
||||
printf(" uint64_t largest_unused_block; //Size of largest contiguous unused space\n");
|
||||
printf("} *TOKU_DB_FRAGMENTATION, TOKU_DB_FRAGMENTATION_S;\n");
|
||||
|
||||
print_db_struct();
|
||||
|
||||
print_db_txn_active_struct();
|
||||
|
||||
printf("typedef struct __toku_txn_progress {\n");
|
||||
printf(" uint64_t entries_total;\n");
|
||||
printf(" uint64_t entries_processed;\n");
|
||||
printf(" uint8_t is_commit;\n");
|
||||
printf(" uint8_t stalled_on_checkpoint;\n");
|
||||
printf("} *TOKU_TXN_PROGRESS, TOKU_TXN_PROGRESS_S;\n");
|
||||
printf("typedef void(*TXN_PROGRESS_POLL_FUNCTION)(TOKU_TXN_PROGRESS, void*);\n");
|
||||
printf("struct txn_stat {\n uint64_t rollback_raw_count;\n uint64_t rollback_num_entries;\n};\n");
|
||||
|
||||
print_db_txn_struct();
|
||||
print_db_txn_stat_struct();
|
||||
print_dbc_struct();
|
||||
|
||||
printf("int db_env_create(DB_ENV **, uint32_t) %s;\n", VISIBLE);
|
||||
printf("int db_create(DB **, DB_ENV *, uint32_t) %s;\n", VISIBLE);
|
||||
printf("const char *db_strerror(int) %s;\n", VISIBLE);
|
||||
printf("const char *db_version(int*,int *,int *) %s;\n", VISIBLE);
|
||||
printf("int log_compare (const DB_LSN*, const DB_LSN *) %s;\n", VISIBLE);
|
||||
printf("int toku_set_trace_file (const char *fname) %s;\n", VISIBLE);
|
||||
printf("int toku_close_trace_file (void) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_direct_io (bool direct_io_on) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_compress_buffers_before_eviction (bool compress_buffers) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_func_fsync (int (*)(int)) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_func_free (void (*)(void*)) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_func_malloc (void *(*)(size_t)) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_func_realloc (void *(*)(void*, size_t)) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_func_pwrite (ssize_t (*)(int, const void *, size_t, toku_off_t)) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_func_full_pwrite (ssize_t (*)(int, const void *, size_t, toku_off_t)) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_func_write (ssize_t (*)(int, const void *, size_t)) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_func_full_write (ssize_t (*)(int, const void *, size_t)) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_func_fdopen (FILE* (*)(int, const char *)) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_func_fopen (FILE* (*)(const char *, const char *)) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_func_open (int (*)(const char *, int, int)) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_func_fclose (int (*)(FILE*)) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_func_pread (ssize_t (*)(int, void *, size_t, off_t)) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_func_loader_fwrite (size_t (*fwrite_fun)(const void*,size_t,size_t,FILE*)) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_checkpoint_callback (void (*)(void*), void*) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_checkpoint_callback2 (void (*)(void*), void*) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_recover_callback (void (*)(void*), void*) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_recover_callback2 (void (*)(void*), void*) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_loader_size_factor (uint32_t) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_mvcc_garbage_collection_verification(uint32_t) %s;\n", VISIBLE);
|
||||
printf("void db_env_enable_engine_status(bool) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_flusher_thread_callback (void (*)(int, void*), void*) %s;\n", VISIBLE);
|
||||
printf("void db_env_set_num_bucket_mutexes(uint32_t) %s;\n", VISIBLE);
|
||||
printf("int db_env_set_toku_product_name(const char*) %s;\n", VISIBLE);
|
||||
printf("void db_env_try_gdb_stack_trace(const char *gdb_path) %s;\n", VISIBLE);
|
||||
|
||||
printf("#if defined(__cplusplus) || defined(__cilkplusplus)\n}\n#endif\n");
|
||||
printf("#endif\n");
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,96 +0,0 @@
|
||||
# Copyright (c) 2009 Sun Microsystems, Inc.
|
||||
# Use is subject to license terms.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; version 2 of the License.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
|
||||
|
||||
# This script merges many static libraries into
|
||||
# one big library on Unix.
|
||||
SET(TARGET "@TARGET@")
|
||||
SET(CMAKE_CURRENT_BINARY_DIR "@CMAKE_CURRENT_BINARY_DIR@")
|
||||
SET(CMAKE_AR "@CMAKE_AR@")
|
||||
SET(CMAKE_RANLIB "@CMAKE_RANLIB@")
|
||||
|
||||
|
||||
SET(TEMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/merge_archives_${TARGET})
|
||||
MAKE_DIRECTORY(${TEMP_DIR})
|
||||
# Extract each archive to its own subdirectory(avoid object filename
|
||||
# clashes) Since the lib may contain objects with the same name, we first
|
||||
# list the archive contents, then uniquify the object names as we extract
|
||||
# them.
|
||||
FOREACH(LIB ${STATIC_LIB_FILES})
|
||||
GET_FILENAME_COMPONENT(NAME_NO_EXT ${LIB} NAME_WE)
|
||||
SET(TEMP_SUBDIR ${TEMP_DIR}/${NAME_NO_EXT})
|
||||
MAKE_DIRECTORY(${TEMP_SUBDIR})
|
||||
EXECUTE_PROCESS(
|
||||
COMMAND ${CMAKE_AR} -t ${LIB}
|
||||
OUTPUT_VARIABLE LIB_OBJS
|
||||
)
|
||||
STRING(REGEX REPLACE "\n" ";" LIB_OBJ_LIST "${LIB_OBJS}")
|
||||
STRING(REGEX REPLACE ";$" "" LIB_OBJ_LIST "${LIB_OBJ_LIST}")
|
||||
|
||||
LIST(LENGTH LIB_OBJ_LIST LENGTH_WITH_DUPS)
|
||||
SET(LIB_OBJ_LIST_NO_DUPS ${LIB_OBJ_LIST})
|
||||
IF (LENGTH_WITH_DUPS GREATER 0)
|
||||
LIST(REMOVE_DUPLICATES LIB_OBJ_LIST_NO_DUPS)
|
||||
ENDIF ()
|
||||
LIST(LENGTH LIB_OBJ_LIST_NO_DUPS LENGTH_WITHOUT_DUPS)
|
||||
|
||||
IF(LENGTH_WITH_DUPS EQUAL LENGTH_WITHOUT_DUPS)
|
||||
# Optimization for when lib doesn't actually have duplicate object
|
||||
# names, we can just extract everything.
|
||||
EXECUTE_PROCESS(
|
||||
COMMAND ${CMAKE_AR} -x ${LIB}
|
||||
WORKING_DIRECTORY ${TEMP_SUBDIR}
|
||||
)
|
||||
ELSE()
|
||||
LIST(SORT LIB_OBJ_LIST)
|
||||
SET(SAME_OBJ_COUNT 1)
|
||||
SET(LAST_OBJ_NAME)
|
||||
FOREACH(OBJ ${LIB_OBJ_LIST})
|
||||
IF(OBJ STREQUAL LAST_OBJ_NAME)
|
||||
GET_FILENAME_COMPONENT(OBJ_NO_EXT ${OBJ} NAME_WE)
|
||||
FILE(RENAME "${TEMP_SUBDIR}/${OBJ}" "${TEMP_SUBDIR}/${OBJ_NO_EXT}.${SAME_OBJ_COUNT}.o")
|
||||
MATH(EXPR SAME_OBJ_COUNT "${SAME_OBJ_COUNT}+1")
|
||||
ELSE()
|
||||
SET(SAME_OBJ_COUNT 1)
|
||||
ENDIF()
|
||||
SET(LAST_OBJ_NAME "${OBJ}")
|
||||
EXECUTE_PROCESS(
|
||||
COMMAND ${CMAKE_AR} -xN ${SAME_OBJ_COUNT} ${LIB} ${OBJ}
|
||||
WORKING_DIRECTORY ${TEMP_SUBDIR}
|
||||
)
|
||||
ENDFOREACH()
|
||||
ENDIF()
|
||||
|
||||
FILE(GLOB_RECURSE LIB_OBJECTS "${TEMP_SUBDIR}/*.o")
|
||||
SET(OBJECTS ${OBJECTS} ${LIB_OBJECTS})
|
||||
ENDFOREACH()
|
||||
|
||||
# Use relative paths, makes command line shorter.
|
||||
GET_FILENAME_COMPONENT(ABS_TEMP_DIR ${TEMP_DIR} ABSOLUTE)
|
||||
FOREACH(OBJ ${OBJECTS})
|
||||
FILE(RELATIVE_PATH OBJ ${ABS_TEMP_DIR} ${OBJ})
|
||||
FILE(TO_NATIVE_PATH ${OBJ} OBJ)
|
||||
SET(ALL_OBJECTS ${ALL_OBJECTS} ${OBJ})
|
||||
ENDFOREACH()
|
||||
|
||||
FILE(TO_NATIVE_PATH ${TARGET_FILE} TARGET_FILE)
|
||||
# Now pack the objects into library with ar.
|
||||
EXECUTE_PROCESS(
|
||||
COMMAND ${CMAKE_AR} rcs ${TARGET_FILE} ${ALL_OBJECTS}
|
||||
WORKING_DIRECTORY ${TEMP_DIR}
|
||||
)
|
||||
|
||||
# Cleanup
|
||||
FILE(REMOVE_RECURSE ${TEMP_DIR})
|
@ -1,18 +0,0 @@
|
||||
# Find Valgrind.
|
||||
#
|
||||
# This module defines:
|
||||
# VALGRIND_INCLUDE_DIR, where to find valgrind/memcheck.h, etc.
|
||||
# VALGRIND_PROGRAM, the valgrind executable.
|
||||
# VALGRIND_FOUND, If false, do not try to use valgrind.
|
||||
#
|
||||
# If you have valgrind installed in a non-standard place, you can define
|
||||
# VALGRIND_PREFIX to tell cmake where it is.
|
||||
|
||||
# find_package_handle_standard_args() is defined by this module; include it
# explicitly rather than relying on an earlier find_package() call having
# loaded it as a side effect.
include(FindPackageHandleStandardArgs)

# Honor the documented VALGRIND_PREFIX override ahead of the default search
# locations. When the prefix is unset the hint variables expand to nothing
# and the searches behave exactly as before.
set(_valgrind_include_hint)
set(_valgrind_program_hint)
if(VALGRIND_PREFIX)
  set(_valgrind_include_hint HINTS "${VALGRIND_PREFIX}/include")
  set(_valgrind_program_hint HINTS "${VALGRIND_PREFIX}/bin")
endif()

find_path(VALGRIND_INCLUDE_DIR valgrind/memcheck.h ${_valgrind_include_hint})
find_program(VALGRIND_PROGRAM NAMES valgrind ${_valgrind_program_hint})

unset(_valgrind_include_hint)
unset(_valgrind_program_hint)

# Reports the result and sets the *_FOUND variable once both the header
# directory and the executable have been located.
find_package_handle_standard_args(Valgrind DEFAULT_MSG
  VALGRIND_INCLUDE_DIR
  VALGRIND_PROGRAM)

mark_as_advanced(VALGRIND_INCLUDE_DIR VALGRIND_PROGRAM)
|
@ -1,126 +0,0 @@
|
||||
## set up lists of sources and headers for tags
|
||||
# Gather every .cc file under the listed subtrees into all_srcs for the tag
# database targets. Each subtree is listed exactly once
# (db-benchmark-test was previously repeated).
file(GLOB_RECURSE all_srcs
  buildheader/*.cc
  db-benchmark-test/*.cc
  ft/*.cc
  include/*.cc
  locktree/*.cc
  portability/*.cc
  src/*.cc
  utils/*.cc
  util/*.cc
  )
|
||||
list(APPEND all_srcs
|
||||
${CMAKE_CURRENT_BINARY_DIR}/ft/log_code.cc
|
||||
${CMAKE_CURRENT_BINARY_DIR}/ft/log_print.cc
|
||||
)
|
||||
# Gather every .h file under the listed subtrees into all_hdrs for the tag
# database targets. Each subtree is listed exactly once
# (db-benchmark-test was previously repeated).
file(GLOB_RECURSE all_hdrs
  buildheader/*.h
  db-benchmark-test/*.h
  ft/*.h
  include/*.h
  locktree/*.h
  portability/*.h
  src/*.h
  utils/*.h
  util/*.h
  )
|
||||
list(APPEND all_hdrs
|
||||
${CMAKE_CURRENT_BINARY_DIR}/portability/toku_config.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/buildheader/db.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/ft/log_header.h
|
||||
)
|
||||
|
||||
option(USE_ETAGS "Build the etags database." ON)
|
||||
if (USE_ETAGS)
|
||||
find_program(ETAGS "etags")
|
||||
if (NOT ETAGS MATCHES NOTFOUND)
|
||||
add_custom_command(
|
||||
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/TAGS"
|
||||
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/etags-stamp"
|
||||
COMMAND ${ETAGS} -o TAGS ${all_srcs} ${all_hdrs}
|
||||
COMMAND touch "${CMAKE_CURRENT_BINARY_DIR}/etags-stamp"
|
||||
DEPENDS ${all_srcs} ${all_hdrs} install_tdb_h generate_config_h generate_log_code
|
||||
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
add_custom_target(build_etags ALL DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/TAGS" etags-stamp)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
option(USE_CTAGS "Build the ctags database." ON)
|
||||
if (USE_CTAGS AND
|
||||
# Macs by default are not case-sensitive, so tags and TAGS clobber each other. Do etags and not ctags in that case, because Emacs is superior. :P
|
||||
(NOT APPLE OR NOT USE_ETAGS))
|
||||
find_program(CTAGS "ctags")
|
||||
if (NOT CTAGS MATCHES NOTFOUND)
|
||||
add_custom_command(
|
||||
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/tags"
|
||||
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/ctags-stamp"
|
||||
COMMAND ${CTAGS} -o tags ${all_srcs} ${all_hdrs}
|
||||
COMMAND touch "${CMAKE_CURRENT_BINARY_DIR}/ctags-stamp"
|
||||
DEPENDS ${all_srcs} ${all_hdrs} install_tdb_h generate_config_h generate_log_code
|
||||
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
add_custom_target(build_ctags ALL DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/tags" ctags-stamp)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# Optional cscope cross-reference database, built as part of ALL when the
# cscope program is available.
option(USE_CSCOPE "Build the cscope database." ON)
if (USE_CSCOPE)
  find_program(CSCOPE "cscope")
  if (NOT CSCOPE MATCHES NOTFOUND)
    # Write the list of files to index, one path per line, at configure time.
    file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/cscope.files" "")
    foreach(file ${all_srcs} ${all_hdrs})
      file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/cscope.files" "${file}\n")
    endforeach(file)
    # Each include directory is passed once (the source-tree portability
    # directory used to be listed twice).
    add_custom_command(
      OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/cscope.out"
      OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/cscope.in.out"
      OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/cscope.po.out"
      COMMAND ${CSCOPE} -b -q -R -i"${CMAKE_CURRENT_BINARY_DIR}/cscope.files"
              -I"${CMAKE_CURRENT_SOURCE_DIR}"
              -I"${CMAKE_CURRENT_SOURCE_DIR}/include"
              -I"${CMAKE_CURRENT_SOURCE_DIR}/portability"
              -I"${CMAKE_CURRENT_SOURCE_DIR}/ft"
              -I"${CMAKE_CURRENT_SOURCE_DIR}/src"
              -I"${CMAKE_CURRENT_SOURCE_DIR}/locktree"
              -I"${CMAKE_CURRENT_SOURCE_DIR}/utils"
              -I"${CMAKE_CURRENT_SOURCE_DIR}/db-benchmark-test"
              -I"${CMAKE_CURRENT_BINARY_DIR}"
              -I"${CMAKE_CURRENT_BINARY_DIR}/portability"
              -I"${CMAKE_CURRENT_BINARY_DIR}/buildheader"
      DEPENDS ${all_srcs} ${all_hdrs} install_tdb_h generate_config_h generate_log_code
      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")
    add_custom_target(build_cscope.out ALL DEPENDS
      "${CMAKE_CURRENT_SOURCE_DIR}/cscope.out"
      "${CMAKE_CURRENT_SOURCE_DIR}/cscope.in.out"
      "${CMAKE_CURRENT_SOURCE_DIR}/cscope.po.out")
  endif ()
endif ()
|
||||
|
||||
option(USE_GTAGS "Build the gtags database." ON)
|
||||
if (USE_GTAGS)
|
||||
find_program(GTAGS "gtags")
|
||||
if (NOT GTAGS MATCHES NOTFOUND)
|
||||
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/gtags.files" "")
|
||||
foreach(file ${all_srcs} ${all_hdrs})
|
||||
file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/gtags.files" "${file}\n")
|
||||
endforeach(file)
|
||||
add_custom_command(
|
||||
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/GTAGS"
|
||||
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/GRTAGS"
|
||||
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/GPATH"
|
||||
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/GSYMS"
|
||||
COMMAND ${GTAGS} -f "${CMAKE_CURRENT_BINARY_DIR}/gtags.files"
|
||||
DEPENDS ${all_srcs} ${all_hdrs} install_tdb_h generate_config_h generate_log_code
|
||||
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
add_custom_target(build_GTAGS ALL DEPENDS
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/GTAGS"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/GRTAGS"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/GPATH"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/GSYMS")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
option(USE_MKID "Build the idutils database." ON)
|
||||
if (USE_MKID)
|
||||
find_program(MKID "mkid")
|
||||
if (NOT MKID MATCHES NOTFOUND)
|
||||
add_custom_command(
|
||||
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/ID"
|
||||
COMMAND ${MKID} ${all_srcs} ${all_hdrs}
|
||||
DEPENDS ${all_srcs} ${all_hdrs} install_tdb_h generate_config_h generate_log_code
|
||||
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
add_custom_target(build_MKID ALL DEPENDS
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/ID")
|
||||
endif ()
|
||||
endif ()
|
@ -1,137 +0,0 @@
|
||||
## feature detection
|
||||
find_package(Threads)
|
||||
find_package(ZLIB REQUIRED)
|
||||
|
||||
option(USE_VALGRIND "Build to run safely under valgrind (often slower)." ON)
|
||||
if(USE_VALGRIND)
|
||||
find_package(Valgrind REQUIRED)
|
||||
endif()
|
||||
|
||||
option(TOKU_DEBUG_PARANOID "Enable paranoid asserts." ON)
|
||||
|
||||
include(CheckIncludeFiles)
|
||||
|
||||
## check for some include files
|
||||
check_include_files(alloca.h HAVE_ALLOCA_H)
|
||||
check_include_files(arpa/inet.h HAVE_ARPA_INET_H)
|
||||
check_include_files(bits/functexcept.h HAVE_BITS_FUNCTEXCEPT_H)
|
||||
check_include_files(byteswap.h HAVE_BYTESWAP_H)
|
||||
check_include_files(endian.h HAVE_ENDIAN_H)
|
||||
check_include_files(fcntl.h HAVE_FCNTL_H)
|
||||
check_include_files(inttypes.h HAVE_INTTYPES_H)
|
||||
check_include_files(libkern/OSAtomic.h HAVE_LIBKERN_OSATOMIC_H)
|
||||
check_include_files(libkern/OSByteOrder.h HAVE_LIBKERN_OSBYTEORDER_H)
|
||||
check_include_files(limits.h HAVE_LIMITS_H)
|
||||
check_include_files(machine/endian.h HAVE_MACHINE_ENDIAN_H)
|
||||
check_include_files(malloc.h HAVE_MALLOC_H)
|
||||
check_include_files(malloc/malloc.h HAVE_MALLOC_MALLOC_H)
|
||||
check_include_files(malloc_np.h HAVE_MALLOC_NP_H)
|
||||
check_include_files(pthread.h HAVE_PTHREAD_H)
|
||||
check_include_files(pthread_np.h HAVE_PTHREAD_NP_H)
|
||||
check_include_files(stdint.h HAVE_STDINT_H)
|
||||
check_include_files(stdlib.h HAVE_STDLIB_H)
|
||||
check_include_files(string.h HAVE_STRING_H)
|
||||
check_include_files(syscall.h HAVE_SYSCALL_H)
|
||||
check_include_files(sys/endian.h HAVE_SYS_ENDIAN_H)
|
||||
check_include_files(sys/file.h HAVE_SYS_FILE_H)
|
||||
check_include_files(sys/malloc.h HAVE_SYS_MALLOC_H)
|
||||
check_include_files(sys/prctl.h HAVE_SYS_PRCTL_H)
|
||||
check_include_files(sys/resource.h HAVE_SYS_RESOURCE_H)
|
||||
check_include_files(sys/statvfs.h HAVE_SYS_STATVFS_H)
|
||||
check_include_files(sys/syscall.h HAVE_SYS_SYSCALL_H)
|
||||
check_include_files(sys/sysctl.h HAVE_SYS_SYSCTL_H)
|
||||
check_include_files(sys/syslimits.h HAVE_SYS_SYSLIMITS_H)
|
||||
check_include_files(sys/time.h HAVE_SYS_TIME_H)
|
||||
check_include_files(unistd.h HAVE_UNISTD_H)
|
||||
|
||||
include(CheckSymbolExists)
|
||||
|
||||
## check whether we can set the mmap threshold like we can in gnu libc's malloc
|
||||
check_symbol_exists(M_MMAP_THRESHOLD "malloc.h" HAVE_M_MMAP_THRESHOLD)
|
||||
## check whether we have CLOCK_REALTIME
|
||||
check_symbol_exists(CLOCK_REALTIME "time.h" HAVE_CLOCK_REALTIME)
|
||||
## check how to do direct I/O
|
||||
if (NOT CMAKE_SYSTEM_NAME STREQUAL FreeBSD)
|
||||
set(CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE)
|
||||
endif ()
|
||||
check_symbol_exists(O_DIRECT "fcntl.h" HAVE_O_DIRECT)
|
||||
check_symbol_exists(F_NOCACHE "fcntl.h" HAVE_F_NOCACHE)
|
||||
check_symbol_exists(MAP_ANONYMOUS "sys/mman.h" HAVE_MAP_ANONYMOUS)
|
||||
check_symbol_exists(PR_SET_PTRACER "sys/prctl.h" HAVE_PR_SET_PTRACER)
|
||||
check_symbol_exists(PR_SET_PTRACER_ANY "sys/prctl.h" HAVE_PR_SET_PTRACER_ANY)
|
||||
|
||||
include(CheckFunctionExists)
|
||||
|
||||
## check for the right way to get the actual allocation size of a pointer
|
||||
check_function_exists(malloc_size HAVE_MALLOC_SIZE)
|
||||
check_function_exists(malloc_usable_size HAVE_MALLOC_USABLE_SIZE)
|
||||
## check whether we have memalign or valloc (a weak substitute for memalign on darwin)
|
||||
check_function_exists(memalign HAVE_MEMALIGN)
|
||||
check_function_exists(valloc HAVE_VALLOC)
|
||||
## check whether we have random_r or nrand48 to use as a reentrant random function
|
||||
check_function_exists(nrand48 HAVE_NRAND48)
|
||||
check_function_exists(random_r HAVE_RANDOM_R)
|
||||
check_function_exists(mincore HAVE_MINCORE)
|
||||
|
||||
## clear this out in case mysql modified it
|
||||
set(CMAKE_REQUIRED_LIBRARIES "")
|
||||
set(EXTRA_SYSTEM_LIBS "")
|
||||
check_function_exists(dlsym HAVE_DLSYM_WITHOUT_DL)
|
||||
if (NOT HAVE_DLSYM_WITHOUT_DL)
|
||||
set(CMAKE_REQUIRED_LIBRARIES dl)
|
||||
check_function_exists(dlsym HAVE_DLSYM_WITH_DL)
|
||||
if (HAVE_DLSYM_WITH_DL)
|
||||
list(APPEND EXTRA_SYSTEM_LIBS dl)
|
||||
else ()
|
||||
message(FATAL_ERROR "Cannot find dlsym(), even with -ldl.")
|
||||
endif ()
|
||||
endif ()
|
||||
check_function_exists(backtrace HAVE_BACKTRACE_WITHOUT_EXECINFO)
|
||||
if (NOT HAVE_BACKTRACE_WITHOUT_EXECINFO)
|
||||
set(CMAKE_REQUIRED_LIBRARIES execinfo)
|
||||
check_function_exists(backtrace HAVE_BACKTRACE_WITH_EXECINFO)
|
||||
if (HAVE_BACKTRACE_WITH_EXECINFO)
|
||||
list(APPEND EXTRA_SYSTEM_LIBS execinfo)
|
||||
else ()
|
||||
message(WARNING "Cannot find backtrace(), even with -lexecinfo.")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# Pick the extra system library to link against:
#  - non-Apple platforms that expose CLOCK_REALTIME get librt;
#  - Apple platforms get libSystem.
# Any other platform gets neither; the previous unconditional else() branch
# requested "System" there as well.
# NOTE(review): assumes libSystem is only meaningful on Apple platforms -
# confirm no other supported target relied on the old fallback.
if(HAVE_CLOCK_REALTIME AND (NOT APPLE))
  list(APPEND EXTRA_SYSTEM_LIBS rt)
elseif(APPLE)
  list(APPEND EXTRA_SYSTEM_LIBS System)
endif()
|
||||
|
||||
set(CMAKE_REQUIRED_LIBRARIES pthread)
|
||||
## check whether we can change rwlock preference
|
||||
check_function_exists(pthread_rwlockattr_setkind_np HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP)
|
||||
## check for the right way to yield using pthreads
|
||||
check_function_exists(pthread_yield HAVE_PTHREAD_YIELD)
|
||||
check_function_exists(pthread_yield_np HAVE_PTHREAD_YIELD_NP)
|
||||
## check if we have pthread_threadid_np() (i.e. osx)
|
||||
check_function_exists(pthread_threadid_np HAVE_PTHREAD_THREADID_NP)
|
||||
## check if we have pthread_getthreadid_np() (i.e. freebsd)
|
||||
check_function_exists(pthread_getthreadid_np HAVE_PTHREAD_GETTHREADID_NP)
|
||||
check_function_exists(sched_getcpu HAVE_SCHED_GETCPU)
|
||||
|
||||
include(CheckCSourceCompiles)
|
||||
|
||||
if (HAVE_PTHREAD_YIELD)
|
||||
include(CheckPrototypeDefinition)
|
||||
|
||||
check_prototype_definition(pthread_yield "void pthread_yield(void)" "(void)0" "pthread.h" PTHREAD_YIELD_RETURNS_VOID)
|
||||
check_c_source_compiles("#include <pthread.h>
|
||||
int main(void) {
|
||||
int r = pthread_yield();
|
||||
return r;
|
||||
}" PTHREAD_YIELD_RETURNS_INT)
|
||||
endif (HAVE_PTHREAD_YIELD)
|
||||
|
||||
## check whether we have gcc-style thread-local storage using a storage class modifier
|
||||
check_c_source_compiles("#include <pthread.h>
|
||||
static __thread int tlsvar = 0;
|
||||
int main(void) { return tlsvar; }" HAVE_GNU_TLS)
|
||||
|
||||
## set TOKUDB_REVISION
|
||||
set(CMAKE_TOKUDB_REVISION 0 CACHE INTERNAL "Revision of tokudb.")
|
@ -1,99 +0,0 @@
|
||||
# Merge static libraries into a big static lib. The resulting library
|
||||
# should not have dependencies on other static libraries.
|
||||
# We use it in MySQL to merge mysys,dbug,vio etc into mysqlclient
|
||||
# Collect the entries of ${target}_LIB_DEPENDS that name existing CMake
# targets.
#   target - name whose <target>_LIB_DEPENDS variable is inspected
#   result - name of the variable that receives the list in the caller's scope
# NOTE(review): despite the "OS_LIBS" name, only entries that ARE targets are
# kept - confirm this is the intended filter.
FUNCTION(TOKU_GET_DEPENDEND_OS_LIBS target result)
  # Start from an empty list: a function scope starts with a copy of the
  # caller's variables, so a pre-existing `ret` would otherwise end up in
  # the result.
  SET(ret)
  FOREACH(lib ${${target}_LIB_DEPENDS})
    IF(TARGET ${lib})
      LIST(APPEND ret ${lib})
    ENDIF()
  ENDFOREACH()
  SET(${result} ${ret} PARENT_SCOPE)
ENDFUNCTION(TOKU_GET_DEPENDEND_OS_LIBS)
|
||||
|
||||
# Create static library TARGET (file name OUTPUT_NAME) that contains the
# objects of every static-library target listed in LIBS_TO_MERGE. Entries
# that are not static-library targets are instead added as public link
# dependencies of TARGET.
MACRO(TOKU_MERGE_STATIC_LIBS TARGET OUTPUT_NAME LIBS_TO_MERGE)
  # To produce a library we need at least one source file.
  # It is created by ADD_CUSTOM_COMMAND below and also helps
  # to track dependencies.
  SET(SOURCE_FILE ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}_depends.cc)
  ADD_LIBRARY(${TARGET} STATIC ${SOURCE_FILE})
  SET_TARGET_PROPERTIES(${TARGET} PROPERTIES OUTPUT_NAME ${OUTPUT_NAME})

  # A macro runs in the caller's variable scope, so every accumulator must
  # be cleared here; otherwise a second invocation in the same scope would
  # also merge the libraries collected by the first one.
  SET(OSLIBS)
  SET(STATIC_LIBS)
  SET(STATIC_LIB_FILES)
  FOREACH(LIB ${LIBS_TO_MERGE})
    IF(TARGET ${LIB})
      # This is a target in current project
      # (can be a static or shared lib)
      GET_TARGET_PROPERTY(LIB_TYPE ${LIB} TYPE)
      IF(LIB_TYPE STREQUAL "STATIC_LIBRARY")
        LIST(APPEND STATIC_LIBS ${LIB})
        ADD_DEPENDENCIES(${TARGET} ${LIB})
        # Extract dependent OS libraries
        TOKU_GET_DEPENDEND_OS_LIBS(${LIB} LIB_OSLIBS)
        LIST(APPEND OSLIBS ${LIB_OSLIBS})
      ELSE()
        # This is a shared library our static lib depends on.
        LIST(APPEND OSLIBS ${LIB})
      ENDIF()
    ELSE()
      # 3rd party library like libz.so. Make sure that everything
      # that links to our library links to this one as well.
      LIST(APPEND OSLIBS ${LIB})
    ENDIF()
  ENDFOREACH()
  IF(OSLIBS)
    # REMOVE_DUPLICATES destroys the order of the libs so disabled
    # LIST(REMOVE_DUPLICATES OSLIBS)
    TARGET_LINK_LIBRARIES(${TARGET} LINK_PUBLIC ${OSLIBS})
  ENDIF()

  # Make the generated dummy source file depend on all static input
  # libs. If an input lib changes, the source file is touched,
  # which causes the desired effect (relink).
  ADD_CUSTOM_COMMAND(
    OUTPUT ${SOURCE_FILE}
    COMMAND ${CMAKE_COMMAND} -E touch ${SOURCE_FILE}
    DEPENDS ${STATIC_LIBS})

  IF(MSVC)
    # To merge libs, just pass them to lib.exe command line.
    SET(LINKER_EXTRA_FLAGS "")
    FOREACH(LIB ${STATIC_LIBS})
      SET(LINKER_EXTRA_FLAGS "${LINKER_EXTRA_FLAGS} $<TARGET_FILE:${LIB}>")
    ENDFOREACH()
    SET_TARGET_PROPERTIES(${TARGET} PROPERTIES STATIC_LIBRARY_FLAGS
      "${LINKER_EXTRA_FLAGS}")
  ELSE()
    FOREACH(STATIC_LIB ${STATIC_LIBS})
      LIST(APPEND STATIC_LIB_FILES $<TARGET_FILE:${STATIC_LIB}>)
    ENDFOREACH()
    IF(APPLE)
      # Use OSX's libtool to merge archives (handles universal
      # binaries properly)
      ADD_CUSTOM_COMMAND(TARGET ${TARGET} POST_BUILD
        COMMAND rm $<TARGET_FILE:${TARGET}>
        COMMAND /usr/bin/libtool -static -o $<TARGET_FILE:${TARGET}>
        ${STATIC_LIB_FILES}
      )
    ELSE()
      # Generic Unix, Cygwin or MinGW. In post-build step, call
      # script, that extracts objects from archives with "ar x"
      # and repacks them with "ar r"
      # Macro arguments are text substitutions, not variables; define a
      # real TARGET variable so CONFIGURE_FILE can expand @TARGET@.
      SET(TARGET ${TARGET})
      CONFIGURE_FILE(
        ${TOKU_CMAKE_SCRIPT_DIR}/merge_archives_unix.cmake.in
        ${CMAKE_CURRENT_BINARY_DIR}/merge_archives_${TARGET}.cmake
        @ONLY
      )
      STRING(REGEX REPLACE ";" "\\\;" STATIC_LIB_FILES "${STATIC_LIB_FILES}")
      # No DEPENDS here: the TARGET form of ADD_CUSTOM_COMMAND does not
      # accept it, and the script is already generated at configure time.
      ADD_CUSTOM_COMMAND(TARGET ${TARGET} POST_BUILD
        COMMAND rm $<TARGET_FILE:${TARGET}>
        COMMAND ${CMAKE_COMMAND}
          -D TARGET_FILE=$<TARGET_FILE:${TARGET}>
          -D STATIC_LIB_FILES="${STATIC_LIB_FILES}"
          -P ${CMAKE_CURRENT_BINARY_DIR}/merge_archives_${TARGET}.cmake
      )
    ENDIF()
  ENDIF()
ENDMACRO(TOKU_MERGE_STATIC_LIBS)
|
@ -1,155 +0,0 @@
|
||||
## some functions for getting system info so we can construct BUILDNAME
|
||||
|
||||
## Given an executable name or path, looks it up with `which` and follows
## symlinks with `readlink` until there are none left, then stores the
## basename of the final path in the variable named by <out>.
##   filename_input - executable name or path to resolve
##   out            - name of the variable that receives the basename
## This is a macro, so the helper variables it sets (res, filename,
## full_filename, link_target, filepath, real_filename) are visible to the
## caller afterwards.
macro(real_executable_name filename_input out)
  set(res 0)
  set(filename ${filename_input})
  ## res stays 0 while `which` finds the file and `readlink` reports a link.
  while(NOT(res))
    execute_process(
      COMMAND which ${filename}
      RESULT_VARIABLE res
      ERROR_QUIET
      OUTPUT_VARIABLE full_filename
      OUTPUT_STRIP_TRAILING_WHITESPACE)
    if(NOT(res))
      execute_process(
        COMMAND readlink ${full_filename}
        RESULT_VARIABLE res
        OUTPUT_VARIABLE link_target
        OUTPUT_STRIP_TRAILING_WHITESPACE)
      if(NOT(res))
        if(IS_ABSOLUTE "${link_target}")
          ## An absolute link target already is the next path to inspect;
          ## prefixing it with the link's directory would produce a path
          ## that does not exist and end the resolution too early.
          set(filename "${link_target}")
        else()
          ## A relative link target is relative to the link's directory.
          execute_process(
            COMMAND dirname ${full_filename}
            OUTPUT_VARIABLE filepath
            OUTPUT_STRIP_TRAILING_WHITESPACE)
          set(filename "${filepath}/${link_target}")
        endif()
      else()
        ## Not a symlink: this is the real file, and res != 0 ends the loop.
        set(filename ${full_filename})
      endif()
    endif()
  endwhile()
  execute_process(
    COMMAND basename ${filename}
    OUTPUT_VARIABLE real_filename
    OUTPUT_STRIP_TRAILING_WHITESPACE)
  set(${out} ${real_filename})
endmacro(real_executable_name)
|
||||
|
||||
## Runs `uname <flag>` and stores its output, with trailing whitespace
## stripped, in the variable named by <out>.
macro(uname flag out)
  execute_process(COMMAND uname ${flag}
                  OUTPUT_STRIP_TRAILING_WHITESPACE
                  OUTPUT_VARIABLE ${out})
endmacro()
|
||||
|
||||
## Stores the output of the `whoami` command (the current username), with
## trailing whitespace stripped, in the variable named by <out>.
macro(whoami out)
  execute_process(COMMAND whoami
                  OUTPUT_STRIP_TRAILING_WHITESPACE
                  OUTPUT_VARIABLE ${out})
endmacro()
|
||||
|
||||
## Stores the output of the `hostname` command in the variable named by
## <out>, with a trailing ".tokutek.com" removed when present.
## As a macro, it also leaves the unmodified name in `fullhostname`.
macro(hostname out)
  execute_process(COMMAND hostname
                  OUTPUT_STRIP_TRAILING_WHITESPACE
                  OUTPUT_VARIABLE fullhostname)
  string(REGEX REPLACE "\\.tokutek\\.com$" "" ${out} "${fullhostname}")
endmacro()
|
||||
|
||||
## Collect the pieces of machine information that label this build.
uname("-m" machine_type)
real_executable_name("${CMAKE_CXX_COMPILER}" real_cxx_compiler)
get_filename_component(branchname "${CMAKE_CURRENT_SOURCE_DIR}" NAME)
hostname(host)
whoami(user)

## SITE seems to have to be set before include(CTest).
set(SITE "${user}@${host}")

## Coverage builds are labelled "Coverage"; all others use the build type.
if (NOT USE_GCOV)
  set(buildname_build_type "${CMAKE_BUILD_TYPE}")
else ()
  set(buildname_build_type "Coverage")
endif ()

## BUILDNAME also seems to have to be set before include(CTest).
set(BUILDNAME
    "${branchname} ${buildname_build_type} ${CMAKE_SYSTEM} ${machine_type} ${CMAKE_CXX_COMPILER_ID} ${real_cxx_compiler} ${CMAKE_CXX_COMPILER_VERSION}"
    CACHE STRING "CTest build name" FORCE)

include(CTest)

set(TOKUDB_DATA "${TokuDB_SOURCE_DIR}/../tokudb.data"
    CACHE FILEPATH "Path to data files for tests")
|
||||
|
||||
if (BUILD_TESTING OR BUILD_FT_TESTS OR BUILD_SRC_TESTS)
|
||||
## Warn when the test data directory is missing (standalone TokuDB project only).
set(WARNED_ABOUT_DATA 0)
if (NOT EXISTS "${TOKUDB_DATA}/" AND NOT WARNED_ABOUT_DATA AND CMAKE_PROJECT_NAME STREQUAL TokuDB)
  message(WARNING "Test data files are missing from ${TOKUDB_DATA}, which will cause some tests to fail. Please put them there or modify TOKUDB_DATA to avoid this.")
  set(WARNED_ABOUT_DATA 1)
endif ()

## Build the full valgrind suppressions file in the binary directory:
## ft/valgrind.suppressions followed by bash.suppressions.
file(READ ft/valgrind.suppressions valgrind_suppressions)
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/valgrind.suppressions" "${valgrind_suppressions}")
file(READ bash.suppressions bash_suppressions)
file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/valgrind.suppressions" "${bash_suppressions}")

include(CMakeDependentOption)

## Conditions under which the helgrind/drd options are offered:
##  - Helgrind and DRD explicitly state that they only run with the Linux
##    glibc-2.3 NPTL threading implementation [1,2]. If this ever changes
##    we can enable helgrind and drd on other systems.
##    [1]: http://valgrind.org/docs/manual/hg-manual.html#hg-manual.effective-use
##    [2]: http://valgrind.org/docs/manual/drd-manual.html#drd-manual.limitations
##  - there is no point running them together with gcov
set(helgrind_drd_depend_conditions
    "CMAKE_SYSTEM_NAME STREQUAL Linux"
    "NOT USE_GCOV")

cmake_dependent_option(RUN_DRD_TESTS
                       "Run some tests under drd." ON
                       "${helgrind_drd_depend_conditions}" OFF)
cmake_dependent_option(RUN_HELGRIND_TESTS
                       "Run some tests under helgrind." ON
                       "${helgrind_drd_depend_conditions}" OFF)
|
||||
|
||||
## Gives test <test> the environment variable TOKU_TEST_FILENAME, set to
## "<str>.ctest-data", and registers that same path in this directory's
## ADDITIONAL_MAKE_CLEAN_FILES property.
macro(setup_toku_test_properties test str)
  set_tests_properties(${test}
                       PROPERTIES ENVIRONMENT "TOKU_TEST_FILENAME=${str}.ctest-data")
  set_property(DIRECTORY
               APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${str}.ctest-data")
endmacro()
|
||||
|
||||
## Registers the test "<pfx>/<name>", which runs <bin> with any extra
## arguments, and applies the standard test properties using <name> as the
## data-file stem.
macro(add_toku_test_aux pfx name bin)
  add_test(${pfx}/${name}
           ${bin} ${ARGN})
  setup_toku_test_properties(${pfx}/${name}
                             ${name})
endmacro()
|
||||
## Shorthand for add_toku_test_aux where the test is named after its binary:
## registers "<pfx>/<bin>" running <bin> with any extra arguments.
macro(add_toku_test pfx bin)
  add_toku_test_aux(${pfx}
                    ${bin}
                    ${bin}
                    ${ARGN})
endmacro()
|
||||
|
||||
## Command line prefix used to run a test under valgrind's helgrind tool.
set(CMAKE_HELGRIND_COMMAND_STRING "valgrind --quiet --tool=helgrind --error-exitcode=1 --soname-synonyms=somalloc=*tokuportability* --suppressions=${TokuDB_SOURCE_DIR}/src/tests/helgrind.suppressions --trace-children=yes --trace-children-skip=sh,*/sh,basename,*/basename,dirname,*/dirname,rm,*/rm,cp,*/cp,mv,*/mv,cat,*/cat,diff,*/diff,grep,*/grep,date,*/date,test,*/tokudb_dump* --trace-children-skip-by-arg=--only_create,--test,--no-shutdown,novalgrind")

## Registers the test "<pfx>/<name>", whose command is the helgrind prefix
## above followed by the remaining arguments, and applies the standard test
## properties using <name> as the data-file stem.
function(add_helgrind_test pfx name)
  ## Split a function-local copy of the prefix into a list of arguments.
  set(helgrind_command "${CMAKE_HELGRIND_COMMAND_STRING}")
  separate_arguments(helgrind_command)
  add_test(NAME ${pfx}/${name}
           COMMAND ${helgrind_command} ${ARGN})
  setup_toku_test_properties(${pfx}/${name} ${name})
endfunction()
|
||||
|
||||
## Command line prefix used to run a test under valgrind's drd tool.
set(CMAKE_DRD_COMMAND_STRING "valgrind --quiet --tool=drd --error-exitcode=1 --soname-synonyms=somalloc=*tokuportability* --suppressions=${TokuDB_SOURCE_DIR}/src/tests/drd.suppressions --trace-children=yes --trace-children-skip=sh,*/sh,basename,*/basename,dirname,*/dirname,rm,*/rm,cp,*/cp,mv,*/mv,cat,*/cat,diff,*/diff,grep,*/grep,date,*/date,test,*/tokudb_dump* --trace-children-skip-by-arg=--only_create,--test,--no-shutdown,novalgrind")

## Registers the test "<pfx>/<name>", whose command is the drd prefix above
## followed by the remaining arguments, and applies the standard test
## properties using <name> as the data-file stem.
function(add_drd_test pfx name)
  ## Split a function-local copy of the prefix into a list of arguments.
  set(drd_command "${CMAKE_DRD_COMMAND_STRING}")
  separate_arguments(drd_command)
  add_test(NAME ${pfx}/${name}
           COMMAND ${drd_command} ${ARGN})
  setup_toku_test_properties(${pfx}/${name} ${name})
endfunction()
|
||||
|
||||
## Switches selecting which groups of tests are run; all default to OFF.
option(RUN_LONG_TESTS
       "If set, run all tests, even the ones that take a long time to complete."
       OFF)
option(RUN_STRESS_TESTS
       "If set, run the stress tests."
       OFF)
option(RUN_PERF_TESTS
       "If set, run the perf tests."
       OFF)

## Generate CTestCustom.cmake from its template, replacing only @VAR@ references.
configure_file(CTestCustom.cmake.in
               CTestCustom.cmake
               @ONLY)
|
||||
endif (BUILD_TESTING OR BUILD_FT_TESTS OR BUILD_SRC_TESTS)
|
@ -1,200 +0,0 @@
|
||||
## Appends every argument to the COMPILE_DEFINITIONS property of the
## current directory.
function(add_c_defines)
  set_property(DIRECTORY
               APPEND PROPERTY COMPILE_DEFINITIONS
               ${ARGN})
endfunction()
|
||||
|
||||
## Darwin-specific definitions.
if (APPLE)
  add_c_defines(DARWIN=1
                _DARWIN_C_SOURCE)
endif ()

## Preprocessor definitions we want everywhere.
add_c_defines(_FILE_OFFSET_BITS=64
              _LARGEFILE64_SOURCE
              __STDC_FORMAT_MACROS
              __STDC_LIMIT_MACROS
              __LONG_LONG_SUPPORTED)

## On FreeBSD these types of macros actually remove functionality, so they
## are only defined elsewhere.
if (NOT CMAKE_SYSTEM_NAME STREQUAL FreeBSD)
  add_c_defines(_DEFAULT_SOURCE
                _XOPEN_SOURCE=600)
endif ()
|
||||
|
||||
## Add TOKU_PTHREAD_DEBUG and TOKU_DEBUG_TXN_SYNC for the DEBUG and DRD
## configurations, plus _FORTIFY_SOURCE=2 for DRD only.
if (NOT CMAKE_VERSION VERSION_LESS 3.0)
  ## CMake 3.0 and later: select per configuration with generator expressions.
  set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS
    $<$<OR:$<CONFIG:DEBUG>,$<CONFIG:DRD>>:TOKU_PTHREAD_DEBUG=1 TOKU_DEBUG_TXN_SYNC=1>
    $<$<CONFIG:DRD>:_FORTIFY_SOURCE=2>
    )
else ()
  ## Older CMake: use the per-configuration COMPILE_DEFINITIONS_<CONFIG> properties.
  set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS_DEBUG
    TOKU_PTHREAD_DEBUG=1 TOKU_DEBUG_TXN_SYNC=1)
  set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS_DRD
    TOKU_PTHREAD_DEBUG=1 TOKU_DEBUG_TXN_SYNC=1 _FORTIFY_SOURCE=2)
endif ()
|
||||
|
||||
## Test coverage: optional, and refused with a fatal error unless the C++
## compiler id matches GNU.
option(USE_GCOV "Use gcov for test coverage." OFF)
if (USE_GCOV)
  if (NOT CMAKE_CXX_COMPILER_ID MATCHES GNU)
    message(FATAL_ERROR "Must use the GNU compiler to compile for test coverage.")
  endif ()
  ## Look for the coverage tool under either of these names.
  find_program(COVERAGE_COMMAND
               NAMES gcov47 gcov)
endif ()
|
||||
|
||||
include(CheckCCompilerFlag)
|
||||
include(CheckCXXCompilerFlag)
|
||||
|
||||
## Passes each argument, one at a time and in order, to
## MY_CHECK_AND_SET_COMPILER_FLAG, which adds the flag if the compiler
## supports it.
macro(set_cflags_if_supported)
  foreach(flag ${ARGN})
    MY_CHECK_AND_SET_COMPILER_FLAG(${flag})
  endforeach()
endmacro()
|
||||
|
||||
## missing-format-attribute causes warnings in some MySQL include files
## if the library is built as a part of TokuDB MySQL storage engine, so the
## warning is only requested when MYSQL_PROJECT_NAME_DOCSTRING is not defined.
if (NOT DEFINED MYSQL_PROJECT_NAME_DOCSTRING)
  set (OPTIONAL_CFLAGS "${OPTIONAL_CFLAGS} -Wmissing-format-attribute")
endif()

## Enable a few extra warnings and disable or demote some others.
set_cflags_if_supported(
  -Wno-missing-field-initializers
  -Wstrict-null-sentinel
  -Winit-self
  -Wswitch
  -Wtrampolines
  -Wlogical-op
  ${OPTIONAL_CFLAGS}
  -Wno-error=missing-format-attribute
  -Wno-error=address-of-array-temporary
  -Wno-error=tautological-constant-out-of-range-compare
  -Wno-error=maybe-uninitialized
  -Wno-error=extern-c-compat
  -fno-exceptions
  -Wno-error=nonnull-compare
  )
## set_cflags_if_supported_named("-Weffc++" -Weffcpp)

## Clang has stricter POD checks. So, only enable this warning on our other
## builds (Linux + GCC).
if (NOT CMAKE_CXX_COMPILER_ID MATCHES Clang)
  set_cflags_if_supported(-Wpacked)
endif ()

## Keep frame pointers when profiling is allowed (the default).
option (PROFILING "Allow profiling and debug" ON)
if (PROFILING)
  set_cflags_if_supported(-fno-omit-frame-pointer)
endif ()

## This hits with optimized builds somewhere in ftleaf_split, we don't
## know why but we don't think it's a big deal.
set_cflags_if_supported(-Wno-error=strict-overflow)
|
||||
|
||||
# New flag sets in MySQL 8.0 seem to explicitly disable exceptions, so
# turn them back on.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")

## Extra debugging flags for the Debug configuration.
set(CMAKE_C_FLAGS_DEBUG   "-g3 -O0 ${CMAKE_C_FLAGS_DEBUG}")
set(CMAKE_CXX_FLAGS_DEBUG "-g3 -O0 ${CMAKE_CXX_FLAGS_DEBUG}")

## Flags to use when we want to run DRD on the resulting binaries.
## DRD needs debugging symbols.
## -O0 makes it too slow, and -O2 inlines too much for our suppressions to
## work. -O1 is just right.
set(CMAKE_C_FLAGS_DRD   "-g3 -O1 ${CMAKE_C_FLAGS_DRD}")
set(CMAKE_CXX_FLAGS_DRD "-g3 -O1 ${CMAKE_CXX_FLAGS_DRD}")

## Extra release flags.
## RelWithDebInfo gets them as well because we want the MySQL/MariaDB
## builds to use them.
if (NOT CMAKE_CXX_COMPILER_ID STREQUAL Clang)
  ## Link-time optimization, with a platform-dependent companion option.
  if (NOT APPLE)
    set(FLTO_OPTS "-fuse-linker-plugin")
  else ()
    set(FLTO_OPTS "-fwhole-program")
  endif ()
  # we overwrite this because the default passes -DNDEBUG and we don't want that
  set(CMAKE_C_FLAGS_RELWITHDEBINFO   "-flto ${FLTO_OPTS} ${CMAKE_C_FLAGS_RELWITHDEBINFO} -g -O3 -UNDEBUG")
  set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-flto ${FLTO_OPTS} ${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -g -O3 -UNDEBUG")
  set(CMAKE_C_FLAGS_RELEASE          "-g -O3 -flto ${FLTO_OPTS} ${CMAKE_C_FLAGS_RELEASE} -UNDEBUG")
  set(CMAKE_CXX_FLAGS_RELEASE        "-g -O3 -flto ${FLTO_OPTS} ${CMAKE_CXX_FLAGS_RELEASE} -UNDEBUG")
  set(CMAKE_EXE_LINKER_FLAGS         "-g ${FLTO_OPTS} ${CMAKE_EXE_LINKER_FLAGS}")
  set(CMAKE_SHARED_LINKER_FLAGS      "-g ${FLTO_OPTS} ${CMAKE_SHARED_LINKER_FLAGS}")
else ()
  # have tried -flto and -O4, both make our statically linked executables break apple's linker
  set(CMAKE_C_FLAGS_RELWITHDEBINFO   "${CMAKE_C_FLAGS_RELWITHDEBINFO} -g -O3 -UNDEBUG")
  set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -g -O3 -UNDEBUG")
  set(CMAKE_C_FLAGS_RELEASE          "-g -O3 ${CMAKE_C_FLAGS_RELEASE} -UNDEBUG")
  set(CMAKE_CXX_FLAGS_RELEASE        "-g -O3 ${CMAKE_CXX_FLAGS_RELEASE} -UNDEBUG")
endif ()
|
||||
|
||||
## Warnings requested when the compiler supports them.
set_cflags_if_supported(
  -Wextra
  -Wbad-function-cast
  -Wno-missing-noreturn
  -Wstrict-prototypes
  -Wmissing-prototypes
  -Wmissing-declarations
  -Wpointer-arith
  #-Wshadow will fail with GCC-8
  ${OPTIONAL_CFLAGS}
  ## other flags to try:
  #-Wunsafe-loop-optimizations
  #-Wpointer-arith
  #-Wc++-compat
  #-Wc++11-compat
  #-Wwrite-strings
  #-Wzero-as-null-pointer-constant
  #-Wlogical-op
  #-Wvector-optimization-performance
  )

# -Wcast-align is disabled with clang. TODO: fix casting and re-enable it, someday.
if (NOT CMAKE_CXX_COMPILER_ID STREQUAL Clang)
  set_cflags_if_supported(-Wcast-align)
endif ()

## Never turn warnings into errors.
set(CMAKE_C_FLAGS   "-Wno-error ${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "-Wno-error ${CMAKE_CXX_FLAGS}")

# Pick the language dialect: C99 for C; for C++ prefer -std=c++11, fall
# back to -std=c++0x, and stop with a fatal error if neither is accepted.
set(CMAKE_C_FLAGS "-std=c99 ${CMAKE_C_FLAGS}")
check_cxx_compiler_flag(-std=c++11 HAVE_STDCXX11)
check_cxx_compiler_flag(-std=c++0x HAVE_STDCXX0X)
if (HAVE_STDCXX11)
  set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}")
elseif (HAVE_STDCXX0X)
  set(CMAKE_CXX_FLAGS "-std=c++0x ${CMAKE_CXX_FLAGS}")
else ()
  message(FATAL_ERROR "${CMAKE_CXX_COMPILER} doesn't support -std=c++11 or -std=c++0x, you need one that does.")
endif ()
|
||||
|
||||
## Prepends <val> to the space-separated property <propname> of the object
## <obj> of kind <type> (e.g. TARGET), keeping any existing value after it.
##   type     - property scope keyword passed to get_property/set_property
##   obj      - name of the object owning the property
##   propname - name of the property to extend
##   val      - text to put in front of the current value
function(add_space_separated_property type obj propname val)
  get_property(oldval ${type} ${obj} PROPERTY ${propname})
  ## get_property yields an empty value for a property that is not set, so
  ## test for that explicitly; otherwise the property would be stored with a
  ## trailing space. The NOTFOUND test is kept for backward compatibility.
  if ("${oldval}" STREQUAL "" OR oldval MATCHES NOTFOUND)
    set_property(${type} ${obj} PROPERTY ${propname} "${val}")
  else ()
    set_property(${type} ${obj} PROPERTY ${propname} "${val} ${oldval}")
  endif ()
endfunction(add_space_separated_property)
|
||||
|
||||
## When USE_GCOV is set, gives each listed library the --coverage compile
## and link flags and links it privately against gcov; otherwise does
## nothing. Only our libraries get these flags because we don't really care
## whether our tests get covered.
function(maybe_add_gcov_to_libraries)
  if (NOT USE_GCOV)
    return()
  endif ()
  foreach(lib ${ARGN})
    add_space_separated_property(TARGET ${lib} COMPILE_FLAGS --coverage)
    add_space_separated_property(TARGET ${lib} LINK_FLAGS --coverage)
    target_link_libraries(${lib} LINK_PRIVATE gcov)
  endforeach()
endfunction()
|
@ -1,111 +0,0 @@
|
||||
include(ExternalProject)
|
||||
|
||||
## ---- lzma (xz), built as an external project ----

## Options passed to xz's configure script.
set(xz_configure_opts --with-pic --enable-static)
if (APPLE)
  ## lzma has some assembly that doesn't work on darwin
  list(APPEND xz_configure_opts --disable-assembler)
endif ()
list(APPEND xz_configure_opts "CC=${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_ARG1}")
if (CMAKE_BUILD_TYPE STREQUAL Debug OR CMAKE_BUILD_TYPE STREQUAL drd)
  list(APPEND xz_configure_opts --enable-debug)
endif ()

## Location of the xz sources; configuration stops if they are missing.
set(XZ_SOURCE_DIR "${TokuDB_SOURCE_DIR}/third_party/xz-4.999.9beta"
    CACHE FILEPATH "Where to find sources for xz (lzma).")
if (NOT EXISTS "${XZ_SOURCE_DIR}/configure")
  message(FATAL_ERROR "Can't find the xz sources. Please check them out to ${XZ_SOURCE_DIR} or modify XZ_SOURCE_DIR.")
endif ()

## Command used to run make inside the external project.
if (CMAKE_GENERATOR STREQUAL Ninja)
  ## ninja doesn't understand "$(MAKE)"
  set(SUBMAKE_COMMAND make)
else ()
  ## use "$(MAKE)" for submakes so they can use the jobserver, doesn't
  ## seem to break Xcode...
  set(SUBMAKE_COMMAND $(MAKE))
endif ()

## The "download" step copies the source tree into the project's source
## directory; only liblzma is built and installed.
FILE(GLOB XZ_ALL_FILES ${XZ_SOURCE_DIR}/*)
ExternalProject_Add(build_lzma
  PREFIX xz
  DOWNLOAD_COMMAND
    cp -a "${XZ_ALL_FILES}" "<SOURCE_DIR>/"
  CONFIGURE_COMMAND
    "<SOURCE_DIR>/configure" ${xz_configure_opts}
    "--prefix=${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/xz"
    "--libdir=${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/xz/lib"
  BUILD_COMMAND
    ${SUBMAKE_COMMAND} -C src/liblzma
  INSTALL_COMMAND
    ${SUBMAKE_COMMAND} -C src/liblzma install
  )

## Extra step that depends on every xz source file and that the download
## and configure steps depend on.
FILE(GLOB_RECURSE XZ_ALL_FILES_RECURSIVE ${XZ_SOURCE_DIR}/*)
ExternalProject_Add_Step(build_lzma reclone_src   # Names of project and custom step
  COMMENT "(re)cloning xz source..."              # Text printed when step executes
  DEPENDERS download configure                    # Steps that depend on this step
  DEPENDS ${XZ_ALL_FILES_RECURSIVE}               # Files on which this step depends
  )

## Mark the installed lzma headers as GENERATED.
foreach(xz_header
    lzma.h
    lzma/base.h
    lzma/bcj.h
    lzma/block.h
    lzma/check.h
    lzma/container.h
    lzma/delta.h
    lzma/filter.h
    lzma/index.h
    lzma/index_hash.h
    lzma/lzma.h
    lzma/stream_flags.h
    lzma/subblock.h
    lzma/version.h
    lzma/vli.h)
  set_source_files_properties(
    "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/xz/include/${xz_header}"
    PROPERTIES GENERATED TRUE)
endforeach()

include_directories("${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/xz/include")

## Imported static library target for the installed liblzma.a.
add_library(lzma STATIC IMPORTED)
set_target_properties(lzma
  PROPERTIES IMPORTED_LOCATION
  "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/xz/lib/liblzma.a")
add_dependencies(lzma build_lzma)
|
||||
|
||||
|
||||
## ---- snappy, built as an external project ----

## Location of the snappy sources; configuration stops if they are missing.
set(SNAPPY_SOURCE_DIR "${TokuDB_SOURCE_DIR}/third_party/snappy-1.1.2"
    CACHE FILEPATH "Where to find sources for snappy.")
if (NOT EXISTS "${SNAPPY_SOURCE_DIR}/CMakeLists.txt")
  message(FATAL_ERROR "Can't find the snappy sources. Please check them out to ${SNAPPY_SOURCE_DIR} or modify SNAPPY_SOURCE_DIR.")
endif ()

## The "download" step copies the source tree into the project's source
## directory; the nested CMake build receives this build's compilers,
## tools, flags and build type.
FILE(GLOB SNAPPY_ALL_FILES ${SNAPPY_SOURCE_DIR}/*)
ExternalProject_Add(build_snappy
  PREFIX snappy
  DOWNLOAD_COMMAND
    cp -a "${SNAPPY_ALL_FILES}" "<SOURCE_DIR>/"
  CMAKE_ARGS
    -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
    -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
    -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
    -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
    -DCMAKE_AR=${CMAKE_AR}
    -DCMAKE_NM=${CMAKE_NM}
    -DCMAKE_RANLIB=${CMAKE_RANLIB}
    -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
    ${USE_PROJECT_CMAKE_MODULE_PATH}
  )

## Extra step that depends on every snappy source file and that the
## download and configure steps depend on.
FILE(GLOB_RECURSE SNAPPY_ALL_FILES_RECURSIVE ${SNAPPY_SOURCE_DIR}/*)
ExternalProject_Add_Step(build_snappy reclone_src # Names of project and custom step
  COMMENT "(re)cloning snappy source..."          # Text printed when step executes
  DEPENDERS download configure                    # Steps that depend on this step
  DEPENDS ${SNAPPY_ALL_FILES_RECURSIVE}           # Files on which this step depends
  )

include_directories("${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/snappy/include")

## Imported static library target for the installed libsnappy.a.
add_library(snappy STATIC IMPORTED)
set_target_properties(snappy
  PROPERTIES IMPORTED_LOCATION
  "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/snappy/lib/libsnappy.a")
add_dependencies(snappy build_snappy)
|
@ -1,96 +0,0 @@
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

## log_code.cc, log_print.cc and log_header.h are produced at build time by
## the logformat tool; mark them as GENERATED.
set_source_files_properties(
  "${CMAKE_CURRENT_BINARY_DIR}/log_code"
  "${CMAKE_CURRENT_BINARY_DIR}/log_print"
  "${CMAKE_CURRENT_BINARY_DIR}/log_header.h"
  PROPERTIES GENERATED TRUE)

## The generator executable itself.
add_executable(logformat logger/logformat.cc)
target_link_libraries(logformat ${LIBTOKUPORTABILITY}_static)
if (USE_GCOV)
  add_space_separated_property(TARGET logformat LINK_FLAGS --coverage)
endif ()

## Running logformat with "." as its argument produces the three files.
add_custom_command(
  OUTPUT
    "${CMAKE_CURRENT_BINARY_DIR}/log_code.cc"
    "${CMAKE_CURRENT_BINARY_DIR}/log_print.cc"
    "${CMAKE_CURRENT_BINARY_DIR}/log_header.h"
  COMMAND $<TARGET_FILE:logformat> .
  DEPENDS logger/logformat
  )

## Target other targets can depend on to get the generated files built.
add_custom_target(generate_log_code
  DEPENDS
    "${CMAKE_CURRENT_BINARY_DIR}/log_code.cc"
    "${CMAKE_CURRENT_BINARY_DIR}/log_print.cc"
    "${CMAKE_CURRENT_BINARY_DIR}/log_header.h"
  )
|
||||
|
||||
## Sources of the ft library, listed without file extension. The last two
## entries are the files generated into the binary directory.
set(FT_SOURCES
  bndata
  ## cachetable/
  cachetable/background_job_manager
  cachetable/cachetable
  cachetable/checkpoint
  cursor
  ft
  ft-cachetable-wrappers
  ft-flusher
  ft-hot-flusher
  ft-ops
  ft-recount-rows
  ft-status
  ft-test-helpers
  ft-verify
  ## loader/
  loader/callbacks
  loader/dbufio
  loader/loader
  loader/pqueue
  leafentry
  le-cursor
  ## logger/
  logger/logcursor
  logger/logfilemgr
  logger/logger
  logger/log_upgrade
  logger/recover
  msg
  msg_buffer
  node
  pivotkeys
  ## serialize/
  serialize/rbtree_mhs
  serialize/block_allocator
  serialize/block_table
  serialize/compress
  serialize/ft_node-serialize
  serialize/ft-node-deserialize
  serialize/ft-serialize
  serialize/quicklz
  serialize/sub_block
  ## txn/
  txn/rollback
  txn/rollback-apply
  txn/rollback-ct-callbacks
  txn/rollback_log_node_cache
  txn/roll
  txn/txn
  txn/txn_child_manager
  txn/txn_manager
  txn/xids
  ule
  ## generated
  "${CMAKE_CURRENT_BINARY_DIR}/log_code"
  "${CMAKE_CURRENT_BINARY_DIR}/log_print"
  )
|
||||
|
||||
## Build the same sources as a shared and as a static library.
add_library(ft        SHARED ${FT_SOURCES})
add_library(ft_static STATIC ${FT_SOURCES})

## we're going to link this into libtokudb.so so it needs to have PIC
set_target_properties(ft_static
  PROPERTIES POSITION_INDEPENDENT_CODE ON)

maybe_add_gcov_to_libraries(ft ft_static)

## Both libraries depend on the other generated targets.
foreach(ft_lib ft ft_static)
  add_dependencies(${ft_lib}
    install_tdb_h generate_log_code build_lzma build_snappy)
endforeach()

## link with lzma (which should be static) and link dependers with zlib
target_link_libraries(ft
  LINK_PRIVATE util_static lzma snappy ${LIBTOKUPORTABILITY})
target_link_libraries(ft
  LINK_PUBLIC z)
target_link_libraries(ft_static
  LINK_PRIVATE lzma snappy)

add_subdirectory(tests)
|
@ -1,675 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include <my_global.h>
|
||||
#include <ft/bndata.h>
|
||||
#include <ft/ft-internal.h>
|
||||
|
||||
using namespace toku;
|
||||
// Returns the sum of three sizes for one key/leafentry pair: the klpair
// struct itself, the key length derived from klpair_len, and the disk size
// of the leafentry the klpair refers to.
uint32_t bn_data::klpair_disksize(const uint32_t klpair_len, const klpair_struct *klpair) const {
    const auto key_bytes = keylen_from_klpair_len(klpair_len);
    const auto le_bytes = leafentry_disksize(get_le_from_klpair(klpair));
    return sizeof(*klpair) + key_bytes + le_bytes;
}
|
||||
|
||||
// Resets the key-size accounting to zero and zeroes the leafentry mempool.
void bn_data::init_zero() {
    m_disksize_of_keys = 0;
    toku_mempool_zero(&m_buffer_mempool);
}
|
||||
|
||||
void bn_data::initialize_empty() {
|
||||
init_zero();
|
||||
m_buffer.create();
|
||||
}
|
||||
|
||||
// Accounts for one added key: grows the tracked key disk size by the key's
// length plus the size of the length field itself.
void bn_data::add_key(uint32_t keylen) {
    const auto added_bytes = sizeof(keylen) + keylen;
    m_disksize_of_keys += added_bytes;
}
|
||||
|
||||
// Accounts for n_keys keys added at once, whose combined klpair length is
// combined_klpair_len. The combined length must be at least one uint32_t
// per key.
void bn_data::add_keys(uint32_t n_keys, uint32_t combined_klpair_len) {
    invariant(n_keys * sizeof(uint32_t) <= combined_klpair_len);
    this->m_disksize_of_keys += combined_klpair_len;
}
|
||||
|
||||
// Accounts for one removed key: shrinks the tracked key disk size by the
// key's length plus the size of the length field itself (the inverse of
// add_key).
void bn_data::remove_key(uint32_t keylen) {
    const auto removed_bytes = sizeof(keylen) + keylen;
    m_disksize_of_keys -= removed_bytes;
}
|
||||
|
||||
// Deserialize from format optimized for keys being inlined.
|
||||
// Currently only supports fixed-length keys.
|
||||
void bn_data::initialize_from_separate_keys_and_vals(uint32_t num_entries, struct rbuf *rb, uint32_t data_size, uint32_t version UU(),
|
||||
uint32_t key_data_size, uint32_t val_data_size, bool all_keys_same_length,
|
||||
uint32_t fixed_klpair_length) {
|
||||
paranoid_invariant(version >= FT_LAYOUT_VERSION_26); // Support was added @26
|
||||
uint32_t ndone_before = rb->ndone;
|
||||
init_zero();
|
||||
invariant(all_keys_same_length); // Until otherwise supported.
|
||||
const void *keys_src;
|
||||
rbuf_literal_bytes(rb, &keys_src, key_data_size);
|
||||
//Generate dmt
|
||||
this->m_buffer.create_from_sorted_memory_of_fixed_size_elements(
|
||||
keys_src, num_entries, key_data_size, fixed_klpair_length);
|
||||
toku_mempool_construct(&this->m_buffer_mempool, val_data_size);
|
||||
|
||||
const void *vals_src;
|
||||
rbuf_literal_bytes(rb, &vals_src, val_data_size);
|
||||
|
||||
if (num_entries > 0) {
|
||||
void *vals_dest = toku_mempool_malloc(&this->m_buffer_mempool, val_data_size);
|
||||
paranoid_invariant_notnull(vals_dest);
|
||||
memcpy(vals_dest, vals_src, val_data_size);
|
||||
}
|
||||
|
||||
add_keys(num_entries, num_entries * fixed_klpair_length);
|
||||
|
||||
toku_note_deserialized_basement_node(all_keys_same_length);
|
||||
|
||||
invariant(rb->ndone - ndone_before == data_size);
|
||||
}
|
||||
|
||||
static int
|
||||
wbufwriteleafentry(const void* key, const uint32_t keylen, const LEAFENTRY &le, const uint32_t UU(idx), struct wbuf * const wb) {
|
||||
// need to pack the leafentry as it was in versions
|
||||
// where the key was integrated into it (< 26)
|
||||
uint32_t begin_spot UU() = wb->ndone;
|
||||
uint32_t le_disk_size = leafentry_disksize(le);
|
||||
wbuf_nocrc_uint8_t(wb, le->type);
|
||||
wbuf_nocrc_uint32_t(wb, keylen);
|
||||
if (le->type == LE_CLEAN) {
|
||||
wbuf_nocrc_uint32_t(wb, le->u.clean.vallen);
|
||||
wbuf_nocrc_literal_bytes(wb, key, keylen);
|
||||
wbuf_nocrc_literal_bytes(wb, le->u.clean.val, le->u.clean.vallen);
|
||||
}
|
||||
else {
|
||||
paranoid_invariant(le->type == LE_MVCC);
|
||||
wbuf_nocrc_uint32_t(wb, le->u.mvcc.num_cxrs);
|
||||
wbuf_nocrc_uint8_t(wb, le->u.mvcc.num_pxrs);
|
||||
wbuf_nocrc_literal_bytes(wb, key, keylen);
|
||||
wbuf_nocrc_literal_bytes(wb, le->u.mvcc.xrs, le_disk_size - (1 + 4 + 1));
|
||||
}
|
||||
uint32_t end_spot UU() = wb->ndone;
|
||||
paranoid_invariant((end_spot - begin_spot) == keylen + sizeof(keylen) + le_disk_size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bn_data::serialize_to_wbuf(struct wbuf *const wb) {
|
||||
prepare_to_serialize();
|
||||
serialize_header(wb);
|
||||
if (m_buffer.value_length_is_fixed()) {
|
||||
serialize_rest(wb);
|
||||
} else {
|
||||
//
|
||||
// iterate over leafentries and place them into the buffer
|
||||
//
|
||||
iterate<struct wbuf, wbufwriteleafentry>(wb);
|
||||
}
|
||||
}
|
||||
|
||||
// If we have fixed-length keys, we prepare the dmt and mempool.
|
||||
// The mempool is prepared by removing any fragmented space and ordering leafentries in the same order as their keys.
|
||||
void bn_data::prepare_to_serialize(void) {
|
||||
if (m_buffer.value_length_is_fixed()) {
|
||||
m_buffer.prepare_for_serialize();
|
||||
dmt_compress_kvspace(0, nullptr, true); // Gets it ready for easy serialization.
|
||||
}
|
||||
}
|
||||
|
||||
// Writes the header to wb, in this order: key_data_size, val_data_size,
// fixed_klpair_length, all_keys_same_length, keys_vals_separate.
// Both trailing flags carry the same value: whether the dmt's value length
// is fixed.
void bn_data::serialize_header(struct wbuf *wb) const {
    const bool is_fixed_length = m_buffer.value_length_is_fixed();

    wbuf_nocrc_uint(wb, m_disksize_of_keys);                             // key_data_size
    wbuf_nocrc_uint(wb, toku_mempool_get_used_size(&m_buffer_mempool));  // val_data_size
    wbuf_nocrc_uint(wb, m_buffer.get_fixed_length());                    // fixed_klpair_length
    wbuf_nocrc_uint8_t(wb, is_fixed_length);                             // all_keys_same_length
    wbuf_nocrc_uint8_t(wb, is_fixed_length);                             // keys_vals_separate
}
|
||||
|
||||
void bn_data::serialize_rest(struct wbuf *wb) const {
|
||||
//Write keys
|
||||
invariant(m_buffer.value_length_is_fixed()); //Assumes prepare_to_serialize was called
|
||||
m_buffer.serialize_values(m_disksize_of_keys, wb);
|
||||
|
||||
//Write leafentries
|
||||
//Just ran dmt_compress_kvspace so there is no fragmentation and also leafentries are in sorted order.
|
||||
paranoid_invariant(toku_mempool_get_frag_size(&m_buffer_mempool) == 0);
|
||||
uint32_t val_data_size = toku_mempool_get_used_size(&m_buffer_mempool);
|
||||
wbuf_nocrc_literal_bytes(wb, toku_mempool_get_base(&m_buffer_mempool), val_data_size);
|
||||
}
|
||||
|
||||
// Deserialize from rbuf
|
||||
void bn_data::deserialize_from_rbuf(uint32_t num_entries, struct rbuf *rb, uint32_t data_size, uint32_t version) {
|
||||
uint32_t key_data_size = data_size; // overallocate if < version 26 (best guess that is guaranteed not too small)
|
||||
uint32_t val_data_size = data_size; // overallocate if < version 26 (best guess that is guaranteed not too small)
|
||||
|
||||
bool all_keys_same_length = false;
|
||||
bool keys_vals_separate = false;
|
||||
uint32_t fixed_klpair_length = 0;
|
||||
|
||||
// In version 25 and older there is no header. Skip reading header for old version.
|
||||
if (version >= FT_LAYOUT_VERSION_26) {
|
||||
uint32_t ndone_before = rb->ndone;
|
||||
key_data_size = rbuf_int(rb);
|
||||
val_data_size = rbuf_int(rb);
|
||||
fixed_klpair_length = rbuf_int(rb); // 0 if !all_keys_same_length
|
||||
all_keys_same_length = rbuf_char(rb);
|
||||
keys_vals_separate = rbuf_char(rb);
|
||||
invariant(all_keys_same_length == keys_vals_separate); // Until we support otherwise
|
||||
uint32_t header_size = rb->ndone - ndone_before;
|
||||
data_size -= header_size;
|
||||
invariant(header_size == HEADER_LENGTH);
|
||||
if (keys_vals_separate) {
|
||||
invariant(fixed_klpair_length >= sizeof(klpair_struct) || num_entries == 0);
|
||||
initialize_from_separate_keys_and_vals(num_entries, rb, data_size, version,
|
||||
key_data_size, val_data_size, all_keys_same_length,
|
||||
fixed_klpair_length);
|
||||
return;
|
||||
}
|
||||
}
|
||||
// Version >= 26 and version 25 deserialization are now identical except that <= 25 might allocate too much memory.
|
||||
const void *bytes;
|
||||
rbuf_literal_bytes(rb, &bytes, data_size);
|
||||
const unsigned char *CAST_FROM_VOIDP(buf, bytes);
|
||||
if (data_size == 0) {
|
||||
invariant_zero(num_entries);
|
||||
}
|
||||
init_zero();
|
||||
klpair_dmt_t::builder dmt_builder;
|
||||
dmt_builder.create(num_entries, key_data_size);
|
||||
|
||||
// TODO(leif): clean this up (#149)
|
||||
unsigned char *newmem = nullptr;
|
||||
// add 25% extra wiggle room
|
||||
uint32_t allocated_bytes_vals = val_data_size + (val_data_size / 4);
|
||||
CAST_FROM_VOIDP(newmem, toku_xmalloc(allocated_bytes_vals));
|
||||
const unsigned char* curr_src_pos = buf;
|
||||
unsigned char* curr_dest_pos = newmem;
|
||||
for (uint32_t i = 0; i < num_entries; i++) {
|
||||
uint8_t curr_type = curr_src_pos[0];
|
||||
curr_src_pos++;
|
||||
// first thing we do is lay out the key,
|
||||
// to do so, we must extract it from the leafentry
|
||||
// and write it in
|
||||
uint32_t keylen = 0;
|
||||
const void* keyp = nullptr;
|
||||
keylen = *(uint32_t *)curr_src_pos;
|
||||
curr_src_pos += sizeof(uint32_t);
|
||||
uint32_t clean_vallen = 0;
|
||||
uint32_t num_cxrs = 0;
|
||||
uint8_t num_pxrs = 0;
|
||||
if (curr_type == LE_CLEAN) {
|
||||
clean_vallen = toku_dtoh32(*(uint32_t *)curr_src_pos);
|
||||
curr_src_pos += sizeof(clean_vallen); // val_len
|
||||
keyp = curr_src_pos;
|
||||
curr_src_pos += keylen;
|
||||
}
|
||||
else {
|
||||
paranoid_invariant(curr_type == LE_MVCC);
|
||||
num_cxrs = toku_htod32(*(uint32_t *)curr_src_pos);
|
||||
curr_src_pos += sizeof(uint32_t); // num_cxrs
|
||||
num_pxrs = curr_src_pos[0];
|
||||
curr_src_pos += sizeof(uint8_t); //num_pxrs
|
||||
keyp = curr_src_pos;
|
||||
curr_src_pos += keylen;
|
||||
}
|
||||
uint32_t le_offset = curr_dest_pos - newmem;
|
||||
dmt_builder.append(klpair_dmtwriter(keylen, le_offset, keyp));
|
||||
add_key(keylen);
|
||||
|
||||
// now curr_dest_pos is pointing to where the leafentry should be packed
|
||||
curr_dest_pos[0] = curr_type;
|
||||
curr_dest_pos++;
|
||||
if (curr_type == LE_CLEAN) {
|
||||
*(uint32_t *)curr_dest_pos = toku_htod32(clean_vallen);
|
||||
curr_dest_pos += sizeof(clean_vallen);
|
||||
memcpy(curr_dest_pos, curr_src_pos, clean_vallen); // copy the val
|
||||
curr_dest_pos += clean_vallen;
|
||||
curr_src_pos += clean_vallen;
|
||||
}
|
||||
else {
|
||||
// pack num_cxrs and num_pxrs
|
||||
*(uint32_t *)curr_dest_pos = toku_htod32(num_cxrs);
|
||||
curr_dest_pos += sizeof(num_cxrs);
|
||||
*(uint8_t *)curr_dest_pos = num_pxrs;
|
||||
curr_dest_pos += sizeof(num_pxrs);
|
||||
// now we need to pack the rest of the data
|
||||
uint32_t num_rest_bytes = leafentry_rest_memsize(num_pxrs, num_cxrs, const_cast<uint8_t*>(curr_src_pos));
|
||||
memcpy(curr_dest_pos, curr_src_pos, num_rest_bytes);
|
||||
curr_dest_pos += num_rest_bytes;
|
||||
curr_src_pos += num_rest_bytes;
|
||||
}
|
||||
}
|
||||
dmt_builder.build(&this->m_buffer);
|
||||
toku_note_deserialized_basement_node(m_buffer.value_length_is_fixed());
|
||||
|
||||
uint32_t num_bytes_read = (uint32_t)(curr_src_pos - buf);
|
||||
invariant(num_bytes_read == data_size);
|
||||
|
||||
uint32_t num_bytes_written = curr_dest_pos - newmem + m_disksize_of_keys;
|
||||
invariant(num_bytes_written == data_size);
|
||||
toku_mempool_init(&m_buffer_mempool, newmem, (size_t)(curr_dest_pos - newmem), allocated_bytes_vals);
|
||||
|
||||
invariant(get_disk_size() == data_size);
|
||||
// Versions older than 26 might have allocated too much memory. Try to shrink the mempool now that we
|
||||
// know how much memory we need.
|
||||
if (version < FT_LAYOUT_VERSION_26) {
|
||||
// Unnecessary after version 26
|
||||
// Reallocate smaller mempool to save memory
|
||||
invariant_zero(toku_mempool_get_frag_size(&m_buffer_mempool));
|
||||
toku_mempool_realloc_larger(&m_buffer_mempool, toku_mempool_get_used_size(&m_buffer_mempool));
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t bn_data::get_memory_size() {
|
||||
uint64_t retval = 0;
|
||||
//TODO: Maybe ask for memory_size instead of mempool_footprint (either this todo or the next)
|
||||
// include fragmentation overhead but do not include space in the
|
||||
// mempool that has not yet been allocated for leaf entries
|
||||
size_t poolsize = toku_mempool_footprint(&m_buffer_mempool);
|
||||
retval += poolsize;
|
||||
// This one includes not-yet-allocated for nodes (just like old constant-key omt)
|
||||
//TODO: Maybe ask for mempool_footprint instead of memory_size.
|
||||
retval += m_buffer.memory_size();
|
||||
invariant(retval >= get_disk_size());
|
||||
return retval;
|
||||
}
|
||||
|
||||
void bn_data::delete_leafentry (
|
||||
uint32_t idx,
|
||||
uint32_t keylen,
|
||||
uint32_t old_le_size
|
||||
)
|
||||
{
|
||||
remove_key(keylen);
|
||||
m_buffer.delete_at(idx);
|
||||
toku_mempool_mfree(&m_buffer_mempool, nullptr, old_le_size);
|
||||
}
|
||||
|
||||
/* mempool support */
|
||||
|
||||
struct dmt_compressor_state {
|
||||
struct mempool *new_kvspace;
|
||||
class bn_data *bd;
|
||||
};
|
||||
|
||||
static int move_it (const uint32_t, klpair_struct *klpair, const uint32_t idx UU(), struct dmt_compressor_state * const oc) {
|
||||
LEAFENTRY old_le = oc->bd->get_le_from_klpair(klpair);
|
||||
uint32_t size = leafentry_memsize(old_le);
|
||||
void* newdata = toku_mempool_malloc(oc->new_kvspace, size);
|
||||
paranoid_invariant_notnull(newdata); // we do this on a fresh mempool, so nothing bad should happen
|
||||
memcpy(newdata, old_le, size);
|
||||
klpair->le_offset = toku_mempool_get_offset_from_pointer_and_base(oc->new_kvspace, newdata);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Compress things, and grow or shrink the mempool if needed.
|
||||
// May (always if force_compress) have a side effect of putting contents of mempool in sorted order.
|
||||
void bn_data::dmt_compress_kvspace(size_t added_size, void **maybe_free, bool force_compress) {
|
||||
uint32_t total_size_needed = toku_mempool_get_used_size(&m_buffer_mempool) + added_size;
|
||||
|
||||
// If there is no fragmentation, e.g. in serial inserts, we can just increase the size
|
||||
// of the mempool and move things over with a cheap memcpy. If force_compress is true,
|
||||
// the caller needs the side effect that all contents are put in sorted order.
|
||||
bool do_compress = toku_mempool_get_frag_size(&m_buffer_mempool) > 0 || force_compress;
|
||||
|
||||
void *old_mempool_base = toku_mempool_get_base(&m_buffer_mempool);
|
||||
struct mempool new_kvspace;
|
||||
if (do_compress) {
|
||||
size_t requested_size = force_compress ? total_size_needed : ((total_size_needed * 3) / 2);
|
||||
toku_mempool_construct(&new_kvspace, requested_size);
|
||||
struct dmt_compressor_state oc = { &new_kvspace, this };
|
||||
m_buffer.iterate_ptr< decltype(oc), move_it >(&oc);
|
||||
} else {
|
||||
toku_mempool_construct(&new_kvspace, total_size_needed);
|
||||
size_t old_offset_limit = toku_mempool_get_offset_limit(&m_buffer_mempool);
|
||||
void *new_mempool_base = toku_mempool_malloc(&new_kvspace, old_offset_limit);
|
||||
memcpy(new_mempool_base, old_mempool_base, old_offset_limit);
|
||||
}
|
||||
|
||||
if (maybe_free) {
|
||||
*maybe_free = old_mempool_base;
|
||||
} else {
|
||||
toku_free(old_mempool_base);
|
||||
}
|
||||
m_buffer_mempool = new_kvspace;
|
||||
}
|
||||
|
||||
// Effect: Allocate a new object of size SIZE in MP. If MP runs out of space, allocate a new mempool space, and copy all the items
|
||||
// from the DMT (which items refer to items in the old mempool) into the new mempool.
|
||||
// If MAYBE_FREE is nullptr then free the old mempool's space.
|
||||
// Otherwise, store the old mempool's space in maybe_free.
|
||||
LEAFENTRY bn_data::mempool_malloc_and_update_dmt(size_t size, void **maybe_free) {
    // Fast path: the current mempool still has room.
    void *space = toku_mempool_malloc(&m_buffer_mempool, size);
    if (space != nullptr) {
        return static_cast<LEAFENTRY>(space);
    }

    // Slow path: rebuild the mempool with room for `size` more bytes, then retry.
    // The rebuilt pool was sized to fit the request, so the retry must succeed.
    dmt_compress_kvspace(size, maybe_free, false);
    space = toku_mempool_malloc(&m_buffer_mempool, size);
    paranoid_invariant_notnull(space);
    return static_cast<LEAFENTRY>(space);
}
|
||||
|
||||
void bn_data::get_space_for_overwrite(
|
||||
uint32_t idx,
|
||||
const void* keyp UU(),
|
||||
uint32_t keylen UU(),
|
||||
uint32_t old_keylen,
|
||||
uint32_t old_le_size,
|
||||
uint32_t new_size,
|
||||
LEAFENTRY* new_le_space,
|
||||
void **const maybe_free
|
||||
)
|
||||
{
|
||||
*maybe_free = nullptr;
|
||||
LEAFENTRY new_le = mempool_malloc_and_update_dmt(new_size, maybe_free);
|
||||
toku_mempool_mfree(&m_buffer_mempool, nullptr, old_le_size);
|
||||
klpair_struct* klp = nullptr;
|
||||
uint32_t klpair_len;
|
||||
int r = m_buffer.fetch(idx, &klpair_len, &klp);
|
||||
invariant_zero(r);
|
||||
paranoid_invariant(klp!=nullptr);
|
||||
// Old key length should be consistent with what is stored in the DMT
|
||||
invariant(keylen_from_klpair_len(klpair_len) == old_keylen);
|
||||
|
||||
size_t new_le_offset = toku_mempool_get_offset_from_pointer_and_base(&this->m_buffer_mempool, new_le);
|
||||
paranoid_invariant(new_le_offset <= UINT32_MAX - new_size); // Not using > 4GB
|
||||
klp->le_offset = new_le_offset;
|
||||
|
||||
paranoid_invariant(new_le == get_le_from_klpair(klp));
|
||||
*new_le_space = new_le;
|
||||
}
|
||||
|
||||
void bn_data::get_space_for_insert(
|
||||
uint32_t idx,
|
||||
const void* keyp,
|
||||
uint32_t keylen,
|
||||
size_t size,
|
||||
LEAFENTRY* new_le_space,
|
||||
void **const maybe_free
|
||||
)
|
||||
{
|
||||
add_key(keylen);
|
||||
|
||||
*maybe_free = nullptr;
|
||||
LEAFENTRY new_le = mempool_malloc_and_update_dmt(size, maybe_free);
|
||||
size_t new_le_offset = toku_mempool_get_offset_from_pointer_and_base(&this->m_buffer_mempool, new_le);
|
||||
|
||||
klpair_dmtwriter kl(keylen, new_le_offset, keyp);
|
||||
m_buffer.insert_at(kl, idx);
|
||||
|
||||
*new_le_space = new_le;
|
||||
}
|
||||
|
||||
class split_klpairs_extra {
|
||||
bn_data *const m_left_bn;
|
||||
bn_data *const m_right_bn;
|
||||
klpair_dmt_t::builder *const m_left_builder;
|
||||
klpair_dmt_t::builder *const m_right_builder;
|
||||
struct mempool *const m_left_dest_mp;
|
||||
uint32_t m_split_at;
|
||||
|
||||
struct mempool *left_dest_mp(void) const { return m_left_dest_mp; }
|
||||
struct mempool *right_dest_mp(void) const { return &m_right_bn->m_buffer_mempool; }
|
||||
|
||||
void copy_klpair(const uint32_t klpair_len, const klpair_struct &klpair,
|
||||
klpair_dmt_t::builder *const builder,
|
||||
struct mempool *const dest_mp,
|
||||
bn_data *const bn) {
|
||||
LEAFENTRY old_le = m_left_bn->get_le_from_klpair(&klpair);
|
||||
size_t le_size = leafentry_memsize(old_le);
|
||||
|
||||
void *new_le = toku_mempool_malloc(dest_mp, le_size);
|
||||
paranoid_invariant_notnull(new_le);
|
||||
memcpy(new_le, old_le, le_size);
|
||||
size_t le_offset = toku_mempool_get_offset_from_pointer_and_base(dest_mp, new_le);
|
||||
size_t keylen = keylen_from_klpair_len(klpair_len);
|
||||
builder->append(klpair_dmtwriter(keylen, le_offset, klpair.key));
|
||||
|
||||
bn->add_key(keylen);
|
||||
}
|
||||
|
||||
int move_leafentry(const uint32_t klpair_len, const klpair_struct &klpair, const uint32_t idx) {
|
||||
m_left_bn->remove_key(keylen_from_klpair_len(klpair_len));
|
||||
|
||||
if (idx < m_split_at) {
|
||||
copy_klpair(klpair_len, klpair, m_left_builder, left_dest_mp(), m_left_bn);
|
||||
} else {
|
||||
copy_klpair(klpair_len, klpair, m_right_builder, right_dest_mp(), m_right_bn);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
public:
|
||||
split_klpairs_extra(bn_data *const left_bn, bn_data *const right_bn,
|
||||
klpair_dmt_t::builder *const left_builder,
|
||||
klpair_dmt_t::builder *const right_builder,
|
||||
struct mempool *const left_new_mp,
|
||||
uint32_t split_at)
|
||||
: m_left_bn(left_bn),
|
||||
m_right_bn(right_bn),
|
||||
m_left_builder(left_builder),
|
||||
m_right_builder(right_builder),
|
||||
m_left_dest_mp(left_new_mp),
|
||||
m_split_at(split_at) {}
|
||||
static int cb(const uint32_t klpair_len, const klpair_struct &klpair, const uint32_t idx, split_klpairs_extra *const thisp) {
|
||||
return thisp->move_leafentry(klpair_len, klpair, idx);
|
||||
}
|
||||
};
|
||||
|
||||
// Split this basement node: indexes [0, split_at) stay here, indexes
// [split_at, num_klpairs()) move to right_bd. Both nodes end up with freshly
// built dmts and mempools.
void bn_data::split_klpairs(
    bn_data* right_bd,
    uint32_t split_at //lower bound inclusive for right_bd
    )
{
    // The split algorithm should never call this when splitting on a boundary,
    // so there must be at least one leafentry to move to the right.
    paranoid_invariant(split_at < num_klpairs());

    right_bd->init_zero();

    // Each destination pool is sized for the whole current contents:
    // overkill, but safe. Both are shrunk at the end.
    const size_t pool_bytes = toku_mempool_get_used_size(&m_buffer_mempool);

    struct mempool fresh_left_pool;
    toku_mempool_construct(&fresh_left_pool, pool_bytes);

    struct mempool *right_mp = &right_bd->m_buffer_mempool;
    toku_mempool_construct(right_mp, pool_bytes);

    // Likewise each builder is given the full key size: overkill, but safe
    // (builder will realloc at the end).
    klpair_dmt_t::builder left_dmt_builder;
    left_dmt_builder.create(split_at, m_disksize_of_keys);

    klpair_dmt_t::builder right_dmt_builder;
    right_dmt_builder.create(num_klpairs() - split_at, m_disksize_of_keys);

    // Distribute every klpair (and its leafentry) to one of the two sides.
    split_klpairs_extra extra(this, right_bd, &left_dmt_builder, &right_dmt_builder, &fresh_left_pool, split_at);
    const int r = m_buffer.iterate<split_klpairs_extra, split_klpairs_extra::cb>(&extra);
    invariant_zero(r);

    // The old dmt and mempool have been fully copied out; replace them.
    m_buffer.destroy();
    toku_mempool_destroy(&m_buffer_mempool);
    m_buffer_mempool = fresh_left_pool;

    left_dmt_builder.build(&m_buffer);
    right_dmt_builder.build(&right_bd->m_buffer);

    // Potentially shrink the memory pools: both were overallocated above.
    struct mempool *const left_mp = &m_buffer_mempool;
    paranoid_invariant_zero(toku_mempool_get_frag_size(left_mp));
    toku_mempool_realloc_larger(left_mp, toku_mempool_get_used_size(left_mp));
    paranoid_invariant_zero(toku_mempool_get_frag_size(right_mp));
    toku_mempool_realloc_larger(right_mp, toku_mempool_get_used_size(right_mp));
}
|
||||
|
||||
uint64_t bn_data::get_disk_size() {
|
||||
return m_disksize_of_keys +
|
||||
toku_mempool_get_used_size(&m_buffer_mempool);
|
||||
}
|
||||
|
||||
struct verify_le_in_mempool_state {
|
||||
size_t offset_limit;
|
||||
class bn_data *bd;
|
||||
};
|
||||
|
||||
static int verify_le_in_mempool (const uint32_t, klpair_struct *klpair, const uint32_t idx UU(), struct verify_le_in_mempool_state * const state) {
|
||||
invariant(klpair->le_offset < state->offset_limit);
|
||||
|
||||
LEAFENTRY le = state->bd->get_le_from_klpair(klpair);
|
||||
uint32_t size = leafentry_memsize(le);
|
||||
|
||||
size_t end_offset = klpair->le_offset+size;
|
||||
|
||||
invariant(end_offset <= state->offset_limit);
|
||||
return 0;
|
||||
}
|
||||
|
||||
//This is a debug-only (paranoid) verification.
|
||||
//Verifies the dmt is valid, and all leafentries are entirely in the mempool's memory.
|
||||
void bn_data::verify_mempool(void) {
|
||||
//Verify the dmt itself <- paranoid and slow
|
||||
m_buffer.verify();
|
||||
|
||||
verify_le_in_mempool_state state = { .offset_limit = toku_mempool_get_offset_limit(&m_buffer_mempool), .bd = this };
|
||||
//Verify every leafentry pointed to by the keys in the dmt are fully inside the mempool
|
||||
m_buffer.iterate_ptr< decltype(state), verify_le_in_mempool >(&state);
|
||||
}
|
||||
|
||||
uint32_t bn_data::num_klpairs(void) const {
|
||||
return m_buffer.size();
|
||||
}
|
||||
|
||||
// Release the key dmt and the leafentry mempool and reset the key-size accounting.
void bn_data::destroy(void) {
    m_disksize_of_keys = 0;

    // The buffer may have been freed already, in some cases.
    m_buffer.destroy();
    toku_mempool_destroy(&m_buffer_mempool);
}
|
||||
|
||||
void bn_data::set_contents_as_clone_of_sorted_array(
|
||||
uint32_t num_les,
|
||||
const void** old_key_ptrs,
|
||||
uint32_t* old_keylens,
|
||||
LEAFENTRY* old_les,
|
||||
size_t *le_sizes,
|
||||
size_t total_key_size,
|
||||
size_t total_le_size
|
||||
)
|
||||
{
|
||||
//Enforce "just created" invariant.
|
||||
paranoid_invariant_zero(m_disksize_of_keys);
|
||||
paranoid_invariant_zero(num_klpairs());
|
||||
paranoid_invariant_null(toku_mempool_get_base(&m_buffer_mempool));
|
||||
paranoid_invariant_zero(toku_mempool_get_size(&m_buffer_mempool));
|
||||
|
||||
toku_mempool_construct(&m_buffer_mempool, total_le_size);
|
||||
m_buffer.destroy();
|
||||
m_disksize_of_keys = 0;
|
||||
|
||||
klpair_dmt_t::builder dmt_builder;
|
||||
dmt_builder.create(num_les, total_key_size);
|
||||
|
||||
for (uint32_t idx = 0; idx < num_les; idx++) {
|
||||
void* new_le = toku_mempool_malloc(&m_buffer_mempool, le_sizes[idx]);
|
||||
paranoid_invariant_notnull(new_le);
|
||||
memcpy(new_le, old_les[idx], le_sizes[idx]);
|
||||
size_t le_offset = toku_mempool_get_offset_from_pointer_and_base(&m_buffer_mempool, new_le);
|
||||
dmt_builder.append(klpair_dmtwriter(old_keylens[idx], le_offset, old_key_ptrs[idx]));
|
||||
add_key(old_keylens[idx]);
|
||||
}
|
||||
dmt_builder.build(&this->m_buffer);
|
||||
}
|
||||
|
||||
// Resolve the leafentry a klpair refers to: klpair->le_offset is an offset
// into this basement node's leafentry mempool.
LEAFENTRY bn_data::get_le_from_klpair(const klpair_struct *klpair) const {
    void *const raw = toku_mempool_get_pointer_from_base_and_offset(&this->m_buffer_mempool, klpair->le_offset);
    return static_cast<LEAFENTRY>(raw);
}
|
||||
|
||||
|
||||
// get info about a single leafentry by index
|
||||
int bn_data::fetch_le(uint32_t idx, LEAFENTRY *le) {
|
||||
klpair_struct* klpair = nullptr;
|
||||
int r = m_buffer.fetch(idx, nullptr, &klpair);
|
||||
if (r == 0) {
|
||||
*le = get_le_from_klpair(klpair);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
int bn_data::fetch_klpair(uint32_t idx, LEAFENTRY *le, uint32_t *len, void** key) {
|
||||
klpair_struct* klpair = nullptr;
|
||||
uint32_t klpair_len;
|
||||
int r = m_buffer.fetch(idx, &klpair_len, &klpair);
|
||||
if (r == 0) {
|
||||
*len = keylen_from_klpair_len(klpair_len);
|
||||
*key = klpair->key;
|
||||
*le = get_le_from_klpair(klpair);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
int bn_data::fetch_klpair_disksize(uint32_t idx, size_t *size) {
|
||||
klpair_struct* klpair = nullptr;
|
||||
uint32_t klpair_len;
|
||||
int r = m_buffer.fetch(idx, &klpair_len, &klpair);
|
||||
if (r == 0) {
|
||||
*size = klpair_disksize(klpair_len, klpair);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
int bn_data::fetch_key_and_len(uint32_t idx, uint32_t *len, void** key) {
|
||||
klpair_struct* klpair = nullptr;
|
||||
uint32_t klpair_len;
|
||||
int r = m_buffer.fetch(idx, &klpair_len, &klpair);
|
||||
if (r == 0) {
|
||||
*len = keylen_from_klpair_len(klpair_len);
|
||||
*key = klpair->key;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
// Make this basement node a copy of orig_bn_data: the leafentry mempool, the
// key dmt and the key-size accounting are each cloned/copied from the original.
void bn_data::clone(bn_data* orig_bn_data) {
    bn_data *const src = orig_bn_data;
    toku_mempool_clone(&src->m_buffer_mempool, &m_buffer_mempool);
    m_buffer.clone(src->m_buffer);
    m_disksize_of_keys = src->m_disksize_of_keys;
}
|
||||
|
@ -1,333 +0,0 @@
|
||||
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "util/dmt.h"
|
||||
#include "util/mempool.h"
|
||||
|
||||
#include "ft/leafentry.h"
|
||||
#include "ft/serialize/wbuf.h"
|
||||
|
||||
// Key/leafentry pair stored in a dmt. The key is inlined, the offset (in leafentry mempool) is stored for the leafentry.
|
||||
struct klpair_struct {
    // Where the leafentry for this key lives, as an offset into the
    // basement node's leafentry mempool.
    uint32_t le_offset;
    // Inlined key bytes. Declared with zero length: the key occupies the bytes
    // that follow the fixed part, so sizeof(klpair_struct) covers only le_offset.
    uint8_t key[0];
};
|
||||
|
||||
static constexpr uint32_t keylen_from_klpair_len(const uint32_t klpair_len) {
|
||||
return klpair_len - __builtin_offsetof(klpair_struct, key);
|
||||
}
|
||||
|
||||
|
||||
static_assert(__builtin_offsetof(klpair_struct, key) == 1*sizeof(uint32_t), "klpair alignment issues");
|
||||
static_assert(__builtin_offsetof(klpair_struct, key) == sizeof(klpair_struct), "klpair size issues");
|
||||
|
||||
// A wrapper for the heaviside function provided to dmt->find*.
|
||||
// Needed because the heaviside functions provided to bndata do not know about the internal types.
|
||||
// Alternative to this wrapper is to expose accessor functions and rewrite all the external heaviside functions.
|
||||
template<typename dmtcmp_t,
|
||||
int (*h)(const DBT &, const dmtcmp_t &)>
|
||||
static int klpair_find_wrapper(const uint32_t klpair_len, const klpair_struct &klpair, const dmtcmp_t &extra) {
|
||||
DBT kdbt;
|
||||
kdbt.data = const_cast<void*>(reinterpret_cast<const void*>(klpair.key));
|
||||
kdbt.size = keylen_from_klpair_len(klpair_len);
|
||||
return h(kdbt, extra);
|
||||
}
|
||||
|
||||
// Extra argument used by klpair_iterate_wrapper: bundles the caller's own
// extra with the basement node needed to resolve leafentries.
// Member order matters: the struct is brace-initialized positionally.
template<typename inner_iterate_extra_t>
struct klpair_iterate_extra {
    inner_iterate_extra_t *inner;  // caller-supplied extra, forwarded to the callback
    const class bn_data * bd;      // basement node whose klpairs are being iterated
};
|
||||
|
||||
// A wrapper for the high-order function provided to dmt->iterate*
|
||||
// Needed because the iterate callbacks provided to bndata do not know about the internal types.
|
||||
// Alternative to this wrapper is to expose accessor functions and rewrite all the external iterate callbacks.
|
||||
template<typename iterate_extra_t,
|
||||
int (*f)(const void * key, const uint32_t keylen, const LEAFENTRY &, const uint32_t idx, iterate_extra_t *const)>
|
||||
static int klpair_iterate_wrapper(const uint32_t klpair_len, const klpair_struct &klpair, const uint32_t idx, klpair_iterate_extra<iterate_extra_t> *const extra) {
|
||||
const void* key = &klpair.key;
|
||||
LEAFENTRY le = extra->bd->get_le_from_klpair(&klpair);
|
||||
return f(key, keylen_from_klpair_len(klpair_len), le, idx, extra->inner);
|
||||
}
|
||||
|
||||
|
||||
namespace toku {
|
||||
// dmt writer for klpair_struct
|
||||
class klpair_dmtwriter {
|
||||
public:
|
||||
// Return the size needed for the klpair_struct that this dmtwriter represents
|
||||
size_t get_size(void) const {
|
||||
return sizeof(klpair_struct) + this->keylen;
|
||||
}
|
||||
// Write the klpair_struct this dmtwriter represents to a destination
|
||||
void write_to(klpair_struct *const dest) const {
|
||||
dest->le_offset = this->le_offset;
|
||||
memcpy(dest->key, this->keyp, this->keylen);
|
||||
}
|
||||
|
||||
klpair_dmtwriter(uint32_t _keylen, uint32_t _le_offset, const void* _keyp)
|
||||
: keylen(_keylen), le_offset(_le_offset), keyp(_keyp) {}
|
||||
klpair_dmtwriter(const uint32_t klpair_len, klpair_struct *const src)
|
||||
: keylen(keylen_from_klpair_len(klpair_len)), le_offset(src->le_offset), keyp(src->key) {}
|
||||
private:
|
||||
const uint32_t keylen;
|
||||
const uint32_t le_offset;
|
||||
const void* keyp;
|
||||
};
|
||||
}
|
||||
|
||||
// The key dmt of a basement node: stores klpair_struct values, hands them out
// as klpair_struct*, and is populated through klpair_dmtwriter.
using klpair_dmt_t = toku::dmt<klpair_struct, klpair_struct*, toku::klpair_dmtwriter>;
|
||||
// This class stores the data associated with a basement node
|
||||
class bn_data {
|
||||
public:
|
||||
// Initialize an empty bn_data _without_ a dmt backing.
|
||||
// Externally only used for deserialization.
|
||||
void init_zero(void);
|
||||
|
||||
// Initialize an empty bn_data _with_ a dmt
|
||||
void initialize_empty(void);
|
||||
|
||||
// Deserialize a bn_data from rbuf.
|
||||
// This is the entry point for deserialization.
|
||||
void deserialize_from_rbuf(uint32_t num_entries, struct rbuf *rb, uint32_t data_size, uint32_t version);
|
||||
|
||||
// Retrieve the memory footprint of this basement node.
|
||||
// May over or under count: see Percona/PerconaFT#136
|
||||
// Also see dmt's implementation.
|
||||
uint64_t get_memory_size(void);
|
||||
|
||||
// Get the serialized size of this basement node.
|
||||
uint64_t get_disk_size(void);
|
||||
|
||||
// Perform (paranoid) verification that all leafentries are fully contained within the mempool
|
||||
void verify_mempool(void);
|
||||
|
||||
// size() of key dmt
|
||||
uint32_t num_klpairs(void) const;
|
||||
|
||||
// iterate() on key dmt (and associated leafentries)
|
||||
template<typename iterate_extra_t,
|
||||
int (*f)(const void * key, const uint32_t keylen, const LEAFENTRY &, const uint32_t, iterate_extra_t *const)>
|
||||
int iterate(iterate_extra_t *const iterate_extra) const {
|
||||
return iterate_on_range<iterate_extra_t, f>(0, num_klpairs(), iterate_extra);
|
||||
}
|
||||
|
||||
// iterate_on_range() on key dmt (and associated leafentries)
|
||||
template<typename iterate_extra_t,
|
||||
int (*f)(const void * key, const uint32_t keylen, const LEAFENTRY &, const uint32_t, iterate_extra_t *const)>
|
||||
int iterate_on_range(const uint32_t left, const uint32_t right, iterate_extra_t *const iterate_extra) const {
|
||||
klpair_iterate_extra<iterate_extra_t> klpair_extra = { iterate_extra, this };
|
||||
return m_buffer.iterate_on_range< klpair_iterate_extra<iterate_extra_t>, klpair_iterate_wrapper<iterate_extra_t, f> >(left, right, &klpair_extra);
|
||||
}
|
||||
|
||||
// find_zero() on key dmt
|
||||
template<typename dmtcmp_t,
|
||||
int (*h)(const DBT &, const dmtcmp_t &)>
|
||||
int find_zero(const dmtcmp_t &extra, LEAFENTRY *const value, void** key, uint32_t* keylen, uint32_t *const idxp) const {
|
||||
klpair_struct* klpair = nullptr;
|
||||
uint32_t klpair_len;
|
||||
int r = m_buffer.find_zero< dmtcmp_t, klpair_find_wrapper<dmtcmp_t, h> >(extra, &klpair_len, &klpair, idxp);
|
||||
if (r == 0) {
|
||||
if (value) {
|
||||
*value = get_le_from_klpair(klpair);
|
||||
}
|
||||
if (key) {
|
||||
paranoid_invariant_notnull(keylen);
|
||||
*key = klpair->key;
|
||||
*keylen = keylen_from_klpair_len(klpair_len);
|
||||
}
|
||||
else {
|
||||
paranoid_invariant_null(keylen);
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
// find() on key dmt (and associated leafentries)
|
||||
template<typename dmtcmp_t,
|
||||
int (*h)(const DBT &, const dmtcmp_t &)>
|
||||
int find(const dmtcmp_t &extra, int direction, LEAFENTRY *const value, void** key, uint32_t* keylen, uint32_t *const idxp) const {
|
||||
klpair_struct* klpair = nullptr;
|
||||
uint32_t klpair_len;
|
||||
int r = m_buffer.find< dmtcmp_t, klpair_find_wrapper<dmtcmp_t, h> >(extra, direction, &klpair_len, &klpair, idxp);
|
||||
if (r == 0) {
|
||||
if (value) {
|
||||
*value = get_le_from_klpair(klpair);
|
||||
}
|
||||
if (key) {
|
||||
paranoid_invariant_notnull(keylen);
|
||||
*key = klpair->key;
|
||||
*keylen = keylen_from_klpair_len(klpair_len);
|
||||
}
|
||||
else {
|
||||
paranoid_invariant_null(keylen);
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
// Fetch leafentry by index
|
||||
__attribute__((__nonnull__))
|
||||
int fetch_le(uint32_t idx, LEAFENTRY *le);
|
||||
// Fetch (leafentry, key, keylen) by index
|
||||
__attribute__((__nonnull__))
|
||||
int fetch_klpair(uint32_t idx, LEAFENTRY *le, uint32_t *len, void** key);
|
||||
// Fetch (serialized size of leafentry, key, and keylen) by index
|
||||
__attribute__((__nonnull__))
|
||||
int fetch_klpair_disksize(uint32_t idx, size_t *size);
|
||||
// Fetch (key, keylen) by index
|
||||
__attribute__((__nonnull__))
|
||||
int fetch_key_and_len(uint32_t idx, uint32_t *len, void** key);
|
||||
|
||||
// Move leafentries (and associated key/keylens) from this basement node to dest_bd
|
||||
// Moves indexes [lbi-ube)
|
||||
__attribute__((__nonnull__))
|
||||
void split_klpairs(bn_data* dest_bd, uint32_t first_index_for_dest);
|
||||
|
||||
// Destroy this basement node and free memory.
|
||||
void destroy(void);
|
||||
|
||||
// Uses sorted array as input for this basement node.
|
||||
// Expects this to be a basement node just initialized with initialize_empty()
|
||||
void set_contents_as_clone_of_sorted_array(
|
||||
uint32_t num_les,
|
||||
const void** old_key_ptrs,
|
||||
uint32_t* old_keylens,
|
||||
LEAFENTRY* old_les,
|
||||
size_t *le_sizes,
|
||||
size_t total_key_size,
|
||||
size_t total_le_size
|
||||
);
|
||||
|
||||
// Make this basement node a clone of orig_bn_data.
|
||||
// orig_bn_data still owns all its memory (dmt, mempool)
|
||||
// this basement node will have a new dmt, mempool containing same data.
|
||||
void clone(bn_data* orig_bn_data);
|
||||
|
||||
// Delete klpair index idx with provided keylen and old leafentry with size old_le_size
|
||||
void delete_leafentry (
|
||||
uint32_t idx,
|
||||
uint32_t keylen,
|
||||
uint32_t old_le_size
|
||||
);
|
||||
|
||||
// Allocates space in the mempool to store a new leafentry.
|
||||
// This may require reorganizing the mempool and updating the dmt.
|
||||
__attribute__((__nonnull__))
|
||||
void get_space_for_overwrite(uint32_t idx, const void* keyp, uint32_t keylen, uint32_t old_keylen, uint32_t old_size,
|
||||
uint32_t new_size, LEAFENTRY* new_le_space, void **const maybe_free);
|
||||
|
||||
// Allocates space in the mempool to store a new leafentry
|
||||
// and inserts a new key into the dmt
|
||||
// This may require reorganizing the mempool and updating the dmt.
|
||||
__attribute__((__nonnull__))
|
||||
void get_space_for_insert(uint32_t idx, const void* keyp, uint32_t keylen, size_t size, LEAFENTRY* new_le_space, void **const maybe_free);
|
||||
|
||||
// Gets a leafentry given a klpair from this basement node.
|
||||
LEAFENTRY get_le_from_klpair(const klpair_struct *klpair) const;
|
||||
|
||||
void serialize_to_wbuf(struct wbuf *const wb);
|
||||
|
||||
// Prepares this basement node for serialization.
|
||||
// Must be called before serializing this basement node.
|
||||
// Between calling prepare_to_serialize and actually serializing, the basement node may not be modified
|
||||
void prepare_to_serialize(void);
|
||||
|
||||
// Serialize the basement node header to a wbuf
|
||||
// Requires prepare_to_serialize() to have been called first.
|
||||
void serialize_header(struct wbuf *wb) const;
|
||||
|
||||
// Serialize all keys and leafentries to a wbuf
|
||||
// Requires prepare_to_serialize() (and serialize_header()) has been called first.
|
||||
// Currently only supported when all keys are fixed-length.
|
||||
void serialize_rest(struct wbuf *wb) const;
|
||||
|
||||
static const uint32_t HEADER_LENGTH = 0
|
||||
+ sizeof(uint32_t) // key_data_size
|
||||
+ sizeof(uint32_t) // val_data_size
|
||||
+ sizeof(uint32_t) // fixed_key_length
|
||||
+ sizeof(uint8_t) // all_keys_same_length
|
||||
+ sizeof(uint8_t) // keys_vals_separate
|
||||
+ 0;
|
||||
private:
|
||||
|
||||
// split_klpairs_extra should be a local class in split_klpairs, but
|
||||
// the dmt template parameter for iterate needs linkage, so it has to be a
|
||||
// separate class, but we want it to be able to call e.g. add_key
|
||||
friend class split_klpairs_extra;
|
||||
|
||||
// Allocates space in the mempool.
|
||||
// If there is insufficient space, the mempool is enlarged and leafentries may be shuffled to reduce fragmentation.
|
||||
// If shuffling happens, the offsets stored in the dmt are updated.
|
||||
LEAFENTRY mempool_malloc_and_update_dmt(size_t size, void **maybe_free);
|
||||
|
||||
// Change the size of the mempool to support what is already in it, plus added_size.
|
||||
// possibly "compress" by shuffling leafentries around to reduce fragmentation to 0.
|
||||
// If fragmentation is already 0 and force_compress is not true, shuffling may be skipped.
|
||||
// If shuffling happens, leafentries will be stored in the mempool in sorted order.
|
||||
void dmt_compress_kvspace(size_t added_size, void **maybe_free, bool force_compress);
|
||||
|
||||
// Note that a key was added (for maintaining disk-size of this basement node)
|
||||
void add_key(uint32_t keylen);
|
||||
|
||||
// Note that multiple keys were added (for maintaining disk-size of this basement node)
|
||||
void add_keys(uint32_t n_keys, uint32_t combined_klpair_len);
|
||||
|
||||
// Note that a key was removed (for maintaining disk-size of this basement node)
|
||||
void remove_key(uint32_t keylen);
|
||||
|
||||
klpair_dmt_t m_buffer; // pointers to individual leaf entries
|
||||
struct mempool m_buffer_mempool; // storage for all leaf entries
|
||||
|
||||
friend class bndata_bugfix_test;
|
||||
|
||||
// Get the serialized size of a klpair.
|
||||
// As of Jan 14, 2014, serialized size of a klpair is independent of whether this basement node has fixed-length keys.
|
||||
uint32_t klpair_disksize(const uint32_t klpair_len, const klpair_struct *klpair) const;
|
||||
|
||||
// The disk/memory size of all keys. (Note that the size of memory for the leafentries is maintained by m_buffer_mempool)
|
||||
size_t m_disksize_of_keys;
|
||||
|
||||
// Deserialize this basement node from rbuf
|
||||
// all keys will be first followed by all leafentries (both in sorted order)
|
||||
void initialize_from_separate_keys_and_vals(uint32_t num_entries, struct rbuf *rb, uint32_t data_size, uint32_t version,
|
||||
uint32_t key_data_size, uint32_t val_data_size, bool all_keys_same_length,
|
||||
uint32_t fixed_klpair_length);
|
||||
};
|
@ -1,109 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include <portability/toku_config.h>
|
||||
#include <memory.h>
|
||||
#include <toku_pthread.h>
|
||||
|
||||
#include "cachetable/background_job_manager.h"
|
||||
|
||||
// Key handed to toku_mutex_init() for every manager's jobs_lock.
// bjm_init dereferences this pointer, so it must point at a valid key
// before any background job manager is initialized.
toku_instr_key *bjm_jobs_lock_mutex_key;
|
||||
// Key handed to toku_cond_init() for every manager's jobs_wait.
// bjm_init dereferences this pointer as well.
toku_instr_key *bjm_jobs_wait_key;
|
||||
|
||||
// State behind a BACKGROUND_JOB_MANAGER handle.
// bjm_reset, bjm_add_background_job, bjm_remove_background_job and
// bjm_wait_for_jobs_to_finish all read and write these fields with
// jobs_lock held.
struct background_job_manager_struct {
|
||||
bool accepting_jobs; // true after bjm_init/bjm_reset; cleared by bjm_wait_for_jobs_to_finish, after which bjm_add_background_job fails
|
||||
uint32_t num_jobs; // jobs added by bjm_add_background_job and not yet removed by bjm_remove_background_job
|
||||
toku_cond_t jobs_wait; // broadcast when num_jobs drops to 0 while accepting_jobs is false; waited on by bjm_wait_for_jobs_to_finish
|
||||
toku_mutex_t jobs_lock; // guards accepting_jobs and num_jobs; also the mutex paired with jobs_wait
|
||||
};
|
||||
|
||||
// Creates a background job manager and hands it back through *pbjm.
// The new manager starts out accepting jobs with none outstanding.
// The mutex and condition variable are registered under the global
// bjm_jobs_lock_mutex_key / bjm_jobs_wait_key keys.
void bjm_init(BACKGROUND_JOB_MANAGER *pbjm) {
    BACKGROUND_JOB_MANAGER XCALLOC(manager);

    // Plain bookkeeping fields first; they do not depend on the primitives.
    manager->num_jobs = 0;
    manager->accepting_jobs = true;

    toku_mutex_init(*bjm_jobs_lock_mutex_key, &manager->jobs_lock, nullptr);
    toku_cond_init(*bjm_jobs_wait_key, &manager->jobs_wait, nullptr);

    *pbjm = manager;
}
|
||||
|
||||
// Tears down a manager created by bjm_init and releases its memory.
// Asserts that no jobs are outstanding. The check is made without taking
// jobs_lock.
void bjm_destroy(BACKGROUND_JOB_MANAGER bjm) {
    assert(bjm->num_jobs == 0);

    toku_cond_t *const wait_cond = &bjm->jobs_wait;
    toku_mutex_t *const lock = &bjm->jobs_lock;

    // Condition variable first, then the mutex, then the storage itself.
    toku_cond_destroy(wait_cond);
    toku_mutex_destroy(lock);
    toku_free(bjm);
}
|
||||
|
||||
// Makes the manager accept background jobs again.
// Asserts, under jobs_lock, that no jobs are outstanding at that moment.
void bjm_reset(BACKGROUND_JOB_MANAGER bjm) {
    toku_mutex_t *const lock = &bjm->jobs_lock;

    toku_mutex_lock(lock);
    assert(bjm->num_jobs == 0);
    bjm->accepting_jobs = true;
    toku_mutex_unlock(lock);
}
|
||||
|
||||
// Registers one background job with the manager.
// Returns 0 if the job was counted (the manager is accepting jobs),
// or -1 if the manager is not accepting jobs, in which case nothing changes.
int bjm_add_background_job(BACKGROUND_JOB_MANAGER bjm) {
    toku_mutex_lock(&bjm->jobs_lock);
    // Snapshot the flag while the lock is held so the return value matches
    // the decision made inside the critical section.
    const bool accepted = bjm->accepting_jobs;
    if (accepted) {
        bjm->num_jobs++;
    }
    toku_mutex_unlock(&bjm->jobs_lock);

    return accepted ? 0 : -1;
}
|
||||
// Unregisters one background job.
// Asserts that at least one job is outstanding. If this was the last job
// and the manager is no longer accepting jobs, all threads blocked on
// jobs_wait are woken.
void bjm_remove_background_job(BACKGROUND_JOB_MANAGER bjm){
    toku_mutex_lock(&bjm->jobs_lock);
    assert(bjm->num_jobs > 0);

    const uint32_t remaining = --bjm->num_jobs;
    const bool draining = !bjm->accepting_jobs;
    if (draining && remaining == 0) {
        toku_cond_broadcast(&bjm->jobs_wait);
    }

    toku_mutex_unlock(&bjm->jobs_lock);
}
|
||||
|
||||
// Stops the manager from accepting new jobs, then blocks until the count
// of outstanding jobs reaches zero. The manager stays closed to new jobs
// when this returns.
void bjm_wait_for_jobs_to_finish(BACKGROUND_JOB_MANAGER bjm) {
    toku_mutex_lock(&bjm->jobs_lock);
    bjm->accepting_jobs = false;

    // Re-check the count after every wakeup before leaving the loop.
    for (;;) {
        if (bjm->num_jobs == 0) {
            break;
        }
        toku_cond_wait(&bjm->jobs_wait, &bjm->jobs_lock);
    }

    toku_mutex_unlock(&bjm->jobs_lock);
}
|
||||
|
@ -1,78 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
//
|
||||
// The background job manager keeps track of the existence of
|
||||
// background jobs running. We use the background job manager
|
||||
// to allow threads to perform background jobs on various pieces
|
||||
// of the system (e.g. cachefiles and cloned pairs being written out
|
||||
// for checkpoint)
|
||||
//
|
||||
|
||||
typedef struct background_job_manager_struct *BACKGROUND_JOB_MANAGER;
|
||||
|
||||
|
||||
//
// Allocates and initializes a background job manager and stores the
// handle in *bjm. The new manager accepts jobs and has none outstanding.
//
void bjm_init(BACKGROUND_JOB_MANAGER* bjm);
|
||||
//
// Destroys a background job manager and frees its memory.
// Asserts that no background jobs are outstanding.
//
void bjm_destroy(BACKGROUND_JOB_MANAGER bjm);
|
||||
|
||||
//
|
||||
// Re-allows a background job manager to accept background jobs
|
||||
//
|
||||
void bjm_reset(BACKGROUND_JOB_MANAGER bjm);
|
||||
|
||||
//
|
||||
// add a background job. If return value is 0, then the addition of the job
|
||||
// was successful and the user may perform the background job. If return
|
||||
// value is non-zero, then adding of the background job failed and the user
|
||||
// may not perform the background job.
|
||||
//
|
||||
int bjm_add_background_job(BACKGROUND_JOB_MANAGER bjm);
|
||||
|
||||
//
|
||||
// remove a background job
|
||||
//
|
||||
void bjm_remove_background_job(BACKGROUND_JOB_MANAGER bjm);
|
||||
|
||||
//
|
||||
// This function waits for all current background jobs to be removed. If the user
|
||||
// calls bjm_add_background_job while this function is running, or after this function
|
||||
// has completed, bjm_add_background_job returns an error.
|
||||
//
|
||||
void bjm_wait_for_jobs_to_finish(BACKGROUND_JOB_MANAGER bjm);
|
@ -1,607 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cachetable/background_job_manager.h"
|
||||
#include <portability/toku_random.h>
|
||||
#include <util/frwlock.h>
|
||||
#include <util/kibbutz.h>
|
||||
#include <util/nb_mutex.h>
|
||||
#include <util/partitioned_counter.h>
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// This file contains the classes and structs that make up the cachetable.
|
||||
// The structs are:
|
||||
// - cachefile
|
||||
// - ctpair
|
||||
// - pair_list
|
||||
// - cachefile_list
|
||||
// - checkpointer
|
||||
// - evictor
|
||||
// - cleaner
|
||||
//
|
||||
// The rest of this comment assumes familiarity with the locks used in these
|
||||
// classes/structs and what the locks protect. Nevertheless, here is
|
||||
// a list of the locks that we have:
|
||||
// - pair_list->list_lock
|
||||
// - pair_list->pending_lock_expensive
|
||||
// - pair_list->pending_lock_cheap
|
||||
// - cachefile_list->lock
|
||||
// - PAIR->mutex
|
||||
// - PAIR->value_rwlock
|
||||
// - PAIR->disk_nb_mutex
|
||||
//
|
||||
// Here are rules for how the locks interact:
|
||||
// - To grab any of the pair_list's locks, or the cachefile_list's lock,
|
||||
// the cachetable must be in existence
|
||||
// - To grab the PAIR mutex, we must know the PAIR will not disappear:
|
||||
// - the PAIR must be pinned (value_rwlock or disk_nb_mutex is held)
|
||||
// - OR, the pair_list's list lock is held
|
||||
// - As a result, to get rid of a PAIR from the pair_list, we must hold
|
||||
// both the pair_list's list_lock and the PAIR's mutex
|
||||
// - To grab PAIR->value_rwlock, we must hold the PAIR's mutex
|
||||
// - To grab PAIR->disk_nb_mutex, we must hold the PAIR's mutex
|
||||
// and hold PAIR->value_rwlock
|
||||
//
|
||||
// Now let's talk about ordering. Here is an order from outer to inner (top locks must be grabbed first)
|
||||
// - pair_list->pending_lock_expensive
|
||||
// - pair_list->list_lock
|
||||
// - cachefile_list->lock
|
||||
// - PAIR->mutex
|
||||
// - pair_list->pending_lock_cheap <-- after grabbing this lock,
|
||||
// NO other locks
|
||||
// should be grabbed.
|
||||
// - when grabbing PAIR->value_rwlock or PAIR->disk_nb_mutex,
|
||||
// if the acquisition will not block, then it does not matter if any other locks held,
|
||||
// BUT if the acquisition will block, then NO other locks may be held besides
|
||||
// PAIR->mutex.
|
||||
//
|
||||
// HERE ARE TWO EXAMPLES:
|
||||
// To pin a PAIR on a client thread, the following must be done:
|
||||
// - first grab the list lock and find the PAIR
|
||||
// - with the list lock grabbed, grab PAIR->mutex
|
||||
// - with PAIR->mutex held:
|
||||
// - release list lock
|
||||
// - pin PAIR
|
||||
// - with PAIR pinned, grab pending_lock_cheap,
|
||||
// - copy and clear PAIR->checkpoint_pending,
|
||||
// - resolve checkpointing if necessary
|
||||
// - return to user.
|
||||
// The list lock may be held while pinning the PAIR if
|
||||
// the PAIR has no contention. Otherwise, we may
|
||||
// get a deadlock with another thread that has the PAIR pinned,
|
||||
// tries to pin some other PAIR, and in doing so, grabs the list lock.
|
||||
//
|
||||
// To unpin a PAIR on a client thread:
|
||||
// - because the PAIR is pinned, we don't need the pair_list's list_lock
|
||||
// - so, simply acquire PAIR->mutex
|
||||
// - unpin the PAIR
|
||||
// - return
|
||||
//
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
class evictor;
|
||||
class pair_list;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Maps to a file on disk.
|
||||
//
|
||||
struct cachefile {
|
||||
// these next two fields are protected by cachetable's list lock
|
||||
// they are managed whenever we add or remove a pair from
|
||||
// the cachetable. As of Riddler, this linked list is only used to
|
||||
// make cachetable_flush_cachefile more efficient
|
||||
PAIR cf_head; // doubly linked list that is NOT circular
|
||||
uint32_t num_pairs; // count of the pairs in the cachetable that belong to this cachefile
|
||||
|
||||
bool for_checkpoint; //True if part of the in-progress checkpoint
|
||||
|
||||
// If set and the cachefile closes, the file will be removed.
|
||||
// Clients must not operate on the cachefile after setting this,
|
||||
// nor attempt to open any cachefile with the same fname (dname)
|
||||
// until this cachefile has been fully closed and unlinked.
|
||||
bool unlink_on_close;
|
||||
// If set then fclose will not be logged in recovery log.
|
||||
bool skip_log_recover_on_close;
|
||||
int fd; /* Bug: If a file is opened read-only, then it is stuck in read-only. If it is opened read-write, then subsequent writers can write to it too. */
|
||||
CACHETABLE cachetable;
|
||||
struct fileid fileid;
|
||||
// the filenum is used as an identifier of the cachefile
|
||||
// for logging and recovery
|
||||
FILENUM filenum;
|
||||
// number used to generate hashes for blocks in the cachefile
|
||||
// used in toku_cachetable_hash
|
||||
// this used to be the filenum.fileid, but now it is separate
|
||||
uint32_t hash_id;
|
||||
char *fname_in_env; /* Used for logging */
|
||||
|
||||
void *userdata;
|
||||
void (*log_fassociate_during_checkpoint)(CACHEFILE cf, void *userdata); // When starting a checkpoint we must log all open files.
|
||||
void (*close_userdata)(CACHEFILE cf, int fd, void *userdata, bool lsnvalid, LSN); // when closing the last reference to a cachefile, first call this function.
|
||||
void (*free_userdata)(CACHEFILE cf, void *userdata); // NOTE(review): the previous comment here was a verbatim copy of close_userdata's; presumably this releases userdata once the cachefile is closed -- confirm against the callers.
|
||||
void (*begin_checkpoint_userdata)(LSN lsn_of_checkpoint, void *userdata); // before checkpointing cachefiles call this function.
|
||||
void (*checkpoint_userdata)(CACHEFILE cf, int fd, void *userdata); // when checkpointing a cachefile, call this function.
|
||||
void (*end_checkpoint_userdata)(CACHEFILE cf, int fd, void *userdata); // after checkpointing cachefiles call this function.
|
||||
void (*note_pin_by_checkpoint)(CACHEFILE cf, void *userdata); // add a reference to the userdata to prevent it from being removed from memory
|
||||
void (*note_unpin_by_checkpoint)(CACHEFILE cf, void *userdata); // NOTE(review): the previous comment was a copy of note_pin_by_checkpoint's; presumably this drops the reference that note_pin_by_checkpoint added -- confirm.
|
||||
BACKGROUND_JOB_MANAGER bjm; // background job manager for this cachefile (type declared in cachetable/background_job_manager.h)
|
||||
};
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The pair represents the data stored in the cachetable.
|
||||
//
|
||||
struct ctpair {
|
||||
// these fields are essentially constants. They do not change.
|
||||
CACHEFILE cachefile;
|
||||
CACHEKEY key;
|
||||
uint32_t fullhash;
|
||||
CACHETABLE_FLUSH_CALLBACK flush_callback;
|
||||
CACHETABLE_PARTIAL_EVICTION_EST_CALLBACK pe_est_callback;
|
||||
CACHETABLE_PARTIAL_EVICTION_CALLBACK pe_callback;
|
||||
CACHETABLE_CLEANER_CALLBACK cleaner_callback;
|
||||
CACHETABLE_CLONE_CALLBACK clone_callback;
|
||||
CACHETABLE_CHECKPOINT_COMPLETE_CALLBACK checkpoint_complete_callback;
|
||||
void *write_extraargs;
|
||||
|
||||
// access to these fields is protected by disk_nb_mutex
|
||||
void* cloned_value_data; // cloned copy of value_data used for checkpointing
|
||||
long cloned_value_size; // size of cloned_value_data, used for accounting of size_current
|
||||
void* disk_data; // data used to fetch/flush value_data to and from disk.
|
||||
|
||||
// access to these fields is protected by value_rwlock
|
||||
void* value_data; // data used by client threads, FTNODEs and ROLLBACK_LOG_NODEs
|
||||
PAIR_ATTR attr;
|
||||
enum cachetable_dirty dirty;
|
||||
|
||||
// protected by PAIR->mutex
|
||||
uint32_t count; // clock count
|
||||
uint32_t refcount; // if > 0, then this PAIR is referenced by
|
||||
// callers to the cachetable, and therefore cannot
|
||||
// be evicted
|
||||
uint32_t num_waiting_on_refs; // number of threads waiting on refcount to go to zero
|
||||
toku_cond_t refcount_wait; // cond used to wait for refcount to go to zero
|
||||
|
||||
// locks
|
||||
toku::frwlock value_rwlock;
|
||||
struct nb_mutex disk_nb_mutex; // single writer, protects disk_data, is used for writing cloned nodes for checkpoint
|
||||
toku_mutex_t* mutex; // gotten from the pair list
|
||||
|
||||
// Access to checkpoint_pending is protected by two mechanisms,
|
||||
// the value_rwlock and the pair_list's pending locks (expensive and cheap).
|
||||
// checkpoint_pending may be true or false.
|
||||
// Here are the rules for reading/modifying this bit.
|
||||
// - To transition this field from false to true during begin_checkpoint,
|
||||
// we must be holding both of the pair_list's pending locks.
|
||||
// - To transition this field from true to false during end_checkpoint,
|
||||
// we must be holding the value_rwlock.
|
||||
// - For a non-checkpoint thread to read the value, we must hold both the
|
||||
// value_rwlock and one of the pair_list's pending locks
|
||||
// - For the checkpoint thread to read the value, we must
|
||||
// hold the value_rwlock
|
||||
//
|
||||
bool checkpoint_pending; // If this is on, then we have got to resolve checkpointing modifying it.
|
||||
|
||||
// these are variables that are only used to transfer information to background threads
|
||||
// we cache them here to avoid a malloc. In the future, we should investigate if this
|
||||
// is necessary, as having these fields here is not technically necessary
|
||||
long size_evicting_estimate;
|
||||
evictor* ev;
|
||||
pair_list* list;
|
||||
|
||||
// A PAIR is stored in a pair_list (which happens to be PAIR->list).
|
||||
// These variables are protected by the list lock in the pair_list
|
||||
//
|
||||
// clock_next,clock_prev represent a circular doubly-linked list.
|
||||
PAIR clock_next,clock_prev; // In clock.
|
||||
PAIR hash_chain;
|
||||
|
||||
// pending_next,pending_prev represent a non-circular doubly-linked list.
|
||||
PAIR pending_next;
|
||||
PAIR pending_prev;
|
||||
|
||||
// cf_next, cf_prev represent a non-circular doubly-linked list.
|
||||
// entries in linked list for PAIRs in a cachefile, these are protected
|
||||
// by the list lock of the PAIR's pair_list. They are used to make
|
||||
// cachetable_flush_cachefile cheaper so that we don't need
|
||||
// to search the entire cachetable to find a particular cachefile's
|
||||
// PAIRs
|
||||
PAIR cf_next;
|
||||
PAIR cf_prev;
|
||||
};
|
||||
|
||||
//
|
||||
// This initializes the fields and members of the pair.
|
||||
//
|
||||
void pair_init(PAIR p,
|
||||
CACHEFILE cachefile,
|
||||
CACHEKEY key,
|
||||
void *value,
|
||||
PAIR_ATTR attr,
|
||||
enum cachetable_dirty dirty,
|
||||
uint32_t fullhash,
|
||||
CACHETABLE_WRITE_CALLBACK write_callback,
|
||||
evictor *ev,
|
||||
pair_list *list);
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The pair list maintains the set of PAIR's that make up
|
||||
// the cachetable.
|
||||
//
|
||||
class pair_list {
|
||||
public:
|
||||
//
|
||||
// the following fields are protected by the list lock
|
||||
//
|
||||
uint32_t m_n_in_table; // number of pairs in the hash table
|
||||
uint32_t m_table_size; // number of buckets in the hash table
|
||||
uint32_t m_num_locks; // presumably the number of entries in m_mutexes -- confirm in the implementation
|
||||
PAIR *m_table; // hash table
|
||||
toku_mutex_aligned_t *m_mutexes; // presumably the pool that get_mutex_for_pair() selects from by fullhash -- confirm
|
||||
//
|
||||
// The following fields are the heads of various linked lists.
|
||||
// They are also protected by the list lock, but their
|
||||
// usage is not as straightforward. For each of them,
|
||||
// only ONE thread is allowed to iterate over them with
|
||||
// a read lock on the list lock. All other threads
|
||||
// that want to modify elements in the lists or iterate over
|
||||
// the lists must hold the write list lock. Here is the
|
||||
// association between what threads may hold a read lock
|
||||
// on the list lock while iterating:
|
||||
// - clock_head -> eviction thread (evictor)
|
||||
// - cleaner_head -> cleaner thread (cleaner)
|
||||
// - pending_head -> checkpoint thread (checkpointer)
|
||||
//
|
||||
PAIR m_clock_head; // of clock . head is the next thing to be up for decrement.
|
||||
PAIR m_cleaner_head; // for cleaner thread. head is the next thing to look at for possible cleaning.
|
||||
PAIR m_checkpoint_head; // for begin checkpoint to iterate over PAIRs and mark as pending_checkpoint
|
||||
PAIR m_pending_head; // list of pairs marked with checkpoint_pending
|
||||
|
||||
// this field is public so we are still POD
|
||||
|
||||
// usage of this lock is described above
|
||||
toku_pthread_rwlock_t m_list_lock;
|
||||
//
|
||||
// these locks are the "pending locks" referenced
|
||||
// in comments about PAIR->checkpoint_pending. There
|
||||
// are two of them, but both serve the same purpose, which
|
||||
// is to protect the transition of a PAIR's checkpoint pending
|
||||
// value from false to true during begin_checkpoint.
|
||||
// We use two locks, because threads that want to read the
|
||||
// checkpoint_pending value may hold a lock for varying periods of time.
|
||||
// Threads running eviction may need to protect checkpoint_pending
|
||||
// while writing a node to disk, which is an expensive operation,
|
||||
// so it uses pending_lock_expensive. Client threads that
|
||||
// want to pin PAIRs will want to protect checkpoint_pending
|
||||
// just long enough to read the value and wipe it out. This is
|
||||
// a cheap operation, and as a result, uses pending_lock_cheap.
|
||||
//
|
||||
// By having two locks, and making begin_checkpoint first
|
||||
// grab pending_lock_expensive and then pending_lock_cheap,
|
||||
// we ensure that threads that want to pin nodes can grab
|
||||
// only pending_lock_cheap, and never block behind threads
|
||||
// holding pending_lock_expensive and writing a node out to disk
|
||||
//
|
||||
toku_pthread_rwlock_t m_pending_lock_expensive;
|
||||
toku_pthread_rwlock_t m_pending_lock_cheap;
|
||||
void init();
|
||||
void destroy();
|
||||
void evict_completely(PAIR pair);
|
||||
void evict_from_cachetable(PAIR pair);
|
||||
void evict_from_cachefile(PAIR pair);
|
||||
void add_to_cachetable_only(PAIR p);
|
||||
void put(PAIR pair);
|
||||
PAIR find_pair(CACHEFILE file, CACHEKEY key, uint32_t hash);
|
||||
void pending_pairs_remove (PAIR p);
|
||||
void verify();
|
||||
void get_state(int *num_entries, int *hash_size);
|
||||
// lock/unlock wrappers; by their names they cover m_list_lock, m_pending_lock_expensive and m_pending_lock_cheap
|
||||
void read_list_lock();
|
||||
void read_list_unlock();
|
||||
void write_list_lock();
|
||||
void write_list_unlock();
|
||||
void read_pending_exp_lock();
|
||||
void read_pending_exp_unlock();
|
||||
void write_pending_exp_lock();
|
||||
void write_pending_exp_unlock();
|
||||
void read_pending_cheap_lock();
|
||||
void read_pending_cheap_unlock();
|
||||
void write_pending_cheap_lock();
|
||||
void write_pending_cheap_unlock();
|
||||
toku_mutex_t* get_mutex_for_pair(uint32_t fullhash);
|
||||
void pair_lock_by_fullhash(uint32_t fullhash);
|
||||
void pair_unlock_by_fullhash(uint32_t fullhash);
|
||||
|
||||
private:
|
||||
void pair_remove (PAIR p);
|
||||
void remove_from_hash_chain(PAIR p);
|
||||
void add_to_cf_list (PAIR p);
|
||||
void add_to_clock (PAIR p);
|
||||
void add_to_hash_chain(PAIR p);
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Wrapper for the head of our cachefile list.
|
||||
//
|
||||
class cachefile_list {
|
||||
public:
|
||||
void init();
|
||||
void destroy();
|
||||
void read_lock();
|
||||
void read_unlock();
|
||||
void write_lock();
|
||||
void write_unlock();
|
||||
int cachefile_of_iname_in_env(const char *iname_in_env, CACHEFILE *cf);
|
||||
int cachefile_of_filenum(FILENUM filenum, CACHEFILE *cf);
|
||||
// NOTE(review): the *_unlocked methods presumably expect the caller to already hold m_lock -- confirm in the implementation
|
||||
void add_cf_unlocked(CACHEFILE newcf);
|
||||
void add_stale_cf(CACHEFILE newcf);
|
||||
void remove_cf(CACHEFILE cf);
|
||||
void remove_stale_cf_unlocked(CACHEFILE cf);
|
||||
FILENUM reserve_filenum();
|
||||
uint32_t get_new_hash_id_unlocked();
|
||||
CACHEFILE find_cachefile_unlocked(struct fileid* fileid);
|
||||
CACHEFILE find_stale_cachefile_unlocked(struct fileid* fileid);
|
||||
void verify_unused_filenum(FILENUM filenum);
|
||||
bool evict_some_stale_pair(evictor* ev);
|
||||
void free_stale_data(evictor* ev);
|
||||
// access to these fields is protected by the lock
|
||||
FILENUM m_next_filenum_to_use;
|
||||
uint32_t m_next_hash_id_to_use;
|
||||
toku_pthread_rwlock_t m_lock; // this field is public so we are still POD
|
||||
toku::omt<CACHEFILE> m_active_filenum;
|
||||
toku::omt<CACHEFILE> m_active_fileid;
|
||||
toku::omt<CACHEFILE> m_stale_fileid;
|
||||
private:
|
||||
CACHEFILE find_cachefile_in_list_unlocked(CACHEFILE start, struct fileid* fileid);
|
||||
};
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The checkpointer handles starting and finishing checkpoints of the
|
||||
// cachetable's data.
|
||||
//
|
||||
class checkpointer {
|
||||
public:
|
||||
// Wires the checkpointer to the pair list, logger, evictor and cachefile list it works with.
|
||||
int init(pair_list *_pl, TOKULOGGER _logger, evictor *_ev, cachefile_list *files);
|
||||
void destroy();
|
||||
void set_checkpoint_period(uint32_t new_period);
|
||||
uint32_t get_checkpoint_period();
|
||||
int shutdown();
|
||||
bool has_been_shutdown();
|
||||
void begin_checkpoint();
|
||||
// NOTE(review): add/remove_background_job presumably forward to m_checkpoint_clones_bjm -- confirm in the implementation
|
||||
void add_background_job();
|
||||
void remove_background_job();
|
||||
void end_checkpoint(void (*testcallback_f)(void*), void* testextra);
|
||||
TOKULOGGER get_logger();
|
||||
// used during begin_checkpoint
|
||||
void increment_num_txns();
|
||||
private:
|
||||
uint32_t m_checkpoint_num_txns; // how many transactions are in the checkpoint
|
||||
TOKULOGGER m_logger;
|
||||
LSN m_lsn_of_checkpoint_in_progress;
|
||||
uint32_t m_checkpoint_num_files; // how many cachefiles are in the checkpoint
|
||||
struct minicron m_checkpointer_cron; // the periodic checkpointing thread
|
||||
cachefile_list *m_cf_list;
|
||||
pair_list *m_list;
|
||||
evictor *m_ev;
|
||||
bool m_checkpointer_cron_init;
|
||||
bool m_checkpointer_init;
|
||||
|
||||
// variable used by the checkpoint thread to know
|
||||
// when all work induced by cloning on client threads is done
|
||||
BACKGROUND_JOB_MANAGER m_checkpoint_clones_bjm;
|
||||
// private methods for begin_checkpoint
|
||||
void update_cachefiles();
|
||||
void log_begin_checkpoint();
|
||||
void turn_on_pending_bits();
|
||||
// private methods for end_checkpoint
|
||||
void fill_checkpoint_cfs(CACHEFILE* checkpoint_cfs);
|
||||
void checkpoint_pending_pairs();
|
||||
void checkpoint_userdata(CACHEFILE* checkpoint_cfs);
|
||||
void log_end_checkpoint();
|
||||
void end_checkpoint_userdata(CACHEFILE* checkpoint_cfs);
|
||||
void remove_cachefiles(CACHEFILE* checkpoint_cfs);
|
||||
|
||||
// Unit test struct needs access to private members.
|
||||
friend struct checkpointer_test;
|
||||
};
|
||||
|
||||
//
|
||||
// This is how often we want the eviction thread
|
||||
// to run, in seconds.
|
||||
//
|
||||
const int EVICTION_PERIOD = 1; // unit: seconds
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The evictor handles the removal of pairs from the pair list/cachetable.
|
||||
//
|
||||
// Declaration only: the member functions are defined outside this chunk, so
// the notes below restate what the signatures and existing comments show.
class evictor {
|
||||
public:
|
||||
// NOTE(review): eviction_period is presumably in seconds (compare
// m_period_in_seconds below) -- confirm against the definition.
int init(long _size_limit, pair_list* _pl, cachefile_list* _cf_list, KIBBUTZ _kibbutz, uint32_t eviction_period);
|
||||
void destroy();
|
||||
// Size-accounting entry points taking a PAIR_ATTR.
void add_pair_attr(PAIR_ATTR attr);
|
||||
void remove_pair_attr(PAIR_ATTR attr);
|
||||
void change_pair_attr(PAIR_ATTR old_attr, PAIR_ATTR new_attr);
|
||||
void add_cloned_data_size(long size);
|
||||
void remove_cloned_data_size(long size);
|
||||
// Reserve part of the cache memory and give it back later.
// NOTE(review): presumably tracked in m_size_reserved -- confirm.
uint64_t reserve_memory(double fraction, uint64_t upper_bound);
|
||||
void release_reserved_memory(uint64_t reserved_memory);
|
||||
void run_eviction_thread();
|
||||
void do_partial_eviction(PAIR p);
|
||||
void evict_pair(PAIR p, bool checkpoint_pending);
|
||||
void wait_for_cache_pressure_to_subside();
|
||||
void signal_eviction_thread();
|
||||
// NOTE(review): the _locked suffix suggests the caller already holds
// m_ev_thread_lock -- confirm against the definition.
void signal_eviction_thread_locked();
|
||||
bool should_client_thread_sleep();
|
||||
bool should_client_wake_eviction_thread();
|
||||
// function needed for testing
|
||||
void get_state(long *size_current_ptr, long *size_limit_ptr);
|
||||
void fill_engine_status();
|
||||
void set_enable_partial_eviction(bool enabled);
|
||||
bool get_enable_partial_eviction(void) const;
|
||||
private:
|
||||
void add_to_size_current(long size);
|
||||
void remove_from_size_current(long size);
|
||||
void run_eviction();
|
||||
bool run_eviction_on_pair(PAIR p);
|
||||
void try_evict_pair(PAIR p);
|
||||
void decrease_size_evicting(long size_evicting_estimate);
|
||||
bool should_sleeping_clients_wakeup();
|
||||
bool eviction_needed();
|
||||
|
||||
// We have some intentional races with these variables because we're ok with reading something a little bit old.
|
||||
// Provide some hooks for reading variables in an unsafe way so that there are function names we can stick in a valgrind suppression.
|
||||
int64_t unsafe_read_size_current(void) const;
|
||||
int64_t unsafe_read_size_evicting(void) const;
|
||||
|
||||
pair_list* m_pl;
|
||||
cachefile_list* m_cf_list;
|
||||
int64_t m_size_current; // the sum of the sizes of the pairs in the cachetable
|
||||
int64_t m_size_cloned_data; // stores amount of cloned data we have, only used for engine status
|
||||
// changes to these two values are protected
|
||||
// by m_ev_thread_lock
|
||||
int64_t m_size_reserved; // How much memory is reserved (e.g., by the loader)
|
||||
int64_t m_size_evicting; // the sum of the sizes of the pairs being written
|
||||
|
||||
// these are constants
|
||||
int64_t m_low_size_watermark; // target max size of cachetable that eviction thread aims for
|
||||
int64_t m_low_size_hysteresis; // if cachetable grows to this size, client threads wake up eviction thread upon adding data
|
||||
int64_t m_high_size_watermark; // if cachetable grows to this size, client threads sleep upon adding data
|
||||
int64_t m_high_size_hysteresis; // if > cachetable size, then sleeping client threads may wake up
|
||||
|
||||
bool m_enable_partial_eviction; // true if partial evictions are permitted
|
||||
|
||||
// used to calculate random numbers
|
||||
struct random_data m_random_data;
|
||||
char m_random_statebuf[64];
|
||||
|
||||
// mutex that protects fields listed immediately below
|
||||
toku_mutex_t m_ev_thread_lock;
|
||||
// the eviction thread
|
||||
toku_pthread_t m_ev_thread;
|
||||
// condition variable that controls the sleeping period
|
||||
// of the eviction thread
|
||||
toku_cond_t m_ev_thread_cond;
|
||||
// number of client threads that are currently sleeping
|
||||
// due to an over-subscribed cachetable
|
||||
uint32_t m_num_sleepers;
|
||||
// states if the eviction thread should run. set to true
|
||||
// in init, set to false during destroy
|
||||
bool m_run_thread;
|
||||
// bool that states if the eviction thread is currently running
|
||||
bool m_ev_thread_is_running;
|
||||
// period for which the eviction thread sleeps
|
||||
uint32_t m_period_in_seconds;
|
||||
// condition variable on which client threads wait when sleeping
|
||||
// due to an over-subscribed cachetable
|
||||
toku_cond_t m_flow_control_cond;
|
||||
|
||||
// variables for engine status
|
||||
PARTITIONED_COUNTER m_size_nonleaf;
|
||||
PARTITIONED_COUNTER m_size_leaf;
|
||||
PARTITIONED_COUNTER m_size_rollback;
|
||||
PARTITIONED_COUNTER m_size_cachepressure;
|
||||
PARTITIONED_COUNTER m_wait_pressure_count;
|
||||
PARTITIONED_COUNTER m_wait_pressure_time;
|
||||
PARTITIONED_COUNTER m_long_wait_pressure_count;
|
||||
PARTITIONED_COUNTER m_long_wait_pressure_time;
|
||||
|
||||
KIBBUTZ m_kibbutz;
|
||||
|
||||
// this variable is ONLY used for testing purposes
|
||||
uint64_t m_num_eviction_thread_runs;
|
||||
|
||||
// NOTE(review): by name, these record whether the eviction thread and the
// evictor itself were initialized -- confirm how destroy() uses them.
bool m_ev_thread_init;
|
||||
bool m_evictor_init;
|
||||
|
||||
// Test classes that need access to private members.
friend class evictor_test_helpers;
|
||||
friend class evictor_unit_test;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Iterates over the clean head in the pair list, calling the cleaner
|
||||
// callback on each node in that list.
|
||||
//
|
||||
// Declaration only: the member functions are defined outside this chunk.
class cleaner {
|
||||
public:
|
||||
// NOTE(review): the arguments presumably initialize m_cleaner_iterations,
// m_pl and m_ct -- confirm against the definition.
int init(uint32_t cleaner_iterations, pair_list* _pl, CACHETABLE _ct);
|
||||
void destroy(void);
|
||||
// Getter/setter for the number of cleaner runs per cleaner period
// (see m_cleaner_iterations below).
uint32_t get_iterations(void);
|
||||
void set_iterations(uint32_t new_iterations);
|
||||
// Getter/setter for the cleaner period. NOTE(review): presumably the period
// of m_cleaner_cron; what "_unlocked" skips is not visible here -- confirm.
uint32_t get_period_unlocked(void);
|
||||
void set_period(uint32_t new_period);
|
||||
int run_cleaner(void);
|
||||
|
||||
private:
|
||||
pair_list* m_pl;
|
||||
CACHETABLE m_ct;
|
||||
struct minicron m_cleaner_cron; // the periodic cleaner thread
|
||||
uint32_t m_cleaner_iterations; // how many times to run the cleaner per
|
||||
// cleaner period (minicron has a
|
||||
// minimum period of 1s so if you want
|
||||
// more frequent cleaner runs you must
|
||||
// use this)
|
||||
// NOTE(review): by name, these record whether the cron and the cleaner
// itself were initialized -- confirm how destroy() uses them.
bool m_cleaner_cron_init;
|
||||
bool m_cleaner_init;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The cachetable is as close to an ENV as we get.
|
||||
//
|
||||
// Aggregates the cachetable's components by value: the pair list, the
// cleaner, the evictor, the checkpointer and the cachefile list, plus the
// worker pools and the environment directory string.
struct cachetable {
|
||||
pair_list list;
|
||||
cleaner cl;
|
||||
evictor ev;
|
||||
checkpointer cp;
|
||||
cachefile_list cf_list;
|
||||
|
||||
KIBBUTZ client_kibbutz; // pool of worker threads and jobs to do asynchronously for the client.
|
||||
KIBBUTZ ct_kibbutz; // pool of worker threads and jobs to do asynchronously for the cachetable
|
||||
KIBBUTZ checkpointing_kibbutz; // small pool for checkpointing cloned pairs
|
||||
|
||||
// NOTE(review): ownership of this string is not visible here -- confirm
// who allocates and frees it.
char *env_dir;
|
||||
};
|
File diff suppressed because it is too large
Load Diff
@ -1,588 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <fcntl.h>
|
||||
|
||||
#include "ft/logger/logger.h"
|
||||
#include "ft/serialize/block_table.h"
|
||||
#include "ft/txn/txn.h"
|
||||
#include "ft/ft-status.h"
|
||||
#include "util/minicron.h"
|
||||
|
||||
// Maintain a cache mapping from cachekeys to values (void*)
|
||||
// Some of the keys can be pinned. Don't pin too many or for too long.
|
||||
// If the cachetable is too full, it will call the flush_callback() function with the key, the value, and the otherargs
|
||||
// and then remove the key-value pair from the cache.
|
||||
// The callback won't be any of the currently pinned keys.
|
||||
// Also when flushing an object, the cachetable drops all references to it,
|
||||
// so you may need to free() it.
|
||||
// Note: The cachetable should use a common pool of memory, flushing things across cachetables.
|
||||
// (The first implementation doesn't)
|
||||
// If you pin something twice, you must unpin it twice.
|
||||
// table_size is the initial size of the cache table hash table (in number of entries)
|
||||
// size limit is the upper bound of the sum of size of the entries in the cache table (total number of bytes)
|
||||
|
||||
// Key type used by the cachetable API: an alias for BLOCKNUM.
typedef BLOCKNUM CACHEKEY;
|
||||
|
||||
// Pointer handle types used throughout this API. The pointed-to types are
// only named here; their definitions are not part of this header chunk.
class checkpointer;
|
||||
typedef class checkpointer *CHECKPOINTER;
|
||||
typedef struct cachetable *CACHETABLE;
|
||||
typedef struct cachefile *CACHEFILE;
|
||||
typedef struct ctpair *PAIR;
|
||||
|
||||
// This struct holds information about values stored in the cachetable.
|
||||
// As one can tell from the names, we are probably violating an
|
||||
// abstraction layer by placing names.
|
||||
//
|
||||
// The purpose of having this struct is to have a way for the
|
||||
// cachetable to accumulate some totals we are interested in.
|
||||
// Breaking this abstraction layer by having these names was the
|
||||
// easiest way.
|
||||
//
|
||||
// Size accounting for one PAIR. Only `size` and `cache_pressure_size` are
// described as feeding cache decisions; the node-kind sizes exist for engine
// status reporting.
typedef struct pair_attr_s {
    long size;                // size PAIR's value takes in memory
    long nonleaf_size;        // size if PAIR is a nonleaf node, 0 otherwise, used only for engine status
    long leaf_size;           // size if PAIR is a leaf node, 0 otherwise, used only for engine status
    long rollback_size;       // size if PAIR is a rollback node, 0 otherwise, used only for engine status
    long cache_pressure_size; // amount PAIR contributes to cache pressure, is sum of buffer sizes and workdone counts
    bool is_valid;
} PAIR_ATTR;

// Builds a PAIR_ATTR whose `size` is the given value, whose other size
// counters are all 0, and whose `is_valid` flag is true.
//
// Written with value-initialization plus member assignment rather than
// designated initializers: `.field = value` is only standard C++ from C++20
// on, so the previous form relied on a compiler extension.
static inline PAIR_ATTR make_pair_attr(long size) {
    PAIR_ATTR result = {};  // zero every counter, is_valid = false
    result.size = size;
    result.is_valid = true;
    return result;
}
|
||||
|
||||
void toku_set_cleaner_period (CACHETABLE ct, uint32_t new_period);
|
||||
uint32_t toku_get_cleaner_period_unlocked (CACHETABLE ct);
|
||||
void toku_set_cleaner_iterations (CACHETABLE ct, uint32_t new_iterations);
|
||||
uint32_t toku_get_cleaner_iterations (CACHETABLE ct);
|
||||
uint32_t toku_get_cleaner_iterations_unlocked (CACHETABLE ct);
|
||||
void toku_set_enable_partial_eviction (CACHETABLE ct, bool enabled);
|
||||
bool toku_get_enable_partial_eviction (CACHETABLE ct);
|
||||
|
||||
// cachetable operations
|
||||
|
||||
// create and initialize a cache table
|
||||
// size_limit is the upper limit on the sum of the sizes of the values in the table
|
||||
// pass 0 if you want the default
|
||||
int toku_cachetable_create_ex(CACHETABLE *result, long size_limit,
|
||||
unsigned long client_pool_threads,
|
||||
unsigned long cachetable_pool_threads,
|
||||
unsigned long checkpoint_pool_threads,
|
||||
LSN initial_lsn, struct tokulogger *logger);
|
||||
|
||||
// Shorthand for toku_cachetable_create_ex() that passes 0 for the three
// thread-pool sizes (client, cachetable, checkpoint).
// The expansion is a single expression with no trailing semicolon, so the
// macro can be used wherever a function call can: inside a condition, as an
// operand, or as the sole statement of an unbraced if/else.
#define toku_cachetable_create(r, s, l, o) \
    toku_cachetable_create_ex((r), (s), 0, 0, 0, (l), (o))
|
||||
|
||||
// Create a new cachetable.
|
||||
// Effects: a new cachetable is created and initialized.
|
||||
// The cachetable pointer is stored into result.
|
||||
// The sum of the sizes of the memory objects is set to size_limit, in whatever
|
||||
// units make sense to the user of the cachetable.
|
||||
// Returns: If success, returns 0 and result points to the new cachetable. Otherwise,
|
||||
// returns an error number.
|
||||
|
||||
// Returns a pointer to the checkpointer within the given cachetable.
|
||||
CHECKPOINTER toku_cachetable_get_checkpointer(CACHETABLE ct);
|
||||
|
||||
// What is the cachefile that goes with a particular filenum?
|
||||
// During a transaction, we cannot reuse a filenum.
|
||||
int toku_cachefile_of_filenum (CACHETABLE t, FILENUM filenum, CACHEFILE *cf);
|
||||
|
||||
// What is the cachefile that goes with a particular iname (relative to env)?
|
||||
// During a transaction, we cannot reuse an iname.
|
||||
int toku_cachefile_of_iname_in_env (CACHETABLE ct, const char *iname_in_env, CACHEFILE *cf);
|
||||
|
||||
// Get the iname (within the cwd) associated with the cachefile
|
||||
// Return the filename
|
||||
char *toku_cachefile_fname_in_cwd (CACHEFILE cf);
|
||||
|
||||
void toku_cachetable_begin_checkpoint (CHECKPOINTER cp, struct tokulogger *logger);
|
||||
|
||||
void toku_cachetable_end_checkpoint(CHECKPOINTER cp, struct tokulogger *logger,
|
||||
void (*testcallback_f)(void*), void * testextra);
|
||||
|
||||
|
||||
// Shuts down checkpoint thread
|
||||
// Requires no locks be held that are taken by the checkpoint function
|
||||
void toku_cachetable_minicron_shutdown(CACHETABLE ct);
|
||||
|
||||
// Prepare to close the cachetable. This informs the cachetable that it is about to be closed
|
||||
// so that it can tune its checkpoint resource use.
|
||||
void toku_cachetable_prepare_close(CACHETABLE ct);
|
||||
|
||||
// Close the cachetable.
|
||||
// Effects: All of the memory objects are flushed to disk, and the cachetable is destroyed.
|
||||
void toku_cachetable_close(CACHETABLE *ct);
|
||||
|
||||
// Open a file and bind the file to a new cachefile object. (For use by test programs only.)
|
||||
int toku_cachetable_openf(CACHEFILE *,CACHETABLE, const char *fname_in_env, int flags, mode_t mode);
|
||||
|
||||
// Bind a file to a new cachefile object.
|
||||
int toku_cachetable_openfd(CACHEFILE *,CACHETABLE, int fd,
|
||||
const char *fname_relative_to_env);
|
||||
int toku_cachetable_openfd_with_filenum (CACHEFILE *,CACHETABLE, int fd,
|
||||
const char *fname_in_env,
|
||||
FILENUM filenum, bool* was_open);
|
||||
|
||||
// reserve a unique filenum
|
||||
FILENUM toku_cachetable_reserve_filenum(CACHETABLE ct);
|
||||
|
||||
// Effect: Reserve a fraction of the cachetable memory.
|
||||
// Returns the amount reserved.
|
||||
// To return the memory to the cachetable, call toku_cachetable_release_reserved_memory
|
||||
// Requires 0<fraction<1.
|
||||
uint64_t toku_cachetable_reserve_memory(CACHETABLE, double fraction, uint64_t upper_bound);
|
||||
void toku_cachetable_release_reserved_memory(CACHETABLE, uint64_t);
|
||||
|
||||
// cachefile operations
|
||||
|
||||
// Does an fsync of a cachefile.
|
||||
void toku_cachefile_fsync(CACHEFILE cf);
|
||||
|
||||
// Cost classification that a CACHETABLE_PARTIAL_EVICTION_EST_CALLBACK reports
// through its `cost` out-parameter.
enum partial_eviction_cost {
|
||||
PE_CHEAP=0, // running partial eviction is cheap, and can be done on the client thread
|
||||
PE_EXPENSIVE=1, // running partial eviction is expensive, and should not be done on the client thread
|
||||
};
|
||||
|
||||
// Whether a cachetable pair is clean or dirty with respect to external memory.
// Passed as the `dirty` argument of the unpin functions and in the
// `dependent_dirty` arrays of the *_with_dep_pairs functions.
|
||||
enum cachetable_dirty {
|
||||
CACHETABLE_CLEAN=0, // the cached object is clean WRT the cachefile
|
||||
CACHETABLE_DIRTY=1, // the cached object is dirty WRT the cachefile
|
||||
};
|
||||
|
||||
// The flush callback is called when a key value pair is being written to storage and possibly removed from the cachetable.
|
||||
// When write_me is true, the value should be written to storage.
|
||||
// When keep_me is false, the value should be freed.
|
||||
// When for_checkpoint is true, this was a 'pending' write
|
||||
// Returns: nothing (the callback type is declared void).
|
||||
// Can access fd (fd is protected by a readlock during call)
|
||||
typedef void (*CACHETABLE_FLUSH_CALLBACK)(CACHEFILE, int fd, CACHEKEY key, void *value, void **disk_data, void *write_extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool is_clone);
|
||||
|
||||
// The fetch callback is called when a thread is attempting to get and pin a memory
|
||||
// object and it is not in the cachetable.
|
||||
// Returns: 0 if success, otherwise an error number. The address and size of the object
|
||||
// associated with the key are returned.
|
||||
// Can access fd (fd is protected by a readlock during call)
|
||||
typedef int (*CACHETABLE_FETCH_CALLBACK)(CACHEFILE, PAIR p, int fd, CACHEKEY key, uint32_t fullhash, void **value_data, void **disk_data, PAIR_ATTR *sizep, int *dirtyp, void *read_extraargs);
|
||||
|
||||
// The cachetable calls the partial eviction estimate callback to determine if
|
||||
// partial eviction is a cheap operation that may be called on the client thread
|
||||
// or whether partial eviction is expensive and should be done on a background (writer) thread.
|
||||
// The callback conveys this information by setting cost to either PE_CHEAP or PE_EXPENSIVE.
|
||||
// If cost is PE_EXPENSIVE, then the callback also sets bytes_freed_estimate
|
||||
// to return an estimate of the number of bytes it will free
|
||||
// so that the cachetable can estimate how much data is being evicted on background threads.
|
||||
// If cost is PE_CHEAP, then the callback does not set bytes_freed_estimate.
|
||||
typedef void (*CACHETABLE_PARTIAL_EVICTION_EST_CALLBACK)(void *ftnode_pv, void* disk_data, long* bytes_freed_estimate, enum partial_eviction_cost *cost, void *write_extraargs);
|
||||
|
||||
// The cachetable calls the partial eviction callback to possibly try and partially evict pieces
|
||||
// of the PAIR. The callback determines the strategy for what to evict. The callback may choose to free
|
||||
// nothing, or may choose to free as much as possible. When the partial eviction callback is finished,
|
||||
// it must call finalize with the new PAIR_ATTR and the given finalize_extra. After this point, the
|
||||
// write lock will be released on the PAIR and it is no longer safe to operate on any of the passed arguments.
|
||||
// This is useful for doing expensive cleanup work outside of the PAIR's write lock (such as destroying objects, etc)
|
||||
//
|
||||
// on entry, requires a write lock to be held on the PAIR in the cachetable while this function is called
|
||||
// on exit, the finalize continuation is called
|
||||
typedef int (*CACHETABLE_PARTIAL_EVICTION_CALLBACK)(void *ftnode_pv, PAIR_ATTR old_attr, void *write_extraargs,
|
||||
void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra);
|
||||
|
||||
// The cachetable calls this function to determine if get_and_pin call requires a partial fetch. If this function returns true,
|
||||
// then the cachetable will subsequently call CACHETABLE_PARTIAL_FETCH_CALLBACK to perform
|
||||
// a partial fetch. If this function returns false, then the PAIR's value is returned to the caller as is.
|
||||
//
|
||||
// An alternative to having this callback is to always call CACHETABLE_PARTIAL_FETCH_CALLBACK, and let
|
||||
// CACHETABLE_PARTIAL_FETCH_CALLBACK decide whether to do any partial fetching or not.
|
||||
// There is no particular reason why this alternative was not chosen.
|
||||
// Requires: a read lock to be held on the PAIR
|
||||
typedef bool (*CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK)(void *ftnode_pv, void *read_extraargs);
|
||||
|
||||
// The cachetable calls the partial fetch callback when a thread needs to read or decompress a subset of a PAIR into memory.
|
||||
// An example is needing to read a basement node into memory. Another example is decompressing an internal node's
|
||||
// message buffer. The cachetable determines if a partial fetch is necessary by first calling CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK.
|
||||
// The new PAIR_ATTR of the PAIR is returned in sizep
|
||||
// Can access fd (fd is protected by a readlock during call)
|
||||
// Returns: 0 if success, otherwise an error number.
|
||||
typedef int (*CACHETABLE_PARTIAL_FETCH_CALLBACK)(void *value_data, void* disk_data, void *read_extraargs, int fd, PAIR_ATTR *sizep);
|
||||
|
||||
// The cachetable calls the put callback during a cachetable_put command to provide the opaque PAIR.
|
||||
// The PAIR can then be used to later unpin the pair.
|
||||
// Returns: nothing (the callback type is declared void).
|
||||
typedef void (*CACHETABLE_PUT_CALLBACK)(CACHEKEY key, void *value_data, PAIR p);
|
||||
|
||||
// TODO(leif) XXX TODO XXX
|
||||
typedef int (*CACHETABLE_CLEANER_CALLBACK)(void *ftnode_pv, BLOCKNUM blocknum, uint32_t fullhash, void *write_extraargs);
|
||||
|
||||
typedef void (*CACHETABLE_CLONE_CALLBACK)(void* value_data, void** cloned_value_data, long* clone_size, PAIR_ATTR* new_attr, bool for_checkpoint, void* write_extraargs);
|
||||
|
||||
typedef void (*CACHETABLE_CHECKPOINT_COMPLETE_CALLBACK)(void *value_data);
|
||||
|
||||
// Bundle of the write-side callbacks for a pair plus the extra argument handed
// to them. Passed by value to the put / get_and_pin / prefetch functions
// declared in this header.
typedef struct {
|
||||
CACHETABLE_FLUSH_CALLBACK flush_callback;
|
||||
CACHETABLE_PARTIAL_EVICTION_EST_CALLBACK pe_est_callback;
|
||||
CACHETABLE_PARTIAL_EVICTION_CALLBACK pe_callback;
|
||||
CACHETABLE_CLEANER_CALLBACK cleaner_callback;
|
||||
CACHETABLE_CLONE_CALLBACK clone_callback;
|
||||
CACHETABLE_CHECKPOINT_COMPLETE_CALLBACK checkpoint_complete_callback;
|
||||
// NOTE(review): clone_callback also takes a write_extraargs parameter, so
// it presumably receives this value too -- confirm.
void* write_extraargs; // parameter for flush_callback, pe_est_callback, pe_callback, and cleaner_callback
|
||||
} CACHETABLE_WRITE_CALLBACK;
|
||||
|
||||
typedef void (*CACHETABLE_GET_KEY_AND_FULLHASH)(CACHEKEY* cachekey, uint32_t* fullhash, void* extra);
|
||||
|
||||
typedef void (*CACHETABLE_REMOVE_KEY)(CACHEKEY* cachekey, bool for_checkpoint, void* extra);
|
||||
|
||||
void toku_cachefile_set_userdata(CACHEFILE cf, void *userdata,
|
||||
void (*log_fassociate_during_checkpoint)(CACHEFILE, void*),
|
||||
void (*close_userdata)(CACHEFILE, int, void*, bool, LSN),
|
||||
void (*free_userdata)(CACHEFILE, void*),
|
||||
void (*checkpoint_userdata)(CACHEFILE, int, void*),
|
||||
void (*begin_checkpoint_userdata)(LSN, void*),
|
||||
void (*end_checkpoint_userdata)(CACHEFILE, int, void*),
|
||||
void (*note_pin_by_checkpoint)(CACHEFILE, void*),
|
||||
void (*note_unpin_by_checkpoint)(CACHEFILE, void*));
|
||||
// Effect: Store some cachefile-specific user data. When the last reference to a cachefile is closed, we call close_userdata().
|
||||
// Before starting a checkpoint, we call begin_checkpoint_userdata().
|
||||
// When the cachefile needs to be checkpointed, we call checkpoint_userdata().
|
||||
// If userdata is already non-NULL, then we simply overwrite it.
|
||||
|
||||
void *toku_cachefile_get_userdata(CACHEFILE);
|
||||
// Effect: Get the user data.
|
||||
|
||||
CACHETABLE toku_cachefile_get_cachetable(CACHEFILE cf);
|
||||
// Effect: Get the cachetable.
|
||||
|
||||
CACHEFILE toku_pair_get_cachefile(PAIR);
|
||||
// Effect: Get the cachefile of the pair
|
||||
|
||||
void toku_cachetable_swap_pair_values(PAIR old_pair, PAIR new_pair);
|
||||
// Effect: Swaps the value_data of old_pair and new_pair.
|
||||
// Requires: both old_pair and new_pair to be pinned with write locks.
|
||||
|
||||
// Kind of lock requested when pinning a PAIR; passed as `lock_type` to the
// get_and_pin variants and the maybe_get_and_pin functions.
// NOTE(review): by name, PL_READ requests a read lock and the two PL_WRITE_*
// values request a write lock with a cheap/expensive hint -- confirm the
// exact meaning of the hint against the implementation.
typedef enum {
|
||||
PL_READ = 0,
|
||||
PL_WRITE_CHEAP,
|
||||
PL_WRITE_EXPENSIVE
|
||||
} pair_lock_type;
|
||||
|
||||
// put something into the cachetable and checkpoint dependent pairs
|
||||
// if the checkpointing is necessary
|
||||
void toku_cachetable_put_with_dep_pairs(
|
||||
CACHEFILE cachefile,
|
||||
CACHETABLE_GET_KEY_AND_FULLHASH get_key_and_fullhash,
|
||||
void *value,
|
||||
PAIR_ATTR attr,
|
||||
CACHETABLE_WRITE_CALLBACK write_callback,
|
||||
void *get_key_and_fullhash_extra,
|
||||
uint32_t num_dependent_pairs, // number of dependent pairs that we may need to checkpoint
|
||||
PAIR* dependent_pairs,
|
||||
enum cachetable_dirty* dependent_dirty, // array stating dirty/cleanness of dependent pairs
|
||||
CACHEKEY* key,
|
||||
uint32_t* fullhash,
|
||||
CACHETABLE_PUT_CALLBACK put_callback
|
||||
);
|
||||
|
||||
// Put a memory object into the cachetable.
|
||||
// Effects: Lookup the key in the cachetable. If the key is not in the cachetable,
|
||||
// then insert the pair and pin it. Otherwise return an error. Some of the key
|
||||
// value pairs may be evicted from the cachetable when the cachetable gets too big.
|
||||
void toku_cachetable_put(CACHEFILE cf, CACHEKEY key, uint32_t fullhash,
|
||||
void *value, PAIR_ATTR size,
|
||||
CACHETABLE_WRITE_CALLBACK write_callback,
|
||||
CACHETABLE_PUT_CALLBACK put_callback
|
||||
);
|
||||
|
||||
// Get and pin the memory object of a PAIR, and write dependent pairs to disk
|
||||
// if the dependent pairs are pending a checkpoint.
|
||||
// Effects: If the memory object is in the cachetable, acquire a PAIR lock on it.
|
||||
// Otherwise, fetch it from storage by calling the fetch callback. If the fetch
|
||||
// succeeded, add the memory object to the cachetable with a PAIR lock on it.
|
||||
// Before returning to the user, if the PAIR object being retrieved, or any of the
|
||||
// dependent pairs passed in as parameters must be written to disk for checkpoint,
|
||||
// then the required PAIRs are written to disk for checkpoint.
|
||||
// KEY PROPERTY OF DEPENDENT PAIRS: They are already locked by the client
|
||||
// Returns: 0 if the memory object is in memory, otherwise an error number.
|
||||
int toku_cachetable_get_and_pin_with_dep_pairs (
|
||||
CACHEFILE cachefile,
|
||||
CACHEKEY key,
|
||||
uint32_t fullhash,
|
||||
void**value,
|
||||
CACHETABLE_WRITE_CALLBACK write_callback,
|
||||
CACHETABLE_FETCH_CALLBACK fetch_callback,
|
||||
CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback,
|
||||
CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback,
|
||||
pair_lock_type lock_type,
|
||||
void* read_extraargs, // parameter for fetch_callback, pf_req_callback, and pf_callback
|
||||
uint32_t num_dependent_pairs, // number of dependent pairs that we may need to checkpoint
|
||||
PAIR* dependent_pairs,
|
||||
enum cachetable_dirty* dependent_dirty // array stating dirty/cleanness of dependent pairs
|
||||
);
|
||||
|
||||
// Get and pin a memory object.
|
||||
// Effects: If the memory object is in the cachetable acquire the PAIR lock on it.
|
||||
// Otherwise, fetch it from storage by calling the fetch callback. If the fetch
|
||||
// succeeded, add the memory object to the cachetable with a read lock on it.
|
||||
// Returns: 0 if the memory object is in memory, otherwise an error number.
|
||||
int toku_cachetable_get_and_pin (
|
||||
CACHEFILE cachefile,
|
||||
CACHEKEY key,
|
||||
uint32_t fullhash,
|
||||
void**value,
|
||||
CACHETABLE_WRITE_CALLBACK write_callback,
|
||||
CACHETABLE_FETCH_CALLBACK fetch_callback,
|
||||
CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback,
|
||||
CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback,
|
||||
bool may_modify_value,
|
||||
void* read_extraargs // parameter for fetch_callback, pf_req_callback, and pf_callback
|
||||
);
|
||||
|
||||
// does partial fetch on a pinned pair
|
||||
void toku_cachetable_pf_pinned_pair(
|
||||
void* value,
|
||||
CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback,
|
||||
void* read_extraargs,
|
||||
CACHEFILE cf,
|
||||
CACHEKEY key,
|
||||
uint32_t fullhash
|
||||
);
|
||||
|
||||
// One node of a singly linked list (via `next`) of unlock callbacks. The list
// is handed to toku_cachetable_get_and_pin_nonblocking, which is documented as
// calling the functions in it before fetching.
struct unlockers {
|
||||
// NOTE(review): presumably true while the lock this entry releases is
// still held -- confirm who sets and clears it.
bool locked;
|
||||
void (*f)(void* extra); // the unlock function
|
||||
void *extra; // NOTE(review): presumably the argument passed to f -- confirm
|
||||
struct unlockers *next;
|
||||
};
|
||||
typedef struct unlockers *UNLOCKERS;
|
||||
|
||||
// Effect: If the block is in the cachetable, then return it.
|
||||
// Otherwise call the functions in unlockers, fetch the data (but don't pin it, since we'll just end up pinning it again later), and return TOKUDB_TRY_AGAIN.
|
||||
int toku_cachetable_get_and_pin_nonblocking (
|
||||
CACHEFILE cf,
|
||||
CACHEKEY key,
|
||||
uint32_t fullhash,
|
||||
void**value,
|
||||
CACHETABLE_WRITE_CALLBACK write_callback,
|
||||
CACHETABLE_FETCH_CALLBACK fetch_callback,
|
||||
CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback,
|
||||
CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback,
|
||||
pair_lock_type lock_type,
|
||||
void *read_extraargs, // parameter for fetch_callback, pf_req_callback, and pf_callback
|
||||
UNLOCKERS unlockers
|
||||
);
|
||||
|
||||
int toku_cachetable_maybe_get_and_pin (CACHEFILE, CACHEKEY, uint32_t /*fullhash*/, pair_lock_type, void**);
|
||||
// Effect: Maybe get and pin a memory object.
|
||||
// This function is similar to the get_and_pin function except that it
|
||||
// will not attempt to fetch a memory object that is not in the cachetable or requires any kind of blocking to get it.
|
||||
// Returns: If the item is already in memory, then return 0 and store it in the
|
||||
// void**. If the item is not in memory, then return a nonzero error number.
|
||||
|
||||
int toku_cachetable_maybe_get_and_pin_clean (CACHEFILE, CACHEKEY, uint32_t /*fullhash*/, pair_lock_type, void**);
|
||||
// Effect: Like maybe get and pin, but may pin a clean pair.
|
||||
|
||||
int toku_cachetable_get_attr(CACHEFILE, CACHEKEY, uint32_t /*fullhash*/, PAIR_ATTR *);
|
||||
// Effect: get the attributes for cachekey
|
||||
// Returns: 0 if success, non-zero if cachekey is not cached
|
||||
// Notes: this function exists for tests
|
||||
|
||||
int toku_cachetable_unpin(CACHEFILE, PAIR, enum cachetable_dirty dirty, PAIR_ATTR size);
|
||||
// Effect: Unpin a memory object
|
||||
// Modifies: If the memory object is in the cachetable, then OR the dirty flag,
|
||||
// update the size, and release the read lock on the memory object.
|
||||
// Returns: 0 if success, otherwise returns an error number.
|
||||
// Requires: The ct is locked.
|
||||
|
||||
int toku_cachetable_unpin_ct_prelocked_no_flush(CACHEFILE, PAIR, enum cachetable_dirty dirty, PAIR_ATTR size);
|
||||
// Effect: The same as toku_cachetable_unpin, except that the ct must not be locked.
|
||||
// Requires: The ct is NOT locked.
|
||||
|
||||
int toku_cachetable_unpin_and_remove (CACHEFILE, PAIR, CACHETABLE_REMOVE_KEY, void*); /* Removing something already present is OK. */
|
||||
// Effect: Remove an object from the cachetable. Don't write it back.
|
||||
// Requires: The object must be pinned exactly once.
|
||||
|
||||
// test-only wrapper that use CACHEKEY and fullhash
|
||||
int toku_test_cachetable_unpin(CACHEFILE, CACHEKEY, uint32_t fullhash, enum cachetable_dirty dirty, PAIR_ATTR size);
|
||||
|
||||
// test-only wrapper that use CACHEKEY and fullhash
|
||||
int toku_test_cachetable_unpin_ct_prelocked_no_flush(CACHEFILE, CACHEKEY, uint32_t fullhash, enum cachetable_dirty dirty, PAIR_ATTR size);
|
||||
|
||||
// test-only wrapper that use CACHEKEY
|
||||
int toku_test_cachetable_unpin_and_remove (CACHEFILE, CACHEKEY, CACHETABLE_REMOVE_KEY, void*); /* Removing something already present is OK. */
|
||||
|
||||
int toku_cachefile_prefetch(CACHEFILE cf, CACHEKEY key, uint32_t fullhash,
|
||||
CACHETABLE_WRITE_CALLBACK write_callback,
|
||||
CACHETABLE_FETCH_CALLBACK fetch_callback,
|
||||
CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback,
|
||||
CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback,
|
||||
void *read_extraargs, // parameter for fetch_callback, pf_req_callback, and pf_callback
|
||||
bool *doing_prefetch);
|
||||
// Effect: Prefetch a memory object for a given key into the cachetable
|
||||
// Precondition: The cachetable mutex is NOT held.
|
||||
// Postcondition: The cachetable mutex is NOT held.
|
||||
// Returns: 0 if success
|
||||
// Implement Note:
|
||||
// 1) The pair's rwlock is acquired (for write) (there is not a deadlock here because the rwlock is a pthread_cond_wait using the cachetable mutex).
|
||||
// Case A: Single-threaded.
|
||||
// A1) Call cachetable_fetch_pair, which
|
||||
// a) Obtains a readlock on the cachefile's fd (to prevent multiple readers at once)
|
||||
// b) Unlocks the cachetable
|
||||
// c) Does the fetch off disk.
|
||||
// d) Locks the cachetable
|
||||
// e) Unlocks the fd lock.
|
||||
// f) Unlocks the pair rwlock.
|
||||
// Case B: Multithreaded
|
||||
// a) Enqueue a cachetable_reader into the workqueue.
|
||||
// b) Unlock the cache table.
|
||||
// c) The enqueue'd job later locks the cachetable, and calls cachetable_fetch_pair (doing the steps in A1 above).
|
||||
|
||||
int toku_cachetable_assert_all_unpinned (CACHETABLE);
|
||||
|
||||
int toku_cachefile_count_pinned (CACHEFILE, int /*printthem*/ );
|
||||
|
||||
// Close the cachefile.
|
||||
// Effects: All of the cached objects associated with the cachefile are evicted from
|
||||
// the cachetable. The flush callback is called for each of these objects. The
|
||||
// close function does not return until all of the objects are evicted. The cachefile
|
||||
// object is freed.
|
||||
// If oplsn_valid is true then use oplsn as the LSN of the close instead of asking the logger. oplsn_valid being true is only allowed during recovery, and requires that you are removing the last reference (otherwise the lsn wouldn't make it in.)
|
||||
void toku_cachefile_close (CACHEFILE*, bool oplsn_valid, LSN oplsn);
|
||||
|
||||
// Return on success (different from pread and pwrite)
|
||||
//int cachefile_pwrite (CACHEFILE, const void *buf, size_t count, toku_off_t offset);
|
||||
//int cachefile_pread (CACHEFILE, void *buf, size_t count, toku_off_t offset);
|
||||
|
||||
// Get the file descriptor associated with the cachefile
|
||||
// Return the file descriptor
|
||||
// Grabs a read lock protecting the fd
|
||||
int toku_cachefile_get_fd (CACHEFILE);
|
||||
|
||||
// Get the iname (within the environment) associated with the cachefile
|
||||
// Return the filename
|
||||
char * toku_cachefile_fname_in_env (CACHEFILE cf);
|
||||
|
||||
void toku_cachefile_set_fname_in_env(CACHEFILE cf, char *new_fname_in_env);
|
||||
|
||||
// Make it so when the cachefile closes, the underlying file is unlinked
|
||||
void toku_cachefile_unlink_on_close(CACHEFILE cf);
|
||||
|
||||
// is this cachefile marked as unlink on close?
|
||||
bool toku_cachefile_is_unlink_on_close(CACHEFILE cf);
|
||||
|
||||
void toku_cachefile_skip_log_recover_on_close(CACHEFILE cf);
|
||||
void toku_cachefile_do_log_recover_on_close(CACHEFILE cf);
|
||||
bool toku_cachefile_is_skip_log_recover_on_close(CACHEFILE cf);
|
||||
|
||||
// Return the logger associated with the cachefile
|
||||
struct tokulogger *toku_cachefile_logger(CACHEFILE cf);
|
||||
|
||||
// Return the filenum associated with the cachefile
|
||||
FILENUM toku_cachefile_filenum(CACHEFILE cf);
|
||||
|
||||
// Effect: Return a 32-bit hash key. The hash key shall be suitable for using with bitmasking for a table of size power-of-two.
|
||||
uint32_t toku_cachetable_hash(CACHEFILE cf, CACHEKEY key);
|
||||
|
||||
uint32_t toku_cachefile_fullhash_of_header(CACHEFILE cf);
|
||||
|
||||
// debug functions
|
||||
|
||||
// Print the contents of the cachetable. This is mainly used from gdb
|
||||
void toku_cachetable_print_state (CACHETABLE ct);
|
||||
|
||||
// Get the state of the cachetable. This is used to verify the cachetable
|
||||
void toku_cachetable_get_state(CACHETABLE ct, int *num_entries_ptr, int *hash_size_ptr, long *size_current_ptr, long *size_limit_ptr);
|
||||
|
||||
// Get the state of a cachetable entry by key. This is used to verify the cachetable
|
||||
int toku_cachetable_get_key_state(CACHETABLE ct, CACHEKEY key, CACHEFILE cf,
|
||||
void **value_ptr,
|
||||
int *dirty_ptr,
|
||||
long long *pin_ptr,
|
||||
long *size_ptr);
|
||||
|
||||
// Verify the whole cachetable that the cachefile is in. Slow.
|
||||
void toku_cachefile_verify (CACHEFILE cf);
|
||||
|
||||
// Verify the cachetable. Slow.
|
||||
void toku_cachetable_verify (CACHETABLE t);
|
||||
|
||||
// Not for use in production, but useful for testing.
|
||||
void toku_cachetable_print_hash_histogram (void) __attribute__((__visibility__("default")));
|
||||
|
||||
void toku_cachetable_maybe_flush_some(CACHETABLE ct);
|
||||
|
||||
// for stat64
|
||||
uint64_t toku_cachefile_size(CACHEFILE cf);
|
||||
|
||||
void toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS s);
|
||||
|
||||
void toku_cachetable_set_env_dir(CACHETABLE ct, const char *env_dir);
|
||||
char * toku_construct_full_name(int count, ...);
|
||||
char * toku_cachetable_get_fname_in_cwd(CACHETABLE ct, const char * fname_in_env);
|
||||
|
||||
void cachefile_kibbutz_enq (CACHEFILE cf, void (*f)(void*), void *extra);
|
||||
// Effect: Add a job to the cachetable's collection of work to do. Note that function f must call remove_background_job_from_cf()
|
||||
|
||||
void remove_background_job_from_cf (CACHEFILE cf);
|
||||
// Effect: When a kibbutz job or cleaner thread finishes in a cachefile,
|
||||
// the cachetable must be notified.
|
||||
|
||||
// test-only function
|
||||
int toku_cachetable_get_checkpointing_user_data_status(void);
|
||||
|
||||
// test-only function
|
||||
int toku_cleaner_thread_for_test(CACHETABLE ct);
|
||||
int toku_cleaner_thread(void *cleaner_v);
|
||||
|
||||
// test function. Exported in the ydb layer and used by tests that want to run DRD
|
||||
// The default of 1M is too high for drd tests, so this is a mechanism to set a smaller number.
|
||||
void toku_pair_list_set_lock_size(uint32_t num_locks);
|
||||
|
||||
// Used by ft-ops.cc to figure out if it has the write lock on a pair.
|
||||
// Pretty hacky and not accurate enough, should be improved at the frwlock
|
||||
// layer.
|
||||
__attribute__((const,nonnull))
|
||||
bool toku_ctpair_is_write_locked(PAIR pair);
|
@ -1,333 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
/***********
|
||||
* The purpose of this file is to implement the high-level logic for
|
||||
* taking a checkpoint.
|
||||
*
|
||||
* There are three locks used for taking a checkpoint. They are listed below.
|
||||
*
|
||||
* NOTE: The reader-writer locks may be held by either multiple clients
|
||||
* or the checkpoint function. (The checkpoint function has the role
|
||||
* of the writer, the clients have the reader roles.)
|
||||
*
|
||||
* - multi_operation_lock
|
||||
* This is a new reader-writer lock.
|
||||
* This lock is held by the checkpoint function only for as long as is required to
|
||||
* set all the "pending" bits and to create the checkpoint-in-progress versions
|
||||
* of the header and translation table (btt).
|
||||
* The following operations must take the multi_operation_lock:
|
||||
* - any set of operations that must be atomic with respect to begin checkpoint
|
||||
*
|
||||
* - checkpoint_safe_lock
|
||||
* This is a new reader-writer lock.
|
||||
* This lock is held for the entire duration of the checkpoint.
|
||||
* It is used to prevent more than one checkpoint from happening at a time
|
||||
* (the checkpoint function is non-re-entrant), and to prevent certain operations
|
||||
* that should not happen during a checkpoint.
|
||||
* The following operations must take the checkpoint_safe lock:
|
||||
* - delete a dictionary
|
||||
* - rename a dictionary
|
||||
* The application can use this lock to disable checkpointing during other sensitive
|
||||
* operations, such as making a backup copy of the database.
|
||||
*
|
||||
* Once the "pending" bits are set and the snapshots are taken of the header and btt,
|
||||
* most normal database operations are permitted to resume.
|
||||
*
|
||||
*
|
||||
*
|
||||
*****/
|
||||
|
||||
#include <my_global.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "portability/toku_portability.h"
|
||||
#include "portability/toku_atomic.h"
|
||||
|
||||
#include "ft/cachetable/cachetable.h"
|
||||
#include "ft/cachetable/checkpoint.h"
|
||||
#include "ft/ft.h"
|
||||
#include "ft/logger/log-internal.h"
|
||||
#include "ft/logger/recover.h"
|
||||
#include "util/frwlock.h"
|
||||
#include "util/status.h"
|
||||
|
||||
toku_instr_key *checkpoint_safe_mutex_key;
|
||||
toku_instr_key *checkpoint_safe_rwlock_key;
|
||||
toku_instr_key *multi_operation_lock_key;
|
||||
toku_instr_key *low_priority_multi_operation_lock_key;
|
||||
|
||||
toku_instr_key *rwlock_cond_key;
|
||||
toku_instr_key *rwlock_wait_read_key;
|
||||
toku_instr_key *rwlock_wait_write_key;
|
||||
|
||||
// Copy the checkpoint status block into *statp.
//   ct    - cachetable whose checkpoint period is reported in CP_PERIOD
//   statp - destination; receives a copy of cp_status
// No locks are taken here, so the copy may race with a running checkpoint.
void toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS statp) {
    cp_status.init();

    // Refresh the one entry that is owned by the cachetable rather than by
    // the checkpoint code before handing the snapshot out.
    const uint32_t period = toku_get_checkpoint_period_unlocked(ct);
    CP_STATUS_VAL(CP_PERIOD) = period;

    *statp = cp_status;
}
|
||||
|
||||
static LSN last_completed_checkpoint_lsn;
|
||||
|
||||
static toku_mutex_t checkpoint_safe_mutex;
|
||||
static toku::frwlock checkpoint_safe_lock;
|
||||
static toku_pthread_rwlock_t multi_operation_lock;
|
||||
static toku_pthread_rwlock_t low_priority_multi_operation_lock;
|
||||
|
||||
static bool initialized = false; // sanity check
|
||||
static volatile bool locked_mo = false; // true when the multi_operation write lock is held (by checkpoint)
|
||||
static volatile bool locked_cs = false; // true when the checkpoint_safe write lock is held (by checkpoint)
|
||||
static volatile uint64_t toku_checkpoint_begin_long_threshold = 1000000; // 1 second
|
||||
static volatile uint64_t toku_checkpoint_end_long_threshold = 1000000 * 60; // 1 minute
|
||||
|
||||
// Note following static functions are called from checkpoint internal logic only,
|
||||
// and use the "writer" calls for locking and unlocking.
|
||||
|
||||
static void
|
||||
multi_operation_lock_init(void) {
|
||||
pthread_rwlockattr_t attr;
|
||||
pthread_rwlockattr_init(&attr);
|
||||
#if defined(HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP)
|
||||
pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
|
||||
#else
|
||||
// TODO: need to figure out how to make writer-preferential rwlocks
|
||||
// happen on osx
|
||||
#endif
|
||||
toku_pthread_rwlock_init(
|
||||
*multi_operation_lock_key, &multi_operation_lock, &attr);
|
||||
toku_pthread_rwlock_init(*low_priority_multi_operation_lock_key,
|
||||
&low_priority_multi_operation_lock,
|
||||
&attr);
|
||||
pthread_rwlockattr_destroy(&attr);
|
||||
locked_mo = false;
|
||||
}
|
||||
|
||||
static void
|
||||
multi_operation_lock_destroy(void) {
|
||||
toku_pthread_rwlock_destroy(&multi_operation_lock);
|
||||
toku_pthread_rwlock_destroy(&low_priority_multi_operation_lock);
|
||||
}
|
||||
|
||||
static void
|
||||
multi_operation_checkpoint_lock(void) {
|
||||
toku_pthread_rwlock_wrlock(&low_priority_multi_operation_lock);
|
||||
toku_pthread_rwlock_wrlock(&multi_operation_lock);
|
||||
locked_mo = true;
|
||||
}
|
||||
|
||||
static void
|
||||
multi_operation_checkpoint_unlock(void) {
|
||||
locked_mo = false;
|
||||
toku_pthread_rwlock_wrunlock(&multi_operation_lock);
|
||||
toku_pthread_rwlock_wrunlock(&low_priority_multi_operation_lock);
|
||||
}
|
||||
|
||||
// Create the mutex backing checkpoint_safe_lock and initialize the frwlock
// on top of it. When built with TOKU_MYSQL_WITH_PFS the rwlock
// instrumentation key is passed along as well.
static void checkpoint_safe_lock_init(void) {
    toku_mutex_init(*checkpoint_safe_mutex_key, &checkpoint_safe_mutex, nullptr);
#ifdef TOKU_MYSQL_WITH_PFS
    checkpoint_safe_lock.init(&checkpoint_safe_mutex, *checkpoint_safe_rwlock_key);
#else
    checkpoint_safe_lock.init(&checkpoint_safe_mutex);
#endif
    locked_cs = false;
}
|
||||
|
||||
static void
|
||||
checkpoint_safe_lock_destroy(void) {
|
||||
checkpoint_safe_lock.deinit();
|
||||
toku_mutex_destroy(&checkpoint_safe_mutex);
|
||||
}
|
||||
|
||||
static void
|
||||
checkpoint_safe_checkpoint_lock(void) {
|
||||
toku_mutex_lock(&checkpoint_safe_mutex);
|
||||
checkpoint_safe_lock.write_lock(false);
|
||||
toku_mutex_unlock(&checkpoint_safe_mutex);
|
||||
locked_cs = true;
|
||||
}
|
||||
|
||||
static void
|
||||
checkpoint_safe_checkpoint_unlock(void) {
|
||||
locked_cs = false;
|
||||
toku_mutex_lock(&checkpoint_safe_mutex);
|
||||
checkpoint_safe_lock.write_unlock();
|
||||
toku_mutex_unlock(&checkpoint_safe_mutex);
|
||||
}
|
||||
|
||||
// toku_xxx_client_(un)lock() functions are only called from client code,
|
||||
// never from checkpoint code, and use the "reader" interface to the lock functions.
|
||||
|
||||
void
|
||||
toku_multi_operation_client_lock(void) {
|
||||
if (locked_mo)
|
||||
(void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_CLIENT_WAIT_ON_MO), 1);
|
||||
toku_pthread_rwlock_rdlock(&multi_operation_lock);
|
||||
}
|
||||
|
||||
void
|
||||
toku_multi_operation_client_unlock(void) {
|
||||
toku_pthread_rwlock_rdunlock(&multi_operation_lock);
|
||||
}
|
||||
|
||||
// Client-side (reader) acquisition of the low-priority multi_operation lock.
void toku_low_priority_multi_operation_client_lock(void)
{
    toku_pthread_rwlock_rdlock(&low_priority_multi_operation_lock);
}
|
||||
|
||||
// Client-side (reader) release of the low-priority multi_operation lock.
void toku_low_priority_multi_operation_client_unlock(void)
{
    toku_pthread_rwlock_rdunlock(&low_priority_multi_operation_lock);
}
|
||||
|
||||
void
|
||||
toku_checkpoint_safe_client_lock(void) {
|
||||
if (locked_cs)
|
||||
(void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_CLIENT_WAIT_ON_CS), 1);
|
||||
toku_mutex_lock(&checkpoint_safe_mutex);
|
||||
checkpoint_safe_lock.read_lock();
|
||||
toku_mutex_unlock(&checkpoint_safe_mutex);
|
||||
toku_multi_operation_client_lock();
|
||||
}
|
||||
|
||||
void
|
||||
toku_checkpoint_safe_client_unlock(void) {
|
||||
toku_mutex_lock(&checkpoint_safe_mutex);
|
||||
checkpoint_safe_lock.read_unlock();
|
||||
toku_mutex_unlock(&checkpoint_safe_mutex);
|
||||
toku_multi_operation_client_unlock();
|
||||
}
|
||||
|
||||
// Initialize the checkpoint mechanism, must be called before any client operations.
|
||||
void
|
||||
toku_checkpoint_init(void) {
|
||||
multi_operation_lock_init();
|
||||
checkpoint_safe_lock_init();
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
void
|
||||
toku_checkpoint_destroy(void) {
|
||||
multi_operation_lock_destroy();
|
||||
checkpoint_safe_lock_destroy();
|
||||
initialized = false;
|
||||
}
|
||||
|
||||
// Record how far the current checkpoint has progressed in CP_FOOTPRINT.
// Expects a local `footprint_offset` to be in scope at the point of use.
// The argument is parenthesized so that an expression argument (e.g. `a << b`)
// is evaluated as a unit before being added to the offset.
#define SET_CHECKPOINT_FOOTPRINT(x) CP_STATUS_VAL(CP_FOOTPRINT) = footprint_offset + (x)
|
||||
|
||||
|
||||
// Take a checkpoint of all currently open dictionaries
|
||||
int
|
||||
toku_checkpoint(CHECKPOINTER cp, TOKULOGGER logger,
|
||||
void (*callback_f)(void*), void * extra,
|
||||
void (*callback2_f)(void*), void * extra2,
|
||||
checkpoint_caller_t caller_id) {
|
||||
int footprint_offset = (int) caller_id * 1000;
|
||||
|
||||
assert(initialized);
|
||||
|
||||
(void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_WAITERS_NOW), 1);
|
||||
checkpoint_safe_checkpoint_lock();
|
||||
(void) toku_sync_fetch_and_sub(&CP_STATUS_VAL(CP_WAITERS_NOW), 1);
|
||||
|
||||
if (CP_STATUS_VAL(CP_WAITERS_NOW) > CP_STATUS_VAL(CP_WAITERS_MAX))
|
||||
CP_STATUS_VAL(CP_WAITERS_MAX) = CP_STATUS_VAL(CP_WAITERS_NOW); // threadsafe, within checkpoint_safe lock
|
||||
|
||||
SET_CHECKPOINT_FOOTPRINT(10);
|
||||
multi_operation_checkpoint_lock();
|
||||
SET_CHECKPOINT_FOOTPRINT(20);
|
||||
toku_ft_open_close_lock();
|
||||
|
||||
SET_CHECKPOINT_FOOTPRINT(30);
|
||||
CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN) = time(NULL);
|
||||
uint64_t t_checkpoint_begin_start = toku_current_time_microsec();
|
||||
toku_cachetable_begin_checkpoint(cp, logger);
|
||||
uint64_t t_checkpoint_begin_end = toku_current_time_microsec();
|
||||
|
||||
toku_ft_open_close_unlock();
|
||||
multi_operation_checkpoint_unlock();
|
||||
|
||||
SET_CHECKPOINT_FOOTPRINT(40);
|
||||
if (callback_f) {
|
||||
callback_f(extra); // callback is called with checkpoint_safe_lock still held
|
||||
}
|
||||
|
||||
uint64_t t_checkpoint_end_start = toku_current_time_microsec();
|
||||
toku_cachetable_end_checkpoint(cp, logger, callback2_f, extra2);
|
||||
uint64_t t_checkpoint_end_end = toku_current_time_microsec();
|
||||
|
||||
SET_CHECKPOINT_FOOTPRINT(50);
|
||||
if (logger) {
|
||||
last_completed_checkpoint_lsn = logger->last_completed_checkpoint_lsn;
|
||||
toku_logger_maybe_trim_log(logger, last_completed_checkpoint_lsn);
|
||||
CP_STATUS_VAL(CP_LAST_LSN) = last_completed_checkpoint_lsn.lsn;
|
||||
}
|
||||
|
||||
SET_CHECKPOINT_FOOTPRINT(60);
|
||||
CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END) = time(NULL);
|
||||
CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE) = CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN);
|
||||
CP_STATUS_VAL(CP_CHECKPOINT_COUNT)++;
|
||||
uint64_t duration = t_checkpoint_begin_end - t_checkpoint_begin_start;
|
||||
CP_STATUS_VAL(CP_BEGIN_TIME) += duration;
|
||||
if (duration >= toku_checkpoint_begin_long_threshold) {
|
||||
CP_STATUS_VAL(CP_LONG_BEGIN_TIME) += duration;
|
||||
CP_STATUS_VAL(CP_LONG_BEGIN_COUNT) += 1;
|
||||
}
|
||||
duration = t_checkpoint_end_end - t_checkpoint_end_start;
|
||||
CP_STATUS_VAL(CP_END_TIME) += duration;
|
||||
if (duration >= toku_checkpoint_end_long_threshold) {
|
||||
CP_STATUS_VAL(CP_LONG_END_TIME) += duration;
|
||||
CP_STATUS_VAL(CP_LONG_END_COUNT) += 1;
|
||||
}
|
||||
CP_STATUS_VAL(CP_TIME_CHECKPOINT_DURATION) += (uint64_t) ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END)) - ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN));
|
||||
CP_STATUS_VAL(CP_TIME_CHECKPOINT_DURATION_LAST) = (uint64_t) ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END)) - ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN));
|
||||
CP_STATUS_VAL(CP_FOOTPRINT) = 0;
|
||||
|
||||
checkpoint_safe_checkpoint_unlock();
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include <toku_race_tools.h>
|
||||
void __attribute__((__constructor__)) toku_checkpoint_helgrind_ignore(void);
|
||||
void
|
||||
toku_checkpoint_helgrind_ignore(void) {
|
||||
TOKU_VALGRIND_HG_DISABLE_CHECKING(&cp_status, sizeof cp_status);
|
||||
TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_mo, sizeof locked_mo);
|
||||
TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_cs, sizeof locked_cs);
|
||||
}
|
||||
|
||||
#undef SET_CHECKPOINT_FOOTPRINT
|
@ -1,120 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "ft/cachetable/cachetable.h"
|
||||
|
||||
//Effect: Change [end checkpoint (n) - begin checkpoint (n+1)] delay to
|
||||
// new_period seconds. 0 means disable.
|
||||
void toku_set_checkpoint_period(CACHETABLE ct, uint32_t new_period);
|
||||
|
||||
uint32_t toku_get_checkpoint_period_unlocked(CACHETABLE ct);
|
||||
|
||||
|
||||
/******
|
||||
*
|
||||
* NOTE: checkpoint_safe_lock is highest level lock
|
||||
* multi_operation_lock is next level lock
|
||||
* ydb_big_lock is next level lock
|
||||
*
|
||||
* Locks must always be taken in this sequence (highest level first).
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/******
|
||||
* Client code must hold the checkpoint_safe lock during the following operations:
|
||||
* - delete a dictionary via DB->remove
|
||||
* - delete a dictionary via DB_TXN->abort(txn) (where txn created a dictionary)
|
||||
* - rename a dictionary //TODO: Handlerton rename needs to take this
|
||||
* //TODO: Handlerton rename needs to be recoded for transaction recovery
|
||||
*****/
|
||||
|
||||
void toku_checkpoint_safe_client_lock(void);
|
||||
|
||||
void toku_checkpoint_safe_client_unlock(void);
|
||||
|
||||
|
||||
|
||||
/******
|
||||
* These functions are called from the ydb level.
|
||||
* Client code must hold the multi_operation lock during the following operations:
|
||||
* - insertion into multiple indexes
|
||||
* - replace into (simultaneous delete/insert on a single key)
|
||||
*****/
|
||||
|
||||
void toku_multi_operation_client_lock(void);
|
||||
void toku_low_priority_multi_operation_client_lock(void);
|
||||
|
||||
void toku_multi_operation_client_unlock(void);
|
||||
void toku_low_priority_multi_operation_client_unlock(void);
|
||||
|
||||
|
||||
// Initialize the checkpoint mechanism, must be called before any client operations.
|
||||
// Takes no arguments (no ydb lock function pointers are passed in).
|
||||
void toku_checkpoint_init(void);
|
||||
|
||||
void toku_checkpoint_destroy(void);
|
||||
|
||||
// Identifies why a checkpoint is being taken (see the caller_id argument of
// toku_checkpoint). The numeric values are fixed explicitly.
typedef enum {
    // "normal" checkpoint taken on checkpoint thread
    SCHEDULED_CHECKPOINT = 0,
    // induced by client, such as FLUSH LOGS or SAVEPOINT
    CLIENT_CHECKPOINT    = 1,
    INDEXER_CHECKPOINT   = 2,
    STARTUP_CHECKPOINT   = 3,
    UPGRADE_CHECKPOINT   = 4,
    RECOVERY_CHECKPOINT  = 5,
    SHUTDOWN_CHECKPOINT  = 6
} checkpoint_caller_t;
|
||||
|
||||
// Take a checkpoint of all currently open dictionaries
|
||||
// Callbacks are called during checkpoint procedure while checkpoint_safe lock is still held.
|
||||
// Callbacks are primarily intended for use in testing.
|
||||
// caller_id identifies why the checkpoint is being taken.
|
||||
int toku_checkpoint(CHECKPOINTER cp, struct tokulogger *logger,
|
||||
void (*callback_f)(void *extra), void *extra,
|
||||
void (*callback2_f)(void *extra2), void *extra2,
|
||||
checkpoint_caller_t caller_id);
|
||||
|
||||
/******
|
||||
* These functions are called from the ydb level.
|
||||
* They return status information and have no side effects.
|
||||
* Some status information may be incorrect because no locks are taken to collect status.
|
||||
* (If checkpoint is in progress, it may overwrite status info while it is being read.)
|
||||
*****/
|
||||
void toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS stat);
|
@ -1,150 +0,0 @@
|
||||
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <db.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "portability/memory.h"
|
||||
|
||||
#include "util/dbt.h"
|
||||
|
||||
typedef int (*ft_compare_func)(DB *db, const DBT *a, const DBT *b);
|
||||
|
||||
int toku_keycompare(const void *key1, uint32_t key1len, const void *key2, uint32_t key2len);
|
||||
|
||||
int toku_builtin_compare_fun (DB *, const DBT *, const DBT*) __attribute__((__visibility__("default")));
|
||||
|
||||
namespace toku {
|
||||
|
||||
// a comparator object encapsulates the data necessary for
|
||||
// comparing two keys in a fractal tree. it further understands
|
||||
// that points may be positive or negative infinity.
|
||||
|
||||
class comparator {
|
||||
void init(ft_compare_func cmp, DESCRIPTOR desc, uint8_t memcmp_magic) {
|
||||
_cmp = cmp;
|
||||
_fake_db->cmp_descriptor = desc;
|
||||
_memcmp_magic = memcmp_magic;
|
||||
}
|
||||
|
||||
public:
|
||||
// This magic value is reserved to mean that the magic has not been set.
|
||||
static const uint8_t MEMCMP_MAGIC_NONE = 0;
|
||||
|
||||
void create(ft_compare_func cmp, DESCRIPTOR desc, uint8_t memcmp_magic = MEMCMP_MAGIC_NONE) {
|
||||
XCALLOC(_fake_db);
|
||||
init(cmp, desc, memcmp_magic);
|
||||
}
|
||||
|
||||
// inherit the attributes of another comparator, but keep our own
|
||||
// copy of fake_db that is owned separately from the one given.
|
||||
void inherit(const comparator &cmp) {
|
||||
invariant_notnull(_fake_db);
|
||||
invariant_notnull(cmp._cmp);
|
||||
invariant_notnull(cmp._fake_db);
|
||||
init(cmp._cmp, cmp._fake_db->cmp_descriptor, cmp._memcmp_magic);
|
||||
}
|
||||
|
||||
// like inherit, but doesn't require that the this comparator
|
||||
// was already created
|
||||
void create_from(const comparator &cmp) {
|
||||
XCALLOC(_fake_db);
|
||||
inherit(cmp);
|
||||
}
|
||||
|
||||
void destroy() {
|
||||
toku_free(_fake_db);
|
||||
}
|
||||
|
||||
const DESCRIPTOR_S *get_descriptor() const {
|
||||
return _fake_db->cmp_descriptor;
|
||||
}
|
||||
|
||||
ft_compare_func get_compare_func() const {
|
||||
return _cmp;
|
||||
}
|
||||
|
||||
uint8_t get_memcmp_magic() const {
|
||||
return _memcmp_magic;
|
||||
}
|
||||
|
||||
bool valid() const {
|
||||
return _cmp != nullptr;
|
||||
}
|
||||
|
||||
inline bool dbt_has_memcmp_magic(const DBT *dbt) const {
|
||||
return *reinterpret_cast<const char *>(dbt->data) == _memcmp_magic;
|
||||
}
|
||||
|
||||
int operator()(const DBT *a, const DBT *b) const {
|
||||
if (__builtin_expect(toku_dbt_is_infinite(a) || toku_dbt_is_infinite(b), 0)) {
|
||||
return toku_dbt_infinite_compare(a, b);
|
||||
} else if (_memcmp_magic != MEMCMP_MAGIC_NONE
|
||||
// If `a' has the memcmp magic..
|
||||
&& dbt_has_memcmp_magic(a)
|
||||
// ..then we expect `b' to also have the memcmp magic
|
||||
&& __builtin_expect(dbt_has_memcmp_magic(b), 1)) {
|
||||
return toku_builtin_compare_fun(nullptr, a, b);
|
||||
} else {
|
||||
// yikes, const sadness here
|
||||
return _cmp(const_cast<DB *>(_fake_db), a, b);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
DB *_fake_db;
|
||||
ft_compare_func _cmp;
|
||||
uint8_t _memcmp_magic;
|
||||
};
|
||||
|
||||
} /* namespace toku */
|
@ -1,456 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include <my_global.h>
|
||||
#include "ft/ft-internal.h"
|
||||
|
||||
#include "ft/cursor.h"
|
||||
#include "ft/leafentry.h"
|
||||
#include "ft/txn/txn.h"
|
||||
#include "util/dbt.h"
|
||||
|
||||
// Initializes the caller-provided cursor for ft_handle.
//
// For snapshot reads a transaction is required, and the cursor is refused
// with TOKUDB_MVCC_DICTIONARY_TOO_NEW when toku_txn_reads_txnid() does not
// accept the dictionary's creating xid for that transaction.
// Returns 0 on success; the cursor is left untouched on failure.
int toku_ft_cursor_create(FT_HANDLE ft_handle, FT_CURSOR cursor, TOKUTXN ttxn,
                          enum cursor_read_type read_type,
                          bool disable_prefetching,
                          bool is_temporary) {
    if (read_type == C_READ_SNAPSHOT) {
        invariant(ttxn != NULL);
        // The last argument is irrelevant for this check.
        int accepted = toku_txn_reads_txnid(ft_handle->ft->h->root_xid_that_created, ttxn, false);
        if (accepted != TOKUDB_ACCEPT) {
            invariant(accepted == 0);
            return TOKUDB_MVCC_DICTIONARY_TOO_NEW;
        }
    }

    // Start from an all-zero cursor, then fill in the fields that come from
    // the arguments.
    memset(cursor, 0, sizeof(*cursor));
    cursor->ft_handle           = ft_handle;
    cursor->ttxn                = ttxn;
    cursor->read_type           = read_type;
    cursor->disable_prefetching = disable_prefetching;
    cursor->is_temporary        = is_temporary;
    return 0;
}
|
||||
|
||||
// Destroys the four DBTs held by the cursor (current key/value and both
// range bounds). The cursor structure itself is not freed.
void toku_ft_cursor_destroy(FT_CURSOR cursor) {
    // current position
    toku_destroy_dbt(&cursor->key);
    toku_destroy_dbt(&cursor->val);
    // range restriction bounds
    toku_destroy_dbt(&cursor->range_lock_left_key);
    toku_destroy_dbt(&cursor->range_lock_right_key);
}
|
||||
|
||||
// deprecated, should only be used by tests
|
||||
// Allocates a cursor and initializes it with toku_ft_cursor_create().
// On success stores the new cursor in *cursorptr and returns 0; on failure
// frees the allocation, leaves *cursorptr untouched and returns the error.
int toku_ft_cursor(FT_HANDLE ft_handle, FT_CURSOR *cursorptr, TOKUTXN ttxn,
                   bool is_snapshot_read, bool disable_prefetching) {
    FT_CURSOR XCALLOC(cursor);

    enum cursor_read_type read_type = C_READ_ANY;
    if (is_snapshot_read) {
        read_type = C_READ_SNAPSHOT;
    }

    int r = toku_ft_cursor_create(ft_handle, cursor, ttxn, read_type, disable_prefetching, false);
    if (r != 0) {
        toku_free(cursor);
        return r;
    }
    *cursorptr = cursor;
    return r;
}
|
||||
|
||||
// deprecated, should only be used by tests
|
||||
// Tears down the cursor's contents and then frees the cursor itself.
void toku_ft_cursor_close(FT_CURSOR cursor) {
    toku_ft_cursor_destroy(cursor);
    toku_free(cursor);
}
|
||||
|
||||
// Turns off range checking by clearing out_of_range_error, and resets the
// scan direction. The stored bound keys and infinity flags are left as-is.
void toku_ft_cursor_remove_restriction(FT_CURSOR cursor) {
    cursor->direction = 0;
    cursor->out_of_range_error = 0;
}
|
||||
|
||||
// Records the interrupt-check callback and its opaque argument on the cursor.
void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR cursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *extra) {
    cursor->interrupt_cb_extra = extra;
    cursor->interrupt_cb = cb;
}
|
||||
|
||||
// Switches the cursor into leaf mode; there is no call in this file that
// switches it back.
void toku_ft_cursor_set_leaf_mode(FT_CURSOR cursor) {
    cursor->is_leaf_mode = true;
}
|
||||
|
||||
// Reports the cursor's leaf-mode flag (the bool is returned as an int).
int toku_ft_cursor_is_leaf_mode(FT_CURSOR cursor) {
    return cursor->is_leaf_mode;
}
|
||||
|
||||
// TODO: Rename / cleanup - this has nothing to do with locking
|
||||
// Restricts the cursor to the key range [left, right].
//
//   left / right        bound keys; each is cloned into the cursor. A bound
//                       is not dereferenced when its infinity flag is set.
//   left_is_neg_infty   true when there is no lower bound
//   right_is_pos_infty  true when there is no upper bound
//   out_of_range_error  error to report for keys outside the range;
//                       DB_NOTFOUND is stored as TOKUDB_FOUND_BUT_REJECTED
//
// Any previously stored bound keys are destroyed first. The infinity flags
// are always assigned from the arguments, so calling this again on the same
// cursor with a finite bound correctly replaces an earlier infinite one
// (previously the flag was only ever set, never cleared, which left the
// bound unchecked).
void toku_ft_cursor_set_range_lock(FT_CURSOR cursor,
                                   const DBT *left, const DBT *right,
                                   bool left_is_neg_infty, bool right_is_pos_infty,
                                   int out_of_range_error) {
    // Lower bound: drop the old key, record the flag, clone the new key if any.
    toku_destroy_dbt(&cursor->range_lock_left_key);
    cursor->left_is_neg_infty = left_is_neg_infty;
    if (!left_is_neg_infty) {
        toku_clone_dbt(&cursor->range_lock_left_key, *left);
    }

    // Upper bound: same treatment.
    toku_destroy_dbt(&cursor->range_lock_right_key);
    cursor->right_is_pos_infty = right_is_pos_infty;
    if (!right_is_pos_infty) {
        toku_clone_dbt(&cursor->range_lock_right_key, *right);
    }

    // TOKUDB_FOUND_BUT_REJECTED is a DB_NOTFOUND with instructions to stop looking. (Faster)
    cursor->out_of_range_error = out_of_range_error == DB_NOTFOUND ? TOKUDB_FOUND_BUT_REJECTED : out_of_range_error;
    cursor->direction = 0;
}
|
||||
|
||||
// Marks the cursor as prefetching.
void toku_ft_cursor_set_prefetching(FT_CURSOR cursor) {
    cursor->prefetching = true;
}
|
||||
|
||||
// Reports whether the cursor has been marked as prefetching.
bool toku_ft_cursor_prefetching(FT_CURSOR cursor) {
    return cursor->prefetching;
}
|
||||
|
||||
//Return true if cursor is uninitialized. false otherwise.
|
||||
// A cursor is "not set" while it holds no current key. Key and value are
// expected to be either both present or both absent.
bool toku_ft_cursor_not_set(FT_CURSOR cursor) {
    assert((cursor->key.data==NULL) == (cursor->val.data==NULL));
    const bool has_no_key = (cursor->key.data == NULL);
    return has_no_key;
}
|
||||
|
||||
struct ft_cursor_search_struct {
|
||||
FT_GET_CALLBACK_FUNCTION getf;
|
||||
void *getf_v;
|
||||
FT_CURSOR cursor;
|
||||
ft_search *search;
|
||||
};
|
||||
|
||||
/* search for the first kv pair that matches the search object */
|
||||
// Thin forwarder: runs the search on the cursor's own ft handle.
static int ft_cursor_search(FT_CURSOR cursor, ft_search *search,
                            FT_GET_CALLBACK_FUNCTION getf, void *getf_v, bool can_bulk_fetch) {
    return toku_ft_search(cursor->ft_handle, search, getf, getf_v, cursor, can_bulk_fetch);
}
|
||||
|
||||
// Compares k against x with the comparator of the handle's ft.
static inline int compare_k_x(FT_HANDLE ft_handle, const DBT *k, const DBT *x) {
    FT ft = ft_handle->ft;
    return ft->cmp(k, x);
}
|
||||
|
||||
// Search predicate that accepts every key: ignores both arguments and
// always answers 1.
int toku_ft_cursor_compare_one(const ft_search &UU(search), const DBT *UU(x)) {
    return 1;
}
|
||||
|
||||
static int ft_cursor_compare_set(const ft_search &search, const DBT *x) {
|
||||
FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context);
|
||||
return compare_k_x(ft_handle, search.k, x) <= 0; /* return min xy: kv <= xy */
|
||||
}
|
||||
|
||||
static int
|
||||
ft_cursor_current_getf(uint32_t keylen, const void *key,
|
||||
uint32_t vallen, const void *val,
|
||||
void *v, bool lock_only) {
|
||||
struct ft_cursor_search_struct *CAST_FROM_VOIDP(bcss, v);
|
||||
int r;
|
||||
if (key==NULL) {
|
||||
r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only);
|
||||
} else {
|
||||
FT_CURSOR cursor = bcss->cursor;
|
||||
DBT newkey;
|
||||
toku_fill_dbt(&newkey, key, keylen);
|
||||
if (compare_k_x(cursor->ft_handle, &cursor->key, &newkey) != 0) {
|
||||
r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only); // This was once DB_KEYEMPTY
|
||||
if (r==0) r = TOKUDB_FOUND_BUT_REJECTED;
|
||||
}
|
||||
else
|
||||
r = bcss->getf(keylen, key, vallen, val, bcss->getf_v, lock_only);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static int ft_cursor_compare_next(const ft_search &search, const DBT *x) {
|
||||
FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context);
|
||||
return compare_k_x(ft_handle, search.k, x) < 0; /* return min xy: kv < xy */
|
||||
}
|
||||
|
||||
// Delivers the pair at the cursor's current position to getf.
// Returns EINVAL when the cursor has no position. For op == DB_CURRENT the
// tree is searched again for the cursor's key; for any other op the pair
// cached on the cursor is handed to getf directly.
int toku_ft_cursor_current(FT_CURSOR cursor, int op, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) {
    if (toku_ft_cursor_not_set(cursor)) {
        return EINVAL;
    }
    cursor->direction = 0;

    if (op != DB_CURRENT) {
        // Serve the copy stored on the cursor.
        return getf(cursor->key.size, cursor->key.data, cursor->val.size, cursor->val.data, getf_v, false);
    }

    // Re-search from the cursor's key and let the wrapper verify that the
    // key found is still the same one.
    struct ft_cursor_search_struct bcss = {getf, getf_v, cursor, nullptr};
    ft_search search;
    ft_search_init(&search, ft_cursor_compare_set, FT_SEARCH_LEFT, &cursor->key, nullptr, cursor->ft_handle);
    int r = toku_ft_search(cursor->ft_handle, &search, ft_cursor_current_getf, &bcss, cursor, false);
    ft_search_finish(&search);
    return r;
}
|
||||
|
||||
// Positions on the first pair: a left-to-right search whose predicate
// accepts every key. Bulk fetch is not requested.
int toku_ft_cursor_first(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) {
    cursor->direction = 0;

    ft_search search;
    ft_search_init(&search, toku_ft_cursor_compare_one, FT_SEARCH_LEFT,
                   nullptr, nullptr, cursor->ft_handle);
    const int r = ft_cursor_search(cursor, &search, getf, getf_v, false);
    ft_search_finish(&search);
    return r;
}
|
||||
|
||||
// Positions on the last pair: a right-to-left search whose predicate
// accepts every key. Bulk fetch is not requested.
int toku_ft_cursor_last(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) {
    cursor->direction = 0;

    ft_search search;
    ft_search_init(&search, toku_ft_cursor_compare_one, FT_SEARCH_RIGHT,
                   nullptr, nullptr, cursor->ft_handle);
    const int r = ft_cursor_search(cursor, &search, getf, getf_v, false);
    ft_search_finish(&search);
    return r;
}
|
||||
|
||||
// Checks a found key against the cursor's range restriction.
// Returns the cursor's out_of_range_error when a restriction is active and
// the key lies outside it; otherwise resets the direction and returns 0.
// The lower bound is only checked for direction <= 0 and the upper bound
// only for direction >= 0.
int toku_ft_cursor_check_restricted_range(FT_CURSOR c, const void *key, uint32_t keylen) {
    if (c->out_of_range_error) {
        FT ft = c->ft_handle->ft;
        DBT found_key;
        toku_fill_dbt(&found_key, key, keylen);

        const bool below_left = !c->left_is_neg_infty
                                && c->direction <= 0
                                && ft->cmp(&found_key, &c->range_lock_left_key) < 0;
        // The upper bound is evaluated only if the lower-bound test passed.
        const bool outside = below_left
                             || (!c->right_is_pos_infty
                                 && c->direction >= 0
                                 && ft->cmp(&found_key, &c->range_lock_right_key) > 0);
        if (outside) {
            invariant(c->out_of_range_error);
            return c->out_of_range_error;
        }
    }
    // Reset cursor direction to mitigate risk if some query type doesn't set the direction.
    // It is always correct to check both bounds (which happens when direction==0) but it can be slower.
    c->direction = 0;
    return 0;
}
|
||||
|
||||
// Walks the basement node data `bd` from `index` in `direction`, handing
// each live entry to getf until getf stops asking for more, the restricted
// range is left, or the end of the node is reached.
//
// Entries for which le_val_is_del() is true are skipped unless the cursor
// is in leaf mode. The last visited key/value are written to the out
// parameters. Running out of the restricted range is not an error here: 0
// is returned in that case. Otherwise the result is whatever the last
// fetch or getf call produced.
int toku_ft_cursor_shortcut(FT_CURSOR cursor, int direction, uint32_t index, bn_data *bd,
                            FT_GET_CALLBACK_FUNCTION getf, void *getf_v,
                            uint32_t *keylen, void **key, uint32_t *vallen, void **val) {
    int r = 0;
    // if we are searching towards the end, limit is last element
    // if we are searching towards the beginning, limit is the first element
    const uint32_t limit = (direction > 0) ? (bd->num_klpairs() - 1) : 0;

    //Starting with the prev, find the first real (non-provdel) leafentry.
    while (index != limit) {
        index += direction;

        LEAFENTRY le;
        void *foundkey = NULL;
        uint32_t foundkeylen = 0;
        r = bd->fetch_klpair(index, &le, &foundkeylen, &foundkey);
        invariant_zero(r);

        // Skip deleted entries, except in leaf mode where the delete check
        // is not consulted at all.
        const bool leaf_mode = toku_ft_cursor_is_leaf_mode(cursor);
        if (!leaf_mode && le_val_is_del(le, cursor->read_type, cursor->ttxn)) {
            continue;
        }

        le_extract_val(le, leaf_mode, cursor->read_type, cursor->ttxn, vallen, val);
        *key = foundkey;
        *keylen = foundkeylen;

        cursor->direction = direction;
        r = toku_ft_cursor_check_restricted_range(cursor, *key, *keylen);
        if (r!=0) {
            paranoid_invariant(r == cursor->out_of_range_error);
            // We already got at least one entry from the bulk fetch.
            // Return 0 (instead of out of range error).
            r = 0;
            break;
        }

        r = getf(*keylen, *key, *vallen, *val, getf_v, false);
        if (r != TOKUDB_CURSOR_CONTINUE) {
            break;
        }
    }

    return r;
}
|
||||
|
||||
// Advances to the smallest key strictly greater than the cursor's current
// key (left-to-right search, bulk fetch allowed). A successful step also
// marks the cursor as prefetching.
int toku_ft_cursor_next(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) {
    cursor->direction = +1;

    ft_search search;
    ft_search_init(&search, ft_cursor_compare_next, FT_SEARCH_LEFT,
                   &cursor->key, nullptr, cursor->ft_handle);
    const int r = ft_cursor_search(cursor, &search, getf, getf_v, true);
    ft_search_finish(&search);

    if (r == 0) {
        toku_ft_cursor_set_prefetching(cursor);
    }
    return r;
}
|
||||
|
||||
static int ft_cursor_search_eq_k_x_getf(uint32_t keylen, const void *key,
|
||||
uint32_t vallen, const void *val,
|
||||
void *v, bool lock_only) {
|
||||
struct ft_cursor_search_struct *CAST_FROM_VOIDP(bcss, v);
|
||||
int r;
|
||||
if (key==NULL) {
|
||||
r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, false);
|
||||
} else {
|
||||
FT_CURSOR cursor = bcss->cursor;
|
||||
DBT newkey;
|
||||
toku_fill_dbt(&newkey, key, keylen);
|
||||
if (compare_k_x(cursor->ft_handle, bcss->search->k, &newkey) == 0) {
|
||||
r = bcss->getf(keylen, key, vallen, val, bcss->getf_v, lock_only);
|
||||
} else {
|
||||
r = bcss->getf(0, NULL, 0, NULL, bcss->getf_v, lock_only);
|
||||
if (r==0) r = TOKUDB_FOUND_BUT_REJECTED;
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
/* search for the kv pair that matches the search object and is equal to k */
|
||||
// Runs `search` with the exact-match wrapper installed in front of the
// caller's getf. Bulk fetch is not requested.
static int ft_cursor_search_eq_k_x(FT_CURSOR cursor, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) {
    struct ft_cursor_search_struct bcss = {getf, getf_v, cursor, search};
    return toku_ft_search(cursor->ft_handle, search, ft_cursor_search_eq_k_x_getf, &bcss, cursor, false);
}
|
||||
|
||||
static int ft_cursor_compare_prev(const ft_search &search, const DBT *x) {
|
||||
FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context);
|
||||
return compare_k_x(ft_handle, search.k, x) > 0; /* return max xy: kv > xy */
|
||||
}
|
||||
|
||||
// Steps back to the largest key strictly less than the cursor's current
// key (right-to-left search, bulk fetch allowed).
int toku_ft_cursor_prev(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) {
    cursor->direction = -1;

    ft_search search;
    ft_search_init(&search, ft_cursor_compare_prev, FT_SEARCH_RIGHT,
                   &cursor->key, nullptr, cursor->ft_handle);
    const int r = ft_cursor_search(cursor, &search, getf, getf_v, true);
    ft_search_finish(&search);
    return r;
}
|
||||
|
||||
int toku_ft_cursor_compare_set_range(const ft_search &search, const DBT *x) {
|
||||
FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context);
|
||||
return compare_k_x(ft_handle, search.k, x) <= 0; /* return kv <= xy */
|
||||
}
|
||||
|
||||
// Exact-match positioning: searches left-to-right for the first key >= `key`
// and lets the exact-match wrapper reject anything that is not equal to it.
int toku_ft_cursor_set(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) {
    cursor->direction = 0;

    ft_search search;
    ft_search_init(&search, toku_ft_cursor_compare_set_range, FT_SEARCH_LEFT,
                   key, nullptr, cursor->ft_handle);
    const int r = ft_cursor_search_eq_k_x(cursor, &search, getf, getf_v);
    ft_search_finish(&search);
    return r;
}
|
||||
|
||||
// Positions on the first key >= `key` (left-to-right search). `key_bound`
// is stored on the search object as k_bound; bulk fetch is not requested.
int toku_ft_cursor_set_range(FT_CURSOR cursor, DBT *key, DBT *key_bound, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) {
    cursor->direction = 0;

    ft_search search;
    ft_search_init(&search, toku_ft_cursor_compare_set_range, FT_SEARCH_LEFT,
                   key, key_bound, cursor->ft_handle);
    const int r = ft_cursor_search(cursor, &search, getf, getf_v, false);
    ft_search_finish(&search);
    return r;
}
|
||||
|
||||
static int ft_cursor_compare_set_range_reverse(const ft_search &search, const DBT *x) {
|
||||
FT_HANDLE CAST_FROM_VOIDP(ft_handle, search.context);
|
||||
return compare_k_x(ft_handle, search.k, x) >= 0; /* return kv >= xy */
|
||||
}
|
||||
|
||||
// Positions on the last key <= `key` (right-to-left search). Bulk fetch is
// not requested.
int toku_ft_cursor_set_range_reverse(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) {
    cursor->direction = 0;

    ft_search search;
    ft_search_init(&search, ft_cursor_compare_set_range_reverse, FT_SEARCH_RIGHT,
                   key, nullptr, cursor->ft_handle);
    const int r = ft_cursor_search(cursor, &search, getf, getf_v, false);
    ft_search_finish(&search);
    return r;
}
|
||||
|
||||
//TODO: When tests have been rewritten, get rid of this function.
|
||||
//Only used by tests.
|
||||
// Dispatches a DB_* cursor operation to the matching toku_ft_cursor_*
// function. Returns EINVAL when get_flags has bits outside
// DB_OPFLAGS_MASK or names an operation not handled here.
// DB_NEXT / DB_PREV on a cursor without a position behave like
// DB_FIRST / DB_LAST.
int toku_ft_cursor_get (FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags) {
    if (get_flags & ~DB_OPFLAGS_MASK) {
        return EINVAL;
    }
    const int op = get_flags & DB_OPFLAGS_MASK;

    switch (op) {
    case DB_CURRENT:
    case DB_CURRENT_BINDING:
        return toku_ft_cursor_current(cursor, op, getf, getf_v);

    case DB_FIRST:
        return toku_ft_cursor_first(cursor, getf, getf_v);

    case DB_LAST:
        return toku_ft_cursor_last(cursor, getf, getf_v);

    case DB_NEXT:
        return toku_ft_cursor_not_set(cursor)
                   ? toku_ft_cursor_first(cursor, getf, getf_v)
                   : toku_ft_cursor_next(cursor, getf, getf_v);

    case DB_PREV:
        return toku_ft_cursor_not_set(cursor)
                   ? toku_ft_cursor_last(cursor, getf, getf_v)
                   : toku_ft_cursor_prev(cursor, getf, getf_v);

    case DB_SET:
        return toku_ft_cursor_set(cursor, key, getf, getf_v);

    case DB_SET_RANGE:
        return toku_ft_cursor_set_range(cursor, key, nullptr, getf, getf_v);

    default:
        break;
    }
    // Unknown operation.
    return EINVAL;
}
|
||||
|
||||
// Exposes pointers to the cursor's own key and value DBTs; nothing is
// copied, so the pointers refer to storage inside the cursor.
void toku_ft_cursor_peek(FT_CURSOR cursor, const DBT **pkey, const DBT **pval) {
    *pkey = &cursor->key;
    *pval = &cursor->val;
}
|
||||
|
||||
// Alias for toku_ft_cursor_not_set().
bool toku_ft_cursor_uninitialized(FT_CURSOR c) {
    const bool not_set = toku_ft_cursor_not_set(c);
    return not_set;
}
|
||||
|
||||
// One-shot exact-match lookup of key k: opens a temporary cursor (no
// transaction, non-snapshot read), runs toku_ft_cursor_set() and closes
// the cursor again. Returns the cursor-creation error if that fails,
// otherwise the result of the lookup.
int toku_ft_lookup(FT_HANDLE ft_handle, DBT *k, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) {
    FT_CURSOR cursor;
    const int open_r = toku_ft_cursor(ft_handle, &cursor, NULL, false, false);
    if (open_r != 0) {
        return open_r;
    }

    const int lookup_r = toku_ft_cursor_set(cursor, k, getf, getf_v);
    toku_ft_cursor_close(cursor);
    return lookup_r;
}
|
@ -1,186 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <db.h>
|
||||
|
||||
#include "ft/ft-internal.h"
|
||||
|
||||
/* an ft cursor is represented as a kv pair in a tree */
|
||||
struct ft_cursor {
|
||||
FT_HANDLE ft_handle;
|
||||
DBT key, val; // The key-value pair that the cursor currently points to
|
||||
DBT range_lock_left_key, range_lock_right_key;
|
||||
bool prefetching;
|
||||
bool left_is_neg_infty, right_is_pos_infty;
|
||||
enum cursor_read_type read_type; // true if query is reading from a snapshot, false otherwise
|
||||
bool is_leaf_mode;
|
||||
bool disable_prefetching;
|
||||
bool is_temporary;
|
||||
int out_of_range_error;
|
||||
int direction;
|
||||
TOKUTXN ttxn;
|
||||
FT_CHECK_INTERRUPT_CALLBACK interrupt_cb;
|
||||
void *interrupt_cb_extra;
|
||||
};
|
||||
typedef struct ft_cursor *FT_CURSOR;
|
||||
|
||||
// Direction in which a search walks the keys.
enum ft_search_direction_e {
    /* search left -> right, finds min xy as defined by the compare function */
    FT_SEARCH_LEFT = 1,
    /* search right -> left, finds max xy as defined by the compare function */
    FT_SEARCH_RIGHT = 2,
};
|
||||
|
||||
struct ft_search;
|
||||
|
||||
/* the search compare function should return 0 for all xy < kv and 1 for all xy >= kv
|
||||
the compare function should be a step function from 0 to 1 for a left to right search
|
||||
and 1 to 0 for a right to left search */
|
||||
|
||||
typedef int (*ft_search_compare_func_t)(const struct ft_search &, const DBT *);
|
||||
|
||||
/* the search object contains the compare function, search direction, and the kv pair that
|
||||
is used in the compare function. the context is the user's private data */
|
||||
|
||||
struct ft_search {
|
||||
ft_search_compare_func_t compare;
|
||||
enum ft_search_direction_e direction;
|
||||
const DBT *k;
|
||||
void *context;
|
||||
|
||||
// To fix #3522, we need to remember the pivots that we have searched unsuccessfully.
|
||||
// For example, when searching right (left), we call search->compare() on the ith pivot key. If search->compare(0 returns
|
||||
// nonzero, then we search the ith subtree. If that subsearch returns DB_NOTFOUND then maybe the key isn't present in the
|
||||
// tree. But maybe we are doing a DB_NEXT (DB_PREV), and everything was deleted. So we remember the pivot, and later we
|
||||
// will only search subtrees which contain keys that are bigger than (less than) the pivot.
|
||||
// The code is a kludge (even before this fix), and interacts strangely with the TOKUDB_FOUND_BUT_REJECTED (which is there
|
||||
// because a failed DB_GET we would keep searching the rest of the tree). We probably should write the various lookup
|
||||
// codes (NEXT, PREV, CURRENT, etc) more directly, and we should probably use a binary search within a node to search the
|
||||
// pivots so that we can support a larger fanout.
|
||||
// These changes (3312+3522) also (probably) introduce an isolation error (#3529).
|
||||
// We must make sure we lock the right range for proper isolation level.
|
||||
// There's probably a bug in which the following could happen.
|
||||
// Thread A: Searches through deleted keys A,B,D,E and finds nothing, so searches the next leaf, releasing the YDB lock.
|
||||
// Thread B: Inserts key C, and acquires the write lock, then commits.
|
||||
// Thread A: Resumes, searching F,G,H and return success. Thread A then read-locks the range A-H, and doesn't notice
|
||||
// the value C inserted by thread B. Thus a failure of serialization.
|
||||
// See #3529.
|
||||
// There also remains a potential thrashing problem. When we get a TOKUDB_TRY_AGAIN, we unpin everything. There's
|
||||
// no guarantee that we will get everything pinned again. We ought to keep nodes pinned when we retry, except that on the
|
||||
// way out with a DB_NOTFOUND we ought to unpin those nodes. See #3528.
|
||||
DBT pivot_bound;
|
||||
const DBT *k_bound;
|
||||
};
|
||||
|
||||
/* initialize the search compare object */
|
||||
// Fills in every field of `search` from the arguments, initializes the
// pivot bound DBT, and returns `search` for convenience. Pair each call
// with ft_search_finish().
static inline ft_search *ft_search_init(ft_search *search, ft_search_compare_func_t compare,
                                        enum ft_search_direction_e direction,
                                        const DBT *k, const DBT *k_bound, void *context) {
    // what to look for
    search->k = k;
    search->k_bound = k_bound;
    // how to look for it
    search->compare = compare;
    search->direction = direction;
    search->context = context;
    // no pivot has been ruled out yet
    toku_init_dbt(&search->pivot_bound);
    return search;
}
|
||||
|
||||
// Releases what ft_search_init() set up: destroys the pivot bound DBT.
// The search object itself is not freed.
static inline void ft_search_finish(ft_search *search) {
    toku_destroy_dbt(&search->pivot_bound);
}
|
||||
|
||||
|
||||
int toku_ft_cursor_create(FT_HANDLE ft_handle, FT_CURSOR cursor, TOKUTXN txn,
|
||||
enum cursor_read_type read_type,
|
||||
bool disable_prefetching,
|
||||
bool is_temporary);
|
||||
|
||||
void toku_ft_cursor_destroy(FT_CURSOR cursor);
|
||||
|
||||
int toku_ft_lookup(FT_HANDLE ft_h, DBT *k, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));
|
||||
|
||||
void toku_ft_cursor_set_prefetching(FT_CURSOR cursor);
|
||||
|
||||
bool toku_ft_cursor_prefetching(FT_CURSOR cursor);
|
||||
|
||||
bool toku_ft_cursor_not_set(FT_CURSOR cursor);
|
||||
|
||||
void toku_ft_cursor_set_leaf_mode(FT_CURSOR cursor);
|
||||
|
||||
void toku_ft_cursor_remove_restriction(FT_CURSOR cursor);
|
||||
|
||||
void toku_ft_cursor_set_check_interrupt_cb(FT_CURSOR cursor, FT_CHECK_INTERRUPT_CALLBACK cb, void *extra);
|
||||
|
||||
int toku_ft_cursor_is_leaf_mode(FT_CURSOR cursor);
|
||||
|
||||
void toku_ft_cursor_set_range_lock(FT_CURSOR, const DBT *, const DBT *, bool, bool, int);
|
||||
|
||||
int toku_ft_cursor_first(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));
|
||||
|
||||
int toku_ft_cursor_last(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));
|
||||
|
||||
int toku_ft_cursor_next(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));
|
||||
|
||||
int toku_ft_cursor_prev(FT_CURSOR cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));
|
||||
|
||||
int toku_ft_cursor_current(FT_CURSOR cursor, int op, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));
|
||||
|
||||
int toku_ft_cursor_set(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));
|
||||
|
||||
int toku_ft_cursor_set_range(FT_CURSOR cursor, DBT *key, DBT *key_bound, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));
|
||||
|
||||
int toku_ft_cursor_set_range_reverse(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));
|
||||
|
||||
bool toku_ft_cursor_uninitialized(FT_CURSOR cursor) __attribute__ ((warn_unused_result));
|
||||
|
||||
void toku_ft_cursor_peek(FT_CURSOR cursor, const DBT **pkey, const DBT **pval);
|
||||
|
||||
int toku_ft_cursor_check_restricted_range(FT_CURSOR cursor, const void *key, uint32_t keylen);
|
||||
|
||||
int toku_ft_cursor_shortcut(FT_CURSOR cursor, int direction, uint32_t index, bn_data *bd,
|
||||
FT_GET_CALLBACK_FUNCTION getf, void *getf_v,
|
||||
uint32_t *keylen, void **key, uint32_t *vallen, void **val);
|
||||
|
||||
// used by get_key_after_bytes
|
||||
int toku_ft_cursor_compare_one(const ft_search &search, const DBT *x);
|
||||
int toku_ft_cursor_compare_set_range(const ft_search &search, const DBT *x);
|
||||
|
||||
// deprecated, should only be used by tests, and eventually removed
|
||||
int toku_ft_cursor(FT_HANDLE ft_handle, FT_CURSOR *ftcursor_p, TOKUTXN txn, bool, bool) __attribute__ ((warn_unused_result));
|
||||
void toku_ft_cursor_close(FT_CURSOR cursor);
|
||||
int toku_ft_cursor_get(FT_CURSOR cursor, DBT *key, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags);
|
||||
int toku_ft_cursor_delete(FT_CURSOR cursor, int flags, TOKUTXN txn);
|
@ -1,373 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include <my_global.h>
|
||||
#include "ft/serialize/block_table.h"
|
||||
#include "ft/ft-cachetable-wrappers.h"
|
||||
#include "ft/ft-flusher.h"
|
||||
#include "ft/ft-internal.h"
|
||||
#include "ft/ft.h"
|
||||
#include "ft/node.h"
|
||||
|
||||
#include <util/context.h>
|
||||
|
||||
static void
|
||||
ftnode_get_key_and_fullhash(
|
||||
BLOCKNUM* cachekey,
|
||||
uint32_t* fullhash,
|
||||
void* extra)
|
||||
{
|
||||
FT ft = (FT) extra;
|
||||
BLOCKNUM blocknum;
|
||||
ft->blocktable.allocate_blocknum(&blocknum, ft);
|
||||
*cachekey = blocknum;
|
||||
*fullhash = toku_cachetable_hash(ft->cf, blocknum);
|
||||
}
|
||||
|
||||
void
|
||||
cachetable_put_empty_node_with_dep_nodes(
|
||||
FT ft,
|
||||
uint32_t num_dependent_nodes,
|
||||
FTNODE* dependent_nodes,
|
||||
BLOCKNUM* blocknum, //output
|
||||
uint32_t* fullhash, //output
|
||||
FTNODE* result)
|
||||
{
|
||||
FTNODE XCALLOC(new_node);
|
||||
PAIR dependent_pairs[num_dependent_nodes];
|
||||
enum cachetable_dirty dependent_dirty_bits[num_dependent_nodes];
|
||||
for (uint32_t i = 0; i < num_dependent_nodes; i++) {
|
||||
dependent_pairs[i] = dependent_nodes[i]->ct_pair;
|
||||
dependent_dirty_bits[i] = (enum cachetable_dirty) dependent_nodes[i]->dirty();
|
||||
}
|
||||
|
||||
toku_cachetable_put_with_dep_pairs(
|
||||
ft->cf,
|
||||
ftnode_get_key_and_fullhash,
|
||||
new_node,
|
||||
make_pair_attr(sizeof(FTNODE)),
|
||||
get_write_callbacks_for_node(ft),
|
||||
ft,
|
||||
num_dependent_nodes,
|
||||
dependent_pairs,
|
||||
dependent_dirty_bits,
|
||||
blocknum,
|
||||
fullhash,
|
||||
toku_ftnode_save_ct_pair);
|
||||
*result = new_node;
|
||||
}
|
||||
|
||||
void
|
||||
create_new_ftnode_with_dep_nodes(
|
||||
FT ft,
|
||||
FTNODE *result,
|
||||
int height,
|
||||
int n_children,
|
||||
uint32_t num_dependent_nodes,
|
||||
FTNODE* dependent_nodes)
|
||||
{
|
||||
uint32_t fullhash = 0;
|
||||
BLOCKNUM blocknum;
|
||||
|
||||
cachetable_put_empty_node_with_dep_nodes(
|
||||
ft,
|
||||
num_dependent_nodes,
|
||||
dependent_nodes,
|
||||
&blocknum,
|
||||
&fullhash,
|
||||
result);
|
||||
|
||||
assert(ft->h->basementnodesize > 0);
|
||||
if (height == 0) {
|
||||
assert(n_children > 0);
|
||||
}
|
||||
|
||||
toku_initialize_empty_ftnode(
|
||||
*result,
|
||||
blocknum,
|
||||
height,
|
||||
n_children,
|
||||
ft->h->layout_version,
|
||||
ft->h->flags);
|
||||
|
||||
(*result)->fullhash = fullhash;
|
||||
}
|
||||
|
||||
void
|
||||
toku_create_new_ftnode (
|
||||
FT_HANDLE t,
|
||||
FTNODE *result,
|
||||
int height,
|
||||
int n_children)
|
||||
{
|
||||
return create_new_ftnode_with_dep_nodes(
|
||||
t->ft,
|
||||
result,
|
||||
height,
|
||||
n_children,
|
||||
0,
|
||||
NULL);
|
||||
}
|
||||
|
||||
//
|
||||
// On success, this function assumes that the caller is trying to pin the node
|
||||
// with a PL_READ lock. If message application is needed,
|
||||
// then a PL_WRITE_CHEAP lock is grabbed
|
||||
//
|
||||
int
|
||||
toku_pin_ftnode_for_query(
|
||||
FT_HANDLE ft_handle,
|
||||
BLOCKNUM blocknum,
|
||||
uint32_t fullhash,
|
||||
UNLOCKERS unlockers,
|
||||
ANCESTORS ancestors,
|
||||
const pivot_bounds &bounds,
|
||||
ftnode_fetch_extra *bfe,
|
||||
bool apply_ancestor_messages, // this bool is probably temporary, for #3972, once we know how range query estimates work, will revisit this
|
||||
FTNODE *node_p,
|
||||
bool* msgs_applied)
|
||||
{
|
||||
void *node_v;
|
||||
*msgs_applied = false;
|
||||
FTNODE node = nullptr;
|
||||
MSN max_msn_in_path = ZERO_MSN;
|
||||
bool needs_ancestors_messages = false;
|
||||
// this function assumes that if you want ancestor messages applied,
|
||||
// you are doing a read for a query. This is so we can make some optimizations
|
||||
// below.
|
||||
if (apply_ancestor_messages) {
|
||||
paranoid_invariant(bfe->type == ftnode_fetch_subset);
|
||||
}
|
||||
|
||||
int r = toku_cachetable_get_and_pin_nonblocking(
|
||||
ft_handle->ft->cf,
|
||||
blocknum,
|
||||
fullhash,
|
||||
&node_v,
|
||||
get_write_callbacks_for_node(ft_handle->ft),
|
||||
toku_ftnode_fetch_callback,
|
||||
toku_ftnode_pf_req_callback,
|
||||
toku_ftnode_pf_callback,
|
||||
PL_READ,
|
||||
bfe, //read_extraargs
|
||||
unlockers);
|
||||
if (r != 0) {
|
||||
assert(r == TOKUDB_TRY_AGAIN); // Any other error and we should bomb out ASAP.
|
||||
goto exit;
|
||||
}
|
||||
node = static_cast<FTNODE>(node_v);
|
||||
if (apply_ancestor_messages && node->height == 0) {
|
||||
needs_ancestors_messages = toku_ft_leaf_needs_ancestors_messages(
|
||||
ft_handle->ft,
|
||||
node,
|
||||
ancestors,
|
||||
bounds,
|
||||
&max_msn_in_path,
|
||||
bfe->child_to_read
|
||||
);
|
||||
if (needs_ancestors_messages) {
|
||||
toku::context apply_messages_ctx(CTX_MESSAGE_APPLICATION);
|
||||
|
||||
toku_unpin_ftnode_read_only(ft_handle->ft, node);
|
||||
int rr = toku_cachetable_get_and_pin_nonblocking(
|
||||
ft_handle->ft->cf,
|
||||
blocknum,
|
||||
fullhash,
|
||||
&node_v,
|
||||
get_write_callbacks_for_node(ft_handle->ft),
|
||||
toku_ftnode_fetch_callback,
|
||||
toku_ftnode_pf_req_callback,
|
||||
toku_ftnode_pf_callback,
|
||||
PL_WRITE_CHEAP,
|
||||
bfe, //read_extraargs
|
||||
unlockers);
|
||||
if (rr != 0) {
|
||||
assert(rr == TOKUDB_TRY_AGAIN); // Any other error and we should bomb out ASAP.
|
||||
r = TOKUDB_TRY_AGAIN;
|
||||
goto exit;
|
||||
}
|
||||
node = static_cast<FTNODE>(node_v);
|
||||
toku_apply_ancestors_messages_to_node(
|
||||
ft_handle,
|
||||
node,
|
||||
ancestors,
|
||||
bounds,
|
||||
msgs_applied,
|
||||
bfe->child_to_read
|
||||
);
|
||||
} else {
|
||||
// At this point, we aren't going to run
|
||||
// toku_apply_ancestors_messages_to_node but that doesn't
|
||||
// mean max_msn_applied shouldn't be updated if possible
|
||||
// (this saves the CPU work involved in
|
||||
// toku_ft_leaf_needs_ancestors_messages).
|
||||
//
|
||||
// We still have a read lock, so we have not resolved
|
||||
// checkpointing. If the node is pending and dirty, we
|
||||
// can't modify anything, including max_msn, until we
|
||||
// resolve checkpointing. If we do, the node might get
|
||||
// written out that way as part of a checkpoint with a
|
||||
// root that was already written out with a smaller
|
||||
// max_msn. During recovery, we would then inject a
|
||||
// message based on the root's max_msn, and that message
|
||||
// would get filtered by the leaf because it had too high
|
||||
// a max_msn value. (see #5407)
|
||||
//
|
||||
// So for simplicity we only update the max_msn if the
|
||||
// node is clean. That way, in order for the node to get
|
||||
// written out, it would have to be dirtied. That
|
||||
// requires a write lock, and a write lock requires you to
|
||||
// resolve checkpointing.
|
||||
if (!node->dirty()) {
|
||||
toku_ft_bn_update_max_msn(node, max_msn_in_path, bfe->child_to_read);
|
||||
}
|
||||
}
|
||||
}
|
||||
*node_p = node;
|
||||
exit:
|
||||
return r;
|
||||
}
|
||||
|
||||
void
|
||||
toku_pin_ftnode_with_dep_nodes(
|
||||
FT ft,
|
||||
BLOCKNUM blocknum,
|
||||
uint32_t fullhash,
|
||||
ftnode_fetch_extra *bfe,
|
||||
pair_lock_type lock_type,
|
||||
uint32_t num_dependent_nodes,
|
||||
FTNODE *dependent_nodes,
|
||||
FTNODE *node_p,
|
||||
bool move_messages)
|
||||
{
|
||||
void *node_v;
|
||||
PAIR dependent_pairs[num_dependent_nodes];
|
||||
enum cachetable_dirty dependent_dirty_bits[num_dependent_nodes];
|
||||
for (uint32_t i = 0; i < num_dependent_nodes; i++) {
|
||||
dependent_pairs[i] = dependent_nodes[i]->ct_pair;
|
||||
dependent_dirty_bits[i] = (enum cachetable_dirty) dependent_nodes[i]->dirty();
|
||||
}
|
||||
|
||||
int r = toku_cachetable_get_and_pin_with_dep_pairs(
|
||||
ft->cf,
|
||||
blocknum,
|
||||
fullhash,
|
||||
&node_v,
|
||||
get_write_callbacks_for_node(ft),
|
||||
toku_ftnode_fetch_callback,
|
||||
toku_ftnode_pf_req_callback,
|
||||
toku_ftnode_pf_callback,
|
||||
lock_type,
|
||||
bfe,
|
||||
num_dependent_nodes,
|
||||
dependent_pairs,
|
||||
dependent_dirty_bits
|
||||
);
|
||||
invariant_zero(r);
|
||||
FTNODE node = (FTNODE) node_v;
|
||||
if (lock_type != PL_READ && node->height > 0 && move_messages) {
|
||||
toku_move_ftnode_messages_to_stale(ft, node);
|
||||
}
|
||||
*node_p = node;
|
||||
}
|
||||
|
||||
// Pins an ftnode that has no dependent nodes: delegates to
// toku_pin_ftnode_with_dep_nodes with a dependent-node count of zero and a
// null dependent-node array.
void toku_pin_ftnode(
    FT ft,
    BLOCKNUM blocknum,
    uint32_t fullhash,
    ftnode_fetch_extra *bfe,
    pair_lock_type lock_type,
    FTNODE *node_p,
    bool move_messages)
{
    toku_pin_ftnode_with_dep_nodes(
        ft,
        blocknum,
        fullhash,
        bfe,
        lock_type,
        0,        // num_dependent_nodes
        nullptr,  // dependent_nodes
        node_p,
        move_messages);
}
|
||||
|
||||
// Tries to pin the node through toku_cachetable_maybe_get_and_pin_clean.
//
// Returns that call's result unchanged.  On a nonzero result *nodep is not
// written.  On success *nodep receives the pinned node and, if it is a
// nonleaf (height > 0) pinned with anything other than PL_READ, its messages
// are moved to stale before returning.
int toku_maybe_pin_ftnode_clean(FT ft, BLOCKNUM blocknum, uint32_t fullhash, pair_lock_type lock_type, FTNODE *nodep)
{
    void *pinned_v;
    int r = toku_cachetable_maybe_get_and_pin_clean(ft->cf, blocknum, fullhash, lock_type, &pinned_v);
    if (r != 0) {
        return r;
    }

    CAST_FROM_VOIDP(*nodep, pinned_v);
    FTNODE pinned = *nodep;
    if (pinned->height > 0 && lock_type != PL_READ) {
        toku_move_ftnode_messages_to_stale(ft, pinned);
    }
    return r;
}
|
||||
|
||||
// Unpins `node` from ft's cachetable, reporting the node's current dirty
// state and the pair attributes computed by make_ftnode_pair_attr.  The
// unpin is required to succeed.
void toku_unpin_ftnode(FT ft, FTNODE node)
{
    const enum cachetable_dirty dirty_state =
        static_cast<enum cachetable_dirty>(node->dirty());
    int r = toku_cachetable_unpin(
        ft->cf,
        node->ct_pair,
        dirty_state,
        make_ftnode_pair_attr(node));
    invariant_zero(r);
}
|
||||
|
||||
void
|
||||
toku_unpin_ftnode_read_only(FT ft, FTNODE node)
|
||||
{
|
||||
int r = toku_cachetable_unpin(
|
||||
ft->cf,
|
||||
node->ct_pair,
|
||||
(enum cachetable_dirty) node->dirty(),
|
||||
make_invalid_pair_attr()
|
||||
);
|
||||
assert(r==0);
|
||||
}
|
||||
|
||||
void toku_ftnode_swap_pair_values(FTNODE a, FTNODE b)
// Effect: Swap the blocknum, fullhash, and PAIR of a and b, then swap the
//  values stored in those two cachetable pairs.
// Requires: Both nodes are pinned
{
    // Remember a's identity before it is overwritten.
    const BLOCKNUM a_blocknum = a->blocknum;
    const uint32_t a_fullhash = a->fullhash;
    PAIR a_pair = a->ct_pair;

    // a takes over b's identity ...
    a->blocknum = b->blocknum;
    a->fullhash = b->fullhash;
    a->ct_pair = b->ct_pair;

    // ... and b takes over what used to be a's.
    b->blocknum = a_blocknum;
    b->fullhash = a_fullhash;
    b->ct_pair = a_pair;

    // The nodes now point at each other's former pairs, but the pairs
    // themselves still hold the old values (ie: the FTNODEs they represent),
    // so those have to be swapped inside the cachetable as well.
    toku_cachetable_swap_pair_values(a->ct_pair, b->ct_pair);
}
|
@ -1,141 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ft/cachetable/cachetable.h"
|
||||
#include "ft/ft-internal.h"
|
||||
#include "ft/node.h"
|
||||
|
||||
/**
|
||||
* Put an empty node (that is, no fields filled) into the cachetable.
|
||||
* In the process, write dependent nodes out for checkpoint if
|
||||
* necessary.
|
||||
*/
|
||||
void
|
||||
cachetable_put_empty_node_with_dep_nodes(
|
||||
FT ft,
|
||||
uint32_t num_dependent_nodes,
|
||||
FTNODE* dependent_nodes,
|
||||
BLOCKNUM* name, //output
|
||||
uint32_t* fullhash, //output
|
||||
FTNODE* result
|
||||
);
|
||||
|
||||
/**
|
||||
* Create a new ftnode with specified height and number of children.
|
||||
* In the process, write dependent nodes out for checkpoint if
|
||||
* necessary.
|
||||
*/
|
||||
void
|
||||
create_new_ftnode_with_dep_nodes(
|
||||
FT ft,
|
||||
FTNODE *result,
|
||||
int height,
|
||||
int n_children,
|
||||
uint32_t num_dependent_nodes,
|
||||
FTNODE* dependent_nodes
|
||||
);
|
||||
|
||||
/**
|
||||
* Create a new ftnode with specified height
|
||||
* and children.
|
||||
* Used for test functions only.
|
||||
*/
|
||||
void
|
||||
toku_create_new_ftnode (
|
||||
FT_HANDLE t,
|
||||
FTNODE *result,
|
||||
int height,
|
||||
int n_children
|
||||
);
|
||||
|
||||
// This function returns a pinned ftnode to the caller.
|
||||
int
|
||||
toku_pin_ftnode_for_query(
|
||||
FT_HANDLE ft_h,
|
||||
BLOCKNUM blocknum,
|
||||
uint32_t fullhash,
|
||||
UNLOCKERS unlockers,
|
||||
ANCESTORS ancestors,
|
||||
const pivot_bounds &bounds,
|
||||
ftnode_fetch_extra *bfe,
|
||||
bool apply_ancestor_messages, // this bool is probably temporary, for #3972, once we know how range query estimates work, will revisit this
|
||||
FTNODE *node_p,
|
||||
bool* msgs_applied
|
||||
);
|
||||
|
||||
// Pins an ftnode without dependent pairs
|
||||
void toku_pin_ftnode(
|
||||
FT ft,
|
||||
BLOCKNUM blocknum,
|
||||
uint32_t fullhash,
|
||||
ftnode_fetch_extra *bfe,
|
||||
pair_lock_type lock_type,
|
||||
FTNODE *node_p,
|
||||
bool move_messages
|
||||
);
|
||||
|
||||
// Pins an ftnode with dependent pairs
|
||||
// Unlike toku_pin_ftnode_for_query, this function blocks until the node is pinned.
|
||||
void toku_pin_ftnode_with_dep_nodes(
|
||||
FT ft,
|
||||
BLOCKNUM blocknum,
|
||||
uint32_t fullhash,
|
||||
ftnode_fetch_extra *bfe,
|
||||
pair_lock_type lock_type,
|
||||
uint32_t num_dependent_nodes,
|
||||
FTNODE *dependent_nodes,
|
||||
FTNODE *node_p,
|
||||
bool move_messages
|
||||
);
|
||||
|
||||
/**
|
||||
* This function may return a pinned ftnode to the caller, if pinning is cheap.
|
||||
* If the node is already locked, or is pending a checkpoint, the node is not pinned and -1 is returned.
|
||||
*/
|
||||
int toku_maybe_pin_ftnode_clean(FT ft, BLOCKNUM blocknum, uint32_t fullhash, pair_lock_type lock_type, FTNODE *nodep);
|
||||
|
||||
/**
|
||||
* Effect: Unpin an ftnode.
|
||||
*/
|
||||
void toku_unpin_ftnode(FT ft, FTNODE node);
|
||||
void toku_unpin_ftnode_read_only(FT ft, FTNODE node);
|
||||
|
||||
// Effect: Swaps pair values of two pinned nodes
|
||||
void toku_ftnode_swap_pair_values(FTNODE nodea, FTNODE nodeb);
|
@ -1,183 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#define flt_flush_before_applying_inbox 1
|
||||
#define flt_flush_before_child_pin 2
|
||||
#define ft_flush_aflter_child_pin 3
|
||||
#define flt_flush_before_split 4
|
||||
#define flt_flush_during_split 5
|
||||
#define flt_flush_before_merge 6
|
||||
#define ft_flush_aflter_merge 7
|
||||
#define ft_flush_aflter_rebalance 8
|
||||
#define flt_flush_before_unpin_remove 9
|
||||
#define flt_flush_before_pin_second_node_for_merge 10
|
||||
|
||||
typedef struct flusher_advice FLUSHER_ADVICE;
|
||||
|
||||
/**
|
||||
* Choose a child to flush to. Returns a childnum, or -1 if we should
|
||||
* go no further.
|
||||
*
|
||||
* Flusher threads: pick the heaviest child buffer
|
||||
* Cleaner threads: pick the heaviest child buffer
|
||||
* Cleaner thread merging leaf nodes: follow down to a key
|
||||
* Hot optimize table: follow down to the right of a key
|
||||
*/
|
||||
typedef int (*FA_PICK_CHILD)(FT ft, FTNODE parent, void* extra);
|
||||
|
||||
/**
|
||||
* Decide whether to call `toku_ft_flush_some_child` on the child if it is
|
||||
* stable and a nonleaf node.
|
||||
*
|
||||
* Flusher threads: yes if child is gorged
|
||||
* Cleaner threads: yes if child is gorged
|
||||
* Cleaner thread merging leaf nodes: always yes
|
||||
* Hot optimize table: always yes
|
||||
*/
|
||||
typedef bool (*FA_SHOULD_RECURSIVELY_FLUSH)(FTNODE child, void* extra);
|
||||
|
||||
/**
|
||||
* Called if the child needs merging. Should do something to get the
|
||||
* child out of a fusible state. Must unpin parent and child.
|
||||
*
|
||||
* Flusher threads: just do the merge
|
||||
* Cleaner threads: if nonleaf, just merge, otherwise start a "cleaner
|
||||
* thread merge"
|
||||
* Cleaner thread merging leaf nodes: just do the merge
|
||||
* Hot optimize table: just do the merge
|
||||
*/
|
||||
typedef void (*FA_MAYBE_MERGE_CHILD)(struct flusher_advice *fa,
|
||||
FT ft,
|
||||
FTNODE parent,
|
||||
int childnum,
|
||||
FTNODE child,
|
||||
void* extra);
|
||||
|
||||
/**
|
||||
* Cleaner threads may need to destroy basement nodes which have been
|
||||
* brought more up to date than the height 1 node flushing to them.
|
||||
* This function is used to determine if we need to check for basement
|
||||
* nodes that are too up to date, and then destroy them if we find
|
||||
* them.
|
||||
*
|
||||
* Flusher threads: no
|
||||
* Cleaner threads: yes
|
||||
* Cleaner thread merging leaf nodes: no
|
||||
* Hot optimize table: no
|
||||
*/
|
||||
typedef bool (*FA_SHOULD_DESTROY_BN)(void* extra);
|
||||
|
||||
/**
|
||||
* Update `ft_flusher_status` in whatever way necessary. Called once
|
||||
* by `toku_ft_flush_some_child` right before choosing what to do next (split,
|
||||
* merge, recurse), with the number of nodes that were dirtied by this
|
||||
* execution of `toku_ft_flush_some_child`.
|
||||
*/
|
||||
typedef void (*FA_UPDATE_STATUS)(FTNODE child, int dirtied, void* extra);
|
||||
|
||||
/**
|
||||
* Choose whether to go to the left or right child after a split. Called
|
||||
* by `ft_split_child`. If -1 is returned, `ft_split_child` defaults to
|
||||
* the old behavior.
|
||||
*/
|
||||
typedef int (*FA_PICK_CHILD_AFTER_SPLIT)(FT ft,
|
||||
FTNODE node,
|
||||
int childnuma,
|
||||
int childnumb,
|
||||
void* extra);
|
||||
|
||||
/**
|
||||
* A collection of callbacks used by the flushing machinery to make
|
||||
* various decisions. There are implementations of each of these
|
||||
* functions for flusher threads (flt_*), cleaner threads (ct_*), and hot
|
||||
* optimize table (hot_*).
|
||||
*/
|
||||
struct flusher_advice {
|
||||
FA_PICK_CHILD pick_child;
|
||||
FA_SHOULD_RECURSIVELY_FLUSH should_recursively_flush;
|
||||
FA_MAYBE_MERGE_CHILD maybe_merge_child;
|
||||
FA_SHOULD_DESTROY_BN should_destroy_basement_nodes;
|
||||
FA_UPDATE_STATUS update_status;
|
||||
FA_PICK_CHILD_AFTER_SPLIT pick_child_after_split;
|
||||
void* extra; // parameter passed into callbacks
|
||||
};
|
||||
|
||||
void
|
||||
flusher_advice_init(
|
||||
struct flusher_advice *fa,
|
||||
FA_PICK_CHILD pick_child,
|
||||
FA_SHOULD_DESTROY_BN should_destroy_basement_nodes,
|
||||
FA_SHOULD_RECURSIVELY_FLUSH should_recursively_flush,
|
||||
FA_MAYBE_MERGE_CHILD maybe_merge_child,
|
||||
FA_UPDATE_STATUS update_status,
|
||||
FA_PICK_CHILD_AFTER_SPLIT pick_child_after_split,
|
||||
void* extra
|
||||
);
|
||||
|
||||
void toku_ft_flush_some_child(
|
||||
FT ft,
|
||||
FTNODE parent,
|
||||
struct flusher_advice *fa
|
||||
);
|
||||
|
||||
bool
|
||||
always_recursively_flush(FTNODE child, void* extra);
|
||||
|
||||
bool
|
||||
never_recursively_flush(FTNODE UU(child), void* UU(extra));
|
||||
|
||||
bool
|
||||
dont_destroy_basement_nodes(void* extra);
|
||||
|
||||
void
|
||||
default_merge_child(struct flusher_advice *fa,
|
||||
FT ft,
|
||||
FTNODE parent,
|
||||
int childnum,
|
||||
FTNODE child,
|
||||
void* extra);
|
||||
|
||||
int
|
||||
default_pick_child_after_split(FT ft,
|
||||
FTNODE parent,
|
||||
int childnuma,
|
||||
int childnumb,
|
||||
void *extra);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,147 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ft/ft-internal.h"
|
||||
|
||||
void toku_ft_flusher_get_status(FT_FLUSHER_STATUS);
|
||||
|
||||
/**
|
||||
* Only for testing, not for production.
|
||||
*
|
||||
* Set a callback the flusher thread will use to signal various points
|
||||
* during its execution.
|
||||
*/
|
||||
void
|
||||
toku_flusher_thread_set_callback(
|
||||
void (*callback_f)(int, void*),
|
||||
void* extra
|
||||
);
|
||||
|
||||
/**
|
||||
* Puts a workitem on the flusher thread queue, scheduling the node to be
|
||||
* flushed by toku_ft_flush_some_child.
|
||||
*/
|
||||
void toku_ft_flush_node_on_background_thread(FT ft, FTNODE parent);
|
||||
|
||||
enum split_mode {
|
||||
SPLIT_EVENLY,
|
||||
SPLIT_LEFT_HEAVY,
|
||||
SPLIT_RIGHT_HEAVY
|
||||
};
|
||||
|
||||
|
||||
// Given pinned node and pinned child, split child into two
|
||||
// and update node with information about its new child.
|
||||
void toku_ft_split_child(
|
||||
FT ft,
|
||||
FTNODE node,
|
||||
int childnum,
|
||||
FTNODE child,
|
||||
enum split_mode split_mode
|
||||
);
|
||||
|
||||
// Given pinned node, merge childnum with a neighbor and update node with
|
||||
// information about the change
|
||||
void toku_ft_merge_child(
|
||||
FT ft,
|
||||
FTNODE node,
|
||||
int childnum
|
||||
);
|
||||
|
||||
/**
|
||||
* Effect: Split a leaf node.
|
||||
* Argument "node" is node to be split.
|
||||
* Upon return:
|
||||
* nodea and nodeb point to new nodes that result from split of "node"
|
||||
* nodea is the left node that results from the split
|
||||
* splitk is the right-most key of nodea
|
||||
*/
|
||||
// TODO: Rename toku_ft_leaf_split
|
||||
void
|
||||
ftleaf_split(
|
||||
FT ft,
|
||||
FTNODE node,
|
||||
FTNODE *nodea,
|
||||
FTNODE *nodeb,
|
||||
DBT *splitk,
|
||||
bool create_new_node,
|
||||
enum split_mode split_mode,
|
||||
uint32_t num_dependent_nodes,
|
||||
FTNODE* dependent_nodes
|
||||
);
|
||||
|
||||
/**
|
||||
* Effect: node must be a nonleaf node. It is split into two nodes, and
|
||||
* the fanout is split between them.
|
||||
* Sets splitk->data pointer to a malloc'd value
|
||||
* Sets nodea, and nodeb to the two new nodes.
|
||||
* The caller must replace the old node with the two new nodes.
|
||||
* This function will definitely reduce the number of children for the node,
|
||||
* but it does not guarantee that the resulting nodes are smaller than nodesize.
|
||||
*/
|
||||
void
|
||||
// TODO: Rename toku_ft_nonleaf_split
|
||||
ft_nonleaf_split(
|
||||
FT ft,
|
||||
FTNODE node,
|
||||
FTNODE *nodea,
|
||||
FTNODE *nodeb,
|
||||
DBT *splitk,
|
||||
uint32_t num_dependent_nodes,
|
||||
FTNODE* dependent_nodes
|
||||
);
|
||||
|
||||
/************************************************************************
|
||||
* HOT optimize, should perhaps be factored out to its own header file *
|
||||
************************************************************************
|
||||
*/
|
||||
void toku_ft_hot_get_status(FT_HOT_STATUS);
|
||||
|
||||
/**
|
||||
* Takes given FT and pushes all pending messages between left and right to the leaf nodes.
|
||||
* All messages between left and right (inclusive) will be pushed, as will some others
|
||||
* that happen to share buffers with messages near the boundary.
|
||||
* If left is NULL, messages from beginning of FT are pushed. If right is NULL, that means
|
||||
* we go until the end of the FT.
|
||||
*/
|
||||
int
|
||||
toku_ft_hot_optimize(FT_HANDLE ft_h, DBT* left, DBT* right,
|
||||
int (*progress_callback)(void *extra, float progress),
|
||||
void *progress_extra, uint64_t* loops_run);
|
@ -1,362 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include <my_global.h>
|
||||
#include "ft/ft.h"
|
||||
#include "ft/ft-cachetable-wrappers.h"
|
||||
#include "ft/ft-flusher.h"
|
||||
#include "ft/ft-flusher-internal.h"
|
||||
#include "ft/ft-internal.h"
|
||||
#include "ft/node.h"
|
||||
#include "portability/toku_atomic.h"
|
||||
#include "util/context.h"
|
||||
#include "util/status.h"
|
||||
|
||||
// Member Descirption:
|
||||
// 1. highest_pivot_key - this is the key that corresponds to the
|
||||
// most recently flushed leaf entry.
|
||||
// 2. max_current_key - this is the pivot/key that we inherit as
|
||||
// we descend down the tree. We use this to set the highest_pivot_key.
|
||||
// 3. sub_tree_size - this is the percentage of the entire tree that our
|
||||
// current position (in a sub-tree) encompasses.
|
||||
// 4. percentage_done - this is the percentage of leaf nodes that have
|
||||
// been flushed into.
|
||||
// 5. rightmost_leaf_seen - this is a boolean we use to determine if
|
||||
// if we have flushed to every leaf node.
|
||||
struct hot_flusher_extra {
|
||||
DBT highest_pivot_key;
|
||||
DBT max_current_key;
|
||||
float sub_tree_size;
|
||||
float percentage_done;
|
||||
bool rightmost_leaf_seen;
|
||||
};
|
||||
|
||||
void
|
||||
toku_ft_hot_get_status(FT_HOT_STATUS s) {
|
||||
hot_status.init();
|
||||
*s = hot_status;
|
||||
}
|
||||
|
||||
// Copies the max current key to the highest pivot key seen.
|
||||
static void
|
||||
hot_set_highest_key(struct hot_flusher_extra *flusher)
|
||||
{
|
||||
// The max current key will be NULL if we are traversing in the
|
||||
// rightmost subtree of a given parent. As such, we don't want to
|
||||
// allocate memory for this case.
|
||||
toku_destroy_dbt(&flusher->highest_pivot_key);
|
||||
if (flusher->max_current_key.data != NULL) {
|
||||
// Otherwise, let's copy all the contents from one key to the other.
|
||||
toku_clone_dbt(&flusher->highest_pivot_key, flusher->max_current_key);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
hot_set_start_key(struct hot_flusher_extra *flusher, const DBT* start)
|
||||
{
|
||||
toku_destroy_dbt(&flusher->highest_pivot_key);
|
||||
if (start != NULL) {
|
||||
// Otherwise, let's copy all the contents from one key to the other.
|
||||
toku_clone_dbt(&flusher->highest_pivot_key, *start);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
hot_just_pick_child(FT ft,
|
||||
FTNODE parent,
|
||||
struct hot_flusher_extra *flusher)
|
||||
{
|
||||
int childnum = 0;
|
||||
|
||||
// Search through Parents pivots, see which one is greater than
|
||||
// the highest_pivot_key seen so far.
|
||||
if (flusher->highest_pivot_key.data == NULL)
|
||||
{
|
||||
// Special case of the first child of the root node.
|
||||
// Also known as, NEGATIVE INFINITY....
|
||||
childnum = 0;
|
||||
} else {
|
||||
// Find the pivot boundary.
|
||||
childnum = toku_ftnode_hot_next_child(parent, &flusher->highest_pivot_key, ft->cmp);
|
||||
}
|
||||
|
||||
return childnum;
|
||||
}
|
||||
|
||||
static void
|
||||
hot_update_flusher_keys(FTNODE parent,
|
||||
int childnum,
|
||||
struct hot_flusher_extra *flusher)
|
||||
{
|
||||
// Update maximum current key if the child is NOT the rightmost
|
||||
// child node.
|
||||
if (childnum < (parent->n_children - 1)) {
|
||||
toku_destroy_dbt(&flusher->max_current_key);
|
||||
toku_clone_dbt(&flusher->max_current_key, parent->pivotkeys.get_pivot(childnum));
|
||||
}
|
||||
}
|
||||
|
||||
// Picks which child toku_ft_flush_some_child will use for flushing and
|
||||
// recursion.
|
||||
static int
|
||||
hot_pick_child(FT ft,
|
||||
FTNODE parent,
|
||||
void *extra)
|
||||
{
|
||||
struct hot_flusher_extra *flusher = (struct hot_flusher_extra *) extra;
|
||||
int childnum = hot_just_pick_child(ft, parent, flusher);
|
||||
|
||||
// Now we determine the percentage of the tree flushed so far.
|
||||
|
||||
// Whichever subtree we choose to recurse into, it is a fraction
|
||||
// of the current parent.
|
||||
flusher->sub_tree_size /= parent->n_children;
|
||||
|
||||
// Update the precentage complete, using our new sub tree size AND
|
||||
// the number of children we have already flushed.
|
||||
flusher->percentage_done += (flusher->sub_tree_size * childnum);
|
||||
|
||||
hot_update_flusher_keys(parent, childnum, flusher);
|
||||
|
||||
return childnum;
|
||||
}
|
||||
|
||||
// Does nothing for now.
|
||||
static void
|
||||
hot_update_status(FTNODE UU(child),
|
||||
int UU(dirtied),
|
||||
void *UU(extra))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// If we've just split a node, HOT needs another chance to decide which
|
||||
// one to flush into. This gives it a chance to do that, and update the
|
||||
// keys it maintains.
|
||||
static int
|
||||
hot_pick_child_after_split(FT ft,
|
||||
FTNODE parent,
|
||||
int childnuma,
|
||||
int childnumb,
|
||||
void *extra)
|
||||
{
|
||||
struct hot_flusher_extra *flusher = (struct hot_flusher_extra *) extra;
|
||||
int childnum = hot_just_pick_child(ft, parent, flusher);
|
||||
assert(childnum == childnuma || childnum == childnumb);
|
||||
hot_update_flusher_keys(parent, childnum, flusher);
|
||||
if (parent->height == 1) {
|
||||
// We don't want to recurse into a leaf node, but if we return
|
||||
// anything valid, ft_split_child will try to go there, so we
|
||||
// return -1 to allow ft_split_child to have its default
|
||||
// behavior, which will be to stop recursing.
|
||||
childnum = -1;
|
||||
}
|
||||
return childnum;
|
||||
}
|
||||
|
||||
// Basic constructor/initializer for the hot flusher struct.
|
||||
static void
|
||||
hot_flusher_init(struct flusher_advice *advice,
|
||||
struct hot_flusher_extra *flusher)
|
||||
{
|
||||
// Initialize the highest pivot key seen to NULL. This represents
|
||||
// NEGATIVE INFINITY and is used to cover the special case of our
|
||||
// first traversal of the tree.
|
||||
toku_init_dbt(&(flusher->highest_pivot_key));
|
||||
toku_init_dbt(&(flusher->max_current_key));
|
||||
flusher->rightmost_leaf_seen = 0;
|
||||
flusher->sub_tree_size = 1.0;
|
||||
flusher->percentage_done = 0.0;
|
||||
flusher_advice_init(advice,
|
||||
hot_pick_child,
|
||||
dont_destroy_basement_nodes,
|
||||
always_recursively_flush,
|
||||
default_merge_child,
|
||||
hot_update_status,
|
||||
hot_pick_child_after_split,
|
||||
flusher
|
||||
);
|
||||
}
|
||||
|
||||
// Erases any DBT keys we have copied from a traversal.
|
||||
static void
|
||||
hot_flusher_destroy(struct hot_flusher_extra *flusher)
|
||||
{
|
||||
toku_destroy_dbt(&flusher->highest_pivot_key);
|
||||
toku_destroy_dbt(&flusher->max_current_key);
|
||||
}
|
||||
|
||||
// Entry point for Hot Optimize Table (HOT). Note, this function is
|
||||
// not recursive. It iterates over root-to-leaf paths.
|
||||
int
|
||||
toku_ft_hot_optimize(FT_HANDLE ft_handle, DBT* left, DBT* right,
|
||||
int (*progress_callback)(void *extra, float progress),
|
||||
void *progress_extra, uint64_t* loops_run)
|
||||
{
|
||||
toku::context flush_ctx(CTX_FLUSH);
|
||||
|
||||
int r = 0;
|
||||
struct hot_flusher_extra flusher;
|
||||
struct flusher_advice advice;
|
||||
|
||||
hot_flusher_init(&advice, &flusher);
|
||||
hot_set_start_key(&flusher, left);
|
||||
|
||||
uint64_t loop_count = 0;
|
||||
MSN msn_at_start_of_hot = ZERO_MSN; // capture msn from root at
|
||||
// start of HOT operation
|
||||
(void) toku_sync_fetch_and_add(&HOT_STATUS_VAL(FT_HOT_NUM_STARTED), 1);
|
||||
|
||||
toku_ft_note_hot_begin(ft_handle);
|
||||
|
||||
// Higher level logic prevents a dictionary from being deleted or
|
||||
// truncated during a hot optimize operation. Doing so would violate
|
||||
// the hot optimize contract.
|
||||
do {
|
||||
FTNODE root;
|
||||
CACHEKEY root_key;
|
||||
uint32_t fullhash;
|
||||
|
||||
{
|
||||
// Get root node (the first parent of each successive HOT
|
||||
// call.)
|
||||
toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &fullhash);
|
||||
ftnode_fetch_extra bfe;
|
||||
bfe.create_for_full_read(ft_handle->ft);
|
||||
toku_pin_ftnode(ft_handle->ft,
|
||||
(BLOCKNUM) root_key,
|
||||
fullhash,
|
||||
&bfe,
|
||||
PL_WRITE_EXPENSIVE,
|
||||
&root,
|
||||
true);
|
||||
toku_ftnode_assert_fully_in_memory(root);
|
||||
}
|
||||
|
||||
// Prepare HOT diagnostics.
|
||||
if (loop_count == 0) {
|
||||
// The first time through, capture msn from root
|
||||
msn_at_start_of_hot = root->max_msn_applied_to_node_on_disk;
|
||||
}
|
||||
|
||||
loop_count++;
|
||||
|
||||
if (loop_count > HOT_STATUS_VAL(FT_HOT_MAX_ROOT_FLUSH_COUNT)) {
|
||||
HOT_STATUS_VAL(FT_HOT_MAX_ROOT_FLUSH_COUNT) = loop_count;
|
||||
}
|
||||
|
||||
// Initialize the maximum current key. We need to do this for
|
||||
// every traversal.
|
||||
toku_destroy_dbt(&flusher.max_current_key);
|
||||
|
||||
flusher.sub_tree_size = 1.0;
|
||||
flusher.percentage_done = 0.0;
|
||||
|
||||
// This should recurse to the bottom of the tree and then
|
||||
// return.
|
||||
if (root->height > 0) {
|
||||
toku_ft_flush_some_child(ft_handle->ft, root, &advice);
|
||||
} else {
|
||||
// Since there are no children to flush, we should abort
|
||||
// the HOT call.
|
||||
flusher.rightmost_leaf_seen = 1;
|
||||
toku_unpin_ftnode(ft_handle->ft, root);
|
||||
}
|
||||
|
||||
// Set the highest pivot key seen here, since the parent may
|
||||
// be unlocked and NULL'd later in our caller:
|
||||
// toku_ft_flush_some_child().
|
||||
hot_set_highest_key(&flusher);
|
||||
|
||||
// This is where we determine if the traversal is finished or
|
||||
// not.
|
||||
if (flusher.max_current_key.data == NULL) {
|
||||
flusher.rightmost_leaf_seen = 1;
|
||||
}
|
||||
else if (right) {
|
||||
// if we have flushed past the bounds set for us,
|
||||
// set rightmost_leaf_seen so we exit
|
||||
int cmp = ft_handle->ft->cmp(&flusher.max_current_key, right);
|
||||
if (cmp > 0) {
|
||||
flusher.rightmost_leaf_seen = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Update HOT's progress.
|
||||
if (progress_callback != NULL) {
|
||||
r = progress_callback(progress_extra, flusher.percentage_done);
|
||||
|
||||
// Check if the callback wants us to stop running HOT.
|
||||
if (r != 0) {
|
||||
flusher.rightmost_leaf_seen = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Loop until the max key has been updated to positive
|
||||
// infinity.
|
||||
} while (!flusher.rightmost_leaf_seen);
|
||||
*loops_run = loop_count;
|
||||
|
||||
// Cleanup.
|
||||
hot_flusher_destroy(&flusher);
|
||||
|
||||
// More diagnostics.
|
||||
{
|
||||
bool success = false;
|
||||
if (r == 0) { success = true; }
|
||||
|
||||
{
|
||||
toku_ft_note_hot_complete(ft_handle, success, msn_at_start_of_hot);
|
||||
}
|
||||
|
||||
if (success) {
|
||||
(void) toku_sync_fetch_and_add(&HOT_STATUS_VAL(FT_HOT_NUM_COMPLETED), 1);
|
||||
} else {
|
||||
(void) toku_sync_fetch_and_add(&HOT_STATUS_VAL(FT_HOT_NUM_ABORTED), 1);
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
#include <toku_race_tools.h>
|
||||
void __attribute__((__constructor__)) toku_hot_helgrind_ignore(void);
|
||||
void
|
||||
toku_hot_helgrind_ignore(void) {
|
||||
// incremented only while lock is held, but read by engine status asynchronously.
|
||||
TOKU_VALGRIND_HG_DISABLE_CHECKING(&hot_status, sizeof hot_status);
|
||||
}
|
@ -1,495 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "portability/toku_config.h"
|
||||
#include "portability/toku_list.h"
|
||||
#include "portability/toku_race_tools.h"
|
||||
|
||||
#include "ft/cachetable/cachetable.h"
|
||||
#include "ft/comparator.h"
|
||||
#include "ft/ft.h"
|
||||
#include "ft/ft-ops.h"
|
||||
#include "ft/node.h"
|
||||
#include "ft/serialize/block_table.h"
|
||||
#include "ft/txn/rollback.h"
|
||||
#include "ft/ft-status.h"
|
||||
|
||||
// BUILD_ID is taken from TOKUDB_REVISION, which the build must define;
|
||||
// compilation stops at the #error below when it is missing.
|
||||
#if defined(TOKUDB_REVISION)
|
||||
#define BUILD_ID TOKUDB_REVISION
|
||||
#else
|
||||
#error
|
||||
#endif
|
||||
|
||||
struct ft_search;
|
||||
|
||||
enum { FT_DEFAULT_FANOUT = 16 };
|
||||
enum { FT_DEFAULT_NODE_SIZE = 4 * 1024 * 1024 };
|
||||
enum { FT_DEFAULT_BASEMENT_NODE_SIZE = 128 * 1024 };
|
||||
|
||||
// We optimize for a sequential insert pattern if 100 consecutive injections
|
||||
// happen into the rightmost leaf node due to promotion.
|
||||
enum { FT_SEQINSERT_SCORE_THRESHOLD = 100 };
|
||||
|
||||
uint32_t compute_child_fullhash (CACHEFILE cf, FTNODE node, int childnum);
|
||||
|
||||
// Tag stored in ft_header::type.
// NOTE(review): presumably FT_CURRENT marks the live header and
// FT_CHECKPOINT_INPROGRESS the copy kept for a running checkpoint -- confirm.
enum ft_type {
    FT_CURRENT = 1,
    FT_CHECKPOINT_INPROGRESS = 2
};
|
||||
|
||||
extern "C" {
|
||||
extern uint force_recovery;
|
||||
}
|
||||
|
||||
extern int writing_rollback;
|
||||
|
||||
// The ft_header is not managed by the cachetable. Instead, it hangs off the cachefile as userdata.
|
||||
struct ft_header {
|
||||
enum ft_type type;
|
||||
|
||||
int dirty_;
|
||||
|
||||
void set_dirty() {
|
||||
if(force_recovery) assert(writing_rollback);
|
||||
dirty_ = 1;
|
||||
}
|
||||
|
||||
void clear_dirty() {
|
||||
dirty_ = 0;
|
||||
}
|
||||
|
||||
bool dirty() {
|
||||
return dirty_;
|
||||
}
|
||||
|
||||
// Free-running counter incremented once per checkpoint (toggling LSB).
|
||||
// LSB indicates which header location is used on disk so this
|
||||
// counter is effectively a boolean which alternates with each checkpoint.
|
||||
uint64_t checkpoint_count;
|
||||
// LSN of creation of "checkpoint-begin" record in log.
|
||||
LSN checkpoint_lsn;
|
||||
|
||||
// see serialize/ft_layout_version.h. maybe don't need this if we assume
|
||||
// it's always the current version after deserializing
|
||||
const int layout_version;
|
||||
// different (<) from layout_version if upgraded from a previous
|
||||
// version (useful for debugging)
|
||||
const int layout_version_original;
|
||||
// build_id (svn rev number) of software that wrote this node to
|
||||
// disk. (read from disk, overwritten when written to disk, I
|
||||
// think).
|
||||
const uint32_t build_id;
|
||||
// build_id of software that created this tree
|
||||
const uint32_t build_id_original;
|
||||
|
||||
// time this tree was created
|
||||
const uint64_t time_of_creation;
|
||||
// and the root transaction id that created it
|
||||
TXNID root_xid_that_created;
|
||||
// last time this header was serialized to disk (read from disk,
|
||||
// overwritten when written to disk)
|
||||
uint64_t time_of_last_modification;
|
||||
// last time that this tree was verified
|
||||
uint64_t time_of_last_verification;
|
||||
|
||||
// this field is essentially a const
|
||||
BLOCKNUM root_blocknum;
|
||||
|
||||
const unsigned int flags;
|
||||
|
||||
//protected by toku_ft_lock
|
||||
unsigned int nodesize;
|
||||
unsigned int basementnodesize;
|
||||
enum toku_compression_method compression_method;
|
||||
unsigned int fanout;
|
||||
|
||||
// Current Minimum MSN to be used when upgrading pre-MSN FT's.
|
||||
// This is decremented from our currnt MIN_MSN so as not to clash
|
||||
// with any existing 'normal' MSN's.
|
||||
MSN highest_unused_msn_for_upgrade;
|
||||
// Largest MSN ever injected into the tree. Used to set the MSN for
|
||||
// messages as they get injected.
|
||||
MSN max_msn_in_ft;
|
||||
|
||||
// last time that a hot optimize operation was begun
|
||||
uint64_t time_of_last_optimize_begin;
|
||||
// last time that a hot optimize operation was successfully completed
|
||||
uint64_t time_of_last_optimize_end;
|
||||
// the number of hot optimize operations currently in progress on this tree
|
||||
uint32_t count_of_optimize_in_progress;
|
||||
// the number of hot optimize operations in progress on this tree at the time of the last crash (this field is in-memory only)
|
||||
uint32_t count_of_optimize_in_progress_read_from_disk;
|
||||
// all messages before this msn have been applied to leaf nodes
|
||||
MSN msn_at_start_of_last_completed_optimize;
|
||||
|
||||
STAT64INFO_S on_disk_stats;
|
||||
|
||||
// This represents the balance of inserts - deletes and should be
|
||||
// closer to a logical representation of the number of records in an index
|
||||
uint64_t on_disk_logical_rows;
|
||||
};
|
||||
typedef struct ft_header *FT_HEADER;
|
||||
|
||||
// ft_header is always the current version.
|
||||
struct ft {
|
||||
FT_HEADER h;
|
||||
FT_HEADER checkpoint_header;
|
||||
|
||||
// These are (mostly) read-only.
|
||||
|
||||
CACHEFILE cf;
|
||||
// unique id for dictionary
|
||||
DICTIONARY_ID dict_id;
|
||||
|
||||
// protected by locktree
|
||||
DESCRIPTOR_S descriptor;
|
||||
|
||||
// protected by locktree and user.
|
||||
// User makes sure this is only changed when no activity on tree
|
||||
DESCRIPTOR_S cmp_descriptor;
|
||||
// contains a pointer to cmp_descriptor (above) - their lifetimes are bound
|
||||
toku::comparator cmp;
|
||||
|
||||
// the update function always utilizes the cmp_descriptor, not the regular one
|
||||
ft_update_func update_fun;
|
||||
|
||||
// These are not read-only:
|
||||
|
||||
// protected by blocktable lock
|
||||
block_table blocktable;
|
||||
|
||||
// protected by atomic builtins
|
||||
STAT64INFO_S in_memory_stats;
|
||||
uint64_t in_memory_logical_rows;
|
||||
|
||||
// transient, not serialized to disk. updated when we do write to
|
||||
// disk. tells us whether we can do partial eviction (we can't if
|
||||
// the on-disk layout version is from before basement nodes)
|
||||
int layout_version_read_from_disk;
|
||||
|
||||
// Logically the reference count is zero if live_ft_handles is empty, txns is 0, and pinned_by_checkpoint is false.
|
||||
|
||||
// ft_ref_lock protects modifying live_ft_handles, txns, and pinned_by_checkpoint.
|
||||
toku_mutex_t ft_ref_lock;
|
||||
struct toku_list live_ft_handles;
|
||||
// Number of transactions that are using this FT. you should only be able
|
||||
// to modify this if you have a valid handle in live_ft_handles
|
||||
uint32_t num_txns;
|
||||
// A checkpoint is running. If true, then keep this header around for checkpoint, like a transaction
|
||||
bool pinned_by_checkpoint;
|
||||
|
||||
// is this ft a blackhole? if so, all messages are dropped.
|
||||
bool blackhole;
|
||||
|
||||
// The blocknum of the rightmost leaf node in the tree. Stays constant through splits
|
||||
// and merges using pair-swapping (like the root node, see toku_ftnode_swap_pair_values())
|
||||
//
|
||||
// This field only transitions from RESERVED_BLOCKNUM_NULL to non-null, never back.
|
||||
// We initialize it when promotion inserts into a non-root leaf node on the right extreme.
|
||||
// We use the blocktable lock to protect the initialize transition, though it's not really
|
||||
// necessary since all threads should be setting it to the same value. We maintain that invariant
|
||||
// on first initialization, see ft_set_or_verify_rightmost_blocknum()
|
||||
BLOCKNUM rightmost_blocknum;
|
||||
|
||||
// sequential access pattern heuristic
|
||||
// - when promotion pushes a message directly into the rightmost leaf, the score goes up.
|
||||
// - if the score is high enough, we optimistically attempt to insert directly into the rightmost leaf
|
||||
// - if our attempt fails because the key was not in range of the rightmost leaf, we reset the score back to 0
|
||||
uint32_t seqinsert_score;
|
||||
};
|
||||
|
||||
// Allocate a DB struct off the stack and only set its comparison
|
||||
// descriptor. We don't bother setting any other fields because
|
||||
// the comparison function doesn't need it, and we would like to
|
||||
// reduce the CPU work done per comparison.
|
||||
// Caution: this expands to a declaration of `db` followed by a statement, so
// it introduces `db` into the enclosing scope and can only be used where a
// declaration is allowed. Every field other than cmp_descriptor is left
// uninitialized.
#define FAKE_DB(db, desc) struct __toku_db db; do { db.cmp_descriptor = const_cast<DESCRIPTOR>(desc); } while (0)
|
||||
|
||||
// Settings bundle embedded in every ft_handle (ft_handle::options): node
// sizing, compression method, fanout, flags, the memcmp magic byte, and the
// comparison and update callbacks.
struct ft_options {
|
||||
unsigned int nodesize;
|
||||
unsigned int basementnodesize;
|
||||
enum toku_compression_method compression_method;
|
||||
unsigned int fanout;
|
||||
unsigned int flags;
|
||||
uint8_t memcmp_magic;
|
||||
ft_compare_func compare_fun;
|
||||
ft_update_func update_fun;
|
||||
};
|
||||
|
||||
// A handle onto a fractal tree (FT_HANDLE is a pointer to this struct).
struct ft_handle {
|
||||
// The fractal tree.
|
||||
FT ft;
|
||||
|
||||
|
||||
// Redirect callback and the opaque argument passed to it.
on_redirect_callback redirect_callback;
|
||||
void *redirect_callback_extra;
|
||||
// NOTE(review): presumably links this handle into ft::live_ft_handles -- confirm.
struct toku_list live_ft_handle_link;
|
||||
bool did_set_flags;
|
||||
|
||||
|
||||
// Settings carried by this handle; see struct ft_options.
struct ft_options options;
|
||||
};
|
||||
|
||||
PAIR_ATTR make_ftnode_pair_attr(FTNODE node);
|
||||
PAIR_ATTR make_invalid_pair_attr(void);
|
||||
|
||||
//
// Stored in ftnode_fetch_extra::type; tells the partial fetch callback
// which piece of the node is needed by the ydb.
//
enum ftnode_fetch_type {
    // no partitions needed.
    ftnode_fetch_none = 1,
    // some subset of partitions needed
    ftnode_fetch_subset = 2,
    // this is part of a prefetch call
    ftnode_fetch_prefetch = 3,
    // every partition is needed
    ftnode_fetch_all = 4,
    // one child is needed if it holds both keys
    ftnode_fetch_keymatch = 5
};
|
||||
|
||||
// Info passed to cachetable fetch callbacks to say which parts of a node
|
||||
// should be fetched (perhaps a subset, perhaps the whole thing, depending
|
||||
// on operation)
|
||||
class ftnode_fetch_extra {
|
||||
public:
|
||||
// Used when the whole node must be in memory, such as for flushes.
|
||||
void create_for_full_read(FT ft);
|
||||
|
||||
// A subset of children are necessary. Used by point queries.
|
||||
void create_for_subset_read(FT ft, ft_search *search, const DBT *left, const DBT *right,
|
||||
bool left_is_neg_infty, bool right_is_pos_infty,
|
||||
bool disable_prefetching, bool read_all_partitions);
|
||||
|
||||
// No partitions are necessary - only pivots and/or subtree estimates.
|
||||
// Currently used for stat64.
|
||||
void create_for_min_read(FT ft);
|
||||
|
||||
// Used to prefetch partitions that fall within the bounds given by the cursor.
|
||||
void create_for_prefetch(FT ft, struct ft_cursor *cursor);
|
||||
|
||||
// Only a portion of the node (within a keyrange) is required.
|
||||
// Used by keysrange when the left and right key are in the same basement node.
|
||||
void create_for_keymatch(FT ft, const DBT *left, const DBT *right,
|
||||
bool disable_prefetching, bool read_all_partitions);
|
||||
|
||||
void destroy(void);
|
||||
|
||||
// return: true if a specific childnum is required to be in memory
|
||||
bool wants_child_available(int childnum) const;
|
||||
|
||||
// return: the childnum of the leftmost child that is required to be in memory
|
||||
int leftmost_child_wanted(FTNODE node) const;
|
||||
|
||||
// return: the childnum of the rightmost child that is required to be in memory
|
||||
int rightmost_child_wanted(FTNODE node) const;
|
||||
|
||||
// needed for reading a node off disk
|
||||
FT ft;
|
||||
|
||||
enum ftnode_fetch_type type;
|
||||
|
||||
// used in the case where type == ftnode_fetch_subset
|
||||
// parameters needed to find out which child needs to be decompressed (so it can be read)
|
||||
ft_search *search;
|
||||
DBT range_lock_left_key, range_lock_right_key;
|
||||
bool left_is_neg_infty, right_is_pos_infty;
|
||||
|
||||
// states if we should try to aggressively fetch basement nodes
|
||||
// that are not specifically needed for current query,
|
||||
// but may be needed for other cursor operations user is doing
|
||||
// For example, if we have not disabled prefetching,
|
||||
// and the user is doing a dictionary wide scan, then
|
||||
// even though a query may only want one basement node,
|
||||
// we fetch all basement nodes in a leaf node.
|
||||
bool disable_prefetching;
|
||||
|
||||
// this value will be set during the fetch_callback call by toku_ftnode_fetch_callback or toku_ftnode_pf_req_callback
|
||||
// thi callbacks need to evaluate this anyway, so we cache it here so the search code does not reevaluate it
|
||||
int child_to_read;
|
||||
|
||||
// when we read internal nodes, we want to read all the data off disk in one I/O
|
||||
// then we'll treat it as normal and only decompress the needed partitions etc.
|
||||
bool read_all_partitions;
|
||||
|
||||
// Accounting: How many bytes were read, and how much time did we spend doing I/O?
|
||||
uint64_t bytes_read;
|
||||
tokutime_t io_time;
|
||||
tokutime_t decompress_time;
|
||||
tokutime_t deserialize_time;
|
||||
|
||||
private:
|
||||
void _create_internal(FT ft_);
|
||||
};
|
||||
|
||||
// Only exported for tests.
|
||||
// Cachetable callbacks for ftnodes.
|
||||
void toku_ftnode_clone_callback(void* value_data, void** cloned_value_data, long* clone_size, PAIR_ATTR* new_attr, bool for_checkpoint, void* write_extraargs);
|
||||
void toku_ftnode_checkpoint_complete_callback(void *value_data);
|
||||
void toku_ftnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM blocknum, void *ftnode_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool is_clone);
|
||||
int toku_ftnode_fetch_callback (CACHEFILE cachefile, PAIR p, int fd, BLOCKNUM blocknum, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int*dirty, void*extraargs);
|
||||
void toku_ftnode_pe_est_callback(void* ftnode_pv, void* disk_data, long* bytes_freed_estimate, enum partial_eviction_cost *cost, void* write_extraargs);
|
||||
int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *extraargs,
|
||||
void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra);
|
||||
bool toku_ftnode_pf_req_callback(void* ftnode_pv, void* read_extraargs);
|
||||
int toku_ftnode_pf_callback(void* ftnode_pv, void* UU(disk_data), void* read_extraargs, int fd, PAIR_ATTR* sizep);
|
||||
int toku_ftnode_cleaner_callback( void *ftnode_pv, BLOCKNUM blocknum, uint32_t fullhash, void *extraargs);
|
||||
|
||||
CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_node(FT ft);
|
||||
|
||||
// This is only exported for tests.
|
||||
// append a child node to a parent node
|
||||
void toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey);
|
||||
|
||||
// This is only exported for tests.
|
||||
// append a message to a nonleaf node child buffer
|
||||
void toku_ft_append_to_child_buffer(const toku::comparator &cmp, FTNODE node, int childnum, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const DBT *key, const DBT *val);
|
||||
|
||||
STAT64INFO_S toku_get_and_clear_basement_stats(FTNODE leafnode);
|
||||
|
||||
//#define SLOW
|
||||
#ifdef SLOW
|
||||
#define VERIFY_NODE(t,n) (toku_verify_or_set_counts(n), toku_verify_estimates(t,n))
|
||||
#else
|
||||
#define VERIFY_NODE(t,n) ((void)0)
|
||||
#endif
|
||||
|
||||
void toku_verify_or_set_counts(FTNODE);
|
||||
|
||||
// TODO: consider moving this to ft/pivotkeys.cc
|
||||
class pivot_bounds {
|
||||
public:
|
||||
pivot_bounds(const DBT &lbe_dbt, const DBT &ubi_dbt);
|
||||
|
||||
pivot_bounds next_bounds(FTNODE node, int childnum) const;
|
||||
|
||||
const DBT *lbe() const;
|
||||
const DBT *ubi() const;
|
||||
|
||||
static pivot_bounds infinite_bounds();
|
||||
|
||||
private:
|
||||
DBT _prepivotkey(FTNODE node, int childnum, const DBT &lbe_dbt) const;
|
||||
DBT _postpivotkey(FTNODE node, int childnum, const DBT &ubi_dbt) const;
|
||||
|
||||
// if toku_dbt_is_empty() is true for either bound, then it represents
|
||||
// negative or positive infinity (which are exclusive in practice)
|
||||
const DBT _lower_bound_exclusive;
|
||||
const DBT _upper_bound_inclusive;
|
||||
};
|
||||
|
||||
// allocate a block number
|
||||
// allocate and initialize a ftnode
|
||||
// put the ftnode into the cache table
|
||||
void toku_create_new_ftnode(FT_HANDLE ft_handle, FTNODE *result, int height, int n_children);
|
||||
|
||||
/* Stuff for testing */
|
||||
// toku_testsetup_initialize() must be called before any other test_setup_xxx() functions are called.
|
||||
void toku_testsetup_initialize(void);
|
||||
int toku_testsetup_leaf(FT_HANDLE ft_h, BLOCKNUM *blocknum, int n_children, char **keys, int *keylens);
|
||||
int toku_testsetup_nonleaf (FT_HANDLE ft_h, int height, BLOCKNUM *blocknum, int n_children, BLOCKNUM *children, char **keys, int *keylens);
|
||||
int toku_testsetup_root(FT_HANDLE ft_h, BLOCKNUM);
|
||||
int toku_testsetup_get_sersize(FT_HANDLE ft_h, BLOCKNUM); // Return the size on disk.
|
||||
int toku_testsetup_insert_to_leaf (FT_HANDLE ft_h, BLOCKNUM, const char *key, int keylen, const char *val, int vallen);
|
||||
int toku_testsetup_insert_to_nonleaf (FT_HANDLE ft_h, BLOCKNUM, enum ft_msg_type, const char *key, int keylen, const char *val, int vallen);
|
||||
void toku_pin_node_with_min_bfe(FTNODE* node, BLOCKNUM b, FT_HANDLE t);
|
||||
|
||||
void toku_ft_root_put_msg(FT ft, const ft_msg &msg, txn_gc_info *gc_info);
|
||||
|
||||
// TODO: Rename
|
||||
void toku_get_node_for_verify(BLOCKNUM blocknum, FT_HANDLE ft_h, FTNODE* nodep);
|
||||
|
||||
int
|
||||
toku_verify_ftnode (FT_HANDLE ft_h,
|
||||
MSN rootmsn, MSN parentmsn_with_messages, bool messages_exist_above,
|
||||
FTNODE node, int height,
|
||||
const DBT *lesser_pivot, // Everything in the subtree should be > lesser_pivot. (lesser_pivot==NULL if there is no lesser pivot.)
|
||||
const DBT *greatereq_pivot, // Everything in the subtree should be <= greatereq_pivot. (greatereq_pivot==NULL if there is no greater-or-equal pivot.)
|
||||
int (*progress_callback)(void *extra, float progress), void *progress_extra,
|
||||
int recurse, int verbose, int keep_going_on_failure)
|
||||
__attribute__ ((warn_unused_result));
|
||||
|
||||
int toku_db_badformat(void) __attribute__((__warn_unused_result__));
|
||||
|
||||
// Row indices for the upgrade status table. The final enumerator is not a
// row: it is the row count and sizes FT_UPGRADE_STATUS_S::status.
typedef enum {
|
||||
FT_UPGRADE_FOOTPRINT = 0,
|
||||
FT_UPGRADE_STATUS_NUM_ROWS
|
||||
} ft_upgrade_status_entry;
|
||||
|
||||
// Upgrade status table: one engine-status row per ft_upgrade_status_entry
// value below FT_UPGRADE_STATUS_NUM_ROWS, plus an `initialized` flag.
typedef struct {
|
||||
bool initialized;
|
||||
TOKU_ENGINE_STATUS_ROW_S status[FT_UPGRADE_STATUS_NUM_ROWS];
|
||||
} FT_UPGRADE_STATUS_S, *FT_UPGRADE_STATUS;
|
||||
|
||||
void toku_ft_upgrade_get_status(FT_UPGRADE_STATUS);
|
||||
|
||||
void toku_le_get_status(LE_STATUS);
|
||||
|
||||
void toku_ft_status_update_pivot_fetch_reason(ftnode_fetch_extra *bfe);
|
||||
void toku_ft_status_update_flush_reason(FTNODE node, uint64_t uncompressed_bytes_flushed, uint64_t bytes_written, tokutime_t write_time, bool for_checkpoint);
|
||||
void toku_ft_status_update_serialize_times(FTNODE node, tokutime_t serialize_time, tokutime_t compress_time);
|
||||
void toku_ft_status_update_deserialize_times(FTNODE node, tokutime_t deserialize_time, tokutime_t decompress_time);
|
||||
void toku_ft_status_note_msn_discard(void);
|
||||
void toku_ft_status_note_update(bool broadcast);
|
||||
void toku_ft_status_note_msg_bytes_out(size_t buffsize);
|
||||
void toku_ft_status_note_ftnode(int height, bool created); // created = false means destroyed
|
||||
|
||||
void toku_ft_get_status(FT_STATUS);
|
||||
|
||||
void toku_flusher_thread_set_callback(void (*callback_f)(int, void*), void* extra);
|
||||
|
||||
// For upgrade
|
||||
int toku_upgrade_subtree_estimates_to_stat64info(int fd, FT ft) __attribute__((nonnull));
|
||||
int toku_upgrade_msn_from_root_to_header(int fd, FT ft) __attribute__((nonnull));
|
||||
|
||||
// A callback function is invoked with the key, and the data.
|
||||
// The pointers (to the bytevecs) must not be modified. The data must be copied out before the callback function returns.
|
||||
// Note: In the thread-safe version, the ftnode remains locked while the callback function runs. So return soon, and don't call the ft code from the callback function.
|
||||
// If the callback function returns a nonzero value (an error code), then that error code is returned from the get function itself.
|
||||
// The cursor object will have been updated (so that if result==0 the current value is the value being passed)
|
||||
// (If r!=0 then the cursor won't have been updated.)
|
||||
// If r!=0, it's up to the callback function to return that value of r.
|
||||
// A 'key' pointer of NULL means that element is not found (effectively infinity or
|
||||
// -infinity depending on direction)
|
||||
// When lock_only is false, the callback does optional lock tree locking and then processes the key and val.
|
||||
// When lock_only is true, the callback only does optional lock tree locking.
|
||||
typedef int (*FT_GET_CALLBACK_FUNCTION)(uint32_t keylen, const void *key, uint32_t vallen, const void *val, void *extra, bool lock_only);
|
||||
|
||||
typedef bool (*FT_CHECK_INTERRUPT_CALLBACK)(void *extra, uint64_t deleted_rows);
|
||||
|
||||
struct ft_cursor;
|
||||
int toku_ft_search(FT_HANDLE ft_handle, ft_search *search, FT_GET_CALLBACK_FUNCTION getf, void *getf_v, struct ft_cursor *ftcursor, bool can_bulk_fetch);
|
File diff suppressed because it is too large
Load Diff
@ -1,295 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
// This must be first to make the 64-bit file mode work right in Linux
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
|
||||
#include <db.h>
|
||||
|
||||
#include "ft/cachetable/cachetable.h"
|
||||
#include "ft/comparator.h"
|
||||
#include "ft/msg.h"
|
||||
#include "util/dbt.h"
|
||||
|
||||
#define OS_PATH_SEPARATOR '/'
|
||||
|
||||
typedef struct ft_handle *FT_HANDLE;
|
||||
|
||||
int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *, int nodesize, int basementnodesize, enum toku_compression_method compression_method, CACHETABLE, TOKUTXN, int(*)(DB *,const DBT*,const DBT*)) __attribute__ ((warn_unused_result));
|
||||
|
||||
// effect: changes the descriptor for the ft of the given handle.
|
||||
// requires:
|
||||
// - cannot change descriptor for same ft in two threads in parallel.
|
||||
// - can only update cmp descriptor immediately after opening the FIRST ft handle for this ft and before
|
||||
// ANY operations. to update the cmp descriptor after any operations have already happened, all handles
|
||||
// and transactions must close and reopen before the change, then you can update the cmp descriptor
|
||||
void toku_ft_change_descriptor(FT_HANDLE t, const DBT* old_descriptor, const DBT* new_descriptor, bool do_log, TOKUTXN txn, bool update_cmp_descriptor);
|
||||
uint32_t toku_serialize_descriptor_size(DESCRIPTOR desc);
|
||||
|
||||
void toku_ft_handle_create(FT_HANDLE *ft);
|
||||
void toku_ft_set_flags(FT_HANDLE, unsigned int flags);
|
||||
void toku_ft_get_flags(FT_HANDLE, unsigned int *flags);
|
||||
void toku_ft_handle_set_nodesize(FT_HANDLE, unsigned int nodesize);
|
||||
void toku_ft_handle_get_nodesize(FT_HANDLE, unsigned int *nodesize);
|
||||
void toku_ft_get_maximum_advised_key_value_lengths(unsigned int *klimit, unsigned int *vlimit);
|
||||
void toku_ft_handle_set_basementnodesize(FT_HANDLE, unsigned int basementnodesize);
|
||||
void toku_ft_handle_get_basementnodesize(FT_HANDLE, unsigned int *basementnodesize);
|
||||
void toku_ft_handle_set_compression_method(FT_HANDLE, enum toku_compression_method);
|
||||
void toku_ft_handle_get_compression_method(FT_HANDLE, enum toku_compression_method *);
|
||||
void toku_ft_handle_set_fanout(FT_HANDLE, unsigned int fanout);
|
||||
void toku_ft_handle_get_fanout(FT_HANDLE, unsigned int *fanout);
|
||||
int toku_ft_handle_set_memcmp_magic(FT_HANDLE, uint8_t magic);
|
||||
|
||||
void toku_ft_set_bt_compare(FT_HANDLE ft_handle, ft_compare_func cmp_func);
|
||||
const toku::comparator &toku_ft_get_comparator(FT_HANDLE ft_handle);
|
||||
|
||||
typedef void (*on_redirect_callback)(FT_HANDLE ft_handle, void *extra);
|
||||
void toku_ft_set_redirect_callback(FT_HANDLE ft_handle, on_redirect_callback cb, void *extra);
|
||||
|
||||
// How updates (update/insert/deletes) work:
|
||||
// There are two flavors of upsertdels: Singleton and broadcast.
|
||||
// When a singleton upsertdel message arrives it contains a key and an extra DBT.
|
||||
//
|
||||
// At the YDB layer, the function looks like
|
||||
//
|
||||
// int (*update_function)(DB*, DB_TXN*, const DBT *key, const DBT *old_val, const DBT *extra,
|
||||
// void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra);
|
||||
//
|
||||
// And there are two DB functions
|
||||
//
|
||||
// int DB->update(DB *, DB_TXN *, const DBT *key, const DBT *extra);
|
||||
// Effect:
|
||||
// If there is a key-value pair visible to the txn with value old_val then the system calls
|
||||
// update_function(DB, key, old_val, extra, set_val, set_extra)
|
||||
// where set_val and set_extra are a function and a void* provided by the system.
|
||||
// The update_function can do one of three things:
|
||||
// a) call set_val(new_val, set_extra)
|
||||
// which has the effect of doing DB->put(db, txn, key, new_val, 0)
|
||||
// overwriting the old value.
|
||||
// b) Return DB_DELETE (a new return code)
|
||||
// c) Return 0 (success) without calling set_val, which leaves the old value unchanged.
|
||||
// If there is no such key-value pair visible to the txn, then the system calls
|
||||
// update_function(DB, key, NULL, extra, set_val, set_extra)
|
||||
// and the update_function can do one of the same three things.
|
||||
// Implementation notes: Update acquires a write lock (just as DB->put
|
||||
// does). This function works by sending an UPDATE message containing
|
||||
// the key and extra.
|
||||
//
|
||||
// int DB->update_broadcast(DB *, DB_TXN*, const DBT *extra);
|
||||
// Effect: This has the same effect as building a cursor that walks
|
||||
// through the DB, calling DB->update() on every key that the cursor
|
||||
// finds.
|
||||
// Implementation note: Acquires a write lock on the entire database.
|
||||
// This function works by sending a BROADCAST-UPDATE message containing
|
||||
// the key and the extra.
|
||||
typedef int (*ft_update_func)(DB *db, const DBT *key, const DBT *old_val, const DBT *extra,
|
||||
void (*set_val)(const DBT *new_val, void *set_extra),
|
||||
void *set_extra);
|
||||
void toku_ft_set_update(FT_HANDLE ft_h, ft_update_func update_fun);
|
||||
|
||||
int toku_ft_handle_open(FT_HANDLE, const char *fname_in_env,
|
||||
int is_create, int only_create, CACHETABLE ct, TOKUTXN txn, bool open_rw=true) __attribute__ ((warn_unused_result));
|
||||
int toku_ft_handle_open_recovery(FT_HANDLE, const char *fname_in_env, int is_create, int only_create, CACHETABLE ct, TOKUTXN txn,
|
||||
FILENUM use_filenum, LSN max_acceptable_lsn) __attribute__ ((warn_unused_result));
|
||||
|
||||
// clone an ft handle. the cloned handle has a new dict_id but refers to the same fractal tree
|
||||
int toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN txn, bool open_rw=true);
|
||||
|
||||
// close an ft handle during normal operation. the underlying ft may or may not close,
|
||||
// depending if there are still references. an lsn for this close will come from the logger.
|
||||
void toku_ft_handle_close(FT_HANDLE ft_handle);
|
||||
// close an ft handle during recovery. the underlying ft must close, and will use the given lsn.
|
||||
void toku_ft_handle_close_recovery(FT_HANDLE ft_handle, LSN oplsn);
|
||||
|
||||
// At the ydb layer, a DICTIONARY_ID uniquely identifies an open dictionary.
|
||||
// With the introduction of the loader (ticket 2216), it is possible for the file that holds
|
||||
// an open dictionary to change, so these are now separate and independent unique identifiers (see FILENUM)
|
||||
// Identifier of an open dictionary; a plain wrapper around one 64-bit value.
struct DICTIONARY_ID {
    uint64_t dictid;  // raw identifier value
};

// Constant whose dictid is 0 (presumably the "no dictionary" sentinel --
// confirm against callers). Positional aggregate initialization is used
// instead of a designated initializer, which is only standard C++ from
// C++20 onward; the resulting value is identical.
static const DICTIONARY_ID DICTIONARY_ID_NONE = { 0 };
|
||||
|
||||
int
|
||||
toku_ft_handle_open_with_dict_id(
|
||||
FT_HANDLE ft_h,
|
||||
const char *fname_in_env,
|
||||
int is_create,
|
||||
int only_create,
|
||||
CACHETABLE cachetable,
|
||||
TOKUTXN txn,
|
||||
DICTIONARY_ID use_dictionary_id
|
||||
) __attribute__ ((warn_unused_result));
|
||||
|
||||
// Effect: Insert a key and data pair into an ft
|
||||
void toku_ft_insert (FT_HANDLE ft_h, DBT *k, DBT *v, TOKUTXN txn);
|
||||
|
||||
// Returns: 0 if the key was inserted, DB_KEYEXIST if the key already exists
|
||||
int toku_ft_insert_unique(FT_HANDLE ft, DBT *k, DBT *v, TOKUTXN txn, bool do_logging);
|
||||
|
||||
// Effect: Optimize the ft
|
||||
void toku_ft_optimize (FT_HANDLE ft_h);
|
||||
|
||||
// Effect: Insert a key and data pair into an ft if the oplsn is newer than the ft's lsn. This function is called during recovery.
|
||||
void toku_ft_maybe_insert (FT_HANDLE ft_h, DBT *k, DBT *v, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging, enum ft_msg_type type);
|
||||
|
||||
// Effect: Send an update message into an ft. This function is called
|
||||
// during recovery.
|
||||
void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_function_extra, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging);
|
||||
|
||||
// Effect: Send a broadcasting update message into an ft. This function
|
||||
// is called during recovery.
|
||||
void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_extra, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging, bool is_resetting_op);
|
||||
|
||||
void toku_ft_load_recovery(TOKUTXN txn, FILENUM old_filenum, char const * new_iname, int do_fsync, int do_log, LSN *load_lsn);
|
||||
void toku_ft_load(FT_HANDLE ft_h, TOKUTXN txn, char const * new_iname, int do_fsync, LSN *get_lsn);
|
||||
void toku_ft_hot_index_recovery(TOKUTXN txn, FILENUMS filenums, int do_fsync, int do_log, LSN *hot_index_lsn);
|
||||
void toku_ft_hot_index(FT_HANDLE ft_h, TOKUTXN txn, FILENUMS filenums, int do_fsync, LSN *lsn);
|
||||
|
||||
void toku_ft_log_put_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *fts, uint32_t num_fts, const DBT *key, const DBT *val);
|
||||
void toku_ft_log_put (TOKUTXN txn, FT_HANDLE ft_h, const DBT *key, const DBT *val);
|
||||
void toku_ft_log_del_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *fts, uint32_t num_fts, const DBT *key, const DBT *val);
|
||||
void toku_ft_log_del (TOKUTXN txn, FT_HANDLE ft_h, const DBT *key);
|
||||
|
||||
// Effect: Delete a key from an ft
|
||||
void toku_ft_delete (FT_HANDLE ft_h, DBT *k, TOKUTXN txn);
|
||||
|
||||
// Effect: Delete a key from an ft if the oplsn is newer than the ft lsn. This function is called during recovery.
|
||||
void toku_ft_maybe_delete (FT_HANDLE ft_h, DBT *k, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging);
|
||||
|
||||
TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h);
|
||||
struct txn_manager *toku_ft_get_txn_manager(FT_HANDLE ft_h);
|
||||
|
||||
struct txn_gc_info;
|
||||
void toku_ft_send_insert(FT_HANDLE ft_h, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info);
|
||||
void toku_ft_send_delete(FT_HANDLE ft_h, DBT *key, XIDS xids, txn_gc_info *gc_info);
|
||||
void toku_ft_send_commit_any(FT_HANDLE ft_h, DBT *key, XIDS xids, txn_gc_info *gc_info);
|
||||
|
||||
int toku_close_ft_handle_nolsn (FT_HANDLE, char **error_string) __attribute__ ((warn_unused_result));
|
||||
|
||||
int toku_dump_ft (FILE *,FT_HANDLE ft_h) __attribute__ ((warn_unused_result));
|
||||
|
||||
extern int toku_ft_debug_mode;
|
||||
int toku_verify_ft (FT_HANDLE ft_h) __attribute__ ((warn_unused_result));
|
||||
int toku_verify_ft_with_progress (FT_HANDLE ft_h, int (*progress_callback)(void *extra, float progress), void *extra, int verbose, int keep_going) __attribute__ ((warn_unused_result));
|
||||
|
||||
int toku_ft_recount_rows(
|
||||
FT_HANDLE ft,
|
||||
int (*progress_callback)(
|
||||
uint64_t count,
|
||||
uint64_t deleted,
|
||||
void* progress_extra),
|
||||
void* progress_extra);
|
||||
|
||||
|
||||
DICTIONARY_ID toku_ft_get_dictionary_id(FT_HANDLE);
|
||||
|
||||
// Bit flags for an ft handle. Bits 0 and 1 are retired and kept only as
// commented-out history so the remaining bit positions stay unchanged.
enum ft_flags {
    // TOKU_DB_DUP     = 1 << 0,  // Obsolete #2862
    // TOKU_DB_DUPSORT = 1 << 1,  // Obsolete #2862
    TOKU_DB_KEYCMP_BUILTIN    = 1 << 2,
    TOKU_DB_VALCMP_BUILTIN_13 = 1 << 3,
};
|
||||
|
||||
void toku_ft_keyrange(FT_HANDLE ft_h, DBT *key, uint64_t *less, uint64_t *equal, uint64_t *greater);
|
||||
void toku_ft_keysrange(FT_HANDLE ft_h, DBT* key_left, DBT* key_right, uint64_t *less_p, uint64_t* equal_left_p, uint64_t* middle_p, uint64_t* equal_right_p, uint64_t* greater_p, bool* middle_3_exact_p);
|
||||
|
||||
int toku_ft_get_key_after_bytes(FT_HANDLE ft_h, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *end_key, uint64_t actually_skipped, void *extra), void *cb_extra);
|
||||
|
||||
// Statistics record for a fractal tree; every field is a 64-bit counter.
struct ftstat64_s {
    // estimate how many unique keys (even when flattened this may be an estimate)
    uint64_t nkeys;
    // estimate the number of pairs (exact when flattened and committed)
    uint64_t ndata;
    // estimate the sum of the sizes of the pairs (exact when flattened and committed)
    uint64_t dsize;
    // the size of the underlying file
    uint64_t fsize;
    // number of free bytes in the underlying file
    uint64_t ffree;
    // creation time in seconds
    uint64_t create_time_sec;
    // time of last serialization, in seconds
    uint64_t modify_time_sec;
    // time of last verification, in seconds
    uint64_t verify_time_sec;
};
|
||||
|
||||
void toku_ft_handle_stat64 (FT_HANDLE, TOKUTXN, struct ftstat64_s *stat);
|
||||
|
||||
// Block-usage summary for a fractal tree's blocktable.
struct ftinfo64 {
    // number of blocks in the blocktable
    uint64_t num_blocks_allocated;
    // number of blocks in use by most recent checkpoint
    uint64_t num_blocks_in_use;
    // sum of sizes of blocks in blocktable
    uint64_t size_allocated;
    // sum of sizes of blocks in use by most recent checkpoint
    uint64_t size_in_use;
};
|
||||
|
||||
void toku_ft_handle_get_fractal_tree_info64(FT_HANDLE, struct ftinfo64 *);
|
||||
|
||||
int toku_ft_handle_iterate_fractal_tree_block_map(FT_HANDLE, int (*)(uint64_t,int64_t,int64_t,int64_t,int64_t,void*), void *);
|
||||
|
||||
int toku_ft_layer_init(void) __attribute__ ((warn_unused_result));
|
||||
void toku_ft_open_close_lock(void);
|
||||
void toku_ft_open_close_unlock(void);
|
||||
void toku_ft_layer_destroy(void);
|
||||
void toku_ft_serialize_layer_init(void);
|
||||
void toku_ft_serialize_layer_destroy(void);
|
||||
|
||||
void toku_maybe_truncate_file (int fd, uint64_t size_used, uint64_t expected_size, uint64_t *new_size);
|
||||
// Effect: truncate file if overallocated by at least 32MiB
|
||||
|
||||
void toku_maybe_preallocate_in_file (int fd, int64_t size, int64_t expected_size, int64_t *new_size);
|
||||
// Effect: make the file bigger by either doubling it or growing by 16MiB whichever is less, until it is at least size
|
||||
// Return 0 on success, otherwise an error number.
|
||||
|
||||
int toku_ft_get_fragmentation(FT_HANDLE ft_h, TOKU_DB_FRAGMENTATION report) __attribute__ ((warn_unused_result));
|
||||
|
||||
bool toku_ft_is_empty_fast (FT_HANDLE ft_h) __attribute__ ((warn_unused_result));
|
||||
// Effect: Return true if there are no messages or leaf entries in the tree. If so, it's empty. If there are messages or leaf entries, we say it's not empty
|
||||
// even though if we were to optimize the tree it might turn out that they are empty.
|
||||
|
||||
int toku_ft_strerror_r(int error, char *buf, size_t buflen);
|
||||
// Effect: Like the XSI-compliant strerror_r, extended to db_strerror().
|
||||
// If error>=0 then the result is to do strerror_r(error, buf, buflen), that is fill buf with a descriptive error message.
|
||||
// If error<0 then return a PerconaFT-specific error code. For unknown cases, we return -1 and set errno=EINVAL, even for cases that *should* be known. (Not all DB errors are known by this function which is a bug.)
|
||||
|
||||
extern bool garbage_collection_debug;
|
||||
|
||||
// This is a poor place to put global options like these.
|
||||
void toku_ft_set_direct_io(bool direct_io_on);
|
||||
void toku_ft_set_compress_buffers_before_eviction(bool compress_buffers);
|
||||
|
||||
void toku_note_deserialized_basement_node(bool fixed_key_size);
|
||||
|
||||
// Creates all directories for the path if necessary,
|
||||
// returns true if all dirs are created successfully or
|
||||
// all dirs exist, false otherwise.
|
||||
bool toku_create_subdirs_if_needed(const char* path);
|
@ -1,106 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include "ft/serialize/block_table.h"
|
||||
#include "ft/ft.h"
|
||||
#include "ft/ft-internal.h"
|
||||
#include "ft/cursor.h"
|
||||
|
||||
// State shared between toku_ft_recount_rows() and its cursor callbacks.
// Member order matters: the struct is brace-initialized positionally.
struct recount_rows_extra_t {
    // Caller-supplied progress hook; its result is stored into _cancelled
    // by the callbacks that invoke it.
    int (*_progress_callback)(uint64_t count, uint64_t deleted, void* progress_extra);
    // Opaque pointer handed back to _progress_callback on every call.
    void* _progress_extra;
    // Running count of rows seen with a non-null key.
    uint64_t _keys;
    // Truth value of the most recent _progress_callback result.
    bool _cancelled;
};
|
||||
|
||||
static int recount_rows_found(
|
||||
uint32_t UU(keylen),
|
||||
const void* key,
|
||||
uint32_t UU(vallen),
|
||||
const void* UU(val),
|
||||
void* extra,
|
||||
bool UU(lock_only)) {
|
||||
|
||||
recount_rows_extra_t* rre = (recount_rows_extra_t*)extra;
|
||||
|
||||
if (FT_LIKELY(key != nullptr)) {
|
||||
rre->_keys++;
|
||||
}
|
||||
return rre->_cancelled
|
||||
= rre->_progress_callback(rre->_keys, 0, rre->_progress_extra);
|
||||
}
|
||||
static bool recount_rows_interrupt(void* extra, uint64_t deleted_rows) {
|
||||
recount_rows_extra_t* rre = (recount_rows_extra_t*)extra;
|
||||
|
||||
return rre->_cancelled =
|
||||
rre->_progress_callback(rre->_keys, deleted_rows, rre->_progress_extra);
|
||||
}
|
||||
// Recounts the rows of `ft` by walking it with a cursor and stores the
// result as the tree's in-memory logical row count.
//
// progress_callback is invoked (through recount_rows_found and
// recount_rows_interrupt) with the running key count, a deleted-row count
// and progress_extra; a non-zero result from it sets the cancel flag.
//
// Returns the cursor-creation error if the cursor cannot be created. If the
// cancel flag is set when the walk ends, returns the last non-zero cursor
// result and leaves the stored count untouched. Otherwise stores the count,
// marks the header dirty and returns 0.
//
// NOTE(review): any non-zero cursor result that arrives with the cancel flag
// clear is treated as normal end of iteration and reported as success --
// confirm that only "not found" can reach this path.
int toku_ft_recount_rows(FT_HANDLE ft,
                         int (*progress_callback)(uint64_t count,
                                                  uint64_t deleted,
                                                  void* progress_extra),
                         void* progress_extra) {
    recount_rows_extra_t rre = {progress_callback, progress_extra, 0, false};

    ft_cursor c;
    int ret = toku_ft_cursor_create(ft, &c, nullptr, C_READ_ANY, false, false);
    if (ret != 0) {
        return ret;
    }

    toku_ft_cursor_set_check_interrupt_cb(&c, recount_rows_interrupt, &rre);

    // Walk every row; each step's work happens inside the callbacks.
    for (ret = toku_ft_cursor_first(&c, recount_rows_found, &rre);
         FT_LIKELY(ret == 0);
         ret = toku_ft_cursor_next(&c, recount_rows_found, &rre)) {
    }

    toku_ft_cursor_destroy(&c);

    if (rre._cancelled) {
        return ret;
    }

    // update ft count
    toku_unsafe_set(&ft->ft->in_memory_logical_rows, rre._keys);
    ft->ft->h->set_dirty();
    return 0;
}
|
@ -1,503 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include <my_global.h>
|
||||
#include "ft/ft.h"
|
||||
#include "ft/ft-status.h"
|
||||
|
||||
#include <toku_race_tools.h>
|
||||
|
||||
LE_STATUS_S le_status;
|
||||
void LE_STATUS_S::init() {
|
||||
if (m_initialized) return;
|
||||
#define LE_STATUS_INIT(k,c,t,l) TOKUFT_STATUS_INIT((*this), k, c, t, "le: " l, TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS)
|
||||
LE_STATUS_INIT(LE_MAX_COMMITTED_XR, LEAF_ENTRY_MAX_COMMITTED_XR, UINT64, "max committed xr");
|
||||
LE_STATUS_INIT(LE_MAX_PROVISIONAL_XR, LEAF_ENTRY_MAX_PROVISIONAL_XR, UINT64, "max provisional xr");
|
||||
LE_STATUS_INIT(LE_EXPANDED, LEAF_ENTRY_EXPANDED, UINT64, "expanded");
|
||||
LE_STATUS_INIT(LE_MAX_MEMSIZE, LEAF_ENTRY_MAX_MEMSIZE, UINT64, "max memsize");
|
||||
LE_STATUS_INIT(LE_APPLY_GC_BYTES_IN, LEAF_ENTRY_APPLY_GC_BYTES_IN, PARCOUNT, "size of leafentries before garbage collection (during message application)");
|
||||
LE_STATUS_INIT(LE_APPLY_GC_BYTES_OUT, LEAF_ENTRY_APPLY_GC_BYTES_OUT, PARCOUNT, "size of leafentries after garbage collection (during message application)");
|
||||
LE_STATUS_INIT(LE_NORMAL_GC_BYTES_IN, LEAF_ENTRY_NORMAL_GC_BYTES_IN, PARCOUNT, "size of leafentries before garbage collection (outside message application)");
|
||||
LE_STATUS_INIT(LE_NORMAL_GC_BYTES_OUT, LEAF_ENTRY_NORMAL_GC_BYTES_OUT, PARCOUNT, "size of leafentries after garbage collection (outside message application)");
|
||||
m_initialized = true;
|
||||
#undef LE_STATUS_INIT
|
||||
}
|
||||
void LE_STATUS_S::destroy() {
|
||||
if (!m_initialized) return;
|
||||
for (int i = 0; i < LE_STATUS_NUM_ROWS; ++i) {
|
||||
if (status[i].type == PARCOUNT) {
|
||||
destroy_partitioned_counter(status[i].value.parcount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
CHECKPOINT_STATUS_S cp_status;
|
||||
void CHECKPOINT_STATUS_S::init(void) {
|
||||
if (m_initialized) return;
|
||||
#define CP_STATUS_INIT(k,c,t,l) TOKUFT_STATUS_INIT((*this), k, c, t, "checkpoint: " l, TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS)
|
||||
CP_STATUS_INIT(CP_PERIOD, CHECKPOINT_PERIOD, UINT64, "period");
|
||||
CP_STATUS_INIT(CP_FOOTPRINT, CHECKPOINT_FOOTPRINT, UINT64, "footprint");
|
||||
CP_STATUS_INIT(CP_TIME_LAST_CHECKPOINT_BEGIN, CHECKPOINT_LAST_BEGAN, UNIXTIME, "last checkpoint began");
|
||||
CP_STATUS_INIT(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE, CHECKPOINT_LAST_COMPLETE_BEGAN, UNIXTIME, "last complete checkpoint began");
|
||||
CP_STATUS_INIT(CP_TIME_LAST_CHECKPOINT_END, CHECKPOINT_LAST_COMPLETE_ENDED, UNIXTIME, "last complete checkpoint ended");
|
||||
CP_STATUS_INIT(CP_TIME_CHECKPOINT_DURATION, CHECKPOINT_DURATION, UINT64, "time spent during checkpoint (begin and end phases)");
|
||||
CP_STATUS_INIT(CP_TIME_CHECKPOINT_DURATION_LAST, CHECKPOINT_DURATION_LAST, UINT64, "time spent during last checkpoint (begin and end phases)");
|
||||
CP_STATUS_INIT(CP_LAST_LSN, CHECKPOINT_LAST_LSN, UINT64, "last complete checkpoint LSN");
|
||||
CP_STATUS_INIT(CP_CHECKPOINT_COUNT, CHECKPOINT_TAKEN, UINT64, "checkpoints taken ");
|
||||
CP_STATUS_INIT(CP_CHECKPOINT_COUNT_FAIL, CHECKPOINT_FAILED, UINT64, "checkpoints failed");
|
||||
CP_STATUS_INIT(CP_WAITERS_NOW, CHECKPOINT_WAITERS_NOW, UINT64, "waiters now");
|
||||
CP_STATUS_INIT(CP_WAITERS_MAX, CHECKPOINT_WAITERS_MAX, UINT64, "waiters max");
|
||||
CP_STATUS_INIT(CP_CLIENT_WAIT_ON_MO, CHECKPOINT_CLIENT_WAIT_ON_MO, UINT64, "non-checkpoint client wait on mo lock");
|
||||
CP_STATUS_INIT(CP_CLIENT_WAIT_ON_CS, CHECKPOINT_CLIENT_WAIT_ON_CS, UINT64, "non-checkpoint client wait on cs lock");
|
||||
CP_STATUS_INIT(CP_BEGIN_TIME, CHECKPOINT_BEGIN_TIME, UINT64, "checkpoint begin time");
|
||||
CP_STATUS_INIT(CP_LONG_BEGIN_COUNT, CHECKPOINT_LONG_BEGIN_COUNT, UINT64, "long checkpoint begin count");
|
||||
CP_STATUS_INIT(CP_LONG_BEGIN_TIME, CHECKPOINT_LONG_BEGIN_TIME, UINT64, "long checkpoint begin time");
|
||||
CP_STATUS_INIT(CP_END_TIME, CHECKPOINT_END_TIME, UINT64, "checkpoint end time");
|
||||
CP_STATUS_INIT(CP_LONG_END_COUNT, CHECKPOINT_LONG_END_COUNT, UINT64, "long checkpoint end count");
|
||||
CP_STATUS_INIT(CP_LONG_END_TIME, CHECKPOINT_LONG_END_TIME, UINT64, "long checkpoint end time");
|
||||
|
||||
m_initialized = true;
|
||||
#undef CP_STATUS_INIT
|
||||
}
|
||||
void CHECKPOINT_STATUS_S::destroy() {
|
||||
if (!m_initialized) return;
|
||||
for (int i = 0; i < CP_STATUS_NUM_ROWS; ++i) {
|
||||
if (status[i].type == PARCOUNT) {
|
||||
destroy_partitioned_counter(status[i].value.parcount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CACHETABLE_STATUS_S ct_status;
|
||||
void CACHETABLE_STATUS_S::init() {
|
||||
if (m_initialized) return;
|
||||
#define CT_STATUS_INIT(k,c,t,l) TOKUFT_STATUS_INIT((*this), k, c, t, "cachetable: " l, TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS)
|
||||
CT_STATUS_INIT(CT_MISS, CACHETABLE_MISS, UINT64, "miss");
|
||||
CT_STATUS_INIT(CT_MISSTIME, CACHETABLE_MISS_TIME, UINT64, "miss time");
|
||||
CT_STATUS_INIT(CT_PREFETCHES, CACHETABLE_PREFETCHES, UINT64, "prefetches");
|
||||
CT_STATUS_INIT(CT_SIZE_CURRENT, CACHETABLE_SIZE_CURRENT, UINT64, "size current");
|
||||
CT_STATUS_INIT(CT_SIZE_LIMIT, CACHETABLE_SIZE_LIMIT, UINT64, "size limit");
|
||||
CT_STATUS_INIT(CT_SIZE_WRITING, CACHETABLE_SIZE_WRITING, UINT64, "size writing");
|
||||
CT_STATUS_INIT(CT_SIZE_NONLEAF, CACHETABLE_SIZE_NONLEAF, UINT64, "size nonleaf");
|
||||
CT_STATUS_INIT(CT_SIZE_LEAF, CACHETABLE_SIZE_LEAF, UINT64, "size leaf");
|
||||
CT_STATUS_INIT(CT_SIZE_ROLLBACK, CACHETABLE_SIZE_ROLLBACK, UINT64, "size rollback");
|
||||
CT_STATUS_INIT(CT_SIZE_CACHEPRESSURE, CACHETABLE_SIZE_CACHEPRESSURE, UINT64, "size cachepressure");
|
||||
CT_STATUS_INIT(CT_SIZE_CLONED, CACHETABLE_SIZE_CLONED, UINT64, "size currently cloned data for checkpoint");
|
||||
CT_STATUS_INIT(CT_EVICTIONS, CACHETABLE_EVICTIONS, UINT64, "evictions");
|
||||
CT_STATUS_INIT(CT_CLEANER_EXECUTIONS, CACHETABLE_CLEANER_EXECUTIONS, UINT64, "cleaner executions");
|
||||
CT_STATUS_INIT(CT_CLEANER_PERIOD, CACHETABLE_CLEANER_PERIOD, UINT64, "cleaner period");
|
||||
CT_STATUS_INIT(CT_CLEANER_ITERATIONS, CACHETABLE_CLEANER_ITERATIONS, UINT64, "cleaner iterations");
|
||||
CT_STATUS_INIT(CT_WAIT_PRESSURE_COUNT, CACHETABLE_WAIT_PRESSURE_COUNT, UINT64, "number of waits on cache pressure");
|
||||
CT_STATUS_INIT(CT_WAIT_PRESSURE_TIME, CACHETABLE_WAIT_PRESSURE_TIME, UINT64, "time waiting on cache pressure");
|
||||
CT_STATUS_INIT(CT_LONG_WAIT_PRESSURE_COUNT, CACHETABLE_LONG_WAIT_PRESSURE_COUNT, UINT64, "number of long waits on cache pressure");
|
||||
CT_STATUS_INIT(CT_LONG_WAIT_PRESSURE_TIME, CACHETABLE_LONG_WAIT_PRESSURE_TIME, UINT64, "long time waiting on cache pressure");
|
||||
|
||||
CT_STATUS_INIT(CT_POOL_CLIENT_NUM_THREADS, CACHETABLE_POOL_CLIENT_NUM_THREADS, UINT64, "client pool: number of threads in pool");
|
||||
CT_STATUS_INIT(CT_POOL_CLIENT_NUM_THREADS_ACTIVE, CACHETABLE_POOL_CLIENT_NUM_THREADS_ACTIVE, UINT64, "client pool: number of currently active threads in pool");
|
||||
CT_STATUS_INIT(CT_POOL_CLIENT_QUEUE_SIZE, CACHETABLE_POOL_CLIENT_QUEUE_SIZE, UINT64, "client pool: number of currently queued work items");
|
||||
CT_STATUS_INIT(CT_POOL_CLIENT_MAX_QUEUE_SIZE, CACHETABLE_POOL_CLIENT_MAX_QUEUE_SIZE, UINT64, "client pool: largest number of queued work items");
|
||||
CT_STATUS_INIT(CT_POOL_CLIENT_TOTAL_ITEMS_PROCESSED, CACHETABLE_POOL_CLIENT_TOTAL_ITEMS_PROCESSED, UINT64, "client pool: total number of work items processed");
|
||||
CT_STATUS_INIT(CT_POOL_CLIENT_TOTAL_EXECUTION_TIME, CACHETABLE_POOL_CLIENT_TOTAL_EXECUTION_TIME, UINT64, "client pool: total execution time of processing work items");
|
||||
CT_STATUS_INIT(CT_POOL_CACHETABLE_NUM_THREADS, CACHETABLE_POOL_CACHETABLE_NUM_THREADS, UINT64, "cachetable pool: number of threads in pool");
|
||||
CT_STATUS_INIT(CT_POOL_CACHETABLE_NUM_THREADS_ACTIVE, CACHETABLE_POOL_CACHETABLE_NUM_THREADS_ACTIVE, UINT64, "cachetable pool: number of currently active threads in pool");
|
||||
CT_STATUS_INIT(CT_POOL_CACHETABLE_QUEUE_SIZE, CACHETABLE_POOL_CACHETABLE_QUEUE_SIZE, UINT64, "cachetable pool: number of currently queued work items");
|
||||
CT_STATUS_INIT(CT_POOL_CACHETABLE_MAX_QUEUE_SIZE, CACHETABLE_POOL_CACHETABLE_MAX_QUEUE_SIZE, UINT64, "cachetable pool: largest number of queued work items");
|
||||
CT_STATUS_INIT(CT_POOL_CACHETABLE_TOTAL_ITEMS_PROCESSED, CACHETABLE_POOL_CACHETABLE_TOTAL_ITEMS_PROCESSED, UINT64, "cachetable pool: total number of work items processed");
|
||||
CT_STATUS_INIT(CT_POOL_CACHETABLE_TOTAL_EXECUTION_TIME, CACHETABLE_POOL_CACHETABLE_TOTAL_EXECUTION_TIME, UINT64, "cachetable pool: total execution time of processing work items");
|
||||
CT_STATUS_INIT(CT_POOL_CHECKPOINT_NUM_THREADS, CACHETABLE_POOL_CHECKPOINT_NUM_THREADS, UINT64, "checkpoint pool: number of threads in pool");
|
||||
CT_STATUS_INIT(CT_POOL_CHECKPOINT_NUM_THREADS_ACTIVE, CACHETABLE_POOL_CHECKPOINT_NUM_THREADS_ACTIVE, UINT64, "checkpoint pool: number of currently active threads in pool");
|
||||
CT_STATUS_INIT(CT_POOL_CHECKPOINT_QUEUE_SIZE, CACHETABLE_POOL_CHECKPOINT_QUEUE_SIZE, UINT64, "checkpoint pool: number of currently queued work items");
|
||||
CT_STATUS_INIT(CT_POOL_CHECKPOINT_MAX_QUEUE_SIZE, CACHETABLE_POOL_CHECKPOINT_MAX_QUEUE_SIZE, UINT64, "checkpoint pool: largest number of queued work items");
|
||||
CT_STATUS_INIT(CT_POOL_CHECKPOINT_TOTAL_ITEMS_PROCESSED, CACHETABLE_POOL_CHECKPOINT_TOTAL_ITEMS_PROCESSED, UINT64, "checkpoint pool: total number of work items processed");
|
||||
CT_STATUS_INIT(CT_POOL_CHECKPOINT_TOTAL_EXECUTION_TIME, CACHETABLE_POOL_CHECKPOINT_TOTAL_EXECUTION_TIME, UINT64, "checkpoint pool: total execution time of processing work items");
|
||||
|
||||
m_initialized = true;
|
||||
#undef CT_STATUS_INIT
|
||||
}
|
||||
void CACHETABLE_STATUS_S::destroy() {
|
||||
if (!m_initialized) return;
|
||||
for (int i = 0; i < CT_STATUS_NUM_ROWS; ++i) {
|
||||
if (status[i].type == PARCOUNT) {
|
||||
destroy_partitioned_counter(status[i].value.parcount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
LTM_STATUS_S ltm_status;
|
||||
void LTM_STATUS_S::init() {
|
||||
if (m_initialized) return;
|
||||
#define LTM_STATUS_INIT(k,c,t,l) TOKUFT_STATUS_INIT((*this), k, c, t, "locktree: " l, TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS)
|
||||
LTM_STATUS_INIT(LTM_SIZE_CURRENT, LOCKTREE_MEMORY_SIZE, UINT64, "memory size");
|
||||
LTM_STATUS_INIT(LTM_SIZE_LIMIT, LOCKTREE_MEMORY_SIZE_LIMIT, UINT64, "memory size limit");
|
||||
LTM_STATUS_INIT(LTM_ESCALATION_COUNT, LOCKTREE_ESCALATION_NUM, UINT64, "number of times lock escalation ran");
|
||||
LTM_STATUS_INIT(LTM_ESCALATION_TIME, LOCKTREE_ESCALATION_SECONDS, TOKUTIME, "time spent running escalation (seconds)");
|
||||
LTM_STATUS_INIT(LTM_ESCALATION_LATEST_RESULT, LOCKTREE_LATEST_POST_ESCALATION_MEMORY_SIZE, UINT64, "latest post-escalation memory size");
|
||||
LTM_STATUS_INIT(LTM_NUM_LOCKTREES, LOCKTREE_OPEN_CURRENT, UINT64, "number of locktrees open now");
|
||||
LTM_STATUS_INIT(LTM_LOCK_REQUESTS_PENDING, LOCKTREE_PENDING_LOCK_REQUESTS, UINT64, "number of pending lock requests");
|
||||
LTM_STATUS_INIT(LTM_STO_NUM_ELIGIBLE, LOCKTREE_STO_ELIGIBLE_NUM, UINT64, "number of locktrees eligible for the STO");
|
||||
LTM_STATUS_INIT(LTM_STO_END_EARLY_COUNT, LOCKTREE_STO_ENDED_NUM, UINT64, "number of times a locktree ended the STO early");
|
||||
LTM_STATUS_INIT(LTM_STO_END_EARLY_TIME, LOCKTREE_STO_ENDED_SECONDS, TOKUTIME, "time spent ending the STO early (seconds)");
|
||||
LTM_STATUS_INIT(LTM_WAIT_COUNT, LOCKTREE_WAIT_COUNT, UINT64, "number of wait locks");
|
||||
LTM_STATUS_INIT(LTM_WAIT_TIME, LOCKTREE_WAIT_TIME, UINT64, "time waiting for locks");
|
||||
LTM_STATUS_INIT(LTM_LONG_WAIT_COUNT, LOCKTREE_LONG_WAIT_COUNT, UINT64, "number of long wait locks");
|
||||
LTM_STATUS_INIT(LTM_LONG_WAIT_TIME, LOCKTREE_LONG_WAIT_TIME, UINT64, "long time waiting for locks");
|
||||
LTM_STATUS_INIT(LTM_TIMEOUT_COUNT, LOCKTREE_TIMEOUT_COUNT, UINT64, "number of lock timeouts");
|
||||
LTM_STATUS_INIT(LTM_WAIT_ESCALATION_COUNT, LOCKTREE_WAIT_ESCALATION_COUNT, UINT64, "number of waits on lock escalation");
|
||||
LTM_STATUS_INIT(LTM_WAIT_ESCALATION_TIME, LOCKTREE_WAIT_ESCALATION_TIME, UINT64, "time waiting on lock escalation");
|
||||
LTM_STATUS_INIT(LTM_LONG_WAIT_ESCALATION_COUNT, LOCKTREE_LONG_WAIT_ESCALATION_COUNT, UINT64, "number of long waits on lock escalation");
|
||||
LTM_STATUS_INIT(LTM_LONG_WAIT_ESCALATION_TIME, LOCKTREE_LONG_WAIT_ESCALATION_TIME, UINT64, "long time waiting on lock escalation");
|
||||
|
||||
m_initialized = true;
|
||||
#undef LTM_STATUS_INIT
|
||||
}
|
||||
void LTM_STATUS_S::destroy() {
|
||||
if (!m_initialized) return;
|
||||
for (int i = 0; i < LTM_STATUS_NUM_ROWS; ++i) {
|
||||
if (status[i].type == PARCOUNT) {
|
||||
destroy_partitioned_counter(status[i].value.parcount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
FT_STATUS_S ft_status;
|
||||
void FT_STATUS_S::init() {
|
||||
if (m_initialized) return;
|
||||
#define FT_STATUS_INIT(k,c,t,l) TOKUFT_STATUS_INIT((*this), k, c, t, "ft: " l, TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS)
|
||||
FT_STATUS_INIT(FT_UPDATES, DICTIONARY_UPDATES, PARCOUNT, "dictionary updates");
|
||||
FT_STATUS_INIT(FT_UPDATES_BROADCAST, DICTIONARY_BROADCAST_UPDATES, PARCOUNT, "dictionary broadcast updates");
|
||||
FT_STATUS_INIT(FT_DESCRIPTOR_SET, DESCRIPTOR_SET, PARCOUNT, "descriptor set");
|
||||
FT_STATUS_INIT(FT_MSN_DISCARDS, MESSAGES_IGNORED_BY_LEAF_DUE_TO_MSN, PARCOUNT, "messages ignored by leaf due to msn");
|
||||
FT_STATUS_INIT(FT_TOTAL_RETRIES, TOTAL_SEARCH_RETRIES, PARCOUNT, "total search retries due to TRY_AGAIN");
|
||||
FT_STATUS_INIT(FT_SEARCH_TRIES_GT_HEIGHT, SEARCH_TRIES_GT_HEIGHT, PARCOUNT, "searches requiring more tries than the height of the tree");
|
||||
FT_STATUS_INIT(FT_SEARCH_TRIES_GT_HEIGHTPLUS3, SEARCH_TRIES_GT_HEIGHTPLUS3, PARCOUNT, "searches requiring more tries than the height of the tree plus three");
|
||||
FT_STATUS_INIT(FT_CREATE_LEAF, LEAF_NODES_CREATED, PARCOUNT, "leaf nodes created");
|
||||
FT_STATUS_INIT(FT_CREATE_NONLEAF, NONLEAF_NODES_CREATED, PARCOUNT, "nonleaf nodes created");
|
||||
FT_STATUS_INIT(FT_DESTROY_LEAF, LEAF_NODES_DESTROYED, PARCOUNT, "leaf nodes destroyed");
|
||||
FT_STATUS_INIT(FT_DESTROY_NONLEAF, NONLEAF_NODES_DESTROYED, PARCOUNT, "nonleaf nodes destroyed");
|
||||
FT_STATUS_INIT(FT_MSG_BYTES_IN, MESSAGES_INJECTED_AT_ROOT_BYTES, PARCOUNT, "bytes of messages injected at root (all trees)");
|
||||
FT_STATUS_INIT(FT_MSG_BYTES_OUT, MESSAGES_FLUSHED_FROM_H1_TO_LEAVES_BYTES, PARCOUNT, "bytes of messages flushed from h1 nodes to leaves");
|
||||
FT_STATUS_INIT(FT_MSG_BYTES_CURR, MESSAGES_IN_TREES_ESTIMATE_BYTES, PARCOUNT, "bytes of messages currently in trees (estimate)");
|
||||
FT_STATUS_INIT(FT_MSG_NUM, MESSAGES_INJECTED_AT_ROOT, PARCOUNT, "messages injected at root");
|
||||
FT_STATUS_INIT(FT_MSG_NUM_BROADCAST, BROADCASE_MESSAGES_INJECTED_AT_ROOT, PARCOUNT, "broadcast messages injected at root");
|
||||
|
||||
FT_STATUS_INIT(FT_NUM_BASEMENTS_DECOMPRESSED_NORMAL, BASEMENTS_DECOMPRESSED_TARGET_QUERY, PARCOUNT, "basements decompressed as a target of a query");
|
||||
FT_STATUS_INIT(FT_NUM_BASEMENTS_DECOMPRESSED_AGGRESSIVE, BASEMENTS_DECOMPRESSED_PRELOCKED_RANGE, PARCOUNT, "basements decompressed for prelocked range");
|
||||
FT_STATUS_INIT(FT_NUM_BASEMENTS_DECOMPRESSED_PREFETCH, BASEMENTS_DECOMPRESSED_PREFETCH, PARCOUNT, "basements decompressed for prefetch");
|
||||
FT_STATUS_INIT(FT_NUM_BASEMENTS_DECOMPRESSED_WRITE, BASEMENTS_DECOMPRESSED_FOR_WRITE, PARCOUNT, "basements decompressed for write");
|
||||
FT_STATUS_INIT(FT_NUM_MSG_BUFFER_DECOMPRESSED_NORMAL, BUFFERS_DECOMPRESSED_TARGET_QUERY, PARCOUNT, "buffers decompressed as a target of a query");
|
||||
FT_STATUS_INIT(FT_NUM_MSG_BUFFER_DECOMPRESSED_AGGRESSIVE, BUFFERS_DECOMPRESSED_PRELOCKED_RANGE, PARCOUNT, "buffers decompressed for prelocked range");
|
||||
FT_STATUS_INIT(FT_NUM_MSG_BUFFER_DECOMPRESSED_PREFETCH, BUFFERS_DECOMPRESSED_PREFETCH, PARCOUNT, "buffers decompressed for prefetch");
|
||||
FT_STATUS_INIT(FT_NUM_MSG_BUFFER_DECOMPRESSED_WRITE, BUFFERS_DECOMPRESSED_FOR_WRITE, PARCOUNT, "buffers decompressed for write");
|
||||
|
||||
// Eviction statistics:
|
||||
FT_STATUS_INIT(FT_FULL_EVICTIONS_LEAF, LEAF_NODE_FULL_EVICTIONS, PARCOUNT, "leaf node full evictions");
|
||||
FT_STATUS_INIT(FT_FULL_EVICTIONS_LEAF_BYTES, LEAF_NODE_FULL_EVICTIONS_BYTES, PARCOUNT, "leaf node full evictions (bytes)");
|
||||
FT_STATUS_INIT(FT_FULL_EVICTIONS_NONLEAF, NONLEAF_NODE_FULL_EVICTIONS, PARCOUNT, "nonleaf node full evictions");
|
||||
FT_STATUS_INIT(FT_FULL_EVICTIONS_NONLEAF_BYTES, NONLEAF_NODE_FULL_EVICTIONS_BYTES, PARCOUNT, "nonleaf node full evictions (bytes)");
|
||||
FT_STATUS_INIT(FT_PARTIAL_EVICTIONS_LEAF, LEAF_NODE_PARTIAL_EVICTIONS, PARCOUNT, "leaf node partial evictions");
|
||||
FT_STATUS_INIT(FT_PARTIAL_EVICTIONS_LEAF_BYTES, LEAF_NODE_PARTIAL_EVICTIONS_BYTES, PARCOUNT, "leaf node partial evictions (bytes)");
|
||||
FT_STATUS_INIT(FT_PARTIAL_EVICTIONS_NONLEAF, NONLEAF_NODE_PARTIAL_EVICTIONS, PARCOUNT, "nonleaf node partial evictions");
|
||||
FT_STATUS_INIT(FT_PARTIAL_EVICTIONS_NONLEAF_BYTES, NONLEAF_NODE_PARTIAL_EVICTIONS_BYTES, PARCOUNT, "nonleaf node partial evictions (bytes)");
|
||||
|
||||
// Disk read statistics:
|
||||
//
|
||||
// Pivots: For queries, prefetching, or writing.
|
||||
FT_STATUS_INIT(FT_NUM_PIVOTS_FETCHED_QUERY, PIVOTS_FETCHED_FOR_QUERY, PARCOUNT, "pivots fetched for query");
|
||||
FT_STATUS_INIT(FT_BYTES_PIVOTS_FETCHED_QUERY, PIVOTS_FETCHED_FOR_QUERY_BYTES, PARCOUNT, "pivots fetched for query (bytes)");
|
||||
FT_STATUS_INIT(FT_TOKUTIME_PIVOTS_FETCHED_QUERY, PIVOTS_FETCHED_FOR_QUERY_SECONDS, TOKUTIME, "pivots fetched for query (seconds)");
|
||||
FT_STATUS_INIT(FT_NUM_PIVOTS_FETCHED_PREFETCH, PIVOTS_FETCHED_FOR_PREFETCH, PARCOUNT, "pivots fetched for prefetch");
|
||||
FT_STATUS_INIT(FT_BYTES_PIVOTS_FETCHED_PREFETCH, PIVOTS_FETCHED_FOR_PREFETCH_BYTES, PARCOUNT, "pivots fetched for prefetch (bytes)");
|
||||
FT_STATUS_INIT(FT_TOKUTIME_PIVOTS_FETCHED_PREFETCH, PIVOTS_FETCHED_FOR_PREFETCH_SECONDS, TOKUTIME, "pivots fetched for prefetch (seconds)");
|
||||
FT_STATUS_INIT(FT_NUM_PIVOTS_FETCHED_WRITE, PIVOTS_FETCHED_FOR_WRITE, PARCOUNT, "pivots fetched for write");
|
||||
FT_STATUS_INIT(FT_BYTES_PIVOTS_FETCHED_WRITE, PIVOTS_FETCHED_FOR_WRITE_BYTES, PARCOUNT, "pivots fetched for write (bytes)");
|
||||
FT_STATUS_INIT(FT_TOKUTIME_PIVOTS_FETCHED_WRITE, PIVOTS_FETCHED_FOR_WRITE_SECONDS, TOKUTIME, "pivots fetched for write (seconds)");
|
||||
// Basements: For queries, aggressive fetching in prelocked range, prefetching, or writing.
|
||||
FT_STATUS_INIT(FT_NUM_BASEMENTS_FETCHED_NORMAL, BASEMENTS_FETCHED_TARGET_QUERY, PARCOUNT, "basements fetched as a target of a query");
|
||||
FT_STATUS_INIT(FT_BYTES_BASEMENTS_FETCHED_NORMAL, BASEMENTS_FETCHED_TARGET_QUERY_BYTES, PARCOUNT, "basements fetched as a target of a query (bytes)");
|
||||
FT_STATUS_INIT(FT_TOKUTIME_BASEMENTS_FETCHED_NORMAL, BASEMENTS_FETCHED_TARGET_QUERY_SECONDS, TOKUTIME, "basements fetched as a target of a query (seconds)");
|
||||
FT_STATUS_INIT(FT_NUM_BASEMENTS_FETCHED_AGGRESSIVE, BASEMENTS_FETCHED_PRELOCKED_RANGE, PARCOUNT, "basements fetched for prelocked range");
|
||||
FT_STATUS_INIT(FT_BYTES_BASEMENTS_FETCHED_AGGRESSIVE, BASEMENTS_FETCHED_PRELOCKED_RANGE_BYTES, PARCOUNT, "basements fetched for prelocked range (bytes)");
|
||||
FT_STATUS_INIT(FT_TOKUTIME_BASEMENTS_FETCHED_AGGRESSIVE, BASEMENTS_FETCHED_PRELOCKED_RANGE_SECONDS, TOKUTIME, "basements fetched for prelocked range (seconds)");
|
||||
FT_STATUS_INIT(FT_NUM_BASEMENTS_FETCHED_PREFETCH, BASEMENTS_FETCHED_PREFETCH, PARCOUNT, "basements fetched for prefetch");
|
||||
FT_STATUS_INIT(FT_BYTES_BASEMENTS_FETCHED_PREFETCH, BASEMENTS_FETCHED_PREFETCH_BYTES, PARCOUNT, "basements fetched for prefetch (bytes)");
|
||||
FT_STATUS_INIT(FT_TOKUTIME_BASEMENTS_FETCHED_PREFETCH, BASEMENTS_FETCHED_PREFETCH_SECONDS, TOKUTIME, "basements fetched for prefetch (seconds)");
|
||||
FT_STATUS_INIT(FT_NUM_BASEMENTS_FETCHED_WRITE, BASEMENTS_FETCHED_FOR_WRITE, PARCOUNT, "basements fetched for write");
|
||||
FT_STATUS_INIT(FT_BYTES_BASEMENTS_FETCHED_WRITE, BASEMENTS_FETCHED_FOR_WRITE_BYTES, PARCOUNT, "basements fetched for write (bytes)");
|
||||
FT_STATUS_INIT(FT_TOKUTIME_BASEMENTS_FETCHED_WRITE, BASEMENTS_FETCHED_FOR_WRITE_SECONDS, TOKUTIME, "basements fetched for write (seconds)");
|
||||
// Buffers: For queries, aggressive fetching in prelocked range, prefetching, or writing.
|
||||
FT_STATUS_INIT(FT_NUM_MSG_BUFFER_FETCHED_NORMAL, BUFFERS_FETCHED_TARGET_QUERY, PARCOUNT, "buffers fetched as a target of a query");
|
||||
FT_STATUS_INIT(FT_BYTES_MSG_BUFFER_FETCHED_NORMAL, BUFFERS_FETCHED_TARGET_QUERY_BYTES, PARCOUNT, "buffers fetched as a target of a query (bytes)");
|
||||
FT_STATUS_INIT(FT_TOKUTIME_MSG_BUFFER_FETCHED_NORMAL, BUFFERS_FETCHED_TARGET_QUERY_SECONDS, TOKUTIME, "buffers fetched as a target of a query (seconds)");
|
||||
FT_STATUS_INIT(FT_NUM_MSG_BUFFER_FETCHED_AGGRESSIVE, BUFFERS_FETCHED_PRELOCKED_RANGE, PARCOUNT, "buffers fetched for prelocked range");
|
||||
FT_STATUS_INIT(FT_BYTES_MSG_BUFFER_FETCHED_AGGRESSIVE, BUFFERS_FETCHED_PRELOCKED_RANGE_BYTES, PARCOUNT, "buffers fetched for prelocked range (bytes)");
|
||||
FT_STATUS_INIT(FT_TOKUTIME_MSG_BUFFER_FETCHED_AGGRESSIVE, BUFFERS_FETCHED_PRELOCKED_RANGE_SECONDS, TOKUTIME, "buffers fetched for prelocked range (seconds)");
|
||||
FT_STATUS_INIT(FT_NUM_MSG_BUFFER_FETCHED_PREFETCH, BUFFERS_FETCHED_PREFETCH, PARCOUNT, "buffers fetched for prefetch");
|
||||
FT_STATUS_INIT(FT_BYTES_MSG_BUFFER_FETCHED_PREFETCH, BUFFERS_FETCHED_PREFETCH_BYTES, PARCOUNT, "buffers fetched for prefetch (bytes)");
|
||||
FT_STATUS_INIT(FT_TOKUTIME_MSG_BUFFER_FETCHED_PREFETCH, BUFFERS_FETCHED_PREFETCH_SECONDS, TOKUTIME, "buffers fetched for prefetch (seconds)");
|
||||
FT_STATUS_INIT(FT_NUM_MSG_BUFFER_FETCHED_WRITE, BUFFERS_FETCHED_FOR_WRITE, PARCOUNT, "buffers fetched for write");
|
||||
FT_STATUS_INIT(FT_BYTES_MSG_BUFFER_FETCHED_WRITE, BUFFERS_FETCHED_FOR_WRITE_BYTES, PARCOUNT, "buffers fetched for write (bytes)");
|
||||
FT_STATUS_INIT(FT_TOKUTIME_MSG_BUFFER_FETCHED_WRITE, BUFFERS_FETCHED_FOR_WRITE_SECONDS, TOKUTIME, "buffers fetched for write (seconds)");
|
||||
|
||||
// Disk write statistics.
|
||||
//
|
||||
// Leaf/Nonleaf: Not for checkpoint
|
||||
FT_STATUS_INIT(FT_DISK_FLUSH_LEAF, LEAF_NODES_FLUSHED_NOT_CHECKPOINT, PARCOUNT, "leaf nodes flushed to disk (not for checkpoint)");
|
||||
FT_STATUS_INIT(FT_DISK_FLUSH_LEAF_BYTES, LEAF_NODES_FLUSHED_NOT_CHECKPOINT_BYTES, PARCOUNT, "leaf nodes flushed to disk (not for checkpoint) (bytes)");
|
||||
FT_STATUS_INIT(FT_DISK_FLUSH_LEAF_UNCOMPRESSED_BYTES, LEAF_NODES_FLUSHED_NOT_CHECKPOINT_UNCOMPRESSED_BYTES, PARCOUNT, "leaf nodes flushed to disk (not for checkpoint) (uncompressed bytes)");
|
||||
FT_STATUS_INIT(FT_DISK_FLUSH_LEAF_TOKUTIME, LEAF_NODES_FLUSHED_NOT_CHECKPOINT_SECONDS, TOKUTIME, "leaf nodes flushed to disk (not for checkpoint) (seconds)");
|
||||
FT_STATUS_INIT(FT_DISK_FLUSH_NONLEAF, NONLEAF_NODES_FLUSHED_TO_DISK_NOT_CHECKPOINT, PARCOUNT, "nonleaf nodes flushed to disk (not for checkpoint)");
|
||||
FT_STATUS_INIT(FT_DISK_FLUSH_NONLEAF_BYTES, NONLEAF_NODES_FLUSHED_TO_DISK_NOT_CHECKPOINT_BYTES, PARCOUNT, "nonleaf nodes flushed to disk (not for checkpoint) (bytes)");
|
||||
FT_STATUS_INIT(FT_DISK_FLUSH_NONLEAF_UNCOMPRESSED_BYTES, NONLEAF_NODES_FLUSHED_TO_DISK_NOT_CHECKPOINT_UNCOMPRESSED_BYTES, PARCOUNT, "nonleaf nodes flushed to disk (not for checkpoint) (uncompressed bytes)");
|
||||
FT_STATUS_INIT(FT_DISK_FLUSH_NONLEAF_TOKUTIME, NONLEAF_NODES_FLUSHED_TO_DISK_NOT_CHECKPOINT_SECONDS, TOKUTIME, "nonleaf nodes flushed to disk (not for checkpoint) (seconds)");
|
||||
// Leaf/Nonleaf: For checkpoint
|
||||
FT_STATUS_INIT(FT_DISK_FLUSH_LEAF_FOR_CHECKPOINT, LEAF_NODES_FLUSHED_CHECKPOINT, PARCOUNT, "leaf nodes flushed to disk (for checkpoint)");
|
||||
FT_STATUS_INIT(FT_DISK_FLUSH_LEAF_BYTES_FOR_CHECKPOINT, LEAF_NODES_FLUSHED_CHECKPOINT_BYTES, PARCOUNT, "leaf nodes flushed to disk (for checkpoint) (bytes)");
|
||||
FT_STATUS_INIT(FT_DISK_FLUSH_LEAF_UNCOMPRESSED_BYTES_FOR_CHECKPOINT, LEAF_NODES_FLUSHED_CHECKPOINT_UNCOMPRESSED_BYTES, PARCOUNT, "leaf nodes flushed to disk (for checkpoint) (uncompressed bytes)");
|
||||
FT_STATUS_INIT(FT_DISK_FLUSH_LEAF_TOKUTIME_FOR_CHECKPOINT, LEAF_NODES_FLUSHED_CHECKPOINT_SECONDS, TOKUTIME, "leaf nodes flushed to disk (for checkpoint) (seconds)");
|
||||
FT_STATUS_INIT(FT_DISK_FLUSH_NONLEAF_FOR_CHECKPOINT, NONLEAF_NODES_FLUSHED_TO_DISK_CHECKPOINT, PARCOUNT, "nonleaf nodes flushed to disk (for checkpoint)");
|
||||
FT_STATUS_INIT(FT_DISK_FLUSH_NONLEAF_BYTES_FOR_CHECKPOINT, NONLEAF_NODES_FLUSHED_TO_DISK_CHECKPOINT_BYTES, PARCOUNT, "nonleaf nodes flushed to disk (for checkpoint) (bytes)");
|
||||
FT_STATUS_INIT(FT_DISK_FLUSH_NONLEAF_UNCOMPRESSED_BYTES_FOR_CHECKPOINT, NONLEAF_NODES_FLUSHED_TO_DISK_CHECKPOINT_UNCOMPRESSED_BYTES, PARCOUNT, "nonleaf nodes flushed to disk (for checkpoint) (uncompressed bytes)");
|
||||
FT_STATUS_INIT(FT_DISK_FLUSH_NONLEAF_TOKUTIME_FOR_CHECKPOINT, NONLEAF_NODES_FLUSHED_TO_DISK_CHECKPOINT_SECONDS, TOKUTIME, "nonleaf nodes flushed to disk (for checkpoint) (seconds)");
|
||||
FT_STATUS_INIT(FT_DISK_FLUSH_LEAF_COMPRESSION_RATIO, LEAF_NODE_COMPRESSION_RATIO, DOUBLE, "uncompressed / compressed bytes written (leaf)");
|
||||
FT_STATUS_INIT(FT_DISK_FLUSH_NONLEAF_COMPRESSION_RATIO, NONLEAF_NODE_COMPRESSION_RATIO, DOUBLE, "uncompressed / compressed bytes written (nonleaf)");
|
||||
FT_STATUS_INIT(FT_DISK_FLUSH_OVERALL_COMPRESSION_RATIO, OVERALL_NODE_COMPRESSION_RATIO, DOUBLE, "uncompressed / compressed bytes written (overall)");
|
||||
|
||||
// CPU time statistics for [de]serialization and [de]compression.
|
||||
FT_STATUS_INIT(FT_LEAF_COMPRESS_TOKUTIME, LEAF_COMPRESSION_TO_MEMORY_SECONDS, TOKUTIME, "leaf compression to memory (seconds)");
|
||||
FT_STATUS_INIT(FT_LEAF_SERIALIZE_TOKUTIME, LEAF_SERIALIZATION_TO_MEMORY_SECONDS, TOKUTIME, "leaf serialization to memory (seconds)");
|
||||
FT_STATUS_INIT(FT_LEAF_DECOMPRESS_TOKUTIME, LEAF_DECOMPRESSION_TO_MEMORY_SECONDS, TOKUTIME, "leaf decompression to memory (seconds)");
|
||||
FT_STATUS_INIT(FT_LEAF_DESERIALIZE_TOKUTIME, LEAF_DESERIALIZATION_TO_MEMORY_SECONDS, TOKUTIME, "leaf deserialization to memory (seconds)");
|
||||
FT_STATUS_INIT(FT_NONLEAF_COMPRESS_TOKUTIME, NONLEAF_COMPRESSION_TO_MEMORY_SECONDS, TOKUTIME, "nonleaf compression to memory (seconds)");
|
||||
FT_STATUS_INIT(FT_NONLEAF_SERIALIZE_TOKUTIME, NONLEAF_SERIALIZATION_TO_MEMORY_SECONDS, TOKUTIME, "nonleaf serialization to memory (seconds)");
|
||||
FT_STATUS_INIT(FT_NONLEAF_DECOMPRESS_TOKUTIME, NONLEAF_DECOMPRESSION_TO_MEMORY_SECONDS, TOKUTIME, "nonleaf decompression to memory (seconds)");
|
||||
FT_STATUS_INIT(FT_NONLEAF_DESERIALIZE_TOKUTIME, NONLEAF_DESERIALIZATION_TO_MEMORY_SECONDS, TOKUTIME, "nonleaf deserialization to memory (seconds)");
|
||||
|
||||
// Promotion statistics.
|
||||
FT_STATUS_INIT(FT_PRO_NUM_ROOT_SPLIT, PROMOTION_ROOTS_SPLIT, PARCOUNT, "promotion: roots split");
|
||||
FT_STATUS_INIT(FT_PRO_NUM_ROOT_H0_INJECT, PROMOTION_LEAF_ROOTS_INJECTED_INTO, PARCOUNT, "promotion: leaf roots injected into");
|
||||
FT_STATUS_INIT(FT_PRO_NUM_ROOT_H1_INJECT, PROMOTION_H1_ROOTS_INJECTED_INTO, PARCOUNT, "promotion: h1 roots injected into");
|
||||
FT_STATUS_INIT(FT_PRO_NUM_INJECT_DEPTH_0, PROMOTION_INJECTIONS_AT_DEPTH_0, PARCOUNT, "promotion: injections at depth 0");
|
||||
FT_STATUS_INIT(FT_PRO_NUM_INJECT_DEPTH_1, PROMOTION_INJECTIONS_AT_DEPTH_1, PARCOUNT, "promotion: injections at depth 1");
|
||||
FT_STATUS_INIT(FT_PRO_NUM_INJECT_DEPTH_2, PROMOTION_INJECTIONS_AT_DEPTH_2, PARCOUNT, "promotion: injections at depth 2");
|
||||
FT_STATUS_INIT(FT_PRO_NUM_INJECT_DEPTH_3, PROMOTION_INJECTIONS_AT_DEPTH_3, PARCOUNT, "promotion: injections at depth 3");
|
||||
FT_STATUS_INIT(FT_PRO_NUM_INJECT_DEPTH_GT3, PROMOTION_INJECTIONS_LOWER_THAN_DEPTH_3, PARCOUNT, "promotion: injections lower than depth 3");
|
||||
FT_STATUS_INIT(FT_PRO_NUM_STOP_NONEMPTY_BUF, PROMOTION_STOPPED_NONEMPTY_BUFFER, PARCOUNT, "promotion: stopped because of a nonempty buffer");
|
||||
FT_STATUS_INIT(FT_PRO_NUM_STOP_H1, PROMOTION_STOPPED_AT_HEIGHT_1, PARCOUNT, "promotion: stopped at height 1");
|
||||
FT_STATUS_INIT(FT_PRO_NUM_STOP_LOCK_CHILD, PROMOTION_STOPPED_CHILD_LOCKED_OR_NOT_IN_MEMORY, PARCOUNT, "promotion: stopped because the child was locked or not at all in memory");
|
||||
FT_STATUS_INIT(FT_PRO_NUM_STOP_CHILD_INMEM, PROMOTION_STOPPED_CHILD_NOT_FULLY_IN_MEMORY, PARCOUNT, "promotion: stopped because the child was not fully in memory");
|
||||
FT_STATUS_INIT(FT_PRO_NUM_DIDNT_WANT_PROMOTE, PROMOTION_STOPPED_AFTER_LOCKING_CHILD, PARCOUNT, "promotion: stopped anyway, after locking the child");
|
||||
FT_STATUS_INIT(FT_BASEMENT_DESERIALIZE_FIXED_KEYSIZE, BASEMENT_DESERIALIZATION_FIXED_KEY, PARCOUNT, "basement nodes deserialized with fixed-keysize");
|
||||
FT_STATUS_INIT(FT_BASEMENT_DESERIALIZE_VARIABLE_KEYSIZE, BASEMENT_DESERIALIZATION_VARIABLE_KEY, PARCOUNT, "basement nodes deserialized with variable-keysize");
|
||||
FT_STATUS_INIT(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_SUCCESS, PRO_RIGHTMOST_LEAF_SHORTCUT_SUCCESS, PARCOUNT, "promotion: succeeded in using the rightmost leaf shortcut");
|
||||
FT_STATUS_INIT(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_POS, PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_POS, PARCOUNT, "promotion: tried the rightmost leaf shorcut but failed (out-of-bounds)");
|
||||
FT_STATUS_INIT(FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_REACTIVE,RIGHTMOST_LEAF_SHORTCUT_FAIL_REACTIVE, PARCOUNT, "promotion: tried the rightmost leaf shorcut but failed (child reactive)");
|
||||
|
||||
FT_STATUS_INIT(FT_CURSOR_SKIP_DELETED_LEAF_ENTRY, CURSOR_SKIP_DELETED_LEAF_ENTRY, PARCOUNT, "cursor skipped deleted leaf entries");
|
||||
|
||||
m_initialized = true;
|
||||
#undef FT_STATUS_INIT
|
||||
}
|
||||
void FT_STATUS_S::destroy() {
|
||||
if (!m_initialized) return;
|
||||
for (int i = 0; i < FT_STATUS_NUM_ROWS; ++i) {
|
||||
if (status[i].type == PARCOUNT) {
|
||||
destroy_partitioned_counter(status[i].value.parcount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
FT_FLUSHER_STATUS_S fl_status;
|
||||
void FT_FLUSHER_STATUS_S::init() {
|
||||
if (m_initialized) return;
|
||||
#define FL_STATUS_INIT(k,c,t,l) TOKUFT_STATUS_INIT((*this), k, c, t, "ft flusher: " l, TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS)
|
||||
FL_STATUS_INIT(FT_FLUSHER_CLEANER_TOTAL_NODES, FLUSHER_CLEANER_TOTAL_NODES, UINT64, "total nodes potentially flushed by cleaner thread");
|
||||
FL_STATUS_INIT(FT_FLUSHER_CLEANER_H1_NODES, FLUSHER_CLEANER_H1_NODES, UINT64, "height-one nodes flushed by cleaner thread");
|
||||
FL_STATUS_INIT(FT_FLUSHER_CLEANER_HGT1_NODES, FLUSHER_CLEANER_HGT1_NODES, UINT64, "height-greater-than-one nodes flushed by cleaner thread");
|
||||
FL_STATUS_INIT(FT_FLUSHER_CLEANER_EMPTY_NODES, FLUSHER_CLEANER_EMPTY_NODES, UINT64, "nodes cleaned which had empty buffers");
|
||||
FL_STATUS_INIT(FT_FLUSHER_CLEANER_NODES_DIRTIED, FLUSHER_CLEANER_NODES_DIRTIED, UINT64, "nodes dirtied by cleaner thread");
|
||||
FL_STATUS_INIT(FT_FLUSHER_CLEANER_MAX_BUFFER_SIZE, FLUSHER_CLEANER_MAX_BUFFER_SIZE, UINT64, "max bytes in a buffer flushed by cleaner thread");
|
||||
FL_STATUS_INIT(FT_FLUSHER_CLEANER_MIN_BUFFER_SIZE, FLUSHER_CLEANER_MIN_BUFFER_SIZE, UINT64, "min bytes in a buffer flushed by cleaner thread");
|
||||
FL_STATUS_INIT(FT_FLUSHER_CLEANER_TOTAL_BUFFER_SIZE, FLUSHER_CLEANER_TOTAL_BUFFER_SIZE, UINT64, "total bytes in buffers flushed by cleaner thread");
|
||||
FL_STATUS_INIT(FT_FLUSHER_CLEANER_MAX_BUFFER_WORKDONE, FLUSHER_CLEANER_MAX_BUFFER_WORKDONE, UINT64, "max workdone in a buffer flushed by cleaner thread");
|
||||
FL_STATUS_INIT(FT_FLUSHER_CLEANER_MIN_BUFFER_WORKDONE, FLUSHER_CLEANER_MIN_BUFFER_WORKDONE, UINT64, "min workdone in a buffer flushed by cleaner thread");
|
||||
FL_STATUS_INIT(FT_FLUSHER_CLEANER_TOTAL_BUFFER_WORKDONE, FLUSHER_CLEANER_TOTAL_BUFFER_WORKDONE, UINT64, "total workdone in buffers flushed by cleaner thread");
|
||||
FL_STATUS_INIT(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_STARTED, FLUSHER_CLEANER_NUM_LEAF_MERGES_STARTED, UINT64, "times cleaner thread tries to merge a leaf");
|
||||
FL_STATUS_INIT(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING, FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING, UINT64, "cleaner thread leaf merges in progress");
|
||||
FL_STATUS_INIT(FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_COMPLETED, FLUSHER_CLEANER_NUM_LEAF_MERGES_COMPLETED, UINT64, "cleaner thread leaf merges successful");
|
||||
FL_STATUS_INIT(FT_FLUSHER_CLEANER_NUM_DIRTIED_FOR_LEAF_MERGE, FLUSHER_CLEANER_NUM_DIRTIED_FOR_LEAF_MERGE, UINT64, "nodes dirtied by cleaner thread leaf merges");
|
||||
FL_STATUS_INIT(FT_FLUSHER_FLUSH_TOTAL, FLUSHER_FLUSH_TOTAL, UINT64, "total number of flushes done by flusher threads or cleaner threads");
|
||||
FL_STATUS_INIT(FT_FLUSHER_FLUSH_IN_MEMORY, FLUSHER_FLUSH_IN_MEMORY, UINT64, "number of in memory flushes");
|
||||
FL_STATUS_INIT(FT_FLUSHER_FLUSH_NEEDED_IO, FLUSHER_FLUSH_NEEDED_IO, UINT64, "number of flushes that read something off disk");
|
||||
FL_STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES, FLUSHER_FLUSH_CASCADES, UINT64, "number of flushes that triggered another flush in child");
|
||||
FL_STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES_1, FLUSHER_FLUSH_CASCADES_1, UINT64, "number of flushes that triggered 1 cascading flush");
|
||||
FL_STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES_2, FLUSHER_FLUSH_CASCADES_2, UINT64, "number of flushes that triggered 2 cascading flushes");
|
||||
FL_STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES_3, FLUSHER_FLUSH_CASCADES_3, UINT64, "number of flushes that triggered 3 cascading flushes");
|
||||
FL_STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES_4, FLUSHER_FLUSH_CASCADES_4, UINT64, "number of flushes that triggered 4 cascading flushes");
|
||||
FL_STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES_5, FLUSHER_FLUSH_CASCADES_5, UINT64, "number of flushes that triggered 5 cascading flushes");
|
||||
FL_STATUS_INIT(FT_FLUSHER_FLUSH_CASCADES_GT_5, FLUSHER_FLUSH_CASCADES_GT_5, UINT64, "number of flushes that triggered over 5 cascading flushes");
|
||||
FL_STATUS_INIT(FT_FLUSHER_SPLIT_LEAF, FLUSHER_SPLIT_LEAF, UINT64, "leaf node splits");
|
||||
FL_STATUS_INIT(FT_FLUSHER_SPLIT_NONLEAF, FLUSHER_SPLIT_NONLEAF, UINT64, "nonleaf node splits");
|
||||
FL_STATUS_INIT(FT_FLUSHER_MERGE_LEAF, FLUSHER_MERGE_LEAF, UINT64, "leaf node merges");
|
||||
FL_STATUS_INIT(FT_FLUSHER_MERGE_NONLEAF, FLUSHER_MERGE_NONLEAF, UINT64, "nonleaf node merges");
|
||||
FL_STATUS_INIT(FT_FLUSHER_BALANCE_LEAF, FLUSHER_BALANCE_LEAF, UINT64, "leaf node balances");
|
||||
|
||||
FL_STATUS_VAL(FT_FLUSHER_CLEANER_MIN_BUFFER_SIZE) = UINT64_MAX;
|
||||
FL_STATUS_VAL(FT_FLUSHER_CLEANER_MIN_BUFFER_WORKDONE) = UINT64_MAX;
|
||||
|
||||
m_initialized = true;
|
||||
#undef FL_STATUS_INIT
|
||||
}
|
||||
void FT_FLUSHER_STATUS_S::destroy() {
|
||||
if (!m_initialized) return;
|
||||
for (int i = 0; i < FT_FLUSHER_STATUS_NUM_ROWS; ++i) {
|
||||
if (status[i].type == PARCOUNT) {
|
||||
destroy_partitioned_counter(status[i].value.parcount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
FT_HOT_STATUS_S hot_status;
|
||||
void FT_HOT_STATUS_S::init() {
|
||||
if (m_initialized) return;
|
||||
#define HOT_STATUS_INIT(k,c,t,l) TOKUFT_STATUS_INIT((*this), k, c, t, "hot: " l, TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS)
|
||||
HOT_STATUS_INIT(FT_HOT_NUM_STARTED, HOT_NUM_STARTED, UINT64, "operations ever started");
|
||||
HOT_STATUS_INIT(FT_HOT_NUM_COMPLETED, HOT_NUM_COMPLETED, UINT64, "operations successfully completed");
|
||||
HOT_STATUS_INIT(FT_HOT_NUM_ABORTED, HOT_NUM_ABORTED, UINT64, "operations aborted");
|
||||
HOT_STATUS_INIT(FT_HOT_MAX_ROOT_FLUSH_COUNT, HOT_MAX_ROOT_FLUSH_COUNT, UINT64, "max number of flushes from root ever required to optimize a tree");
|
||||
|
||||
m_initialized = true;
|
||||
#undef HOT_STATUS_INIT
|
||||
}
|
||||
void FT_HOT_STATUS_S::destroy() {
|
||||
if (!m_initialized) return;
|
||||
for (int i = 0; i < FT_HOT_STATUS_NUM_ROWS; ++i) {
|
||||
if (status[i].type == PARCOUNT) {
|
||||
destroy_partitioned_counter(status[i].value.parcount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
TXN_STATUS_S txn_status;
|
||||
void TXN_STATUS_S::init() {
|
||||
if (m_initialized) return;
|
||||
#define TXN_STATUS_INIT(k,c,t,l) TOKUFT_STATUS_INIT((*this), k, c, t, "txn: " l, TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS)
|
||||
TXN_STATUS_INIT(TXN_BEGIN, TXN_BEGIN, PARCOUNT, "begin");
|
||||
TXN_STATUS_INIT(TXN_READ_BEGIN, TXN_BEGIN_READ_ONLY, PARCOUNT, "begin read only");
|
||||
TXN_STATUS_INIT(TXN_COMMIT, TXN_COMMITS, PARCOUNT, "successful commits");
|
||||
TXN_STATUS_INIT(TXN_ABORT, TXN_ABORTS, PARCOUNT, "aborts");
|
||||
m_initialized = true;
|
||||
#undef TXN_STATUS_INIT
|
||||
}
|
||||
void TXN_STATUS_S::destroy() {
|
||||
if (!m_initialized) return;
|
||||
for (int i = 0; i < TXN_STATUS_NUM_ROWS; ++i) {
|
||||
if (status[i].type == PARCOUNT) {
|
||||
destroy_partitioned_counter(status[i].value.parcount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
LOGGER_STATUS_S log_status;
|
||||
void LOGGER_STATUS_S::init() {
|
||||
if (m_initialized) return;
|
||||
#define LOG_STATUS_INIT(k,c,t,l) TOKUFT_STATUS_INIT((*this), k, c, t, "logger: " l, TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS)
|
||||
LOG_STATUS_INIT(LOGGER_NEXT_LSN, LOGGER_NEXT_LSN, UINT64, "next LSN");
|
||||
LOG_STATUS_INIT(LOGGER_NUM_WRITES, LOGGER_WRITES, UINT64, "writes");
|
||||
LOG_STATUS_INIT(LOGGER_BYTES_WRITTEN, LOGGER_WRITES_BYTES, UINT64, "writes (bytes)");
|
||||
LOG_STATUS_INIT(LOGGER_UNCOMPRESSED_BYTES_WRITTEN, LOGGER_WRITES_UNCOMPRESSED_BYTES, UINT64, "writes (uncompressed bytes)");
|
||||
LOG_STATUS_INIT(LOGGER_TOKUTIME_WRITES, LOGGER_WRITES_SECONDS, TOKUTIME, "writes (seconds)");
|
||||
LOG_STATUS_INIT(LOGGER_WAIT_BUF_LONG, LOGGER_WAIT_LONG, UINT64, "number of long logger write operations");
|
||||
m_initialized = true;
|
||||
#undef LOG_STATUS_INIT
|
||||
}
|
||||
void LOGGER_STATUS_S::destroy() {
|
||||
if (!m_initialized) return;
|
||||
for (int i = 0; i < LOGGER_STATUS_NUM_ROWS; ++i) {
|
||||
if (status[i].type == PARCOUNT) {
|
||||
destroy_partitioned_counter(status[i].value.parcount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the global status tables, in this order: leaf entry,
// checkpoint, locktree manager, ft, flusher, hot, txn and logger.
// NOTE(review): ct_status is not initialized here; presumably the cachetable
// code does that itself -- confirm.
void toku_status_init(void) {
    le_status.init();
    cp_status.init();
    ltm_status.init();
    ft_status.init();
    fl_status.init();
    hot_status.init();
    txn_status.init();
    log_status.init();
}
|
||||
// Destroy the global status tables in the reverse of the order in which
// toku_status_init() initializes them (logger first, leaf entry last).
void toku_status_destroy(void) {
    log_status.destroy();
    txn_status.destroy();
    hot_status.destroy();
    fl_status.destroy();
    ft_status.destroy();
    ltm_status.destroy();
    cp_status.destroy();
    le_status.destroy();
}
|
@ -1,539 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <db.h>
|
||||
|
||||
#include "portability/toku_config.h"
|
||||
#include "portability/toku_list.h"
|
||||
#include "portability/toku_race_tools.h"
|
||||
|
||||
#include "util/status.h"
|
||||
|
||||
//
|
||||
// Leaf Entry statistics
|
||||
//
|
||||
class LE_STATUS_S {
|
||||
public:
|
||||
enum {
|
||||
LE_MAX_COMMITTED_XR = 0,
|
||||
LE_MAX_PROVISIONAL_XR,
|
||||
LE_EXPANDED,
|
||||
LE_MAX_MEMSIZE,
|
||||
LE_APPLY_GC_BYTES_IN,
|
||||
LE_APPLY_GC_BYTES_OUT,
|
||||
LE_NORMAL_GC_BYTES_IN,
|
||||
LE_NORMAL_GC_BYTES_OUT,
|
||||
LE_STATUS_NUM_ROWS
|
||||
};
|
||||
|
||||
void init();
|
||||
void destroy();
|
||||
|
||||
TOKU_ENGINE_STATUS_ROW_S status[LE_STATUS_NUM_ROWS];
|
||||
|
||||
private:
|
||||
bool m_initialized;
|
||||
};
|
||||
typedef LE_STATUS_S *LE_STATUS;
extern LE_STATUS_S le_status;

// Direct access to the numeric value of row x.
// Executed too often to be worth making threadsafe.
#define LE_STATUS_VAL(x) le_status.status[LE_STATUS_S::x].value.num

// Adds d to row x: PARCOUNT rows go through increment_partitioned_counter(),
// every other row through toku_sync_fetch_and_add() on its numeric value.
#define LE_STATUS_INC(x, d) \
    do { \
        if (le_status.status[LE_STATUS_S::x].type == PARCOUNT) { \
            increment_partitioned_counter(le_status.status[LE_STATUS_S::x].value.parcount, d); \
        } else { \
            toku_sync_fetch_and_add(&le_status.status[LE_STATUS_S::x].value.num, d); \
        } \
    } while (0)
|
||||
|
||||
|
||||
|
||||
//
|
||||
// Checkpoint statistics
|
||||
//
|
||||
class CHECKPOINT_STATUS_S {
|
||||
public:
|
||||
enum {
|
||||
CP_PERIOD,
|
||||
CP_FOOTPRINT,
|
||||
CP_TIME_LAST_CHECKPOINT_BEGIN,
|
||||
CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE,
|
||||
CP_TIME_LAST_CHECKPOINT_END,
|
||||
CP_TIME_CHECKPOINT_DURATION,
|
||||
CP_TIME_CHECKPOINT_DURATION_LAST,
|
||||
CP_LAST_LSN,
|
||||
CP_CHECKPOINT_COUNT,
|
||||
CP_CHECKPOINT_COUNT_FAIL,
|
||||
CP_WAITERS_NOW, // how many threads are currently waiting for the checkpoint_safe lock to perform a checkpoint
|
||||
CP_WAITERS_MAX, // max threads ever simultaneously waiting for the checkpoint_safe lock to perform a checkpoint
|
||||
CP_CLIENT_WAIT_ON_MO, // how many times a client thread waited to take the multi_operation lock, not for checkpoint
|
||||
CP_CLIENT_WAIT_ON_CS, // how many times a client thread waited for the checkpoint_safe lock, not for checkpoint
|
||||
CP_BEGIN_TIME,
|
||||
CP_LONG_BEGIN_TIME,
|
||||
CP_LONG_BEGIN_COUNT,
|
||||
CP_END_TIME,
|
||||
CP_LONG_END_TIME,
|
||||
CP_LONG_END_COUNT,
|
||||
CP_STATUS_NUM_ROWS // number of rows in this status array. must be last.
|
||||
};
|
||||
|
||||
void init();
|
||||
void destroy();
|
||||
|
||||
TOKU_ENGINE_STATUS_ROW_S status[CP_STATUS_NUM_ROWS];
|
||||
|
||||
private:
|
||||
bool m_initialized;
|
||||
};
|
||||
typedef CHECKPOINT_STATUS_S *CHECKPOINT_STATUS;
extern CHECKPOINT_STATUS_S cp_status;

// Direct access to the numeric value of checkpoint row x.
#define CP_STATUS_VAL(x) cp_status.status[CHECKPOINT_STATUS_S::x].value.num
|
||||
|
||||
|
||||
|
||||
//
|
||||
// Cachetable statistics
|
||||
//
|
||||
class CACHETABLE_STATUS_S {
|
||||
public:
|
||||
enum {
|
||||
CT_MISS = 0,
|
||||
CT_MISSTIME, // how many usec spent waiting for disk read because of cache miss
|
||||
CT_PREFETCHES, // how many times has a block been prefetched into the cachetable?
|
||||
CT_SIZE_CURRENT, // the sum of the sizes of the nodes represented in the cachetable
|
||||
CT_SIZE_LIMIT, // the limit to the sum of the node sizes
|
||||
CT_SIZE_WRITING, // the sum of the sizes of the nodes being written
|
||||
CT_SIZE_NONLEAF, // number of bytes in cachetable belonging to nonleaf nodes
|
||||
CT_SIZE_LEAF, // number of bytes in cachetable belonging to leaf nodes
|
||||
CT_SIZE_ROLLBACK, // number of bytes in cachetable belonging to rollback nodes
|
||||
CT_SIZE_CACHEPRESSURE, // number of bytes causing cache pressure (sum of buffers and workdone counters)
|
||||
CT_SIZE_CLONED, // number of bytes of cloned data in the system
|
||||
CT_EVICTIONS,
|
||||
CT_CLEANER_EXECUTIONS, // number of times the cleaner thread's loop has executed
|
||||
CT_CLEANER_PERIOD,
|
||||
CT_CLEANER_ITERATIONS, // number of times the cleaner thread runs the cleaner per period
|
||||
CT_WAIT_PRESSURE_COUNT,
|
||||
CT_WAIT_PRESSURE_TIME,
|
||||
CT_LONG_WAIT_PRESSURE_COUNT,
|
||||
CT_LONG_WAIT_PRESSURE_TIME,
|
||||
|
||||
CT_POOL_CLIENT_NUM_THREADS,
|
||||
CT_POOL_CLIENT_NUM_THREADS_ACTIVE,
|
||||
CT_POOL_CLIENT_QUEUE_SIZE,
|
||||
CT_POOL_CLIENT_MAX_QUEUE_SIZE,
|
||||
CT_POOL_CLIENT_TOTAL_ITEMS_PROCESSED,
|
||||
CT_POOL_CLIENT_TOTAL_EXECUTION_TIME,
|
||||
CT_POOL_CACHETABLE_NUM_THREADS,
|
||||
CT_POOL_CACHETABLE_NUM_THREADS_ACTIVE,
|
||||
CT_POOL_CACHETABLE_QUEUE_SIZE,
|
||||
CT_POOL_CACHETABLE_MAX_QUEUE_SIZE,
|
||||
CT_POOL_CACHETABLE_TOTAL_ITEMS_PROCESSED,
|
||||
CT_POOL_CACHETABLE_TOTAL_EXECUTION_TIME,
|
||||
CT_POOL_CHECKPOINT_NUM_THREADS,
|
||||
CT_POOL_CHECKPOINT_NUM_THREADS_ACTIVE,
|
||||
CT_POOL_CHECKPOINT_QUEUE_SIZE,
|
||||
CT_POOL_CHECKPOINT_MAX_QUEUE_SIZE,
|
||||
CT_POOL_CHECKPOINT_TOTAL_ITEMS_PROCESSED,
|
||||
CT_POOL_CHECKPOINT_TOTAL_EXECUTION_TIME,
|
||||
|
||||
CT_STATUS_NUM_ROWS
|
||||
};
|
||||
|
||||
void init();
|
||||
void destroy();
|
||||
|
||||
TOKU_ENGINE_STATUS_ROW_S status[CT_STATUS_NUM_ROWS];
|
||||
|
||||
private:
|
||||
bool m_initialized;
|
||||
};
|
||||
typedef CACHETABLE_STATUS_S *CACHETABLE_STATUS;
extern CACHETABLE_STATUS_S ct_status;

// Direct access to the numeric value of cachetable row x.
#define CT_STATUS_VAL(x) ct_status.status[CACHETABLE_STATUS_S::x].value.num
|
||||
|
||||
|
||||
|
||||
//
|
||||
// Lock Tree Manager statistics
|
||||
//
|
||||
class LTM_STATUS_S {
|
||||
public:
|
||||
enum {
|
||||
LTM_SIZE_CURRENT = 0,
|
||||
LTM_SIZE_LIMIT,
|
||||
LTM_ESCALATION_COUNT,
|
||||
LTM_ESCALATION_TIME,
|
||||
LTM_ESCALATION_LATEST_RESULT,
|
||||
LTM_NUM_LOCKTREES,
|
||||
LTM_LOCK_REQUESTS_PENDING,
|
||||
LTM_STO_NUM_ELIGIBLE,
|
||||
LTM_STO_END_EARLY_COUNT,
|
||||
LTM_STO_END_EARLY_TIME,
|
||||
LTM_WAIT_COUNT,
|
||||
LTM_WAIT_TIME,
|
||||
LTM_LONG_WAIT_COUNT,
|
||||
LTM_LONG_WAIT_TIME,
|
||||
LTM_TIMEOUT_COUNT,
|
||||
LTM_WAIT_ESCALATION_COUNT,
|
||||
LTM_WAIT_ESCALATION_TIME,
|
||||
LTM_LONG_WAIT_ESCALATION_COUNT,
|
||||
LTM_LONG_WAIT_ESCALATION_TIME,
|
||||
LTM_STATUS_NUM_ROWS // must be last
|
||||
};
|
||||
|
||||
void init(void);
|
||||
void destroy(void);
|
||||
|
||||
TOKU_ENGINE_STATUS_ROW_S status[LTM_STATUS_NUM_ROWS];
|
||||
|
||||
private:
|
||||
bool m_initialized;
|
||||
};
|
||||
typedef LTM_STATUS_S *LTM_STATUS;
extern LTM_STATUS_S ltm_status;

// Direct access to the numeric value of lock tree manager row x.
#define LTM_STATUS_VAL(x) ltm_status.status[LTM_STATUS_S::x].value.num
|
||||
|
||||
|
||||
//
|
||||
// Fractal Tree statistics
|
||||
//
|
||||
class FT_STATUS_S {
|
||||
public:
|
||||
enum {
|
||||
FT_UPDATES = 0,
|
||||
FT_UPDATES_BROADCAST,
|
||||
FT_DESCRIPTOR_SET,
|
||||
FT_MSN_DISCARDS, // how many messages were ignored by leaf because of msn
|
||||
FT_TOTAL_RETRIES, // total number of search retries due to TRY_AGAIN
|
||||
FT_SEARCH_TRIES_GT_HEIGHT, // number of searches that required more tries than the height of the tree
|
||||
FT_SEARCH_TRIES_GT_HEIGHTPLUS3, // number of searches that required more tries than the height of the tree plus three
|
||||
FT_DISK_FLUSH_LEAF, // number of leaf nodes flushed to disk, not for checkpoint
|
||||
FT_DISK_FLUSH_LEAF_BYTES, // number of leaf nodes flushed to disk, not for checkpoint
|
||||
FT_DISK_FLUSH_LEAF_UNCOMPRESSED_BYTES, // number of leaf nodes flushed to disk, not for checkpoint
|
||||
FT_DISK_FLUSH_LEAF_TOKUTIME, // number of leaf nodes flushed to disk, not for checkpoint
|
||||
FT_DISK_FLUSH_NONLEAF, // number of nonleaf nodes flushed to disk, not for checkpoint
|
||||
FT_DISK_FLUSH_NONLEAF_BYTES, // number of nonleaf nodes flushed to disk, not for checkpoint
|
||||
FT_DISK_FLUSH_NONLEAF_UNCOMPRESSED_BYTES, // number of nonleaf nodes flushed to disk, not for checkpoint
|
||||
FT_DISK_FLUSH_NONLEAF_TOKUTIME, // number of nonleaf nodes flushed to disk, not for checkpoint
|
||||
FT_DISK_FLUSH_LEAF_FOR_CHECKPOINT, // number of leaf nodes flushed to disk for checkpoint
|
||||
FT_DISK_FLUSH_LEAF_BYTES_FOR_CHECKPOINT, // number of leaf nodes flushed to disk for checkpoint
|
||||
FT_DISK_FLUSH_LEAF_UNCOMPRESSED_BYTES_FOR_CHECKPOINT,// number of leaf nodes flushed to disk for checkpoint
|
||||
FT_DISK_FLUSH_LEAF_TOKUTIME_FOR_CHECKPOINT,// number of leaf nodes flushed to disk for checkpoint
|
||||
FT_DISK_FLUSH_NONLEAF_FOR_CHECKPOINT, // number of nonleaf nodes flushed to disk for checkpoint
|
||||
FT_DISK_FLUSH_NONLEAF_BYTES_FOR_CHECKPOINT,// number of nonleaf nodes flushed to disk for checkpoint
|
||||
FT_DISK_FLUSH_NONLEAF_UNCOMPRESSED_BYTES_FOR_CHECKPOINT,// number of nonleaf nodes flushed to disk for checkpoint
|
||||
FT_DISK_FLUSH_NONLEAF_TOKUTIME_FOR_CHECKPOINT,// number of nonleaf nodes flushed to disk for checkpoint
|
||||
FT_DISK_FLUSH_LEAF_COMPRESSION_RATIO, // effective compression ratio for leaf bytes flushed to disk
|
||||
FT_DISK_FLUSH_NONLEAF_COMPRESSION_RATIO, // effective compression ratio for nonleaf bytes flushed to disk
|
||||
FT_DISK_FLUSH_OVERALL_COMPRESSION_RATIO, // effective compression ratio for all bytes flushed to disk
|
||||
FT_PARTIAL_EVICTIONS_NONLEAF, // number of nonleaf node partial evictions
|
||||
FT_PARTIAL_EVICTIONS_NONLEAF_BYTES, // number of nonleaf node partial evictions
|
||||
FT_PARTIAL_EVICTIONS_LEAF, // number of leaf node partial evictions
|
||||
FT_PARTIAL_EVICTIONS_LEAF_BYTES, // number of leaf node partial evictions
|
||||
FT_FULL_EVICTIONS_LEAF, // number of full cachetable evictions on leaf nodes
|
||||
FT_FULL_EVICTIONS_LEAF_BYTES, // number of full cachetable evictions on leaf nodes (bytes)
|
||||
FT_FULL_EVICTIONS_NONLEAF, // number of full cachetable evictions on nonleaf nodes
|
||||
FT_FULL_EVICTIONS_NONLEAF_BYTES, // number of full cachetable evictions on nonleaf nodes (bytes)
|
||||
FT_CREATE_LEAF, // number of leaf nodes created
|
||||
FT_CREATE_NONLEAF, // number of nonleaf nodes created
|
||||
FT_DESTROY_LEAF, // number of leaf nodes destroyed
|
||||
FT_DESTROY_NONLEAF, // number of nonleaf nodes destroyed
|
||||
FT_MSG_BYTES_IN, // how many bytes of messages injected at root (for all trees)
|
||||
FT_MSG_BYTES_OUT, // how many bytes of messages flushed from h1 nodes to leaves
|
||||
FT_MSG_BYTES_CURR, // how many bytes of messages currently in trees (estimate)
|
||||
FT_MSG_NUM, // how many messages injected at root
|
||||
FT_MSG_NUM_BROADCAST, // how many broadcast messages injected at root
|
||||
FT_NUM_BASEMENTS_DECOMPRESSED_NORMAL, // how many basement nodes were decompressed because they were the target of a query
|
||||
FT_NUM_BASEMENTS_DECOMPRESSED_AGGRESSIVE, // ... because they were between lc and rc
|
||||
FT_NUM_BASEMENTS_DECOMPRESSED_PREFETCH,
|
||||
FT_NUM_BASEMENTS_DECOMPRESSED_WRITE,
|
||||
FT_NUM_MSG_BUFFER_DECOMPRESSED_NORMAL, // how many msg buffers were decompressed because they were the target of a query
|
||||
FT_NUM_MSG_BUFFER_DECOMPRESSED_AGGRESSIVE, // ... because they were between lc and rc
|
||||
FT_NUM_MSG_BUFFER_DECOMPRESSED_PREFETCH,
|
||||
FT_NUM_MSG_BUFFER_DECOMPRESSED_WRITE,
|
||||
FT_NUM_PIVOTS_FETCHED_QUERY, // how many pivots were fetched for a query
|
||||
FT_BYTES_PIVOTS_FETCHED_QUERY, // how many pivots were fetched for a query
|
||||
FT_TOKUTIME_PIVOTS_FETCHED_QUERY, // how many pivots were fetched for a query
|
||||
FT_NUM_PIVOTS_FETCHED_PREFETCH, // ... for a prefetch
|
||||
FT_BYTES_PIVOTS_FETCHED_PREFETCH, // ... for a prefetch
|
||||
FT_TOKUTIME_PIVOTS_FETCHED_PREFETCH, // ... for a prefetch
|
||||
FT_NUM_PIVOTS_FETCHED_WRITE, // ... for a write
|
||||
FT_BYTES_PIVOTS_FETCHED_WRITE, // ... for a write
|
||||
FT_TOKUTIME_PIVOTS_FETCHED_WRITE, // ... for a write
|
||||
FT_NUM_BASEMENTS_FETCHED_NORMAL, // how many basement nodes were fetched because they were the target of a query
|
||||
FT_BYTES_BASEMENTS_FETCHED_NORMAL, // how many basement nodes were fetched because they were the target of a query
|
||||
FT_TOKUTIME_BASEMENTS_FETCHED_NORMAL, // how many basement nodes were fetched because they were the target of a query
|
||||
FT_NUM_BASEMENTS_FETCHED_AGGRESSIVE, // ... because they were between lc and rc
|
||||
FT_BYTES_BASEMENTS_FETCHED_AGGRESSIVE, // ... because they were between lc and rc
|
||||
FT_TOKUTIME_BASEMENTS_FETCHED_AGGRESSIVE, // ... because they were between lc and rc
|
||||
FT_NUM_BASEMENTS_FETCHED_PREFETCH,
|
||||
FT_BYTES_BASEMENTS_FETCHED_PREFETCH,
|
||||
FT_TOKUTIME_BASEMENTS_FETCHED_PREFETCH,
|
||||
FT_NUM_BASEMENTS_FETCHED_WRITE,
|
||||
FT_BYTES_BASEMENTS_FETCHED_WRITE,
|
||||
FT_TOKUTIME_BASEMENTS_FETCHED_WRITE,
|
||||
FT_NUM_MSG_BUFFER_FETCHED_NORMAL, // how many msg buffers were fetched because they were the target of a query
|
||||
FT_BYTES_MSG_BUFFER_FETCHED_NORMAL, // how many msg buffers were fetched because they were the target of a query
|
||||
FT_TOKUTIME_MSG_BUFFER_FETCHED_NORMAL, // how many msg buffers were fetched because they were the target of a query
|
||||
FT_NUM_MSG_BUFFER_FETCHED_AGGRESSIVE, // ... because they were between lc and rc
|
||||
FT_BYTES_MSG_BUFFER_FETCHED_AGGRESSIVE, // ... because they were between lc and rc
|
||||
FT_TOKUTIME_MSG_BUFFER_FETCHED_AGGRESSIVE, // ... because they were between lc and rc
|
||||
FT_NUM_MSG_BUFFER_FETCHED_PREFETCH,
|
||||
FT_BYTES_MSG_BUFFER_FETCHED_PREFETCH,
|
||||
FT_TOKUTIME_MSG_BUFFER_FETCHED_PREFETCH,
|
||||
FT_NUM_MSG_BUFFER_FETCHED_WRITE,
|
||||
FT_BYTES_MSG_BUFFER_FETCHED_WRITE,
|
||||
FT_TOKUTIME_MSG_BUFFER_FETCHED_WRITE,
|
||||
FT_LEAF_COMPRESS_TOKUTIME, // seconds spent compressing leaf leaf nodes to memory
|
||||
FT_LEAF_SERIALIZE_TOKUTIME, // seconds spent serializing leaf node to memory
|
||||
FT_LEAF_DECOMPRESS_TOKUTIME, // seconds spent decompressing leaf nodes to memory
|
||||
FT_LEAF_DESERIALIZE_TOKUTIME, // seconds spent deserializing leaf nodes to memory
|
||||
FT_NONLEAF_COMPRESS_TOKUTIME, // seconds spent compressing nonleaf nodes to memory
|
||||
FT_NONLEAF_SERIALIZE_TOKUTIME, // seconds spent serializing nonleaf nodes to memory
|
||||
FT_NONLEAF_DECOMPRESS_TOKUTIME, // seconds spent decompressing nonleaf nodes to memory
|
||||
FT_NONLEAF_DESERIALIZE_TOKUTIME, // seconds spent deserializing nonleaf nodes to memory
|
||||
FT_PRO_NUM_ROOT_SPLIT,
|
||||
FT_PRO_NUM_ROOT_H0_INJECT,
|
||||
FT_PRO_NUM_ROOT_H1_INJECT,
|
||||
FT_PRO_NUM_INJECT_DEPTH_0,
|
||||
FT_PRO_NUM_INJECT_DEPTH_1,
|
||||
FT_PRO_NUM_INJECT_DEPTH_2,
|
||||
FT_PRO_NUM_INJECT_DEPTH_3,
|
||||
FT_PRO_NUM_INJECT_DEPTH_GT3,
|
||||
FT_PRO_NUM_STOP_NONEMPTY_BUF,
|
||||
FT_PRO_NUM_STOP_H1,
|
||||
FT_PRO_NUM_STOP_LOCK_CHILD,
|
||||
FT_PRO_NUM_STOP_CHILD_INMEM,
|
||||
FT_PRO_NUM_DIDNT_WANT_PROMOTE,
|
||||
FT_BASEMENT_DESERIALIZE_FIXED_KEYSIZE, // how many basement nodes were deserialized with a fixed keysize
|
||||
FT_BASEMENT_DESERIALIZE_VARIABLE_KEYSIZE, // how many basement nodes were deserialized with a variable keysize
|
||||
FT_PRO_RIGHTMOST_LEAF_SHORTCUT_SUCCESS,
|
||||
FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_POS,
|
||||
FT_PRO_RIGHTMOST_LEAF_SHORTCUT_FAIL_REACTIVE,
|
||||
FT_CURSOR_SKIP_DELETED_LEAF_ENTRY, // how many deleted leaf entries were skipped by a cursor
|
||||
FT_STATUS_NUM_ROWS
|
||||
};
|
||||
|
||||
void init(void);
|
||||
void destroy(void);
|
||||
|
||||
TOKU_ENGINE_STATUS_ROW_S status[FT_STATUS_NUM_ROWS];
|
||||
|
||||
private:
|
||||
bool m_initialized;
|
||||
};
|
||||
typedef FT_STATUS_S *FT_STATUS;
extern FT_STATUS_S ft_status;

// Value of row x: PARCOUNT rows are read with read_partitioned_counter(),
// every other row yields its numeric value directly.
#define FT_STATUS_VAL(x) \
    (ft_status.status[FT_STATUS_S::x].type == PARCOUNT ? \
        read_partitioned_counter(ft_status.status[FT_STATUS_S::x].value.parcount) : \
        ft_status.status[FT_STATUS_S::x].value.num)

// Adds d to row x: PARCOUNT rows go through increment_partitioned_counter(),
// every other row through toku_sync_fetch_and_add() on its numeric value.
#define FT_STATUS_INC(x, d) \
    do { \
        if (ft_status.status[FT_STATUS_S::x].type == PARCOUNT) { \
            increment_partitioned_counter(ft_status.status[FT_STATUS_S::x].value.parcount, d); \
        } else { \
            toku_sync_fetch_and_add(&ft_status.status[FT_STATUS_S::x].value.num, d); \
        } \
    } while (0)
|
||||
|
||||
|
||||
|
||||
//
|
||||
// Flusher statistics
|
||||
//
|
||||
class FT_FLUSHER_STATUS_S {
|
||||
public:
|
||||
enum {
|
||||
FT_FLUSHER_CLEANER_TOTAL_NODES = 0, // total number of nodes whose buffers are potentially flushed by cleaner thread
|
||||
FT_FLUSHER_CLEANER_H1_NODES, // number of nodes of height one whose message buffers are flushed by cleaner thread
|
||||
FT_FLUSHER_CLEANER_HGT1_NODES, // number of nodes of height > 1 whose message buffers are flushed by cleaner thread
|
||||
FT_FLUSHER_CLEANER_EMPTY_NODES, // number of nodes that are selected by cleaner, but whose buffers are empty
|
||||
FT_FLUSHER_CLEANER_NODES_DIRTIED, // number of nodes that are made dirty by the cleaner thread
|
||||
FT_FLUSHER_CLEANER_MAX_BUFFER_SIZE, // max number of bytes in message buffer flushed by cleaner thread
|
||||
FT_FLUSHER_CLEANER_MIN_BUFFER_SIZE,
|
||||
FT_FLUSHER_CLEANER_TOTAL_BUFFER_SIZE,
|
||||
FT_FLUSHER_CLEANER_MAX_BUFFER_WORKDONE, // max workdone value of any message buffer flushed by cleaner thread
|
||||
FT_FLUSHER_CLEANER_MIN_BUFFER_WORKDONE,
|
||||
FT_FLUSHER_CLEANER_TOTAL_BUFFER_WORKDONE,
|
||||
FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_STARTED, // number of times cleaner thread tries to merge a leaf
|
||||
FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING, // number of cleaner thread leaf merges in progress
|
||||
FT_FLUSHER_CLEANER_NUM_LEAF_MERGES_COMPLETED, // number of times cleaner thread successfully merges a leaf
|
||||
FT_FLUSHER_CLEANER_NUM_DIRTIED_FOR_LEAF_MERGE, // nodes dirtied by the "flush from root" process to merge a leaf node
|
||||
FT_FLUSHER_FLUSH_TOTAL, // total number of flushes done by flusher threads or cleaner threads
|
||||
FT_FLUSHER_FLUSH_IN_MEMORY, // number of in memory flushes
|
||||
FT_FLUSHER_FLUSH_NEEDED_IO, // number of flushes that had to read a child (or part) off disk
|
||||
FT_FLUSHER_FLUSH_CASCADES, // number of flushes that triggered another flush in the child
|
||||
FT_FLUSHER_FLUSH_CASCADES_1, // number of flushes that triggered 1 cascading flush
|
||||
FT_FLUSHER_FLUSH_CASCADES_2, // number of flushes that triggered 2 cascading flushes
|
||||
FT_FLUSHER_FLUSH_CASCADES_3, // number of flushes that triggered 3 cascading flushes
|
||||
FT_FLUSHER_FLUSH_CASCADES_4, // number of flushes that triggered 4 cascading flushes
|
||||
FT_FLUSHER_FLUSH_CASCADES_5, // number of flushes that triggered 5 cascading flushes
|
||||
FT_FLUSHER_FLUSH_CASCADES_GT_5, // number of flushes that triggered more than 5 cascading flushes
|
||||
FT_FLUSHER_SPLIT_LEAF, // number of leaf nodes split
|
||||
FT_FLUSHER_SPLIT_NONLEAF, // number of nonleaf nodes split
|
||||
FT_FLUSHER_MERGE_LEAF, // number of times leaf nodes are merged
|
||||
FT_FLUSHER_MERGE_NONLEAF, // number of times nonleaf nodes are merged
|
||||
FT_FLUSHER_BALANCE_LEAF, // number of times a leaf node is balanced
|
||||
FT_FLUSHER_STATUS_NUM_ROWS
|
||||
};
|
||||
|
||||
void init(void);
|
||||
void destroy(void);
|
||||
|
||||
TOKU_ENGINE_STATUS_ROW_S status[FT_FLUSHER_STATUS_NUM_ROWS];
|
||||
|
||||
private:
|
||||
bool m_initialized;
|
||||
};
|
||||
typedef FT_FLUSHER_STATUS_S *FT_FLUSHER_STATUS;
extern FT_FLUSHER_STATUS_S fl_status;

// Direct access to the numeric value of flusher row x.
#define FL_STATUS_VAL(x) fl_status.status[FT_FLUSHER_STATUS_S::x].value.num
|
||||
|
||||
|
||||
|
||||
//
|
||||
// Hot Flusher
|
||||
//
|
||||
class FT_HOT_STATUS_S {
|
||||
public:
|
||||
enum {
|
||||
FT_HOT_NUM_STARTED = 0, // number of HOT operations that have begun
|
||||
FT_HOT_NUM_COMPLETED, // number of HOT operations that have successfully completed
|
||||
FT_HOT_NUM_ABORTED, // number of HOT operations that have been aborted
|
||||
FT_HOT_MAX_ROOT_FLUSH_COUNT, // max number of flushes from root ever required to optimize a tree
|
||||
FT_HOT_STATUS_NUM_ROWS
|
||||
};
|
||||
|
||||
void init(void);
|
||||
void destroy(void);
|
||||
|
||||
TOKU_ENGINE_STATUS_ROW_S status[FT_HOT_STATUS_NUM_ROWS];
|
||||
|
||||
private:
|
||||
bool m_initialized;
|
||||
};
|
||||
typedef FT_HOT_STATUS_S *FT_HOT_STATUS;
extern FT_HOT_STATUS_S hot_status;

// Direct access to the numeric value of hot-flusher row x.
#define HOT_STATUS_VAL(x) hot_status.status[FT_HOT_STATUS_S::x].value.num
|
||||
|
||||
|
||||
|
||||
//
|
||||
// Transaction statistics
|
||||
//
|
||||
class TXN_STATUS_S {
|
||||
public:
|
||||
enum {
|
||||
TXN_BEGIN, // total number of transactions begun (does not include recovered txns)
|
||||
TXN_READ_BEGIN, // total number of read only transactions begun (does not include recovered txns)
|
||||
TXN_COMMIT, // successful commits
|
||||
TXN_ABORT,
|
||||
TXN_STATUS_NUM_ROWS
|
||||
};
|
||||
|
||||
void init(void);
|
||||
void destroy(void);
|
||||
|
||||
TOKU_ENGINE_STATUS_ROW_S status[TXN_STATUS_NUM_ROWS];
|
||||
|
||||
private:
|
||||
bool m_initialized;
|
||||
};
|
||||
typedef TXN_STATUS_S *TXN_STATUS;
extern TXN_STATUS_S txn_status;

// Adds d to transaction row x via its partitioned counter. Unlike the LE/FT
// increment macros, the row's type is not checked first.
#define TXN_STATUS_INC(x, d) increment_partitioned_counter(txn_status.status[TXN_STATUS_S::x].value.parcount, d)
|
||||
|
||||
|
||||
|
||||
//
|
||||
// Logger statistics
|
||||
//
|
||||
class LOGGER_STATUS_S {
|
||||
public:
|
||||
enum {
|
||||
LOGGER_NEXT_LSN = 0,
|
||||
LOGGER_NUM_WRITES,
|
||||
LOGGER_BYTES_WRITTEN,
|
||||
LOGGER_UNCOMPRESSED_BYTES_WRITTEN,
|
||||
LOGGER_TOKUTIME_WRITES,
|
||||
LOGGER_WAIT_BUF_LONG,
|
||||
LOGGER_STATUS_NUM_ROWS
|
||||
};
|
||||
|
||||
void init(void);
|
||||
void destroy(void);
|
||||
|
||||
TOKU_ENGINE_STATUS_ROW_S status[LOGGER_STATUS_NUM_ROWS];
|
||||
|
||||
private:
|
||||
bool m_initialized;
|
||||
};
|
||||
typedef LOGGER_STATUS_S *LOGGER_STATUS;
extern LOGGER_STATUS_S log_status;

// Direct access to the numeric value of logger row x.
#define LOG_STATUS_VAL(x) log_status.status[LOGGER_STATUS_S::x].value.num
|
||||
|
||||
// Status set-up / tear-down entry points. Only the declarations are visible
// here; presumably they cover the status objects declared in this header.
// TODO confirm against the definitions.
void toku_status_init();
void toku_status_destroy();
|
@ -1,268 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include <my_global.h>
|
||||
#include "ft/ft.h"
|
||||
#include "ft/ft-cachetable-wrappers.h"
|
||||
#include "ft/ft-internal.h"
|
||||
#include "ft/ft-flusher.h"
|
||||
#include "ft/serialize/ft_node-serialize.h"
|
||||
#include "ft/node.h"
|
||||
#include "ft/ule.h"
|
||||
|
||||
// dummymsn needed to simulate msn because messages are injected at a lower level than toku_ft_root_put_msg()
|
||||
#define MIN_DUMMYMSN ((MSN) {(uint64_t)1 << 62})
|
||||
static MSN dummymsn;
|
||||
static int testsetup_initialized = 0;
|
||||
|
||||
|
||||
// Must be called before any other test_setup_xxx() functions are called.
|
||||
void
|
||||
toku_testsetup_initialize(void) {
|
||||
if (testsetup_initialized == 0) {
|
||||
testsetup_initialized = 1;
|
||||
dummymsn = MIN_DUMMYMSN;
|
||||
}
|
||||
}
|
||||
|
||||
static MSN
next_dummymsn(void) {
    // Advance the file-level counter and hand back a copy of the new value.
    dummymsn.msn += 1;
    return dummymsn;
}
|
||||
|
||||
|
||||
// NOTE(review): defined with external linkage but not referenced anywhere else in this file;
// presumably read or written by other translation units. TODO confirm.
bool ignore_if_was_already_open;
|
||||
int toku_testsetup_leaf(FT_HANDLE ft_handle, BLOCKNUM *blocknum, int n_children, char **keys, int *keylens) {
    // Creates a height-0 node with n_children partitions, installs
    // n_children - 1 pivot keys copied from keys/keylens, stores the node's
    // block number in *blocknum and unpins the node. Always returns 0.
    assert(testsetup_initialized);

    FTNODE leaf;
    toku_create_new_ftnode(ft_handle, &leaf, 0, n_children);
    for (int child = 0; child < n_children; ++child) {
        BP_STATE(leaf, child) = PT_AVAIL;
    }

    // Stage the pivots as temporary DBT copies, hand them to the node, then
    // release the temporaries.
    const int n_pivots = n_children - 1;
    DBT *XMALLOC_N(n_pivots, staged);
    for (int p = 0; p < n_pivots; ++p) {
        toku_memdup_dbt(&staged[p], keys[p], keylens[p]);
    }
    leaf->pivotkeys.create_from_dbts(staged, n_pivots);
    for (int p = 0; p < n_pivots; ++p) {
        toku_destroy_dbt(&staged[p]);
    }
    toku_free(staged);

    *blocknum = leaf->blocknum;
    toku_unpin_ftnode(ft_handle->ft, leaf);
    return 0;
}
|
||||
|
||||
// Don't bother to clean up carefully if something goes wrong. (E.g., it's OK to have malloced stuff that hasn't been freed.)
|
||||
int toku_testsetup_nonleaf (FT_HANDLE ft_handle, int height, BLOCKNUM *blocknum, int n_children, BLOCKNUM *children, char **keys, int *keylens) {
    // Creates a node of the given height whose partitions point at
    // children[0..n_children), installs n_children - 1 pivot keys copied from
    // keys/keylens, stores the node's block number in *blocknum and unpins
    // the node. Always returns 0.
    assert(testsetup_initialized);

    FTNODE parent;
    toku_create_new_ftnode(ft_handle, &parent, height, n_children);
    for (int child = 0; child < n_children; ++child) {
        BP_BLOCKNUM(parent, child) = children[child];
        BP_STATE(parent, child) = PT_AVAIL;
    }

    // Stage the pivots as temporary DBT copies, hand them to the node, then
    // release the temporaries.
    const int n_pivots = n_children - 1;
    DBT *XMALLOC_N(n_pivots, staged);
    for (int p = 0; p < n_pivots; ++p) {
        toku_memdup_dbt(&staged[p], keys[p], keylens[p]);
    }
    parent->pivotkeys.create_from_dbts(staged, n_pivots);
    for (int p = 0; p < n_pivots; ++p) {
        toku_destroy_dbt(&staged[p]);
    }
    toku_free(staged);

    *blocknum = parent->blocknum;
    toku_unpin_ftnode(ft_handle->ft, parent);
    return 0;
}
|
||||
|
||||
int toku_testsetup_root(FT_HANDLE ft_handle, BLOCKNUM blocknum) {
    // Overwrites the root block number recorded in the tree's header.
    // Always returns 0.
    assert(testsetup_initialized);
    auto *header = ft_handle->ft->h;
    header->root_blocknum = blocknum;
    return 0;
}
|
||||
|
||||
int toku_testsetup_get_sersize(FT_HANDLE ft_handle, BLOCKNUM diskoff) // Return the size on disk
{
    assert(testsetup_initialized);

    auto *ft = ft_handle->ft;

    // Pin the node at diskoff using a full-read fetch extra.
    ftnode_fetch_extra full_read;
    full_read.create_for_full_read(ft);
    void *pinned_v;
    const int r = toku_cachetable_get_and_pin(
        ft->cf,
        diskoff,
        toku_cachetable_hash(ft->cf, diskoff),
        &pinned_v,
        get_write_callbacks_for_node(ft),
        toku_ftnode_fetch_callback,
        toku_ftnode_pf_req_callback,
        toku_ftnode_pf_callback,
        true,
        &full_read);
    assert(r==0);

    // Measure the node, then release the pin before returning.
    FTNODE CAST_FROM_VOIDP(pinned, pinned_v);
    const int sersize = toku_serialize_ftnode_size(pinned);
    toku_unpin_ftnode(ft, pinned);
    return sersize;
}
|
||||
|
||||
int toku_testsetup_insert_to_leaf (FT_HANDLE ft_handle, BLOCKNUM blocknum, const char *key, int keylen, const char *val, int vallen) {
    // Pins the node at blocknum, applies an FT_INSERT message for key/val to
    // it with a fresh dummy MSN, and unpins it. Returns the pin error if
    // pinning fails, otherwise 0.
    assert(testsetup_initialized);

    auto *ft = ft_handle->ft;

    ftnode_fetch_extra full_read;
    full_read.create_for_full_read(ft);
    void *pinned_v;
    const int r = toku_cachetable_get_and_pin(
        ft->cf,
        blocknum,
        toku_cachetable_hash(ft->cf, blocknum),
        &pinned_v,
        get_write_callbacks_for_node(ft),
        toku_ftnode_fetch_callback,
        toku_ftnode_pf_req_callback,
        toku_ftnode_pf_callback,
        true,
        &full_read);
    if (r != 0) {
        return r;
    }

    FTNODE CAST_FROM_VOIDP(leaf, pinned_v);
    toku_verify_or_set_counts(leaf);
    assert(leaf->height==0);  // this helper only targets height-0 nodes

    DBT key_dbt, val_dbt;
    ft_msg msg(
        toku_fill_dbt(&key_dbt, key, keylen),
        toku_fill_dbt(&val_dbt, val, vallen),
        FT_INSERT,
        next_dummymsn(),
        toku_xids_get_root_xids());

    static size_t zero_flow_deltas[] = { 0, 0 };
    txn_gc_info gc_info(nullptr, TXNID_NONE, TXNID_NONE, true);
    toku_ftnode_put_msg(
        ft->cmp,
        ft->update_fun,
        leaf,
        -1,
        msg,
        true,
        &gc_info,
        zero_flow_deltas,
        NULL,
        NULL);

    toku_verify_or_set_counts(leaf);

    toku_unpin_ftnode(ft, leaf);
    return 0;
}
|
||||
|
||||
static int
|
||||
testhelper_string_key_cmp(DB *UU(e), const DBT *a, const DBT *b)
|
||||
{
|
||||
char *CAST_FROM_VOIDP(s, a->data), *CAST_FROM_VOIDP(t, b->data);
|
||||
return strcmp(s, t);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
toku_pin_node_with_min_bfe(FTNODE* node, BLOCKNUM b, FT_HANDLE t)
|
||||
{
|
||||
ftnode_fetch_extra bfe;
|
||||
bfe.create_for_min_read(t->ft);
|
||||
toku_pin_ftnode(
|
||||
t->ft,
|
||||
b,
|
||||
toku_cachetable_hash(t->ft->cf, b),
|
||||
&bfe,
|
||||
PL_WRITE_EXPENSIVE,
|
||||
node,
|
||||
true
|
||||
);
|
||||
}
|
||||
|
||||
int toku_testsetup_insert_to_nonleaf (FT_HANDLE ft_handle, BLOCKNUM blocknum, enum ft_msg_type msgtype, const char *key, int keylen, const char *val, int vallen) {
    // Pins the node at blocknum and queues a msgtype message for key/val
    // directly in the buffer of the child that key routes to. Returns the pin
    // error if pinning fails, otherwise 0.
    assert(testsetup_initialized);

    auto *ft = ft_handle->ft;

    ftnode_fetch_extra full_read;
    full_read.create_for_full_read(ft);
    void *pinned_v;
    const int r = toku_cachetable_get_and_pin(
        ft->cf,
        blocknum,
        toku_cachetable_hash(ft->cf, blocknum),
        &pinned_v,
        get_write_callbacks_for_node(ft),
        toku_ftnode_fetch_callback,
        toku_ftnode_pf_req_callback,
        toku_ftnode_pf_callback,
        true,
        &full_read);
    if (r != 0) {
        return r;
    }

    FTNODE CAST_FROM_VOIDP(parent, pinned_v);
    assert(parent->height>0);  // this helper only targets nodes above height 0

    // Route the key to a child using the tree's own comparator.
    DBT key_dbt;
    const int target_child = toku_ftnode_which_child(parent, toku_fill_dbt(&key_dbt, key, keylen), ft->cmp);

    XIDS root_xids = toku_xids_get_root_xids();
    MSN msn = next_dummymsn();

    // The buffer insert gets a temporary comparator built on the string helper.
    toku::comparator string_cmp;
    string_cmp.create(testhelper_string_key_cmp, nullptr);
    toku_bnc_insert_msg(BNC(parent, target_child), key, keylen, val, vallen, msgtype, msn, root_xids, true, string_cmp);
    string_cmp.destroy();

    // Hack to get the test working. The message was queued directly in a FIFO
    // instead of through the ft APIs, so the MSN bookkeeping on the node and
    // in the header is patched up by hand.
    parent->max_msn_applied_to_node_on_disk = msn;
    parent->set_dirty();
    ft->h->max_msn_in_ft = msn;

    toku_unpin_ftnode(ft, parent);
    return 0;
}
|
@ -1,524 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
/* Verify an FT. */
|
||||
/* Check:
|
||||
* The tree is of uniform depth (and the height is correct at every node)
|
||||
* For each pivot key: the max of the stuff to the left is <= the pivot key < the min of the stuff to the right.
|
||||
* For each leaf node: All the keys are in strictly increasing order.
|
||||
* For each nonleaf node: All the messages have keys that are between the associated pivot keys ( left_pivot_key < message <= right_pivot_key)
|
||||
*/
|
||||
|
||||
#include <my_global.h>
|
||||
#include "ft/serialize/block_table.h"
|
||||
#include "ft/ft.h"
|
||||
#include "ft/ft-cachetable-wrappers.h"
|
||||
#include "ft/ft-internal.h"
|
||||
#include "ft/node.h"
|
||||
|
||||
static int
|
||||
compare_pairs (FT_HANDLE ft_handle, const DBT *a, const DBT *b) {
|
||||
return ft_handle->ft->cmp(a, b);
|
||||
}
|
||||
|
||||
static int
|
||||
compare_pair_to_key (FT_HANDLE ft_handle, const DBT *a, const void *key, uint32_t keylen) {
|
||||
DBT y;
|
||||
return ft_handle->ft->cmp(a, toku_fill_dbt(&y, key, keylen));
|
||||
}
|
||||
|
||||
static int
|
||||
verify_msg_in_child_buffer(FT_HANDLE ft_handle, enum ft_msg_type type, MSN msn, const void *key, uint32_t keylen, const void *UU(data), uint32_t UU(datalen), XIDS UU(xids), const DBT *lesser_pivot, const DBT *greatereq_pivot)
|
||||
__attribute__((warn_unused_result));
|
||||
|
||||
UU()
|
||||
static int
|
||||
verify_msg_in_child_buffer(FT_HANDLE ft_handle, enum ft_msg_type type, MSN msn, const void *key, uint32_t keylen, const void *UU(data), uint32_t UU(datalen), XIDS UU(xids), const DBT *lesser_pivot, const DBT *greatereq_pivot) {
|
||||
int result = 0;
|
||||
if (msn.msn == ZERO_MSN.msn)
|
||||
result = EINVAL;
|
||||
switch (type) {
|
||||
default:
|
||||
break;
|
||||
case FT_INSERT:
|
||||
case FT_INSERT_NO_OVERWRITE:
|
||||
case FT_DELETE_ANY:
|
||||
case FT_ABORT_ANY:
|
||||
case FT_COMMIT_ANY:
|
||||
// verify key in bounds
|
||||
if (lesser_pivot) {
|
||||
int compare = compare_pair_to_key(ft_handle, lesser_pivot, key, keylen);
|
||||
if (compare >= 0)
|
||||
result = EINVAL;
|
||||
}
|
||||
if (result == 0 && greatereq_pivot) {
|
||||
int compare = compare_pair_to_key(ft_handle, greatereq_pivot, key, keylen);
|
||||
if (compare < 0)
|
||||
result = EINVAL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Build a DBT describing the key of the i-th key/leafentry pair of a basement node.
// fetch_key_and_len only fills `size` and `data`, so the DBT is value-initialized
// first: any other members are then zero instead of indeterminate when the struct
// is copied out by value.
// A non-zero return from fetch_key_and_len trips invariant_zero.
static DBT
get_ith_key_dbt (BASEMENTNODE bn, int i) {
    DBT kdbt = {};
    int r = bn->data_buffer.fetch_key_and_len(i, &kdbt.size, &kdbt.data);
    invariant_zero(r); // this is a bad failure if it happens.
    return kdbt;
}
|
||||
|
||||
// Check `predicate` while verifying child `i` of a node.
// On failure: print file, line, child index, block number and `string` to stderr,
// set `result` to TOKUDB_NEEDS_REPAIR and, unless `keep_going_on_failure` is
// non-zero, jump to the `done` label.
// The expansion site must provide `blocknum`, `result`, `keep_going_on_failure`
// and a `done:` label.
#define VERIFY_ASSERTION(predicate, i, string) do {                                  \
    if (!(predicate)) {                                                              \
        fprintf(stderr, "%s:%d: Looking at child %d of block %" PRId64 ": %s\n",     \
                __FILE__, __LINE__, i, blocknum.b, string);                          \
        result = TOKUDB_NEEDS_REPAIR;                                                \
        if (!keep_going_on_failure) goto done;                                       \
    }                                                                                \
} while (0)
|
||||
|
||||
// Check `predicate` while verifying entry `entry` of basement node `bn`.
// On failure: print file, line, block number, basement index, entry index and
// `string` to stderr, set `result` to TOKUDB_NEEDS_REPAIR and, unless
// `keep_going_on_failure` is non-zero, jump to the `done` label.
// The expansion site must provide `blocknum`, `result`, `keep_going_on_failure`
// and a `done:` label.
#define VERIFY_ASSERTION_BASEMENT(predicate, bn, entry, string) do {                 \
    if (!(predicate)) {                                                              \
        fprintf(stderr, "%s:%d: Looking at block %" PRId64 " bn %d entry %d: %s\n",  \
                __FILE__, __LINE__, blocknum.b, bn, entry, string);                  \
        result = TOKUDB_NEEDS_REPAIR;                                                \
        if (!keep_going_on_failure) goto done;                                       \
    }                                                                                \
} while (0)
|
||||
|
||||
// State handed to count_msgs while iterating a list of message offsets:
// counts the visited entries whose message carries a given MSN.
// Field order matters: the struct is built with designated initializers.
struct count_msgs_extra {
|
||||
// number of visited entries whose MSN equals `msn`
int count;
|
||||
// the MSN being looked for
MSN msn;
|
||||
// buffer used to turn an offset into the MSN of the message stored there
message_buffer *msg_buffer;
|
||||
};
|
||||
|
||||
// template-only function, but must be extern
|
||||
int count_msgs(const int32_t &offset, const uint32_t UU(idx), struct count_msgs_extra *const e)
|
||||
__attribute__((nonnull(3)));
|
||||
int count_msgs(const int32_t &offset, const uint32_t UU(idx), struct count_msgs_extra *const e)
|
||||
{
|
||||
MSN msn;
|
||||
e->msg_buffer->get_message_key_msn(offset, nullptr, &msn);
|
||||
if (msn.msn == e->msn.msn) {
|
||||
e->count++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// State handed to verify_message_tree / verify_marked_messages while iterating
// the fresh message tree, the stale message tree or the broadcast list of one
// child buffer.
// Field order matters: the struct is built with designated initializers.
struct verify_message_tree_extra {
|
||||
// buffer used to resolve an offset into a message and its freshness flag
message_buffer *msg_buffer;
|
||||
// true while iterating the broadcast list, false for the fresh/stale trees
bool broadcast;
|
||||
// true while iterating the fresh message tree, false for the stale one
bool is_fresh;
|
||||
// child index, reported in failure messages
int i;
|
||||
// not read by the callbacks in this file
int verbose;
|
||||
// block number of the node, reported in failure messages
BLOCKNUM blocknum;
|
||||
// non-zero: record a failure and keep checking instead of stopping at once
int keep_going_on_failure;
|
||||
// true once messages have been moved to the stale tree; only then must every
// entry of the fresh tree actually be fresh
bool messages_have_been_moved;
|
||||
};
|
||||
|
||||
int verify_message_tree(const int32_t &offset, const uint32_t UU(idx), struct verify_message_tree_extra *const e) __attribute__((nonnull(3)));
|
||||
int verify_message_tree(const int32_t &offset, const uint32_t UU(idx), struct verify_message_tree_extra *const e)
|
||||
{
|
||||
BLOCKNUM blocknum = e->blocknum;
|
||||
int keep_going_on_failure = e->keep_going_on_failure;
|
||||
int result = 0;
|
||||
DBT k, v;
|
||||
ft_msg msg = e->msg_buffer->get_message(offset, &k, &v);
|
||||
bool is_fresh = e->msg_buffer->get_freshness(offset);
|
||||
if (e->broadcast) {
|
||||
VERIFY_ASSERTION(ft_msg_type_applies_all((enum ft_msg_type) msg.type()) || ft_msg_type_does_nothing((enum ft_msg_type) msg.type()),
|
||||
e->i, "message found in broadcast list that is not a broadcast");
|
||||
} else {
|
||||
VERIFY_ASSERTION(ft_msg_type_applies_once((enum ft_msg_type) msg.type()),
|
||||
e->i, "message found in fresh or stale message tree that does not apply once");
|
||||
if (e->is_fresh) {
|
||||
if (e->messages_have_been_moved) {
|
||||
VERIFY_ASSERTION(is_fresh,
|
||||
e->i, "message found in fresh message tree that is not fresh");
|
||||
}
|
||||
} else {
|
||||
VERIFY_ASSERTION(!is_fresh,
|
||||
e->i, "message found in stale message tree that is fresh");
|
||||
}
|
||||
}
|
||||
done:
|
||||
return result;
|
||||
}
|
||||
|
||||
int error_on_iter(const int32_t &UU(offset), const uint32_t UU(idx), void *UU(e));
|
||||
int error_on_iter(const int32_t &UU(offset), const uint32_t UU(idx), void *UU(e)) {
|
||||
return TOKUDB_NEEDS_REPAIR;
|
||||
}
|
||||
|
||||
int verify_marked_messages(const int32_t &offset, const uint32_t UU(idx), struct verify_message_tree_extra *const e) __attribute__((nonnull(3)));
|
||||
int verify_marked_messages(const int32_t &offset, const uint32_t UU(idx), struct verify_message_tree_extra *const e)
|
||||
{
|
||||
BLOCKNUM blocknum = e->blocknum;
|
||||
int keep_going_on_failure = e->keep_going_on_failure;
|
||||
int result = 0;
|
||||
bool is_fresh = e->msg_buffer->get_freshness(offset);
|
||||
VERIFY_ASSERTION(!is_fresh, e->i, "marked message found in the fresh message tree that is fresh");
|
||||
done:
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename verify_omt_t>
|
||||
static int
|
||||
verify_sorted_by_key_msn(FT_HANDLE ft_handle, message_buffer *msg_buffer, const verify_omt_t &mt) {
|
||||
int result = 0;
|
||||
size_t last_offset = 0;
|
||||
for (uint32_t i = 0; i < mt.size(); i++) {
|
||||
int32_t offset;
|
||||
int r = mt.fetch(i, &offset);
|
||||
assert_zero(r);
|
||||
if (i > 0) {
|
||||
struct toku_msg_buffer_key_msn_cmp_extra extra(ft_handle->ft->cmp, msg_buffer);
|
||||
if (toku_msg_buffer_key_msn_cmp(extra, last_offset, offset) >= 0) {
|
||||
result = TOKUDB_NEEDS_REPAIR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
last_offset = offset;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename count_omt_t>
|
||||
static int
|
||||
count_eq_key_msn(FT_HANDLE ft_handle, message_buffer *msg_buffer, const count_omt_t &mt, const DBT *key, MSN msn) {
|
||||
struct toku_msg_buffer_key_msn_heaviside_extra extra(ft_handle->ft->cmp, msg_buffer, key, msn);
|
||||
int r = mt.template find_zero<struct toku_msg_buffer_key_msn_heaviside_extra, toku_msg_buffer_key_msn_heaviside>(extra, nullptr, nullptr);
|
||||
int count;
|
||||
if (r == 0) {
|
||||
count = 1;
|
||||
} else {
|
||||
assert(r == DB_NOTFOUND);
|
||||
count = 0;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
void
|
||||
toku_get_node_for_verify(
|
||||
BLOCKNUM blocknum,
|
||||
FT_HANDLE ft_handle,
|
||||
FTNODE* nodep
|
||||
)
|
||||
{
|
||||
uint32_t fullhash = toku_cachetable_hash(ft_handle->ft->cf, blocknum);
|
||||
ftnode_fetch_extra bfe;
|
||||
bfe.create_for_full_read(ft_handle->ft);
|
||||
toku_pin_ftnode(
|
||||
ft_handle->ft,
|
||||
blocknum,
|
||||
fullhash,
|
||||
&bfe,
|
||||
PL_WRITE_EXPENSIVE, // may_modify_node
|
||||
nodep,
|
||||
false
|
||||
);
|
||||
}
|
||||
|
||||
struct verify_msg_fn {
|
||||
FT_HANDLE ft_handle;
|
||||
NONLEAF_CHILDINFO bnc;
|
||||
const DBT *curr_less_pivot;
|
||||
const DBT *curr_geq_pivot;
|
||||
BLOCKNUM blocknum;
|
||||
MSN this_msn;
|
||||
int verbose;
|
||||
int keep_going_on_failure;
|
||||
bool messages_have_been_moved;
|
||||
|
||||
MSN last_msn;
|
||||
int msg_i;
|
||||
int result = 0; // needed by VERIFY_ASSERTION
|
||||
|
||||
verify_msg_fn(FT_HANDLE handle, NONLEAF_CHILDINFO nl, const DBT *less, const DBT *geq,
|
||||
BLOCKNUM b, MSN tmsn, int v, int k, bool m) :
|
||||
ft_handle(handle), bnc(nl), curr_less_pivot(less), curr_geq_pivot(geq),
|
||||
blocknum(b), this_msn(tmsn), verbose(v), keep_going_on_failure(k), messages_have_been_moved(m), last_msn(ZERO_MSN), msg_i(0) {
|
||||
}
|
||||
|
||||
int operator()(const ft_msg &msg, bool is_fresh) {
|
||||
enum ft_msg_type type = (enum ft_msg_type) msg.type();
|
||||
MSN msn = msg.msn();
|
||||
XIDS xid = msg.xids();
|
||||
const void *key = msg.kdbt()->data;
|
||||
const void *data = msg.vdbt()->data;
|
||||
uint32_t keylen = msg.kdbt()->size;
|
||||
uint32_t datalen = msg.vdbt()->size;
|
||||
|
||||
int r = verify_msg_in_child_buffer(ft_handle, type, msn, key, keylen, data, datalen, xid,
|
||||
curr_less_pivot,
|
||||
curr_geq_pivot);
|
||||
VERIFY_ASSERTION(r == 0, msg_i, "A message in the buffer is out of place");
|
||||
VERIFY_ASSERTION((msn.msn > last_msn.msn), msg_i, "msn per msg must be monotonically increasing toward newer messages in buffer");
|
||||
VERIFY_ASSERTION((msn.msn <= this_msn.msn), msg_i, "all messages must have msn within limit of this node's max_msn_applied_to_node_in_memory");
|
||||
if (ft_msg_type_applies_once(type)) {
|
||||
int count;
|
||||
DBT keydbt;
|
||||
toku_fill_dbt(&keydbt, key, keylen);
|
||||
int total_count = 0;
|
||||
count = count_eq_key_msn(ft_handle, &bnc->msg_buffer, bnc->fresh_message_tree, toku_fill_dbt(&keydbt, key, keylen), msn);
|
||||
total_count += count;
|
||||
if (is_fresh) {
|
||||
VERIFY_ASSERTION(count == 1, msg_i, "a fresh message was not found in the fresh message tree");
|
||||
} else if (messages_have_been_moved) {
|
||||
VERIFY_ASSERTION(count == 0, msg_i, "a stale message was found in the fresh message tree");
|
||||
}
|
||||
VERIFY_ASSERTION(count <= 1, msg_i, "a message was found multiple times in the fresh message tree");
|
||||
count = count_eq_key_msn(ft_handle, &bnc->msg_buffer, bnc->stale_message_tree, &keydbt, msn);
|
||||
|
||||
total_count += count;
|
||||
if (is_fresh) {
|
||||
VERIFY_ASSERTION(count == 0, msg_i, "a fresh message was found in the stale message tree");
|
||||
} else if (messages_have_been_moved) {
|
||||
VERIFY_ASSERTION(count == 1, msg_i, "a stale message was not found in the stale message tree");
|
||||
}
|
||||
VERIFY_ASSERTION(count <= 1, msg_i, "a message was found multiple times in the stale message tree");
|
||||
|
||||
VERIFY_ASSERTION(total_count <= 1, msg_i, "a message was found in both message trees (or more than once in a single tree)");
|
||||
VERIFY_ASSERTION(total_count >= 1, msg_i, "a message was not found in either message tree");
|
||||
} else {
|
||||
VERIFY_ASSERTION(ft_msg_type_applies_all(type) || ft_msg_type_does_nothing(type), msg_i, "a message was found that does not apply either to all or to only one key");
|
||||
struct count_msgs_extra extra = { .count = 0, .msn = msn, .msg_buffer = &bnc->msg_buffer };
|
||||
bnc->broadcast_list.iterate<struct count_msgs_extra, count_msgs>(&extra);
|
||||
VERIFY_ASSERTION(extra.count == 1, msg_i, "a broadcast message was not found in the broadcast list");
|
||||
}
|
||||
last_msn = msn;
|
||||
msg_i++;
|
||||
done:
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
// Verify a single, fully in-memory node without descending into its children.
// Checks, in order:
//   - height matches node->height when height >= 0 (invariant, not a soft failure),
//   - for a nonleaf node with messages above it, the parent's msn is >= this node's msn,
//   - the pivot keys are strictly increasing and lie in (lesser_pivot, greatereq_pivot],
//   - for each child of a nonleaf node: the message trees are sorted, every buffered
//     message passes verify_msg_fn, and the fresh/stale/broadcast indexes are consistent,
//   - for each basement of a leaf node: every key is within the child's pivot bounds
//     and adjacent keys are strictly increasing.
// Returns 0 on success, TOKUDB_NEEDS_REPAIR when a VERIFY_ASSERTION failed, or the
// non-zero value returned by one of the iterate calls.
static int
|
||||
toku_verify_ftnode_internal(FT_HANDLE ft_handle,
|
||||
MSN rootmsn, MSN parentmsn_with_messages, bool messages_exist_above,
|
||||
FTNODE node, int height,
|
||||
const DBT *lesser_pivot, // Everything in the subtree should be > lesser_pivot. (lesser_pivot==NULL if there is no lesser pivot.)
|
||||
const DBT *greatereq_pivot, // Everything in the subtree should be <= greatereq_pivot. (greatereq_pivot==NULL if there is no greater-or-equal pivot.)
|
||||
int verbose, int keep_going_on_failure, bool messages_have_been_moved)
|
||||
{
|
||||
int result=0;
|
||||
MSN this_msn;
|
||||
// blocknum, result and keep_going_on_failure are the names VERIFY_ASSERTION expects.
BLOCKNUM blocknum = node->blocknum;
|
||||
|
||||
//printf("%s:%d pin %p\n", __FILE__, __LINE__, node_v);
|
||||
toku_ftnode_assert_fully_in_memory(node);
|
||||
this_msn = node->max_msn_applied_to_node_on_disk;
|
||||
|
||||
// A negative height means the caller does not know the expected height.
if (height >= 0) {
|
||||
invariant(height == node->height); // this is a bad failure if wrong
|
||||
}
|
||||
if (node->height > 0 && messages_exist_above) {
|
||||
VERIFY_ASSERTION((parentmsn_with_messages.msn >= this_msn.msn), 0, "node msn must be descending down tree, newest messages at top");
|
||||
}
|
||||
// Verify that all the pivot keys are in order.
|
||||
// There are n_children-1 pivots; each is compared with its successor.
for (int i = 0; i < node->n_children-2; i++) {
|
||||
DBT x, y;
|
||||
int compare = compare_pairs(ft_handle, node->pivotkeys.fill_pivot(i, &x), node->pivotkeys.fill_pivot(i + 1, &y));
|
||||
VERIFY_ASSERTION(compare < 0, i, "Value is >= the next value");
|
||||
}
|
||||
// Verify that all the pivot keys are lesser_pivot < pivot <= greatereq_pivot
|
||||
for (int i = 0; i < node->n_children-1; i++) {
|
||||
DBT x;
|
||||
if (lesser_pivot) {
|
||||
int compare = compare_pairs(ft_handle, lesser_pivot, node->pivotkeys.fill_pivot(i, &x));
|
||||
VERIFY_ASSERTION(compare < 0, i, "Pivot is >= the lower-bound pivot");
|
||||
}
|
||||
if (greatereq_pivot) {
|
||||
int compare = compare_pairs(ft_handle, greatereq_pivot, node->pivotkeys.fill_pivot(i, &x));
|
||||
VERIFY_ASSERTION(compare >= 0, i, "Pivot is < the upper-bound pivot");
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < node->n_children; i++) {
|
||||
DBT x, y;
|
||||
// Bounds for child i: the pivot to its left (exclusive) and the pivot at i (inclusive);
// the outermost children inherit the bounds passed in by the caller.
const DBT *curr_less_pivot = (i==0) ? lesser_pivot : node->pivotkeys.fill_pivot(i - 1, &x);
|
||||
const DBT *curr_geq_pivot = (i==node->n_children-1) ? greatereq_pivot : node->pivotkeys.fill_pivot(i, &y);
|
||||
if (node->height > 0) {
|
||||
NONLEAF_CHILDINFO bnc = BNC(node, i);
|
||||
// Verify that messages in the buffers are in the right place.
|
||||
VERIFY_ASSERTION(verify_sorted_by_key_msn(ft_handle, &bnc->msg_buffer, bnc->fresh_message_tree) == 0, i, "fresh_message_tree");
|
||||
VERIFY_ASSERTION(verify_sorted_by_key_msn(ft_handle, &bnc->msg_buffer, bnc->stale_message_tree) == 0, i, "stale_message_tree");
|
||||
|
||||
verify_msg_fn verify_msg(ft_handle, bnc, curr_less_pivot, curr_geq_pivot,
|
||||
blocknum, this_msn, verbose, keep_going_on_failure, messages_have_been_moved);
|
||||
int r = bnc->msg_buffer.iterate(verify_msg);
|
||||
if (r != 0) { result = r; goto done; }
|
||||
|
||||
// The same extra is reused for the fresh tree, the stale tree, the marked
// entries and the broadcast list; only is_fresh / broadcast are flipped in between.
struct verify_message_tree_extra extra = { .msg_buffer = &bnc->msg_buffer, .broadcast = false, .is_fresh = true, .i = i, .verbose = verbose, .blocknum = node->blocknum, .keep_going_on_failure = keep_going_on_failure, .messages_have_been_moved = messages_have_been_moved };
|
||||
r = bnc->fresh_message_tree.iterate<struct verify_message_tree_extra, verify_message_tree>(&extra);
|
||||
if (r != 0) { result = r; goto done; }
|
||||
extra.is_fresh = false;
|
||||
r = bnc->stale_message_tree.iterate<struct verify_message_tree_extra, verify_message_tree>(&extra);
|
||||
if (r != 0) { result = r; goto done; }
|
||||
|
||||
bnc->fresh_message_tree.verify_marks_consistent();
|
||||
if (messages_have_been_moved) {
|
||||
VERIFY_ASSERTION(!bnc->fresh_message_tree.has_marks(), i, "fresh message tree still has marks after moving messages");
|
||||
// error_on_iter fails for any entry it is called on, so any remaining marked entry is an error.
r = bnc->fresh_message_tree.iterate_over_marked<void, error_on_iter>(nullptr);
|
||||
if (r != 0) { result = r; goto done; }
|
||||
}
|
||||
else {
|
||||
r = bnc->fresh_message_tree.iterate_over_marked<struct verify_message_tree_extra, verify_marked_messages>(&extra);
|
||||
if (r != 0) { result = r; goto done; }
|
||||
}
|
||||
|
||||
extra.broadcast = true;
|
||||
r = bnc->broadcast_list.iterate<struct verify_message_tree_extra, verify_message_tree>(&extra);
|
||||
if (r != 0) { result = r; goto done; }
|
||||
}
|
||||
else {
|
||||
BASEMENTNODE bn = BLB(node, i);
|
||||
for (uint32_t j = 0; j < bn->data_buffer.num_klpairs(); j++) {
|
||||
// NOTE(review): this check does not depend on j, and it is skipped entirely when the basement has no entries.
VERIFY_ASSERTION((rootmsn.msn >= this_msn.msn), 0, "leaf may have latest msn, but cannot be greater than root msn");
|
||||
DBT kdbt = get_ith_key_dbt(bn, j);
|
||||
if (curr_less_pivot) {
|
||||
int compare = compare_pairs(ft_handle, curr_less_pivot, &kdbt);
|
||||
VERIFY_ASSERTION_BASEMENT(compare < 0, i, j, "The leafentry is >= the lower-bound pivot");
|
||||
}
|
||||
if (curr_geq_pivot) {
|
||||
int compare = compare_pairs(ft_handle, curr_geq_pivot, &kdbt);
|
||||
VERIFY_ASSERTION_BASEMENT(compare >= 0, i, j, "The leafentry is < the upper-bound pivot");
|
||||
}
|
||||
if (0 < j) {
|
||||
DBT prev_key_dbt = get_ith_key_dbt(bn, j-1);
|
||||
int compare = compare_pairs(ft_handle, &prev_key_dbt, &kdbt);
|
||||
VERIFY_ASSERTION_BASEMENT(compare < 0, i, j, "Adjacent leafentries are out of order");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
done:
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
// input is a pinned node, on exit, node is unpinned
// Verifies `node` and, when `recurse` is non-zero, every node below it.
// A nonleaf node is checked twice with toku_verify_ftnode_internal: once as found,
// and once after toku_move_ftnode_messages_to_stale; a leaf is checked once.
// With keep_going_on_failure set, a TOKUDB_NEEDS_REPAIR result does not stop the
// walk; any other non-zero result does.
// If the final result is 0 and progress_callback is non-NULL, the callback is
// invoked and its return value becomes the result.
|
||||
int
|
||||
toku_verify_ftnode (FT_HANDLE ft_handle,
|
||||
MSN rootmsn, MSN parentmsn_with_messages, bool messages_exist_above,
|
||||
FTNODE node, int height,
|
||||
const DBT *lesser_pivot, // Everything in the subtree should be > lesser_pivot. (lesser_pivot==NULL if there is no lesser pivot.)
|
||||
const DBT *greatereq_pivot, // Everything in the subtree should be <= greatereq_pivot. (greatereq_pivot==NULL if there is no greater-or-equal pivot.)
|
||||
int (*progress_callback)(void *extra, float progress), void *progress_extra,
|
||||
int recurse, int verbose, int keep_going_on_failure)
|
||||
{
|
||||
MSN this_msn;
|
||||
|
||||
//printf("%s:%d pin %p\n", __FILE__, __LINE__, node_v);
|
||||
toku_ftnode_assert_fully_in_memory(node);
|
||||
this_msn = node->max_msn_applied_to_node_on_disk;
|
||||
|
||||
int result = 0;
|
||||
int result2 = 0;
|
||||
// First pass (nonleaf only): verify before messages are moved to the stale tree.
if (node->height > 0) {
|
||||
// Otherwise we'll just do the next call
|
||||
|
||||
result = toku_verify_ftnode_internal(
|
||||
ft_handle, rootmsn, parentmsn_with_messages, messages_exist_above, node, height, lesser_pivot, greatereq_pivot,
|
||||
verbose, keep_going_on_failure, false);
|
||||
if (result != 0 && (!keep_going_on_failure || result != TOKUDB_NEEDS_REPAIR)) goto done;
|
||||
}
|
||||
if (node->height > 0) {
|
||||
toku_move_ftnode_messages_to_stale(ft_handle->ft, node);
|
||||
}
|
||||
// Second pass: verify again with messages_have_been_moved == true.
result2 = toku_verify_ftnode_internal(
|
||||
ft_handle, rootmsn, parentmsn_with_messages, messages_exist_above, node, height, lesser_pivot, greatereq_pivot,
|
||||
verbose, keep_going_on_failure, true);
|
||||
// A failure recorded by the first pass takes precedence over result2.
if (result == 0) {
|
||||
result = result2;
|
||||
if (result != 0 && (!keep_going_on_failure || result != TOKUDB_NEEDS_REPAIR)) goto done;
|
||||
}
|
||||
|
||||
// Verify that the subtrees have the right properties.
|
||||
if (recurse && node->height > 0) {
|
||||
for (int i = 0; i < node->n_children; i++) {
|
||||
FTNODE child_node;
|
||||
// The child is pinned here; the recursive call unpins it on exit.
toku_get_node_for_verify(BP_BLOCKNUM(node, i), ft_handle, &child_node);
|
||||
DBT x, y;
|
||||
// If this node buffers messages for child i, its own msn becomes the bound
// passed down; otherwise the bound received from above is forwarded.
int r = toku_verify_ftnode(ft_handle, rootmsn,
|
||||
(toku_bnc_n_entries(BNC(node, i)) > 0
|
||||
? this_msn
|
||||
: parentmsn_with_messages),
|
||||
messages_exist_above || toku_bnc_n_entries(BNC(node, i)) > 0,
|
||||
child_node, node->height-1,
|
||||
(i==0) ? lesser_pivot : node->pivotkeys.fill_pivot(i - 1, &x),
|
||||
(i==node->n_children-1) ? greatereq_pivot : node->pivotkeys.fill_pivot(i, &y),
|
||||
progress_callback, progress_extra,
|
||||
recurse, verbose, keep_going_on_failure);
|
||||
if (r) {
|
||||
result = r;
|
||||
if (!keep_going_on_failure || result != TOKUDB_NEEDS_REPAIR) goto done;
|
||||
}
|
||||
}
|
||||
}
|
||||
done:
|
||||
toku_unpin_ftnode(ft_handle->ft, node);
|
||||
|
||||
if (result == 0 && progress_callback)
|
||||
result = progress_callback(progress_extra, 0.0);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int
|
||||
toku_verify_ft_with_progress (FT_HANDLE ft_handle, int (*progress_callback)(void *extra, float progress), void *progress_extra, int verbose, int keep_on_going) {
|
||||
assert(ft_handle->ft);
|
||||
FTNODE root_node = NULL;
|
||||
{
|
||||
uint32_t root_hash;
|
||||
CACHEKEY root_key;
|
||||
toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &root_hash);
|
||||
toku_get_node_for_verify(root_key, ft_handle, &root_node);
|
||||
}
|
||||
int r = toku_verify_ftnode(ft_handle, ft_handle->ft->h->max_msn_in_ft, ft_handle->ft->h->max_msn_in_ft, false, root_node, -1, NULL, NULL, progress_callback, progress_extra, 1, verbose, keep_on_going);
|
||||
if (r == 0) {
|
||||
toku_ft_lock(ft_handle->ft);
|
||||
ft_handle->ft->h->time_of_last_verification = time(NULL);
|
||||
ft_handle->ft->h->set_dirty();
|
||||
toku_ft_unlock(ft_handle->ft);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
int
|
||||
toku_verify_ft (FT_HANDLE ft_handle) {
|
||||
return toku_verify_ft_with_progress(ft_handle, NULL, NULL, 0, 0);
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -1,195 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <db.h>
|
||||
|
||||
#include "ft/cachetable/cachetable.h"
|
||||
#include "ft/ft-ops.h"
|
||||
#include "ft/logger/log.h"
|
||||
#include "util/dbt.h"
|
||||
#ifndef TOKU_MYSQL_WITH_PFS
|
||||
#include <my_global.h>
|
||||
#endif
|
||||
|
||||
typedef struct ft *FT;
|
||||
typedef struct ft_options *FT_OPTIONS;
|
||||
|
||||
// unlink a ft from the filesystem with or without a txn.
|
||||
// if with a txn, then the unlink happens on commit.
|
||||
void toku_ft_unlink(FT_HANDLE handle);
|
||||
void toku_ft_unlink_on_commit(FT_HANDLE handle, TOKUTXN txn);
|
||||
|
||||
int toku_ft_rename_iname(DB_TXN *txn,
|
||||
const char *data_dir,
|
||||
const char *old_iname,
|
||||
const char *new_iname,
|
||||
CACHETABLE ct);
|
||||
|
||||
void toku_ft_init_reflock(FT ft);
|
||||
void toku_ft_destroy_reflock(FT ft);
|
||||
void toku_ft_grab_reflock(FT ft);
|
||||
void toku_ft_release_reflock(FT ft);
|
||||
|
||||
void toku_ft_lock(struct ft *ft);
|
||||
void toku_ft_unlock(struct ft *ft);
|
||||
|
||||
void toku_ft_create(FT *ftp, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn);
|
||||
void toku_ft_free (FT ft);
|
||||
|
||||
int toku_read_ft_and_store_in_cachefile (FT_HANDLE ft_h, CACHEFILE cf, LSN max_acceptable_lsn, FT *header);
|
||||
void toku_ft_note_ft_handle_open(FT ft, FT_HANDLE live);
|
||||
|
||||
bool toku_ft_needed_unlocked(FT ft);
|
||||
bool toku_ft_has_one_reference_unlocked(FT ft);
|
||||
|
||||
// evict a ft from memory by closing its cachefile. any future work
|
||||
// will have to read in the ft in a new cachefile and new FT object.
|
||||
void toku_ft_evict_from_memory(FT ft, bool oplsn_valid, LSN oplsn);
|
||||
|
||||
FT_HANDLE toku_ft_get_only_existing_ft_handle(FT ft);
|
||||
|
||||
void toku_ft_note_hot_begin(FT_HANDLE ft_h);
|
||||
void toku_ft_note_hot_complete(FT_HANDLE ft_h, bool success, MSN msn_at_start_of_hot);
|
||||
|
||||
void
|
||||
toku_ft_init(
|
||||
FT ft,
|
||||
BLOCKNUM root_blocknum_on_disk,
|
||||
LSN checkpoint_lsn,
|
||||
TXNID root_xid_that_created,
|
||||
uint32_t target_nodesize,
|
||||
uint32_t target_basementnodesize,
|
||||
enum toku_compression_method compression_method,
|
||||
uint32_t fanout
|
||||
);
|
||||
|
||||
int toku_dictionary_redirect_abort(FT old_h, FT new_h, TOKUTXN txn) __attribute__ ((warn_unused_result));
|
||||
int toku_dictionary_redirect (const char *dst_fname_in_env, FT_HANDLE old_ft, TOKUTXN txn);
|
||||
void toku_reset_root_xid_that_created(FT ft, TXNID new_root_xid_that_created);
|
||||
// Reset the root_xid_that_created field to the given value.
|
||||
// This redefines which xid created the dictionary.
|
||||
|
||||
void toku_ft_add_txn_ref(FT ft);
|
||||
void toku_ft_remove_txn_ref(FT ft);
|
||||
|
||||
void toku_calculate_root_offset_pointer (FT ft, CACHEKEY* root_key, uint32_t *roothash);
|
||||
void toku_ft_set_new_root_blocknum(FT ft, CACHEKEY new_root_key);
|
||||
LSN toku_ft_checkpoint_lsn(FT ft) __attribute__ ((warn_unused_result));
|
||||
void toku_ft_stat64 (FT ft, struct ftstat64_s *s);
|
||||
void toku_ft_get_fractal_tree_info64 (FT ft, struct ftinfo64 *s);
|
||||
int toku_ft_iterate_fractal_tree_block_map(FT ft, int (*iter)(uint64_t,int64_t,int64_t,int64_t,int64_t,void*), void *iter_extra);
|
||||
|
||||
// unconditionally set the descriptor for an open FT. can't do this when
|
||||
// any operation has already occurred on the ft.
|
||||
// see toku_ft_change_descriptor(), which is the transactional version
|
||||
// used by the ydb layer. it better describes the client contract.
|
||||
void toku_ft_update_descriptor(FT ft, DESCRIPTOR desc);
|
||||
// use this version if the FT is not fully user-opened with a valid cachefile.
|
||||
// this is a clean hack to get deserialization code to update a descriptor
|
||||
// while the FT and cf are in the process of opening, for upgrade purposes
|
||||
void toku_ft_update_descriptor_with_fd(FT ft, DESCRIPTOR desc, int fd);
|
||||
void toku_ft_update_cmp_descriptor(FT ft);
|
||||
|
||||
// get the descriptor for a ft. safe to read as long as clients honor the
|
||||
// strict contract put forth by toku_ft_update_descriptor/toku_ft_change_descriptor
|
||||
// essentially, there should never be a reader while there is a writer, enforced
|
||||
// by the client, not the FT.
|
||||
DESCRIPTOR toku_ft_get_descriptor(FT_HANDLE ft_handle);
|
||||
DESCRIPTOR toku_ft_get_cmp_descriptor(FT_HANDLE ft_handle);
|
||||
|
||||
// Row-count / byte-count pair.  Passed by value as a delta to
// toku_ft_update_stats and toku_ft_decrease_stats, and by pointer (STAT64INFO)
// as the header statistics those functions adjust.
typedef struct {
|
||||
// delta versions in basements could be negative
|
||||
// These represent the physical leaf entries and do not account
|
||||
// for pending deletes or other in-flight messages that have not been
|
||||
// applied to a leaf entry.
|
||||
int64_t numrows;
|
||||
int64_t numbytes;
|
||||
} STAT64INFO_S, *STAT64INFO;
|
||||
static const STAT64INFO_S ZEROSTATS = { .numrows = 0, .numbytes = 0 };
|
||||
|
||||
void toku_ft_update_stats(STAT64INFO headerstats, STAT64INFO_S delta);
|
||||
void toku_ft_decrease_stats(STAT64INFO headerstats, STAT64INFO_S delta);
|
||||
void toku_ft_adjust_logical_row_count(FT ft, int64_t delta);
|
||||
|
||||
typedef void (*remove_ft_ref_callback)(FT ft, void *extra);
|
||||
void toku_ft_remove_reference(FT ft,
|
||||
bool oplsn_valid, LSN oplsn,
|
||||
remove_ft_ref_callback remove_ref, void *extra);
|
||||
|
||||
void toku_ft_set_nodesize(FT ft, unsigned int nodesize);
|
||||
void toku_ft_get_nodesize(FT ft, unsigned int *nodesize);
|
||||
void toku_ft_set_basementnodesize(FT ft, unsigned int basementnodesize);
|
||||
void toku_ft_get_basementnodesize(FT ft, unsigned int *basementnodesize);
|
||||
void toku_ft_set_compression_method(FT ft, enum toku_compression_method method);
|
||||
void toku_ft_get_compression_method(FT ft, enum toku_compression_method *methodp);
|
||||
void toku_ft_set_fanout(FT ft, unsigned int fanout);
|
||||
void toku_ft_get_fanout(FT ft, unsigned int *fanout);
|
||||
|
||||
// mark the ft as a blackhole. any message injections will be a no op.
|
||||
void toku_ft_set_blackhole(FT_HANDLE ft_handle);
|
||||
|
||||
// Effect: Calculates the total space and used space for a FT's leaf data.
|
||||
// The difference between the two is MVCC garbage.
|
||||
void toku_ft_get_garbage(FT ft, uint64_t *total_space, uint64_t *used_space);
|
||||
|
||||
// TODO: Should be in portability
|
||||
int get_num_cores(void);
|
||||
|
||||
// TODO: Use the cachetable's worker pool instead of something managed by the FT...
|
||||
struct toku_thread_pool *get_ft_pool(void);
|
||||
|
||||
// TODO: Should be in portability
|
||||
int toku_single_process_lock(const char *lock_dir, const char *which, int *lockfd);
|
||||
int toku_single_process_unlock(int *lockfd);
|
||||
|
||||
void tokuft_update_product_name_strings(void);
|
||||
#define TOKU_MAX_PRODUCT_NAME_LENGTH (256)
|
||||
extern char toku_product_name[TOKU_MAX_PRODUCT_NAME_LENGTH];
|
||||
|
||||
// Fixed-size buffers for strings derived from toku_product_name.  Each buffer is
// sized as sizeof(toku_product_name) plus the size of the literal it is paired
// with, plus one (db_version reserves a further 256 bytes).
// NOTE(review): presumably filled in by tokuft_update_product_name_strings() -- confirm.
struct toku_product_name_strings_struct {
|
||||
char db_version[sizeof(toku_product_name) + sizeof("1.2.3 build ") + 256 + 1];
|
||||
char environmentdictionary[sizeof(toku_product_name) + sizeof(".environment") + 1];
|
||||
char fileopsdirectory[sizeof(toku_product_name) + sizeof(".directory") + 1];
|
||||
char single_process_lock[sizeof(toku_product_name) + sizeof("___lock_dont_delete_me") + 1];
|
||||
char rollback_cachefile[sizeof(toku_product_name) + sizeof(".rollback") + 1];
|
||||
};
|
||||
|
||||
extern struct toku_product_name_strings_struct toku_product_name_strings;
|
||||
extern int tokuft_num_envs;
|
@ -1,139 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include <my_global.h>
|
||||
#include "ft/ft.h"
|
||||
#include "ft/ft-internal.h"
|
||||
#include "ft/le-cursor.h"
|
||||
#include "ft/cursor.h"
|
||||
|
||||
// A LE_CURSOR is a special purpose FT_CURSOR that:
|
||||
// - enables prefetching
|
||||
// - does not perform snapshot reads. it reads everything, including uncommitted.
|
||||
//
|
||||
// A LE_CURSOR is good for scanning a FT from beginning to end. Useful for hot indexing.
|
||||
|
||||
struct le_cursor {
|
||||
FT_CURSOR ft_cursor;
|
||||
bool neg_infinity; // true when the le cursor is positioned at -infinity (initial setting)
|
||||
bool pos_infinity; // true when the le cursor is positioned at +infinity (when _next returns DB_NOTFOUND)
|
||||
};
|
||||
|
||||
int
|
||||
toku_le_cursor_create(LE_CURSOR *le_cursor_result, FT_HANDLE ft_handle, TOKUTXN txn) {
|
||||
int result = 0;
|
||||
LE_CURSOR MALLOC(le_cursor);
|
||||
if (le_cursor == NULL) {
|
||||
result = get_error_errno();
|
||||
}
|
||||
else {
|
||||
result = toku_ft_cursor(ft_handle, &le_cursor->ft_cursor, txn, false, false);
|
||||
if (result == 0) {
|
||||
// TODO move the leaf mode to the ft cursor constructor
|
||||
toku_ft_cursor_set_leaf_mode(le_cursor->ft_cursor);
|
||||
le_cursor->neg_infinity = false;
|
||||
le_cursor->pos_infinity = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (result == 0) {
|
||||
*le_cursor_result = le_cursor;
|
||||
} else {
|
||||
toku_free(le_cursor);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Close the wrapped FT cursor, then free the LE_CURSOR itself.
void toku_le_cursor_close(LE_CURSOR le_cursor) {
    FT_CURSOR inner_cursor = le_cursor->ft_cursor;
    toku_ft_cursor_close(inner_cursor);
    toku_free(le_cursor);
}
|
||||
|
||||
// Move to the next leaf entry under the LE_CURSOR
|
||||
// Success: returns zero, calls the getf callback with the getf_v parameter
|
||||
// Failure: returns a non-zero error number
|
||||
int
|
||||
toku_le_cursor_next(LE_CURSOR le_cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v) {
|
||||
int result;
|
||||
if (le_cursor->neg_infinity) {
|
||||
result = DB_NOTFOUND;
|
||||
} else {
|
||||
le_cursor->pos_infinity = false;
|
||||
// TODO replace this with a non deprecated function. Which?
|
||||
result = toku_ft_cursor_get(le_cursor->ft_cursor, NULL, getf, getf_v, DB_PREV);
|
||||
if (result == DB_NOTFOUND) {
|
||||
le_cursor->neg_infinity = true;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
bool
|
||||
toku_le_cursor_is_key_greater_or_equal(LE_CURSOR le_cursor, const DBT *key) {
|
||||
bool result;
|
||||
if (le_cursor->neg_infinity) {
|
||||
result = true; // all keys are greater than -infinity
|
||||
} else if (le_cursor->pos_infinity) {
|
||||
result = false; // all keys are less than +infinity
|
||||
} else {
|
||||
FT ft = le_cursor->ft_cursor->ft_handle->ft;
|
||||
// get the current position from the cursor and compare it to the given key.
|
||||
int r = ft->cmp(&le_cursor->ft_cursor->key, key);
|
||||
if (r <= 0) {
|
||||
result = true; // key is right of the cursor key
|
||||
} else {
|
||||
result = false; // key is at or left of the cursor key
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
toku_le_cursor_update_estimate(LE_CURSOR le_cursor, DBT* estimate) {
|
||||
// don't handle these edge cases, not worth it.
|
||||
// estimate stays same
|
||||
if (le_cursor->pos_infinity || le_cursor->neg_infinity) {
|
||||
return;
|
||||
}
|
||||
DBT *cursor_key = &le_cursor->ft_cursor->key;
|
||||
estimate->data = toku_xrealloc(estimate->data, cursor_key->size);
|
||||
memcpy(estimate->data, cursor_key->data, cursor_key->size);
|
||||
estimate->size = cursor_key->size;
|
||||
estimate->flags = DB_DBT_REALLOC;
|
||||
}
|
@ -1,75 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ft/ft-internal.h"
|
||||
|
||||
// A leaf entry cursor (LE_CURSOR) is a special type of FT_CURSOR that visits all of the leaf entries in a tree
|
||||
// and returns the leaf entry to the caller. It maintains a copy of the key that it was last positioned over to
|
||||
// speed up key comparisons with a given key. For example, the hot indexing could use the _key_right_of_cursor
|
||||
// function to determine where a given key sits relative to the LE_CURSOR position.
|
||||
|
||||
// When _next and _key_right_of_cursor functions are run on multiple threads, they must be protected by a lock. This
|
||||
// lock is assumed to exist outside of the LE_CURSOR.
|
||||
|
||||
typedef struct le_cursor *LE_CURSOR;
|
||||
|
||||
// Create a leaf cursor for a tree (ft_h) within a transaction (txn)
|
||||
// Success: returns 0, stores the LE_CURSOR in the le_cursor_result
|
||||
// Failure: returns a non-zero error number
|
||||
int toku_le_cursor_create(LE_CURSOR *le_cursor_result, FT_HANDLE ft_h, TOKUTXN txn);
|
||||
|
||||
// Close and free the LE_CURSOR
|
||||
void toku_le_cursor_close(LE_CURSOR le_cursor);
|
||||
|
||||
// Move to the next leaf entry under the LE_CURSOR
|
||||
// Success: returns zero, calls the getf callback with the getf_v parameter
|
||||
// Failure: returns a non-zero error number
|
||||
int toku_le_cursor_next(LE_CURSOR le_cursor, FT_GET_CALLBACK_FUNCTION getf, void *getf_v);
|
||||
|
||||
// Return true if the key is to the right of the LE_CURSOR position. that is, current cursor key < given key
|
||||
// Otherwise returns false when the key is at or to the left of the LE_CURSOR position. that is, current cursor key >= given key
|
||||
// The LE_CURSOR position is initialized to -infinity. Any key comparison with -infinity returns true.
|
||||
// When the cursor runs off the right edge of the tree, the LE_CURSOR position is set to +infinity. Any key comparison with +infinity
|
||||
// returns false.
|
||||
bool toku_le_cursor_is_key_greater_or_equal(LE_CURSOR le_cursor, const DBT *key);
|
||||
|
||||
// extracts position of le_cursor into estimate. Responsibility of caller to handle
|
||||
// thread safety. Caller (the indexer), does so by ensuring indexer lock is held
|
||||
void toku_le_cursor_update_estimate(LE_CURSOR le_cursor, DBT* estimate);
|
@ -1,45 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include <my_global.h>
|
||||
#include "serialize/wbuf.h"
|
||||
#include "leafentry.h"
|
||||
|
||||
// Append the leaf entry to the write buffer as raw bytes; the byte count is
// whatever leafentry_disksize reports for `le`.
void wbuf_nocrc_LEAFENTRY(struct wbuf *w, LEAFENTRY le) {
    const size_t disk_bytes = leafentry_disksize(le);
    wbuf_nocrc_literal_bytes(w, le, disk_bytes);
}
|
@ -1,236 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <toku_portability.h>
|
||||
|
||||
#include <util/mempool.h>
|
||||
#include <util/omt.h>
|
||||
|
||||
#include "ft/txn/txn_manager.h"
|
||||
#include "ft/serialize/rbuf.h"
|
||||
#include "ft/msg.h"
|
||||
|
||||
/*
|
||||
Memory format of packed leaf entry
|
||||
CONSTANTS:
|
||||
num_uxrs
|
||||
keylen
|
||||
Run-time-constants
|
||||
voffset of val/vallen??? (for le_any_val) This must be small if it is interpreted as voffset = realoffset_of_val - keylen
|
||||
GOOD performance optimization.
|
||||
ALSO good for simplicity (no having to scan packed version)
|
||||
key[]
|
||||
variable length
|
||||
|
||||
|
||||
Memory format of packed dup leaf entry
|
||||
CONSTANTS:
|
||||
num_uxrs
|
||||
keylen
|
||||
vallen
|
||||
Run-time-constants
|
||||
key[]
|
||||
val[]
|
||||
*/
|
||||
|
||||
// Read mode selector passed to le_val_is_del and le_extract_val.
// NOTE(review): semantics inferred from the names only — confirm against the
// leafentry implementation.
enum cursor_read_type {
    C_READ_ANY = 0,
    C_READ_SNAPSHOT = 1,
    C_READ_COMMITTED = 2,
};
|
||||
|
||||
//
// Possible values of the LEAFENTRY->type field:
//   LE_CLEAN - there is a single committed value, in a format that saves disk space
//   LE_MVCC  - there may be multiple committed values, or there are provisional values
//
enum {
    LE_CLEAN = 0,
    LE_MVCC = 1,
};
|
||||
|
||||
// On-disk leaf entry layout. The static_asserts below pin sizes and offsets so
// that any accidental padding or reordering fails at compile time.
struct leafentry {
    // Payload used when LEAFENTRY->type is LE_CLEAN.
    struct leafentry_clean {
        uint32_t vallen;
        uint8_t val[0];  // actual val
    };
    static_assert(4 == sizeof(leafentry::leafentry_clean), "leafentry_clean size is wrong");
    static_assert(4 == __builtin_offsetof(leafentry::leafentry_clean, val), "val is in the wrong place");

    // Payload used when LEAFENTRY->type is LE_MVCC.
    struct __attribute__ ((__packed__)) leafentry_mvcc {
        uint32_t num_cxrs;  // number of committed transaction records
        uint8_t num_pxrs;   // number of provisional transaction records
        uint8_t xrs[0];
        // Layout of xrs[]:
        //
        // 1. TXNIDs of the XRs relevant for reads:
        //      - if provisional XRs exist, the OUTERMOST TXNID
        //      - committed TXNIDs, from most recently committed to least
        //        recently committed (newest first)
        // 2. Lengths of the XRs relevant for reads (length is at most 1<<31,
        //    MSB is 1 for insert, 0 for delete):
        //      - if provisional XRs exist (num_pxrs>0), the length and
        //        insert/delete flag associated with the INNERMOST TXNID
        //      - the length and insert/delete flag associated with each
        //        committed TXNID, in the same order as above (newest first)
        // 3. Data of the XRs relevant for reads:
        //      - if provisional XRs exist (num_pxrs>0), the data associated
        //        with the INNERMOST provisional TXNID
        //      - the data associated with committed TXNIDs (all committed
        //        data, newest committed values first)
        // 4. If provisional XRs still exist (that is, num_pxrs > 1, so the
        //    INNERMOST provisional TXNID != OUTERMOST provisional TXNID):
        //      - for the OUTERMOST provisional XR:
        //          1 byte:  type (insert/delete/placeholder)
        //          4 bytes: length (if type is INSERT; no length stored if
        //                   placeholder or delete)
        //          data
        //      - for the rest of the provisional stack (if num_pxrs > 2), from
        //        second-outermost to second-innermost (outermost is stored
        //        above, innermost is stored separately):
        //          8 bytes: TXNID
        //          1 byte:  type (insert/delete/placeholder)
        //          4 bytes: length (if type is INSERT)
        //          data
        //      - for the INNERMOST provisional XR:
        //          8 bytes: TXNID
        //          (innermost data and length with insert/delete flag are
        //           stored above; it cannot be a placeholder)
    };
    static_assert(5 == sizeof(leafentry::leafentry_mvcc), "leafentry_mvcc size is wrong");
    static_assert(5 == __builtin_offsetof(leafentry::leafentry_mvcc, xrs), "xrs is in the wrong place");

    uint8_t type;  // type is LE_CLEAN or LE_MVCC
    //uint32_t keylen;
    union __attribute__ ((__packed__)) {
        struct leafentry_clean clean;
        struct leafentry_mvcc mvcc;
    } u;
};
static_assert(6 == sizeof(leafentry), "leafentry size is wrong");
static_assert(1 == __builtin_offsetof(leafentry, u), "union is in the wrong place");
|
||||
|
||||
// In-memory size of an LE_CLEAN leaf entry holding a value of _vallen bytes:
// the type byte, the vallen field, and the value itself.
#define LE_CLEAN_MEMSIZE(_vallen)                                   \
    (sizeof(((LEAFENTRY)NULL)->type)            /* type */          \
    +sizeof(((LEAFENTRY)NULL)->u.clean.vallen)  /* vallen */        \
    +(_vallen))                                 /* actual val */

// Fixed overhead of an LE_MVCC leaf entry: the type byte, both record
// counters, one TXNID and two length+bit words.
#define LE_MVCC_COMMITTED_HEADER_MEMSIZE                            \
    (sizeof(((LEAFENTRY)NULL)->type)            /* type */          \
    +sizeof(((LEAFENTRY)NULL)->u.mvcc.num_cxrs) /* committed */     \
    +sizeof(((LEAFENTRY)NULL)->u.mvcc.num_pxrs) /* provisional */   \
    +sizeof(TXNID)                              /* transaction */   \
    +sizeof(uint32_t)                           /* length+bit */    \
    +sizeof(uint32_t))                          /* length+bit */

// Header overhead above plus a value of _vallen bytes.
#define LE_MVCC_COMMITTED_MEMSIZE(_vallen)                          \
    (LE_MVCC_COMMITTED_HEADER_MEMSIZE                               \
    +(_vallen))                                 /* actual val */
|
||||
|
||||
|
||||
typedef struct leafentry *LEAFENTRY;
|
||||
typedef struct leafentry_13 *LEAFENTRY_13;
|
||||
|
||||
//
|
||||
// TODO: consistency among names is very poor.
|
||||
//
|
||||
|
||||
// TODO: rename this helper function for deserialization
|
||||
size_t leafentry_rest_memsize(uint32_t num_puxrs, uint32_t num_cuxrs, uint8_t* start);
|
||||
size_t leafentry_memsize (LEAFENTRY le); // the size of a leafentry in memory.
|
||||
size_t leafentry_disksize (LEAFENTRY le); // this is the same as logsizeof_LEAFENTRY. The size of a leafentry on disk.
|
||||
void wbuf_nocrc_LEAFENTRY(struct wbuf *w, LEAFENTRY le);
|
||||
int print_klpair (FILE *outf, const void* key, uint32_t keylen, LEAFENTRY v); // Print a leafentry out in human-readable form.
|
||||
|
||||
int le_latest_is_del(LEAFENTRY le); // Return true if it is a provisional delete.
|
||||
int le_val_is_del(LEAFENTRY le, enum cursor_read_type read_type, TOKUTXN txn); // Returns true if the value that is to be read is empty
|
||||
// NOTE(review): the earlier comment here described an xid count, which does not match the bool return type; presumably this reports whether le->type is LE_CLEAN — confirm against the definition.
bool le_is_clean(LEAFENTRY le);
|
||||
bool le_has_xids(LEAFENTRY le, XIDS xids); // Return true if the transaction represented by xids is still provisional in this leafentry (le's xid stack is a superset of, or equal to, xids)
|
||||
void* le_latest_val (LEAFENTRY le); // Return the latest val (return NULL for provisional deletes)
|
||||
uint32_t le_latest_vallen (LEAFENTRY le); // Return the latest vallen. Returns 0 for provisional deletes.
|
||||
void* le_latest_val_and_len (LEAFENTRY le, uint32_t *len);
|
||||
|
||||
uint64_t le_outermost_uncommitted_xid (LEAFENTRY le);
|
||||
|
||||
//Callback contract:
|
||||
// Function checks to see if id is accepted by context.
|
||||
// Returns:
|
||||
// 0: context ignores this entry, id.
|
||||
// TOKUDB_ACCEPT: context accepts id
|
||||
// r|r!=0&&r!=TOKUDB_ACCEPT: Quit early, return r, because something unexpected went wrong (error case)
|
||||
typedef int(*LE_ITERATE_CALLBACK)(TXNID id, TOKUTXN context, bool is_provisional);
|
||||
|
||||
int le_iterate_val(
|
||||
LEAFENTRY le,
|
||||
LE_ITERATE_CALLBACK f,
|
||||
void** valpp,
|
||||
uint32_t* vallenp,
|
||||
TOKUTXN context);
|
||||
|
||||
void le_extract_val(
|
||||
LEAFENTRY le,
|
||||
// should we return the entire leafentry as the val?
|
||||
bool is_leaf_mode,
|
||||
enum cursor_read_type read_type,
|
||||
TOKUTXN ttxn,
|
||||
uint32_t* vallen,
|
||||
void** val);
|
||||
|
||||
size_t leafentry_disksize_13(LEAFENTRY_13 le);
|
||||
|
||||
int toku_le_upgrade_13_14(
|
||||
// NULL if there was no stored data.
|
||||
LEAFENTRY_13 old_leafentry,
|
||||
void** keyp,
|
||||
uint32_t* keylen,
|
||||
size_t* new_leafentry_memorysize,
|
||||
LEAFENTRY *new_leafentry_p);
|
||||
|
||||
class bn_data;
|
||||
|
||||
int64_t toku_le_apply_msg(
|
||||
const ft_msg &msg,
|
||||
// NULL if there was no stored data.
|
||||
LEAFENTRY old_leafentry,
|
||||
// bn_data storing leafentry, if NULL, means there is no bn_data
|
||||
bn_data* data_buffer,
|
||||
// index in data_buffer where leafentry is stored (and should be replaced)
|
||||
uint32_t idx,
|
||||
uint32_t old_keylen,
|
||||
txn_gc_info* gc_info,
|
||||
LEAFENTRY *new_leafentry_p,
|
||||
int64_t* numbytes_delta_p);
|
||||
|
||||
bool toku_le_worth_running_garbage_collection(
|
||||
LEAFENTRY le,
|
||||
txn_gc_info* gc_info);
|
||||
|
||||
void toku_le_garbage_collect(
|
||||
LEAFENTRY old_leaf_entry,
|
||||
bn_data* data_buffer,
|
||||
uint32_t idx,
|
||||
void* keyp,
|
||||
uint32_t keylen,
|
||||
txn_gc_info* gc_info,
|
||||
LEAFENTRY* new_leaf_entry,
|
||||
int64_t* numbytes_delta_p);
|
@ -1,148 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include <toku_portability.h>
|
||||
#include <toku_assert.h>
|
||||
#include <toku_pthread.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "loader/loader-internal.h"
|
||||
#include "util/dbt.h"
|
||||
|
||||
// Instrumentation key handed to toku_mutex_init when ft_loader_init_error_callback creates the error-callback mutex.
toku_instr_key *loader_error_mutex_key;
|
||||
|
||||
// Acquire the mutex embedded in the loader error state.
static void error_callback_lock(ft_loader_error_callback loader_error) {
    auto *error_mutex = &loader_error->mutex;
    toku_mutex_lock(error_mutex);
}
|
||||
|
||||
// Release the mutex embedded in the loader error state.
static void error_callback_unlock(ft_loader_error_callback loader_error) {
    auto *error_mutex = &loader_error->mutex;
    toku_mutex_unlock(error_mutex);
}
|
||||
|
||||
// Put the loader error state into its initial condition: zero the whole
// structure, initialise the key/val DBTs, then create the mutex.
void ft_loader_init_error_callback(ft_loader_error_callback loader_error) {
    // Must come first: it wipes every field, including the DBTs and mutex.
    memset(loader_error, 0, sizeof(*loader_error));

    toku_init_dbt(&loader_error->val);
    toku_init_dbt(&loader_error->key);

    toku_mutex_init(*loader_error_mutex_key, &loader_error->mutex, nullptr);
}
|
||||
|
||||
// Tear down the loader error state: destroy the mutex, destroy both DBTs,
// then zero the structure.
void ft_loader_destroy_error_callback(ft_loader_error_callback loader_error) {
    toku_mutex_destroy(&loader_error->mutex);

    toku_destroy_dbt(&loader_error->val);
    toku_destroy_dbt(&loader_error->key);

    // Must come last: it wipes every field.
    memset(loader_error, 0, sizeof(*loader_error));
}
|
||||
|
||||
// Return the recorded error code, read while holding the error mutex.
int ft_loader_get_error(ft_loader_error_callback loader_error) {
    error_callback_lock(loader_error);
    const int recorded_error = loader_error->error;
    error_callback_unlock(loader_error);

    return recorded_error;
}
|
||||
|
||||
// Register the error callback and the opaque pointer that is passed back to it.
// The error mutex is not taken here.
void ft_loader_set_error_function(ft_loader_error_callback loader_error, ft_loader_error_func error_function, void *error_extra) {
    loader_error->extra = error_extra;
    loader_error->error_callback = error_function;
}
|
||||
|
||||
// Record an error in the loader error state, under the error mutex.
// Only the first error is kept:
//   - returns EEXIST and changes nothing if an error is already recorded
//   - otherwise stores error/db/which_db, clones key and val when they are
//     non-null, and returns 0
int ft_loader_set_error(ft_loader_error_callback loader_error, int error, DB *db, int which_db, DBT *key, DBT *val) {
    error_callback_lock(loader_error);

    // there can be only one
    const bool already_recorded = loader_error->error ? true : false;
    if (!already_recorded) {
        loader_error->error = error;  // set the error
        loader_error->db = db;
        loader_error->which_db = which_db;
        if (key != nullptr) {
            toku_clone_dbt(&loader_error->key, *key);
        }
        if (val != nullptr) {
            toku_clone_dbt(&loader_error->val, *val);
        }
    }

    error_callback_unlock(loader_error);
    return already_recorded ? EEXIST : 0;
}
|
||||
|
||||
// Invoke the registered error callback at most once, under the error mutex.
// The callback runs only when an error is recorded, a callback is registered,
// and it has not been called before. Returns the recorded error code (0 when
// no error has been set).
int ft_loader_call_error_function(ft_loader_error_callback loader_error) {
    error_callback_lock(loader_error);

    const int recorded_error = loader_error->error;
    const bool should_notify = recorded_error
                            && loader_error->error_callback
                            && !loader_error->did_callback;
    if (should_notify) {
        // Mark before calling so the callback is never delivered twice.
        loader_error->did_callback = true;
        loader_error->error_callback(loader_error->db,
                                     loader_error->which_db,
                                     loader_error->error,
                                     &loader_error->key,
                                     &loader_error->val,
                                     loader_error->extra);
    }

    error_callback_unlock(loader_error);
    return recorded_error;
}
|
||||
|
||||
// Record an error and, if recording succeeded, fire the error callback.
// Returns the non-zero result of ft_loader_set_error when the error could not
// be recorded; otherwise returns the result of ft_loader_call_error_function.
int ft_loader_set_error_and_callback(ft_loader_error_callback loader_error, int error, DB *db, int which_db, DBT *key, DBT *val) {
    const int set_rc = ft_loader_set_error(loader_error, error, db, which_db, key, val);
    if (set_rc != 0) {
        return set_rc;
    }
    return ft_loader_call_error_function(loader_error);
}
|
||||
|
||||
// Zero the poll callback state. Always returns 0.
int ft_loader_init_poll_callback(ft_loader_poll_callback p) {
    memset(p, 0, sizeof(*p));
    return 0;
}
|
||||
|
||||
// Zero the poll callback state.
void ft_loader_destroy_poll_callback(ft_loader_poll_callback p) {
    memset(p, 0, sizeof(*p));
}
|
||||
|
||||
// Register the poll function and the opaque pointer that is passed back to it.
void ft_loader_set_poll_function(ft_loader_poll_callback p, ft_loader_poll_func poll_function, void *poll_extra) {
    p->poll_extra = poll_extra;
    p->poll_function = poll_function;
}
|
||||
|
||||
// Call the registered poll function with its extra pointer and `progress`.
// Returns the poll function's result, or 0 when none is registered.
int ft_loader_call_poll_function(ft_loader_poll_callback p, float progress) {
    if (!p->poll_function) {
        return 0;
    }
    return p->poll_function(p->poll_extra, progress);
}
|
@ -1,598 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include <my_global.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "portability/toku_assert.h"
|
||||
#include "portability/memory.h"
|
||||
|
||||
#include "ft/ft-internal.h"
|
||||
#include "ft/serialize/ft_node-serialize.h"
|
||||
#include "loader/dbufio.h"
|
||||
#include "loader/loader-internal.h"
|
||||
|
||||
// Instrumentation keys for this file. NOTE(review): by name these cover the fileset mutex, the fileset condition variable, and the I/O thread; their uses are not visible here — confirm.
toku_instr_key *bfs_mutex_key;
|
||||
toku_instr_key *bfs_cond_key;
|
||||
toku_instr_key *io_thread_key;
|
||||
|
||||
struct dbufio_file {
|
||||
// i/o thread owns these
|
||||
int fd;
|
||||
|
||||
// consumers own these
|
||||
size_t offset_in_buf;
|
||||
toku_off_t offset_in_uncompressed_file;
|
||||
|
||||
// need the mutex to modify these
|
||||
struct dbufio_file *next;
|
||||
bool second_buf_ready; // if true, the i/o thread is not touching anything.
|
||||
|
||||
// consumers own [0], i/o thread owns [1], they are swapped by the consumer only when the condition mutex is held and second_buf_ready is true.
|
||||
char *buf[2];
|
||||
size_t n_in_buf[2];
|
||||
int error_code[2]; // includes errno or eof. [0] is the error code associated with buf[0], [1] is the code for buf[1]
|
||||
|
||||
bool io_done;
|
||||
};
|
||||
|
||||
|
||||
/* A dbufio_fileset */
|
||||
struct dbufio_fileset {
|
||||
// The mutex/condition variables protect
|
||||
// the singly-linked list of files that need I/O (head/tail in the fileset, and next in each file)
|
||||
// in each file:
|
||||
// the second_buf_ready boolean (which says the second buffer is full of data).
|
||||
// the swapping of the buf[], n_in_buf[], and error_code[] values.
|
||||
toku_mutex_t mutex;
|
||||
toku_cond_t cond;
|
||||
int N; // How many files. This is constant once established.
|
||||
int n_not_done; // how many of the files require more I/O? Owned by the i/o thread.
|
||||
struct dbufio_file *files; // an array of length N.
|
||||
struct dbufio_file *head, *tail; // must have the mutex to fiddle with these.
|
||||
size_t bufsize; // the bufsize is the constant (the same for all buffers).
|
||||
|
||||
bool panic;
|
||||
bool compressed;
|
||||
int panic_errno;
|
||||
toku_pthread_t iothread;
|
||||
};
|
||||
|
||||
|
||||
// Append f to the fileset's singly-linked list of files.
// No locking is done here; the struct comments say the mutex is needed to
// touch head/tail/next.
static void enq (DBUFIO_FILESET bfs, struct dbufio_file *f) {
    // Pick the slot that must point at f: the list head when the list is
    // empty, otherwise the current tail's next pointer.
    struct dbufio_file **slot = (bfs->tail == NULL) ? &bfs->head : &bfs->tail->next;
    *slot = f;
    bfs->tail = f;
    // Cleared last, after linking, exactly as the list expects of its tail.
    f->next = NULL;
}
|
||||
|
||||
// Put the fileset into the panicked state, recording r as the reason.
// Does nothing if the fileset has already panicked.
static void panic (DBUFIO_FILESET bfs, int r) {
    if (!bfs->panic) {
        // Don't really care about a race on this variable... Writes to it are
        // atomic, so at least one good panic reason will be stored.
        bfs->panic_errno = r;
        bfs->panic = true;
    }
}
|
||||
|
||||
// Report whether the fileset's panic flag is set.
static bool paniced (DBUFIO_FILESET bfs) {
    const bool has_panicked = bfs->panic;
    return has_panicked;
}
|
||||
|
||||
static ssize_t dbf_read_some_compressed(struct dbufio_file *dbf, char *buf, size_t bufsize) {
|
||||
ssize_t ret;
|
||||
invariant(bufsize >= MAX_UNCOMPRESSED_BUF);
|
||||
unsigned char *raw_block = NULL;
|
||||
|
||||
// deserialize the sub block header
|
||||
|
||||
// total_size
|
||||
// num_sub_blocks
|
||||
// compressed_size,uncompressed_size,xsum (repeated num_sub_blocks times)
|
||||
ssize_t readcode;
|
||||
const uint32_t header_size = sizeof(uint32_t);
|
||||
char header[header_size];
|
||||
|
||||
readcode = toku_os_read(dbf->fd, &header, header_size);
|
||||
if (readcode < 0) {
|
||||
ret = -1;
|
||||
goto exit;
|
||||
}
|
||||
if (readcode == 0) {
|
||||
ret = 0;
|
||||
goto exit;
|
||||
}
|
||||
if (readcode < (ssize_t) header_size) {
|
||||
errno = TOKUDB_NO_DATA;
|
||||
ret = -1;
|
||||
goto exit;
|
||||
}
|
||||
uint32_t total_size;
|
||||
{
|
||||
uint32_t *p = (uint32_t *) &header[0];
|
||||
total_size = toku_dtoh32(p[0]);
|
||||
}
|
||||
if (total_size == 0 || total_size > (1<<30)) {
|
||||
errno = toku_db_badformat();
|
||||
ret = -1;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
//Cannot use XMALLOC
|
||||
MALLOC_N(total_size, raw_block);
|
||||
if (raw_block == nullptr) {
|
||||
errno = ENOMEM;
|
||||
ret = -1;
|
||||
goto exit;
|
||||
}
|
||||
readcode = toku_os_read(dbf->fd, raw_block, total_size);
|
||||
if (readcode < 0) {
|
||||
ret = -1;
|
||||
goto exit;
|
||||
}
|
||||
if (readcode < (ssize_t) total_size) {
|
||||
errno = TOKUDB_NO_DATA;
|
||||
ret = -1;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
struct sub_block sub_block[max_sub_blocks];
|
||||
uint32_t *sub_block_header;
|
||||
sub_block_header = (uint32_t *) &raw_block[0];
|
||||
int32_t n_sub_blocks;
|
||||
n_sub_blocks = toku_dtoh32(sub_block_header[0]);
|
||||
sub_block_header++;
|
||||
size_t size_subblock_header;
|
||||
size_subblock_header = sub_block_header_size(n_sub_blocks);
|
||||
if (n_sub_blocks == 0 || n_sub_blocks > max_sub_blocks || size_subblock_header > total_size) {
|
||||
errno = toku_db_badformat();
|
||||
ret = -1;
|
||||
goto exit;
|
||||
}
|
||||
for (int i = 0; i < n_sub_blocks; i++) {
|
||||
sub_block_init(&sub_block[i]);
|
||||
sub_block[i].compressed_size = toku_dtoh32(sub_block_header[0]);
|
||||
sub_block[i].uncompressed_size = toku_dtoh32(sub_block_header[1]);
|
||||
sub_block[i].xsum = toku_dtoh32(sub_block_header[2]);
|
||||
sub_block_header += 3;
|
||||
}
|
||||
|
||||
// verify sub block sizes
|
||||
size_t total_compressed_size;
|
||||
total_compressed_size = 0;
|
||||
for (int i = 0; i < n_sub_blocks; i++) {
|
||||
uint32_t compressed_size = sub_block[i].compressed_size;
|
||||
if (compressed_size<=0 || compressed_size>(1<<30)) {
|
||||
errno = toku_db_badformat();
|
||||
ret = -1;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint32_t uncompressed_size = sub_block[i].uncompressed_size;
|
||||
if (uncompressed_size<=0 || uncompressed_size>(1<<30)) {
|
||||
errno = toku_db_badformat();
|
||||
ret = -1;
|
||||
goto exit;
|
||||
}
|
||||
total_compressed_size += compressed_size;
|
||||
}
|
||||
if (total_size != total_compressed_size + size_subblock_header) {
|
||||
errno = toku_db_badformat();
|
||||
ret = -1;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// sum up the uncompressed size of the sub blocks
|
||||
size_t uncompressed_size;
|
||||
uncompressed_size = get_sum_uncompressed_size(n_sub_blocks, sub_block);
|
||||
if (uncompressed_size > bufsize || uncompressed_size > MAX_UNCOMPRESSED_BUF) {
|
||||
errno = toku_db_badformat();
|
||||
ret = -1;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
unsigned char *uncompressed_data;
|
||||
uncompressed_data = (unsigned char *)buf;
|
||||
|
||||
// point at the start of the compressed data (past the node header, the sub block header, and the header checksum)
|
||||
unsigned char *compressed_data;
|
||||
compressed_data = raw_block + size_subblock_header;
|
||||
|
||||
// decompress all the compressed sub blocks into the uncompressed buffer
|
||||
{
|
||||
int r;
|
||||
r = decompress_all_sub_blocks(n_sub_blocks, sub_block, compressed_data, uncompressed_data, get_num_cores(), get_ft_pool());
|
||||
if (r != 0) {
|
||||
fprintf(stderr, "%s:%d loader failed %d at %p size %" PRIu32"\n", __FUNCTION__, __LINE__, r, raw_block, total_size);
|
||||
dump_bad_block(raw_block, total_size);
|
||||
errno = r;
|
||||
ret = -1;
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
ret = uncompressed_size;
|
||||
exit:
|
||||
if (raw_block) {
|
||||
toku_free(raw_block);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t dbf_read_compressed(struct dbufio_file *dbf, char *buf, size_t bufsize) {
|
||||
invariant(bufsize >= MAX_UNCOMPRESSED_BUF);
|
||||
size_t count = 0;
|
||||
|
||||
while (count + MAX_UNCOMPRESSED_BUF <= bufsize) {
|
||||
ssize_t readcode = dbf_read_some_compressed(dbf, buf + count, bufsize - count);
|
||||
if (readcode < 0) {
|
||||
return readcode;
|
||||
}
|
||||
count += readcode;
|
||||
if (readcode == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
static void* io_thread (void *v)
|
||||
// The dbuf_thread does all the asynchronous I/O.
|
||||
{
|
||||
DBUFIO_FILESET bfs = (DBUFIO_FILESET)v;
|
||||
toku_mutex_lock(&bfs->mutex);
|
||||
//printf("%s:%d Locked\n", __FILE__, __LINE__);
|
||||
while (1) {
|
||||
if (paniced(bfs)) {
|
||||
toku_mutex_unlock(&bfs->mutex); // ignore any error
|
||||
toku_instr_delete_current_thread();
|
||||
return toku_pthread_done(nullptr);
|
||||
}
|
||||
// printf("n_not_done=%d\n", bfs->n_not_done);
|
||||
if (bfs->n_not_done == 0) {
|
||||
// all done (meaning we stored EOF (or another error) in
|
||||
// error_code[0] for the file.
|
||||
// printf("unlocked\n");
|
||||
toku_mutex_unlock(&bfs->mutex);
|
||||
toku_instr_delete_current_thread();
|
||||
return toku_pthread_done(nullptr);
|
||||
}
|
||||
|
||||
struct dbufio_file *dbf = bfs->head;
|
||||
if (dbf == NULL) {
|
||||
// No I/O needs to be done yet.
|
||||
// Wait until something happens that will wake us up.
|
||||
toku_cond_wait(&bfs->cond, &bfs->mutex);
|
||||
if (paniced(bfs)) {
|
||||
toku_mutex_unlock(&bfs->mutex); // ignore any error
|
||||
toku_instr_delete_current_thread();
|
||||
return toku_pthread_done(nullptr);
|
||||
}
|
||||
// Have the lock so go around.
|
||||
} else {
|
||||
// Some I/O needs to be done.
|
||||
// printf("%s:%d Need I/O\n", __FILE__, __LINE__);
|
||||
assert(dbf->second_buf_ready == false);
|
||||
assert(!dbf->io_done);
|
||||
bfs->head = dbf->next;
|
||||
if (bfs->head == NULL)
|
||||
bfs->tail = NULL;
|
||||
|
||||
// Unlock the mutex now that we have ownership of dbf to allow
|
||||
// consumers to get the mutex and perform swaps. They won't swap
|
||||
// this buffer because second_buf_ready is false.
|
||||
toku_mutex_unlock(&bfs->mutex);
|
||||
//printf("%s:%d Doing read fd=%d\n", __FILE__, __LINE__, dbf->fd);
|
||||
{
|
||||
ssize_t readcode;
|
||||
if (bfs->compressed) {
|
||||
readcode = dbf_read_compressed(dbf, dbf->buf[1], bfs->bufsize);
|
||||
}
|
||||
else {
|
||||
readcode = toku_os_read(dbf->fd, dbf->buf[1], bfs->bufsize);
|
||||
}
|
||||
//printf("%s:%d readcode=%ld\n", __FILE__, __LINE__, readcode);
|
||||
if (readcode==-1) {
|
||||
// a real error. Save the real error.
|
||||
int the_errno = get_error_errno();
|
||||
fprintf(stderr, "%s:%d dbf=%p fd=%d errno=%d\n", __FILE__, __LINE__, dbf, dbf->fd, the_errno);
|
||||
dbf->error_code[1] = the_errno;
|
||||
dbf->n_in_buf[1] = 0;
|
||||
} else if (readcode==0) {
|
||||
// End of file. Save it.
|
||||
dbf->error_code[1] = EOF;
|
||||
dbf->n_in_buf[1] = 0;
|
||||
dbf->io_done = true;
|
||||
|
||||
} else {
|
||||
dbf->error_code[1] = 0;
|
||||
dbf->n_in_buf[1] = readcode;
|
||||
}
|
||||
|
||||
//printf("%s:%d locking mutex again=%ld\n", __FILE__, __LINE__, readcode);
|
||||
{
|
||||
toku_mutex_lock(&bfs->mutex);
|
||||
if (paniced(bfs)) {
|
||||
toku_mutex_unlock(&bfs->mutex); // ignore any error
|
||||
toku_instr_delete_current_thread();
|
||||
return toku_pthread_done(nullptr);
|
||||
}
|
||||
}
|
||||
// Now that we have the mutex, we can decrement n_not_done (if
|
||||
// applicable) and set second_buf_ready
|
||||
if (readcode<=0) {
|
||||
bfs->n_not_done--;
|
||||
}
|
||||
//printf("%s:%d n_not_done=%d\n", __FILE__, __LINE__, bfs->n_not_done);
|
||||
dbf->second_buf_ready = true;
|
||||
toku_cond_broadcast(&bfs->cond);
|
||||
//printf("%s:%d did broadcast=%d\n", __FILE__, __LINE__, bfs->n_not_done);
|
||||
// Still have the lock so go around the loop
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Create a fileset over the N file descriptors in fds.
//
// For every file two buffers of bufsize bytes are allocated and the first one
// is filled synchronously (through the decompressing reader when compressed
// is true). Files that still have data are queued for the I/O thread, which is
// started last.
//
// On success stores the new fileset in *bfsp and returns 0. On failure
// everything that was set up is torn down again, *bfsp is left untouched and
// a nonzero error code is returned.
int create_dbufio_fileset (DBUFIO_FILESET *bfsp, int N, int fds[/*N*/], size_t bufsize, bool compressed) {
    DBUFIO_FILESET CALLOC(bfs);
    if (bfs == nullptr) {
        // Nothing has been set up yet, so there is nothing to undo. (The
        // fileset must not be touched here: it does not exist.)
        return get_error_errno();
    }

    int result = 0;
    bool mutex_inited = false, cond_inited = false;
    bfs->compressed = compressed;

    CALLOC_N(N, bfs->files);
    if (bfs->files == NULL) {
        result = get_error_errno();
    } else {
        // make sure the cleanup code below can tell which bufs were allocated
        for (int i = 0; i < N; i++) {
            bfs->files[i].buf[0] = bfs->files[i].buf[1] = NULL;
        }
    }

    if (result == 0) {
        toku_mutex_init(*bfs_mutex_key, &bfs->mutex, nullptr);
        mutex_inited = true;
        toku_cond_init(*bfs_cond_key, &bfs->cond, nullptr);
        cond_inited = true;

        bfs->N = N;
        bfs->n_not_done = N;
        bfs->head = bfs->tail = NULL;
        bfs->bufsize = bufsize;
        bfs->panic = false;
        bfs->panic_errno = 0;

        for (int i = 0; i < N; i++) {
            bfs->files[i].fd = fds[i];
            bfs->files[i].offset_in_buf = 0;
            bfs->files[i].offset_in_uncompressed_file = 0;
            bfs->files[i].next = NULL;
            bfs->files[i].second_buf_ready = false;
            bfs->files[i].io_done = false;
            for (int j = 0; j < 2; j++) {
                bfs->files[i].n_in_buf[j] = 0;
                bfs->files[i].error_code[j] = 0;
            }

            for (int j = 0; j < 2 && result == 0; j++) {
                MALLOC_N(bufsize, bfs->files[i].buf[j]);
                if (bfs->files[i].buf[j] == NULL) {
                    result = get_error_errno();
                }
            }
            if (result != 0) {
                // Never read into a buffer that could not be allocated, and
                // don't bother with the remaining files.
                break;
            }

            // Fill the first buffer right away.
            ssize_t r;
            if (bfs->compressed) {
                r = dbf_read_compressed(&bfs->files[i], bfs->files[i].buf[0], bufsize);
            } else {
                r = toku_os_read(bfs->files[i].fd, bfs->files[i].buf[0], bufsize);
            }
            if (r < 0) {
                result = get_error_errno();
                break;
            } else if (r == 0) {
                // it's EOF
                bfs->files[i].io_done = true;
                bfs->n_not_done--;
                bfs->files[i].error_code[0] = EOF;
            } else {
                bfs->files[i].n_in_buf[0] = r;
                // more may follow: let the I/O thread fill the second buffer
                enq(bfs, &bfs->files[i]);
            }
        }
    }

    if (result == 0) {
        result = toku_pthread_create(*io_thread_key,
                                     &bfs->iothread,
                                     nullptr,
                                     io_thread,
                                     static_cast<void *>(bfs));
    }
    if (result == 0) {
        *bfsp = bfs;
        return 0;
    }

    // Now undo everything.
    // If we got here, there is no thread (either an error occurred before the
    // thread was created, or else the thread creation itself failed).
    if (bfs->files) {
        // the files were allocated, so we have to free all the bufs.
        for (int i = 0; i < N; i++) {
            for (int j = 0; j < 2; j++) {
                if (bfs->files[i].buf[j])
                    toku_free(bfs->files[i].buf[j]);
                bfs->files[i].buf[j] = NULL;
            }
        }
        toku_free(bfs->files);
        bfs->files = NULL;
    }
    if (cond_inited) {
        toku_cond_destroy(&bfs->cond); // don't check error status
    }
    if (mutex_inited) {
        toku_mutex_destroy(&bfs->mutex); // don't check error status
    }
    toku_free(bfs);
    return result;
}
|
||||
|
||||
// Put the fileset into the panic state with the given error and wake up every
// thread waiting on the fileset's condition variable so it can notice.
// Always returns 0.
int panic_dbufio_fileset(DBUFIO_FILESET bfs, int error) {
    toku_mutex_lock(&bfs->mutex);
    {
        // both steps happen under the mutex
        panic(bfs, error);
        toku_cond_broadcast(&bfs->cond);
    }
    toku_mutex_unlock(&bfs->mutex);

    return 0;
}
|
||||
|
||||
// Tear down a fileset: wait for the I/O thread to finish, destroy the mutex
// and the condition variable, and free both buffers of every file, the file
// array and the fileset itself. Always returns 0.
int destroy_dbufio_fileset (DBUFIO_FILESET bfs) {
    // The thread has to be gone before anything it uses is released.
    void *thread_result;
    int join_rc = toku_pthread_join(bfs->iothread, &thread_result);
    assert(join_rc==0);
    assert(thread_result==NULL);

    toku_mutex_destroy(&bfs->mutex);
    toku_cond_destroy(&bfs->cond);

    if (bfs->files) {
        for (int i = 0; i < bfs->N; i++) {
            struct dbufio_file *f = &bfs->files[i];
            toku_free(f->buf[0]);
            toku_free(f->buf[1]);
        }
        toku_free(bfs->files);
    }
    toku_free(bfs);
    return 0;
}
|
||||
|
||||
// Read up to count bytes of file number filenum into buf_v.
//
// Data is served from the file's first buffer; when that is exhausted the
// call waits for the I/O thread to fill the second buffer, swaps the two and
// carries on. *n_read is always set: to the number of bytes copied on success
// and to 0 when an error code is returned.
//
// Returns 0 if any data was copied. Otherwise returns the error code saved
// for the file (EOF at end of file, or the errno of a failed read). A short
// read followed by an error returns 0 with the short count; the error is
// reported by the next call.
int dbufio_fileset_read (DBUFIO_FILESET bfs, int filenum, void *buf_v, size_t count, size_t *n_read) {
    char *buf = (char*)buf_v;
    struct dbufio_file *dbf = &bfs->files[filenum];
    if (dbf->error_code[0]!=0) {
        // Sticky error: report "nothing read" just like the error path after
        // a buffer swap does, instead of leaving *n_read unset.
        *n_read = 0;
        return dbf->error_code[0];
    }
    if (dbf->offset_in_buf + count <= dbf->n_in_buf[0]) {
        // Enough data is present to do it all now
        memcpy(buf, dbf->buf[0]+dbf->offset_in_buf, count);
        dbf->offset_in_buf += count;
        dbf->offset_in_uncompressed_file += count;
        *n_read = count;
        return 0;
    } else if (dbf->n_in_buf[0] > dbf->offset_in_buf) {
        // There is something in buf[0]: hand that out first, then try to get
        // the rest.
        size_t this_count = dbf->n_in_buf[0]-dbf->offset_in_buf;
        assert(dbf->offset_in_buf + this_count <= bfs->bufsize);
        memcpy(buf, dbf->buf[0]+dbf->offset_in_buf, this_count);
        dbf->offset_in_buf += this_count;
        dbf->offset_in_uncompressed_file += this_count;
        size_t sub_n_read;
        int r = dbufio_fileset_read(bfs, filenum, buf+this_count, count-this_count, &sub_n_read);
        if (r==0) {
            *n_read = this_count + sub_n_read;
            return 0;
        } else {
            // The error code will have been saved. We got some data so return that
            *n_read = this_count;
            return 0;
        }
    } else {
        // There is nothing in buf[0]. So we need to swap buffers
        toku_mutex_lock(&bfs->mutex);
        while (1) {
            if (dbf->second_buf_ready) {
                dbf->n_in_buf[0] = dbf->n_in_buf[1];
                {
                    char *tmp = dbf->buf[0];
                    dbf->buf[0] = dbf->buf[1];
                    dbf->buf[1] = tmp;
                }
                dbf->error_code[0] = dbf->error_code[1];
                dbf->second_buf_ready = false;
                dbf->offset_in_buf = 0;
                if (!dbf->io_done) {
                    // Don't enqueue it if the I/O is all done.
                    enq(bfs, dbf);
                }
                toku_cond_broadcast(&bfs->cond);
                toku_mutex_unlock(&bfs->mutex);
                if (dbf->error_code[0]==0) {
                    assert(dbf->n_in_buf[0]>0);
                    return dbufio_fileset_read(bfs, filenum, buf_v, count, n_read);
                } else {
                    *n_read = 0;
                    return dbf->error_code[0];
                }
            } else {
                // the I/O thread has not filled the second buffer yet
                toku_cond_wait(&bfs->cond, &bfs->mutex);
            }
        }
        assert(0); // cannot get here.
    }
}
|
||||
|
||||
void
|
||||
dbufio_print(DBUFIO_FILESET bfs) {
|
||||
fprintf(stderr, "%s:%d bfs=%p", __FILE__, __LINE__, bfs);
|
||||
if (bfs->panic)
|
||||
fprintf(stderr, " panic=%d", bfs->panic_errno);
|
||||
fprintf(stderr, " N=%d %d %" PRIuMAX, bfs->N, bfs->n_not_done, (uintmax_t) bfs->bufsize);
|
||||
for (int i = 0; i < bfs->N; i++) {
|
||||
struct dbufio_file *dbf = &bfs->files[i];
|
||||
if (dbf->error_code[0] || dbf->error_code[1])
|
||||
fprintf(stderr, " %d=[%d,%d]", i, dbf->error_code[0], dbf->error_code[1]);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
}
|
@ -1,58 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <toku_portability.h>
|
||||
#include <toku_pthread.h>
|
||||
|
||||
/* Maintain a set of files for reading, with double buffering for the reads. */
|
||||
|
||||
/* A DBUFIO_FILESET is a set of files. The files are indexed from 0 to N-1, where N is specified when the set is created (and the files are also provided when the set is created). */
|
||||
/* An implementation would typically use a separate thread or asynchronous I/O to fetch ahead data for each file. The system will typically fill two buffers of size M for each file. One buffer is being read out of using dbufio_fileset_read(), and the other buffer is either empty (waiting on the asynchronous I/O to start), being filled in by the asynchronous I/O mechanism, or is waiting for the caller to read data from it. */
|
||||
// Opaque handle to a set of double-buffered files.
typedef struct dbufio_fileset *DBUFIO_FILESET;
|
||||
|
||||
// Create a fileset over the N file descriptors in fds, with two buffers of
// bufsize bytes per file. If compressed is true the files are read through the
// decompressing reader. Returns 0 and stores the fileset in *bfsp on success,
// otherwise returns a nonzero error code.
int create_dbufio_fileset (DBUFIO_FILESET *bfsp, int N, int fds[/*N*/], size_t bufsize, bool compressed);
|
||||
|
||||
// Join the I/O thread and free everything owned by the fileset. Returns 0.
int destroy_dbufio_fileset(DBUFIO_FILESET);
|
||||
|
||||
// Read up to count bytes from file number filenum into buf_v and store the
// number of bytes actually copied in *n_read. Returns 0 when data was copied,
// otherwise the error code saved for the file (EOF at end of file).
int dbufio_fileset_read (DBUFIO_FILESET bfs, int filenum, void *buf_v, size_t count, size_t *n_read);
|
||||
|
||||
// Record error as the panic reason of the fileset (only the first one is
// kept) and wake up all threads waiting on it. Returns 0.
int panic_dbufio_fileset(DBUFIO_FILESET, int error);
|
||||
|
||||
// Print a one-line summary of the fileset's state to stderr.
void dbufio_print(DBUFIO_FILESET);
|
@ -1,320 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <db.h>
|
||||
|
||||
#include "portability/toku_pthread.h"
|
||||
|
||||
#include "loader/dbufio.h"
|
||||
#include "loader/loader.h"
|
||||
#include "util/queue.h"
|
||||
|
||||
enum {
|
||||
EXTRACTOR_QUEUE_DEPTH = 2,
|
||||
FILE_BUFFER_SIZE = 1<<24,
|
||||
MIN_ROWSET_MEMORY = 1<<23,
|
||||
MIN_MERGE_FANIN = 2,
|
||||
FRACTAL_WRITER_QUEUE_DEPTH = 3,
|
||||
FRACTAL_WRITER_ROWSETS = FRACTAL_WRITER_QUEUE_DEPTH + 2,
|
||||
DBUFIO_DEPTH = 2,
|
||||
TARGET_MERGE_BUF_SIZE = 1<<24, // we'd like the merge buffer to be this big.
|
||||
MIN_MERGE_BUF_SIZE = 1<<20, // always use at least this much
|
||||
MAX_UNCOMPRESSED_BUF = MIN_MERGE_BUF_SIZE
|
||||
};
|
||||
|
||||
/* These functions are exported to allow the tests to compile. */
|
||||
|
||||
/* These structures maintain a collection of all the open temporary files used by the loader. */
|
||||
struct file_info {
|
||||
bool is_open;
|
||||
bool is_extant; // if true, the file must be unlinked.
|
||||
char *fname;
|
||||
TOKU_FILE *file;
|
||||
uint64_t n_rows; // how many rows were written into that file
|
||||
size_t buffer_size;
|
||||
void *buffer;
|
||||
};
|
||||
struct file_infos {
|
||||
int n_files;
|
||||
int n_files_limit;
|
||||
struct file_info *file_infos;
|
||||
int n_files_open, n_files_extant;
|
||||
toku_mutex_t lock; // must protect this data structure because current activity performs a REALLOC(fi->file_infos).
|
||||
};
|
||||
typedef struct fidx { int idx; } FIDX;
|
||||
static const FIDX FIDX_NULL __attribute__((__unused__)) = {-1};
|
||||
static int fidx_is_null(const FIDX f) __attribute__((__unused__));
|
||||
static int fidx_is_null(const FIDX f) { return f.idx == -1; }
|
||||
TOKU_FILE *toku_bl_fidx2file(FTLOADER bl, FIDX i);
|
||||
|
||||
int ft_loader_open_temp_file(FTLOADER bl, FIDX *file_idx);
|
||||
|
||||
/* These data structures are used for manipulating a collection of rows in main memory. */
|
||||
struct row {
|
||||
size_t off; // the offset in the data array.
|
||||
int klen,vlen;
|
||||
};
|
||||
struct rowset {
|
||||
uint64_t memory_budget;
|
||||
size_t n_rows, n_rows_limit;
|
||||
struct row *rows;
|
||||
size_t n_bytes, n_bytes_limit;
|
||||
char *data;
|
||||
};
|
||||
|
||||
int init_rowset (struct rowset *rows, uint64_t memory_budget);
|
||||
void destroy_rowset(struct rowset *rows);
|
||||
int add_row(struct rowset *rows, DBT *key, DBT *val);
|
||||
|
||||
int loader_write_row(DBT *key,
|
||||
DBT *val,
|
||||
FIDX data,
|
||||
TOKU_FILE *,
|
||||
uint64_t *dataoff,
|
||||
struct wbuf *wb,
|
||||
FTLOADER bl);
|
||||
int loader_read_row(TOKU_FILE *f, DBT *key, DBT *val);
|
||||
|
||||
// Bookkeeping for the temporary files that take part in a merge.
struct merge_fileset {
|
||||
bool have_sorted_output; // Is there a previous key?
|
||||
FIDX sorted_output; // this points to one of the data_fidxs. If there is sorted output (presumably have_sorted_output; the original comment named "output_is_sorted" - TODO confirm) then this is the file containing sorted data. It's still open
|
||||
DBT prev_key; // What is it? If it's here, it's the last output in the merge fileset
|
||||
|
||||
int n_temp_files, n_temp_files_limit;
|
||||
FIDX *data_fidxs;
|
||||
};
|
||||
|
||||
void init_merge_fileset (struct merge_fileset *fs);
|
||||
void destroy_merge_fileset (struct merge_fileset *fs);
|
||||
|
||||
struct poll_callback_s {
|
||||
ft_loader_poll_func poll_function;
|
||||
void *poll_extra;
|
||||
};
|
||||
typedef struct poll_callback_s *ft_loader_poll_callback;
|
||||
|
||||
int ft_loader_init_poll_callback(ft_loader_poll_callback);
|
||||
|
||||
void ft_loader_destroy_poll_callback(ft_loader_poll_callback);
|
||||
|
||||
void ft_loader_set_poll_function(ft_loader_poll_callback, ft_loader_poll_func poll_function, void *poll_extra);
|
||||
|
||||
int ft_loader_call_poll_function(ft_loader_poll_callback, float progress);
|
||||
|
||||
struct error_callback_s {
|
||||
int error;
|
||||
ft_loader_error_func error_callback;
|
||||
void *extra;
|
||||
DB *db;
|
||||
int which_db;
|
||||
DBT key;
|
||||
DBT val;
|
||||
bool did_callback;
|
||||
toku_mutex_t mutex;
|
||||
};
|
||||
typedef struct error_callback_s *ft_loader_error_callback;
|
||||
|
||||
void ft_loader_init_error_callback(ft_loader_error_callback);
|
||||
|
||||
void ft_loader_destroy_error_callback(ft_loader_error_callback);
|
||||
|
||||
int ft_loader_get_error(ft_loader_error_callback);
|
||||
|
||||
void ft_loader_set_error_function(ft_loader_error_callback, ft_loader_error_func error_function, void *extra);
|
||||
|
||||
int ft_loader_set_error(ft_loader_error_callback, int error, DB *db, int which_db, DBT *key, DBT *val);
|
||||
|
||||
int ft_loader_call_error_function(ft_loader_error_callback);
|
||||
|
||||
int ft_loader_set_error_and_callback(ft_loader_error_callback, int error, DB *db, int which_db, DBT *key, DBT *val);
|
||||
|
||||
struct ft_loader_s {
|
||||
// These two are set in the close function, and used while running close
|
||||
struct error_callback_s error_callback;
|
||||
struct poll_callback_s poll_callback;
|
||||
|
||||
generate_row_for_put_func generate_row_for_put;
|
||||
ft_compare_func *bt_compare_funs;
|
||||
|
||||
DB *src_db;
|
||||
int N;
|
||||
DB **dbs; // N of these
|
||||
DESCRIPTOR *descriptors; // N of these.
|
||||
TXNID *root_xids_that_created; // N of these.
|
||||
const char **new_fnames_in_env; // N of these. The file names that the final data will be written to (relative to env).
|
||||
|
||||
uint64_t *extracted_datasizes; // N of these.
|
||||
|
||||
struct rowset primary_rowset; // the primary rows that have been put, but the secondary rows haven't been generated.
|
||||
struct rowset primary_rowset_temp; // the primary rows that are being worked on by the extractor_thread.
|
||||
|
||||
QUEUE primary_rowset_queue; // main thread enqueues rowsets in this queue (in maybe 64MB chunks). The extractor thread removes them, sorts them, adn writes to file.
|
||||
toku_pthread_t extractor_thread; // the thread that takes primary rowset and does extraction and the first level sort and write to file.
|
||||
bool extractor_live;
|
||||
|
||||
DBT *last_key; // for each rowset, remember the most recently output key. The system may choose not to keep this up-to-date when a rowset is unsorted. These keys are malloced and ulen maintains the size of the malloced block.
|
||||
|
||||
struct rowset *rows; // secondary rows that have been put, but haven't been sorted and written to a file.
|
||||
uint64_t n_rows; // how many rows have been put?
|
||||
struct merge_fileset *fs;
|
||||
|
||||
const char *temp_file_template;
|
||||
|
||||
CACHETABLE cachetable;
|
||||
bool did_reserve_memory;
|
||||
bool compress_intermediates;
|
||||
bool allow_puts;
|
||||
uint64_t reserved_memory; // how much memory are we allowed to use?
|
||||
|
||||
/* To make it easier to recover from errors, we don't use TOKU_FILE*,
|
||||
* instead we use an index into the file_infos. */
|
||||
struct file_infos file_infos;
|
||||
|
||||
#define PROGRESS_MAX (1 << 16)
|
||||
int progress; // Progress runs from 0 to PROGRESS_MAX. When we call the poll function we convert to a float from 0.0 to 1.0
|
||||
// We use an integer so that we can add to the progress using a fetch-and-add instruction.
|
||||
|
||||
int progress_callback_result; // initially zero, if any call to the poll function callback returns nonzero, we save the result here (and don't call the poll callback function again).
|
||||
|
||||
LSN load_lsn; //LSN of the fsynced 'load' log entry. Write this LSN (as checkpoint_lsn) in ft headers made by this loader.
|
||||
TXNID load_root_xid; //(Root) transaction that performed the load.
|
||||
|
||||
QUEUE *fractal_queues; // an array of work queues, one for each secondary index.
|
||||
toku_pthread_t *fractal_threads;
|
||||
bool *fractal_threads_live; // an array of bools indicating that fractal_threads[i] is a live thread. (There is no NULL for a pthread_t, so we have to maintain this separately).
|
||||
|
||||
unsigned fractal_workers; // number of fractal tree writer threads
|
||||
|
||||
toku_mutex_t mutex;
|
||||
bool mutex_init;
|
||||
};
|
||||
|
||||
// Set the number of rows in the loader. Used for test.
|
||||
void toku_ft_loader_set_n_rows(FTLOADER bl, uint64_t n_rows);
|
||||
|
||||
// Get the number of rows in the loader. Used for test.
|
||||
uint64_t toku_ft_loader_get_n_rows(FTLOADER bl);
|
||||
|
||||
// The data passed into a fractal_thread via pthread_create.
|
||||
struct fractal_thread_args {
|
||||
FTLOADER bl;
|
||||
const DESCRIPTOR descriptor;
|
||||
int fd; // write the ft into fd.
|
||||
int progress_allocation;
|
||||
QUEUE q;
|
||||
uint64_t total_disksize_estimate;
|
||||
int errno_result; // the final result.
|
||||
int which_db;
|
||||
uint32_t target_nodesize;
|
||||
uint32_t target_basementnodesize;
|
||||
enum toku_compression_method target_compression_method;
|
||||
uint32_t target_fanout;
|
||||
};
|
||||
|
||||
void toku_ft_loader_set_n_rows(FTLOADER bl, uint64_t n_rows);
|
||||
uint64_t toku_ft_loader_get_n_rows(FTLOADER bl);
|
||||
|
||||
int merge_row_arrays_base (struct row dest[/*an+bn*/], struct row a[/*an*/], int an, struct row b[/*bn*/], int bn,
|
||||
int which_db, DB *dest_db, ft_compare_func,
|
||||
FTLOADER,
|
||||
struct rowset *);
|
||||
|
||||
int merge_files (struct merge_fileset *fs, FTLOADER bl, int which_db, DB *dest_db, ft_compare_func, int progress_allocation, QUEUE);
|
||||
|
||||
int sort_and_write_rows (struct rowset rows, struct merge_fileset *fs, FTLOADER bl, int which_db, DB *dest_db, ft_compare_func);
|
||||
|
||||
int mergesort_row_array (struct row rows[/*n*/], int n, int which_db, DB *dest_db, ft_compare_func, FTLOADER, struct rowset *);
|
||||
|
||||
//int write_file_to_dbfile (int outfile, FIDX infile, FTLOADER bl, const DESCRIPTOR descriptor, int progress_allocation);
|
||||
int toku_merge_some_files_using_dbufio (const bool to_q, FIDX dest_data, QUEUE q, int n_sources, DBUFIO_FILESET bfs, FIDX srcs_fidxs[/*n_sources*/], FTLOADER bl, int which_db, DB *dest_db, ft_compare_func compare, int progress_allocation);
|
||||
|
||||
int ft_loader_sort_and_write_rows (struct rowset *rows, struct merge_fileset *fs, FTLOADER bl, int which_db, DB *dest_db, ft_compare_func);
|
||||
|
||||
// This is probably only for testing.
|
||||
int toku_loader_write_ft_from_q_in_C (FTLOADER bl,
|
||||
const DESCRIPTOR descriptor,
|
||||
int fd, // write to here
|
||||
int progress_allocation,
|
||||
QUEUE q,
|
||||
uint64_t total_disksize_estimate,
|
||||
int which_db,
|
||||
uint32_t target_nodesize,
|
||||
uint32_t target_basementnodesize,
|
||||
enum toku_compression_method target_compression_method,
|
||||
uint32_t fanout);
|
||||
|
||||
int ft_loader_mergesort_row_array (struct row rows[/*n*/], int n, int which_db, DB *dest_db, ft_compare_func, FTLOADER, struct rowset *);
|
||||
|
||||
int ft_loader_write_file_to_dbfile (int outfile, FIDX infile, FTLOADER bl, const DESCRIPTOR descriptor, int progress_allocation);
|
||||
|
||||
int ft_loader_init_file_infos (struct file_infos *fi);
|
||||
void ft_loader_fi_destroy (struct file_infos *fi, bool is_error);
|
||||
int ft_loader_fi_close (struct file_infos *fi, FIDX idx, bool require_open);
|
||||
int ft_loader_fi_close_all (struct file_infos *fi);
|
||||
int ft_loader_fi_reopen (struct file_infos *fi, FIDX idx, const char *mode);
|
||||
int ft_loader_fi_unlink (struct file_infos *fi, FIDX idx);
|
||||
|
||||
int toku_ft_loader_internal_init (/* out */ FTLOADER *blp,
|
||||
CACHETABLE cachetable,
|
||||
generate_row_for_put_func g,
|
||||
DB *src_db,
|
||||
int N, FT_HANDLE ft_hs[/*N*/], DB* dbs[/*N*/],
|
||||
const char *new_fnames_in_env[/*N*/],
|
||||
ft_compare_func bt_compare_functions[/*N*/],
|
||||
const char *temp_file_template,
|
||||
LSN load_lsn,
|
||||
TOKUTXN txn,
|
||||
bool reserve_memory,
|
||||
uint64_t reserve_memory_size,
|
||||
bool compress_intermediates,
|
||||
bool allow_puts);
|
||||
|
||||
void toku_ft_loader_internal_destroy (FTLOADER bl, bool is_error);
|
||||
|
||||
// For test purposes only. (In production, the rowset size is determined by negotiation with the cachetable for some memory. See #2613.)
|
||||
uint64_t toku_ft_loader_get_rowset_budget_for_testing (void);
|
||||
|
||||
int toku_ft_loader_finish_extractor(FTLOADER bl);
|
||||
|
||||
int toku_ft_loader_get_error(FTLOADER bl, int *loader_errno);
|
||||
|
||||
void ft_loader_lock_init(FTLOADER bl);
|
||||
void ft_loader_lock_destroy(FTLOADER bl);
|
||||
void ft_loader_set_fractal_workers_count_from_c(FTLOADER bl);
|
File diff suppressed because it is too large
Load Diff
@ -1,83 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "toku_portability.h"
|
||||
#include "ft/txn/txn.h"
|
||||
#include "ft/cachetable/cachetable.h"
|
||||
#include "ft/comparator.h"
|
||||
#include "ft/ft-ops.h"
|
||||
|
||||
// The loader callbacks are C functions and need to be defined as such
|
||||
|
||||
typedef void (*ft_loader_error_func)(DB *, int which_db, int err, DBT *key, DBT *val, void *extra);
|
||||
|
||||
typedef int (*ft_loader_poll_func)(void *extra, float progress);
|
||||
|
||||
typedef struct ft_loader_s *FTLOADER;
|
||||
|
||||
int toku_ft_loader_open (FTLOADER *bl,
|
||||
CACHETABLE cachetable,
|
||||
generate_row_for_put_func g,
|
||||
DB *src_db,
|
||||
int N,
|
||||
FT_HANDLE ft_hs[/*N*/], DB* dbs[/*N*/],
|
||||
const char * new_fnames_in_env[/*N*/],
|
||||
ft_compare_func bt_compare_functions[/*N*/],
|
||||
const char *temp_file_template,
|
||||
LSN load_lsn,
|
||||
TOKUTXN txn,
|
||||
bool reserve_memory,
|
||||
uint64_t reserve_memory_size,
|
||||
bool compress_intermediates,
|
||||
bool allow_puts);
|
||||
|
||||
int toku_ft_loader_put (FTLOADER bl, DBT *key, DBT *val);
|
||||
|
||||
int toku_ft_loader_close (FTLOADER bl,
|
||||
ft_loader_error_func error_callback, void *error_callback_extra,
|
||||
ft_loader_poll_func poll_callback, void *poll_callback_extra);
|
||||
|
||||
int toku_ft_loader_abort(FTLOADER bl,
|
||||
bool is_error);
|
||||
|
||||
// For test purposes only
|
||||
void toku_ft_loader_set_size_factor(uint32_t factor);
|
||||
|
||||
size_t ft_loader_leafentry_size(size_t key_size, size_t val_size, TXNID xid);
|
@ -1,181 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include <my_global.h>
|
||||
#include <toku_portability.h>
|
||||
#include "toku_os.h"
|
||||
#include "ft-internal.h"
|
||||
#include "loader/loader-internal.h"
|
||||
#include "loader/pqueue.h"
|
||||
|
||||
// Index arithmetic for the heap stored in pqueue_t::d. The array is 1-based
// (element 0 is allocated but never used), so the children of node i live at
// 2*i and 2*i+1.
// Left child of node i.
#define pqueue_left(i) ((i) << 1)
|
||||
// Right child of node i (the slot immediately after the left child).
#define pqueue_right(i) (((i) << 1) + 1)
|
||||
// Parent of node i; evaluates to 0 when i is the root (i == 1).
#define pqueue_parent(i) ((i) >> 1)
|
||||
|
||||
// Allocate and initialize an empty priority queue with room for n nodes.
//
// result       - receives the new queue on success (left untouched on failure)
// n            - maximum number of nodes the queue must hold
// which_db/db  - stored in the queue and passed to the duplicate-key callback;
//                db is also handed to the comparison function
// compare      - key comparison function
// err_callback - optional; when non-NULL it is invoked on duplicate keys
//
// Returns 0 on success, or the value of get_error_errno() when an allocation fails.
int pqueue_init(pqueue_t **result, size_t n, int which_db, DB *db, ft_compare_func compare, struct error_callback_s *err_callback)
{
    // Element 0 of the heap array is never used, so n nodes need n+1 slots.
    const size_t slots = n + 1;

    pqueue_t *MALLOC(queue);
    if (queue == NULL) {
        return get_error_errno();
    }

    MALLOC_N(slots, queue->d);
    if (queue->d == NULL) {
        // Capture the error before toku_free() runs.
        const int alloc_error = get_error_errno();
        toku_free(queue);
        return alloc_error;
    }

    // size counts the unused slot 0, so an empty queue has size 1.
    queue->size = 1;
    queue->avail = slots;
    queue->step = slots;

    queue->which_db = which_db;
    queue->db = db;
    queue->compare = compare;
    queue->dup_error = 0;
    queue->error_callback = err_callback;

    *result = queue;
    return 0;
}
|
||||
|
||||
// Release a queue created by pqueue_init(): the node-pointer array and the
// queue header. The nodes themselves are not freed.
//
// Passing NULL is a no-op, consistent with pqueue_insert() and pqueue_pop(),
// which both tolerate a NULL queue.
void pqueue_free(pqueue_t *q)
{
    if (q == NULL) {
        return;
    }
    toku_free(q->d);
    toku_free(q);
}
|
||||
|
||||
|
||||
// Number of nodes currently stored in the queue.
size_t pqueue_size(pqueue_t *q)
{
    // q->size includes the permanently unused slot 0; subtract it out.
    const size_t slots_in_use = q->size;
    return slots_in_use - 1;
}
|
||||
|
||||
// Compare next_key against curr_key with the queue's comparison function.
//
// Returns 1 when next_key >= curr_key and 0 when next_key < curr_key.
// Equal keys are treated as a duplicate: dup_error is latched on the queue and,
// if an error callback is installed, it is invoked with DB_KEYEXIST and the
// offending key/value pair.
static int pqueue_compare(pqueue_t *q, DBT *next_key, DBT *next_val, DBT *curr_key)
{
    const int order = q->compare(q->db, next_key, curr_key);
    if (order != 0) {
        return order > 0;
    }

    // duplicate key : next_key == curr_key
    q->dup_error = 1;
    if (q->error_callback != NULL) {
        ft_loader_set_error_and_callback(q->error_callback, DB_KEYEXIST, q->db, q->which_db, next_key, next_val);
    }
    return 1;
}
|
||||
|
||||
// Move the node at index i toward the root until its parent no longer
// compares >= it (or it reaches the root at index 1).
static void pqueue_bubble_up(pqueue_t *q, size_t i)
{
    pqueue_node_t *const rising = q->d[i];
    DBT *const rising_key = rising->key;

    while (i > 1) {
        const size_t up = pqueue_parent(i);
        pqueue_node_t *const above = q->d[up];
        if (!pqueue_compare(q, above->key, above->val, rising_key)) {
            break;
        }
        // Parent sinks into the hole; the hole moves up one level.
        q->d[i] = above;
        i = up;
    }

    q->d[i] = rising;
}
|
||||
|
||||
|
||||
// Pick the child of node i that percolate-down should compare against.
//
// Returns 0 when node i has no children, the left child when it is the only
// child, and otherwise the right child if the left key compares >= the right
// key (else the left child).
static size_t pqueue_maxchild(pqueue_t *q, size_t i)
{
    const size_t left = pqueue_left(i);
    if (left >= q->size) {
        return 0;
    }

    const size_t right = left + 1;
    if (right >= q->size) {
        return left;
    }

    pqueue_node_t *const left_node = q->d[left];
    const int use_right = pqueue_compare(q, left_node->key, left_node->val, q->d[right]->key);
    return use_right ? right : left;
}
|
||||
|
||||
|
||||
// Move the node at index i away from the root while it compares >= the child
// selected by pqueue_maxchild().
static void pqueue_percolate_down(pqueue_t *q, size_t i)
{
    pqueue_node_t *const sinking = q->d[i];
    DBT *const sinking_key = sinking->key;
    DBT *const sinking_val = sinking->val;

    for (;;) {
        const size_t child = pqueue_maxchild(q, i);
        if (child == 0) {
            break;  // no children left
        }
        if (!pqueue_compare(q, sinking_key, sinking_val, q->d[child]->key)) {
            break;  // heap order restored
        }
        // Child rises into the hole; the hole moves down one level.
        q->d[i] = q->d[child];
        i = child;
    }

    q->d[i] = sinking;
}
|
||||
|
||||
|
||||
// Insert node d into the queue.
//
// Returns 1 when q is NULL or the queue is already full, DB_KEYEXIST when a
// duplicate key has been detected on this queue (the flag is sticky), and 0
// otherwise.
int pqueue_insert(pqueue_t *q, pqueue_node_t *d)
{
    if (q == NULL || q->size >= q->avail) {
        return 1;
    }

    // Append at the first free slot, then restore heap order.
    const size_t slot = q->size;
    q->size = slot + 1;
    q->d[slot] = d;
    pqueue_bubble_up(q, slot);

    return q->dup_error ? DB_KEYEXIST : 0;
}
|
||||
|
||||
// Remove the root node of the heap and hand it back through *d.
//
// When q is NULL or the queue is empty, *d is set to NULL and 0 is returned.
// Otherwise returns DB_KEYEXIST if a duplicate key has been detected on this
// queue (the flag is sticky), else 0.
int pqueue_pop(pqueue_t *q, pqueue_node_t **d)
{
    const bool nothing_to_pop = (q == NULL) || (q->size == 1);
    if (nothing_to_pop) {
        *d = NULL;
        return 0;
    }

    *d = q->d[1];

    // Move the last node into the root slot and let it sink into place.
    q->size -= 1;
    q->d[1] = q->d[q->size];
    pqueue_percolate_down(q, 1);

    return q->dup_error ? DB_KEYEXIST : 0;
}
|
@ -1,68 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
// One entry of the loader's priority queue: a key/value pair held by pointer.
typedef struct ft_pqueue_node_t
|
||||
{
|
||||
// Key the heap is ordered by (handed to the queue's compare function).
DBT *key;
|
||||
// Value travelling with the key; reported alongside it on duplicate-key errors.
DBT *val;
|
||||
// NOTE(review): not used by the queue code itself; presumably an index
// identifying where this node came from -- confirm against callers.
int i;
|
||||
} pqueue_node_t;
|
||||
|
||||
// Binary-heap priority queue of pqueue_node_t pointers.
typedef struct ft_pqueue_t
|
||||
{
|
||||
// Index of the next free slot in d. Slot 0 is never used, so an empty
// queue has size == 1 and pqueue_size() reports size - 1.
size_t size;
|
||||
// Number of slots allocated in d (n + 1 for a queue created with capacity n).
size_t avail;
|
||||
// Set to the same value as avail by pqueue_init(); not otherwise used there.
size_t step;
|
||||
|
||||
// Passed through to the error callback when a duplicate key is found.
int which_db;
|
||||
DB *db; // needed for compare function
|
||||
// Key comparison function called as compare(db, a, b).
ft_compare_func compare;
|
||||
// Heap array of node pointers, 1-based.
pqueue_node_t **d;
|
||||
// Set to 1 the first time two equal keys are compared; never cleared by the
// queue code, so later insert/pop calls keep returning DB_KEYEXIST.
int dup_error;
|
||||
|
||||
// Optional (may be NULL); invoked with DB_KEYEXIST on duplicate keys.
struct error_callback_s *error_callback;
|
||||
|
||||
} pqueue_t;
|
||||
|
||||
int pqueue_init(pqueue_t **result, size_t n, int which_db, DB *db, ft_compare_func compare, struct error_callback_s *err_callback);
|
||||
void pqueue_free(pqueue_t *q);
|
||||
size_t pqueue_size(pqueue_t *q);
|
||||
int pqueue_insert(pqueue_t *q, pqueue_node_t *d);
|
||||
int pqueue_pop(pqueue_t *q, pqueue_node_t **d);
|
@ -1,225 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdio.h>
|
||||
#include <sys/types.h>
|
||||
#include <string.h>
|
||||
#include <dirent.h>
|
||||
|
||||
#include "portability/toku_list.h"
|
||||
#include "portability/toku_pthread.h"
|
||||
#include "ft/ft-internal.h"
|
||||
#include "ft/logger/log.h"
|
||||
#include "ft/logger/logfilemgr.h"
|
||||
#include "ft/txn/txn.h"
|
||||
#include "ft/txn/txn_manager.h"
|
||||
#include "ft/txn/rollback_log_node_cache.h"
|
||||
|
||||
#include "util/memarena.h"
|
||||
#include "util/omt.h"
|
||||
|
||||
using namespace toku;
|
||||
// Locking for the logger
|
||||
// For most purposes we use the big ydb lock.
|
||||
// To log: grab the buf lock
|
||||
// If the buf would overflow, then grab the file lock, swap file&buf, release buf lock, write the file, write the entry, release the file lock
|
||||
// else append to buf & release lock
|
||||
|
||||
#define LOGGER_MIN_BUF_SIZE (1<<24)
|
||||
|
||||
// TODO: Remove mylock, it has no value
|
||||
// Thin wrapper around a single toku_mutex_t, manipulated through the ml_*
// helpers below.
struct mylock {
|
||||
// The wrapped mutex.
toku_mutex_t lock;
|
||||
};
|
||||
|
||||
static inline void ml_init(struct mylock *l) {
|
||||
toku_mutex_init(*log_internal_lock_mutex_key, &l->lock, nullptr);
|
||||
}
|
||||
// TODO: source location info might have to be pulled up one caller
|
||||
// to be useful
|
||||
static inline void ml_lock(struct mylock *l) { toku_mutex_lock(&l->lock); }
|
||||
static inline void ml_unlock(struct mylock *l) {
|
||||
toku_mutex_unlock(&l->lock);
|
||||
}
|
||||
static inline void ml_destroy(struct mylock *l) {
|
||||
toku_mutex_destroy(&l->lock);
|
||||
}
|
||||
|
||||
// A log buffer; struct tokulogger keeps one for input (inbuf) and one for
// output (outbuf).
struct logbuf {
|
||||
// NOTE(review): presumably the number of bytes currently stored in buf -- confirm.
int n_in_buf;
|
||||
// NOTE(review): presumably the allocated capacity of buf in bytes -- confirm.
int buf_size;
|
||||
// Backing storage for the buffered log data.
char *buf;
|
||||
// NOTE(review): by its name, the highest LSN among entries in buf -- confirm.
LSN max_lsn_in_buf;
|
||||
};
|
||||
|
||||
// State of the write-ahead logger. Fields are grouped by the lock that must be
// held to touch them: input_lock for the accumulation side, and
// output_condition_lock for the write/fsync side.
struct tokulogger {
|
||||
// Guards lsn and inbuf (see "To access these, you must have the input lock" below).
struct mylock input_lock;
|
||||
|
||||
toku_mutex_t output_condition_lock; // if you need both this lock and input_lock, acquire the output_lock first, then input_lock. More typical is to get the output_is_available condition to be false, and then acquire the input_lock.
|
||||
toku_cond_t output_condition; // condition variable whose predicate includes output_is_available
|
||||
bool output_is_available; // this is part of the predicate for the output condition. It's true if no thread is modifying the output (either doing an fsync or otherwise fiddling with the output).
|
||||
|
||||
bool is_open;
|
||||
bool write_log_files;
|
||||
bool trim_log_files; // for test purposes
|
||||
char *directory; // file system directory
|
||||
DIR *dir; // descriptor for directory
|
||||
// NOTE(review): presumably the file descriptor of the log file being written -- confirm.
int fd;
|
||||
CACHETABLE ct;
|
||||
int lg_max; // The size of the single file in the log. Default is 100MB.
|
||||
|
||||
// To access these, you must have the input lock
|
||||
LSN lsn; // the next available lsn
|
||||
struct logbuf inbuf; // data being accumulated for the write
|
||||
|
||||
// To access these, you must have the output condition lock.
|
||||
LSN written_lsn; // the last lsn written
|
||||
LSN fsynced_lsn; // What is the LSN of the highest fsynced log entry (accessed only while holding the output lock, and updated only when the output lock and output permission are held)
|
||||
LSN last_completed_checkpoint_lsn; // What is the LSN of the most recent completed checkpoint.
|
||||
long long next_log_file_number;
|
||||
struct logbuf outbuf; // data being written to the file
|
||||
int n_in_file; // The amount of data in the current file
|
||||
|
||||
// To access the logfilemgr you must have the output condition lock.
|
||||
TOKULOGFILEMGR logfilemgr;
|
||||
|
||||
uint32_t write_block_size; // How big should the blocks be written to various logs?
|
||||
|
||||
// Statistics counters.
uint64_t num_writes_to_disk; // how many times did we write to disk?
|
||||
uint64_t bytes_written_to_disk; // how many bytes have been written to disk?
|
||||
tokutime_t time_spent_writing_to_disk; // how much tokutime did we spend writing to disk?
|
||||
uint64_t num_wait_buf_long; // how many times we waited >= 100ms for the in buf
|
||||
|
||||
// Rollback-log and transaction-manager state attached to this logger.
CACHEFILE rollback_cachefile;
|
||||
rollback_log_node_cache rollback_cache;
|
||||
TXN_MANAGER txn_manager;
|
||||
};
|
||||
|
||||
int toku_logger_find_next_unused_log_file(const char *directory, long long *result);
|
||||
int toku_logger_find_logfiles (const char *directory, char ***resultp, int *n_logfiles);
|
||||
void toku_logger_free_logfiles (char **logfiles, int n_logfiles);
|
||||
|
||||
static inline int
|
||||
txn_has_current_rollback_log(TOKUTXN txn) {
|
||||
return txn->roll_info.current_rollback.b != ROLLBACK_NONE.b;
|
||||
}
|
||||
|
||||
static inline int
|
||||
txn_has_spilled_rollback_logs(TOKUTXN txn) {
|
||||
return txn->roll_info.spilled_rollback_tail.b != ROLLBACK_NONE.b;
|
||||
}
|
||||
|
||||
// Plain-data summary of a transaction's rollback state.
// NOTE(review): the rollback-related fields appear to mirror the roll_info
// members read by txn_has_current_rollback_log() and
// txn_has_spilled_rollback_logs() -- confirm against the code that fills this in.
struct txninfo {
|
||||
uint64_t rollentry_raw_count; // the total count of every byte in the transaction and all its children.
|
||||
// Number of entries in open_fts.
// NOTE(review): inferred from the names -- confirm.
uint32_t num_fts;
|
||||
FT *open_fts;
|
||||
bool force_fsync_on_commit; //This transaction NEEDS an fsync once (if) it commits. (commit means root txn)
|
||||
uint64_t num_rollback_nodes;
|
||||
uint64_t num_rollentries;
|
||||
// Block numbers locating the spilled rollback chain and the current rollback
// node; compared against ROLLBACK_NONE by the txn_has_* helpers for TOKUTXN.
BLOCKNUM spilled_rollback_head;
|
||||
BLOCKNUM spilled_rollback_tail;
|
||||
BLOCKNUM current_rollback;
|
||||
};
|
||||
|
||||
// Size in bytes reported for a uint8_t log field: always 1. The value itself
// is not inspected.
static inline int toku_logsizeof_uint8_t (uint32_t) {
    return static_cast<int>(sizeof(uint8_t));
}
|
||||
|
||||
// Size in bytes reported for a uint32_t log field: always 4. The value itself
// is not inspected.
static inline int toku_logsizeof_uint32_t (uint32_t) {
    return static_cast<int>(sizeof(uint32_t));
}
|
||||
|
||||
// Size in bytes reported for a uint64_t log field: always 8. The value itself
// is not inspected.
//
// The parameter is declared as uint64_t (it was uint32_t) so that passing a
// 64-bit value no longer goes through an implicit narrowing conversion; every
// argument that was accepted before is still accepted.
static inline int toku_logsizeof_uint64_t (uint64_t v __attribute__((__unused__))) {
    return 8;
}
|
||||
|
||||
// Size in bytes reported for a bool log field: always 1. The value itself is
// not inspected.
static inline int toku_logsizeof_bool (uint32_t) {
    const int logged_bytes = 1;
    return logged_bytes;
}
|
||||
|
||||
// Size in bytes reported for a FILENUM log field: always 4. The value itself
// is not inspected.
static inline int toku_logsizeof_FILENUM (FILENUM) {
    const int logged_bytes = 4;
    return logged_bytes;
}
|
||||
|
||||
// Size in bytes reported for a DISKOFF log field: always 8. The value itself
// is not inspected.
static inline int toku_logsizeof_DISKOFF (DISKOFF) {
    const int logged_bytes = 8;
    return logged_bytes;
}
|
||||
// Size in bytes reported for a BLOCKNUM log field: always 8. The value itself
// is not inspected.
static inline int toku_logsizeof_BLOCKNUM (BLOCKNUM) {
    const int logged_bytes = 8;
    return logged_bytes;
}
|
||||
|
||||
// Size in bytes reported for an LSN log field: always 8. The value itself is
// not inspected.
static inline int toku_logsizeof_LSN (LSN) {
    const int logged_bytes = 8;
    return logged_bytes;
}
|
||||
|
||||
// Size in bytes reported for a TXNID log field: always 8. The value itself is
// not inspected.
static inline int toku_logsizeof_TXNID (TXNID) {
    const int logged_bytes = 8;
    return logged_bytes;
}
|
||||
|
||||
// Size in bytes reported for a TXNID_PAIR log field: always 16. The value
// itself is not inspected.
static inline int toku_logsizeof_TXNID_PAIR (TXNID_PAIR) {
    const int logged_bytes = 16;
    return logged_bytes;
}
|
||||
|
||||
// Size in bytes reported for an XID log field: the gtrid and bqual payloads
// plus a fixed 6-byte header. Both lengths are asserted to lie in [0, 64].
static inline int toku_logsizeof_XIDP (XIDP xid) {
    assert(0<=xid->gtrid_length && xid->gtrid_length<=64);
    assert(0<=xid->bqual_length && xid->bqual_length<=64);

    // formatID (4) + gtrid_length (1) + bqual_length (1)
    const int header_bytes = 4 + 1 + 1;
    const auto payload_bytes = xid->gtrid_length + xid->bqual_length;
    return payload_bytes + header_bytes;
}
|
||||
|
||||
// Size in bytes reported for a FILENUMS log field: a 4-byte header followed
// by fs.num FILENUM entries.
static inline int toku_logsizeof_FILENUMS (FILENUMS fs) {
    // fs could have .num==0 and then we cannot dereference, so size a dummy
    // FILENUM instead of an element of fs.
    static const FILENUM f = {0};
    const int header_bytes = 4;
    const int bytes_per_filenum = toku_logsizeof_FILENUM(f);
    return header_bytes + fs.num * bytes_per_filenum;
}
|
||||
|
||||
// Size in bytes reported for a BYTESTRING log field: a 4-byte header followed
// by bs.len payload bytes.
static inline int toku_logsizeof_BYTESTRING (BYTESTRING bs) {
    const int header_bytes = 4;
    return header_bytes + bs.len;
}
|
||||
|
||||
// Return a freshly allocated, NUL-terminated copy of the bytes in f.
// f->len must be > 0 (asserted). The copy comes from toku_xmalloc(); releasing
// it is left to the caller.
static inline char *fixup_fname(BYTESTRING *f) {
    assert(f->len>0);

    const auto name_len = f->len;
    char *const copy = static_cast<char *>(toku_xmalloc(name_len + 1));
    memcpy(copy, f->data, name_len);
    copy[name_len] = '\0';
    return copy;
}
|
@ -1,69 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <db.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "portability/memory.h"
|
||||
#include "portability/toku_portability.h"
|
||||
|
||||
#include "ft/logger/recover.h"
|
||||
#include "ft/txn/rollback.h"
|
||||
#include "ft/txn/txn.h"
|
||||
#include "util/bytestring.h"
|
||||
|
||||
struct roll_entry;
|
||||
|
||||
static inline void toku_free_TXNID(TXNID txnid __attribute__((__unused__))) {}
|
||||
static inline void toku_free_TXNID_PAIR(TXNID_PAIR txnid __attribute__((__unused__))) {}
|
||||
|
||||
static inline void toku_free_LSN(LSN lsn __attribute__((__unused__))) {}
|
||||
static inline void toku_free_uint64_t(uint64_t u __attribute__((__unused__))) {}
|
||||
static inline void toku_free_uint32_t(uint32_t u __attribute__((__unused__))) {}
|
||||
static inline void toku_free_uint8_t(uint8_t u __attribute__((__unused__))) {}
|
||||
static inline void toku_free_FILENUM(FILENUM u __attribute__((__unused__))) {}
|
||||
static inline void toku_free_BLOCKNUM(BLOCKNUM u __attribute__((__unused__))) {}
|
||||
static inline void toku_free_bool(bool u __attribute__((__unused__))) {}
|
||||
static inline void toku_free_XIDP(XIDP xidp) { toku_free(xidp); }
|
||||
static inline void toku_free_BYTESTRING(BYTESTRING val) { toku_free(val.data); }
|
||||
static inline void toku_free_FILENUMS(FILENUMS val) { toku_free(val.filenums); }
|
||||
|
||||
int toku_maybe_upgrade_log (const char *env_dir, const char *log_dir, LSN * lsn_of_clean_shutdown, bool * upgrade_in_progress);
|
||||
uint64_t toku_log_upgrade_get_footprint(void);
|
@ -1,295 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include <my_global.h>
|
||||
#include <ft/log_header.h>
|
||||
|
||||
#include "log-internal.h"
|
||||
#include "logger/logcursor.h"
|
||||
#include "cachetable/checkpoint.h"
|
||||
|
||||
// Running total of the per-function footprints recorded by the FOOTPRINT*
// macros in this file; for debug and accountability.
static uint64_t footprint = 0;

// Return the footprint accumulated so far.
uint64_t
toku_log_upgrade_get_footprint(void) {
    const uint64_t current = footprint;
    return current;
}
|
||||
|
||||
// Footprint concept here is that each function increments a different decimal digit.
|
||||
// The cumulative total shows the path taken for the upgrade.
|
||||
// Each function must have a single return for this to work.
|
||||
// Record that the enclosing function reached step x. The step is scaled by the
// function's increment so that it lands in that function's own decimal digit;
// each call overwrites the previous step. The argument is parenthesized so an
// expression such as FOOTPRINT(a+b) scales the whole expression.
#define FOOTPRINT(x) function_footprint=((x)*footprint_increment)

// Declare the per-function footprint state; `increment` selects the decimal
// digit this function reports in.
#define FOOTPRINTSETUP(increment) uint64_t function_footprint = 0; uint64_t footprint_increment=(increment);

// Add this function's recorded step to the file-level `footprint` total.
#define FOOTPRINTCAPTURE footprint+=function_footprint;
|
||||
|
||||
|
||||
// return 0 if clean shutdown, TOKUDB_UPGRADE_FAILURE if not clean shutdown
|
||||
static int
|
||||
verify_clean_shutdown_of_log_version_current(const char *log_dir, LSN * last_lsn, TXNID *last_xid) {
|
||||
int rval = TOKUDB_UPGRADE_FAILURE;
|
||||
TOKULOGCURSOR cursor = NULL;
|
||||
int r;
|
||||
FOOTPRINTSETUP(100);
|
||||
|
||||
FOOTPRINT(1);
|
||||
|
||||
r = toku_logcursor_create(&cursor, log_dir);
|
||||
assert(r == 0);
|
||||
struct log_entry *le = NULL;
|
||||
r = toku_logcursor_last(cursor, &le);
|
||||
if (r == 0) {
|
||||
FOOTPRINT(2);
|
||||
if (le->cmd==LT_shutdown) {
|
||||
LSN lsn = le->u.shutdown.lsn;
|
||||
if (last_lsn) {
|
||||
*last_lsn = lsn;
|
||||
}
|
||||
if (last_xid) {
|
||||
*last_xid = le->u.shutdown.last_xid;
|
||||
}
|
||||
rval = 0;
|
||||
}
|
||||
}
|
||||
r = toku_logcursor_destroy(&cursor);
|
||||
assert(r == 0);
|
||||
FOOTPRINTCAPTURE;
|
||||
return rval;
|
||||
}
|
||||
|
||||
|
||||
// return 0 if clean shutdown, TOKUDB_UPGRADE_FAILURE if not clean shutdown
|
||||
static int
|
||||
verify_clean_shutdown_of_log_version_old(const char *log_dir, LSN * last_lsn, TXNID *last_xid, uint32_t version) {
|
||||
int rval = TOKUDB_UPGRADE_FAILURE;
|
||||
int r;
|
||||
FOOTPRINTSETUP(10);
|
||||
|
||||
FOOTPRINT(1);
|
||||
|
||||
int n_logfiles;
|
||||
char **logfiles;
|
||||
r = toku_logger_find_logfiles(log_dir, &logfiles, &n_logfiles);
|
||||
if (r!=0) return r;
|
||||
|
||||
char *basename;
|
||||
TOKULOGCURSOR cursor;
|
||||
struct log_entry *entry;
|
||||
// Only look at newest log
|
||||
// basename points to first char after last / in file pathname
|
||||
basename = strrchr(logfiles[n_logfiles-1], '/') + 1;
|
||||
uint32_t version_name;
|
||||
long long index = -1;
|
||||
r = sscanf(basename, "log%lld.tokulog%u", &index, &version_name);
|
||||
assert(r==2); // found index and version
|
||||
invariant(version_name == version);
|
||||
assert(version>=TOKU_LOG_MIN_SUPPORTED_VERSION);
|
||||
assert(version< TOKU_LOG_VERSION); //Must be old
|
||||
// find last LSN
|
||||
r = toku_logcursor_create_for_file(&cursor, log_dir, basename);
|
||||
if (r != 0) {
|
||||
goto cleanup_no_logcursor;
|
||||
}
|
||||
r = toku_logcursor_last(cursor, &entry);
|
||||
if (r != 0) {
|
||||
goto cleanup;
|
||||
}
|
||||
FOOTPRINT(2);
|
||||
//TODO: Remove this special case once FT_LAYOUT_VERSION_19 (and older) are not supported.
|
||||
if (version <= FT_LAYOUT_VERSION_19) {
|
||||
if (entry->cmd==LT_shutdown_up_to_19) {
|
||||
LSN lsn = entry->u.shutdown_up_to_19.lsn;
|
||||
if (last_lsn) {
|
||||
*last_lsn = lsn;
|
||||
}
|
||||
if (last_xid) {
|
||||
// Use lsn as last_xid.
|
||||
*last_xid = lsn.lsn;
|
||||
}
|
||||
rval = 0;
|
||||
}
|
||||
}
|
||||
else if (entry->cmd==LT_shutdown) {
|
||||
LSN lsn = entry->u.shutdown.lsn;
|
||||
if (last_lsn) {
|
||||
*last_lsn = lsn;
|
||||
}
|
||||
if (last_xid) {
|
||||
*last_xid = entry->u.shutdown.last_xid;
|
||||
}
|
||||
rval = 0;
|
||||
}
|
||||
cleanup:
|
||||
r = toku_logcursor_destroy(&cursor);
|
||||
assert(r == 0);
|
||||
cleanup_no_logcursor:
|
||||
toku_logger_free_logfiles(logfiles, n_logfiles);
|
||||
FOOTPRINTCAPTURE;
|
||||
return rval;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
verify_clean_shutdown_of_log_version(const char *log_dir, uint32_t version, LSN *last_lsn, TXNID *last_xid) {
|
||||
// return 0 if clean shutdown, TOKUDB_UPGRADE_FAILURE if not clean shutdown
|
||||
int r = 0;
|
||||
FOOTPRINTSETUP(1000);
|
||||
|
||||
if (version < TOKU_LOG_VERSION) {
|
||||
FOOTPRINT(1);
|
||||
r = verify_clean_shutdown_of_log_version_old(log_dir, last_lsn, last_xid, version);
|
||||
}
|
||||
else {
|
||||
FOOTPRINT(2);
|
||||
assert(version == TOKU_LOG_VERSION);
|
||||
r = verify_clean_shutdown_of_log_version_current(log_dir, last_lsn, last_xid);
|
||||
}
|
||||
FOOTPRINTCAPTURE;
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
// Actually create a log file of the current version, making the environment be of the current version.
|
||||
// TODO: can't fail
|
||||
static int
|
||||
upgrade_log(const char *env_dir, const char *log_dir, LSN last_lsn, TXNID last_xid) { // the real deal
|
||||
int r;
|
||||
FOOTPRINTSETUP(10000);
|
||||
|
||||
LSN initial_lsn = last_lsn;
|
||||
initial_lsn.lsn++;
|
||||
CACHETABLE ct;
|
||||
TOKULOGGER logger;
|
||||
|
||||
FOOTPRINT(1);
|
||||
|
||||
{ //Create temporary environment
|
||||
toku_cachetable_create(&ct, 1<<25, initial_lsn, NULL);
|
||||
toku_cachetable_set_env_dir(ct, env_dir);
|
||||
r = toku_logger_create(&logger);
|
||||
assert(r == 0);
|
||||
toku_logger_set_cachetable(logger, ct);
|
||||
r = toku_logger_open_with_last_xid(log_dir, logger, last_xid);
|
||||
assert(r==0);
|
||||
}
|
||||
{ //Checkpoint
|
||||
CHECKPOINTER cp = toku_cachetable_get_checkpointer(ct);
|
||||
r = toku_checkpoint(cp, logger, NULL, NULL, NULL, NULL, UPGRADE_CHECKPOINT); //fsyncs log dir
|
||||
assert(r == 0);
|
||||
}
|
||||
{ //Close cachetable and logger
|
||||
toku_logger_shutdown(logger);
|
||||
toku_cachetable_close(&ct);
|
||||
r = toku_logger_close(&logger);
|
||||
assert(r==0);
|
||||
}
|
||||
{
|
||||
r = verify_clean_shutdown_of_log_version(log_dir, TOKU_LOG_VERSION, NULL, NULL);
|
||||
assert(r==0);
|
||||
}
|
||||
FOOTPRINTCAPTURE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// If log on disk is old (environment is old) and clean shutdown, then create log of current version,
|
||||
// which will make the environment of the current version (and delete the old logs).
|
||||
int
|
||||
toku_maybe_upgrade_log(const char *env_dir, const char *log_dir, LSN * lsn_of_clean_shutdown, bool * upgrade_in_progress) {
|
||||
int r;
|
||||
int lockfd = -1;
|
||||
FOOTPRINTSETUP(100000);
|
||||
|
||||
footprint = 0;
|
||||
*upgrade_in_progress = false; // set true only if all criteria are met and we're actually doing an upgrade
|
||||
|
||||
FOOTPRINT(1);
|
||||
r = toku_recover_lock(log_dir, &lockfd);
|
||||
if (r != 0) {
|
||||
goto cleanup_no_lock;
|
||||
}
|
||||
FOOTPRINT(2);
|
||||
assert(log_dir);
|
||||
assert(env_dir);
|
||||
|
||||
uint32_t version_of_logs_on_disk;
|
||||
bool found_any_logs;
|
||||
r = toku_get_version_of_logs_on_disk(log_dir, &found_any_logs, &version_of_logs_on_disk);
|
||||
if (r != 0) {
|
||||
goto cleanup;
|
||||
}
|
||||
FOOTPRINT(3);
|
||||
if (!found_any_logs)
|
||||
r = 0; //No logs means no logs to upgrade.
|
||||
else if (version_of_logs_on_disk > TOKU_LOG_VERSION)
|
||||
r = TOKUDB_DICTIONARY_TOO_NEW;
|
||||
else if (version_of_logs_on_disk < TOKU_LOG_MIN_SUPPORTED_VERSION)
|
||||
r = TOKUDB_DICTIONARY_TOO_OLD;
|
||||
else if (version_of_logs_on_disk == TOKU_LOG_VERSION)
|
||||
r = 0; //Logs are up to date
|
||||
else {
|
||||
FOOTPRINT(4);
|
||||
LSN last_lsn = ZERO_LSN;
|
||||
TXNID last_xid = TXNID_NONE;
|
||||
r = verify_clean_shutdown_of_log_version(log_dir, version_of_logs_on_disk, &last_lsn, &last_xid);
|
||||
if (r != 0) {
|
||||
if (version_of_logs_on_disk >= TOKU_LOG_VERSION_25 &&
|
||||
version_of_logs_on_disk <= TOKU_LOG_VERSION_29 &&
|
||||
TOKU_LOG_VERSION_29 == TOKU_LOG_VERSION) {
|
||||
r = 0; // can do recovery on dirty shutdown
|
||||
} else {
|
||||
fprintf(stderr, "Cannot upgrade PerconaFT version %d database.", version_of_logs_on_disk);
|
||||
fprintf(stderr, " Previous improper shutdown detected.\n");
|
||||
}
|
||||
goto cleanup;
|
||||
}
|
||||
FOOTPRINT(5);
|
||||
*lsn_of_clean_shutdown = last_lsn;
|
||||
*upgrade_in_progress = true;
|
||||
r = upgrade_log(env_dir, log_dir, last_lsn, last_xid);
|
||||
}
|
||||
cleanup:
|
||||
{
|
||||
//Clean up
|
||||
int rc;
|
||||
rc = toku_recover_unlock(lockfd);
|
||||
if (r==0) r = rc;
|
||||
}
|
||||
cleanup_no_lock:
|
||||
FOOTPRINTCAPTURE;
|
||||
return r;
|
||||
}
|
||||
|
@ -1,497 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include <my_global.h>
|
||||
#include "log-internal.h"
|
||||
#include "logger/logcursor.h"
|
||||
#include <limits.h>
|
||||
#include <unistd.h>
|
||||
|
||||
enum lc_direction { LC_FORWARD, LC_BACKWARD, LC_FIRST, LC_LAST };
|
||||
|
||||
struct toku_logcursor {
|
||||
char *logdir; // absolute directory name
|
||||
char **logfiles;
|
||||
int n_logfiles;
|
||||
int cur_logfiles_index;
|
||||
FILE *cur_fp;
|
||||
size_t buffer_size;
|
||||
void *buffer;
|
||||
bool is_open;
|
||||
struct log_entry entry;
|
||||
bool entry_valid;
|
||||
LSN cur_lsn;
|
||||
enum lc_direction last_direction;
|
||||
};
|
||||
|
||||
#define LC_LSN_ERROR (DB_RUNRECOVERY)
|
||||
|
||||
// Debugging aid: dump the state of a log cursor to stdout.
// Pointers are cast to void * because that is the only argument type "%p" is
// defined for.
void toku_logcursor_print(TOKULOGCURSOR lc) {
    printf("lc = %p\n", (void *) lc);
    printf(" logdir = %s\n", lc->logdir);
    printf(" logfiles = %p\n", (void *) lc->logfiles);
    for (int i = 0; i < lc->n_logfiles; i++) {
        char *name = lc->logfiles[i];
        printf(" logfile[%d] = %p (%s)\n", i, (void *) name, name);
    }
    printf(" n_logfiles = %d\n", lc->n_logfiles);
    printf(" cur_logfiles_index = %d\n", lc->cur_logfiles_index);
    printf(" cur_fp = %p\n", (void *) lc->cur_fp);
    printf(" cur_lsn = %" PRIu64 "\n", lc->cur_lsn.lsn);
    printf(" last_direction = %d\n", (int) lc->last_direction);
}
|
||||
|
||||
// Close the cursor's current log file if one is open.
// A failing fclose() trips an assert; the function itself always returns 0.
static int lc_close_cur_logfile(TOKULOGCURSOR lc) {
    if (!lc->is_open) {
        return 0;
    }
    int close_rc = fclose(lc->cur_fp);
    assert(0 == close_rc);
    lc->is_open = false;
    return 0;
}
|
||||
|
||||
// Return the size in bytes of the file called name.
// The stat call is asserted to succeed.
static toku_off_t lc_file_len(const char *name) {
    toku_struct_stat st;
    int stat_rc = toku_stat(name, &st, *tokudb_file_data_key);
    assert(stat_rc == 0);
    return st.st_size;
}
|
||||
|
||||
// Read the whole file once and discard what was read.  This pulls the file into
// the file system cache so that later accesses are fast; the intention is to
// speed up backward scans of the file.
// Best effort only: a file that cannot be opened is skipped, and the first
// failed or empty read ends the scan.
static void lc_catfile(const char *fname, void *buffer, size_t buffer_size) {
    const int fd = open(fname, O_RDONLY);
    if (fd < 0) {
        return;
    }
    for (;;) {
        ssize_t n_read = read(fd, buffer, buffer_size);
        if ((int) n_read <= 0) {
            break;
        }
    }
    close(fd);
}
|
||||
|
||||
// Open logfiles[index] as the cursor's current file and position the stream
// just past the log header.
//
// Returns 0 on success, DB_NOTFOUND when index is out of range or the file
// cannot be opened, and DB_BADFORMAT when the header cannot be read or carries
// an unsupported version.  The cursor is marked open only on success; on every
// failure path the stream is closed again so that it does not leak (the cursor
// only ever closes cur_fp while is_open is set).
static int lc_open_logfile(TOKULOGCURSOR lc, int index) {
    int r=0;
    assert( !lc->is_open );
    // Callers pass n_logfiles-1, which is -1 when there are no log files.
    if ( index < 0 || index >= lc->n_logfiles )
        return DB_NOTFOUND;
    // Warm the file system cache before reading through stdio.
    lc_catfile(lc->logfiles[index], lc->buffer, lc->buffer_size);
    lc->cur_fp = fopen(lc->logfiles[index], "rb");
    if ( lc->cur_fp == NULL )
        return DB_NOTFOUND;
    r = setvbuf(lc->cur_fp, (char *) lc->buffer, _IOFBF, lc->buffer_size);
    assert(r == 0);
    // position fp past header, ignore 0 length file (t:2384)
    unsigned int version=0;
    if ( lc_file_len(lc->logfiles[index]) >= 12 ) {
        r = toku_read_logmagic(lc->cur_fp, &version);
        const bool bad_header = (r != 0) ||
                                version < TOKU_LOG_MIN_SUPPORTED_VERSION ||
                                version > TOKU_LOG_VERSION;
        if (bad_header) {
            // Release the stream: is_open stays false, so nobody else would.
            fclose(lc->cur_fp);
            lc->cur_fp = NULL;
            return DB_BADFORMAT;
        }
    }
    // mark as open
    lc->is_open = true;
    return r;
}
|
||||
|
||||
// Check that the LSN of the entry just read is in sequence with the cursor's
// current LSN, then record it as the new current LSN.
//   LC_FORWARD   the entry must carry cur_lsn + 1
//   LC_BACKWARD  the entry must carry cur_lsn - 1
//   any other    no check, the LSN is simply recorded
// Returns 0 when the LSN is accepted and LC_LSN_ERROR when it is out of sequence
// (cur_lsn is left untouched in that case).
static int lc_check_lsn(TOKULOGCURSOR lc, int dir) {
    const LSN entry_lsn = toku_log_entry_get_lsn(&(lc->entry));

    bool out_of_sequence = false;
    if (dir == LC_FORWARD) {
        out_of_sequence = (entry_lsn.lsn != lc->cur_lsn.lsn + 1);
    } else if (dir == LC_BACKWARD) {
        out_of_sequence = (entry_lsn.lsn != lc->cur_lsn.lsn - 1);
    }

    if (out_of_sequence) {
        if (tokuft_recovery_trace)
            printf("DB_RUNRECOVERY: %s:%d r=%d\n", __FUNCTION__, __LINE__, 0);
        return LC_LSN_ERROR;
    }

    lc->cur_lsn.lsn = entry_lsn.lsn;
    return 0;
}
|
||||
|
||||
// toku_logcursor_create()
|
||||
// - returns a pointer to a logcursor
|
||||
|
||||
static int lc_create(TOKULOGCURSOR *lc, const char *log_dir) {
|
||||
|
||||
// malloc a cursor
|
||||
TOKULOGCURSOR cursor = (TOKULOGCURSOR) toku_xmalloc(sizeof(struct toku_logcursor));
|
||||
// find logfiles in logdir
|
||||
cursor->is_open = false;
|
||||
cursor->cur_logfiles_index = 0;
|
||||
cursor->entry_valid = false;
|
||||
cursor->buffer_size = 1<<20; // use a 1MB stream buffer (setvbuf)
|
||||
cursor->buffer = toku_malloc(cursor->buffer_size); // it does not matter if it failes
|
||||
// cursor->logdir must be an absolute path
|
||||
if (toku_os_is_absolute_name(log_dir)) {
|
||||
cursor->logdir = (char *) toku_xmalloc(strlen(log_dir)+1);
|
||||
sprintf(cursor->logdir, "%s", log_dir);
|
||||
} else {
|
||||
char cwdbuf[PATH_MAX];
|
||||
char *cwd = getcwd(cwdbuf, PATH_MAX);
|
||||
assert(cwd);
|
||||
cursor->logdir = (char *) toku_xmalloc(strlen(cwd)+strlen(log_dir)+2);
|
||||
sprintf(cursor->logdir, "%s/%s", cwd, log_dir);
|
||||
}
|
||||
cursor->logfiles = NULL;
|
||||
cursor->n_logfiles = 0;
|
||||
cursor->cur_fp = NULL;
|
||||
cursor->cur_lsn.lsn=0;
|
||||
cursor->last_direction=LC_FIRST;
|
||||
|
||||
*lc = cursor;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int lc_fix_bad_logfile(TOKULOGCURSOR lc);
|
||||
|
||||
// Create a log cursor over all log files found in log_dir.
// On success *lc receives the new cursor and 0 is returned.  On failure the
// partially built cursor is destroyed, *lc is left untouched and the error of
// the failing step is returned.
int toku_logcursor_create(TOKULOGCURSOR *lc, const char *log_dir) {
    TOKULOGCURSOR fresh;
    int r = lc_create(&fresh, log_dir);
    if (r != 0) {
        return r;
    }

    r = toku_logger_find_logfiles(fresh->logdir, &fresh->logfiles, &fresh->n_logfiles);
    if (r == 0) {
        *lc = fresh;
        return 0;
    }
    toku_logcursor_destroy(&fresh);
    return r;
}
|
||||
|
||||
// Create a log cursor that only knows about the single file log_file, which is
// looked up inside log_dir (after log_dir has been made absolute by lc_create).
// *lc receives the new cursor; returns 0, or the error reported by lc_create.
int toku_logcursor_create_for_file(TOKULOGCURSOR *lc, const char *log_dir, const char *log_file) {
    int r = lc_create(lc, log_dir);
    if (r != 0) {
        return r;
    }
    TOKULOGCURSOR c = *lc;

    // "<logdir>/<log_file>" plus terminating NUL (one byte of slack)
    int path_len = strlen(c->logdir) + strlen(log_file) + 3;
    char *XMALLOC_N(path_len, path);
    sprintf(path, "%s/%s", c->logdir, log_file);

    // The file list consists of exactly this one name; both the list and the
    // name are handed over to the cursor.
    char **XMALLOC(single_file_list);
    single_file_list[0] = path;
    c->logfiles = single_file_list;
    c->n_logfiles = 1;
    return 0;
}
|
||||
|
||||
// Release everything owned by *lc - the resources of the current entry, the
// open file, the list of log files, the directory name and the stream buffer -
// then free the cursor itself and set *lc to NULL.
// A NULL *lc is accepted and ignored.  Returns the result of closing the
// current log file.
int toku_logcursor_destroy(TOKULOGCURSOR *lc) {
    TOKULOGCURSOR c = *lc;
    if (c == NULL) {
        return 0;
    }

    if (c->entry_valid) {
        toku_log_free_log_entry_resources(&c->entry);
        c->entry_valid = false;
    }
    int r = lc_close_cur_logfile(c);
    toku_logger_free_logfiles(c->logfiles, c->n_logfiles);
    if (c->logdir) {
        toku_free(c->logdir);
    }
    if (c->buffer) {
        toku_free(c->buffer);
    }
    toku_free(c);
    *lc = NULL;
    return r;
}
|
||||
|
||||
// Read the next entry, in forward direction, into lc->entry.
// When the current file is exhausted the cursor moves on to the following log
// file(s) until an entry is found.  Returns 0 on success, DB_NOTFOUND when the
// last log file is exhausted, or the error of the failing step; read errors are
// reported on stderr after the entry's resources have been released.
static int lc_log_read(TOKULOGCURSOR lc)
{
    int r = toku_log_fread(lc->cur_fp, &(lc->entry));
    while (r == EOF) {
        // current file exhausted: advance to the next one
        r = lc_close_cur_logfile(lc);
        if (r != 0) {
            return r;
        }
        if (lc->cur_logfiles_index == lc->n_logfiles - 1) {
            return DB_NOTFOUND;
        }
        lc->cur_logfiles_index++;
        r = lc_open_logfile(lc, lc->cur_logfiles_index);
        if (r != 0) {
            return r;
        }
        r = toku_log_fread(lc->cur_fp, &(lc->entry));
    }

    if (r == 0) {
        return r;
    }

    // read error: drop whatever was read so far and report
    toku_log_free_log_entry_resources(&(lc->entry));
    time_t now = time(NULL);
    const char *bad_file = lc->logfiles[lc->cur_logfiles_index];
    if (r == DB_BADFORMAT) {
        fprintf(stderr, "%.24s PerconaFT bad log format in %s\n", ctime(&now), bad_file);
    } else {
        fprintf(stderr, "%.24s PerconaFT unexpected log format error '%s' in %s\n", ctime(&now), strerror(r), bad_file);
    }
    return r;
}
|
||||
|
||||
// Read the previous entry, in backward direction, into lc->entry.
// A result of -1 from the backward read means the position is within header
// length of the top of the file; the cursor then moves to the end of the
// preceding log file(s) until an entry is found.  Returns 0 on success,
// DB_NOTFOUND when the first log file is exhausted, or the error of the failing
// step; read errors are reported on stderr after the entry's resources have
// been released.
static int lc_log_read_backward(TOKULOGCURSOR lc)
{
    int r = toku_log_fread_backward(lc->cur_fp, &(lc->entry));
    while ( -1 == r) { // if within header length of top of file
        // move to previous file
        r = lc_close_cur_logfile(lc);
        if (r!=0)
            return r;
        if ( lc->cur_logfiles_index == 0 )
            return DB_NOTFOUND;
        lc->cur_logfiles_index--;
        r = lc_open_logfile(lc, lc->cur_logfiles_index);
        if (r!=0)
            return r;
        // seek to end
        r = fseek(lc->cur_fp, 0, SEEK_END);
        assert(0==r);
        r = toku_log_fread_backward(lc->cur_fp, &(lc->entry));
    }
    if (r!=0) {
        toku_log_free_log_entry_resources(&(lc->entry));
        time_t tnow = time(NULL);
        if (r==DB_BADFORMAT) {
            fprintf(stderr, "%.24s PerconaFT bad log format in %s\n", ctime(&tnow), lc->logfiles[lc->cur_logfiles_index]);
        }
        else {
            // Message text kept identical to the one printed by lc_log_read.
            fprintf(stderr, "%.24s PerconaFT unexpected log format error '%s' in %s\n", ctime(&tnow), strerror(r), lc->logfiles[lc->cur_logfiles_index]);
        }
    }
    return r;
}
|
||||
|
||||
// Advance the cursor by one entry.
// Without a current entry this behaves like toku_logcursor_first().  On success
// *le points at the entry stored inside the cursor and 0 is returned; otherwise
// the error of the failing read or of the LSN sequence check is returned.
int toku_logcursor_next(TOKULOGCURSOR lc, struct log_entry **le) {
    if (!lc->entry_valid) {
        return toku_logcursor_first(lc, le);
    }

    // release the entry handed out by the previous call
    toku_log_free_log_entry_resources(&(lc->entry));
    lc->entry_valid = false;
    if (lc->last_direction == LC_BACKWARD) {
        // Direction change: read one entry forward and discard it before reading
        // the real one (presumably to step back over the entry that the backward
        // read just returned).
        struct log_entry skipped;
        int skip_rc = toku_log_fread(lc->cur_fp, &skipped);
        assert(skip_rc == 0);
        toku_log_free_log_entry_resources(&skipped);
    }

    int r = lc_log_read(lc);
    if (r == 0) {
        r = lc_check_lsn(lc, LC_FORWARD);
    }
    if (r != 0) {
        return r;
    }
    lc->last_direction = LC_FORWARD;
    lc->entry_valid = true;
    *le = &(lc->entry);
    return r;
}
|
||||
|
||||
// Move the cursor back by one entry.
// Without a current entry this behaves like toku_logcursor_last().  On success
// *le points at the entry stored inside the cursor and 0 is returned; otherwise
// the error of the failing read or of the LSN sequence check is returned.
int toku_logcursor_prev(TOKULOGCURSOR lc, struct log_entry **le) {
    if (!lc->entry_valid) {
        return toku_logcursor_last(lc, le);
    }

    // release the entry handed out by the previous call
    toku_log_free_log_entry_resources(&(lc->entry));
    lc->entry_valid = false;
    if (lc->last_direction == LC_FORWARD) {
        // Direction change: read one entry backward and discard it before reading
        // the real one (presumably to step back over the entry that the forward
        // read just returned).
        struct log_entry skipped;
        int skip_rc = toku_log_fread_backward(lc->cur_fp, &skipped);
        assert(skip_rc == 0);
        toku_log_free_log_entry_resources(&skipped);
    }

    int r = lc_log_read_backward(lc);
    if (r == 0) {
        r = lc_check_lsn(lc, LC_BACKWARD);
    }
    if (r != 0) {
        return r;
    }
    lc->last_direction = LC_BACKWARD;
    lc->entry_valid = true;
    *le = &(lc->entry);
    return r;
}
|
||||
|
||||
// Position the cursor on the first log file and read an entry from it.
// A log file other than the first one is closed and the first one opened; a
// first log file that is already open is read from its current position.
// On success *le points at the entry stored inside the cursor and 0 is
// returned; otherwise the error of the failing step is returned.
int toku_logcursor_first(TOKULOGCURSOR lc, struct log_entry **le) {
    if (lc->entry_valid) {
        toku_log_free_log_entry_resources(&(lc->entry));
        lc->entry_valid = false;
    }

    // get rid of any open file that is not the first log file
    if (lc->cur_logfiles_index != 0) {
        lc_close_cur_logfile(lc);
    }

    int r = 0;
    // open the first log file unless it is already open
    if (!lc->is_open) {
        r = lc_open_logfile(lc, 0);
        if (r != 0) {
            return r;
        }
        lc->cur_logfiles_index = 0;
    }

    r = lc_log_read(lc);
    if (r == 0) {
        // LC_FIRST only records the LSN, there is nothing to compare against
        r = lc_check_lsn(lc, LC_FIRST);
    }
    if (r != 0) {
        return r;
    }
    lc->last_direction = LC_FIRST;
    lc->entry_valid = true;
    *le = &(lc->entry);
    return r;
}
|
||||
|
||||
//get last entry in the logfile specified by logcursor
|
||||
int toku_logcursor_last(TOKULOGCURSOR lc, struct log_entry **le) {
|
||||
int r=0;
|
||||
if ( lc->entry_valid ) {
|
||||
toku_log_free_log_entry_resources(&(lc->entry));
|
||||
lc->entry_valid = false;
|
||||
}
|
||||
// close any but last log file
|
||||
if ( lc->cur_logfiles_index != lc->n_logfiles-1 ) {
|
||||
lc_close_cur_logfile(lc);
|
||||
}
|
||||
// open last log file if needed
|
||||
if ( !lc->is_open ) {
|
||||
r = lc_open_logfile(lc, lc->n_logfiles-1);
|
||||
if (r!=0)
|
||||
return r;
|
||||
lc->cur_logfiles_index = lc->n_logfiles-1;
|
||||
}
|
||||
while (1) {
|
||||
// seek to end
|
||||
r = fseek(lc->cur_fp, 0, SEEK_END); assert(r==0);
|
||||
// read backward
|
||||
r = toku_log_fread_backward(lc->cur_fp, &(lc->entry));
|
||||
if (r==0) // got a good entry
|
||||
break;
|
||||
if (r>0) {
|
||||
toku_log_free_log_entry_resources(&(lc->entry));
|
||||
// got an error,
|
||||
// probably a corrupted last log entry due to a crash
|
||||
// try scanning forward from the beginning to find the last good entry
|
||||
time_t tnow = time(NULL);
|
||||
fprintf(stderr, "%.24s PerconaFT recovery repairing log\n", ctime(&tnow));
|
||||
r = lc_fix_bad_logfile(lc);
|
||||
if ( r != 0 ) {
|
||||
fprintf(stderr, "%.24s PerconaFT recovery repair unsuccessful\n", ctime(&tnow));
|
||||
return DB_BADFORMAT;
|
||||
}
|
||||
// try reading again
|
||||
r = toku_log_fread_backward(lc->cur_fp, &(lc->entry));
|
||||
if (r==0) // got a good entry
|
||||
break;
|
||||
}
|
||||
// move to previous file
|
||||
r = lc_close_cur_logfile(lc);
|
||||
if (r!=0)
|
||||
return r;
|
||||
if ( lc->cur_logfiles_index == 0 )
|
||||
return DB_NOTFOUND;
|
||||
lc->cur_logfiles_index--;
|
||||
r = lc_open_logfile(lc, lc->cur_logfiles_index);
|
||||
if (r!=0)
|
||||
return r;
|
||||
}
|
||||
r = lc_check_lsn(lc, LC_LAST);
|
||||
if (r!=0)
|
||||
return r;
|
||||
lc->last_direction = LC_LAST;
|
||||
lc->entry_valid = true;
|
||||
*le = &(lc->entry);
|
||||
return r;
|
||||
}
|
||||
|
||||
// return 0 if log exists, ENOENT if no log
|
||||
int
|
||||
toku_logcursor_log_exists(const TOKULOGCURSOR lc) {
|
||||
int r;
|
||||
|
||||
if (lc->n_logfiles)
|
||||
r = 0;
|
||||
else
|
||||
r = ENOENT;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
// fix a logfile with a bad last entry
|
||||
// - return with fp pointing to end-of-file so that toku_logcursor_last can be retried
|
||||
static int lc_fix_bad_logfile(TOKULOGCURSOR lc) {
|
||||
struct log_entry le;
|
||||
unsigned int version=0;
|
||||
int r = 0;
|
||||
|
||||
r = fseek(lc->cur_fp, 0, SEEK_SET);
|
||||
if ( r!=0 )
|
||||
return r;
|
||||
r = toku_read_logmagic(lc->cur_fp, &version);
|
||||
if ( r!=0 )
|
||||
return r;
|
||||
if (version != TOKU_LOG_VERSION)
|
||||
return -1;
|
||||
|
||||
toku_off_t last_good_pos;
|
||||
last_good_pos = ftello(lc->cur_fp);
|
||||
while (1) {
|
||||
// initialize le
|
||||
// - reading incomplete entries can result in fields that cannot be freed
|
||||
memset(&le, 0, sizeof(le));
|
||||
r = toku_log_fread(lc->cur_fp, &le);
|
||||
toku_log_free_log_entry_resources(&le);
|
||||
if ( r!=0 )
|
||||
break;
|
||||
last_good_pos = ftello(lc->cur_fp);
|
||||
}
|
||||
// now have position of last good entry
|
||||
// 1) close the file
|
||||
// 2) truncate the file to remove the error
|
||||
// 3) reopen the file
|
||||
// 4) set the pos to last
|
||||
r = lc_close_cur_logfile(lc);
|
||||
if ( r!=0 )
|
||||
return r;
|
||||
r = truncate(lc->logfiles[lc->n_logfiles - 1], last_good_pos);
|
||||
if ( r!=0 )
|
||||
return r;
|
||||
r = lc_open_logfile(lc, lc->n_logfiles-1);
|
||||
if ( r!=0 )
|
||||
return r;
|
||||
r = fseek(lc->cur_fp, 0, SEEK_END);
|
||||
if ( r!=0 )
|
||||
return r;
|
||||
return 0;
|
||||
}
|
@ -1,74 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ft/log_header.h>
|
||||
|
||||
struct toku_logcursor;
|
||||
typedef struct toku_logcursor *TOKULOGCURSOR;
|
||||
|
||||
// All routines return 0 on success
|
||||
|
||||
// toku_logcursor_create()
|
||||
// - creates a logcursor (lc)
|
||||
// - following toku_logcursor_create()
|
||||
// if toku_logcursor_next() is called, it returns the first entry in the log
|
||||
// if toku_logcursor_prev() is called, it returns the last entry in the log
|
||||
int toku_logcursor_create(TOKULOGCURSOR *lc, const char *log_dir);
|
||||
// toku_logcursor_create_for_file()
|
||||
// - creates a logcursor (lc) that only knows about the file log_file
|
||||
int toku_logcursor_create_for_file(TOKULOGCURSOR *lc, const char *log_dir, const char *log_file);
|
||||
// toku_logcursor_destroy()
|
||||
// - frees all resources associated with the logcursor, including the log_entry
|
||||
// associated with the latest cursor action
|
||||
int toku_logcursor_destroy(TOKULOGCURSOR *lc);
|
||||
|
||||
// toku_logcursor_[next,prev,first,last] manage the storage of log_entrys themselves.
// - on success, *le is set to point at the entry held inside the cursor;
//   the resources of that entry are released by the next cursor call or by
//   toku_logcursor_destroy(), so callers must not free *le.
|
||||
int toku_logcursor_next(TOKULOGCURSOR lc, struct log_entry **le);
|
||||
int toku_logcursor_prev(TOKULOGCURSOR lc, struct log_entry **le);
|
||||
|
||||
int toku_logcursor_first(const TOKULOGCURSOR lc, struct log_entry **le);
|
||||
int toku_logcursor_last(const TOKULOGCURSOR lc, struct log_entry **le);
|
||||
|
||||
// return 0 if log exists, ENOENT if no log
|
||||
int toku_logcursor_log_exists(const TOKULOGCURSOR lc);
|
||||
|
||||
void toku_logcursor_print(TOKULOGCURSOR lc);
|
@ -1,205 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include <my_global.h>
|
||||
#include "logger/log-internal.h"
|
||||
#include "logger/logcursor.h"
|
||||
#include "logger/logfilemgr.h"
|
||||
|
||||
// for now, implemented as a singly-linked list
|
||||
// first = oldest (delete from beginning)
|
||||
// last = newest (add to end)
|
||||
|
||||
struct lfm_entry {
|
||||
TOKULOGFILEINFO lf_info;
|
||||
struct lfm_entry *next;
|
||||
};
|
||||
|
||||
// Log file manager: list of per-logfile information records.
struct toku_logfilemgr {
    struct lfm_entry *first;    // oldest entry (removed first), NULL when empty
    struct lfm_entry *last;     // newest entry (appended last), NULL when empty
    int n_entries;              // number of entries in the list
};
|
||||
|
||||
// Allocate an empty log file manager and store it in *lfm.  Always returns 0.
int toku_logfilemgr_create(TOKULOGFILEMGR *lfm) {
    TOKULOGFILEMGR XMALLOC(fresh);
    fresh->n_entries = 0;
    fresh->first = NULL;
    fresh->last = NULL;
    *lfm = fresh;
    return 0;
}
|
||||
|
||||
// Free every entry of *lfm, then the manager itself, and set *lfm to NULL.
// A NULL *lfm is tolerated.  Always returns 0.
int toku_logfilemgr_destroy(TOKULOGFILEMGR *lfm) {
    TOKULOGFILEMGR mgr = *lfm;
    if (mgr == NULL) {
        return 0;
    }
    // drain the list from the oldest end
    while (mgr->n_entries > 0) {
        toku_logfilemgr_delete_oldest_logfile_info(mgr);
    }
    toku_free(mgr);
    *lfm = NULL;
    return 0;
}
|
||||
|
||||
// Populate lfm with one record per log file found in log_dir.
//
// For every file the index and version are parsed from its name
// ("log<index>.tokulog<version>") and the LSN of its last entry is looked up; a
// file from which no last entry can be read inherits the highest LSN seen so
// far (empty logfile case).
//
//   last_xid_if_clean_shutdown  out: last_xid of the shutdown entry if the most
//                               recent file that yielded an entry ended with
//                               one, TXNID_NONE otherwise
//
// Returns 0 on success or the error of the failing step.  On failure the
// records already added stay in lfm, while the file list and the record under
// construction are released here.
int toku_logfilemgr_init(TOKULOGFILEMGR lfm, const char *log_dir, TXNID *last_xid_if_clean_shutdown) {
    invariant_notnull(lfm);
    invariant_notnull(last_xid_if_clean_shutdown);

    int r;
    int n_logfiles;
    char **logfiles;
    r = toku_logger_find_logfiles(log_dir, &logfiles, &n_logfiles);
    if (r!=0)
        return r;

    LSN tmp_lsn = {0};
    TXNID last_xid = TXNID_NONE;
    for (int i = 0; i < n_logfiles; i++) {
        // file_name is the filename of the i-th logfile; a path without any
        // '/' is taken to be a bare filename already
        const char *slash = strrchr(logfiles[i], '/');
        const char *file_name = (slash != NULL) ? slash + 1 : logfiles[i];

        // find the index
        long long index = -1;
        int version;
        r = sscanf(file_name, "log%lld.tokulog%d", &index, &version);
        assert(r==2);  // found index and version
        assert(version>=TOKU_LOG_MIN_SUPPORTED_VERSION);
        assert(version<=TOKU_LOG_VERSION);

        TOKULOGFILEINFO lf_info;
        XMALLOC(lf_info);
        lf_info->index = index;
        lf_info->version = version;

        // find last LSN in logfile
        TOKULOGCURSOR cursor;
        r = toku_logcursor_create_for_file(&cursor, log_dir, file_name);
        if (r!=0) {
            // do not leak the record under construction or the file list
            toku_free(lf_info);
            toku_logger_free_logfiles(logfiles, n_logfiles);
            return r;
        }
        struct log_entry *entry;
        r = toku_logcursor_last(cursor, &entry);  // set "entry" to last log entry in logfile
        if (r == 0) {
            lf_info->maxlsn = toku_log_entry_get_lsn(entry);

            invariant(lf_info->maxlsn.lsn >= tmp_lsn.lsn);
            tmp_lsn = lf_info->maxlsn;
            if (entry->cmd == LT_shutdown) {
                last_xid = entry->u.shutdown.last_xid;
            } else {
                last_xid = TXNID_NONE;
            }
        }
        else {
            lf_info->maxlsn = tmp_lsn; // handle empty logfile (no LSN in file) case
        }

        // add to logfilemgr
        toku_logfilemgr_add_logfile_info(lfm, lf_info);
        toku_logcursor_destroy(&cursor);
    }
    toku_logger_free_logfiles(logfiles, n_logfiles);
    *last_xid_if_clean_shutdown = last_xid;
    return 0;
}
|
||||
|
||||
// Return the number of log file records held by lfm (which must be non-NULL).
int toku_logfilemgr_num_logfiles(TOKULOGFILEMGR lfm) {
    assert(lfm);
    const int count = lfm->n_entries;
    return count;
}
|
||||
|
||||
// Append lf_info to lfm as the newest record.  The record is stored by pointer
// and is freed when it is later deleted from the manager.  Always returns 0.
int toku_logfilemgr_add_logfile_info(TOKULOGFILEMGR lfm, TOKULOGFILEINFO lf_info) {
    assert(lfm);
    struct lfm_entry *XMALLOC(node);
    node->lf_info = lf_info;
    node->next = NULL;

    if (lfm->n_entries != 0) {
        // non-empty list: hook the node behind the current tail
        lfm->last->next = node;
    }
    lfm->last = node;
    lfm->n_entries++;
    if (lfm->n_entries == 1) {
        // the list was empty: the new node is also the head
        lfm->first = node;
    }
    return 0;
}
|
||||
|
||||
// Return the record of the oldest log file.
// The list is dereferenced unconditionally, so lfm must hold at least one record.
TOKULOGFILEINFO toku_logfilemgr_get_oldest_logfile_info(TOKULOGFILEMGR lfm) {
    assert(lfm);
    struct lfm_entry *oldest = lfm->first;
    return oldest->lf_info;
}
|
||||
|
||||
// Remove the oldest record from lfm and free both the record and its list node.
// Does nothing when lfm holds no records.
void toku_logfilemgr_delete_oldest_logfile_info(TOKULOGFILEMGR lfm) {
    assert(lfm);
    if (lfm->n_entries <= 0) {
        return;
    }

    struct lfm_entry *oldest = lfm->first;
    struct lfm_entry *successor = oldest->next;
    toku_free(oldest->lf_info);
    toku_free(oldest);

    lfm->first = successor;
    lfm->n_entries--;
    if (lfm->n_entries == 0) {
        // the list is empty now: reset both ends
        lfm->first = NULL;
        lfm->last = NULL;
    }
}
|
||||
|
||||
// Return the maximum LSN recorded for the newest log file, or an LSN of 0 when
// lfm holds no records.
LSN toku_logfilemgr_get_last_lsn(TOKULOGFILEMGR lfm) {
    assert(lfm);
    if (lfm->n_entries != 0) {
        return lfm->last->lf_info->maxlsn;
    }
    LSN none;
    none.lsn = 0;
    return none;
}
|
||||
|
||||
// Overwrite the maximum LSN recorded for the newest log file.
// lfm must hold at least one record.
void toku_logfilemgr_update_last_lsn(TOKULOGFILEMGR lfm, LSN lsn) {
    assert(lfm);
    struct lfm_entry *newest = lfm->last;
    assert(newest!=NULL);
    newest->lf_info->maxlsn = lsn;
}
|
||||
|
||||
// Debugging aid: dump every record of lfm to stdout, oldest first.
// The manager pointer is cast to void * because that is the only argument type
// "%p" is defined for.
void toku_logfilemgr_print(TOKULOGFILEMGR lfm) {
    assert(lfm);
    printf("toku_logfilemgr_print [%p] : %d entries \n", (void *) lfm, lfm->n_entries);
    struct lfm_entry *entry = lfm->first;
    for (int i=0;i<lfm->n_entries;i++) {
        printf(" entry %d : index = %" PRId64 ", maxlsn = %" PRIu64 "\n", i, entry->lf_info->index, entry->lf_info->maxlsn.lsn);
        entry = entry->next;
    }
}
|
@ -1,65 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ft/log_header.h>
|
||||
|
||||
// this is the basic information we need to keep per logfile
|
||||
struct toku_logfile_info {
|
||||
int64_t index;
|
||||
LSN maxlsn;
|
||||
uint32_t version;
|
||||
};
|
||||
typedef struct toku_logfile_info *TOKULOGFILEINFO;
|
||||
|
||||
struct toku_logfilemgr;
|
||||
typedef struct toku_logfilemgr *TOKULOGFILEMGR;
|
||||
|
||||
int toku_logfilemgr_create(TOKULOGFILEMGR *lfm);
|
||||
int toku_logfilemgr_destroy(TOKULOGFILEMGR *lfm);
|
||||
|
||||
int toku_logfilemgr_init(TOKULOGFILEMGR lfm, const char *log_dir, TXNID *last_xid_if_clean_shutdown);
|
||||
int toku_logfilemgr_num_logfiles(TOKULOGFILEMGR lfm);
|
||||
int toku_logfilemgr_add_logfile_info(TOKULOGFILEMGR lfm, TOKULOGFILEINFO lf_info);
|
||||
TOKULOGFILEINFO toku_logfilemgr_get_oldest_logfile_info(TOKULOGFILEMGR lfm);
|
||||
void toku_logfilemgr_delete_oldest_logfile_info(TOKULOGFILEMGR lfm);
|
||||
LSN toku_logfilemgr_get_last_lsn(TOKULOGFILEMGR lfm);
|
||||
void toku_logfilemgr_update_last_lsn(TOKULOGFILEMGR lfm, LSN lsn);
|
||||
|
||||
void toku_logfilemgr_print(TOKULOGFILEMGR lfm);
|
@ -1,835 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
/* This file defines the logformat in an executable fashion.
|
||||
* This code is used to generate
|
||||
* The code that writes into the log.
|
||||
* The code that reads the log and prints it to stdout (the log_print utility)
|
||||
* The code that reads the log for recovery.
|
||||
* The struct definitions.
|
||||
* The Latex documentation.
|
||||
*/
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <toku_portability.h>
|
||||
#include <toku_assert.h>
|
||||
|
||||
|
||||
// Description of one field of a log or rollback record. The generator copies
// `type` and `name` verbatim into the code it emits.
struct field {
    const char *type;    // type name as written into generated code
    const char *name;    // member / parameter name
    const char *format;  // optional format string
};
typedef struct field F;

// Terminating entry of a field list (loops stop when `type` is 0).
#define NULLFIELD {0,0,0}

// Prefix that turns a brace-enclosed list into an unnamed array of F.
#define FA (F[])
|
||||
|
||||
// How a generated toku_log_<name>() writer treats the transaction's "begin"
// record: ignore it (no txn parameter), log it on demand, or assert that it
// has already been logged.
enum log_begin_action {
    IGNORE_LOG_BEGIN        = 0,
    SHOULD_LOG_BEGIN        = 1,
    ASSERT_BEGIN_WAS_LOGGED = 2,
    // Used by tables for which the distinction does not apply.
    LOG_BEGIN_ACTION_NA     = IGNORE_LOG_BEGIN
};
|
||||
|
||||
struct logtype {
|
||||
const char *name;
|
||||
unsigned int command_and_flags;
|
||||
struct field *fields;
|
||||
enum log_begin_action log_begin_action;
|
||||
};
|
||||
|
||||
// In the fields, don't mention the command, the LSN, the CRC or the trailing LEN.
|
||||
|
||||
const struct logtype rollbacks[] = {
|
||||
//TODO: #2037 Add dname
|
||||
{"fdelete", 'U', FA{{"FILENUM", "filenum", 0},
|
||||
NULLFIELD}, LOG_BEGIN_ACTION_NA},
|
||||
//TODO: #2037 Add dname
|
||||
{"fcreate", 'F', FA{{"FILENUM", "filenum", 0},
|
||||
{"BYTESTRING", "iname", 0},
|
||||
NULLFIELD}, LOG_BEGIN_ACTION_NA},
|
||||
//rename file
|
||||
{"frename", 'n', FA{{"BYTESTRING", "old_iname", 0},
|
||||
{"BYTESTRING", "new_iname", 0},
|
||||
NULLFIELD}, LOG_BEGIN_ACTION_NA},
|
||||
// cmdinsert is used to insert a key-value pair into a DB. For rollback we don't need the data.
|
||||
{"cmdinsert", 'i', FA{
|
||||
{"FILENUM", "filenum", 0},
|
||||
{"BYTESTRING", "key", 0},
|
||||
NULLFIELD}, LOG_BEGIN_ACTION_NA},
|
||||
{"cmddelete", 'd', FA{
|
||||
{"FILENUM", "filenum", 0},
|
||||
{"BYTESTRING", "key", 0},
|
||||
NULLFIELD}, LOG_BEGIN_ACTION_NA},
|
||||
{"rollinclude", 'r', FA{{"TXNID_PAIR", "xid", 0},
|
||||
{"uint64_t", "num_nodes", 0},
|
||||
{"BLOCKNUM", "spilled_head", 0},
|
||||
{"BLOCKNUM", "spilled_tail", 0},
|
||||
NULLFIELD}, LOG_BEGIN_ACTION_NA},
|
||||
{"load", 'l', FA{{"FILENUM", "old_filenum", 0},
|
||||
{"BYTESTRING", "new_iname", 0},
|
||||
NULLFIELD}, LOG_BEGIN_ACTION_NA},
|
||||
// #2954
|
||||
{"hot_index", 'h', FA{{"FILENUMS", "hot_index_filenums", 0},
|
||||
NULLFIELD}, LOG_BEGIN_ACTION_NA},
|
||||
{"dictionary_redirect", 'R', FA{{"FILENUM", "old_filenum", 0},
|
||||
{"FILENUM", "new_filenum", 0},
|
||||
NULLFIELD}, LOG_BEGIN_ACTION_NA},
|
||||
{"cmdupdate", 'u', FA{{"FILENUM", "filenum", 0},
|
||||
{"BYTESTRING", "key", 0},
|
||||
NULLFIELD}, LOG_BEGIN_ACTION_NA},
|
||||
{"cmdupdatebroadcast", 'B', FA{{"FILENUM", "filenum", 0},
|
||||
{"bool", "is_resetting_op", 0},
|
||||
NULLFIELD}, LOG_BEGIN_ACTION_NA},
|
||||
{"change_fdescriptor", 'D', FA{{"FILENUM", "filenum", 0},
|
||||
{"BYTESTRING", "old_descriptor", 0},
|
||||
NULLFIELD}, LOG_BEGIN_ACTION_NA},
|
||||
{0,0,FA{NULLFIELD}, LOG_BEGIN_ACTION_NA}
|
||||
};
|
||||
|
||||
const struct logtype logtypes[] = {
|
||||
// Records produced by checkpoints
|
||||
#if 0 // no longer used, but reserve the type
|
||||
{"local_txn_checkpoint", 'c', FA{{"TXNID", "xid", 0}, NULLFIELD}},
|
||||
#endif
|
||||
{"begin_checkpoint", 'x', FA{{"uint64_t", "timestamp", 0}, {"TXNID", "last_xid", 0}, NULLFIELD}, IGNORE_LOG_BEGIN},
|
||||
{"end_checkpoint", 'X', FA{{"LSN", "lsn_begin_checkpoint", 0},
|
||||
{"uint64_t", "timestamp", 0},
|
||||
{"uint32_t", "num_fassociate_entries", 0}, // how many files were checkpointed
|
||||
{"uint32_t", "num_xstillopen_entries", 0}, // how many txns were checkpointed
|
||||
NULLFIELD}, IGNORE_LOG_BEGIN},
|
||||
//TODO: #2037 Add dname
|
||||
{"fassociate", 'f', FA{{"FILENUM", "filenum", 0},
|
||||
{"uint32_t", "treeflags", 0},
|
||||
{"BYTESTRING", "iname", 0}, // pathname of file
|
||||
{"uint8_t", "unlink_on_close", 0},
|
||||
NULLFIELD}, IGNORE_LOG_BEGIN},
|
||||
//We do not use a txninfo struct since recovery log has
|
||||
//FILENUMS and TOKUTXN has FTs (for open_fts)
|
||||
{"xstillopen", 's', FA{{"TXNID_PAIR", "xid", 0},
|
||||
{"TXNID_PAIR", "parentxid", 0},
|
||||
{"uint64_t", "rollentry_raw_count", 0},
|
||||
{"FILENUMS", "open_filenums", 0},
|
||||
{"uint8_t", "force_fsync_on_commit", 0},
|
||||
{"uint64_t", "num_rollback_nodes", 0},
|
||||
{"uint64_t", "num_rollentries", 0},
|
||||
{"BLOCKNUM", "spilled_rollback_head", 0},
|
||||
{"BLOCKNUM", "spilled_rollback_tail", 0},
|
||||
{"BLOCKNUM", "current_rollback", 0},
|
||||
NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED}, // record all transactions
|
||||
// prepared txns need a gid
|
||||
{"xstillopenprepared", 'p', FA{{"TXNID_PAIR", "xid", 0},
|
||||
{"XIDP", "xa_xid", 0}, // prepared transactions need a gid, and have no parentxid.
|
||||
{"uint64_t", "rollentry_raw_count", 0},
|
||||
{"FILENUMS", "open_filenums", 0},
|
||||
{"uint8_t", "force_fsync_on_commit", 0},
|
||||
{"uint64_t", "num_rollback_nodes", 0},
|
||||
{"uint64_t", "num_rollentries", 0},
|
||||
{"BLOCKNUM", "spilled_rollback_head", 0},
|
||||
{"BLOCKNUM", "spilled_rollback_tail", 0},
|
||||
{"BLOCKNUM", "current_rollback", 0},
|
||||
NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED}, // record all transactions
|
||||
// Records produced by transactions
|
||||
{"xbegin", 'b', FA{{"TXNID_PAIR", "xid", 0},{"TXNID_PAIR", "parentxid", 0},NULLFIELD}, IGNORE_LOG_BEGIN},
|
||||
{"xcommit",'C', FA{{"TXNID_PAIR", "xid", 0},NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED},
|
||||
{"xprepare",'P', FA{{"TXNID_PAIR", "xid", 0}, {"XIDP", "xa_xid", 0}, NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED},
|
||||
{"xabort", 'q', FA{{"TXNID_PAIR", "xid", 0},NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED},
|
||||
//TODO: #2037 Add dname
|
||||
{"fcreate", 'F', FA{{"TXNID_PAIR", "xid", 0},
|
||||
{"FILENUM", "filenum", 0},
|
||||
{"BYTESTRING", "iname", 0},
|
||||
{"uint32_t", "mode", "0%o"},
|
||||
{"uint32_t", "treeflags", 0},
|
||||
{"uint32_t", "nodesize", 0},
|
||||
{"uint32_t", "basementnodesize", 0},
|
||||
{"uint32_t", "compression_method", 0},
|
||||
NULLFIELD}, SHOULD_LOG_BEGIN},
|
||||
//TODO: #2037 Add dname
|
||||
{"fopen", 'O', FA{{"BYTESTRING", "iname", 0},
|
||||
{"FILENUM", "filenum", 0},
|
||||
{"uint32_t", "treeflags", 0},
|
||||
NULLFIELD}, IGNORE_LOG_BEGIN},
|
||||
//TODO: #2037 Add dname
|
||||
{"fclose", 'e', FA{{"BYTESTRING", "iname", 0},
|
||||
{"FILENUM", "filenum", 0},
|
||||
NULLFIELD}, IGNORE_LOG_BEGIN},
|
||||
//TODO: #2037 Add dname
|
||||
{"fdelete", 'U', FA{{"TXNID_PAIR", "xid", 0},
|
||||
{"FILENUM", "filenum", 0},
|
||||
NULLFIELD}, SHOULD_LOG_BEGIN},
|
||||
{"frename", 'n', FA{{"TXNID_PAIR", "xid", 0},
|
||||
{"BYTESTRING", "old_iname", 0},
|
||||
{"FILENUM", "old_filenum", 0},
|
||||
{"BYTESTRING", "new_iname", 0},
|
||||
NULLFIELD}, IGNORE_LOG_BEGIN},
|
||||
{"enq_insert", 'I', FA{{"FILENUM", "filenum", 0},
|
||||
{"TXNID_PAIR", "xid", 0},
|
||||
{"BYTESTRING", "key", 0},
|
||||
{"BYTESTRING", "value", 0},
|
||||
NULLFIELD}, SHOULD_LOG_BEGIN},
|
||||
{"enq_insert_no_overwrite", 'i', FA{{"FILENUM", "filenum", 0},
|
||||
{"TXNID_PAIR", "xid", 0},
|
||||
{"BYTESTRING", "key", 0},
|
||||
{"BYTESTRING", "value", 0},
|
||||
NULLFIELD}, SHOULD_LOG_BEGIN},
|
||||
{"enq_delete_any", 'E', FA{{"FILENUM", "filenum", 0},
|
||||
{"TXNID_PAIR", "xid", 0},
|
||||
{"BYTESTRING", "key", 0},
|
||||
NULLFIELD}, SHOULD_LOG_BEGIN},
|
||||
{"enq_insert_multiple", 'm', FA{{"FILENUM", "src_filenum", 0},
|
||||
{"FILENUMS", "dest_filenums", 0},
|
||||
{"TXNID_PAIR", "xid", 0},
|
||||
{"BYTESTRING", "src_key", 0},
|
||||
{"BYTESTRING", "src_val", 0},
|
||||
NULLFIELD}, SHOULD_LOG_BEGIN},
|
||||
{"enq_delete_multiple", 'M', FA{{"FILENUM", "src_filenum", 0},
|
||||
{"FILENUMS", "dest_filenums", 0},
|
||||
{"TXNID_PAIR", "xid", 0},
|
||||
{"BYTESTRING", "src_key", 0},
|
||||
{"BYTESTRING", "src_val", 0},
|
||||
NULLFIELD}, SHOULD_LOG_BEGIN},
|
||||
{"comment", 'T', FA{{"uint64_t", "timestamp", 0},
|
||||
{"BYTESTRING", "comment", 0},
|
||||
NULLFIELD}, IGNORE_LOG_BEGIN},
|
||||
// Note: shutdown_up_to_19 log entry is NOT ALLOWED TO BE CHANGED.
|
||||
// Do not change the letter ('Q'), do not add fields,
|
||||
// do not remove fields.
|
||||
// TODO: Kill this logentry entirely once we no longer support version 19.
|
||||
{"shutdown_up_to_19", 'Q', FA{{"uint64_t", "timestamp", 0},
|
||||
NULLFIELD}, IGNORE_LOG_BEGIN},
|
||||
// Note: Shutdown log entry is NOT ALLOWED TO BE CHANGED.
|
||||
// Do not change the letter ('0'), do not add fields,
|
||||
// do not remove fields.
|
||||
// You CAN leave this alone and add a new one, but then you have
|
||||
// to deal with the upgrade mechanism again.
|
||||
// This is how we detect clean shutdowns from OLDER VERSIONS.
|
||||
// This log entry must always be readable for future versions.
|
||||
// If you DO change it, you need to write a separate log upgrade mechanism.
|
||||
{"shutdown", '0', FA{{"uint64_t", "timestamp", 0},
|
||||
{"TXNID", "last_xid", 0},
|
||||
NULLFIELD}, IGNORE_LOG_BEGIN},
|
||||
{"load", 'l', FA{{"TXNID_PAIR", "xid", 0},
|
||||
{"FILENUM", "old_filenum", 0},
|
||||
{"BYTESTRING", "new_iname", 0},
|
||||
NULLFIELD}, SHOULD_LOG_BEGIN},
|
||||
// #2954
|
||||
{"hot_index", 'h', FA{{"TXNID_PAIR", "xid", 0},
|
||||
{"FILENUMS", "hot_index_filenums", 0},
|
||||
NULLFIELD}, SHOULD_LOG_BEGIN},
|
||||
{"enq_update", 'u', FA{{"FILENUM", "filenum", 0},
|
||||
{"TXNID_PAIR", "xid", 0},
|
||||
{"BYTESTRING", "key", 0},
|
||||
{"BYTESTRING", "extra", 0},
|
||||
NULLFIELD}, SHOULD_LOG_BEGIN},
|
||||
{"enq_updatebroadcast", 'B', FA{{"FILENUM", "filenum", 0},
|
||||
{"TXNID_PAIR", "xid", 0},
|
||||
{"BYTESTRING", "extra", 0},
|
||||
{"bool", "is_resetting_op", 0},
|
||||
NULLFIELD}, SHOULD_LOG_BEGIN},
|
||||
{"change_fdescriptor", 'D', FA{{"FILENUM", "filenum", 0},
|
||||
{"TXNID_PAIR", "xid", 0},
|
||||
{"BYTESTRING", "old_descriptor", 0},
|
||||
{"BYTESTRING", "new_descriptor", 0},
|
||||
{"bool", "update_cmp_descriptor", 0},
|
||||
NULLFIELD}, SHOULD_LOG_BEGIN},
|
||||
{0,0,FA{NULLFIELD}, (enum log_begin_action) 0}
|
||||
};
|
||||
|
||||
|
||||
// Run `body` once for every entry of `array`, a table of struct logtype that
// ends with an entry whose name is 0. `lt` names the loop variable and is
// visible inside `body`.
#define DO_STRUCTS(lt, array, body) do { \
    const struct logtype *lt; \
    for (lt=&(array)[0]; lt->name; lt++) { \
        body; \
    } } while (0)

// DO_STRUCTS over the rollback table.
#define DO_ROLLBACKS(lt, body) DO_STRUCTS(lt, rollbacks, body)

// DO_STRUCTS over the recovery-log table.
#define DO_LOGTYPES(lt, body) DO_STRUCTS(lt, logtypes, body)

// Run `body` over the rollback table, then over the recovery-log table.
// Written as a statement: the two loops are do/while statements, so they
// cannot be joined with the comma operator inside a parenthesized expression.
#define DO_LOGTYPES_AND_ROLLBACKS(lt, body) do { \
    DO_ROLLBACKS(lt, body); \
    DO_LOGTYPES(lt, body); \
    } while (0)

// Run `body` once for every field of the record type `lt`; the field list
// ends with an entry whose type is 0. `fld` names the loop variable.
#define DO_FIELDS(fld, lt, body) do { \
    struct field *fld; \
    for (fld=(lt)->fields; fld->type; fld++) { \
        body; \
    } } while (0)
|
||||
|
||||
|
||||
// printf-style output duplicated to two streams: the same format and
// arguments are written to f1 first and then to f2. A negative return from
// vfprintf on either stream trips an assert.
static void __attribute__((format (printf, 3, 4)))
fprintf2 (FILE *f1, FILE *f2, const char *format, ...) {
    FILE *const sinks[2] = { f1, f2 };
    for (int i = 0; i < 2; i++) {
        // The argument list is consumed by vfprintf, so start it afresh for each stream.
        va_list args;
        va_start(args, format);
        const int written = vfprintf(sinks[i], format, args);
        va_end(args);
        assert(written >= 0);
    }
}
|
||||
|
||||
// Output streams shared by the generators, all initially unset:
// hf receives declarations, cf receives function definitions, and pf
// receives the record-printing function.
FILE *hf = NULL;
FILE *cf = NULL;
FILE *pf = NULL;
|
||||
|
||||
static void
|
||||
generate_enum_internal (const char *enum_name, const char *enum_prefix, const struct logtype *lts) {
|
||||
char used_cmds[256];
|
||||
int count=0;
|
||||
memset(used_cmds, 0, 256);
|
||||
fprintf(hf, "enum %s {", enum_name);
|
||||
DO_STRUCTS(lt, lts,
|
||||
{
|
||||
unsigned char cmd = (unsigned char)(lt->command_and_flags&0xff);
|
||||
if (count!=0) fprintf(hf, ",");
|
||||
count++;
|
||||
fprintf(hf, "\n");
|
||||
fprintf(hf," %s_%-16s = '%c'", enum_prefix, lt->name, cmd);
|
||||
if (used_cmds[cmd]!=0) { fprintf(stderr, "%s:%d: error: Command %d (%c) was used twice (second time for %s)\n", __FILE__, __LINE__, cmd, cmd, lt->name); abort(); }
|
||||
used_cmds[cmd]=1;
|
||||
});
|
||||
fprintf(hf, "\n};\n\n");
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
generate_enum (void) {
|
||||
generate_enum_internal("lt_cmd", "LT", logtypes);
|
||||
generate_enum_internal("rt_cmd", "RT", rollbacks);
|
||||
}
|
||||
|
||||
static void
|
||||
generate_log_struct (void) {
|
||||
DO_LOGTYPES(lt,
|
||||
{ fprintf(hf, "struct logtype_%s {\n", lt->name);
|
||||
fprintf(hf, " %-16s lsn;\n", "LSN");
|
||||
DO_FIELDS(field_type, lt,
|
||||
fprintf(hf, " %-16s %s;\n", field_type->type, field_type->name));
|
||||
fprintf(hf, " %-16s crc;\n", "uint32_t");
|
||||
fprintf(hf, " %-16s len;\n", "uint32_t");
|
||||
fprintf(hf, "};\n");
|
||||
//fprintf(hf, "void toku_recover_%s (LSN lsn", lt->name);
|
||||
//DO_FIELDS(field_type, lt, fprintf(hf, ", %s %s", field_type->type, field_type->name));
|
||||
//fprintf(hf, ");\n");
|
||||
});
|
||||
DO_ROLLBACKS(lt,
|
||||
{ fprintf(hf, "struct rolltype_%s {\n", lt->name);
|
||||
DO_FIELDS(field_type, lt,
|
||||
fprintf(hf, " %-16s %s;\n", field_type->type, field_type->name));
|
||||
fprintf(hf, "};\n");
|
||||
fprintf(hf, "int toku_rollback_%s (", lt->name);
|
||||
DO_FIELDS(field_type, lt, fprintf(hf, "%s %s,", field_type->type, field_type->name));
|
||||
fprintf(hf, "TOKUTXN txn, LSN oplsn);\n");
|
||||
fprintf(hf, "int toku_commit_%s (", lt->name);
|
||||
DO_FIELDS(field_type, lt, fprintf(hf, "%s %s,", field_type->type, field_type->name));
|
||||
fprintf(hf, "TOKUTXN txn, LSN oplsn);\n");
|
||||
});
|
||||
fprintf(hf, "struct log_entry {\n");
|
||||
fprintf(hf, " enum lt_cmd cmd;\n");
|
||||
fprintf(hf, " union {\n");
|
||||
DO_LOGTYPES(lt, fprintf(hf," struct logtype_%s %s;\n", lt->name, lt->name));
|
||||
fprintf(hf, " } u;\n");
|
||||
fprintf(hf, "};\n");
|
||||
|
||||
fprintf(hf, "struct roll_entry {\n");
|
||||
fprintf(hf, " enum rt_cmd cmd;\n");
|
||||
fprintf(hf, " struct roll_entry *prev; /* for in-memory list of log entries. Threads from newest to oldest. */\n");
|
||||
fprintf(hf, " union {\n");
|
||||
DO_ROLLBACKS(lt, fprintf(hf," struct rolltype_%s %s;\n", lt->name, lt->name));
|
||||
fprintf(hf, " } u;\n");
|
||||
fprintf(hf, "};\n");
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
generate_dispatch (void) {
|
||||
fprintf(hf, "#define rolltype_dispatch(s, funprefix) ({ switch((s)->cmd) {\\\n");
|
||||
DO_ROLLBACKS(lt, fprintf(hf, " case RT_%s: funprefix ## %s (&(s)->u.%s); break;\\\n", lt->name, lt->name, lt->name));
|
||||
fprintf(hf, " }})\n");
|
||||
|
||||
fprintf(hf, "#define logtype_dispatch_assign(s, funprefix, var, ...) do { switch((s)->cmd) {\\\n");
|
||||
DO_LOGTYPES(lt, fprintf(hf, " case LT_%s: var = funprefix ## %s (&(s)->u.%s, __VA_ARGS__); break;\\\n", lt->name, lt->name, lt->name));
|
||||
fprintf(hf, " }} while (0)\n");
|
||||
|
||||
fprintf(hf, "#define rolltype_dispatch_assign(s, funprefix, var, ...) do { \\\n");
|
||||
fprintf(hf, " switch((s)->cmd) {\\\n");
|
||||
DO_ROLLBACKS(lt, {
|
||||
fprintf(hf, " case RT_%s: var = funprefix ## %s (", lt->name, lt->name);
|
||||
int fieldcount=0;
|
||||
DO_FIELDS(field_type, lt, {
|
||||
if (fieldcount>0) fprintf(hf, ",");
|
||||
fprintf(hf, "(s)->u.%s.%s", lt->name, field_type->name);
|
||||
fieldcount++;
|
||||
});
|
||||
fprintf(hf, ", __VA_ARGS__); break;\\\n");
|
||||
});
|
||||
fprintf(hf, " default: assert(0);} } while (0)\n");
|
||||
|
||||
fprintf(hf, "#define logtype_dispatch_args(s, funprefix, ...) do { switch((s)->cmd) {\\\n");
|
||||
DO_LOGTYPES(lt,
|
||||
{
|
||||
fprintf(hf, " case LT_%s: funprefix ## %s ((s)->u.%s.lsn", lt->name, lt->name, lt->name);
|
||||
DO_FIELDS(field_type, lt, fprintf(hf, ",(s)->u.%s.%s", lt->name, field_type->name));
|
||||
fprintf(hf, ", __VA_ARGS__); break;\\\n");
|
||||
});
|
||||
fprintf(hf, " }} while (0)\n");
|
||||
}
|
||||
|
||||
static void
|
||||
generate_get_timestamp(void) {
|
||||
fprintf(cf, "static uint64_t toku_get_timestamp(void) {\n");
|
||||
fprintf(cf, " struct timeval tv; int r = gettimeofday(&tv, NULL);\n");
|
||||
fprintf(cf, " assert(r==0);\n");
|
||||
fprintf(cf, " return tv.tv_sec * 1000000ULL + tv.tv_usec;\n");
|
||||
fprintf(cf, "}\n");
|
||||
}
|
||||
|
||||
static void
|
||||
generate_log_writer (void) {
|
||||
generate_get_timestamp();
|
||||
DO_LOGTYPES(lt, {
|
||||
//TODO(yoni): The overhead variables are NOT correct for BYTESTRING, FILENUMS (or any other variable length type)
|
||||
// We should switch to something like using toku_logsizeof_*.
|
||||
fprintf(hf, "static const size_t toku_log_%s_overhead = (+4+1+8", lt->name);
|
||||
DO_FIELDS(field_type, lt, fprintf(hf, "+sizeof(%s)", field_type->type));
|
||||
fprintf(hf, "+8);\n");
|
||||
fprintf2(cf, hf, "void toku_log_%s (TOKULOGGER logger, LSN *lsnp, int do_fsync", lt->name);
|
||||
switch (lt->log_begin_action) {
|
||||
case SHOULD_LOG_BEGIN:
|
||||
case ASSERT_BEGIN_WAS_LOGGED: {
|
||||
fprintf2(cf, hf, ", TOKUTXN txn");
|
||||
break;
|
||||
}
|
||||
case IGNORE_LOG_BEGIN: break;
|
||||
}
|
||||
DO_FIELDS(field_type, lt, fprintf2(cf, hf, ", %s %s", field_type->type, field_type->name));
|
||||
fprintf(hf, ");\n");
|
||||
fprintf(cf, ") {\n");
|
||||
fprintf(cf, " if (logger == NULL) {\n");
|
||||
fprintf(cf, " return;\n");
|
||||
fprintf(cf, " }\n");
|
||||
switch (lt->log_begin_action) {
|
||||
case SHOULD_LOG_BEGIN: {
|
||||
fprintf(cf, " //txn can be NULL during tests\n");
|
||||
fprintf(cf, " //never null when not checkpoint.\n");
|
||||
fprintf(cf, " if (txn && !txn->begin_was_logged) {\n");
|
||||
fprintf(cf, " invariant(!txn_declared_read_only(txn));\n");
|
||||
fprintf(cf, " toku_maybe_log_begin_txn_for_write_operation(txn);\n");
|
||||
fprintf(cf, " }\n");
|
||||
break;
|
||||
}
|
||||
case ASSERT_BEGIN_WAS_LOGGED: {
|
||||
fprintf(cf, " //txn can be NULL during tests\n");
|
||||
fprintf(cf, " invariant(!txn || txn->begin_was_logged);\n");
|
||||
fprintf(cf, " invariant(!txn || !txn_declared_read_only(txn));\n");
|
||||
break;
|
||||
}
|
||||
case IGNORE_LOG_BEGIN: break;
|
||||
}
|
||||
fprintf(cf, " if (!logger->write_log_files) {\n");
|
||||
fprintf(cf, " ml_lock(&logger->input_lock);\n");
|
||||
fprintf(cf, " logger->lsn.lsn++;\n");
|
||||
fprintf(cf, " if (lsnp) *lsnp=logger->lsn;\n");
|
||||
fprintf(cf, " ml_unlock(&logger->input_lock);\n");
|
||||
fprintf(cf, " return;\n");
|
||||
fprintf(cf, " }\n");
|
||||
fprintf(cf, " const unsigned int buflen= (+4 // len at the beginning\n");
|
||||
fprintf(cf, " +1 // log command\n");
|
||||
fprintf(cf, " +8 // lsn\n");
|
||||
DO_FIELDS(field_type, lt,
|
||||
fprintf(cf, " +toku_logsizeof_%s(%s)\n", field_type->type, field_type->name));
|
||||
fprintf(cf, " +8 // crc + len\n");
|
||||
fprintf(cf, " );\n");
|
||||
fprintf(cf, " struct wbuf wbuf;\n");
|
||||
fprintf(cf, " ml_lock(&logger->input_lock);\n");
|
||||
fprintf(cf, " toku_logger_make_space_in_inbuf(logger, buflen);\n");
|
||||
fprintf(cf, " wbuf_nocrc_init(&wbuf, logger->inbuf.buf+logger->inbuf.n_in_buf, buflen);\n");
|
||||
fprintf(cf, " wbuf_nocrc_int(&wbuf, buflen);\n");
|
||||
fprintf(cf, " wbuf_nocrc_char(&wbuf, '%c');\n", (char)(0xff<->command_and_flags));
|
||||
fprintf(cf, " logger->lsn.lsn++;\n");
|
||||
fprintf(cf, " logger->inbuf.max_lsn_in_buf = logger->lsn;\n");
|
||||
fprintf(cf, " wbuf_nocrc_LSN(&wbuf, logger->lsn);\n");
|
||||
fprintf(cf, " if (lsnp) *lsnp=logger->lsn;\n");
|
||||
DO_FIELDS(field_type, lt,
|
||||
if (strcmp(field_type->name, "timestamp") == 0)
|
||||
fprintf(cf, " if (timestamp == 0) timestamp = toku_get_timestamp();\n");
|
||||
fprintf(cf, " wbuf_nocrc_%s(&wbuf, %s);\n", field_type->type, field_type->name));
|
||||
fprintf(cf, " wbuf_nocrc_int(&wbuf, toku_x1764_memory(wbuf.buf, wbuf.ndone));\n");
|
||||
fprintf(cf, " wbuf_nocrc_int(&wbuf, buflen);\n");
|
||||
fprintf(cf, " assert(wbuf.ndone==buflen);\n");
|
||||
fprintf(cf, " logger->inbuf.n_in_buf += buflen;\n");
|
||||
fprintf(cf, " toku_logger_maybe_fsync(logger, logger->lsn, do_fsync, true);\n");
|
||||
fprintf(cf, "}\n\n");
|
||||
});
|
||||
}
|
||||
|
||||
static void
|
||||
generate_log_reader (void) {
|
||||
DO_LOGTYPES(lt, {
|
||||
fprintf(cf, "static int toku_log_fread_%s (FILE *infile, uint32_t len1, struct logtype_%s *data, struct x1764 *checksum)", lt->name, lt->name);
|
||||
fprintf(cf, " {\n");
|
||||
fprintf(cf, " int r=0;\n");
|
||||
fprintf(cf, " uint32_t actual_len=5; // 1 for the command, 4 for the first len.\n");
|
||||
fprintf(cf, " r=toku_fread_%-16s(infile, &data->%-16s, checksum, &actual_len); if (r!=0) return r;\n", "LSN", "lsn");
|
||||
DO_FIELDS(field_type, lt,
|
||||
fprintf(cf, " r=toku_fread_%-16s(infile, &data->%-16s, checksum, &actual_len); if (r!=0) return r;\n", field_type->type, field_type->name));
|
||||
fprintf(cf, " uint32_t checksum_in_file, len_in_file;\n");
|
||||
fprintf(cf, " r=toku_fread_uint32_t_nocrclen(infile, &checksum_in_file); actual_len+=4; if (r!=0) return r;\n");
|
||||
fprintf(cf, " r=toku_fread_uint32_t_nocrclen(infile, &len_in_file); actual_len+=4; if (r!=0) return r;\n");
|
||||
fprintf(cf, " if (checksum_in_file!=toku_x1764_finish(checksum) || len_in_file!=actual_len || len1 != len_in_file) return DB_BADFORMAT;\n");
|
||||
fprintf(cf, " return 0;\n");
|
||||
fprintf(cf, "}\n\n");
|
||||
});
|
||||
fprintf2(cf, hf, "int toku_log_fread (FILE *infile, struct log_entry *le)");
|
||||
fprintf(hf, ";\n");
|
||||
fprintf(cf, " {\n");
|
||||
fprintf(cf, " uint32_t len1; int r;\n");
|
||||
fprintf(cf, " uint32_t ignorelen=0;\n");
|
||||
fprintf(cf, " struct x1764 checksum;\n");
|
||||
fprintf(cf, " toku_x1764_init(&checksum);\n");
|
||||
fprintf(cf, " r = toku_fread_uint32_t(infile, &len1, &checksum, &ignorelen); if (r!=0) return r;\n");
|
||||
fprintf(cf, " int cmd=fgetc(infile);\n");
|
||||
fprintf(cf, " if (cmd==EOF) return EOF;\n");
|
||||
fprintf(cf, " char cmdchar = (char)cmd;\n");
|
||||
fprintf(cf, " toku_x1764_add(&checksum, &cmdchar, 1);\n");
|
||||
fprintf(cf, " le->cmd=(enum lt_cmd)cmd;\n");
|
||||
fprintf(cf, " switch ((enum lt_cmd)cmd) {\n");
|
||||
DO_LOGTYPES(lt, {
|
||||
fprintf(cf, " case LT_%s:\n", lt->name);
|
||||
fprintf(cf, " return toku_log_fread_%s (infile, len1, &le->u.%s, &checksum);\n", lt->name, lt->name);
|
||||
});
|
||||
fprintf(cf, " };\n");
|
||||
fprintf(cf, " return DB_BADFORMAT;\n"); // Should read past the record using the len field.
|
||||
fprintf(cf, "}\n\n");
|
||||
//fprintf2(cf, hf, "// Return 0 if there is something to read, return -1 if nothing to read, abort if an error.\n");
|
||||
fprintf2(cf, hf, "// Return 0 if there is something to read, -1 if nothing to read, >0 on error\n");
|
||||
fprintf2(cf, hf, "int toku_log_fread_backward (FILE *infile, struct log_entry *le)");
|
||||
fprintf(hf, ";\n");
|
||||
fprintf(cf, "{\n");
|
||||
fprintf(cf, " memset(le, 0, sizeof(*le));\n");
|
||||
fprintf(cf, " long pos = ftell(infile);\n");
|
||||
fprintf(cf, " if (pos<=12) return -1;\n");
|
||||
fprintf(cf, " int r = fseek(infile, -4, SEEK_CUR); \n");// assert(r==0);\n");
|
||||
fprintf(cf, " if (r!=0) return get_error_errno();\n");
|
||||
fprintf(cf, " uint32_t len;\n");
|
||||
fprintf(cf, " r = toku_fread_uint32_t_nocrclen(infile, &len); \n");// assert(r==0);\n");
|
||||
fprintf(cf, " if (r!=0) return 1;\n");
|
||||
fprintf(cf, " r = fseek(infile, -(int)len, SEEK_CUR) ; \n");// assert(r==0);\n");
|
||||
fprintf(cf, " if (r!=0) return get_error_errno();\n");
|
||||
fprintf(cf, " r = toku_log_fread(infile, le); \n");// assert(r==0);\n");
|
||||
fprintf(cf, " if (r!=0) return 1;\n");
|
||||
fprintf(cf, " long afterpos = ftell(infile);\n");
|
||||
fprintf(cf, " if (afterpos != pos) return 1;\n");
|
||||
fprintf(cf, " r = fseek(infile, -(int)len, SEEK_CUR); \n");// assert(r==0);\n");
|
||||
fprintf(cf, " if (r!=0) return get_error_errno();\n");
|
||||
fprintf(cf, " return 0;\n");
|
||||
fprintf(cf, "}\n\n");
|
||||
|
||||
DO_LOGTYPES(lt, ({
|
||||
fprintf(cf, "static void toku_log_free_log_entry_%s_resources (struct logtype_%s *data", lt->name, lt->name);
|
||||
if (!lt->fields->type) fprintf(cf, " __attribute__((__unused__))");
|
||||
fprintf(cf, ") {\n");
|
||||
DO_FIELDS(field_type, lt,
|
||||
fprintf(cf, " toku_free_%s(data->%s);\n", field_type->type, field_type->name);
|
||||
);
|
||||
fprintf(cf, "}\n\n");
|
||||
}));
|
||||
fprintf2(cf, hf, "void toku_log_free_log_entry_resources (struct log_entry *le)");
|
||||
fprintf(hf, ";\n");
|
||||
fprintf(cf, " {\n");
|
||||
fprintf(cf, " switch ((enum lt_cmd)le->cmd) {\n");
|
||||
DO_LOGTYPES(lt, {
|
||||
fprintf(cf, " case LT_%s:\n", lt->name);
|
||||
fprintf(cf, " return toku_log_free_log_entry_%s_resources (&(le->u.%s));\n", lt->name, lt->name);
|
||||
});
|
||||
fprintf(cf, " };\n");
|
||||
fprintf(cf, " return;\n");
|
||||
fprintf(cf, "}\n\n");
|
||||
}
|
||||
|
||||
static void
|
||||
generate_logprint (void) {
|
||||
unsigned maxnamelen=0;
|
||||
fprintf2(pf, hf, "int toku_logprint_one_record(FILE *outf, FILE *f)");
|
||||
fprintf(hf, ";\n");
|
||||
fprintf(pf, " {\n");
|
||||
fprintf(pf, " int cmd, r;\n");
|
||||
fprintf(pf, " uint32_t len1, crc_in_file;\n");
|
||||
fprintf(pf, " uint32_t ignorelen=0;\n");
|
||||
fprintf(pf, " struct x1764 checksum;\n");
|
||||
fprintf(pf, " toku_x1764_init(&checksum);\n");
|
||||
fprintf(pf, " r=toku_fread_uint32_t(f, &len1, &checksum, &ignorelen);\n");
|
||||
fprintf(pf, " if (r==EOF) return EOF;\n");
|
||||
fprintf(pf, " cmd=fgetc(f);\n");
|
||||
fprintf(pf, " if (cmd==EOF) return DB_BADFORMAT;\n");
|
||||
fprintf(pf, " uint32_t len_in_file, len=1+4; // cmd + len1\n");
|
||||
fprintf(pf, " char charcmd = (char)cmd;\n");
|
||||
fprintf(pf, " toku_x1764_add(&checksum, &charcmd, 1);\n");
|
||||
fprintf(pf, " switch ((enum lt_cmd)cmd) {\n");
|
||||
DO_LOGTYPES(lt, { if (strlen(lt->name)>maxnamelen) maxnamelen=strlen(lt->name); });
|
||||
DO_LOGTYPES(lt, {
|
||||
unsigned char cmd = (unsigned char)(0xff<->command_and_flags);
|
||||
fprintf(pf, " case LT_%s: \n", lt->name);
|
||||
// We aren't using the log reader here because we want better diagnostics as soon as things go wrong.
|
||||
fprintf(pf, " fprintf(outf, \"%%-%us \", \"%s\");\n", maxnamelen, lt->name);
|
||||
if (isprint(cmd)) fprintf(pf," fprintf(outf, \" '%c':\");\n", cmd);
|
||||
else fprintf(pf," fprintf(outf, \"0%03o:\");\n", cmd);
|
||||
fprintf(pf, " r = toku_logprint_%-16s(outf, f, \"lsn\", &checksum, &len, 0); if (r!=0) return r;\n", "LSN");
|
||||
DO_FIELDS(field_type, lt, {
|
||||
fprintf(pf, " r = toku_logprint_%-16s(outf, f, \"%s\", &checksum, &len,", field_type->type, field_type->name);
|
||||
if (field_type->format) fprintf(pf, "\"%s\"", field_type->format);
|
||||
else fprintf(pf, "0");
|
||||
fprintf(pf, "); if (r!=0) return r;\n");
|
||||
});
|
||||
fprintf(pf, " {\n");
|
||||
fprintf(pf, " uint32_t actual_murmur = toku_x1764_finish(&checksum);\n");
|
||||
fprintf(pf, " r = toku_fread_uint32_t_nocrclen (f, &crc_in_file); len+=4; if (r!=0) return r;\n");
|
||||
fprintf(pf, " fprintf(outf, \" crc=%%08x\", crc_in_file);\n");
|
||||
fprintf(pf, " if (crc_in_file!=actual_murmur) fprintf(outf, \" checksum=%%08x\", actual_murmur);\n");
|
||||
fprintf(pf, " r = toku_fread_uint32_t_nocrclen (f, &len_in_file); len+=4; if (r!=0) return r;\n");
|
||||
fprintf(pf, " fprintf(outf, \" len=%%u\", len_in_file);\n");
|
||||
fprintf(pf, " if (len_in_file!=len) fprintf(outf, \" actual_len=%%u\", len);\n");
|
||||
fprintf(pf, " if (len_in_file!=len || crc_in_file!=actual_murmur) return DB_BADFORMAT;\n");
|
||||
fprintf(pf, " };\n");
|
||||
fprintf(pf, " fprintf(outf, \"\\n\");\n");
|
||||
fprintf(pf, " return 0;\n\n");
|
||||
});
|
||||
fprintf(pf, " }\n");
|
||||
fprintf(pf, " fprintf(outf, \"Unknown command %%d ('%%c')\", cmd, cmd);\n");
|
||||
fprintf(pf, " return DB_BADFORMAT;\n");
|
||||
fprintf(pf, "}\n\n");
|
||||
}
|
||||
|
||||
static void
|
||||
generate_rollbacks (void) {
|
||||
DO_ROLLBACKS(lt, {
|
||||
fprintf2(cf, hf, "void toku_logger_save_rollback_%s (TOKUTXN txn", lt->name);
|
||||
DO_FIELDS(field_type, lt, {
|
||||
if ( strcmp(field_type->type, "BYTESTRING") == 0 ) {
|
||||
fprintf2(cf, hf, ", BYTESTRING *%s_ptr", field_type->name);
|
||||
}
|
||||
else if ( strcmp(field_type->type, "FILENUMS") == 0 ) {
|
||||
fprintf2(cf, hf, ", FILENUMS *%s_ptr", field_type->name);
|
||||
}
|
||||
else {
|
||||
fprintf2(cf, hf, ", %s %s", field_type->type, field_type->name);
|
||||
}
|
||||
});
|
||||
|
||||
fprintf(hf, ");\n");
|
||||
fprintf(cf, ") {\n");
|
||||
fprintf(cf, " toku_txn_lock(txn);\n");
|
||||
fprintf(cf, " ROLLBACK_LOG_NODE log;\n");
|
||||
fprintf(cf, " toku_get_and_pin_rollback_log_for_new_entry(txn, &log);\n");
|
||||
// 'memdup' all BYTESTRINGS here
|
||||
DO_FIELDS(field_type, lt, {
|
||||
if ( strcmp(field_type->type, "BYTESTRING") == 0 ) {
|
||||
fprintf(cf, " BYTESTRING %s = {\n"
|
||||
" .len = %s_ptr->len,\n"
|
||||
" .data = cast_to_typeof(%s.data) toku_memdup_in_rollback(log, %s_ptr->data, %s_ptr->len)\n"
|
||||
" };\n",
|
||||
field_type->name, field_type->name, field_type->name, field_type->name, field_type->name);
|
||||
}
|
||||
if ( strcmp(field_type->type, "FILENUMS") == 0 ) {
|
||||
fprintf(cf, " FILENUMS %s = {\n"
|
||||
" .num = %s_ptr->num,\n"
|
||||
" .filenums = cast_to_typeof(%s.filenums) toku_memdup_in_rollback(log, %s_ptr->filenums, %s_ptr->num * (sizeof (FILENUM)))\n"
|
||||
" };\n",
|
||||
field_type->name, field_type->name, field_type->name, field_type->name, field_type->name);
|
||||
}
|
||||
});
|
||||
{
|
||||
int count=0;
|
||||
fprintf(cf, " uint32_t rollback_fsize = toku_logger_rollback_fsize_%s(", lt->name);
|
||||
DO_FIELDS(field_type, lt, fprintf(cf, "%s%s", (count++>0)?", ":"", field_type->name));
|
||||
fprintf(cf, ");\n");
|
||||
}
|
||||
fprintf(cf, " struct roll_entry *v;\n");
|
||||
fprintf(cf, " size_t mem_needed = sizeof(v->u.%s) + __builtin_offsetof(struct roll_entry, u.%s);\n", lt->name, lt->name);
|
||||
fprintf(cf, " CAST_FROM_VOIDP(v, toku_malloc_in_rollback(log, mem_needed));\n");
|
||||
fprintf(cf, " assert(v);\n");
|
||||
fprintf(cf, " v->cmd = (enum rt_cmd)%u;\n", lt->command_and_flags&0xff);
|
||||
DO_FIELDS(field_type, lt, fprintf(cf, " v->u.%s.%s = %s;\n", lt->name, field_type->name, field_type->name));
|
||||
fprintf(cf, " v->prev = log->newest_logentry;\n");
|
||||
fprintf(cf, " if (log->oldest_logentry==NULL) log->oldest_logentry=v;\n");
|
||||
fprintf(cf, " log->newest_logentry = v;\n");
|
||||
fprintf(cf, " log->rollentry_resident_bytecount += rollback_fsize;\n");
|
||||
fprintf(cf, " txn->roll_info.rollentry_raw_count += rollback_fsize;\n");
|
||||
fprintf(cf, " txn->roll_info.num_rollentries++;\n");
|
||||
fprintf(cf, " log->dirty = true;\n");
|
||||
fprintf(cf, " // spill and unpin assert success internally\n");
|
||||
fprintf(cf, " toku_maybe_spill_rollbacks(txn, log);\n");
|
||||
fprintf(cf, " toku_rollback_log_unpin(txn, log);\n");
|
||||
fprintf(cf, " toku_txn_unlock(txn);\n");
|
||||
fprintf(cf, "}\n");
|
||||
});
|
||||
|
||||
DO_ROLLBACKS(lt, {
|
||||
fprintf2(cf, hf, "void toku_logger_rollback_wbuf_nocrc_write_%s (struct wbuf *wbuf", lt->name);
|
||||
DO_FIELDS(field_type, lt, fprintf2(cf, hf, ", %s %s", field_type->type, field_type->name));
|
||||
fprintf2(cf, hf, ")");
|
||||
fprintf(hf, ";\n");
|
||||
fprintf(cf, " {\n");
|
||||
|
||||
{
|
||||
int count=0;
|
||||
fprintf(cf, " uint32_t rollback_fsize = toku_logger_rollback_fsize_%s(", lt->name);
|
||||
DO_FIELDS(field_type, lt, fprintf(cf, "%s%s", (count++>0)?", ":"", field_type->name));
|
||||
fprintf(cf, ");\n");
|
||||
fprintf(cf, " wbuf_nocrc_int(wbuf, rollback_fsize);\n");
|
||||
}
|
||||
fprintf(cf, " wbuf_nocrc_char(wbuf, '%c');\n", (char)(0xff<->command_and_flags));
|
||||
DO_FIELDS(field_type, lt, fprintf(cf, " wbuf_nocrc_%s(wbuf, %s);\n", field_type->type, field_type->name));
|
||||
fprintf(cf, "}\n");
|
||||
});
|
||||
fprintf2(cf, hf, "void toku_logger_rollback_wbuf_nocrc_write (struct wbuf *wbuf, struct roll_entry *r)");
|
||||
fprintf(hf, ";\n");
|
||||
fprintf(cf, " {\n switch (r->cmd) {\n");
|
||||
DO_ROLLBACKS(lt, {
|
||||
fprintf(cf, " case RT_%s: toku_logger_rollback_wbuf_nocrc_write_%s(wbuf", lt->name, lt->name);
|
||||
DO_FIELDS(field_type, lt, fprintf(cf, ", r->u.%s.%s", lt->name, field_type->name));
|
||||
fprintf(cf, "); return;\n");
|
||||
});
|
||||
fprintf(cf, " }\n assert(0);\n");
|
||||
fprintf(cf, "}\n");
|
||||
DO_ROLLBACKS(lt, {
|
||||
fprintf2(cf, hf, "uint32_t toku_logger_rollback_fsize_%s (", lt->name);
|
||||
int count=0;
|
||||
DO_FIELDS(field_type, lt, fprintf2(cf, hf, "%s%s %s", (count++>0)?", ":"", field_type->type, field_type->name));
|
||||
fprintf(hf, ");\n");
|
||||
fprintf(cf, ") {\n");
|
||||
fprintf(cf, " return 1 /* the cmd*/\n");
|
||||
fprintf(cf, " + 4 /* the int at the end saying the size */");
|
||||
DO_FIELDS(field_type, lt,
|
||||
fprintf(cf, "\n + toku_logsizeof_%s(%s)", field_type->type, field_type->name));
|
||||
fprintf(cf, ";\n}\n");
|
||||
});
|
||||
fprintf2(cf, hf, "uint32_t toku_logger_rollback_fsize(struct roll_entry *item)");
|
||||
fprintf(hf, ";\n");
|
||||
fprintf(cf, "{\n switch(item->cmd) {\n");
|
||||
DO_ROLLBACKS(lt, {
|
||||
fprintf(cf, " case RT_%s: return toku_logger_rollback_fsize_%s(", lt->name, lt->name);
|
||||
int count=0;
|
||||
DO_FIELDS(field_type, lt, fprintf(cf, "%sitem->u.%s.%s", (count++>0)?", ":"", lt->name, field_type->name));
|
||||
fprintf(cf, ");\n");
|
||||
});
|
||||
fprintf(cf, " }\n assert(0);\n return 0;\n");
|
||||
fprintf(cf, "}\n");
|
||||
|
||||
fprintf2(cf, hf, "int toku_parse_rollback(unsigned char *buf, uint32_t n_bytes, struct roll_entry **itemp, memarena *ma)");
|
||||
fprintf(hf, ";\n");
|
||||
fprintf(cf, " {\n assert(n_bytes>0);\n struct roll_entry *item;\n enum rt_cmd cmd = (enum rt_cmd)(buf[0]);\n size_t mem_needed;\n");
|
||||
fprintf(cf, " struct rbuf rc = {buf, n_bytes, 1};\n");
|
||||
fprintf(cf, " switch(cmd) {\n");
|
||||
DO_ROLLBACKS(lt, {
|
||||
fprintf(cf, " case RT_%s:\n", lt->name);
|
||||
fprintf(cf, " mem_needed = sizeof(item->u.%s) + __builtin_offsetof(struct roll_entry, u.%s);\n", lt->name, lt->name);
|
||||
fprintf(cf, " CAST_FROM_VOIDP(item, ma->malloc_from_arena(mem_needed));\n");
|
||||
fprintf(cf, " item->cmd = cmd;\n");
|
||||
DO_FIELDS(field_type, lt, fprintf(cf, " rbuf_ma_%s(&rc, ma, &item->u.%s.%s);\n", field_type->type, lt->name, field_type->name));
|
||||
fprintf(cf, " *itemp = item;\n");
|
||||
fprintf(cf, " return 0;\n");
|
||||
});
|
||||
fprintf(cf, " }\n return EINVAL;\n}\n");
|
||||
}
|
||||
|
||||
static void
|
||||
generate_log_entry_functions(void) {
|
||||
fprintf(hf, "LSN toku_log_entry_get_lsn(struct log_entry *);\n");
|
||||
fprintf(cf, "LSN toku_log_entry_get_lsn(struct log_entry *le) {\n");
|
||||
fprintf(cf, " return le->u.begin_checkpoint.lsn;\n");
|
||||
fprintf(cf, "}\n");
|
||||
}
|
||||
|
||||
// Base names of the three files this generator writes; main() joins each one
// to the output directory given on the command line.
const char codefile[] = "log_code.cc";
|
||||
const char printfile[] = "log_print.cc";
|
||||
const char headerfile[] = "log_header.h";
|
||||
int main (int argc, const char *const argv[]) {
|
||||
assert(argc==2); // the single argument is the directory into which to put things
|
||||
const char *dir = argv[1];
|
||||
size_t codepathlen = sizeof(codefile) + strlen(dir) + 4;
|
||||
size_t printpathlen = sizeof(printfile) + strlen(dir) + 4;
|
||||
size_t headerpathlen = sizeof(headerfile) + strlen(dir) + 4;
|
||||
char codepath[codepathlen];
|
||||
char printpath[printpathlen];
|
||||
char headerpath[headerpathlen];
|
||||
{ int r = snprintf(codepath, codepathlen, "%s/%s", argv[1], codefile); assert(r<(int)codepathlen); }
|
||||
{ int r = snprintf(printpath, printpathlen, "%s/%s", argv[1], printfile); assert(r<(int)printpathlen); }
|
||||
{ int r = snprintf(headerpath, headerpathlen, "%s/%s", argv[1], headerfile); assert(r<(int)headerpathlen); }
|
||||
chmod(codepath, S_IRUSR|S_IWUSR);
|
||||
chmod(headerpath, S_IRUSR|S_IWUSR);
|
||||
unlink(codepath);
|
||||
unlink(headerpath);
|
||||
cf = fopen(codepath, "w");
|
||||
if (cf==0) { int r = get_error_errno(); printf("fopen of %s failed because of errno=%d (%s)\n", codepath, r, strerror(r)); } // sometimes this is failing, so let's make a better diagnostic
|
||||
assert(cf!=0);
|
||||
hf = fopen(headerpath, "w"); assert(hf!=0);
|
||||
pf = fopen(printpath, "w"); assert(pf!=0);
|
||||
fprintf2(cf, hf, "/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */\n");
|
||||
fprintf2(cf, hf, "// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:\n");
|
||||
fprintf(hf, "#pragma once\n");
|
||||
fprintf2(cf, hf, "/* Do not edit this file. This code generated by logformat.c. Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. */\n");
|
||||
fprintf2(cf, hf, "#ident \"Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.\"\n");
|
||||
fprintf2(cf, pf, "#include <stdint.h>\n");
|
||||
fprintf2(cf, pf, "#include <sys/time.h>\n");
|
||||
fprintf2(cf, pf, "#include <ft/logger/log-internal.h>\n");
|
||||
fprintf(hf, "#include <ft/ft-internal.h>\n");
|
||||
fprintf(hf, "#include <util/bytestring.h>\n");
|
||||
fprintf(hf, "#include <util/memarena.h>\n");
|
||||
generate_enum();
|
||||
generate_log_struct();
|
||||
generate_dispatch();
|
||||
generate_log_writer();
|
||||
generate_log_reader();
|
||||
generate_rollbacks();
|
||||
generate_log_entry_functions();
|
||||
generate_logprint();
|
||||
{
|
||||
int r=fclose(hf); assert(r==0);
|
||||
r=fclose(cf); assert(r==0);
|
||||
r=fclose(pf); assert(r==0);
|
||||
// Make it tougher to modify by mistake
|
||||
chmod(codepath, S_IRUSR|S_IRGRP|S_IROTH);
|
||||
chmod(headerpath, S_IRUSR|S_IRGRP|S_IROTH);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,274 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ft/serialize/block_table.h"
|
||||
#include "ft/serialize/ft_layout_version.h"
|
||||
#include "ft/txn/txn.h"
|
||||
|
||||
typedef struct tokulogger *TOKULOGGER;
|
||||
|
||||
// Log format version numbers. The historical values are listed explicitly;
// the current and minimum-supported versions are aliases of the FT layout
// version constants.
enum {
|
||||
TOKU_LOG_VERSION_1 = 1,
|
||||
TOKU_LOG_VERSION_2 = 2,
|
||||
//After 2 we linked the log version to the FT_LAYOUT VERSION.
|
||||
//So it went from 2 to 13 (3-12 do not exist)
|
||||
TOKU_LOG_VERSION_24 = 24,
|
||||
TOKU_LOG_VERSION_25 = 25, // change rollinclude rollback log entry
|
||||
TOKU_LOG_VERSION_26 = 26, // no change from 25
|
||||
TOKU_LOG_VERSION_27 = 27, // no change from 26
|
||||
TOKU_LOG_VERSION_28 = 28, // no change from 27
|
||||
TOKU_LOG_VERSION_29 = 29, // no change from 28
|
||||
// Current log version: always equal to the FT layout version.
TOKU_LOG_VERSION = FT_LAYOUT_VERSION,
|
||||
// Oldest log version accepted: equal to the oldest supported FT layout version.
TOKU_LOG_MIN_SUPPORTED_VERSION = FT_LAYOUT_MIN_SUPPORTED_VERSION,
|
||||
};
|
||||
|
||||
int toku_logger_create (TOKULOGGER *resultp);
|
||||
int toku_logger_open (const char *directory, TOKULOGGER logger);
|
||||
int toku_logger_open_with_last_xid(const char *directory, TOKULOGGER logger, TXNID last_xid);
|
||||
void toku_logger_shutdown(TOKULOGGER logger);
|
||||
int toku_logger_close(TOKULOGGER *loggerp);
|
||||
void toku_logger_initialize_rollback_cache(TOKULOGGER logger, struct ft *ft);
|
||||
int toku_logger_open_rollback(TOKULOGGER logger, struct cachetable *ct, bool create);
|
||||
void toku_logger_close_rollback(TOKULOGGER logger);
|
||||
void toku_logger_close_rollback_check_empty(TOKULOGGER logger, bool clean_shutdown);
|
||||
bool toku_logger_rollback_is_open (TOKULOGGER); // return true iff the rollback is open.
|
||||
|
||||
void toku_logger_fsync (TOKULOGGER logger);
|
||||
void toku_logger_fsync_if_lsn_not_fsynced(TOKULOGGER logger, LSN lsn);
|
||||
int toku_logger_is_open(TOKULOGGER logger);
|
||||
void toku_logger_set_cachetable (TOKULOGGER logger, struct cachetable *ct);
|
||||
int toku_logger_set_lg_max(TOKULOGGER logger, uint32_t lg_max);
|
||||
int toku_logger_get_lg_max(TOKULOGGER logger, uint32_t *lg_maxp);
|
||||
int toku_logger_set_lg_bsize(TOKULOGGER logger, uint32_t bsize);
|
||||
|
||||
void toku_logger_write_log_files (TOKULOGGER logger, bool write_log_files);
|
||||
void toku_logger_trim_log_files(TOKULOGGER logger, bool trim_log_files);
|
||||
bool toku_logger_txns_exist(TOKULOGGER logger);
|
||||
|
||||
// Restart the logger. This function is used by recovery to really start
|
||||
// logging.
|
||||
// Effects: Flush the current log buffer, reset the logger's lastlsn, and
|
||||
// open a new log file.
|
||||
// Returns: 0 if success
|
||||
int toku_logger_restart(TOKULOGGER logger, LSN lastlsn);
|
||||
|
||||
// Maybe trim the log entries from the log that are older than the given LSN
|
||||
// Effect: find all of the log files whose largest LSN is smaller than the
|
||||
// given LSN and delete them.
|
||||
void toku_logger_maybe_trim_log(TOKULOGGER logger, LSN oldest_open_lsn);
|
||||
|
||||
// At the ft layer, a FILENUM uniquely identifies an open file.
|
||||
// Wrapper struct around a single 32-bit file id.
struct FILENUM {
|
||||
uint32_t fileid;
|
||||
};
|
||||
// Sentinel FILENUM whose fileid is UINT32_MAX.
static const FILENUM FILENUM_NONE = { .fileid = UINT32_MAX };
|
||||
|
||||
// A counted array of FILENUMs: `filenums` points at `num` entries.
struct FILENUMS {
|
||||
uint32_t num;
|
||||
FILENUM *filenums;
|
||||
};
|
||||
|
||||
void toku_logger_log_fcreate(TOKUTXN txn, const char *fname, FILENUM filenum, uint32_t mode, uint32_t flags, uint32_t nodesize, uint32_t basementnodesize, enum toku_compression_method compression_method);
|
||||
void toku_logger_log_fdelete(TOKUTXN txn, FILENUM filenum);
|
||||
void toku_logger_log_fopen(TOKUTXN txn, const char * fname, FILENUM filenum, uint32_t treeflags);
|
||||
|
||||
// the log generation code requires a typedef if we want to pass by pointer
|
||||
typedef TOKU_XA_XID *XIDP;
|
||||
|
||||
int toku_fread_uint8_t (FILE *f, uint8_t *v, struct x1764 *mm, uint32_t *len);
|
||||
int toku_fread_uint32_t_nocrclen (FILE *f, uint32_t *v);
|
||||
int toku_fread_uint32_t (FILE *f, uint32_t *v, struct x1764 *checksum, uint32_t *len);
|
||||
int toku_fread_uint64_t (FILE *f, uint64_t *v, struct x1764 *checksum, uint32_t *len);
|
||||
int toku_fread_bool (FILE *f, bool *v, struct x1764 *checksum, uint32_t *len);
|
||||
int toku_fread_LSN (FILE *f, LSN *lsn, struct x1764 *checksum, uint32_t *len);
|
||||
int toku_fread_BLOCKNUM (FILE *f, BLOCKNUM *lsn, struct x1764 *checksum, uint32_t *len);
|
||||
int toku_fread_FILENUM (FILE *f, FILENUM *filenum, struct x1764 *checksum, uint32_t *len);
|
||||
int toku_fread_TXNID (FILE *f, TXNID *txnid, struct x1764 *checksum, uint32_t *len);
|
||||
int toku_fread_TXNID_PAIR (FILE *f, TXNID_PAIR *txnid, struct x1764 *checksum, uint32_t *len);
|
||||
int toku_fread_XIDP (FILE *f, XIDP *xidp, struct x1764 *checksum, uint32_t *len);
|
||||
int toku_fread_BYTESTRING (FILE *f, BYTESTRING *bs, struct x1764 *checksum, uint32_t *len);
|
||||
int toku_fread_FILENUMS (FILE *f, FILENUMS *fs, struct x1764 *checksum, uint32_t *len);
|
||||
|
||||
int toku_logprint_LSN (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__)));
|
||||
int toku_logprint_TXNID (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__)));
|
||||
int toku_logprint_TXNID_PAIR (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__)));
|
||||
int toku_logprint_XIDP (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__)));
|
||||
int toku_logprint_uint8_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format);
|
||||
int toku_logprint_uint32_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format);
|
||||
int toku_logprint_BLOCKNUM (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format);
|
||||
int toku_logprint_uint64_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format);
|
||||
int toku_logprint_bool (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__)));
|
||||
void toku_print_BYTESTRING (FILE *outf, uint32_t len, char *data);
|
||||
int toku_logprint_BYTESTRING (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__)));
|
||||
int toku_logprint_FILENUM (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format);
|
||||
int toku_logprint_FILENUMS (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format);
|
||||
int toku_read_and_print_logmagic (FILE *f, uint32_t *versionp);
|
||||
int toku_read_logmagic (FILE *f, uint32_t *versionp);
|
||||
|
||||
TXNID_PAIR toku_txn_get_txnid (TOKUTXN txn);
|
||||
LSN toku_logger_last_lsn(TOKULOGGER logger);
|
||||
TOKULOGGER toku_txn_logger (TOKUTXN txn);
|
||||
|
||||
void toku_txnid2txn (TOKULOGGER logger, TXNID_PAIR txnid, TOKUTXN *result);
|
||||
|
||||
int toku_logger_log_archive (TOKULOGGER logger, char ***logs_p, int flags);
|
||||
|
||||
TOKUTXN toku_logger_txn_parent (TOKUTXN txn);
|
||||
void toku_logger_note_checkpoint(TOKULOGGER logger, LSN lsn);
|
||||
|
||||
void toku_logger_make_space_in_inbuf (TOKULOGGER logger, int n_bytes_needed);
|
||||
|
||||
int toku_logger_write_inbuf (TOKULOGGER logger);
|
||||
// Effect: Write the buffered data (from the inbuf) to a file. No fsync, however.
|
||||
// As a side effect, the inbuf will be made empty.
|
||||
// Return 0 on success, otherwise return an error number.
|
||||
// Requires: The inbuf lock is currently held, and the outbuf lock is not held.
|
||||
// Upon return, the inbuf lock will be held, and the outbuf lock is not held.
|
||||
// However, no side effects should have been made to the logger. The lock was acquired simply to determine that the buffer will overflow if we try to put something into it.
|
||||
// The inbuf lock will be released, so the operations before and after this function call will not be atomic.
|
||||
// Rationale: When the buffer becomes nearly full, call this function so that more can be put in.
|
||||
// Implementation note: Since the output lock is acquired first, we must release the input lock, and then grab both in the right order.
|
||||
|
||||
void toku_logger_maybe_fsync (TOKULOGGER logger, LSN lsn, int do_fsync, bool holds_input_lock);
|
||||
// Effect: If fsync is nonzero, then make sure that the log is flushed and synced at least up to lsn.
|
||||
// Entry: Holds input lock iff 'holds_input_lock'.
|
||||
// Exit: Holds no locks.
|
||||
|
||||
// Discussion: How does the logger work:
|
||||
// The logger has two buffers: an inbuf and an outbuf.
|
||||
// There are two locks, called the inlock, and the outlock. To write, both locks must be held, and the outlock is acquired first.
|
||||
// Roughly speaking, the inbuf is used to accumulate logged data, and the outbuf is used to write to disk.
|
||||
// When something is to be logged we do the following:
|
||||
// acquire the inlock.
|
||||
// Make sure there is space in the inbuf for the logentry. (We know the size of the logentry in advance):
|
||||
// if the inbuf doesn't have enough space then
|
||||
// release the inlock
|
||||
// acquire the outlock
|
||||
// acquire the inlock
|
||||
// it's possible that some other thread made space.
|
||||
// if there still isn't space
|
||||
// swap the inbuf and the outbuf
|
||||
// release the inlock
|
||||
// write the outbuf
|
||||
// acquire the inlock
|
||||
// release the outlock
|
||||
// if the inbuf is still too small, then increase the size of the inbuf
|
||||
// Increment the LSN and fill the inbuf.
|
||||
// If fsync is required then
|
||||
// release the inlock
|
||||
// acquire the outlock
|
||||
// acquire the inlock
|
||||
// if the LSN has been flushed and fsynced (if so we are done. Some other thread did the flush.)
|
||||
// release the locks
|
||||
// if the LSN has been flushed but not fsynced up to the LSN:
|
||||
// release the inlock
|
||||
// fsync
|
||||
// release the outlock
|
||||
// otherwise:
|
||||
// swap the outbuf and the inbuf
|
||||
// release the inlock
|
||||
// write the outbuf
|
||||
// fsync
|
||||
// release the outlock
|
||||
|
||||
void toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS s);
|
||||
|
||||
int toku_get_version_of_logs_on_disk(const char *log_dir, bool *found_any_logs, uint32_t *version_found);
|
||||
|
||||
struct txn_manager *toku_logger_get_txn_manager(TOKULOGGER logger);
|
||||
|
||||
// For serialize / deserialize
|
||||
|
||||
#include "ft/serialize/wbuf.h"
|
||||
|
||||
// Writes the FILENUM's 32-bit id to the buffer via wbuf_nocrc_uint.
static inline void wbuf_nocrc_FILENUM(struct wbuf *wb, FILENUM fileid) {
    const uint32_t raw_id = fileid.fileid;
    wbuf_nocrc_uint(wb, raw_id);
}
|
||||
|
||||
// Writes the FILENUM's 32-bit id to the buffer via wbuf_uint.
static inline void wbuf_FILENUM(struct wbuf *wb, FILENUM fileid) {
    const uint32_t raw_id = fileid.fileid;
    wbuf_uint(wb, raw_id);
}
|
||||
|
||||
static inline void wbuf_nocrc_FILENUMS(struct wbuf *wb, FILENUMS v) {
|
||||
wbuf_nocrc_uint(wb, v.num);
|
||||
for (uint32_t i = 0; i < v.num; i++) {
|
||||
wbuf_nocrc_FILENUM(wb, v.filenums[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void wbuf_FILENUMS(struct wbuf *wb, FILENUMS v) {
|
||||
wbuf_uint(wb, v.num);
|
||||
for (uint32_t i = 0; i < v.num; i++) {
|
||||
wbuf_FILENUM(wb, v.filenums[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Writes an XA transaction id to the buffer with the wbuf_nocrc_* writers:
// formatID, then gtrid_length and bqual_length (each via wbuf_nocrc_uint8_t),
// then the first gtrid_length+bqual_length bytes of xid->data.
static inline void wbuf_nocrc_XIDP (struct wbuf *w, TOKU_XA_XID *xid) {
|
||||
wbuf_nocrc_uint32_t(w, xid->formatID);
|
||||
wbuf_nocrc_uint8_t(w, xid->gtrid_length);
|
||||
wbuf_nocrc_uint8_t(w, xid->bqual_length);
|
||||
// Only the used prefix of the data array is written.
wbuf_nocrc_literal_bytes(w, xid->data, xid->gtrid_length+xid->bqual_length);
|
||||
}
|
||||
|
||||
#include "ft/serialize/rbuf.h"
|
||||
|
||||
// Reads one integer from the buffer and stores it as the FILENUM's id.
static inline void rbuf_FILENUM(struct rbuf *rb, FILENUM *filenum) {
    const uint32_t raw_id = rbuf_int(rb);
    filenum->fileid = raw_id;
}
|
||||
// Memarena-flavoured reader for a FILENUM; the arena argument is unused and
// the work is delegated to rbuf_FILENUM.
static inline void rbuf_ma_FILENUM(struct rbuf *rb, memarena *UU(ma), FILENUM *filenum) {
    return rbuf_FILENUM(rb, filenum);
}
|
||||
|
||||
static inline void rbuf_FILENUMS(struct rbuf *rb, FILENUMS *filenums) {
|
||||
filenums->num = rbuf_int(rb);
|
||||
XMALLOC_N(filenums->num, filenums->filenums);
|
||||
for (uint32_t i = 0; i < filenums->num; i++) {
|
||||
rbuf_FILENUM(rb, &(filenums->filenums[i]));
|
||||
}
|
||||
}
|
||||
|
||||
static inline void rbuf_ma_FILENUMS(struct rbuf *rb, memarena *ma, FILENUMS *filenums) {
|
||||
rbuf_ma_uint32_t(rb, ma, &(filenums->num));
|
||||
filenums->filenums = (FILENUM *) ma->malloc_from_arena(filenums->num * sizeof(FILENUM));
|
||||
assert(filenums->filenums != NULL);
|
||||
for (uint32_t i = 0; i < filenums->num; i++) {
|
||||
rbuf_ma_FILENUM(rb, ma, &(filenums->filenums[i]));
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -1,85 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <db.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "portability/memory.h"
|
||||
#include "portability/toku_portability.h"
|
||||
|
||||
#include "ft/comparator.h"
|
||||
#include "ft/ft-ops.h"
|
||||
#include "util/x1764.h"
|
||||
|
||||
typedef void (*prepared_txn_callback_t)(DB_ENV *env, struct tokutxn *txn);
|
||||
typedef void (*keep_cachetable_callback_t)(DB_ENV *env, struct cachetable *ct);
|
||||
|
||||
// Run tokuft recovery from the log
|
||||
// Returns 0 if success
|
||||
int tokuft_recover(DB_ENV *env,
|
||||
prepared_txn_callback_t prepared_txn_callback,
|
||||
keep_cachetable_callback_t keep_cachetable_callback,
|
||||
struct tokulogger *logger,
|
||||
const char *env_dir,
|
||||
const char *log_dir,
|
||||
ft_compare_func bt_compare,
|
||||
ft_update_func update_function,
|
||||
generate_row_for_put_func generate_row_for_put,
|
||||
generate_row_for_del_func generate_row_for_del,
|
||||
size_t cachetable_size);
|
||||
|
||||
// Effect: Check the tokuft logs to determine whether or not we need to run recovery.
|
||||
// If the log is empty or if there is a clean shutdown at the end of the log, then we
|
||||
// don't need to run recovery.
|
||||
// Returns: true if we need recovery, otherwise false.
|
||||
int tokuft_needs_recovery(const char *logdir, bool ignore_empty_log);
|
||||
|
||||
// Return 0 if recovery log exists, ENOENT if log is missing
|
||||
int tokuft_recover_log_exists(const char * log_dir);
|
||||
|
||||
// For test only - set callbacks for recovery testing
|
||||
void toku_recover_set_callback (void (*)(void*), void*);
|
||||
void toku_recover_set_callback2 (void (*)(void*), void*);
|
||||
|
||||
extern int tokuft_recovery_trace;
|
||||
|
||||
int toku_recover_lock (const char *lock_dir, int *lockfd);
|
||||
|
||||
int toku_recover_unlock(int lockfd);
|
@ -1,120 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include <my_global.h>
|
||||
#include "portability/toku_portability.h"
|
||||
|
||||
#include "ft/msg.h"
|
||||
#include "ft/txn/xids.h"
|
||||
#include "util/dbt.h"
|
||||
|
||||
// Builds a message from optional key/value DBTs. A null key or val pointer is
// replaced by toku_empty_dbt(); otherwise the pointed-to DBT is copied.
ft_msg::ft_msg(const DBT *key, const DBT *val, enum ft_msg_type t, MSN m, XIDS x)
    : _key(!key ? toku_empty_dbt() : *key),
      _val(!val ? toku_empty_dbt() : *val),
      _type(t),
      _msn(m),
      _xids(x)
{}
|
||||
|
||||
// Reads one message from the rbuf. The fields are consumed in this order:
// type byte, freshness byte (stored to *is_fresh), MSN, xids (stored to *x),
// key bytes, value bytes.
ft_msg ft_msg::deserialize_from_rbuf(struct rbuf *rb, XIDS *x, bool *is_fresh) {
    const enum ft_msg_type msg_type = (enum ft_msg_type) rbuf_char(rb);
    *is_fresh = rbuf_char(rb);
    const MSN msg_msn = rbuf_MSN(rb);
    toku_xids_create_from_buffer(rb, x);

    const void *key_bytes;
    uint32_t key_len;
    rbuf_bytes(rb, &key_bytes, &key_len);

    const void *val_bytes;
    uint32_t val_len;
    rbuf_bytes(rb, &val_bytes, &val_len);

    // Wrap the raw byte ranges in DBTs for the constructor.
    DBT key_dbt, val_dbt;
    const DBT *key = toku_fill_dbt(&key_dbt, key_bytes, key_len);
    const DBT *val = toku_fill_dbt(&val_dbt, val_bytes, val_len);
    return ft_msg(key, val, msg_type, msg_msn, *x);
}
|
||||
|
||||
// Reads one message in the v13 form: type byte, xids (stored to *x), key
// bytes, value bytes. The MSN is not read from the buffer; the caller's `m`
// is used.
ft_msg ft_msg::deserialize_from_rbuf_v13(struct rbuf *rb, MSN m, XIDS *x) {
    const enum ft_msg_type msg_type = (enum ft_msg_type) rbuf_char(rb);
    toku_xids_create_from_buffer(rb, x);

    const void *key_bytes;
    uint32_t key_len;
    rbuf_bytes(rb, &key_bytes, &key_len);

    const void *val_bytes;
    uint32_t val_len;
    rbuf_bytes(rb, &val_bytes, &val_len);

    // Wrap the raw byte ranges in DBTs for the constructor.
    DBT key_dbt, val_dbt;
    const DBT *key = toku_fill_dbt(&key_dbt, key_bytes, key_len);
    const DBT *val = toku_fill_dbt(&val_dbt, val_bytes, val_len);
    return ft_msg(key, val, msg_type, m, *x);
}
|
||||
|
||||
// Accessor: pointer to the message's key DBT.
const DBT *ft_msg::kdbt() const {
    return &this->_key;
}
|
||||
|
||||
// Accessor: pointer to the message's value DBT.
const DBT *ft_msg::vdbt() const {
    return &this->_val;
}
|
||||
|
||||
enum ft_msg_type ft_msg::type() const {
|
||||
return _type;
|
||||
}
|
||||
|
||||
// Accessor: the message sequence number.
MSN ft_msg::msn() const {
    return this->_msn;
}
|
||||
|
||||
// Accessor: the message's XIDS.
XIDS ft_msg::xids() const {
    return this->_xids;
}
|
||||
|
||||
size_t ft_msg::total_size() const {
|
||||
// Must store two 4-byte lengths
|
||||
static const size_t key_val_overhead = 8;
|
||||
|
||||
// 1 byte type, 1 byte freshness, then 8 byte MSN
|
||||
static const size_t msg_overhead = 2 + sizeof(MSN);
|
||||
|
||||
static const size_t total_overhead = key_val_overhead + msg_overhead;
|
||||
|
||||
const size_t keyval_size = _key.size + _val.size;
|
||||
const size_t xids_size = toku_xids_get_serialize_size(xids());
|
||||
return total_overhead + keyval_size + xids_size;
|
||||
}
|
||||
|
||||
// Writes the message to the wbuf in this order: type byte, freshness byte,
// MSN, xids, key bytes, value bytes.
void ft_msg::serialize_to_wbuf(struct wbuf *wb, bool is_fresh) const {
    const unsigned char type_byte = (unsigned char) _type;
    const unsigned char fresh_byte = (unsigned char) is_fresh;

    wbuf_nocrc_char(wb, type_byte);
    wbuf_nocrc_char(wb, fresh_byte);
    wbuf_MSN(wb, _msn);
    wbuf_nocrc_xids(wb, _xids);
    wbuf_nocrc_bytes(wb, _key.data, _key.size);
    wbuf_nocrc_bytes(wb, _val.data, _val.size);
}
|
||||
|
@ -1,191 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
/* The purpose of this file is to provide access to the ft_msg,
|
||||
* which is the ephemeral version of the messages that lives in
|
||||
* a message buffer.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <db.h>
|
||||
|
||||
#include "portability/toku_assert.h"
|
||||
#include "portability/toku_stdint.h"
|
||||
|
||||
#include "ft/txn/xids.h"
|
||||
|
||||
// Message Sequence Number (MSN)
|
||||
typedef struct __toku_msn { uint64_t msn; } MSN;
|
||||
|
||||
// dummy used for message construction, to be filled in when msg is applied to tree
|
||||
static const MSN ZERO_MSN = { .msn = 0 };
|
||||
|
||||
// first 2^62 values reserved for messages created before Dr. No (for upgrade)
|
||||
static const MSN MIN_MSN = { .msn = 1ULL << 62 };
|
||||
static const MSN MAX_MSN = { .msn = UINT64_MAX };
|
||||
|
||||
/* Tree command (message) types.
 * The numeric values are fixed; 3, 5 and 7 belonged to the retired *_BOTH
 * variants and are intentionally left unused.
 */
enum ft_msg_type {
    FT_NONE = 0,
    FT_INSERT = 1,
    FT_DELETE_ANY = 2,              // Delete any matching key. Formerly named FT_DELETE.
    // FT_DELETE_BOTH = 3,
    FT_ABORT_ANY = 4,               // Abort any commands on any matching key.
    // FT_ABORT_BOTH = 5,           // Abort commands that match both the key and the value.
    FT_COMMIT_ANY = 6,
    // FT_COMMIT_BOTH = 7,
    FT_COMMIT_BROADCAST_ALL = 8,    // Broadcast to all leafentries (commit all transactions).
    FT_COMMIT_BROADCAST_TXN = 9,    // Broadcast to all leafentries (commit specific transaction).
    FT_ABORT_BROADCAST_TXN = 10,    // Broadcast to all leafentries (abort specific transaction).
    FT_INSERT_NO_OVERWRITE = 11,
    FT_OPTIMIZE = 12,               // Broadcast.
    FT_OPTIMIZE_FOR_UPGRADE = 13,   // Same as FT_OPTIMIZE, but record version number in leafnode.
    FT_UPDATE = 14,
    FT_UPDATE_BROADCAST_ALL = 15
};
|
||||
|
||||
static inline bool
|
||||
ft_msg_type_applies_once(enum ft_msg_type type)
|
||||
{
|
||||
bool ret_val;
|
||||
switch (type) {
|
||||
case FT_INSERT_NO_OVERWRITE:
|
||||
case FT_INSERT:
|
||||
case FT_DELETE_ANY:
|
||||
case FT_ABORT_ANY:
|
||||
case FT_COMMIT_ANY:
|
||||
case FT_UPDATE:
|
||||
ret_val = true;
|
||||
break;
|
||||
case FT_COMMIT_BROADCAST_ALL:
|
||||
case FT_COMMIT_BROADCAST_TXN:
|
||||
case FT_ABORT_BROADCAST_TXN:
|
||||
case FT_OPTIMIZE:
|
||||
case FT_OPTIMIZE_FOR_UPGRADE:
|
||||
case FT_UPDATE_BROADCAST_ALL:
|
||||
case FT_NONE:
|
||||
ret_val = false;
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
ft_msg_type_applies_all(enum ft_msg_type type)
|
||||
{
|
||||
bool ret_val;
|
||||
switch (type) {
|
||||
case FT_NONE:
|
||||
case FT_INSERT_NO_OVERWRITE:
|
||||
case FT_INSERT:
|
||||
case FT_DELETE_ANY:
|
||||
case FT_ABORT_ANY:
|
||||
case FT_COMMIT_ANY:
|
||||
case FT_UPDATE:
|
||||
ret_val = false;
|
||||
break;
|
||||
case FT_COMMIT_BROADCAST_ALL:
|
||||
case FT_COMMIT_BROADCAST_TXN:
|
||||
case FT_ABORT_BROADCAST_TXN:
|
||||
case FT_OPTIMIZE:
|
||||
case FT_OPTIMIZE_FOR_UPGRADE:
|
||||
case FT_UPDATE_BROADCAST_ALL:
|
||||
ret_val = true;
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
ft_msg_type_does_nothing(enum ft_msg_type type)
|
||||
{
|
||||
return (type == FT_NONE);
|
||||
}
|
||||
|
||||
class ft_msg {
|
||||
public:
|
||||
ft_msg(const DBT *key, const DBT *val, enum ft_msg_type t, MSN m, XIDS x);
|
||||
|
||||
enum ft_msg_type type() const;
|
||||
|
||||
MSN msn() const;
|
||||
|
||||
XIDS xids() const;
|
||||
|
||||
const DBT *kdbt() const;
|
||||
|
||||
const DBT *vdbt() const;
|
||||
|
||||
size_t total_size() const;
|
||||
|
||||
void serialize_to_wbuf(struct wbuf *wb, bool is_fresh) const;
|
||||
|
||||
// deserialization goes through a static factory function so the ft msg
|
||||
// API stays completely const and there's no default constructor
|
||||
static ft_msg deserialize_from_rbuf(struct rbuf *rb, XIDS *xids, bool *is_fresh);
|
||||
|
||||
// Version 13/14 messages did not have an msn - so `m' is the MSN
|
||||
// that will be assigned to the message that gets deserialized.
|
||||
static ft_msg deserialize_from_rbuf_v13(struct rbuf *rb, MSN m, XIDS *xids);
|
||||
|
||||
private:
|
||||
const DBT _key;
|
||||
const DBT _val;
|
||||
enum ft_msg_type _type;
|
||||
MSN _msn;
|
||||
XIDS _xids;
|
||||
};
|
||||
|
||||
// For serialize / deserialize
|
||||
|
||||
#include "ft/serialize/wbuf.h"
|
||||
|
||||
// Serializes an MSN by writing its 64-bit `msn` field to `wb`.
static inline void wbuf_MSN(struct wbuf *wb, MSN msn) {
    const uint64_t raw_msn = msn.msn;
    wbuf_ulonglong(wb, raw_msn);
}
|
||||
|
||||
#include "ft/serialize/rbuf.h"
|
||||
|
||||
// Deserializes an MSN: reads one unsigned long long from `rb` and wraps it.
static inline MSN rbuf_MSN(struct rbuf *rb) {
    MSN result;
    result.msn = rbuf_ulonglong(rb);
    return result;
}
|
@ -1,292 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include "ft/msg_buffer.h"
|
||||
#include "util/dbt.h"
|
||||
|
||||
// Puts the buffer into its empty state: no backing memory, no entries.
void message_buffer::create() {
    // backing storage
    _memory = nullptr;
    _memory_size = 0;
    _memory_usable = 0;

    // contents
    _memory_used = 0;
    _num_entries = 0;
}
|
||||
|
||||
// Makes this buffer a copy of *src: allocates a new backing array of the same
// size as src's and copies its full contents (all _memory_size bytes).
void message_buffer::clone(message_buffer *src) {
    // Copy the bookkeeping first; the allocation below is sized from it.
    _memory_size = src->_memory_size;
    _memory_used = src->_memory_used;
    _num_entries = src->_num_entries;

    XMALLOC_N(_memory_size, _memory);
    memcpy(_memory, src->_memory, _memory_size);

    // Cache the allocator's usable size for the new array.
    _memory_usable = toku_malloc_usable_size(_memory);
}
|
||||
|
||||
void message_buffer::destroy() {
|
||||
if (_memory != nullptr) {
|
||||
toku_free(_memory);
|
||||
_memory_usable = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void message_buffer::deserialize_from_rbuf(struct rbuf *rb,
|
||||
int32_t **fresh_offsets, int32_t *nfresh,
|
||||
int32_t **stale_offsets, int32_t *nstale,
|
||||
int32_t **broadcast_offsets, int32_t *nbroadcast) {
|
||||
// read the number of messages in this buffer
|
||||
int n_in_this_buffer = rbuf_int(rb);
|
||||
if (fresh_offsets != nullptr) {
|
||||
XMALLOC_N(n_in_this_buffer, *fresh_offsets);
|
||||
}
|
||||
if (stale_offsets != nullptr) {
|
||||
XMALLOC_N(n_in_this_buffer, *stale_offsets);
|
||||
}
|
||||
if (broadcast_offsets != nullptr) {
|
||||
XMALLOC_N(n_in_this_buffer, *broadcast_offsets);
|
||||
}
|
||||
|
||||
_resize(rb->size + 64); // rb->size is a good hint for how big the buffer will be
|
||||
|
||||
// deserialize each message individually, noting whether it was fresh
|
||||
// and putting its buffer offset in the appropriate offsets array
|
||||
for (int i = 0; i < n_in_this_buffer; i++) {
|
||||
XIDS xids;
|
||||
bool is_fresh;
|
||||
const ft_msg msg = ft_msg::deserialize_from_rbuf(rb, &xids, &is_fresh);
|
||||
|
||||
int32_t *dest;
|
||||
if (ft_msg_type_applies_once(msg.type())) {
|
||||
if (is_fresh) {
|
||||
dest = fresh_offsets ? *fresh_offsets + (*nfresh)++ : nullptr;
|
||||
} else {
|
||||
dest = stale_offsets ? *stale_offsets + (*nstale)++ : nullptr;
|
||||
}
|
||||
} else {
|
||||
invariant(ft_msg_type_applies_all(msg.type()) || ft_msg_type_does_nothing(msg.type()));
|
||||
dest = broadcast_offsets ? *broadcast_offsets + (*nbroadcast)++ : nullptr;
|
||||
}
|
||||
|
||||
enqueue(msg, is_fresh, dest);
|
||||
toku_xids_destroy(&xids);
|
||||
}
|
||||
|
||||
invariant(_num_entries == n_in_this_buffer);
|
||||
}
|
||||
|
||||
// Version 13/14 variant of deserialize_from_rbuf().
//
// Messages at this version carry no MSN, so a block of MSNs is reserved by
// atomically subtracting the message count from
// *highest_unused_msn_for_upgrade; the messages then receive consecutive
// increasing MSNs from that block. Every message is treated as fresh.
// Returns the MSN given to the last message read.
MSN message_buffer::deserialize_from_rbuf_v13(struct rbuf *rb,
                                              MSN *highest_unused_msn_for_upgrade,
                                              int32_t **fresh_offsets, int32_t *nfresh,
                                              int32_t **broadcast_offsets, int32_t *nbroadcast) {
    // The buffer begins with the number of messages it holds.
    const int n_in_this_buffer = rbuf_int(rb);

    if (fresh_offsets != nullptr) {
        XMALLOC_N(n_in_this_buffer, *fresh_offsets);
    }
    if (broadcast_offsets != nullptr) {
        XMALLOC_N(n_in_this_buffer, *broadcast_offsets);
    }

    // Atomically decrement the header's MSN count by the number of messages
    // in the buffer; the value returned is one below the first MSN we assign.
    MSN highest_msn_in_this_buffer;
    highest_msn_in_this_buffer.msn =
        toku_sync_sub_and_fetch(&highest_unused_msn_for_upgrade->msn, n_in_this_buffer);

    for (int i = 0; i < n_in_this_buffer; i++) {
        XIDS xids;

        // Step to the next MSN before deserializing, so the last message
        // ends up with the newest/highest MSN.
        highest_msn_in_this_buffer.msn += 1;
        const ft_msg msg = ft_msg::deserialize_from_rbuf_v13(rb, highest_msn_in_this_buffer, &xids);

        // There were no stale messages at this version, so call it fresh.
        const bool is_fresh = true;

        // Choose which (array, counter) pair records this message's offset.
        int32_t **offsets_array;
        int32_t *offsets_count;
        if (ft_msg_type_applies_once(msg.type())) {
            offsets_array = fresh_offsets;
            offsets_count = nfresh;
        } else {
            invariant(ft_msg_type_applies_all(msg.type()) || ft_msg_type_does_nothing(msg.type()));
            offsets_array = broadcast_offsets;
            offsets_count = nbroadcast;
        }

        int32_t *dest = nullptr;
        if (offsets_array != nullptr) {
            dest = *offsets_array + *offsets_count;
            *offsets_count += 1;
        }

        enqueue(msg, is_fresh, dest);
        toku_xids_destroy(&xids);
    }

    return highest_msn_in_this_buffer;
}
|
||||
|
||||
// Reallocates the backing array to `new_size` bytes and refreshes the cached
// size fields.
void message_buffer::_resize(size_t new_size) {
    XREALLOC_N(new_size, _memory);
    _memory_usable = toku_malloc_usable_size(_memory);
    _memory_size = new_size;
}
|
||||
|
||||
// Returns the smallest power of two that is >= n, but never less than 4096.
//
// The doubling is done in unsigned arithmetic. The previous version doubled
// a signed int and then asserted r > 0, but signed overflow is undefined
// behaviour, so the compiler was entitled to drop that check. Here the
// overflow is detected without undefined behaviour: if the doubled value no
// longer fits in an int, the assertion fires.
static int next_power_of_two (int n) {
    // Non-positive requests are satisfied by the 4096 minimum.
    const unsigned int wanted = n > 0 ? static_cast<unsigned int>(n) : 0u;
    unsigned int r = 4096;
    while (r < wanted) {
        r *= 2;
        // must still be representable as a positive int
        assert(static_cast<int>(r) > 0);
    }
    return static_cast<int>(r);
}
|
||||
|
||||
struct message_buffer::buffer_entry *message_buffer::get_buffer_entry(int32_t offset) const {
|
||||
return (struct buffer_entry *) (_memory + offset);
|
||||
}
|
||||
|
||||
void message_buffer::enqueue(const ft_msg &msg, bool is_fresh, int32_t *offset) {
|
||||
int need_space_here = msg_memsize_in_buffer(msg);
|
||||
int need_space_total = _memory_used + need_space_here;
|
||||
if (_memory == nullptr || need_space_total > _memory_size) {
|
||||
// resize the buffer to the next power of 2 greater than the needed space
|
||||
int next_2 = next_power_of_two(need_space_total);
|
||||
_resize(next_2);
|
||||
}
|
||||
uint32_t keylen = msg.kdbt()->size;
|
||||
uint32_t datalen = msg.vdbt()->size;
|
||||
struct buffer_entry *entry = get_buffer_entry(_memory_used);
|
||||
entry->type = (unsigned char) msg.type();
|
||||
entry->msn = msg.msn();
|
||||
toku_xids_cpy(&entry->xids_s, msg.xids());
|
||||
entry->is_fresh = is_fresh;
|
||||
unsigned char *e_key = toku_xids_get_end_of_array(&entry->xids_s);
|
||||
entry->keylen = keylen;
|
||||
memcpy(e_key, msg.kdbt()->data, keylen);
|
||||
entry->vallen = datalen;
|
||||
memcpy(e_key + keylen, msg.vdbt()->data, datalen);
|
||||
if (offset) {
|
||||
*offset = _memory_used;
|
||||
}
|
||||
_num_entries++;
|
||||
_memory_used += need_space_here;
|
||||
}
|
||||
|
||||
void message_buffer::set_freshness(int32_t offset, bool is_fresh) {
|
||||
struct buffer_entry *entry = get_buffer_entry(offset);
|
||||
entry->is_fresh = is_fresh;
|
||||
}
|
||||
|
||||
bool message_buffer::get_freshness(int32_t offset) const {
|
||||
struct buffer_entry *entry = get_buffer_entry(offset);
|
||||
return entry->is_fresh;
|
||||
}
|
||||
|
||||
// Builds an ft_msg describing the entry at `offset`. Nothing is copied:
// *keydbt and *valdbt are filled to point at the key and value bytes stored
// inside this buffer, and the message's xids point into the entry as well.
ft_msg message_buffer::get_message(int32_t offset, DBT *keydbt, DBT *valdbt) const {
    struct buffer_entry *const stored = get_buffer_entry(offset);

    const XIDS xids = (XIDS) &stored->xids_s;
    const uint32_t key_bytes = stored->keylen;
    const uint32_t val_bytes = stored->vallen;

    // The key follows the xids array; the value follows the key.
    const void *key = toku_xids_get_end_of_array(xids);
    const void *val = (uint8_t *) key + key_bytes;

    const DBT *filled_key = toku_fill_dbt(keydbt, key, key_bytes);
    const DBT *filled_val = toku_fill_dbt(valdbt, val, val_bytes);

    return ft_msg(filled_key, filled_val, (enum ft_msg_type) stored->type, stored->msn, xids);
}
|
||||
|
||||
void message_buffer::get_message_key_msn(int32_t offset, DBT *key, MSN *msn) const {
|
||||
struct buffer_entry *entry = get_buffer_entry(offset);
|
||||
if (key != nullptr) {
|
||||
toku_fill_dbt(key, toku_xids_get_end_of_array((XIDS) &entry->xids_s), entry->keylen);
|
||||
}
|
||||
if (msn != nullptr) {
|
||||
*msn = entry->msn;
|
||||
}
|
||||
}
|
||||
|
||||
int message_buffer::num_entries() const {
|
||||
return _num_entries;
|
||||
}
|
||||
|
||||
size_t message_buffer::buffer_size_in_use() const {
|
||||
return _memory_used;
|
||||
}
|
||||
|
||||
size_t message_buffer::memory_size_in_use() const {
|
||||
return sizeof(*this) + _memory_used;
|
||||
}
|
||||
|
||||
size_t message_buffer::memory_footprint() const {
|
||||
#ifdef TOKU_DEBUG_PARANOID
|
||||
// Enable this code if you want to verify that the new way of computing
|
||||
// the memory footprint is the same as the old.
|
||||
// It slows the code down by perhaps 10%.
|
||||
assert(_memory_usable == toku_malloc_usable_size(_memory));
|
||||
size_t fp = toku_memory_footprint(_memory, _memory_used);
|
||||
size_t fpg = toku_memory_footprint_given_usable_size(_memory_used, _memory_usable);
|
||||
if (fp != fpg) printf("ptr=%p mu=%ld fp=%ld fpg=%ld\n", _memory, _memory_usable, fp, fpg);
|
||||
assert(fp == fpg);
|
||||
#endif // TOKU_DEBUG_PARANOID
|
||||
return sizeof(*this) + toku_memory_footprint_given_usable_size(_memory_used, _memory_usable);
|
||||
}
|
||||
|
||||
// Two buffers are equal when they use the same number of bytes and those
// bytes compare identical.
bool message_buffer::equals(message_buffer *other) const {
    if (_memory_used != other->_memory_used) {
        return false;
    }
    return memcmp(_memory, other->_memory, _memory_used) == 0;
}
|
||||
|
||||
void message_buffer::serialize_to_wbuf(struct wbuf *wb) const {
|
||||
wbuf_nocrc_int(wb, _num_entries);
|
||||
struct msg_serialize_fn {
|
||||
struct wbuf *wb;
|
||||
msg_serialize_fn(struct wbuf *w) : wb(w) { }
|
||||
int operator()(const ft_msg &msg, bool is_fresh) {
|
||||
msg.serialize_to_wbuf(wb, is_fresh);
|
||||
return 0;
|
||||
}
|
||||
} serialize_fn(wb);
|
||||
iterate(serialize_fn);
|
||||
}
|
||||
//void static stats(struct wbuf *wb) const {
|
||||
// wbuf_nocrc_int(wb, _num_entries);
|
||||
// struct msg_serialize_fn {
|
||||
// struct wbuf *wb;
|
||||
// msg_serialize_fn(struct wbuf *w) : wb(w) { }
|
||||
// int operator()(const ft_msg &msg, bool is_fresh) {
|
||||
// msg.serialize_to_wbuf(wb, is_fresh);
|
||||
// return 0;
|
||||
// }
|
||||
// } serialize_fn(wb);
|
||||
// iterate(serialize_fn);
|
||||
//}
|
||||
size_t message_buffer::msg_memsize_in_buffer(const ft_msg &msg) {
|
||||
const uint32_t keylen = msg.kdbt()->size;
|
||||
const uint32_t datalen = msg.vdbt()->size;
|
||||
const size_t xidslen = toku_xids_get_size(msg.xids());
|
||||
return sizeof(struct buffer_entry) + keylen + datalen + xidslen - sizeof(XIDS_S);
|
||||
}
|
@ -1,131 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ft/msg.h"
|
||||
#include "ft/txn/xids.h"
|
||||
#include "util/dbt.h"
|
||||
|
||||
class message_buffer {
|
||||
public:
|
||||
void create();
|
||||
|
||||
void clone(message_buffer *dst);
|
||||
|
||||
void destroy();
|
||||
|
||||
// effect: deserializes a message buffer from the given rbuf
|
||||
// returns: *fresh_offsets (etc) malloc'd to be num_entries large and
|
||||
// populated with *nfresh (etc) offsets in the message buffer
|
||||
// requires: if fresh_offsets (etc) != nullptr, then nfresh != nullptr
|
||||
void deserialize_from_rbuf(struct rbuf *rb,
|
||||
int32_t **fresh_offsets, int32_t *nfresh,
|
||||
int32_t **stale_offsets, int32_t *nstale,
|
||||
int32_t **broadcast_offsets, int32_t *nbroadcast);
|
||||
|
||||
// effect: deserializes a message buffer whose messages are at version 13/14
|
||||
// returns: similar to deserialize_from_rbuf(), excpet there are no stale messages
|
||||
// and each message is assigned a sequential value from *highest_unused_msn_for_upgrade,
|
||||
// which is modified as needed using toku_sync_fech_and_sub()
|
||||
// returns: the highest MSN assigned to any message in this buffer
|
||||
// requires: similar to deserialize_from_rbuf(), and highest_unused_msn_for_upgrade != nullptr
|
||||
MSN deserialize_from_rbuf_v13(struct rbuf *rb,
|
||||
MSN *highest_unused_msn_for_upgrade,
|
||||
int32_t **fresh_offsets, int32_t *nfresh,
|
||||
int32_t **broadcast_offsets, int32_t *nbroadcast);
|
||||
|
||||
void enqueue(const ft_msg &msg, bool is_fresh, int32_t *offset);
|
||||
|
||||
void set_freshness(int32_t offset, bool is_fresh);
|
||||
|
||||
bool get_freshness(int32_t offset) const;
|
||||
|
||||
ft_msg get_message(int32_t offset, DBT *keydbt, DBT *valdbt) const;
|
||||
|
||||
void get_message_key_msn(int32_t offset, DBT *key, MSN *msn) const;
|
||||
|
||||
int num_entries() const;
|
||||
|
||||
size_t buffer_size_in_use() const;
|
||||
|
||||
size_t memory_size_in_use() const;
|
||||
|
||||
size_t memory_footprint() const;
|
||||
|
||||
template <typename F>
|
||||
int iterate(F &fn) const {
|
||||
for (int32_t offset = 0; offset < _memory_used; ) {
|
||||
DBT k, v;
|
||||
const ft_msg msg = get_message(offset, &k, &v);
|
||||
bool is_fresh = get_freshness(offset);
|
||||
int r = fn(msg, is_fresh);
|
||||
if (r != 0) {
|
||||
return r;
|
||||
}
|
||||
offset += msg_memsize_in_buffer(msg);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool equals(message_buffer *other) const;
|
||||
|
||||
void serialize_to_wbuf(struct wbuf *wb) const;
|
||||
|
||||
static size_t msg_memsize_in_buffer(const ft_msg &msg);
|
||||
|
||||
private:
|
||||
void _resize(size_t new_size);
|
||||
|
||||
// If this isn't packged, the compiler aligns the xids array and we waste a lot of space
|
||||
struct __attribute__((__packed__)) buffer_entry {
|
||||
unsigned int keylen;
|
||||
unsigned int vallen;
|
||||
unsigned char type;
|
||||
bool is_fresh;
|
||||
MSN msn;
|
||||
XIDS_S xids_s;
|
||||
};
|
||||
|
||||
struct buffer_entry *get_buffer_entry(int32_t offset) const;
|
||||
|
||||
int _num_entries;
|
||||
char *_memory; // An array of bytes into which buffer entries are embedded.
|
||||
int _memory_size; // How big is _memory
|
||||
int _memory_used; // How many bytes are in use?
|
||||
size_t _memory_usable; // a cached result of toku_malloc_usable_size(_memory).
|
||||
};
|
File diff suppressed because it is too large
Load Diff
@ -1,608 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ft/bndata.h"
|
||||
#include "ft/comparator.h"
|
||||
#include "ft/ft.h"
|
||||
#include "ft/msg_buffer.h"
|
||||
|
||||
/* Pivot keys.
|
||||
* Child 0's keys are <= pivotkeys[0].
|
||||
* Child 1's keys are <= pivotkeys[1].
|
||||
* Child 1's keys are > pivotkeys[0].
|
||||
* etc
|
||||
*/
|
||||
class ftnode_pivot_keys {
|
||||
public:
|
||||
// effect: create an empty set of pivot keys
|
||||
void create_empty();
|
||||
|
||||
// effect: create pivot keys by copying the given DBT array
|
||||
void create_from_dbts(const DBT *keys, int n);
|
||||
|
||||
// effect: create pivot keys as a clone of an existing set of pivotkeys
|
||||
void create_from_pivot_keys(const ftnode_pivot_keys &pivotkeys);
|
||||
|
||||
void destroy();
|
||||
|
||||
// effect: deserialize pivot keys previously serialized by serialize_to_wbuf()
|
||||
void deserialize_from_rbuf(struct rbuf *rb, int n);
|
||||
|
||||
// returns: unowned DBT representing the i'th pivot key
|
||||
DBT get_pivot(int i) const;
|
||||
|
||||
// effect: fills a DBT with the i'th pivot key
|
||||
// returns: the given dbt
|
||||
DBT *fill_pivot(int i, DBT *dbt) const;
|
||||
|
||||
// effect: insert a pivot into the i'th position, shifting others to the right
|
||||
void insert_at(const DBT *key, int i);
|
||||
|
||||
// effect: append pivotkeys to the end of our own pivot keys
|
||||
void append(const ftnode_pivot_keys &pivotkeys);
|
||||
|
||||
// effect: replace the pivot at the i'th position
|
||||
void replace_at(const DBT *key, int i);
|
||||
|
||||
// effect: removes the i'th pivot key, shifting others to the left
|
||||
void delete_at(int i);
|
||||
|
||||
// effect: split the pivot keys, removing all pivots at position greater
|
||||
// than or equal to `i' and storing them in *other
|
||||
// requires: *other is empty (size == 0)
|
||||
void split_at(int i, ftnode_pivot_keys *other);
|
||||
|
||||
// effect: serialize pivot keys to a wbuf
|
||||
// requires: wbuf has at least ftnode_pivot_keys::total_size() bytes available
|
||||
void serialize_to_wbuf(struct wbuf *wb) const;
|
||||
|
||||
int num_pivots() const;
|
||||
|
||||
// return: the total size of this data structure
|
||||
size_t total_size() const;
|
||||
|
||||
// return: the sum of the keys sizes of each pivot (for serialization)
|
||||
size_t serialized_size() const;
|
||||
|
||||
private:
|
||||
inline size_t _align4(size_t x) const {
|
||||
return roundup_to_multiple(4, x);
|
||||
}
|
||||
|
||||
// effect: create pivot keys, in fixed key format, by copying the given key array
|
||||
void _create_from_fixed_keys(const char *fixedkeys, size_t fixed_keylen, int n);
|
||||
|
||||
char *_fixed_key(int i) const {
|
||||
return &_fixed_keys[i * _fixed_keylen_aligned];
|
||||
}
|
||||
|
||||
bool _fixed_format() const {
|
||||
return _fixed_keys != nullptr;
|
||||
}
|
||||
|
||||
void sanity_check() const;
|
||||
|
||||
void _insert_at_dbt(const DBT *key, int i);
|
||||
void _append_dbt(const ftnode_pivot_keys &pivotkeys);
|
||||
void _replace_at_dbt(const DBT *key, int i);
|
||||
void _delete_at_dbt(int i);
|
||||
void _split_at_dbt(int i, ftnode_pivot_keys *other);
|
||||
|
||||
void _insert_at_fixed(const DBT *key, int i);
|
||||
void _append_fixed(const ftnode_pivot_keys &pivotkeys);
|
||||
void _replace_at_fixed(const DBT *key, int i);
|
||||
void _delete_at_fixed(int i);
|
||||
void _split_at_fixed(int i, ftnode_pivot_keys *other);
|
||||
|
||||
// adds/destroys keys at a certain index (in dbt format),
|
||||
// maintaining _total_size, but not _num_pivots
|
||||
void _add_key_dbt(const DBT *key, int i);
|
||||
void _destroy_key_dbt(int i);
|
||||
|
||||
// conversions to and from packed key array format
|
||||
void _convert_to_dbt_format();
|
||||
void _convert_to_fixed_format();
|
||||
|
||||
// If every key is _fixed_keylen long, then _fixed_key is a
|
||||
// packed array of keys..
|
||||
char *_fixed_keys;
|
||||
// The actual length of the fixed key
|
||||
size_t _fixed_keylen;
|
||||
// The aligned length that we use for fixed key storage
|
||||
size_t _fixed_keylen_aligned;
|
||||
|
||||
// ..otherwise _fixed_keys is null and we store an array of dbts,
|
||||
// each representing a key. this is simpler but less cache-efficient.
|
||||
DBT *_dbt_keys;
|
||||
|
||||
int _num_pivots;
|
||||
size_t _total_size;
|
||||
};
|
||||
|
||||
extern int writing_rollback;
|
||||
|
||||
extern "C" {
|
||||
extern uint force_recovery;
|
||||
}
|
||||
|
||||
// TODO: class me up
|
||||
struct ftnode {
|
||||
// max_msn_applied that will be written to disk
|
||||
MSN max_msn_applied_to_node_on_disk;
|
||||
unsigned int flags;
|
||||
// Which block number is this node?
|
||||
BLOCKNUM blocknum;
|
||||
// What version of the data structure?
|
||||
int layout_version;
|
||||
// different (<) from layout_version if upgraded from a previous version
|
||||
// (useful for debugging)
|
||||
int layout_version_original;
|
||||
// transient, not serialized to disk, (useful for debugging)
|
||||
int layout_version_read_from_disk;
|
||||
// build_id (svn rev number) of software that wrote this node to disk
|
||||
uint32_t build_id;
|
||||
// height is always >= 0. 0 for leaf, >0 for nonleaf.
|
||||
int height;
|
||||
int dirty_;
|
||||
uint32_t fullhash;
|
||||
|
||||
void set_dirty() {
|
||||
if(force_recovery) assert(writing_rollback);
|
||||
dirty_ = 1;
|
||||
}
|
||||
|
||||
void clear_dirty() {
|
||||
dirty_ = 0;
|
||||
}
|
||||
|
||||
bool dirty() {
|
||||
return dirty_;
|
||||
}
|
||||
|
||||
// for internal nodes, if n_children==fanout+1 then the tree needs to be
|
||||
// rebalanced. for leaf nodes, represents number of basement nodes
|
||||
int n_children;
|
||||
ftnode_pivot_keys pivotkeys;
|
||||
|
||||
// What's the oldest referenced xid that this node knows about? The real
|
||||
// oldest referenced xid might be younger, but this is our best estimate.
|
||||
// We use it as a heuristic to transition provisional mvcc entries from
|
||||
// provisional to committed (from implicity committed to really committed).
|
||||
//
|
||||
// A better heuristic would be the oldest live txnid, but we use this since
|
||||
// it still works well most of the time, and its readily available on the
|
||||
// inject code path.
|
||||
TXNID oldest_referenced_xid_known;
|
||||
|
||||
// array of size n_children, consisting of ftnode partitions
|
||||
// each one is associated with a child for internal nodes, the ith
|
||||
// partition corresponds to the ith message buffer for leaf nodes, the ith
|
||||
// partition corresponds to the ith basement node
|
||||
struct ftnode_partition *bp;
|
||||
struct ctpair *ct_pair;
|
||||
};
|
||||
typedef struct ftnode *FTNODE;
|
||||
|
||||
// data of an available partition of a leaf ftnode
|
||||
struct ftnode_leaf_basement_node {
|
||||
bn_data data_buffer;
|
||||
unsigned int seqinsert; // number of sequential inserts to this leaf
|
||||
MSN max_msn_applied; // max message sequence number applied
|
||||
bool stale_ancestor_messages_applied;
|
||||
// current count of rows added or removed as a result of message application
|
||||
// to this basement node, gets reset when node is undirtied.
|
||||
// Used to back out tree scoped LRC id node is evicted but not persisted
|
||||
int64_t logical_rows_delta;
|
||||
STAT64INFO_S stat64_delta; // change in stat64 counters since basement was last written to disk
|
||||
};
|
||||
typedef struct ftnode_leaf_basement_node *BASEMENTNODE;
|
||||
|
||||
// State of an ftnode partition.
// NOTE(review): the original comment says this enum is declared packed so
// that it takes only 1 byte where used, but no packing attribute is visible
// on this declaration — confirm.
enum pt_state {
    PT_INVALID = 0,
    PT_ON_DISK = 1,
    PT_COMPRESSED = 2,
    PT_AVAIL = 3
};
|
||||
|
||||
// Discriminator for the union inside ftnode_child_pointer.
enum ftnode_child_tag {
    BCT_INVALID  = 0,
    BCT_NULL     = 1,
    BCT_SUBBLOCK = 2,
    BCT_LEAF     = 3,
    BCT_NONLEAF  = 4
};
|
||||
|
||||
typedef toku::omt<int32_t> off_omt_t;
|
||||
typedef toku::omt<int32_t, int32_t, true> marked_off_omt_t;
|
||||
|
||||
// data of an available partition of a nonleaf ftnode
|
||||
struct ftnode_nonleaf_childinfo {
|
||||
message_buffer msg_buffer;
|
||||
off_omt_t broadcast_list;
|
||||
marked_off_omt_t fresh_message_tree;
|
||||
off_omt_t stale_message_tree;
|
||||
uint64_t flow[2]; // current and last checkpoint
|
||||
};
|
||||
typedef struct ftnode_nonleaf_childinfo *NONLEAF_CHILDINFO;
|
||||
|
||||
typedef struct ftnode_child_pointer {
|
||||
union {
|
||||
struct sub_block *subblock;
|
||||
struct ftnode_nonleaf_childinfo *nonleaf;
|
||||
struct ftnode_leaf_basement_node *leaf;
|
||||
} u;
|
||||
enum ftnode_child_tag tag;
|
||||
} FTNODE_CHILD_POINTER;
|
||||
|
||||
// On-disk location of one partition of an ftnode.
struct ftnode_disk_data {
    // Offset to the beginning of the partition on disk and its length, as
    // needed to read a partition off of disk. Both values are only
    // meaningful while the node is clean; for a dirty node they are
    // meaningless.
    //
    // START is the distance from the end of the compressed node_info data to
    // the beginning of the compressed partition.
    // SIZE is the size of the compressed partition.
    //
    // Rationale: We cannot store the size from the beginning of the node
    // since we don't know how big the header will be. However, later when we
    // are doing aligned writes, we won't be able to store the size from the
    // end since we want things to align.
    uint32_t start;
    uint32_t size;
};
|
||||
typedef struct ftnode_disk_data *FTNODE_DISK_DATA;
|
||||
|
||||
// TODO: Turn these into functions instead of macros
|
||||
#define BP_START(node_dd,i) ((node_dd)[i].start)
|
||||
#define BP_SIZE(node_dd,i) ((node_dd)[i].size)
|
||||
|
||||
// a ftnode partition, associated with a child of a node
|
||||
struct ftnode_partition {
|
||||
// the following three variables are used for nonleaf nodes
|
||||
// for leaf nodes, they are meaningless
|
||||
BLOCKNUM blocknum; // blocknum of child
|
||||
|
||||
// How many bytes worth of work was performed by messages in each buffer.
|
||||
uint64_t workdone;
|
||||
|
||||
//
|
||||
// pointer to the partition. Depending on the state, they may be different things
|
||||
// if state == PT_INVALID, then the node was just initialized and ptr == NULL
|
||||
// if state == PT_ON_DISK, then ptr == NULL
|
||||
// if state == PT_COMPRESSED, then ptr points to a struct sub_block*
|
||||
// if state == PT_AVAIL, then ptr is:
|
||||
// a struct ftnode_nonleaf_childinfo for internal nodes,
|
||||
// a struct ftnode_leaf_basement_node for leaf nodes
|
||||
//
|
||||
struct ftnode_child_pointer ptr;
|
||||
//
|
||||
// at any time, the partitions may be in one of the following three states (stored in pt_state):
|
||||
// PT_INVALID - means that the partition was just initialized
|
||||
// PT_ON_DISK - means that the partition is not in memory and needs to be read from disk. To use, must read off disk and decompress
|
||||
// PT_COMPRESSED - means that the partition is compressed in memory. To use, must decompress
|
||||
// PT_AVAIL - means the partition is decompressed and in memory
|
||||
//
|
||||
enum pt_state state; // make this an enum to make debugging easier.
|
||||
|
||||
// clock count used by pe_callback to determine if a node should be evicted or not
|
||||
// for now, saturating the count at 1
|
||||
uint8_t clock_count;
|
||||
};
|
||||
|
||||
//
|
||||
// TODO: Fix all these names
|
||||
// Organize declarations
|
||||
// Fix widespread parameter ordering inconsistencies
|
||||
//
|
||||
BASEMENTNODE toku_create_empty_bn(void);
|
||||
BASEMENTNODE toku_create_empty_bn_no_buffer(void); // create a basement node with a null buffer.
|
||||
NONLEAF_CHILDINFO toku_clone_nl(NONLEAF_CHILDINFO orig_childinfo);
|
||||
BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn);
|
||||
NONLEAF_CHILDINFO toku_create_empty_nl(void);
|
||||
void destroy_basement_node (BASEMENTNODE bn);
|
||||
void destroy_nonleaf_childinfo (NONLEAF_CHILDINFO nl);
|
||||
void toku_destroy_ftnode_internals(FTNODE node);
|
||||
void toku_ftnode_free (FTNODE *node);
|
||||
bool toku_ftnode_fully_in_memory(FTNODE node);
|
||||
void toku_ftnode_assert_fully_in_memory(FTNODE node);
|
||||
void toku_evict_bn_from_memory(FTNODE node, int childnum, FT ft);
|
||||
BASEMENTNODE toku_detach_bn(FTNODE node, int childnum);
|
||||
void toku_ftnode_update_disk_stats(FTNODE ftnode, FT ft, bool for_checkpoint);
|
||||
void toku_ftnode_clone_partitions(FTNODE node, FTNODE cloned_node);
|
||||
|
||||
void toku_initialize_empty_ftnode(FTNODE node, BLOCKNUM blocknum, int height, int num_children,
|
||||
int layout_version, unsigned int flags);
|
||||
|
||||
int toku_ftnode_which_child(FTNODE node, const DBT *k, const toku::comparator &cmp);
|
||||
void toku_ftnode_save_ct_pair(CACHEKEY key, void *value_data, PAIR p);
|
||||
|
||||
//
|
||||
// TODO: put the heaviside functions into their respective 'struct .*extra;' namespaces
|
||||
//
|
||||
struct toku_msg_buffer_key_msn_heaviside_extra {
|
||||
const toku::comparator &cmp;
|
||||
message_buffer *msg_buffer;
|
||||
const DBT *key;
|
||||
MSN msn;
|
||||
toku_msg_buffer_key_msn_heaviside_extra(const toku::comparator &c, message_buffer *mb, const DBT *k, MSN m) :
|
||||
cmp(c), msg_buffer(mb), key(k), msn(m) {
|
||||
}
|
||||
};
|
||||
int toku_msg_buffer_key_msn_heaviside(const int32_t &v, const struct toku_msg_buffer_key_msn_heaviside_extra &extra);
|
||||
|
||||
struct toku_msg_buffer_key_msn_cmp_extra {
|
||||
const toku::comparator &cmp;
|
||||
message_buffer *msg_buffer;
|
||||
toku_msg_buffer_key_msn_cmp_extra(const toku::comparator &c, message_buffer *mb) :
|
||||
cmp(c), msg_buffer(mb) {
|
||||
}
|
||||
};
|
||||
int toku_msg_buffer_key_msn_cmp(const struct toku_msg_buffer_key_msn_cmp_extra &extrap, const int &a, const int &b);
|
||||
|
||||
struct toku_msg_leafval_heaviside_extra {
|
||||
const toku::comparator &cmp;
|
||||
DBT const *const key;
|
||||
toku_msg_leafval_heaviside_extra(const toku::comparator &c, const DBT *k) :
|
||||
cmp(c), key(k) {
|
||||
}
|
||||
};
|
||||
int toku_msg_leafval_heaviside(DBT const &kdbt, const struct toku_msg_leafval_heaviside_extra &be);
|
||||
|
||||
unsigned int toku_bnc_nbytesinbuf(NONLEAF_CHILDINFO bnc);
|
||||
int toku_bnc_n_entries(NONLEAF_CHILDINFO bnc);
|
||||
long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc);
|
||||
long toku_bnc_memory_used(NONLEAF_CHILDINFO bnc);
|
||||
void toku_bnc_insert_msg(NONLEAF_CHILDINFO bnc, const void *key, uint32_t keylen, const void *data, uint32_t datalen, enum ft_msg_type type, MSN msn, XIDS xids, bool is_fresh, const toku::comparator &cmp);
|
||||
void toku_bnc_empty(NONLEAF_CHILDINFO bnc);
|
||||
void toku_bnc_flush_to_child(FT ft, NONLEAF_CHILDINFO bnc, FTNODE child, TXNID parent_oldest_referenced_xid_known);
|
||||
bool toku_bnc_should_promote(FT ft, NONLEAF_CHILDINFO bnc) __attribute__((const, nonnull));
|
||||
|
||||
bool toku_ftnode_nonleaf_is_gorged(FTNODE node, uint32_t nodesize);
|
||||
uint32_t toku_ftnode_leaf_num_entries(FTNODE node);
|
||||
void toku_ftnode_leaf_rebalance(FTNODE node, unsigned int basementnodesize);
|
||||
|
||||
void toku_ftnode_leaf_run_gc(FT ft, FTNODE node);
|
||||
|
||||
enum reactivity {
|
||||
RE_STABLE,
|
||||
RE_FUSIBLE,
|
||||
RE_FISSIBLE
|
||||
};
|
||||
|
||||
enum reactivity toku_ftnode_get_reactivity(FT ft, FTNODE node);
|
||||
enum reactivity toku_ftnode_get_nonleaf_reactivity(FTNODE node, unsigned int fanout);
|
||||
enum reactivity toku_ftnode_get_leaf_reactivity(FTNODE node, uint32_t nodesize);
|
||||
|
||||
// Returns toku_cachefile_fname_in_env() for the cachefile obtained from the
// node's ct_pair, or nullptr when the node has no ct_pair or the pair yields
// no cachefile.
inline const char* toku_ftnode_get_cachefile_fname_in_env(FTNODE node) {
    if (!node->ct_pair) {
        return nullptr;
    }
    CACHEFILE backing_file = toku_pair_get_cachefile(node->ct_pair);
    if (!backing_file) {
        return nullptr;
    }
    return toku_cachefile_fname_in_env(backing_file);
}
|
||||
|
||||
/**
|
||||
* Finds the next child for HOT to flush to, given that everything up to
|
||||
* and including k has been flattened.
|
||||
*
|
||||
* If k falls between pivots in node, then we return the childnum where k
|
||||
* lies.
|
||||
*
|
||||
* If k is equal to some pivot, then we return the next (to the right)
|
||||
* childnum.
|
||||
*/
|
||||
int toku_ftnode_hot_next_child(
|
||||
FTNODE node,
|
||||
const DBT* k,
|
||||
const toku::comparator &cmp);
|
||||
|
||||
void toku_ftnode_put_msg(
|
||||
const toku::comparator& cmp,
|
||||
ft_update_func update_fun,
|
||||
FTNODE node,
|
||||
int target_childnum,
|
||||
const ft_msg& msg,
|
||||
bool is_fresh,
|
||||
txn_gc_info* gc_info,
|
||||
size_t flow_deltas[],
|
||||
STAT64INFO stats_to_update,
|
||||
int64_t* logical_rows_delta);
|
||||
|
||||
void toku_ft_bn_apply_msg_once(
|
||||
BASEMENTNODE bn,
|
||||
const ft_msg& msg,
|
||||
uint32_t idx,
|
||||
uint32_t le_keylen,
|
||||
LEAFENTRY le,
|
||||
txn_gc_info* gc_info,
|
||||
uint64_t* workdonep,
|
||||
STAT64INFO stats_to_update,
|
||||
int64_t* logical_rows_delta);
|
||||
|
||||
void toku_ft_bn_apply_msg(
|
||||
const toku::comparator& cmp,
|
||||
ft_update_func update_fun,
|
||||
BASEMENTNODE bn,
|
||||
const ft_msg& msg,
|
||||
txn_gc_info* gc_info,
|
||||
uint64_t* workdone,
|
||||
STAT64INFO stats_to_update,
|
||||
int64_t* logical_rows_delta);
|
||||
|
||||
void toku_ft_leaf_apply_msg(
|
||||
const toku::comparator& cmp,
|
||||
ft_update_func update_fun,
|
||||
FTNODE node,
|
||||
int target_childnum,
|
||||
const ft_msg& msg,
|
||||
txn_gc_info* gc_info,
|
||||
uint64_t* workdone,
|
||||
STAT64INFO stats_to_update,
|
||||
int64_t* logical_rows_delta);
|
||||
|
||||
//
|
||||
// Message management for orthopush
|
||||
//
|
||||
|
||||
struct ancestors {
|
||||
// This is the root node if next is NULL (since the root has no ancestors)
|
||||
FTNODE node;
|
||||
// Which buffer holds messages destined to the node whose ancestors this list represents.
|
||||
int childnum;
|
||||
struct ancestors *next;
|
||||
};
|
||||
typedef struct ancestors *ANCESTORS;
|
||||
|
||||
void toku_ft_bnc_move_messages_to_stale(FT ft, NONLEAF_CHILDINFO bnc);
|
||||
|
||||
void toku_move_ftnode_messages_to_stale(FT ft, FTNODE node);
|
||||
|
||||
// TODO: Should ft_handle just be FT?
|
||||
class pivot_bounds;
|
||||
void toku_apply_ancestors_messages_to_node(FT_HANDLE t, FTNODE node, ANCESTORS ancestors,
|
||||
const pivot_bounds &bounds,
|
||||
bool *msgs_applied, int child_to_read);
|
||||
|
||||
bool toku_ft_leaf_needs_ancestors_messages(FT ft, FTNODE node, ANCESTORS ancestors,
|
||||
const pivot_bounds &bounds,
|
||||
MSN *const max_msn_in_path, int child_to_read);
|
||||
|
||||
void toku_ft_bn_update_max_msn(FTNODE node, MSN max_msn_applied, int child_to_read);
|
||||
|
||||
struct ft_search;
|
||||
int toku_ft_search_which_child(const toku::comparator &cmp, FTNODE node, ft_search *search);
|
||||
|
||||
//
|
||||
// internal node inline functions
|
||||
// TODO: Turn the macros into real functions
|
||||
//
|
||||
|
||||
// Tags partition i of `node` as BCT_NULL. Only the tag is written; the
// pointer union of the partition is left untouched.
static inline void set_BNULL(FTNODE node, int i) {
    paranoid_invariant(i >= 0);
    paranoid_invariant(i < node->n_children);
    FTNODE_CHILD_POINTER *child = &node->bp[i].ptr;
    child->tag = BCT_NULL;
}
|
||||
|
||||
// Reports whether partition i of `node` currently carries the BCT_NULL tag.
static inline bool is_BNULL (FTNODE node, int i) {
    paranoid_invariant(i >= 0);
    paranoid_invariant(i < node->n_children);
    const enum ftnode_child_tag current_tag = node->bp[i].ptr.tag;
    return current_tag == BCT_NULL;
}
|
||||
|
||||
// Returns the nonleaf child info stored in partition i of `node`.
// Paranoid builds assert that i is in range and that the partition is
// tagged BCT_NONLEAF.
static inline NONLEAF_CHILDINFO BNC(FTNODE node, int i) {
    paranoid_invariant(i >= 0);
    paranoid_invariant(i < node->n_children);
    const FTNODE_CHILD_POINTER *child = &node->bp[i].ptr;
    paranoid_invariant(child->tag == BCT_NONLEAF);
    return child->u.nonleaf;
}
|
||||
|
||||
// Stores `nl` in partition i of `node` and tags the partition BCT_NONLEAF.
static inline void set_BNC(FTNODE node, int i, NONLEAF_CHILDINFO nl) {
    paranoid_invariant(i >= 0);
    paranoid_invariant(i < node->n_children);
    FTNODE_CHILD_POINTER &child = node->bp[i].ptr;
    child.tag = BCT_NONLEAF;
    child.u.nonleaf = nl;
}
|
||||
|
||||
// Returns the basement node stored in partition i of `node`.
// Paranoid builds assert that i is in range and that the partition is
// tagged BCT_LEAF.
static inline BASEMENTNODE BLB(FTNODE node, int i) {
    paranoid_invariant(i >= 0);
    // The range check is done on values cast to unsigned because the
    // optimizer handles the signed comparison of i against n_children
    // poorly; n_children is asserted positive first so the cast is safe.
    paranoid_invariant(node->n_children > 0);
    paranoid_invariant((unsigned) i < (unsigned) node->n_children);
    const FTNODE_CHILD_POINTER *child = &node->bp[i].ptr;
    paranoid_invariant(child->tag == BCT_LEAF);
    return child->u.leaf;
}
|
||||
|
||||
// Stores `bn` in partition i of `node` and tags the partition BCT_LEAF.
static inline void set_BLB(FTNODE node, int i, BASEMENTNODE bn) {
    paranoid_invariant(i >= 0);
    paranoid_invariant(i < node->n_children);
    FTNODE_CHILD_POINTER &child = node->bp[i].ptr;
    child.tag = BCT_LEAF;
    child.u.leaf = bn;
}
|
||||
|
||||
// Returns the sub_block stored in partition i of `node`.
// Paranoid builds assert that i is in range and that the partition is
// tagged BCT_SUBBLOCK.
static inline struct sub_block *BSB(FTNODE node, int i) {
    paranoid_invariant(i >= 0);
    paranoid_invariant(i < node->n_children);
    const FTNODE_CHILD_POINTER *child = &node->bp[i].ptr;
    paranoid_invariant(child->tag == BCT_SUBBLOCK);
    return child->u.subblock;
}
|
||||
|
||||
// Stores `sb` in partition i of `node` and tags the partition BCT_SUBBLOCK.
static inline void set_BSB(FTNODE node, int i, struct sub_block *sb) {
    paranoid_invariant(i >= 0);
    paranoid_invariant(i < node->n_children);
    FTNODE_CHILD_POINTER &child = node->bp[i].ptr;
    child.tag = BCT_SUBBLOCK;
    child.u.subblock = sb;
}
|
||||
|
||||
// ftnode partition macros
|
||||
// BP stands for ftnode_partition
|
||||
#define BP_BLOCKNUM(node,i) ((node)->bp[i].blocknum)
|
||||
#define BP_STATE(node,i) ((node)->bp[i].state)
|
||||
#define BP_WORKDONE(node, i)((node)->bp[i].workdone)
|
||||
|
||||
//
|
||||
// macros for managing a node's clock
|
||||
// Should be managed by ft-ops.c, NOT by serialize/deserialize
|
||||
//
|
||||
|
||||
//
|
||||
// BP_TOUCH_CLOCK is a plain store (not a compare and swap), even though multiple threads
|
||||
// that have a read lock on an internal node may try to touch the clock
|
||||
// simultaneously
|
||||
//
|
||||
#define BP_TOUCH_CLOCK(node, i) ((node)->bp[i].clock_count = 1)
|
||||
#define BP_SWEEP_CLOCK(node, i) ((node)->bp[i].clock_count = 0)
|
||||
#define BP_SHOULD_EVICT(node, i) ((node)->bp[i].clock_count == 0)
|
||||
// not crazy about having these two here, one is for the case where we create new
|
||||
// nodes, such as in splits and creating new roots, and the other is for when
|
||||
// we are deserializing a node and not all bp's are touched
|
||||
#define BP_INIT_TOUCHED_CLOCK(node, i) ((node)->bp[i].clock_count = 1)
|
||||
#define BP_INIT_UNTOUCHED_CLOCK(node, i) ((node)->bp[i].clock_count = 0)
|
||||
|
||||
// ftnode leaf basementnode macros,
|
||||
#define BLB_MAX_MSN_APPLIED(node,i) (BLB(node,i)->max_msn_applied)
|
||||
#define BLB_MAX_DSN_APPLIED(node,i) (BLB(node,i)->max_dsn_applied)
|
||||
#define BLB_DATA(node,i) (&(BLB(node,i)->data_buffer))
|
||||
#define BLB_NBYTESINDATA(node,i) (BLB_DATA(node,i)->get_disk_size())
|
||||
#define BLB_SEQINSERT(node,i) (BLB(node,i)->seqinsert)
|
||||
#define BLB_LRD(node, i) (BLB(node,i)->logical_rows_delta)
|
@ -1,438 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include <my_global.h>
|
||||
#include <string>
|
||||
|
||||
#include "portability/memory.h"
|
||||
|
||||
#include "ft/node.h"
|
||||
#include "ft/serialize/rbuf.h"
|
||||
#include "ft/serialize/wbuf.h"
|
||||
|
||||
// Puts the object into the empty state: no pivots, no storage in either
// representation, all size bookkeeping zeroed. Nothing is freed here.
void ftnode_pivot_keys::create_empty() {
    // storage for the two representations
    _fixed_keys = nullptr;
    _dbt_keys = nullptr;

    // bookkeeping
    _fixed_keylen = 0;
    _fixed_keylen_aligned = 0;
    _num_pivots = 0;
    _total_size = 0;
}
|
||||
|
||||
void ftnode_pivot_keys::create_from_dbts(const DBT *keys, int n) {
|
||||
create_empty();
|
||||
_num_pivots = n;
|
||||
|
||||
// see if every key has the same length
|
||||
bool keys_same_size = true;
|
||||
for (int i = 1; i < _num_pivots; i++) {
|
||||
if (keys[i].size != keys[i - 1].size) {
|
||||
keys_same_size = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (keys_same_size && _num_pivots > 0) {
|
||||
// if so, store pivots in a tightly packed array of fixed length keys
|
||||
_fixed_keylen = keys[0].size;
|
||||
_fixed_keylen_aligned = _align4(_fixed_keylen);
|
||||
_total_size = _fixed_keylen_aligned * _num_pivots;
|
||||
XMALLOC_N_ALIGNED(64, _total_size, _fixed_keys);
|
||||
for (int i = 0; i < _num_pivots; i++) {
|
||||
invariant(keys[i].size == _fixed_keylen);
|
||||
memcpy(_fixed_key(i), keys[i].data, _fixed_keylen);
|
||||
}
|
||||
} else {
|
||||
// otherwise we'll just store the pivots in an array of dbts
|
||||
XMALLOC_N_ALIGNED(64, _num_pivots, _dbt_keys);
|
||||
for (int i = 0; i < _num_pivots; i++) {
|
||||
size_t size = keys[i].size;
|
||||
toku_memdup_dbt(&_dbt_keys[i], keys[i].data, size);
|
||||
_total_size += size;
|
||||
}
|
||||
}
|
||||
|
||||
sanity_check();
|
||||
}
|
||||
|
||||
// Initializes this object in fixed format from an existing packed key array.
// `fixedkeys` is duplicated for n slots of _align4(fixed_keylen) bytes each,
// so the source is expected to use the same aligned slot layout.
void ftnode_pivot_keys::_create_from_fixed_keys(const char *fixedkeys, size_t fixed_keylen, int n) {
    create_empty();

    _fixed_keylen_aligned = _align4(fixed_keylen);
    _fixed_keylen = fixed_keylen;
    _num_pivots = n;

    // the copy covers whole aligned slots, not just the key bytes
    _total_size = _fixed_keylen_aligned * _num_pivots;
    XMEMDUP_N(_fixed_keys, fixedkeys, _total_size);
}
|
||||
|
||||
// effect: create pivot keys as a clone of an existing set of pivotkeys
|
||||
void ftnode_pivot_keys::create_from_pivot_keys(const ftnode_pivot_keys &pivotkeys) {
|
||||
if (pivotkeys._fixed_format()) {
|
||||
_create_from_fixed_keys(pivotkeys._fixed_keys, pivotkeys._fixed_keylen, pivotkeys._num_pivots);
|
||||
} else {
|
||||
create_from_dbts(pivotkeys._dbt_keys, pivotkeys._num_pivots);
|
||||
}
|
||||
|
||||
sanity_check();
|
||||
}
|
||||
|
||||
void ftnode_pivot_keys::destroy() {
|
||||
if (_dbt_keys != nullptr) {
|
||||
for (int i = 0; i < _num_pivots; i++) {
|
||||
toku_destroy_dbt(&_dbt_keys[i]);
|
||||
}
|
||||
toku_free(_dbt_keys);
|
||||
_dbt_keys = nullptr;
|
||||
}
|
||||
if (_fixed_keys != nullptr) {
|
||||
toku_free(_fixed_keys);
|
||||
_fixed_keys = nullptr;
|
||||
}
|
||||
_fixed_keylen = 0;
|
||||
_fixed_keylen_aligned = 0;
|
||||
_num_pivots = 0;
|
||||
_total_size = 0;
|
||||
}
|
||||
|
||||
void ftnode_pivot_keys::_convert_to_fixed_format() {
|
||||
invariant(!_fixed_format());
|
||||
|
||||
// convert to a tightly packed array of fixed length keys
|
||||
_fixed_keylen = _dbt_keys[0].size;
|
||||
_fixed_keylen_aligned = _align4(_fixed_keylen);
|
||||
_total_size = _fixed_keylen_aligned * _num_pivots;
|
||||
XMALLOC_N_ALIGNED(64, _total_size, _fixed_keys);
|
||||
for (int i = 0; i < _num_pivots; i++) {
|
||||
invariant(_dbt_keys[i].size == _fixed_keylen);
|
||||
memcpy(_fixed_key(i), _dbt_keys[i].data, _fixed_keylen);
|
||||
}
|
||||
|
||||
// destroy the dbt array format
|
||||
for (int i = 0; i < _num_pivots; i++) {
|
||||
toku_destroy_dbt(&_dbt_keys[i]);
|
||||
}
|
||||
toku_free(_dbt_keys);
|
||||
_dbt_keys = nullptr;
|
||||
|
||||
invariant(_fixed_format());
|
||||
sanity_check();
|
||||
}
|
||||
|
||||
void ftnode_pivot_keys::_convert_to_dbt_format() {
|
||||
invariant(_fixed_format());
|
||||
|
||||
// convert to an aray of dbts
|
||||
REALLOC_N_ALIGNED(64, _num_pivots, _dbt_keys);
|
||||
for (int i = 0; i < _num_pivots; i++) {
|
||||
toku_memdup_dbt(&_dbt_keys[i], _fixed_key(i), _fixed_keylen);
|
||||
}
|
||||
// pivots sizes are not aligned up dbt format
|
||||
_total_size = _num_pivots * _fixed_keylen;
|
||||
|
||||
// destroy the fixed key format
|
||||
toku_free(_fixed_keys);
|
||||
_fixed_keys = nullptr;
|
||||
_fixed_keylen = 0;
|
||||
_fixed_keylen_aligned = 0;
|
||||
|
||||
invariant(!_fixed_format());
|
||||
sanity_check();
|
||||
}
|
||||
|
||||
void ftnode_pivot_keys::deserialize_from_rbuf(struct rbuf *rb, int n) {
|
||||
_num_pivots = n;
|
||||
_total_size = 0;
|
||||
_fixed_keys = nullptr;
|
||||
_fixed_keylen = 0;
|
||||
_dbt_keys = nullptr;
|
||||
|
||||
XMALLOC_N_ALIGNED(64, _num_pivots, _dbt_keys);
|
||||
bool keys_same_size = true;
|
||||
for (int i = 0; i < _num_pivots; i++) {
|
||||
const void *pivotkeyptr;
|
||||
uint32_t size;
|
||||
rbuf_bytes(rb, &pivotkeyptr, &size);
|
||||
toku_memdup_dbt(&_dbt_keys[i], pivotkeyptr, size);
|
||||
_total_size += size;
|
||||
if (i > 0 && keys_same_size && _dbt_keys[i].size != _dbt_keys[i - 1].size) {
|
||||
// not all keys are the same size, we'll stick to the dbt array format
|
||||
keys_same_size = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (keys_same_size && _num_pivots > 0) {
|
||||
_convert_to_fixed_format();
|
||||
}
|
||||
|
||||
sanity_check();
|
||||
}
|
||||
|
||||
// Returns pivot i as a DBT by value. In dbt format this is a copy of the
// stored DBT struct; in fixed format a DBT is filled with the address of the
// packed key and _fixed_keylen.
DBT ftnode_pivot_keys::get_pivot(int i) const {
    paranoid_invariant(i < _num_pivots);
    if (!_fixed_format()) {
        return _dbt_keys[i];
    }

    paranoid_invariant(i * _fixed_keylen_aligned < _total_size);
    DBT packed_key;
    toku_fill_dbt(&packed_key, _fixed_key(i), _fixed_keylen);
    return packed_key;
}
|
||||
|
||||
// Fills the caller-supplied `dbt` with pivot i and returns `dbt`.
// Fixed format uses toku_fill_dbt on the packed key; dbt format uses
// toku_copyref_dbt on the stored DBT.
DBT *ftnode_pivot_keys::fill_pivot(int i, DBT *dbt) const {
    paranoid_invariant(i < _num_pivots);
    if (!_fixed_format()) {
        toku_copyref_dbt(dbt, _dbt_keys[i]);
        return dbt;
    }
    toku_fill_dbt(dbt, _fixed_key(i), _fixed_keylen);
    return dbt;
}
|
||||
|
||||
void ftnode_pivot_keys::_add_key_dbt(const DBT *key, int i) {
|
||||
toku_clone_dbt(&_dbt_keys[i], *key);
|
||||
_total_size += _dbt_keys[i].size;
|
||||
}
|
||||
|
||||
void ftnode_pivot_keys::_destroy_key_dbt(int i) {
|
||||
invariant(_total_size >= _dbt_keys[i].size);
|
||||
_total_size -= _dbt_keys[i].size;
|
||||
toku_destroy_dbt(&_dbt_keys[i]);
|
||||
}
|
||||
|
||||
void ftnode_pivot_keys::_insert_at_dbt(const DBT *key, int i) {
|
||||
// make space for a new pivot, slide existing keys to the right
|
||||
REALLOC_N_ALIGNED(64, _num_pivots + 1, _dbt_keys);
|
||||
memmove(&_dbt_keys[i + 1], &_dbt_keys[i], (_num_pivots - i) * sizeof(DBT));
|
||||
_add_key_dbt(key, i);
|
||||
}
|
||||
|
||||
// Inserts `key` at index i of the packed fixed-length array, sliding the
// slots at [i, _num_pivots) one position to the right. The key must be
// _fixed_keylen bytes long. _num_pivots is not changed here; insert_at()
// increments it after this returns.
void ftnode_pivot_keys::_insert_at_fixed(const DBT *key, int i) {
    REALLOC_N_ALIGNED(64, (_num_pivots + 1) * _fixed_keylen_aligned, _fixed_keys);
    memmove(_fixed_key(i + 1), _fixed_key(i), (_num_pivots - i) * _fixed_keylen_aligned);

    char *const slot = _fixed_key(i);
    memcpy(slot, key->data, _fixed_keylen);
    // Zero the alignment padding between _fixed_keylen and
    // _fixed_keylen_aligned. Whole slots are copied elsewhere (memmove above,
    // _append_fixed, _create_from_fixed_keys), so leaving the padding
    // uninitialized means those copies carry undefined bytes around.
    memset(slot + _fixed_keylen, 0, _fixed_keylen_aligned - _fixed_keylen);

    _total_size += _fixed_keylen_aligned;
}
|
||||
|
||||
void ftnode_pivot_keys::insert_at(const DBT *key, int i) {
|
||||
invariant(i <= _num_pivots); // it's ok to insert at the end, so we check <= n
|
||||
|
||||
// if the new key doesn't have the same size, we can't be in fixed format
|
||||
if (_fixed_format() && key->size != _fixed_keylen) {
|
||||
_convert_to_dbt_format();
|
||||
}
|
||||
|
||||
if (_fixed_format()) {
|
||||
_insert_at_fixed(key, i);
|
||||
} else {
|
||||
_insert_at_dbt(key, i);
|
||||
}
|
||||
_num_pivots++;
|
||||
|
||||
invariant(total_size() > 0);
|
||||
}
|
||||
|
||||
void ftnode_pivot_keys::_append_dbt(const ftnode_pivot_keys &pivotkeys) {
|
||||
REALLOC_N_ALIGNED(64, _num_pivots + pivotkeys._num_pivots, _dbt_keys);
|
||||
bool other_fixed = pivotkeys._fixed_format();
|
||||
for (int i = 0; i < pivotkeys._num_pivots; i++) {
|
||||
size_t size = other_fixed ? pivotkeys._fixed_keylen :
|
||||
pivotkeys._dbt_keys[i].size;
|
||||
toku_memdup_dbt(&_dbt_keys[_num_pivots + i],
|
||||
other_fixed ? pivotkeys._fixed_key(i) :
|
||||
pivotkeys._dbt_keys[i].data,
|
||||
size);
|
||||
_total_size += size;
|
||||
}
|
||||
}
|
||||
|
||||
void ftnode_pivot_keys::_append_fixed(const ftnode_pivot_keys &pivotkeys) {
|
||||
if (pivotkeys._fixed_format() && pivotkeys._fixed_keylen == _fixed_keylen) {
|
||||
// other pivotkeys have the same fixed keylen
|
||||
REALLOC_N_ALIGNED(64, (_num_pivots + pivotkeys._num_pivots) * _fixed_keylen_aligned, _fixed_keys);
|
||||
memcpy(_fixed_key(_num_pivots), pivotkeys._fixed_keys, pivotkeys._total_size);
|
||||
_total_size += pivotkeys._total_size;
|
||||
} else {
|
||||
// must convert to dbt format, other pivotkeys have different length'd keys
|
||||
_convert_to_dbt_format();
|
||||
_append_dbt(pivotkeys);
|
||||
}
|
||||
}
|
||||
|
||||
void ftnode_pivot_keys::append(const ftnode_pivot_keys &pivotkeys) {
|
||||
if (_fixed_format()) {
|
||||
_append_fixed(pivotkeys);
|
||||
} else {
|
||||
_append_dbt(pivotkeys);
|
||||
}
|
||||
_num_pivots += pivotkeys._num_pivots;
|
||||
|
||||
sanity_check();
|
||||
}
|
||||
|
||||
// Replaces the dbt in slot i with a clone of `key`: the old key is destroyed
// (and its size subtracted) before the new one is cloned in (and added).
void ftnode_pivot_keys::_replace_at_dbt(const DBT *key, int i) {
    _destroy_key_dbt(i);   // drop the old key first
    _add_key_dbt(key, i);  // then clone the replacement into the same slot
}
|
||||
|
||||
void ftnode_pivot_keys::_replace_at_fixed(const DBT *key, int i) {
|
||||
if (key->size == _fixed_keylen) {
|
||||
memcpy(_fixed_key(i), key->data, _fixed_keylen);
|
||||
} else {
|
||||
// must convert to dbt format, replacement key has different length
|
||||
_convert_to_dbt_format();
|
||||
_replace_at_dbt(key, i);
|
||||
}
|
||||
}
|
||||
|
||||
void ftnode_pivot_keys::replace_at(const DBT *key, int i) {
|
||||
if (i < _num_pivots) {
|
||||
if (_fixed_format()) {
|
||||
_replace_at_fixed(key, i);
|
||||
} else {
|
||||
_replace_at_dbt(key, i);
|
||||
}
|
||||
} else {
|
||||
invariant(i == _num_pivots); // appending to the end is ok
|
||||
insert_at(key, i);
|
||||
}
|
||||
invariant(total_size() > 0);
|
||||
}
|
||||
|
||||
// Removes slot i from the packed array by sliding the following slots left
// and shrinking _total_size by one aligned slot. The buffer is not
// reallocated, and _num_pivots is decremented by delete_at().
void ftnode_pivot_keys::_delete_at_fixed(int i) {
    const int trailing = _num_pivots - 1 - i;  // slots after the deleted one
    memmove(_fixed_key(i), _fixed_key(i + 1), trailing * _fixed_keylen_aligned);
    _total_size -= _fixed_keylen_aligned;
}
|
||||
|
||||
void ftnode_pivot_keys::_delete_at_dbt(int i) {
|
||||
// slide over existing keys, then shrink down to size
|
||||
_destroy_key_dbt(i);
|
||||
memmove(&_dbt_keys[i], &_dbt_keys[i + 1], (_num_pivots - 1 - i) * sizeof(DBT));
|
||||
REALLOC_N_ALIGNED(64, _num_pivots - 1, _dbt_keys);
|
||||
}
|
||||
|
||||
void ftnode_pivot_keys::delete_at(int i) {
|
||||
invariant(i < _num_pivots);
|
||||
|
||||
if (_fixed_format()) {
|
||||
_delete_at_fixed(i);
|
||||
} else {
|
||||
_delete_at_dbt(i);
|
||||
}
|
||||
|
||||
_num_pivots--;
|
||||
}
|
||||
|
||||
// Fixed-format half of split_at(): `other` is initialized with copies of the
// slots at index >= i, then this object's buffer is shrunk to i slots.
// _num_pivots is updated by split_at().
void ftnode_pivot_keys::_split_at_fixed(int i, ftnode_pivot_keys *other) {
    // hand the tail [i, _num_pivots) to the other set of pivots
    const int tail_count = _num_pivots - i;
    other->_create_from_fixed_keys(_fixed_key(i), _fixed_keylen, tail_count);

    // keep only the first i slots
    _total_size = i * _fixed_keylen_aligned;
    REALLOC_N_ALIGNED(64, _total_size, _fixed_keys);
}
|
||||
|
||||
void ftnode_pivot_keys::_split_at_dbt(int i, ftnode_pivot_keys *other) {
|
||||
// recreate the other set of pivots from index >= i
|
||||
other->create_from_dbts(&_dbt_keys[i], _num_pivots - i);
|
||||
|
||||
// destroy everything greater, shrink down to size
|
||||
for (int k = i; k < _num_pivots; k++) {
|
||||
_destroy_key_dbt(k);
|
||||
}
|
||||
REALLOC_N_ALIGNED(64, i, _dbt_keys);
|
||||
}
|
||||
|
||||
void ftnode_pivot_keys::split_at(int i, ftnode_pivot_keys *other) {
|
||||
if (i < _num_pivots) {
|
||||
if (_fixed_format()) {
|
||||
_split_at_fixed(i, other);
|
||||
} else {
|
||||
_split_at_dbt(i, other);
|
||||
}
|
||||
_num_pivots = i;
|
||||
}
|
||||
|
||||
sanity_check();
|
||||
}
|
||||
|
||||
void ftnode_pivot_keys::serialize_to_wbuf(struct wbuf *wb) const {
|
||||
bool fixed = _fixed_format();
|
||||
size_t written = 0;
|
||||
for (int i = 0; i < _num_pivots; i++) {
|
||||
size_t size = fixed ? _fixed_keylen : _dbt_keys[i].size;
|
||||
invariant(size);
|
||||
wbuf_nocrc_bytes(wb, fixed ? _fixed_key(i) : _dbt_keys[i].data, size);
|
||||
written += size;
|
||||
}
|
||||
invariant(written == serialized_size());
|
||||
}
|
||||
|
||||
// Returns the number of pivot keys. Paranoid builds also check that, when a
// packed key array exists, the total size is exactly one aligned slot per
// pivot.
int ftnode_pivot_keys::num_pivots() const {
    paranoid_invariant(_fixed_keys == nullptr || (_fixed_keylen_aligned * _num_pivots == _total_size));
    return _num_pivots;
}
|
||||
|
||||
// Returns the tracked total size of the pivot keys. Paranoid builds also
// check that, when a packed key array exists, the total size is exactly one
// aligned slot per pivot.
size_t ftnode_pivot_keys::total_size() const {
    paranoid_invariant(_fixed_keys == nullptr || (_fixed_keylen_aligned * _num_pivots == _total_size));
    return _total_size;
}
|
||||
|
||||
size_t ftnode_pivot_keys::serialized_size() const {
|
||||
// we only return the size that will be used when serialized, so we calculate based
|
||||
// on the fixed keylen and not the aligned keylen.
|
||||
return _fixed_format() ? _num_pivots * _fixed_keylen : _total_size;
|
||||
}
|
||||
|
||||
void ftnode_pivot_keys::sanity_check() const {
|
||||
if (_fixed_format()) {
|
||||
invariant(_dbt_keys == nullptr);
|
||||
invariant(_fixed_keylen_aligned == _align4(_fixed_keylen));
|
||||
invariant(_num_pivots * _fixed_keylen <= _total_size);
|
||||
invariant(_num_pivots * _fixed_keylen_aligned == _total_size);
|
||||
} else {
|
||||
invariant(_num_pivots == 0 || _dbt_keys != nullptr);
|
||||
size_t size = 0;
|
||||
for (int i = 0; i < _num_pivots; i++) {
|
||||
size += _dbt_keys[i].size;
|
||||
}
|
||||
invariant(size == _total_size);
|
||||
}
|
||||
}
|
@ -1,260 +0,0 @@
|
||||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "toku_portability.h"
|
||||
#include "portability/memory.h"
|
||||
#include "portability/toku_assert.h"
|
||||
#include "portability/toku_stdint.h"
|
||||
#include "portability/toku_stdlib.h"
|
||||
|
||||
#include "ft/serialize/block_allocator.h"
|
||||
#include "ft/serialize/rbtree_mhs.h"
|
||||
|
||||
// VALIDATE() runs the allocator's self-check (Validate()) only when
// TOKU_DEBUG_PARANOID is defined and non-zero; otherwise it expands to a
// statement that does nothing.
#if defined(TOKU_DEBUG_PARANOID) && TOKU_DEBUG_PARANOID
#define VALIDATE() Validate()
#else
#define VALIDATE() ((void)0)
#endif
|
||||
|
||||
void BlockAllocator::CreateInternal(uint64_t reserve_at_beginning,
|
||||
uint64_t alignment) {
|
||||
// the alignment must be at least 512 and aligned with 512 to work with
|
||||
// direct I/O
|
||||
invariant(alignment >= 512 && (alignment % 512) == 0);
|
||||
|
||||
_reserve_at_beginning = reserve_at_beginning;
|
||||
_alignment = alignment;
|
||||
_n_blocks = 0;
|
||||
_n_bytes_in_use = reserve_at_beginning;
|
||||
_tree = new MhsRbTree::Tree(alignment);
|
||||
}
|
||||
|
||||
void BlockAllocator::Create(uint64_t reserve_at_beginning, uint64_t alignment) {
|
||||
CreateInternal(reserve_at_beginning, alignment);
|
||||
_tree->Insert({reserve_at_beginning, MAX_BYTE});
|
||||
VALIDATE();
|
||||
}
|
||||
|
||||
// Release the tree allocated by CreateInternal().
//
// The pointer is reset afterwards so that calling Destroy() a second time is
// a harmless `delete nullptr` instead of a double free, and so that no stale
// pointer is left in _tree.
void BlockAllocator::Destroy() {
    delete _tree;
    _tree = nullptr;
}
|
||||
|
||||
void BlockAllocator::CreateFromBlockPairs(uint64_t reserve_at_beginning,
|
||||
uint64_t alignment,
|
||||
struct BlockPair *translation_pairs,
|
||||
uint64_t n_blocks) {
|
||||
CreateInternal(reserve_at_beginning, alignment);
|
||||
_n_blocks = n_blocks;
|
||||
|
||||
struct BlockPair *XMALLOC_N(n_blocks, pairs);
|
||||
memcpy(pairs, translation_pairs, n_blocks * sizeof(struct BlockPair));
|
||||
std::sort(pairs, pairs + n_blocks);
|
||||
|
||||
if (pairs[0]._offset > reserve_at_beginning) {
|
||||
_tree->Insert(
|
||||
{reserve_at_beginning, pairs[0]._offset - reserve_at_beginning});
|
||||
}
|
||||
for (uint64_t i = 0; i < _n_blocks; i++) {
|
||||
// Allocator does not support size 0 blocks. See
|
||||
// block_allocator_free_block.
|
||||
invariant(pairs[i]._size > 0);
|
||||
invariant(pairs[i]._offset >= _reserve_at_beginning);
|
||||
invariant(pairs[i]._offset % _alignment == 0);
|
||||
|
||||
_n_bytes_in_use += pairs[i]._size;
|
||||
|
||||
MhsRbTree::OUUInt64 free_size(MAX_BYTE);
|
||||
MhsRbTree::OUUInt64 free_offset(pairs[i]._offset + pairs[i]._size);
|
||||
if (i < n_blocks - 1) {
|
||||
MhsRbTree::OUUInt64 next_offset(pairs[i + 1]._offset);
|
||||
invariant(next_offset >= free_offset);
|
||||
free_size = next_offset - free_offset;
|
||||
if (free_size == 0)
|
||||
continue;
|
||||
}
|
||||
_tree->Insert({free_offset, free_size});
|
||||
}
|
||||
toku_free(pairs);
|
||||
VALIDATE();
|
||||
}
|
||||
|
||||
// Effect: align a value by rounding up to the next multiple of ba_alignment.
// A value that is already a multiple is returned unchanged.
//
// The remainder form is used instead of (value + ba_alignment - 1) because
// that sum can wrap around 2^64 even when the aligned result itself is
// representable (for example an already aligned value close to UINT64_MAX
// with an alignment that is not a power of two).
// Requires: ba_alignment != 0.
static inline uint64_t Align(uint64_t value, uint64_t ba_alignment) {
    const uint64_t remainder = value % ba_alignment;
    if (remainder == 0) {
        return value;
    }
    return value + (ba_alignment - remainder);
}
|
||||
|
||||
// Effect: Allocate a block. The resulting block must be aligned on the
|
||||
// ba->alignment (which to make direct_io happy must be a positive multiple of
|
||||
// 512).
|
||||
void BlockAllocator::AllocBlock(uint64_t size,
|
||||
uint64_t *offset) {
|
||||
// Allocator does not support size 0 blocks. See block_allocator_free_block.
|
||||
invariant(size > 0);
|
||||
|
||||
_n_bytes_in_use += size;
|
||||
*offset = _tree->Remove(size);
|
||||
|
||||
_n_blocks++;
|
||||
VALIDATE();
|
||||
}
|
||||
|
||||
// To support 0-sized blocks, we need to include size as an input to this
|
||||
// function.
|
||||
// All 0-sized blocks at the same offset can be considered identical, but
|
||||
// a 0-sized block can share offset with a non-zero sized block.
|
||||
// The non-zero sized block is not exchangable with a zero sized block (or vice
|
||||
// versa), so inserting 0-sized blocks can cause corruption here.
|
||||
void BlockAllocator::FreeBlock(uint64_t offset, uint64_t size) {
|
||||
VALIDATE();
|
||||
_n_bytes_in_use -= size;
|
||||
_tree->Insert({offset, size});
|
||||
_n_blocks--;
|
||||
VALIDATE();
|
||||
}
|
||||
|
||||
uint64_t BlockAllocator::AllocatedLimit() const {
|
||||
MhsRbTree::Node *max_node = _tree->MaxNode();
|
||||
return rbn_offset(max_node).ToInt();
|
||||
}
|
||||
|
||||
// Effect: Consider the blocks in sorted order. The reserved block at the
|
||||
// beginning is number 0. The next one is number 1 and so forth.
|
||||
// Return the offset and size of the block with that number.
|
||||
// Return 0 if there is a block that big, return nonzero if b is too big.
|
||||
int BlockAllocator::NthBlockInLayoutOrder(uint64_t b,
|
||||
uint64_t *offset,
|
||||
uint64_t *size) {
|
||||
MhsRbTree::Node *x, *y;
|
||||
if (b == 0) {
|
||||
*offset = 0;
|
||||
*size = _reserve_at_beginning;
|
||||
return 0;
|
||||
} else if (b > _n_blocks) {
|
||||
return -1;
|
||||
} else {
|
||||
x = _tree->MinNode();
|
||||
for (uint64_t i = 1; i <= b; i++) {
|
||||
y = x;
|
||||
x = _tree->Successor(x);
|
||||
}
|
||||
*size = (rbn_offset(x) - (rbn_offset(y) + rbn_size(y))).ToInt();
|
||||
*offset = (rbn_offset(y) + rbn_size(y)).ToInt();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
struct VisUnusedExtra {
|
||||
TOKU_DB_FRAGMENTATION _report;
|
||||
uint64_t _align;
|
||||
};
|
||||
|
||||
static void VisUnusedCollector(void *extra,
|
||||
MhsRbTree::Node *node,
|
||||
uint64_t UU(depth)) {
|
||||
struct VisUnusedExtra *v_e = (struct VisUnusedExtra *)extra;
|
||||
TOKU_DB_FRAGMENTATION report = v_e->_report;
|
||||
uint64_t alignm = v_e->_align;
|
||||
|
||||
MhsRbTree::OUUInt64 offset = rbn_offset(node);
|
||||
MhsRbTree::OUUInt64 size = rbn_size(node);
|
||||
MhsRbTree::OUUInt64 answer_offset(Align(offset.ToInt(), alignm));
|
||||
uint64_t free_space = (offset + size - answer_offset).ToInt();
|
||||
if (free_space > 0) {
|
||||
report->unused_bytes += free_space;
|
||||
report->unused_blocks++;
|
||||
if (free_space > report->largest_unused_block) {
|
||||
report->largest_unused_block = free_space;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Requires: report->file_size_bytes is filled in
|
||||
// Requires: report->data_bytes is filled in
|
||||
// Requires: report->checkpoint_bytes_additional is filled in
|
||||
void BlockAllocator::UnusedStatistics(TOKU_DB_FRAGMENTATION report) {
|
||||
invariant(_n_bytes_in_use ==
|
||||
report->data_bytes + report->checkpoint_bytes_additional);
|
||||
|
||||
report->unused_bytes = 0;
|
||||
report->unused_blocks = 0;
|
||||
report->largest_unused_block = 0;
|
||||
struct VisUnusedExtra extra = {report, _alignment};
|
||||
_tree->InOrderVisitor(VisUnusedCollector, &extra);
|
||||
}
|
||||
|
||||
// Fill `report` from the allocator's own counters: data bytes and data
// blocks come from the allocator, file size and additional checkpoint bytes
// are set to 0, and the unused-space fields are then computed by
// UnusedStatistics().
void BlockAllocator::Statistics(TOKU_DB_FRAGMENTATION report) {
    report->file_size_bytes = 0;
    report->checkpoint_bytes_additional = 0;

    report->data_blocks = _n_blocks;
    report->data_bytes = _n_bytes_in_use;

    UnusedStatistics(report);
}
|
||||
|
||||
struct ValidateExtra {
|
||||
uint64_t _bytes;
|
||||
MhsRbTree::Node *_pre_node;
|
||||
};
|
||||
static void VisUsedBlocksInOrder(void *extra,
|
||||
MhsRbTree::Node *cur_node,
|
||||
uint64_t UU(depth)) {
|
||||
struct ValidateExtra *v_e = (struct ValidateExtra *)extra;
|
||||
MhsRbTree::Node *pre_node = v_e->_pre_node;
|
||||
// verify no overlaps
|
||||
if (pre_node) {
|
||||
invariant(rbn_size(pre_node) > 0);
|
||||
invariant(rbn_offset(cur_node) >
|
||||
rbn_offset(pre_node) + rbn_size(pre_node));
|
||||
MhsRbTree::OUUInt64 used_space =
|
||||
rbn_offset(cur_node) - (rbn_offset(pre_node) + rbn_size(pre_node));
|
||||
v_e->_bytes += used_space.ToInt();
|
||||
} else {
|
||||
v_e->_bytes += rbn_offset(cur_node).ToInt();
|
||||
}
|
||||
v_e->_pre_node = cur_node;
|
||||
}
|
||||
|
||||
void BlockAllocator::Validate() const {
|
||||
_tree->ValidateBalance();
|
||||
_tree->ValidateMhs();
|
||||
struct ValidateExtra extra = {0, nullptr};
|
||||
_tree->InOrderVisitor(VisUsedBlocksInOrder, &extra);
|
||||
invariant(extra._bytes == _n_bytes_in_use);
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user